Bug 944538 - Update libopus to 1.1rc2. r=cpearce
authorRalph Giles <giles@mozilla.com>
Thu, 28 Nov 2013 14:20:00 -0800
changeset 173974 07e357311cf304e2b55d78f4af0fa03a05a3e637
parent 173973 cb5d23080c7012b83282cf01f8dd3ed8ed187bf6
child 173975 27c14a1b1d4ca47172337cbd4962834ff3227486
push id445
push userffxbld
push dateMon, 10 Mar 2014 22:05:19 +0000
treeherdermozilla-release@dc38b741b04e [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerscpearce
bugs944538
milestone28.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 944538 - Update libopus to 1.1rc2. r=cpearce Bumped to include the MULT16_32_P16 bugfix.
media/libopus/README_MOZILLA
media/libopus/celt/_kiss_fft_guts.h
media/libopus/celt/arch.h
media/libopus/celt/arm/arm_celt_map.c
media/libopus/celt/arm/armcpu.c
media/libopus/celt/arm/armcpu.h
media/libopus/celt/arm/armopts.s.in
media/libopus/celt/arm/celt_pitch_xcorr_arm.s
media/libopus/celt/arm/fixed_armv4.h
media/libopus/celt/arm/fixed_armv5e.h
media/libopus/celt/arm/pitch_arm.h
media/libopus/celt/bands.c
media/libopus/celt/celt.h
media/libopus/celt/celt_decoder.c
media/libopus/celt/celt_encoder.c
media/libopus/celt/celt_lpc.c
media/libopus/celt/celt_lpc.h
media/libopus/celt/cpu_support.h
media/libopus/celt/cwrs.c
media/libopus/celt/ecintrin.h
media/libopus/celt/entcode.h
media/libopus/celt/fixed_debug.h
media/libopus/celt/fixed_generic.h
media/libopus/celt/float_cast.h
media/libopus/celt/mathops.c
media/libopus/celt/mathops.h
media/libopus/celt/os_support.h
media/libopus/celt/pitch.c
media/libopus/celt/pitch.h
media/libopus/celt/quant_bands.c
media/libopus/celt/rate.c
media/libopus/celt/rate.h
media/libopus/celt/stack_alloc.h
media/libopus/celt/x86/pitch_sse.h
media/libopus/celt_sources.mk
media/libopus/include/opus.h
media/libopus/include/opus_custom.h
media/libopus/include/opus_defines.h
media/libopus/moz.build
media/libopus/silk/A2NLSF.c
media/libopus/silk/API.h
media/libopus/silk/CNG.c
media/libopus/silk/Inlines.h
media/libopus/silk/LP_variable_cutoff.c
media/libopus/silk/MacroCount.h
media/libopus/silk/MacroDebug.h
media/libopus/silk/NLSF2A.c
media/libopus/silk/NLSF_decode.c
media/libopus/silk/NLSF_del_dec_quant.c
media/libopus/silk/NSQ.c
media/libopus/silk/NSQ_del_dec.c
media/libopus/silk/PLC.c
media/libopus/silk/SigProc_FIX.h
media/libopus/silk/VAD.c
media/libopus/silk/VQ_WMat_EC.c
media/libopus/silk/arm/SigProc_FIX_armv4.h
media/libopus/silk/arm/SigProc_FIX_armv5e.h
media/libopus/silk/arm/macros_armv4.h
media/libopus/silk/arm/macros_armv5e.h
media/libopus/silk/control.h
media/libopus/silk/control_codec.c
media/libopus/silk/dec_API.c
media/libopus/silk/enc_API.c
media/libopus/silk/encode_pulses.c
media/libopus/silk/fixed/autocorr_FIX.c
media/libopus/silk/fixed/burg_modified_FIX.c
media/libopus/silk/fixed/encode_frame_FIX.c
media/libopus/silk/fixed/find_LPC_FIX.c
media/libopus/silk/fixed/find_pitch_lags_FIX.c
media/libopus/silk/fixed/find_pred_coefs_FIX.c
media/libopus/silk/fixed/main_FIX.h
media/libopus/silk/fixed/noise_shape_analysis_FIX.c
media/libopus/silk/fixed/pitch_analysis_core_FIX.c
media/libopus/silk/fixed/prefilter_FIX.c
media/libopus/silk/fixed/solve_LS_FIX.c
media/libopus/silk/float/LPC_analysis_filter_FLP.c
media/libopus/silk/float/SigProc_FLP.h
media/libopus/silk/float/encode_frame_FLP.c
media/libopus/silk/float/find_pitch_lags_FLP.c
media/libopus/silk/float/find_pred_coefs_FLP.c
media/libopus/silk/float/main_FLP.h
media/libopus/silk/float/noise_shape_analysis_FLP.c
media/libopus/silk/float/pitch_analysis_core_FLP.c
media/libopus/silk/float/prefilter_FLP.c
media/libopus/silk/float/solve_LS_FLP.c
media/libopus/silk/float/wrappers_FLP.c
media/libopus/silk/init_encoder.c
media/libopus/silk/log2lin.c
media/libopus/silk/macros.h
media/libopus/silk/main.h
media/libopus/silk/quant_LTP_gains.c
media/libopus/silk/resampler_private_IIR_FIR.c
media/libopus/silk/resampler_private_down_FIR.c
media/libopus/silk/shell_coder.c
media/libopus/silk/structs.h
media/libopus/silk/tables.h
media/libopus/silk/tables_LTP.c
media/libopus/silk/tuning_parameters.h
media/libopus/silk/typedef.h
media/libopus/src/analysis.c
media/libopus/src/analysis.h
media/libopus/src/mlp.c
media/libopus/src/mlp.h
media/libopus/src/opus.c
media/libopus/src/opus_decoder.c
media/libopus/src/opus_encoder.c
media/libopus/src/opus_multistream_decoder.c
media/libopus/src/opus_multistream_encoder.c
media/libopus/src/opus_private.h
media/libopus/src/repacketizer.c
media/libopus/src/tansig_table.h
--- a/media/libopus/README_MOZILLA
+++ b/media/libopus/README_MOZILLA
@@ -3,9 +3,9 @@ IETF Opus audio codec reference implemen
 The source in this directory was copied from an opus
 repository checkout by running the ./update.sh script.
 Any changes made to this version of the source should
 be reflected in that script, e.g. by applying patch
 files after the copy step.
 
 The upstream repository is https://git.xiph.org/opus.git
 
-The git tag/revision used was v1.1-beta-23-gf2446c2.
+The git tag/revision used was v1.1-rc2-1-g35a44c6.
--- a/media/libopus/celt/_kiss_fft_guts.h
+++ b/media/libopus/celt/_kiss_fft_guts.h
@@ -89,21 +89,21 @@
 #define C_ADDTO( res , a)\
     do {(res).r = ADD32((res).r, (a).r);  (res).i = ADD32((res).i,(a).i);\
     }while(0)
 
 #define C_SUBFROM( res , a) /* res -= a, component-wise; both parts use SUB32 */ \
     do {(res).r = SUB32((res).r,(a).r);  (res).i = SUB32((res).i,(a).i); \
     }while(0)
 
-#if defined(ARMv4_ASM)
+#if defined(OPUS_ARM_INLINE_ASM)
 #include "arm/kiss_fft_armv4.h"
 #endif
 
-#if defined(ARMv5E_ASM)
+#if defined(OPUS_ARM_INLINE_EDSP)
 #include "arm/kiss_fft_armv5e.h"
 #endif
 
 #else  /* not FIXED_POINT*/
 
 #   define S_MUL(a,b) ( (a)*(b) )
 #define C_MUL(m,a,b) \
     do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
--- a/media/libopus/celt/arch.h
+++ b/media/libopus/celt/arch.h
@@ -30,16 +30,17 @@
    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
 #ifndef ARCH_H
 #define ARCH_H
 
 #include "opus_types.h"
+#include "opus_defines.h"
 
 # if !defined(__GNUC_PREREQ)
 #  if defined(__GNUC__)&&defined(__GNUC_MINOR__)
 #   define __GNUC_PREREQ(_maj,_min) \
  ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min))
 #  else
 #   define __GNUC_PREREQ(_maj,_min) 0
 #  endif
@@ -49,17 +50,17 @@
 
 #define celt_fatal(str) _celt_fatal(str, __FILE__, __LINE__);
 #ifdef ENABLE_ASSERTIONS
 #include <stdio.h>
 #include <stdlib.h>
 #ifdef __GNUC__
 __attribute__((noreturn))
 #endif
-static inline void _celt_fatal(const char *str, const char *file, int line)
+static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line)
 {
    fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str);
    abort();
 }
 #define celt_assert(cond) {if (!(cond)) {celt_fatal("assertion failed: " #cond);}}
 #define celt_assert2(cond, message) {if (!(cond)) {celt_fatal("assertion failed: " #cond "\n" message);}}
 #else
 #define celt_assert(cond)
@@ -108,19 +109,19 @@ typedef opus_val32 celt_ener;
 #define SCALEOUT(a)     (a)
 
 #ifdef FIXED_DEBUG
 #include "fixed_debug.h"
 #else
 
 #include "fixed_generic.h"
 
-#ifdef ARMv5E_ASM
+#ifdef OPUS_ARM_INLINE_EDSP
 #include "arm/fixed_armv5e.h"
-#elif defined (ARMv4_ASM)
+#elif defined (OPUS_ARM_INLINE_ASM)
 #include "arm/fixed_armv4.h"
 #elif defined (BFIN_ASM)
 #include "fixed_bfin.h"
 #elif defined (TI_C5X_ASM)
 #include "fixed_c5x.h"
 #elif defined (TI_C6X_ASM)
 #include "fixed_c6x.h"
 #endif
@@ -180,16 +181,17 @@ typedef float celt_ener;
 #define MULT16_32_Q15(a,b)     ((a)*(b))
 #define MULT16_32_Q16(a,b)     ((a)*(b))
 
 #define MULT32_32_Q31(a,b)     ((a)*(b))
 
 #define MAC16_32_Q15(c,a,b)     ((c)+(a)*(b))
 
 #define MULT16_16_Q11_32(a,b)     ((a)*(b))
+#define MULT16_16_Q11(a,b)     ((a)*(b))
 #define MULT16_16_Q13(a,b)     ((a)*(b))
 #define MULT16_16_Q14(a,b)     ((a)*(b))
 #define MULT16_16_Q15(a,b)     ((a)*(b))
 #define MULT16_16_P15(a,b)     ((a)*(b))
 #define MULT16_16_P13(a,b)     ((a)*(b))
 #define MULT16_16_P14(a,b)     ((a)*(b))
 #define MULT16_32_P16(a,b)     ((a)*(b))
 
new file mode 100644
--- /dev/null
+++ b/media/libopus/celt/arm/arm_celt_map.c
@@ -0,0 +1,49 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pitch.h"
+
+#if defined(OPUS_HAVE_RTCD)
+
+# if defined(FIXED_POINT)
+opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
+    const opus_val16 *, opus_val32 *, int , int) = { /* one celt_pitch_xcorr implementation per ARM arch level */
+  celt_pitch_xcorr_c,               /* ARMv4: always the plain C version */
+  MAY_HAVE_EDSP(celt_pitch_xcorr),  /* EDSP: the _edsp version if enabled, else _c */
+  MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media: the _media version if enabled, else the EDSP choice */
+  MAY_HAVE_NEON(celt_pitch_xcorr)   /* NEON: the _neon version if enabled, else the Media choice */
+};
+# else
+#  error "Floating-point implementation is not supported by ARM asm yet." \
+ "Reconfigure with --disable-rtcd or send patches."
+# endif
+
+#endif
--- a/media/libopus/celt/arm/armcpu.c
+++ b/media/libopus/celt/arm/armcpu.c
@@ -44,41 +44,41 @@
 #define OPUS_CPU_ARM_NEON  (1<<3)
 
 #if defined(_MSC_VER)
 /*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
 # define WIN32_LEAN_AND_MEAN
 # define WIN32_EXTRA_LEAN
 # include <windows.h>
 
-static inline opus_uint32 opus_cpu_capabilities(void){
+static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){
   opus_uint32 flags;
   flags=0;
-  /* MSVC has no inline __asm support for ARM, but it does let you __emit
+  /* MSVC has no inline __asm support for ARM, but it does let you __emit
    * instructions via their assembled hex code.
    * All of these instructions should be essentially nops. */
-# if defined(ARMv5E_ASM)
+# if defined(OPUS_ARM_MAY_HAVE_EDSP)
   __try{
     /*PLD [r13]*/
     __emit(0xF5DDF000);
     flags|=OPUS_CPU_ARM_EDSP;
   }
   __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
     /*Ignore exception.*/
   }
-#  if defined(ARMv6E_ASM)
+#  if defined(OPUS_ARM_MAY_HAVE_MEDIA)
   __try{
     /*SHADD8 r3,r3,r3*/
     __emit(0xE6333F93);
     flags|=OPUS_CPU_ARM_MEDIA;
   }
   __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
     /*Ignore exception.*/
   }
-#   if defined(ARM_HAVE_NEON)
+#   if defined(OPUS_ARM_MAY_HAVE_NEON)
   __try{
     /*VORR q0,q0,q0*/
     __emit(0xF2200150);
     flags|=OPUS_CPU_ARM_NEON;
   }
   __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
     /*Ignore exception.*/
   }
@@ -102,38 +102,46 @@ opus_uint32 opus_cpu_capabilities(void)
   if(cpuinfo != NULL)
   {
     /* 512 should be enough for anybody (it's even enough for all the flags that
      * x86 has accumulated... so far). */
     char buf[512];
 
     while(fgets(buf, 512, cpuinfo) != NULL)
     {
+# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_NEON)
       /* Search for edsp and neon flag */
       if(memcmp(buf, "Features", 8) == 0)
       {
         char *p;
+#  if defined(OPUS_ARM_MAY_HAVE_EDSP)
         p = strstr(buf, " edsp");
         if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
           flags |= OPUS_CPU_ARM_EDSP;
+#  endif
 
+#  if defined(OPUS_ARM_MAY_HAVE_NEON)
         p = strstr(buf, " neon");
         if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
           flags |= OPUS_CPU_ARM_NEON;
+#  endif
       }
+# endif
 
+# if defined(OPUS_ARM_MAY_HAVE_MEDIA)
       /* Search for media capabilities (>= ARMv6) */
       if(memcmp(buf, "CPU architecture:", 17) == 0)
       {
         int version;
         version = atoi(buf+17);
 
         if(version >= 6)
           flags |= OPUS_CPU_ARM_MEDIA;
       }
+# endif
     }
 
     fclose(cpuinfo);
   }
   return flags;
 }
 #else
 /* The feature registers which can tell us what the processor supports are
--- a/media/libopus/celt/arm/armcpu.h
+++ b/media/libopus/celt/arm/armcpu.h
@@ -20,16 +20,52 @@
    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-/* Original code from libtheora modified to suit to Opus */
+#if !defined(ARMCPU_H)
+# define ARMCPU_H
+
+# if defined(OPUS_ARM_MAY_HAVE_EDSP) /* MAY_HAVE_*(name): pick the best enabled variant of name */
+#  define MAY_HAVE_EDSP(name) name ## _edsp
+# else
+#  define MAY_HAVE_EDSP(name) name ## _c /* No EDSP variant enabled: use the C version. */
+# endif
+
+# if defined(OPUS_ARM_MAY_HAVE_MEDIA)
+#  define MAY_HAVE_MEDIA(name) name ## _media
+# else
+#  define MAY_HAVE_MEDIA(name) MAY_HAVE_EDSP(name) /* Fall back to the EDSP choice. */
+# endif
+
+# if defined(OPUS_ARM_MAY_HAVE_NEON)
+#  define MAY_HAVE_NEON(name) name ## _neon
+# else
+#  define MAY_HAVE_NEON(name) MAY_HAVE_MEDIA(name) /* Fall back to the Media choice. */
+# endif
 
-#ifndef ARMCPU_H
-#define ARMCPU_H
+# if defined(OPUS_ARM_PRESUME_EDSP) /* PRESUME_*(name): same fallback chain, keyed on OPUS_ARM_PRESUME_* */
+#  define PRESUME_EDSP(name) name ## _edsp
+# else
+#  define PRESUME_EDSP(name) name ## _c /* EDSP not presumed: use the C version. */
+# endif
 
+# if defined(OPUS_ARM_PRESUME_MEDIA)
+#  define PRESUME_MEDIA(name) name ## _media
+# else
+#  define PRESUME_MEDIA(name) PRESUME_EDSP(name) /* Fall back to the EDSP choice. */
+# endif
+
+# if defined(OPUS_ARM_PRESUME_NEON)
+#  define PRESUME_NEON(name) name ## _neon
+# else
+#  define PRESUME_NEON(name) PRESUME_MEDIA(name) /* Fall back to the Media choice. */
+# endif
+
+# if defined(OPUS_HAVE_RTCD)
 int opus_select_arch(void);
+# endif
 
 #endif
new file mode 100644
--- /dev/null
+++ b/media/libopus/celt/arm/armopts.s.in
@@ -0,0 +1,37 @@
+/* Copyright (C) 2013 Mozilla Corporation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+; Set the following to 1 if we have EDSP instructions
+;  (LDRD/STRD, etc., ARMv5E and later).
+OPUS_ARM_MAY_HAVE_EDSP  * @OPUS_ARM_MAY_HAVE_EDSP@
+
+; Set the following to 1 if we have ARMv6 media instructions.
+OPUS_ARM_MAY_HAVE_MEDIA * @OPUS_ARM_MAY_HAVE_MEDIA@
+
+; Set the following to 1 if we have NEON (some ARMv7)
+OPUS_ARM_MAY_HAVE_NEON  * @OPUS_ARM_MAY_HAVE_NEON@
+
+END
new file mode 100644
--- /dev/null
+++ b/media/libopus/celt/arm/celt_pitch_xcorr_arm.s
@@ -0,0 +1,545 @@
+; Copyright (c) 2007-2008 CSIRO
+; Copyright (c) 2007-2009 Xiph.Org Foundation
+; Copyright (c) 2013      Parrot
+; Written by Aurélien Zanelli
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+;
+; - Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+;
+; - Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in the
+; documentation and/or other materials provided with the distribution.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  AREA  |.text|, CODE, READONLY
+
+  GET    celt/arm/armopts.s
+
+IF OPUS_ARM_MAY_HAVE_EDSP
+  EXPORT celt_pitch_xcorr_edsp
+ENDIF
+
+IF OPUS_ARM_MAY_HAVE_NEON
+  EXPORT celt_pitch_xcorr_neon
+ENDIF
+
+IF OPUS_ARM_MAY_HAVE_NEON
+
+; Accumulate sum[k]+=sum(x[j]*y[j+k],j=0...len-1), k=0...3, into q0.
+xcorr_kernel_neon PROC
+  ; input:
+  ;   r3     = int         len
+  ;   r4     = opus_val16 *x
+  ;   r5     = opus_val16 *y
+  ;   q0     = opus_val32  sum[4]
+  ; output:
+  ;   q0     = opus_val32  sum[4]
+  ; preserved: r0-r3, r6-r11, d2, q4-q7, q9-q15
+  ; internal usage:
+  ;   r12 = int j
+  ;   d3  = y_3|y_2|y_1|y_0
+  ;   q2  = y_B|y_A|y_9|y_8|y_7|y_6|y_5|y_4
+  ;   q3  = x_7|x_6|x_5|x_4|x_3|x_2|x_1|x_0
+  ;   q8  = scratch
+  ;
+  ; Load y[0...3]
+  ; This requires len>0 to always be valid (which we assert in the C code).
+  VLD1.16      {d5}, [r5]!
+  SUBS         r12, r3, #8        ; j = len-8
+  BLE xcorr_kernel_neon_process4
+; Process 8 samples at a time.
+; This loop loads one y value more than we actually need. Therefore we have to
+; stop as soon as there are 8 or fewer samples left (instead of 7), to avoid
+; reading past the end of the array.
+xcorr_kernel_neon_process8
+  ; This loop has 19 total instructions (10 cycles to issue, minimum), with
+  ; - 2 cycles of ARM instructions,
+  ; - 10 cycles of load/store/byte permute instructions, and
+  ; - 9 cycles of data processing instructions.
+  ; On a Cortex A8, we dual-issue the maximum amount (9 cycles) between the
+  ; latter two categories, meaning the whole loop should run in 10 cycles per
+  ; iteration, barring cache misses.
+  ;
+  ; Load x[0...7]
+  VLD1.16      {d6, d7}, [r4]!
+  ; Unlike VMOV, VAND is a data processing instruction (and doesn't get
+  ; assembled to VMOV, like VORR would), so it dual-issues with the prior VLD1.
+  VAND         d3, d5, d5
+  SUBS         r12, r12, #8
+  ; Load y[4...11]
+  VLD1.16      {d4, d5}, [r5]!
+  VMLAL.S16    q0, d3, d6[0]
+  VEXT.16      d16, d3, d4, #1
+  VMLAL.S16    q0, d4, d7[0]
+  VEXT.16      d17, d4, d5, #1
+  VMLAL.S16    q0, d16, d6[1]
+  VEXT.16      d16, d3, d4, #2
+  VMLAL.S16    q0, d17, d7[1]
+  VEXT.16      d17, d4, d5, #2
+  VMLAL.S16    q0, d16, d6[2]
+  VEXT.16      d16, d3, d4, #3
+  VMLAL.S16    q0, d17, d7[2]
+  VEXT.16      d17, d4, d5, #3
+  VMLAL.S16    q0, d16, d6[3]
+  VMLAL.S16    q0, d17, d7[3]
+  BGT xcorr_kernel_neon_process8
+; Process 4 samples here if we have > 4 left (still reading one extra y value).
+xcorr_kernel_neon_process4
+  ADDS         r12, r12, #4
+  BLE xcorr_kernel_neon_process2
+  ; Load x[0...3]
+  VLD1.16      d6, [r4]!
+  ; Use VAND since it's a data processing instruction again.
+  VAND         d4, d5, d5
+  SUB          r12, r12, #4
+  ; Load y[4...7]
+  VLD1.16      d5, [r5]!
+  VMLAL.S16    q0, d4, d6[0]
+  VEXT.16      d16, d4, d5, #1
+  VMLAL.S16    q0, d16, d6[1]
+  VEXT.16      d16, d4, d5, #2
+  VMLAL.S16    q0, d16, d6[2]
+  VEXT.16      d16, d4, d5, #3
+  VMLAL.S16    q0, d16, d6[3]
+; Process 2 samples here if we have > 2 left (still reading one extra y value).
+xcorr_kernel_neon_process2
+  ADDS         r12, r12, #2
+  BLE xcorr_kernel_neon_process1
+  ; Load x[0...1]
+  VLD2.16      {d6[],d7[]}, [r4]!
+  ; Use VAND since it's a data processing instruction again.
+  VAND         d4, d5, d5
+  SUB          r12, r12, #2
+  ; Load y[4...5]
+  VLD1.32      {d5[]}, [r5]!
+  VMLAL.S16    q0, d4, d6
+  VEXT.16      d16, d4, d5, #1
+  ; Replace bottom copy of {y5,y4} in d5 with {y3,y2} from d4, using VSRI
+  ; instead of VEXT, since it's a data-processing instruction.
+  VSRI.64      d5, d4, #32
+  VMLAL.S16    q0, d16, d7
+; Process 1 sample using the extra y value we loaded above.
+xcorr_kernel_neon_process1
+  ; Load next *x
+  VLD1.16      {d6[]}, [r4]!
+  ADDS         r12, r12, #1
+  ; y[0...3] are left in d5 from prior iteration(s) (if any)
+  VMLAL.S16    q0, d5, d6
+  MOVLE        pc, lr             ; Return unless one more sample remains.
+; Now process 1 last sample, not reading ahead.
+  ; Load last *y
+  VLD1.16      {d4[]}, [r5]!
+  VSRI.64      d4, d5, #16
+  ; Load last *x
+  VLD1.16      {d6[]}, [r4]!
+  VMLAL.S16    q0, d4, d6
+  MOV          pc, lr             ; Return.
+  ENDP
+
+; opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y,
+;  opus_val32 *xcorr, int len, int max_pitch)
+celt_pitch_xcorr_neon PROC
+  ; input:
+  ;   r0  = opus_val16 *_x
+  ;   r1  = opus_val16 *_y
+  ;   r2  = opus_val32 *xcorr
+  ;   r3  = int         len
+  ; output:
+  ;   r0  = int         maxcorr
+  ; internal usage:
+  ;   r4  = opus_val16 *x (for xcorr_kernel_neon())
+  ;   r5  = opus_val16 *y (for xcorr_kernel_neon())
+  ;   r6  = int         max_pitch
+  ;   r12 = int         j
+  ;   q15 = int         maxcorr[4] (q15 is not used by xcorr_kernel_neon())
+  STMFD        sp!, {r4-r6, lr}
+  LDR          r6, [sp, #16]      ; r6 = max_pitch, read from just above the 16 bytes pushed above
+  VMOV.S32     q15, #1            ; maxcorr[0...3] = 1
+  ; if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done
+  SUBS         r6, r6, #4
+  BLT celt_pitch_xcorr_neon_process4_done
+celt_pitch_xcorr_neon_process4
+  ; xcorr_kernel_neon parameters:
+  ; r3 = len, r4 = _x, r5 = _y, q0 = {0, 0, 0, 0}
+  MOV          r4, r0
+  MOV          r5, r1
+  VEOR         q0, q0, q0
+  ; xcorr_kernel_neon only modifies r4, r5, r12, q0, d3, q2, q3, and q8.
+  ; So we don't save/restore any other registers.
+  BL xcorr_kernel_neon
+  SUBS         r6, r6, #4
+  VST1.32      {q0}, [r2]!
+  ; _y += 4
+  ADD          r1, r1, #8
+  VMAX.S32     q15, q15, q0
+  ; Loop again while at least 4 more sums remain (flags from the SUBS above).
+  BGE celt_pitch_xcorr_neon_process4
+; We have less than 4 sums left to compute.
+celt_pitch_xcorr_neon_process4_done
+  ADDS         r6, r6, #4
+  ; Reduce maxcorr to a single value
+  VMAX.S32     d30, d30, d31
+  VPMAX.S32    d30, d30, d30
+  ; if (max_pitch <= 0) goto celt_pitch_xcorr_neon_done
+  BLE celt_pitch_xcorr_neon_done
+; Now compute each remaining sum one at a time.
+celt_pitch_xcorr_neon_process_remaining
+  MOV          r4, r0
+  MOV          r5, r1
+  VMOV.I32     q0, #0
+  SUBS         r12, r3, #8
+  BLT celt_pitch_xcorr_neon_process_remaining4
+; Sum terms 8 at a time.
+celt_pitch_xcorr_neon_process_remaining_loop8
+  ; Load x[0...7]
+  VLD1.16      {q1}, [r4]!
+  ; Load y[0...7]
+  VLD1.16      {q2}, [r5]!
+  SUBS         r12, r12, #8
+  VMLAL.S16    q0, d4, d2
+  VMLAL.S16    q0, d5, d3
+  BGE celt_pitch_xcorr_neon_process_remaining_loop8
+; Sum terms 4 at a time.
+celt_pitch_xcorr_neon_process_remaining4
+  ADDS         r12, r12, #4
+  BLT celt_pitch_xcorr_neon_process_remaining4_done
+  ; Load x[0...3]
+  VLD1.16      {d2}, [r4]!
+  ; Load y[0...3]
+  VLD1.16      {d3}, [r5]!
+  SUB          r12, r12, #4
+  VMLAL.S16    q0, d3, d2
+celt_pitch_xcorr_neon_process_remaining4_done
+  ; Reduce the sum to a single value.
+  VADD.S32     d0, d0, d1
+  VPADDL.S32   d0, d0
+  ADDS         r12, r12, #4
+  BLE celt_pitch_xcorr_neon_process_remaining_loop_done
+; Sum terms 1 at a time.
+celt_pitch_xcorr_neon_process_remaining_loop1
+  VLD1.16      {d2[]}, [r4]!
+  VLD1.16      {d3[]}, [r5]!
+  SUBS         r12, r12, #1
+  VMLAL.S16    q0, d2, d3
+  BGT celt_pitch_xcorr_neon_process_remaining_loop1
+celt_pitch_xcorr_neon_process_remaining_loop_done
+  VST1.32      {d0[0]}, [r2]!
+  VMAX.S32     d30, d30, d0
+  SUBS         r6, r6, #1
+  ; _y++
+  ADD          r1, r1, #2
+  ; if (--max_pitch > 0) goto celt_pitch_xcorr_neon_process_remaining
+  BGT celt_pitch_xcorr_neon_process_remaining
+celt_pitch_xcorr_neon_done
+  VMOV.32      r0, d30[0]
+  LDMFD        sp!, {r4-r6, pc}
+  ENDP
+
+ENDIF
+
+IF OPUS_ARM_MAY_HAVE_EDSP
+
+; This will get used on ARMv7 devices without NEON, so it has been optimized
+; to take advantage of dual-issuing where possible.
+xcorr_kernel_edsp PROC
+  ; input:
+  ;   r3      = int         len
+  ;   r4      = opus_val16 *_x (must be 32-bit aligned)
+  ;   r5      = opus_val16 *_y (must be 32-bit aligned)
+  ;   r6...r9 = opus_val32  sum[4]
+  ; output:
+  ;   r6...r9 = opus_val32  sum[4]
+  ; preserved: r0-r5
+  ; internal usage
+  ;   r2      = int         j
+  ;   r12,r14 = opus_val16  x[4]
+  ;   r10,r11 = opus_val16  y[4]
+  STMFD        sp!, {r2,r4,r5,lr} ; Save r2, r4, r5 (restored on exit) and lr
+  LDR          r10, [r5], #4      ; Load y[0...1]
+  SUBS         r2, r3, #4         ; j = len-4
+  LDR          r11, [r5], #4      ; Load y[2...3]
+  BLE xcorr_kernel_edsp_process4_done
+  LDR          r12, [r4], #4      ; Load x[0...1]
+  ; Stall
+xcorr_kernel_edsp_process4
+  ; The multiplies must issue from pipeline 0, and can't dual-issue with each
+  ; other. Every other instruction here dual-issues with a multiply, and is
+  ; thus "free". There should be no stalls in the body of the loop.
+  SMLABB       r6, r12, r10, r6   ; sum[0] = MAC16_16(sum[0],x_0,y_0)
+  LDR          r14, [r4], #4      ; Load x[2...3]
+  SMLABT       r7, r12, r10, r7   ; sum[1] = MAC16_16(sum[1],x_0,y_1)
+  SUBS         r2, r2, #4         ; j-=4
+  SMLABB       r8, r12, r11, r8   ; sum[2] = MAC16_16(sum[2],x_0,y_2)
+  SMLABT       r9, r12, r11, r9   ; sum[3] = MAC16_16(sum[3],x_0,y_3)
+  SMLATT       r6, r12, r10, r6   ; sum[0] = MAC16_16(sum[0],x_1,y_1)
+  LDR          r10, [r5], #4      ; Load y[4...5]
+  SMLATB       r7, r12, r11, r7   ; sum[1] = MAC16_16(sum[1],x_1,y_2)
+  SMLATT       r8, r12, r11, r8   ; sum[2] = MAC16_16(sum[2],x_1,y_3)
+  SMLATB       r9, r12, r10, r9   ; sum[3] = MAC16_16(sum[3],x_1,y_4)
+  LDRGT        r12, [r4], #4      ; Load x[0...1]
+  SMLABB       r6, r14, r11, r6   ; sum[0] = MAC16_16(sum[0],x_2,y_2)
+  SMLABT       r7, r14, r11, r7   ; sum[1] = MAC16_16(sum[1],x_2,y_3)
+  SMLABB       r8, r14, r10, r8   ; sum[2] = MAC16_16(sum[2],x_2,y_4)
+  SMLABT       r9, r14, r10, r9   ; sum[3] = MAC16_16(sum[3],x_2,y_5)
+  SMLATT       r6, r14, r11, r6   ; sum[0] = MAC16_16(sum[0],x_3,y_3)
+  LDR          r11, [r5], #4      ; Load y[6...7]
+  SMLATB       r7, r14, r10, r7   ; sum[1] = MAC16_16(sum[1],x_3,y_4)
+  SMLATT       r8, r14, r10, r8   ; sum[2] = MAC16_16(sum[2],x_3,y_5)
+  SMLATB       r9, r14, r11, r9   ; sum[3] = MAC16_16(sum[3],x_3,y_6)
+  BGT xcorr_kernel_edsp_process4
+xcorr_kernel_edsp_process4_done
+  ADDS         r2, r2, #4
+  BLE xcorr_kernel_edsp_done
+  LDRH         r12, [r4], #2      ; r12 = *x++
+  SUBS         r2, r2, #1         ; j--
+  ; Stall
+  SMLABB       r6, r12, r10, r6   ; sum[0] = MAC16_16(sum[0],x,y_0)
+  LDRGTH       r14, [r4], #2      ; r14 = *x++
+  SMLABT       r7, r12, r10, r7   ; sum[1] = MAC16_16(sum[1],x,y_1)
+  SMLABB       r8, r12, r11, r8   ; sum[2] = MAC16_16(sum[2],x,y_2)
+  SMLABT       r9, r12, r11, r9   ; sum[3] = MAC16_16(sum[3],x,y_3)
+  BLE xcorr_kernel_edsp_done
+  SMLABT       r6, r14, r10, r6   ; sum[0] = MAC16_16(sum[0],x,y_1)
+  SUBS         r2, r2, #1         ; j--
+  SMLABB       r7, r14, r11, r7   ; sum[1] = MAC16_16(sum[1],x,y_2)
+  LDRH         r10, [r5], #2      ; r10 = y_4 = *y++
+  SMLABT       r8, r14, r11, r8   ; sum[2] = MAC16_16(sum[2],x,y_3)
+  LDRGTH       r12, [r4], #2      ; r12 = *x++
+  SMLABB       r9, r14, r10, r9   ; sum[3] = MAC16_16(sum[3],x,y_4)
+  BLE xcorr_kernel_edsp_done
+  SMLABB       r6, r12, r11, r6   ; sum[0] = MAC16_16(sum[0],tmp,y_2)
+  CMP          r2, #1             ; Test j-1 (flags only; r2 is reused for y_5)
+  SMLABT       r7, r12, r11, r7   ; sum[1] = MAC16_16(sum[1],tmp,y_3)
+  LDRH         r2, [r5], #2       ; r2 = y_5 = *y++
+  SMLABB       r8, r12, r10, r8   ; sum[2] = MAC16_16(sum[2],tmp,y_4)
+  LDRGTH       r14, [r4]          ; r14 = *x
+  SMLABB       r9, r12, r2, r9    ; sum[3] = MAC16_16(sum[3],tmp,y_5)
+  BLE xcorr_kernel_edsp_done
+  SMLABT       r6, r14, r11, r6   ; sum[0] = MAC16_16(sum[0],tmp,y_3)
+  LDRH         r11, [r5]          ; r11 = y_6 = *y
+  SMLABB       r7, r14, r10, r7   ; sum[1] = MAC16_16(sum[1],tmp,y_4)
+  SMLABB       r8, r14, r2, r8    ; sum[2] = MAC16_16(sum[2],tmp,y_5)
+  SMLABB       r9, r14, r11, r9   ; sum[3] = MAC16_16(sum[3],tmp,y_6)
+xcorr_kernel_edsp_done
+  LDMFD        sp!, {r2,r4,r5,pc}
+  ENDP
+
+celt_pitch_xcorr_edsp PROC
+  ; input:
+  ;   r0  = opus_val16 *_x (must be 32-bit aligned)
+  ;   r1  = opus_val16 *_y (only needs to be 16-bit aligned)
+  ;   r2  = opus_val32 *xcorr
+  ;   r3  = int         len
+  ; output:
+  ;   r0  = maxcorr
+  ; internal usage
+  ;   r4  = opus_val16 *x
+  ;   r5  = opus_val16 *y
+  ;   r6  = opus_val32  sum0
+  ;   r7  = opus_val32  sum1
+  ;   r8  = opus_val32  sum2
+  ;   r9  = opus_val32  sum3
+  ;   r1  = int         max_pitch
+  ;   r12 = int         j
+  STMFD        sp!, {r4-r11, lr}
+  MOV          r5, r1
+  LDR          r1, [sp, #36]        ; r1 = max_pitch (stack argument, above the 9 words just pushed)
+  MOV          r4, r0
+  TST          r5, #3
+  ; maxcorr = 1
+  MOV          r0, #1
+  BEQ          celt_pitch_xcorr_edsp_process1u_done
+; Compute one sum at the start to make y 32-bit aligned.
+  SUBS         r12, r3, #4
+  ; r14 = sum = 0
+  MOV          r14, #0
+  LDRH         r8, [r5], #2
+  BLE celt_pitch_xcorr_edsp_process1u_loop4_done
+  LDR          r6, [r4], #4
+  MOV          r8, r8, LSL #16
+celt_pitch_xcorr_edsp_process1u_loop4
+  LDR          r9, [r5], #4
+  SMLABT       r14, r6, r8, r14     ; sum = MAC16_16(sum, x_0, y_0)
+  LDR          r7, [r4], #4
+  SMLATB       r14, r6, r9, r14     ; sum = MAC16_16(sum, x_1, y_1)
+  LDR          r8, [r5], #4
+  SMLABT       r14, r7, r9, r14     ; sum = MAC16_16(sum, x_2, y_2)
+  SUBS         r12, r12, #4         ; j-=4
+  SMLATB       r14, r7, r8, r14     ; sum = MAC16_16(sum, x_3, y_3)
+  LDRGT        r6, [r4], #4
+  BGT celt_pitch_xcorr_edsp_process1u_loop4
+  MOV          r8, r8, LSR #16
+celt_pitch_xcorr_edsp_process1u_loop4_done
+  ADDS         r12, r12, #4
+celt_pitch_xcorr_edsp_process1u_loop1
+  LDRGEH       r6, [r4], #2
+  ; Stall
+  SMLABBGE     r14, r6, r8, r14    ; sum = MAC16_16(sum, *x, *y)
+  SUBGES       r12, r12, #1
+  LDRGTH       r8, [r5], #2
+  BGT celt_pitch_xcorr_edsp_process1u_loop1
+  ; Restore _x
+  SUB          r4, r4, r3, LSL #1
+  ; Restore and advance _y
+  SUB          r5, r5, r3, LSL #1
+  ; maxcorr = max(maxcorr, sum)
+  CMP          r0, r14
+  ADD          r5, r5, #2
+  MOVLT        r0, r14
+  SUBS         r1, r1, #1
+  ; xcorr[i] = sum
+  STR          r14, [r2], #4
+  BLE celt_pitch_xcorr_edsp_done
+celt_pitch_xcorr_edsp_process1u_done
+  ; if (max_pitch < 4) goto celt_pitch_xcorr_edsp_process2
+  SUBS         r1, r1, #4
+  BLT celt_pitch_xcorr_edsp_process2
+celt_pitch_xcorr_edsp_process4
+  ; xcorr_kernel_edsp parameters:
+  ; r3 = len, r4 = _x, r5 = _y, r6...r9 = sum[4] = {0, 0, 0, 0}
+  MOV          r6, #0
+  MOV          r7, #0
+  MOV          r8, #0
+  MOV          r9, #0
+  BL xcorr_kernel_edsp  ; xcorr_kernel_edsp(_x, _y+i, xcorr+i, len)
+  ; maxcorr = max(maxcorr, sum0, sum1, sum2, sum3)
+  CMP          r0, r6
+  ; _y+=4
+  ADD          r5, r5, #8
+  MOVLT        r0, r6
+  CMP          r0, r7
+  MOVLT        r0, r7
+  CMP          r0, r8
+  MOVLT        r0, r8
+  CMP          r0, r9
+  MOVLT        r0, r9
+  STMIA        r2!, {r6-r9}
+  SUBS         r1, r1, #4
+  BGE celt_pitch_xcorr_edsp_process4
+celt_pitch_xcorr_edsp_process2
+  ADDS         r1, r1, #2
+  BLT celt_pitch_xcorr_edsp_process1a
+  SUBS         r12, r3, #4
+  ; {r10, r11} = {sum0, sum1} = {0, 0}
+  MOV          r10, #0
+  MOV          r11, #0
+  LDR          r8, [r5], #4
+  BLE celt_pitch_xcorr_edsp_process2_loop_done
+  LDR          r6, [r4], #4
+  LDR          r9, [r5], #4
+celt_pitch_xcorr_edsp_process2_loop4
+  SMLABB       r10, r6, r8, r10     ; sum0 = MAC16_16(sum0, x_0, y_0)
+  LDR          r7, [r4], #4
+  SMLABT       r11, r6, r8, r11     ; sum1 = MAC16_16(sum1, x_0, y_1)
+  SUBS         r12, r12, #4         ; j-=4
+  SMLATT       r10, r6, r8, r10     ; sum0 = MAC16_16(sum0, x_1, y_1)
+  LDR          r8, [r5], #4
+  SMLATB       r11, r6, r9, r11     ; sum1 = MAC16_16(sum1, x_1, y_2)
+  LDRGT        r6, [r4], #4
+  SMLABB       r10, r7, r9, r10     ; sum0 = MAC16_16(sum0, x_2, y_2)
+  SMLABT       r11, r7, r9, r11     ; sum1 = MAC16_16(sum1, x_2, y_3)
+  SMLATT       r10, r7, r9, r10     ; sum0 = MAC16_16(sum0, x_3, y_3)
+  LDRGT        r9, [r5], #4
+  SMLATB       r11, r7, r8, r11     ; sum1 = MAC16_16(sum1, x_3, y_4)
+  BGT celt_pitch_xcorr_edsp_process2_loop4
+celt_pitch_xcorr_edsp_process2_loop_done
+  ADDS         r12, r12, #2
+  BLE  celt_pitch_xcorr_edsp_process2_1
+  LDR          r6, [r4], #4
+  ; Stall
+  SMLABB       r10, r6, r8, r10     ; sum0 = MAC16_16(sum0, x_0, y_0)
+  LDR          r9, [r5], #4
+  SMLABT       r11, r6, r8, r11     ; sum1 = MAC16_16(sum1, x_0, y_1)
+  SUB          r12, r12, #2
+  SMLATT       r10, r6, r8, r10     ; sum0 = MAC16_16(sum0, x_1, y_1)
+  MOV          r8, r9
+  SMLATB       r11, r6, r9, r11     ; sum1 = MAC16_16(sum1, x_1, y_2)
+celt_pitch_xcorr_edsp_process2_1
+  LDRH         r6, [r4], #2
+  ADDS         r12, r12, #1
+  ; Stall
+  SMLABB       r10, r6, r8, r10     ; sum0 = MAC16_16(sum0, x_0, y_0)
+  LDRGTH       r7, [r4], #2
+  SMLABT       r11, r6, r8, r11     ; sum1 = MAC16_16(sum1, x_0, y_1)
+  BLE celt_pitch_xcorr_edsp_process2_done
+  LDRH         r9, [r5], #2
+  SMLABT       r10, r7, r8, r10     ; sum0 = MAC16_16(sum0, x_1, y_1)
+  SMLABB       r11, r7, r9, r11     ; sum1 = MAC16_16(sum1, x_1, y_2)
+celt_pitch_xcorr_edsp_process2_done
+  ; Restore _x
+  SUB          r4, r4, r3, LSL #1
+  ; Restore and advance _y
+  SUB          r5, r5, r3, LSL #1
+  ; maxcorr = max(maxcorr, sum0)
+  CMP          r0, r10
+  ADD          r5, r5, #2
+  MOVLT        r0, r10
+  SUB          r1, r1, #2
+  ; maxcorr = max(maxcorr, sum1)
+  CMP          r0, r11
+  ; xcorr[i] = sum
+  STR          r10, [r2], #4
+  MOVLT        r0, r11
+  STR          r11, [r2], #4
+celt_pitch_xcorr_edsp_process1a
+  ADDS         r1, r1, #1
+  BLT celt_pitch_xcorr_edsp_done
+  SUBS         r12, r3, #4
+  ; r14 = sum = 0
+  MOV          r14, #0
+  BLT celt_pitch_xcorr_edsp_process1a_loop_done
+  LDR          r6, [r4], #4
+  LDR          r8, [r5], #4
+  LDR          r7, [r4], #4
+  LDR          r9, [r5], #4
+celt_pitch_xcorr_edsp_process1a_loop4
+  SMLABB       r14, r6, r8, r14     ; sum = MAC16_16(sum, x_0, y_0)
+  SUBS         r12, r12, #4         ; j-=4
+  SMLATT       r14, r6, r8, r14     ; sum = MAC16_16(sum, x_1, y_1)
+  LDRGE        r6, [r4], #4
+  SMLABB       r14, r7, r9, r14     ; sum = MAC16_16(sum, x_2, y_2)
+  LDRGE        r8, [r5], #4
+  SMLATT       r14, r7, r9, r14     ; sum = MAC16_16(sum, x_3, y_3)
+  LDRGE        r7, [r4], #4
+  LDRGE        r9, [r5], #4
+  BGE celt_pitch_xcorr_edsp_process1a_loop4
+celt_pitch_xcorr_edsp_process1a_loop_done
+  ADDS         r12, r12, #2
+  LDRGE        r6, [r4], #4
+  LDRGE        r8, [r5], #4
+  ; Stall
+  SMLABBGE     r14, r6, r8, r14     ; sum = MAC16_16(sum, x_0, y_0)
+  SUBGE        r12, r12, #2
+  SMLATTGE     r14, r6, r8, r14     ; sum = MAC16_16(sum, x_1, y_1)
+  ADDS         r12, r12, #1
+  LDRGEH       r6, [r4], #2
+  LDRGEH       r8, [r5], #2
+  ; Stall
+  SMLABBGE     r14, r6, r8, r14     ; sum = MAC16_16(sum, *x, *y)
+  ; maxcorr = max(maxcorr, sum)
+  CMP          r0, r14
+  ; xcorr[i] = sum
+  STR          r14, [r2], #4
+  MOVLT        r0, r14
+celt_pitch_xcorr_edsp_done
+  LDMFD        sp!, {r4-r11, pc}
+  ENDP
+
+ENDIF
+
+END
--- a/media/libopus/celt/arm/fixed_armv4.h
+++ b/media/libopus/celt/arm/fixed_armv4.h
@@ -24,34 +24,34 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
 #ifndef FIXED_ARMv4_H
 #define FIXED_ARMv4_H
 
 /** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
 #undef MULT16_32_Q16
-static inline opus_val32 MULT16_32_Q16_armv4(opus_val16 a, opus_val32 b)
+static OPUS_INLINE opus_val32 MULT16_32_Q16_armv4(opus_val16 a, opus_val32 b)
 {
   unsigned rd_lo;
   int rd_hi;
   __asm__(
       "#MULT16_32_Q16\n\t"
       "smull %0, %1, %2, %3\n\t"
       : "=&r"(rd_lo), "=&r"(rd_hi)
       : "%r"(b),"r"(a<<16)
   );
   return rd_hi;
 }
 #define MULT16_32_Q16(a, b) (MULT16_32_Q16_armv4(a, b))
 
 
 /** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
 #undef MULT16_32_Q15
-static inline opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b)
+static OPUS_INLINE opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b)
 {
   unsigned rd_lo;
   int rd_hi;
   __asm__(
       "#MULT16_32_Q15\n\t"
       "smull %0, %1, %2, %3\n\t"
       : "=&r"(rd_lo), "=&r"(rd_hi)
       : "%r"(b), "r"(a<<16)
--- a/media/libopus/celt/arm/fixed_armv5e.h
+++ b/media/libopus/celt/arm/fixed_armv5e.h
@@ -29,33 +29,33 @@
 
 #ifndef FIXED_ARMv5E_H
 #define FIXED_ARMv5E_H
 
 #include "fixed_armv4.h"
 
 /** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
 #undef MULT16_32_Q16
-static inline opus_val32 MULT16_32_Q16_armv5e(opus_val16 a, opus_val32 b)
+static OPUS_INLINE opus_val32 MULT16_32_Q16_armv5e(opus_val16 a, opus_val32 b)
 {
   int res;
   __asm__(
       "#MULT16_32_Q16\n\t"
       "smulwb %0, %1, %2\n\t"
       : "=r"(res)
       : "r"(b),"r"(a)
   );
   return res;
 }
 #define MULT16_32_Q16(a, b) (MULT16_32_Q16_armv5e(a, b))
 
 
 /** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
 #undef MULT16_32_Q15
-static inline opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b)
+static OPUS_INLINE opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b)
 {
   int res;
   __asm__(
       "#MULT16_32_Q15\n\t"
       "smulwb %0, %1, %2\n\t"
       : "=r"(res)
       : "r"(b), "r"(a)
   );
@@ -63,49 +63,49 @@ static inline opus_val32 MULT16_32_Q15_a
 }
 #define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv5e(a, b))
 
 
 /** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
     b must fit in 31 bits.
     Result fits in 32 bits. */
 #undef MAC16_32_Q15
-static inline opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a,
+static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a,
  opus_val32 b)
 {
   int res;
   __asm__(
       "#MAC16_32_Q15\n\t"
       "smlawb %0, %1, %2, %3;\n"
       : "=r"(res)
       : "r"(b<<1), "r"(a), "r"(c)
   );
   return res;
 }
 #define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b))
 
 /** 16x16 multiply-add where the result fits in 32 bits */
 #undef MAC16_16
-static inline opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a,
+static OPUS_INLINE opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a,
  opus_val16 b)
 {
   int res;
   __asm__(
       "#MAC16_16\n\t"
       "smlabb %0, %1, %2, %3;\n"
       : "=r"(res)
       : "r"(a), "r"(b), "r"(c)
   );
   return res;
 }
 #define MAC16_16(c, a, b) (MAC16_16_armv5e(c, a, b))
 
 /** 16x16 multiplication where the result fits in 32 bits */
 #undef MULT16_16
-static inline opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b)
+static OPUS_INLINE opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b)
 {
   int res;
   __asm__(
       "#MULT16_16\n\t"
       "smulbb %0, %1, %2;\n"
       : "=r"(res)
       : "r"(a), "r"(b)
   );
new file mode 100644
--- /dev/null
+++ b/media/libopus/celt/arm/pitch_arm.h
@@ -0,0 +1,57 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if !defined(PITCH_ARM_H)
+# define PITCH_ARM_H
+
+# include "armcpu.h"
+
+# if defined(FIXED_POINT) /* every declaration in this header is fixed-point only */
+
+#  if defined(OPUS_ARM_MAY_HAVE_NEON)
+opus_val32 celt_pitch_xcorr_neon(const opus_val16 *_x, const opus_val16 *_y,
+    opus_val32 *xcorr, int len, int max_pitch);
+#  endif /* OPUS_ARM_MAY_HAVE_NEON */
+
+#  if defined(OPUS_ARM_MAY_HAVE_MEDIA) /* the _media name is an alias built via MAY_HAVE_EDSP */
+#   define celt_pitch_xcorr_media MAY_HAVE_EDSP(celt_pitch_xcorr)
+#  endif /* OPUS_ARM_MAY_HAVE_MEDIA */
+
+#  if defined(OPUS_ARM_MAY_HAVE_EDSP)
+opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y,
+    opus_val32 *xcorr, int len, int max_pitch);
+#  endif /* OPUS_ARM_MAY_HAVE_EDSP */
+
+#  if !defined(OPUS_HAVE_RTCD) /* arch is cast to void; the call goes to PRESUME_NEON(celt_pitch_xcorr) */
+#   define OVERRIDE_PITCH_XCORR (1) /* presumably suppresses the generic celt_pitch_xcorr - confirm in pitch.h */
+#   define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+  ((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch))
+#  endif /* !OPUS_HAVE_RTCD */
+
+# endif /* FIXED_POINT */
+
+#endif /* PITCH_ARM_H */
--- a/media/libopus/celt/bands.c
+++ b/media/libopus/celt/bands.c
@@ -209,30 +209,46 @@ void denormalise_bands(const CELTMode *m
          opus_val16 g;
          opus_val16 lg;
 #ifdef FIXED_POINT
          int shift;
 #endif
          j=M*eBands[i];
          band_end = M*eBands[i+1];
          lg = ADD16(bandLogE[i+c*m->nbEBands], SHL16((opus_val16)eMeans[i],6));
-#ifdef FIXED_POINT
+#ifndef FIXED_POINT
+         g = celt_exp2(lg);
+#else
          /* Handle the integer part of the log energy */
          shift = 16-(lg>>DB_SHIFT);
          if (shift>31)
          {
             shift=0;
             g=0;
          } else {
             /* Handle the fractional part. */
             g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1));
          }
-#else
-         g = celt_exp2(lg);
+         /* Handle extreme gains with negative shift. */
+         if (shift<0)
+         {
+            /* For shift < -2 we'd be likely to overflow, so we're capping
+               the gain here. This shouldn't happen unless the bitstream is
+               already corrupted. */
+            if (shift < -2)
+            {
+               g = 32767;
+               shift = -2;
+            }
+            do {
+               *f++ = SHL32(MULT16_16(*x++, g), -shift);
+            } while (++j<band_end);
+         } else
 #endif
+         /* Be careful of the fixed-point "else" just above when changing this code */
          do {
             *f++ = SHR32(MULT16_16(*x++, g), shift);
          } while (++j<band_end);
       }
       celt_assert(start <= end);
       for (i=M*eBands[end];i<N;i++)
          *f++ = 0;
    } while (++c<C);
@@ -487,17 +503,17 @@ int spreading_decision(const CELTMode *m
       if (hf_sum > 22)
          *tapset_decision=2;
       else if (hf_sum > 18)
          *tapset_decision=1;
       else
          *tapset_decision=0;
    }
    /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/
-   celt_assert(nbBands>0); /*M*(eBands[end]-eBands[end-1]) <= 8 assures this*/
+   celt_assert(nbBands>0); /* end has to be non-zero */
    sum /= nbBands;
    /* Recursive averaging */
    sum = (sum+*average)>>1;
    *average = sum;
    /* Hysteresis */
    sum = (3*sum + (((3-last_decision)<<7) + 64) + 2)>>2;
    if (sum < 80)
    {
@@ -864,17 +880,16 @@ static unsigned quant_partition(struct b
       int N, int b, int B, celt_norm *lowband,
       int LM,
       opus_val16 gain, int fill)
 {
    const unsigned char *cache;
    int q;
    int curr_bits;
    int imid=0, iside=0;
-   int N_B=N;
    int B0=B;
    opus_val16 mid=0, side=0;
    unsigned cm=0;
 #ifdef RESYNTH
    int resynth = 1;
 #else
    int resynth = !ctx->encode;
 #endif
@@ -886,18 +901,16 @@ static unsigned quant_partition(struct b
    ec_ctx *ec;
 
    encode = ctx->encode;
    m = ctx->m;
    i = ctx->i;
    spread = ctx->spread;
    ec = ctx->ec;
 
-   N_B /= B;
-
    /* If we need 1.5 more bit than we can produce, split the band in two. */
    cache = m->cache.bits + m->cache.index[(LM+1)*m->nbEBands+i];
    if (LM != -1 && b > cache[cache[0]]+12 && N>2)
    {
       int mbits, sbits, delta;
       int itheta;
       int qalloc;
       struct split_ctx sctx;
@@ -1067,17 +1080,16 @@ static unsigned quant_band(struct band_c
    int tf_change;
 
    encode = ctx->encode;
    tf_change = ctx->tf_change;
 
    longBlocks = B0==1;
 
    N_B /= B;
-   N_B0 = N_B;
 
    /* Special case for one sample */
    if (N==1)
    {
       return quant_band_n1(ctx, X, NULL, b, lowband_out);
    }
 
    if (tf_change>0)
--- a/media/libopus/celt/celt.h
+++ b/media/libopus/celt/celt.h
@@ -47,30 +47,34 @@ extern "C" {
 #endif
 
 #define CELTEncoder OpusCustomEncoder
 #define CELTDecoder OpusCustomDecoder
 #define CELTMode OpusCustomMode
 
 typedef struct {
    int valid;
-   opus_val16 tonality;
-   opus_val16 tonality_slope;
-   opus_val16 noisiness;
-   opus_val16 activity;
-   opus_val16 music_prob;
+   float tonality;
+   float tonality_slope;
+   float noisiness;
+   float activity;
+   float music_prob;
    int        bandwidth;
 }AnalysisInfo;
 
 #define __celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr)))
 
 #define __celt_check_analysis_ptr(ptr) ((ptr) + ((ptr) - (const AnalysisInfo*)(ptr)))
 
 /* Encoder/decoder Requests */
 
+/* Expose this option again when variable framesize actually works */
+#define OPUS_FRAMESIZE_VARIABLE              5010 /**< Optimize the frame size dynamically */
+
+
 #define CELT_SET_PREDICTION_REQUEST    10002
 /** Controls the use of interframe prediction.
     0=Independent frames
     1=Short term interframe prediction allowed
     2=Long term prediction allowed
  */
 #define CELT_SET_PREDICTION(x) CELT_SET_PREDICTION_REQUEST, __opus_check_int(x)
 
@@ -104,29 +108,27 @@ typedef struct {
 #define CELT_SET_TONALITY_SLOPE(x) CELT_SET_TONALITY_SLOPE_REQUEST, __opus_check_int(x)
 
 #define CELT_SET_ANALYSIS_REQUEST    10022
 #define CELT_SET_ANALYSIS(x) CELT_SET_ANALYSIS_REQUEST, __celt_check_analysis_ptr(x)
 
 #define OPUS_SET_LFE_REQUEST    10024
 #define OPUS_SET_LFE(x) OPUS_SET_LFE_REQUEST, __opus_check_int(x)
 
-#define OPUS_SET_ENERGY_SAVE_REQUEST    10026
-#define OPUS_SET_ENERGY_SAVE(x) OPUS_SET_ENERGY_SAVE_REQUEST, __opus_check_val16_ptr(x)
-
-#define OPUS_SET_ENERGY_MASK_REQUEST    10028
+#define OPUS_SET_ENERGY_MASK_REQUEST    10026
 #define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x)
 
 /* Encoder stuff */
 
 int celt_encoder_get_size(int channels);
 
 int celt_encode_with_ec(OpusCustomEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc);
 
-int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels);
+int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels,
+                      int arch);
 
 
 
 /* Decoder stuff */
 
 int celt_decoder_get_size(int channels);
 
 
@@ -136,17 +138,17 @@ int celt_decode_with_ec(OpusCustomDecode
 
 #define celt_encoder_ctl opus_custom_encoder_ctl
 #define celt_decoder_ctl opus_custom_decoder_ctl
 
 
 #ifdef CUSTOM_MODES
 #define OPUS_CUSTOM_NOSTATIC
 #else
-#define OPUS_CUSTOM_NOSTATIC static inline
+#define OPUS_CUSTOM_NOSTATIC static OPUS_INLINE
 #endif
 
 static const unsigned char trim_icdf[11] = {126, 124, 119, 109, 87, 41, 19, 9, 4, 2, 0};
 /* Probs: NONE: 21.875%, LIGHT: 6.25%, NORMAL: 65.625%, AGGRESSIVE: 6.25% */
 static const unsigned char spread_icdf[4] = {25, 23, 2, 0};
 
 static const unsigned char tapset_icdf[3]={2,1,0};
 
@@ -161,43 +163,46 @@ static const unsigned char toOpusTable[2
 
 static const unsigned char fromOpusTable[16] = {
       0x80, 0x88, 0x90, 0x98,
       0x40, 0x48, 0x50, 0x58,
       0x20, 0x28, 0x30, 0x38,
       0x00, 0x08, 0x10, 0x18
 };
 
-static inline int toOpus(unsigned char c)
+static OPUS_INLINE int toOpus(unsigned char c)
 {
    int ret=0;
    if (c<0xA0)
       ret = toOpusTable[c>>3];
    if (ret == 0)
       return -1;
    else
       return ret|(c&0x7);
 }
 
-static inline int fromOpus(unsigned char c)
+static OPUS_INLINE int fromOpus(unsigned char c)
 {
    if (c<0x80)
       return -1;
    else
       return fromOpusTable[(c>>3)-16] | (c&0x7);
 }
 #endif /* CUSTOM_MODES */
 
 #define COMBFILTER_MAXPERIOD 1024
 #define COMBFILTER_MINPERIOD 15
 
 extern const signed char tf_select_table[4][8];
 
 int resampling_factor(opus_int32 rate);
 
+void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,
+                        int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip);
+
 void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
       opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
       const opus_val16 *window, int overlap);
 
 void init_caps(const CELTMode *m,int *cap,int LM,int C);
 
 #ifdef RESYNTH
 void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch);
--- a/media/libopus/celt/celt_decoder.c
+++ b/media/libopus/celt/celt_decoder.c
@@ -170,17 +170,17 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_dec
 
 #ifdef CUSTOM_MODES
 void opus_custom_decoder_destroy(CELTDecoder *st)
 {
    opus_free(st);
 }
 #endif /* CUSTOM_MODES */
 
-static inline opus_val16 SIG2WORD16(celt_sig x)
+static OPUS_INLINE opus_val16 SIG2WORD16(celt_sig x)
 {
 #ifdef FIXED_POINT
    x = PSHR32(x, SIG_SHIFT);
    x = MAX32(x, -32768);
    x = MIN32(x, 32767);
    return EXTRACT16(x);
 #else
    return (opus_val16)x;
@@ -442,20 +442,21 @@ static void celt_decode_lost(CELTDecoder
       int pitch_index;
       VARDECL(opus_val32, etmp);
       VARDECL(opus_val16, exc);
 
       if (loss_count == 0)
       {
          VARDECL( opus_val16, lp_pitch_buf );
          ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 );
-         pitch_downsample(decode_mem, lp_pitch_buf, DECODE_BUFFER_SIZE, C);
+         pitch_downsample(decode_mem, lp_pitch_buf,
+               DECODE_BUFFER_SIZE, C, st->arch);
          pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf,
                DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX,
-               PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index);
+               PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, st->arch);
          pitch_index = PLC_PITCH_LAG_MAX-pitch_index;
          st->last_pitch_index = pitch_index;
       } else {
          pitch_index = st->last_pitch_index;
          fade = QCONST16(.8f,15);
       }
 
       ALLOC(etmp, overlap, opus_val32);
@@ -476,17 +477,18 @@ static void celt_decode_lost(CELTDecoder
             exc[i] = ROUND16(buf[DECODE_BUFFER_SIZE-MAX_PERIOD+i], SIG_SHIFT);
          }
 
          if (loss_count == 0)
          {
             opus_val32 ac[LPC_ORDER+1];
             /* Compute LPC coefficients for the last MAX_PERIOD samples before
                the first loss so we can work in the excitation-filter domain. */
-            _celt_autocorr(exc, ac, window, overlap, LPC_ORDER, MAX_PERIOD);
+            _celt_autocorr(exc, ac, window, overlap,
+                   LPC_ORDER, MAX_PERIOD, st->arch);
             /* Add a noise floor of -40 dB. */
 #ifdef FIXED_POINT
             ac[0] += SHR32(ac[0],13);
 #else
             ac[0] *= 1.0001f;
 #endif
             /* Use lag windowing to stabilize the Levinson-Durbin recursion. */
             for (i=1;i<=LPC_ORDER;i++)
@@ -660,17 +662,16 @@ int celt_decode_with_ec(CELTDecoder * OP
    VARDECL(celt_norm, X);
    VARDECL(int, fine_quant);
    VARDECL(int, pulses);
    VARDECL(int, cap);
    VARDECL(int, offsets);
    VARDECL(int, fine_priority);
    VARDECL(int, tf_res);
    VARDECL(unsigned char, collapse_masks);
-   celt_sig *out_mem[2];
    celt_sig *decode_mem[2];
    celt_sig *out_syn[2];
    opus_val16 *lpc;
    opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE;
 
    int shortBlocks;
    int isTransient;
    int intra_ener;
@@ -701,17 +702,16 @@ int celt_decode_with_ec(CELTDecoder * OP
    mode = st->mode;
    nbEBands = mode->nbEBands;
    overlap = mode->overlap;
    eBands = mode->eBands;
    frame_size *= st->downsample;
 
    c=0; do {
       decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap);
-      out_mem[c] = decode_mem[c]+DECODE_BUFFER_SIZE-MAX_PERIOD;
    } while (++c<CC);
    lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC);
    oldBandE = lpc+CC*LPC_ORDER;
    oldLogE = oldBandE + 2*nbEBands;
    oldLogE2 = oldLogE + 2*nbEBands;
    backgroundLogE = oldLogE2  + 2*nbEBands;
 
 #ifdef CUSTOM_MODES
@@ -931,17 +931,17 @@ int celt_decode_with_ec(CELTDecoder * OP
       int bound = M*eBands[effEnd];
       if (st->downsample!=1)
          bound = IMIN(bound, N/st->downsample);
       for (i=bound;i<N;i++)
          freq[c*N+i] = 0;
    } while (++c<C);
 
    c=0; do {
-      out_syn[c] = out_mem[c]+MAX_PERIOD-N;
+      out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
    } while (++c<CC);
 
    if (CC==2&&C==1)
    {
       for (i=0;i<N;i++)
          freq[N+i] = freq[i];
    }
    if (CC==1&&C==2)
--- a/media/libopus/celt/celt_encoder.c
+++ b/media/libopus/celt/celt_encoder.c
@@ -106,17 +106,16 @@ struct OpusCustomEncoder {
    /* VBR-related parameters */
    opus_int32 vbr_reservoir;
    opus_int32 vbr_drift;
    opus_int32 vbr_offset;
    opus_int32 vbr_count;
    opus_val32 overlap_max;
    opus_val16 stereo_saving;
    int intensity;
-   opus_val16 *energy_save;
    opus_val16 *energy_mask;
    opus_val16 spec_avg;
 
 #ifdef RESYNTH
    /* +MAX_PERIOD/2 to make space for overlap */
    celt_sig syn_mem[2][2*MAX_PERIOD+MAX_PERIOD/2];
 #endif
 
@@ -157,27 +156,18 @@ CELTEncoder *opus_custom_encoder_create(
       st = NULL;
    }
    if (error)
       *error = ret;
    return st;
 }
 #endif /* CUSTOM_MODES */
 
-int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels)
-{
-   int ret;
-   ret = opus_custom_encoder_init(st, opus_custom_mode_create(48000, 960, NULL), channels);
-   if (ret != OPUS_OK)
-      return ret;
-   st->upsample = resampling_factor(sampling_rate);
-   return OPUS_OK;
-}
-
-OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_init(CELTEncoder *st, const CELTMode *mode, int channels)
+static int opus_custom_encoder_init_arch(CELTEncoder *st, const CELTMode *mode,
+                                         int channels, int arch)
 {
    if (channels < 0 || channels > 2)
       return OPUS_BAD_ARG;
 
    if (st==NULL || mode==NULL)
       return OPUS_ALLOC_FAIL;
 
    OPUS_CLEAR((char*)st, opus_custom_encoder_get_size(mode, channels));
@@ -186,33 +176,52 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_enc
    st->overlap = mode->overlap;
    st->stream_channels = st->channels = channels;
 
    st->upsample = 1;
    st->start = 0;
    st->end = st->mode->effEBands;
    st->signalling = 1;
 
-   st->arch = opus_select_arch();
+   st->arch = arch;
 
    st->constrained_vbr = 1;
    st->clip = 1;
 
    st->bitrate = OPUS_BITRATE_MAX;
    st->vbr = 0;
    st->force_intra  = 0;
    st->complexity = 5;
    st->lsb_depth=24;
 
    opus_custom_encoder_ctl(st, OPUS_RESET_STATE);
 
    return OPUS_OK;
 }
 
+#ifdef CUSTOM_MODES
+int opus_custom_encoder_init(CELTEncoder *st, const CELTMode *mode, int channels)
+{
+   return opus_custom_encoder_init_arch(st, mode, channels, opus_select_arch()); /* arch = result of opus_select_arch() */
+}
+#endif /* CUSTOM_MODES */
+
+int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels,
+                      int arch)
+{
+   int ret;
+   ret = opus_custom_encoder_init_arch(st,
+           opus_custom_mode_create(48000, 960, NULL), channels, arch); /* mode arguments are fixed; arch is forwarded as given */
+   if (ret != OPUS_OK)
+      return ret; /* on failure st->upsample is not updated here */
+   st->upsample = resampling_factor(sampling_rate); /* the only use of sampling_rate in this function */
+   return OPUS_OK;
+}
+
+#ifdef CUSTOM_MODES
 void opus_custom_encoder_destroy(CELTEncoder *st)
 {
    opus_free(st);
 }
 #endif /* CUSTOM_MODES */
 
 
 static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int C,
@@ -236,17 +245,16 @@ static int transient_analysis(const opus
            5,  5,  5,  5,  5,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
            4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  3,  3,
            3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  2,
    };
    SAVE_STACK;
    ALLOC(tmp, len, opus_val16);
 
    len2=len/2;
-   tf_max = 0;
    for (c=0;c<C;c++)
    {
       opus_val32 mean;
       opus_int32 unmask=0;
       opus_val32 norm;
       opus_val16 maxE;
       mem0=0;
       mem1=0;
@@ -365,43 +373,43 @@ static int transient_analysis(const opus
    is_transient = rand()&0x1;
 #endif
    /*printf("%d %f %d\n", is_transient, (float)*tf_estimate, tf_max);*/
    return is_transient;
 }
 
 /* Looks for sudden increases of energy to decide whether we need to patch
    the transient decision */
-int patch_transient_decision(opus_val16 *new, opus_val16 *old, int nbEBands,
+int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands,
       int end, int C)
 {
    int i, c;
    opus_val32 mean_diff=0;
    opus_val16 spread_old[26];
    /* Apply an aggressive (-6 dB/Bark) spreading function to the old frame to
       avoid false detection caused by irrelevant bands */
    if (C==1)
    {
-      spread_old[0] = old[0];
+      spread_old[0] = oldE[0];
       for (i=1;i<end;i++)
-         spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT), old[i]);
+         spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT), oldE[i]);
    } else {
-      spread_old[0] = MAX16(old[0],old[nbEBands]);
+      spread_old[0] = MAX16(oldE[0],oldE[nbEBands]);
       for (i=1;i<end;i++)
          spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT),
-                               MAX16(old[i],old[i+nbEBands]));
+                               MAX16(oldE[i],oldE[i+nbEBands]));
    }
    for (i=end-2;i>=0;i--)
       spread_old[i] = MAX16(spread_old[i], spread_old[i+1]-QCONST16(1.0f, DB_SHIFT));
    /* Compute mean increase */
    c=0; do {
       for (i=2;i<end-1;i++)
       {
          opus_val16 x1, x2;
-         x1 = MAX16(0, new[i]);
+         x1 = MAX16(0, newE[i]); /* NOTE(review): index does not involve c, so every pass of the channel loop adds the same terms - confirm intended */
          x2 = MAX16(0, spread_old[i]);
          mean_diff = ADD32(mean_diff, EXTEND32(MAX16(0, SUB16(x1, x2))));
       }
    } while (++c<C);
    mean_diff = DIV32(mean_diff, C*(end-3));
    /*printf("%f %f %d\n", mean_diff, max_diff, count);*/
    return mean_diff > QCONST16(1.f, DB_SHIFT);
 }
@@ -447,17 +455,17 @@ static void compute_mdcts(const CELTMode
             out[c*B*N+i] *= upsample;
          for (;i<B*N;i++)
             out[c*B*N+i] = 0;
       } while (++c<C);
    }
 }
 
 
-static void preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,
+void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,
                         int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip)
 {
    int i;
    opus_val16 coef0;
    celt_sig m;
    int Nu;
 
    coef0 = coef[0];
@@ -484,16 +492,18 @@ static void preemphasis(const opus_val16
 
 #ifndef FIXED_POINT
    if (clip)
    {
       /* Clip input to avoid encoding non-portable files */
       for (i=0;i<Nu;i++)
          inp[i*upsample] = MAX32(-65536.f, MIN32(65536.f,inp[i*upsample]));
    }
+#else
+   (void)clip; /* Avoids a warning about clip being unused. */
 #endif
    m = *mem;
 #ifdef CUSTOM_MODES
    if (coef[1] != 0)
    {
       opus_val16 coef1 = coef[1];
       opus_val16 coef2 = coef[2];
       for (i=0;i<N;i++)
@@ -739,17 +749,17 @@ static void tf_encode(int start, int end
       tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]];
    /*for(i=0;i<end;i++)printf("%d ", isTransient ? tf_res[i] : LM+tf_res[i]);printf("\n");*/
 }
 
 
 static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
       const opus_val16 *bandLogE, int end, int LM, int C, int N0,
       AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate,
-      int intensity)
+      int intensity, opus_val16 surround_trim)
 {
    int i;
    opus_val32 diff=0;
    int c;
    int trim_index = 5;
    opus_val16 trim = QCONST16(5.f, 8);
    opus_val16 logXC, logXC2;
    if (C==2)
@@ -813,21 +823,23 @@ static int alloc_trim_analysis(const CEL
       trim_index--;
    if (diff > QCONST16(8.f, DB_SHIFT))
       trim_index--;
    if (diff < -QCONST16(4.f, DB_SHIFT))
       trim_index++;
    if (diff < -QCONST16(10.f, DB_SHIFT))
       trim_index++;
    trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 ));
+   trim -= SHR16(surround_trim, DB_SHIFT-8);
    trim -= 2*SHR16(tf_estimate, 14-8);
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
    if (analysis->valid)
    {
-      trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), 2*(analysis->tonality_slope+.05f)));
+      trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8),
+            (opus_val16)(QCONST16(2.f, 8)*(analysis->tonality_slope+.05f))));
    }
 #endif
 
 #ifdef FIXED_POINT
    trim_index = PSHR32(trim, 8);
 #else
    trim_index = (int)floor(.5f+trim);
 #endif
@@ -872,17 +884,17 @@ static int stereo_analysis(const CELTMod
       thetas -= 8;
    return MULT16_32_Q15((m->eBands[13]<<(LM+1))+thetas, sumMS)
          > MULT16_32_Q15(m->eBands[13]<<(LM+1), sumLR);
 }
 
 static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2,
       int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN,
       int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM,
-      int effectiveBytes, opus_int32 *tot_boost_, int lfe)
+      int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc)
 {
    int i, c;
    opus_int32 tot_boost=0;
    opus_val16 maxDepth;
    VARDECL(opus_val16, follower);
    VARDECL(opus_val16, noise_floor);
    SAVE_STACK;
    ALLOC(follower, C*nbEBands, opus_val16);
@@ -935,16 +947,18 @@ static opus_val16 dynalloc_analysis(cons
             follower[i] = HALF16(MAX16(0, bandLogE[i]-follower[i]) + MAX16(0, bandLogE[nbEBands+i]-follower[nbEBands+i]));
          }
       } else {
          for (i=start;i<end;i++)
          {
             follower[i] = MAX16(0, bandLogE[i]-follower[i]);
          }
       }
+      for (i=start;i<end;i++)
+         follower[i] = MAX16(follower[i], surround_dynalloc[i]);
       /* For non-transient CBR/CVBR frames, halve the dynalloc contribution */
       if ((!vbr || constrained_vbr)&&!isTransient)
       {
          for (i=start;i<end;i++)
             follower[i] = HALF16(follower[i]);
       }
       for (i=start;i<end;i++)
       {
@@ -1016,21 +1030,22 @@ static int run_prefilter(CELTEncoder *st
       OPUS_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+st->overlap)+st->overlap, N);
    } while (++c<CC);
 
    if (enabled)
    {
       VARDECL(opus_val16, pitch_buf);
       ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, opus_val16);
 
-      pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC);
+      pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC, st->arch);
       /* Don't search for the fir last 1.5 octave of the range because
          there's too many false-positives due to short-term correlation */
       pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N,
-            COMBFILTER_MAXPERIOD-3*COMBFILTER_MINPERIOD, &pitch_index);
+            COMBFILTER_MAXPERIOD-3*COMBFILTER_MINPERIOD, &pitch_index,
+            st->arch);
       pitch_index = COMBFILTER_MAXPERIOD-pitch_index;
 
       gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD,
             N, &pitch_index, st->prefilter_period, st->prefilter_gain);
       if (pitch_index > COMBFILTER_MAXPERIOD-2)
          pitch_index = COMBFILTER_MAXPERIOD-2;
       gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1);
       /*printf("%d %d %f %f\n", pitch_change, pitch_index, gain1, st->analysis.tonality);*/
@@ -1135,54 +1150,55 @@ static int compute_vbr(const CELTMode *m
    coded_bands = lastCodedBands ? lastCodedBands : nbEBands;
    coded_bins = eBands[coded_bands]<<LM;
    if (C==2)
       coded_bins += eBands[IMIN(intensity, coded_bands)]<<LM;
 
    target = base_target;
 
    /*printf("%f %f %f %f %d %d ", st->analysis.activity, st->analysis.tonality, tf_estimate, st->stereo_saving, tot_boost, coded_bands);*/
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
    if (analysis->valid && analysis->activity<.4)
       target -= (opus_int32)((coded_bins<<BITRES)*(.4f-analysis->activity));
 #endif
    /* Stereo savings */
    if (C==2)
    {
       int coded_stereo_bands;
       int coded_stereo_dof;
       opus_val16 max_frac;
       coded_stereo_bands = IMIN(intensity, coded_bands);
       coded_stereo_dof = (eBands[coded_stereo_bands]<<LM)-coded_stereo_bands;
       /* Maximum fraction of the bits we can save if the signal is mono. */
       max_frac = DIV32_16(MULT16_16(QCONST16(0.8f, 15), coded_stereo_dof), coded_bins);
+      stereo_saving = MIN16(stereo_saving, QCONST16(1.f, 8));
       /*printf("%d %d %d ", coded_stereo_dof, coded_bins, tot_boost);*/
       target -= (opus_int32)MIN32(MULT16_32_Q15(max_frac,target),
                       SHR32(MULT16_16(stereo_saving-QCONST16(0.1f,8),(coded_stereo_dof<<BITRES)),8));
    }
    /* Boost the rate according to dynalloc (minus the dynalloc average for calibration). */
    target += tot_boost-(16<<LM);
    /* Apply transient boost, compensating for average boost. */
    tf_calibration = variable_duration==OPUS_FRAMESIZE_VARIABLE ?
                     QCONST16(0.02f,14) : QCONST16(0.04f,14);
    target += (opus_int32)SHL32(MULT16_32_Q15(tf_estimate-tf_calibration, target),1);
 
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
    /* Apply tonality boost */
    if (analysis->valid && !lfe)
    {
       opus_int32 tonal_target;
       float tonal;
 
       /* Tonality boost (compensating for the average). */
       tonal = MAX16(0.f,analysis->tonality-.15f)-0.09f;
       tonal_target = target + (opus_int32)((coded_bins<<BITRES)*1.2f*tonal);
       if (pitch_change)
          tonal_target +=  (opus_int32)((coded_bins<<BITRES)*.8f);
-      /*printf("%f %f ", st->analysis.tonality, tonal);*/
+      /*printf("%f %f ", analysis->tonality, tonal);*/
       target = tonal_target;
    }
 #endif
 
    if (has_surround_mask&&!lfe)
    {
       opus_int32 surround_target = target + (opus_int32)SHR32(MULT16_16(surround_masking,coded_bins<<BITRES), DB_SHIFT);
       /*printf("%f %d %d %d %d %d %d ", surround_masking, coded_bins, st->end, st->intensity, surround_target, target, st->bitrate);*/
@@ -1286,32 +1302,41 @@ int celt_encode_with_ec(CELTEncoder * OP
    int nbEBands;
    int overlap;
    const opus_int16 *eBands;
    int secondMdct;
    int signalBandwidth;
    int transient_got_disabled=0;
    opus_val16 surround_masking=0;
    opus_val16 temporal_vbr=0;
+   opus_val16 surround_trim = 0;
+   opus_int32 equiv_rate = 510000;
+   VARDECL(opus_val16, surround_dynalloc);
    ALLOC_STACK;
 
    mode = st->mode;
    nbEBands = mode->nbEBands;
    overlap = mode->overlap;
    eBands = mode->eBands;
    tf_estimate = 0;
    if (nbCompressedBytes<2 || pcm==NULL)
-     return OPUS_BAD_ARG;
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
 
    frame_size *= st->upsample;
    for (LM=0;LM<=mode->maxLM;LM++)
       if (mode->shortMdctSize<<LM==frame_size)
          break;
    if (LM>mode->maxLM)
+   {
+      RESTORE_STACK;
       return OPUS_BAD_ARG;
+   }
    M=1<<LM;
    N = M*mode->shortMdctSize;
 
    prefilter_mem = st->in_mem+CC*(st->overlap);
    oldBandE = (opus_val16*)(st->in_mem+CC*(st->overlap+COMBFILTER_MAXPERIOD));
    oldLogE = oldBandE + CC*nbEBands;
    oldLogE2 = oldLogE + CC*nbEBands;
 
@@ -1332,17 +1357,20 @@ int celt_encode_with_ec(CELTEncoder * OP
       compressed[0] = tmp<<5;
       compressed[0] |= LM<<3;
       compressed[0] |= (C==2)<<2;
       /* Convert "standard mode" to Opus header */
       if (mode->Fs==48000 && mode->shortMdctSize==120)
       {
          int c0 = toOpus(compressed[0]);
          if (c0<0)
+         {
+            RESTORE_STACK;
             return OPUS_BAD_ARG;
+         }
          compressed[0] = c0;
       }
       compressed++;
       nbCompressedBytes--;
    }
 #else
    celt_assert(st->signalling==0);
 #endif
@@ -1366,16 +1394,18 @@ int celt_encode_with_ec(CELTEncoder * OP
       tmp = st->bitrate*frame_size;
       if (tell>1)
          tmp += tell;
       if (st->bitrate!=OPUS_BITRATE_MAX)
          nbCompressedBytes = IMAX(2, IMIN(nbCompressedBytes,
                (tmp+4*mode->Fs)/(8*mode->Fs)-!!st->signalling));
       effectiveBytes = nbCompressedBytes;
    }
+   if (st->bitrate != OPUS_BITRATE_MAX)
+      equiv_rate = st->bitrate - (40*C+20)*((400>>LM) - 50);
 
    if (enc==NULL)
    {
       ec_enc_init(&_enc, compressed, nbCompressedBytes);
       enc = &_enc;
    }
 
    if (vbr_rate>0)
@@ -1439,27 +1469,27 @@ int celt_encode_with_ec(CELTEncoder * OP
          ec_enc_shrink(enc, nbCompressedBytes);
       }
       /* Pretend we've filled all the remaining bits with zeros
             (that's what the initialiser did anyway) */
       tell = nbCompressedBytes*8;
       enc->nbits_total+=tell-ec_tell(enc);
    }
    c=0; do {
-      preemphasis(pcm+c, in+c*(N+st->overlap)+st->overlap, N, CC, st->upsample,
+      celt_preemphasis(pcm+c, in+c*(N+st->overlap)+st->overlap, N, CC, st->upsample,
                   mode->preemph, st->preemph_memE+c, st->clip);
    } while (++c<CC);
 
 
 
    /* Find pitch period and gain */
    {
       int enabled;
       int qg;
-      enabled = (st->lfe || nbAvailableBytes>12*C) && st->start==0 && !silence && !st->disable_pf
+      enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && st->start==0 && !silence && !st->disable_pf
             && st->complexity >= 5 && !(st->consec_transient && LM!=3 && st->variable_duration==OPUS_FRAMESIZE_VARIABLE);
 
       prefilter_tapset = st->tapset_decision;
       pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes);
       if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && (!st->analysis.valid || st->analysis.tonality > .3)
             && (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period))
          pitch_change = 1;
       if (pf_on==0)
@@ -1521,53 +1551,99 @@ int celt_encode_with_ec(CELTEncoder * OP
    {
       for (i=2;i<st->end;i++)
       {
          bandE[i] = IMIN(bandE[i], MULT16_32_Q15(QCONST16(1e-4f,15),bandE[0]));
          bandE[i] = MAX32(bandE[i], EPSILON);
       }
    }
    amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C);
-   if (st->energy_save)
+
+   ALLOC(surround_dynalloc, C*nbEBands, opus_val16);
+   for(i=0;i<st->end;i++)
+      surround_dynalloc[i] = 0;
+   /* This computes how much masking takes place between surround channels */
+   if (st->start==0&&st->energy_mask&&!st->lfe)
    {
-      opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0;
-#ifdef FIXED_POINT
-      /* Compensate for the 1/8 gain we apply in the fixed-point downshift to avoid overflows. */
-      offset -= QCONST16(3.0f, DB_SHIFT);
-#endif
-      for(i=0;i<C*nbEBands;i++)
-         st->energy_save[i]=bandLogE[i]-offset;
-      st->energy_save=NULL;
-   }
-   /* This computes how much masking takes place between surround channels */
-   if (st->energy_mask&&!st->lfe)
-   {
+      int mask_end;
+      int midband;
+      int count_dynalloc;
       opus_val32 mask_avg=0;
-      opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0;
+      opus_val32 diff=0;
+      int count=0;
+      mask_end = IMAX(2,st->lastCodedBands);
       for (c=0;c<C;c++)
       {
-         opus_val16 followE, followMask;
-         followE = followMask = -QCONST16(14.f, DB_SHIFT);
-         for(i=0;i<st->end;i++)
+         for(i=0;i<mask_end;i++)
          {
-            /* We use a simple follower to approximate the masking spreading function. */
-            followE = MAX16(followE-QCONST16(1.f, DB_SHIFT), bandLogE[nbEBands*c+i]-offset);
-            followMask = MAX16(followMask-QCONST16(1.f, DB_SHIFT), st->energy_mask[nbEBands*c+i]);
-            mask_avg += followE-followMask;
+            opus_val16 mask;
+            mask = MAX16(MIN16(st->energy_mask[nbEBands*c+i],
+                   QCONST16(.25f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT));
+            if (mask > 0)
+               mask = HALF16(mask);
+            mask_avg += MULT16_16(mask, eBands[i+1]-eBands[i]);
+            count += eBands[i+1]-eBands[i];
+            diff += MULT16_16(mask, 1+2*i-mask_end);
          }
       }
-      surround_masking = DIV32_16(mask_avg,C*st->end) + QCONST16(.7f, DB_SHIFT);
-      surround_masking = MIN16(MAX16(surround_masking, -QCONST16(2.f, DB_SHIFT)), QCONST16(.2f, DB_SHIFT));
-      surround_masking -= HALF16(HALF16(surround_masking));
+      mask_avg = DIV32_16(mask_avg,count);
+      mask_avg += QCONST16(.2f, DB_SHIFT);
+      diff = diff*6/(C*(mask_end-1)*(mask_end+1)*mask_end);
+      /* Again, being conservative */
+      diff = HALF32(diff);
+      diff = MAX32(MIN32(diff, QCONST32(.031f, DB_SHIFT)), -QCONST32(.031f, DB_SHIFT));
+      /* Find the band that's in the middle of the coded spectrum */
+      for (midband=0;eBands[midband+1] < eBands[mask_end]/2;midband++);
+      count_dynalloc=0;
+      for(i=0;i<mask_end;i++)
+      {
+         opus_val32 lin;
+         opus_val16 unmask;
+         lin = mask_avg + diff*(i-midband);
+         if (C==2)
+            unmask = MAX16(st->energy_mask[i], st->energy_mask[nbEBands+i]);
+         else
+            unmask = st->energy_mask[i];
+         unmask = MIN16(unmask, QCONST16(.0f, DB_SHIFT));
+         unmask -= lin;
+         if (unmask > QCONST16(.25f, DB_SHIFT))
+         {
+            surround_dynalloc[i] = unmask - QCONST16(.25f, DB_SHIFT);
+            count_dynalloc++;
+         }
+      }
+      if (count_dynalloc>=3)
+      {
+         /* If we need dynalloc in many bands, it's probably because our
+            initial masking rate was too low. */
+         mask_avg += QCONST16(.25f, DB_SHIFT);
+         if (mask_avg>0)
+         {
+            /* Something went really wrong in the original calculations,
+               disabling masking. */
+            mask_avg = 0;
+            diff = 0;
+            for(i=0;i<mask_end;i++)
+               surround_dynalloc[i] = 0;
+         } else {
+            for(i=0;i<mask_end;i++)
+               surround_dynalloc[i] = MAX16(0, surround_dynalloc[i]-QCONST16(.25f, DB_SHIFT));
+         }
+      }
+      mask_avg += QCONST16(.2f, DB_SHIFT);
+      /* Convert to 1/64th units used for the trim */
+      surround_trim = 64*diff;
+      /*printf("%d %d ", mask_avg, surround_trim);*/
+      surround_masking = mask_avg;
    }
    /* Temporal VBR (but not for LFE) */
    if (!st->lfe)
    {
       opus_val16 follow=-QCONST16(10.0f,DB_SHIFT);
-      float frame_avg=0;
+      opus_val32 frame_avg=0;
       opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0;
       for(i=st->start;i<st->end;i++)
       {
          follow = MAX16(follow-QCONST16(1.f, DB_SHIFT), bandLogE[i]-offset);
          if (C==2)
             follow = MAX16(follow, bandLogE[i+nbEBands]-offset);
          frame_avg += follow;
       }
@@ -1655,40 +1731,43 @@ int celt_encode_with_ec(CELTEncoder * OP
          if (st->complexity == 0)
             st->spread_decision = SPREAD_NONE;
          else
             st->spread_decision = SPREAD_NORMAL;
       } else {
          /* Disable new spreading+tapset estimator until we can show it works
             better than the old one. So far it seems like spreading_decision()
             works best. */
-         if (0&&st->analysis.valid)
+#if 0
+         if (st->analysis.valid)
          {
             static const opus_val16 spread_thresholds[3] = {-QCONST16(.6f, 15), -QCONST16(.2f, 15), -QCONST16(.07f, 15)};
             static const opus_val16 spread_histeresis[3] = {QCONST16(.15f, 15), QCONST16(.07f, 15), QCONST16(.02f, 15)};
             static const opus_val16 tapset_thresholds[2] = {QCONST16(.0f, 15), QCONST16(.15f, 15)};
             static const opus_val16 tapset_histeresis[2] = {QCONST16(.1f, 15), QCONST16(.05f, 15)};
             st->spread_decision = hysteresis_decision(-st->analysis.tonality, spread_thresholds, spread_histeresis, 3, st->spread_decision);
             st->tapset_decision = hysteresis_decision(st->analysis.tonality_slope, tapset_thresholds, tapset_histeresis, 2, st->tapset_decision);
-         } else {
+         } else
+#endif
+         {
             st->spread_decision = spreading_decision(mode, X,
                   &st->tonal_average, st->spread_decision, &st->hf_average,
                   &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M);
          }
          /*printf("%d %d\n", st->tapset_decision, st->spread_decision);*/
          /*printf("%f %d %f %d\n\n", st->analysis.tonality, st->spread_decision, st->analysis.tonality_slope, st->tapset_decision);*/
       }
       ec_enc_icdf(enc, st->spread_decision, spread_icdf, 5);
    }
 
    ALLOC(offsets, nbEBands, int);
 
    maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, st->start, st->end, C, offsets,
          st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr,
-         eBands, LM, effectiveBytes, &tot_boost, st->lfe);
+         eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc);
    /* For LFE, everything interesting is in the first band */
    if (st->lfe)
       offsets[0] = IMIN(8, effectiveBytes/3);
    ALLOC(cap, nbEBands, int);
    init_caps(mode,cap,LM,C);
 
    dynalloc_logp = 6;
    total_bits<<=BITRES;
@@ -1722,46 +1801,39 @@ int celt_encode_with_ec(CELTEncoder * OP
       /* Making dynalloc more likely */
       if (j)
          dynalloc_logp = IMAX(2, dynalloc_logp-1);
       offsets[i] = boost;
    }
 
    if (C==2)
    {
-      int effectiveRate;
-
       static const opus_val16 intensity_thresholds[21]=
       /* 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19  20  off*/
-        { 16,21,23,25,27,29,31,33,35,38,42,46,50,54,58,63,68,75,84,102,130};
+        {  1, 2, 3, 4, 5, 6, 7, 8,16,24,36,44,50,56,62,67,72,79,88,106,134};
       static const opus_val16 intensity_histeresis[21]=
-        {  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 4, 5, 6,  8, 12};
+        {  1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 4, 5, 6,  8, 8};
 
       /* Always use MS for 2.5 ms frames until we can do a better analysis */
       if (LM!=0)
          dual_stereo = stereo_analysis(mode, X, LM, N);
 
-      /* Account for coarse energy */
-      effectiveRate = (8*effectiveBytes - 80)>>LM;
-
-      /* effectiveRate in kb/s */
-      effectiveRate = 2*effectiveRate/5;
-
-      st->intensity = hysteresis_decision((opus_val16)effectiveRate, intensity_thresholds, intensity_histeresis, 21, st->intensity);
+      /* Scale to kb/s BEFORE narrowing: equiv_rate (opus_int32, up to 510000) does not fit a 16-bit opus_val16. */
+      st->intensity = hysteresis_decision((opus_val16)(equiv_rate/1000), intensity_thresholds, intensity_histeresis, 21, st->intensity);
       st->intensity = IMIN(st->end,IMAX(st->start, st->intensity));
    }
 
    alloc_trim = 5;
    if (tell+(6<<BITRES) <= total_bits - total_boost)
    {
       if (st->lfe)
          alloc_trim = 5;
       else
          alloc_trim = alloc_trim_analysis(mode, X, bandLogE,
-            st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity);
+            st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity, surround_trim);
       ec_enc_icdf(enc, alloc_trim, trim_icdf, 7);
       tell = ec_tell_frac(enc);
    }
 
    /* Variable bitrate */
    if (vbr_rate>0)
    {
      opus_val16 alpha;
@@ -1774,17 +1846,17 @@ int celt_encode_with_ec(CELTEncoder * OP
      /* Don't attempt to use more than 510 kb/s, even for frames smaller than 20 ms.
         The CELT allocator will just not be able to use more than that anyway. */
      nbCompressedBytes = IMIN(nbCompressedBytes,1275>>(3-LM));
      base_target = vbr_rate - ((40*C+20)<<BITRES);
 
      if (st->constrained_vbr)
         base_target += (st->vbr_offset>>lm_diff);
 
-     target = compute_vbr(mode, &st->analysis, base_target, LM, st->bitrate,
+     target = compute_vbr(mode, &st->analysis, base_target, LM, equiv_rate,
            st->lastCodedBands, C, st->intensity, st->constrained_vbr,
            st->stereo_saving, tot_boost, tf_estimate, pitch_change, maxDepth,
            st->variable_duration, st->lfe, st->energy_mask!=NULL, surround_masking,
            temporal_vbr);
 
      /* The current offset is removed from the target and the space used
         so far is added*/
      target=target+tell;
@@ -1854,27 +1926,27 @@ int celt_encode_with_ec(CELTEncoder * OP
    ALLOC(pulses, nbEBands, int);
    ALLOC(fine_priority, nbEBands, int);
 
    /* bits =           packet size                    - where we are - safety*/
    bits = (((opus_int32)nbCompressedBytes*8)<<BITRES) - ec_tell_frac(enc) - 1;
    anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;
    bits -= anti_collapse_rsv;
    signalBandwidth = st->end-1;
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
    if (st->analysis.valid)
    {
       int min_bandwidth;
-      if (st->bitrate < (opus_int32)32000*C)
+      if (equiv_rate < (opus_int32)32000*C)
          min_bandwidth = 13;
-      else if (st->bitrate < (opus_int32)48000*C)
+      else if (equiv_rate < (opus_int32)48000*C)
          min_bandwidth = 16;
-      else if (st->bitrate < (opus_int32)60000*C)
+      else if (equiv_rate < (opus_int32)60000*C)
          min_bandwidth = 18;
-      else  if (st->bitrate < (opus_int32)80000*C)
+      else  if (equiv_rate < (opus_int32)80000*C)
          min_bandwidth = 19;
       else
          min_bandwidth = 20;
       signalBandwidth = IMAX(st->analysis.bandwidth, min_bandwidth);
    }
 #endif
    if (st->lfe)
       signalBandwidth = 1;
@@ -2256,22 +2328,16 @@ int opus_custom_encoder_ctl(CELTEncoder 
       }
       break;
       case OPUS_SET_LFE_REQUEST:
       {
           opus_int32 value = va_arg(ap, opus_int32);
           st->lfe = value;
       }
       break;
-      case OPUS_SET_ENERGY_SAVE_REQUEST:
-      {
-          opus_val16 *value = va_arg(ap, opus_val16*);
-          st->energy_save=value;
-      }
-      break;
       case OPUS_SET_ENERGY_MASK_REQUEST:
       {
           opus_val16 *value = va_arg(ap, opus_val16*);
           st->energy_mask = value;
       }
       break;
       default:
          goto bad_request;
--- a/media/libopus/celt/celt_lpc.c
+++ b/media/libopus/celt/celt_lpc.c
@@ -221,17 +221,18 @@ void celt_iir(const opus_val32 *_x,
 }
 
 int _celt_autocorr(
                    const opus_val16 *x,   /*  in: [0...n-1] samples x   */
                    opus_val32       *ac,  /* out: [0...lag-1] ac values */
                    const opus_val16       *window,
                    int          overlap,
                    int          lag,
-                   int          n
+                   int          n,
+                   int          arch
                   )
 {
    opus_val32 d;
    int i, k;
    int fastN=n-lag;
    int shift;
    const opus_val16 *xptr;
    VARDECL(opus_val16, xx);
@@ -270,17 +271,17 @@ int _celt_autocorr(
       {
          for(i=0;i<n;i++)
             xx[i] = PSHR32(xptr[i], shift);
          xptr = xx;
       } else
          shift = 0;
    }
 #endif
-   celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1);
+   celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch);
    for (k=0;k<=lag;k++)
    {
       for (i = k+fastN, d = 0; i < n; i++)
          d = MAC16_16(d, xptr[i], xptr[i-k]);
       ac[k] += d;
    }
 #ifdef FIXED_POINT
    shift = 2*shift;
--- a/media/libopus/celt/celt_lpc.h
+++ b/media/libopus/celt/celt_lpc.h
@@ -43,11 +43,12 @@ void celt_fir(const opus_val16 *x,
 
 void celt_iir(const opus_val32 *x,
          const opus_val16 *den,
          opus_val32 *y,
          int N,
          int ord,
          opus_val16 *mem);
 
-int _celt_autocorr(const opus_val16 *x, opus_val32 *ac, const opus_val16 *window, int overlap, int lag, int n);
+int _celt_autocorr(const opus_val16 *x, opus_val32 *ac,
+         const opus_val16 *window, int overlap, int lag, int n, int arch);
 
 #endif /* PLC_H */
--- a/media/libopus/celt/cpu_support.h
+++ b/media/libopus/celt/cpu_support.h
@@ -23,29 +23,32 @@
    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
 #ifndef CPU_SUPPORT_H
 #define CPU_SUPPORT_H
 
-#if defined(OPUS_HAVE_RTCD) && defined(ARMv4_ASM)
+#include "opus_types.h"
+#include "opus_defines.h"
+
+#if defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_ASM)
 #include "arm/armcpu.h"
 
 /* We currently support 4 ARM variants:
  * arch[0] -> ARMv4
  * arch[1] -> ARMv5E
  * arch[2] -> ARMv6
  * arch[3] -> NEON
  */
 #define OPUS_ARCHMASK 3
 
 #else
 #define OPUS_ARCHMASK 0
 
-static inline int opus_select_arch(void)
+static OPUS_INLINE int opus_select_arch(void)
 {
   return 0;
 }
 #endif
 
 #endif
--- a/media/libopus/celt/cwrs.c
+++ b/media/libopus/celt/cwrs.c
@@ -405,25 +405,25 @@ static const opus_uint32 CELT_PVQ_U_DATA
   3248227095U,
   /*N=13, K=13...16:*/
   251595969, 579168825, 1267854873, 2653649025U,
   /*N=14, K=14:*/
   1409933619
 };
 
 #if defined(CUSTOM_MODES)
-const opus_uint32 *const CELT_PVQ_U_ROW[15]={
+static const opus_uint32 *const CELT_PVQ_U_ROW[15]={
   CELT_PVQ_U_DATA+   0,CELT_PVQ_U_DATA+ 208,CELT_PVQ_U_DATA+ 415,
   CELT_PVQ_U_DATA+ 621,CELT_PVQ_U_DATA+ 826,CELT_PVQ_U_DATA+1030,
   CELT_PVQ_U_DATA+1233,CELT_PVQ_U_DATA+1336,CELT_PVQ_U_DATA+1389,
   CELT_PVQ_U_DATA+1421,CELT_PVQ_U_DATA+1441,CELT_PVQ_U_DATA+1455,
   CELT_PVQ_U_DATA+1464,CELT_PVQ_U_DATA+1470,CELT_PVQ_U_DATA+1473
 };
 #else
-const opus_uint32 *const CELT_PVQ_U_ROW[15]={
+static const opus_uint32 *const CELT_PVQ_U_ROW[15]={
   CELT_PVQ_U_DATA+   0,CELT_PVQ_U_DATA+ 176,CELT_PVQ_U_DATA+ 351,
   CELT_PVQ_U_DATA+ 525,CELT_PVQ_U_DATA+ 698,CELT_PVQ_U_DATA+ 870,
   CELT_PVQ_U_DATA+1041,CELT_PVQ_U_DATA+1131,CELT_PVQ_U_DATA+1178,
   CELT_PVQ_U_DATA+1207,CELT_PVQ_U_DATA+1226,CELT_PVQ_U_DATA+1240,
   CELT_PVQ_U_DATA+1248,CELT_PVQ_U_DATA+1254,CELT_PVQ_U_DATA+1257
 };
 #endif
 
@@ -529,33 +529,33 @@ void decode_pulses(int *_y,int _n,int _k
   cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y);
 }
 
 #else /* SMALL_FOOTPRINT */
 
 /*Computes the next row/column of any recurrence that obeys the relation
    u[i][j]=u[i-1][j]+u[i][j-1]+u[i-1][j-1].
   _ui0 is the base case for the new row/column.*/
-static inline void unext(opus_uint32 *_ui,unsigned _len,opus_uint32 _ui0){
+static OPUS_INLINE void unext(opus_uint32 *_ui,unsigned _len,opus_uint32 _ui0){
   opus_uint32 ui1;
   unsigned      j;
   /*This do-while will overrun the array if we don't have storage for at least
      2 values.*/
   j=1; do {
     ui1=UADD32(UADD32(_ui[j],_ui[j-1]),_ui0);
     _ui[j-1]=_ui0;
     _ui0=ui1;
   } while (++j<_len);
   _ui[j-1]=_ui0;
 }
 
 /*Computes the previous row/column of any recurrence that obeys the relation
    u[i-1][j]=u[i][j]-u[i][j-1]-u[i-1][j-1].
   _ui0 is the base case for the new row/column.*/
-static inline void uprev(opus_uint32 *_ui,unsigned _n,opus_uint32 _ui0){
+static OPUS_INLINE void uprev(opus_uint32 *_ui,unsigned _n,opus_uint32 _ui0){
   opus_uint32 ui1;
   unsigned      j;
   /*This do-while will overrun the array if we don't have storage for at least
      2 values.*/
   j=1; do {
     ui1=USUB32(USUB32(_ui[j],_ui[j-1]),_ui0);
     _ui[j-1]=_ui0;
     _ui0=ui1;
@@ -612,26 +612,26 @@ static void cwrsi(int _n,int _k,opus_uin
   }
   while(++j<_n);
 }
 
 /*Returns the index of the given combination of K elements chosen from a set
    of size 1 with associated sign bits.
   _y: The vector of pulses, whose sum of absolute values is K.
   _k: Returns K.*/
-static inline opus_uint32 icwrs1(const int *_y,int *_k){
+static OPUS_INLINE opus_uint32 icwrs1(const int *_y,int *_k){
   *_k=abs(_y[0]);
   return _y[0]<0;
 }
 
 /*Returns the index of the given combination of K elements chosen from a set
    of size _n with associated sign bits.
   _y:  The vector of pulses, whose sum of absolute values must be _k.
   _nc: Returns V(_n,_k).*/
-static inline opus_uint32 icwrs(int _n,int _k,opus_uint32 *_nc,const int *_y,
+static OPUS_INLINE opus_uint32 icwrs(int _n,int _k,opus_uint32 *_nc,const int *_y,
  opus_uint32 *_u){
   opus_uint32 i;
   int         j;
   int         k;
   /*We can't unroll the first two iterations of the loop unless _n>=2.*/
   celt_assert(_n>=2);
   _u[0]=0;
   for(k=1;k<=_k+1;k++)_u[k]=(k<<1)-1;
--- a/media/libopus/celt/ecintrin.h
+++ b/media/libopus/celt/ecintrin.h
@@ -28,17 +28,17 @@
 /*Some common macros for potential platform-specific optimization.*/
 #include "opus_types.h"
 #include <math.h>
 #include <limits.h>
 #include "arch.h"
 #if !defined(_ecintrin_H)
 # define _ecintrin_H (1)
 
-/*Some specific platforms may have optimized intrinsic or inline assembly
+/*Some specific platforms may have optimized intrinsic or inline assembly
    versions of these functions which can substantially improve performance.
   We define macros for them to allow easy incorporation of these non-ANSI
    features.*/
 
 /*Modern gcc (4.x) can compile the naive versions of min and max with cmov if
    given an appropriate architecture, but the branchless bit-twiddling versions
    are just as fast, and do not require any special target architecture.
   Earlier gcc versions (3.x) compiled both code to the same assembly
--- a/media/libopus/celt/entcode.h
+++ b/media/libopus/celt/entcode.h
@@ -21,16 +21,17 @@
    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
 #include "opus_types.h"
+#include "opus_defines.h"
 
 #if !defined(_entcode_H)
 # define _entcode_H (1)
 # include <limits.h>
 # include <stddef.h>
 # include "ecintrin.h"
 
 /*OPT: ec_window must be at least 32 bits, but if you have fast arithmetic on a
@@ -78,35 +79,35 @@ struct ec_ctx{
      In the encoder: the number of oustanding carry propagating symbols.*/
    opus_uint32    ext;
    /*A buffered input/output symbol, awaiting carry propagation.*/
    int            rem;
    /*Nonzero if an error occurred.*/
    int            error;
 };
 
-static inline opus_uint32 ec_range_bytes(ec_ctx *_this){
+static OPUS_INLINE opus_uint32 ec_range_bytes(ec_ctx *_this){
   return _this->offs;
 }
 
-static inline unsigned char *ec_get_buffer(ec_ctx *_this){
+static OPUS_INLINE unsigned char *ec_get_buffer(ec_ctx *_this){
   return _this->buf;
 }
 
-static inline int ec_get_error(ec_ctx *_this){
+static OPUS_INLINE int ec_get_error(ec_ctx *_this){
   return _this->error;
 }
 
 /*Returns the number of bits "used" by the encoded or decoded symbols so far.
   This same number can be computed in either the encoder or the decoder, and is
    suitable for making coding decisions.
   Return: The number of bits.
           This will always be slightly larger than the exact value (e.g., all
            rounding error is in the positive direction).*/
-static inline int ec_tell(ec_ctx *_this){
+static OPUS_INLINE int ec_tell(ec_ctx *_this){
   return _this->nbits_total-EC_ILOG(_this->rng);
 }
 
 /*Returns the number of bits "used" by the encoded or decoded symbols so far.
   This same number can be computed in either the encoder or the decoder, and is
    suitable for making coding decisions.
   Return: The number of bits scaled by 2**BITRES.
           This will always be slightly larger than the exact value (e.g., all
--- a/media/libopus/celt/fixed_debug.h
+++ b/media/libopus/celt/fixed_debug.h
@@ -28,19 +28,19 @@
    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
 #ifndef FIXED_DEBUG_H
 #define FIXED_DEBUG_H
 
 #include <stdio.h>
+#include "opus_defines.h"
 
 #ifdef CELT_C
-#include "opus_defines.h"
 OPUS_EXPORT opus_int64 celt_mips=0;
 #else
 extern opus_int64 celt_mips;
 #endif
 
 #define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b))
 #define MULT32_32_Q31(a,b) ADD32(ADD32(SHL32(MULT16_16(SHR32((a),16),SHR((b),16)),1), SHR32(MULT16_16SU(SHR32((a),16),((b)&0x0000ffff)),15)), SHR32(MULT16_16SU(SHR32((b),16),((a)&0x0000ffff)),15))
 
@@ -54,17 +54,17 @@ extern opus_int64 celt_mips;
 
 #define VERIFY_SHORT(x) ((x)<=32767&&(x)>=-32768)
 #define VERIFY_INT(x) ((x)<=2147483647LL&&(x)>=-2147483648LL)
 #define VERIFY_UINT(x) ((x)<=(2147483647LLU<<1))
 
 #define SHR(a,b) SHR32(a,b)
 #define PSHR(a,b) PSHR32(a,b)
 
-static inline short NEG16(int x)
+static OPUS_INLINE short NEG16(int x)
 {
    int res;
    if (!VERIFY_SHORT(x))
    {
       fprintf (stderr, "NEG16: input is not short: %d\n", (int)x);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
@@ -75,17 +75,17 @@ static inline short NEG16(int x)
       fprintf (stderr, "NEG16: output is not short: %d\n", (int)res);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
    }
    celt_mips++;
    return res;
 }
-static inline int NEG32(opus_int64 x)
+static OPUS_INLINE int NEG32(opus_int64 x)
 {
    opus_int64 res;
    if (!VERIFY_INT(x))
    {
       fprintf (stderr, "NEG16: input is not int: %d\n", (int)x);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
@@ -98,49 +98,49 @@ static inline int NEG32(opus_int64 x)
       celt_assert(0);
 #endif
    }
    celt_mips+=2;
    return res;
 }
 
 #define EXTRACT16(x) EXTRACT16_(x, __FILE__, __LINE__)
-static inline short EXTRACT16_(int x, char *file, int line)
+static OPUS_INLINE short EXTRACT16_(int x, char *file, int line)
 {
    int res;
    if (!VERIFY_SHORT(x))
    {
       fprintf (stderr, "EXTRACT16: input is not short: %d in %s: line %d\n", x, file, line);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
    }
    res = x;
    celt_mips++;
    return res;
 }
 
 #define EXTEND32(x) EXTEND32_(x, __FILE__, __LINE__)
-static inline int EXTEND32_(int x, char *file, int line)
+static OPUS_INLINE int EXTEND32_(int x, char *file, int line)
 {
    int res;
    if (!VERIFY_SHORT(x))
    {
       fprintf (stderr, "EXTEND32: input is not short: %d in %s: line %d\n", x, file, line);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
    }
    res = x;
    celt_mips++;
    return res;
 }
 
 #define SHR16(a, shift) SHR16_(a, shift, __FILE__, __LINE__)
-static inline short SHR16_(int a, int shift, char *file, int line)
+static OPUS_INLINE short SHR16_(int a, int shift, char *file, int line)
 {
    int res;
    if (!VERIFY_SHORT(a) || !VERIFY_SHORT(shift))
    {
       fprintf (stderr, "SHR16: inputs are not short: %d >> %d in %s: line %d\n", a, shift, file, line);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
@@ -152,17 +152,17 @@ static inline short SHR16_(int a, int sh
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
    }
    celt_mips++;
    return res;
 }
 #define SHL16(a, shift) SHL16_(a, shift, __FILE__, __LINE__)
-static inline short SHL16_(int a, int shift, char *file, int line)
+static OPUS_INLINE short SHL16_(int a, int shift, char *file, int line)
 {
    int res;
    if (!VERIFY_SHORT(a) || !VERIFY_SHORT(shift))
    {
       fprintf (stderr, "SHL16: inputs are not short: %d %d in %s: line %d\n", a, shift, file, line);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
@@ -174,17 +174,17 @@ static inline short SHL16_(int a, int sh
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
    }
    celt_mips++;
    return res;
 }
 
-static inline int SHR32(opus_int64 a, int shift)
+static OPUS_INLINE int SHR32(opus_int64 a, int shift)
 {
    opus_int64  res;
    if (!VERIFY_INT(a) || !VERIFY_SHORT(shift))
    {
       fprintf (stderr, "SHR32: inputs are not int: %d %d\n", (int)a, shift);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
@@ -196,17 +196,17 @@ static inline int SHR32(opus_int64 a, in
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
    }
    celt_mips+=2;
    return res;
 }
 #define SHL32(a, shift) SHL32_(a, shift, __FILE__, __LINE__)
-static inline int SHL32_(opus_int64 a, int shift, char *file, int line)
+static OPUS_INLINE int SHL32_(opus_int64 a, int shift, char *file, int line)
 {
    opus_int64  res;
    if (!VERIFY_INT(a) || !VERIFY_SHORT(shift))
    {
       fprintf (stderr, "SHL32: inputs are not int: %lld %d in %s: line %d\n", a, shift, file, line);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
@@ -229,17 +229,17 @@ static inline int SHL32_(opus_int64 a, i
 #define ROUND16(x,a) (celt_mips--,EXTRACT16(PSHR32((x),(a))))
 #define HALF16(x)  (SHR16(x,1))
 #define HALF32(x)  (SHR32(x,1))
 
 //#define SHR(a,shift) ((a) >> (shift))
 //#define SHL(a,shift) ((a) << (shift))
 
 #define ADD16(a, b) ADD16_(a, b, __FILE__, __LINE__)
-static inline short ADD16_(int a, int b, char *file, int line)
+static OPUS_INLINE short ADD16_(int a, int b, char *file, int line)
 {
    int res;
    if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
    {
       fprintf (stderr, "ADD16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
@@ -252,17 +252,17 @@ static inline short ADD16_(int a, int b,
       celt_assert(0);
 #endif
    }
    celt_mips++;
    return res;
 }
 
 #define SUB16(a, b) SUB16_(a, b, __FILE__, __LINE__)
-static inline short SUB16_(int a, int b, char *file, int line)
+static OPUS_INLINE short SUB16_(int a, int b, char *file, int line)
 {
    int res;
    if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
    {
       fprintf (stderr, "SUB16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
@@ -275,17 +275,17 @@ static inline short SUB16_(int a, int b,
       celt_assert(0);
 #endif
    }
    celt_mips++;
    return res;
 }
 
 #define ADD32(a, b) ADD32_(a, b, __FILE__, __LINE__)
-static inline int ADD32_(opus_int64 a, opus_int64 b, char *file, int line)
+static OPUS_INLINE int ADD32_(opus_int64 a, opus_int64 b, char *file, int line)
 {
    opus_int64 res;
    if (!VERIFY_INT(a) || !VERIFY_INT(b))
    {
       fprintf (stderr, "ADD32: inputs are not int: %d %d in %s: line %d\n", (int)a, (int)b, file, line);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
@@ -298,17 +298,17 @@ static inline int ADD32_(opus_int64 a, o
       celt_assert(0);
 #endif
    }
    celt_mips+=2;
    return res;
 }
 
 #define SUB32(a, b) SUB32_(a, b, __FILE__, __LINE__)
-static inline int SUB32_(opus_int64 a, opus_int64 b, char *file, int line)
+static OPUS_INLINE int SUB32_(opus_int64 a, opus_int64 b, char *file, int line)
 {
    opus_int64 res;
    if (!VERIFY_INT(a) || !VERIFY_INT(b))
    {
       fprintf (stderr, "SUB32: inputs are not int: %d %d in %s: line %d\n", (int)a, (int)b, file, line);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
@@ -322,17 +322,17 @@ static inline int SUB32_(opus_int64 a, o
 #endif
    }
    celt_mips+=2;
    return res;
 }
 
 #undef UADD32
 #define UADD32(a, b) UADD32_(a, b, __FILE__, __LINE__)
-static inline unsigned int UADD32_(opus_uint64 a, opus_uint64 b, char *file, int line)
+static OPUS_INLINE unsigned int UADD32_(opus_uint64 a, opus_uint64 b, char *file, int line)
 {
    opus_uint64 res;
    if (!VERIFY_UINT(a) || !VERIFY_UINT(b))
    {
       fprintf (stderr, "UADD32: inputs are not uint32: %llu %llu in %s: line %d\n", a, b, file, line);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
@@ -346,17 +346,17 @@ static inline unsigned int UADD32_(opus_
 #endif
    }
    celt_mips+=2;
    return res;
 }
 
 #undef USUB32
 #define USUB32(a, b) USUB32_(a, b, __FILE__, __LINE__)
-static inline unsigned int USUB32_(opus_uint64 a, opus_uint64 b, char *file, int line)
+static OPUS_INLINE unsigned int USUB32_(opus_uint64 a, opus_uint64 b, char *file, int line)
 {
    opus_uint64 res;
    if (!VERIFY_UINT(a) || !VERIFY_UINT(b))
    {
       fprintf (stderr, "USUB32: inputs are not uint32: %llu %llu in %s: line %d\n", a, b, file, line);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
@@ -376,17 +376,17 @@ static inline unsigned int USUB32_(opus_
       celt_assert(0);
 #endif
    }
    celt_mips+=2;
    return res;
 }
 
 /* result fits in 16 bits */
-static inline short MULT16_16_16(int a, int b)
+static OPUS_INLINE short MULT16_16_16(int a, int b)
 {
    int res;
    if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
    {
       fprintf (stderr, "MULT16_16_16: inputs are not short: %d %d\n", a, b);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
@@ -399,17 +399,17 @@ static inline short MULT16_16_16(int a, 
       celt_assert(0);
 #endif
    }
    celt_mips++;
    return res;
 }
 
 #define MULT16_16(a, b) MULT16_16_(a, b, __FILE__, __LINE__)
-static inline int MULT16_16_(int a, int b, char *file, int line)
+static OPUS_INLINE int MULT16_16_(int a, int b, char *file, int line)
 {
    opus_int64 res;
    if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
    {
       fprintf (stderr, "MULT16_16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
@@ -424,17 +424,17 @@ static inline int MULT16_16_(int a, int 
    }
    celt_mips++;
    return res;
 }
 
 #define MAC16_16(c,a,b)     (celt_mips-=2,ADD32((c),MULT16_16((a),(b))))
 
 #define MULT16_32_QX(a, b, Q) MULT16_32_QX_(a, b, Q, __FILE__, __LINE__)
-static inline int MULT16_32_QX_(int a, opus_int64 b, int Q, char *file, int line)
+static OPUS_INLINE int MULT16_32_QX_(int a, opus_int64 b, int Q, char *file, int line)
 {
    opus_int64 res;
    if (!VERIFY_SHORT(a) || !VERIFY_INT(b))
    {
       fprintf (stderr, "MULT16_32_Q%d: inputs are not short+int: %d %d in %s: line %d\n", Q, (int)a, (int)b, file, line);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
@@ -457,17 +457,17 @@ static inline int MULT16_32_QX_(int a, o
    if (Q==15)
       celt_mips+=3;
    else
       celt_mips+=4;
    return res;
 }
 
 #define MULT16_32_PX(a, b, Q) MULT16_32_PX_(a, b, Q, __FILE__, __LINE__)
-static inline int MULT16_32_PX_(int a, opus_int64 b, int Q, char *file, int line)
+static OPUS_INLINE int MULT16_32_PX_(int a, opus_int64 b, int Q, char *file, int line)
 {
    opus_int64 res;
    if (!VERIFY_SHORT(a) || !VERIFY_INT(b))
    {
       fprintf (stderr, "MULT16_32_P%d: inputs are not short+int: %d %d in %s: line %d\n\n", Q, (int)a, (int)b, file, line);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
@@ -492,27 +492,37 @@ static inline int MULT16_32_PX_(int a, o
    else
       celt_mips+=5;
    return res;
 }
 
 #define MULT16_32_Q15(a,b) MULT16_32_QX(a,b,15)
 #define MAC16_32_Q15(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q15((a),(b))))
 
-static inline int SATURATE(int a, int b)
+static OPUS_INLINE int SATURATE(int a, int b)
 {
    if (a>b)
       a=b;
    if (a<-b)
       a = -b;
    celt_mips+=3;
    return a;
 }
 
-static inline int MULT16_16_Q11_32(int a, int b)
+static OPUS_INLINE opus_int16 SATURATE16(opus_int32 a) /* clamp a to [-32768, 32767] */
+{
+   celt_mips+=3; /* counter is bumped by 3 on every call, whichever branch is taken */
+   if (a>32767)
+      return 32767;
+   else if (a<-32768)
+      return -32768;
+   else return a; /* already within the 16-bit range */
+}
+
+static OPUS_INLINE int MULT16_16_Q11_32(int a, int b)
 {
    opus_int64 res;
    if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
    {
       fprintf (stderr, "MULT16_16_Q11: inputs are not short: %d %d\n", a, b);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
@@ -524,17 +534,17 @@ static inline int MULT16_16_Q11_32(int a
       fprintf (stderr, "MULT16_16_Q11: output is not short: %d*%d=%d\n", (int)a, (int)b, (int)res);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
    }
    celt_mips+=3;
    return res;
 }
-static inline short MULT16_16_Q13(int a, int b)
+static OPUS_INLINE short MULT16_16_Q13(int a, int b)
 {
    opus_int64 res;
    if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
    {
       fprintf (stderr, "MULT16_16_Q13: inputs are not short: %d %d\n", a, b);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
@@ -546,17 +556,17 @@ static inline short MULT16_16_Q13(int a,
       fprintf (stderr, "MULT16_16_Q13: output is not short: %d*%d=%d\n", a, b, (int)res);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
    }
    celt_mips+=3;
    return res;
 }
-static inline short MULT16_16_Q14(int a, int b)
+static OPUS_INLINE short MULT16_16_Q14(int a, int b)
 {
    opus_int64 res;
    if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
    {
       fprintf (stderr, "MULT16_16_Q14: inputs are not short: %d %d\n", a, b);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
@@ -570,17 +580,17 @@ static inline short MULT16_16_Q14(int a,
       celt_assert(0);
 #endif
    }
    celt_mips+=3;
    return res;
 }
 
 #define MULT16_16_Q15(a, b) MULT16_16_Q15_(a, b, __FILE__, __LINE__)
-static inline short MULT16_16_Q15_(int a, int b, char *file, int line)
+static OPUS_INLINE short MULT16_16_Q15_(int a, int b, char *file, int line)
 {
    opus_int64 res;
    if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
    {
       fprintf (stderr, "MULT16_16_Q15: inputs are not short: %d %d in %s: line %d\n", a, b, file, line);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
@@ -593,17 +603,17 @@ static inline short MULT16_16_Q15_(int a
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
    }
    celt_mips+=1;
    return res;
 }
 
-static inline short MULT16_16_P13(int a, int b)
+static OPUS_INLINE short MULT16_16_P13(int a, int b)
 {
    opus_int64 res;
    if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
    {
       fprintf (stderr, "MULT16_16_P13: inputs are not short: %d %d\n", a, b);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
@@ -623,17 +633,17 @@ static inline short MULT16_16_P13(int a,
       fprintf (stderr, "MULT16_16_P13: output is not short: %d*%d=%d\n", a, b, (int)res);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
    }
    celt_mips+=4;
    return res;
 }
-static inline short MULT16_16_P14(int a, int b)
+static OPUS_INLINE short MULT16_16_P14(int a, int b)
 {
    opus_int64 res;
    if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
    {
       fprintf (stderr, "MULT16_16_P14: inputs are not short: %d %d\n", a, b);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
@@ -653,17 +663,17 @@ static inline short MULT16_16_P14(int a,
       fprintf (stderr, "MULT16_16_P14: output is not short: %d*%d=%d\n", a, b, (int)res);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
    }
    celt_mips+=4;
    return res;
 }
-static inline short MULT16_16_P15(int a, int b)
+static OPUS_INLINE short MULT16_16_P15(int a, int b)
 {
    opus_int64 res;
    if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
    {
       fprintf (stderr, "MULT16_16_P15: inputs are not short: %d %d\n", a, b);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
@@ -686,17 +696,17 @@ static inline short MULT16_16_P15(int a,
 #endif
    }
    celt_mips+=2;
    return res;
 }
 
 #define DIV32_16(a, b) DIV32_16_(a, b, __FILE__, __LINE__)
 
-static inline int DIV32_16_(opus_int64 a, opus_int64 b, char *file, int line)
+static OPUS_INLINE int DIV32_16_(opus_int64 a, opus_int64 b, char *file, int line)
 {
    opus_int64 res;
    if (b==0)
    {
       fprintf(stderr, "DIV32_16: divide by zero: %d/%d in %s: line %d\n", (int)a, (int)b, file, line);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
@@ -721,17 +731,17 @@ static inline int DIV32_16_(opus_int64 a
       celt_assert(0);
 #endif
    }
    celt_mips+=35;
    return res;
 }
 
 #define DIV32(a, b) DIV32_(a, b, __FILE__, __LINE__)
-static inline int DIV32_(opus_int64 a, opus_int64 b, char *file, int line)
+static OPUS_INLINE int DIV32_(opus_int64 a, opus_int64 b, char *file, int line)
 {
    opus_int64 res;
    if (b==0)
    {
       fprintf(stderr, "DIV32: divide by zero: %d/%d in %s: line %d\n", (int)a, (int)b, file, line);
 #ifdef FIXED_DEBUG_ASSERT
       celt_assert(0);
 #endif
--- a/media/libopus/celt/fixed_generic.h
+++ b/media/libopus/celt/fixed_generic.h
@@ -35,17 +35,17 @@
 
 /** Multiply a 16-bit signed value by a 16-bit unsigned value. The result is a 32-bit signed value */
 #define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b))
 
 /** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
 #define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16))
 
 /** 16x32 multiplication, followed by a 16-bit shift right (round-to-nearest). Results fits in 32 bits */
-#define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16((a),((b)&0x0000ffff)),16))
+#define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16SU((a),((b)&0x0000ffff)),16))
 
 /** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
 #define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
 
 /** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
 #define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
 
 /** Compile-time conversion of float constant to 16-bit value */
@@ -111,16 +111,17 @@
 /** 16x16 multiply-add where the result fits in 32 bits */
 #define MAC16_16(c,a,b) (ADD32((c),MULT16_16((a),(b))))
 /** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
     b must fit in 31 bits.
     Result fits in 32 bits. */
 #define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))
 
 #define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11))
+#define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11))
 #define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13))
 #define MULT16_16_Q14(a,b) (SHR(MULT16_16((a),(b)),14))
 #define MULT16_16_Q15(a,b) (SHR(MULT16_16((a),(b)),15))
 
 #define MULT16_16_P13(a,b) (SHR(ADD32(4096,MULT16_16((a),(b))),13))
 #define MULT16_16_P14(a,b) (SHR(ADD32(8192,MULT16_16((a),(b))),14))
 #define MULT16_16_P15(a,b) (SHR(ADD32(16384,MULT16_16((a),(b))),15))
 
--- a/media/libopus/celt/float_cast.h
+++ b/media/libopus/celt/float_cast.h
@@ -96,17 +96,17 @@
         __inline long int float2int(float value)
         {
                 return _mm_cvtss_si32(_mm_load_ss(&value));
         }
 #elif (defined(_MSC_VER) && _MSC_VER >= 1400) && (defined (WIN32) || defined (_WIN32))
         #include <math.h>
 
         /*      Win32 doesn't seem to have these functions.
-        **      Therefore implement inline versions of these functions here.
+        **      Therefore implement inline versions of these functions here.
         */
 
         __inline long int
         float2int (float flt)
         {       int intgr;
 
                 _asm
                 {       fld flt
@@ -123,17 +123,17 @@
         #warning "Don't have the functions lrint() and lrintf ()."
         #warning "Replacing these functions with a standard C cast."
 #endif /* __STDC_VERSION__ >= 199901L */
         #include <math.h>
         #define float2int(flt) ((int)(floor(.5+flt)))
 #endif
 
 #ifndef DISABLE_FLOAT_API
-static inline opus_int16 FLOAT2INT16(float x)
+static OPUS_INLINE opus_int16 FLOAT2INT16(float x)
 {
    x = x*CELT_SIG_SCALE;
    x = MAX32(x, -32768);
    x = MIN32(x, 32767);
    return (opus_int16)float2int(x);
 }
 #endif /* DISABLE_FLOAT_API */
 
--- a/media/libopus/celt/mathops.c
+++ b/media/libopus/celt/mathops.c
@@ -134,17 +134,17 @@ opus_val32 celt_sqrt(opus_val32 x)
    return rt;
 }
 
 #define L1 32767
 #define L2 -7651
 #define L3 8277
 #define L4 -626
 
-static inline opus_val16 _celt_cos_pi_2(opus_val16 x)
+static OPUS_INLINE opus_val16 _celt_cos_pi_2(opus_val16 x)
 {
    opus_val16 x2;
 
    x2 = MULT16_16_P15(x,x);
    return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2
                                                                                 ))))))));
 }
 
--- a/media/libopus/celt/mathops.h
+++ b/media/libopus/celt/mathops.h
@@ -39,33 +39,33 @@
 #include "os_support.h"
 
 /* Multiplies two 16-bit fractional values. Bit-exactness of this macro is important */
 #define FRAC_MUL16(a,b) ((16384+((opus_int32)(opus_int16)(a)*(opus_int16)(b)))>>15)
 
 unsigned isqrt32(opus_uint32 _val);
 
 #ifndef OVERRIDE_CELT_MAXABS16
-static inline opus_val32 celt_maxabs16(const opus_val16 *x, int len)
+static OPUS_INLINE opus_val32 celt_maxabs16(const opus_val16 *x, int len)
 {
    int i;
    opus_val16 maxval = 0;
    opus_val16 minval = 0;
    for (i=0;i<len;i++)
    {
       maxval = MAX16(maxval, x[i]);
       minval = MIN16(minval, x[i]);
    }
    return MAX32(EXTEND32(maxval),-EXTEND32(minval));
 }
 #endif
 
 #ifndef OVERRIDE_CELT_MAXABS32
 #ifdef FIXED_POINT
-static inline opus_val32 celt_maxabs32(const opus_val32 *x, int len)
+static OPUS_INLINE opus_val32 celt_maxabs32(const opus_val32 *x, int len)
 {
    int i;
    opus_val32 maxval = 0;
    opus_val32 minval = 0;
    for (i=0;i<len;i++)
    {
       maxval = MAX32(maxval, x[i]);
       minval = MIN32(minval, x[i]);
@@ -90,17 +90,17 @@ static inline opus_val32 celt_maxabs32(c
 #define frac_div32(a,b) ((float)(a)/(b))
 
 #ifdef FLOAT_APPROX
 
 /* Note: This assumes radix-2 floating point with the exponent at bits 23..30 and an offset of 127
          denorm, +/- inf and NaN are *not* handled */
 
 /** Base-2 log approximation (log2(x)). */
-static inline float celt_log2(float x)
+static OPUS_INLINE float celt_log2(float x)
 {
    int integer;
    float frac;
    union {
       float f;
       opus_uint32 i;
    } in;
    in.f = x;
@@ -108,17 +108,17 @@ static inline float celt_log2(float x)
    in.i -= integer<<23;
    frac = in.f - 1.5f;
    frac = -0.41445418f + frac*(0.95909232f
           + frac*(-0.33951290f + frac*0.16541097f));
    return 1+integer+frac;
 }
 
 /** Base-2 exponential approximation (2^x). */
-static inline float celt_exp2(float x)
+static OPUS_INLINE float celt_exp2(float x)
 {
    int integer;
    float frac;
    union {
       float f;
       opus_uint32 i;
    } res;
    integer = floor(x);
@@ -140,37 +140,38 @@ static inline float celt_exp2(float x)
 #endif
 
 #ifdef FIXED_POINT
 
 #include "os_support.h"
 
 #ifndef OVERRIDE_CELT_ILOG2
 /** Integer log in base2. Undefined for zero and negative numbers */
-static inline opus_int16 celt_ilog2(opus_int32 x)
+static OPUS_INLINE opus_int16 celt_ilog2(opus_int32 x)
 {
    celt_assert2(x>0, "celt_ilog2() only defined for strictly positive numbers");
    return EC_ILOG(x)-1;
 }
 #endif
 
 
 /** Integer log in base2. Defined for zero, but not for negative numbers */
-static inline opus_int16 celt_zlog2(opus_val32 x)
+static OPUS_INLINE opus_int16 celt_zlog2(opus_val32 x)
 {
    return x <= 0 ? 0 : celt_ilog2(x);
 }
 
 opus_val16 celt_rsqrt_norm(opus_val32 x);
 
 opus_val32 celt_sqrt(opus_val32 x);
 
 opus_val16 celt_cos_norm(opus_val32 x);
 
-static inline opus_val16 celt_log2(opus_val32 x)
+/** Base-2 logarithm approximation (log2(x)). (Q14 input, Q10 output) */
+static OPUS_INLINE opus_val16 celt_log2(opus_val32 x)
 {
    int i;
    opus_val16 n, frac;
    /* -0.41509302963303146, 0.9609890551383969, -0.31836011537636605,
        0.15530808010959576, -0.08556153059057618 */
    static const opus_val16 C[5] = {-6801+(1<<(13-DB_SHIFT)), 15746, -5217, 2545, -1401};
    if (x==0)
       return -32767;
@@ -186,24 +187,24 @@ static inline opus_val16 celt_log2(opus_
  K2 = 3-4*log(2)
  K3 = 3*log(2) - 2
 */
 #define D0 16383
 #define D1 22804
 #define D2 14819
 #define D3 10204
 
-static inline opus_val32 celt_exp2_frac(opus_val16 x)
+static OPUS_INLINE opus_val32 celt_exp2_frac(opus_val16 x)
 {
    opus_val16 frac;
    frac = SHL16(x, 4);
    return ADD16(D0, MULT16_16_Q15(frac, ADD16(D1, MULT16_16_Q15(frac, ADD16(D2 , MULT16_16_Q15(D3,frac))))));
 }
 /** Base-2 exponential approximation (2^x). (Q10 input, Q16 output) */
-static inline opus_val32 celt_exp2(opus_val16 x)
+static OPUS_INLINE opus_val32 celt_exp2(opus_val16 x)
 {
    int integer;
    opus_val16 frac;
    integer = SHR16(x,10);
    if (integer>14)
       return 0x7f000000;
    else if (integer < -15)
       return 0;
@@ -219,28 +220,28 @@ opus_val32 frac_div32(opus_val32 a, opus
 
 #define M1 32767
 #define M2 -21
 #define M3 -11943
 #define M4 4936
 
 /* Atan approximation using a 4th order polynomial. Input is in Q15 format
    and normalized by pi/4. Output is in Q15 format */
-static inline opus_val16 celt_atan01(opus_val16 x)
+static OPUS_INLINE opus_val16 celt_atan01(opus_val16 x)
 {
    return MULT16_16_P15(x, ADD32(M1, MULT16_16_P15(x, ADD32(M2, MULT16_16_P15(x, ADD32(M3, MULT16_16_P15(M4, x)))))));
 }
 
 #undef M1
 #undef M2
 #undef M3
 #undef M4
 
 /* atan2() approximation valid for positive input values */
-static inline opus_val16 celt_atan2p(opus_val16 y, opus_val16 x)
+static OPUS_INLINE opus_val16 celt_atan2p(opus_val16 y, opus_val16 x)
 {
    if (y < x)
    {
       opus_val32 arg;
       arg = celt_div(SHL32(EXTEND32(y),15),x);
       if (arg >= 32767)
          arg = 32767;
       return SHR16(celt_atan01(EXTRACT16(arg)),1);
--- a/media/libopus/celt/os_support.h
+++ b/media/libopus/celt/os_support.h
@@ -30,40 +30,43 @@
 
 #ifndef OS_SUPPORT_H
 #define OS_SUPPORT_H
 
 #ifdef CUSTOM_SUPPORT
 #  include "custom_support.h"
 #endif
 
+#include "opus_types.h"
+#include "opus_defines.h"
+
 #include <string.h>
 #include <stdio.h>
 #include <stdlib.h>
 
 /** Opus wrapper for malloc(). To do your own dynamic allocation, all you need to do is replace this function and opus_free */
 #ifndef OVERRIDE_OPUS_ALLOC
-static inline void *opus_alloc (size_t size)
+static OPUS_INLINE void *opus_alloc (size_t size)
 {
    return malloc(size);
 }
 #endif
 
 /** Same as celt_alloc(), except that the area is only needed inside a CELT call (might cause problem with wideband though) */
 #ifndef OVERRIDE_OPUS_ALLOC_SCRATCH
-static inline void *opus_alloc_scratch (size_t size)
+static OPUS_INLINE void *opus_alloc_scratch (size_t size)
 {
    /* Scratch space doesn't need to be cleared */
    return opus_alloc(size);
 }
 #endif
 
 /** Opus wrapper for free(). To do your own dynamic allocation, all you need to do is replace this function and opus_alloc */
 #ifndef OVERRIDE_OPUS_FREE
-static inline void opus_free (void *ptr)
+static OPUS_INLINE void opus_free (void *ptr)
 {
    free(ptr);
 }
 #endif
 
 /** Copy n bytes of memory from src to dst. The 0* term provides compile-time type checking  */
 #ifndef OVERRIDE_OPUS_COPY
 #define OPUS_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
--- a/media/libopus/celt/pitch.c
+++ b/media/libopus/celt/pitch.c
@@ -140,17 +140,17 @@ static void celt_fir5(const opus_val16 *
    mem[1]=mem1;
    mem[2]=mem2;
    mem[3]=mem3;
    mem[4]=mem4;
 }
 
 
 void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
-      int len, int C)
+      int len, int C, int arch)
 {
    int i;
    opus_val32 ac[5];
    opus_val16 tmp=Q15ONE;
    opus_val16 lpc[4], mem[5]={0,0,0,0,0};
    opus_val16 lpc2[5];
    opus_val16 c1 = QCONST16(.8f,15);
 #ifdef FIXED_POINT
@@ -175,17 +175,17 @@ void pitch_downsample(celt_sig * OPUS_RE
    if (C==2)
    {
       for (i=1;i<len>>1;i++)
          x_lp[i] += SHR32(HALF32(HALF32(x[1][(2*i-1)]+x[1][(2*i+1)])+x[1][2*i]), shift);
       x_lp[0] += SHR32(HALF32(HALF32(x[1][1])+x[1][0]), shift);
    }
 
    _celt_autocorr(x_lp, ac, NULL, 0,
-                  4, len>>1);
+                  4, len>>1, arch);
 
    /* Noise floor -40 dB */
 #ifdef FIXED_POINT
    ac[0] += SHR32(ac[0],13);
 #else
    ac[0] *= 1.0001f;
 #endif
    /* Lag windowing */
@@ -245,19 +245,24 @@ celt_pitch_xcorr(opus_val16 *x, opus_val
 
 #else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
 
 #ifdef FIXED_POINT
 opus_val32
 #else
 void
 #endif
-celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)
+celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)
 {
    int i,j;
+   /*The EDSP version requires that max_pitch is at least 1, and that _x is
+      32-bit aligned.
+     Since it's hard to put asserts in assembly, put them here.*/
+   celt_assert(max_pitch>0);
+   celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
 #ifdef FIXED_POINT
    opus_val32 maxcorr=1;
 #endif
    for (i=0;i<max_pitch-3;i+=4)
    {
       opus_val32 sum[4]={0,0,0,0};
       xcorr_kernel(_x, _y+i, sum, len);
       xcorr[i]=sum[0];
@@ -284,17 +289,17 @@ celt_pitch_xcorr(const opus_val16 *_x, c
    }
 #ifdef FIXED_POINT
    return maxcorr;
 #endif
 }
 
 #endif
 void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
-                  int len, int max_pitch, int *pitch)
+                  int len, int max_pitch, int *pitch, int arch)
 {
    int i, j;
    int lag;
    int best_pitch[2]={0,0};
    VARDECL(opus_val16, x_lp4);
    VARDECL(opus_val16, y_lp4);
    VARDECL(opus_val32, xcorr);
 #ifdef FIXED_POINT
@@ -337,17 +342,17 @@ void pitch_search(const opus_val16 * OPU
    }
 #endif
 
    /* Coarse search with 4x decimation */
 
 #ifdef FIXED_POINT
    maxcorr =
 #endif
-   celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2);
+   celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2, arch);
 
    find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch
 #ifdef FIXED_POINT
                    , 0, maxcorr
 #endif
                    );
 
    /* Finer search with 2x decimation */
--- a/media/libopus/celt/pitch.h
+++ b/media/libopus/celt/pitch.h
@@ -30,37 +30,43 @@
    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
 #ifndef PITCH_H
 #define PITCH_H
 
 #include "modes.h"
+#include "cpu_support.h"
 
 #if defined(__SSE__) && !defined(FIXED_POINT)
 #include "x86/pitch_sse.h"
 #endif
 
+#if defined(OPUS_ARM_ASM) && defined(FIXED_POINT)
+# include "arm/pitch_arm.h"
+#endif
+
 void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
-      int len, int C);
+      int len, int C, int arch);
 
 void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
-                  int len, int max_pitch, int *pitch);
+                  int len, int max_pitch, int *pitch, int arch);
 
 opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
       int N, int *T0, int prev_period, opus_val16 prev_gain);
 
 /* OPT: This is the kernel you really want to optimize. It gets used a lot
    by the prefilter and by the PLC. */
 #ifndef OVERRIDE_XCORR_KERNEL
-static inline void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
+static OPUS_INLINE void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
 {
    int j;
    opus_val16 y_0, y_1, y_2, y_3;
+   celt_assert(len>=3);
    y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */
    y_0=*y++;
    y_1=*y++;
    y_2=*y++;
    for (j=0;j<len-3;j+=4)
    {
       opus_val16 tmp;
       tmp = *x++;
@@ -114,17 +120,17 @@ static inline void xcorr_kernel(const op
       sum[1] = MAC16_16(sum[1],tmp,y_3);
       sum[2] = MAC16_16(sum[2],tmp,y_0);
       sum[3] = MAC16_16(sum[3],tmp,y_1);
    }
 }
 #endif /* OVERRIDE_XCORR_KERNEL */
 
 #ifndef OVERRIDE_DUAL_INNER_PROD
-static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
+static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
       int N, opus_val32 *xy1, opus_val32 *xy2)
 {
    int i;
    opus_val32 xy01=0;
    opus_val32 xy02=0;
    for (i=0;i<N;i++)
    {
       xy01 = MAC16_16(xy01, x[i], y01[i]);
@@ -135,11 +141,33 @@ static inline void dual_inner_prod(const
 }
 #endif
 
 #ifdef FIXED_POINT
 opus_val32
 #else
 void
 #endif
-celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch);
+celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
+      opus_val32 *xcorr, int len, int max_pitch);
+
+#if !defined(OVERRIDE_PITCH_XCORR)
+/*Is run-time CPU detection enabled on this platform? If so, celt_pitch_xcorr() calls through a table indexed by the masked arch value; otherwise it calls celt_pitch_xcorr_c() directly.*/
+# if defined(OPUS_HAVE_RTCD)
+extern
+#  if defined(FIXED_POINT)
+opus_val32
+#  else /* !FIXED_POINT */
+void
+#  endif /* FIXED_POINT */
+(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
+      const opus_val16 *, opus_val32 *, int, int);
+
+#  define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+  ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
+        xcorr, len, max_pitch))
+# else /* !OPUS_HAVE_RTCD: arch is evaluated and discarded */
+#  define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+  ((void)(arch),celt_pitch_xcorr_c(_x, _y, xcorr, len, max_pitch))
+# endif /* OPUS_HAVE_RTCD */
+#endif /* !OVERRIDE_PITCH_XCORR */
 
 #endif
--- a/media/libopus/celt/quant_bands.c
+++ b/media/libopus/celt/quant_bands.c
@@ -307,27 +307,31 @@ void quant_coarse_energy(const CELTMode 
 
    if (!intra)
    {
       unsigned char *intra_buf;
       ec_enc enc_intra_state;
       opus_int32 tell_intra;
       opus_uint32 nstart_bytes;
       opus_uint32 nintra_bytes;
+      opus_uint32 save_bytes;
       int badness2;
       VARDECL(unsigned char, intra_bits);
 
       tell_intra = ec_tell_frac(enc);
 
       enc_intra_state = *enc;
 
       nstart_bytes = ec_range_bytes(&enc_start_state);
       nintra_bytes = ec_range_bytes(&enc_intra_state);
       intra_buf = ec_get_buffer(&enc_intra_state) + nstart_bytes;
-      ALLOC(intra_bits, nintra_bytes-nstart_bytes, unsigned char);
+      save_bytes = nintra_bytes-nstart_bytes;
+      if (save_bytes == 0)
+         save_bytes = ALLOC_NONE;
+      ALLOC(intra_bits, save_bytes, unsigned char);
       /* Copy bits from intra bit-stream */
       OPUS_COPY(intra_bits, intra_buf, nintra_bytes - nstart_bytes);
 
       *enc = enc_start_state;
 
       badness2 = quant_coarse_energy_impl(m, start, end, eBands, oldEBands, budget,
             tell, e_prob_model[LM][intra], error, enc, C, LM, 0, max_decay, lfe);
 
--- a/media/libopus/celt/rate.c
+++ b/media/libopus/celt/rate.c
@@ -240,17 +240,17 @@ void compute_pulse_cache(CELTMode *m, in
       }
    }
 }
 
 #endif /* CUSTOM_MODES */
 
 #define ALLOC_STEPS 6
 
-static inline int interp_bits2pulses(const CELTMode *m, int start, int end, int skip_start,
+static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end, int skip_start,
       const int *bits1, const int *bits2, const int *thresh, const int *cap, opus_int32 total, opus_int32 *_balance,
       int skip_rsv, int *intensity, int intensity_rsv, int *dual_stereo, int dual_stereo_rsv, int *bits,
       int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth)
 {
    opus_int32 psum;
    int lo, hi;
    int i, j;
    int logM;
--- a/media/libopus/celt/rate.h
+++ b/media/libopus/celt/rate.h
@@ -40,22 +40,22 @@
 #define QTHETA_OFFSET 4
 #define QTHETA_OFFSET_TWOPHASE 16
 
 #include "cwrs.h"
 #include "modes.h"
 
 void compute_pulse_cache(CELTMode *m, int LM);
 
-static inline int get_pulses(int i)
+static OPUS_INLINE int get_pulses(int i)
 {
    return i<8 ? i : (8 + (i&7)) << ((i>>3)-1);
 }
 
-static inline int bits2pulses(const CELTMode *m, int band, int LM, int bits)
+static OPUS_INLINE int bits2pulses(const CELTMode *m, int band, int LM, int bits)
 {
    int i;
    int lo, hi;
    const unsigned char *cache;
 
    LM++;
    cache = m->cache.bits + m->cache.index[LM*m->nbEBands+band];
 
@@ -72,17 +72,17 @@ static inline int bits2pulses(const CELT
          lo = mid;
    }
    if (bits- (lo == 0 ? -1 : (int)cache[lo]) <= (int)cache[hi]-bits)
       return lo;
    else
       return hi;
 }
 
-static inline int pulses2bits(const CELTMode *m, int band, int LM, int pulses)
+static OPUS_INLINE int pulses2bits(const CELTMode *m, int band, int LM, int pulses)
 {
    const unsigned char *cache;
 
    LM++;
    cache = m->cache.bits + m->cache.index[LM*m->nbEBands+band];
    return pulses == 0 ? 0 : cache[pulses]+1;
 }
 
--- a/media/libopus/celt/stack_alloc.h
+++ b/media/libopus/celt/stack_alloc.h
@@ -27,16 +27,19 @@
    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
 #ifndef STACK_ALLOC_H
 #define STACK_ALLOC_H
 
+#include "opus_types.h"
+#include "opus_defines.h"
+
 #if (!defined (VAR_ARRAYS) && !defined (USE_ALLOCA) && !defined (NONTHREADSAFE_PSEUDOSTACK))
 #error "Opus requires one of VAR_ARRAYS, USE_ALLOCA, or NONTHREADSAFE_PSEUDOSTACK be defined to select the temporary allocation mode."
 #endif
 
 #ifdef USE_ALLOCA
 # ifdef WIN32
 #  include <malloc.h>
 # else
@@ -87,30 +90,33 @@
 
 #if defined(VAR_ARRAYS)
 
 #define VARDECL(type, var)
 #define ALLOC(var, size, type) type var[size]
 #define SAVE_STACK
 #define RESTORE_STACK
 #define ALLOC_STACK
+/* C99 does not allow VLAs of size zero */
+#define ALLOC_NONE 1
 
 #elif defined(USE_ALLOCA)
 
 #define VARDECL(type, var) type *var
 
 # ifdef WIN32
 #  define ALLOC(var, size, type) var = ((type*)_alloca(sizeof(type)*(size)))
 # else
 #  define ALLOC(var, size, type) var = ((type*)alloca(sizeof(type)*(size)))
 # endif
 
 #define SAVE_STACK
 #define RESTORE_STACK
 #define ALLOC_STACK
+#define ALLOC_NONE 0
 
 #else
 
 #ifdef CELT_C
 char *global_stack=0;
 #else
 extern char *global_stack;
 #endif /* CELT_C */
@@ -138,33 +144,34 @@ extern char *global_stack_top;
 #define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? opus_alloc_scratch(GLOBAL_STACK_SIZE) : global_stack); _saved_stack = global_stack;
 
 #endif /* ENABLE_VALGRIND */
 
 #include "os_support.h"
 #define VARDECL(type, var) type *var
 #define ALLOC(var, size, type) var = PUSH(global_stack, size, type)
 #define SAVE_STACK char *_saved_stack = global_stack;
+#define ALLOC_NONE 0
 
 #endif /* VAR_ARRAYS */
 
 
 #ifdef ENABLE_VALGRIND
 
 #include <valgrind/memcheck.h>
 #define OPUS_CHECK_ARRAY(ptr, len) VALGRIND_CHECK_MEM_IS_DEFINED(ptr, len*sizeof(*ptr))
 #define OPUS_CHECK_VALUE(value) VALGRIND_CHECK_VALUE_IS_DEFINED(value)
 #define OPUS_CHECK_ARRAY_COND(ptr, len) VALGRIND_CHECK_MEM_IS_DEFINED(ptr, len*sizeof(*ptr))
 #define OPUS_CHECK_VALUE_COND(value) VALGRIND_CHECK_VALUE_IS_DEFINED(value)
 #define OPUS_PRINT_INT(value) do {fprintf(stderr, #value " = %d at %s:%d\n", value, __FILE__, __LINE__);}while(0)
 #define OPUS_FPRINTF fprintf
 
 #else
 
-static inline int _opus_false(void) {return 0;}
+static OPUS_INLINE int _opus_false(void) {return 0;}
 #define OPUS_CHECK_ARRAY(ptr, len) _opus_false()
 #define OPUS_CHECK_VALUE(value) _opus_false()
 #define OPUS_PRINT_INT(value) do{}while(0)
 #define OPUS_FPRINTF (void)
 
 #endif
 
 
--- a/media/libopus/celt/x86/pitch_sse.h
+++ b/media/libopus/celt/x86/pitch_sse.h
@@ -31,17 +31,17 @@
 
 #ifndef PITCH_SSE_H
 #define PITCH_SSE_H
 
 #include <xmmintrin.h>
 #include "arch.h"
 
 #define OVERRIDE_XCORR_KERNEL
-static inline void xcorr_kernel(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len)
+static OPUS_INLINE void xcorr_kernel(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len)
 {
    int j;
    __m128 xsum1, xsum2;
    xsum1 = _mm_loadu_ps(sum);
    xsum2 = _mm_setzero_ps();
 
    for (j = 0; j < len-3; j += 4)
    {
@@ -67,17 +67,17 @@ static inline void xcorr_kernel(const op
             xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
          }
       }
    }
    _mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2));
 }
 
 #define OVERRIDE_DUAL_INNER_PROD
-static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
+static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
       int N, opus_val32 *xy1, opus_val32 *xy2)
 {
    int i;
    __m128 xsum1, xsum2;
    xsum1 = _mm_setzero_ps();
    xsum2 = _mm_setzero_ps();
    for (i=0;i<N-3;i+=4)
    {
@@ -97,17 +97,17 @@ static inline void dual_inner_prod(const
    for (;i<N;i++)
    {
       *xy1 = MAC16_16(*xy1, x[i], y01[i]);
       *xy2 = MAC16_16(*xy2, x[i], y02[i]);
    }
 }
 
 #define OVERRIDE_COMB_FILTER_CONST
-static inline void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
+static OPUS_INLINE void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
       opus_val16 g10, opus_val16 g11, opus_val16 g12)
 {
    int i;
    __m128 x0v;
    __m128 g10v, g11v, g12v;
    g10v = _mm_load1_ps(&g10);
    g11v = _mm_load1_ps(&g11);
    g12v = _mm_load1_ps(&g12);
--- a/media/libopus/celt_sources.mk
+++ b/media/libopus/celt_sources.mk
@@ -13,9 +13,16 @@ celt/mdct.c \
 celt/modes.c \
 celt/pitch.c \
 celt/celt_lpc.c \
 celt/quant_bands.c \
 celt/rate.c \
 celt/vq.c
 
 CELT_SOURCES_ARM = \
-celt/arm/armcpu.c
+celt/arm/armcpu.c \
+celt/arm/arm_celt_map.c
+
+CELT_SOURCES_ARM_ASM = \
+celt/arm/celt_pitch_xcorr_arm.s
+
+CELT_AM_SOURCES_ARM_ASM = \
+celt/arm/armopts.s.in
--- a/media/libopus/include/opus.h
+++ b/media/libopus/include/opus.h
@@ -906,15 +906,73 @@ OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int 
   *                                    sufficient, and possibly much smaller.
   * @returns The total size of the output packet on success, or an error code
   *          on failure.
   * @retval #OPUS_BUFFER_TOO_SMALL \a maxlen was insufficient to contain the
   *                                complete output packet.
   */
 OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_repacketizer_out(OpusRepacketizer *rp, unsigned char *data, opus_int32 maxlen) OPUS_ARG_NONNULL(1);
 
+/** Pads a given Opus packet to a larger size (possibly changing the TOC sequence).
+  * @param[in,out] data <tt>unsigned char*</tt>: The buffer containing the
+  *                                              packet to pad.
+  * @param len <tt>opus_int32</tt>: The size of the packet.
+  *                                 This must be at least 1.
+  * @param new_len <tt>opus_int32</tt>: The desired size of the packet after padding.
+  *                                 This must be at least as large as len.
+  * @returns an error code
+  * @retval #OPUS_OK on success.
+  * @retval #OPUS_BAD_ARG \a len was less than 1 or new_len was less than len.
+  * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet.
+  */
+OPUS_EXPORT int opus_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len);
+
+/** Remove all padding from a given Opus packet and rewrite the TOC sequence to
+  * minimize space usage.
+  * @param[in,out] data <tt>unsigned char*</tt>: The buffer containing the
+  *                                              packet to strip.
+  * @param len <tt>opus_int32</tt>: The size of the packet.
+  *                                 This must be at least 1.
+  * @returns The new size of the output packet on success, or an error code
+  *          on failure.
+  * @retval #OPUS_BAD_ARG \a len was less than 1.
+  * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_packet_unpad(unsigned char *data, opus_int32 len);
+
+/** Pads a given Opus multi-stream packet to a larger size (possibly changing the TOC sequence).
+  * @param[in,out] data <tt>unsigned char*</tt>: The buffer containing the
+  *                                              packet to pad.
+  * @param len <tt>opus_int32</tt>: The size of the packet.
+  *                                 This must be at least 1.
+  * @param new_len <tt>opus_int32</tt>: The desired size of the packet after padding.
+  *                                 This must be at least as large as len.
+  * @param nb_streams <tt>int</tt>: The number of streams (not channels) in the packet.
+  *                                 This must be at least 1.
+  * @returns an error code
+  * @retval #OPUS_OK on success.
+  * @retval #OPUS_BAD_ARG \a len was less than 1 or new_len was less than len.
+  * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet.
+  */
+OPUS_EXPORT int opus_multistream_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len, int nb_streams);
+
+/** Remove all padding from a given Opus multi-stream packet and rewrite the TOC sequence to
+  * minimize space usage.
+  * @param[in,out] data <tt>unsigned char*</tt>: The buffer containing the
+  *                                              packet to strip.
+  * @param len <tt>opus_int32</tt>: The size of the packet.
+  *                                 This must be at least 1.
+  * @param nb_streams <tt>int</tt>: The number of streams (not channels) in the packet.
+  *                                 This must be at least 1.
+  * @returns The new size of the output packet on success, or an error code
+  *          on failure.
+  * @retval #OPUS_BAD_ARG \a len was less than 1.
+  * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_packet_unpad(unsigned char *data, opus_int32 len, int nb_streams);
+
 /**@}*/
 
 #ifdef __cplusplus
 }
 #endif
 
 #endif /* OPUS_H */
--- a/media/libopus/include/opus_custom.h
+++ b/media/libopus/include/opus_custom.h
@@ -42,17 +42,17 @@ extern "C" {
 #endif
 
 #ifdef CUSTOM_MODES
 # define OPUS_CUSTOM_EXPORT OPUS_EXPORT
 # define OPUS_CUSTOM_EXPORT_STATIC OPUS_EXPORT
 #else
 # define OPUS_CUSTOM_EXPORT
 # ifdef OPUS_BUILD
-#  define OPUS_CUSTOM_EXPORT_STATIC static inline
+#  define OPUS_CUSTOM_EXPORT_STATIC static OPUS_INLINE
 # else
 #  define OPUS_CUSTOM_EXPORT_STATIC
 # endif
 #endif
 
 /** @defgroup opus_custom Opus Custom
   * @{
   *  Opus Custom is an optional part of the Opus specification and
@@ -135,34 +135,35 @@ OPUS_CUSTOM_EXPORT void opus_custom_mode
   * @param [in] channels <tt>int</tt>: Number of channels
   * @returns size
   */
 OPUS_CUSTOM_EXPORT_STATIC OPUS_WARN_UNUSED_RESULT int opus_custom_encoder_get_size(
     const OpusCustomMode *mode,
     int channels
 ) OPUS_ARG_NONNULL(1);
 
+# ifdef CUSTOM_MODES
 /** Initializes a previously allocated encoder state
   * The memory pointed to by st must be the size returned by opus_custom_encoder_get_size.
   * This is intended for applications which use their own allocator instead of malloc.
   * @see opus_custom_encoder_create(),opus_custom_encoder_get_size()
   * To reset a previously initialized state use the OPUS_RESET_STATE CTL.
   * @param [in] st <tt>OpusCustomEncoder*</tt>: Encoder state
   * @param [in] mode <tt>OpusCustomMode *</tt>: Contains all the information about the characteristics of
   *  the stream (must be the same characteristics as used for the
   *  decoder)
   * @param [in] channels <tt>int</tt>: Number of channels
   * @return OPUS_OK Success or @ref opus_errorcodes
   */
-OPUS_CUSTOM_EXPORT_STATIC int opus_custom_encoder_init(
+OPUS_CUSTOM_EXPORT int opus_custom_encoder_init(
     OpusCustomEncoder *st,
     const OpusCustomMode *mode,
     int channels
 ) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2);
-
+# endif
 #endif
 
 
 /** Creates a new encoder state. Each stream needs its own encoder
   * state (can't be shared across simultaneous streams).
   * @param [in] mode <tt>OpusCustomMode*</tt>: Contains all the information about the characteristics of
   *  the stream (must be the same characteristics as used for the
   *  decoder)
--- a/media/libopus/include/opus_defines.h
+++ b/media/libopus/include/opus_defines.h
@@ -93,16 +93,28 @@ extern "C" {
 #  define OPUS_RESTRICT __restrict
 # else
 #  define OPUS_RESTRICT
 # endif
 #else
 # define OPUS_RESTRICT restrict
 #endif
 
+#if (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L) )
+# if OPUS_GNUC_PREREQ(2,7)
+#  define OPUS_INLINE __inline__
+# elif (defined(_MSC_VER))
+#  define OPUS_INLINE __inline
+# else
+#  define OPUS_INLINE
+# endif
+#else
+# define OPUS_INLINE inline
+#endif
+
 /**Warning attributes for opus functions
   * NONNULL is not used in OPUS_BUILD to avoid the compiler optimizing out
   * some paranoid null checks. */
 #if defined(__GNUC__) && OPUS_GNUC_PREREQ(3, 4)
 # define OPUS_WARN_UNUSED_RESULT __attribute__ ((__warn_unused_result__))
 #else
 # define OPUS_WARN_UNUSED_RESULT
 #endif
@@ -146,16 +158,18 @@ extern "C" {
 #define OPUS_GET_PITCH_REQUEST               4033
 #define OPUS_SET_GAIN_REQUEST                4034
 #define OPUS_GET_GAIN_REQUEST                4045 /* Should have been 4035 */
 #define OPUS_SET_LSB_DEPTH_REQUEST           4036
 #define OPUS_GET_LSB_DEPTH_REQUEST           4037
 #define OPUS_GET_LAST_PACKET_DURATION_REQUEST 4039
 #define OPUS_SET_EXPERT_FRAME_DURATION_REQUEST 4040
 #define OPUS_GET_EXPERT_FRAME_DURATION_REQUEST 4041
+#define OPUS_SET_PREDICTION_DISABLED_REQUEST 4042
+#define OPUS_GET_PREDICTION_DISABLED_REQUEST 4043
 
 /* Don't use 4045, it's already taken by OPUS_GET_GAIN_REQUEST */
 
 /* Macros to trigger compilation errors when the wrong types are provided to a CTL */
 #define __opus_check_int(x) (((void)((x) == (opus_int32)0)), (opus_int32)(x))
 #define __opus_check_int_ptr(ptr) ((ptr) + ((ptr) - (opus_int32*)(ptr)))
 #define __opus_check_uint_ptr(ptr) ((ptr) + ((ptr) - (opus_uint32*)(ptr)))
 #define __opus_check_val16_ptr(ptr) ((ptr) + ((ptr) - (opus_val16*)(ptr)))
@@ -189,17 +203,16 @@ extern "C" {
 
 #define OPUS_FRAMESIZE_ARG                   5000 /**< Select frame size from the argument (default) */
 #define OPUS_FRAMESIZE_2_5_MS                5001 /**< Use 2.5 ms frames */
 #define OPUS_FRAMESIZE_5_MS                  5002 /**< Use 5 ms frames */
 #define OPUS_FRAMESIZE_10_MS                 5003 /**< Use 10 ms frames */
 #define OPUS_FRAMESIZE_20_MS                 5004 /**< Use 20 ms frames */
 #define OPUS_FRAMESIZE_40_MS                 5005 /**< Use 40 ms frames */
 #define OPUS_FRAMESIZE_60_MS                 5006 /**< Use 60 ms frames */
-#define OPUS_FRAMESIZE_VARIABLE              5010 /**< Optimize the frame size dynamically */
 
 /**@}*/
 
 
 /** @defgroup opus_encoderctls Encoder related CTLs
   *
   * These are convenience macros for use with the \c opus_encode_ctl
   * interface. They are used to generate the appropriate series of
@@ -570,16 +583,24 @@ extern "C" {
   * <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd>
   * <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd>
   * <dt>OPUS_FRAMESIZE_60_MS</dt><dd>Use 60 ms frames.</dd>
   * <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Optimize the frame size dynamically.</dd>
   * </dl>
   * @hideinitializer */
 #define OPUS_GET_EXPERT_FRAME_DURATION(x) OPUS_GET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int_ptr(x)
 
+/** If set to 1, disables almost all use of prediction, making frames almost
+    completely independent. This reduces quality. (default : 0)
+  * @hideinitializer */
+#define OPUS_SET_PREDICTION_DISABLED(x) OPUS_SET_PREDICTION_DISABLED_REQUEST, __opus_check_int(x)
+/** Gets the encoder's configured prediction status.
+  * @hideinitializer */
+#define OPUS_GET_PREDICTION_DISABLED(x) OPUS_GET_PREDICTION_DISABLED_REQUEST, __opus_check_int_ptr(x)
+
 /**@}*/
 
 /** @defgroup opus_genericctls Generic CTLs
   *
   * These macros are used with the \c opus_decoder_ctl and
   * \c opus_encoder_ctl calls to generate a particular
   * request.
   *
--- a/media/libopus/moz.build
+++ b/media/libopus/moz.build
@@ -11,17 +11,17 @@ EXPORTS.opus += [
     'include/opus_types.h',
 ]
 
 MSVC_ENABLE_PGO = True
 
 FINAL_LIBRARY = 'gkmedias'
 
 DEFINES['OPUS_BUILD'] = True
-DEFINES['OPUS_VERSION'] = '"v1.1-beta-23-gf2446c2-mozilla"'
+DEFINES['OPUS_VERSION'] = '"v1.1-rc2-1-g35a44c6-mozilla"'
 DEFINES['USE_ALLOCA'] = True
 
 if CONFIG['OS_ARCH'] in ('Linux', 'Darwin', 'DragonFly', 'FreeBSD',
                          'NetBSD', 'OpenBSD'):
     DEFINES['HAVE_LRINTF'] = True
 
 if CONFIG['OS_ARCH'] == 'WINNT':
     DEFINES['inline'] = '__inline'
--- a/media/libopus/silk/A2NLSF.c
+++ b/media/libopus/silk/A2NLSF.c
@@ -39,50 +39,50 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "tables.h"
 
 /* Number of binary divisions, when not in low complexity mode */
 #define BIN_DIV_STEPS_A2NLSF_FIX      3 /* must be no higher than 16 - log2( LSF_COS_TAB_SZ_FIX ) */
 #define MAX_ITERATIONS_A2NLSF_FIX    30
 
 /* Helper function for A2NLSF(..)                    */
 /* Transforms polynomials from cos(n*f) to cos(f)^n  */
-static inline void silk_A2NLSF_trans_poly(
+static OPUS_INLINE void silk_A2NLSF_trans_poly(
     opus_int32          *p,                     /* I/O    Polynomial                                */
     const opus_int      dd                      /* I      Polynomial order (= filter order / 2 )    */
 )
 {
     opus_int k, n;
 
     for( k = 2; k <= dd; k++ ) {
         for( n = dd; n > k; n-- ) {
             p[ n - 2 ] -= p[ n ];
         }
         p[ k - 2 ] -= silk_LSHIFT( p[ k ], 1 );
     }
 }
 /* Helper function for A2NLSF(..) */
 /* Polynomial evaluation          */
-static inline opus_int32 silk_A2NLSF_eval_poly( /* return the polynomial evaluation, in Q16     */
+static OPUS_INLINE opus_int32 silk_A2NLSF_eval_poly( /* return the polynomial evaluation, in Q16     */
     opus_int32          *p,                     /* I    Polynomial, Q16                         */
     const opus_int32    x,                      /* I    Evaluation point, Q12                   */
     const opus_int      dd                      /* I    Order                                   */
 )
 {
     opus_int   n;
     opus_int32 x_Q16, y32;
 
     y32 = p[ dd ];                                  /* Q16 */
     x_Q16 = silk_LSHIFT( x, 4 );
     for( n = dd - 1; n >= 0; n-- ) {
         y32 = silk_SMLAWW( p[ n ], y32, x_Q16 );    /* Q16 */
     }
     return y32;
 }
 
-static inline void silk_A2NLSF_init(
+static OPUS_INLINE void silk_A2NLSF_init(
      const opus_int32    *a_Q16,
      opus_int32          *P,
      opus_int32          *Q,
      const opus_int      dd
 )
 {
     opus_int k;
 
--- a/media/libopus/silk/API.h
+++ b/media/libopus/silk/API.h
@@ -59,16 +59,17 @@ opus_int silk_Get_Encoder_Size(         
     opus_int                        *encSizeBytes       /* O    Number of bytes in SILK encoder state           */
 );
 
 /*************************/
 /* Init or reset encoder */
 /*************************/
 opus_int silk_InitEncoder(                              /* O    Returns error code                              */
     void                            *encState,          /* I/O  State                                           */
+    int                              arch,              /* I    Run-time architecture                           */
     silk_EncControlStruct           *encStatus          /* O    Encoder Status                                  */
 );
 
 /**************************/
 /* Encode frame with Silk */
 /**************************/
 /* Note: if prefillFlag is set, the input must contain 10 ms of audio, irrespective of what                     */
 /* encControl->payloadSize_ms is set to                                                                         */
--- a/media/libopus/silk/CNG.c
+++ b/media/libopus/silk/CNG.c
@@ -28,17 +28,17 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 #include "main.h"
 #include "stack_alloc.h"
 
 /* Generates excitation for CNG LPC synthesis */
-static inline void silk_CNG_exc(
+static OPUS_INLINE void silk_CNG_exc(
     opus_int32                       residual_Q10[],     /* O    CNG residual signal Q10                     */
     opus_int32                       exc_buf_Q14[],      /* I    Random samples buffer Q10                   */
     opus_int32                       Gain_Q16,           /* I    Gain to apply                               */
     opus_int                         length,             /* I    Length                                      */
     opus_int32                       *rand_seed          /* I/O  Seed to random index generator              */
 )
 {
     opus_int32 seed;
--- a/media/libopus/silk/Inlines.h
+++ b/media/libopus/silk/Inlines.h
@@ -21,59 +21,59 @@ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NO
 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
 /*! \file silk_Inlines.h
- *  \brief silk_Inlines.h defines inline signal processing functions.
+ *  \brief silk_Inlines.h defines inline signal processing functions (declared with OPUS_INLINE).
  */
 
 #ifndef SILK_FIX_INLINES_H
 #define SILK_FIX_INLINES_H
 
 #ifdef  __cplusplus
 extern "C"
 {
 #endif
 
 /* count leading zeros of opus_int64 */
-static inline opus_int32 silk_CLZ64( opus_int64 in )
+static OPUS_INLINE opus_int32 silk_CLZ64( opus_int64 in )
 {
     opus_int32 in_upper;
 
     in_upper = (opus_int32)silk_RSHIFT64(in, 32);
     if (in_upper == 0) {
         /* Search in the lower 32 bits */
         return 32 + silk_CLZ32( (opus_int32) in );
     } else {
         /* Search in the upper 32 bits */
         return silk_CLZ32( in_upper );
     }
 }
 
 /* get number of leading zeros and fractional part (the bits right after the leading one */
-static inline void silk_CLZ_FRAC(
+static OPUS_INLINE void silk_CLZ_FRAC(
     opus_int32 in,            /* I  input                               */
     opus_int32 *lz,           /* O  number of leading zeros             */
     opus_int32 *frac_Q7       /* O  the 7 bits right after the leading one */
 )
 {
     opus_int32 lzeros = silk_CLZ32(in);
 
     * lz = lzeros;
     * frac_Q7 = silk_ROR32(in, 24 - lzeros) & 0x7f;
 }
 
 /* Approximation of square root                                          */
 /* Accuracy: < +/- 10%  for output values > 15                           */
 /*           < +/- 2.5% for output values > 120                          */
-static inline opus_int32 silk_SQRT_APPROX( opus_int32 x )
+static OPUS_INLINE opus_int32 silk_SQRT_APPROX( opus_int32 x )
 {
     opus_int32 y, lz, frac_Q7;
 
     if( x <= 0 ) {
         return 0;
     }
 
     silk_CLZ_FRAC(x, &lz, &frac_Q7);
@@ -89,17 +89,17 @@ static inline opus_int32 silk_SQRT_APPRO
 
     /* increment using fractional part of input */
     y = silk_SMLAWB(y, y, silk_SMULBB(213, frac_Q7));
 
     return y;
 }
 
 /* Divide two int32 values and return result as int32 in a given Q-domain */
-static inline opus_int32 silk_DIV32_varQ(   /* O    returns a good approximation of "(a32 << Qres) / b32" */
+static OPUS_INLINE opus_int32 silk_DIV32_varQ(   /* O    returns a good approximation of "(a32 << Qres) / b32" */
     const opus_int32     a32,               /* I    numerator (Q0)                  */
     const opus_int32     b32,               /* I    denominator (Q0)                */
     const opus_int       Qres               /* I    Q-domain of result (>= 0)       */
 )
 {
     opus_int   a_headrm, b_headrm, lshift;
     opus_int32 b32_inv, a32_nrm, b32_nrm, result;
 
@@ -135,17 +135,17 @@ static inline opus_int32 silk_DIV32_varQ
         } else {
             /* Avoid undefined result */
             return 0;
         }
     }
 }
 
 /* Invert int32 value and return result as int32 in a given Q-domain */
-static inline opus_int32 silk_INVERSE32_varQ(   /* O    returns a good approximation of "(1 << Qres) / b32" */
+static OPUS_INLINE opus_int32 silk_INVERSE32_varQ(   /* O    returns a good approximation of "(1 << Qres) / b32" */
     const opus_int32     b32,                   /* I    denominator (Q0)                */
     const opus_int       Qres                   /* I    Q-domain of result (> 0)        */
 )
 {
     opus_int   b_headrm, lshift;
     opus_int32 b32_inv, b32_nrm, err_Q32, result;
 
     silk_assert( b32 != 0 );
--- a/media/libopus/silk/LP_variable_cutoff.c
+++ b/media/libopus/silk/LP_variable_cutoff.c
@@ -33,17 +33,17 @@ POSSIBILITY OF SUCH DAMAGE.
     Elliptic/Cauer filters designed with 0.1 dB passband ripple,
     80 dB minimum stopband attenuation, and
     [0.95 : 0.15 : 0.35] normalized cut off frequencies.
 */
 
 #include "main.h"
 
 /* Helper function, interpolates the filter taps */
-static inline void silk_LP_interpolate_filter_taps(
+static OPUS_INLINE void silk_LP_interpolate_filter_taps(
     opus_int32           B_Q28[ TRANSITION_NB ],
     opus_int32           A_Q28[ TRANSITION_NA ],
     const opus_int       ind,
     const opus_int32     fac_Q16
 )
 {
     opus_int nb, na;
 
--- a/media/libopus/silk/MacroCount.h
+++ b/media/libopus/silk/MacroCount.h
@@ -29,134 +29,134 @@ POSSIBILITY OF SUCH DAMAGE.
 #define SIGPROCFIX_API_MACROCOUNT_H
 #include <stdio.h>
 
 #ifdef    silk_MACRO_COUNT
 #define varDefine opus_int64 ops_count = 0;
 
 extern opus_int64 ops_count;
 
-static inline opus_int64 silk_SaveCount(){
+static OPUS_INLINE opus_int64 silk_SaveCount(){
     return(ops_count);
 }
 
-static inline opus_int64 silk_SaveResetCount(){
+static OPUS_INLINE opus_int64 silk_SaveResetCount(){
     opus_int64 ret;
 
     ret = ops_count;
     ops_count = 0;
     return(ret);
 }
 
-static inline silk_PrintCount(){
+static OPUS_INLINE silk_PrintCount(){
     printf("ops_count = %d \n ", (opus_int32)ops_count);
 }
 
 #undef silk_MUL
-static inline opus_int32 silk_MUL(opus_int32 a32, opus_int32 b32){
+static OPUS_INLINE opus_int32 silk_MUL(opus_int32 a32, opus_int32 b32){
     opus_int32 ret;
     ops_count += 4;
     ret = a32 * b32;
     return ret;
 }
 
 #undef silk_MUL_uint
-static inline opus_uint32 silk_MUL_uint(opus_uint32 a32, opus_uint32 b32){
+static OPUS_INLINE opus_uint32 silk_MUL_uint(opus_uint32 a32, opus_uint32 b32){
     opus_uint32 ret;
     ops_count += 4;
     ret = a32 * b32;
     return ret;
 }
 #undef silk_MLA
-static inline opus_int32 silk_MLA(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+static OPUS_INLINE opus_int32 silk_MLA(opus_int32 a32, opus_int32 b32, opus_int32 c32){
     opus_int32 ret;
     ops_count += 4;
     ret = a32 + b32 * c32;
     return ret;
 }
 
 #undef silk_MLA_uint
-static inline opus_int32 silk_MLA_uint(opus_uint32 a32, opus_uint32 b32, opus_uint32 c32){
+static OPUS_INLINE opus_int32 silk_MLA_uint(opus_uint32 a32, opus_uint32 b32, opus_uint32 c32){
     opus_uint32 ret;
     ops_count += 4;
     ret = a32 + b32 * c32;
     return ret;
 }
 
 #undef silk_SMULWB
-static inline opus_int32 silk_SMULWB(opus_int32 a32, opus_int32 b32){
+static OPUS_INLINE opus_int32 silk_SMULWB(opus_int32 a32, opus_int32 b32){
     opus_int32 ret;
     ops_count += 5;
     ret = (a32 >> 16) * (opus_int32)((opus_int16)b32) + (((a32 & 0x0000FFFF) * (opus_int32)((opus_int16)b32)) >> 16);
     return ret;
 }
 #undef    silk_SMLAWB
-static inline opus_int32 silk_SMLAWB(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+static OPUS_INLINE opus_int32 silk_SMLAWB(opus_int32 a32, opus_int32 b32, opus_int32 c32){
     opus_int32 ret;
     ops_count += 5;
     ret = ((a32) + ((((b32) >> 16) * (opus_int32)((opus_int16)(c32))) + ((((b32) & 0x0000FFFF) * (opus_int32)((opus_int16)(c32))) >> 16)));
     return ret;
 }
 
 #undef silk_SMULWT
-static inline opus_int32 silk_SMULWT(opus_int32 a32, opus_int32 b32){
+static OPUS_INLINE opus_int32 silk_SMULWT(opus_int32 a32, opus_int32 b32){
     opus_int32 ret;
     ops_count += 4;
     ret = (a32 >> 16) * (b32 >> 16) + (((a32 & 0x0000FFFF) * (b32 >> 16)) >> 16);
     return ret;
 }
 #undef silk_SMLAWT
-static inline opus_int32 silk_SMLAWT(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+static OPUS_INLINE opus_int32 silk_SMLAWT(opus_int32 a32, opus_int32 b32, opus_int32 c32){
     opus_int32 ret;
     ops_count += 4;
     ret = a32 + ((b32 >> 16) * (c32 >> 16)) + (((b32 & 0x0000FFFF) * ((c32 >> 16)) >> 16));
     return ret;
 }
 
 #undef silk_SMULBB
-static inline opus_int32 silk_SMULBB(opus_int32 a32, opus_int32 b32){
+static OPUS_INLINE opus_int32 silk_SMULBB(opus_int32 a32, opus_int32 b32){
     opus_int32 ret;
     ops_count += 1;
     ret = (opus_int32)((opus_int16)a32) * (opus_int32)((opus_int16)b32);
     return ret;
 }
 #undef silk_SMLABB
-static inline opus_int32 silk_SMLABB(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+static OPUS_INLINE opus_int32 silk_SMLABB(opus_int32 a32, opus_int32 b32, opus_int32 c32){
     opus_int32 ret;
     ops_count += 1;
     ret = a32 + (opus_int32)((opus_int16)b32) * (opus_int32)((opus_int16)c32);
     return ret;
 }
 
 #undef silk_SMULBT
-static inline opus_int32 silk_SMULBT(opus_int32 a32, opus_int32 b32 ){
+static OPUS_INLINE opus_int32 silk_SMULBT(opus_int32 a32, opus_int32 b32 ){
     opus_int32 ret;
     ops_count += 4;
     ret = ((opus_int32)((opus_int16)a32)) * (b32 >> 16);
     return ret;
 }
 
 #undef silk_SMLABT
-static inline opus_int32 silk_SMLABT(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+static OPUS_INLINE opus_int32 silk_SMLABT(opus_int32 a32, opus_int32 b32, opus_int32 c32){
     opus_int32 ret;
     ops_count += 1;
     ret = a32 + ((opus_int32)((opus_int16)b32)) * (c32 >> 16);
     return ret;
 }
 
 #undef silk_SMULTT
-static inline opus_int32 silk_SMULTT(opus_int32 a32, opus_int32 b32){
+static OPUS_INLINE opus_int32 silk_SMULTT(opus_int32 a32, opus_int32 b32){
     opus_int32 ret;
     ops_count += 1;
     ret = (a32 >> 16) * (b32 >> 16);
     return ret;
 }
 
 #undef    silk_SMLATT
-static inline opus_int32 silk_SMLATT(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+static OPUS_INLINE opus_int32 silk_SMLATT(opus_int32 a32, opus_int32 b32, opus_int32 c32){
     opus_int32 ret;
     ops_count += 1;
     ret = a32 + (b32 >> 16) * (c32 >> 16);
     return ret;
 }
 
 
 /* multiply-accumulate macros that allow overflow in the addition (ie, no asserts in debug mode)*/
@@ -174,40 +174,40 @@ static inline opus_int32 silk_SMLATT(opu
 
 #undef silk_SMLAWB_ovflw
 #define silk_SMLAWB_ovflw silk_SMLAWB
 
 #undef silk_SMLAWT_ovflw
 #define silk_SMLAWT_ovflw silk_SMLAWT
 
 #undef silk_SMULL
-static inline opus_int64 silk_SMULL(opus_int32 a32, opus_int32 b32){
+static OPUS_INLINE opus_int64 silk_SMULL(opus_int32 a32, opus_int32 b32){
     opus_int64 ret;
     ops_count += 8;
     ret = ((opus_int64)(a32) * /*(opus_int64)*/(b32));
     return ret;
 }
 
 #undef    silk_SMLAL
-static inline opus_int64 silk_SMLAL(opus_int64 a64, opus_int32 b32, opus_int32 c32){
+static OPUS_INLINE opus_int64 silk_SMLAL(opus_int64 a64, opus_int32 b32, opus_int32 c32){
     opus_int64 ret;
     ops_count += 8;
     ret = a64 + ((opus_int64)(b32) * /*(opus_int64)*/(c32));
     return ret;
 }
 #undef    silk_SMLALBB
-static inline opus_int64 silk_SMLALBB(opus_int64 a64, opus_int16 b16, opus_int16 c16){
+static OPUS_INLINE opus_int64 silk_SMLALBB(opus_int64 a64, opus_int16 b16, opus_int16 c16){
     opus_int64 ret;
     ops_count += 4;
     ret = a64 + ((opus_int64)(b16) * /*(opus_int64)*/(c16));
     return ret;
 }
 
 #undef    SigProcFIX_CLZ16
-static inline opus_int32 SigProcFIX_CLZ16(opus_int16 in16)
+static OPUS_INLINE opus_int32 SigProcFIX_CLZ16(opus_int16 in16)
 {
     opus_int32 out32 = 0;
     ops_count += 10;
     if( in16 == 0 ) {
         return 16;
     }
     /* test nibbles */
     if( in16 & 0xFF00 ) {
@@ -235,252 +235,252 @@ static inline opus_int32 SigProcFIX_CLZ1
         if( in16 & 0xE )
             return out32 + 2;
         else
             return out32 + 3;
     }
 }
 
 #undef SigProcFIX_CLZ32
-static inline opus_int32 SigProcFIX_CLZ32(opus_int32 in32)
+static OPUS_INLINE opus_int32 SigProcFIX_CLZ32(opus_int32 in32)
 {
     /* test highest 16 bits and convert to opus_int16 */
     ops_count += 2;
     if( in32 & 0xFFFF0000 ) {
         return SigProcFIX_CLZ16((opus_int16)(in32 >> 16));
     } else {
         return SigProcFIX_CLZ16((opus_int16)in32) + 16;
     }
 }
 
 #undef silk_DIV32
-static inline opus_int32 silk_DIV32(opus_int32 a32, opus_int32 b32){
+static OPUS_INLINE opus_int32 silk_DIV32(opus_int32 a32, opus_int32 b32){
     ops_count += 64;
     return a32 / b32;
 }
 
 #undef silk_DIV32_16
-static inline opus_int32 silk_DIV32_16(opus_int32 a32, opus_int32 b32){
+static OPUS_INLINE opus_int32 silk_DIV32_16(opus_int32 a32, opus_int32 b32){
     ops_count += 32;
     return a32 / b32;
 }
 
 #undef silk_SAT8
-static inline opus_int8 silk_SAT8(opus_int64 a){
+static OPUS_INLINE opus_int8 silk_SAT8(opus_int64 a){
     opus_int8 tmp;
     ops_count += 1;
     tmp = (opus_int8)((a) > silk_int8_MAX ? silk_int8_MAX  : \
                     ((a) < silk_int8_MIN ? silk_int8_MIN  : (a)));
     return(tmp);
 }
 
 #undef silk_SAT16
-static inline opus_int16 silk_SAT16(opus_int64 a){
+static OPUS_INLINE opus_int16 silk_SAT16(opus_int64 a){
     opus_int16 tmp;
     ops_count += 1;
     tmp = (opus_int16)((a) > silk_int16_MAX ? silk_int16_MAX  : \
                      ((a) < silk_int16_MIN ? silk_int16_MIN  : (a)));
     return(tmp);
 }
 #undef silk_SAT32
-static inline opus_int32 silk_SAT32(opus_int64 a){
+static OPUS_INLINE opus_int32 silk_SAT32(opus_int64 a){
     opus_int32 tmp;
     ops_count += 1;
     tmp = (opus_int32)((a) > silk_int32_MAX ? silk_int32_MAX  : \
                      ((a) < silk_int32_MIN ? silk_int32_MIN  : (a)));
     return(tmp);
 }
 #undef silk_POS_SAT32
-static inline opus_int32 silk_POS_SAT32(opus_int64 a){
+static OPUS_INLINE opus_int32 silk_POS_SAT32(opus_int64 a){
     opus_int32 tmp;
     ops_count += 1;
     tmp = (opus_int32)((a) > silk_int32_MAX ? silk_int32_MAX : (a));
     return(tmp);
 }
 
 #undef silk_ADD_POS_SAT8
-static inline opus_int8 silk_ADD_POS_SAT8(opus_int64 a, opus_int64 b){
+static OPUS_INLINE opus_int8 silk_ADD_POS_SAT8(opus_int64 a, opus_int64 b){
     opus_int8 tmp;
     ops_count += 1;
     tmp = (opus_int8)((((a)+(b)) & 0x80) ? silk_int8_MAX  : ((a)+(b)));
     return(tmp);
 }
 #undef silk_ADD_POS_SAT16
-static inline opus_int16 silk_ADD_POS_SAT16(opus_int64 a, opus_int64 b){
+static OPUS_INLINE opus_int16 silk_ADD_POS_SAT16(opus_int64 a, opus_int64 b){
     opus_int16 tmp;
     ops_count += 1;
     tmp = (opus_int16)((((a)+(b)) & 0x8000) ? silk_int16_MAX : ((a)+(b)));
     return(tmp);
 }
 
 #undef silk_ADD_POS_SAT32
-static inline opus_int32 silk_ADD_POS_SAT32(opus_int64 a, opus_int64 b){
+static OPUS_INLINE opus_int32 silk_ADD_POS_SAT32(opus_int64 a, opus_int64 b){
     opus_int32 tmp;
     ops_count += 1;
     tmp = (opus_int32)((((a)+(b)) & 0x80000000) ? silk_int32_MAX : ((a)+(b)));
     return(tmp);
 }
 
 #undef silk_ADD_POS_SAT64
-static inline opus_int64 silk_ADD_POS_SAT64(opus_int64 a, opus_int64 b){
+static OPUS_INLINE opus_int64 silk_ADD_POS_SAT64(opus_int64 a, opus_int64 b){
     opus_int64 tmp;
     ops_count += 1;
     tmp = ((((a)+(b)) & 0x8000000000000000LL) ? silk_int64_MAX : ((a)+(b)));
     return(tmp);
 }
 
 #undef    silk_LSHIFT8
-static inline opus_int8 silk_LSHIFT8(opus_int8 a, opus_int32 shift){
+static OPUS_INLINE opus_int8 silk_LSHIFT8(opus_int8 a, opus_int32 shift){
     opus_int8 ret;
     ops_count += 1;
     ret = a << shift;
     return ret;
 }
 #undef    silk_LSHIFT16
-static inline opus_int16 silk_LSHIFT16(opus_int16 a, opus_int32 shift){
+static OPUS_INLINE opus_int16 silk_LSHIFT16(opus_int16 a, opus_int32 shift){
     opus_int16 ret;
     ops_count += 1;
     ret = a << shift;
     return ret;
 }
 #undef    silk_LSHIFT32
-static inline opus_int32 silk_LSHIFT32(opus_int32 a, opus_int32 shift){
+static OPUS_INLINE opus_int32 silk_LSHIFT32(opus_int32 a, opus_int32 shift){
     opus_int32 ret;
     ops_count += 1;
     ret = a << shift;
     return ret;
 }
 #undef    silk_LSHIFT64
-static inline opus_int64 silk_LSHIFT64(opus_int64 a, opus_int shift){
+static OPUS_INLINE opus_int64 silk_LSHIFT64(opus_int64 a, opus_int shift){
     ops_count += 1;
     return a << shift;
 }
 
 #undef    silk_LSHIFT_ovflw
-static inline opus_int32 silk_LSHIFT_ovflw(opus_int32 a, opus_int32 shift){
+static OPUS_INLINE opus_int32 silk_LSHIFT_ovflw(opus_int32 a, opus_int32 shift){
     ops_count += 1;
     return a << shift;
 }
 
 #undef    silk_LSHIFT_uint
-static inline opus_uint32 silk_LSHIFT_uint(opus_uint32 a, opus_int32 shift){
+static OPUS_INLINE opus_uint32 silk_LSHIFT_uint(opus_uint32 a, opus_int32 shift){
     opus_uint32 ret;
     ops_count += 1;
     ret = a << shift;
     return ret;
 }
 
 #undef    silk_RSHIFT8
-static inline opus_int8 silk_RSHIFT8(opus_int8 a, opus_int32 shift){
+static OPUS_INLINE opus_int8 silk_RSHIFT8(opus_int8 a, opus_int32 shift){
     ops_count += 1;
     return a >> shift;
 }
 #undef    silk_RSHIFT16
-static inline opus_int16 silk_RSHIFT16(opus_int16 a, opus_int32 shift){
+static OPUS_INLINE opus_int16 silk_RSHIFT16(opus_int16 a, opus_int32 shift){
     ops_count += 1;
     return a >> shift;
 }
 #undef    silk_RSHIFT32
-static inline opus_int32 silk_RSHIFT32(opus_int32 a, opus_int32 shift){
+static OPUS_INLINE opus_int32 silk_RSHIFT32(opus_int32 a, opus_int32 shift){
     ops_count += 1;
     return a >> shift;
 }
 #undef    silk_RSHIFT64
-static inline opus_int64 silk_RSHIFT64(opus_int64 a, opus_int64 shift){
+static OPUS_INLINE opus_int64 silk_RSHIFT64(opus_int64 a, opus_int64 shift){
     ops_count += 1;
     return a >> shift;
 }
 
 #undef    silk_RSHIFT_uint
-static inline opus_uint32 silk_RSHIFT_uint(opus_uint32 a, opus_int32 shift){
+static OPUS_INLINE opus_uint32 silk_RSHIFT_uint(opus_uint32 a, opus_int32 shift){
     ops_count += 1;
     return a >> shift;
 }
 
 #undef    silk_ADD_LSHIFT
-static inline opus_int32 silk_ADD_LSHIFT(opus_int32 a, opus_int32 b, opus_int32 shift){
+static OPUS_INLINE opus_int32 silk_ADD_LSHIFT(opus_int32 a, opus_int32 b, opus_int32 shift){
     opus_int32 ret;
     ops_count += 1;
     ret = a + (b << shift);
     return ret;                /* shift >= 0*/
 }
 #undef    silk_ADD_LSHIFT32
-static inline opus_int32 silk_ADD_LSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
+static OPUS_INLINE opus_int32 silk_ADD_LSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
     opus_int32 ret;
     ops_count += 1;
     ret = a + (b << shift);
     return ret;                /* shift >= 0*/
 }
 #undef    silk_ADD_LSHIFT_uint
-static inline opus_uint32 silk_ADD_LSHIFT_uint(opus_uint32 a, opus_uint32 b, opus_int32 shift){
+static OPUS_INLINE opus_uint32 silk_ADD_LSHIFT_uint(opus_uint32 a, opus_uint32 b, opus_int32 shift){
     opus_uint32 ret;
     ops_count += 1;
     ret = a + (b << shift);
     return ret;                /* shift >= 0*/
 }
 #undef    silk_ADD_RSHIFT
-static inline opus_int32 silk_ADD_RSHIFT(opus_int32 a, opus_int32 b, opus_int32 shift){
+static OPUS_INLINE opus_int32 silk_ADD_RSHIFT(opus_int32 a, opus_int32 b, opus_int32 shift){
     opus_int32 ret;
     ops_count += 1;
     ret = a + (b >> shift);
     return ret;                /* shift  > 0*/
 }
 #undef    silk_ADD_RSHIFT32
-static inline opus_int32 silk_ADD_RSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
+static OPUS_INLINE opus_int32 silk_ADD_RSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
     opus_int32 ret;
     ops_count += 1;
     ret = a + (b >> shift);
     return ret;                /* shift  > 0*/
 }
 #undef    silk_ADD_RSHIFT_uint
-static inline opus_uint32 silk_ADD_RSHIFT_uint(opus_uint32 a, opus_uint32 b, opus_int32 shift){
+static OPUS_INLINE opus_uint32 silk_ADD_RSHIFT_uint(opus_uint32 a, opus_uint32 b, opus_int32 shift){
     opus_uint32 ret;
     ops_count += 1;
     ret = a + (b >> shift);
     return ret;                /* shift  > 0*/
 }
 #undef    silk_SUB_LSHIFT32
-static inline opus_int32 silk_SUB_LSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
+static OPUS_INLINE opus_int32 silk_SUB_LSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
     opus_int32 ret;
     ops_count += 1;
     ret = a - (b << shift);
     return ret;                /* shift >= 0*/
 }
 #undef    silk_SUB_RSHIFT32
-static inline opus_int32 silk_SUB_RSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
+static OPUS_INLINE opus_int32 silk_SUB_RSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
     opus_int32 ret;
     ops_count += 1;
     ret = a - (b >> shift);
     return ret;                /* shift  > 0*/
 }
 
 #undef    silk_RSHIFT_ROUND
-static inline opus_int32 silk_RSHIFT_ROUND(opus_int32 a, opus_int32 shift){
+static OPUS_INLINE opus_int32 silk_RSHIFT_ROUND(opus_int32 a, opus_int32 shift){
     opus_int32 ret;
     ops_count += 3;
     ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1;
     return ret;
 }
 
 #undef    silk_RSHIFT_ROUND64
-static inline opus_int64 silk_RSHIFT_ROUND64(opus_int64 a, opus_int32 shift){
+static OPUS_INLINE opus_int64 silk_RSHIFT_ROUND64(opus_int64 a, opus_int32 shift){
     opus_int64 ret;
     ops_count += 6;
     ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1;
     return ret;
 }
 
 #undef    silk_abs_int64
-static inline opus_int64 silk_abs_int64(opus_int64 a){
+static OPUS_INLINE opus_int64 silk_abs_int64(opus_int64 a){
     ops_count += 1;
     return (((a) >  0)  ? (a) : -(a));            /* Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN*/
 }
 
 #undef    silk_abs_int32
-static inline opus_int32 silk_abs_int32(opus_int32 a){
+static OPUS_INLINE opus_int32 silk_abs_int32(opus_int32 a){
     ops_count += 1;
     return silk_abs(a);
 }
 
 
 #undef silk_min
 static silk_min(a, b){
     ops_count += 1;
@@ -493,218 +493,218 @@ static silk_max(a, b){
 }
 #undef silk_sign
 static silk_sign(a){
     ops_count += 1;
     return ((a) > 0 ? 1 : ( (a) < 0 ? -1 : 0 ));
 }
 
 #undef    silk_ADD16
-static inline opus_int16 silk_ADD16(opus_int16 a, opus_int16 b){
+static OPUS_INLINE opus_int16 silk_ADD16(opus_int16 a, opus_int16 b){
     opus_int16 ret;
     ops_count += 1;
     ret = a + b;
     return ret;
 }
 
 #undef    silk_ADD32
-static inline opus_int32 silk_ADD32(opus_int32 a, opus_int32 b){
+static OPUS_INLINE opus_int32 silk_ADD32(opus_int32 a, opus_int32 b){
     opus_int32 ret;
     ops_count += 1;
     ret = a + b;
     return ret;
 }
 
 #undef    silk_ADD64
-static inline opus_int64 silk_ADD64(opus_int64 a, opus_int64 b){
+static OPUS_INLINE opus_int64 silk_ADD64(opus_int64 a, opus_int64 b){
     opus_int64 ret;
     ops_count += 2;
     ret = a + b;
     return ret;
 }
 
 #undef    silk_SUB16
-static inline opus_int16 silk_SUB16(opus_int16 a, opus_int16 b){
+static OPUS_INLINE opus_int16 silk_SUB16(opus_int16 a, opus_int16 b){
     opus_int16 ret;
     ops_count += 1;
     ret = a - b;
     return ret;
 }
 
 #undef    silk_SUB32
-static inline opus_int32 silk_SUB32(opus_int32 a, opus_int32 b){
+static OPUS_INLINE opus_int32 silk_SUB32(opus_int32 a, opus_int32 b){
     opus_int32 ret;
     ops_count += 1;
     ret = a - b;
     return ret;
 }
 
 #undef    silk_SUB64
-static inline opus_int64 silk_SUB64(opus_int64 a, opus_int64 b){
+static OPUS_INLINE opus_int64 silk_SUB64(opus_int64 a, opus_int64 b){
     opus_int64 ret;
     ops_count += 2;
     ret = a - b;
     return ret;
 }
 
 #undef silk_ADD_SAT16
-static inline opus_int16 silk_ADD_SAT16( opus_int16 a16, opus_int16 b16 ) {
+static OPUS_INLINE opus_int16 silk_ADD_SAT16( opus_int16 a16, opus_int16 b16 ) {
     opus_int16 res;
     /* Nb will be counted in AKP_add32 and silk_SAT16*/
     res = (opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a16), (b16) ) );
     return res;
 }
 
 #undef silk_ADD_SAT32
-static inline opus_int32 silk_ADD_SAT32(opus_int32 a32, opus_int32 b32){
+static OPUS_INLINE opus_int32 silk_ADD_SAT32(opus_int32 a32, opus_int32 b32){
     opus_int32 res;
     ops_count += 1;
     res =    ((((a32) + (b32)) & 0x80000000) == 0 ?                                    \
             ((((a32) & (b32)) & 0x80000000) != 0 ? silk_int32_MIN : (a32)+(b32)) :    \
             ((((a32) | (b32)) & 0x80000000) == 0 ? silk_int32_MAX : (a32)+(b32)) );
     return res;
 }
 
 #undef silk_ADD_SAT64
-static inline opus_int64 silk_ADD_SAT64( opus_int64 a64, opus_int64 b64 ) {
+static OPUS_INLINE opus_int64 silk_ADD_SAT64( opus_int64 a64, opus_int64 b64 ) {
     opus_int64 res;
     ops_count += 1;
     res =    ((((a64) + (b64)) & 0x8000000000000000LL) == 0 ?                                \
             ((((a64) & (b64)) & 0x8000000000000000LL) != 0 ? silk_int64_MIN : (a64)+(b64)) :    \
             ((((a64) | (b64)) & 0x8000000000000000LL) == 0 ? silk_int64_MAX : (a64)+(b64)) );
     return res;
 }
 
 #undef silk_SUB_SAT16
-static inline opus_int16 silk_SUB_SAT16( opus_int16 a16, opus_int16 b16 ) {
+static OPUS_INLINE opus_int16 silk_SUB_SAT16( opus_int16 a16, opus_int16 b16 ) {
     opus_int16 res;
     silk_assert(0);
     /* Nb will be counted in sub-macros*/
     res = (opus_int16)silk_SAT16( silk_SUB32( (opus_int32)(a16), (b16) ) );
     return res;
 }
 
 #undef silk_SUB_SAT32
-static inline opus_int32 silk_SUB_SAT32( opus_int32 a32, opus_int32 b32 ) {
+static OPUS_INLINE opus_int32 silk_SUB_SAT32( opus_int32 a32, opus_int32 b32 ) {
     opus_int32 res;
     ops_count += 1;
     res =     ((((a32)-(b32)) & 0x80000000) == 0 ?                                            \
             (( (a32) & ((b32)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a32)-(b32)) :    \
             ((((a32)^0x80000000) & (b32)  & 0x80000000) ? silk_int32_MAX : (a32)-(b32)) );
     return res;
 }
 
 #undef silk_SUB_SAT64
-static inline opus_int64 silk_SUB_SAT64( opus_int64 a64, opus_int64 b64 ) {
+static OPUS_INLINE opus_int64 silk_SUB_SAT64( opus_int64 a64, opus_int64 b64 ) {
     opus_int64 res;
     ops_count += 1;
     res =    ((((a64)-(b64)) & 0x8000000000000000LL) == 0 ?                                                        \
             (( (a64) & ((b64)^0x8000000000000000LL) & 0x8000000000000000LL) ? silk_int64_MIN : (a64)-(b64)) :    \
             ((((a64)^0x8000000000000000LL) & (b64)  & 0x8000000000000000LL) ? silk_int64_MAX : (a64)-(b64)) );
 
     return res;
 }
 
 #undef    silk_SMULWW
-static inline opus_int32 silk_SMULWW(opus_int32 a32, opus_int32 b32){
+static OPUS_INLINE opus_int32 silk_SMULWW(opus_int32 a32, opus_int32 b32){
     opus_int32 ret;
     /* Nb will be counted in sub-macros*/
     ret = silk_MLA(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16));
     return ret;
 }
 
 #undef    silk_SMLAWW
-static inline opus_int32 silk_SMLAWW(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+static OPUS_INLINE opus_int32 silk_SMLAWW(opus_int32 a32, opus_int32 b32, opus_int32 c32){
     opus_int32 ret;
     /* Nb will be counted in sub-macros*/
     ret = silk_MLA(silk_SMLAWB((a32), (b32), (c32)), (b32), silk_RSHIFT_ROUND((c32), 16));
     return ret;
 }
 
 #undef    silk_min_int
-static inline opus_int silk_min_int(opus_int a, opus_int b)
+static OPUS_INLINE opus_int silk_min_int(opus_int a, opus_int b)
 {
     ops_count += 1;
     return (((a) < (b)) ? (a) : (b));
 }
 
 #undef    silk_min_16
-static inline opus_int16 silk_min_16(opus_int16 a, opus_int16 b)
+static OPUS_INLINE opus_int16 silk_min_16(opus_int16 a, opus_int16 b)
 {
     ops_count += 1;
     return (((a) < (b)) ? (a) : (b));
 }
 #undef    silk_min_32
-static inline opus_int32 silk_min_32(opus_int32 a, opus_int32 b)
+static OPUS_INLINE opus_int32 silk_min_32(opus_int32 a, opus_int32 b)
 {
     ops_count += 1;
     return (((a) < (b)) ? (a) : (b));
 }
 #undef    silk_min_64
-static inline opus_int64 silk_min_64(opus_int64 a, opus_int64 b)
+static OPUS_INLINE opus_int64 silk_min_64(opus_int64 a, opus_int64 b)
 {
     ops_count += 1;
     return (((a) < (b)) ? (a) : (b));
 }
 
 /* silk_min() versions with typecast in the function call */
 #undef    silk_max_int
-static inline opus_int silk_max_int(opus_int a, opus_int b)
+static OPUS_INLINE opus_int silk_max_int(opus_int a, opus_int b)
 {
     ops_count += 1;
     return (((a) > (b)) ? (a) : (b));
 }
 #undef    silk_max_16
-static inline opus_int16 silk_max_16(opus_int16 a, opus_int16 b)
+static OPUS_INLINE opus_int16 silk_max_16(opus_int16 a, opus_int16 b)
 {
     ops_count += 1;
     return (((a) > (b)) ? (a) : (b));
 }
 #undef    silk_max_32
-static inline opus_int32 silk_max_32(opus_int32 a, opus_int32 b)
+static OPUS_INLINE opus_int32 silk_max_32(opus_int32 a, opus_int32 b)
 {
     ops_count += 1;
     return (((a) > (b)) ? (a) : (b));
 }
 
 #undef    silk_max_64
-static inline opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
+static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
 {
     ops_count += 1;
     return (((a) > (b)) ? (a) : (b));
 }
 
 
 #undef silk_LIMIT_int
-static inline opus_int silk_LIMIT_int(opus_int a, opus_int limit1, opus_int limit2)
+static OPUS_INLINE opus_int silk_LIMIT_int(opus_int a, opus_int limit1, opus_int limit2)
 {
     opus_int ret;
     ops_count += 6;
 
     ret = ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \
         : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a))));
 
     return(ret);
 }
 
 #undef silk_LIMIT_16
-static inline opus_int16 silk_LIMIT_16(opus_int16 a, opus_int16 limit1, opus_int16 limit2)
+static OPUS_INLINE opus_int16 silk_LIMIT_16(opus_int16 a, opus_int16 limit1, opus_int16 limit2)
 {
     opus_int16 ret;
     ops_count += 6;
 
     ret = ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \
         : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a))));
 
 return(ret);
 }
 
 
 #undef silk_LIMIT_32
-static inline opus_int silk_LIMIT_32(opus_int32 a, opus_int32 limit1, opus_int32 limit2)
+static OPUS_INLINE opus_int silk_LIMIT_32(opus_int32 a, opus_int32 limit1, opus_int32 limit2)
 {
     opus_int32 ret;
     ops_count += 6;
 
     ret = ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \
         : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a))));
     return(ret);
 }
--- a/media/libopus/silk/MacroDebug.h
+++ b/media/libopus/silk/MacroDebug.h
@@ -31,145 +31,145 @@ POSSIBILITY OF SUCH DAMAGE.
 
 /* Redefine macro functions with extensive assertion in DEBUG mode.
    As functions can't be undefined, this file can't work with SigProcFIX_MacroCount.h */
 
 #if ( defined (FIXED_DEBUG) || ( 0 && defined (_DEBUG) ) ) && !defined (silk_MACRO_COUNT)
 
 #undef silk_ADD16
 #define silk_ADD16(a,b) silk_ADD16_((a), (b), __FILE__, __LINE__)
-static inline opus_int16 silk_ADD16_(opus_int16 a, opus_int16 b, char *file, int line){
+static OPUS_INLINE opus_int16 silk_ADD16_(opus_int16 a, opus_int16 b, char *file, int line){
     opus_int16 ret;
 
     ret = a + b;
     if ( ret != silk_ADD_SAT16( a, b ) )
     {
         fprintf (stderr, "silk_ADD16(%d, %d) in %s: line %d\n", a, b, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return ret;
 }
 
 #undef silk_ADD32
 #define silk_ADD32(a,b) silk_ADD32_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_ADD32_(opus_int32 a, opus_int32 b, char *file, int line){
+static OPUS_INLINE opus_int32 silk_ADD32_(opus_int32 a, opus_int32 b, char *file, int line){
     opus_int32 ret;
 
     ret = a + b;
     if ( ret != silk_ADD_SAT32( a, b ) )
     {
         fprintf (stderr, "silk_ADD32(%d, %d) in %s: line %d\n", a, b, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return ret;
 }
 
 #undef silk_ADD64
 #define silk_ADD64(a,b) silk_ADD64_((a), (b), __FILE__, __LINE__)
-static inline opus_int64 silk_ADD64_(opus_int64 a, opus_int64 b, char *file, int line){
+static OPUS_INLINE opus_int64 silk_ADD64_(opus_int64 a, opus_int64 b, char *file, int line){
     opus_int64 ret;
 
     ret = a + b;
     if ( ret != silk_ADD_SAT64( a, b ) )
     {
         fprintf (stderr, "silk_ADD64(%lld, %lld) in %s: line %d\n", (long long)a, (long long)b, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return ret;
 }
 
 #undef silk_SUB16
 #define silk_SUB16(a,b) silk_SUB16_((a), (b), __FILE__, __LINE__)
-static inline opus_int16 silk_SUB16_(opus_int16 a, opus_int16 b, char *file, int line){
+static OPUS_INLINE opus_int16 silk_SUB16_(opus_int16 a, opus_int16 b, char *file, int line){
     opus_int16 ret;
 
     ret = a - b;
     if ( ret != silk_SUB_SAT16( a, b ) )
     {
         fprintf (stderr, "silk_SUB16(%d, %d) in %s: line %d\n", a, b, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return ret;
 }
 
 #undef silk_SUB32
 #define silk_SUB32(a,b) silk_SUB32_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_SUB32_(opus_int32 a, opus_int32 b, char *file, int line){
+static OPUS_INLINE opus_int32 silk_SUB32_(opus_int32 a, opus_int32 b, char *file, int line){
     opus_int32 ret;
 
     ret = a - b;
     if ( ret != silk_SUB_SAT32( a, b ) )
     {
         fprintf (stderr, "silk_SUB32(%d, %d) in %s: line %d\n", a, b, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return ret;
 }
 
 #undef silk_SUB64
 #define silk_SUB64(a,b) silk_SUB64_((a), (b), __FILE__, __LINE__)
-static inline opus_int64 silk_SUB64_(opus_int64 a, opus_int64 b, char *file, int line){
+static OPUS_INLINE opus_int64 silk_SUB64_(opus_int64 a, opus_int64 b, char *file, int line){
     opus_int64 ret;
 
     ret = a - b;
     if ( ret != silk_SUB_SAT64( a, b ) )
     {
         fprintf (stderr, "silk_SUB64(%lld, %lld) in %s: line %d\n", (long long)a, (long long)b, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return ret;
 }
 
 #undef silk_ADD_SAT16
 #define silk_ADD_SAT16(a,b) silk_ADD_SAT16_((a), (b), __FILE__, __LINE__)
-static inline opus_int16 silk_ADD_SAT16_( opus_int16 a16, opus_int16 b16, char *file, int line) {
+static OPUS_INLINE opus_int16 silk_ADD_SAT16_( opus_int16 a16, opus_int16 b16, char *file, int line) {
     opus_int16 res;
     res = (opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a16), (b16) ) );
     if ( res != silk_SAT16( (opus_int32)a16 + (opus_int32)b16 ) )
     {
         fprintf (stderr, "silk_ADD_SAT16(%d, %d) in %s: line %d\n", a16, b16, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return res;
 }
 
 #undef silk_ADD_SAT32
 #define silk_ADD_SAT32(a,b) silk_ADD_SAT32_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_ADD_SAT32_(opus_int32 a32, opus_int32 b32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_ADD_SAT32_(opus_int32 a32, opus_int32 b32, char *file, int line){
     opus_int32 res;
     res =   ((((opus_uint32)(a32) + (opus_uint32)(b32)) & 0x80000000) == 0 ?       \
             ((((a32) & (b32)) & 0x80000000) != 0 ? silk_int32_MIN : (a32)+(b32)) : \
             ((((a32) | (b32)) & 0x80000000) == 0 ? silk_int32_MAX : (a32)+(b32)) );
     if ( res != silk_SAT32( (opus_int64)a32 + (opus_int64)b32 ) )
     {
         fprintf (stderr, "silk_ADD_SAT32(%d, %d) in %s: line %d\n", a32, b32, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return res;
 }
 
 #undef silk_ADD_SAT64
 #define silk_ADD_SAT64(a,b) silk_ADD_SAT64_((a), (b), __FILE__, __LINE__)
-static inline opus_int64 silk_ADD_SAT64_( opus_int64 a64, opus_int64 b64, char *file, int line) {
+static OPUS_INLINE opus_int64 silk_ADD_SAT64_( opus_int64 a64, opus_int64 b64, char *file, int line) {
     opus_int64 res;
     int        fail = 0;
     res =   ((((a64) + (b64)) & 0x8000000000000000LL) == 0 ?                                 \
             ((((a64) & (b64)) & 0x8000000000000000LL) != 0 ? silk_int64_MIN : (a64)+(b64)) : \
             ((((a64) | (b64)) & 0x8000000000000000LL) == 0 ? silk_int64_MAX : (a64)+(b64)) );
     if( res != a64 + b64 ) {
         /* Check that we saturated to the correct extreme value */
         if ( !(( res == silk_int64_MAX && ( ( a64 >> 1 ) + ( b64 >> 1 ) > ( silk_int64_MAX >> 3 ) ) ) ||
@@ -188,49 +188,49 @@ static inline opus_int64 silk_ADD_SAT64_
         silk_assert( 0 );
 #endif
     }
     return res;
 }
 
 #undef silk_SUB_SAT16
 #define silk_SUB_SAT16(a,b) silk_SUB_SAT16_((a), (b), __FILE__, __LINE__)
-static inline opus_int16 silk_SUB_SAT16_( opus_int16 a16, opus_int16 b16, char *file, int line ) {
+static OPUS_INLINE opus_int16 silk_SUB_SAT16_( opus_int16 a16, opus_int16 b16, char *file, int line ) {
     opus_int16 res;
     res = (opus_int16)silk_SAT16( silk_SUB32( (opus_int32)(a16), (b16) ) );
     if ( res != silk_SAT16( (opus_int32)a16 - (opus_int32)b16 ) )
     {
         fprintf (stderr, "silk_SUB_SAT16(%d, %d) in %s: line %d\n", a16, b16, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return res;
 }
 
 #undef silk_SUB_SAT32
 #define silk_SUB_SAT32(a,b) silk_SUB_SAT32_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_SUB_SAT32_( opus_int32 a32, opus_int32 b32, char *file, int line ) {
+static OPUS_INLINE opus_int32 silk_SUB_SAT32_( opus_int32 a32, opus_int32 b32, char *file, int line ) {
     opus_int32 res;
     res =   ((((opus_uint32)(a32)-(opus_uint32)(b32)) & 0x80000000) == 0 ?                \
             (( (a32) & ((b32)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a32)-(b32)) : \
             ((((a32)^0x80000000) & (b32)  & 0x80000000) ? silk_int32_MAX : (a32)-(b32)) );
     if ( res != silk_SAT32( (opus_int64)a32 - (opus_int64)b32 ) )
     {
         fprintf (stderr, "silk_SUB_SAT32(%d, %d) in %s: line %d\n", a32, b32, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return res;
 }
 
 #undef silk_SUB_SAT64
 #define silk_SUB_SAT64(a,b) silk_SUB_SAT64_((a), (b), __FILE__, __LINE__)
-static inline opus_int64 silk_SUB_SAT64_( opus_int64 a64, opus_int64 b64, char *file, int line ) {
+static OPUS_INLINE opus_int64 silk_SUB_SAT64_( opus_int64 a64, opus_int64 b64, char *file, int line ) {
     opus_int64 res;
     int        fail = 0;
     res =   ((((a64)-(b64)) & 0x8000000000000000LL) == 0 ?                                                    \
             (( (a64) & ((b64)^0x8000000000000000LL) & 0x8000000000000000LL) ? silk_int64_MIN : (a64)-(b64)) : \
             ((((a64)^0x8000000000000000LL) & (b64)  & 0x8000000000000000LL) ? silk_int64_MAX : (a64)-(b64)) );
     if( res != a64 - b64 ) {
         /* Check that we saturated to the correct extreme value */
         if( !(( res == silk_int64_MAX && ( ( a64 >> 1 ) + ( b64 >> 1 ) > ( silk_int64_MAX >> 3 ) ) ) ||
@@ -249,139 +249,139 @@ static inline opus_int64 silk_SUB_SAT64_
         silk_assert( 0 );
 #endif
     }
     return res;
 }
 
 #undef silk_MUL
 #define silk_MUL(a,b) silk_MUL_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_MUL_(opus_int32 a32, opus_int32 b32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_MUL_(opus_int32 a32, opus_int32 b32, char *file, int line){
     opus_int32 ret;
     opus_int64 ret64;
     ret = a32 * b32;
     ret64 = (opus_int64)a32 * (opus_int64)b32;
     if ( (opus_int64)ret != ret64 )
     {
         fprintf (stderr, "silk_MUL(%d, %d) in %s: line %d\n", a32, b32, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return ret;
 }
 
 #undef silk_MUL_uint
 #define silk_MUL_uint(a,b) silk_MUL_uint_((a), (b), __FILE__, __LINE__)
-static inline opus_uint32 silk_MUL_uint_(opus_uint32 a32, opus_uint32 b32, char *file, int line){
+static OPUS_INLINE opus_uint32 silk_MUL_uint_(opus_uint32 a32, opus_uint32 b32, char *file, int line){
     opus_uint32 ret;
     ret = a32 * b32;
     if ( (opus_uint64)ret != (opus_uint64)a32 * (opus_uint64)b32 )
     {
         fprintf (stderr, "silk_MUL_uint(%u, %u) in %s: line %d\n", a32, b32, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return ret;
 }
 
 #undef silk_MLA
 #define silk_MLA(a,b,c) silk_MLA_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_int32 silk_MLA_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_MLA_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
     opus_int32 ret;
     ret = a32 + b32 * c32;
     if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (opus_int64)c32 )
     {
         fprintf (stderr, "silk_MLA(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return ret;
 }
 
 #undef silk_MLA_uint
 #define silk_MLA_uint(a,b,c) silk_MLA_uint_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_int32 silk_MLA_uint_(opus_uint32 a32, opus_uint32 b32, opus_uint32 c32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_MLA_uint_(opus_uint32 a32, opus_uint32 b32, opus_uint32 c32, char *file, int line){
     opus_uint32 ret;
     ret = a32 + b32 * c32;
     if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (opus_int64)c32 )
     {
         fprintf (stderr, "silk_MLA_uint(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return ret;
 }
 
 #undef silk_SMULWB
 #define silk_SMULWB(a,b) silk_SMULWB_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_SMULWB_(opus_int32 a32, opus_int32 b32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_SMULWB_(opus_int32 a32, opus_int32 b32, char *file, int line){
     opus_int32 ret;
     ret = (a32 >> 16) * (opus_int32)((opus_int16)b32) + (((a32 & 0x0000FFFF) * (opus_int32)((opus_int16)b32)) >> 16);
     if ( (opus_int64)ret != ((opus_int64)a32 * (opus_int16)b32) >> 16 )
     {
         fprintf (stderr, "silk_SMULWB(%d, %d) in %s: line %d\n", a32, b32, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return ret;
 }
 
 #undef silk_SMLAWB
 #define silk_SMLAWB(a,b,c) silk_SMLAWB_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_int32 silk_SMLAWB_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_SMLAWB_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
     opus_int32 ret;
     ret = silk_ADD32( a32, silk_SMULWB( b32, c32 ) );
     if ( silk_ADD32( a32, silk_SMULWB( b32, c32 ) ) != silk_ADD_SAT32( a32, silk_SMULWB( b32, c32 ) ) )
     {
         fprintf (stderr, "silk_SMLAWB(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return ret;
 }
 
 #undef silk_SMULWT
 #define silk_SMULWT(a,b) silk_SMULWT_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_SMULWT_(opus_int32 a32, opus_int32 b32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_SMULWT_(opus_int32 a32, opus_int32 b32, char *file, int line){
     opus_int32 ret;
     ret = (a32 >> 16) * (b32 >> 16) + (((a32 & 0x0000FFFF) * (b32 >> 16)) >> 16);
     if ( (opus_int64)ret != ((opus_int64)a32 * (b32 >> 16)) >> 16 )
     {
         fprintf (stderr, "silk_SMULWT(%d, %d) in %s: line %d\n", a32, b32, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return ret;
 }
 
 #undef silk_SMLAWT
 #define silk_SMLAWT(a,b,c) silk_SMLAWT_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_int32 silk_SMLAWT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_SMLAWT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
     opus_int32 ret;
     ret = a32 + ((b32 >> 16) * (c32 >> 16)) + (((b32 & 0x0000FFFF) * ((c32 >> 16)) >> 16));
     if ( (opus_int64)ret != (opus_int64)a32 + (((opus_int64)b32 * (c32 >> 16)) >> 16) )
     {
         fprintf (stderr, "silk_SMLAWT(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return ret;
 }
 
 #undef silk_SMULL
 #define silk_SMULL(a,b) silk_SMULL_((a), (b), __FILE__, __LINE__)
-static inline opus_int64 silk_SMULL_(opus_int64 a64, opus_int64 b64, char *file, int line){
+static OPUS_INLINE opus_int64 silk_SMULL_(opus_int64 a64, opus_int64 b64, char *file, int line){
     opus_int64 ret64;
     int        fail = 0;
     ret64 = a64 * b64;
     if( b64 != 0 ) {
         fail = a64 != (ret64 / b64);
     } else if( a64 != 0 ) {
         fail = b64 != (ret64 / a64);
     }
@@ -393,64 +393,64 @@ static inline opus_int64 silk_SMULL_(opu
 #endif
     }
     return ret64;
 }
 
 /* no checking needed for silk_SMULBB */
 #undef silk_SMLABB
 #define silk_SMLABB(a,b,c) silk_SMLABB_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_int32 silk_SMLABB_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_SMLABB_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
     opus_int32 ret;
     ret = a32 + (opus_int32)((opus_int16)b32) * (opus_int32)((opus_int16)c32);
     if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (opus_int16)c32 )
     {
         fprintf (stderr, "silk_SMLABB(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return ret;
 }
 
 /* no checking needed for silk_SMULBT */
 #undef silk_SMLABT
 #define silk_SMLABT(a,b,c) silk_SMLABT_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_int32 silk_SMLABT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_SMLABT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
     opus_int32 ret;
     ret = a32 + ((opus_int32)((opus_int16)b32)) * (c32 >> 16);
     if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (c32 >> 16) )
     {
         fprintf (stderr, "silk_SMLABT(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return ret;
 }
 
 /* no checking needed for silk_SMULTT */
 #undef silk_SMLATT
 #define silk_SMLATT(a,b,c) silk_SMLATT_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_int32 silk_SMLATT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_SMLATT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
     opus_int32 ret;
     ret = a32 + (b32 >> 16) * (c32 >> 16);
     if ( (opus_int64)ret != (opus_int64)a32 + (b32 >> 16) * (c32 >> 16) )
     {
         fprintf (stderr, "silk_SMLATT(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return ret;
 }
 
 #undef silk_SMULWW
 #define silk_SMULWW(a,b) silk_SMULWW_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_SMULWW_(opus_int32 a32, opus_int32 b32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_SMULWW_(opus_int32 a32, opus_int32 b32, char *file, int line){
     opus_int32 ret, tmp1, tmp2;
     opus_int64 ret64;
     int        fail = 0;
 
     ret  = silk_SMULWB( a32, b32 );
     tmp1 = silk_RSHIFT_ROUND( b32, 16 );
     tmp2 = silk_MUL( a32, tmp1 );
 
@@ -471,17 +471,17 @@ static inline opus_int32 silk_SMULWW_(op
 #endif
     }
 
     return ret;
 }
 
 #undef silk_SMLAWW
 #define silk_SMLAWW(a,b,c) silk_SMLAWW_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_int32 silk_SMLAWW_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_SMLAWW_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
     opus_int32 ret, tmp;
 
     tmp = silk_SMULWW( b32, c32 );
     ret = silk_ADD32( a32, tmp );
     if ( ret != silk_ADD_SAT32( a32, tmp ) )
     {
         fprintf (stderr, "silk_SMLAWW(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
 #ifdef FIXED_DEBUG_ASSERT
@@ -500,30 +500,30 @@ static inline opus_int32 silk_SMLAWW_(op
 /* no checking needed for silk_SMULL
    no checking needed for silk_SMLAL
    no checking needed for silk_SMLALBB
    no checking needed for SigProcFIX_CLZ16
    no checking needed for SigProcFIX_CLZ32*/
 
 #undef silk_DIV32
 #define silk_DIV32(a,b) silk_DIV32_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_DIV32_(opus_int32 a32, opus_int32 b32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_DIV32_(opus_int32 a32, opus_int32 b32, char *file, int line){
     if ( b32 == 0 )
     {
         fprintf (stderr, "silk_DIV32(%d, %d) in %s: line %d\n", a32, b32, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return a32 / b32;
 }
 
 #undef silk_DIV32_16
 #define silk_DIV32_16(a,b) silk_DIV32_16_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_DIV32_16_(opus_int32 a32, opus_int32 b32, char *file, int line){
+static OPUS_INLINE opus_int32 silk_DIV32_16_(opus_int32 a32, opus_int32 b32, char *file, int line){
     int fail = 0;
     fail |= b32 == 0;
     fail |= b32 > silk_int16_MAX;
     fail |= b32 < silk_int16_MIN;
     if ( fail )
     {
         fprintf (stderr, "silk_DIV32_16(%d, %d) in %s: line %d\n", a32, b32, file, line);
 #ifdef FIXED_DEBUG_ASSERT
@@ -539,17 +539,17 @@ static inline opus_int32 silk_DIV32_16_(
    no checking needed for silk_POS_SAT32
    no checking needed for silk_ADD_POS_SAT8
    no checking needed for silk_ADD_POS_SAT16
    no checking needed for silk_ADD_POS_SAT32
    no checking needed for silk_ADD_POS_SAT64 */
 
 #undef silk_LSHIFT8
 #define silk_LSHIFT8(a,b) silk_LSHIFT8_((a), (b), __FILE__, __LINE__)
-static inline opus_int8 silk_LSHIFT8_(opus_int8 a, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_int8 silk_LSHIFT8_(opus_int8 a, opus_int32 shift, char *file, int line){
     opus_int8 ret;
     int       fail = 0;
     ret = a << shift;
     fail |= shift < 0;
     fail |= shift >= 8;
     fail |= (opus_int64)ret != ((opus_int64)a) << shift;
     if ( fail )
     {
@@ -558,17 +558,17 @@ static inline opus_int8 silk_LSHIFT8_(op
         silk_assert( 0 );
 #endif
     }
     return ret;
 }
 
 #undef silk_LSHIFT16
 #define silk_LSHIFT16(a,b) silk_LSHIFT16_((a), (b), __FILE__, __LINE__)
-static inline opus_int16 silk_LSHIFT16_(opus_int16 a, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_int16 silk_LSHIFT16_(opus_int16 a, opus_int32 shift, char *file, int line){
     opus_int16 ret;
     int        fail = 0;
     ret = a << shift;
     fail |= shift < 0;
     fail |= shift >= 16;
     fail |= (opus_int64)ret != ((opus_int64)a) << shift;
     if ( fail )
     {
@@ -577,17 +577,17 @@ static inline opus_int16 silk_LSHIFT16_(
         silk_assert( 0 );
 #endif
     }
     return ret;
 }
 
 #undef silk_LSHIFT32
 #define silk_LSHIFT32(a,b) silk_LSHIFT32_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_LSHIFT32_(opus_int32 a, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_int32 silk_LSHIFT32_(opus_int32 a, opus_int32 shift, char *file, int line){
     opus_int32 ret;
     int        fail = 0;
     ret = a << shift;
     fail |= shift < 0;
     fail |= shift >= 32;
     fail |= (opus_int64)ret != ((opus_int64)a) << shift;
     if ( fail )
     {
@@ -596,17 +596,17 @@ static inline opus_int32 silk_LSHIFT32_(
         silk_assert( 0 );
 #endif
     }
     return ret;
 }
 
 #undef silk_LSHIFT64
 #define silk_LSHIFT64(a,b) silk_LSHIFT64_((a), (b), __FILE__, __LINE__)
-static inline opus_int64 silk_LSHIFT64_(opus_int64 a, opus_int shift, char *file, int line){
+static OPUS_INLINE opus_int64 silk_LSHIFT64_(opus_int64 a, opus_int shift, char *file, int line){
     opus_int64 ret;
     int        fail = 0;
     ret = a << shift;
     fail |= shift < 0;
     fail |= shift >= 64;
     fail |= (ret>>shift) != ((opus_int64)a);
     if ( fail )
     {
@@ -615,325 +615,325 @@ static inline opus_int64 silk_LSHIFT64_(
         silk_assert( 0 );
 #endif
     }
     return ret;
 }
 
 #undef silk_LSHIFT_ovflw
 #define silk_LSHIFT_ovflw(a,b) silk_LSHIFT_ovflw_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_LSHIFT_ovflw_(opus_int32 a, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_int32 silk_LSHIFT_ovflw_(opus_int32 a, opus_int32 shift, char *file, int line){
     if ( (shift < 0) || (shift >= 32) ) /* no check for overflow */
     {
         fprintf (stderr, "silk_LSHIFT_ovflw(%d, %d) in %s: line %d\n", a, shift, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return a << shift;
 }
 
 #undef silk_LSHIFT_uint
 #define silk_LSHIFT_uint(a,b) silk_LSHIFT_uint_((a), (b), __FILE__, __LINE__)
-static inline opus_uint32 silk_LSHIFT_uint_(opus_uint32 a, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_uint32 silk_LSHIFT_uint_(opus_uint32 a, opus_int32 shift, char *file, int line){
     opus_uint32 ret;
     ret = a << shift;
     if ( (shift < 0) || ((opus_int64)ret != ((opus_int64)a) << shift))
     {
         fprintf (stderr, "silk_LSHIFT_uint(%u, %d) in %s: line %d\n", a, shift, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return ret;
 }
 
 #undef silk_RSHIFT8
 #define silk_RSHITF8(a,b) silk_RSHIFT8_((a), (b), __FILE__, __LINE__)
-static inline opus_int8 silk_RSHIFT8_(opus_int8 a, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_int8 silk_RSHIFT8_(opus_int8 a, opus_int32 shift, char *file, int line){
     if ( (shift < 0) || (shift>=8) )
     {
         fprintf (stderr, "silk_RSHITF8(%d, %d) in %s: line %d\n", a, shift, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return a >> shift;
 }
 
 #undef silk_RSHIFT16
 #define silk_RSHITF16(a,b) silk_RSHIFT16_((a), (b), __FILE__, __LINE__)
-static inline opus_int16 silk_RSHIFT16_(opus_int16 a, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_int16 silk_RSHIFT16_(opus_int16 a, opus_int32 shift, char *file, int line){
     if ( (shift < 0) || (shift>=16) )
     {
         fprintf (stderr, "silk_RSHITF16(%d, %d) in %s: line %d\n", a, shift, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return a >> shift;
 }
 
 #undef silk_RSHIFT32
 #define silk_RSHIFT32(a,b) silk_RSHIFT32_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_RSHIFT32_(opus_int32 a, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_int32 silk_RSHIFT32_(opus_int32 a, opus_int32 shift, char *file, int line){
     if ( (shift < 0) || (shift>=32) )
     {
         fprintf (stderr, "silk_RSHITF32(%d, %d) in %s: line %d\n", a, shift, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return a >> shift;
 }
 
 #undef silk_RSHIFT64
 #define silk_RSHIFT64(a,b) silk_RSHIFT64_((a), (b), __FILE__, __LINE__)
-static inline opus_int64 silk_RSHIFT64_(opus_int64 a, opus_int64 shift, char *file, int line){
+static OPUS_INLINE opus_int64 silk_RSHIFT64_(opus_int64 a, opus_int64 shift, char *file, int line){
     if ( (shift < 0) || (shift>=64) )
     {
         fprintf (stderr, "silk_RSHITF64(%lld, %lld) in %s: line %d\n", (long long)a, (long long)shift, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return a >> shift;
 }
 
 #undef silk_RSHIFT_uint
 #define silk_RSHIFT_uint(a,b) silk_RSHIFT_uint_((a), (b), __FILE__, __LINE__)
-static inline opus_uint32 silk_RSHIFT_uint_(opus_uint32 a, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_uint32 silk_RSHIFT_uint_(opus_uint32 a, opus_int32 shift, char *file, int line){
     if ( (shift < 0) || (shift>32) )
     {
         fprintf (stderr, "silk_RSHIFT_uint(%u, %d) in %s: line %d\n", a, shift, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return a >> shift;
 }
 
 #undef silk_ADD_LSHIFT
 #define silk_ADD_LSHIFT(a,b,c) silk_ADD_LSHIFT_((a), (b), (c), __FILE__, __LINE__)
-static inline int silk_ADD_LSHIFT_(int a, int b, int shift, char *file, int line){
+static OPUS_INLINE int silk_ADD_LSHIFT_(int a, int b, int shift, char *file, int line){
     opus_int16 ret;
     ret = a + (b << shift);
     if ( (shift < 0) || (shift>15) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift)) )
     {
         fprintf (stderr, "silk_ADD_LSHIFT(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return ret;                /* shift >= 0 */
 }
 
 #undef silk_ADD_LSHIFT32
 #define silk_ADD_LSHIFT32(a,b,c) silk_ADD_LSHIFT32_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_int32 silk_ADD_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_int32 silk_ADD_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){
     opus_int32 ret;
     ret = a + (b << shift);
     if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift)) )
     {
         fprintf (stderr, "silk_ADD_LSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return ret;                /* shift >= 0 */
 }
 
 #undef silk_ADD_LSHIFT_uint
 #define silk_ADD_LSHIFT_uint(a,b,c) silk_ADD_LSHIFT_uint_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_uint32 silk_ADD_LSHIFT_uint_(opus_uint32 a, opus_uint32 b, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_uint32 silk_ADD_LSHIFT_uint_(opus_uint32 a, opus_uint32 b, opus_int32 shift, char *file, int line){
     opus_uint32 ret;
     ret = a + (b << shift);
     if ( (shift < 0) || (shift>32) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift)) )
     {
         fprintf (stderr, "silk_ADD_LSHIFT_uint(%u, %u, %d) in %s: line %d\n", a, b, shift, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return ret;                /* shift >= 0 */
 }
 
 #undef silk_ADD_RSHIFT
 #define silk_ADD_RSHIFT(a,b,c) silk_ADD_RSHIFT_((a), (b), (c), __FILE__, __LINE__)
-static inline int silk_ADD_RSHIFT_(int a, int b, int shift, char *file, int line){
+static OPUS_INLINE int silk_ADD_RSHIFT_(int a, int b, int shift, char *file, int line){
     opus_int16 ret;
     ret = a + (b >> shift);
     if ( (shift < 0) || (shift>15) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift)) )
     {
         fprintf (stderr, "silk_ADD_RSHIFT(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return ret;                /* shift  > 0 */
 }
 
 #undef silk_ADD_RSHIFT32
 #define silk_ADD_RSHIFT32(a,b,c) silk_ADD_RSHIFT32_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_int32 silk_ADD_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_int32 silk_ADD_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){
     opus_int32 ret;
     ret = a + (b >> shift);
     if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift)) )
     {
         fprintf (stderr, "silk_ADD_RSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return ret;                /* shift  > 0 */
 }
 
 #undef silk_ADD_RSHIFT_uint
 #define silk_ADD_RSHIFT_uint(a,b,c) silk_ADD_RSHIFT_uint_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_uint32 silk_ADD_RSHIFT_uint_(opus_uint32 a, opus_uint32 b, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_uint32 silk_ADD_RSHIFT_uint_(opus_uint32 a, opus_uint32 b, opus_int32 shift, char *file, int line){
     opus_uint32 ret;
     ret = a + (b >> shift);
     if ( (shift < 0) || (shift>32) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift)) )
     {
         fprintf (stderr, "silk_ADD_RSHIFT_uint(%u, %u, %d) in %s: line %d\n", a, b, shift, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return ret;                /* shift  > 0 */
 }
 
 #undef silk_SUB_LSHIFT32
 #define silk_SUB_LSHIFT32(a,b,c) silk_SUB_LSHIFT32_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_int32 silk_SUB_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_int32 silk_SUB_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){
     opus_int32 ret;
     ret = a - (b << shift);
     if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a - (((opus_int64)b) << shift)) )
     {
         fprintf (stderr, "silk_SUB_LSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return ret;                /* shift >= 0 */
 }
 
 #undef silk_SUB_RSHIFT32
 #define silk_SUB_RSHIFT32(a,b,c) silk_SUB_RSHIFT32_((a), (b), (c), __FILE__, __LINE__)
-static inline opus_int32 silk_SUB_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_int32 silk_SUB_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){
     opus_int32 ret;
     ret = a - (b >> shift);
     if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a - (((opus_int64)b) >> shift)) )
     {
         fprintf (stderr, "silk_SUB_RSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return ret;                /* shift  > 0 */
 }
 
 #undef silk_RSHIFT_ROUND
 #define silk_RSHIFT_ROUND(a,b) silk_RSHIFT_ROUND_((a), (b), __FILE__, __LINE__)
-static inline opus_int32 silk_RSHIFT_ROUND_(opus_int32 a, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_int32 silk_RSHIFT_ROUND_(opus_int32 a, opus_int32 shift, char *file, int line){
     opus_int32 ret;
     ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1;
     /* the marco definition can't handle a shift of zero */
     if ( (shift <= 0) || (shift>31) || ((opus_int64)ret != ((opus_int64)a + ((opus_int64)1 << (shift - 1))) >> shift) )
     {
         fprintf (stderr, "silk_RSHIFT_ROUND(%d, %d) in %s: line %d\n", a, shift, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return ret;
 }
 
 #undef silk_RSHIFT_ROUND64
 #define silk_RSHIFT_ROUND64(a,b) silk_RSHIFT_ROUND64_((a), (b), __FILE__, __LINE__)
-static inline opus_int64 silk_RSHIFT_ROUND64_(opus_int64 a, opus_int32 shift, char *file, int line){
+static OPUS_INLINE opus_int64 silk_RSHIFT_ROUND64_(opus_int64 a, opus_int32 shift, char *file, int line){
     opus_int64 ret;
     /* the marco definition can't handle a shift of zero */
     if ( (shift <= 0) || (shift>=64) )
     {
         fprintf (stderr, "silk_RSHIFT_ROUND64(%lld, %d) in %s: line %d\n", (long long)a, shift, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1;
     return ret;
 }
 
 /* silk_abs is used on floats also, so doesn't work... */
 /*#undef silk_abs
-static inline opus_int32 silk_abs(opus_int32 a){
+static OPUS_INLINE opus_int32 silk_abs(opus_int32 a){
     silk_assert(a != 0x80000000);
     return (((a) >  0)  ? (a) : -(a));            // Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN
 }*/
 
 #undef silk_abs_int64
 #define silk_abs_int64(a) silk_abs_int64_((a), __FILE__, __LINE__)
-static inline opus_int64 silk_abs_int64_(opus_int64 a, char *file, int line){
+static OPUS_INLINE opus_int64 silk_abs_int64_(opus_int64 a, char *file, int line){
     if ( a == silk_int64_MIN )
     {
         fprintf (stderr, "silk_abs_int64(%lld) in %s: line %d\n", (long long)a, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return (((a) >  0)  ? (a) : -(a));            /* Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN */
 }
 
 #undef silk_abs_int32
 #define silk_abs_int32(a) silk_abs_int32_((a), __FILE__, __LINE__)
-static inline opus_int32 silk_abs_int32_(opus_int32 a, char *file, int line){
+static OPUS_INLINE opus_int32 silk_abs_int32_(opus_int32 a, char *file, int line){
     if ( a == silk_int32_MIN )
     {
         fprintf (stderr, "silk_abs_int32(%d) in %s: line %d\n", a, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return silk_abs(a);
 }
 
 #undef silk_CHECK_FIT8
 #define silk_CHECK_FIT8(a) silk_CHECK_FIT8_((a), __FILE__, __LINE__)
-static inline opus_int8 silk_CHECK_FIT8_( opus_int64 a, char *file, int line ){
+static OPUS_INLINE opus_int8 silk_CHECK_FIT8_( opus_int64 a, char *file, int line ){
     opus_int8 ret;
     ret = (opus_int8)a;
     if ( (opus_int64)ret != a )
     {
         fprintf (stderr, "silk_CHECK_FIT8(%lld) in %s: line %d\n", (long long)a, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return( ret );
 }
 
 #undef silk_CHECK_FIT16
 #define silk_CHECK_FIT16(a) silk_CHECK_FIT16_((a), __FILE__, __LINE__)
-static inline opus_int16 silk_CHECK_FIT16_( opus_int64 a, char *file, int line ){
+static OPUS_INLINE opus_int16 silk_CHECK_FIT16_( opus_int64 a, char *file, int line ){
     opus_int16 ret;
     ret = (opus_int16)a;
     if ( (opus_int64)ret != a )
     {
         fprintf (stderr, "silk_CHECK_FIT16(%lld) in %s: line %d\n", (long long)a, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
     }
     return( ret );
 }
 
 #undef silk_CHECK_FIT32
 #define silk_CHECK_FIT32(a) silk_CHECK_FIT32_((a), __FILE__, __LINE__)
-static inline opus_int32 silk_CHECK_FIT32_( opus_int64 a, char *file, int line ){
+static OPUS_INLINE opus_int32 silk_CHECK_FIT32_( opus_int64 a, char *file, int line ){
     opus_int32 ret;
     ret = (opus_int32)a;
     if ( (opus_int64)ret != a )
     {
         fprintf (stderr, "silk_CHECK_FIT32(%lld) in %s: line %d\n", (long long)a, file, line);
 #ifdef FIXED_DEBUG_ASSERT
         silk_assert( 0 );
 #endif
--- a/media/libopus/silk/NLSF2A.c
+++ b/media/libopus/silk/NLSF2A.c
@@ -36,17 +36,17 @@ POSSIBILITY OF SUCH DAMAGE.
 /* functions are accurate inverses of each other                */
 
 #include "SigProc_FIX.h"
 #include "tables.h"
 
 #define QA      16
 
 /* helper function for NLSF2A(..) */
-static inline void silk_NLSF2A_find_poly(
+static OPUS_INLINE void silk_NLSF2A_find_poly(
     opus_int32          *out,      /* O    intermediate polynomial, QA [dd+1]        */
     const opus_int32    *cLSF,     /* I    vector of interleaved 2*cos(LSFs), QA [d] */
     opus_int            dd         /* I    polynomial order (= 1/2 * filter order)   */
 )
 {
     opus_int   k, n;
     opus_int32 ftmp;
 
--- a/media/libopus/silk/NLSF_decode.c
+++ b/media/libopus/silk/NLSF_decode.c
@@ -27,17 +27,17 @@ POSSIBILITY OF SUCH DAMAGE.
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 #include "main.h"
 
 /* Predictive dequantizer for NLSF residuals */
-static inline void silk_NLSF_residual_dequant(               /* O    Returns RD value in Q30                     */
+static OPUS_INLINE void silk_NLSF_residual_dequant(               /* O    Returns RD value in Q30                     */
           opus_int16         x_Q10[],                        /* O    Output [ order ]                            */
     const opus_int8          indices[],                      /* I    Quantization indices [ order ]              */
     const opus_uint8         pred_coef_Q8[],                 /* I    Backward predictor coefs [ order ]          */
     const opus_int           quant_step_size_Q16,            /* I    Quantization step size                      */
     const opus_int16         order                           /* I    Number of input values                      */
 )
 {
     opus_int     i, out_Q10, pred_Q10;
--- a/media/libopus/silk/NLSF_del_dec_quant.c
+++ b/media/libopus/silk/NLSF_del_dec_quant.c
@@ -116,17 +116,17 @@ opus_int32 silk_NLSF_del_dec_quant(     
             }
             RD_tmp_Q25            = RD_Q25[ j ];
             diff_Q10              = silk_SUB16( in_Q10, out0_Q10 );
             RD_Q25[ j ]           = silk_SMLABB( silk_MLA( RD_tmp_Q25, silk_SMULBB( diff_Q10, diff_Q10 ), w_Q5[ i ] ), mu_Q20, rate0_Q5 );
             diff_Q10              = silk_SUB16( in_Q10, out1_Q10 );
             RD_Q25[ j + nStates ] = silk_SMLABB( silk_MLA( RD_tmp_Q25, silk_SMULBB( diff_Q10, diff_Q10 ), w_Q5[ i ] ), mu_Q20, rate1_Q5 );
         }
 
-        if( nStates < NLSF_QUANT_DEL_DEC_STATES ) {
+        if( nStates <= ( NLSF_QUANT_DEL_DEC_STATES >> 1 ) ) {
             /* double number of states and copy */
             for( j = 0; j < nStates; j++ ) {
                 ind[ j + nStates ][ i ] = ind[ j ][ i ] + 1;
             }
             nStates = silk_LSHIFT( nStates, 1 );
             for( j = nStates; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
                 ind[ j ][ i ] = ind[ j - nStates ][ i ];
             }
--- a/media/libopus/silk/NSQ.c
+++ b/media/libopus/silk/NSQ.c
@@ -27,31 +27,31 @@ POSSIBILITY OF SUCH DAMAGE.
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 #include "main.h"
 #include "stack_alloc.h"
 
-static inline void silk_nsq_scale_states(
+static OPUS_INLINE void silk_nsq_scale_states(
     const silk_encoder_state *psEncC,           /* I    Encoder State                   */
     silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
     const opus_int32    x_Q3[],                 /* I    input in Q3                     */
     opus_int32          x_sc_Q10[],             /* O    input scaled with 1/Gain        */
     const opus_int16    sLTP[],                 /* I    re-whitened LTP state in Q0     */
     opus_int32          sLTP_Q15[],             /* O    LTP state matching scaled input */
     opus_int            subfr,                  /* I    subframe number                 */
     const opus_int      LTP_scale_Q14,          /* I                                    */
     const opus_int32    Gains_Q16[ MAX_NB_SUBFR ], /* I                                 */
     const opus_int      pitchL[ MAX_NB_SUBFR ], /* I    Pitch lag                       */
     const opus_int      signal_type             /* I    Signal type                     */
 );
 
-static inline void silk_noise_shape_quantizer(
+static OPUS_INLINE void silk_noise_shape_quantizer(
     silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
     opus_int            signalType,             /* I    Signal type                     */
     const opus_int32    x_sc_Q10[],             /* I                                    */
     opus_int8           pulses[],               /* O                                    */
     opus_int16          xq[],                   /* O                                    */
     opus_int32          sLTP_Q15[],             /* I/O  LTP state                       */
     const opus_int16    a_Q12[],                /* I    Short term prediction coefs     */
     const opus_int16    b_Q14[],                /* I    Long term prediction coefs      */
@@ -167,17 +167,17 @@ void silk_NSQ(
     silk_memmove( NSQ->xq,           &NSQ->xq[           psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) );
     silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) );
     RESTORE_STACK;
 }
 
 /***********************************/
 /* silk_noise_shape_quantizer  */
 /***********************************/
-static inline void silk_noise_shape_quantizer(
+static OPUS_INLINE void silk_noise_shape_quantizer(
     silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
     opus_int            signalType,             /* I    Signal type                     */
     const opus_int32    x_sc_Q10[],             /* I                                    */
     opus_int8           pulses[],               /* O                                    */
     opus_int16          xq[],                   /* O                                    */
     opus_int32          sLTP_Q15[],             /* I/O  LTP state                       */
     const opus_int16    a_Q12[],                /* I    Short term prediction coefs     */
     const opus_int16    b_Q14[],                /* I    Long term prediction coefs      */
@@ -365,17 +365,17 @@ static inline void silk_noise_shape_quan
         /* Make dither dependent on quantized signal */
         NSQ->rand_seed = silk_ADD32_ovflw( NSQ->rand_seed, pulses[ i ] );
     }
 
     /* Update LPC synth buffer */
     silk_memcpy( NSQ->sLPC_Q14, &NSQ->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
 }
 
-static inline void silk_nsq_scale_states(
+static OPUS_INLINE void silk_nsq_scale_states(
     const silk_encoder_state *psEncC,           /* I    Encoder State                   */
     silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
     const opus_int32    x_Q3[],                 /* I    input in Q3                     */
     opus_int32          x_sc_Q10[],             /* O    input scaled with 1/Gain        */
     const opus_int16    sLTP[],                 /* I    re-whitened LTP state in Q0     */
     opus_int32          sLTP_Q15[],             /* O    LTP state matching scaled input */
     opus_int            subfr,                  /* I    subframe number                 */
     const opus_int      LTP_scale_Q14,          /* I                                    */
--- a/media/libopus/silk/NSQ_del_dec.c
+++ b/media/libopus/silk/NSQ_del_dec.c
@@ -52,17 +52,17 @@ typedef struct {
     opus_int32 xq_Q14;
     opus_int32 LF_AR_Q14;
     opus_int32 sLTP_shp_Q14;
     opus_int32 LPC_exc_Q14;
 } NSQ_sample_struct;
 
 typedef NSQ_sample_struct  NSQ_sample_pair[ 2 ];
 
-static inline void silk_nsq_del_dec_scale_states(
+static OPUS_INLINE void silk_nsq_del_dec_scale_states(
     const silk_encoder_state *psEncC,               /* I    Encoder State                       */
     silk_nsq_state      *NSQ,                       /* I/O  NSQ state                           */
     NSQ_del_dec_struct  psDelDec[],                 /* I/O  Delayed decision states             */
     const opus_int32    x_Q3[],                     /* I    Input in Q3                         */
     opus_int32          x_sc_Q10[],                 /* O    Input scaled with 1/Gain in Q10     */
     const opus_int16    sLTP[],                     /* I    Re-whitened LTP state in Q0         */
     opus_int32          sLTP_Q15[],                 /* O    LTP state matching scaled input     */
     opus_int            subfr,                      /* I    Subframe number                     */
@@ -72,17 +72,17 @@ static inline void silk_nsq_del_dec_scal
     const opus_int      pitchL[ MAX_NB_SUBFR ],     /* I    Pitch lag                           */
     const opus_int      signal_type,                /* I    Signal type                         */
     const opus_int      decisionDelay               /* I    Decision delay                      */
 );
 
 /******************************************/
 /* Noise shape quantizer for one subframe */
 /******************************************/
-static inline void silk_noise_shape_quantizer_del_dec(
+static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
     silk_nsq_state      *NSQ,                   /* I/O  NSQ state                           */
     NSQ_del_dec_struct  psDelDec[],             /* I/O  Delayed decision states             */
     opus_int            signalType,             /* I    Signal type                         */
     const opus_int32    x_Q10[],                /* I                                        */
     opus_int8           pulses[],               /* O                                        */
     opus_int16          xq[],                   /* O                                        */
     opus_int32          sLTP_Q15[],             /* I/O  LTP filter state                    */
     opus_int32          delayedGain_Q10[],      /* I/O  Gain delay buffer                   */
@@ -298,17 +298,17 @@ void silk_NSQ_del_dec(
     silk_memmove( NSQ->xq,           &NSQ->xq[           psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) );
     silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) );
     RESTORE_STACK;
 }
 
 /******************************************/
 /* Noise shape quantizer for one subframe */
 /******************************************/
-static inline void silk_noise_shape_quantizer_del_dec(
+static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
     silk_nsq_state      *NSQ,                   /* I/O  NSQ state                           */
     NSQ_del_dec_struct  psDelDec[],             /* I/O  Delayed decision states             */
     opus_int            signalType,             /* I    Signal type                         */
     const opus_int32    x_Q10[],                /* I                                        */
     opus_int8           pulses[],               /* O                                        */
     opus_int16          xq[],                   /* O                                        */
     opus_int32          sLTP_Q15[],             /* I/O  LTP filter state                    */
     opus_int32          delayedGain_Q10[],      /* I/O  Gain delay buffer                   */
@@ -625,17 +625,17 @@ static inline void silk_noise_shape_quan
     /* Update LPC states */
     for( k = 0; k < nStatesDelayedDecision; k++ ) {
         psDD = &psDelDec[ k ];
         silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
     }
     RESTORE_STACK;
 }
 
-static inline void silk_nsq_del_dec_scale_states(
+static OPUS_INLINE void silk_nsq_del_dec_scale_states(
     const silk_encoder_state *psEncC,               /* I    Encoder State                       */
     silk_nsq_state      *NSQ,                       /* I/O  NSQ state                           */
     NSQ_del_dec_struct  psDelDec[],                 /* I/O  Delayed decision states             */
     const opus_int32    x_Q3[],                     /* I    Input in Q3                         */
     opus_int32          x_sc_Q10[],                 /* O    Input scaled with 1/Gain in Q10     */
     const opus_int16    sLTP[],                     /* I    Re-whitened LTP state in Q0         */
     opus_int32          sLTP_Q15[],                 /* O    LTP state matching scaled input     */
     opus_int            subfr,                      /* I    Subframe number                     */
--- a/media/libopus/silk/PLC.c
+++ b/media/libopus/silk/PLC.c
@@ -33,22 +33,22 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "stack_alloc.h"
 #include "PLC.h"
 
 #define NB_ATT 2
 static const opus_int16 HARM_ATT_Q15[NB_ATT]              = { 32440, 31130 }; /* 0.99, 0.95 */
 static const opus_int16 PLC_RAND_ATTENUATE_V_Q15[NB_ATT]  = { 31130, 26214 }; /* 0.95, 0.8 */
 static const opus_int16 PLC_RAND_ATTENUATE_UV_Q15[NB_ATT] = { 32440, 29491 }; /* 0.99, 0.9 */
 
-static inline void silk_PLC_update(
+static OPUS_INLINE void silk_PLC_update(
     silk_decoder_state                  *psDec,             /* I/O Decoder state        */
     silk_decoder_control                *psDecCtrl          /* I/O Decoder control      */
 );
 
-static inline void silk_PLC_conceal(
+static OPUS_INLINE void silk_PLC_conceal(
     silk_decoder_state                  *psDec,             /* I/O Decoder state        */
     silk_decoder_control                *psDecCtrl,         /* I/O Decoder control      */
     opus_int16                          frame[]             /* O LPC residual signal    */
 );
 
 
 void silk_PLC_Reset(
     silk_decoder_state                  *psDec              /* I/O Decoder state        */
@@ -87,17 +87,17 @@ void silk_PLC(
         /****************************/
         silk_PLC_update( psDec, psDecCtrl );
     }
 }
 
 /**************************************************/
 /* Update state of PLC                            */
 /**************************************************/
-static inline void silk_PLC_update(
+static OPUS_INLINE void silk_PLC_update(
     silk_decoder_state                  *psDec,             /* I/O Decoder state        */
     silk_decoder_control                *psDecCtrl          /* I/O Decoder control      */
 )
 {
     opus_int32 LTP_Gain_Q14, temp_LTP_Gain_Q14;
     opus_int   i, j;
     silk_PLC_struct *psPLC;
 
@@ -160,17 +160,17 @@ static inline void silk_PLC_update(
 
     /* Save last two gains */
     silk_memcpy( psPLC->prevGain_Q16, &psDecCtrl->Gains_Q16[ psDec->nb_subfr - 2 ], 2 * sizeof( opus_int32 ) );
 
     psPLC->subfr_length = psDec->subfr_length;
     psPLC->nb_subfr = psDec->nb_subfr;
 }
 
-static inline void silk_PLC_conceal(
+static OPUS_INLINE void silk_PLC_conceal(
     silk_decoder_state                  *psDec,             /* I/O Decoder state        */
     silk_decoder_control                *psDecCtrl,         /* I/O Decoder control      */
     opus_int16                          frame[]             /* O LPC residual signal    */
 )
 {
     opus_int   i, j, k;
     opus_int   lag, idx, sLTP_buf_idx, shift1, shift2;
     opus_int32 rand_seed, harm_Gain_Q15, rand_Gain_Q15, inv_gain_Q30;
--- a/media/libopus/silk/SigProc_FIX.h
+++ b/media/libopus/silk/SigProc_FIX.h
@@ -222,17 +222,18 @@ void silk_apply_sine_window(
 );
 
 /* Compute autocorrelation */
 void silk_autocorr(
     opus_int32                  *results,           /* O    Result (length correlationCount)                            */
     opus_int                    *scale,             /* O    Scaling of the correlation vector                           */
     const opus_int16            *inputData,         /* I    Input data to correlate                                     */
     const opus_int              inputDataSize,      /* I    Length of input                                             */
-    const opus_int              correlationCount    /* I    Number of correlation taps to compute                       */
+    const opus_int              correlationCount,   /* I    Number of correlation taps to compute                       */
+    int                         arch                /* I    Run-time architecture                                       */
 );
 
 void silk_decode_pitch(
     opus_int16                  lagIndex,           /* I                                                                */
     opus_int8                   contourIndex,       /* O                                                                */
     opus_int                    pitch_lags[],       /* O    4 pitch values                                              */
     const opus_int              Fs_kHz,             /* I    sampling frequency (kHz)                                    */
     const opus_int              nb_subfr            /* I    number of sub frames                                        */
@@ -244,17 +245,18 @@ opus_int silk_pitch_analysis_core(      
     opus_int16                  *lagIndex,          /* O    Lag Index                                                   */
     opus_int8                   *contourIndex,      /* O    Pitch contour Index                                         */
     opus_int                    *LTPCorr_Q15,       /* I/O  Normalized correlation; input: value from previous frame    */
     opus_int                    prevLag,            /* I    Last lag of previous frame; set to zero is unvoiced         */
     const opus_int32            search_thres1_Q16,  /* I    First stage threshold for lag candidates 0 - 1              */
     const opus_int              search_thres2_Q13,  /* I    Final threshold for lag candidates 0 - 1                    */
     const opus_int              Fs_kHz,             /* I    Sample frequency (kHz)                                      */
     const opus_int              complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
-    const opus_int              nb_subfr            /* I    number of 5 ms subframes                                    */
+    const opus_int              nb_subfr,           /* I    number of 5 ms subframes                                    */
+    int                         arch                /* I    Run-time architecture                                       */
 );
 
 /* Compute Normalized Line Spectral Frequencies (NLSFs) from whitening filter coefficients      */
 /* If not all roots are found, the a_Q16 coefficients are bandwidth expanded until convergence. */
 void silk_A2NLSF(
     opus_int16                  *NLSF,              /* O    Normalized Line Spectral Frequencies in Q15 (0..2^15-1) [d] */
     opus_int32                  *a_Q16,             /* I/O  Monic whitening filter coefficients in Q16 [d]              */
     const opus_int              d                   /* I    Filter order (must be even)                                 */
@@ -304,17 +306,18 @@ void silk_NLSF_VQ_weights_laroia(
 void silk_burg_modified(
     opus_int32                  *res_nrg,           /* O    Residual energy                                             */
     opus_int                    *res_nrg_Q,         /* O    Residual energy Q value                                     */
     opus_int32                  A_Q16[],            /* O    Prediction coefficients (length order)                      */
     const opus_int16            x[],                /* I    Input signal, length: nb_subfr * ( D + subfr_length )       */
     const opus_int32            minInvGain_Q30,     /* I    Inverse of max prediction gain                              */
     const opus_int              subfr_length,       /* I    Input signal subframe length (incl. D preceding samples)    */
     const opus_int              nb_subfr,           /* I    Number of subframes stacked in x                            */
-    const opus_int              D                   /* I    Order                                                       */
+    const opus_int              D,                  /* I    Order                                                       */
+    int                         arch                /* I    Run-time architecture                                       */
 );
 
 /* Copy and multiply a vector by a constant */
 void silk_scale_copy_vector16(
     opus_int16                  *data_out,
     const opus_int16            *data_in,
     opus_int32                  gain_Q16,           /* I    Gain in Q16                                                 */
     const opus_int              dataSize            /* I    Length                                                      */
@@ -353,18 +356,18 @@ opus_int64 silk_inner_prod16_aligned_64(
 
 /********************************************************************/
 /*                                MACROS                            */
 /********************************************************************/
 
 /* Rotate a32 right by 'rot' bits. Negative rot values result in rotating
    left. Output is 32bit int.
    Note: contemporary compilers recognize the C expression below and
-   compile it into a 'ror' instruction if available. No need for inline ASM! */
-static inline opus_int32 silk_ROR32( opus_int32 a32, opus_int rot )
+   compile it into a 'ror' instruction if available. No need for inline ASM! */
+static OPUS_INLINE opus_int32 silk_ROR32( opus_int32 a32, opus_int rot )
 {
     opus_uint32 x = (opus_uint32) a32;
     opus_uint32 r = (opus_uint32) rot;
     opus_uint32 m = (opus_uint32) -rot;
     if( rot == 0 ) {
         return a32;
     } else if( rot < 0 ) {
         return (opus_int32) ((x << m) | (x >> (32 - m)));
@@ -503,47 +506,47 @@ static inline opus_int32 silk_ROR32( opu
 
 #define silk_min(a, b)                      (((a) < (b)) ? (a) : (b))
 #define silk_max(a, b)                      (((a) > (b)) ? (a) : (b))
 
 /* Macro to convert floating-point constants to fixed-point */
 #define SILK_FIX_CONST( C, Q )              ((opus_int32)((C) * ((opus_int64)1 << (Q)) + 0.5))
 
 /* silk_min() versions with typecast in the function call */
-static inline opus_int silk_min_int(opus_int a, opus_int b)
+static OPUS_INLINE opus_int silk_min_int(opus_int a, opus_int b)
 {
     return (((a) < (b)) ? (a) : (b));
 }
-static inline opus_int16 silk_min_16(opus_int16 a, opus_int16 b)
+static OPUS_INLINE opus_int16 silk_min_16(opus_int16 a, opus_int16 b)
 {
     return (((a) < (b)) ? (a) : (b));
 }
-static inline opus_int32 silk_min_32(opus_int32 a, opus_int32 b)
+static OPUS_INLINE opus_int32 silk_min_32(opus_int32 a, opus_int32 b)
 {
     return (((a) < (b)) ? (a) : (b));
 }
-static inline opus_int64 silk_min_64(opus_int64 a, opus_int64 b)
+static OPUS_INLINE opus_int64 silk_min_64(opus_int64 a, opus_int64 b)
 {
     return (((a) < (b)) ? (a) : (b));
 }
 
 /* silk_min() versions with typecast in the function call */
-static inline opus_int silk_max_int(opus_int a, opus_int b)
+static OPUS_INLINE opus_int silk_max_int(opus_int a, opus_int b)
 {
     return (((a) > (b)) ? (a) : (b));
 }
-static inline opus_int16 silk_max_16(opus_int16 a, opus_int16 b)
+static OPUS_INLINE opus_int16 silk_max_16(opus_int16 a, opus_int16 b)
 {
     return (((a) > (b)) ? (a) : (b));
 }
-static inline opus_int32 silk_max_32(opus_int32 a, opus_int32 b)
+static OPUS_INLINE opus_int32 silk_max_32(opus_int32 a, opus_int32 b)
 {
     return (((a) > (b)) ? (a) : (b));
 }
-static inline opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
+static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
 {
     return (((a) > (b)) ? (a) : (b));
 }
 
 #define silk_LIMIT( a, limit1, limit2)      ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \
                                                                  : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a))))
 
 #define silk_LIMIT_int                      silk_LIMIT
@@ -571,21 +574,21 @@ static inline opus_int64 silk_max_64(opu
 /*#define silk_SMMUL(a32, b32)                (opus_int32)silk_RSHIFT(silk_SMLAL(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16)), 16)*/
 /* the following seems faster on x86 */
 #define silk_SMMUL(a32, b32)                (opus_int32)silk_RSHIFT64(silk_SMULL((a32), (b32)), 32)
 
 #include "Inlines.h"
 #include "MacroCount.h"
 #include "MacroDebug.h"
 
-#ifdef ARMv4_ASM
+#ifdef OPUS_ARM_INLINE_ASM
 #include "arm/SigProc_FIX_armv4.h"
 #endif
 
-#ifdef ARMv5E_ASM
+#ifdef OPUS_ARM_INLINE_EDSP
 #include "arm/SigProc_FIX_armv5e.h"
 #endif
 
 #ifdef  __cplusplus
 }
 #endif
 
 #endif /* SILK_SIGPROC_FIX_H */
--- a/media/libopus/silk/VAD.c
+++ b/media/libopus/silk/VAD.c
@@ -28,17 +28,17 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 #include "main.h"
 #include "stack_alloc.h"
 
 /* Silk VAD noise level estimation */
-static inline void silk_VAD_GetNoiseLevels(
+static OPUS_INLINE void silk_VAD_GetNoiseLevels(
     const opus_int32             pX[ VAD_N_BANDS ], /* I    subband energies                            */
     silk_VAD_state              *psSilk_VAD         /* I/O  Pointer to Silk VAD state                   */
 );
 
 /**********************************/
 /* Initialization of the Silk VAD */
 /**********************************/
 opus_int silk_VAD_Init(                                         /* O    Return value, 0 if success                  */
@@ -291,17 +291,17 @@ opus_int silk_VAD_GetSA_Q8(             
 
     RESTORE_STACK;
     return( ret );
 }
 
 /**************************/
 /* Noise level estimation */
 /**************************/
-static inline void silk_VAD_GetNoiseLevels(
+static OPUS_INLINE void silk_VAD_GetNoiseLevels(
     const opus_int32            pX[ VAD_N_BANDS ],  /* I    subband energies                            */
     silk_VAD_state              *psSilk_VAD         /* I/O  Pointer to Silk VAD state                   */
 )
 {
     opus_int   k;
     opus_int32 nl, nrg, inv_nrg;
     opus_int   coef, min_coef;
 
--- a/media/libopus/silk/VQ_WMat_EC.c
+++ b/media/libopus/silk/VQ_WMat_EC.c
@@ -30,42 +30,50 @@ POSSIBILITY OF SUCH DAMAGE.
 #endif
 
 #include "main.h"
 
 /* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */
 void silk_VQ_WMat_EC(
     opus_int8                   *ind,                           /* O    index of best codebook vector               */
     opus_int32                  *rate_dist_Q14,                 /* O    best weighted quant error + mu * rate       */
+    opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
     const opus_int16            *in_Q14,                        /* I    input vector to be quantized                */
     const opus_int32            *W_Q18,                         /* I    weighting matrix                            */
     const opus_int8             *cb_Q7,                         /* I    codebook                                    */
+    const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
     const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
     const opus_int              mu_Q9,                          /* I    tradeoff betw. weighted error and rate      */
+    const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
     opus_int                    L                               /* I    number of vectors in codebook               */
 )
 {
-    opus_int   k;
+    opus_int   k, gain_tmp_Q7;
     const opus_int8 *cb_row_Q7;
     opus_int16 diff_Q14[ 5 ];
     opus_int32 sum1_Q14, sum2_Q16;
 
     /* Loop over codebook */
     *rate_dist_Q14 = silk_int32_MAX;
     cb_row_Q7 = cb_Q7;
     for( k = 0; k < L; k++ ) {
+	    gain_tmp_Q7 = cb_gain_Q7[k];
+
         diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 );
         diff_Q14[ 1 ] = in_Q14[ 1 ] - silk_LSHIFT( cb_row_Q7[ 1 ], 7 );
         diff_Q14[ 2 ] = in_Q14[ 2 ] - silk_LSHIFT( cb_row_Q7[ 2 ], 7 );
         diff_Q14[ 3 ] = in_Q14[ 3 ] - silk_LSHIFT( cb_row_Q7[ 3 ], 7 );
         diff_Q14[ 4 ] = in_Q14[ 4 ] - silk_LSHIFT( cb_row_Q7[ 4 ], 7 );
 
         /* Weighted rate */
         sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] );
 
+		/* Penalty for too large gain */
+		sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 );
+
         silk_assert( sum1_Q14 >= 0 );
 
         /* first row of W_Q18 */
         sum2_Q16 = silk_SMULWB(           W_Q18[  1 ], diff_Q14[ 1 ] );
         sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  2 ], diff_Q14[ 2 ] );
         sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  3 ], diff_Q14[ 3 ] );
         sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  4 ], diff_Q14[ 4 ] );
         sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
@@ -98,14 +106,15 @@ void silk_VQ_WMat_EC(
         sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 4 ] );
 
         silk_assert( sum1_Q14 >= 0 );
 
         /* find best */
         if( sum1_Q14 < *rate_dist_Q14 ) {
             *rate_dist_Q14 = sum1_Q14;
             *ind = (opus_int8)k;
+			*gain_Q7 = gain_tmp_Q7;
         }
 
         /* Go to next cbk vector */
         cb_row_Q7 += LTP_ORDER;
     }
 }
--- a/media/libopus/silk/arm/SigProc_FIX_armv4.h
+++ b/media/libopus/silk/arm/SigProc_FIX_armv4.h
@@ -25,17 +25,17 @@ CONTRACT, STRICT LIABILITY, OR TORT (INC
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
 #ifndef SILK_SIGPROC_FIX_ARMv4_H
 #define SILK_SIGPROC_FIX_ARMv4_H
 
 #undef silk_MLA
-static inline opus_int32 silk_MLA_armv4(opus_int32 a, opus_int32 b,
+static OPUS_INLINE opus_int32 silk_MLA_armv4(opus_int32 a, opus_int32 b,
  opus_int32 c)
 {
   opus_int32 res;
   __asm__(
       "#silk_MLA\n\t"
       "mla %0, %1, %2, %3\n\t"
       : "=&r"(res)
       : "r"(b), "r"(c), "r"(a)
--- a/media/libopus/silk/arm/SigProc_FIX_armv5e.h
+++ b/media/libopus/silk/arm/SigProc_FIX_armv5e.h
@@ -25,31 +25,31 @@ CONTRACT, STRICT LIABILITY, OR TORT (INC
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
 #ifndef SILK_SIGPROC_FIX_ARMv5E_H
 #define SILK_SIGPROC_FIX_ARMv5E_H
 
 #undef silk_SMULTT
-static inline opus_int32 silk_SMULTT_armv5e(opus_int32 a, opus_int32 b)
+static OPUS_INLINE opus_int32 silk_SMULTT_armv5e(opus_int32 a, opus_int32 b)
 {
   opus_int32 res;
   __asm__(
       "#silk_SMULTT\n\t"
       "smultt %0, %1, %2\n\t"
       : "=r"(res)
       : "%r"(a), "r"(b)
   );
   return res;
 }
 #define silk_SMULTT(a, b) (silk_SMULTT_armv5e(a, b))
 
 #undef silk_SMLATT
-static inline opus_int32 silk_SMLATT_armv5e(opus_int32 a, opus_int32 b,
+static OPUS_INLINE opus_int32 silk_SMLATT_armv5e(opus_int32 a, opus_int32 b,
  opus_int32 c)
 {
   opus_int32 res;
   __asm__(
       "#silk_SMLATT\n\t"
       "smlatt %0, %1, %2, %3\n\t"
       : "=r"(res)
       : "%r"(b), "r"(c), "r"(a)
--- a/media/libopus/silk/arm/macros_armv4.h
+++ b/media/libopus/silk/arm/macros_armv4.h
@@ -25,17 +25,17 @@ ARISING IN ANY WAY OUT OF THE USE OF THI
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
 #ifndef SILK_MACROS_ARMv4_H
 #define SILK_MACROS_ARMv4_H
 
 /* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */
 #undef silk_SMULWB
-static inline opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b)
+static OPUS_INLINE opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b)
 {
   unsigned rd_lo;
   int rd_hi;
   __asm__(
       "#silk_SMULWB\n\t"
       "smull %0, %1, %2, %3\n\t"
       : "=&r"(rd_lo), "=&r"(rd_hi)
       : "%r"(a), "r"(b<<16)
@@ -45,17 +45,17 @@ static inline opus_int32 silk_SMULWB_arm
 #define silk_SMULWB(a, b) (silk_SMULWB_armv4(a, b))
 
 /* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */
 #undef silk_SMLAWB
 #define silk_SMLAWB(a, b, c) ((a) + silk_SMULWB(b, c))
 
 /* (a32 * (b32 >> 16)) >> 16 */
 #undef silk_SMULWT
-static inline opus_int32 silk_SMULWT_armv4(opus_int32 a, opus_int32 b)
+static OPUS_INLINE opus_int32 silk_SMULWT_armv4(opus_int32 a, opus_int32 b)
 {
   unsigned rd_lo;
   int rd_hi;
   __asm__(
       "#silk_SMULWT\n\t"
       "smull %0, %1, %2, %3\n\t"
       : "=&r"(rd_lo), "=&r"(rd_hi)
       : "%r"(a), "r"(b&~0xFFFF)
@@ -65,32 +65,32 @@ static inline opus_int32 silk_SMULWT_arm
 #define silk_SMULWT(a, b) (silk_SMULWT_armv4(a, b))
 
 /* a32 + (b32 * (c32 >> 16)) >> 16 */
 #undef silk_SMLAWT
 #define silk_SMLAWT(a, b, c) ((a) + silk_SMULWT(b, c))
 
 /* (a32 * b32) >> 16 */
 #undef silk_SMULWW
-static inline opus_int32 silk_SMULWW_armv4(opus_int32 a, opus_int32 b)
+static OPUS_INLINE opus_int32 silk_SMULWW_armv4(opus_int32 a, opus_int32 b)
 {
   unsigned rd_lo;
   int rd_hi;
   __asm__(
     "#silk_SMULWW\n\t"
     "smull %0, %1, %2, %3\n\t"
     : "=&r"(rd_lo), "=&r"(rd_hi)
     : "%r"(a), "r"(b)
   );
   return (rd_hi<<16)+(rd_lo>>16);
 }
 #define silk_SMULWW(a, b) (silk_SMULWW_armv4(a, b))
 
 #undef silk_SMLAWW
-static inline opus_int32 silk_SMLAWW_armv4(opus_int32 a, opus_int32 b,
+static OPUS_INLINE opus_int32 silk_SMLAWW_armv4(opus_int32 a, opus_int32 b,
  opus_int32 c)
 {
   unsigned rd_lo;
   int rd_hi;
   __asm__(
     "#silk_SMLAWW\n\t"
     "smull %0, %1, %2, %3\n\t"
     : "=&r"(rd_lo), "=&r"(rd_hi)
--- a/media/libopus/silk/arm/macros_armv5e.h
+++ b/media/libopus/silk/arm/macros_armv5e.h
@@ -26,183 +26,183 @@ ARISING IN ANY WAY OUT OF THE USE OF THI
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
 #ifndef SILK_MACROS_ARMv5E_H
 #define SILK_MACROS_ARMv5E_H
 
 /* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */
 #undef silk_SMULWB
-static inline opus_int32 silk_SMULWB_armv5e(opus_int32 a, opus_int16 b)
+static OPUS_INLINE opus_int32 silk_SMULWB_armv5e(opus_int32 a, opus_int16 b)
 {
   int res;
   __asm__(
       "#silk_SMULWB\n\t"
       "smulwb %0, %1, %2\n\t"
       : "=r"(res)
       : "r"(a), "r"(b)
   );
   return res;
 }
 #define silk_SMULWB(a, b) (silk_SMULWB_armv5e(a, b))
 
 /* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */
 #undef silk_SMLAWB
-static inline opus_int32 silk_SMLAWB_armv5e(opus_int32 a, opus_int32 b,
+static OPUS_INLINE opus_int32 silk_SMLAWB_armv5e(opus_int32 a, opus_int32 b,
  opus_int16 c)
 {
   int res;
   __asm__(
       "#silk_SMLAWB\n\t"
       "smlawb %0, %1, %2, %3\n\t"
       : "=r"(res)
       : "r"(b), "r"(c), "r"(a)
   );
   return res;
 }
 #define silk_SMLAWB(a, b, c) (silk_SMLAWB_armv5e(a, b, c))
 
 /* (a32 * (b32 >> 16)) >> 16 */
 #undef silk_SMULWT
-static inline opus_int32 silk_SMULWT_armv5e(opus_int32 a, opus_int32 b)
+static OPUS_INLINE opus_int32 silk_SMULWT_armv5e(opus_int32 a, opus_int32 b)
 {
   int res;
   __asm__(
       "#silk_SMULWT\n\t"
       "smulwt %0, %1, %2\n\t"
       : "=r"(res)
       : "r"(a), "r"(b)
   );
   return res;
 }
 #define silk_SMULWT(a, b) (silk_SMULWT_armv5e(a, b))
 
 /* a32 + (b32 * (c32 >> 16)) >> 16 */
 #undef silk_SMLAWT
-static inline opus_int32 silk_SMLAWT_armv5e(opus_int32 a, opus_int32 b,
+static OPUS_INLINE opus_int32 silk_SMLAWT_armv5e(opus_int32 a, opus_int32 b,
  opus_int32 c)
 {
   int res;
   __asm__(
       "#silk_SMLAWT\n\t"
       "smlawt %0, %1, %2, %3\n\t"
       : "=r"(res)
       : "r"(b), "r"(c), "r"(a)
   );
   return res;
 }
 #define silk_SMLAWT(a, b, c) (silk_SMLAWT_armv5e(a, b, c))
 
 /* (opus_int32)((opus_int16)(a3))) * (opus_int32)((opus_int16)(b32)) output have to be 32bit int */
 #undef silk_SMULBB
-static inline opus_int32 silk_SMULBB_armv5e(opus_int32 a, opus_int32 b)
+static OPUS_INLINE opus_int32 silk_SMULBB_armv5e(opus_int32 a, opus_int32 b)
 {
   int res;
   __asm__(
       "#silk_SMULBB\n\t"
       "smulbb %0, %1, %2\n\t"
       : "=r"(res)
       : "%r"(a), "r"(b)
   );
   return res;
 }
 #define silk_SMULBB(a, b) (silk_SMULBB_armv5e(a, b))
 
 /* a32 + (opus_int32)((opus_int16)(b32)) * (opus_int32)((opus_int16)(c32)) output have to be 32bit int */
 #undef silk_SMLABB
-static inline opus_int32 silk_SMLABB_armv5e(opus_int32 a, opus_int32 b,
+static OPUS_INLINE opus_int32 silk_SMLABB_armv5e(opus_int32 a, opus_int32 b,
  opus_int32 c)
 {
   int res;
   __asm__(
       "#silk_SMLABB\n\t"
       "smlabb %0, %1, %2, %3\n\t"
       : "=r"(res)
       : "%r"(b), "r"(c), "r"(a)
   );
   return res;
 }
 #define silk_SMLABB(a, b, c) (silk_SMLABB_armv5e(a, b, c))
 
 /* (opus_int32)((opus_int16)(a32)) * (b32 >> 16) */
 #undef silk_SMULBT
-static inline opus_int32 silk_SMULBT_armv5e(opus_int32 a, opus_int32 b)
+static OPUS_INLINE opus_int32 silk_SMULBT_armv5e(opus_int32 a, opus_int32 b)
 {
   int res;
   __asm__(
       "#silk_SMULBT\n\t"
       "smulbt %0, %1, %2\n\t"
       : "=r"(res)
       : "r"(a), "r"(b)
   );
   return res;
 }
 #define silk_SMULBT(a, b) (silk_SMULBT_armv5e(a, b))
 
 /* a32 + (opus_int32)((opus_int16)(b32)) * (c32 >> 16) */
 #undef silk_SMLABT
-static inline opus_int32 silk_SMLABT_armv5e(opus_int32 a, opus_int32 b,
+static OPUS_INLINE opus_int32 silk_SMLABT_armv5e(opus_int32 a, opus_int32 b,
  opus_int32 c)
 {
   int res;
   __asm__(
       "#silk_SMLABT\n\t"
       "smlabt %0, %1, %2, %3\n\t"
       : "=r"(res)
       : "r"(b), "r"(c), "r"(a)
   );
   return res;
 }
 #define silk_SMLABT(a, b, c) (silk_SMLABT_armv5e(a, b, c))
 
 /* add/subtract with output saturated */
 #undef silk_ADD_SAT32
-static inline opus_int32 silk_ADD_SAT32_armv5e(opus_int32 a, opus_int32 b)
+static OPUS_INLINE opus_int32 silk_ADD_SAT32_armv5e(opus_int32 a, opus_int32 b)
 {
   int res;
   __asm__(
       "#silk_ADD_SAT32\n\t"
       "qadd %0, %1, %2\n\t"
       : "=r"(res)
       : "%r"(a), "r"(b)
   );
   return res;
 }
 #define silk_ADD_SAT32(a, b) (silk_ADD_SAT32_armv5e(a, b))
 
 #undef silk_SUB_SAT32
-static inline opus_int32 silk_SUB_SAT32_armv5e(opus_int32 a, opus_int32 b)
+static OPUS_INLINE opus_int32 silk_SUB_SAT32_armv5e(opus_int32 a, opus_int32 b)
 {
   int res;
   __asm__(
       "#silk_SUB_SAT32\n\t"
       "qsub %0, %1, %2\n\t"
       : "=r"(res)
       : "r"(a), "r"(b)
   );
   return res;
 }
 #define silk_SUB_SAT32(a, b) (silk_SUB_SAT32_armv5e(a, b))
 
 #undef silk_CLZ16
-static inline opus_int32 silk_CLZ16_armv5(opus_int16 in16)
+static OPUS_INLINE opus_int32 silk_CLZ16_armv5(opus_int16 in16)
 {
   int res;
   __asm__(
       "#silk_CLZ16\n\t"
       "clz %0, %1;\n"
       : "=r"(res)
       : "r"(in16<<16|0x8000)
   );
   return res;
 }
 #define silk_CLZ16(in16) (silk_CLZ16_armv5(in16))
 
 #undef silk_CLZ32
-static inline opus_int32 silk_CLZ32_armv5(opus_int32 in32)
+static OPUS_INLINE opus_int32 silk_CLZ32_armv5(opus_int32 in32)
 {
   int res;
   __asm__(
       "#silk_CLZ32\n\t"
       "clz %0, %1\n\t"
       : "=r"(res)
       : "r"(in32)
   );
--- a/media/libopus/silk/control.h
+++ b/media/libopus/silk/control.h
@@ -87,16 +87,19 @@ typedef struct {
     opus_int maxBits;
 
     /* I:   Causes a smooth downmix to mono                                                 */
     opus_int toMono;
 
     /* I:   Opus encoder is allowing us to switch bandwidth                                 */
     opus_int opusCanSwitch;
 
+    /* I: Make frames as independent as possible (but still use LPC)                        */
+    opus_int reducedDependency;
+
     /* O:   Internal sampling rate used, in Hertz; 8000/12000/16000                         */
     opus_int32 internalSampleRate;
 
     /* O: Flag that bandwidth switching is allowed (because low voice activity)             */
     opus_int allowBandwidthSwitch;
 
     /* O:   Flag that SILK runs in WB mode without variable LP filter (use for switching between WB/SWB/FB) */
     opus_int inWBmodeWithoutVariableLP;
--- a/media/libopus/silk/control_codec.c
+++ b/media/libopus/silk/control_codec.c
@@ -50,17 +50,17 @@ static opus_int silk_setup_fs(
     opus_int                        PacketSize_ms       /* I                        */
 );
 
 static opus_int silk_setup_complexity(
     silk_encoder_state              *psEncC,            /* I/O                      */
     opus_int                        Complexity          /* I                        */
 );
 
-static inline opus_int silk_setup_LBRR(
+static OPUS_INLINE opus_int silk_setup_LBRR(
     silk_encoder_state              *psEncC,            /* I/O                      */
     const opus_int32                TargetRate_bps      /* I                        */
 );
 
 
 /* Control encoder */
 opus_int silk_control_encoder(
     silk_encoder_state_Fxx          *psEnc,                                 /* I/O  Pointer to Silk encoder state                                               */
@@ -387,17 +387,17 @@ static opus_int silk_setup_complexity(
     silk_assert( psEncC->warping_Q16             <= 32767                    );
     silk_assert( psEncC->la_shape                <= LA_SHAPE_MAX             );
     silk_assert( psEncC->shapeWinLength          <= SHAPE_LPC_WIN_MAX        );
     silk_assert( psEncC->NLSF_MSVQ_Survivors     <= NLSF_VQ_MAX_SURVIVORS    );
 
     return ret;
 }
 
-static inline opus_int silk_setup_LBRR(
+static OPUS_INLINE opus_int silk_setup_LBRR(
     silk_encoder_state          *psEncC,            /* I/O                      */
     const opus_int32            TargetRate_bps      /* I                        */
 )
 {
     opus_int   ret = SILK_NO_ERROR;
     opus_int32 LBRR_rate_thres_bps;
 
     psEncC->LBRR_enabled = 0;
--- a/media/libopus/silk/dec_API.c
+++ b/media/libopus/silk/dec_API.c
@@ -300,17 +300,17 @@ opus_int silk_Decode(                   
         silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) );
     }
 
     /* Number of output samples */
     *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) );
 
     /* Set up pointers to temp buffers */
     ALLOC( samplesOut2_tmp,
-           decControl->nChannelsAPI == 2 ? *nSamplesOut : 0, opus_int16 );
+           decControl->nChannelsAPI == 2 ? *nSamplesOut : ALLOC_NONE, opus_int16 );
     if( decControl->nChannelsAPI == 2 ) {
         resample_out_ptr = samplesOut2_tmp;
     } else {
         resample_out_ptr = samplesOut;
     }
 
     for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
 
--- a/media/libopus/silk/enc_API.c
+++ b/media/libopus/silk/enc_API.c
@@ -64,28 +64,29 @@ opus_int silk_Get_Encoder_Size(         
     return ret;
 }
 
 /*************************/
 /* Init or Reset encoder */
 /*************************/
 opus_int silk_InitEncoder(                              /* O    Returns error code                              */
     void                            *encState,          /* I/O  State                                           */
+    int                              arch,              /* I    Run-time architecture                           */
     silk_EncControlStruct           *encStatus          /* O    Encoder Status                                  */
 )
 {
     silk_encoder *psEnc;
     opus_int n, ret = SILK_NO_ERROR;
 
     psEnc = (silk_encoder *)encState;
 
     /* Reset encoder */
     silk_memset( psEnc, 0, sizeof( silk_encoder ) );
     for( n = 0; n < ENCODER_NUM_CHANNELS; n++ ) {
-        if( ret += silk_init_encoder( &psEnc->state_Fxx[ n ] ) ) {
+        if( ret += silk_init_encoder( &psEnc->state_Fxx[ n ], arch ) ) {
             silk_assert( 0 );
         }
     }
 
     psEnc->nChannelsAPI = 1;
     psEnc->nChannelsInternal = 1;
 
     /* Read control structure */
@@ -151,30 +152,35 @@ opus_int silk_Encode(                   
     opus_int   nSamplesFromInput = 0, nSamplesFromInputMax;
     opus_int   speech_act_thr_for_switch_Q8;
     opus_int32 TargetRate_bps, MStargetRates_bps[ 2 ], channelRate_bps, LBRR_symbol, sum;
     silk_encoder *psEnc = ( silk_encoder * )encState;
     VARDECL( opus_int16, buf );
     opus_int transition, curr_block, tot_blocks;
     SAVE_STACK;
 
+    if (encControl->reducedDependency)
+    {
+       psEnc->state_Fxx[0].sCmn.first_frame_after_reset = 1;
+       psEnc->state_Fxx[1].sCmn.first_frame_after_reset = 1;
+    }
     psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded = psEnc->state_Fxx[ 1 ].sCmn.nFramesEncoded = 0;
 
     /* Check values in encoder control structure */
     if( ( ret = check_control_input( encControl ) != 0 ) ) {
         silk_assert( 0 );
         RESTORE_STACK;
         return ret;
     }
 
     encControl->switchReady = 0;
 
     if( encControl->nChannelsInternal > psEnc->nChannelsInternal ) {
         /* Mono -> Stereo transition: init state of second channel and stereo state */
-        ret += silk_init_encoder( &psEnc->state_Fxx[ 1 ] );
+        ret += silk_init_encoder( &psEnc->state_Fxx[ 1 ], psEnc->state_Fxx[ 0 ].sCmn.arch );
         silk_memset( psEnc->sStereo.pred_prev_Q13, 0, sizeof( psEnc->sStereo.pred_prev_Q13 ) );
         silk_memset( psEnc->sStereo.sSide, 0, sizeof( psEnc->sStereo.sSide ) );
         psEnc->sStereo.mid_side_amp_Q0[ 0 ] = 0;
         psEnc->sStereo.mid_side_amp_Q0[ 1 ] = 1;
         psEnc->sStereo.mid_side_amp_Q0[ 2 ] = 0;
         psEnc->sStereo.mid_side_amp_Q0[ 3 ] = 1;
         psEnc->sStereo.width_prev_Q14 = 0;
         psEnc->sStereo.smth_width_Q14 = SILK_FIX_CONST( 1, 14 );
@@ -196,19 +202,18 @@ opus_int silk_Encode(                   
         /* Only accept input length of 10 ms */
         if( nBlocksOf10ms != 1 ) {
             silk_assert( 0 );
             RESTORE_STACK;
             return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
         }
         /* Reset Encoder */
         for( n = 0; n < encControl->nChannelsInternal; n++ ) {
-            if( (ret = silk_init_encoder( &psEnc->state_Fxx[ n ] ) ) != 0 ) {
-                silk_assert( 0 );
-            }
+            ret = silk_init_encoder( &psEnc->state_Fxx[ n ], psEnc->state_Fxx[ n ].sCmn.arch );
+            silk_assert( !ret );
         }
         tmp_payloadSize_ms = encControl->payloadSize_ms;
         encControl->payloadSize_ms = 10;
         tmp_complexity = encControl->complexity;
         encControl->complexity = 0;
         for( n = 0; n < encControl->nChannelsInternal; n++ ) {
             psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
             psEnc->state_Fxx[ n ].sCmn.prefillFlag = 1;
--- a/media/libopus/silk/encode_pulses.c
+++ b/media/libopus/silk/encode_pulses.c
@@ -31,17 +31,17 @@ POSSIBILITY OF SUCH DAMAGE.
 
 #include "main.h"
 #include "stack_alloc.h"
 
 /*********************************************/
 /* Encode quantization indices of excitation */
 /*********************************************/
 
-static inline opus_int combine_and_check(    /* return ok                           */
+static OPUS_INLINE opus_int combine_and_check(    /* return ok                           */
     opus_int         *pulses_comb,           /* O                                   */
     const opus_int   *pulses_in,             /* I                                   */
     opus_int         max_pulses,             /* I    max value for sum of pulses    */
     opus_int         len                     /* I    number of output values        */
 )
 {
     opus_int k, sum;
 
--- a/media/libopus/silk/fixed/autocorr_FIX.c
+++ b/media/libopus/silk/fixed/autocorr_FIX.c
@@ -33,15 +33,16 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "celt_lpc.h"
 
 /* Compute autocorrelation */
 void silk_autocorr(
     opus_int32                  *results,           /* O    Result (length correlationCount)                            */
     opus_int                    *scale,             /* O    Scaling of the correlation vector                           */
     const opus_int16            *inputData,         /* I    Input data to correlate                                     */
     const opus_int              inputDataSize,      /* I    Length of input                                             */
-    const opus_int              correlationCount    /* I    Number of correlation taps to compute                       */
+    const opus_int              correlationCount,   /* I    Number of correlation taps to compute                       */
+    int                         arch                /* I    Run-time architecture                                       */
 )
 {
     opus_int   corrCount;
     corrCount = silk_min_int( inputDataSize, correlationCount );
-    *scale = _celt_autocorr(inputData, results, NULL, 0, corrCount-1, inputDataSize);
+    *scale = _celt_autocorr(inputData, results, NULL, 0, corrCount-1, inputDataSize, arch);
 }
--- a/media/libopus/silk/fixed/burg_modified_FIX.c
+++ b/media/libopus/silk/fixed/burg_modified_FIX.c
@@ -45,17 +45,18 @@ POSSIBILITY OF SUCH DAMAGE.
 void silk_burg_modified(
     opus_int32                  *res_nrg,           /* O    Residual energy                                             */
     opus_int                    *res_nrg_Q,         /* O    Residual energy Q value                                     */
     opus_int32                  A_Q16[],            /* O    Prediction coefficients (length order)                      */
     const opus_int16            x[],                /* I    Input signal, length: nb_subfr * ( D + subfr_length )       */
     const opus_int32            minInvGain_Q30,     /* I    Inverse of max prediction gain                              */
     const opus_int              subfr_length,       /* I    Input signal subframe length (incl. D preceding samples)    */
     const opus_int              nb_subfr,           /* I    Number of subframes stacked in x                            */
-    const opus_int              D                   /* I    Order                                                       */
+    const opus_int              D,                  /* I    Order                                                       */
+    int                         arch                /* I    Run-time architecture                                       */
 )
 {
     opus_int         k, n, s, lz, rshifts, rshifts_extra, reached_max_gain;
     opus_int32       C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2;
     const opus_int16 *x_ptr;
     opus_int32       C_first_row[ SILK_MAX_ORDER_LPC ];
     opus_int32       C_last_row[  SILK_MAX_ORDER_LPC ];
     opus_int32       Af_QA[       SILK_MAX_ORDER_LPC ];
@@ -93,17 +94,17 @@ void silk_burg_modified(
                     silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n ), rshifts );
             }
         }
     } else {
         for( s = 0; s < nb_subfr; s++ ) {
             int i;
             opus_int32 d;
             x_ptr = x + s * subfr_length;
-            celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D );
+            celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch );
             for( n = 1; n < D + 1; n++ ) {
                for ( i = n + subfr_length - D, d = 0; i < subfr_length; i++ )
                   d = MAC16_16( d, x_ptr[ i ], x_ptr[ i - n ] );
                xcorr[ n - 1 ] += d;
             }
             for( n = 1; n < D + 1; n++ ) {
                 C_first_row[ n - 1 ] += silk_LSHIFT32( xcorr[ n - 1 ], -rshifts );
             }
--- a/media/libopus/silk/fixed/encode_frame_FIX.c
+++ b/media/libopus/silk/fixed/encode_frame_FIX.c
@@ -29,17 +29,17 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "config.h"
 #endif
 
 #include "main_FIX.h"
 #include "stack_alloc.h"
 #include "tuning_parameters.h"
 
 /* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate           */
-static inline void silk_LBRR_encode_FIX(
+static OPUS_INLINE void silk_LBRR_encode_FIX(
     silk_encoder_state_FIX          *psEnc,                                 /* I/O  Pointer to Silk FIX encoder state                                           */
     silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  Pointer to Silk FIX encoder control struct                                  */
     const opus_int32                xfw_Q3[],                               /* I    Input signal                                                                */
     opus_int                        condCoding                              /* I    The type of conditional coding used so far for this frame                   */
 );
 
 void silk_encode_do_VAD_FIX(
     silk_encoder_state_FIX          *psEnc                                  /* I/O  Pointer to Silk FIX encoder state                                           */
@@ -127,22 +127,22 @@ opus_int silk_encode_frame_FIX(
                psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length
                    + psEnc->sCmn.ltp_mem_length, opus_int16 );
         /* start of pitch LPC residual frame */
         res_pitch_frame = res_pitch + psEnc->sCmn.ltp_mem_length;
 
         /*****************************************/
         /* Find pitch lags, initial LPC analysis */
         /*****************************************/
-        silk_find_pitch_lags_FIX( psEnc, &sEncCtrl, res_pitch, x_frame );
+        silk_find_pitch_lags_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch );
 
         /************************/
         /* Noise shape analysis */
         /************************/
-        silk_noise_shape_analysis_FIX( psEnc, &sEncCtrl, res_pitch_frame, x_frame );
+        silk_noise_shape_analysis_FIX( psEnc, &sEncCtrl, res_pitch_frame, x_frame, psEnc->sCmn.arch );
 
         /***************************************************/
         /* Find linear prediction coefficients (LPC + LTP) */
         /***************************************************/
         silk_find_pred_coefs_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, condCoding );
 
         /****************************************/
         /* Process gains                        */
@@ -297,41 +297,41 @@ opus_int silk_encode_frame_FIX(
             gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr );
         }
     }
 
     /* Update input buffer */
     silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ],
         ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( opus_int16 ) );
 
-    /* Parameters needed for next frame */
-    psEnc->sCmn.prevLag        = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ];
-    psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType;
-
     /* Exit without entropy coding */
     if( psEnc->sCmn.prefillFlag ) {
         /* No payload */
         *pnBytesOut = 0;
         RESTORE_STACK;
         return ret;
     }
 
+    /* Parameters needed for next frame */
+    psEnc->sCmn.prevLag        = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ];
+    psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType;
+
     /****************************************/
     /* Finalize payload                     */
     /****************************************/
     psEnc->sCmn.first_frame_after_reset = 0;
     /* Payload size */
     *pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 );
 
     RESTORE_STACK;
     return ret;
 }
 
 /* Low-Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode excitation at lower bitrate  */
-static inline void silk_LBRR_encode_FIX(
+static OPUS_INLINE void silk_LBRR_encode_FIX(
     silk_encoder_state_FIX          *psEnc,                                 /* I/O  Pointer to Silk FIX encoder state                                           */
     silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  Pointer to Silk FIX encoder control struct                                  */
     const opus_int32                xfw_Q3[],                               /* I    Input signal                                                                */
     opus_int                        condCoding                              /* I    The type of conditional coding used so far for this frame                   */
 )
 {
     opus_int32   TempGains_Q16[ MAX_NB_SUBFR ];
     SideInfoIndices *psIndices_LBRR = &psEnc->sCmn.indices_LBRR[ psEnc->sCmn.nFramesEncoded ];
--- a/media/libopus/silk/fixed/find_LPC_FIX.c
+++ b/media/libopus/silk/fixed/find_LPC_FIX.c
@@ -55,23 +55,23 @@ void silk_find_LPC_FIX(
     SAVE_STACK;
 
     subfr_length = psEncC->subfr_length + psEncC->predictLPCOrder;
 
     /* Default: no interpolation */
     psEncC->indices.NLSFInterpCoef_Q2 = 4;
 
     /* Burg AR analysis for the full frame */
-    silk_burg_modified( &res_nrg, &res_nrg_Q, a_Q16, x, minInvGain_Q30, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder );
+    silk_burg_modified( &res_nrg, &res_nrg_Q, a_Q16, x, minInvGain_Q30, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder, psEncC->arch );
 
     if( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) {
         VARDECL( opus_int16, LPC_res );
 
         /* Optimal solution for last 10 ms */
-        silk_burg_modified( &res_tmp_nrg, &res_tmp_nrg_Q, a_tmp_Q16, x + 2 * subfr_length, minInvGain_Q30, subfr_length, 2, psEncC->predictLPCOrder );
+        silk_burg_modified( &res_tmp_nrg, &res_tmp_nrg_Q, a_tmp_Q16, x + 2 * subfr_length, minInvGain_Q30, subfr_length, 2, psEncC->predictLPCOrder, psEncC->arch );
 
         /* subtract residual energy here, as that's easier than adding it to the    */
         /* residual energy of the first 10 ms in each iteration of the search below */
         shift = res_tmp_nrg_Q - res_nrg_Q;
         if( shift >= 0 ) {
             if( shift < 32 ) {
                 res_nrg = res_nrg - silk_RSHIFT( res_tmp_nrg, shift );
             }
--- a/media/libopus/silk/fixed/find_pitch_lags_FIX.c
+++ b/media/libopus/silk/fixed/find_pitch_lags_FIX.c
@@ -33,17 +33,18 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "stack_alloc.h"
 #include "tuning_parameters.h"
 
 /* Find pitch lags */
 void silk_find_pitch_lags_FIX(
     silk_encoder_state_FIX          *psEnc,                                 /* I/O  encoder state                                                               */
     silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  encoder control                                                             */
     opus_int16                      res[],                                  /* O    residual                                                                    */
-    const opus_int16                x[]                                     /* I    Speech signal                                                               */
+    const opus_int16                x[],                                    /* I    Speech signal                                                               */
+    int                             arch                                    /* I    Run-time architecture                                                       */
 )
 {
     opus_int   buf_len, i, scale;
     opus_int32 thrhld_Q13, res_nrg;
     const opus_int16 *x_buf, *x_buf_ptr;
     VARDECL( opus_int16, Wsig );
     opus_int16 *Wsig_ptr;
     opus_int32 auto_corr[ MAX_FIND_PITCH_LPC_ORDER + 1 ];
@@ -81,17 +82,17 @@ void silk_find_pitch_lags_FIX(
     silk_memcpy( Wsig_ptr, x_buf_ptr, ( psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ) ) * sizeof( opus_int16 ) );
 
     /* Last LA_LTP samples */
     Wsig_ptr  += psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 );
     x_buf_ptr += psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 );
     silk_apply_sine_window( Wsig_ptr, x_buf_ptr, 2, psEnc->sCmn.la_pitch );
 
     /* Calculate autocorrelation sequence */
-    silk_autocorr( auto_corr, &scale, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1 );
+    silk_autocorr( auto_corr, &scale, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1, arch );
 
     /* Add white noise, as fraction of energy */
     auto_corr[ 0 ] = silk_SMLAWB( auto_corr[ 0 ], auto_corr[ 0 ], SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) ) + 1;
 
     /* Calculate the reflection coefficients using schur */
     res_nrg = silk_schur( rc_Q15, auto_corr, psEnc->sCmn.pitchEstimationLPCOrder );
 
     /* Prediction gain */
@@ -122,17 +123,18 @@ void silk_find_pitch_lags_FIX(
         thrhld_Q13 = silk_SMLAWB( thrhld_Q13, SILK_FIX_CONST( -0.1,   14 ), psEnc->sCmn.input_tilt_Q15 );
         thrhld_Q13 = silk_SAT16(  thrhld_Q13 );
 
         /*****************************************/
         /* Call pitch estimator                  */
         /*****************************************/
         if( silk_pitch_analysis_core( res, psEncCtrl->pitchL, &psEnc->sCmn.indices.lagIndex, &psEnc->sCmn.indices.contourIndex,
                 &psEnc->LTPCorr_Q15, psEnc->sCmn.prevLag, psEnc->sCmn.pitchEstimationThreshold_Q16,
-                (opus_int)thrhld_Q13, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr ) == 0 )
+                (opus_int)thrhld_Q13, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr,
+                psEnc->sCmn.arch) == 0 )
         {
             psEnc->sCmn.indices.signalType = TYPE_VOICED;
         } else {
             psEnc->sCmn.indices.signalType = TYPE_UNVOICED;
         }
     } else {
         silk_memset( psEncCtrl->pitchL, 0, sizeof( psEncCtrl->pitchL ) );
         psEnc->sCmn.indices.lagIndex = 0;
--- a/media/libopus/silk/fixed/find_pred_coefs_FIX.c
+++ b/media/libopus/silk/fixed/find_pred_coefs_FIX.c
@@ -88,17 +88,17 @@ void silk_find_pred_coefs_FIX(
 
         /* LTP analysis */
         silk_find_LTP_FIX( psEncCtrl->LTPCoef_Q14, WLTP, &psEncCtrl->LTPredCodGain_Q7,
             res_pitch, psEncCtrl->pitchL, Wght_Q15, psEnc->sCmn.subfr_length,
             psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length, LTP_corrs_rshift );
 
         /* Quantize LTP gain parameters */
         silk_quant_LTP_gains( psEncCtrl->LTPCoef_Q14, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex,
-            WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr);
+            &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr);
 
         /* Control LTP scaling */
         silk_LTP_scale_ctrl_FIX( psEnc, psEncCtrl, condCoding );
 
         /* Create LTP residual */
         silk_LTP_analysis_filter_FIX( LPC_in_pre, x - psEnc->sCmn.predictLPCOrder, psEncCtrl->LTPCoef_Q14,
             psEncCtrl->pitchL, invGains_Q16, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder );
 
@@ -113,16 +113,17 @@ void silk_find_pred_coefs_FIX(
             silk_scale_copy_vector16( x_pre_ptr, x_ptr, invGains_Q16[ i ],
                 psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder );
             x_pre_ptr += psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder;
             x_ptr     += psEnc->sCmn.subfr_length;
         }
 
         silk_memset( psEncCtrl->LTPCoef_Q14, 0, psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( opus_int16 ) );
         psEncCtrl->LTPredCodGain_Q7 = 0;
+		psEnc->sCmn.sum_log_gain_Q7 = 0;
     }
 
     /* Limit on total predictive coding gain */
     if( psEnc->sCmn.first_frame_after_reset ) {
         minInvGain_Q30 = SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN_AFTER_RESET, 30 );
     } else {        
         minInvGain_Q30 = silk_log2lin( silk_SMLAWB( 16 << 7, (opus_int32)psEncCtrl->LTPredCodGain_Q7, SILK_FIX_CONST( 1.0 / 3, 16 ) ) );      /* Q16 */
         minInvGain_Q30 = silk_DIV32_varQ( minInvGain_Q30, 
--- a/media/libopus/silk/fixed/main_FIX.h
+++ b/media/libopus/silk/fixed/main_FIX.h
@@ -68,17 +68,18 @@ opus_int silk_encode_frame_FIX(
     ec_enc                          *psRangeEnc,                            /* I/O  compressor data structure                                                   */
     opus_int                        condCoding,                             /* I    The type of conditional coding to use                                       */
     opus_int                        maxBits,                                /* I    If > 0: maximum number of output bits                                       */
     opus_int                        useCBR                                  /* I    Flag to force constant-bitrate operation                                    */
 );
 
 /* Initializes the Silk encoder state */
 opus_int silk_init_encoder(
-    silk_encoder_state_Fxx          *psEnc                                  /* I/O  Pointer to Silk FIX encoder state                                           */
+    silk_encoder_state_Fxx          *psEnc,                                 /* I/O  Pointer to Silk FIX encoder state                                           */
+    int                              arch                                   /* I    Run-time architecture                                                       */
 );
 
 /* Control the Silk encoder */
 opus_int silk_control_encoder(
     silk_encoder_state_Fxx          *psEnc,                                 /* I/O  Pointer to Silk encoder state                                               */
     silk_EncControlStruct           *encControl,                            /* I    Control structure                                                           */
     const opus_int32                TargetRate_bps,                         /* I    Target max bitrate (bps)                                                    */
     const opus_int                  allow_bw_switch,                        /* I    Flag to allow switching audio bandwidth                                     */
@@ -99,17 +100,18 @@ void silk_prefilter_FIX(
 /**************************/
 /* Noise shaping analysis */
 /**************************/
 /* Compute noise shaping coefficients and initial gain values */
 void silk_noise_shape_analysis_FIX(
     silk_encoder_state_FIX          *psEnc,                                 /* I/O  Encoder state FIX                                                           */
     silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  Encoder control FIX                                                         */
     const opus_int16                *pitch_res,                             /* I    LPC residual from pitch analysis                                            */
-    const opus_int16                *x                                      /* I    Input signal [ frame_length + la_shape ]                                    */
+    const opus_int16                *x,                                     /* I    Input signal [ frame_length + la_shape ]                                    */
+    int                              arch                                   /* I    Run-time architecture                                                       */
 );
 
 /* Autocorrelations for a warped frequency axis */
 void silk_warped_autocorrelation_FIX(
           opus_int32                *corr,                                  /* O    Result [order + 1]                                                          */
           opus_int                  *scale,                                 /* O    Scaling of the correlation vector                                           */
     const opus_int16                *input,                                 /* I    Input data to correlate                                                     */
     const opus_int                  warping_Q16,                            /* I    Warping coefficient                                                         */
@@ -127,17 +129,18 @@ void silk_LTP_scale_ctrl_FIX(
 /**********************************************/
 /* Prediction Analysis                        */
 /**********************************************/
 /* Find pitch lags */
 void silk_find_pitch_lags_FIX(
     silk_encoder_state_FIX          *psEnc,                                 /* I/O  encoder state                                                               */
     silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  encoder control                                                             */
     opus_int16                      res[],                                  /* O    residual                                                                    */
-    const opus_int16                x[]                                     /* I    Speech signal                                                               */
+    const opus_int16                x[],                                    /* I    Speech signal                                                               */
+    int                             arch                                    /* I    Run-time architecture                                                       */
 );
 
 /* Find LPC and LTP coefficients */
 void silk_find_pred_coefs_FIX(
     silk_encoder_state_FIX          *psEnc,                                 /* I/O  encoder state                                                               */
     silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  encoder control                                                             */
     const opus_int16                res_pitch[],                            /* I    Residual from pitch analysis                                                */
     const opus_int16                x[],                                    /* I    Speech signal                                                               */
--- a/media/libopus/silk/fixed/noise_shape_analysis_FIX.c
+++ b/media/libopus/silk/fixed/noise_shape_analysis_FIX.c
@@ -32,17 +32,17 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "main_FIX.h"
 #include "stack_alloc.h"
 #include "tuning_parameters.h"
 
 /* Compute gain to make warped filter coefficients have a zero mean log frequency response on a   */
 /* non-warped frequency scale. (So that it can be implemented with a minimum-phase monic filter.) */
 /* Note: A monic filter is one with the first coefficient equal to 1.0. In Silk we omit the first */
 /* coefficient in an array of coefficients, for monic filters.                                    */
-static inline opus_int32 warped_gain( /* gain in Q16*/
+static OPUS_INLINE opus_int32 warped_gain( /* gain in Q16*/
     const opus_int32     *coefs_Q24,
     opus_int             lambda_Q16,
     opus_int             order
 ) {
     opus_int   i;
     opus_int32 gain_Q24;
 
     lambda_Q16 = -lambda_Q16;
@@ -51,17 +51,17 @@ static inline opus_int32 warped_gain( /*
         gain_Q24 = silk_SMLAWB( coefs_Q24[ i ], gain_Q24, lambda_Q16 );
     }
     gain_Q24  = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), gain_Q24, -lambda_Q16 );
     return silk_INVERSE32_varQ( gain_Q24, 40 );
 }
 
 /* Convert warped filter coefficients to monic pseudo-warped coefficients and limit maximum     */
 /* amplitude of monic warped coefficients by using bandwidth expansion on the true coefficients */
-static inline void limit_warped_coefs(
+static OPUS_INLINE void limit_warped_coefs(
     opus_int32           *coefs_syn_Q24,
     opus_int32           *coefs_ana_Q24,
     opus_int             lambda_Q16,
     opus_int32           limit_Q24,
     opus_int             order
 ) {
     opus_int   i, iter, ind = 0;
     opus_int32 tmp, maxabs_Q24, chirp_Q16, gain_syn_Q16, gain_ana_Q16;
@@ -140,17 +140,18 @@ static inline void limit_warped_coefs(
 
 /**************************************************************/
 /* Compute noise shaping coefficients and initial gain values */
 /**************************************************************/
 void silk_noise_shape_analysis_FIX(
     silk_encoder_state_FIX          *psEnc,                                 /* I/O  Encoder state FIX                                                           */
     silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  Encoder control FIX                                                         */
     const opus_int16                *pitch_res,                             /* I    LPC residual from pitch analysis                                            */
-    const opus_int16                *x                                      /* I    Input signal [ frame_length + la_shape ]                                    */
+    const opus_int16                *x,                                     /* I    Input signal [ frame_length + la_shape ]                                    */
+    int                              arch                                   /* I    Run-time architecture                                                       */
 )
 {
     silk_shape_state_FIX *psShapeSt = &psEnc->sShape;
     opus_int     k, i, nSamples, Qnrg, b_Q14, warping_Q16, scale = 0;
     opus_int32   SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32;
     opus_int32   nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7;
     opus_int32   delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8;
     opus_int32   auto_corr[     MAX_SHAPE_LPC_ORDER + 1 ];
@@ -276,17 +277,17 @@ void silk_noise_shape_analysis_FIX(
         /* Update pointer: next LPC analysis block */
         x_ptr += psEnc->sCmn.subfr_length;
 
         if( psEnc->sCmn.warping_Q16 > 0 ) {
             /* Calculate warped auto correlation */
             silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder );
         } else {
             /* Calculate regular auto correlation */
-            silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1 );
+            silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1, arch );
         }
 
         /* Add white noise, as a fraction of energy */
         auto_corr[0] = silk_ADD32( auto_corr[0], silk_max_32( silk_SMULWB( silk_RSHIFT( auto_corr[ 0 ], 4 ),
             SILK_FIX_CONST( SHAPE_WHITE_NOISE_FRACTION, 20 ) ), 1 ) );
 
         /* Calculate the reflection coefficients using schur */
         nrg = silk_schur64( refl_coef_Q16, auto_corr, psEnc->sCmn.shapingLPCOrder );
--- a/media/libopus/silk/fixed/pitch_analysis_core_FIX.c
+++ b/media/libopus/silk/fixed/pitch_analysis_core_FIX.c
@@ -57,17 +57,18 @@ typedef opus_int32 silk_pe_stage3_vals[ 
 /* Internally used functions                                */
 /************************************************************/
 static void silk_P_Ana_calc_corr_st3(
     silk_pe_stage3_vals cross_corr_st3[],              /* O 3 DIM correlation array */
     const opus_int16  frame[],                         /* I vector to correlate         */
     opus_int          start_lag,                       /* I lag offset to search around */
     opus_int          sf_length,                       /* I length of a 5 ms subframe   */
     opus_int          nb_subfr,                        /* I number of subframes         */
-    opus_int          complexity                       /* I Complexity setting          */
+    opus_int          complexity,                      /* I Complexity setting          */
+    int               arch                             /* I Run-time architecture       */
 );
 
 static void silk_P_Ana_calc_energy_st3(
     silk_pe_stage3_vals energies_st3[],                /* O 3 DIM energy array */
     const opus_int16  frame[],                         /* I vector to calc energy in    */
     opus_int          start_lag,                       /* I lag offset to search around */
     opus_int          sf_length,                       /* I length of one 5 ms subframe */
     opus_int          nb_subfr,                        /* I number of subframes         */
@@ -83,17 +84,18 @@ opus_int silk_pitch_analysis_core(      
     opus_int16                  *lagIndex,          /* O    Lag Index                                                   */
     opus_int8                   *contourIndex,      /* O    Pitch contour Index                                         */
     opus_int                    *LTPCorr_Q15,       /* I/O  Normalized correlation; input: value from previous frame    */
     opus_int                    prevLag,            /* I    Last lag of previous frame; set to zero is unvoiced         */
     const opus_int32            search_thres1_Q16,  /* I    First stage threshold for lag candidates 0 - 1              */
     const opus_int              search_thres2_Q13,  /* I    Final threshold for lag candidates 0 - 1                    */
     const opus_int              Fs_kHz,             /* I    Sample frequency (kHz)                                      */
     const opus_int              complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
-    const opus_int              nb_subfr            /* I    number of 5 ms subframes                                    */
+    const opus_int              nb_subfr,           /* I    number of 5 ms subframes                                    */
+    int                         arch                /* I    Run-time architecture                                       */
 )
 {
     VARDECL( opus_int16, frame_8kHz );
     VARDECL( opus_int16, frame_4kHz );
     opus_int32 filt_state[ 6 ];
     const opus_int16 *input_frame_ptr;
     opus_int   i, k, d, j;
     VARDECL( opus_int16, C );
@@ -184,17 +186,17 @@ opus_int silk_pitch_analysis_core(      
         silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
 
         basis_ptr = target_ptr - MIN_LAG_4KHZ;
 
         /* Check that we are within range of the array */
         silk_assert( basis_ptr >= frame_4kHz );
         silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
 
-        celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1 );
+        celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1, arch );
 
         /* Calculate first vector products before loop */
         cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ];
         normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ );
         normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr,  basis_ptr, SF_LENGTH_8KHZ ) );
         normalizer = silk_ADD32( normalizer, silk_SMULBB( SF_LENGTH_8KHZ, 4000 ) );
 
         matrix_ptr( C, k, 0, CSTRIDE_4KHZ ) =
@@ -460,17 +462,17 @@ opus_int silk_pitch_analysis_core(      
 
     if( Fs_kHz > 8 ) {
         VARDECL( opus_int16, scratch_mem );
         /***************************************************************************/
         /* Scale input signal down to avoid correlations measures from overflowing */
         /***************************************************************************/
         /* find scaling as max scaling for each subframe */
         silk_sum_sqr_shift( &energy, &shift, frame, frame_length );
-        ALLOC( scratch_mem, shift > 0 ? frame_length : 0, opus_int16 );
+        ALLOC( scratch_mem, shift > 0 ? frame_length : ALLOC_NONE, opus_int16 );
         if( shift > 0 ) {
             /* Move signal to scratch mem because the input signal should be unchanged */
             shift = silk_RSHIFT( shift, 1 );
             for( i = 0; i < frame_length; i++ ) {
                 scratch_mem[ i ] = silk_RSHIFT( frame[ i ], shift );
             }
             input_frame_ptr = scratch_mem;
         } else {
@@ -511,17 +513,17 @@ opus_int silk_pitch_analysis_core(      
             nb_cbk_search   = PE_NB_CBKS_STAGE3_10MS;
             cbk_size        = PE_NB_CBKS_STAGE3_10MS;
             Lag_CB_ptr      = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
         }
 
         /* Calculate the correlations and energies needed in stage 3 */
         ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
         ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
-        silk_P_Ana_calc_corr_st3(  cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity );
+        silk_P_Ana_calc_corr_st3(  cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch );
         silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity );
 
         lag_counter = 0;
         silk_assert( lag == silk_SAT16( lag ) );
         contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag );
 
         target_ptr = &input_frame_ptr[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];
         energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length ), 1 );
@@ -592,17 +594,18 @@ opus_int silk_pitch_analysis_core(      
  * case 4*12*5 = 240 correlations, but more likely around 120.
  ***********************************************************************/
 static void silk_P_Ana_calc_corr_st3(
     silk_pe_stage3_vals cross_corr_st3[],              /* O 3 DIM correlation array */
     const opus_int16  frame[],                         /* I vector to correlate         */
     opus_int          start_lag,                       /* I lag offset to search around */
     opus_int          sf_length,                       /* I length of a 5 ms subframe   */
     opus_int          nb_subfr,                        /* I number of subframes         */
-    opus_int          complexity                       /* I Complexity setting          */
+    opus_int          complexity,                      /* I Complexity setting          */
+    int               arch                             /* I Run-time architecture       */
 )
 {
     const opus_int16 *target_ptr;
     opus_int   i, j, k, lag_counter, lag_low, lag_high;
     opus_int   nb_cbk_search, delta, idx, cbk_size;
     VARDECL( opus_int32, scratch_mem );
     VARDECL( opus_int32, xcorr32 );
     const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
@@ -629,17 +632,17 @@ static void silk_P_Ana_calc_corr_st3(
     target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */
     for( k = 0; k < nb_subfr; k++ ) {
         lag_counter = 0;
 
         /* Calculate the correlations for each subframe */
         lag_low  = matrix_ptr( Lag_range_ptr, k, 0, 2 );
         lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );
         silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE);
-        celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1 );
+        celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1, arch );
         for( j = lag_low; j <= lag_high; j++ ) {
             silk_assert( lag_counter < SCRATCH_SIZE );
             scratch_mem[ lag_counter ] = xcorr32[ lag_high - j ];
             lag_counter++;
         }
 
         delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
         for( i = 0; i < nb_cbk_search; i++ ) {
--- a/media/libopus/silk/fixed/prefilter_FIX.c
+++ b/media/libopus/silk/fixed/prefilter_FIX.c
@@ -29,17 +29,17 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "config.h"
 #endif
 
 #include "main_FIX.h"
 #include "stack_alloc.h"
 #include "tuning_parameters.h"
 
 /* Prefilter for finding Quantizer input signal */
-static inline void silk_prefilt_FIX(
+static OPUS_INLINE void silk_prefilt_FIX(
     silk_prefilter_state_FIX    *P,                         /* I/O  state                               */
     opus_int32                  st_res_Q12[],               /* I    short term residual signal          */
     opus_int32                  xw_Q3[],                    /* O    prefiltered signal                  */
     opus_int32                  HarmShapeFIRPacked_Q12,     /* I    Harmonic shaping coeficients        */
     opus_int                    Tilt_Q14,                   /* I    Tilt shaping coeficient             */
     opus_int32                  LF_shp_Q14,                 /* I    Low-frequancy shaping coeficients   */
     opus_int                    lag,                        /* I    Lag for harmonic shaping            */
     opus_int                    length                      /* I    Length of signals                   */
@@ -151,17 +151,17 @@ void silk_prefilter_FIX(
         pxw_Q3 += psEnc->sCmn.subfr_length;
     }
 
     P->lagPrev = psEncCtrl->pitchL[ psEnc->sCmn.nb_subfr - 1 ];
     RESTORE_STACK;
 }
 
 /* Prefilter for finding Quantizer input signal */
-static inline void silk_prefilt_FIX(
+static OPUS_INLINE void silk_prefilt_FIX(
     silk_prefilter_state_FIX    *P,                         /* I/O  state                               */
     opus_int32                  st_res_Q12[],               /* I    short term residual signal          */
     opus_int32                  xw_Q3[],                    /* O    prefiltered signal                  */
     opus_int32                  HarmShapeFIRPacked_Q12,     /* I    Harmonic shaping coeficients        */
     opus_int                    Tilt_Q14,                   /* I    Tilt shaping coeficient             */
     opus_int32                  LF_shp_Q14,                 /* I    Low-frequancy shaping coeficients   */
     opus_int                    lag,                        /* I    Lag for harmonic shaping            */
     opus_int                    length                      /* I    Length of signals                   */
--- a/media/libopus/silk/fixed/solve_LS_FIX.c
+++ b/media/libopus/silk/fixed/solve_LS_FIX.c
@@ -38,40 +38,40 @@ POSSIBILITY OF SUCH DAMAGE.
 /*****************************/
 
 typedef struct {
     opus_int32 Q36_part;
     opus_int32 Q48_part;
 } inv_D_t;
 
 /* Factorize square matrix A into LDL form */
-static inline void silk_LDL_factorize_FIX(
+static OPUS_INLINE void silk_LDL_factorize_FIX(
     opus_int32          *A,         /* I/O Pointer to Symetric Square Matrix                            */
     opus_int            M,          /* I   Size of Matrix                                               */
     opus_int32          *L_Q16,     /* I/O Pointer to Square Upper triangular Matrix                    */
     inv_D_t             *inv_D      /* I/O Pointer to vector holding inverted diagonal elements of D    */
 );
 
 /* Solve Lx = b, when L is lower triangular and has ones on the diagonal */
-static inline void silk_LS_SolveFirst_FIX(
+static OPUS_INLINE void silk_LS_SolveFirst_FIX(
     const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
     opus_int            M,          /* I    Dim of Matrix equation                                      */
     const opus_int32    *b,         /* I    b Vector                                                    */
     opus_int32          *x_Q16      /* O    x Vector                                                    */
 );
 
 /* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */
-static inline void silk_LS_SolveLast_FIX(
+static OPUS_INLINE void silk_LS_SolveLast_FIX(
     const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
     const opus_int      M,          /* I    Dim of Matrix equation                                      */
     const opus_int32    *b,         /* I    b Vector                                                    */
     opus_int32          *x_Q16      /* O    x Vector                                                    */
 );
 
-static inline void silk_LS_divide_Q16_FIX(
+static OPUS_INLINE void silk_LS_divide_Q16_FIX(
     opus_int32          T[],        /* I/O  Numenator vector                                            */
     inv_D_t             *inv_D,     /* I    1 / D vector                                                */
     opus_int            M           /* I    dimension                                                   */
 );
 
 /* Solves Ax = b, assuming A is symmetric */
 void silk_solve_LDL_FIX(
     opus_int32                      *A,                                     /* I    Pointer to symetric square matrix A                                         */
@@ -108,17 +108,17 @@ void silk_solve_LDL_FIX(
 
     /****************************************************
     x = inv(L') * inv(D) * Y
     *****************************************************/
     silk_LS_SolveLast_FIX( L_Q16, M, Y, x_Q16 );
     RESTORE_STACK;
 }
 
-static inline void silk_LDL_factorize_FIX(
+static OPUS_INLINE void silk_LDL_factorize_FIX(
     opus_int32          *A,         /* I/O Pointer to Symetric Square Matrix                            */
     opus_int            M,          /* I   Size of Matrix                                               */
     opus_int32          *L_Q16,     /* I/O Pointer to Square Upper triangular Matrix                    */
     inv_D_t             *inv_D      /* I/O Pointer to vector holding inverted diagonal elements of D    */
 )
 {
     opus_int   i, j, k, status, loop_count;
     const opus_int32 *ptr1, *ptr2;
@@ -180,17 +180,17 @@ static inline void silk_LDL_factorize_FI
                 ptr2 += M;
             }
         }
     }
 
     silk_assert( status == 0 );
 }
 
-static inline void silk_LS_divide_Q16_FIX(
+static OPUS_INLINE void silk_LS_divide_Q16_FIX(
     opus_int32          T[],        /* I/O  Numenator vector                                            */
     inv_D_t             *inv_D,     /* I    1 / D vector                                                */
     opus_int            M           /* I    dimension                                                   */
 )
 {
     opus_int   i;
     opus_int32 tmp_32;
     opus_int32 one_div_diag_Q36, one_div_diag_Q48;
@@ -200,17 +200,17 @@ static inline void silk_LS_divide_Q16_FI
         one_div_diag_Q48 = inv_D[ i ].Q48_part;
 
         tmp_32 = T[ i ];
         T[ i ] = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ), silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) );
     }
 }
 
 /* Solve Lx = b, when L is lower triangular and has ones on the diagonal */
-static inline void silk_LS_SolveFirst_FIX(
+static OPUS_INLINE void silk_LS_SolveFirst_FIX(
     const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
     opus_int            M,          /* I    Dim of Matrix equation                                      */
     const opus_int32    *b,         /* I    b Vector                                                    */
     opus_int32          *x_Q16      /* O    x Vector                                                    */
 )
 {
     opus_int i, j;
     const opus_int32 *ptr32;
@@ -222,17 +222,17 @@ static inline void silk_LS_SolveFirst_FI
         for( j = 0; j < i; j++ ) {
             tmp_32 = silk_SMLAWW( tmp_32, ptr32[ j ], x_Q16[ j ] );
         }
         x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 );
     }
 }
 
 /* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */
-static inline void silk_LS_SolveLast_FIX(
+static OPUS_INLINE void silk_LS_SolveLast_FIX(
     const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
     const opus_int      M,          /* I    Dim of Matrix equation                                      */
     const opus_int32    *b,         /* I    b Vector                                                    */
     opus_int32          *x_Q16      /* O    x Vector                                                    */
 )
 {
     opus_int i, j;
     const opus_int32 *ptr32;
--- a/media/libopus/silk/float/LPC_analysis_filter_FLP.c
+++ b/media/libopus/silk/float/LPC_analysis_filter_FLP.c
@@ -35,17 +35,17 @@ POSSIBILITY OF SUCH DAMAGE.
 /************************************************/
 /* LPC analysis filter                          */
 /* NB! State is kept internally and the         */
 /* filter always starts with zero state         */
 /* first Order output samples are set to zero   */
 /************************************************/
 
 /* 16th order LPC analysis filter, does not write first 16 samples */
-static inline void silk_LPC_analysis_filter16_FLP(
+static OPUS_INLINE void silk_LPC_analysis_filter16_FLP(
           silk_float                 r_LPC[],            /* O    LPC residual signal                     */
     const silk_float                 PredCoef[],         /* I    LPC coefficients                        */
     const silk_float                 s[],                /* I    Input signal                            */
     const opus_int                   length              /* I    Length of input signal                  */
 )
 {
     opus_int   ix;
     silk_float LPC_pred;
@@ -73,17 +73,17 @@ static inline void silk_LPC_analysis_fil
                    s_ptr[ -15 ] * PredCoef[ 15 ];
 
         /* prediction error */
         r_LPC[ix] = s_ptr[ 1 ] - LPC_pred;
     }
 }
 
 /* 12th order LPC analysis filter, does not write first 12 samples */
-static inline void silk_LPC_analysis_filter12_FLP(
+static OPUS_INLINE void silk_LPC_analysis_filter12_FLP(
           silk_float                 r_LPC[],            /* O    LPC residual signal                     */
     const silk_float                 PredCoef[],         /* I    LPC coefficients                        */
     const silk_float                 s[],                /* I    Input signal                            */
     const opus_int                   length              /* I    Length of input signal                  */
 )
 {
     opus_int   ix;
     silk_float LPC_pred;
@@ -107,17 +107,17 @@ static inline void silk_LPC_analysis_fil
                    s_ptr[ -11 ] * PredCoef[ 11 ];
 
         /* prediction error */
         r_LPC[ix] = s_ptr[ 1 ] - LPC_pred;
     }
 }
 
 /* 10th order LPC analysis filter, does not write first 10 samples */
-static inline void silk_LPC_analysis_filter10_FLP(
+static OPUS_INLINE void silk_LPC_analysis_filter10_FLP(
           silk_float                 r_LPC[],            /* O    LPC residual signal                     */
     const silk_float                 PredCoef[],         /* I    LPC coefficients                        */
     const silk_float                 s[],                /* I    Input signal                            */
     const opus_int                   length              /* I    Length of input signal                  */
 )
 {
     opus_int   ix;
     silk_float LPC_pred;
@@ -139,17 +139,17 @@ static inline void silk_LPC_analysis_fil
                    s_ptr[ -9 ] * PredCoef[ 9 ];
 
         /* prediction error */
         r_LPC[ix] = s_ptr[ 1 ] - LPC_pred;
     }
 }
 
 /* 8th order LPC analysis filter, does not write first 8 samples */
-static inline void silk_LPC_analysis_filter8_FLP(
+static OPUS_INLINE void silk_LPC_analysis_filter8_FLP(
           silk_float                 r_LPC[],            /* O    LPC residual signal                     */
     const silk_float                 PredCoef[],         /* I    LPC coefficients                        */
     const silk_float                 s[],                /* I    Input signal                            */
     const opus_int                   length              /* I    Length of input signal                  */
 )
 {
     opus_int   ix;
     silk_float LPC_pred;
@@ -169,17 +169,17 @@ static inline void silk_LPC_analysis_fil
                    s_ptr[ -7 ] * PredCoef[ 7 ];
 
         /* prediction error */
         r_LPC[ix] = s_ptr[ 1 ] - LPC_pred;
     }
 }
 
 /* 6th order LPC analysis filter, does not write first 6 samples */
-static inline void silk_LPC_analysis_filter6_FLP(
+static OPUS_INLINE void silk_LPC_analysis_filter6_FLP(
           silk_float                 r_LPC[],            /* O    LPC residual signal                     */
     const silk_float                 PredCoef[],         /* I    LPC coefficients                        */
     const silk_float                 s[],                /* I    Input signal                            */
     const opus_int                   length              /* I    Length of input signal                  */
 )
 {
     opus_int   ix;
     silk_float LPC_pred;
--- a/media/libopus/silk/float/SigProc_FLP.h
+++ b/media/libopus/silk/float/SigProc_FLP.h
@@ -89,17 +89,18 @@ opus_int silk_pitch_analysis_core_FLP(  
     opus_int16          *lagIndex,          /* O    Lag Index                                                   */
     opus_int8           *contourIndex,      /* O    Pitch contour Index                                         */
     silk_float          *LTPCorr,           /* I/O  Normalized correlation; input: value from previous frame    */
     opus_int            prevLag,            /* I    Last lag of previous frame; set to zero is unvoiced         */
     const silk_float    search_thres1,      /* I    First stage threshold for lag candidates 0 - 1              */
     const silk_float    search_thres2,      /* I    Final threshold for lag candidates 0 - 1                    */
     const opus_int      Fs_kHz,             /* I    sample frequency (kHz)                                      */
     const opus_int      complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
-    const opus_int      nb_subfr            /* I    Number of 5 ms subframes                                    */
+    const opus_int      nb_subfr,           /* I    Number of 5 ms subframes                                    */
+    int                 arch                /* I    Run-time architecture                                       */
 );
 
 void silk_insertion_sort_decreasing_FLP(
     silk_float          *a,                 /* I/O  Unsorted / Sorted vector                                    */
     opus_int            *idx,               /* O    Index vector for the sorted elements                        */
     const opus_int      L,                  /* I    Vector length                                               */
     const opus_int      K                   /* I    Number of correctly sorted positions                        */
 );
@@ -148,55 +149,55 @@ double silk_energy_FLP(
 
 #define PI              (3.1415926536f)
 
 #define silk_min_float( a, b )                  (((a) < (b)) ? (a) :  (b))
 #define silk_max_float( a, b )                  (((a) > (b)) ? (a) :  (b))
 #define silk_abs_float( a )                     ((silk_float)fabs(a))
 
 /* sigmoid function */
-static inline silk_float silk_sigmoid( silk_float x )
+static OPUS_INLINE silk_float silk_sigmoid( silk_float x )
 {
     return (silk_float)(1.0 / (1.0 + exp(-x)));
 }
 
 /* floating-point to integer conversion (rounding) */
-static inline opus_int32 silk_float2int( silk_float x )
+static OPUS_INLINE opus_int32 silk_float2int( silk_float x )
 {
     return (opus_int32)float2int( x );
 }
 
 /* floating-point to integer conversion (rounding) */
-static inline void silk_float2short_array(
+static OPUS_INLINE void silk_float2short_array(
     opus_int16       *out,
     const silk_float *in,
     opus_int32       length
 )
 {
     opus_int32 k;
     for( k = length - 1; k >= 0; k-- ) {
         out[k] = silk_SAT16( (opus_int32)float2int( in[k] ) );
     }
 }
 
 /* integer to floating-point conversion */
-static inline void silk_short2float_array(
+static OPUS_INLINE void silk_short2float_array(
     silk_float       *out,
     const opus_int16 *in,
     opus_int32       length
 )
 {
     opus_int32 k;
     for( k = length - 1; k >= 0; k-- ) {
         out[k] = (silk_float)in[k];
     }
 }
 
 /* using log2() helps the fixed-point conversion */
-static inline silk_float silk_log2( double x )
+static OPUS_INLINE silk_float silk_log2( double x )
 {
     return ( silk_float )( 3.32192809488736 * log10( x ) );
 }
 
 #ifdef  __cplusplus
 }
 #endif
 
--- a/media/libopus/silk/float/encode_frame_FLP.c
+++ b/media/libopus/silk/float/encode_frame_FLP.c
@@ -28,17 +28,17 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 #include "main_FLP.h"
 #include "tuning_parameters.h"
 
 /* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate */
-static inline void silk_LBRR_encode_FLP(
+static OPUS_INLINE void silk_LBRR_encode_FLP(
     silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
     silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
     const silk_float                xfw[],                              /* I    Input signal                                */
     opus_int                        condCoding                          /* I    The type of conditional coding used so far for this frame */
 );
 
 void silk_encode_do_VAD_FLP(
     silk_encoder_state_FLP          *psEnc                              /* I/O  Encoder state FLP                           */
@@ -124,17 +124,17 @@ opus_int silk_encode_frame_FLP(
     for( i = 0; i < 8; i++ ) {
         x_frame[ LA_SHAPE_MS * psEnc->sCmn.fs_kHz + i * ( psEnc->sCmn.frame_length >> 3 ) ] += ( 1 - ( i & 2 ) ) * 1e-6f;
     }
 
     if( !psEnc->sCmn.prefillFlag ) {
         /*****************************************/
         /* Find pitch lags, initial LPC analysis */
         /*****************************************/
-        silk_find_pitch_lags_FLP( psEnc, &sEncCtrl, res_pitch, x_frame );
+        silk_find_pitch_lags_FLP( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch );
 
         /************************/
         /* Noise shape analysis */
         /************************/
         silk_noise_shape_analysis_FLP( psEnc, &sEncCtrl, res_pitch_frame, x_frame );
 
         /***************************************************/
         /* Find linear prediction coefficients (LPC + LTP) */
@@ -289,39 +289,39 @@ opus_int silk_encode_frame_FLP(
             }
         }
     }
 
     /* Update input buffer */
     silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ],
         ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( silk_float ) );
 
-    /* Parameters needed for next frame */
-    psEnc->sCmn.prevLag        = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ];
-    psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType;
-
     /* Exit without entropy coding */
     if( psEnc->sCmn.prefillFlag ) {
         /* No payload */
         *pnBytesOut = 0;
         return ret;
     }
 
+    /* Parameters needed for next frame */
+    psEnc->sCmn.prevLag        = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ];
+    psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType;
+
     /****************************************/
     /* Finalize payload                     */
     /****************************************/
     psEnc->sCmn.first_frame_after_reset = 0;
     /* Payload size */
     *pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 );
 
     return ret;
 }
 
 /* Low-Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode excitation at lower bitrate  */
-static inline void silk_LBRR_encode_FLP(
+static OPUS_INLINE void silk_LBRR_encode_FLP(
     silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
     silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
     const silk_float                xfw[],                              /* I    Input signal                                */
     opus_int                        condCoding                          /* I    The type of conditional coding used so far for this frame */
 )
 {
     opus_int     k;
     opus_int32   Gains_Q16[ MAX_NB_SUBFR ];
--- a/media/libopus/silk/float/find_pitch_lags_FLP.c
+++ b/media/libopus/silk/float/find_pitch_lags_FLP.c
@@ -32,17 +32,18 @@ POSSIBILITY OF SUCH DAMAGE.
 #include <stdlib.h>
 #include "main_FLP.h"
 #include "tuning_parameters.h"
 
 void silk_find_pitch_lags_FLP(
     silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
     silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
     silk_float                      res[],                              /* O    Residual                                    */
-    const silk_float                x[]                                 /* I    Speech signal                               */
+    const silk_float                x[],                                /* I    Speech signal                               */
+    int                             arch                                /* I    Run-time architecture                       */
 )
 {
     opus_int   buf_len;
     silk_float thrhld, res_nrg;
     const silk_float *x_buf_ptr, *x_buf;
     silk_float auto_corr[ MAX_FIND_PITCH_LPC_ORDER + 1 ];
     silk_float A[         MAX_FIND_PITCH_LPC_ORDER ];
     silk_float refl_coef[ MAX_FIND_PITCH_LPC_ORDER ];
@@ -111,17 +112,17 @@ void silk_find_pitch_lags_FLP(
         thrhld -= 0.15f  * (psEnc->sCmn.prevSignalType >> 1);
         thrhld -= 0.1f   * psEnc->sCmn.input_tilt_Q15 * ( 1.0f / 32768.0f );
 
         /*****************************************/
         /* Call Pitch estimator                  */
         /*****************************************/
         if( silk_pitch_analysis_core_FLP( res, psEncCtrl->pitchL, &psEnc->sCmn.indices.lagIndex,
             &psEnc->sCmn.indices.contourIndex, &psEnc->LTPCorr, psEnc->sCmn.prevLag, psEnc->sCmn.pitchEstimationThreshold_Q16 / 65536.0f,
-            thrhld, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr ) == 0 )
+            thrhld, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr, arch ) == 0 )
         {
             psEnc->sCmn.indices.signalType = TYPE_VOICED;
         } else {
             psEnc->sCmn.indices.signalType = TYPE_UNVOICED;
         }
     } else {
         silk_memset( psEncCtrl->pitchL, 0, sizeof( psEncCtrl->pitchL ) );
         psEnc->sCmn.indices.lagIndex = 0;
--- a/media/libopus/silk/float/find_pred_coefs_FLP.c
+++ b/media/libopus/silk/float/find_pred_coefs_FLP.c
@@ -62,17 +62,17 @@ void silk_find_pred_coefs_FLP(
         silk_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 );
 
         /* LTP analysis */
         silk_find_LTP_FLP( psEncCtrl->LTPCoef, WLTP, &psEncCtrl->LTPredCodGain, res_pitch,
             psEncCtrl->pitchL, Wght, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length );
 
         /* Quantize LTP gain parameters */
         silk_quant_LTP_gains_FLP( psEncCtrl->LTPCoef, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex,
-            WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr );
+            &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr );
 
         /* Control LTP scaling */
         silk_LTP_scale_ctrl_FLP( psEnc, psEncCtrl, condCoding );
 
         /* Create LTP residual */
         silk_LTP_analysis_filter_FLP( LPC_in_pre, x - psEnc->sCmn.predictLPCOrder, psEncCtrl->LTPCoef,
             psEncCtrl->pitchL, invGains, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder );
     } else {
@@ -85,16 +85,17 @@ void silk_find_pred_coefs_FLP(
         for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
             silk_scale_copy_vector_FLP( x_pre_ptr, x_ptr, invGains[ i ],
                 psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder );
             x_pre_ptr += psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder;
             x_ptr     += psEnc->sCmn.subfr_length;
         }
         silk_memset( psEncCtrl->LTPCoef, 0, psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( silk_float ) );
         psEncCtrl->LTPredCodGain = 0.0f;
+		psEnc->sCmn.sum_log_gain_Q7 = 0;
     }
 
     /* Limit on total predictive coding gain */
     if( psEnc->sCmn.first_frame_after_reset ) {
         minInvGain = 1.0f / MAX_PREDICTION_POWER_GAIN_AFTER_RESET;
     } else {        
         minInvGain = (silk_float)pow( 2, psEncCtrl->LTPredCodGain / 3 ) /  MAX_PREDICTION_POWER_GAIN;
         minInvGain /= 0.25f + 0.75f * psEncCtrl->coding_quality;
--- a/media/libopus/silk/float/main_FLP.h
+++ b/media/libopus/silk/float/main_FLP.h
@@ -66,17 +66,18 @@ opus_int silk_encode_frame_FLP(
     ec_enc                          *psRangeEnc,                        /* I/O  compressor data structure                   */
     opus_int                        condCoding,                         /* I    The type of conditional coding to use       */
     opus_int                        maxBits,                            /* I    If > 0: maximum number of output bits       */
     opus_int                        useCBR                              /* I    Flag to force constant-bitrate operation    */
 );
 
 /* Initializes the Silk encoder state */
 opus_int silk_init_encoder(
-    silk_encoder_state_FLP          *psEnc                              /* I/O  Encoder state FLP                           */
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    int                              arch                               /* I    Run-time architecture                       */
 );
 
 /* Control the Silk encoder */
 opus_int silk_control_encoder(
     silk_encoder_state_FLP          *psEnc,                             /* I/O  Pointer to Silk encoder state FLP           */
     silk_EncControlStruct           *encControl,                        /* I    Control structure                           */
     const opus_int32                TargetRate_bps,                     /* I    Target max bitrate (bps)                    */
     const opus_int                  allow_bw_switch,                    /* I    Flag to allow switching audio bandwidth     */
@@ -124,17 +125,18 @@ void silk_LTP_scale_ctrl_FLP(
 /**********************************************/
 /* Prediction Analysis                        */
 /**********************************************/
 /* Find pitch lags */
 void silk_find_pitch_lags_FLP(
     silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
     silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
     silk_float                      res[],                              /* O    Residual                                    */
-    const silk_float                x[]                                 /* I    Speech signal                               */
+    const silk_float                x[],                                /* I    Speech signal                               */
+    int                             arch                                /* I    Run-time architecture                       */
 );
 
 /* Find LPC and LTP coefficients */
 void silk_find_pred_coefs_FLP(
     silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
     silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
     const silk_float                res_pitch[],                        /* I    Residual from pitch analysis                */
     const silk_float                x[],                                /* I    Speech signal                               */
@@ -194,16 +196,17 @@ void silk_LPC_analysis_filter_FLP(
     const opus_int                  Order                               /* I    LPC order                                   */
 );
 
 /* LTP tap quantizer */
 void silk_quant_LTP_gains_FLP(
     silk_float                      B[ MAX_NB_SUBFR * LTP_ORDER ],      /* I/O  (Un-)quantized LTP gains                    */
     opus_int8                       cbk_index[ MAX_NB_SUBFR ],          /* O    Codebook index                              */
     opus_int8                       *periodicity_index,                 /* O    Periodicity index                           */
+    opus_int32                      *sum_log_gain_Q7,                   /* I/O  Cumulative max prediction gain  */
     const silk_float                W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I    Error weights                        */
     const opus_int                  mu_Q10,                             /* I    Mu value (R/D tradeoff)                     */
     const opus_int                  lowComplexity,                      /* I    Flag for low complexity                     */
     const opus_int                  nb_subfr                            /* I    number of subframes                         */
 );
 
 /* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */
 silk_float silk_residual_energy_covar_FLP(                              /* O    Weighted residual energy                    */
--- a/media/libopus/silk/float/noise_shape_analysis_FLP.c
+++ b/media/libopus/silk/float/noise_shape_analysis_FLP.c
@@ -31,17 +31,17 @@ POSSIBILITY OF SUCH DAMAGE.
 
 #include "main_FLP.h"
 #include "tuning_parameters.h"
 
 /* Compute gain to make warped filter coefficients have a zero mean log frequency response on a   */
 /* non-warped frequency scale. (So that it can be implemented with a minimum-phase monic filter.) */
 /* Note: A monic filter is one with the first coefficient equal to 1.0. In Silk we omit the first */
 /* coefficient in an array of coefficients, for monic filters.                                    */
-static inline silk_float warped_gain(
+static OPUS_INLINE silk_float warped_gain(
     const silk_float     *coefs,
     silk_float           lambda,
     opus_int             order
 ) {
     opus_int   i;
     silk_float gain;
 
     lambda = -lambda;
@@ -49,17 +49,17 @@ static inline silk_float warped_gain(
     for( i = order - 2; i >= 0; i-- ) {
         gain = lambda * gain + coefs[ i ];
     }
     return (silk_float)( 1.0f / ( 1.0f - lambda * gain ) );
 }
 
 /* Convert warped filter coefficients to monic pseudo-warped coefficients and limit maximum     */
 /* amplitude of monic warped coefficients by using bandwidth expansion on the true coefficients */
-static inline void warped_true2monic_coefs(
+static OPUS_INLINE void warped_true2monic_coefs(
     silk_float           *coefs_syn,
     silk_float           *coefs_ana,
     silk_float           lambda,
     silk_float           limit,
     opus_int             order
 ) {
     opus_int   i, iter, ind = 0;
     silk_float tmp, maxabs, chirp, gain_syn, gain_ana;
--- a/media/libopus/silk/float/pitch_analysis_core_FLP.c
+++ b/media/libopus/silk/float/pitch_analysis_core_FLP.c
@@ -43,17 +43,18 @@ POSSIBILITY OF SUCH DAMAGE.
 /* Internally used functions                                */
 /************************************************************/
 static void silk_P_Ana_calc_corr_st3(
     silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */
     const silk_float    frame[],            /* I vector to correlate                                            */
     opus_int            start_lag,          /* I start lag                                                      */
     opus_int            sf_length,          /* I sub frame length                                               */
     opus_int            nb_subfr,           /* I number of subframes                                            */
-    opus_int            complexity          /* I Complexity setting                                             */
+    opus_int            complexity,         /* I Complexity setting                                             */
+    int                 arch                /* I Run-time architecture                                          */
 );
 
 static void silk_P_Ana_calc_energy_st3(
     silk_float energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */
     const silk_float    frame[],            /* I vector to correlate                                            */
     opus_int            start_lag,          /* I start lag                                                      */
     opus_int            sf_length,          /* I sub frame length                                               */
     opus_int            nb_subfr,           /* I number of subframes                                            */
@@ -69,17 +70,18 @@ opus_int silk_pitch_analysis_core_FLP(  
     opus_int16          *lagIndex,          /* O    Lag Index                                                   */
     opus_int8           *contourIndex,      /* O    Pitch contour Index                                         */
     silk_float          *LTPCorr,           /* I/O  Normalized correlation; input: value from previous frame    */
     opus_int            prevLag,            /* I    Last lag of previous frame; set to zero is unvoiced         */
     const silk_float    search_thres1,      /* I    First stage threshold for lag candidates 0 - 1              */
     const silk_float    search_thres2,      /* I    Final threshold for lag candidates 0 - 1                    */
     const opus_int      Fs_kHz,             /* I    sample frequency (kHz)                                      */
     const opus_int      complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
-    const opus_int      nb_subfr            /* I    Number of 5 ms subframes                                    */
+    const opus_int      nb_subfr,           /* I    Number of 5 ms subframes                                    */
+    int                 arch                /* I    Run-time architecture                                       */
 )
 {
     opus_int   i, k, d, j;
     silk_float frame_8kHz[  PE_MAX_FRAME_LENGTH_MS * 8 ];
     silk_float frame_4kHz[  PE_MAX_FRAME_LENGTH_MS * 4 ];
     opus_int16 frame_8_FIX[ PE_MAX_FRAME_LENGTH_MS * 8 ];
     opus_int16 frame_4_FIX[ PE_MAX_FRAME_LENGTH_MS * 4 ];
     opus_int32 filt_state[ 6 ];
@@ -171,17 +173,17 @@ opus_int silk_pitch_analysis_core_FLP(  
         silk_assert( target_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
 
         basis_ptr = target_ptr - min_lag_4kHz;
 
         /* Check that we are within range of the array */
         silk_assert( basis_ptr >= frame_4kHz );
         silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
 
-        celt_pitch_xcorr( target_ptr, target_ptr-max_lag_4kHz, xcorr, sf_length_8kHz, max_lag_4kHz - min_lag_4kHz + 1 );
+        celt_pitch_xcorr( target_ptr, target_ptr-max_lag_4kHz, xcorr, sf_length_8kHz, max_lag_4kHz - min_lag_4kHz + 1, arch );
 
         /* Calculate first vector products before loop */
         cross_corr = xcorr[ max_lag_4kHz - min_lag_4kHz ];
         normalizer = silk_energy_FLP( target_ptr, sf_length_8kHz ) + 
                      silk_energy_FLP( basis_ptr,  sf_length_8kHz ) + 
                      sf_length_8kHz * 4000.0f;
 
         C[ 0 ][ min_lag_4kHz ] += (silk_float)( 2 * cross_corr / normalizer );
@@ -404,17 +406,17 @@ opus_int silk_pitch_analysis_core_FLP(  
         start_lag = silk_max_int( lag - 2, min_lag );
         end_lag   = silk_min_int( lag + 2, max_lag );
         lag_new   = lag;                                    /* to avoid undefined lag */
         CBimax    = 0;                                      /* to avoid undefined lag */
 
         CCmax = -1000.0f;
 
         /* Calculate the correlations and energies needed in stage 3 */
-        silk_P_Ana_calc_corr_st3( cross_corr_st3, frame, start_lag, sf_length, nb_subfr, complexity );
+        silk_P_Ana_calc_corr_st3( cross_corr_st3, frame, start_lag, sf_length, nb_subfr, complexity, arch );
         silk_P_Ana_calc_energy_st3( energies_st3, frame, start_lag, sf_length, nb_subfr, complexity );
 
         lag_counter = 0;
         silk_assert( lag == silk_SAT16( lag ) );
         contour_bias = PE_FLATCONTOUR_BIAS / lag;
 
         /* Set up cbk parameters according to complexity setting and frame length */
         if( nb_subfr == PE_MAX_NB_SUBFR ) {
@@ -488,20 +490,21 @@ opus_int silk_pitch_analysis_core_FLP(  
  * case 4*12*5 = 240 correlations, but more likely around 120.
  ***********************************************************************/
 static void silk_P_Ana_calc_corr_st3(
     silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */
     const silk_float    frame[],            /* I vector to correlate                                            */
     opus_int            start_lag,          /* I start lag                                                      */
     opus_int            sf_length,          /* I sub frame length                                               */
     opus_int            nb_subfr,           /* I number of subframes                                            */
-    opus_int            complexity          /* I Complexity setting                                             */
+    opus_int            complexity,         /* I Complexity setting                                             */
+    int                 arch                /* I Run-time architecture                                          */
 )
 {
-    const silk_float *target_ptr, *basis_ptr;
+    const silk_float *target_ptr;
     opus_int   i, j, k, lag_counter, lag_low, lag_high;
     opus_int   nb_cbk_search, delta, idx, cbk_size;
     silk_float scratch_mem[ SCRATCH_SIZE ];
     opus_val32 xcorr[ SCRATCH_SIZE ];
     const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
 
     silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
     silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
@@ -522,19 +525,18 @@ static void silk_P_Ana_calc_corr_st3(
     target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */
     for( k = 0; k < nb_subfr; k++ ) {
         lag_counter = 0;
 
         /* Calculate the correlations for each subframe */
         lag_low  = matrix_ptr( Lag_range_ptr, k, 0, 2 );
         lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );
         silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE);
-        celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr, sf_length, lag_high - lag_low + 1 );
+        celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr, sf_length, lag_high - lag_low + 1, arch );
         for( j = lag_low; j <= lag_high; j++ ) {
-            basis_ptr = target_ptr - ( start_lag + j );
             silk_assert( lag_counter < SCRATCH_SIZE );
             scratch_mem[ lag_counter ] = xcorr[ lag_high - j ];
             lag_counter++;
         }
 
         delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
         for( i = 0; i < nb_cbk_search; i++ ) {
             /* Fill out the 3 dim array that stores the correlations for */
--- a/media/libopus/silk/float/prefilter_FLP.c
+++ b/media/libopus/silk/float/prefilter_FLP.c
@@ -30,17 +30,17 @@ POSSIBILITY OF SUCH DAMAGE.
 #endif
 
 #include "main_FLP.h"
 #include "tuning_parameters.h"
 
 /*
 * Prefilter for finding Quantizer input signal
 */
-static inline void silk_prefilt_FLP(
+static OPUS_INLINE void silk_prefilt_FLP(
     silk_prefilter_state_FLP    *P,                 /* I/O state */
     silk_float                  st_res[],           /* I */
     silk_float                  xw[],               /* O */
     silk_float                  *HarmShapeFIR,      /* I */
     silk_float                  Tilt,               /* I */
     silk_float                  LF_MA_shp,          /* I */
     silk_float                  LF_AR_shp,          /* I */
     opus_int                    lag,                /* I */
@@ -148,17 +148,17 @@ void silk_prefilter_FLP(
         pxw += psEnc->sCmn.subfr_length;
     }
     P->lagPrev = psEncCtrl->pitchL[ psEnc->sCmn.nb_subfr - 1 ];
 }
 
 /*
 * Prefilter for finding Quantizer input signal
 */
-static inline void silk_prefilt_FLP(
+static OPUS_INLINE void silk_prefilt_FLP(
     silk_prefilter_state_FLP    *P,                 /* I/O state */
     silk_float                  st_res[],           /* I */
     silk_float                  xw[],               /* O */
     silk_float                  *HarmShapeFIR,      /* I */
     silk_float                  Tilt,               /* I */
     silk_float                  LF_MA_shp,          /* I */
     silk_float                  LF_AR_shp,          /* I */
     opus_int                    lag,                /* I */
--- a/media/libopus/silk/float/solve_LS_FLP.c
+++ b/media/libopus/silk/float/solve_LS_FLP.c
@@ -32,39 +32,39 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "main_FLP.h"
 #include "tuning_parameters.h"
 
 /**********************************************************************
  * LDL Factorisation. Finds the upper triangular matrix L and the diagonal
  * Matrix D (only the diagonal elements returned in a vector)such that
  * the symmetric matric A is given by A = L*D*L'.
  **********************************************************************/
-static inline void silk_LDL_FLP(
+static OPUS_INLINE void silk_LDL_FLP(
     silk_float          *A,         /* I/O  Pointer to Symetric Square Matrix                               */
     opus_int            M,          /* I    Size of Matrix                                                  */
     silk_float          *L,         /* I/O  Pointer to Square Upper triangular Matrix                       */
     silk_float          *Dinv       /* I/O  Pointer to vector holding the inverse diagonal elements of D    */
 );
 
 /**********************************************************************
  * Function to solve linear equation Ax = b, when A is a MxM lower
  * triangular matrix, with ones on the diagonal.
  **********************************************************************/
-static inline void silk_SolveWithLowerTriangularWdiagOnes_FLP(
+static OPUS_INLINE void silk_SolveWithLowerTriangularWdiagOnes_FLP(
     const silk_float    *L,         /* I    Pointer to Lower Triangular Matrix                              */
     opus_int            M,          /* I    Dim of Matrix equation                                          */
     const silk_float    *b,         /* I    b Vector                                                        */
     silk_float          *x          /* O    x Vector                                                        */
 );
 
 /**********************************************************************
  * Function to solve linear equation (A^T)x = b, when A is a MxM lower
  * triangular, with ones on the diagonal. (ie then A^T is upper triangular)
  **********************************************************************/
-static inline void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP(
+static OPUS_INLINE void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP(
     const silk_float    *L,         /* I    Pointer to Lower Triangular Matrix                              */
     opus_int            M,          /* I    Dim of Matrix equation                                          */
     const silk_float    *b,         /* I    b Vector                                                        */
     silk_float          *x          /* O    x Vector                                                        */
 );
 
 /**********************************************************************
  * Function to solve linear equation Ax = b, when A is a MxM
@@ -104,17 +104,17 @@ void silk_solve_LDL_FLP(
         T[ i ] = T[ i ] * Dinv[ i ];
     }
     /****************************************************
     x = inv(L') * inv(D) * T
     *****************************************************/
     silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP( &L[ 0 ][ 0 ], M, T, x );
 }
 
-static inline void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP(
+static OPUS_INLINE void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP(
     const silk_float    *L,         /* I    Pointer to Lower Triangular Matrix                              */
     opus_int            M,          /* I    Dim of Matrix equation                                          */
     const silk_float    *b,         /* I    b Vector                                                        */
     silk_float          *x          /* O    x Vector                                                        */
 )
 {
     opus_int   i, j;
     silk_float temp;
@@ -126,17 +126,17 @@ static inline void silk_SolveWithUpperTr
         for( j = M - 1; j > i ; j-- ) {
             temp += ptr1[ j * M ] * x[ j ];
         }
         temp = b[ i ] - temp;
         x[ i ] = temp;
     }
 }
 
-static inline void silk_SolveWithLowerTriangularWdiagOnes_FLP(
+static OPUS_INLINE void silk_SolveWithLowerTriangularWdiagOnes_FLP(
     const silk_float    *L,         /* I    Pointer to Lower Triangular Matrix                              */
     opus_int            M,          /* I    Dim of Matrix equation                                          */
     const silk_float    *b,         /* I    b Vector                                                        */
     silk_float          *x          /* O    x Vector                                                        */
 )
 {
     opus_int   i, j;
     silk_float temp;
@@ -148,17 +148,17 @@ static inline void silk_SolveWithLowerTr
         for( j = 0; j < i; j++ ) {
             temp += ptr1[ j ] * x[ j ];
         }
         temp = b[ i ] - temp;
         x[ i ] = temp;
     }
 }
 
-static inline void silk_LDL_FLP(
+static OPUS_INLINE void silk_LDL_FLP(
     silk_float          *A,         /* I/O  Pointer to Symetric Square Matrix                               */
     opus_int            M,          /* I    Size of Matrix                                                  */
     silk_float          *L,         /* I/O  Pointer to Square Upper triangular Matrix                       */
     silk_float          *Dinv       /* I/O  Pointer to vector holding the inverse diagonal elements of D    */
 )
 {
     opus_int i, j, k, loop_count, err = 1;
     silk_float *ptr1, *ptr2;
--- a/media/libopus/silk/float/wrappers_FLP.c
+++ b/media/libopus/silk/float/wrappers_FLP.c
@@ -170,16 +170,17 @@ void silk_NSQ_wrapper_FLP(
 
 /***********************************************/
 /* Floating-point Silk LTP quantiation wrapper */
 /***********************************************/
 void silk_quant_LTP_gains_FLP(
     silk_float                      B[ MAX_NB_SUBFR * LTP_ORDER ],      /* I/O  (Un-)quantized LTP gains                    */
     opus_int8                       cbk_index[ MAX_NB_SUBFR ],          /* O    Codebook index                              */
     opus_int8                       *periodicity_index,                 /* O    Periodicity index                           */
+    opus_int32                      *sum_log_gain_Q7,                   /* I/O  Cumulative max prediction gain  */
     const silk_float                W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I    Error weights                        */
     const opus_int                  mu_Q10,                             /* I    Mu value (R/D tradeoff)                     */
     const opus_int                  lowComplexity,                      /* I    Flag for low complexity                     */
     const opus_int                  nb_subfr                            /* I    number of subframes                         */
 )
 {
     opus_int   i;
     opus_int16 B_Q14[ MAX_NB_SUBFR * LTP_ORDER ];
@@ -187,14 +188,14 @@ void silk_quant_LTP_gains_FLP(
 
     for( i = 0; i < nb_subfr * LTP_ORDER; i++ ) {
         B_Q14[ i ] = (opus_int16)silk_float2int( B[ i ] * 16384.0f );
     }
     for( i = 0; i < nb_subfr * LTP_ORDER * LTP_ORDER; i++ ) {
         W_Q18[ i ] = (opus_int32)silk_float2int( W[ i ] * 262144.0f );
     }
 
-    silk_quant_LTP_gains( B_Q14, cbk_index, periodicity_index, W_Q18, mu_Q10, lowComplexity, nb_subfr );
+    silk_quant_LTP_gains( B_Q14, cbk_index, periodicity_index, sum_log_gain_Q7, W_Q18, mu_Q10, lowComplexity, nb_subfr );
 
     for( i = 0; i < nb_subfr * LTP_ORDER; i++ ) {
         B[ i ] = (silk_float)B_Q14[ i ] * ( 1.0f / 16384.0f );
     }
 }
--- a/media/libopus/silk/init_encoder.c
+++ b/media/libopus/silk/init_encoder.c
@@ -29,29 +29,33 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "config.h"
 #endif
 #ifdef FIXED_POINT
 #include "main_FIX.h"
 #else
 #include "main_FLP.h"
 #endif
 #include "tuning_parameters.h"
+#include "cpu_support.h"
 
 /*********************************/
 /* Initialize Silk Encoder state */
 /*********************************/
 opus_int silk_init_encoder(
-    silk_encoder_state_Fxx          *psEnc                                  /* I/O  Pointer to Silk FIX encoder state                                           */
+    silk_encoder_state_Fxx          *psEnc,                                 /* I/O  Pointer to Silk FIX encoder state                                           */
+    int                              arch                                   /* I    Run-time architecture                                                       */
 )
 {
     opus_int ret = 0;
 
     /* Clear the entire encoder state */
     silk_memset( psEnc, 0, sizeof( silk_encoder_state_Fxx ) );
 
+    psEnc->sCmn.arch = arch;
+
     psEnc->sCmn.variable_HP_smth1_Q15 = silk_LSHIFT( silk_lin2log( SILK_FIX_CONST( VARIABLE_HP_MIN_CUTOFF_HZ, 16 ) ) - ( 16 << 7 ), 8 );
     psEnc->sCmn.variable_HP_smth2_Q15 = psEnc->sCmn.variable_HP_smth1_Q15;
 
     /* Used to deactivate LSF interpolation, pitch prediction */
     psEnc->sCmn.first_frame_after_reset = 1;
 
     /* Initialize Silk VAD */
     ret += silk_VAD_Init( &psEnc->sCmn.sVAD );
--- a/media/libopus/silk/log2lin.c
+++ b/media/libopus/silk/log2lin.c
@@ -36,17 +36,19 @@ POSSIBILITY OF SUCH DAMAGE.
 opus_int32 silk_log2lin( 
     const opus_int32            inLog_Q7            /* I  input on log scale                                            */
 )
 {
     opus_int32 out, frac_Q7;
 
     if( inLog_Q7 < 0 ) {
         return 0;
-    }
+    } else if ( inLog_Q7 >= 3967 ) {
+		return silk_int32_MAX;
+	}
 
     out = silk_LSHIFT( 1, silk_RSHIFT( inLog_Q7, 7 ) );
     frac_Q7 = inLog_Q7 & 0x7F;
     if( inLog_Q7 < 2048 ) {
         /* Piece-wise parabolic approximation */
         out = silk_ADD_RSHIFT32( out, silk_MUL( out, silk_SMLAWB( frac_Q7, silk_SMULBB( frac_Q7, 128 - frac_Q7 ), -174 ) ), 7 );
     } else {
         /* Piece-wise parabolic approximation */
--- a/media/libopus/silk/macros.h
+++ b/media/libopus/silk/macros.h
@@ -27,17 +27,20 @@ POSSIBILITY OF SUCH DAMAGE.
 
 #ifndef SILK_MACROS_H
 #define SILK_MACROS_H
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
-/* This is an inline header file for general platform. */
+#include "opus_types.h"
+#include "opus_defines.h"
+
+/* This is an inline header file for general platform. */
 
 /* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */
 #define silk_SMULWB(a32, b32)            ((((a32) >> 16) * (opus_int32)((opus_int16)(b32))) + ((((a32) & 0x0000FFFF) * (opus_int32)((opus_int16)(b32))) >> 16))
 
 /* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */
 #define silk_SMLAWB(a32, b32, c32)       ((a32) + ((((b32) >> 16) * (opus_int32)((opus_int16)(c32))) + ((((b32) & 0x0000FFFF) * (opus_int32)((opus_int16)(c32))) >> 16)))
 
 /* (a32 * (b32 >> 16)) >> 16 */
@@ -73,40 +76,40 @@ POSSIBILITY OF SUCH DAMAGE.
                                         ((((a) | (b)) & 0x80000000) == 0 ? silk_int32_MAX : (a)+(b)) )
 
 #define silk_SUB_SAT32(a, b)             ((((opus_uint32)(a)-(opus_uint32)(b)) & 0x80000000) == 0 ?                                        \
                                         (( (a) & ((b)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a)-(b)) :    \
                                         ((((a)^0x80000000) & (b)  & 0x80000000) ? silk_int32_MAX : (a)-(b)) )
 
 #include "ecintrin.h"
 
-static inline opus_int32 silk_CLZ16(opus_int16 in16)
+static OPUS_INLINE opus_int32 silk_CLZ16(opus_int16 in16)
 {
     return 32 - EC_ILOG(in16<<16|0x8000);
 }
 
-static inline opus_int32 silk_CLZ32(opus_int32 in32)
+static OPUS_INLINE opus_int32 silk_CLZ32(opus_int32 in32)
 {
     return in32 ? 32 - EC_ILOG(in32) : 32;
 }
 
 /* Row based */
 #define matrix_ptr(Matrix_base_adr, row, column, N) \
     (*((Matrix_base_adr) + ((row)*(N)+(column))))
 #define matrix_adr(Matrix_base_adr, row, column, N) \
       ((Matrix_base_adr) + ((row)*(N)+(column)))
 
 /* Column based */
 #ifndef matrix_c_ptr
 #   define matrix_c_ptr(Matrix_base_adr, row, column, M) \
     (*((Matrix_base_adr) + ((row)+(M)*(column))))
 #endif
 
-#ifdef ARMv4_ASM
+#ifdef OPUS_ARM_INLINE_ASM
 #include "arm/macros_armv4.h"
 #endif
 
-#ifdef ARMv5E_ASM
+#ifdef OPUS_ARM_INLINE_EDSP
 #include "arm/macros_armv5e.h"
 #endif
 
 #endif /* SILK_MACROS_H */
 
--- a/media/libopus/silk/main.h
+++ b/media/libopus/silk/main.h
@@ -199,31 +199,35 @@ void silk_interpolate(
     const opus_int              d                               /* I    number of parameters                        */
 );
 
 /* LTP tap quantizer */
 void silk_quant_LTP_gains(
     opus_int16                  B_Q14[ MAX_NB_SUBFR * LTP_ORDER ],          /* I/O  (un)quantized LTP gains         */
     opus_int8                   cbk_index[ MAX_NB_SUBFR ],                  /* O    Codebook Index                  */
     opus_int8                   *periodicity_index,                         /* O    Periodicity Index               */
+    opus_int32                  *sum_log_gain_Q7,                           /* I/O  Cumulative max prediction gain  */
     const opus_int32            W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ],  /* I    Error Weights in Q18            */
     opus_int                    mu_Q9,                                      /* I    Mu value (R/D tradeoff)         */
     opus_int                    lowComplexity,                              /* I    Flag for low complexity         */
     const opus_int              nb_subfr                                    /* I    number of subframes             */
 );
 
 /* Entropy constrained matrix-weighted VQ, for a single input data vector */
 void silk_VQ_WMat_EC(
     opus_int8                   *ind,                           /* O    index of best codebook vector               */
     opus_int32                  *rate_dist_Q14,                 /* O    best weighted quant error + mu * rate       */
+    opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
     const opus_int16            *in_Q14,                        /* I    input vector to be quantized                */
     const opus_int32            *W_Q18,                         /* I    weighting matrix                            */
     const opus_int8             *cb_Q7,                         /* I    codebook                                    */
+    const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
     const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
     const opus_int              mu_Q9,                          /* I    tradeoff betw. weighted error and rate      */
+    const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
     opus_int                    L                               /* I    number of vectors in codebook               */
 );
 
 /************************************/
 /* Noise shaping quantization (NSQ) */
 /************************************/
 void silk_NSQ(
     const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */
--- a/media/libopus/silk/quant_LTP_gains.c
+++ b/media/libopus/silk/quant_LTP_gains.c
@@ -25,83 +25,105 @@ ARISING IN ANY WAY OUT OF THE USE OF THI
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 #include "main.h"
+#include "tuning_parameters.h"
 
 void silk_quant_LTP_gains(
     opus_int16                  B_Q14[ MAX_NB_SUBFR * LTP_ORDER ],          /* I/O  (un)quantized LTP gains         */
     opus_int8                   cbk_index[ MAX_NB_SUBFR ],                  /* O    Codebook Index                  */
     opus_int8                   *periodicity_index,                         /* O    Periodicity Index               */
+    opus_int32                  *sum_log_gain_Q7,                           /* I/O  Cumulative max prediction gain  */
     const opus_int32            W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ],  /* I    Error Weights in Q18            */
     opus_int                    mu_Q9,                                      /* I    Mu value (R/D tradeoff)         */
     opus_int                    lowComplexity,                              /* I    Flag for low complexity         */
     const opus_int              nb_subfr                                    /* I    number of subframes             */
 )
 {
     opus_int             j, k, cbk_size;
     opus_int8            temp_idx[ MAX_NB_SUBFR ];
     const opus_uint8     *cl_ptr_Q5;
     const opus_int8      *cbk_ptr_Q7;
+    const opus_uint8     *cbk_gain_ptr_Q7;
     const opus_int16     *b_Q14_ptr;
     const opus_int32     *W_Q18_ptr;
     opus_int32           rate_dist_Q14_subfr, rate_dist_Q14, min_rate_dist_Q14;
+    opus_int32           sum_log_gain_tmp_Q7, best_sum_log_gain_Q7, max_gain_Q7;
+    opus_int             gain_Q7; /* opus_int, not opus_int32: silk_VQ_WMat_EC() writes it through an opus_int pointer */
 
     /***************************************************/
     /* iterate over different codebooks with different */
     /* rates/distortions, and choose best */
     /***************************************************/
     min_rate_dist_Q14 = silk_int32_MAX;
+    best_sum_log_gain_Q7 = 0;
     for( k = 0; k < 3; k++ ) {
+        /* Safety margin for pitch gain control, to take into account factors
+           such as state rescaling/rewhitening. */
+        opus_int32 gain_safety = SILK_FIX_CONST( 0.4, 7 );
+
         cl_ptr_Q5  = silk_LTP_gain_BITS_Q5_ptrs[ k ];
         cbk_ptr_Q7 = silk_LTP_vq_ptrs_Q7[        k ];
+        cbk_gain_ptr_Q7 = silk_LTP_vq_gain_ptrs_Q7[ k ];
         cbk_size   = silk_LTP_vq_sizes[          k ];
 
         /* Set up pointer to first subframe */
         W_Q18_ptr = W_Q18;
         b_Q14_ptr = B_Q14;
 
         rate_dist_Q14 = 0;
+        sum_log_gain_tmp_Q7 = *sum_log_gain_Q7;
         for( j = 0; j < nb_subfr; j++ ) {
+            max_gain_Q7 = silk_log2lin( ( SILK_FIX_CONST( MAX_SUM_LOG_GAIN_DB / 6.0, 7 ) - sum_log_gain_tmp_Q7 )
+                                        + SILK_FIX_CONST( 7, 7 ) ) - gain_safety;
+
             silk_VQ_WMat_EC(
                 &temp_idx[ j ],         /* O    index of best codebook vector                           */
                 &rate_dist_Q14_subfr,   /* O    best weighted quantization error + mu * rate            */
+                &gain_Q7,               /* O    sum of absolute LTP coefficients                        */
                 b_Q14_ptr,              /* I    input vector to be quantized                            */
                 W_Q18_ptr,              /* I    weighting matrix                                        */
                 cbk_ptr_Q7,             /* I    codebook                                                */
+                cbk_gain_ptr_Q7,        /* I    codebook effective gains                                */
                 cl_ptr_Q5,              /* I    code length for each codebook vector                    */
                 mu_Q9,                  /* I    tradeoff between weighted error and rate                */
+                max_gain_Q7,            /* I    maximum sum of absolute LTP coefficients                */
                 cbk_size                /* I    number of vectors in codebook                           */
             );
 
             rate_dist_Q14 = silk_ADD_POS_SAT32( rate_dist_Q14, rate_dist_Q14_subfr );
+            sum_log_gain_tmp_Q7 = silk_max(0, sum_log_gain_tmp_Q7
+                                + silk_lin2log( gain_safety + gain_Q7 ) - SILK_FIX_CONST( 7, 7 ));
 
             b_Q14_ptr += LTP_ORDER;
             W_Q18_ptr += LTP_ORDER * LTP_ORDER;
         }
 
         /* Avoid never finding a codebook */
         rate_dist_Q14 = silk_min( silk_int32_MAX - 1, rate_dist_Q14 );
 
         if( rate_dist_Q14 < min_rate_dist_Q14 ) {
             min_rate_dist_Q14 = rate_dist_Q14;
             *periodicity_index = (opus_int8)k;
             silk_memcpy( cbk_index, temp_idx, nb_subfr * sizeof( opus_int8 ) );
+            best_sum_log_gain_Q7 = sum_log_gain_tmp_Q7;
         }
 
         /* Break early in low-complexity mode if rate distortion is below threshold */
         if( lowComplexity && ( rate_dist_Q14 < silk_LTP_gain_middle_avg_RD_Q14 ) ) {
             break;
         }
     }
 
     cbk_ptr_Q7 = silk_LTP_vq_ptrs_Q7[ *periodicity_index ];
     for( j = 0; j < nb_subfr; j++ ) {
         for( k = 0; k < LTP_ORDER; k++ ) {
             B_Q14[ j * LTP_ORDER + k ] = silk_LSHIFT( cbk_ptr_Q7[ cbk_index[ j ] * LTP_ORDER + k ], 7 );
         }
     }
+    *sum_log_gain_Q7 = best_sum_log_gain_Q7;
 }
 
--- a/media/libopus/silk/resampler_private_IIR_FIR.c
+++ b/media/libopus/silk/resampler_private_IIR_FIR.c
@@ -28,17 +28,17 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 #include "SigProc_FIX.h"
 #include "resampler_private.h"
 #include "stack_alloc.h"
 
-static inline opus_int16 *silk_resampler_private_IIR_FIR_INTERPOL(
+static OPUS_INLINE opus_int16 *silk_resampler_private_IIR_FIR_INTERPOL(
     opus_int16  *out,
     opus_int16  *buf,
     opus_int32  max_index_Q16,
     opus_int32  index_increment_Q16
 )
 {
     opus_int32 index_Q16, res_Q15;
     opus_int16 *buf_ptr;
--- a/media/libopus/silk/resampler_private_down_FIR.c
+++ b/media/libopus/silk/resampler_private_down_FIR.c
@@ -28,17 +28,17 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 #include "SigProc_FIX.h"
 #include "resampler_private.h"
 #include "stack_alloc.h"
 
-static inline opus_int16 *silk_resampler_private_down_FIR_INTERPOL(
+static OPUS_INLINE opus_int16 *silk_resampler_private_down_FIR_INTERPOL(
     opus_int16          *out,
     opus_int32          *buf,
     const opus_int16    *FIR_Coefs,
     opus_int            FIR_Order,
     opus_int            FIR_Fracs,
     opus_int32          max_index_Q16,
     opus_int32          index_increment_Q16
 )
--- a/media/libopus/silk/shell_coder.c
+++ b/media/libopus/silk/shell_coder.c
@@ -28,41 +28,41 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 #include "main.h"
 
 /* shell coder; pulse-subframe length is hardcoded */
 
-static inline void combine_pulses(
+static OPUS_INLINE void combine_pulses(
     opus_int         *out,   /* O    combined pulses vector [len] */
     const opus_int   *in,    /* I    input vector       [2 * len] */
     const opus_int   len     /* I    number of OUTPUT samples     */
 )
 {
     opus_int k;
     for( k = 0; k < len; k++ ) {
         out[ k ] = in[ 2 * k ] + in[ 2 * k + 1 ];
     }
 }
 
-static inline void encode_split(
+static OPUS_INLINE void encode_split(
     ec_enc                      *psRangeEnc,    /* I/O  compressor data structure                   */
     const opus_int              p_child1,       /* I    pulse amplitude of first child subframe     */
     const opus_int              p,              /* I    pulse amplitude of current subframe         */
     const opus_uint8            *shell_table    /* I    table of shell cdfs                         */
 )
 {
     if( p > 0 ) {
         ec_enc_icdf( psRangeEnc, p_child1, &shell_table[ silk_shell_code_table_offsets[ p ] ], 8 );
     }
 }
 
-static inline void decode_split(
+static OPUS_INLINE void decode_split(
     opus_int                    *p_child1,      /* O    pulse amplitude of first child subframe     */
     opus_int                    *p_child2,      /* O    pulse amplitude of second child subframe    */
     ec_dec                      *psRangeDec,    /* I/O  Compressor data structure                   */
     const opus_int              p,              /* I    pulse amplitude of current subframe         */
     const opus_uint8            *shell_table    /* I    table of shell cdfs                         */
 )
 {
     if( p > 0 ) {
--- a/media/libopus/silk/structs.h
+++ b/media/libopus/silk/structs.h
@@ -166,16 +166,17 @@ typedef struct {
     opus_int                     useInterpolatedNLSFs;              /* Flag for using NLSF interpolation                                */
     opus_int                     shapingLPCOrder;                   /* Filter order for noise shaping filters                           */
     opus_int                     predictLPCOrder;                   /* Filter order for prediction filters                              */
     opus_int                     pitchEstimationComplexity;         /* Complexity level for pitch estimator                             */
     opus_int                     pitchEstimationLPCOrder;           /* Whitening filter order for pitch estimator                       */
     opus_int32                   pitchEstimationThreshold_Q16;      /* Threshold for pitch estimator                                    */
     opus_int                     LTPQuantLowComplexity;             /* Flag for low complexity LTP quantization                         */
     opus_int                     mu_LTP_Q9;                         /* Rate-distortion tradeoff in LTP quantization                     */
+    opus_int32                   sum_log_gain_Q7;					/* Cumulative max prediction gain									*/
     opus_int                     NLSF_MSVQ_Survivors;               /* Number of survivors in NLSF MSVQ                                 */
     opus_int                     first_frame_after_reset;           /* Flag for deactivating NLSF interpolation, pitch prediction       */
     opus_int                     controlled_since_last_payload;     /* Flag for ensuring codec_control only runs once per packet        */
     opus_int                     warping_Q16;                       /* Warping parameter for warped noise shaping                       */
     opus_int                     useCBR;                            /* Flag to enable constant bitrate                                  */
     opus_int                     prefillFlag;                       /* Flag to indicate that only buffers are prefilled, no coding      */
     const opus_uint8             *pitch_lag_low_bits_iCDF;          /* Pointer to iCDF table for low bits of pitch lag index            */
     const opus_uint8             *pitch_contour_iCDF;               /* Pointer to iCDF table for pitch contour index                    */
@@ -186,16 +187,18 @@ typedef struct {
 
     opus_int8                    VAD_flags[ MAX_FRAMES_PER_PACKET ];
     opus_int8                    LBRR_flag;
     opus_int                     LBRR_flags[ MAX_FRAMES_PER_PACKET ];
 
     SideInfoIndices              indices;
     opus_int8                    pulses[ MAX_FRAME_LENGTH ];
 
+    int                          arch;
+
     /* Input/output buffering */
     opus_int16                   inputBuf[ MAX_FRAME_LENGTH + 2 ];  /* Buffer containing input signal                                   */
     opus_int                     inputBufIx;
     opus_int                     nFramesPerPacket;
     opus_int                     nFramesEncoded;                    /* Number of frames analyzed in current packet                      */
 
     opus_int                     nChannelsAPI;
     opus_int                     nChannelsInternal;
--- a/media/libopus/silk/tables.h
+++ b/media/libopus/silk/tables.h
@@ -73,16 +73,18 @@ extern const opus_uint8  silk_uniform8_i
 
 extern const opus_uint8  silk_NLSF_EXT_iCDF[ 7 ];                                                   /*   7 */
 
 extern const opus_uint8  silk_LTP_per_index_iCDF[ 3 ];                                              /*   3 */
 extern const opus_uint8  * const silk_LTP_gain_iCDF_ptrs[ NB_LTP_CBKS ];                            /*   3 */
 extern const opus_uint8  * const silk_LTP_gain_BITS_Q5_ptrs[ NB_LTP_CBKS ];                         /*   3 */
 extern const opus_int16  silk_LTP_gain_middle_avg_RD_Q14;
 extern const opus_int8   * const silk_LTP_vq_ptrs_Q7[ NB_LTP_CBKS ];                                /* 168 */
+extern const opus_uint8  * const silk_LTP_vq_gain_ptrs_Q7[NB_LTP_CBKS];
+
 extern const opus_int8   silk_LTP_vq_sizes[ NB_LTP_CBKS ];                                          /*   3 */
 
 extern const opus_uint8  silk_LTPscale_iCDF[ 3 ];                                                   /*   4 */
 extern const opus_int16  silk_LTPScales_table_Q14[ 3 ];                                             /*   6 */
 
 extern const opus_uint8  silk_type_offset_VAD_iCDF[ 4 ];                                            /*   4 */
 extern const opus_uint8  silk_type_offset_no_VAD_iCDF[ 2 ];                                         /*   2 */
 
--- a/media/libopus/silk/tables_LTP.c
+++ b/media/libopus/silk/tables_LTP.c
@@ -262,11 +262,35 @@ static const opus_int8 silk_LTP_gain_vq_
 };
 
 const opus_int8 * const silk_LTP_vq_ptrs_Q7[NB_LTP_CBKS] = {
     (opus_int8 *)&silk_LTP_gain_vq_0[0][0],
     (opus_int8 *)&silk_LTP_gain_vq_1[0][0],
     (opus_int8 *)&silk_LTP_gain_vq_2[0][0]
 };
 
+/* Maximum frequency-dependent response of the pitch taps above,
+   computed as max(abs(freqz(taps))) */
+static const opus_uint8 silk_LTP_gain_vq_0_gain[8] = {
+      46,      2,     90,     87,     93,     91,     82,     98
+};
+
+static const opus_uint8 silk_LTP_gain_vq_1_gain[16] = {
+     109,    120,    118,     12,    113,    115,    117,    119,
+      99,     59,     87,    111,     63,    111,    112,     80
+};
+
+static const opus_uint8 silk_LTP_gain_vq_2_gain[32] = {
+     126,    124,    125,    124,    129,    121,    126,     23,
+     132,    127,    127,    127,    126,    127,    122,    133,
+     130,    134,    101,    118,    119,    145,    126,     86,
+     124,    120,    123,    119,    170,    173,    107,    109
+};
+
+const opus_uint8 * const silk_LTP_vq_gain_ptrs_Q7[NB_LTP_CBKS] = {
+    &silk_LTP_gain_vq_0_gain[0],
+    &silk_LTP_gain_vq_1_gain[0],
+    &silk_LTP_gain_vq_2_gain[0]
+};
+
 const opus_int8 silk_LTP_vq_sizes[NB_LTP_CBKS] = {
     8, 16, 32
 };
--- a/media/libopus/silk/tuning_parameters.h
+++ b/media/libopus/silk/tuning_parameters.h
@@ -45,29 +45,32 @@ extern "C"
 
 /* Bandwidth expansion for whitening filter in pitch analysis */
 #define FIND_PITCH_BANDWIDTH_EXPANSION                  0.99f
 
 /*********************/
 /* Linear prediction */
 /*********************/
 
-/* LPC analysis defines: regularization and bandwidth expansion */
+/* LPC analysis regularization */
 #define FIND_LPC_COND_FAC                               1e-5f
 
 /* LTP analysis defines */
 #define FIND_LTP_COND_FAC                               1e-5f
 #define LTP_DAMPING                                     0.05f
 #define LTP_SMOOTHING                                   0.1f
 
 /* LTP quantization settings */
 #define MU_LTP_QUANT_NB                                 0.03f
 #define MU_LTP_QUANT_MB                                 0.025f
 #define MU_LTP_QUANT_WB                                 0.02f
 
+/* Max cumulative LTP gain, in dB (the LTP gain quantizer divides it by 6.0 before converting to Q7) */
+#define MAX_SUM_LOG_GAIN_DB								250.0f
+
 /***********************/
 /* High pass filtering */
 /***********************/
 
 /* Smoothing parameters for low end of pitch frequency range estimation */
 #define VARIABLE_HP_SMTH_COEF1                          0.1f
 #define VARIABLE_HP_SMTH_COEF2                          0.015f
 #define VARIABLE_HP_MAX_DELTA_FREQ                      0.4f
--- a/media/libopus/silk/typedef.h
+++ b/media/libopus/silk/typedef.h
@@ -24,16 +24,17 @@ CONTRACT, STRICT LIABILITY, OR TORT (INC
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
 #ifndef SILK_TYPEDEF_H
 #define SILK_TYPEDEF_H
 
 #include "opus_types.h"
+#include "opus_defines.h"
 
 #ifndef FIXED_POINT
 # include <float.h>
 # define silk_float      float
 # define silk_float_MAX  FLT_MAX
 #endif
 
 #define silk_int64_MAX   ((opus_int64)0x7FFFFFFFFFFFFFFFLL)   /*  2^63 - 1 */
@@ -58,17 +59,17 @@ POSSIBILITY OF SUCH DAMAGE.
 #else
 # ifdef ENABLE_ASSERTIONS
 #  include <stdio.h>
 #  include <stdlib.h>
 #define silk_fatal(str) _silk_fatal(str, __FILE__, __LINE__);
 #ifdef __GNUC__
 __attribute__((noreturn))
 #endif
-static inline void _silk_fatal(const char *str, const char *file, int line)
+static OPUS_INLINE void _silk_fatal(const char *str, const char *file, int line)
 {
    fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str);
    abort();
 }
 #  define silk_assert(COND) {if (!(COND)) {silk_fatal("assertion failed: " #COND);}}
 # else
 #  define silk_assert(COND)
 # endif
--- a/media/libopus/src/analysis.c
+++ b/media/libopus/src/analysis.c
@@ -110,17 +110,17 @@ static const int extra_bands[NB_TOT_BAND
 };*/
 
 #define NB_TONAL_SKIP_BANDS 9
 
 #define cA 0.43157974f
 #define cB 0.67848403f
 #define cC 0.08595542f
 #define cE ((float)M_PI/2)
-static inline float fast_atan2f(float y, float x) {
+static OPUS_INLINE float fast_atan2f(float y, float x) {
    float x2, y2;
    /* Should avoid underflow on the values we'll get */
    if (ABS16(x)+ABS16(y)<1e-9f)
    {
       x*=1e12f;
       y*=1e12f;
    }
    x2 = x*x;
@@ -179,22 +179,22 @@ void tonality_get_info(TonalityAnalysisS
    psum=0;
    /* Summing the probability of transition patterns that involve music at
       time (DETECT_SIZE-curr_lookahead-1) */
    for (i=0;i<DETECT_SIZE-curr_lookahead;i++)
       psum += tonal->pmusic[i];
    for (;i<DETECT_SIZE;i++)
       psum += tonal->pspeech[i];
    psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence;
-   /*printf("%f %f\n", psum, info_out->music_prob);*/
+   /*printf("%f %f %f\n", psum, info_out->music_prob, info_out->tonality);*/
 
    info_out->music_prob = psum;
 }
 
-void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int C, int lsb_depth, downmix_func downmix)
+void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix)
 {
     int i, b;
     const kiss_fft_state *kfft;
     VARDECL(kiss_fft_cpx, in);
     VARDECL(kiss_fft_cpx, out);
     int N = 480, N2=240;
     float * OPUS_RESTRICT A = tonal->angle;
     float * OPUS_RESTRICT dA = tonal->d_angle;
@@ -229,17 +229,17 @@ void tonality_analysis(TonalityAnalysisS
     alphaE = 1.f/IMIN(50, 1+tonal->count);
     alphaE2 = 1.f/IMIN(1000, 1+tonal->count);
 
     if (tonal->count<4)
        tonal->music_prob = .5;
     kfft = celt_mode->mdct.kfft[0];
     if (tonal->count==0)
        tonal->mem_fill = 240;
-    downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, C);
+    downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C);
     if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)
     {
        tonal->mem_fill += len;
        /* Don't have enough to update the analysis */
        RESTORE_STACK;
        return;
     }
     info = &tonal->info[tonal->write_pos++];
@@ -248,37 +248,37 @@ void tonality_analysis(TonalityAnalysisS
 
     ALLOC(in, 480, kiss_fft_cpx);
     ALLOC(out, 480, kiss_fft_cpx);
     ALLOC(tonality, 240, float);
     ALLOC(noisiness, 240, float);
     for (i=0;i<N2;i++)
     {
        float w = analysis_window[i];
-       in[i].r = MULT16_16(w, tonal->inmem[i]);
-       in[i].i = MULT16_16(w, tonal->inmem[N2+i]);
-       in[N-i-1].r = MULT16_16(w, tonal->inmem[N-i-1]);
-       in[N-i-1].i = MULT16_16(w, tonal->inmem[N+N2-i-1]);
+       in[i].r = (kiss_fft_scalar)(w*tonal->inmem[i]);
+       in[i].i = (kiss_fft_scalar)(w*tonal->inmem[N2+i]);
+       in[N-i-1].r = (kiss_fft_scalar)(w*tonal->inmem[N-i-1]);
+       in[N-i-1].i = (kiss_fft_scalar)(w*tonal->inmem[N+N2-i-1]);
     }
     OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240);
     remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
-    downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, C);
+    downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C);
     tonal->mem_fill = 240 + remaining;
     opus_fft(kfft, in, out);
 
     for (i=1;i<N2;i++)
     {
        float X1r, X2r, X1i, X2i;
        float angle, d_angle, d2_angle;
        float angle2, d_angle2, d2_angle2;
        float mod1, mod2, avg_mod;
-       X1r = out[i].r+out[N-i].r;
-       X1i = out[i].i-out[N-i].i;
-       X2r = out[i].i+out[N-i].i;
-       X2i = out[N-i].r-out[i].r;
+       X1r = (float)out[i].r+out[N-i].r;
+       X1i = (float)out[i].i-out[N-i].i;
+       X2r = (float)out[i].i+out[N-i].i;
+       X2i = (float)out[N-i].r-out[i].r;
 
        angle = (float)(.5f/M_PI)*fast_atan2f(X1i, X1r);
        d_angle = angle - A[i];
        d2_angle = d_angle - dA[i];
 
        angle2 = (float)(.5f/M_PI)*fast_atan2f(X2i, X2r);
        d_angle2 = angle2 - angle;
        d2_angle2 = d_angle2 - d_angle;
@@ -312,57 +312,60 @@ void tonality_analysis(TonalityAnalysisS
        for (b=0;b<NB_TBANDS;b++)
        {
           tonal->lowE[b] = 1e10;
           tonal->highE[b] = -1e10;
        }
     }
     relativeE = 0;
     frame_loudness = 0;
-    bandwidth_mask = 0;
     for (b=0;b<NB_TBANDS;b++)
     {
        float E=0, tE=0, nE=0;
        float L1, L2;
        float stationarity;
        for (i=tbands[b];i<tbands[b+1];i++)
        {
-          float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r
-                     + out[i].i*out[i].i + out[N-i].i*out[N-i].i;
+          float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r
+                     + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i;
+#ifdef FIXED_POINT
+          /* FIXME: It's probably best to change the BFCC filter initial state instead */
+          binE *= 5.55e-17f;
+#endif
           E += binE;
           tE += binE*tonality[i];
           nE += binE*2.f*(.5f-noisiness[i]);
        }
        tonal->E[tonal->E_count][b] = E;
        frame_noisiness += nE/(1e-15f+E);
 
-       frame_loudness += celt_sqrt(E+1e-10f);
+       frame_loudness += (float)sqrt(E+1e-10f);
        logE[b] = (float)log(E+1e-10f);
        tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f);
        tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f);
        if (tonal->highE[b] < tonal->lowE[b]+1.f)
        {
           tonal->highE[b]+=.5f;
           tonal->lowE[b]-=.5f;
        }
-       relativeE += (logE[b]-tonal->lowE[b])/(EPSILON+tonal->highE[b]-tonal->lowE[b]);
+       relativeE += (logE[b]-tonal->lowE[b])/(1e-15f+tonal->highE[b]-tonal->lowE[b]);
 
        L1=L2=0;
        for (i=0;i<NB_FRAMES;i++)
        {
-          L1 += celt_sqrt(tonal->E[i][b]);
+          L1 += (float)sqrt(tonal->E[i][b]);
           L2 += tonal->E[i][b];
        }
 
-       stationarity = MIN16(0.99f,L1/celt_sqrt(EPSILON+NB_FRAMES*L2));
+       stationarity = MIN16(0.99f,L1/(float)sqrt(1e-15+NB_FRAMES*L2));
        stationarity *= stationarity;
        stationarity *= stationarity;
        frame_stationarity += stationarity;
        /*band_tonality[b] = tE/(1e-15+E)*/;
-       band_tonality[b] = MAX16(tE/(EPSILON+E), stationarity*tonal->prev_band_tonality[b]);
+       band_tonality[b] = MAX16(tE/(1e-15f+E), stationarity*tonal->prev_band_tonality[b]);
 #if 0
        if (b>=NB_TONAL_SKIP_BANDS)
        {
           frame_tonality += tweight[b]*band_tonality[b];
           tw_sum += tweight[b];
        }
 #else
        frame_tonality += band_tonality[b];
@@ -374,28 +377,31 @@ void tonality_analysis(TonalityAnalysisS
        /*printf("%f %f ", band_tonality[b], stationarity);*/
        tonal->prev_band_tonality[b] = band_tonality[b];
     }
 
     bandwidth_mask = 0;
     bandwidth = 0;
     maxE = 0;
     noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8)));
+#ifdef FIXED_POINT
+    noise_floor *= 1<<(15+SIG_SHIFT);
+#endif
     noise_floor *= noise_floor;
     for (b=0;b<NB_TOT_BANDS;b++)
     {
        float E=0;
        int band_start, band_end;
        /* Keep a margin of 300 Hz for aliasing */
        band_start = extra_bands[b];
        band_end = extra_bands[b+1];
        for (i=band_start;i<band_end;i++)
        {
-          float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r
-                     + out[i].i*out[i].i + out[N-i].i*out[N-i].i;
+          float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r
+                     + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i;
           E += binE;
        }
        maxE = MAX32(maxE, E);
        tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E);
        E = MAX32(E, tonal->meanE[b]);
        /* Use a simple follower with 13 dB/Bark slope for spreading function */
        bandwidth_mask = MAX32(.05f*bandwidth_mask, E);
        /* Consider the band "active" only if all these conditions are met:
@@ -464,24 +470,24 @@ void tonality_analysis(TonalityAnalysisS
     for (i=0;i<8;i++)
     {
        tonal->mem[i+24] = tonal->mem[i+16];
        tonal->mem[i+16] = tonal->mem[i+8];
        tonal->mem[i+8] = tonal->mem[i];
        tonal->mem[i] = BFCC[i];
     }
     for (i=0;i<9;i++)
-       features[11+i] = celt_sqrt(tonal->std[i]);
+       features[11+i] = (float)sqrt(tonal->std[i]);
     features[20] = info->tonality;
     features[21] = info->activity;
     features[22] = frame_stationarity;
     features[23] = info->tonality_slope;
     features[24] = tonal->lowECount;
 
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
     mlp_process(&net, features, frame_probs);
     frame_probs[0] = .5f*(frame_probs[0]+1);
     /* Curve fitting between the MLP probability and the actual probability */
     frame_probs[0] = .01f + 1.21f*frame_probs[0]*frame_probs[0] - .23f*(float)pow(frame_probs[0], 10);
     /* Probability of active audio (as opposed to silence) */
     frame_probs[1] = .5f*frame_probs[1]+.5f;
     /* Consider that silence has a 50-50 probability. */
     frame_probs[0] = frame_probs[1]*frame_probs[0] + (1-frame_probs[1])*.5f;
@@ -585,17 +591,16 @@ void tonality_analysis(TonalityAnalysisS
              tonal->speech_confidence += adapt*MIN16(.2f,frame_probs[0]-tonal->speech_confidence);
           }
        } else {
           if (tonal->music_confidence_count==0)
              tonal->music_confidence = .9f;
           if (tonal->speech_confidence_count==0)
              tonal->speech_confidence = .1f;
        }
-       psum = MAX16(tonal->speech_confidence, MIN16(tonal->music_confidence, psum));
     }
     if (tonal->last_music != (tonal->music_prob>.5f))
        tonal->last_transition=0;
     tonal->last_music = tonal->music_prob>.5f;
 #else
     info->music_prob = 0;
 #endif
     /*for (i=0;i<25;i++)
@@ -606,49 +611,35 @@ void tonality_analysis(TonalityAnalysisS
     /*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/
     info->noisiness = frame_noisiness;
     info->valid = 1;
     if (info_out!=NULL)
        OPUS_COPY(info_out, info, 1);
     RESTORE_STACK;
 }
 
-int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *pcm,
-                        const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
-                        int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
+void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm,
+                 int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
+                 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
 {
    int offset;
    int pcm_len;
 
-   /* Avoid overflow/wrap-around of the analysis buffer */
-   frame_size = IMIN((DETECT_SIZE-5)*Fs/100, frame_size);
-
-   pcm_len = frame_size - analysis->analysis_offset;
-   offset = 0;
-   do {
-      tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, C, lsb_depth, downmix);
-      offset += 480;
-      pcm_len -= 480;
-   } while (pcm_len>0);
-   analysis->analysis_offset = frame_size;
-
-   if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200)
+   if (analysis_pcm != NULL)
    {
-      int LM = 3;
-      LM = optimize_framesize((const opus_val16*)pcm, frame_size, C, Fs, bitrate_bps,
-            analysis->prev_tonality, analysis->subframe_mem, delay_compensation, downmix);
-      while ((Fs/400<<LM)>frame_size)
-         LM--;
-      frame_size = (Fs/400<<LM);
-   } else {
-      frame_size = frame_size_select(frame_size, variable_duration, Fs);
+      /* Avoid overflow/wrap-around of the analysis buffer */
+      analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/100, analysis_frame_size);
+
+      pcm_len = analysis_frame_size - analysis->analysis_offset;
+      offset = analysis->analysis_offset;
+      do {
+         tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix);
+         offset += 480;
+         pcm_len -= 480;
+      } while (pcm_len>0);
+      analysis->analysis_offset = analysis_frame_size;
+
+      analysis->analysis_offset -= frame_size;
    }
-   if (frame_size<0)
-      return -1;
-   analysis->analysis_offset -= frame_size;
 
-   /* Only perform analysis up to 20-ms frames. Longer ones will be split if
-      they're in CELT-only mode. */
    analysis_info->valid = 0;
    tonality_get_info(analysis, analysis_info, frame_size);
-
-   return frame_size;
 }
--- a/media/libopus/src/analysis.h
+++ b/media/libopus/src/analysis.h
@@ -37,17 +37,17 @@
 #define ANALYSIS_BUF_SIZE 720 /* 15 ms at 48 kHz */
 
 #define DETECT_SIZE 200
 
 typedef struct {
    float angle[240];
    float d_angle[240];
    float d2_angle[240];
-   float inmem[ANALYSIS_BUF_SIZE];
+   opus_val32 inmem[ANALYSIS_BUF_SIZE];
    int   mem_fill;                      /* number of usable samples in the buffer */
    float prev_band_tonality[NB_TBANDS];
    float prev_tonality;
    float E[NB_FRAMES][NB_TBANDS];
    float lowE[NB_TBANDS];
    float highE[NB_TBANDS];
    float meanE[NB_TOT_BANDS];
    float mem[32];
@@ -55,17 +55,17 @@ typedef struct {
    float std[9];
    float music_prob;
    float Etracker;
    float lowECount;
    int E_count;
    int last_music;
    int last_transition;
    int count;
-   opus_val32   subframe_mem[3];
+   float subframe_mem[3];
    int analysis_offset;
    /** Probability of having speech for time i to DETECT_SIZE-1 (and music before).
        pspeech[0] is the probability that all frames in the window are speech. */
    float pspeech[DETECT_SIZE];
    /** Probability of having music for time i to DETECT_SIZE-1 (and speech before).
        pmusic[0] is the probability that all frames in the window are music. */
    float pmusic[DETECT_SIZE];
    float speech_confidence;
@@ -74,17 +74,17 @@ typedef struct {
    int music_confidence_count;
    int write_pos;
    int read_pos;
    int read_subframe;
    AnalysisInfo info[DETECT_SIZE];
 } TonalityAnalysisState;
 
 void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info,
-     const CELTMode *celt_mode, const void *x, int len, int offset, int C, int lsb_depth, downmix_func downmix);
+     const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix);
 
 void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len);
 
-int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *pcm,
-                        const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
-                        int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info);
+void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm,
+                 int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
+                 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info);
 
 #endif
--- a/media/libopus/src/mlp.c
+++ b/media/libopus/src/mlp.c
@@ -24,72 +24,77 @@
    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
+#include "opus_types.h"
+#include "opus_defines.h"
+
 #include <math.h>
 #include "mlp.h"
 #include "arch.h"
 #include "tansig_table.h"
 #define MAX_NEURONS 100
 
-#ifdef FIXED_POINT
-static inline opus_val16 tansig_approx(opus_val32 _x) /* Q19 */
+#if 0
+static OPUS_INLINE opus_val16 tansig_approx(opus_val32 _x) /* Q19 */
 {
 	int i;
 	opus_val16 xx; /* Q11 */
 	/*double x, y;*/
 	opus_val16 dy, yy; /* Q14 */
 	/*x = 1.9073e-06*_x;*/
-	if (_x>=QCONST32(10,19))
+	if (_x>=QCONST32(8,19))
 		return QCONST32(1.,14);
-	if (_x<=-QCONST32(10,19))
+	if (_x<=-QCONST32(8,19))
 		return -QCONST32(1.,14);
 	xx = EXTRACT16(SHR32(_x, 8));
 	/*i = lrint(25*x);*/
 	i = SHR32(ADD32(1024,MULT16_16(25, xx)),11);
 	/*x -= .04*i;*/
 	xx -= EXTRACT16(SHR32(MULT16_16(20972,i),8));
 	/*x = xx*(1./2048);*/
 	/*y = tansig_table[250+i];*/
 	yy = tansig_table[250+i];
 	/*y = yy*(1./16384);*/
 	dy = 16384-MULT16_16_Q14(yy,yy);
 	yy = yy + MULT16_16_Q14(MULT16_16_Q11(xx,dy),(16384 - MULT16_16_Q11(yy,xx)));
 	return yy;
 }
 #else
 /*extern const float tansig_table[501];*/
-static inline opus_val16 tansig_approx(opus_val16 x)
+static OPUS_INLINE float tansig_approx(float x) /* Table-driven sigmoid: looks up tansig_table in steps of 0.04 on |x| and refines with a correction term; saturates to +/-1 outside (-8,8) */
 {
 	int i;
-	opus_val16 y, dy;
-	opus_val16 sign=1;
-    if (x>=8)
+	float y, dy;
+	float sign=1; /* the table is indexed with |x|; the sign is reapplied on return */
+	/* Tests are reversed to catch NaNs: a NaN fails x<8, so it takes the first return and yields 1 */
+    if (!(x<8))
         return 1;
-    if (x<=-8)
+    if (!(x>-8))
         return -1;
 	if (x<0)
 	{
 	   x=-x;
 	   sign=-1;
 	}
 	i = (int)floor(.5f+25*x);
 	x -= .04f*i;
 	y = tansig_table[i];
 	dy = 1-y*y;
 	y = y + x*dy*(1 - y*x);
 	return sign*y;
 }
 #endif
 
+#if 0
 void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out)
 {
 	int j;
 	opus_val16 hidden[MAX_NEURONS];
 	const opus_val16 *W = m->weights;
 	/* Copy to tmp_in */
 	for (j=0;j<m->topo[1];j++)
 	{
@@ -103,9 +108,33 @@ void mlp_process(const MLP *m, const opu
 	{
 		int k;
 		opus_val32 sum = SHL32(EXTEND32(*W++),14);
 		for (k=0;k<m->topo[1];k++)
 			sum = MAC16_16(sum, hidden[k], *W++);
 		out[j] = tansig_approx(EXTRACT16(PSHR32(sum,17)));
 	}
 }
-
+#else
+void mlp_process(const MLP *m, const float *in, float *out) /* Evaluates a two-layer network: topo[0] inputs -> topo[1] hidden values -> topo[2] outputs written to out */
+{
+    int j;
+    float hidden[MAX_NEURONS]; /* NOTE(review): topo[1] is not checked against MAX_NEURONS in this function */
+    const float *W = m->weights; /* read strictly front to back: for each neuron, one bias then its input weights */
+    /* Hidden layer: bias plus weighted sum of the inputs, passed through tansig_approx() */
+    for (j=0;j<m->topo[1];j++)
+    {
+        int k;
+        float sum = *W++; /* bias term */
+        for (k=0;k<m->topo[0];k++)
+            sum = sum + in[k]**W++;
+        hidden[j] = tansig_approx(sum);
+    }
+    for (j=0;j<m->topo[2];j++) /* output layer: same computation over the hidden values */
+    {
+        int k;
+        float sum = *W++; /* bias term */
+        for (k=0;k<m->topo[1];k++)
+            sum = sum + hidden[k]**W++;
+        out[j] = tansig_approx(sum);
+    }
+}
+#endif
--- a/media/libopus/src/mlp.h
+++ b/media/libopus/src/mlp.h
@@ -28,14 +28,14 @@
 #ifndef _MLP_H_
 #define _MLP_H_
 
 #include "arch.h"
 
 typedef struct {
 	int layers;
 	const int *topo;
-	const opus_val16 *weights;
+	const float *weights; /* flat array consumed sequentially by mlp_process(): each neuron's bias followed by its input weights */
 } MLP;
 
-void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out);
+void mlp_process(const MLP *m, const float *in, float *out);
 
 #endif /* _MLP_H_ */
--- a/media/libopus/src/opus.c
+++ b/media/libopus/src/opus.c
@@ -34,16 +34,18 @@
 
 #ifndef DISABLE_FLOAT_API
 OPUS_EXPORT void opus_pcm_soft_clip(float *_x, int N, int C, float *declip_mem)
 {
    int c;
    int i;
    float *x;
 
+   if (C<1 || N<1 || !_x || !declip_mem) return;
+
    /* First thing: saturate everything to +/- 2 which is the highest level our
       non-linearity can handle. At the point where the signal reaches +/-2,
       the derivative will be zero anyway, so this doesn't introduce any
       discontinuity in the derivative. */
    for (i=0;i<N*C;i++)
       _x[i] = MAX16(-2.f, MIN16(2.f, _x[i]));
    for (c=0;c<C;c++)
    {
@@ -139,8 +141,189 @@ int encode_size(int size, unsigned char 
       return 1;
    } else {
       data[0] = 252+(size&0x3);
       data[1] = (size-(int)data[0])>>2;
       return 2;
    }
 }
 
+static int parse_size(const unsigned char *data, opus_int32 len, opus_int16 *size) /* Decodes a 1- or 2-byte frame size from data into *size; returns the bytes consumed, or -1 (with *size=-1) when len is too short */
+{
+   if (len<1) /* nothing to read */
+   {
+      *size = -1;
+      return -1;
+   } else if (data[0]<252) /* one-byte form: the byte is the size itself */
+   {
+      *size = data[0];
+      return 1;
+   } else if (len<2) /* first byte announces a second byte that is not available */
+   {
+      *size = -1;
+      return -1;
+   } else {
+      *size = 4*data[1] + data[0]; /* two-byte form; data[0] is at least 252 on this path */
+      return 2;
+   }
+}
+
+int opus_packet_parse_impl(const unsigned char *data, opus_int32 len,
+      int self_delimited, unsigned char *out_toc,
+      const unsigned char *frames[48], opus_int16 size[48],
+      int *payload_offset, opus_int32 *packet_offset)
+{
+   int i, bytes;
+   int count;
+   int cbr;
+   unsigned char ch, toc;
+   int framesize;
+   opus_int32 last_size;
+   opus_int32 pad = 0;
+   const unsigned char *data0 = data;
+
+   if (size==NULL)
+      return OPUS_BAD_ARG;
+
+   framesize = opus_packet_get_samples_per_frame(data, 48000);
+
+   cbr = 0;
+   toc = *data++;
+   len--;
+   last_size = len;
+   switch (toc&0x3)
+   {
+   /* One frame */
+   case 0:
+      count=1;
+      break;
+   /* Two CBR frames */
+   case 1:
+      count=2;
+      cbr = 1;
+      if (!self_delimited)
+      {
+         if (len&0x1)
+            return OPUS_INVALID_PACKET;
+         last_size = len/2;
+         /* If last_size doesn't fit in size[0], we'll catch it later */
+         size[0] = (opus_int16)last_size;
+      }
+      break;
+   /* Two VBR frames */
+   case 2:
+      count = 2;
+      bytes = parse_size(data, len, size);
+      len -= bytes;
+      if (size[0]<0 || size[0] > len)
+         return OPUS_INVALID_PACKET;
+      data += bytes;
+      last_size = len-size[0];
+      break;
+   /* Multiple CBR/VBR frames (from 0 to 120 ms) */
+   default: /*case 3:*/
+      if (len<1)
+         return OPUS_INVALID_PACKET;
+      /* Number of frames encoded in bits 0 to 5 */
+      ch = *data++;
+      count = ch&0x3F;
+      if (count <= 0 || framesize*count > 5760)
+         return OPUS_INVALID_PACKET;
+      len--;
+      /* Padding flag is bit 6 */
+      if (ch&0x40)
+      {
+         int p;
+         do {
+            int tmp;
+            if (len<=0)
+               return OPUS_INVALID_PACKET;
+            p = *data++;
+            len--;
+            tmp = p==255 ? 254: p;
+            len -= tmp;
+            pad += tmp;
+         } while (p==255);
+      }
+      if (len<0)
+         return OPUS_INVALID_PACKET;
+      /* VBR flag is bit 7 */
+      cbr = !(ch&0x80);
+      if (!cbr)
+      {
+         /* VBR case */
+         last_size = len;
+         for (i=0;i<count-1;i++)
+         {
+            bytes = parse_size(data, len, size+i);
+            len -= bytes;
+            if (size[i]<0 || size[i] > len)
+               return OPUS_INVALID_PACKET;
+            data += bytes;
+            last_size -= bytes+size[i];
+         }
+         if (last_size<0)
+            return OPUS_INVALID_PACKET;
+      } else if (!self_delimited)
+      {
+         /* CBR case */
+         last_size = len/count;
+         if (last_size*count!=len)
+            return OPUS_INVALID_PACKET;
+         for (i=0;i<count-1;i++)
+            size[i] = (opus_int16)last_size;
+      }
+      break;
+   }
+   /* Self-delimited framing has an extra size for the last frame. */
+   if (self_delimited)
+   {
+      bytes = parse_size(data, len, size+count-1);
+      len -= bytes;
+      if (size[count-1]<0 || size[count-1] > len)
+         return OPUS_INVALID_PACKET;
+      data += bytes;
+      /* For CBR packets, apply the size to all the frames. */