Bug 1487049 - Update libopus to v1.3-rc-19-g5cbd7d5f; r=jmspeex
author Dan Minor <dminor@mozilla.com>
Thu, 30 Aug 2018 10:29:43 -0400
changeset 436001 5f2915028e3f785513efd3e211aa4efa0a5896be
parent 436000 0d8d4803236e5ee1300d0c95f73c5e57f3ade2dc
child 436002 87c44124c772e288a4a556627be73d6005ac6382
push id 107770
push user dminor@mozilla.com
push date Wed, 12 Sep 2018 20:27:45 +0000
treeherder mozilla-inbound@309d6b502c71 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jmspeex
bugs 1487049
milestone 64.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1487049 - Update libopus to v1.3-rc-19-g5cbd7d5f; r=jmspeex Tags: #secure-revision Bug #: 1487049 Differential Revision: https://phabricator.services.mozilla.com/D4722
media/libopus/README_MOZILLA
media/libopus/celt/arch.h
media/libopus/celt/arm/celt_fft_ne10.c
media/libopus/celt/arm/celt_mdct_ne10.c
media/libopus/celt/arm/celt_neon_intr.c
media/libopus/celt/arm/pitch_arm.h
media/libopus/celt/bands.c
media/libopus/celt/bands.h
media/libopus/celt/celt.h
media/libopus/celt/celt_decoder.c
media/libopus/celt/celt_encoder.c
media/libopus/celt/celt_lpc.c
media/libopus/celt/cwrs.c
media/libopus/celt/entcode.h
media/libopus/celt/entdec.h
media/libopus/celt/entenc.h
media/libopus/celt/mathops.c
media/libopus/celt/mathops.h
media/libopus/celt/pitch.c
media/libopus/celt/quant_bands.c
media/libopus/celt/vq.c
media/libopus/celt/x86/celt_lpc_sse4_1.c
media/libopus/celt/x86/vq_sse2.c
media/libopus/celt/x86/x86cpu.h
media/libopus/include/opus.h
media/libopus/include/opus_defines.h
media/libopus/include/opus_projection.h
media/libopus/include/opus_types.h
media/libopus/moz.build
media/libopus/silk/API.h
media/libopus/silk/CNG.c
media/libopus/silk/LPC_analysis_filter.c
media/libopus/silk/NLSF2A.c
media/libopus/silk/NLSF_VQ.c
media/libopus/silk/NLSF_VQ_weights_laroia.c
media/libopus/silk/NLSF_encode.c
media/libopus/silk/NSQ.c
media/libopus/silk/NSQ_del_dec.c
media/libopus/silk/PLC.c
media/libopus/silk/VAD.c
media/libopus/silk/arm/LPC_inv_pred_gain_neon_intr.c
media/libopus/silk/check_control_input.c
media/libopus/silk/control_SNR.c
media/libopus/silk/control_audio_bandwidth.c
media/libopus/silk/control_codec.c
media/libopus/silk/dec_API.c
media/libopus/silk/decode_core.c
media/libopus/silk/decode_frame.c
media/libopus/silk/decode_indices.c
media/libopus/silk/decode_pitch.c
media/libopus/silk/decode_pulses.c
media/libopus/silk/decoder_set_fs.c
media/libopus/silk/define.h
media/libopus/silk/enc_API.c
media/libopus/silk/encode_indices.c
media/libopus/silk/encode_pulses.c
media/libopus/silk/fixed/apply_sine_window_FIX.c
media/libopus/silk/fixed/burg_modified_FIX.c
media/libopus/silk/fixed/encode_frame_FIX.c
media/libopus/silk/fixed/find_LPC_FIX.c
media/libopus/silk/fixed/find_pitch_lags_FIX.c
media/libopus/silk/fixed/find_pred_coefs_FIX.c
media/libopus/silk/fixed/main_FIX.h
media/libopus/silk/fixed/pitch_analysis_core_FIX.c
media/libopus/silk/fixed/residual_energy16_FIX.c
media/libopus/silk/fixed/residual_energy_FIX.c
media/libopus/silk/fixed/schur64_FIX.c
media/libopus/silk/fixed/schur_FIX.c
media/libopus/silk/fixed/warped_autocorrelation_FIX.c
media/libopus/silk/fixed/x86/burg_modified_FIX_sse4_1.c
media/libopus/silk/fixed/x86/vector_ops_FIX_sse4_1.c
media/libopus/silk/float/LPC_analysis_filter_FLP.c
media/libopus/silk/float/apply_sine_window_FLP.c
media/libopus/silk/float/burg_modified_FLP.c
media/libopus/silk/float/encode_frame_FLP.c
media/libopus/silk/float/find_LPC_FLP.c
media/libopus/silk/float/find_pitch_lags_FLP.c
media/libopus/silk/float/find_pred_coefs_FLP.c
media/libopus/silk/float/main_FLP.h
media/libopus/silk/float/pitch_analysis_core_FLP.c
media/libopus/silk/float/residual_energy_FLP.c
media/libopus/silk/float/schur_FLP.c
media/libopus/silk/float/sort_FLP.c
media/libopus/silk/float/warped_autocorrelation_FLP.c
media/libopus/silk/interpolate.c
media/libopus/silk/process_NLSFs.c
media/libopus/silk/resampler.c
media/libopus/silk/resampler_down2.c
media/libopus/silk/resampler_private_down_FIR.c
media/libopus/silk/sort.c
media/libopus/silk/stereo_LR_to_MS.c
media/libopus/silk/stereo_encode_pred.c
media/libopus/silk/structs.h
media/libopus/silk/tables.h
media/libopus/silk/tables_other.c
media/libopus/silk/x86/NSQ_del_dec_sse4_1.c
media/libopus/silk/x86/NSQ_sse4_1.c
media/libopus/silk/x86/VAD_sse4_1.c
media/libopus/silk/x86/VQ_WMat_EC_sse4_1.c
media/libopus/sources.mozbuild
media/libopus/src/analysis.c
media/libopus/src/analysis.h
media/libopus/src/mapping_matrix.c
media/libopus/src/mapping_matrix.h
media/libopus/src/mlp.c
media/libopus/src/mlp.h
media/libopus/src/mlp_data.c
media/libopus/src/opus_decoder.c
media/libopus/src/opus_encoder.c
media/libopus/src/opus_multistream_decoder.c
media/libopus/src/opus_multistream_encoder.c
media/libopus/src/opus_private.h
media/libopus/src/opus_projection_decoder.c
media/libopus/src/opus_projection_encoder.c
media/libopus/src/repacketizer.c
--- a/media/libopus/README_MOZILLA
+++ b/media/libopus/README_MOZILLA
@@ -3,9 +3,9 @@ IETF Opus audio codec reference implemen
 The source in this directory was copied from an opus
 repository checkout by running the ./update.sh script.
 Any changes made to this version of the source should
 be reflected in that script, e.g. by applying patch
 files after the copy step.
 
 The upstream repository is https://git.xiph.org/opus.git
 
-The git tag/revision used was v1.2.1.
+The git tag/revision used was v1.3-rc-19-g5cbd7d5f.
--- a/media/libopus/celt/arch.h
+++ b/media/libopus/celt/arch.h
@@ -51,33 +51,50 @@
 #define opus_unlikely(x)     (__builtin_expect(!!(x), 0))
 #else
 #define opus_likely(x)       (!!(x))
 #define opus_unlikely(x)     (!!(x))
 #endif
 
 #define CELT_SIG_SCALE 32768.f
 
-#define celt_fatal(str) _celt_fatal(str, __FILE__, __LINE__);
-#ifdef ENABLE_ASSERTIONS
+#define CELT_FATAL(str) celt_fatal(str, __FILE__, __LINE__);
+
+#if defined(ENABLE_ASSERTIONS) || defined(ENABLE_HARDENING)
+#ifdef __GNUC__
+__attribute__((noreturn))
+#endif
+void celt_fatal(const char *str, const char *file, int line);
+
+#if defined(CELT_C) && !defined(OVERRIDE_celt_fatal)
 #include <stdio.h>
 #include <stdlib.h>
 #ifdef __GNUC__
 __attribute__((noreturn))
 #endif
-static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line)
+void celt_fatal(const char *str, const char *file, int line)
 {
    fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str);
    abort();
 }
-#define celt_assert(cond) {if (!(cond)) {celt_fatal("assertion failed: " #cond);}}
-#define celt_assert2(cond, message) {if (!(cond)) {celt_fatal("assertion failed: " #cond "\n" message);}}
+#endif
+
+#define celt_assert(cond) {if (!(cond)) {CELT_FATAL("assertion failed: " #cond);}}
+#define celt_assert2(cond, message) {if (!(cond)) {CELT_FATAL("assertion failed: " #cond "\n" message);}}
+#define MUST_SUCCEED(call) celt_assert((call) == OPUS_OK)
 #else
 #define celt_assert(cond)
 #define celt_assert2(cond, message)
+#define MUST_SUCCEED(call) do {if((call) != OPUS_OK) {RESTORE_STACK; return OPUS_INTERNAL_ERROR;} } while (0)
+#endif
+
+#if defined(ENABLE_ASSERTIONS)
+#define celt_sig_assert(cond) {if (!(cond)) {CELT_FATAL("signal assertion failed: " #cond);}}
+#else
+#define celt_sig_assert(cond)
 #endif
 
 #define IMUL32(a,b) ((a)*(b))
 
 #define MIN16(a,b) ((a) < (b) ? (a) : (b))   /**< Minimum 16-bit value.   */
 #define MAX16(a,b) ((a) > (b) ? (a) : (b))   /**< Maximum 16-bit value.   */
 #define MIN32(a,b) ((a) < (b) ? (a) : (b))   /**< Minimum 32-bit value.   */
 #define MAX32(a,b) ((a) > (b) ? (a) : (b))   /**< Maximum 32-bit value.   */
@@ -102,16 +119,18 @@ static OPUS_INLINE void _celt_fatal(cons
 typedef opus_int16 opus_val16;
 typedef opus_int32 opus_val32;
 typedef opus_int64 opus_val64;
 
 typedef opus_val32 celt_sig;
 typedef opus_val16 celt_norm;
 typedef opus_val32 celt_ener;
 
+#define celt_isnan(x) 0
+
 #define Q15ONE 32767
 
 #define SIG_SHIFT 12
 /* Safe saturation value for 32-bit signals. Should be less than
    2^31*(1-0.85) to avoid blowing up on DC at deemphasis.*/
 #define SIG_SAT (300000000)
 
 #define NORM_SCALING 16384
new file mode 100644
--- /dev/null
+++ b/media/libopus/celt/arm/celt_fft_ne10.c
@@ -0,0 +1,173 @@
+/* Copyright (c) 2015 Xiph.Org Foundation
+   Written by Viswanath Puttagunta */
+/**
+   @file celt_fft_ne10.c
+   @brief ARM Neon optimizations for fft using NE10 library
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef SKIP_CONFIG_H
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#endif
+
+#include <NE10_dsp.h>
+#include "os_support.h"
+#include "kiss_fft.h"
+#include "stack_alloc.h"
+
+#if !defined(FIXED_POINT)
+# define NE10_FFT_ALLOC_C2C_TYPE_NEON ne10_fft_alloc_c2c_float32_neon
+# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_float32_t
+# define NE10_FFT_STATE_TYPE_T ne10_fft_state_float32_t
+# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_float32
+# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_float32_t
+# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_float32_neon
+#else
+# define NE10_FFT_ALLOC_C2C_TYPE_NEON(nfft) ne10_fft_alloc_c2c_int32_neon(nfft)
+# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_int32_t
+# define NE10_FFT_STATE_TYPE_T ne10_fft_state_int32_t
+# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32
+# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32
+# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_int32_t
+# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_int32_neon
+#endif
+
+#if defined(CUSTOM_MODES)
+
+/* nfft lengths in NE10 that support scaled fft */
+# define NE10_FFTSCALED_SUPPORT_MAX 4
+static const int ne10_fft_scaled_support[NE10_FFTSCALED_SUPPORT_MAX] = {
+   480, 240, 120, 60
+};
+
+/* Attach NE10 FFT state to st. Returns 0 on success, -1 when an allocation fails. */
+int opus_fft_alloc_arm_neon(kiss_fft_state *st)
+{
+   int i;
+   size_t memneeded = sizeof(struct arch_fft_state);
+
+   st->arch_fft = (arch_fft_state *)opus_alloc(memneeded);
+   if (!st->arch_fft)
+      return -1;
+   for (i = 0; i < NE10_FFTSCALED_SUPPORT_MAX; i++) { /* search the supported-length table */
+      if(st->nfft == ne10_fft_scaled_support[i])
+         break;
+   }
+   if (i == NE10_FFTSCALED_SUPPORT_MAX) {
+      /* This nfft length (scaled fft) is not supported in NE10; still reported as success */
+      st->arch_fft->is_supported = 0;
+      st->arch_fft->priv = NULL;
+   }
+   else {
+      st->arch_fft->is_supported = 1;
+      st->arch_fft->priv = (void *)NE10_FFT_ALLOC_C2C_TYPE_NEON(st->nfft);
+      if (st->arch_fft->priv == NULL) {
+         return -1; /* arch_fft stays allocated; NOTE(review): presumably the caller frees it - confirm */
+      }
+   }
+   return 0;
+}
+
+/* Destroy the NE10 config (if any) and free st->arch_fft; no-op when arch_fft is NULL. */
+void opus_fft_free_arm_neon(kiss_fft_state *st)
+{
+   NE10_FFT_CFG_TYPE_T cfg;
+
+   if (!st->arch_fft)
+      return;
+   cfg = (NE10_FFT_CFG_TYPE_T)st->arch_fft->priv;
+   if (cfg)
+      NE10_FFT_DESTROY_C2C_TYPE(cfg);
+   opus_free(st->arch_fft); /* the st->arch_fft pointer itself is not reset to NULL */
+}
+#endif
+
+/* Forward FFT of fin into fout: NE10 when st's length is supported, else opus_fft_c. */
+void opus_fft_neon(const kiss_fft_state *st,
+                   const kiss_fft_cpx *fin,
+                   kiss_fft_cpx *fout)
+{
+   NE10_FFT_STATE_TYPE_T state;
+   NE10_FFT_CFG_TYPE_T cfg = &state;
+   VARDECL(NE10_FFT_CPX_TYPE_T, buffer);
+   SAVE_STACK;
+   ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T);
+
+   if (!st->arch_fft->is_supported) {
+      /* This nfft length (scaled fft) not supported in NE10 */
+      opus_fft_c(st, fin, fout);
+   }
+   else {
+      memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T)); /* local copy of the config */
+      state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0]; /* scratch allocated for this call */
+#if !defined(FIXED_POINT)
+      state.is_forward_scaled = 1;
+      NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
+                                (NE10_FFT_CPX_TYPE_T *)fin,
+                                cfg, 0);
+#else
+      NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
+                                (NE10_FFT_CPX_TYPE_T *)fin,
+                                cfg, 0, 1); /* NOTE(review): presumably inverse=0, scaled=1 - confirm with NE10 */
+#endif
+   }
+   RESTORE_STACK;
+}
+
+/* Inverse FFT of fin into fout: NE10 when st's length is supported, else opus_ifft_c. */
+void opus_ifft_neon(const kiss_fft_state *st,
+                    const kiss_fft_cpx *fin,
+                    kiss_fft_cpx *fout)
+{
+   NE10_FFT_STATE_TYPE_T state;
+   NE10_FFT_CFG_TYPE_T cfg = &state;
+   VARDECL(NE10_FFT_CPX_TYPE_T, buffer);
+   SAVE_STACK;
+   ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T);
+
+   if (!st->arch_fft->is_supported) {
+      /* This nfft length (scaled fft) not supported in NE10 */
+      opus_ifft_c(st, fin, fout);
+   }
+   else {
+      memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T)); /* local copy of the config */
+      state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0]; /* scratch allocated for this call */
+#if !defined(FIXED_POINT)
+      state.is_backward_scaled = 0;
+      NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
+                                (NE10_FFT_CPX_TYPE_T *)fin,
+                                cfg, 1);
+#else
+      NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
+                                (NE10_FFT_CPX_TYPE_T *)fin,
+                                cfg, 1, 0); /* NOTE(review): presumably inverse=1, scaled=0 - confirm with NE10 */
+#endif
+   }
+   RESTORE_STACK;
+}
new file mode 100644
--- /dev/null
+++ b/media/libopus/celt/arm/celt_mdct_ne10.c
@@ -0,0 +1,258 @@
+/* Copyright (c) 2015 Xiph.Org Foundation
+   Written by Viswanath Puttagunta */
+/**
+   @file celt_mdct_ne10.c
+   @brief ARM Neon optimizations for mdct using NE10 library
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef SKIP_CONFIG_H
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#endif
+
+#include "kiss_fft.h"
+#include "_kiss_fft_guts.h"
+#include "mdct.h"
+#include "stack_alloc.h"
+
+/* Forward MDCT: window/fold in, pre-rotate, opus_fft, then post-rotate into out (strided). */
+void clt_mdct_forward_neon(const mdct_lookup *l,
+                           kiss_fft_scalar *in,
+                           kiss_fft_scalar * OPUS_RESTRICT out,
+                           const opus_val16 *window,
+                           int overlap, int shift, int stride, int arch)
+{
+   int i;
+   int N, N2, N4;
+   VARDECL(kiss_fft_scalar, f);
+   VARDECL(kiss_fft_cpx, f2);
+   const kiss_fft_state *st = l->kfft[shift];
+   const kiss_twiddle_scalar *trig;
+
+   SAVE_STACK;
+
+   N = l->n;
+   trig = l->trig;
+   for (i=0;i<shift;i++) /* halve N once per shift, advancing trig by the new N each time */
+   {
+      N >>= 1;
+      trig += N;
+   }
+   N2 = N>>1;
+   N4 = N>>2;
+
+   ALLOC(f, N2, kiss_fft_scalar);
+   ALLOC(f2, N4, kiss_fft_cpx);
+
+   /* Consider the input to be composed of four blocks: [a, b, c, d] */
+   /* Window, shuffle, fold */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
+      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
+      kiss_fft_scalar * OPUS_RESTRICT yp = f;
+      const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
+      const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
+      for(i=0;i<((overlap+3)>>2);i++)
+      {
+         /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
+         *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
+         *yp++ = MULT16_32_Q15(*wp1, *xp1)    - MULT16_32_Q15(*wp2, xp2[-N2]);
+         xp1+=2;
+         xp2-=2;
+         wp1+=2;
+         wp2-=2;
+      }
+      wp1 = window;
+      wp2 = window+overlap-1;
+      for(;i<N4-((overlap+3)>>2);i++)
+      {
+         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+         *yp++ = *xp2;
+         *yp++ = *xp1;
+         xp1+=2;
+         xp2-=2;
+      }
+      for(;i<N4;i++)
+      {
+         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+         *yp++ =  -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
+         *yp++ = MULT16_32_Q15(*wp2, *xp1)     + MULT16_32_Q15(*wp1, xp2[N2]);
+         xp1+=2;
+         xp2-=2;
+         wp1+=2;
+         wp2-=2;
+      }
+   }
+   /* Pre-rotation */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT yp = f;
+      const kiss_twiddle_scalar *t = &trig[0];
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_cpx yc;
+         kiss_twiddle_scalar t0, t1;
+         kiss_fft_scalar re, im, yr, yi;
+         t0 = t[i];
+         t1 = t[N4+i];
+         re = *yp++;
+         im = *yp++;
+         yr = S_MUL(re,t0)  -  S_MUL(im,t1);
+         yi = S_MUL(im,t0)  +  S_MUL(re,t1);
+         yc.r = yr;
+         yc.i = yi;
+         f2[i] = yc;
+      }
+   }
+
+   opus_fft(st, f2, (kiss_fft_cpx *)f, arch);
+
+   /* Post-rotate */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_cpx * OPUS_RESTRICT fp = (kiss_fft_cpx *)f;
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+      kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
+      const kiss_twiddle_scalar *t = &trig[0];
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_scalar yr, yi;
+         yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
+         yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
+         *yp1 = yr;
+         *yp2 = yi;
+         fp++;
+         yp1 += 2*stride;
+         yp2 -= 2*stride;
+      }
+   }
+   RESTORE_STACK;
+}
+
+/* Inverse MDCT: pre-rotate in (strided), opus_ifft into out, post-rotate in place, TDAC mirror. */
+void clt_mdct_backward_neon(const mdct_lookup *l,
+                            kiss_fft_scalar *in,
+                            kiss_fft_scalar * OPUS_RESTRICT out,
+                            const opus_val16 * OPUS_RESTRICT window,
+                            int overlap, int shift, int stride, int arch)
+{
+   int i;
+   int N, N2, N4;
+   VARDECL(kiss_fft_scalar, f);
+   const kiss_twiddle_scalar *trig;
+   const kiss_fft_state *st = l->kfft[shift];
+   SAVE_STACK; /* pairs with the RESTORE_STACK at the end of the function */
+
+   N = l->n;
+   trig = l->trig;
+   for (i=0;i<shift;i++)
+   {
+      N >>= 1;
+      trig += N;
+   }
+   N2 = N>>1;
+   N4 = N>>2;
+
+   ALLOC(f, N2, kiss_fft_scalar);
+
+   /* Pre-rotate */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
+      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
+      kiss_fft_scalar * OPUS_RESTRICT yp = f;
+      const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_scalar yr, yi;
+         yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]);
+         yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]);
+         yp[2*i] = yr;
+         yp[2*i+1] = yi;
+         xp1+=2*stride;
+         xp2-=2*stride;
+      }
+   }
+
+   opus_ifft(st, (kiss_fft_cpx *)f, (kiss_fft_cpx*)(out+(overlap>>1)), arch);
+
+   /* Post-rotate and de-shuffle from both ends of the buffer at once to make it in-place. */
+   {
+      kiss_fft_scalar * yp0 = out+(overlap>>1);
+      kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2;
+      const kiss_twiddle_scalar *t = &trig[0];
+      /* Loop to (N4+1)>>1 to handle odd N4; when N4 is odd the middle pair is computed twice. */
+      for(i=0;i<(N4+1)>>1;i++)
+      {
+         kiss_fft_scalar re, im, yr, yi;
+         kiss_twiddle_scalar t0, t1;
+         re = yp0[0];
+         im = yp0[1];
+         t0 = t[i];
+         t1 = t[N4+i];
+         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
+         yr = S_MUL(re,t0) + S_MUL(im,t1);
+         yi = S_MUL(re,t1) - S_MUL(im,t0);
+         re = yp1[0];
+         im = yp1[1];
+         yp0[0] = yr;
+         yp1[1] = yi;
+
+         t0 = t[(N4-i-1)];
+         t1 = t[(N2-i-1)];
+         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
+         yr = S_MUL(re,t0) + S_MUL(im,t1);
+         yi = S_MUL(re,t1) - S_MUL(im,t0);
+         yp1[0] = yr;
+         yp0[1] = yi;
+         yp0 += 2;
+         yp1 -= 2;
+      }
+   }
+
+   /* Mirror on both sides for TDAC */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+      const opus_val16 * OPUS_RESTRICT wp1 = window;
+      const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
+
+      for(i = 0; i < overlap/2; i++)
+      {
+         kiss_fft_scalar x1, x2;
+         x1 = *xp1;
+         x2 = *yp1;
+         *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
+         *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
+         wp1++;
+         wp2--;
+      }
+   }
+   RESTORE_STACK;
+}
--- a/media/libopus/celt/arm/celt_neon_intr.c
+++ b/media/libopus/celt/arm/celt_neon_intr.c
@@ -191,17 +191,17 @@ static void xcorr_kernel_neon_float(cons
    vst1q_f32(sum, SUMM);
 }
 
 void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
                         opus_val32 *xcorr, int len, int max_pitch, int arch) {
    int i;
    (void)arch;
    celt_assert(max_pitch > 0);
-   celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
+   celt_sig_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
 
    for (i = 0; i < (max_pitch-3); i += 4) {
       xcorr_kernel_neon_float((const float32_t *)_x, (const float32_t *)_y+i,
             (float32_t *)xcorr+i, len);
    }
 
    /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
    for (; i < max_pitch; i++) {
--- a/media/libopus/celt/arm/pitch_arm.h
+++ b/media/libopus/celt/arm/pitch_arm.h
@@ -85,17 +85,19 @@ opus_val32 celt_pitch_xcorr_edsp(const o
 #  if defined(OPUS_HAVE_RTCD) && \
     ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \
      (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \
      (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))
 extern opus_val32
 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
       const opus_val16 *, opus_val32 *, int, int, int);
 #   define OVERRIDE_PITCH_XCORR (1)
-#   define celt_pitch_xcorr (*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])
+#   define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+  ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
+        xcorr, len, max_pitch, arch))
 
 #  elif defined(OPUS_ARM_PRESUME_EDSP) || \
     defined(OPUS_ARM_PRESUME_MEDIA) || \
     defined(OPUS_ARM_PRESUME_NEON)
 #   define OVERRIDE_PITCH_XCORR (1)
 #   define celt_pitch_xcorr (PRESUME_NEON(celt_pitch_xcorr))
 
 #  endif
@@ -137,17 +139,19 @@ void celt_pitch_xcorr_float_neon(const o
 
 #  if defined(OPUS_HAVE_RTCD) && \
     (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
 extern void
 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
       const opus_val16 *, opus_val32 *, int, int, int);
 
 #  define OVERRIDE_PITCH_XCORR (1)
-#  define celt_pitch_xcorr (*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])
+#  define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+  ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
+        xcorr, len, max_pitch, arch))
 
 #  elif defined(OPUS_ARM_PRESUME_NEON_INTR)
 
 #   define OVERRIDE_PITCH_XCORR (1)
 #   define celt_pitch_xcorr celt_pitch_xcorr_float_neon
 
 #  endif
 
--- a/media/libopus/celt/bands.c
+++ b/media/libopus/celt/bands.c
@@ -65,20 +65,20 @@ opus_uint32 celt_lcg_rand(opus_uint32 se
 
 /* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness
    with this approximation is important because it has an impact on the bit allocation */
 opus_int16 bitexact_cos(opus_int16 x)
 {
    opus_int32 tmp;
    opus_int16 x2;
    tmp = (4096+((opus_int32)(x)*(x)))>>13;
-   celt_assert(tmp<=32767);
+   celt_sig_assert(tmp<=32767);
    x2 = tmp;
    x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2)))));
-   celt_assert(x2<=32766);
+   celt_sig_assert(x2<=32766);
    return 1+x2;
 }
 
 int bitexact_log2tan(int isin,int icos)
 {
    int lc;
    int ls;
    lc=EC_ILOG(icos);
@@ -277,17 +277,17 @@ void anti_collapse(const CELTMode *m, ce
       int depth;
 #ifdef FIXED_POINT
       int shift;
       opus_val32 thresh32;
 #endif
 
       N0 = m->eBands[i+1]-m->eBands[i];
       /* depth in 1/8 bits */
-      celt_assert(pulses[i]>=0);
+      celt_sig_assert(pulses[i]>=0);
       depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM;
 
 #ifdef FIXED_POINT
       thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1);
       thresh = MULT16_32_Q15(QCONST16(0.5f, 15), MIN32(32767,thresh32));
       {
          opus_val32 t;
          t = N0<<LM;
@@ -473,17 +473,17 @@ static void stereo_merge(celt_norm * OPU
       X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1));
       Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1));
    }
 }
 
 /* Decide whether we should spread the pulses in the current frame */
 int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
       int last_decision, int *hf_average, int *tapset_decision, int update_hf,
-      int end, int C, int M)
+      int end, int C, int M, const int *spread_weight)
 {
    int i, c, N0;
    int sum = 0, nbBands=0;
    const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
    int decision;
    int hf_sum=0;
 
    celt_assert(end>0);
@@ -514,18 +514,18 @@ int spreading_decision(const CELTMode *m
             if (x2N < QCONST16(0.015625f,13))
                tcount[2]++;
          }
 
          /* Only include four last bands (8 kHz and up) */
          if (i>m->nbEBands-4)
             hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N);
          tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N);
-         sum += tmp*256;
-         nbBands++;
+         sum += tmp*spread_weight[i];
+         nbBands+=spread_weight[i];
       }
    } while (++c<C);
 
    if (update_hf)
    {
       if (hf_sum)
          hf_sum = celt_udiv(hf_sum, C*(4-m->nbEBands+end));
       *hf_average = (*hf_average+hf_sum)>>1;
@@ -539,17 +539,17 @@ int spreading_decision(const CELTMode *m
       else if (hf_sum > 18)
          *tapset_decision=1;
       else
          *tapset_decision=0;
    }
    /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/
    celt_assert(nbBands>0); /* end has to be non-zero */
    celt_assert(sum>=0);
-   sum = celt_udiv(sum, nbBands);
+   sum = celt_udiv((opus_int32)sum<<8, nbBands);
    /* Recursive averaging */
    sum = (sum+*average)>>1;
    *average = sum;
    /* Hysteresis */
    sum = (3*sum + (((3-last_decision)<<7) + 64) + 2)>>2;
    if (sum < 80)
    {
       decision = SPREAD_AGGRESSIVE;
@@ -1487,32 +1487,33 @@ void quant_all_bands(int encode, const C
       last = (i==end-1);
 
       X = X_+M*eBands[i];
       if (Y_!=NULL)
          Y = Y_+M*eBands[i];
       else
          Y = NULL;
       N = M*eBands[i+1]-M*eBands[i];
+      celt_assert(N > 0);
       tell = ec_tell_frac(ec);
 
       /* Compute how many bits we want to allocate to this band */
       if (i != start)
          balance -= tell;
       remaining_bits = total_bits-tell-1;
       ctx.remaining_bits = remaining_bits;
       if (i <= codedBands-1)
       {
          curr_balance = celt_sudiv(balance, IMIN(3, codedBands-i));
          b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance)));
       } else {
          b = 0;
       }
 
-#ifdef ENABLE_UPDATE_DRAFT
+#ifndef DISABLE_UPDATE_DRAFT
       if (resynth && (M*eBands[i]-N >= M*eBands[start] || i==start+1) && (update_lowband || lowband_offset==0))
             lowband_offset = i;
       if (i == start+1)
          special_hybrid_folding(m, norm, norm2, start, M, dual_stereo);
 #else
       if (resynth && M*eBands[i]-N >= M*eBands[start] && (update_lowband || lowband_offset==0))
             lowband_offset = i;
 #endif
@@ -1536,17 +1537,17 @@ void quant_all_bands(int encode, const C
          int fold_start;
          int fold_end;
          int fold_i;
          /* This ensures we never repeat spectral content within one band */
          effective_lowband = IMAX(0, M*eBands[lowband_offset]-norm_offset-N);
          fold_start = lowband_offset;
          while(M*eBands[--fold_start] > effective_lowband+norm_offset);
          fold_end = lowband_offset-1;
-#ifdef ENABLE_UPDATE_DRAFT
+#ifndef DISABLE_UPDATE_DRAFT
          while(++fold_end < i && M*eBands[fold_end] < effective_lowband+norm_offset+N);
 #else
          while(M*eBands[++fold_end] < effective_lowband+norm_offset+N);
 #endif
          x_cm = y_cm = 0;
          fold_i = fold_start; do {
            x_cm |= collapse_masks[fold_i*C+0];
            y_cm |= collapse_masks[fold_i*C+C-1];
@@ -1616,18 +1617,20 @@ void quant_all_bands(int encode, const C
                save_bytes = nend_bytes-nstart_bytes;
                OPUS_COPY(bytes_save, bytes_buf, save_bytes);
 
                /* Restore */
                *ec = ec_save;
                ctx = ctx_save;
                OPUS_COPY(X, X_save, N);
                OPUS_COPY(Y, Y_save, N);
+#ifndef DISABLE_UPDATE_DRAFT
                if (i == start+1)
                   special_hybrid_folding(m, norm, norm2, start, M, dual_stereo);
+#endif
                /* Encode and round up. */
                ctx.theta_round = 1;
                x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
                      effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
                      last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm);
                dist1 = MULT16_32_Q15(w[0], celt_inner_prod(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod(Y_save, Y, N, arch));
                if (dist0 >= dist1) {
                   x_cm = cm2;
--- a/media/libopus/celt/bands.h
+++ b/media/libopus/celt/bands.h
@@ -67,17 +67,17 @@ void denormalise_bands(const CELTMode *m
 
 #define SPREAD_NONE       (0)
 #define SPREAD_LIGHT      (1)
 #define SPREAD_NORMAL     (2)
 #define SPREAD_AGGRESSIVE (3)
 
 int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
       int last_decision, int *hf_average, int *tapset_decision, int update_hf,
-      int end, int C, int M);
+      int end, int C, int M, const int *spread_weight);
 
 #ifdef MEASURE_NORM_MSE
 void measure_norm_mse(const CELTMode *m, float *X, float *X0, float *bandE, float *bandE0, int M, int N, int C);
 #endif
 
 void haar1(celt_norm *X, int N0, int stride);
 
 /** Quantisation/encoding of the residual spectrum
--- a/media/libopus/celt/celt.h
+++ b/media/libopus/celt/celt.h
@@ -54,19 +54,21 @@ extern "C" {
 
 typedef struct {
    int valid;
    float tonality;
    float tonality_slope;
    float noisiness;
    float activity;
    float music_prob;
-   float vad_prob;
+   float music_prob_min;
+   float music_prob_max;
    int   bandwidth;
    float activity_probability;
+   float max_pitch_ratio;
    /* Store as Q6 char to save space. */
    unsigned char leak_boost[LEAK_BANDS];
 } AnalysisInfo;
 
 typedef struct {
    int signalType;
    int offset;
 } SILKInfo;
@@ -202,16 +204,23 @@ static OPUS_INLINE int fromOpus(unsigned
 }
 #endif /* CUSTOM_MODES */
 
 #define COMBFILTER_MAXPERIOD 1024
 #define COMBFILTER_MINPERIOD 15
 
 extern const signed char tf_select_table[4][8];
 
+#if defined(ENABLE_HARDENING) || defined(ENABLE_ASSERTIONS)
+void validate_celt_decoder(CELTDecoder *st);
+#define VALIDATE_CELT_DECODER(st) validate_celt_decoder(st)
+#else
+#define VALIDATE_CELT_DECODER(st)
+#endif
+
 int resampling_factor(opus_int32 rate);
 
 void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,
                         int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip);
 
 void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
       opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
       const opus_val16 *window, int overlap, int arch);
--- a/media/libopus/celt/celt_decoder.c
+++ b/media/libopus/celt/celt_decoder.c
@@ -46,16 +46,24 @@
 #include "rate.h"
 #include "stack_alloc.h"
 #include "mathops.h"
 #include "float_cast.h"
 #include <stdarg.h>
 #include "celt_lpc.h"
 #include "vq.h"
 
+/* The maximum pitch lag to allow in the pitch-based PLC. It's possible to save
+   CPU time in the PLC pitch search by making this smaller than MAX_PERIOD. The
+   current value corresponds to a pitch of 66.67 Hz. */
+#define PLC_PITCH_LAG_MAX (720)
+/* The minimum pitch lag to allow in the pitch-based PLC. This corresponds to a
+   pitch of 480 Hz. */
+#define PLC_PITCH_LAG_MIN (100)
+
 #if defined(SMALL_FOOTPRINT) && defined(FIXED_POINT)
 #define NORM_ALIASING_HACK
 #endif
 /**********************************************************************/
 /*                                                                    */
 /*                             DECODER                                */
 /*                                                                    */
 /**********************************************************************/
@@ -96,16 +104,48 @@ struct OpusCustomDecoder {
    celt_sig _decode_mem[1]; /* Size = channels*(DECODE_BUFFER_SIZE+mode->overlap) */
    /* opus_val16 lpc[],  Size = channels*LPC_ORDER */
    /* opus_val16 oldEBands[], Size = 2*mode->nbEBands */
    /* opus_val16 oldLogE[], Size = 2*mode->nbEBands */
    /* opus_val16 oldLogE2[], Size = 2*mode->nbEBands */
    /* opus_val16 backgroundLogE[], Size = 2*mode->nbEBands */
 };
 
+#if defined(ENABLE_HARDENING) || defined(ENABLE_ASSERTIONS)
+/* Make basic checks (one celt_assert() per field) on the CELT decoder state `st` to ensure we don't end
+   up writing all over memory. Returns nothing; run via VALIDATE_CELT_DECODER() in celt_decode_with_ec(). */
+void validate_celt_decoder(CELTDecoder *st)
+{
+#ifndef CUSTOM_MODES
+   celt_assert(st->mode == opus_custom_mode_create(48000, 960, NULL)); /* same mode celt_decoder_get_size() requests */
+   celt_assert(st->overlap == 120);
+#endif
+   celt_assert(st->channels == 1 || st->channels == 2);
+   celt_assert(st->stream_channels == 1 || st->stream_channels == 2);
+   celt_assert(st->downsample > 0);
+   celt_assert(st->start == 0 || st->start == 17); /* NOTE(review): 17 is presumably the hybrid-mode start band - confirm */
+   celt_assert(st->start < st->end); /* start must be strictly below end */
+   celt_assert(st->end <= 21);
+#ifdef OPUS_ARCHMASK
+   celt_assert(st->arch >= 0); /* arch is only range-checked when OPUS_ARCHMASK is defined */
+   celt_assert(st->arch <= OPUS_ARCHMASK);
+#endif
+   celt_assert(st->last_pitch_index <= PLC_PITCH_LAG_MAX); /* with the next check: 0, or within [PLC_PITCH_LAG_MIN, PLC_PITCH_LAG_MAX] */
+   celt_assert(st->last_pitch_index >= PLC_PITCH_LAG_MIN || st->last_pitch_index == 0);
+   celt_assert(st->postfilter_period < MAX_PERIOD); /* with the next check: 0, or within [COMBFILTER_MINPERIOD, MAX_PERIOD) */
+   celt_assert(st->postfilter_period >= COMBFILTER_MINPERIOD || st->postfilter_period == 0);
+   celt_assert(st->postfilter_period_old < MAX_PERIOD); /* same range as postfilter_period */
+   celt_assert(st->postfilter_period_old >= COMBFILTER_MINPERIOD || st->postfilter_period_old == 0);
+   celt_assert(st->postfilter_tapset <= 2); /* both tapset fields must lie in [0, 2] */
+   celt_assert(st->postfilter_tapset >= 0);
+   celt_assert(st->postfilter_tapset_old <= 2);
+   celt_assert(st->postfilter_tapset_old >= 0);
+}
+#endif /* ENABLE_HARDENING || ENABLE_ASSERTIONS */
+
 int celt_decoder_get_size(int channels)
 {
    const CELTMode *mode = opus_custom_mode_create(48000, 960, NULL);
    return opus_custom_decoder_get_size(mode, channels);
 }
 
 OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_get_size(const CELTMode *mode, int channels)
 {
@@ -159,17 +199,17 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_dec
    st->mode = mode;
    st->overlap = mode->overlap;
    st->stream_channels = st->channels = channels;
 
    st->downsample = 1;
    st->start = 0;
    st->end = st->mode->effEBands;
    st->signalling = 1;
-#ifdef ENABLE_UPDATE_DRAFT
+#ifndef DISABLE_UPDATE_DRAFT
    st->disable_inv = channels == 1;
 #else
    st->disable_inv = 0;
 #endif
    st->arch = opus_select_arch();
 
    opus_custom_decoder_ctl(st, OPUS_RESET_STATE);
 
@@ -432,24 +472,16 @@ static void tf_decode(int start, int end
       tf_select = ec_dec_bit_logp(dec, 1);
    }
    for (i=start;i<end;i++)
    {
       tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]];
    }
 }
 
-/* The maximum pitch lag to allow in the pitch-based PLC. It's possible to save
-   CPU time in the PLC pitch search by making this smaller than MAX_PERIOD. The
-   current value corresponds to a pitch of 66.67 Hz. */
-#define PLC_PITCH_LAG_MAX (720)
-/* The minimum pitch lag to allow in the pitch-based PLC. This corresponds to a
-   pitch of 480 Hz. */
-#define PLC_PITCH_LAG_MIN (100)
-
 static int celt_plc_pitch_search(celt_sig *decode_mem[2], int C, int arch)
 {
    int pitch_index;
    VARDECL( opus_val16, lp_pitch_buf );
    SAVE_STACK;
    ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 );
    pitch_downsample(decode_mem, lp_pitch_buf,
          DECODE_BUFFER_SIZE, C, arch);
@@ -549,50 +581,55 @@ static void celt_decode_lost(CELTDecoder
 
       c=0; do {
          OPUS_MOVE(decode_mem[c], decode_mem[c]+N,
                DECODE_BUFFER_SIZE-N+(overlap>>1));
       } while (++c<C);
 
       celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd, C, C, 0, LM, st->downsample, 0, st->arch);
    } else {
+      int exc_length;
       /* Pitch-based PLC */
       const opus_val16 *window;
       opus_val16 *exc;
       opus_val16 fade = Q15ONE;
       int pitch_index;
       VARDECL(opus_val32, etmp);
       VARDECL(opus_val16, _exc);
+      VARDECL(opus_val16, fir_tmp);
 
       if (loss_count == 0)
       {
          st->last_pitch_index = pitch_index = celt_plc_pitch_search(decode_mem, C, st->arch);
       } else {
          pitch_index = st->last_pitch_index;
          fade = QCONST16(.8f,15);
       }
 
+      /* We want the excitation for 2 pitch periods in order to look for a
+         decaying signal, but we can't get more than MAX_PERIOD. */
+      exc_length = IMIN(2*pitch_index, MAX_PERIOD);
+
       ALLOC(etmp, overlap, opus_val32);
       ALLOC(_exc, MAX_PERIOD+LPC_ORDER, opus_val16);
+      ALLOC(fir_tmp, exc_length, opus_val16);
       exc = _exc+LPC_ORDER;
       window = mode->window;
       c=0; do {
          opus_val16 decay;
          opus_val16 attenuation;
          opus_val32 S1=0;
          celt_sig *buf;
          int extrapolation_offset;
          int extrapolation_len;
-         int exc_length;
          int j;
 
          buf = decode_mem[c];
-         for (i=0;i<MAX_PERIOD;i++) {
-            exc[i] = ROUND16(buf[DECODE_BUFFER_SIZE-MAX_PERIOD+i], SIG_SHIFT);
-         }
+         for (i=0;i<MAX_PERIOD+LPC_ORDER;i++)
+            exc[i-LPC_ORDER] = ROUND16(buf[DECODE_BUFFER_SIZE-MAX_PERIOD-LPC_ORDER+i], SIG_SHIFT);
 
          if (loss_count == 0)
          {
             opus_val32 ac[LPC_ORDER+1];
             /* Compute LPC coefficients for the last MAX_PERIOD samples before
                the first loss so we can work in the excitation-filter domain. */
             _celt_autocorr(exc, ac, window, overlap,
                    LPC_ORDER, MAX_PERIOD, st->arch);
@@ -626,30 +663,24 @@ static void celt_decode_lost(CELTDecoder
             for (i=0;i<LPC_ORDER;i++)
             {
                tmp = MULT16_16_Q15(QCONST16(.99f,15), tmp);
                lpc[c*LPC_ORDER+i] = MULT16_16_Q15(lpc[c*LPC_ORDER+i], tmp);
             }
          }
 #endif
          }
-         /* We want the excitation for 2 pitch periods in order to look for a
-            decaying signal, but we can't get more than MAX_PERIOD. */
-         exc_length = IMIN(2*pitch_index, MAX_PERIOD);
          /* Initialize the LPC history with the samples just before the start
             of the region for which we're computing the excitation. */
          {
-            for (i=0;i<LPC_ORDER;i++)
-            {
-               exc[MAX_PERIOD-exc_length-LPC_ORDER+i] =
-                     ROUND16(buf[DECODE_BUFFER_SIZE-exc_length-LPC_ORDER+i], SIG_SHIFT);
-            }
-            /* Compute the excitation for exc_length samples before the loss. */
+            /* Compute the excitation for exc_length samples before the loss. We need the copy
+               because celt_fir() cannot filter in-place. */
             celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,
-                  exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, st->arch);
+                  fir_tmp, exc_length, LPC_ORDER, st->arch);
+            OPUS_COPY(exc+MAX_PERIOD-exc_length, fir_tmp, exc_length);
          }
 
          /* Check if the waveform is decaying, and if so how fast.
             We do this to avoid adding energy when concealing in a segment
             with decaying energy. */
          {
             opus_val32 E1=1, E2=1;
             int decay_length;
@@ -828,16 +859,17 @@ int celt_decode_with_ec(CELTDecoder * OP
    int silence;
    int C = st->stream_channels;
    const OpusCustomMode *mode;
    int nbEBands;
    int overlap;
    const opus_int16 *eBands;
    ALLOC_STACK;
 
+   VALIDATE_CELT_DECODER(st);
    mode = st->mode;
    nbEBands = mode->nbEBands;
    overlap = mode->overlap;
    eBands = mode->eBands;
    start = st->start;
    end = st->end;
    frame_size *= st->downsample;
 
--- a/media/libopus/celt/celt_encoder.c
+++ b/media/libopus/celt/celt_encoder.c
@@ -357,16 +357,22 @@ static int transient_analysis(const opus
 #else
       mean = celt_sqrt(mean * maxE*.5*len2);
 #endif
       /* Inverse of the mean energy in Q15+6 */
       norm = SHL32(EXTEND32(len2),6+14)/ADD32(EPSILON,SHR32(mean,1));
       /* Compute harmonic mean discarding the unreliable boundaries
          The data is smooth, so we only take 1/4th of the samples */
       unmask=0;
+      /* We should never see NaNs here. If we find any, then something really bad happened and we better abort
+         before it does any damage later on. If these asserts are disabled (no hardening), then the table
+         lookup a few lines below (id = ...) is likely to crash due to an out-of-bounds read. DO NOT FIX
+         that crash on NaN since it could result in a worse issue later on. */
+      celt_assert(!celt_isnan(tmp[0]));
+      celt_assert(!celt_isnan(norm));
       for (i=12;i<len2-5;i+=4)
       {
          int id;
 #ifdef FIXED_POINT
          id = MAX32(0,MIN32(127,MULT16_32_Q15(tmp[i]+EPSILON,norm))); /* Do not round to nearest */
 #else
          id = (int)MAX32(0,MIN32(127,floor(64*norm*(tmp[i]+EPSILON)))); /* Do not round to nearest */
 #endif
@@ -572,17 +578,17 @@ static opus_val32 l1_metric(const celt_n
    /* When in doubt, prefer good freq resolution */
    L1 = MAC16_32_Q15(L1, LM*bias, L1);
    return L1;
 
 }
 
 static int tf_analysis(const CELTMode *m, int len, int isTransient,
       int *tf_res, int lambda, celt_norm *X, int N0, int LM,
-      opus_val16 tf_estimate, int tf_chan)
+      opus_val16 tf_estimate, int tf_chan, int *importance)
 {
    int i;
    VARDECL(int, metric);
    int cost0;
    int cost1;
    VARDECL(int, path0);
    VARDECL(int, path1);
    VARDECL(celt_norm, tmp);
@@ -655,42 +661,42 @@ static int tf_analysis(const CELTMode *m
       if (isTransient)
          metric[i] = 2*best_level;
       else
          metric[i] = -2*best_level;
       /* For bands that can't be split to -1, set the metric to the half-way point to avoid
          biasing the decision */
       if (narrow && (metric[i]==0 || metric[i]==-2*LM))
          metric[i]-=1;
-      /*printf("%d ", metric[i]);*/
+      /*printf("%d ", metric[i]/2 + (!isTransient)*LM);*/
    }
    /*printf("\n");*/
    /* Search for the optimal tf resolution, including tf_select */
    tf_select = 0;
    for (sel=0;sel<2;sel++)
    {
-      cost0 = 0;
-      cost1 = isTransient ? 0 : lambda;
+      cost0 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*sel+0]);
+      cost1 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*sel+1]) + (isTransient ? 0 : lambda);
       for (i=1;i<len;i++)
       {
          int curr0, curr1;
          curr0 = IMIN(cost0, cost1 + lambda);
          curr1 = IMIN(cost0 + lambda, cost1);
-         cost0 = curr0 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+0]);
-         cost1 = curr1 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+1]);
+         cost0 = curr0 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+0]);
+         cost1 = curr1 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+1]);
       }
       cost0 = IMIN(cost0, cost1);
       selcost[sel]=cost0;
    }
    /* For now, we're conservative and only allow tf_select=1 for transients.
     * If tests confirm it's useful for non-transients, we could allow it. */
    if (selcost[1]<selcost[0] && isTransient)
       tf_select=1;
-   cost0 = 0;
-   cost1 = isTransient ? 0 : lambda;
+   cost0 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*tf_select+0]);
+   cost1 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*tf_select+1]) + (isTransient ? 0 : lambda);
    /* Viterbi forward pass */
    for (i=1;i<len;i++)
    {
       int curr0, curr1;
       int from0, from1;
 
       from0 = cost0;
       from1 = cost1 + lambda;
@@ -708,18 +714,18 @@ static int tf_analysis(const CELTMode *m
       if (from0 < from1)
       {
          curr1 = from0;
          path1[i]= 0;
       } else {
          curr1 = from1;
          path1[i]= 1;
       }
-      cost0 = curr0 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+0]);
-      cost1 = curr1 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+1]);
+      cost0 = curr0 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+0]);
+      cost1 = curr1 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+1]);
    }
    tf_res[len-1] = cost0 < cost1 ? 0 : 1;
    /* Viterbi backward pass to check the decisions */
    for (i=len-2;i>=0;i--)
    {
       if (tf_res[i+1] == 1)
          tf_res[i] = path1[i+1];
       else
@@ -959,17 +965,18 @@ static opus_val16 median_of_3(const opus
       return t2;
    else
       return t0;
 }
 
 static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2,
       int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN,
       int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM,
-      int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc, AnalysisInfo *analysis)
+      int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc,
+      AnalysisInfo *analysis, int *importance, int *spread_weight)
 {
    int i, c;
    opus_int32 tot_boost=0;
    opus_val16 maxDepth;
    VARDECL(opus_val16, follower);
    VARDECL(opus_val16, noise_floor);
    SAVE_STACK;
    ALLOC(follower, C*nbEBands, opus_val16);
@@ -985,16 +992,52 @@ static opus_val16 dynalloc_analysis(cons
             +QCONST16(.5f,DB_SHIFT)+SHL16(9-lsb_depth,DB_SHIFT)-SHL16(eMeans[i],6)
             +MULT16_16(QCONST16(.0062,DB_SHIFT),(i+5)*(i+5));
    }
    c=0;do
    {
       for (i=0;i<end;i++)
          maxDepth = MAX16(maxDepth, bandLogE[c*nbEBands+i]-noise_floor[i]);
    } while (++c<C);
+   {
+      /* Compute a really simple masking model to avoid taking into account completely masked
+         bands when computing the spreading decision. */
+      VARDECL(opus_val16, mask);
+      VARDECL(opus_val16, sig);
+      ALLOC(mask, nbEBands, opus_val16);
+      ALLOC(sig, nbEBands, opus_val16);
+      for (i=0;i<end;i++)
+         mask[i] = bandLogE[i]-noise_floor[i];
+      if (C==2)
+      {
+         for (i=0;i<end;i++)
+            mask[i] = MAX16(mask[i], bandLogE[nbEBands+i]-noise_floor[i]);
+      }
+      OPUS_COPY(sig, mask, end);
+      for (i=1;i<end;i++)
+         mask[i] = MAX16(mask[i], mask[i-1] - QCONST16(2.f, DB_SHIFT));
+      for (i=end-2;i>=0;i--)
+         mask[i] = MAX16(mask[i], mask[i+1] - QCONST16(3.f, DB_SHIFT));
+      for (i=0;i<end;i++)
+      {
+         /* Compute SMR: Mask is never more than 72 dB below the peak and never below the noise floor.*/
+         opus_val16 smr = sig[i]-MAX16(MAX16(0, maxDepth-QCONST16(12.f, DB_SHIFT)), mask[i]);
+         /* Clamp SMR to make sure we're not shifting by something negative or too large. */
+#ifdef FIXED_POINT
+         /* FIXME: Use PSHR16() instead */
+         int shift = -PSHR32(MAX16(-QCONST16(5.f, DB_SHIFT), MIN16(0, smr)), DB_SHIFT);
+#else
+         int shift = IMIN(5, IMAX(0, -(int)floor(.5f + smr)));
+#endif
+         spread_weight[i] = 32 >> shift;
+      }
+      /*for (i=0;i<end;i++)
+         printf("%d ", spread_weight[i]);
+      printf("\n");*/
+   }
    /* Make sure that dynamic allocation can't make us bust the budget */
    if (effectiveBytes > 50 && LM>=1 && !lfe)
    {
       int last=0;
       c=0;do
       {
          opus_val16 offset;
          opus_val16 tmp;
@@ -1041,16 +1084,24 @@ static opus_val16 dynalloc_analysis(cons
       } else {
          for (i=start;i<end;i++)
          {
             follower[i] = MAX16(0, bandLogE[i]-follower[i]);
          }
       }
       for (i=start;i<end;i++)
          follower[i] = MAX16(follower[i], surround_dynalloc[i]);
+      for (i=start;i<end;i++)
+      {
+#ifdef FIXED_POINT
+         importance[i] = PSHR32(13*celt_exp2(MIN16(follower[i], QCONST16(4.f, DB_SHIFT))), 16);
+#else
+         importance[i] = (int)floor(.5f+13*celt_exp2(MIN16(follower[i], QCONST16(4.f, DB_SHIFT))));
+#endif
+      }
       /* For non-transient CBR/CVBR frames, halve the dynalloc contribution */
       if ((!vbr || constrained_vbr)&&!isTransient)
       {
          for (i=start;i<end;i++)
             follower[i] = HALF16(follower[i]);
       }
       for (i=start;i<end;i++)
       {
@@ -1096,25 +1147,28 @@ static opus_val16 dynalloc_analysis(cons
             offsets[i] = cap-tot_boost;
             tot_boost = cap;
             break;
          } else {
             offsets[i] = boost;
             tot_boost += boost_bits;
          }
       }
+   } else {
+      for (i=start;i<end;i++)
+         importance[i] = 13;
    }
    *tot_boost_ = tot_boost;
    RESTORE_STACK;
    return maxDepth;
 }
 
 
 static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem, int CC, int N,
-      int prefilter_tapset, int *pitch, opus_val16 *gain, int *qgain, int enabled, int nbAvailableBytes)
+      int prefilter_tapset, int *pitch, opus_val16 *gain, int *qgain, int enabled, int nbAvailableBytes, AnalysisInfo *analysis)
 {
    int c;
    VARDECL(celt_sig, _pre);
    celt_sig *pre[2];
    const CELTMode *mode;
    int pitch_index;
    opus_val16 gain1;
    opus_val16 pf_threshold;
@@ -1160,17 +1214,22 @@ static int run_prefilter(CELTEncoder *st
       if (st->loss_rate>4)
          gain1 = HALF32(gain1);
       if (st->loss_rate>8)
          gain1 = 0;
    } else {
       gain1 = 0;
       pitch_index = COMBFILTER_MINPERIOD;
    }
-
+#ifndef DISABLE_FLOAT_API
+   if (analysis->valid)
+      gain1 = (opus_val16)(gain1 * analysis->max_pitch_ratio);
+#else
+   (void)analysis;
+#endif
    /* Gain threshold for enabling the prefilter/postfilter */
    pf_threshold = QCONST16(.2f,15);
 
    /* Adjusting the threshold based on rate and continuity */
    if (abs(pitch_index-st->prefilter_period)*10>pitch_index)
       pf_threshold += QCONST16(.2f,15);
    if (nbAvailableBytes<25)
       pf_threshold += QCONST16(.1f,15);
@@ -1357,16 +1416,18 @@ int celt_encode_with_ec(CELTEncoder * OP
    VARDECL(celt_ener, bandE);
    VARDECL(opus_val16, bandLogE);
    VARDECL(opus_val16, bandLogE2);
    VARDECL(int, fine_quant);
    VARDECL(opus_val16, error);
    VARDECL(int, pulses);
    VARDECL(int, cap);
    VARDECL(int, offsets);
+   VARDECL(int, importance);
+   VARDECL(int, spread_weight);
    VARDECL(int, fine_priority);
    VARDECL(int, tf_res);
    VARDECL(unsigned char, collapse_masks);
    celt_sig *prefilter_mem;
    opus_val16 *oldBandE, *oldLogE, *oldLogE2, *energyError;
    int shortBlocks=0;
    int isTransient=0;
    const int CC = st->channels;
@@ -1409,16 +1470,17 @@ int celt_encode_with_ec(CELTEncoder * OP
    int signalBandwidth;
    int transient_got_disabled=0;
    opus_val16 surround_masking=0;
    opus_val16 temporal_vbr=0;
    opus_val16 surround_trim = 0;
    opus_int32 equiv_rate;
    int hybrid;
    int weak_transient = 0;
+   int enable_tf_analysis;
    VARDECL(opus_val16, surround_dynalloc);
    ALLOC_STACK;
 
    mode = st->mode;
    nbEBands = mode->nbEBands;
    overlap = mode->overlap;
    eBands = mode->eBands;
    start = st->start;
@@ -1449,17 +1511,17 @@ int celt_encode_with_ec(CELTEncoder * OP
    oldLogE2 = oldLogE + CC*nbEBands;
    energyError = oldLogE2 + CC*nbEBands;
 
    if (enc==NULL)
    {
       tell0_frac=tell=1;
       nbFilledBytes=0;
    } else {
-      tell0_frac=tell=ec_tell_frac(enc);
+      tell0_frac=ec_tell_frac(enc);
       tell=ec_tell(enc);
       nbFilledBytes=(tell+4)>>3;
    }
 
 #ifdef CUSTOM_MODES
    if (st->signalling && enc==NULL)
    {
       int tmp = (mode->effEBands-end)>>1;
@@ -1598,17 +1660,17 @@ int celt_encode_with_ec(CELTEncoder * OP
    /* Find pitch period and gain */
    {
       int enabled;
       int qg;
       enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && !hybrid && !silence && !st->disable_pf
             && st->complexity >= 5;
 
       prefilter_tapset = st->tapset_decision;
-      pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes);
+      pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes, &st->analysis);
       if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && (!st->analysis.valid || st->analysis.tonality > .3)
             && (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period))
          pitch_change = 1;
       if (pf_on==0)
       {
          if(!hybrid && tell+16<=total_bits)
             ec_enc_bit_logp(enc, 0, 1);
       } else {
@@ -1628,17 +1690,17 @@ int celt_encode_with_ec(CELTEncoder * OP
 
    isTransient = 0;
    shortBlocks = 0;
    if (st->complexity >= 1 && !st->lfe)
    {
       /* Reduces the likelihood of energy instability on fricatives at low bitrate
          in hybrid mode. It seems like we still want to have real transients on vowels
          though (small SILK quantization offset value). */
-      int allow_weak_transients = hybrid && effectiveBytes<15 && st->silk_info.offset >= 100;
+      int allow_weak_transients = hybrid && effectiveBytes<15 && st->silk_info.signalType != 2;
       isTransient = transient_analysis(in, N+overlap, CC,
             &tf_estimate, &tf_chan, allow_weak_transients, &weak_transient);
    }
    if (LM>0 && ec_tell(enc)+3<=total_bits)
    {
       if (isTransient)
          shortBlocks = M;
    } else {
@@ -1657,16 +1719,19 @@ int celt_encode_with_ec(CELTEncoder * OP
       compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample, st->arch);
       compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch);
       amp2Log2(mode, effEnd, end, bandE, bandLogE2, C);
       for (i=0;i<C*nbEBands;i++)
          bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT));
    }
 
    compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch);
+   /* This should catch any NaN in the CELT input. Since we're not supposed to see any (they're filtered
+      at the Opus layer), just abort. */
+   celt_assert(!celt_isnan(freq[0]) && (C==1 || !celt_isnan(freq[N])));
    if (CC==2&&C==1)
       tf_chan = 0;
    compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch);
 
    if (st->lfe)
    {
       for (i=2;i<end;i++)
       {
@@ -1800,34 +1865,44 @@ int celt_encode_with_ec(CELTEncoder * OP
    if (LM>0 && ec_tell(enc)+3<=total_bits)
       ec_enc_bit_logp(enc, isTransient, 3);
 
    ALLOC(X, C*N, celt_norm);         /**< Interleaved normalised MDCTs */
 
    /* Band normalisation */
    normalise_bands(mode, freq, X, bandE, effEnd, C, M);
 
+   enable_tf_analysis = effectiveBytes>=15*C && !hybrid && st->complexity>=2 && !st->lfe;
+
+   ALLOC(offsets, nbEBands, int);
+   ALLOC(importance, nbEBands, int);
+   ALLOC(spread_weight, nbEBands, int);
+
+   maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, start, end, C, offsets,
+         st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr,
+         eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc, &st->analysis, importance, spread_weight);
+
    ALLOC(tf_res, nbEBands, int);
    /* Disable variable tf resolution for hybrid and at very low bitrate */
-   if (effectiveBytes>=15*C && !hybrid && st->complexity>=2 && !st->lfe)
+   if (enable_tf_analysis)
    {
       int lambda;
-      lambda = IMAX(5, 1280/effectiveBytes + 2);
-      tf_select = tf_analysis(mode, effEnd, isTransient, tf_res, lambda, X, N, LM, tf_estimate, tf_chan);
+      lambda = IMAX(80, 20480/effectiveBytes + 2);
+      tf_select = tf_analysis(mode, effEnd, isTransient, tf_res, lambda, X, N, LM, tf_estimate, tf_chan, importance);
       for (i=effEnd;i<end;i++)
          tf_res[i] = tf_res[effEnd-1];
    } else if (hybrid && weak_transient)
    {
       /* For weak transients, we rely on the fact that improving time resolution using
          TF on a long window is imperfect and will not result in an energy collapse at
          low bitrate. */
       for (i=0;i<end;i++)
          tf_res[i] = 1;
       tf_select=0;
-   } else if (hybrid && effectiveBytes<15)
+   } else if (hybrid && effectiveBytes<15 && st->silk_info.signalType != 2)
    {
       /* For low bitrate hybrid, we force temporal resolution to 5 ms rather than 2.5 ms. */
       for (i=0;i<end;i++)
          tf_res[i] = 0;
       tf_select=isTransient;
    } else {
       for (i=0;i<end;i++)
          tf_res[i] = isTransient;
@@ -1888,29 +1963,24 @@ int celt_encode_with_ec(CELTEncoder * OP
             static const opus_val16 tapset_histeresis[2] = {QCONST16(.1f, 15), QCONST16(.05f, 15)};
             st->spread_decision = hysteresis_decision(-st->analysis.tonality, spread_thresholds, spread_histeresis, 3, st->spread_decision);
             st->tapset_decision = hysteresis_decision(st->analysis.tonality_slope, tapset_thresholds, tapset_histeresis, 2, st->tapset_decision);
          } else
 #endif
          {
             st->spread_decision = spreading_decision(mode, X,
                   &st->tonal_average, st->spread_decision, &st->hf_average,
-                  &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M);
+                  &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M, spread_weight);
          }
          /*printf("%d %d\n", st->tapset_decision, st->spread_decision);*/
          /*printf("%f %d %f %d\n\n", st->analysis.tonality, st->spread_decision, st->analysis.tonality_slope, st->tapset_decision);*/
       }
       ec_enc_icdf(enc, st->spread_decision, spread_icdf, 5);
    }
 
-   ALLOC(offsets, nbEBands, int);
-
-   maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, start, end, C, offsets,
-         st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr,
-         eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc, &st->analysis);
    /* For LFE, everything interesting is in the first band */
    if (st->lfe)
       offsets[0] = IMIN(8, effectiveBytes/3);
    ALLOC(cap, nbEBands, int);
    init_caps(mode,cap,LM,C);
 
    dynalloc_logp = 6;
    total_bits<<=BITRES;
--- a/media/libopus/celt/celt_lpc.c
+++ b/media/libopus/celt/celt_lpc.c
@@ -94,25 +94,25 @@ void celt_fir_c(
          opus_val16 *y,
          int N,
          int ord,
          int arch)
 {
    int i,j;
    VARDECL(opus_val16, rnum);
    SAVE_STACK;
-
+   celt_assert(x != y);
    ALLOC(rnum, ord, opus_val16);
    for(i=0;i<ord;i++)
       rnum[i] = num[ord-i-1];
    for (i=0;i<N-3;i+=4)
    {
       opus_val32 sum[4];
       sum[0] = SHL32(EXTEND32(x[i  ]), SIG_SHIFT);
-      sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT),
+      sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT);
       sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT);
       sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT);
       xcorr_kernel(rnum, x+i-ord, sum, ord, arch);
       y[i  ] = ROUND16(sum[0], SIG_SHIFT);
       y[i+1] = ROUND16(sum[1], SIG_SHIFT);
       y[i+2] = ROUND16(sum[2], SIG_SHIFT);
       y[i+3] = ROUND16(sum[3], SIG_SHIFT);
    }
--- a/media/libopus/celt/cwrs.c
+++ b/media/libopus/celt/cwrs.c
@@ -477,17 +477,17 @@ static opus_val32 cwrsi(int _n,int _k,op
       /*Are the pulses in this dimension negative?*/
       p=row[_k+1];
       s=-(_i>=p);
       _i-=p&s;
       /*Count how many pulses were placed in this dimension.*/
       k0=_k;
       q=row[_n];
       if(q>_i){
-        celt_assert(p>q);
+        celt_sig_assert(p>q);
         _k=_n;
         do p=CELT_PVQ_U_ROW[--_k][_n];
         while(p>_i);
       }
       else for(p=row[_k];p>_i;p=row[_k])_k--;
       _i-=p;
       val=(k0-_k+s)^s;
       *_y++=val;
--- a/media/libopus/celt/entcode.h
+++ b/media/libopus/celt/entcode.h
@@ -117,33 +117,33 @@ static OPUS_INLINE int ec_tell(ec_ctx *_
    suitable for making coding decisions.
   Return: The number of bits scaled by 2**BITRES.
           This will always be slightly larger than the exact value (e.g., all
            rounding error is in the positive direction).*/
 opus_uint32 ec_tell_frac(ec_ctx *_this);
 
 /* Tested exhaustively for all n and for 1<=d<=256 */
 static OPUS_INLINE opus_uint32 celt_udiv(opus_uint32 n, opus_uint32 d) {
-   celt_assert(d>0);
+   celt_sig_assert(d>0);
 #ifdef USE_SMALL_DIV_TABLE
    if (d>256)
       return n/d;
    else {
       opus_uint32 t, q;
       t = EC_ILOG(d&-d);
       q = (opus_uint64)SMALL_DIV_TABLE[d>>t]*(n>>(t-1))>>32;
       return q+(n-q*d >= d);
    }
 #else
    return n/d;
 #endif
 }
 
 static OPUS_INLINE opus_int32 celt_sudiv(opus_int32 n, opus_int32 d) {
-   celt_assert(d>0);
+   celt_sig_assert(d>0);
 #ifdef USE_SMALL_DIV_TABLE
    if (n<0)
       return -(opus_int32)celt_udiv(-n, d);
    else
       return celt_udiv(n, d);
 #else
    return n/d;
 #endif
--- a/media/libopus/celt/entdec.h
+++ b/media/libopus/celt/entdec.h
@@ -80,17 +80,17 @@ int ec_dec_bit_logp(ec_dec *_this,unsign
   _ftb: The number of bits of precision in the cumulative distribution.
   Return: The decoded symbol s.*/
 int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb);
 
 /*Extracts a raw unsigned integer with a non-power-of-2 range from the stream.
   The bits must have been encoded with ec_enc_uint().
   No call to ec_dec_update() is necessary after this call.
   _ft: The number of integers that can be decoded (one more than the max).
-       This must be at least one, and no more than 2**32-1.
+       This must be at least 2, and no more than 2**32-1.
   Return: The decoded bits.*/
 opus_uint32 ec_dec_uint(ec_dec *_this,opus_uint32 _ft);
 
 /*Extracts a sequence of raw bits from the stream.
   The bits must have been encoded with ec_enc_bits().
   No call to ec_dec_update() is necessary after this call.
   _ftb: The number of bits to extract.
         This must be between 0 and 25, inclusive.
--- a/media/libopus/celt/entenc.h
+++ b/media/libopus/celt/entenc.h
@@ -62,17 +62,17 @@ void ec_enc_bit_logp(ec_enc *_this,int _
          The values must be monotonically non-increasing, and the last value
           must be 0.
   _ftb: The number of bits of precision in the cumulative distribution.*/
 void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb);
 
 /*Encodes a raw unsigned integer in the stream.
   _fl: The integer to encode.
   _ft: The number of integers that can be encoded (one more than the max).
-       This must be at least one, and no more than 2**32-1.*/
+       This must be at least 2, and no more than 2**32-1.*/
 void ec_enc_uint(ec_enc *_this,opus_uint32 _fl,opus_uint32 _ft);
 
 /*Encodes a sequence of raw bits in the stream.
   _fl:  The bits to encode.
   _ftb: The number of bits to encode.
         This must be between 1 and 25, inclusive.*/
 void ec_enc_bits(ec_enc *_this,opus_uint32 _fl,unsigned _ftb);
 
--- a/media/libopus/celt/mathops.c
+++ b/media/libopus/celt/mathops.c
@@ -33,17 +33,18 @@
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 #include "mathops.h"
 
 /*Compute floor(sqrt(_val)) with exact arithmetic.
-  This has been tested on all possible 32-bit inputs.*/
+  _val must be greater than 0.
+  This has been tested on all possible 32-bit inputs greater than 0.*/
 unsigned isqrt32(opus_uint32 _val){
   unsigned b;
   unsigned g;
   int      bshift;
   /*Uses the second method from
      http://www.azillionmonkeys.com/qed/sqroot.html
     The main idea is to search for the largest binary digit b such that
      (g+b)*(g+b) <= _val, and add it to the solution g.*/
@@ -177,17 +178,17 @@ opus_val16 celt_cos_norm(opus_val32 x)
 }
 
 /** Reciprocal approximation (Q15 input, Q16 output) */
 opus_val32 celt_rcp(opus_val32 x)
 {
    int i;
    opus_val16 n;
    opus_val16 r;
-   celt_assert2(x>0, "celt_rcp() only defined for positive values");
+   celt_sig_assert(x>0);
    i = celt_ilog2(x);
    /* n is Q15 with range [0,1). */
    n = VSHR32(x,i-15)-32768;
    /* Start with a linear approximation:
       r = 1.8823529411764706-0.9411764705882353*n.
       The coefficients and the result are Q14 in the range [15420,30840].*/
    r = ADD16(30840, MULT16_16_Q15(-15420, n));
    /* Perform two Newton iterations:
--- a/media/libopus/celt/mathops.h
+++ b/media/libopus/celt/mathops.h
@@ -67,17 +67,17 @@ static OPUS_INLINE float fast_atan2f(flo
    }else{
       float den = (x2 + cB*y2) * (x2 + cC*y2);
       return  x*y*(x2 + cA*y2) / den + (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE);
    }
 }
 #undef cA
 #undef cB
 #undef cC
-#undef cD
+#undef cE
 #endif
 
 
 #ifndef OVERRIDE_CELT_MAXABS16
 static OPUS_INLINE opus_val32 celt_maxabs16(const opus_val16 *x, int len)
 {
    int i;
    opus_val16 maxval = 0;
@@ -174,17 +174,17 @@ static OPUS_INLINE float celt_exp2(float
 #ifdef FIXED_POINT
 
 #include "os_support.h"
 
 #ifndef OVERRIDE_CELT_ILOG2
 /** Integer log in base2. Undefined for zero and negative numbers */
 static OPUS_INLINE opus_int16 celt_ilog2(opus_int32 x)
 {
-   celt_assert2(x>0, "celt_ilog2() only defined for strictly positive numbers");
+   celt_sig_assert(x>0);
    return EC_ILOG(x)-1;
 }
 #endif
 
 
 /** Integer log in base2. Defined for zero, but not for negative numbers */
 static OPUS_INLINE opus_int16 celt_zlog2(opus_val32 x)
 {
--- a/media/libopus/celt/pitch.c
+++ b/media/libopus/celt/pitch.c
@@ -97,65 +97,58 @@ static void find_best_pitch(opus_val32 *
             }
          }
       }
       Syy += SHR32(MULT16_16(y[i+len],y[i+len]),yshift) - SHR32(MULT16_16(y[i],y[i]),yshift);
       Syy = MAX32(1, Syy);
    }
 }
 
-static void celt_fir5(const opus_val16 *x,
+static void celt_fir5(opus_val16 *x,
          const opus_val16 *num,
-         opus_val16 *y,
-         int N,
-         opus_val16 *mem)
+         int N)
 {
    int i;
    opus_val16 num0, num1, num2, num3, num4;
    opus_val32 mem0, mem1, mem2, mem3, mem4;
    num0=num[0];
    num1=num[1];
    num2=num[2];
    num3=num[3];
    num4=num[4];
-   mem0=mem[0];
-   mem1=mem[1];
-   mem2=mem[2];
-   mem3=mem[3];
-   mem4=mem[4];
+   mem0=0;
+   mem1=0;
+   mem2=0;
+   mem3=0;
+   mem4=0;
    for (i=0;i<N;i++)
    {
       opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
       sum = MAC16_16(sum,num0,mem0);
       sum = MAC16_16(sum,num1,mem1);
       sum = MAC16_16(sum,num2,mem2);
       sum = MAC16_16(sum,num3,mem3);
       sum = MAC16_16(sum,num4,mem4);
       mem4 = mem3;
       mem3 = mem2;
       mem2 = mem1;
       mem1 = mem0;
       mem0 = x[i];
-      y[i] = ROUND16(sum, SIG_SHIFT);
+      x[i] = ROUND16(sum, SIG_SHIFT);
    }
-   mem[0]=mem0;
-   mem[1]=mem1;
-   mem[2]=mem2;
-   mem[3]=mem3;
-   mem[4]=mem4;
 }
 
 
 void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
       int len, int C, int arch)
 {
    int i;
    opus_val32 ac[5];
    opus_val16 tmp=Q15ONE;
-   opus_val16 lpc[4], mem[5]={0,0,0,0,0};
+   opus_val16 lpc[4];
    opus_val16 lpc2[5];
    opus_val16 c1 = QCONST16(.8f,15);
 #ifdef FIXED_POINT
    int shift;
    opus_val32 maxabs = celt_maxabs32(x[0], len);
    if (C==2)
    {
       opus_val32 maxabs_1 = celt_maxabs32(x[1], len);
@@ -206,17 +199,17 @@ void pitch_downsample(celt_sig * OPUS_RE
       lpc[i] = MULT16_16_Q15(lpc[i], tmp);
    }
    /* Add a zero */
    lpc2[0] = lpc[0] + QCONST16(.8f,SIG_SHIFT);
    lpc2[1] = lpc[1] + MULT16_16_Q15(c1,lpc[0]);
    lpc2[2] = lpc[2] + MULT16_16_Q15(c1,lpc[1]);
    lpc2[3] = lpc[3] + MULT16_16_Q15(c1,lpc[2]);
    lpc2[4] = MULT16_16_Q15(c1,lpc[3]);
-   celt_fir5(x_lp, lpc2, x_lp, len>>1, mem);
+   celt_fir5(x_lp, lpc2, len>>1);
 }
 
 /* Pure C implementation. */
 #ifdef FIXED_POINT
 opus_val32
 #else
 void
 #endif
@@ -251,17 +244,17 @@ celt_pitch_xcorr_c(const opus_val16 *_x,
    int i;
    /*The EDSP version requires that max_pitch is at least 1, and that _x is
       32-bit aligned.
      Since it's hard to put asserts in assembly, put them here.*/
 #ifdef FIXED_POINT
    opus_val32 maxcorr=1;
 #endif
    celt_assert(max_pitch>0);
-   celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
+   celt_sig_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
    for (i=0;i<max_pitch-3;i+=4)
    {
       opus_val32 sum[4]={0,0,0,0};
       xcorr_kernel(_x, _y+i, sum, len, arch);
       xcorr[i]=sum[0];
       xcorr[i+1]=sum[1];
       xcorr[i+2]=sum[2];
       xcorr[i+3]=sum[3];
--- a/media/libopus/celt/quant_bands.c
+++ b/media/libopus/celt/quant_bands.c
@@ -452,17 +452,17 @@ void unquant_coarse_energy(const CELTMod
       c=0;
       do {
          int qi;
          opus_val32 q;
          opus_val32 tmp;
          /* It would be better to express this invariant as a
             test on C at function entry, but that isn't enough
             to make the static analyzer happy. */
-         celt_assert(c<2);
+         celt_sig_assert(c<2);
          tell = ec_tell(dec);
          if(budget-tell>=15)
          {
             int pi;
             pi = 2*IMIN(i,20);
             qi = ec_laplace_decode(dec,
                   prob_model[pi]<<7, prob_model[pi+1]<<6);
          }
--- a/media/libopus/celt/vq.c
+++ b/media/libopus/celt/vq.c
@@ -225,22 +225,22 @@ opus_val16 op_pvq_search_c(celt_norm *X,
 #endif
          y[j] = (celt_norm)iy[j];
          yy = MAC16_16(yy, y[j],y[j]);
          xy = MAC16_16(xy, X[j],y[j]);
          y[j] *= 2;
          pulsesLeft -= iy[j];
       }  while (++j<N);
    }
-   celt_assert2(pulsesLeft>=0, "Allocated too many pulses in the quick pass");
+   celt_sig_assert(pulsesLeft>=0);
 
    /* This should never happen, but just in case it does (e.g. on silence)
       we fill the first bin with pulses. */
 #ifdef FIXED_POINT_DEBUG
-   celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass");
+   celt_sig_assert(pulsesLeft<=N+3);
 #endif
    if (pulsesLeft > N+3)
    {
       opus_val16 tmp = (opus_val16)pulsesLeft;
       yy = MAC16_16(yy, tmp, tmp);
       yy = MAC16_16(yy, tmp, y[0]);
       iy[0] += pulsesLeft;
       pulsesLeft=0;
new file mode 100644
--- /dev/null
+++ b/media/libopus/celt/x86/celt_lpc_sse4_1.c
@@ -0,0 +1,89 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+#include "celt_lpc.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "pitch.h"
+#include "x86cpu.h"
+
+#if defined(FIXED_POINT)
+
+void celt_fir_sse4_1(const opus_val16 *x,
+         const opus_val16 *num,
+         opus_val16 *y,
+         int N,
+         int ord,
+         int arch)
+{
+    int i,j;
+    VARDECL(opus_val16, rnum);
+
+    __m128i vecNoA;
+    opus_int32 noA ;
+    SAVE_STACK;
+
+   ALLOC(rnum, ord, opus_val16);
+   for(i=0;i<ord;i++)
+      rnum[i] = num[ord-i-1];
+   noA = EXTEND32(1) << SIG_SHIFT >> 1;
+   vecNoA = _mm_set_epi32(noA, noA, noA, noA);
+
+   for (i=0;i<N-3;i+=4)
+   {
+      opus_val32 sums[4] = {0};
+      __m128i vecSum, vecX;
+
+      xcorr_kernel(rnum, x+i-ord, sums, ord, arch);
+
+      vecSum = _mm_loadu_si128((__m128i *)sums);
+      vecSum = _mm_add_epi32(vecSum, vecNoA);
+      vecSum = _mm_srai_epi32(vecSum, SIG_SHIFT);
+      vecX = OP_CVTEPI16_EPI32_M64(x + i);
+      vecSum = _mm_add_epi32(vecSum, vecX);
+      vecSum = _mm_packs_epi32(vecSum, vecSum);
+      _mm_storel_epi64((__m128i *)(y + i), vecSum);
+   }
+   for (;i<N;i++)
+   {
+      opus_val32 sum = 0;
+      for (j=0;j<ord;j++)
+         sum = MAC16_16(sum, rnum[j], x[i+j-ord]);
+      y[i] = SATURATE16(ADD32(EXTEND32(x[i]), PSHR32(sum, SIG_SHIFT)));
+   }
+
+   RESTORE_STACK;
+}
+
+#endif
--- a/media/libopus/celt/x86/vq_sse2.c
+++ b/media/libopus/celt/x86/vq_sse2.c
@@ -130,17 +130,17 @@ opus_val16 op_pvq_search_sse2(celt_norm 
       xy4 = _mm_add_ps(xy4, _mm_shuffle_ps(xy4, xy4, _MM_SHUFFLE(2, 3, 0, 1)));
       xy = _mm_cvtss_f32(xy4);
       yy4 = _mm_add_ps(yy4, _mm_shuffle_ps(yy4, yy4, _MM_SHUFFLE(1, 0, 3, 2)));
       yy4 = _mm_add_ps(yy4, _mm_shuffle_ps(yy4, yy4, _MM_SHUFFLE(2, 3, 0, 1)));
       yy = _mm_cvtss_f32(yy4);
    }
    X[N] = X[N+1] = X[N+2] = -100;
    y[N] = y[N+1] = y[N+2] = 100;
-   celt_assert2(pulsesLeft>=0, "Allocated too many pulses in the quick pass");
+   celt_sig_assert(pulsesLeft>=0);
 
    /* This should never happen, but just in case it does (e.g. on silence)
       we fill the first bin with pulses. */
    if (pulsesLeft > N+3)
    {
       opus_val16 tmp = (opus_val16)pulsesLeft;
       yy = MAC16_16(yy, tmp, tmp);
       yy = MAC16_16(yy, tmp, y[0]);
--- a/media/libopus/celt/x86/x86cpu.h
+++ b/media/libopus/celt/x86/x86cpu.h
@@ -77,17 +77,19 @@ int opus_select_arch(void);
 # if defined(__clang__) || !defined(__OPTIMIZE__)
 #  define OP_CVTEPI8_EPI32_M32(x) \
  (_mm_cvtepi8_epi32(_mm_cvtsi32_si128(*(int *)(x))))
 # else
 #  define OP_CVTEPI8_EPI32_M32(x) \
  (_mm_cvtepi8_epi32(*(__m128i *)(x)))
 #endif
 
-# if !defined(__OPTIMIZE__)
+/* Similar reasoning about the instruction sequence as in the 32-bit macro above.
+ */
+# if defined(__clang__) || !defined(__OPTIMIZE__)
 #  define OP_CVTEPI16_EPI32_M64(x) \
  (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x))))
 # else
 #  define OP_CVTEPI16_EPI32_M64(x) \
  (_mm_cvtepi16_epi32(*(__m128i *)(x)))
 # endif
 
 #endif
--- a/media/libopus/include/opus.h
+++ b/media/libopus/include/opus.h
@@ -526,17 +526,17 @@ OPUS_EXPORT void opus_decoder_destroy(Op
   */
 OPUS_EXPORT int opus_packet_parse(
    const unsigned char *data,
    opus_int32 len,
    unsigned char *out_toc,
    const unsigned char *frames[48],
    opus_int16 size[48],
    int *payload_offset
-) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(5);
 
 /** Gets the bandwidth of an Opus packet.
   * @param [in] data <tt>char*</tt>: Opus packet
   * @retval OPUS_BANDWIDTH_NARROWBAND Narrowband (4kHz bandpass)
   * @retval OPUS_BANDWIDTH_MEDIUMBAND Mediumband (6kHz bandpass)
   * @retval OPUS_BANDWIDTH_WIDEBAND Wideband (8kHz bandpass)
   * @retval OPUS_BANDWIDTH_SUPERWIDEBAND Superwideband (12kHz bandpass)
   * @retval OPUS_BANDWIDTH_FULLBAND Fullband (20kHz bandpass)
--- a/media/libopus/include/opus_defines.h
+++ b/media/libopus/include/opus_defines.h
@@ -164,16 +164,19 @@ extern "C" {
 #define OPUS_SET_EXPERT_FRAME_DURATION_REQUEST 4040
 #define OPUS_GET_EXPERT_FRAME_DURATION_REQUEST 4041
 #define OPUS_SET_PREDICTION_DISABLED_REQUEST 4042
 #define OPUS_GET_PREDICTION_DISABLED_REQUEST 4043
 /* Don't use 4045, it's already taken by OPUS_GET_GAIN_REQUEST */
 #define OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST 4046
 #define OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST 4047
 
+/** Defines for the presence of extended APIs. */
+#define OPUS_HAVE_OPUS_PROJECTION_H
+
 /* Macros to trigger compilation errors when the wrong types are provided to a CTL */
 #define __opus_check_int(x) (((void)((x) == (opus_int32)0)), (opus_int32)(x))
 #define __opus_check_int_ptr(ptr) ((ptr) + ((ptr) - (opus_int32*)(ptr)))
 #define __opus_check_uint_ptr(ptr) ((ptr) + ((ptr) - (opus_uint32*)(ptr)))
 #define __opus_check_val16_ptr(ptr) ((ptr) + ((ptr) - (opus_val16*)(ptr)))
 /** @endcond */
 
 /** @defgroup opus_ctlvalues Pre-defined values for CTL interface
new file mode 100644
--- /dev/null
+++ b/media/libopus/include/opus_projection.h
@@ -0,0 +1,568 @@
+/* Copyright (c) 2017 Google Inc.
+   Written by Andrew Allen */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/**
+ * @file opus_projection.h
+ * @brief Opus projection reference API
+ */
+
+#ifndef OPUS_PROJECTION_H
+#define OPUS_PROJECTION_H
+
+#include "opus_multistream.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @cond OPUS_INTERNAL_DOC */
+
+/** These are the actual encoder and decoder CTL ID numbers.
+  * They should not be used directly by applications.
+  * In general, SETs should be even and GETs should be odd.*/
+/**@{*/
+#define OPUS_PROJECTION_GET_DEMIXING_MATRIX_GAIN_REQUEST    6001
+#define OPUS_PROJECTION_GET_DEMIXING_MATRIX_SIZE_REQUEST    6003
+#define OPUS_PROJECTION_GET_DEMIXING_MATRIX_REQUEST         6005
+/**@}*/
+
+
+/** @endcond */
+
+/** @defgroup opus_projection_ctls Projection specific encoder and decoder CTLs
+  *
+  * These are convenience macros that are specific to the
+  * opus_projection_encoder_ctl() and opus_projection_decoder_ctl()
+  * interface.
+  * The CTLs from @ref opus_genericctls, @ref opus_encoderctls,
+  * @ref opus_decoderctls, and @ref opus_multistream_ctls may be applied to a
+  * projection encoder or decoder as well.
+  */
+/**@{*/
+
+/** Gets the gain (in dB, S7.8 format) of the demixing matrix from the encoder.
+  * @param[out] x <tt>opus_int32 *</tt>: Returns the gain (in dB, S7.8 format)
+  *                                      of the demixing matrix.
+  * @hideinitializer
+  */
+#define OPUS_PROJECTION_GET_DEMIXING_MATRIX_GAIN(x) OPUS_PROJECTION_GET_DEMIXING_MATRIX_GAIN_REQUEST, __opus_check_int_ptr(x)
+
+
+/** Gets the size in bytes of the demixing matrix from the encoder.
+  * @param[out] x <tt>opus_int32 *</tt>: Returns the size in bytes of the
+  *                                      demixing matrix.
+  * @hideinitializer
+  */
+#define OPUS_PROJECTION_GET_DEMIXING_MATRIX_SIZE(x) OPUS_PROJECTION_GET_DEMIXING_MATRIX_SIZE_REQUEST, __opus_check_int_ptr(x)
+
+
+/** Copies the demixing matrix to the supplied pointer location.
+  * @param[out] x <tt>unsigned char *</tt>: Returns the demixing matrix to the
+  *                                         supplied pointer location.
+  * @param y <tt>opus_int32</tt>: The size in bytes of the reserved memory at the
+  *                              pointer location.
+  * @hideinitializer
+  */
+#define OPUS_PROJECTION_GET_DEMIXING_MATRIX(x,y) OPUS_PROJECTION_GET_DEMIXING_MATRIX_REQUEST, x, __opus_check_int(y)
+
+
+/**@}*/
+
+/** Opus projection encoder state.
+ * This contains the complete state of a projection Opus encoder.
+ * It is position independent and can be freely copied.
+ * @see opus_projection_ambisonics_encoder_create
+ */
+typedef struct OpusProjectionEncoder OpusProjectionEncoder;
+
+
+/** Opus projection decoder state.
+  * This contains the complete state of a projection Opus decoder.
+  * It is position independent and can be freely copied.
+  * @see opus_projection_decoder_create
+  * @see opus_projection_decoder_init
+  */
+typedef struct OpusProjectionDecoder OpusProjectionDecoder;
+
+
+/**\name Projection encoder functions */
+/**@{*/
+
+/** Gets the size of an OpusProjectionEncoder structure.
+  * @param channels <tt>int</tt>: The total number of input channels to encode.
+  *                               This must be no more than 255.
+  * @param mapping_family <tt>int</tt>: The mapping family to use for selecting
+  *                                     the appropriate projection.
+  * @returns The size in bytes on success, or a negative error code
+  *          (see @ref opus_errorcodes) on error.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_projection_ambisonics_encoder_get_size(
+    int channels,
+    int mapping_family
+);
+
+
+/** Allocates and initializes a projection encoder state.
+  * Call opus_projection_encoder_destroy() to release
+  * this object when finished.
+  * @param Fs <tt>opus_int32</tt>: Sampling rate of the input signal (in Hz).
+  *                                This must be one of 8000, 12000, 16000,
+  *                                24000, or 48000.
+  * @param channels <tt>int</tt>: Number of channels in the input signal.
+  *                               This must be at most 255.
+  *                               It may be greater than the number of
+  *                               coded channels (<code>streams +
+  *                               coupled_streams</code>).
+  * @param mapping_family <tt>int</tt>: The mapping family to use for selecting
+  *                                     the appropriate projection.
+  * @param[out] streams <tt>int *</tt>: The total number of streams that will
+  *                                     be encoded from the input.
+  * @param[out] coupled_streams <tt>int *</tt>: Number of coupled (2 channel)
+  *                                 streams that will be encoded from the input.
+  * @param application <tt>int</tt>: The target encoder application.
+  *                                  This must be one of the following:
+  * <dl>
+  * <dt>#OPUS_APPLICATION_VOIP</dt>
+  * <dd>Process signal for improved speech intelligibility.</dd>
+  * <dt>#OPUS_APPLICATION_AUDIO</dt>
+  * <dd>Favor faithfulness to the original input.</dd>
+  * <dt>#OPUS_APPLICATION_RESTRICTED_LOWDELAY</dt>
+  * <dd>Configure the minimum possible coding delay by disabling certain modes
+  * of operation.</dd>
+  * </dl>
+  * @param[out] error <tt>int *</tt>: Returns #OPUS_OK on success, or an error
+  *                                   code (see @ref opus_errorcodes) on
+  *                                   failure.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusProjectionEncoder *opus_projection_ambisonics_encoder_create(
+    opus_int32 Fs,
+    int channels,
+    int mapping_family,
+    int *streams,
+    int *coupled_streams,
+    int application,
+    int *error
+) OPUS_ARG_NONNULL(4) OPUS_ARG_NONNULL(5);
+
+
+/** Initialize a previously allocated projection encoder state.
+  * The memory pointed to by \a st must be at least the size returned by
+  * opus_projection_ambisonics_encoder_get_size().
+  * This is intended for applications which use their own allocator instead of
+  * malloc.
+  * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL.
+  * @see opus_projection_ambisonics_encoder_create
+  * @see opus_projection_ambisonics_encoder_get_size
+  * @param st <tt>OpusProjectionEncoder*</tt>: Projection encoder state to initialize.
+  * @param Fs <tt>opus_int32</tt>: Sampling rate of the input signal (in Hz).
+  *                                This must be one of 8000, 12000, 16000,
+  *                                24000, or 48000.
+  * @param channels <tt>int</tt>: Number of channels in the input signal.
+  *                               This must be at most 255.
+  *                               It may be greater than the number of
+  *                               coded channels (<code>streams +
+  *                               coupled_streams</code>).
+  * @param mapping_family <tt>int</tt>: The mapping family to use for selecting
+  *                                     the appropriate projection.
+  * @param[out] streams <tt>int *</tt>: The total number of streams that will
+  *                                     be encoded from the input.
+  *                                     This is no more than the number of channels.
+  * @param[out] coupled_streams <tt>int *</tt>: Number of coupled (2 channel)
+  *                                 streams that will be encoded from the input.
+  *                                 This is no larger than the total number of
+  *                                 streams, and the total number of encoded
+  *                                 channels (<code>streams + coupled_streams</code>)
+  *                                 is no more than the number of input channels.
+  * @param application <tt>int</tt>: The target encoder application.
+  *                                  This must be one of the following:
+  * <dl>
+  * <dt>#OPUS_APPLICATION_VOIP</dt>
+  * <dd>Process signal for improved speech intelligibility.</dd>
+  * <dt>#OPUS_APPLICATION_AUDIO</dt>
+  * <dd>Favor faithfulness to the original input.</dd>
+  * <dt>#OPUS_APPLICATION_RESTRICTED_LOWDELAY</dt>
+  * <dd>Configure the minimum possible coding delay by disabling certain modes
+  * of operation.</dd>
+  * </dl>
+  * @returns #OPUS_OK on success, or an error code (see @ref opus_errorcodes)
+  *          on failure.
+  */
+OPUS_EXPORT int opus_projection_ambisonics_encoder_init(
+    OpusProjectionEncoder *st,
+    opus_int32 Fs,
+    int channels,
+    int mapping_family,
+    int *streams,
+    int *coupled_streams,
+    int application
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(5) OPUS_ARG_NONNULL(6);
+
+
+/** Encodes a projection Opus frame.
+  * @param st <tt>OpusProjectionEncoder*</tt>: Projection encoder state.
+  * @param[in] pcm <tt>const opus_int16*</tt>: The input signal as interleaved
+  *                                            samples.
+  *                                            This must contain
+  *                                            <code>frame_size*channels</code>
+  *                                            samples.
+  * @param frame_size <tt>int</tt>: Number of samples per channel in the input
+  *                                 signal.
+  *                                 This must be an Opus frame size for the
+  *                                 encoder's sampling rate.
+  *                                 For example, at 48 kHz the permitted values
+  *                                 are 120, 240, 480, 960, 1920, and 2880.
+  *                                 Passing in a duration of less than 10 ms
+  *                                 (480 samples at 48 kHz) will prevent the
+  *                                 encoder from using the LPC or hybrid modes.
+  * @param[out] data <tt>unsigned char*</tt>: Output payload.
+  *                                           This must contain storage for at
+  *                                           least \a max_data_bytes.
+  * @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated
+  *                                                 memory for the output
+  *                                                 payload. This may be
+  *                                                 used to impose an upper limit on
+  *                                                 the instant bitrate, but should
+  *                                                 not be used as the only bitrate
+  *                                                 control. Use #OPUS_SET_BITRATE to
+  *                                                 control the bitrate.
+  * @returns The length of the encoded packet (in bytes) on success or a
+  *          negative error code (see @ref opus_errorcodes) on failure.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_projection_encode(
+    OpusProjectionEncoder *st,
+    const opus_int16 *pcm,
+    int frame_size,
+    unsigned char *data,
+    opus_int32 max_data_bytes
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
+
+
+/** Encodes a projection Opus frame from floating point input.
+  * @param st <tt>OpusProjectionEncoder*</tt>: Projection encoder state.
+  * @param[in] pcm <tt>const float*</tt>: The input signal as interleaved
+  *                                       samples with a normal range of
+  *                                       +/-1.0.
+  *                                       Samples with a range beyond +/-1.0
+  *                                       are supported but will be clipped by
+  *                                       decoders using the integer API and
+  *                                       should only be used if it is known
+  *                                       that the far end supports extended
+  *                                       dynamic range.
+  *                                       This must contain
+  *                                       <code>frame_size*channels</code>
+  *                                       samples.
+  * @param frame_size <tt>int</tt>: Number of samples per channel in the input
+  *                                 signal.
+  *                                 This must be an Opus frame size for the
+  *                                 encoder's sampling rate.
+  *                                 For example, at 48 kHz the permitted values
+  *                                 are 120, 240, 480, 960, 1920, and 2880.
+  *                                 Passing in a duration of less than 10 ms
+  *                                 (480 samples at 48 kHz) will prevent the
+  *                                 encoder from using the LPC or hybrid modes.
+  * @param[out] data <tt>unsigned char*</tt>: Output payload.
+  *                                           This must contain storage for at
+  *                                           least \a max_data_bytes.
+  * @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated
+  *                                                 memory for the output
+  *                                                 payload. This may be
+  *                                                 used to impose an upper limit on
+  *                                                 the instant bitrate, but should
+  *                                                 not be used as the only bitrate
+  *                                                 control. Use #OPUS_SET_BITRATE to
+  *                                                 control the bitrate.
+  * @returns The length of the encoded packet (in bytes) on success or a
+  *          negative error code (see @ref opus_errorcodes) on failure.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_projection_encode_float(
+    OpusProjectionEncoder *st,
+    const float *pcm,
+    int frame_size,
+    unsigned char *data,
+    opus_int32 max_data_bytes
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
+
+
+/** Frees an <code>OpusProjectionEncoder</code> allocated by
+  * opus_projection_ambisonics_encoder_create().
+  * @param st <tt>OpusProjectionEncoder*</tt>: Projection encoder state to be freed.
+  */
+OPUS_EXPORT void opus_projection_encoder_destroy(OpusProjectionEncoder *st);
+
+
+/** Perform a CTL function on a projection Opus encoder.
+  *
+  * Generally the request and subsequent arguments are generated by a
+  * convenience macro.
+  * @param st <tt>OpusProjectionEncoder*</tt>: Projection encoder state.
+  * @param request This and all remaining parameters should be replaced by one
+  *                of the convenience macros in @ref opus_genericctls,
+  *                @ref opus_encoderctls, @ref opus_multistream_ctls, or
+  *                @ref opus_projection_ctls
+  * @see opus_genericctls
+  * @see opus_encoderctls
+  * @see opus_multistream_ctls
+  * @see opus_projection_ctls
+  */
+OPUS_EXPORT int opus_projection_encoder_ctl(OpusProjectionEncoder *st, int request, ...) OPUS_ARG_NONNULL(1);
+
+
+/**@}*/
+
+/**\name Projection decoder functions */
+/**@{*/
+
+/** Gets the size of an <code>OpusProjectionDecoder</code> structure.
+  * @param channels <tt>int</tt>: The total number of output channels.
+  *                               This must be no more than 255.
+  * @param streams <tt>int</tt>: The total number of streams coded in the
+  *                              input.
+  *                              This must be no more than 255.
+  * @param coupled_streams <tt>int</tt>: Number of streams to decode as coupled
+  *                                      (2 channel) streams.
+  *                                      This must be no larger than the total
+  *                                      number of streams.
+  *                                      Additionally, the total number of
+  *                                      coded channels (<code>streams +
+  *                                      coupled_streams</code>) must be no
+  *                                      more than 255.
+  * @returns The size in bytes on success, or a negative error code
+  *          (see @ref opus_errorcodes) on error.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_projection_decoder_get_size(
+    int channels,
+    int streams,
+    int coupled_streams
+);
+
+
+/** Allocates and initializes a projection decoder state.
+  * Call opus_projection_decoder_destroy() to release
+  * this object when finished.
+  * @param Fs <tt>opus_int32</tt>: Sampling rate to decode at (in Hz).
+  *                                This must be one of 8000, 12000, 16000,
+  *                                24000, or 48000.
+  * @param channels <tt>int</tt>: Number of channels to output.
+  *                               This must be at most 255.
+  *                               It may be different from the number of coded
+  *                               channels (<code>streams +
+  *                               coupled_streams</code>).
+  * @param streams <tt>int</tt>: The total number of streams coded in the
+  *                              input.
+  *                              This must be no more than 255.
+  * @param coupled_streams <tt>int</tt>: Number of streams to decode as coupled
+  *                                      (2 channel) streams.
+  *                                      This must be no larger than the total
+  *                                      number of streams.
+  *                                      Additionally, the total number of
+  *                                      coded channels (<code>streams +
+  *                                      coupled_streams</code>) must be no
+  *                                      more than 255.
+  * @param[in] demixing_matrix <tt>const unsigned char[demixing_matrix_size]</tt>: Demixing matrix
+  *                         that maps from coded channels to output channels,
+  *                         as described in @ref opus_projection and
+  *                         @ref opus_projection_ctls.
+  * @param demixing_matrix_size <tt>opus_int32</tt>: The size in bytes of the
+  *                                                  demixing matrix, as
+  *                                                  described in @ref
+  *                                                  opus_projection_ctls.
+  * @param[out] error <tt>int *</tt>: Returns #OPUS_OK on success, or an error
+  *                                   code (see @ref opus_errorcodes) on
+  *                                   failure.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusProjectionDecoder *opus_projection_decoder_create(
+    opus_int32 Fs,
+    int channels,
+    int streams,
+    int coupled_streams,
+    unsigned char *demixing_matrix,
+    opus_int32 demixing_matrix_size,
+    int *error
+) OPUS_ARG_NONNULL(5);
+
+
+/** Initialize a previously allocated projection decoder state object.
+  * The memory pointed to by \a st must be at least the size returned by
+  * opus_projection_decoder_get_size().
+  * This is intended for applications which use their own allocator instead of
+  * malloc.
+  * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL.
+  * @see opus_projection_decoder_create
+  * @see opus_projection_decoder_get_size
+  * @param st <tt>OpusProjectionDecoder*</tt>: Projection decoder state to initialize.
+  * @param Fs <tt>opus_int32</tt>: Sampling rate to decode at (in Hz).
+  *                                This must be one of 8000, 12000, 16000,
+  *                                24000, or 48000.
+  * @param channels <tt>int</tt>: Number of channels to output.
+  *                               This must be at most 255.
+  *                               It may be different from the number of coded
+  *                               channels (<code>streams +
+  *                               coupled_streams</code>).
+  * @param streams <tt>int</tt>: The total number of streams coded in the
+  *                              input.
+  *                              This must be no more than 255.
+  * @param coupled_streams <tt>int</tt>: Number of streams to decode as coupled
+  *                                      (2 channel) streams.
+  *                                      This must be no larger than the total
+  *                                      number of streams.
+  *                                      Additionally, the total number of
+  *                                      coded channels (<code>streams +
+  *                                      coupled_streams</code>) must be no
+  *                                      more than 255.
+  * @param[in] demixing_matrix <tt>const unsigned char[demixing_matrix_size]</tt>: Demixing matrix
+  *                         that maps from coded channels to output channels,
+  *                         as described in @ref opus_projection and
+  *                         @ref opus_projection_ctls.
+  * @param demixing_matrix_size <tt>opus_int32</tt>: The size in bytes of the
+  *                                                  demixing matrix, as
+  *                                                  described in @ref
+  *                                                  opus_projection_ctls.
+  * @returns #OPUS_OK on success, or an error code (see @ref opus_errorcodes)
+  *          on failure.
+  */
+OPUS_EXPORT int opus_projection_decoder_init(
+    OpusProjectionDecoder *st,
+    opus_int32 Fs,
+    int channels,
+    int streams,
+    int coupled_streams,
+    unsigned char *demixing_matrix,
+    opus_int32 demixing_matrix_size
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(6);
+
+
+/** Decode a projection Opus packet.
+  * @param st <tt>OpusProjectionDecoder*</tt>: Projection decoder state.
+  * @param[in] data <tt>const unsigned char*</tt>: Input payload.
+  *                                                Use a <code>NULL</code>
+  *                                                pointer to indicate packet
+  *                                                loss.
+  * @param len <tt>opus_int32</tt>: Number of bytes in payload.
+  * @param[out] pcm <tt>opus_int16*</tt>: Output signal, with interleaved
+  *                                       samples.
+  *                                       This must contain room for
+  *                                       <code>frame_size*channels</code>
+  *                                       samples.
+  * @param frame_size <tt>int</tt>: The number of samples per channel of
+  *                                 available space in \a pcm.
+  *                                 If this is less than the maximum packet duration
+  *                                 (120 ms; 5760 for 48kHz), this function will not be capable
+  *                                 of decoding some packets. In the case of PLC (data==NULL)
+  *                                 or FEC (decode_fec=1), then frame_size needs to be exactly
+  *                                 the duration of audio that is missing, otherwise the
+  *                                 decoder will not be in the optimal state to decode the
+  *                                 next incoming packet. For the PLC and FEC cases, frame_size
+  *                                 <b>must</b> be a multiple of 2.5 ms.
+  * @param decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band
+  *                                 forward error correction data be decoded.
+  *                                 If no such data is available, the frame is
+  *                                 decoded as if it were lost.
+  * @returns Number of samples decoded on success or a negative error code
+  *          (see @ref opus_errorcodes) on failure.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_projection_decode(
+    OpusProjectionDecoder *st,
+    const unsigned char *data,
+    opus_int32 len,
+    opus_int16 *pcm,
+    int frame_size,
+    int decode_fec
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
+
+/** Decode a projection Opus packet with floating point output.
+  * @param st <tt>OpusProjectionDecoder*</tt>: Projection decoder state.
+  * @param[in] data <tt>const unsigned char*</tt>: Input payload.
+  *                                                Use a <code>NULL</code>
+  *                                                pointer to indicate packet
+  *                                                loss.
+  * @param len <tt>opus_int32</tt>: Number of bytes in payload.
+  * @param[out] pcm <tt>float*</tt>: Output signal, with interleaved
+  *                                       samples.
+  *                                       This must contain room for
+  *                                       <code>frame_size*channels</code>
+  *                                       samples.
+  * @param frame_size <tt>int</tt>: The number of samples per channel of
+  *                                 available space in \a pcm.
+  *                                 If this is less than the maximum packet duration
+  *                                 (120 ms; 5760 for 48kHz), this function will not be capable
+  *                                 of decoding some packets. In the case of PLC (data==NULL)
+  *                                 or FEC (decode_fec=1), then frame_size needs to be exactly
+  *                                 the duration of audio that is missing, otherwise the
+  *                                 decoder will not be in the optimal state to decode the
+  *                                 next incoming packet. For the PLC and FEC cases, frame_size
+  *                                 <b>must</b> be a multiple of 2.5 ms.
+  * @param decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band
+  *                                 forward error correction data be decoded.
+  *                                 If no such data is available, the frame is
+  *                                 decoded as if it were lost.
+  * @returns Number of samples decoded on success or a negative error code
+  *          (see @ref opus_errorcodes) on failure.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_projection_decode_float(
+    OpusProjectionDecoder *st,
+    const unsigned char *data,
+    opus_int32 len,
+    float *pcm,
+    int frame_size,
+    int decode_fec
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
+
+/** Perform a CTL function on a projection Opus decoder.
+  *
+  * Generally the request and subsequent arguments are generated by a
+  * convenience macro.
+  * @param st <tt>OpusProjectionDecoder*</tt>: Projection decoder state.
+  * @param request This and all remaining parameters should be replaced by one
+  *                of the convenience macros in @ref opus_genericctls,
+  *                @ref opus_decoderctls, @ref opus_multistream_ctls, or
+  *                @ref opus_projection_ctls.
+  * @see opus_genericctls
+  * @see opus_decoderctls
+  * @see opus_multistream_ctls
+  * @see opus_projection_ctls
+  */
+OPUS_EXPORT int opus_projection_decoder_ctl(OpusProjectionDecoder *st, int request, ...) OPUS_ARG_NONNULL(1);
+
+
+/** Frees an <code>OpusProjectionDecoder</code> allocated by
+  * opus_projection_decoder_create().
+  * @param st <tt>OpusProjectionDecoder*</tt>: Projection decoder state to be freed.
+  */
+OPUS_EXPORT void opus_projection_decoder_destroy(OpusProjectionDecoder *st);
+
+
+/**@}*/
+
+/**@}*/
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* OPUS_PROJECTION_H */
--- a/media/libopus/include/opus_types.h
+++ b/media/libopus/include/opus_types.h
@@ -28,24 +28,39 @@
 
 /**
    @file opus_types.h
    @brief Opus reference implementation types
 */
 #ifndef OPUS_TYPES_H
 #define OPUS_TYPES_H
 
+#define opus_int         int                     /* used for counters etc; at least 16 bits */
+#define opus_int64       long long
+#define opus_int8        signed char
+
+#define opus_uint        unsigned int            /* used for counters etc; at least 16 bits */
+#define opus_uint64      unsigned long long
+#define opus_uint8       unsigned char
+
 /* Use the real stdint.h if it's there (taken from Paul Hsieh's pstdint.h) */
 #if (defined(__STDC__) && __STDC__ && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) || defined (HAVE_STDINT_H))
 #include <stdint.h>
-
+#  undef opus_int64
+#  undef opus_int8
+#  undef opus_uint64
+#  undef opus_uint8
+   typedef int8_t opus_int8;
+   typedef uint8_t opus_uint8;
    typedef int16_t opus_int16;
    typedef uint16_t opus_uint16;
    typedef int32_t opus_int32;
    typedef uint32_t opus_uint32;
+   typedef int64_t opus_int64;
+   typedef uint64_t opus_uint64;
 #elif defined(_WIN32)
 
 #  if defined(__CYGWIN__)
 #    include <_G_config.h>
      typedef _G_int32_t opus_int32;
      typedef _G_uint32_t opus_uint32;
      typedef _G_int16 opus_int16;
      typedef _G_uint16 opus_uint16;
@@ -143,17 +158,9 @@
    /* Give up, take a reasonable guess */
    typedef short opus_int16;
    typedef unsigned short opus_uint16;
    typedef int opus_int32;
    typedef unsigned int opus_uint32;
 
 #endif
 
-#define opus_int         int                     /* used for counters etc; at least 16 bits */
-#define opus_int64       long long
-#define opus_int8        signed char
-
-#define opus_uint        unsigned int            /* used for counters etc; at least 16 bits */
-#define opus_uint64      unsigned long long
-#define opus_uint8       unsigned char
-
 #endif  /* OPUS_TYPES_H */
--- a/media/libopus/moz.build
+++ b/media/libopus/moz.build
@@ -15,17 +15,17 @@ EXPORTS.opus += [
 ]
 
 # We allow warnings for third-party code that can be updated from upstream.
 AllowCompilerWarnings()
 
 FINAL_LIBRARY = 'gkmedias'
 
 DEFINES['OPUS_BUILD'] = True
-DEFINES['OPUS_VERSION'] = '"v1.2.1-mozilla"'
+DEFINES['OPUS_VERSION'] = '"v1.3-rc-19-g5cbd7d5f-mozilla"'
 DEFINES['USE_ALLOCA'] = True
 
 # Don't export symbols
 DEFINES['OPUS_EXPORT'] = ''
 
 if CONFIG['CPU_ARCH'] == 'arm' and CONFIG['GNU_AS']:
     DEFINES['OPUS_ARM_ASM'] = True
     DEFINES['OPUS_ARM_EXTERNAL_ASM'] = True
--- a/media/libopus/silk/API.h
+++ b/media/libopus/silk/API.h
@@ -75,17 +75,18 @@ opus_int silk_InitEncoder(              
 /* encControl->payloadSize_ms is set to                                                                         */
 opus_int silk_Encode(                                   /* O    Returns error code                              */
     void                            *encState,          /* I/O  State                                           */
     silk_EncControlStruct           *encControl,        /* I    Control status                                  */
     const opus_int16                *samplesIn,         /* I    Speech sample input vector                      */
     opus_int                        nSamplesIn,         /* I    Number of samples in input vector               */
     ec_enc                          *psRangeEnc,        /* I/O  Compressor data structure                       */
     opus_int32                      *nBytesOut,         /* I/O  Number of bytes in payload (input: Max bytes)   */
-    const opus_int                  prefillFlag         /* I    Flag to indicate prefilling buffers no coding   */
+    const opus_int                  prefillFlag,        /* I    Flag to indicate prefilling buffers no coding   */
+    int                             activity            /* I    Decision of Opus voice activity detector        */
 );
 
 /****************************************/
 /* Decoder functions                    */
 /****************************************/
 
 /***********************************************/
 /* Get size in bytes of the Silk decoder state */
--- a/media/libopus/silk/CNG.c
+++ b/media/libopus/silk/CNG.c
@@ -141,18 +141,18 @@ void silk_CNG(
 
         silk_CNG_exc( CNG_sig_Q14 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, length, &psCNG->rand_seed );
 
         /* Convert CNG NLSF to filter representation */
         silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order, psDec->arch );
 
         /* Generate CNG signal, by synthesis filtering */
         silk_memcpy( CNG_sig_Q14, psCNG->CNG_synth_state, MAX_LPC_ORDER * sizeof( opus_int32 ) );
+        celt_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 );
         for( i = 0; i < length; i++ ) {
-            silk_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 );
             /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
             LPC_pred_Q10 = silk_RSHIFT( psDec->LPC_order, 1 );
             LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  1 ], A_Q12[ 0 ] );
             LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  2 ], A_Q12[ 1 ] );
             LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  3 ], A_Q12[ 2 ] );
             LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  4 ], A_Q12[ 3 ] );
             LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  5 ], A_Q12[ 4 ] );
             LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  6 ], A_Q12[ 5 ] );
--- a/media/libopus/silk/LPC_analysis_filter.c
+++ b/media/libopus/silk/LPC_analysis_filter.c
@@ -59,22 +59,22 @@ void silk_LPC_analysis_filter(
 #if defined(FIXED_POINT) && USE_CELT_FIR
     opus_int16 num[SILK_MAX_ORDER_LPC];
 #else
     int ix;
     opus_int32       out32_Q12, out32;
     const opus_int16 *in_ptr;
 #endif
 
-    silk_assert( d >= 6 );
-    silk_assert( (d & 1) == 0 );
-    silk_assert( d <= len );
+    celt_assert( d >= 6 );
+    celt_assert( (d & 1) == 0 );
+    celt_assert( d <= len );
 
 #if defined(FIXED_POINT) && USE_CELT_FIR
-    silk_assert( d <= SILK_MAX_ORDER_LPC );
+    celt_assert( d <= SILK_MAX_ORDER_LPC );
     for ( j = 0; j < d; j++ ) {
         num[ j ] = -B[ j ];
     }
     celt_fir( in + d, num, out + d, len - d, d, arch );
     for ( j = 0; j < d; j++ ) {
         out[ j ] = 0;
     }
 #else
--- a/media/libopus/silk/NLSF2A.c
+++ b/media/libopus/silk/NLSF2A.c
@@ -81,17 +81,17 @@ void silk_NLSF2A(
     const unsigned char *ordering;
     opus_int   k, i, dd;
     opus_int32 cos_LSF_QA[ SILK_MAX_ORDER_LPC ];
     opus_int32 P[ SILK_MAX_ORDER_LPC / 2 + 1 ], Q[ SILK_MAX_ORDER_LPC / 2 + 1 ];
     opus_int32 Ptmp, Qtmp, f_int, f_frac, cos_val, delta;
     opus_int32 a32_QA1[ SILK_MAX_ORDER_LPC ];
 
     silk_assert( LSF_COS_TAB_SZ_FIX == 128 );
-    silk_assert( d==10 || d==16 );
+    celt_assert( d==10 || d==16 );
 
     /* convert LSFs to 2*cos(LSF), using piecewise linear curve from table */
     ordering = d == 16 ? ordering16 : ordering10;
     for( k = 0; k < d; k++ ) {
         silk_assert( NLSF[k] >= 0 );
 
         /* f_int on a scale 0-127 (rounded down) */
         f_int = silk_RSHIFT( NLSF[k], 15 - 7 );
--- a/media/libopus/silk/NLSF_VQ.c
+++ b/media/libopus/silk/NLSF_VQ.c
@@ -41,17 +41,17 @@ void silk_NLSF_VQ(
     const opus_int              LPC_order                       /* I    Number of LPCs                              */
 )
 {
     opus_int         i, m;
     opus_int32       diff_Q15, diffw_Q24, sum_error_Q24, pred_Q24;
     const opus_int16 *w_Q9_ptr;
     const opus_uint8 *cb_Q8_ptr;
 
-    silk_assert( ( LPC_order & 1 ) == 0 );
+    celt_assert( ( LPC_order & 1 ) == 0 );
 
     /* Loop over codebook */
     cb_Q8_ptr = pCB_Q8;
     w_Q9_ptr = pWght_Q9;
     for( i = 0; i < K; i++ ) {
         sum_error_Q24 = 0;
         pred_Q24 = 0;
         for( m = LPC_order-2; m >= 0; m -= 2 ) {
--- a/media/libopus/silk/NLSF_VQ_weights_laroia.c
+++ b/media/libopus/silk/NLSF_VQ_weights_laroia.c
@@ -43,18 +43,18 @@ void silk_NLSF_VQ_weights_laroia(
     opus_int16                  *pNLSFW_Q_OUT,      /* O     Pointer to input vector weights [D]                        */
     const opus_int16            *pNLSF_Q15,         /* I     Pointer to input vector         [D]                        */
     const opus_int              D                   /* I     Input vector dimension (even)                              */
 )
 {
     opus_int   k;
     opus_int32 tmp1_int, tmp2_int;
 
-    silk_assert( D > 0 );
-    silk_assert( ( D & 1 ) == 0 );
+    celt_assert( D > 0 );
+    celt_assert( ( D & 1 ) == 0 );
 
     /* First value */
     tmp1_int = silk_max_int( pNLSF_Q15[ 0 ], 1 );
     tmp1_int = silk_DIV32_16( (opus_int32)1 << ( 15 + NLSF_W_Q ), tmp1_int );
     tmp2_int = silk_max_int( pNLSF_Q15[ 1 ] - pNLSF_Q15[ 0 ], 1 );
     tmp2_int = silk_DIV32_16( (opus_int32)1 << ( 15 + NLSF_W_Q ), tmp2_int );
     pNLSFW_Q_OUT[ 0 ] = (opus_int16)silk_min_int( tmp1_int + tmp2_int, silk_int16_MAX );
     silk_assert( pNLSFW_Q_OUT[ 0 ] > 0 );
--- a/media/libopus/silk/NLSF_encode.c
+++ b/media/libopus/silk/NLSF_encode.c
@@ -55,17 +55,17 @@ opus_int32 silk_NLSF_encode(            
     opus_int16       NLSF_tmp_Q15[ MAX_LPC_ORDER ];
     opus_int16       W_adj_Q5[     MAX_LPC_ORDER ];
     opus_uint8       pred_Q8[      MAX_LPC_ORDER ];
     opus_int16       ec_ix[        MAX_LPC_ORDER ];
     const opus_uint8 *pCB_element, *iCDF_ptr;
     const opus_int16 *pCB_Wght_Q9;
     SAVE_STACK;
 
-    silk_assert( signalType >= 0 && signalType <= 2 );
+    celt_assert( signalType >= 0 && signalType <= 2 );
     silk_assert( NLSF_mu_Q20 <= 32767 && NLSF_mu_Q20 >= 0 );
 
     /* NLSF stabilization */
     silk_NLSF_stabilize( pNLSF_Q15, psNLSF_CB->deltaMin_Q15, psNLSF_CB->order );
 
     /* First stage: VQ */
     ALLOC( err_Q24, psNLSF_CB->nVectors, opus_int32 );
     silk_NLSF_VQ( err_Q24, pNLSF_Q15, psNLSF_CB->CB1_NLSF_Q8, psNLSF_CB->CB1_Wght_Q9, psNLSF_CB->nVectors, psNLSF_CB->order );
--- a/media/libopus/silk/NSQ.c
+++ b/media/libopus/silk/NSQ.c
@@ -138,17 +138,17 @@ void silk_NSQ_c
         if( psIndices->signalType == TYPE_VOICED ) {
             /* Voiced */
             lag = pitchL[ k ];
 
             /* Re-whitening */
             if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) {
                 /* Rewhiten with new A coefs */
                 start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2;
-                silk_assert( start_idx > 0 );
+                celt_assert( start_idx > 0 );
 
                 silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ],
                     A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch );
 
                 NSQ->rewhite_flag = 1;
                 NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
             }
         }
@@ -242,25 +242,25 @@ void silk_noise_shape_quantizer(
             LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -3 ], b_Q14[ 3 ] );
             LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -4 ], b_Q14[ 4 ] );
             pred_lag_ptr++;
         } else {
             LTP_pred_Q13 = 0;
         }
 
         /* Noise shape feedback */
-        silk_assert( ( shapingLPCOrder & 1 ) == 0 );   /* check that order is even */
+        celt_assert( ( shapingLPCOrder & 1 ) == 0 );   /* check that order is even */
         n_AR_Q12 = silk_NSQ_noise_shape_feedback_loop(&NSQ->sDiff_shp_Q14, NSQ->sAR2_Q14, AR_shp_Q13, shapingLPCOrder, arch);
 
         n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sLF_AR_shp_Q14, Tilt_Q14 );
 
         n_LF_Q12 = silk_SMULWB( NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - 1 ], LF_shp_Q14 );
         n_LF_Q12 = silk_SMLAWT( n_LF_Q12, NSQ->sLF_AR_shp_Q14, LF_shp_Q14 );
 
-        silk_assert( lag > 0 || signalType != TYPE_VOICED );
+        celt_assert( lag > 0 || signalType != TYPE_VOICED );
 
         /* Combine prediction and noise shaping signals */
         tmp1 = silk_SUB32( silk_LSHIFT32( LPC_pred_Q10, 2 ), n_AR_Q12 );        /* Q12 */
         tmp1 = silk_SUB32( tmp1, n_LF_Q12 );                                    /* Q12 */
         if( lag > 0 ) {
             /* Symmetric, packed FIR coefficients */
             n_LTP_Q13 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 );
             n_LTP_Q13 = silk_SMLAWT( n_LTP_Q13, shp_lag_ptr[ -1 ],                      HarmShapeFIRPacked_Q14 );
--- a/media/libopus/silk/NSQ_del_dec.c
+++ b/media/libopus/silk/NSQ_del_dec.c
@@ -245,17 +245,17 @@ void silk_NSQ_del_dec_c(
                         NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ];
                     }
 
                     subfr = 0;
                 }
 
                 /* Rewhiten with new A coefs */
                 start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2;
-                silk_assert( start_idx > 0 );
+                celt_assert( start_idx > 0 );
 
                 silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ],
                     A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch );
 
                 NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
                 NSQ->rewhite_flag = 1;
             }
         }
@@ -356,17 +356,17 @@ static OPUS_INLINE void silk_noise_shape
     opus_int32   a_Q12_arch[MAX_LPC_ORDER];
 #endif
 
     VARDECL( NSQ_sample_pair, psSampleState );
     NSQ_del_dec_struct *psDD;
     NSQ_sample_struct  *psSS;
     SAVE_STACK;
 
-    silk_assert( nStatesDelayedDecision > 0 );
+    celt_assert( nStatesDelayedDecision > 0 );
     ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair );
 
     shp_lag_ptr  = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
     pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
     Gain_Q10     = silk_RSHIFT( Gain_Q16, 6 );
 
 #ifdef silk_short_prediction_create_arch_coef
     silk_short_prediction_create_arch_coef(a_Q12_arch, a_Q12, predictLPCOrder);
@@ -414,17 +414,17 @@ static OPUS_INLINE void silk_noise_shape
 
             /* Pointer used in short term prediction and shaping */
             psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ];
             /* Short-term prediction */
             LPC_pred_Q14 = silk_noise_shape_quantizer_short_prediction(psLPC_Q14, a_Q12, a_Q12_arch, predictLPCOrder, arch);
             LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 );                              /* Q10 -> Q14 */
 
             /* Noise shape feedback */
-            silk_assert( ( shapingLPCOrder & 1 ) == 0 );   /* check that order is even */
+            celt_assert( ( shapingLPCOrder & 1 ) == 0 );   /* check that order is even */
             /* Output of lowpass section */
             tmp2 = silk_SMLAWB( psDD->Diff_Q14, psDD->sAR2_Q14[ 0 ], warping_Q16 );
             /* Output of allpass section */
             tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 );
             psDD->sAR2_Q14[ 0 ] = tmp2;
             n_AR_Q14 = silk_RSHIFT( shapingLPCOrder, 1 );
             n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ 0 ] );
             /* Loop over allpass sections */
--- a/media/libopus/silk/PLC.c
+++ b/media/libopus/silk/PLC.c
@@ -286,17 +286,17 @@ static OPUS_INLINE void silk_PLC_conceal
     }
 
     rand_seed    = psPLC->rand_seed;
     lag          = silk_RSHIFT_ROUND( psPLC->pitchL_Q8, 8 );
     sLTP_buf_idx = psDec->ltp_mem_length;
 
     /* Rewhiten LTP state */
     idx = psDec->ltp_mem_length - lag - psDec->LPC_order - LTP_ORDER / 2;
-    silk_assert( idx > 0 );
+    celt_assert( idx > 0 );
     silk_LPC_analysis_filter( &sLTP[ idx ], &psDec->outBuf[ idx ], A_Q12, psDec->ltp_mem_length - idx, psDec->LPC_order, arch );
     /* Scale LTP state */
     inv_gain_Q30 = silk_INVERSE32_varQ( psPLC->prevGain_Q16[ 1 ], 46 );
     inv_gain_Q30 = silk_min( inv_gain_Q30, silk_int32_MAX >> 1 );
     for( i = idx + psDec->LPC_order; i < psDec->ltp_mem_length; i++ ) {
         sLTP_Q14[ i ] = silk_SMULWB( inv_gain_Q30, sLTP[ i ] );
     }
 
@@ -342,17 +342,17 @@ static OPUS_INLINE void silk_PLC_conceal
     /***************************/
     /* LPC synthesis filtering */
     /***************************/
     sLPC_Q14_ptr = &sLTP_Q14[ psDec->ltp_mem_length - MAX_LPC_ORDER ];
 
     /* Copy LPC state */
     silk_memcpy( sLPC_Q14_ptr, psDec->sLPC_Q14_buf, MAX_LPC_ORDER * sizeof( opus_int32 ) );
 
-    silk_assert( psDec->LPC_order >= 10 ); /* check that unrolling works */
+    celt_assert( psDec->LPC_order >= 10 ); /* check that unrolling works */
     for( i = 0; i < psDec->frame_length; i++ ) {
         /* partly unrolled */
         /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
         LPC_pred_Q10 = silk_RSHIFT( psDec->LPC_order, 1 );
         LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  1 ], A_Q12[ 0 ] );
         LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  2 ], A_Q12[ 1 ] );
         LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  3 ], A_Q12[ 2 ] );
         LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  4 ], A_Q12[ 3 ] );
--- a/media/libopus/silk/VAD.c
+++ b/media/libopus/silk/VAD.c
@@ -96,19 +96,19 @@ opus_int silk_VAD_GetSA_Q8_c(           
     opus_int32 speech_nrg, x_tmp;
     opus_int   X_offset[ VAD_N_BANDS ];
     opus_int   ret = 0;
     silk_VAD_state *psSilk_VAD = &psEncC->sVAD;
     SAVE_STACK;
 
     /* Safety checks */
     silk_assert( VAD_N_BANDS == 4 );
-    silk_assert( MAX_FRAME_LENGTH >= psEncC->frame_length );
-    silk_assert( psEncC->frame_length <= 512 );
-    silk_assert( psEncC->frame_length == 8 * silk_RSHIFT( psEncC->frame_length, 3 ) );
+    celt_assert( MAX_FRAME_LENGTH >= psEncC->frame_length );
+    celt_assert( psEncC->frame_length <= 512 );
+    celt_assert( psEncC->frame_length == 8 * silk_RSHIFT( psEncC->frame_length, 3 ) );
 
     /***********************/
     /* Filter and Decimate */
     /***********************/
     decimated_framelength1 = silk_RSHIFT( psEncC->frame_length, 1 );
     decimated_framelength2 = silk_RSHIFT( psEncC->frame_length, 2 );
     decimated_framelength = silk_RSHIFT( psEncC->frame_length, 3 );
     /* Decimate into 4 bands:
@@ -247,25 +247,24 @@ opus_int silk_VAD_GetSA_Q8_c(           
     /* Scale the sigmoid output based on power levels */
     /**************************************************/
     speech_nrg = 0;
     for( b = 0; b < VAD_N_BANDS; b++ ) {
         /* Accumulate signal-without-noise energies, higher frequency bands have more weight */
         speech_nrg += ( b + 1 ) * silk_RSHIFT( Xnrg[ b ] - psSilk_VAD->NL[ b ], 4 );
     }
 
+    if( psEncC->frame_length == 20 * psEncC->fs_kHz ) {
+        speech_nrg = silk_RSHIFT32( speech_nrg, 1 );
+    }
     /* Power scaling */
     if( speech_nrg <= 0 ) {
         SA_Q15 = silk_RSHIFT( SA_Q15, 1 );
-    } else if( speech_nrg < 32768 ) {
-        if( psEncC->frame_length == 10 * psEncC->fs_kHz ) {
-            speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 16 );
-        } else {
-            speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 15 );
-        }
+    } else if( speech_nrg < 16384 ) {
+        speech_nrg = silk_LSHIFT32( speech_nrg, 16 );
 
         /* square-root */
         speech_nrg = silk_SQRT_APPROX( speech_nrg );
         SA_Q15 = silk_SMULWB( 32768 + speech_nrg, SA_Q15 );
     }
 
     /* Copy the resulting speech activity in Q8 */
     psEncC->speech_activity_Q8 = silk_min_int( silk_RSHIFT( SA_Q15, 7 ), silk_uint8_MAX );
--- a/media/libopus/silk/arm/LPC_inv_pred_gain_neon_intr.c
+++ b/media/libopus/silk/arm/LPC_inv_pred_gain_neon_intr.c
@@ -212,23 +212,23 @@ opus_int32 silk_LPC_inverse_pred_gain_ne
         t1_s16x8 = vld1q_s16( A_Q12 +  8 );
         t2_s16x8 = vld1q_s16( A_Q12 + 16 );
         t0_s32x4 = vpaddlq_s16( t0_s16x8 );
 
         switch( order - leftover )
         {
         case 24:
             t0_s32x4 = vpadalq_s16( t0_s32x4, t2_s16x8 );
-            /* Intend to fall through */
+            /* FALLTHROUGH */
 
         case 16:
             t0_s32x4 = vpadalq_s16( t0_s32x4, t1_s16x8 );
             vst1q_s32( Atmp_QA + 16, vshll_n_s16( vget_low_s16 ( t2_s16x8 ), QA - 12 ) );
             vst1q_s32( Atmp_QA + 20, vshll_n_s16( vget_high_s16( t2_s16x8 ), QA - 12 ) );
-            /* Intend to fall through */
+            /* FALLTHROUGH */
 
         case 8:
         {
             const int32x2_t t_s32x2 = vpadd_s32( vget_low_s32( t0_s32x4 ), vget_high_s32( t0_s32x4 ) );
             const int64x1_t t_s64x1 = vpaddl_s32( t_s32x2 );
             DC_resp = vget_lane_s32( vreinterpret_s32_s64( t_s64x1 ), 0 );
             vst1q_s32( Atmp_QA +  8, vshll_n_s16( vget_low_s16 ( t1_s16x8 ), QA - 12 ) );
             vst1q_s32( Atmp_QA + 12, vshll_n_s16( vget_high_s16( t1_s16x8 ), QA - 12 ) );
@@ -241,27 +241,27 @@ opus_int32 silk_LPC_inverse_pred_gain_ne
         }
         A_Q12 += order - leftover;
 
         switch( leftover )
         {
         case 6:
             DC_resp += (opus_int32)A_Q12[ 5 ];
             DC_resp += (opus_int32)A_Q12[ 4 ];
-            /* Intend to fall through */
+            /* FALLTHROUGH */
 
         case 4:
             DC_resp += (opus_int32)A_Q12[ 3 ];
             DC_resp += (opus_int32)A_Q12[ 2 ];
-            /* Intend to fall through */
+            /* FALLTHROUGH */
 
         case 2:
             DC_resp += (opus_int32)A_Q12[ 1 ];
             DC_resp += (opus_int32)A_Q12[ 0 ];
-            /* Intend to fall through */
+            /* FALLTHROUGH */
 
         default:
             break;
         }
 
         /* If the DC is unstable, we don't even need to do the full calculations */
         if( DC_resp >= 4096 ) {
             invGain_Q30 = 0;
--- a/media/libopus/silk/check_control_input.c
+++ b/media/libopus/silk/check_control_input.c
@@ -33,17 +33,17 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "control.h"
 #include "errors.h"
 
 /* Check encoder control struct */
 opus_int check_control_input(
     silk_EncControlStruct        *encControl                    /* I    Control structure                           */
 )
 {
-    silk_assert( encControl != NULL );
+    celt_assert( encControl != NULL );
 
     if( ( ( encControl->API_sampleRate            !=  8000 ) &&
           ( encControl->API_sampleRate            != 12000 ) &&
           ( encControl->API_sampleRate            != 16000 ) &&
           ( encControl->API_sampleRate            != 24000 ) &&
           ( encControl->API_sampleRate            != 32000 ) &&
           ( encControl->API_sampleRate            != 44100 ) &&
           ( encControl->API_sampleRate            != 48000 ) ) ||
@@ -54,53 +54,53 @@ opus_int check_control_input(
           ( encControl->maxInternalSampleRate     != 12000 ) &&
           ( encControl->maxInternalSampleRate     != 16000 ) ) ||
         ( ( encControl->minInternalSampleRate     !=  8000 ) &&
           ( encControl->minInternalSampleRate     != 12000 ) &&
           ( encControl->minInternalSampleRate     != 16000 ) ) ||
           ( encControl->minInternalSampleRate > encControl->desiredInternalSampleRate ) ||
           ( encControl->maxInternalSampleRate < encControl->desiredInternalSampleRate ) ||
           ( encControl->minInternalSampleRate > encControl->maxInternalSampleRate ) ) {
-        silk_assert( 0 );
+        celt_assert( 0 );
         return SILK_ENC_FS_NOT_SUPPORTED;
     }
     if( encControl->payloadSize_ms != 10 &&
         encControl->payloadSize_ms != 20 &&
         encControl->payloadSize_ms != 40 &&
         encControl->payloadSize_ms != 60 ) {
-        silk_assert( 0 );
+        celt_assert( 0 );
         return SILK_ENC_PACKET_SIZE_NOT_SUPPORTED;
     }
     if( encControl->packetLossPercentage < 0 || encControl->packetLossPercentage > 100 ) {
-        silk_assert( 0 );
+        celt_assert( 0 );
         return SILK_ENC_INVALID_LOSS_RATE;
     }
     if( encControl->useDTX < 0 || encControl->useDTX > 1 ) {
-        silk_assert( 0 );
+        celt_assert( 0 );
         return SILK_ENC_INVALID_DTX_SETTING;
     }
     if( encControl->useCBR < 0 || encControl->useCBR > 1 ) {
-        silk_assert( 0 );
+        celt_assert( 0 );
         return SILK_ENC_INVALID_CBR_SETTING;
     }
     if( encControl->useInBandFEC < 0 || encControl->useInBandFEC > 1 ) {
-        silk_assert( 0 );
+        celt_assert( 0 );
         return SILK_ENC_INVALID_INBAND_FEC_SETTING;
     }
     if( encControl->nChannelsAPI < 1 || encControl->nChannelsAPI > ENCODER_NUM_CHANNELS ) {
-        silk_assert( 0 );
+        celt_assert( 0 );
         return SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR;
     }
     if( encControl->nChannelsInternal < 1 || encControl->nChannelsInternal > ENCODER_NUM_CHANNELS ) {
-        silk_assert( 0 );
+        celt_assert( 0 );
         return SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR;
     }
     if( encControl->nChannelsInternal > encControl->nChannelsAPI ) {
-        silk_assert( 0 );
+        celt_assert( 0 );
         return SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR;
     }
     if( encControl->complexity < 0 || encControl->complexity > 10 ) {
-        silk_assert( 0 );
+        celt_assert( 0 );
         return SILK_ENC_INVALID_COMPLEXITY_SETTING;
     }
 
     return SILK_NO_ERROR;
 }
--- a/media/libopus/silk/control_SNR.c
+++ b/media/libopus/silk/control_SNR.c
@@ -27,49 +27,87 @@ POSSIBILITY OF SUCH DAMAGE.
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 #include "main.h"
 #include "tuning_parameters.h"
 
+/* These tables hold SNR values divided by 21 (so they fit in 8 bits)
+   for different target bitrates spaced at 400 bps intervals. The first
+   10 values are omitted (0-4 kb/s) because they're all zeros.
+   These tables were obtained by running different SNRs through the
+   encoder and measuring the active bitrate. */
+static const unsigned char silk_TargetRate_NB_21[117 - 10] = {
+                                              0, 15, 39, 52, 61, 68,
+     74, 79, 84, 88, 92, 95, 99,102,105,108,111,114,117,119,122,124,
+    126,129,131,133,135,137,139,142,143,145,147,149,151,153,155,157,
+    158,160,162,163,165,167,168,170,171,173,174,176,177,179,180,182,
+    183,185,186,187,189,190,192,193,194,196,197,199,200,201,203,204,
+    205,207,208,209,211,212,213,215,216,217,219,220,221,223,224,225,
+    227,228,230,231,232,234,235,236,238,239,241,242,243,245,246,248,
+    249,250,252,253,255
+};
+
+static const unsigned char silk_TargetRate_MB_21[165 - 10] = {
+                                              0,  0, 28, 43, 52, 59,
+     65, 70, 74, 78, 81, 85, 87, 90, 93, 95, 98,100,102,105,107,109,
+    111,113,115,116,118,120,122,123,125,127,128,130,131,133,134,136,
+    137,138,140,141,143,144,145,147,148,149,151,152,153,154,156,157,
+    158,159,160,162,163,164,165,166,167,168,169,171,172,173,174,175,
+    176,177,178,179,180,181,182,183,184,185,186,187,188,188,189,190,
+    191,192,193,194,195,196,197,198,199,200,201,202,203,203,204,205,
+    206,207,208,209,210,211,212,213,214,214,215,216,217,218,219,220,
+    221,222,223,224,224,225,226,227,228,229,230,231,232,233,234,235,
+    236,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,
+    251,252,253,254,255
+};
+
+static const unsigned char silk_TargetRate_WB_21[201 - 10] = {
+                                              0,  0,  0,  8, 29, 41,
+     49, 56, 62, 66, 70, 74, 77, 80, 83, 86, 88, 91, 93, 95, 97, 99,
+    101,103,105,107,108,110,112,113,115,116,118,119,121,122,123,125,
+    126,127,129,130,131,132,134,135,136,137,138,140,141,142,143,144,
+    145,146,147,148,149,150,151,152,153,154,156,157,158,159,159,160,
+    161,162,163,164,165,166,167,168,169,170,171,171,172,173,174,175,
+    176,177,177,178,179,180,181,181,182,183,184,185,185,186,187,188,
+    189,189,190,191,192,192,193,194,195,195,196,197,198,198,199,200,
+    200,201,202,203,203,204,205,206,206,207,208,209,209,210,211,211,
+    212,213,214,214,215,216,216,217,218,219,219,220,221,221,222,223,
+    224,224,225,226,226,227,228,229,229,230,231,232,232,233,234,234,
+    235,236,237,237,238,239,240,240,241,242,243,243,244,245,246,246,
+    247,248,249,249,250,251,252,253,255
+};
+
 /* Control SNR of redidual quantizer */
 opus_int silk_control_SNR(
     silk_encoder_state          *psEncC,                        /* I/O  Pointer to Silk encoder state               */
     opus_int32                  TargetRate_bps                  /* I    Target max bitrate (bps)                    */
 )
 {
-    opus_int k, ret = SILK_NO_ERROR;
-    opus_int32 frac_Q6;
-    const opus_int32 *rateTable;
-
-    /* Set bitrate/coding quality */
-    TargetRate_bps = silk_LIMIT( TargetRate_bps, MIN_TARGET_RATE_BPS, MAX_TARGET_RATE_BPS );
-    if( TargetRate_bps != psEncC->TargetRate_bps ) {
-        psEncC->TargetRate_bps = TargetRate_bps;
-
-        /* If new TargetRate_bps, translate to SNR_dB value */
-        if( psEncC->fs_kHz == 8 ) {
-            rateTable = silk_TargetRate_table_NB;
-        } else if( psEncC->fs_kHz == 12 ) {
-            rateTable = silk_TargetRate_table_MB;
-        } else {
-            rateTable = silk_TargetRate_table_WB;
-        }
+    int id;                         /* Index into the selected SNR table */
+    int bound;                      /* Number of entries in the selected SNR table */
+    const unsigned char *snr_table; /* Table selected from psEncC->fs_kHz below */
 
-        /* Reduce bitrate for 10 ms modes in these calculations */
-        if( psEncC->nb_subfr == 2 ) {
-            TargetRate_bps -= REDUCE_BITRATE_10_MS_BPS;
-        }
-
-        /* Find bitrate interval in table and interpolate */
-        for( k = 1; k < TARGET_RATE_TAB_SZ; k++ ) {
-            if( TargetRate_bps <= rateTable[ k ] ) {
-                frac_Q6 = silk_DIV32( silk_LSHIFT( TargetRate_bps - rateTable[ k - 1 ], 6 ), rateTable[ k ] - rateTable[ k - 1 ] );
-                psEncC->SNR_dB_Q7 = silk_LSHIFT( silk_SNR_table_Q1[ k - 1 ], 6 ) + silk_MUL( frac_Q6, silk_SNR_table_Q1[ k ] - silk_SNR_table_Q1[ k - 1 ] );
-                break;
-            }
-        }
+    psEncC->TargetRate_bps = TargetRate_bps; /* Store the rate as passed in, before the adjustment below */
+    if( psEncC->nb_subfr == 2 ) { /* Two subframes per frame: presumably the 10 ms mode - confirm */
+        TargetRate_bps -= 2000 + psEncC->fs_kHz/16; /* NOTE(review): with an integer fs_kHz, fs_kHz/16 is 1 only when fs_kHz >= 16, else 0 - confirm intent */
     }
-
-    return ret;
+    if( psEncC->fs_kHz == 8 ) {
+        bound = sizeof(silk_TargetRate_NB_21); /* sizeof equals the entry count because elements are unsigned char */
+        snr_table = silk_TargetRate_NB_21;
+    } else if( psEncC->fs_kHz == 12 ) {
+        bound = sizeof(silk_TargetRate_MB_21);
+        snr_table = silk_TargetRate_MB_21;
+    } else { /* Any other fs_kHz value uses the WB table */
+        bound = sizeof(silk_TargetRate_WB_21);
+        snr_table = silk_TargetRate_WB_21;
+    }
+    id = (TargetRate_bps+200)/400; /* Nearest 400 bps step (for non-negative rates) */
+    id = silk_min(id - 10, bound-1); /* Skip the 10 omitted leading entries; cap at the last entry (assuming silk_min returns the smaller argument) */
+    if( id <= 0 ) { /* Also catches negative indices, so the table is never read out of bounds below its start */
+        psEncC->SNR_dB_Q7 = 0;
+    } else {
+        psEncC->SNR_dB_Q7 = snr_table[id]*21; /* Undo the divide-by-21 used to store the values in 8 bits */
+    }
+    return SILK_NO_ERROR;
 }
--- a/media/libopus/silk/control_audio_bandwidth.c
+++ b/media/libopus/silk/control_audio_bandwidth.c
@@ -34,19 +34,25 @@ POSSIBILITY OF SUCH DAMAGE.
 
 /* Control internal sampling rate */
 opus_int silk_control_audio_bandwidth(
     silk_encoder_state          *psEncC,                        /* I/O  Pointer to Silk encoder state               */
     silk_EncControlStruct       *encControl                     /* I    Control structure                           */
 )
 {
     opus_int   fs_kHz;
+    opus_int   orig_kHz;
     opus_int32 fs_Hz;
 
-    fs_kHz = psEncC->fs_kHz;
+    orig_kHz = psEncC->fs_kHz;
+    /* Handle a bandwidth-switching reset where we need to be aware of what the last sampling rate was. */
+    if( orig_kHz == 0 ) {
+        orig_kHz = psEncC->sLP.saved_fs_kHz;
+    }
+    fs_kHz = orig_kHz;
     fs_Hz = silk_SMULBB( fs_kHz, 1000 );
     if( fs_Hz == 0 ) {
         /* Encoder has just been initialized */
         fs_Hz  = silk_min( psEncC->desiredInternal_fs_Hz, psEncC->API_fs_Hz );
         fs_kHz = silk_DIV32_16( fs_Hz, 1000 );
     } else if( fs_Hz > psEncC->API_fs_Hz || fs_Hz > psEncC->maxInternal_fs_Hz || fs_Hz < psEncC->minInternal_fs_Hz ) {
         /* Make sure internal rate is not higher than external rate or maximum allowed, or lower than minimum allowed */
         fs_Hz  = psEncC->API_fs_Hz;
@@ -56,51 +62,51 @@ opus_int silk_control_audio_bandwidth(
     } else {
         /* State machine for the internal sampling rate switching */
         if( psEncC->sLP.transition_frame_no >= TRANSITION_FRAMES ) {
             /* Stop transition phase */
             psEncC->sLP.mode = 0;
         }
         if( psEncC->allow_bandwidth_switch || encControl->opusCanSwitch ) {
             /* Check if we should switch down */
-            if( silk_SMULBB( psEncC->fs_kHz, 1000 ) > psEncC->desiredInternal_fs_Hz )
+            if( silk_SMULBB( orig_kHz, 1000 ) > psEncC->desiredInternal_fs_Hz )
             {
                 /* Switch down */
                 if( psEncC->sLP.mode == 0 ) {
                     /* New transition */
                     psEncC->sLP.transition_frame_no = TRANSITION_FRAMES;
 
                     /* Reset transition filter state */
                     silk_memset( psEncC->sLP.In_LP_State, 0, sizeof( psEncC->sLP.In_LP_State ) );
                 }
                 if( encControl->opusCanSwitch ) {
                     /* Stop transition phase */
                     psEncC->sLP.mode = 0;
 
                     /* Switch to a lower sample frequency */
-                    fs_kHz = psEncC->fs_kHz == 16 ? 12 : 8;
+                    fs_kHz = orig_kHz == 16 ? 12 : 8;
                 } else {
                    if( psEncC->sLP.transition_frame_no <= 0 ) {
                        encControl->switchReady = 1;
                        /* Make room for redundancy */
                        encControl->maxBits -= encControl->maxBits * 5 / ( encControl->payloadSize_ms + 5 );
                    } else {
                        /* Direction: down (at double speed) */
                        psEncC->sLP.mode = -2;
                    }
                 }
             }
             else
             /* Check if we should switch up */
-            if( silk_SMULBB( psEncC->fs_kHz, 1000 ) < psEncC->desiredInternal_fs_Hz )
+            if( silk_SMULBB( orig_kHz, 1000 ) < psEncC->desiredInternal_fs_Hz )
             {
                 /* Switch up */
                 if( encControl->opusCanSwitch ) {
                     /* Switch to a higher sample frequency */
-                    fs_kHz = psEncC->fs_kHz == 8 ? 12 : 16;
+                    fs_kHz = orig_kHz == 8 ? 12 : 16;
 
                     /* New transition */
                     psEncC->sLP.transition_frame_no = 0;
 
                     /* Reset transition filter state */
                     silk_memset( psEncC->sLP.In_LP_State, 0, sizeof( psEncC->sLP.In_LP_State ) );
 
                     /* Direction: up */
--- a/media/libopus/silk/control_codec.c
+++ b/media/libopus/silk/control_codec.c
@@ -233,18 +233,18 @@ static opus_int silk_setup_fs(
                 psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_iCDF;
             }
         }
         psEnc->sCmn.PacketSize_ms  = PacketSize_ms;
         psEnc->sCmn.TargetRate_bps = 0;         /* trigger new SNR computation */
     }
 
     /* Set internal sampling frequency */
-    silk_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 );
-    silk_assert( psEnc->sCmn.nb_subfr == 2 || psEnc->sCmn.nb_subfr == 4 );
+    celt_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 );
+    celt_assert( psEnc->sCmn.nb_subfr == 2 || psEnc->sCmn.nb_subfr == 4 );
     if( psEnc->sCmn.fs_kHz != fs_kHz ) {
         /* reset part of the state */
         silk_memset( &psEnc->sShape,               0, sizeof( psEnc->sShape ) );
         silk_memset( &psEnc->sCmn.sNSQ,            0, sizeof( psEnc->sCmn.sNSQ ) );
         silk_memset( psEnc->sCmn.prev_NLSFq_Q15,   0, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) );
         silk_memset( &psEnc->sCmn.sLP.In_LP_State, 0, sizeof( psEnc->sCmn.sLP.In_LP_State ) );
         psEnc->sCmn.inputBufIx                  = 0;
         psEnc->sCmn.nFramesEncoded              = 0;
@@ -294,30 +294,30 @@ static opus_int silk_setup_fs(
         } else if( psEnc->sCmn.fs_kHz == 12 ) {
             psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform6_iCDF;
         } else {
             psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform4_iCDF;
         }
     }
 
     /* Check that settings are valid */
-    silk_assert( ( psEnc->sCmn.subfr_length * psEnc->sCmn.nb_subfr ) == psEnc->sCmn.frame_length );
+    celt_assert( ( psEnc->sCmn.subfr_length * psEnc->sCmn.nb_subfr ) == psEnc->sCmn.frame_length );
 
     return ret;
 }
 
 static opus_int silk_setup_complexity(
     silk_encoder_state              *psEncC,            /* I/O                      */
     opus_int                        Complexity          /* I                        */
 )
 {
     opus_int ret = 0;
 
     /* Set encoding complexity */
-    silk_assert( Complexity >= 0 && Complexity <= 10 );
+    celt_assert( Complexity >= 0 && Complexity <= 10 );
     if( Complexity < 1 ) {
         psEncC->pitchEstimationComplexity       = SILK_PE_MIN_COMPLEX;
         psEncC->pitchEstimationThreshold_Q16    = SILK_FIX_CONST( 0.8, 16 );
         psEncC->pitchEstimationLPCOrder         = 6;
         psEncC->shapingLPCOrder                 = 12;
         psEncC->la_shape                        = 3 * psEncC->fs_kHz;
         psEncC->nStatesDelayedDecision          = 1;
         psEncC->useInterpolatedNLSFs            = 0;
@@ -385,22 +385,22 @@ static opus_int silk_setup_complexity(
         psEncC->warping_Q16                     = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 );
     }
 
     /* Do not allow higher pitch estimation LPC order than predict LPC order */
     psEncC->pitchEstimationLPCOrder = silk_min_int( psEncC->pitchEstimationLPCOrder, psEncC->predictLPCOrder );
     psEncC->shapeWinLength          = SUB_FRAME_LENGTH_MS * psEncC->fs_kHz + 2 * psEncC->la_shape;
     psEncC->Complexity              = Complexity;
 
-    silk_assert( psEncC->pitchEstimationLPCOrder <= MAX_FIND_PITCH_LPC_ORDER );
-    silk_assert( psEncC->shapingLPCOrder         <= MAX_SHAPE_LPC_ORDER      );
-    silk_assert( psEncC->nStatesDelayedDecision  <= MAX_DEL_DEC_STATES       );
-    silk_assert( psEncC->warping_Q16             <= 32767                    );
-    silk_assert( psEncC->la_shape                <= LA_SHAPE_MAX             );
-    silk_assert( psEncC->shapeWinLength          <= SHAPE_LPC_WIN_MAX        );
+    celt_assert( psEncC->pitchEstimationLPCOrder <= MAX_FIND_PITCH_LPC_ORDER );
+    celt_assert( psEncC->shapingLPCOrder         <= MAX_SHAPE_LPC_ORDER      );
+    celt_assert( psEncC->nStatesDelayedDecision  <= MAX_DEL_DEC_STATES       );
+    celt_assert( psEncC->warping_Q16             <= 32767                    );
+    celt_assert( psEncC->la_shape                <= LA_SHAPE_MAX             );
+    celt_assert( psEncC->shapeWinLength          <= SHAPE_LPC_WIN_MAX        );
 
     return ret;
 }
 
 static OPUS_INLINE opus_int silk_setup_LBRR(
     silk_encoder_state          *psEncC,            /* I/O                      */
     const silk_EncControlStruct *encControl         /* I                        */
 )
--- a/media/libopus/silk/dec_API.c
+++ b/media/libopus/silk/dec_API.c
@@ -99,17 +99,17 @@ opus_int silk_Decode(                   
     opus_int16 *resample_out_ptr;
     silk_decoder *psDec = ( silk_decoder * )decState;
     silk_decoder_state *channel_state = psDec->channel_state;
     opus_int has_side;
     opus_int stereo_to_mono;
     int delay_stack_alloc;
     SAVE_STACK;
 
-    silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 );
+    celt_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 );
 
     /**********************************/
     /* Test if first frame in payload */
     /**********************************/
     if( newPacketFlag ) {
         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
             channel_state[ n ].nFramesDecoded = 0;  /* Used to count frames in packet */
         }
@@ -138,23 +138,23 @@ opus_int silk_Decode(                   
                 channel_state[ n ].nb_subfr = 4;
             } else if( decControl->payloadSize_ms == 40 ) {
                 channel_state[ n ].nFramesPerPacket = 2;
                 channel_state[ n ].nb_subfr = 4;
             } else if( decControl->payloadSize_ms == 60 ) {
                 channel_state[ n ].nFramesPerPacket = 3;
                 channel_state[ n ].nb_subfr = 4;
             } else {
-                silk_assert( 0 );
+                celt_assert( 0 );
                 RESTORE_STACK;
                 return SILK_DEC_INVALID_FRAME_SIZE;
             }
             fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1;
             if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) {
-                silk_assert( 0 );
+                celt_assert( 0 );
                 RESTORE_STACK;
                 return SILK_DEC_INVALID_SAMPLING_FREQUENCY;
             }
             ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate );
         }
     }
 
     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) {
--- a/media/libopus/silk/decode_core.c
+++ b/media/libopus/silk/decode_core.c
@@ -136,17 +136,17 @@ void silk_decode_core(
         if( signalType == TYPE_VOICED ) {
             /* Voiced */
             lag = psDecCtrl->pitchL[ k ];
 
             /* Re-whitening */
             if( k == 0 || ( k == 2 && NLSF_interpolation_flag ) ) {
                 /* Rewhiten with new A coefs */
                 start_idx = psDec->ltp_mem_length - lag - psDec->LPC_order - LTP_ORDER / 2;
-                silk_assert( start_idx > 0 );
+                celt_assert( start_idx > 0 );
 
                 if( k == 2 ) {
                     silk_memcpy( &psDec->outBuf[ psDec->ltp_mem_length ], xq, 2 * psDec->subfr_length * sizeof( opus_int16 ) );
                 }
 
                 silk_LPC_analysis_filter( &sLTP[ start_idx ], &psDec->outBuf[ start_idx + k * psDec->subfr_length ],
                     A_Q12, psDec->ltp_mem_length - start_idx, psDec->LPC_order, arch );
 
@@ -191,17 +191,17 @@ void silk_decode_core(
                 sLTP_buf_idx++;
             }
         } else {
             pres_Q14 = pexc_Q14;
         }
 
         for( i = 0; i < psDec->subfr_length; i++ ) {
             /* Short-term prediction */
-            silk_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 );
+            celt_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 );
             /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
             LPC_pred_Q10 = silk_RSHIFT( psDec->LPC_order, 1 );
             LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  1 ], A_Q12_tmp[ 0 ] );
             LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  2 ], A_Q12_tmp[ 1 ] );
             LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  3 ], A_Q12_tmp[ 2 ] );
             LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  4 ], A_Q12_tmp[ 3 ] );
             LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  5 ], A_Q12_tmp[ 4 ] );
             LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  6 ], A_Q12_tmp[ 5 ] );
--- a/media/libopus/silk/decode_frame.c
+++ b/media/libopus/silk/decode_frame.c
@@ -50,17 +50,17 @@ opus_int silk_decode_frame(
     opus_int         L, mv_len, ret = 0;
     SAVE_STACK;
 
     L = psDec->frame_length;
     ALLOC( psDecCtrl, 1, silk_decoder_control );
     psDecCtrl->LTP_scale_Q14 = 0;
 
     /* Safety checks */
-    silk_assert( L > 0 && L <= MAX_FRAME_LENGTH );
+    celt_assert( L > 0 && L <= MAX_FRAME_LENGTH );
 
     if(   lostFlag == FLAG_DECODE_NORMAL ||
         ( lostFlag == FLAG_DECODE_LBRR && psDec->LBRR_flags[ psDec->nFramesDecoded ] == 1 ) )
     {
         VARDECL( opus_int16, pulses );
         ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) &
                        ~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int16 );
         /*********************************************/
@@ -86,30 +86,30 @@ opus_int silk_decode_frame(
 
         /********************************************************/
         /* Update PLC state                                     */
         /********************************************************/
         silk_PLC( psDec, psDecCtrl, pOut, 0, arch );
 
         psDec->lossCnt = 0;
         psDec->prevSignalType = psDec->indices.signalType;
-        silk_assert( psDec->prevSignalType >= 0 && psDec->prevSignalType <= 2 );
+        celt_assert( psDec->prevSignalType >= 0 && psDec->prevSignalType <= 2 );
 
         /* A frame has been decoded without errors */
         psDec->first_frame_after_reset = 0;
     } else {
         /* Handle packet loss by extrapolation */
         psDec->indices.signalType = psDec->prevSignalType;
         silk_PLC( psDec, psDecCtrl, pOut, 1, arch );
     }
 
     /*************************/
     /* Update output buffer. */
     /*************************/
-    silk_assert( psDec->ltp_mem_length >= psDec->frame_length );
+    celt_assert( psDec->ltp_mem_length >= psDec->frame_length );
     mv_len = psDec->ltp_mem_length - psDec->frame_length;
     silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) );
     silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) );
 
     /************************************************/
     /* Comfort noise generation / estimation        */
     /************************************************/
     silk_CNG( psDec, psDecCtrl, pOut, L );
--- a/media/libopus/silk/decode_indices.c
+++ b/media/libopus/silk/decode_indices.c
@@ -74,17 +74,17 @@ void silk_decode_indices(
         psDec->indices.GainsIndices[ i ] = (opus_int8)ec_dec_icdf( psRangeDec, silk_delta_gain_iCDF, 8 );
     }
 
     /**********************/
     /* Decode LSF Indices */
     /**********************/
     psDec->indices.NLSFIndices[ 0 ] = (opus_int8)ec_dec_icdf( psRangeDec, &psDec->psNLSF_CB->CB1_iCDF[ ( psDec->indices.signalType >> 1 ) * psDec->psNLSF_CB->nVectors ], 8 );
     silk_NLSF_unpack( ec_ix, pred_Q8, psDec->psNLSF_CB, psDec->indices.NLSFIndices[ 0 ] );
-    silk_assert( psDec->psNLSF_CB->order == psDec->LPC_order );
+    celt_assert( psDec->psNLSF_CB->order == psDec->LPC_order );
     for( i = 0; i < psDec->psNLSF_CB->order; i++ ) {
         Ix = ec_dec_icdf( psRangeDec, &psDec->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 );
         if( Ix == 0 ) {
             Ix -= ec_dec_icdf( psRangeDec, silk_NLSF_EXT_iCDF, 8 );
         } else if( Ix == 2 * NLSF_QUANT_MAX_AMPLITUDE ) {
             Ix += ec_dec_icdf( psRangeDec, silk_NLSF_EXT_iCDF, 8 );
         }
         psDec->indices.NLSFIndices[ i+1 ] = (opus_int8)( Ix - NLSF_QUANT_MAX_AMPLITUDE );
--- a/media/libopus/silk/decode_pitch.c
+++ b/media/libopus/silk/decode_pitch.c
@@ -46,26 +46,26 @@ void silk_decode_pitch(
     opus_int   lag, k, min_lag, max_lag, cbk_size;
     const opus_int8 *Lag_CB_ptr;
 
     if( Fs_kHz == 8 ) {
         if( nb_subfr == PE_MAX_NB_SUBFR ) {
             Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ];
             cbk_size   = PE_NB_CBKS_STAGE2_EXT;
         } else {
-            silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1 );
+            celt_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1 );
             Lag_CB_ptr = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ];
             cbk_size   = PE_NB_CBKS_STAGE2_10MS;
         }
     } else {
         if( nb_subfr == PE_MAX_NB_SUBFR ) {
             Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ];
             cbk_size   = PE_NB_CBKS_STAGE3_MAX;
         } else {
-            silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1 );
+            celt_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1 );
             Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
             cbk_size   = PE_NB_CBKS_STAGE3_10MS;
         }
     }
 
     min_lag = silk_SMULBB( PE_MIN_LAG_MS, Fs_kHz );
     max_lag = silk_SMULBB( PE_MAX_LAG_MS, Fs_kHz );
     lag = min_lag + lagIndex;
--- a/media/libopus/silk/decode_pulses.c
+++ b/media/libopus/silk/decode_pulses.c
@@ -51,17 +51,17 @@ void silk_decode_pulses(
     /* Decode rate level */
     /*********************/
     RateLevelIndex = ec_dec_icdf( psRangeDec, silk_rate_levels_iCDF[ signalType >> 1 ], 8 );
 
     /* Calculate number of shell blocks */
     silk_assert( 1 << LOG2_SHELL_CODEC_FRAME_LENGTH == SHELL_CODEC_FRAME_LENGTH );
     iter = silk_RSHIFT( frame_length, LOG2_SHELL_CODEC_FRAME_LENGTH );
     if( iter * SHELL_CODEC_FRAME_LENGTH < frame_length ) {
-        silk_assert( frame_length == 12 * 10 ); /* Make sure only happens for 10 ms @ 12 kHz */
+        celt_assert( frame_length == 12 * 10 ); /* Make sure only happens for 10 ms @ 12 kHz */
         iter++;
     }
 
     /***************************************************/
     /* Sum-Weighted-Pulses Decoding                    */
     /***************************************************/
     cdf_ptr = silk_pulses_per_block_iCDF[ RateLevelIndex ];
     for( i = 0; i < iter; i++ ) {
--- a/media/libopus/silk/decoder_set_fs.c
+++ b/media/libopus/silk/decoder_set_fs.c
@@ -35,18 +35,18 @@ POSSIBILITY OF SUCH DAMAGE.
 opus_int silk_decoder_set_fs(
     silk_decoder_state          *psDec,                         /* I/O  Decoder state pointer                       */
     opus_int                    fs_kHz,                         /* I    Sampling frequency (kHz)                    */
     opus_int32                  fs_API_Hz                       /* I    API Sampling frequency (Hz)                 */
 )
 {
     opus_int frame_length, ret = 0;
 
-    silk_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 );
-    silk_assert( psDec->nb_subfr == MAX_NB_SUBFR || psDec->nb_subfr == MAX_NB_SUBFR/2 );
+    celt_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 );
+    celt_assert( psDec->nb_subfr == MAX_NB_SUBFR || psDec->nb_subfr == MAX_NB_SUBFR/2 );
 
     /* New (sub)frame length */
     psDec->subfr_length = silk_SMULBB( SUB_FRAME_LENGTH_MS, fs_kHz );
     frame_length = silk_SMULBB( psDec->nb_subfr, psDec->subfr_length );
 
     /* Initialize resampler when switching internal or external sampling frequency */
     if( psDec->fs_kHz != fs_kHz || psDec->fs_API_hz != fs_API_Hz ) {
         /* Initialize the resampler for dec_API.c preparing resampling from fs_kHz to API_fs_Hz */
@@ -81,28 +81,28 @@ opus_int silk_decoder_set_fs(
             if( fs_kHz == 16 ) {
                 psDec->pitch_lag_low_bits_iCDF = silk_uniform8_iCDF;
             } else if( fs_kHz == 12 ) {
                 psDec->pitch_lag_low_bits_iCDF = silk_uniform6_iCDF;
             } else if( fs_kHz == 8 ) {
                 psDec->pitch_lag_low_bits_iCDF = silk_uniform4_iCDF;
             } else {
                 /* unsupported sampling rate */
-                silk_assert( 0 );
+                celt_assert( 0 );
             }
             psDec->first_frame_after_reset = 1;
             psDec->lagPrev                 = 100;
             psDec->LastGainIndex           = 10;
             psDec->prevSignalType          = TYPE_NO_VOICE_ACTIVITY;
             silk_memset( psDec->outBuf, 0, sizeof(psDec->outBuf));
             silk_memset( psDec->sLPC_Q14_buf, 0, sizeof(psDec->sLPC_Q14_buf) );
         }
 
         psDec->fs_kHz       = fs_kHz;
         psDec->frame_length = frame_length;
     }
 
     /* Check that settings are valid */
-    silk_assert( psDec->frame_length > 0 && psDec->frame_length <= MAX_FRAME_LENGTH );
+    celt_assert( psDec->frame_length > 0 && psDec->frame_length <= MAX_FRAME_LENGTH );
 
     return ret;
 }
 
--- a/media/libopus/silk/define.h
+++ b/media/libopus/silk/define.h
@@ -41,28 +41,32 @@ extern "C"
 /* Number of decoder channels (1/2) */
 #define DECODER_NUM_CHANNELS                    2
 
 #define MAX_FRAMES_PER_PACKET                   3
 
 /* Limits on bitrate */
 #define MIN_TARGET_RATE_BPS                     5000
 #define MAX_TARGET_RATE_BPS                     80000
-#define TARGET_RATE_TAB_SZ                      8
 
 /* LBRR thresholds */
 #define LBRR_NB_MIN_RATE_BPS                    12000
 #define LBRR_MB_MIN_RATE_BPS                    14000
 #define LBRR_WB_MIN_RATE_BPS                    16000
 
 /* DTX settings */
 #define NB_SPEECH_FRAMES_BEFORE_DTX             10      /* eq 200 ms */
 #define MAX_CONSECUTIVE_DTX                     20      /* eq 400 ms */
 #define DTX_ACTIVITY_THRESHOLD                  0.1f
 
+/* VAD decision */
+#define VAD_NO_DECISION                         -1
+#define VAD_NO_ACTIVITY                         0
+#define VAD_ACTIVITY                            1
+
 /* Maximum sampling frequency */
 #define MAX_FS_KHZ                              16
 #define MAX_API_FS_KHZ                          48
 
 /* Signal types */
 #define TYPE_NO_VOICE_ACTIVITY                  0
 #define TYPE_UNVOICED                           1
 #define TYPE_VOICED                             2
--- a/media/libopus/silk/enc_API.c
+++ b/media/libopus/silk/enc_API.c
@@ -77,26 +77,26 @@ opus_int silk_InitEncoder(              
     opus_int n, ret = SILK_NO_ERROR;
 
     psEnc = (silk_encoder *)encState;
 
     /* Reset encoder */
     silk_memset( psEnc, 0, sizeof( silk_encoder ) );
     for( n = 0; n < ENCODER_NUM_CHANNELS; n++ ) {
         if( ret += silk_init_encoder( &psEnc->state_Fxx[ n ], arch ) ) {
-            silk_assert( 0 );
+            celt_assert( 0 );
         }
     }
 
     psEnc->nChannelsAPI = 1;
     psEnc->nChannelsInternal = 1;
 
     /* Read control structure */
     if( ret += silk_QueryEncoder( encState, encStatus ) ) {
-        silk_assert( 0 );
+        celt_assert( 0 );
     }
 
     return ret;
 }
 
 /***************************************/
 /* Read control structure from encoder */
 /***************************************/
@@ -139,17 +139,18 @@ static opus_int silk_QueryEncoder(      
 /* encControl->payloadSize_ms is set to                                                                         */
 opus_int silk_Encode(                                   /* O    Returns error code                              */
     void                            *encState,          /* I/O  State                                           */
     silk_EncControlStruct           *encControl,        /* I    Control status                                  */
     const opus_int16                *samplesIn,         /* I    Speech sample input vector                      */
     opus_int                        nSamplesIn,         /* I    Number of samples in input vector               */
     ec_enc                          *psRangeEnc,        /* I/O  Compressor data structure                       */
     opus_int32                      *nBytesOut,         /* I/O  Number of bytes in payload (input: Max bytes)   */
-    const opus_int                  prefillFlag         /* I    Flag to indicate prefilling buffers no coding   */
+    const opus_int                  prefillFlag,        /* I    Flag to indicate prefilling buffers no coding   */
+    opus_int                        activity            /* I    Decision of Opus voice activity detector        */
 )
 {
     opus_int   n, i, nBits, flags, tmp_payloadSize_ms = 0, tmp_complexity = 0, ret = 0;
     opus_int   nSamplesToBuffer, nSamplesToBufferMax, nBlocksOf10ms;
     opus_int   nSamplesFromInput = 0, nSamplesFromInputMax;
     opus_int   speech_act_thr_for_switch_Q8;
     opus_int32 TargetRate_bps, MStargetRates_bps[ 2 ], channelRate_bps, LBRR_symbol, sum;
     silk_encoder *psEnc = ( silk_encoder * )encState;
@@ -161,17 +162,17 @@ opus_int silk_Encode(                   
     {
        psEnc->state_Fxx[0].sCmn.first_frame_after_reset = 1;
        psEnc->state_Fxx[1].sCmn.first_frame_after_reset = 1;
     }
     psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded = psEnc->state_Fxx[ 1 ].sCmn.nFramesEncoded = 0;
 
     /* Check values in encoder control structure */
     if( ( ret = check_control_input( encControl ) ) != 0 ) {
-        silk_assert( 0 );
+        celt_assert( 0 );
         RESTORE_STACK;
         return ret;
     }
 
     encControl->switchReady = 0;
 
     if( encControl->nChannelsInternal > psEnc->nChannelsInternal ) {
         /* Mono -> Stereo transition: init state of second channel and stereo state */
@@ -194,45 +195,55 @@ opus_int silk_Encode(                   
 
     psEnc->nChannelsAPI = encControl->nChannelsAPI;
     psEnc->nChannelsInternal = encControl->nChannelsInternal;
 
     nBlocksOf10ms = silk_DIV32( 100 * nSamplesIn, encControl->API_sampleRate );
     tot_blocks = ( nBlocksOf10ms > 1 ) ? nBlocksOf10ms >> 1 : 1;
     curr_block = 0;
     if( prefillFlag ) {
+        silk_LP_state save_LP;
         /* Only accept input length of 10 ms */
         if( nBlocksOf10ms != 1 ) {
-            silk_assert( 0 );
+            celt_assert( 0 );
             RESTORE_STACK;
             return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
         }
+        if ( prefillFlag == 2 ) {
+            save_LP = psEnc->state_Fxx[ 0 ].sCmn.sLP;
+            /* Save the sampling rate so the bandwidth switching code can keep handling transitions. */
+            save_LP.saved_fs_kHz = psEnc->state_Fxx[ 0 ].sCmn.fs_kHz;
+        }
         /* Reset Encoder */
         for( n = 0; n < encControl->nChannelsInternal; n++ ) {
             ret = silk_init_encoder( &psEnc->state_Fxx[ n ], psEnc->state_Fxx[ n ].sCmn.arch );
-            silk_assert( !ret );
+            /* Restore the variable LP state. */
+            if ( prefillFlag == 2 ) {
+                psEnc->state_Fxx[ n ].sCmn.sLP = save_LP;
+            }
+            celt_assert( !ret );
         }
         tmp_payloadSize_ms = encControl->payloadSize_ms;
         encControl->payloadSize_ms = 10;
         tmp_complexity = encControl->complexity;
         encControl->complexity = 0;
         for( n = 0; n < encControl->nChannelsInternal; n++ ) {
             psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
             psEnc->state_Fxx[ n ].sCmn.prefillFlag = 1;
         }
     } else {
         /* Only accept input lengths that are a multiple of 10 ms */
         if( nBlocksOf10ms * encControl->API_sampleRate != 100 * nSamplesIn || nSamplesIn < 0 ) {
-            silk_assert( 0 );
+            celt_assert( 0 );
             RESTORE_STACK;
             return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
         }
         /* Make sure no more than one packet can be produced */
         if( 1000 * (opus_int32)nSamplesIn > encControl->payloadSize_ms * encControl->API_sampleRate ) {
-            silk_assert( 0 );
+            celt_assert( 0 );
             RESTORE_STACK;
             return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
         }
     }
 
     for( n = 0; n < encControl->nChannelsInternal; n++ ) {
         /* Force the side channel to the same rate as the mid */
         opus_int force_fs_kHz = (n==1) ? psEnc->state_Fxx[0].sCmn.fs_kHz : 0;
@@ -243,17 +254,17 @@ opus_int silk_Encode(                   
         }
         if( psEnc->state_Fxx[n].sCmn.first_frame_after_reset || transition ) {
             for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) {
                 psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] = 0;
             }
         }
         psEnc->state_Fxx[ n ].sCmn.inDTX = psEnc->state_Fxx[ n ].sCmn.useDTX;
     }
-    silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
+    celt_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
 
     /* Input buffering/resampling and encoding */
     nSamplesToBufferMax =
         10 * nBlocksOf10ms * psEnc->state_Fxx[ 0 ].sCmn.fs_kHz;
     nSamplesFromInputMax =
         silk_DIV32_16( nSamplesToBufferMax *
                            psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz,
                        psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 );
@@ -301,34 +312,34 @@ opus_int silk_Encode(                   
                for( n = 0; n < psEnc->state_Fxx[ 0 ].sCmn.frame_length; n++ ) {
                   psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ] =
                         silk_RSHIFT(psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ]
                                   + psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx+n+2 ], 1);
                }
             }
             psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
         } else {
-            silk_assert( encControl->nChannelsAPI == 1 && encControl->nChannelsInternal == 1 );
+            celt_assert( encControl->nChannelsAPI == 1 && encControl->nChannelsInternal == 1 );
             silk_memcpy(buf, samplesIn, nSamplesFromInput*sizeof(opus_int16));
             ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
                 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
             psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
         }
 
         samplesIn  += nSamplesFromInput * encControl->nChannelsAPI;
         nSamplesIn -= nSamplesFromInput;
 
         /* Default */
         psEnc->allowBandwidthSwitch = 0;
 
         /* Silk encoder */
         if( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx >= psEnc->state_Fxx[ 0 ].sCmn.frame_length ) {
             /* Enough data in input buffer, so encode */
-            silk_assert( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx == psEnc->state_Fxx[ 0 ].sCmn.frame_length );
-            silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inputBufIx == psEnc->state_Fxx[ 1 ].sCmn.frame_length );
+            celt_assert( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx == psEnc->state_Fxx[ 0 ].sCmn.frame_length );
+            celt_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inputBufIx == psEnc->state_Fxx[ 1 ].sCmn.frame_length );
 
             /* Deal with LBRR data */
             if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 && !prefillFlag ) {
                 /* Create space at start of payload for VAD and FEC flags */
                 opus_uint8 iCDF[ 2 ] = { 0, 0 };
                 iCDF[ 0 ] = 256 - silk_RSHIFT( 256, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal );
                 ec_enc_icdf( psRangeEnc, 0, iCDF, 8 );
 
@@ -420,32 +431,32 @@ opus_int silk_Encode(                   
                         silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State ) );
                         psEnc->state_Fxx[ 1 ].sCmn.prevLag                 = 100;
                         psEnc->state_Fxx[ 1 ].sCmn.sNSQ.lagPrev            = 100;
                         psEnc->state_Fxx[ 1 ].sShape.LastGainIndex         = 10;
                         psEnc->state_Fxx[ 1 ].sCmn.prevSignalType          = TYPE_NO_VOICE_ACTIVITY;
                         psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_gain_Q16      = 65536;
                         psEnc->state_Fxx[ 1 ].sCmn.first_frame_after_reset = 1;
                     }
-                    silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 1 ] );
+                    silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 1 ], activity );
                 } else {
                     psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] = 0;
                 }
                 if( !prefillFlag ) {
                     silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
                     if( psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) {
                         silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
                     }
                 }
             } else {
                 /* Buffering */
                 silk_memcpy( psEnc->state_Fxx[ 0 ].sCmn.inputBuf, psEnc->sStereo.sMid, 2 * sizeof( opus_int16 ) );
                 silk_memcpy( psEnc->sStereo.sMid, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.frame_length ], 2 * sizeof( opus_int16 ) );
             }
-            silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 0 ] );
+            silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 0 ], activity );
 
             /* Encode */
             for( n = 0; n < encControl->nChannelsInternal; n++ ) {
                 opus_int maxBits, useCBR;
 
                 /* Handling rate constraints */
                 maxBits = encControl->maxBits;
                 if( tot_blocks == 2 && curr_block == 0 ) {
--- a/media/libopus/silk/encode_indices.c
+++ b/media/libopus/silk/encode_indices.c
@@ -51,18 +51,18 @@ void silk_encode_indices(
     } else {
          psIndices = &psEncC->indices;
     }
 
     /*******************************************/
     /* Encode signal type and quantizer offset */
     /*******************************************/
     typeOffset = 2 * psIndices->signalType + psIndices->quantOffsetType;
-    silk_assert( typeOffset >= 0 && typeOffset < 6 );
-    silk_assert( encode_LBRR == 0 || typeOffset >= 2 );
+    celt_assert( typeOffset >= 0 && typeOffset < 6 );
+    celt_assert( encode_LBRR == 0 || typeOffset >= 2 );
     if( encode_LBRR || typeOffset >= 2 ) {
         ec_enc_icdf( psRangeEnc, typeOffset - 2, silk_type_offset_VAD_iCDF, 8 );
     } else {
         ec_enc_icdf( psRangeEnc, typeOffset, silk_type_offset_no_VAD_iCDF, 8 );
     }
 
     /****************/
     /* Encode gains */
@@ -85,17 +85,17 @@ void silk_encode_indices(
         ec_enc_icdf( psRangeEnc, psIndices->GainsIndices[ i ], silk_delta_gain_iCDF, 8 );
     }
 
     /****************/
     /* Encode NLSFs */
     /****************/
     ec_enc_icdf( psRangeEnc, psIndices->NLSFIndices[ 0 ], &psEncC->psNLSF_CB->CB1_iCDF[ ( psIndices->signalType >> 1 ) * psEncC->psNLSF_CB->nVectors ], 8 );
     silk_NLSF_unpack( ec_ix, pred_Q8, psEncC->psNLSF_CB, psIndices->NLSFIndices[ 0 ] );
-    silk_assert( psEncC->psNLSF_CB->order == psEncC->predictLPCOrder );
+    celt_assert( psEncC->psNLSF_CB->order == psEncC->predictLPCOrder );
     for( i = 0; i < psEncC->psNLSF_CB->order; i++ ) {
         if( psIndices->NLSFIndices[ i+1 ] >= NLSF_QUANT_MAX_AMPLITUDE ) {
             ec_enc_icdf( psRangeEnc, 2 * NLSF_QUANT_MAX_AMPLITUDE, &psEncC->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 );
             ec_enc_icdf( psRangeEnc, psIndices->NLSFIndices[ i+1 ] - NLSF_QUANT_MAX_AMPLITUDE, silk_NLSF_EXT_iCDF, 8 );
         } else if( psIndices->NLSFIndices[ i+1 ] <= -NLSF_QUANT_MAX_AMPLITUDE ) {
             ec_enc_icdf( psRangeEnc, 0, &psEncC->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 );
             ec_enc_icdf( psRangeEnc, -psIndices->NLSFIndices[ i+1 ] - NLSF_QUANT_MAX_AMPLITUDE, silk_NLSF_EXT_iCDF, 8 );
         } else {
--- a/media/libopus/silk/encode_pulses.c
+++ b/media/libopus/silk/encode_pulses.c
@@ -81,17 +81,17 @@ void silk_encode_pulses(
 
     /****************************/
     /* Prepare for shell coding */
     /****************************/
     /* Calculate number of shell blocks */
     silk_assert( 1 << LOG2_SHELL_CODEC_FRAME_LENGTH == SHELL_CODEC_FRAME_LENGTH );
     iter = silk_RSHIFT( frame_length, LOG2_SHELL_CODEC_FRAME_LENGTH );
     if( iter * SHELL_CODEC_FRAME_LENGTH < frame_length ) {
-        silk_assert( frame_length == 12 * 10 ); /* Make sure only happens for 10 ms @ 12 kHz */
+        celt_assert( frame_length == 12 * 10 ); /* Make sure only happens for 10 ms @ 12 kHz */
         iter++;
         silk_memset( &pulses[ frame_length ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof(opus_int8));
     }
 
     /* Take the absolute value of the pulses */
     ALLOC( abs_pulses, iter * SHELL_CODEC_FRAME_LENGTH, opus_int );
     silk_assert( !( SHELL_CODEC_FRAME_LENGTH & 3 ) );
     for( i = 0; i < iter * SHELL_CODEC_FRAME_LENGTH; i+=4 ) {
--- a/media/libopus/silk/fixed/apply_sine_window_FIX.c
+++ b/media/libopus/silk/fixed/apply_sine_window_FIX.c
@@ -52,25 +52,25 @@ void silk_apply_sine_window(
     const opus_int16            px[],               /* I    Pointer to input signal                                     */
     const opus_int              win_type,           /* I    Selects a window type                                       */
     const opus_int              length              /* I    Window length, multiple of 4                                */
 )
 {
     opus_int   k, f_Q16, c_Q16;
     opus_int32 S0_Q16, S1_Q16;
 
-    silk_assert( win_type == 1 || win_type == 2 );
+    celt_assert( win_type == 1 || win_type == 2 );
 
     /* Length must be in a range from 16 to 120 and a multiple of 4 */
-    silk_assert( length >= 16 && length <= 120 );
-    silk_assert( ( length & 3 ) == 0 );
+    celt_assert( length >= 16 && length <= 120 );
+    celt_assert( ( length & 3 ) == 0 );
 
     /* Frequency */
     k = ( length >> 2 ) - 4;
-    silk_assert( k >= 0 && k <= 26 );
+    celt_assert( k >= 0 && k <= 26 );
     f_Q16 = (opus_int)freq_table_Q16[ k ];
 
     /* Factor used for cosine approximation */
     c_Q16 = silk_SMULWB( (opus_int32)f_Q16, -f_Q16 );
     silk_assert( c_Q16 >= -32768 );
 
     /* initialize state */
     if( win_type == 1 ) {
--- a/media/libopus/silk/fixed/burg_modified_FIX.c
+++ b/media/libopus/silk/fixed/burg_modified_FIX.c
@@ -60,17 +60,17 @@ void silk_burg_modified_c(
     opus_int32       C_first_row[ SILK_MAX_ORDER_LPC ];
     opus_int32       C_last_row[  SILK_MAX_ORDER_LPC ];
     opus_int32       Af_QA[       SILK_MAX_ORDER_LPC ];
     opus_int32       CAf[ SILK_MAX_ORDER_LPC + 1 ];
     opus_int32       CAb[ SILK_MAX_ORDER_LPC + 1 ];
     opus_int32       xcorr[ SILK_MAX_ORDER_LPC ];
     opus_int64       C0_64;
 
-    silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
+    celt_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
 
     /* Compute autocorrelations, added over subframes */
     C0_64 = silk_inner_prod16_aligned_64( x, x, subfr_length*nb_subfr, arch );
     lz = silk_CLZ64(C0_64);
     rshifts = 32 + 1 + N_BITS_HEAD_ROOM - lz;
     if (rshifts > MAX_RSHIFTS) rshifts = MAX_RSHIFTS;
     if (rshifts < MIN_RSHIFTS) rshifts = MIN_RSHIFTS;
 
--- a/media/libopus/silk/fixed/encode_frame_FIX.c
+++ b/media/libopus/silk/fixed/encode_frame_FIX.c
@@ -38,31 +38,38 @@ POSSIBILITY OF SUCH DAMAGE.
 static OPUS_INLINE void silk_LBRR_encode_FIX(
     silk_encoder_state_FIX          *psEnc,                                 /* I/O  Pointer to Silk FIX encoder state                                           */
     silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  Pointer to Silk FIX encoder control struct                                  */
     const opus_int16                x16[],                                  /* I    Input signal                                                                */
     opus_int                        condCoding                              /* I    The type of conditional coding used so far for this frame                   */
 );
 
 void silk_encode_do_VAD_FIX(
-    silk_encoder_state_FIX          *psEnc                                  /* I/O  Pointer to Silk FIX encoder state                                           */
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Pointer to Silk FIX encoder state                                           */
+    opus_int                        activity                                /* I    Decision of Opus voice activity detector                                    */
 )
 {
+    const opus_int activity_threshold = SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 );
+
     /****************************/
     /* Voice Activity Detection */
     /****************************/
     silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.arch );
+    /* If Opus VAD is inactive and Silk VAD is active: lower Silk VAD to just under the threshold */
+    if( activity == VAD_NO_ACTIVITY && psEnc->sCmn.speech_activity_Q8 >= activity_threshold ) {
+        psEnc->sCmn.speech_activity_Q8 = activity_threshold - 1;
+    }
 
     /**************************************************/
     /* Convert speech activity into VAD and DTX flags */
     /**************************************************/
-    if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) {
+    if( psEnc->sCmn.speech_activity_Q8 < activity_threshold ) {
         psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY;
         psEnc->sCmn.noSpeechCounter++;
-        if( psEnc->sCmn.noSpeechCounter < NB_SPEECH_FRAMES_BEFORE_DTX ) {
+        if( psEnc->sCmn.noSpeechCounter <= NB_SPEECH_FRAMES_BEFORE_DTX ) {
             psEnc->sCmn.inDTX = 0;
         } else if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NB_SPEECH_FRAMES_BEFORE_DTX ) {
             psEnc->sCmn.noSpeechCounter = NB_SPEECH_FRAMES_BEFORE_DTX;
             psEnc->sCmn.inDTX           = 0;
         }
         psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 0;
     } else {
         psEnc->sCmn.noSpeechCounter    = 0;
@@ -250,17 +257,17 @@ opus_int silk_encode_frame_FIX(
                     break;
                 }
             }
 
             if( iter == maxIter ) {
                 if( found_lower && ( gainsID == gainsID_lower || nBits > maxBits ) ) {
                     /* Restore output state from earlier iteration that did meet the bitrate budget */
                     silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) );
-                    silk_assert( sRangeEnc_copy2.offs <= 1275 );
+                    celt_assert( sRangeEnc_copy2.offs <= 1275 );
                     silk_memcpy( psRangeEnc->buf, ec_buf_copy, sRangeEnc_copy2.offs );
                     silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy2, sizeof( silk_nsq_state ) );
                     psEnc->sShape.LastGainIndex = LastGainIndex_copy2;
                 }
                 break;
             }
 
             if( nBits > maxBits ) {
@@ -278,17 +285,17 @@ opus_int silk_encode_frame_FIX(
             } else if( nBits < maxBits - 5 ) {
                 found_lower = 1;
                 nBits_lower = nBits;
                 gainMult_lower = gainMult_Q8;
                 if( gainsID != gainsID_lower ) {
                     gainsID_lower = gainsID;
                     /* Copy part of the output state */
                     silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) );
-                    silk_assert( psRangeEnc->offs <= 1275 );
+                    celt_assert( psRangeEnc->offs <= 1275 );
                     silk_memcpy( ec_buf_copy, psRangeEnc->buf, psRangeEnc->offs );
                     silk_memcpy( &sNSQ_copy2, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
                     LastGainIndex_copy2 = psEnc->sShape.LastGainIndex;
                 }
             } else {
                 /* Within 5 bits of budget: close enough */
                 break;
             }
--- a/media/libopus/silk/fixed/find_LPC_FIX.c
+++ b/media/libopus/silk/fixed/find_LPC_FIX.c
@@ -141,11 +141,11 @@ void silk_find_LPC_FIX(
         }
     }
 
     if( psEncC->indices.NLSFInterpCoef_Q2 == 4 ) {
         /* NLSF interpolation is currently inactive, calculate NLSFs from full frame AR coefficients */
         silk_A2NLSF( NLSF_Q15, a_Q16, psEncC->predictLPCOrder );
     }
 
-    silk_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 || ( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) );
+    celt_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 || ( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) );
     RESTORE_STACK;
 }
--- a/media/libopus/silk/fixed/find_pitch_lags_FIX.c
+++ b/media/libopus/silk/fixed/find_pitch_lags_FIX.c
@@ -54,17 +54,17 @@ void silk_find_pitch_lags_FIX(
     SAVE_STACK;
 
     /******************************************/
     /* Set up buffer lengths etc based on Fs  */
     /******************************************/
     buf_len = psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length + psEnc->sCmn.ltp_mem_length;
 
     /* Safety check */
-    silk_assert( buf_len >= psEnc->sCmn.pitch_LPC_win_length );
+    celt_assert( buf_len >= psEnc->sCmn.pitch_LPC_win_length );
 
     /*************************************/
     /* Estimate LPC AR coefficients      */
     /*************************************/
 
     /* Calculate windowed signal */
 
     ALLOC( Wsig, psEnc->sCmn.pitch_LPC_win_length, opus_int16 );
--- a/media/libopus/silk/fixed/find_pred_coefs_FIX.c
+++ b/media/libopus/silk/fixed/find_pred_coefs_FIX.c
@@ -75,17 +75,17 @@ void silk_find_pred_coefs_FIX(
                + psEnc->sCmn.frame_length, opus_int16 );
     if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
         VARDECL( opus_int32, xXLTP_Q17 );
         VARDECL( opus_int32, XXLTP_Q17 );
 
         /**********/
         /* VOICED */
         /**********/
-        silk_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 );
+        celt_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 );
 
         ALLOC( xXLTP_Q17, psEnc->sCmn.nb_subfr * LTP_ORDER, opus_int32 );
         ALLOC( XXLTP_Q17, psEnc->sCmn.nb_subfr * LTP_ORDER * LTP_ORDER, opus_int32 );
 
         /* LTP analysis */
         silk_find_LTP_FIX( XXLTP_Q17, xXLTP_Q17, res_pitch,
             psEncCtrl->pitchL, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.arch );
 
--- a/media/libopus/silk/fixed/main_FIX.h
+++ b/media/libopus/silk/fixed/main_FIX.h
@@ -61,17 +61,18 @@ extern "C"
 
 /* High-pass filter with cutoff frequency adaptation based on pitch lag statistics */
 void silk_HP_variable_cutoff(
     silk_encoder_state_Fxx          state_Fxx[]                             /* I/O  Encoder states                                                              */
 );
 
 /* Encoder main function */
 void silk_encode_do_VAD_FIX(
-    silk_encoder_state_FIX          *psEnc                                  /* I/O  Pointer to Silk FIX encoder state                                           */
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Pointer to Silk FIX encoder state                                           */
+    opus_int                        activity                                /* I    Decision of Opus voice activity detector                                    */
 );
 
 /* Encoder main function */
 opus_int silk_encode_frame_FIX(
     silk_encoder_state_FIX          *psEnc,                                 /* I/O  Pointer to Silk FIX encoder state                                           */
     opus_int32                      *pnBytesOut,                            /* O    Pointer to number of payload bytes;                                         */
     ec_enc                          *psRangeEnc,                            /* I/O  compressor data structure                                                   */
     opus_int                        condCoding,                             /* I    The type of conditional coding to use                                       */
--- a/media/libopus/silk/fixed/pitch_analysis_core_FIX.c
+++ b/media/libopus/silk/fixed/pitch_analysis_core_FIX.c
@@ -117,21 +117,21 @@ opus_int silk_pitch_analysis_core(      
     opus_int   max_lag;
     opus_int32 contour_bias_Q15, diff;
     opus_int   nb_cbk_search, cbk_size;
     opus_int32 delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q13;
     const opus_int8 *Lag_CB_ptr;
     SAVE_STACK;
 
     /* Check for valid sampling frequency */
-    silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 );
+    celt_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 );
 
     /* Check for valid complexity setting */
-    silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
-    silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
+    celt_assert( complexity >= SILK_PE_MIN_COMPLEX );
+    celt_assert( complexity <= SILK_PE_MAX_COMPLEX );
 
     silk_assert( search_thres1_Q16 >= 0 && search_thres1_Q16 <= (1<<16) );
     silk_assert( search_thres2_Q13 >= 0 && search_thres2_Q13 <= (1<<13) );
 
     /* Set up frame lengths max / min lag for the sampling frequency */
     frame_length      = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * Fs_kHz;
     frame_length_4kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 4;
     frame_length_8kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 8;
@@ -159,17 +159,17 @@ opus_int silk_pitch_analysis_core(      
         silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );
         silk_resampler_down2( filt_state, frame_8kHz_buf, frame, frame_length );
         frame_8kHz = frame_8kHz_buf;
     } else if( Fs_kHz == 12 ) {
         silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) );
         silk_resampler_down2_3( filt_state, frame_8kHz_buf, frame, frame_length );
         frame_8kHz = frame_8kHz_buf;
     } else {
-        silk_assert( Fs_kHz == 8 );
+        celt_assert( Fs_kHz == 8 );
         frame_8kHz = frame;
     }
 
     /* Decimate again to 4 kHz */
     silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );/* Set state to zero */
     ALLOC( frame_4kHz, frame_length_4kHz, opus_int16 );
     silk_resampler_down2( filt_state, frame_4kHz, frame_8kHz, frame_length_8kHz );
 
@@ -183,24 +183,24 @@ opus_int silk_pitch_analysis_core(      
     * FIRST STAGE, operating in 4 khz
     ******************************************************************************/
     ALLOC( C, nb_subfr * CSTRIDE_8KHZ, opus_int16 );
     ALLOC( xcorr32, MAX_LAG_4KHZ-MIN_LAG_4KHZ+1, opus_int32 );
     silk_memset( C, 0, (nb_subfr >> 1) * CSTRIDE_4KHZ * sizeof( opus_int16 ) );
     target_ptr = &frame_4kHz[ silk_LSHIFT( SF_LENGTH_4KHZ, 2 ) ];
     for( k = 0; k < nb_subfr >> 1; k++ ) {
         /* Check that we are within range of the array */
-        silk_assert( target_ptr >= frame_4kHz );
-        silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
+        celt_assert( target_ptr >= frame_4kHz );
+        celt_assert( target_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
 
         basis_ptr = target_ptr - MIN_LAG_4KHZ;
 
         /* Check that we are within range of the array */
-        silk_assert( basis_ptr >= frame_4kHz );
-        silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
+        celt_assert( basis_ptr >= frame_4kHz );
+        celt_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
 
         celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1, arch );
 
         /* Calculate first vector products before loop */
         cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ];
         normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch );
         normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr,  basis_ptr, SF_LENGTH_8KHZ, arch ) );
         normalizer = silk_ADD32( normalizer, silk_SMULBB( SF_LENGTH_8KHZ, 4000 ) );
@@ -244,17 +244,17 @@ opus_int silk_pitch_analysis_core(      
             sum = silk_LSHIFT( (opus_int32)C[ i - MIN_LAG_4KHZ ], 1 );                          /* Q14 */
             sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) );                                /* Q14 */
             C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum;                                            /* Q14 */
         }
     }
 
     /* Sort */
     length_d_srch = silk_ADD_LSHIFT32( 4, complexity, 1 );
-    silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH );
+    celt_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH );
     silk_insertion_sort_decreasing_int16( C, d_srch, CSTRIDE_4KHZ,
                                           length_d_srch );
 
     /* Escape if correlation is very low already here */
     Cmax = (opus_int)C[ 0 ];                                                    /* Q14 */
     if( Cmax < SILK_FIX_CONST( 0.2, 14 ) ) {
         silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
         *LTPCorr_Q15  = 0;
@@ -269,17 +269,17 @@ opus_int silk_pitch_analysis_core(      
         /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */
         if( C[ i ] > threshold ) {
             d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + MIN_LAG_4KHZ, 1 );
         } else {
             length_d_srch = i;
             break;
         }
     }
-    silk_assert( length_d_srch > 0 );
+    celt_assert( length_d_srch > 0 );
 
     ALLOC( d_comp, D_COMP_STRIDE, opus_int16 );
     for( i = D_COMP_MIN; i < D_COMP_MAX; i++ ) {
         d_comp[ i - D_COMP_MIN ] = 0;
     }
     for( i = 0; i < length_d_srch; i++ ) {
         d_comp[ d_srch[ i ] - D_COMP_MIN ] = 1;
     }
@@ -320,18 +320,18 @@ opus_int silk_pitch_analysis_core(      
     * Find energy of each subframe projected onto its history, for a range of delays
     *********************************************************************************/
     silk_memset( C, 0, nb_subfr * CSTRIDE_8KHZ * sizeof( opus_int16 ) );
 
     target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ];
     for( k = 0; k < nb_subfr; k++ ) {
 
         /* Check that we are within range of the array */
-        silk_assert( target_ptr >= frame_8kHz );
-        silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
+        celt_assert( target_ptr >= frame_8kHz );
+        celt_assert( target_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
 
         energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch ), 1 );
         for( j = 0; j < length_d_comp; j++ ) {
             d = d_comp[ j ];
             basis_ptr = target_ptr - d;
 
             /* Check that we are within range of the array */
             silk_assert( basis_ptr >= frame_8kHz );
@@ -545,17 +545,17 @@ opus_int silk_pitch_analysis_core(      
         /* Save Lags */
         for( k = 0; k < nb_subfr; k++ ) {
             pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
             pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], MIN_LAG_8KHZ, PE_MAX_LAG_MS * 8 );
         }
         *lagIndex = (opus_int16)( lag - MIN_LAG_8KHZ );
         *contourIndex = (opus_int8)CBimax;
     }
-    silk_assert( *lagIndex >= 0 );
+    celt_assert( *lagIndex >= 0 );
     /* return as voiced */
     RESTORE_STACK;
     return 0;
 }
 
 /***********************************************************************
  * Calculates the correlations used in stage 3 search. In order to cover
  * the whole lag codebook for all the searched offset lags (lag +- 2),
@@ -582,42 +582,42 @@ static void silk_P_Ana_calc_corr_st3(
     const opus_int16 *target_ptr;
     opus_int   i, j, k, lag_counter, lag_low, lag_high;
     opus_int   nb_cbk_search, delta, idx, cbk_size;
     VARDECL( opus_int32, scratch_mem );
     VARDECL( opus_int32, xcorr32 );
     const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
     SAVE_STACK;
 
-    silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
-    silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
+    celt_assert( complexity >= SILK_PE_MIN_COMPLEX );
+    celt_assert( complexity <= SILK_PE_MAX_COMPLEX );
 
     if( nb_subfr == PE_MAX_NB_SUBFR ) {
         Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
         Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
         nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
         cbk_size      = PE_NB_CBKS_STAGE3_MAX;
     } else {
-        silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
+        celt_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
         Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
         Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
         nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
         cbk_size      = PE_NB_CBKS_STAGE3_10MS;
     }
     ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );
     ALLOC( xcorr32, SCRATCH_SIZE, opus_int32 );
 
     target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */
     for( k = 0; k < nb_subfr; k++ ) {
         lag_counter = 0;
 
         /* Calculate the correlations for each subframe */
         lag_low  = matrix_ptr( Lag_range_ptr, k, 0, 2 );
         lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );
-        silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE);
+        celt_assert(lag_high-lag_low+1 <= SCRATCH_SIZE);
         celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1, arch );
         for( j = lag_low; j <= lag_high; j++ ) {
             silk_assert( lag_counter < SCRATCH_SIZE );
             scratch_mem[ lag_counter ] = xcorr32[ lag_high - j ];
             lag_counter++;
         }
 
         delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
@@ -654,26 +654,26 @@ static void silk_P_Ana_calc_energy_st3(
     const opus_int16 *target_ptr, *basis_ptr;
     opus_int32 energy;
     opus_int   k, i, j, lag_counter;
     opus_int   nb_cbk_search, delta, idx, cbk_size, lag_diff;
     VARDECL( opus_int32, scratch_mem );
     const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
     SAVE_STACK;
 
-    silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
-    silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
+    celt_assert( complexity >= SILK_PE_MIN_COMPLEX );
+    celt_assert( complexity <= SILK_PE_MAX_COMPLEX );
 
     if( nb_subfr == PE_MAX_NB_SUBFR ) {
         Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
         Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
         nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
         cbk_size      = PE_NB_CBKS_STAGE3_MAX;
     } else {
-        silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
+        celt_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
         Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
         Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
         nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
         cbk_size      = PE_NB_CBKS_STAGE3_10MS;
     }
     ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );
 
     target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ];
--- a/media/libopus/silk/fixed/residual_energy16_FIX.c
+++ b/media/libopus/silk/fixed/residual_energy16_FIX.c
@@ -42,20 +42,20 @@ opus_int32 silk_residual_energy16_covar_
 )
 {
     opus_int   i, j, lshifts, Qxtra;
     opus_int32 c_max, w_max, tmp, tmp2, nrg;
     opus_int   cn[ MAX_MATRIX_SIZE ];
     const opus_int32 *pRow;
 
     /* Safety checks */
-    silk_assert( D >=  0 );
-    silk_assert( D <= 16 );
-    silk_assert( cQ >  0 );
-    silk_assert( cQ < 16 );
+    celt_assert( D >=  0 );
+    celt_assert( D <= 16 );
+    celt_assert( cQ >  0 );
+    celt_assert( cQ < 16 );
 
     lshifts = 16 - cQ;
     Qxtra = lshifts;
 
     c_max = 0;
     for( i = 0; i < D; i++ ) {
         c_max = silk_max_32( c_max, silk_abs( (opus_int32)c[ i ] ) );
     }
--- a/media/libopus/silk/fixed/residual_energy_FIX.c
+++ b/media/libopus/silk/fixed/residual_energy_FIX.c
@@ -53,17 +53,17 @@ void silk_residual_energy_FIX(
     opus_int32       tmp32;
     SAVE_STACK;
 
     x_ptr  = x;
     offset = LPC_order + subfr_length;
 
     /* Filter input to create the LPC residual for each frame half, and measure subframe energies */
     ALLOC( LPC_res, ( MAX_NB_SUBFR >> 1 ) * offset, opus_int16 );
-    silk_assert( ( nb_subfr >> 1 ) * ( MAX_NB_SUBFR >> 1 ) == nb_subfr );
+    celt_assert( ( nb_subfr >> 1 ) * ( MAX_NB_SUBFR >> 1 ) == nb_subfr );
     for( i = 0; i < nb_subfr >> 1; i++ ) {
         /* Calculate half frame LPC residual signal including preceding samples */
         silk_LPC_analysis_filter( LPC_res, x_ptr, a_Q12[ i ], ( MAX_NB_SUBFR >> 1 ) * offset, LPC_order, arch );
 
         /* Point to first subframe of the just calculated LPC residual signal */
         LPC_res_ptr = LPC_res + LPC_order;
         for( j = 0; j < ( MAX_NB_SUBFR >> 1 ); j++ ) {
             /* Measure subframe energy */
--- a/media/libopus/silk/fixed/schur64_FIX.c
+++ b/media/libopus/silk/fixed/schur64_FIX.c
@@ -38,17 +38,17 @@ opus_int32 silk_schur64(                
     const opus_int32            c[],                /* I    Correlations [order+1]                                      */
     opus_int32                  order               /* I    Prediction order                                            */
 )
 {
     opus_int   k, n;
     opus_int32 C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ];
     opus_int32 Ctmp1_Q30, Ctmp2_Q30, rc_tmp_Q31;
 
-    silk_assert( order >= 0 && order <= SILK_MAX_ORDER_LPC );
+    celt_assert( order >= 0 && order <= SILK_MAX_ORDER_LPC );
 
     /* Check for invalid input */
     if( c[ 0 ] <= 0 ) {
         silk_memset( rc_Q16, 0, order * sizeof( opus_int32 ) );
         return 0;
     }
 
     k = 0;
--- a/media/libopus/silk/fixed/schur_FIX.c
+++ b/media/libopus/silk/fixed/schur_FIX.c
@@ -38,17 +38,17 @@ opus_int32 silk_schur(                  
     const opus_int32            *c,                 /* I    correlations [order+1]                                      */
     const opus_int32            order               /* I    prediction order                                            */
 )
 {
     opus_int        k, n, lz;
     opus_int32    C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ];
     opus_int32    Ctmp1, Ctmp2, rc_tmp_Q15;
 
-    silk_assert( order >= 0 && order <= SILK_MAX_ORDER_LPC );
+    celt_assert( order >= 0 && order <= SILK_MAX_ORDER_LPC );
 
     /* Get number of leading zeros */
     lz = silk_CLZ32( c[ 0 ] );
 
     /* Copy correlations and adjust level to Q30 */
     k = 0;
     if( lz < 2 ) {
         /* lz must be 1, so shift one to the right */
--- a/media/libopus/silk/fixed/warped_autocorrelation_FIX.c
+++ b/media/libopus/silk/fixed/warped_autocorrelation_FIX.c
@@ -47,17 +47,17 @@ void silk_warped_autocorrelation_FIX_c(
 )
 {
     opus_int   n, i, lsh;
     opus_int32 tmp1_QS, tmp2_QS;
     opus_int32 state_QS[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 };
     opus_int64 corr_QC[  MAX_SHAPE_LPC_ORDER + 1 ] = { 0 };
 
     /* Order must be even */
-    silk_assert( ( order & 1 ) == 0 );
+    celt_assert( ( order & 1 ) == 0 );
     silk_assert( 2 * QS - QC >= 0 );
 
     /* Loop over samples */
     for( n = 0; n < length; n++ ) {
         tmp1_QS = silk_LSHIFT32( (opus_int32)input[ n ], QS );
         /* Loop over allpass sections */
         for( i = 0; i < order; i += 2 ) {
             /* Output of allpass section */
new file mode 100644
--- /dev/null
+++ b/media/libopus/silk/fixed/x86/burg_modified_FIX_sse4_1.c
@@ -0,0 +1,377 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+
+#include "SigProc_FIX.h"
+#include "define.h"
+#include "tuning_parameters.h"
+#include "pitch.h"
+#include "celt/x86/x86cpu.h"
+
+#define MAX_FRAME_SIZE              384             /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */
+
+#define QA                          25
+#define N_BITS_HEAD_ROOM            2
+#define MIN_RSHIFTS                 -16
+#define MAX_RSHIFTS                 (32 - QA)
+
+/* Compute reflection coefficients from input signal; this variant uses _mm_* integer intrinsics for the rshifts <= -2 update path */
+void silk_burg_modified_sse4_1(
+    opus_int32                  *res_nrg,           /* O    Residual energy                                             */
+    opus_int                    *res_nrg_Q,         /* O    Residual energy Q value                                     */
+    opus_int32                  A_Q16[],            /* O    Prediction coefficients (length order)                      */
+    const opus_int16            x[],                /* I    Input signal; nb_subfr * subfr_length samples are read      */
+    const opus_int32            minInvGain_Q30,     /* I    Inverse of max prediction gain                              */
+    const opus_int              subfr_length,       /* I    Input signal subframe length (incl. D preceding samples)    */
+    const opus_int              nb_subfr,           /* I    Number of subframes stacked in x                            */
+    const opus_int              D,                  /* I    Order                                                       */
+    int                         arch                /* I    Run-time architecture                                       */
+)
+{
+    opus_int         k, n, s, lz, rshifts, rshifts_extra, reached_max_gain;
+    opus_int32       C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2;
+    const opus_int16 *x_ptr;
+    opus_int32       C_first_row[ SILK_MAX_ORDER_LPC ];
+    opus_int32       C_last_row[  SILK_MAX_ORDER_LPC ];
+    opus_int32       Af_QA[       SILK_MAX_ORDER_LPC ];
+    opus_int32       CAf[ SILK_MAX_ORDER_LPC + 1 ];
+    opus_int32       CAb[ SILK_MAX_ORDER_LPC + 1 ];
+    opus_int32       xcorr[ SILK_MAX_ORDER_LPC ];
+    /* Vector temporaries; they are used only in the rshifts <= -2 branch of the main loop */
+    __m128i FIRST_3210, LAST_3210, ATMP_3210, TMP1_3210, TMP2_3210, T1_3210, T2_3210, PTR_3210, SUBFR_3210, X1_3210, X2_3210;
+    __m128i CONST1 = _mm_set1_epi32(1);
+
+    celt_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
+
+    /* Compute autocorrelations, added over subframes */
+    silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length );
+    if( rshifts > MAX_RSHIFTS ) { /* clamp the shift to MAX_RSHIFTS and rescale C0 to match */
+        C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS );
+        silk_assert( C0 > 0 );
+        rshifts = MAX_RSHIFTS;
+    } else { /* otherwise shift C0 by ( N_BITS_HEAD_ROOM - lz ), limited via MAX_RSHIFTS / MIN_RSHIFTS, and fold that into rshifts */
+        lz = silk_CLZ32( C0 ) - 1;
+        rshifts_extra = N_BITS_HEAD_ROOM - lz;
+        if( rshifts_extra > 0 ) {
+            rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts );
+            C0 = silk_RSHIFT32( C0, rshifts_extra );
+        } else {
+            rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts );
+            C0 = silk_LSHIFT32( C0, -rshifts_extra );
+        }
+        rshifts += rshifts_extra;
+    }
+    CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;                                /* Q(-rshifts) */
+    silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
+    if( rshifts > 0 ) { /* positive shift: accumulate inner products for lags 1..D, right-shifted by rshifts */
+        for( s = 0; s < nb_subfr; s++ ) {
+            x_ptr = x + s * subfr_length;
+            for( n = 1; n < D + 1; n++ ) {
+                C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64(
+                    silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n, arch ), rshifts );
+            }
+        }
+    } else { /* rshifts <= 0: celt_pitch_xcorr over subfr_length - D samples, tail terms added by hand, result left-shifted by -rshifts */
+        for( s = 0; s < nb_subfr; s++ ) {
+            int i;
+            opus_int32 d;
+            x_ptr = x + s * subfr_length;
+            celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch );
+            for( n = 1; n < D + 1; n++ ) { /* add the products for i >= n + subfr_length - D that the call above did not cover */
+               for ( i = n + subfr_length - D, d = 0; i < subfr_length; i++ )
+                  d = MAC16_16( d, x_ptr[ i ], x_ptr[ i - n ] );
+               xcorr[ n - 1 ] += d;
+            }
+            for( n = 1; n < D + 1; n++ ) {
+                C_first_row[ n - 1 ] += silk_LSHIFT32( xcorr[ n - 1 ], -rshifts );
+            }
+        }
+    }
+    silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
+
+    /* Initialize (recomputes the same CAb[ 0 ] / CAf[ 0 ] value assigned above; C0 is not modified in between) */
+    CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;                                /* Q(-rshifts) */
+
+    invGain_Q30 = (opus_int32)1 << 30;
+    reached_max_gain = 0;
+    for( n = 0; n < D; n++ ) {
+        /* Update first row of correlation matrix (without first element) */
+        /* Update last row of correlation matrix (without last element, stored in reversed order) */
+        /* Update C * Af */
+        /* Update C * flipud(Af) (stored in reversed order) */
+        if( rshifts > -2 ) { /* scalar path */
+            for( s = 0; s < nb_subfr; s++ ) {
+                x_ptr = x + s * subfr_length;
+                x1  = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    16 - rshifts );        /* Q(16-rshifts) */
+                x2  = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts );        /* Q(16-rshifts) */
+                tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    QA - 16 );             /* Q(QA-16) */
+                tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 );             /* Q(QA-16) */
+                for( k = 0; k < n; k++ ) {
+                    C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); /* Q( -rshifts ) */
+                    C_last_row[ k ]  = silk_SMLAWB( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
+                    Atmp_QA = Af_QA[ k ];
+                    tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ]            );                 /* Q(QA-16) */
+                    tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] );                 /* Q(QA-16) */
+                }
+                tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts );                                       /* Q(16-rshifts) */
+                tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts );                                       /* Q(16-rshifts) */
+                for( k = 0; k <= n; k++ ) {
+                    CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ]                    );        /* Q( -rshift ) */
+                    CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] );        /* Q( -rshift ) */
+                }
+            }
+        } else { /* rshifts <= -2: 4-wide vector loops, each followed by a scalar tail */
+            for( s = 0; s < nb_subfr; s++ ) {
+                x_ptr = x + s * subfr_length;
+                x1  = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    -rshifts );            /* Q( -rshifts ) */
+                x2  = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts );            /* Q( -rshifts ) */
+                tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    17 );                  /* Q17 */
+                tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 );                  /* Q17 */
+
+                X1_3210 = _mm_set1_epi32( x1 ); /* x1 broadcast to all four lanes */
+                X2_3210 = _mm_set1_epi32( x2 ); /* x2 broadcast to all four lanes */
+                TMP1_3210 = _mm_setzero_si128(); /* per-lane partial sums, folded into tmp1 after the loop */
+                TMP2_3210 = _mm_setzero_si128(); /* per-lane partial sums, folded into tmp2 after the loop */
+                for( k = 0; k < n - 3; k += 4 ) { /* four values of k per iteration */
+                    PTR_3210   = OP_CVTEPI16_EPI32_M64( &x_ptr[ n - k - 1 - 3 ] ); /* x_ptr[ n-k-4 .. n-k-1 ], presumably widened to 32 bit */
+                    SUBFR_3210 = OP_CVTEPI16_EPI32_M64( &x_ptr[ subfr_length - n + k ] );
+                    FIRST_3210 = _mm_loadu_si128( (__m128i *)&C_first_row[ k ] );
+                    PTR_3210   = _mm_shuffle_epi32( PTR_3210,  _MM_SHUFFLE( 0, 1, 2, 3 ) ); /* reverse lanes so lane j holds x_ptr[ n - (k+j) - 1 ] */
+                    LAST_3210  = _mm_loadu_si128( (__m128i *)&C_last_row[ k ] );
+                    ATMP_3210  = _mm_loadu_si128( (__m128i *)&Af_QA[ k ] );
+
+                    T1_3210 = _mm_mullo_epi32( PTR_3210, X1_3210 );
+                    T2_3210 = _mm_mullo_epi32( SUBFR_3210, X2_3210 );
+                    /* Af_QA scaled as ( ( a >> 7 ) + 1 ) >> 1; presumably matches silk_RSHIFT_ROUND( a, QA - 17 ) in the scalar tail - confirm */
+                    ATMP_3210 = _mm_srai_epi32( ATMP_3210, 7 );
+                    ATMP_3210 = _mm_add_epi32( ATMP_3210, CONST1 );
+                    ATMP_3210 = _mm_srai_epi32( ATMP_3210, 1 );
+
+                    FIRST_3210 = _mm_add_epi32( FIRST_3210, T1_3210 );
+                    LAST_3210 = _mm_add_epi32( LAST_3210, T2_3210 );
+
+                    PTR_3210   = _mm_mullo_epi32( ATMP_3210, PTR_3210 );
+                    SUBFR_3210   = _mm_mullo_epi32( ATMP_3210, SUBFR_3210 );
+
+                    _mm_storeu_si128( (__m128i *)&C_first_row[ k ], FIRST_3210 );
+                    _mm_storeu_si128( (__m128i *)&C_last_row[ k ], LAST_3210 );
+
+                    TMP1_3210 = _mm_add_epi32( TMP1_3210, PTR_3210 );
+                    TMP2_3210 = _mm_add_epi32( TMP2_3210, SUBFR_3210 );
+                }
+                /* Horizontal add: fold the four 32-bit lanes of each accumulator into lane 0 */
+                TMP1_3210 = _mm_add_epi32( TMP1_3210, _mm_unpackhi_epi64(TMP1_3210, TMP1_3210 ) );
+                TMP2_3210 = _mm_add_epi32( TMP2_3210, _mm_unpackhi_epi64(TMP2_3210, TMP2_3210 ) );
+                TMP1_3210 = _mm_add_epi32( TMP1_3210, _mm_shufflelo_epi16(TMP1_3210, 0x0E ) );
+                TMP2_3210 = _mm_add_epi32( TMP2_3210, _mm_shufflelo_epi16(TMP2_3210, 0x0E ) );
+
+                tmp1 += _mm_cvtsi128_si32( TMP1_3210 );
+                tmp2 += _mm_cvtsi128_si32( TMP2_3210 );
+                /* Scalar tail for the k values not covered by the 4-wide loop */
+                for( ; k < n; k++ ) {
+                    C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); /* Q( -rshifts ) */
+                    C_last_row[ k ]  = silk_MLA( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
+                    Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 );                                   /* Q17 */
+                    tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ],            Atmp1 );                      /* Q17 */
+                    tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 );                      /* Q17 */
+                }
+
+                tmp1 = -tmp1;                /* Q17 */
+                tmp2 = -tmp2;                /* Q17 */
+
+                {
+                    __m128i xmm_tmp1, xmm_tmp2;
+                    __m128i xmm_x_ptr_n_k_x2x0, xmm_x_ptr_n_k_x3x1;
+                    __m128i xmm_x_ptr_sub_x2x0, xmm_x_ptr_sub_x3x1;
+
+                    xmm_tmp1 = _mm_set1_epi32( tmp1 );
+                    xmm_tmp2 = _mm_set1_epi32( tmp2 );
+
+                    for( k = 0; k <= n - 3; k += 4 ) { /* four values of k per iteration */
+                        xmm_x_ptr_n_k_x2x0 = OP_CVTEPI16_EPI32_M64( &x_ptr[ n - k - 3 ] );
+                        xmm_x_ptr_sub_x2x0 = OP_CVTEPI16_EPI32_M64( &x_ptr[ subfr_length - n + k - 1 ] );
+
+                        xmm_x_ptr_n_k_x2x0 = _mm_shuffle_epi32( xmm_x_ptr_n_k_x2x0, _MM_SHUFFLE( 0, 1, 2, 3 ) ); /* reverse lanes so lane j holds x_ptr[ n - (k+j) ] */
+
+                        xmm_x_ptr_n_k_x2x0 = _mm_slli_epi32( xmm_x_ptr_n_k_x2x0, -rshifts - 1 ); /* same pre-shift as the scalar tail below */
+                        xmm_x_ptr_sub_x2x0 = _mm_slli_epi32( xmm_x_ptr_sub_x2x0, -rshifts - 1 );
+
+                        /* rotate lanes down by one (like _mm_srli_si128( ..., 4 ), except lane 3 receives lane 0) so lanes 1 and 3 land in the even positions */
+                        xmm_x_ptr_n_k_x3x1 = _mm_shuffle_epi32( xmm_x_ptr_n_k_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) );
+                        xmm_x_ptr_sub_x3x1 = _mm_shuffle_epi32( xmm_x_ptr_sub_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) );
+                        /* Signed 32x32 -> 64 bit products of the even lanes with the broadcast tmp1 / tmp2 */
+                        xmm_x_ptr_n_k_x2x0 = _mm_mul_epi32( xmm_x_ptr_n_k_x2x0, xmm_tmp1 );
+                        xmm_x_ptr_n_k_x3x1 = _mm_mul_epi32( xmm_x_ptr_n_k_x3x1, xmm_tmp1 );
+                        xmm_x_ptr_sub_x2x0 = _mm_mul_epi32( xmm_x_ptr_sub_x2x0, xmm_tmp2 );
+                        xmm_x_ptr_sub_x3x1 = _mm_mul_epi32( xmm_x_ptr_sub_x3x1, xmm_tmp2 );
+                        /* Move bits 16..47 of each product into the low dword (x2x0 set) or the high dword (x3x1 set) of its 64-bit lane */
+                        xmm_x_ptr_n_k_x2x0 = _mm_srli_epi64( xmm_x_ptr_n_k_x2x0, 16 );
+                        xmm_x_ptr_n_k_x3x1 = _mm_slli_epi64( xmm_x_ptr_n_k_x3x1, 16 );
+                        xmm_x_ptr_sub_x2x0 = _mm_srli_epi64( xmm_x_ptr_sub_x2x0, 16 );
+                        xmm_x_ptr_sub_x3x1 = _mm_slli_epi64( xmm_x_ptr_sub_x3x1, 16 );
+                        /* Interleave the two sets: each of the four 32-bit lanes now holds the low 32 bits of ( product >> 16 ) */
+                        xmm_x_ptr_n_k_x2x0 = _mm_blend_epi16( xmm_x_ptr_n_k_x2x0, xmm_x_ptr_n_k_x3x1, 0xCC );
+                        xmm_x_ptr_sub_x2x0 = _mm_blend_epi16( xmm_x_ptr_sub_x2x0, xmm_x_ptr_sub_x3x1, 0xCC );
+
+                        X1_3210  = _mm_loadu_si128( (__m128i *)&CAf[ k ] ); /* X1_3210 / PTR_3210 are reused here as scratch for CAf / CAb */
+                        PTR_3210 = _mm_loadu_si128( (__m128i *)&CAb[ k ] );
+
+                        X1_3210  = _mm_add_epi32( X1_3210, xmm_x_ptr_n_k_x2x0 );
+                        PTR_3210 = _mm_add_epi32( PTR_3210, xmm_x_ptr_sub_x2x0 );
+
+                        _mm_storeu_si128( (__m128i *)&CAf[ k ], X1_3210 );
+                        _mm_storeu_si128( (__m128i *)&CAb[ k ], PTR_3210 );
+                    }
+
+                    for( ; k <= n; k++ ) { /* scalar tail for the remaining k */
+                        CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1,
+                            silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) );                    /* Q( -rshift ) */
+                        CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2,
+                            silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift ) */
+                    }
+                }
+            }
+        }
+
+        /* Calculate numerator and denominator for the next order reflection (parcor) coefficient */
+        tmp1 = C_first_row[ n ];                                                                        /* Q( -rshifts ) */
+        tmp2 = C_last_row[ n ];                                                                         /* Q( -rshifts ) */
+        num  = 0;                                                                                       /* Q( -rshifts ) */
+        nrg  = silk_ADD32( CAb[ 0 ], CAf[ 0 ] );                                                        /* Q( 1-rshifts ) */
+        for( k = 0; k < n; k++ ) {
+            Atmp_QA = Af_QA[ k ];
+            lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1;
+            lz = silk_min( 32 - QA, lz );
+            Atmp1 = silk_LSHIFT32( Atmp_QA, lz );                                                       /* Q( QA + lz ) */
+
+            tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[  n - k - 1 ], Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
+            tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
+            num  = silk_ADD_LSHIFT32( num,  silk_SMMUL( CAb[ n - k ],             Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
+            nrg  = silk_ADD_LSHIFT32( nrg,  silk_SMMUL( silk_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ),
+                                                                                Atmp1 ), 32 - QA - lz );    /* Q( 1-rshifts ) */
+        }
+        CAf[ n + 1 ] = tmp1;                                                                            /* Q( -rshifts ) */
+        CAb[ n + 1 ] = tmp2;                                                                            /* Q( -rshifts ) */
+        num = silk_ADD32( num, tmp2 );                                                                  /* Q( -rshifts ) */
+        num = silk_LSHIFT32( -num, 1 );                                                                 /* Q( 1-rshifts ) */
+
+        /* Calculate the next order reflection (parcor) coefficient; saturate when |num| >= nrg */
+        if( silk_abs( num ) < nrg ) {
+            rc_Q31 = silk_DIV32_varQ( num, nrg, 31 );
+        } else {
+            rc_Q31 = ( num > 0 ) ? silk_int32_MAX : silk_int32_MIN;
+        }
+
+        /* Update inverse prediction gain */
+        tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
+        tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 );
+        if( tmp1 <= minInvGain_Q30 ) {
+            /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
+            tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 );            /* Q30 */
+            rc_Q31 = silk_SQRT_APPROX( tmp2 );                                                  /* Q15 */
+            if( rc_Q31 > 0 ) {
+                 /* Newton-Raphson iteration */
+                rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 );                   /* Q15 */
+                rc_Q31 = silk_LSHIFT32( rc_Q31, 16 );                                               /* Q31 */
+                if( num < 0 ) {
+                    /* Ensure the adjusted reflection coefficient has the original sign */
+                    rc_Q31 = -rc_Q31;
+                }
+            }
+            invGain_Q30 = minInvGain_Q30;
+            reached_max_gain = 1;
+        } else {
+            invGain_Q30 = tmp1;
+        }
+
+        /* Update the AR coefficients, in pairs ( k, n - k - 1 ) from both ends towards the middle */
+        for( k = 0; k < (n + 1) >> 1; k++ ) {
+            tmp1 = Af_QA[ k ];                                                                  /* QA */
+            tmp2 = Af_QA[ n - k - 1 ];                                                          /* QA */
+            Af_QA[ k ]         = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );      /* QA */
+            Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );      /* QA */
+        }
+        Af_QA[ n ] = silk_RSHIFT32( rc_Q31, 31 - QA );                                          /* QA */
+
+        if( reached_max_gain ) {
+            /* Reached max prediction gain; set remaining coefficients to zero and exit loop */
+            for( k = n + 1; k < D; k++ ) {
+                Af_QA[ k ] = 0;
+            }
+            break;
+        }
+
+        /* Update C * Af and C * Ab */
+        for( k = 0; k <= n + 1; k++ ) {
+            tmp1 = CAf[ k ];                                                                    /* Q( -rshifts ) */
+            tmp2 = CAb[ n - k + 1 ];                                                            /* Q( -rshifts ) */
+            CAf[ k ]         = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );        /* Q( -rshifts ) */
+            CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );        /* Q( -rshifts ) */
+        }
+    }
+
+    if( reached_max_gain ) { /* gain was limited: the residual energy is approximated from invGain_Q30 and C0 */
+        for( k = 0; k < D; k++ ) {
+            /* Scale coefficients */
+            A_Q16[ k ] = -silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );
+        }
+        /* Subtract energy of preceding samples from C0 */
+        if( rshifts > 0 ) {
+            for( s = 0; s < nb_subfr; s++ ) {
+                x_ptr = x + s * subfr_length;
+                C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D, arch ), rshifts );
+            }
+        } else {
+            for( s = 0; s < nb_subfr; s++ ) {
+                x_ptr = x + s * subfr_length;
+                C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D, arch ), -rshifts );
+            }
+        }
+        /* Approximate residual energy */
+        *res_nrg = silk_LSHIFT( silk_SMMUL( invGain_Q30, C0 ), 2 );
+        *res_nrg_Q = -rshifts;
+    } else {
+        /* Return residual energy */
+        nrg  = CAf[ 0 ];                                                                            /* Q( -rshifts ) */
+        tmp1 = (opus_int32)1 << 16;                                                                             /* Q16 */
+        for( k = 0; k < D; k++ ) {
+            Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );                                       /* Q16 */
+            nrg  = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 );                                         /* Q( -rshifts ) */
+            tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 );                                               /* Q16 */
+            A_Q16[ k ] = -Atmp1;
+        }
+        *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ), -tmp1 );/* Q( -rshifts ) */
+        *res_nrg_Q = -rshifts;
+    }
+}
new file mode 100644
--- /dev/null
+++ b/media/libopus/silk/fixed/x86/vector_ops_FIX_sse4_1.c
@@ -0,0 +1,88 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+#include "main.h"
+
+#include "SigProc_FIX.h"
+#include "pitch.h"
+
+opus_int64 silk_inner_prod16_aligned_64_sse4_1(
+    const opus_int16            *inVec1,            /*    I input vector 1                                              */
+    const opus_int16            *inVec2,            /*    I input vector 2                                              */
+    const opus_int              len                 /*    I vector lengths                                              */
+)
+{
+    opus_int  i, dataSize8;
+    opus_int64 sum;
+    /* Returns the sum of inVec1[ i ] * inVec2[ i ] for i in [ 0, len ), accumulated in 64 bits; SSE4.1 handles 8 elements per iteration */
+    __m128i xmm_tempa;
+    __m128i inVec1_76543210, acc1;
+    __m128i inVec2_76543210, acc2;
+    /* dataSize8 is len rounded down to a multiple of 8: the part processed by the vector loop */
+    sum = 0;
+    dataSize8 = len & ~7;
+
+    acc1 = _mm_setzero_si128();
+    acc2 = _mm_setzero_si128();
+
+    for( i = 0; i < dataSize8; i += 8 ) {
+        inVec1_76543210 = _mm_loadu_si128( (__m128i *)(&inVec1[i + 0] ) );
+        inVec2_76543210 = _mm_loadu_si128( (__m128i *)(&inVec2[i + 0] ) );
+
+        /* multiply 16-bit pairs and add adjacent products into four 32-bit lanes; a lane wraps around only when all 4 of its operands are -32768 (0x8000) */
+        inVec1_76543210 = _mm_madd_epi16( inVec1_76543210, inVec2_76543210 );
+        /* sign-extend the two low 32-bit sums to 64 bits */
+        xmm_tempa       = _mm_cvtepi32_epi64( inVec1_76543210 );
+        /* equivalent to a right shift by 8 bytes: bring the two high 32-bit sums into the low half, then sign-extend them as well */
+        inVec1_76543210 = _mm_shuffle_epi32( inVec1_76543210, _MM_SHUFFLE( 0, 0, 3, 2 ) );
+        inVec1_76543210 = _mm_cvtepi32_epi64( inVec1_76543210 );
+
+        acc1 = _mm_add_epi64( acc1, xmm_tempa );
+        acc2 = _mm_add_epi64( acc2, inVec1_76543210 );
+    }
+    /* merge the two 64-bit accumulators lane by lane */
+    acc1 = _mm_add_epi64( acc1, acc2 );
+
+    /* equivalent to a right shift by 8 bytes: move the upper 64-bit lane into the lower position so both lanes can be added */
+    acc2 = _mm_shuffle_epi32( acc1, _MM_SHUFFLE( 0, 0, 3, 2 ) );
+    acc1 = _mm_add_epi64( acc1, acc2 );
+    /* the low 64 bits of acc1 now hold the total of the vectorized part */
+    _mm_storel_epi64( (__m128i *)&sum, acc1 );
+    /* scalar tail for the remaining elements; NOTE(review): silk_SMLABB is presumably the 32-bit MAC macro - confirm the 64-bit sum is never truncated */
+    for( ; i < len; i++ ) {
+        sum = silk_SMLABB( sum, inVec1[ i ], inVec2[ i ] );
+    }
+
+    return sum;
+}
--- a/media/libopus/silk/float/LPC_analysis_filter_FLP.c
+++ b/media/libopus/silk/float/LPC_analysis_filter_FLP.c
@@ -210,17 +210,17 @@ static OPUS_INLINE void silk_LPC_analysi
 void silk_LPC_analysis_filter_FLP(
     silk_float                      r_LPC[],                            /* O    LPC residual signal                         */
     const silk_float                PredCoef[],                         /* I    LPC coefficients                            */
     const silk_float                s[],                                /* I    Input signal                                */
     const opus_int                  length,                             /* I    Length of input signal                      */
     const opus_int                  Order                               /* I    LPC order                                   */
 )
 {
-    silk_assert( Order <= length );
+    celt_assert( Order <= length );
 
     switch( Order ) {
         case 6:
             silk_LPC_analysis_filter6_FLP(  r_LPC, PredCoef, s, length );
         break;
 
         case 8:
             silk_LPC_analysis_filter8_FLP(  r_LPC, PredCoef, s, length );
@@ -234,16 +234,16 @@ void silk_LPC_analysis_filter_FLP(
             silk_LPC_analysis_filter12_FLP( r_LPC, PredCoef, s, length );
         break;
 
         case 16:
             silk_LPC_analysis_filter16_FLP( r_LPC, PredCoef, s, length );
         break;
 
         default:
-            silk_assert( 0 );
+            celt_assert( 0 );
         break;
     }
 
     /* Set first Order output samples to zero */
     silk_memset( r_LPC, 0, Order * sizeof( silk_float ) );
 }
 
--- a/media/libopus/silk/float/apply_sine_window_FLP.c
+++ b/media/libopus/silk/float/apply_sine_window_FLP.c
@@ -40,20 +40,20 @@ void silk_apply_sine_window_FLP(
     const silk_float                px[],                               /* I    Pointer to input signal                     */
     const opus_int                  win_type,                           /* I    Selects a window type                       */
     const opus_int                  length                              /* I    Window length, multiple of 4                */
 )
 {
     opus_int   k;
     silk_float freq, c, S0, S1;
 
-    silk_assert( win_type == 1 || win_type == 2 );
+    celt_assert( win_type == 1 || win_type == 2 );
 
     /* Length must be multiple of 4 */
-    silk_assert( ( length & 3 ) == 0 );
+    celt_assert( ( length & 3 ) == 0 );
 
     freq = PI / ( length + 1 );
 
     /* Approximation of 2 * cos(f) */
     c = 2.0f - freq * freq;
 
     /* Initialize state */
     if( win_type < 2 ) {
--- a/media/libopus/silk/float/burg_modified_FLP.c
+++ b/media/libopus/silk/float/burg_modified_FLP.c
@@ -47,17 +47,17 @@ silk_float silk_burg_modified_FLP(      
 {
     opus_int         k, n, s, reached_max_gain;
     double           C0, invGain, num, nrg_f, nrg_b, rc, Atmp, tmp1, tmp2;
     const silk_float *x_ptr;
     double           C_first_row[ SILK_MAX_ORDER_LPC ], C_last_row[ SILK_MAX_ORDER_LPC ];
     double           CAf[ SILK_MAX_ORDER_LPC + 1 ], CAb[ SILK_MAX_ORDER_LPC + 1 ];
     double           Af[ SILK_MAX_ORDER_LPC ];
 
-    silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
+    celt_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
 
     /* Compute autocorrelations, added over subframes */
     C0 = silk_energy_FLP( x, nb_subfr * subfr_length );
     silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( double ) );
     for( s = 0; s < nb_subfr; s++ ) {
         x_ptr = x + s * subfr_length;
         for( n = 1; n < D + 1; n++ ) {
             C_first_row[ n - 1 ] += silk_inner_product_FLP( x_ptr, x_ptr + n, subfr_length - n );
--- a/media/libopus/silk/float/encode_frame_FLP.c
+++ b/media/libopus/silk/float/encode_frame_FLP.c
@@ -37,31 +37,38 @@ POSSIBILITY OF SUCH DAMAGE.
 static OPUS_INLINE void silk_LBRR_encode_FLP(
     silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
     silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
     const silk_float                xfw[],                              /* I    Input signal                                */
     opus_int                        condCoding                          /* I    The type of conditional coding used so far for this frame */
 );
 
 void silk_encode_do_VAD_FLP(
-    silk_encoder_state_FLP          *psEnc                              /* I/O  Encoder state FLP                           */
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    opus_int                        activity                            /* I    Decision of Opus voice activity detector    */
 )
 {
+    const opus_int activity_threshold = SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 );
+
     /****************************/
     /* Voice Activity Detection */
     /****************************/
     silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.arch );
+    /* If Opus VAD is inactive and Silk VAD is active: lower Silk VAD to just under the threshold */
+    if( activity == VAD_NO_ACTIVITY && psEnc->sCmn.speech_activity_Q8 >= activity_threshold ) {
+        psEnc->sCmn.speech_activity_Q8 = activity_threshold - 1;
+    }
 
     /**************************************************/
     /* Convert speech activity into VAD and DTX flags */
     /**************************************************/
-    if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) {
+    if( psEnc->sCmn.speech_activity_Q8 < activity_threshold ) {
         psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY;
         psEnc->sCmn.noSpeechCounter++;
-        if( psEnc->sCmn.noSpeechCounter < NB_SPEECH_FRAMES_BEFORE_DTX ) {
+        if( psEnc->sCmn.noSpeechCounter <= NB_SPEECH_FRAMES_BEFORE_DTX ) {
             psEnc->sCmn.inDTX = 0;
         } else if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NB_SPEECH_FRAMES_BEFORE_DTX ) {
             psEnc->sCmn.noSpeechCounter = NB_SPEECH_FRAMES_BEFORE_DTX;
             psEnc->sCmn.inDTX           = 0;
         }
         psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 0;
     } else {
         psEnc->sCmn.noSpeechCounter    = 0;
@@ -236,17 +243,17 @@ opus_int silk_encode_frame_FLP(
                     break;
                 }
             }
 
             if( iter == maxIter ) {
                 if( found_lower && ( gainsID == gainsID_lower || nBits > maxBits ) ) {
                     /* Restore output state from earlier iteration that did meet the bitrate budget */
                     silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) );
-                    silk_assert( sRangeEnc_copy2.offs <= 1275 );
+                    celt_assert( sRangeEnc_copy2.offs <= 1275 );
                     silk_memcpy( psRangeEnc->buf, ec_buf_copy, sRangeEnc_copy2.offs );
                     silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy2, sizeof( silk_nsq_state ) );
                     psEnc->sShape.LastGainIndex = LastGainIndex_copy2;
                 }
                 break;
             }
 
             if( nBits > maxBits ) {
@@ -266,17 +273,17 @@ opus_int silk_encode_frame_FLP(
             } else if( nBits < maxBits - 5 ) {
                 found_lower = 1;
                 nBits_lower = nBits;
                 gainMult_lower = gainMult_Q8;
                 if( gainsID != gainsID_lower ) {
                     gainsID_lower = gainsID;
                     /* Copy part of the output state */
                     silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) );
-                    silk_assert( psRangeEnc->offs <= 1275 );
+                    celt_assert( psRangeEnc->offs <= 1275 );
                     silk_memcpy( ec_buf_copy, psRangeEnc->buf, psRangeEnc->offs );
                     silk_memcpy( &sNSQ_copy2, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
                     LastGainIndex_copy2 = psEnc->sShape.LastGainIndex;
                 }
             } else {
                 /* Within 5 bits of budget: close enough */
                 break;
             }
--- a/media/libopus/silk/float/find_LPC_FLP.c
+++ b/media/libopus/silk/float/find_LPC_FLP.c
@@ -94,11 +94,11 @@ void silk_find_LPC_FLP(
         }
     }
 
     if( psEncC->indices.NLSFInterpCoef_Q2 == 4 ) {
         /* NLSF interpolation is currently inactive, calculate NLSFs from full frame AR coefficients */
         silk_A2NLSF_FLP( NLSF_Q15, a, psEncC->predictLPCOrder );
     }
 
-    silk_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 ||
+    celt_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 ||
         ( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) );
 }
--- a/media/libopus/silk/float/find_pitch_lags_FLP.c
+++ b/media/libopus/silk/float/find_pitch_lags_FLP.c
@@ -51,17 +51,17 @@ void silk_find_pitch_lags_FLP(
     silk_float *Wsig_ptr;
 
     /******************************************/
     /* Set up buffer lengths etc based on Fs  */
     /******************************************/
     buf_len = psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length + psEnc->sCmn.ltp_mem_length;
 
     /* Safety check */
-    silk_assert( buf_len >= psEnc->sCmn.pitch_LPC_win_length );
+    celt_assert( buf_len >= psEnc->sCmn.pitch_LPC_win_length );
 
     x_buf = x - psEnc->sCmn.ltp_mem_length;
 
     /******************************************/
     /* Estimate LPC AR coeficients            */
     /******************************************/
 
     /* Calculate windowed signal */
--- a/media/libopus/silk/float/find_pred_coefs_FLP.c
+++ b/media/libopus/silk/float/find_pred_coefs_FLP.c
@@ -54,17 +54,17 @@ void silk_find_pred_coefs_FLP(
         silk_assert( psEncCtrl->Gains[ i ] > 0.0f );
         invGains[ i ] = 1.0f / psEncCtrl->Gains[ i ];
     }
 
     if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
         /**********/
         /* VOICED */
         /**********/
-        silk_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 );
+        celt_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 );
 
         /* LTP analysis */
         silk_find_LTP_FLP( XXLTP, xXLTP, res_pitch, psEncCtrl->pitchL, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr );
 
         /* Quantize LTP gain parameters */
         silk_quant_LTP_gains_FLP( psEncCtrl->LTPCoef, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex,
             &psEnc->sCmn.sum_log_gain_Q7, &psEncCtrl->LTPredCodGain, XXLTP, xXLTP, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.arch );
 
--- a/media/libopus/silk/float/main_FLP.h
+++ b/media/libopus/silk/float/main_FLP.h
@@ -51,17 +51,18 @@ extern "C"
 
 /* High-pass filter with cutoff frequency adaptation based on pitch lag statistics */
 void silk_HP_variable_cutoff(
     silk_encoder_state_Fxx          state_Fxx[]                         /* I/O  Encoder states                              */
 );
 
 /* Encoder main function */
 void silk_encode_do_VAD_FLP(
-    silk_encoder_state_FLP          *psEnc                              /* I/O  Encoder state FLP                           */
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    opus_int                        activity                            /* I    Decision of Opus voice activity detector    */
 );
 
 /* Encoder main function */
 opus_int silk_encode_frame_FLP(
     silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
     opus_int32                      *pnBytesOut,                        /* O    Number of payload bytes;                    */
     ec_enc                          *psRangeEnc,                        /* I/O  compressor data structure                   */
     opus_int                        condCoding,                         /* I    The type of conditional coding to use       */
--- a/media/libopus/silk/float/pitch_analysis_core_FLP.c
+++ b/media/libopus/silk/float/pitch_analysis_core_FLP.c
@@ -104,21 +104,21 @@ opus_int silk_pitch_analysis_core_FLP(  
     opus_int   frame_length, frame_length_8kHz, frame_length_4kHz;
     opus_int   sf_length, sf_length_8kHz, sf_length_4kHz;
     opus_int   min_lag, min_lag_8kHz, min_lag_4kHz;
     opus_int   max_lag, max_lag_8kHz, max_lag_4kHz;
     opus_int   nb_cbk_search;
     const opus_int8 *Lag_CB_ptr;
 
     /* Check for valid sampling frequency */
-    silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 );
+    celt_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 );
 
     /* Check for valid complexity setting */
-    silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
-    silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
+    celt_assert( complexity >= SILK_PE_MIN_COMPLEX );
+    celt_assert( complexity <= SILK_PE_MAX_COMPLEX );
 
     silk_assert( search_thres1 >= 0.0f && search_thres1 <= 1.0f );
     silk_assert( search_thres2 >= 0.0f && search_thres2 <= 1.0f );
 
     /* Set up frame lengths max / min lag for the sampling frequency */
     frame_length      = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * Fs_kHz;
     frame_length_4kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 4;
     frame_length_8kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 8;
@@ -143,17 +143,17 @@ opus_int silk_pitch_analysis_core_FLP(  
     } else if( Fs_kHz == 12 ) {
         /* Resample to 12 -> 8 khz */
         opus_int16 frame_12_FIX[ 12 * PE_MAX_FRAME_LENGTH_MS ];
         silk_float2short_array( frame_12_FIX, frame, frame_length );
         silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) );
         silk_resampler_down2_3( filt_state, frame_8_FIX, frame_12_FIX, frame_length );
         silk_short2float_array( frame_8kHz, frame_8_FIX, frame_length_8kHz );
     } else {
-        silk_assert( Fs_kHz == 8 );
+        celt_assert( Fs_kHz == 8 );
         silk_float2short_array( frame_8_FIX, frame, frame_length_8kHz );
     }
 
     /* Decimate again to 4 kHz */
     silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );
     silk_resampler_down2( filt_state, frame_4_FIX, frame_8_FIX, frame_length_8kHz );
     silk_short2float_array( frame_4kHz, frame_4_FIX, frame_length_4kHz );
 
@@ -164,24 +164,24 @@ opus_int silk_pitch_analysis_core_FLP(  
 
     /******************************************************************************
     * FIRST STAGE, operating in 4 khz
     ******************************************************************************/
     silk_memset(C, 0, sizeof(silk_float) * nb_subfr * ((PE_MAX_LAG >> 1) + 5));
     target_ptr = &frame_4kHz[ silk_LSHIFT( sf_length_4kHz, 2 ) ];
     for( k = 0; k < nb_subfr >> 1; k++ ) {
         /* Check that we are within range of the array */
-        silk_assert( target_ptr >= frame_4kHz );
-        silk_assert( target_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
+        celt_assert( target_ptr >= frame_4kHz );
+        celt_assert( target_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
 
         basis_ptr = target_ptr - min_lag_4kHz;
 
         /* Check that we are within range of the array */
-        silk_assert( basis_ptr >= frame_4kHz );
-        silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
+        celt_assert( basis_ptr >= frame_4kHz );
+        celt_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
 
         celt_pitch_xcorr( target_ptr, target_ptr-max_lag_4kHz, xcorr, sf_length_8kHz, max_lag_4kHz - min_lag_4kHz + 1, arch );
 
         /* Calculate first vector products before loop */
         cross_corr = xcorr[ max_lag_4kHz - min_lag_4kHz ];
         normalizer = silk_energy_FLP( target_ptr, sf_length_8kHz ) +
                      silk_energy_FLP( basis_ptr,  sf_length_8kHz ) +
                      sf_length_8kHz * 4000.0f;
@@ -210,17 +210,17 @@ opus_int silk_pitch_analysis_core_FLP(  
 
     /* Apply short-lag bias */
     for( i = max_lag_4kHz; i >= min_lag_4kHz; i-- ) {
         C[ 0 ][ i ] -= C[ 0 ][ i ] * i / 4096.0f;
     }
 
     /* Sort */
     length_d_srch = 4 + 2 * complexity;
-    silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH );
+    celt_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH );
     silk_insertion_sort_decreasing_FLP( &C[ 0 ][ min_lag_4kHz ], d_srch, max_lag_4kHz - min_lag_4kHz + 1, length_d_srch );
 
     /* Escape if correlation is very low already here */
     Cmax = C[ 0 ][ min_lag_4kHz ];
     if( Cmax < 0.2f ) {
         silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
         *LTPCorr      = 0.0f;
         *lagIndex     = 0;
@@ -233,17 +233,17 @@ opus_int silk_pitch_analysis_core_FLP(  
         /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */
         if( C[ 0 ][ min_lag_4kHz + i ] > threshold ) {
             d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + min_lag_4kHz, 1 );
         } else {
             length_d_srch = i;
             break;
         }
     }
-    silk_assert( length_d_srch > 0 );
+    celt_assert( length_d_srch > 0 );
 
     for( i = min_lag_8kHz - 5; i < max_lag_8kHz + 5; i++ ) {
         d_comp[ i ] = 0;
     }
     for( i = 0; i < length_d_srch; i++ ) {
         d_comp[ d_srch[ i ] ] = 1;
     }
 
@@ -466,17 +466,17 @@ opus_int silk_pitch_analysis_core_FLP(  
         /* Save Lags */
         for( k = 0; k < nb_subfr; k++ ) {
             pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
             pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, PE_MAX_LAG_MS * 8 );
         }
         *lagIndex = (opus_int16)( lag - min_lag_8kHz );
         *contourIndex = (opus_int8)CBimax;
     }
-    silk_assert( *lagIndex >= 0 );
+    celt_assert( *lagIndex >= 0 );
     /* return as voiced */
     return 0;
 }
 
 /***********************************************************************
  * Calculates the correlations used in stage 3 search. In order to cover
  * the whole lag codebook for all the searched offset lags (lag +- 2),
  * the following correlations are needed in each sub frame:
@@ -501,26 +501,26 @@ static void silk_P_Ana_calc_corr_st3(
 {
     const silk_float *target_ptr;
     opus_int   i, j, k, lag_counter, lag_low, lag_high;
     opus_int   nb_cbk_search, delta, idx, cbk_size;
     silk_float scratch_mem[ SCRATCH_SIZE ];
     opus_val32 xcorr[ SCRATCH_SIZE ];
     const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
 
-    silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
-    silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
+    celt_assert( complexity >= SILK_PE_MIN_COMPLEX );
+    celt_assert( complexity <= SILK_PE_MAX_COMPLEX );
 
     if( nb_subfr == PE_MAX_NB_SUBFR ) {
         Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
         Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
         nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
         cbk_size      = PE_NB_CBKS_STAGE3_MAX;
     } else {
-        silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
+        celt_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
         Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
         Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
         nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
         cbk_size      = PE_NB_CBKS_STAGE3_10MS;
     }
 
     target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */
     for( k = 0; k < nb_subfr; k++ ) {
@@ -567,26 +567,26 @@ static void silk_P_Ana_calc_energy_st3(
 {
     const silk_float *target_ptr, *basis_ptr;
     double    energy;
     opus_int   k, i, j, lag_counter;
     opus_int   nb_cbk_search, delta, idx, cbk_size, lag_diff;
     silk_float scratch_mem[ SCRATCH_SIZE ];
     const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
 
-    silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
-    silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
+    celt_assert( complexity >= SILK_PE_MIN_COMPLEX );
+    celt_assert( complexity <= SILK_PE_MAX_COMPLEX );
 
     if( nb_subfr == PE_MAX_NB_SUBFR ) {
         Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
         Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
         nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
         cbk_size      = PE_NB_CBKS_STAGE3_MAX;
     } else {
-        silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
+        celt_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
         Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
         Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
         nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
         cbk_size      = PE_NB_CBKS_STAGE3_10MS;
     }
 
     target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ];
     for( k = 0; k < nb_subfr; k++ ) {
--- a/media/libopus/silk/float/residual_energy_FLP.c
+++ b/media/libopus/silk/float/residual_energy_FLP.c
@@ -42,17 +42,17 @@ silk_float silk_residual_energy_covar_FL
     const silk_float                wxx,                                /* I    Weighted correlation value                  */
     const opus_int                  D                                   /* I    Dimension                                   */
 )
 {
     opus_int   i, j, k;
     silk_float tmp, nrg = 0.0f, regularization;
 
     /* Safety checks */
-    silk_assert( D >= 0 );
+    celt_assert( D >= 0 );
 
     regularization = REGULARIZATION_FACTOR * ( wXX[ 0 ] + wXX[ D * D - 1 ] );
     for( k = 0; k < MAX_ITERATIONS_RESIDUAL_NRG; k++ ) {
         nrg = wxx;
 
         tmp = 0.0f;
         for( i = 0; i < D; i++ ) {
             tmp += wXx[ i ] * c[ i ];
--- a/media/libopus/silk/float/schur_FLP.c
+++ b/media/libopus/silk/float/schur_FLP.c
@@ -36,17 +36,17 @@ silk_float silk_schur_FLP(              
     const silk_float    auto_corr[],        /* I    autocorrelation sequence (length order+1)                   */
     opus_int            order               /* I    order                                                       */
 )
 {
     opus_int   k, n;
     double C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ];
     double Ctmp1, Ctmp2, rc_tmp;
 
-    silk_assert( order >= 0 && order <= SILK_MAX_ORDER_LPC );
+    celt_assert( order >= 0 && order <= SILK_MAX_ORDER_LPC );
 
     /* Copy correlations */
     k = 0;
     do {
         C[ k ][ 0 ] = C[ k ][ 1 ] = auto_corr[ k ];
     } while( ++k <= order );
 
     for( k = 0; k < order; k++ ) {
--- a/media/libopus/silk/float/sort_FLP.c
+++ b/media/libopus/silk/float/sort_FLP.c
@@ -42,19 +42,19 @@ void silk_insertion_sort_decreasing_FLP(
     const opus_int      L,                  /* I    Vector length                                               */
     const opus_int      K                   /* I    Number of correctly sorted positions                        */
 )
 {
     silk_float value;
     opus_int   i, j;
 
     /* Safety checks */
-    silk_assert( K >  0 );
-    silk_assert( L >  0 );
-    silk_assert( L >= K );
+    celt_assert( K >  0 );
+    celt_assert( L >  0 );
+    celt_assert( L >= K );
 
     /* Write start indices in index vector */
     for( i = 0; i < K; i++ ) {
         idx[ i ] = i;
     }
 
     /* Sort vector elements by value, decreasing order */
     for( i = 1; i < K; i++ ) {
--- a/media/libopus/silk/float/warped_autocorrelation_FLP.c
+++ b/media/libopus/silk/float/warped_autocorrelation_FLP.c
@@ -41,17 +41,17 @@ void silk_warped_autocorrelation_FLP(
 )
 {
     opus_int    n, i;
     double      tmp1, tmp2;
     double      state[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
     double      C[     MAX_SHAPE_LPC_ORDER + 1 ] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
 
     /* Order must be even */
-    silk_assert( ( order & 1 ) == 0 );
+    celt_assert( ( order & 1 ) == 0 );
 
     /* Loop over samples */
     for( n = 0; n < length; n++ ) {
         tmp1 = input[ n ];
         /* Loop over allpass sections */
         for( i = 0; i < order; i += 2 ) {
             /* Output of allpass section */
             tmp2 = state[ i ] + warping * ( state[ i + 1 ] - tmp1 );
--- a/media/libopus/silk/interpolate.c
+++ b/media/libopus/silk/interpolate.c
@@ -37,15 +37,15 @@ void silk_interpolate(
     const opus_int16            x0[ MAX_LPC_ORDER ],            /* I    first vector                                */
     const opus_int16            x1[ MAX_LPC_ORDER ],            /* I    second vector                               */
     const opus_int              ifact_Q2,                       /* I    interp. factor, weight on 2nd vector        */
     const opus_int              d                               /* I    number of parameters                        */
 )
 {
     opus_int i;
 
-    silk_assert( ifact_Q2 >= 0 );
-    silk_assert( ifact_Q2 <= 4 );
+    celt_assert( ifact_Q2 >= 0 );
+    celt_assert( ifact_Q2 <= 4 );
 
     for( i = 0; i < d; i++ ) {
         xi[ i ] = (opus_int16)silk_ADD_RSHIFT( x0[ i ], silk_SMULBB( x1[ i ] - x0[ i ], ifact_Q2 ), 2 );
     }
 }
--- a/media/libopus/silk/process_NLSFs.c
+++ b/media/libopus/silk/process_NLSFs.c
@@ -43,29 +43,29 @@ void silk_process_NLSFs(
     opus_int     NLSF_mu_Q20;
     opus_int16   i_sqr_Q15;
     opus_int16   pNLSF0_temp_Q15[ MAX_LPC_ORDER ];
     opus_int16   pNLSFW_QW[ MAX_LPC_ORDER ];
     opus_int16   pNLSFW0_temp_QW[ MAX_LPC_ORDER ];
 
     silk_assert( psEncC->speech_activity_Q8 >=   0 );
     silk_assert( psEncC->speech_activity_Q8 <= SILK_FIX_CONST( 1.0, 8 ) );
-    silk_assert( psEncC->useInterpolatedNLSFs == 1 || psEncC->indices.NLSFInterpCoef_Q2 == ( 1 << 2 ) );
+    celt_assert( psEncC->useInterpolatedNLSFs == 1 || psEncC->indices.NLSFInterpCoef_Q2 == ( 1 << 2 ) );
 
     /***********************/
     /* Calculate mu values */
     /***********************/
     /* NLSF_mu  = 0.003 - 0.0015 * psEnc->speech_activity; */
     NLSF_mu_Q20 = silk_SMLAWB( SILK_FIX_CONST( 0.003, 20 ), SILK_FIX_CONST( -0.001, 28 ), psEncC->speech_activity_Q8 );
     if( psEncC->nb_subfr == 2 ) {
         /* Multiply by 1.5 for 10 ms packets */
         NLSF_mu_Q20 = silk_ADD_RSHIFT( NLSF_mu_Q20, NLSF_mu_Q20, 1 );
     }
 
-    silk_assert( NLSF_mu_Q20 >  0 );
+    celt_assert( NLSF_mu_Q20 >  0 );
     silk_assert( NLSF_mu_Q20 <= SILK_FIX_CONST( 0.005, 20 ) );
 
     /* Calculate NLSF weights */
     silk_NLSF_VQ_weights_laroia( pNLSFW_QW, pNLSF_Q15, psEncC->predictLPCOrder );
 
     /* Update NLSF weights for interpolated NLSFs */
     doInterpolate = ( psEncC->useInterpolatedNLSFs == 1 ) && ( psEncC->indices.NLSFInterpCoef_Q2 < 4 );
     if( doInterpolate ) {
@@ -96,12 +96,12 @@ void silk_process_NLSFs(
         silk_interpolate( pNLSF0_temp_Q15, prev_NLSFq_Q15, pNLSF_Q15,
             psEncC->indices.NLSFInterpCoef_Q2, psEncC->predictLPCOrder );
 
         /* Convert back to LPC coefficients */
         silk_NLSF2A( PredCoef_Q12[ 0 ], pNLSF0_temp_Q15, psEncC->predictLPCOrder, psEncC->arch );
 
     } else {
         /* Copy LPC coefficients for first half from second half */
-        silk_assert( psEncC->predictLPCOrder <= MAX_LPC_ORDER );
+        celt_assert( psEncC->predictLPCOrder <= MAX_LPC_ORDER );
         silk_memcpy( PredCoef_Q12[ 0 ], PredCoef_Q12[ 1 ], psEncC->predictLPCOrder * sizeof( opus_int16 ) );
     }
 }
--- a/media/libopus/silk/resampler.c
+++ b/media/libopus/silk/resampler.c
@@ -86,24 +86,24 @@ opus_int silk_resampler_init(
 
     /* Clear state */
     silk_memset( S, 0, sizeof( silk_resampler_state_struct ) );
 
     /* Input checking */
     if( forEnc ) {
         if( ( Fs_Hz_in  != 8000 && Fs_Hz_in  != 12000 && Fs_Hz_in  != 16000 && Fs_Hz_in  != 24000 && Fs_Hz_in  != 48000 ) ||
             ( Fs_Hz_out != 8000 && Fs_Hz_out != 12000 && Fs_Hz_out != 16000 ) ) {
-            silk_assert( 0 );
+            celt_assert( 0 );
             return -1;
         }
         S->inputDelay = delay_matrix_enc[ rateID( Fs_Hz_in ) ][ rateID( Fs_Hz_out ) ];
     } else {
         if( ( Fs_Hz_in  != 8000 && Fs_Hz_in  != 12000 && Fs_Hz_in  != 16000 ) ||
             ( Fs_Hz_out != 8000 && Fs_Hz_out != 12000 && Fs_Hz_out != 16000 && Fs_Hz_out != 24000 && Fs_Hz_out != 48000 ) ) {
-            silk_assert( 0 );
+            celt_assert( 0 );
             return -1;
         }
         S->inputDelay = delay_matrix_dec[ rateID( Fs_Hz_in ) ][ rateID( Fs_Hz_out ) ];
     }
 
     S->Fs_in_kHz  = silk_DIV32_16( Fs_Hz_in,  1000 );
     S->Fs_out_kHz = silk_DIV32_16( Fs_Hz_out, 1000 );
 
@@ -146,17 +146,17 @@ opus_int silk_resampler_init(
             S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR2;
             S->Coefs = silk_Resampler_1_4_COEFS;
         } else if( silk_MUL( Fs_Hz_out, 6 ) == Fs_Hz_in ) {                     /* Fs_out : Fs_in = 1 : 6 */
             S->FIR_Fracs = 1;
             S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR2;
             S->Coefs = silk_Resampler_1_6_COEFS;
         } else {
             /* None available */
-            silk_assert( 0 );
+            celt_assert( 0 );
             return -1;
         }
     } else {
         /* Input and output sampling rates are equal: copy */
         S->resampler_function = USE_silk_resampler_copy;
     }
 
     /* Ratio of input/output samples */
@@ -176,19 +176,19 @@ opus_int silk_resampler(
     opus_int16                  out[],              /* O    Output signal                                               */
     const opus_int16            in[],               /* I    Input signal                                                */
     opus_int32                  inLen               /* I    Number of input samples                                     */
 )
 {
     opus_int nSamples;
 
     /* Need at least 1 ms of input data */
-    silk_assert( inLen >= S->Fs_in_kHz );
+    celt_assert( inLen >= S->Fs_in_kHz );
     /* Delay can't exceed the 1 ms of buffering */
-    silk_assert( S->inputDelay <= S->Fs_in_kHz );
+    celt_assert( S->inputDelay <= S->Fs_in_kHz );
 
     nSamples = S->Fs_in_kHz - S->inputDelay;
 
     /* Copy to delay buffer */
     silk_memcpy( &S->delayBuf[ S->inputDelay ], in, nSamples * sizeof( opus_int16 ) );
 
     switch( S->resampler_function ) {
         case USE_silk_resampler_private_up2_HQ_wrapper:
--- a/media/libopus/silk/resampler_down2.c
+++ b/media/libopus/silk/resampler_down2.c
@@ -38,18 +38,18 @@ void silk_resampler_down2(
     opus_int16                  *out,               /* O    Output signal [ floor(len/2) ]                              */
     const opus_int16            *in,                /* I    Input signal [ len ]                                        */
     opus_int32                  inLen               /* I    Number of input samples                                     */
 )
 {
     opus_int32 k, len2 = silk_RSHIFT32( inLen, 1 );
     opus_int32 in32, out32, Y, X;
 
-    silk_assert( silk_resampler_down2_0 > 0 );
-    silk_assert( silk_resampler_down2_1 < 0 );
+    celt_assert( silk_resampler_down2_0 > 0 );
+    celt_assert( silk_resampler_down2_1 < 0 );
 
     /* Internal variables and state are in Q10 format */
     for( k = 0; k < len2; k++ ) {
         /* Convert to Q10 */
         in32 = silk_LSHIFT( (opus_int32)in[ 2 * k ], 10 );
 
         /* All-pass section for even input sample */
         Y      = silk_SUB32( in32, S[ 0 ] );
--- a/media/libopus/silk/resampler_private_down_FIR.c
+++ b/media/libopus/silk/resampler_private_down_FIR.c
@@ -131,17 +131,17 @@ static OPUS_INLINE opus_int16 *silk_resa
                 res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 16 ], buf_ptr[ 19 ] ), FIR_Coefs[ 16 ] );
                 res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 17 ], buf_ptr[ 18 ] ), FIR_Coefs[ 17 ] );
 
                 /* Scale down, saturate and store in output array */
                 *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) );
             }
             break;
         default:
-            silk_assert( 0 );
+            celt_assert( 0 );
     }
     return out;
 }
 
 /* Resample with a 2nd order AR filter followed by FIR interpolation */
 void silk_resampler_private_down_FIR(
     void                            *SS,            /* I/O  Resampler state             */
     opus_int16                      out[],          /* O    Output signal               */
--- a/media/libopus/silk/sort.c
+++ b/media/libopus/silk/sort.c
@@ -43,19 +43,19 @@ void silk_insertion_sort_increasing(
     const opus_int       L,              /* I     Vector length                          */
     const opus_int       K               /* I     Number of correctly sorted positions   */
 )
 {
     opus_int32    value;
     opus_int        i, j;
 
     /* Safety checks */
-    silk_assert( K >  0 );
-    silk_assert( L >  0 );
-    silk_assert( L >= K );
+    celt_assert( K >  0 );
+    celt_assert( L >  0 );
+    celt_assert( L >= K );
 
     /* Write start indices in index vector */
     for( i = 0; i < K; i++ ) {
         idx[ i ] = i;
     }
 
     /* Sort vector elements by value, increasing order */
     for( i = 1; i < K; i++ ) {
@@ -91,19 +91,19 @@ void silk_insertion_sort_decreasing_int1
     const opus_int              L,                  /* I     Vector length                                              */
     const opus_int              K                   /* I     Number of correctly sorted positions                       */
 )
 {
     opus_int i, j;
     opus_int value;
 
     /* Safety checks */
-    silk_assert( K >  0 );
-    silk_assert( L >  0 );
-    silk_assert( L >= K );
+    celt_assert( K >  0 );
+    celt_assert( L >  0 );
+    celt_assert( L >= K );
 
     /* Write start indices in index vector */
     for( i = 0; i < K; i++ ) {
         idx[ i ] = i;
     }
 
     /* Sort vector elements by value, decreasing order */
     for( i = 1; i < K; i++ ) {
@@ -136,17 +136,17 @@ void silk_insertion_sort_increasing_all_
      opus_int16                 *a,                 /* I/O   Unsorted / Sorted vector                                   */
      const opus_int             L                   /* I     Vector length                                              */
 )
 {
     opus_int    value;
     opus_int    i, j;
 
     /* Safety checks */
-    silk_assert( L >  0 );
+    celt_assert( L >  0 );
 
     /* Sort vector elements by value, increasing order */
     for( i = 1; i < L; i++ ) {
         value = a[ i ];
         for( j = i - 1; ( j >= 0 ) && ( value < a[ j ] ); j-- ) {
             a[ j + 1 ] = a[ j ]; /* Shift value */
         }
         a[ j + 1 ] = value; /* Write value */
--- a/media/libopus/silk/stereo_LR_to_MS.c
+++ b/media/libopus/silk/stereo_LR_to_MS.c
@@ -104,17 +104,17 @@ void silk_stereo_LR_to_MS(
     frac_Q16 = silk_SMLABB( HP_ratio_Q14, LP_ratio_Q14, 3 );
     frac_Q16 = silk_min( frac_Q16, SILK_FIX_CONST( 1, 16 ) );
 
     /* Determine bitrate distribution between mid and side, and possibly reduce stereo width */
     total_rate_bps -= is10msFrame ? 1200 : 600;      /* Subtract approximate bitrate for coding stereo parameters */
     if( total_rate_bps < 1 ) {
         total_rate_bps = 1;
     }
-    min_mid_rate_bps = silk_SMLABB( 2000, fs_kHz, 900 );
+    min_mid_rate_bps = silk_SMLABB( 2000, fs_kHz, 600 );
     silk_assert( min_mid_rate_bps < 32767 );
     /* Default bitrate distribution: 8 parts for Mid and (5+3*frac) parts for Side. so: mid_rate = ( 8 / ( 13 + 3 * frac ) ) * total_ rate */
     frac_3_Q16 = silk_MUL( 3, frac_Q16 );
     mid_side_rates_bps[ 0 ] = silk_DIV32_varQ( total_rate_bps, SILK_FIX_CONST( 8 + 5, 16 ) + frac_3_Q16, 16+3 );
     /* If Mid bitrate below minimum, reduce stereo width */
     if( mid_side_rates_bps[ 0 ] < min_mid_rate_bps ) {
         mid_side_rates_bps[ 0 ] = min_mid_rate_bps;
         mid_side_rates_bps[ 1 ] = total_rate_bps - mid_side_rates_bps[ 0 ];
--- a/media/libopus/silk/stereo_encode_pred.c
+++ b/media/libopus/silk/stereo_encode_pred.c
@@ -36,21 +36,21 @@ void silk_stereo_encode_pred(
     ec_enc                      *psRangeEnc,                    /* I/O  Compressor data structure                   */
     opus_int8                   ix[ 2 ][ 3 ]                    /* I    Quantization indices                        */
 )
 {
     opus_int   n;
 
     /* Entropy coding */
     n = 5 * ix[ 0 ][ 2 ] + ix[ 1 ][ 2 ];
-    silk_assert( n < 25 );
+    celt_assert( n < 25 );
     ec_enc_icdf( psRangeEnc, n, silk_stereo_pred_joint_iCDF, 8 );
     for( n = 0; n < 2; n++ ) {
-        silk_assert( ix[ n ][ 0 ] < 3 );
-        silk_assert( ix[ n ][ 1 ] < STEREO_QUANT_SUB_STEPS );
+        celt_assert( ix[ n ][ 0 ] < 3 );
+        celt_assert( ix[ n ][ 1 ] < STEREO_QUANT_SUB_STEPS );
         ec_enc_icdf( psRangeEnc, ix[ n ][ 0 ], silk_uniform3_iCDF, 8 );
         ec_enc_icdf( psRangeEnc, ix[ n ][ 1 ], silk_uniform5_iCDF, 8 );
     }
 }
 
 /* Entropy code the mid-only flag */
 void silk_stereo_encode_mid_only(
     ec_enc                      *psRangeEnc,                    /* I/O  Compressor data structure                   */
--- a/media/libopus/silk/structs.h
+++ b/media/libopus/silk/structs.h
@@ -73,16 +73,17 @@ typedef struct {
     opus_int32                  counter;                        /* Frame counter used in the initial phase                              */
 } silk_VAD_state;
 
 /* Variable cut-off low-pass filter state */
 typedef struct {
     opus_int32                   In_LP_State[ 2 ];           /* Low pass filter state */
     opus_int32                   transition_frame_no;        /* Counter which is mapped to a cut-off frequency */
     opus_int                     mode;                       /* Operating mode, <0: switch down, >0: switch up; 0: do nothing           */
+    opus_int32                   saved_fs_kHz;               /* If non-zero, holds the last sampling rate before a bandwidth switching reset. */
 } silk_LP_state;
 
 /* Structure containing NLSF codebook */
 typedef struct {
     const opus_int16             nVectors;
     const opus_int16             order;
     const opus_int16             quantStepSize_Q16;
     const opus_int16             invQuantStepSize_Q6;
--- a/media/libopus/silk/tables.h
+++ b/media/libopus/silk/tables.h
@@ -92,22 +92,16 @@ extern const opus_uint8  silk_stereo_onl
 
 extern const opus_uint8  * const silk_LBRR_flags_iCDF_ptr[ 2 ];                                     /*  10 */
 
 extern const opus_uint8  silk_NLSF_interpolation_factor_iCDF[ 5 ];                                  /*   5 */
 
 extern const silk_NLSF_CB_struct silk_NLSF_CB_WB;                                                   /* 1040 */
 extern const silk_NLSF_CB_struct silk_NLSF_CB_NB_MB;                                                /* 728 */
 
-/* Piece-wise linear mapping from bitrate in kbps to coding quality in dB SNR */
-extern const opus_int32  silk_TargetRate_table_NB[  TARGET_RATE_TAB_SZ ];                           /*  32 */
-extern const opus_int32  silk_TargetRate_table_MB[  TARGET_RATE_TAB_SZ ];                           /*  32 */
-extern const opus_int32  silk_TargetRate_table_WB[  TARGET_RATE_TAB_SZ ];                           /*  32 */
-extern const opus_int16  silk_SNR_table_Q1[         TARGET_RATE_TAB_SZ ];                           /*  32 */
-
 /* Quantization offsets */
 extern const opus_int16  silk_Quantization_Offsets_Q10[ 2 ][ 2 ];                                   /*   8 */
 
 /* Interpolation points for filter coefficients used in the bandwidth transition smoother */
 extern const opus_int32  silk_Transition_LP_B_Q28[ TRANSITION_INT_NUM ][ TRANSITION_NB ];           /*  60 */
 extern const opus_int32  silk_Transition_LP_A_Q28[ TRANSITION_INT_NUM ][ TRANSITION_NA ];           /*  60 */
 
 /* Rom table with cosine values */
--- a/media/libopus/silk/tables_other.c
+++ b/media/libopus/silk/tables_other.c
@@ -33,30 +33,16 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "define.h"
 #include "tables.h"
 
 #ifdef __cplusplus
 extern "C"
 {
 #endif
 
-/* Piece-wise linear mapping from bitrate in kbps to coding quality in dB SNR */
-const opus_int32 silk_TargetRate_table_NB[ TARGET_RATE_TAB_SZ ] = {
-    0,      8000,   9400,   11500,  13500,  17500,  25000,  MAX_TARGET_RATE_BPS
-};
-const opus_int32 silk_TargetRate_table_MB[ TARGET_RATE_TAB_SZ ] = {
-    0,      9000,   12000,  14500,  18500,  24500,  35500,  MAX_TARGET_RATE_BPS
-};
-const opus_int32 silk_TargetRate_table_WB[ TARGET_RATE_TAB_SZ ] = {
-    0,      10500,  14000,  17000,  21500,  28500,  42000,  MAX_TARGET_RATE_BPS
-};
-const opus_int16 silk_SNR_table_Q1[ TARGET_RATE_TAB_SZ ] = {
-    18,     29,     38,     40,     46,     52,     62,     84
-};
-
 /* Tables for stereo predictor coding */
 const opus_int16 silk_stereo_pred_quant_Q13[ STEREO_QUANT_TAB_SIZE ] = {
     -13732, -10050, -8266, -7526, -6500, -5000, -2950,  -820,
        820,   2950,  5000,  6500,  7526,  8266, 10050, 13732
 };
 const opus_uint8  silk_stereo_pred_joint_iCDF[ 25 ] = {
     249, 247, 246, 245, 244,
     234, 210, 202, 201, 200,
new file mode 100644
--- /dev/null
+++ b/media/libopus/silk/x86/NSQ_del_dec_sse4_1.c
@@ -0,0 +1,859 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+#include "main.h"
+#include "celt/x86/x86cpu.h"
+
+#include "stack_alloc.h"
+
+typedef struct { /* One candidate state of the delayed-decision search; nStatesDelayedDecision of these are allocated per call */
+    opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ]; /* First NSQ_LPC_BUF_LENGTH entries are seeded from NSQ->sLPC_Q14 */
+    opus_int32 RandState[ DECISION_DELAY ];
+    opus_int32 Q_Q10[     DECISION_DELAY ]; /* Indexed modulo DECISION_DELAY; rounded >> 10 into pulses[] for the winner */
+    opus_int32 Xq_Q14[    DECISION_DELAY ]; /* Indexed modulo DECISION_DELAY; multiplied by the gain to produce xq for the winner */
+    opus_int32 Pred_Q15[  DECISION_DELAY ];
+    opus_int32 Shape_Q14[ DECISION_DELAY ]; /* Winner's values are copied back into NSQ->sLTP_shp_Q14 */
+    opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ]; /* Seeded from, and for the winner saved back to, NSQ->sAR2_Q14 */
+    opus_int32 LF_AR_Q14; /* Seeded from, and for the winner saved back to, NSQ->sLF_AR_shp_Q14 */
+    opus_int32 Seed; /* Initialised to ( state index + psIndices->Seed ) & 3 */
+    opus_int32 SeedInit; /* Copy of the initial Seed; the winner's value is written to psIndices->Seed */
+    opus_int32 RD_Q10; /* Cost used to pick the winner (lowest value wins); starts at 0 */
+} NSQ_del_dec_struct;
+
+typedef struct { /* NOTE(review): presumably one candidate quantization result for a sample; fields are filled outside this excerpt - confirm */
+    opus_int32 Q_Q10;
+    opus_int32 RD_Q10;
+    opus_int32 xq_Q14;
+    opus_int32 LF_AR_Q14;
+    opus_int32 sLTP_shp_Q14;
+    opus_int32 LPC_exc_Q14;
+} NSQ_sample_struct;
+
+typedef NSQ_sample_struct  NSQ_sample_pair[ 2 ]; /* Two sample records; the quantizer allocates one pair per delayed-decision state */
+
+static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1(
+    const silk_encoder_state *psEncC,               /* I    Encoder State                       */
+    silk_nsq_state      *NSQ,                       /* I/O  NSQ state                           */
+    NSQ_del_dec_struct  psDelDec[],                 /* I/O  Delayed decision states             */
+    const opus_int32    x_Q3[],                     /* I    Input in Q3                         */
+    opus_int32          x_sc_Q10[],                 /* O    Input scaled with 1/Gain in Q10     */
+    const opus_int16    sLTP[],                     /* I    Re-whitened LTP state in Q0         */
+    opus_int32          sLTP_Q15[],                 /* O    LTP state matching scaled input     */
+    opus_int            subfr,                      /* I    Subframe number                     */
+    opus_int            nStatesDelayedDecision,     /* I    Number of del dec states            */
+    const opus_int      LTP_scale_Q14,              /* I    LTP state scaling                   */
+    const opus_int32    Gains_Q16[ MAX_NB_SUBFR ],  /* I    Quantization step sizes             */
+    const opus_int      pitchL[ MAX_NB_SUBFR ],     /* I    Pitch lag                           */
+    const opus_int      signal_type,                /* I    Signal type                         */
+    const opus_int      decisionDelay               /* I    Decision delay                      */
+);
+
+/******************************************/
+/* Noise shape quantizer for one subframe */
+/******************************************/
+static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1(
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                           */
+    NSQ_del_dec_struct  psDelDec[],             /* I/O  Delayed decision states             */
+    opus_int            signalType,             /* I    Signal type                         */
+    const opus_int32    x_Q10[],                /* I    Input scaled with 1/Gain in Q10     */
+    opus_int8           pulses[],               /* O    Quantized pulse signal              */
+    opus_int16          xq[],                   /* O    Quantized speech signal             */
+    opus_int32          sLTP_Q15[],             /* I/O  LTP filter state                    */
+    opus_int32          delayedGain_Q10[],      /* I/O  Gain delay buffer                   */
+    const opus_int16    a_Q12[],                /* I    Short term prediction coefs         */
+    const opus_int16    b_Q14[],                /* I    Long term prediction coefs          */
+    const opus_int16    AR_shp_Q13[],           /* I    Noise shaping coefs                 */
+    opus_int            lag,                    /* I    Pitch lag                           */
+    opus_int32          HarmShapeFIRPacked_Q14, /* I    Packed long term shaping coefs      */
+    opus_int            Tilt_Q14,               /* I    Spectral tilt                       */
+    opus_int32          LF_shp_Q14,             /* I    Low frequency shaping coefs         */
+    opus_int32          Gain_Q16,               /* I    Quantization step size              */
+    opus_int            Lambda_Q10,             /* I    Rate/distortion tradeoff            */
+    opus_int            offset_Q10,             /* I    Quantization offset                 */
+    opus_int            length,                 /* I    Input length                        */
+    opus_int            subfr,                  /* I    Subframe number                     */
+    opus_int            shapingLPCOrder,        /* I    Shaping LPC filter order            */
+    opus_int            predictLPCOrder,        /* I    Prediction filter order             */
+    opus_int            warping_Q16,            /* I    Warping (psEncC->warping_Q16)       */
+    opus_int            nStatesDelayedDecision, /* I    Number of states in decision tree   */
+    opus_int            *smpl_buf_idx,          /* I/O  Index to newest samples in buffers  */
+    opus_int            decisionDelay           /* I    Decision delay                      */
+);
+
+void silk_NSQ_del_dec_sse4_1( /* Delayed-decision noise shaping quantizer: runs every subframe of a frame, writes pulses[] and updates NSQ */
+    const silk_encoder_state    *psEncC,                                    /* I    Encoder State                   */
+    silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
+    SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
+    const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */
+    opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */
+    const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Short term prediction coefs     */
+    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],    /* I    Long term prediction coefs      */
+    const opus_int16            AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs             */
+    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],          /* I    Long term shaping coefs         */
+    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                   /* I    Spectral tilt                   */
+    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                 /* I    Low frequency shaping coefs     */
+    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                  /* I    Quantization step sizes         */
+    const opus_int              pitchL[ MAX_NB_SUBFR ],                     /* I    Pitch lags                      */
+    const opus_int              Lambda_Q10,                                 /* I    Rate/distortion tradeoff        */
+    const opus_int              LTP_scale_Q14                               /* I    LTP state scaling               */
+)
+{
+    opus_int            i, k, lag, start_idx, LSF_interpolation_flag, Winner_ind, subfr;
+    opus_int            last_smple_idx, smpl_buf_idx, decisionDelay;
+    const opus_int16    *A_Q12, *B_Q14, *AR_shp_Q13;
+    opus_int16          *pxq;
+    VARDECL( opus_int32, sLTP_Q15 );
+    VARDECL( opus_int16, sLTP );
+    opus_int32          HarmShapeFIRPacked_Q14;
+    opus_int            offset_Q10;
+    opus_int32          RDmin_Q10, Gain_Q10;
+    VARDECL( opus_int32, x_sc_Q10 );
+    VARDECL( opus_int32, delayedGain_Q10 );
+    VARDECL( NSQ_del_dec_struct, psDelDec );
+    NSQ_del_dec_struct  *psDD;
+    SAVE_STACK;
+
+    /* Set unvoiced lag to the previous one, overwrite later for voiced */
+    lag = NSQ->lagPrev;
+
+    silk_assert( NSQ->prev_gain_Q16 != 0 );
+
+    /* Initialize delayed decision states */
+    ALLOC( psDelDec, psEncC->nStatesDelayedDecision, NSQ_del_dec_struct );
+    silk_memset( psDelDec, 0, psEncC->nStatesDelayedDecision * sizeof( NSQ_del_dec_struct ) );
+    for( k = 0; k < psEncC->nStatesDelayedDecision; k++ ) {
+        psDD                 = &psDelDec[ k ];
+        psDD->Seed           = ( k + psIndices->Seed ) & 3; /* Per-state seed: state index added, kept to 2 bits */
+        psDD->SeedInit       = psDD->Seed;
+        psDD->RD_Q10         = 0;
+        psDD->LF_AR_Q14      = NSQ->sLF_AR_shp_Q14;
+        psDD->Shape_Q14[ 0 ] = NSQ->sLTP_shp_Q14[ psEncC->ltp_mem_length - 1 ];
+        silk_memcpy( psDD->sLPC_Q14, NSQ->sLPC_Q14, NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
+        silk_memcpy( psDD->sAR2_Q14, NSQ->sAR2_Q14, sizeof( NSQ->sAR2_Q14 ) );
+    }
+
+    offset_Q10   = silk_Quantization_Offsets_Q10[ psIndices->signalType >> 1 ][ psIndices->quantOffsetType ];
+    smpl_buf_idx = 0; /* index of oldest samples */
+
+    decisionDelay = silk_min_int( DECISION_DELAY, psEncC->subfr_length ); /* Never longer than one subframe */
+
+    /* For voiced frames limit the decision delay to lower than the pitch lag */
+    if( psIndices->signalType == TYPE_VOICED ) {
+        for( k = 0; k < psEncC->nb_subfr; k++ ) {
+            decisionDelay = silk_min_int( decisionDelay, pitchL[ k ] - LTP_ORDER / 2 - 1 );
+        }
+    } else {
+        if( lag > 0 ) {
+            decisionDelay = silk_min_int( decisionDelay, lag - LTP_ORDER / 2 - 1 );
+        }
+    }
+
+    if( psIndices->NLSFInterpCoef_Q2 == 4 ) {
+        LSF_interpolation_flag = 0;
+    } else {
+        LSF_interpolation_flag = 1;
+    }
+
+    ALLOC( sLTP_Q15,
+           psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );
+    ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 );
+    ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 );
+    ALLOC( delayedGain_Q10, DECISION_DELAY, opus_int32 );
+    /* Set up pointers to start of sub frame */
+    pxq                   = &NSQ->xq[ psEncC->ltp_mem_length ];
+    NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length;
+    NSQ->sLTP_buf_idx     = psEncC->ltp_mem_length;
+    subfr = 0;
+    for( k = 0; k < psEncC->nb_subfr; k++ ) {
+        A_Q12      = &PredCoef_Q12[ ( ( k >> 1 ) | ( 1 - LSF_interpolation_flag ) ) * MAX_LPC_ORDER ]; /* For k < 4: set 0 only for k = 0, 1 while interpolating; set 1 otherwise */
+        B_Q14      = &LTPCoef_Q14[ k * LTP_ORDER           ];
+        AR_shp_Q13 = &AR2_Q13[     k * MAX_SHAPE_LPC_ORDER ];
+
+        /* Noise shape parameters */
+        silk_assert( HarmShapeGain_Q14[ k ] >= 0 );
+        HarmShapeFIRPacked_Q14  =                          silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 ); /* Low part: gain >> 2 */
+        HarmShapeFIRPacked_Q14 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 ); /* OR in ( gain >> 1 ) << 16 */
+
+        NSQ->rewhite_flag = 0;
+        if( psIndices->signalType == TYPE_VOICED ) {
+            /* Voiced */
+            lag = pitchL[ k ];
+
+            /* Re-whitening */
+            if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) { /* True for k % 2 == 0 when interpolating, k % 4 == 0 otherwise */
+                if( k == 2 ) {
+                    /* RESET DELAYED DECISIONS */
+                    /* Find winner */
+                    RDmin_Q10 = psDelDec[ 0 ].RD_Q10;
+                    Winner_ind = 0;
+                    for( i = 1; i < psEncC->nStatesDelayedDecision; i++ ) {
+                        if( psDelDec[ i ].RD_Q10 < RDmin_Q10 ) {
+                            RDmin_Q10 = psDelDec[ i ].RD_Q10;
+                            Winner_ind = i;
+                        }
+                    }
+                    for( i = 0; i < psEncC->nStatesDelayedDecision; i++ ) {
+                        if( i != Winner_ind ) {
+                            psDelDec[ i ].RD_Q10 += ( silk_int32_MAX >> 4 ); /* Add a large penalty to every non-winning state */
+                            silk_assert( psDelDec[ i ].RD_Q10 >= 0 );
+                        }
+                    }
+
+                    /* Copy final part of signals from winner state to output and long-term filter states */
+                    psDD = &psDelDec[ Winner_ind ];
+                    last_smple_idx = smpl_buf_idx + decisionDelay;
+                    for( i = 0; i < decisionDelay; i++ ) {
+                        last_smple_idx = ( last_smple_idx - 1 ) % DECISION_DELAY; /* Step backwards, indices wrap modulo DECISION_DELAY */
+                        if( last_smple_idx < 0 ) last_smple_idx += DECISION_DELAY; /* C's % can yield a negative result; fold it back into range */
+                        pulses[   i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
+                        pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
+                            silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gains_Q16[ 1 ] ), 14 ) );
+                        NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ];
+                    }
+
+                    subfr = 0; /* Restart the subframe counter that is passed to the quantizer */
+                }
+
+                /* Rewhiten with new A coefs */
+                start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2;
+                celt_assert( start_idx > 0 );
+
+                silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ],
+                    A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch );
+
+                NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
+                NSQ->rewhite_flag = 1;
+            }
+        }
+
+        silk_nsq_del_dec_scale_states_sse4_1( psEncC, NSQ, psDelDec, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k,
+            psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType, decisionDelay );
+
+        silk_noise_shape_quantizer_del_dec_sse4_1( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15,
+            delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ],
+            Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder,
+            psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay );
+
+        x_Q3   += psEncC->subfr_length;
+        pulses += psEncC->subfr_length;
+        pxq    += psEncC->subfr_length;
+    }
+
+    /* Find winner */
+    RDmin_Q10 = psDelDec[ 0 ].RD_Q10;
+    Winner_ind = 0;
+    for( k = 1; k < psEncC->nStatesDelayedDecision; k++ ) {
+        if( psDelDec[ k ].RD_Q10 < RDmin_Q10 ) {
+            RDmin_Q10 = psDelDec[ k ].RD_Q10;
+            Winner_ind = k;
+        }
+    }
+
+    /* Copy final part of signals from winner state to output and long-term filter states */
+    psDD = &psDelDec[ Winner_ind ];
+    psIndices->Seed = psDD->SeedInit;
+    last_smple_idx = smpl_buf_idx + decisionDelay;
+    Gain_Q10 = silk_RSHIFT32( Gains_Q16[ psEncC->nb_subfr - 1 ], 6 ); /* Last subframe's gain, Q16 -> Q10 */
+    for( i = 0; i < decisionDelay; i++ ) {
+        last_smple_idx = ( last_smple_idx - 1 ) % DECISION_DELAY; /* Step backwards, indices wrap modulo DECISION_DELAY */
+        if( last_smple_idx < 0 ) last_smple_idx += DECISION_DELAY; /* C's % can yield a negative result; fold it back into range */
+        pulses[   i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
+        pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
+            silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gain_Q10 ), 8 ) );
+        NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ];
+    }
+    silk_memcpy( NSQ->sLPC_Q14, &psDD->sLPC_Q14[ psEncC->subfr_length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); /* Saved state starts subfr_length entries into the winner's buffer */
+    silk_memcpy( NSQ->sAR2_Q14, psDD->sAR2_Q14, sizeof( psDD->sAR2_Q14 ) );
+
+    /* Update states */
+    NSQ->sLF_AR_shp_Q14 = psDD->LF_AR_Q14;
+    NSQ->lagPrev        = pitchL[ psEncC->nb_subfr - 1 ];
+
+    /* Save quantized speech signal */
+    silk_memmove( NSQ->xq,           &NSQ->xq[           psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) );
+    silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) );
+    RESTORE_STACK;
+}
+
+/******************************************/
+/* Noise shape quantizer for one subframe */
+/******************************************/
+static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1(
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                           */
+    NSQ_del_dec_struct  psDelDec[],             /* I/O  Delayed decision states             */
+    opus_int            signalType,             /* I    Signal type                         */
+    const opus_int32    x_Q10[],                /* I                                        */
+    opus_int8           pulses[],               /* O                                        */
+    opus_int16          xq[],                   /* O                                        */
+    opus_int32          sLTP_Q15[],             /* I/O  LTP filter state                    */
+    opus_int32          delayedGain_Q10[],      /* I/O  Gain delay buffer                   */
+    const opus_int16    a_Q12[],                /* I    Short term prediction coefs         */
+    const opus_int16    b_Q14[],                /* I    Long term prediction coefs          */
+    const opus_int16    AR_shp_Q13[],           /* I    Noise shaping coefs                 */
+    opus_int            lag,                    /* I    Pitch lag                           */
+    opus_int32          HarmShapeFIRPacked_Q14, /* I                                        */
+    opus_int            Tilt_Q14,               /* I    Spectral tilt                       */
+    opus_int32          LF_shp_Q14,             /* I                                        */
+    opus_int32          Gain_Q16,               /* I                                        */
+    opus_int            Lambda_Q10,             /* I                                        */
+    opus_int            offset_Q10,             /* I                                        */
+    opus_int            length,                 /* I    Input length                        */
+    opus_int            subfr,                  /* I    Subframe number                     */
+    opus_int            shapingLPCOrder,        /* I    Shaping LPC filter order            */
+    opus_int            predictLPCOrder,        /* I    Prediction filter order             */
+    opus_int            warping_Q16,            /* I                                        */
+    opus_int            nStatesDelayedDecision, /* I    Number of states in decision tree   */
+    opus_int            *smpl_buf_idx,          /* I/O  Index to newest samples in buffers  */
+    opus_int            decisionDelay           /* I                                        */
+)
+{
+    opus_int     i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx;
+    opus_int32   Winner_rand_state;
+    opus_int32   LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14;
+    opus_int32   n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10;
+    opus_int32   q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
+    opus_int32   tmp1, tmp2, sLF_AR_shp_Q14;
+    opus_int32   *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14;
+    VARDECL( NSQ_sample_pair, psSampleState );
+    NSQ_del_dec_struct *psDD;
+    NSQ_sample_struct  *psSS;
+
+    __m128i a_Q12_0123, a_Q12_4567, a_Q12_89AB, a_Q12_CDEF;
+    __m128i b_Q12_0123, b_sr_Q12_0123;
+    SAVE_STACK;
+
+    celt_assert( nStatesDelayedDecision > 0 );
+    ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair );
+
+    shp_lag_ptr  = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
+    pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
+    Gain_Q10     = silk_RSHIFT( Gain_Q16, 6 );
+
+    a_Q12_0123 = OP_CVTEPI16_EPI32_M64( a_Q12 );
+    a_Q12_4567 = OP_CVTEPI16_EPI32_M64( a_Q12 + 4 );
+
+    if( opus_likely( predictLPCOrder == 16 ) ) {
+        a_Q12_89AB = OP_CVTEPI16_EPI32_M64( a_Q12 + 8 );
+        a_Q12_CDEF = OP_CVTEPI16_EPI32_M64( a_Q12 + 12 );
+    }
+
+    if( signalType == TYPE_VOICED ){
+        b_Q12_0123 = OP_CVTEPI16_EPI32_M64( b_Q14 );
+        b_sr_Q12_0123 = _mm_shuffle_epi32( b_Q12_0123, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
+    }
+    for( i = 0; i < length; i++ ) {
+        /* Perform common calculations used in all states */
+
+        /* Long-term prediction */
+        if( signalType == TYPE_VOICED ) {
+            /* Unrolled loop */
+            /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+            LTP_pred_Q14 = 2;
+            {
+                __m128i tmpa, tmpb, pred_lag_ptr_tmp;
+                pred_lag_ptr_tmp    = _mm_loadu_si128( (__m128i *)(&pred_lag_ptr[ -3 ] ) );
+                pred_lag_ptr_tmp    = _mm_shuffle_epi32( pred_lag_ptr_tmp, 0x1B );
+                tmpa                = _mm_mul_epi32( pred_lag_ptr_tmp, b_Q12_0123 );
+                tmpa                = _mm_srli_si128( tmpa, 2 );
+
+                pred_lag_ptr_tmp = _mm_shuffle_epi32( pred_lag_ptr_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) );/* equal shift right 4 bytes */
+                pred_lag_ptr_tmp    = _mm_mul_epi32( pred_lag_ptr_tmp, b_sr_Q12_0123 );
+                pred_lag_ptr_tmp    = _mm_srli_si128( pred_lag_ptr_tmp, 2 );
+                pred_lag_ptr_tmp    = _mm_add_epi32( pred_lag_ptr_tmp, tmpa );
+
+                tmpb = _mm_shuffle_epi32( pred_lag_ptr_tmp, _MM_SHUFFLE( 0, 0, 3, 2 ) );/* equal shift right 8 bytes */
+                pred_lag_ptr_tmp    = _mm_add_epi32( pred_lag_ptr_tmp, tmpb );
+                LTP_pred_Q14        += _mm_cvtsi128_si32( pred_lag_ptr_tmp );
+
+                LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -4 ], b_Q14[ 4 ] );
+                LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 );                          /* Q13 -> Q14 */
+                pred_lag_ptr++;
+            }
+        } else {
+            LTP_pred_Q14 = 0;
+        }
+
+        /* Long-term shaping */
+        if( lag > 0 ) {
+            /* Symmetric, packed FIR coefficients */
+            n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 );
+            n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ],                      HarmShapeFIRPacked_Q14 );
+            n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 );            /* Q12 -> Q14 */
+            shp_lag_ptr++;
+        } else {
+            n_LTP_Q14 = 0;
+        }
+        {
+            __m128i tmpa, tmpb, psLPC_Q14_tmp, a_Q12_tmp;
+
+            for( k = 0; k < nStatesDelayedDecision; k++ ) {
+                /* Delayed decision state */
+                psDD = &psDelDec[ k ];
+
+                /* Sample state */
+                psSS = psSampleState[ k ];
+
+                /* Generate dither */
+                psDD->Seed = silk_RAND( psDD->Seed );
+
+                /* Pointer used in short term prediction and shaping */
+                psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ];
+                /* Short-term prediction */
+                silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 );
+                /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+                LPC_pred_Q14 = silk_RSHIFT( predictLPCOrder, 1 );
+
+                tmpb = _mm_setzero_si128();
+
+                /* step 1 */
+                psLPC_Q14_tmp   = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -3 ] ) ); /* -3, -2 , -1, 0 */
+                psLPC_Q14_tmp   = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B );      /* 0, -1, -2, -3 */
+                tmpa            = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_0123 );    /* 0, -1, -2, -3 * 0123 -> 0*0, 2*-2 */
+
+                tmpa            = _mm_srli_epi64( tmpa, 16 );
+                tmpb            = _mm_add_epi32( tmpb, tmpa );
+
+                psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
+                a_Q12_tmp = _mm_shuffle_epi32( a_Q12_0123, _MM_SHUFFLE(0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
+                psLPC_Q14_tmp   = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); /* 1*-1, 3*-3 */
+                psLPC_Q14_tmp   = _mm_srli_epi64( psLPC_Q14_tmp, 16 );
+                tmpb            = _mm_add_epi32( tmpb, psLPC_Q14_tmp );
+
+                /* step 2 */
+                psLPC_Q14_tmp   = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -7 ] ) );
+                psLPC_Q14_tmp   = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B );
+                tmpa            = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_4567 );
+                tmpa            = _mm_srli_epi64( tmpa, 16 );
+                tmpb            = _mm_add_epi32( tmpb, tmpa );
+
+                psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
+                a_Q12_tmp = _mm_shuffle_epi32( a_Q12_4567, _MM_SHUFFLE(0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
+                psLPC_Q14_tmp   = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp );
+                psLPC_Q14_tmp   = _mm_srli_epi64( psLPC_Q14_tmp, 16 );
+                tmpb            = _mm_add_epi32( tmpb, psLPC_Q14_tmp );
+
+                if ( opus_likely( predictLPCOrder == 16 ) )
+                {
+                    /* step 3 */
+                    psLPC_Q14_tmp   = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -11 ] ) );
+                    psLPC_Q14_tmp   = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B );
+                    tmpa            = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_89AB );
+                    tmpa            = _mm_srli_epi64( tmpa, 16 );
+                    tmpb            = _mm_add_epi32( tmpb, tmpa );
+
+                    psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
+                    a_Q12_tmp = _mm_shuffle_epi32( a_Q12_89AB, _MM_SHUFFLE(0, 3, 2, 1 ) );/* equal shift right 4 bytes */
+                    psLPC_Q14_tmp   = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp );
+                    psLPC_Q14_tmp   = _mm_srli_epi64( psLPC_Q14_tmp, 16 );
+                    tmpb            = _mm_add_epi32( tmpb, psLPC_Q14_tmp );
+
+                    /* step 4 */
+                    psLPC_Q14_tmp   = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -15 ] ) );
+                    psLPC_Q14_tmp   = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B );
+                    tmpa            = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_CDEF );
+                    tmpa            = _mm_srli_epi64( tmpa, 16 );
+                    tmpb            = _mm_add_epi32( tmpb, tmpa );
+
+                    psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
+                    a_Q12_tmp = _mm_shuffle_epi32( a_Q12_CDEF, _MM_SHUFFLE(0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
+                    psLPC_Q14_tmp   = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp );
+                    psLPC_Q14_tmp   = _mm_srli_epi64( psLPC_Q14_tmp, 16 );
+                    tmpb            = _mm_add_epi32( tmpb, psLPC_Q14_tmp );
+
+                    /* add at last */
+                    /* equal shift right 8 bytes*/
+                    tmpa            = _mm_shuffle_epi32( tmpb, _MM_SHUFFLE( 0, 0, 3, 2 ) );
+                    tmpb            = _mm_add_epi32( tmpb, tmpa );
+                    LPC_pred_Q14    += _mm_cvtsi128_si32( tmpb );
+                }
+                else
+                {
+                    /* add at last */
+                    tmpa            = _mm_shuffle_epi32( tmpb, _MM_SHUFFLE( 0, 0, 3, 2 ) ); /* equal shift right 8 bytes*/
+                    tmpb            = _mm_add_epi32( tmpb, tmpa );
+                    LPC_pred_Q14    += _mm_cvtsi128_si32( tmpb );
+
+                    LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -8 ], a_Q12[ 8 ] );
+                    LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -9 ], a_Q12[ 9 ] );
+                }
+
+                LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */
+
+                /* Noise shape feedback */
+                silk_assert( ( shapingLPCOrder & 1 ) == 0 );   /* check that order is even */
+                /* Output of lowpass section */
+                tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 );
+                /* Output of allpass section */
+                tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 );
+                psDD->sAR2_Q14[ 0 ] = tmp2;
+                n_AR_Q14 = silk_RSHIFT( shapingLPCOrder, 1 );
+                n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ 0 ] );
+                /* Loop over allpass sections */
+                for( j = 2; j < shapingLPCOrder; j += 2 ) {
+                    /* Output of allpass section */
+                    tmp2 = silk_SMLAWB( psDD->sAR2_Q14[ j - 1 ], psDD->sAR2_Q14[ j + 0 ] - tmp1, warping_Q16 );
+                    psDD->sAR2_Q14[ j - 1 ] = tmp1;
+                    n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ j - 1 ] );
+                    /* Output of allpass section */
+                    tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ j + 0 ], psDD->sAR2_Q14[ j + 1 ] - tmp2, warping_Q16 );
+                    psDD->sAR2_Q14[ j + 0 ] = tmp2;
+                    n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ j ] );
+                }
+                psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1;
+                n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] );
+
+                n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 );                                      /* Q11 -> Q12 */
+                n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 );              /* Q12 */
+                n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 );                                      /* Q12 -> Q14 */
+
+                n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp_Q14 );     /* Q12 */
+                n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 );            /* Q12 */
+                n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 );                                      /* Q12 -> Q14 */
+
+                /* Input minus prediction plus noise feedback                       */
+                /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP  */
+                tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 );                                    /* Q14 */
+                tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 );                               /* Q13 */
+                tmp1 = silk_SUB32( tmp2, tmp1 );                                            /* Q13 */
+                tmp1 = silk_RSHIFT_ROUND( tmp1, 4 );                                        /* Q10 */
+
+                r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 );                                     /* residual error Q10 */
+
+                /* Flip sign depending on dither */
+                if ( psDD->Seed < 0 ) {
+                    r_Q10 = -r_Q10;
+                }
+                r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 );
+
+                /* Find two quantization level candidates and measure their rate-distortion */
+                q1_Q10 = silk_SUB32( r_Q10, offset_Q10 );
+                q1_Q0 = silk_RSHIFT( q1_Q10, 10 );
+                if( q1_Q0 > 0 ) {
+                    q1_Q10  = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
+                    q1_Q10  = silk_ADD32( q1_Q10, offset_Q10 );
+                    q2_Q10  = silk_ADD32( q1_Q10, 1024 );
+                    rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 );
+                    rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
+                } else if( q1_Q0 == 0 ) {
+                    q1_Q10  = offset_Q10;
+                    q2_Q10  = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
+                    rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 );
+                    rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
+                } else if( q1_Q0 == -1 ) {
+                    q2_Q10  = offset_Q10;
+                    q1_Q10  = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
+                    rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
+                    rd2_Q10 = silk_SMULBB(  q2_Q10, Lambda_Q10 );
+                } else {            /* q1_Q0 < -1 */
+                    q1_Q10  = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
+                    q1_Q10  = silk_ADD32( q1_Q10, offset_Q10 );
+                    q2_Q10  = silk_ADD32( q1_Q10, 1024 );
+                    rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
+                    rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 );
+                }
+                rr_Q10  = silk_SUB32( r_Q10, q1_Q10 );
+                rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 10 );
+                rr_Q10  = silk_SUB32( r_Q10, q2_Q10 );
+                rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 10 );
+
+                if( rd1_Q10 < rd2_Q10 ) {
+                    psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 );
+                    psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 );
+                    psSS[ 0 ].Q_Q10  = q1_Q10;
+                    psSS[ 1 ].Q_Q10  = q2_Q10;
+                } else {
+                    psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 );
+                    psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 );
+                    psSS[ 0 ].Q_Q10  = q2_Q10;
+                    psSS[ 1 ].Q_Q10  = q1_Q10;
+                }
+
+                /* Update states for best quantization */
+
+                /* Quantized excitation */
+                exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 );
+                if ( psDD->Seed < 0 ) {
+                    exc_Q14 = -exc_Q14;
+                }
+
+                /* Add predictions */
+                LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 );
+                xq_Q14      = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
+
+                /* Update states */
+                sLF_AR_shp_Q14         = silk_SUB32( xq_Q14, n_AR_Q14 );
+                psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
+                psSS[ 0 ].LF_AR_Q14    = sLF_AR_shp_Q14;
+                psSS[ 0 ].LPC_exc_Q14  = LPC_exc_Q14;
+                psSS[ 0 ].xq_Q14       = xq_Q14;
+
+                /* Update states for second best quantization */
+
+                /* Quantized excitation */
+                exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 );
+                if ( psDD->Seed < 0 ) {
+                    exc_Q14 = -exc_Q14;
+                }
+
+
+                /* Add predictions */
+                LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 );
+                xq_Q14      = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
+
+                /* Update states */
+                sLF_AR_shp_Q14         = silk_SUB32( xq_Q14, n_AR_Q14 );
+                psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
+                psSS[ 1 ].LF_AR_Q14    = sLF_AR_shp_Q14;
+                psSS[ 1 ].LPC_exc_Q14  = LPC_exc_Q14;
+                psSS[ 1 ].xq_Q14       = xq_Q14;
+            }
+        }
+        *smpl_buf_idx  = ( *smpl_buf_idx - 1 ) % DECISION_DELAY;
+        if( *smpl_buf_idx < 0 ) *smpl_buf_idx += DECISION_DELAY;
+        last_smple_idx = ( *smpl_buf_idx + decisionDelay ) % DECISION_DELAY;
+
+        /* Find winner */
+        RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10;
+        Winner_ind = 0;
+        for( k = 1; k < nStatesDelayedDecision; k++ ) {
+            if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) {
+                RDmin_Q10  = psSampleState[ k ][ 0 ].RD_Q10;
+                Winner_ind = k;
+            }
+        }
+
+        /* Increase RD values of expired states */
+        Winner_rand_state = psDelDec[ Winner_ind ].RandState[ last_smple_idx ];
+        for( k = 0; k < nStatesDelayedDecision; k++ ) {
+            if( psDelDec[ k ].RandState[ last_smple_idx ] != Winner_rand_state ) {
+                psSampleState[ k ][ 0 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 0 ].RD_Q10, silk_int32_MAX >> 4 );
+                psSampleState[ k ][ 1 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 1 ].RD_Q10, silk_int32_MAX >> 4 );
+                silk_assert( psSampleState[ k ][ 0 ].RD_Q10 >= 0 );
+            }
+        }
+
+        /* Find worst in first set and best in second set */
+        RDmax_Q10  = psSampleState[ 0 ][ 0 ].RD_Q10;
+        RDmin_Q10  = psSampleState[ 0 ][ 1 ].RD_Q10;
+        RDmax_ind = 0;
+        RDmin_ind = 0;
+        for( k = 1; k < nStatesDelayedDecision; k++ ) {
+            /* find worst in first set */
+            if( psSampleState[ k ][ 0 ].RD_Q10 > RDmax_Q10 ) {
+                RDmax_Q10  = psSampleState[ k ][ 0 ].RD_Q10;
+                RDmax_ind = k;
+            }
+            /* find best in second set */
+            if( psSampleState[ k ][ 1 ].RD_Q10 < RDmin_Q10 ) {
+                RDmin_Q10  = psSampleState[ k ][ 1 ].RD_Q10;
+                RDmin_ind = k;
+            }
+        }
+
+        /* Replace a state if best from second set outperforms worst in first set */
+        if( RDmin_Q10 < RDmax_Q10 ) {
+            silk_memcpy( ( (opus_int32 *)&psDelDec[ RDmax_ind ] ) + i,
+                         ( (opus_int32 *)&psDelDec[ RDmin_ind ] ) + i, sizeof( NSQ_del_dec_struct ) - i * sizeof( opus_int32) );
+            silk_memcpy( &psSampleState[ RDmax_ind ][ 0 ], &psSampleState[ RDmin_ind ][ 1 ], sizeof( NSQ_sample_struct ) );
+        }
+
+        /* Write samples from winner to output and long-term filter states */
+        psDD = &psDelDec[ Winner_ind ];
+        if( subfr > 0 || i >= decisionDelay ) {
+            pulses[  i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
+            xq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
+                silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], delayedGain_Q10[ last_smple_idx ] ), 8 ) );
+            NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay ] = psDD->Shape_Q14[ last_smple_idx ];
+            sLTP_Q15[          NSQ->sLTP_buf_idx     - decisionDelay ] = psDD->Pred_Q15[  last_smple_idx ];
+        }
+        NSQ->sLTP_shp_buf_idx++;
+        NSQ->sLTP_buf_idx++;
+
+        /* Update states */
+        for( k = 0; k < nStatesDelayedDecision; k++ ) {
+            psDD                                     = &psDelDec[ k ];
+            psSS                                     = &psSampleState[ k ][ 0 ];
+            psDD->LF_AR_Q14                          = psSS->LF_AR_Q14;
+            psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14;
+            psDD->Xq_Q14[    *smpl_buf_idx ]         = psSS->xq_Q14;
+            psDD->Q_Q10[     *smpl_buf_idx ]         = psSS->Q_Q10;
+            psDD->Pred_Q15[  *smpl_buf_idx ]         = silk_LSHIFT32( psSS->LPC_exc_Q14, 1 );
+            psDD->Shape_Q14[ *smpl_buf_idx ]         = psSS->sLTP_shp_Q14;
+            psDD->Seed                               = silk_ADD32_ovflw( psDD->Seed, silk_RSHIFT_ROUND( psSS->Q_Q10, 10 ) );
+            psDD->RandState[ *smpl_buf_idx ]         = psDD->Seed;
+            psDD->RD_Q10                             = psSS->RD_Q10;
+        }
+        delayedGain_Q10[     *smpl_buf_idx ]         = Gain_Q10;
+    }
+    /* Update LPC states */
+    for( k = 0; k < nStatesDelayedDecision; k++ ) {
+        psDD = &psDelDec[ k ];
+        silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
+    }
+    RESTORE_STACK;
+}
+
+/* Multiply four packed signed 32-bit values by a 32-bit factor and keep bits   */
+/* 16..47 of every 64-bit product, i.e. ( v * scale ) >> 16 truncated to 32     */
+/* bits per lane. Only lanes 0 and 2 of 'scale' are read, so callers pass a     */
+/* factor that has been broadcast to all lanes.                                 */
+static OPUS_INLINE __m128i silk_nsq_del_dec_mul_rshift16_x4_sse4_1(
+    __m128i             v,                          /* I    Four packed 32-bit values           */
+    __m128i             scale                       /* I    Factor replicated in every lane     */
+)
+{
+    __m128i prod_even, prod_odd;
+
+    /* _mm_mul_epi32() only multiplies lanes 0 and 2, so rotate the lanes down  */
+    /* by one to bring elements 1 and 3 into those positions for a second pass  */
+    prod_odd  = _mm_shuffle_epi32( v, _MM_SHUFFLE( 0, 3, 2, 1 ) );
+
+    prod_even = _mm_mul_epi32( v, scale );
+    prod_odd  = _mm_mul_epi32( prod_odd, scale );
+
+    /* Even products: bits 16..47 end up in the low half of each 64-bit element */
+    prod_even = _mm_srli_epi64( prod_even, 16 );
+    /* Odd products: bits 16..47 end up in the high half of each 64-bit element */
+    prod_odd  = _mm_slli_epi64( prod_odd, 16 );
+
+    /* 0xCC selects 16-bit words 2, 3, 6 and 7 (32-bit lanes 1 and 3) from prod_odd */
+    return _mm_blend_epi16( prod_even, prod_odd, 0xCC );
+}
+
+/* Scale the subframe input with the inverse of the current gain and, when the  */
+/* gain differs from the one stored in NSQ->prev_gain_Q16, rescale the NSQ and  */
+/* delayed decision filter states by the ratio of the two gains.                */
+static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1(
+    const silk_encoder_state *psEncC,               /* I    Encoder State                       */
+    silk_nsq_state      *NSQ,                       /* I/O  NSQ state                           */
+    NSQ_del_dec_struct  psDelDec[],                 /* I/O  Delayed decision states             */
+    const opus_int32    x_Q3[],                     /* I    Input in Q3                         */
+    opus_int32          x_sc_Q10[],                 /* O    Input scaled with 1/Gain in Q10     */
+    const opus_int16    sLTP[],                     /* I    Re-whitened LTP state in Q0         */
+    opus_int32          sLTP_Q15[],                 /* O    LTP state matching scaled input     */
+    opus_int            subfr,                      /* I    Subframe number                     */
+    opus_int            nStatesDelayedDecision,     /* I    Number of del dec states            */
+    const opus_int      LTP_scale_Q14,              /* I    LTP state scaling                   */
+    const opus_int32    Gains_Q16[ MAX_NB_SUBFR ],  /* I                                        */
+    const opus_int      pitchL[ MAX_NB_SUBFR ],     /* I    Pitch lag                           */
+    const opus_int      signal_type,                /* I    Signal type                         */
+    const opus_int      decisionDelay               /* I    Decision delay                      */
+)
+{
+    opus_int            i, state, pitch_lag;
+    opus_int32          gain_ratio_Q16, inv_gain_Q31, x_scale_Q23;
+    NSQ_del_dec_struct  *psState;
+    __m128i             xmm_factor, xmm_vals;
+
+    pitch_lag    = pitchL[ subfr ];
+    /* The gain is clamped to at least 1 before it is inverted */
+    inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 );
+
+    silk_assert( inv_gain_Q31 != 0 );
+
+    /* Gain adjustment factor: previous gain over current gain, unity when they match */
+    gain_ratio_Q16 = (opus_int32)1 << 16;
+    if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) {
+        gain_ratio_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 );
+    }
+
+    /* Scale input: Q3 * Q23 >> 16 -> Q10, four samples per iteration */
+    x_scale_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 );
+    xmm_factor  = _mm_set1_epi32( x_scale_Q23 );
+
+    i = 0;
+    while( i < psEncC->subfr_length - 3 ) {
+        xmm_vals = _mm_loadu_si128( (__m128i *)( &( x_Q3[ i ] ) ) );
+        xmm_vals = silk_nsq_del_dec_mul_rshift16_x4_sse4_1( xmm_vals, xmm_factor );
+        _mm_storeu_si128( (__m128i *)( &( x_sc_Q10[ i ] ) ), xmm_vals );
+        i += 4;
+    }
+
+    /* Scalar tail for the samples that do not fill a whole vector */
+    while( i < psEncC->subfr_length ) {
+        x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], x_scale_Q23 );
+        i++;
+    }
+
+    /* Remember this gain; it is what the comparison above is made against next time */
+    NSQ->prev_gain_Q16 = Gains_Q16[ subfr ];
+
+    /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q31 */
+    if( NSQ->rewhite_flag ) {
+        if( subfr == 0 ) {
+            /* Do LTP downscaling */
+            inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14 ), 2 );
+        }
+        i = NSQ->sLTP_buf_idx - pitch_lag - LTP_ORDER / 2;
+        while( i < NSQ->sLTP_buf_idx ) {
+            silk_assert( i < MAX_FRAME_LENGTH );
+            sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] );
+            i++;
+        }
+    }
+
+    /* Nothing else to adjust when the gain did not change */
+    if( gain_ratio_Q16 == ( (opus_int32)1 << 16 ) ) {
+        return;
+    }
+
+    /* Scale long-term shaping state in place, four values per iteration */
+    xmm_factor = _mm_set1_epi32( gain_ratio_Q16 );
+
+    i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length;
+    while( i < NSQ->sLTP_shp_buf_idx - 3 ) {
+        xmm_vals = _mm_loadu_si128( (__m128i *)( &( NSQ->sLTP_shp_Q14[ i ] ) ) );
+        xmm_vals = silk_nsq_del_dec_mul_rshift16_x4_sse4_1( xmm_vals, xmm_factor );
+        _mm_storeu_si128( (__m128i *)( &( NSQ->sLTP_shp_Q14[ i ] ) ), xmm_vals );
+        i += 4;
+    }
+
+    /* Scalar tail of the long-term shaping state */
+    while( i < NSQ->sLTP_shp_buf_idx ) {
+        NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_ratio_Q16, NSQ->sLTP_shp_Q14[ i ] );
+        i++;
+    }
+
+    /* Scale long-term prediction state; skipped when it was just rebuilt by rewhitening */
+    if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) {
+        i = NSQ->sLTP_buf_idx - pitch_lag - LTP_ORDER / 2;
+        while( i < NSQ->sLTP_buf_idx - decisionDelay ) {
+            sLTP_Q15[ i ] = silk_SMULWW( gain_ratio_Q16, sLTP_Q15[ i ] );
+            i++;
+        }
+    }
+
+    /* Apply the same adjustment to every delayed decision state */
+    for( state = 0, psState = psDelDec; state < nStatesDelayedDecision; state++, psState++ ) {
+        /* Scale scalar states */
+        psState->LF_AR_Q14 = silk_SMULWW( gain_ratio_Q16, psState->LF_AR_Q14 );
+
+        /* Scale short-term prediction and shaping states */
+        i = 0;
+        while( i < NSQ_LPC_BUF_LENGTH ) {
+            psState->sLPC_Q14[ i ] = silk_SMULWW( gain_ratio_Q16, psState->sLPC_Q14[ i ] );
+            i++;
+        }
+
+        i = 0;
+        while( i < MAX_SHAPE_LPC_ORDER ) {
+            psState->sAR2_Q14[ i ] = silk_SMULWW( gain_ratio_Q16, psState->sAR2_Q14[ i ] );
+            i++;
+        }
+
+        /* Scale the per-sample delay buffers */
+        i = 0;
+        while( i < DECISION_DELAY ) {
+            psState->Pred_Q15[  i ] = silk_SMULWW( gain_ratio_Q16, psState->Pred_Q15[  i ] );
+            psState->Shape_Q14[ i ] = silk_SMULWW( gain_ratio_Q16, psState->Shape_Q14[ i ] );
+            i++;
+        }
+    }
+}
new file mode 100644
--- /dev/null
+++ b/media/libopus/silk/x86/NSQ_sse4_1.c
@@ -0,0 +1,719 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+#include "main.h"
+#include "celt/x86/x86cpu.h"
+#include "stack_alloc.h"
+
+static OPUS_INLINE void silk_nsq_scale_states_sse4_1(
+    const silk_encoder_state *psEncC,           /* I    Encoder State                   */
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
+    const opus_int32    x_Q3[],                 /* I    input in Q3                     */
+    opus_int32          x_sc_Q10[],             /* O    input scaled with 1/Gain        */
+    const opus_int16    sLTP[],                 /* I    re-whitened LTP state in Q0     */
+    opus_int32          sLTP_Q15[],             /* O    LTP state matching scaled input */
+    opus_int            subfr,                  /* I    subframe number                 */
+    const opus_int      LTP_scale_Q14,          /* I    LTP state scaling               */
+    const opus_int32    Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes         */
+    const opus_int      pitchL[ MAX_NB_SUBFR ], /* I    Pitch lag                       */
+    const opus_int      signal_type             /* I    Signal type                     */
+);
+
+static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1(
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
+    opus_int            signalType,             /* I    Signal type                     */
+    const opus_int32    x_sc_Q10[],             /* I    input scaled with 1/Gain        */
+    opus_int8           pulses[],               /* O    Quantized pulse signal          */
+    opus_int16          xq[],                   /* O    Quantized speech                */
+    opus_int32          sLTP_Q15[],             /* I/O  LTP state                       */
+    const opus_int16    a_Q12[],                /* I    Short term prediction coefs     */
+    const opus_int16    b_Q14[],                /* I    Long term prediction coefs      */
+    const opus_int16    AR_shp_Q13[],           /* I    Noise shaping AR coefs          */
+    opus_int            lag,                    /* I    Pitch lag                       */
+    opus_int32          HarmShapeFIRPacked_Q14, /* I    Packed long term shaping coefs  */
+    opus_int            Tilt_Q14,               /* I    Spectral tilt                   */
+    opus_int32          LF_shp_Q14,             /* I    Low frequency shaping coefs     */
+    opus_int32          Gain_Q16,               /* I    Quantization step size          */
+    opus_int            offset_Q10,             /* I    Quantization offset             */
+    opus_int            length,                 /* I    Input length                    */
+    opus_int32          table[][4]              /* I    Quantization decision table     */
+);
+/* SSE4.1 variant of the noise shaping quantizer: builds a per-level decision table, then quantizes each subframe in turn */
+void silk_NSQ_sse4_1(
+    const silk_encoder_state    *psEncC,                                    /* I    Encoder State                   */
+    silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
+    SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
+    const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */
+    opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */
+    const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Short term prediction coefs     */
+    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],    /* I    Long term prediction coefs      */
+    const opus_int16            AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs             */
+    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],          /* I    Long term shaping coefs         */
+    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                   /* I    Spectral tilt                   */
+    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                 /* I    Low frequency shaping coefs     */
+    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                  /* I    Quantization step sizes         */
+    const opus_int              pitchL[ MAX_NB_SUBFR ],                     /* I    Pitch lags                      */
+    const opus_int              Lambda_Q10,                                 /* I    Rate/distortion tradeoff        */
+    const opus_int              LTP_scale_Q14                               /* I    LTP state scaling               */
+)
+{
+    opus_int            k, lag, start_idx, LSF_interpolation_flag;
+    const opus_int16    *A_Q12, *B_Q14, *AR_shp_Q13;
+    opus_int16          *pxq;
+    VARDECL( opus_int32, sLTP_Q15 );
+    VARDECL( opus_int16, sLTP );
+    opus_int32          HarmShapeFIRPacked_Q14;
+    opus_int            offset_Q10;
+    VARDECL( opus_int32, x_sc_Q10 );
+    /* Row 32 + q describes level q in [-32, 31]: { q1_Q10, q2_Q10, 2 * (q1 - q2), (rd1 - rd2) + (q1 * q1 - q2 * q2) } */
+    opus_int32   table[ 64 ][ 4 ];
+    opus_int32   tmp1;
+    opus_int32   q1_Q10, q2_Q10, rd1_Q20, rd2_Q20;
+
+    SAVE_STACK;
+
+    NSQ->rand_seed = psIndices->Seed;
+
+    /* Set unvoiced lag to the previous one, overwrite later for voiced */
+    lag = NSQ->lagPrev;
+
+    silk_assert( NSQ->prev_gain_Q16 != 0 );
+
+    offset_Q10 = silk_Quantization_Offsets_Q10[ psIndices->signalType >> 1 ][ psIndices->quantOffsetType ];
+
+    /* Level 0 (row 32): candidates are offset and offset + ( 1024 - QUANT_LEVEL_ADJUST_Q10 ) */
+    q1_Q10  = offset_Q10;
+    q2_Q10  = offset_Q10 + ( 1024 - QUANT_LEVEL_ADJUST_Q10 );
+    rd1_Q20 = q1_Q10 * Lambda_Q10;
+    rd2_Q20 = q2_Q10 * Lambda_Q10;
+
+    table[ 32 ][ 0 ] = q1_Q10;
+    table[ 32 ][ 1 ] = q2_Q10;
+    table[ 32 ][ 2 ] = 2 * (q1_Q10 - q2_Q10);
+    table[ 32 ][ 3 ] = (rd1_Q20 - rd2_Q20) + (q1_Q10 * q1_Q10 - q2_Q10 * q2_Q10);
+
+    /* Level -1 (row 31): candidates are offset - ( 1024 - QUANT_LEVEL_ADJUST_Q10 ) and offset; rd1 uses the negated q1 */
+    q1_Q10  = offset_Q10 - ( 1024 - QUANT_LEVEL_ADJUST_Q10 );
+    q2_Q10  = offset_Q10;
+    rd1_Q20 = - q1_Q10 * Lambda_Q10;
+    rd2_Q20 = q2_Q10 * Lambda_Q10;
+
+    table[ 31 ][ 0 ] = q1_Q10;
+    table[ 31 ][ 1 ] = q2_Q10;
+    table[ 31 ][ 2 ] = 2 * (q1_Q10 - q2_Q10);
+    table[ 31 ][ 3 ] = (rd1_Q20 - rd2_Q20) + (q1_Q10 * q1_Q10 - q2_Q10 * q2_Q10);
+
+    /* Levels > 0 (rows 33..63): candidates are 1024 apart, both lowered by QUANT_LEVEL_ADJUST_Q10 */
+    for (k = 1; k <= 31; k++)
+    {
+        tmp1 = offset_Q10 + silk_LSHIFT( k, 10 );
+
+        q1_Q10  = tmp1 - QUANT_LEVEL_ADJUST_Q10;
+        q2_Q10  = tmp1 - QUANT_LEVEL_ADJUST_Q10 + 1024;
+        rd1_Q20 = q1_Q10 * Lambda_Q10;
+        rd2_Q20 = q2_Q10 * Lambda_Q10;
+
+        table[ 32 + k ][ 0 ] = q1_Q10;
+        table[ 32 + k ][ 1 ] = q2_Q10;
+        table[ 32 + k ][ 2 ] = 2 * (q1_Q10 - q2_Q10);
+        table[ 32 + k ][ 3 ] = (rd1_Q20 - rd2_Q20) + (q1_Q10 * q1_Q10 - q2_Q10 * q2_Q10);
+    }
+
+    /* Levels < -1 (rows 0..30): candidates are 1024 apart, both raised by QUANT_LEVEL_ADJUST_Q10; rd terms use negated values */
+    for (k = -32; k <= -2; k++)
+    {
+        tmp1 = offset_Q10 + silk_LSHIFT( k, 10 );
+
+        q1_Q10  = tmp1 + QUANT_LEVEL_ADJUST_Q10;
+        q2_Q10  = tmp1 + QUANT_LEVEL_ADJUST_Q10 + 1024;
+        rd1_Q20 = - q1_Q10 * Lambda_Q10;
+        rd2_Q20 = - q2_Q10 * Lambda_Q10;
+
+        table[ 32 + k ][ 0 ] = q1_Q10;
+        table[ 32 + k ][ 1 ] = q2_Q10;
+        table[ 32 + k ][ 2 ] = 2 * (q1_Q10 - q2_Q10);
+        table[ 32 + k ][ 3 ] = (rd1_Q20 - rd2_Q20) + (q1_Q10 * q1_Q10 - q2_Q10 * q2_Q10);
+    }
+
+    if( psIndices->NLSFInterpCoef_Q2 == 4 ) {
+        LSF_interpolation_flag = 0;
+    } else {
+        LSF_interpolation_flag = 1;
+    }
+    /* Scratch buffers: both LTP states span ltp_mem_length + frame_length samples, the scaled input one subframe */
+    ALLOC( sLTP_Q15,
+           psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );
+    ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 );
+    ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 );
+    /* Set up pointers to start of sub frame */
+    NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length;
+    NSQ->sLTP_buf_idx     = psEncC->ltp_mem_length;
+    pxq                   = &NSQ->xq[ psEncC->ltp_mem_length ];
+    for( k = 0; k < psEncC->nb_subfr; k++ ) {
+        A_Q12      = &PredCoef_Q12[ (( k >> 1 ) | ( 1 - LSF_interpolation_flag )) * MAX_LPC_ORDER ];
+        B_Q14      = &LTPCoef_Q14[ k * LTP_ORDER ];
+        AR_shp_Q13 = &AR2_Q13[     k * MAX_SHAPE_LPC_ORDER ];
+
+        /* Noise shape parameters: gain >> 2 in the low 16 bits, gain >> 1 in the high 16 bits */
+        silk_assert( HarmShapeGain_Q14[ k ] >= 0 );
+        HarmShapeFIRPacked_Q14  =                          silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 );
+        HarmShapeFIRPacked_Q14 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 );
+
+        NSQ->rewhite_flag = 0;
+        if( psIndices->signalType == TYPE_VOICED ) {
+            /* Voiced */
+            lag = pitchL[ k ];
+
+            /* Re-whitening: when ( k & 1 ) == 0 if LSF_interpolation_flag is set, otherwise only when ( k & 3 ) == 0 */
+            if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) {
+                /* Rewhiten with new A coefs */
+                start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2;
+                celt_assert( start_idx > 0 );
+
+                silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ],
+                    A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch );
+
+                NSQ->rewhite_flag = 1;
+                NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
+            }
+        }
+
+        silk_nsq_scale_states_sse4_1( psEncC, NSQ, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType );
+        /* SSE4.1 kernel only handles shaping order 10 with prediction order 16; any other orders use the generic quantizer */
+        if ( opus_likely( ( 10 == psEncC->shapingLPCOrder ) && ( 16 == psEncC->predictLPCOrder) ) )
+        {
+            silk_noise_shape_quantizer_10_16_sse4_1( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14,
+                AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ],
+                offset_Q10, psEncC->subfr_length, &(table[32]) ); /* row 32 is level 0, so the callee can index with signed levels */
+        }
+        else
+        {
+            silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14,
+                AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10,
+                offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder, psEncC->arch );
+        }
+
+        x_Q3   += psEncC->subfr_length;
+        pulses += psEncC->subfr_length;
+        pxq    += psEncC->subfr_length;
+    }
+
+    /* Update lagPrev for next frame */
+    NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ];
+
+    /* Save quantized speech and noise shaping signals: keep the last ltp_mem_length samples at the start of each buffer */
+    silk_memmove( NSQ->xq,           &NSQ->xq[           psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) );
+    silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) );
+    RESTORE_STACK;
+}
+
+/* silk_noise_shape_quantizer_10_16_sse4_1: quantizes `length` samples of one subframe with SSE4.1.   */
+/* Hard-wired to 16 short-term prediction taps (a_Q12[ 0..15 ]) and 10 shaping taps (AR_shp_Q13[ 0..9 ]). */
+/* table is indexed by the signed level q1_Q0, so it must point into the middle of the caller's table. */
+static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1(
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
+    opus_int            signalType,             /* I    Signal type                     */
+    const opus_int32    x_sc_Q10[],             /* I    input scaled with 1/Gain        */
+    opus_int8           pulses[],               /* O    Quantized pulse signal          */
+    opus_int16          xq[],                   /* O    Quantized speech                */
+    opus_int32          sLTP_Q15[],             /* I/O  LTP state                       */
+    const opus_int16    a_Q12[],                /* I    Short term prediction coefs     */
+    const opus_int16    b_Q14[],                /* I    Long term prediction coefs      */
+    const opus_int16    AR_shp_Q13[],           /* I    Noise shaping AR coefs          */
+    opus_int            lag,                    /* I    Pitch lag                       */
+    opus_int32          HarmShapeFIRPacked_Q14, /* I    Packed long term shaping coefs  */
+    opus_int            Tilt_Q14,               /* I    Spectral tilt                   */
+    opus_int32          LF_shp_Q14,             /* I    Low frequency shaping coefs     */
+    opus_int32          Gain_Q16,               /* I    Quantization step size          */
+    opus_int            offset_Q10,             /* I    Quantization offset             */
+    opus_int            length,                 /* I    Input length                    */
+    opus_int32          table[][4]              /* I    Quantization decision table     */
+)
+{
+    opus_int     i;
+    opus_int32   LTP_pred_Q13, LPC_pred_Q10, n_AR_Q12, n_LTP_Q13;
+    opus_int32   n_LF_Q12, r_Q10, q1_Q0, q1_Q10, q2_Q10;
+    opus_int32   exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
+    opus_int32   tmp1, tmp2, sLF_AR_shp_Q14;
+    opus_int32   *psLPC_Q14, *shp_lag_ptr, *pred_lag_ptr;
+
+    __m128i xmm_tempa, xmm_tempb;
+
+    __m128i xmm_one; /* reused: holds byte-shuffle masks during setup, then eight 16-bit ones inside the loop */
+
+    __m128i psLPC_Q14_hi_01234567, psLPC_Q14_hi_89ABCDEF;
+    __m128i psLPC_Q14_lo_01234567, psLPC_Q14_lo_89ABCDEF;
+    __m128i a_Q12_01234567,        a_Q12_89ABCDEF;
+
+    __m128i sAR2_Q14_hi_76543210, sAR2_Q14_lo_76543210;
+    __m128i AR_shp_Q13_76543210;
+
+    shp_lag_ptr  = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
+    pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
+    Gain_Q10     = silk_RSHIFT( Gain_Q16, 6 );
+
+    /* Set up short term AR state */
+    psLPC_Q14 = &NSQ->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 ];
+
+    sLF_AR_shp_Q14 = NSQ->sLF_AR_shp_Q14;
+    xq_Q14         = psLPC_Q14[ 0 ];
+    LTP_pred_Q13   = 0;
+
+    /* load a_Q12; this mask reverses the order of the eight 16-bit lanes, so a_Q12[ 0 ] ends up in lane 7 */
+    xmm_one = _mm_set_epi8( 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 );
+
+    /* load a_Q12[0] - a_Q12[7] */
+    a_Q12_01234567 = _mm_loadu_si128( (__m128i *)(&a_Q12[ 0 ] ) );
+    /* load a_Q12[ 8 ] - a_Q12[ 15 ] */
+    a_Q12_89ABCDEF = _mm_loadu_si128( (__m128i *)(&a_Q12[ 8 ] ) );
+
+    a_Q12_01234567 = _mm_shuffle_epi8( a_Q12_01234567, xmm_one );
+    a_Q12_89ABCDEF = _mm_shuffle_epi8( a_Q12_89ABCDEF, xmm_one );
+
+    /* load AR_shp_Q13[ 0..7 ]; taps 8 and 9 are applied with scalar code inside the loop */
+    AR_shp_Q13_76543210 = _mm_loadu_si128( (__m128i *)(&AR_shp_Q13[0] ) );
+
+    /* load psLPC_Q14; this mask gathers the low 16-bit halves of four 32-bit values into the low 64 bits, the high halves into the high 64 bits */
+    xmm_one = _mm_set_epi8(15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0 );
+
+    xmm_tempa = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[-16]) );
+    xmm_tempb = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[-12]) );
+
+    xmm_tempa = _mm_shuffle_epi8( xmm_tempa, xmm_one );
+    xmm_tempb = _mm_shuffle_epi8( xmm_tempb, xmm_one );
+
+    psLPC_Q14_hi_89ABCDEF = _mm_unpackhi_epi64( xmm_tempa, xmm_tempb );
+    psLPC_Q14_lo_89ABCDEF = _mm_unpacklo_epi64( xmm_tempa, xmm_tempb );
+
+    xmm_tempa = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -8 ]) );
+    xmm_tempb = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -4 ]) );
+
+    xmm_tempa = _mm_shuffle_epi8( xmm_tempa, xmm_one );
+    xmm_tempb = _mm_shuffle_epi8( xmm_tempb, xmm_one );
+
+    psLPC_Q14_hi_01234567 = _mm_unpackhi_epi64( xmm_tempa, xmm_tempb );
+    psLPC_Q14_lo_01234567 = _mm_unpacklo_epi64( xmm_tempa, xmm_tempb );
+
+    /* load sAR2_Q14[ 0..7 ], split into low and high 16-bit halves with the same mask */
+    xmm_tempa = _mm_loadu_si128( (__m128i *)(&(NSQ->sAR2_Q14[ 0 ]) ) );
+    xmm_tempb = _mm_loadu_si128( (__m128i *)(&(NSQ->sAR2_Q14[ 4 ]) ) );
+
+    xmm_tempa = _mm_shuffle_epi8( xmm_tempa, xmm_one );
+    xmm_tempb = _mm_shuffle_epi8( xmm_tempb, xmm_one );
+
+    sAR2_Q14_hi_76543210 = _mm_unpackhi_epi64( xmm_tempa, xmm_tempb );
+    sAR2_Q14_lo_76543210 = _mm_unpacklo_epi64( xmm_tempa, xmm_tempb );
+
+    /* prepare 1 in 8 * 16bit; used with pmaddwd to sum adjacent 16-bit lanes into 32-bit lanes */
+    xmm_one = _mm_set1_epi16(1);
+
+    for( i = 0; i < length; i++ )
+    {
+        /* Short-term prediction */
+        __m128i xmm_hi_07, xmm_hi_8F, xmm_lo_07, xmm_lo_8F;
+
+        /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+        LPC_pred_Q10 = 8; /* silk_RSHIFT( predictLPCOrder, 1 ); */
+
+        /* shift the psLPC_Q14 history down one 16-bit lane and insert the newest sample (xq_Q14) in lane 7 */
+        psLPC_Q14_hi_89ABCDEF = _mm_alignr_epi8( psLPC_Q14_hi_01234567, psLPC_Q14_hi_89ABCDEF, 2 );
+        psLPC_Q14_lo_89ABCDEF = _mm_alignr_epi8( psLPC_Q14_lo_01234567, psLPC_Q14_lo_89ABCDEF, 2 );
+
+        psLPC_Q14_hi_01234567 = _mm_srli_si128( psLPC_Q14_hi_01234567, 2 );
+        psLPC_Q14_lo_01234567 = _mm_srli_si128( psLPC_Q14_lo_01234567, 2 );
+
+        psLPC_Q14_hi_01234567 = _mm_insert_epi16( psLPC_Q14_hi_01234567, (xq_Q14 >> 16), 7 );
+        psLPC_Q14_lo_01234567 = _mm_insert_epi16( psLPC_Q14_lo_01234567, (xq_Q14),       7 );
+
+        /* high part, use pmaddwd, results in 4 32-bit */
+        xmm_hi_07 = _mm_madd_epi16( psLPC_Q14_hi_01234567, a_Q12_01234567 );
+        xmm_hi_8F = _mm_madd_epi16( psLPC_Q14_hi_89ABCDEF, a_Q12_89ABCDEF );
+
+        /* low part, use pmulhw, results in 8 16-bit; pmulhw is signed * signed but the low halves are unsigned, so the coefficient is added back wherever the low half has its sign bit set */
+        xmm_tempa = _mm_cmpgt_epi16( _mm_setzero_si128(), psLPC_Q14_lo_01234567 );
+        xmm_tempb = _mm_cmpgt_epi16( _mm_setzero_si128(), psLPC_Q14_lo_89ABCDEF );
+
+        xmm_tempa = _mm_and_si128( xmm_tempa, a_Q12_01234567 );
+        xmm_tempb = _mm_and_si128( xmm_tempb, a_Q12_89ABCDEF );
+
+        xmm_lo_07 = _mm_mulhi_epi16( psLPC_Q14_lo_01234567, a_Q12_01234567 );
+        xmm_lo_8F = _mm_mulhi_epi16( psLPC_Q14_lo_89ABCDEF, a_Q12_89ABCDEF );
+
+        xmm_lo_07 = _mm_add_epi16( xmm_lo_07, xmm_tempa );
+        xmm_lo_8F = _mm_add_epi16( xmm_lo_8F, xmm_tempb );
+
+        xmm_lo_07 = _mm_madd_epi16( xmm_lo_07, xmm_one );
+        xmm_lo_8F = _mm_madd_epi16( xmm_lo_8F, xmm_one );
+
+        /* accumulate */
+        xmm_hi_07 = _mm_add_epi32( xmm_hi_07, xmm_hi_8F );
+        xmm_lo_07 = _mm_add_epi32( xmm_lo_07, xmm_lo_8F );
+
+        xmm_hi_07 = _mm_add_epi32( xmm_hi_07, xmm_lo_07 );
+        /* horizontal sum: fold the upper 64 bits onto the lower, then 32-bit lane 1 onto lane 0 */
+        xmm_hi_07 = _mm_add_epi32( xmm_hi_07, _mm_unpackhi_epi64(xmm_hi_07, xmm_hi_07 ) );
+        xmm_hi_07 = _mm_add_epi32( xmm_hi_07, _mm_shufflelo_epi16(xmm_hi_07, 0x0E ) );
+
+        LPC_pred_Q10 += _mm_cvtsi128_si32( xmm_hi_07 );
+
+        /* Long-term prediction */
+        if ( opus_likely( signalType == TYPE_VOICED ) ) {
+            /* Unrolled loop */
+            /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+            LTP_pred_Q13 = 2;
+            {
+                __m128i b_Q14_3210, b_Q14_0123, pred_lag_ptr_0123;
+
+                b_Q14_3210 = OP_CVTEPI16_EPI32_M64( b_Q14 );
+                b_Q14_0123 = _mm_shuffle_epi32( b_Q14_3210, 0x1B );
+
+                /* loaded: [0] [-1] [-2] [-3] */
+                pred_lag_ptr_0123 = _mm_loadu_si128( (__m128i *)(&pred_lag_ptr[ -3 ] ) );
+                /* shuffle to [-3] [-2] [-1] [0] and to new xmm */
+                xmm_tempa = _mm_shuffle_epi32( pred_lag_ptr_0123, 0x1B );
+                /*64-bit multiply, a[2] * b[-2], a[0] * b[0] */
+                xmm_tempa = _mm_mul_epi32( xmm_tempa, b_Q14_3210 );
+                /* right shift 2 bytes (16 bits), zero extended; leaves ( product >> 16 ) in 32-bit lanes 0 and 2 */
+                xmm_tempa = _mm_srli_si128( xmm_tempa, 2 );
+
+                /* a[1] * b[-1], a[3] * b[-3] */
+                pred_lag_ptr_0123 = _mm_mul_epi32( pred_lag_ptr_0123, b_Q14_0123 );
+                pred_lag_ptr_0123 = _mm_srli_si128( pred_lag_ptr_0123, 2 );
+
+                pred_lag_ptr_0123 = _mm_add_epi32( pred_lag_ptr_0123, xmm_tempa );
+                /* equal shift right 8 bytes*/
+                xmm_tempa = _mm_shuffle_epi32( pred_lag_ptr_0123, _MM_SHUFFLE( 0, 0, 3, 2 ) );
+                xmm_tempa = _mm_add_epi32( xmm_tempa, pred_lag_ptr_0123 );
+
+                LTP_pred_Q13 += _mm_cvtsi128_si32( xmm_tempa );
+
+                LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -4 ], b_Q14[ 4 ] );
+                pred_lag_ptr++;
+            }
+        }
+
+        /* Noise shape feedback: taps 0-7 are kept in the sAR2 vector registers, taps 8 and 9 in NSQ->sAR2_Q14[ 8..9 ] */
+        NSQ->sAR2_Q14[ 9 ] = NSQ->sAR2_Q14[ 8 ];
+        NSQ->sAR2_Q14[ 8 ] = _mm_cvtsi128_si32( _mm_srli_si128(_mm_unpackhi_epi16( sAR2_Q14_lo_76543210, sAR2_Q14_hi_76543210 ), 12 ) );
+        /* shift the vector state up one 16-bit lane and insert xq_Q14 in lane 0 */
+        sAR2_Q14_hi_76543210 = _mm_slli_si128( sAR2_Q14_hi_76543210, 2 );
+        sAR2_Q14_lo_76543210 = _mm_slli_si128( sAR2_Q14_lo_76543210, 2 );
+
+        sAR2_Q14_hi_76543210 = _mm_insert_epi16( sAR2_Q14_hi_76543210, (xq_Q14 >> 16), 0 );
+        sAR2_Q14_lo_76543210 = _mm_insert_epi16( sAR2_Q14_lo_76543210, (xq_Q14),       0 );
+
+        /* high part, use pmaddwd, results in 4 32-bit */
+        xmm_hi_07 = _mm_madd_epi16( sAR2_Q14_hi_76543210, AR_shp_Q13_76543210 );
+
+        /* low part, use pmulhw, results in 8 16-bit; same unsigned * signed correction as in the short-term prediction above */
+        xmm_tempa = _mm_cmpgt_epi16( _mm_setzero_si128(), sAR2_Q14_lo_76543210 );
+        xmm_tempa = _mm_and_si128( xmm_tempa, AR_shp_Q13_76543210 );
+
+        xmm_lo_07 = _mm_mulhi_epi16( sAR2_Q14_lo_76543210, AR_shp_Q13_76543210 );
+        xmm_lo_07 = _mm_add_epi16( xmm_lo_07, xmm_tempa );
+
+        xmm_lo_07 = _mm_madd_epi16( xmm_lo_07, xmm_one );
+
+        /* accumulate */
+        xmm_hi_07 = _mm_add_epi32( xmm_hi_07, xmm_lo_07 );
+
+        xmm_hi_07 = _mm_add_epi32( xmm_hi_07, _mm_unpackhi_epi64(xmm_hi_07, xmm_hi_07 ) );
+        xmm_hi_07 = _mm_add_epi32( xmm_hi_07, _mm_shufflelo_epi16(xmm_hi_07, 0x0E ) );
+
+        n_AR_Q12 = 5 + _mm_cvtsi128_si32( xmm_hi_07 ); /* 5: presumably silk_RSHIFT( shapingLPCOrder, 1 ) for order 10, like the 8 above */
+
+        n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sAR2_Q14[ 8 ], AR_shp_Q13[ 8 ] );
+        n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sAR2_Q14[ 9 ], AR_shp_Q13[ 9 ] );
+
+        n_AR_Q12 = silk_LSHIFT32( n_AR_Q12, 1 );                                /* Q11 -> Q12 */
+        n_AR_Q12 = silk_SMLAWB( n_AR_Q12, sLF_AR_shp_Q14, Tilt_Q14 );
+
+        n_LF_Q12 = silk_SMULWB( NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - 1 ], LF_shp_Q14 );
+        n_LF_Q12 = silk_SMLAWT( n_LF_Q12, sLF_AR_shp_Q14, LF_shp_Q14 );
+
+        silk_assert( lag > 0 || signalType != TYPE_VOICED );
+
+        /* Combine prediction and noise shaping signals */
+        tmp1 = silk_SUB32( silk_LSHIFT32( LPC_pred_Q10, 2 ), n_AR_Q12 );        /* Q12 */
+        tmp1 = silk_SUB32( tmp1, n_LF_Q12 );                                    /* Q12 */
+        if( lag > 0 ) {
+            /* Symmetric, packed FIR coefficients */
+            n_LTP_Q13 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 );
+            n_LTP_Q13 = silk_SMLAWT( n_LTP_Q13, shp_lag_ptr[ -1 ],                      HarmShapeFIRPacked_Q14 );
+            n_LTP_Q13 = silk_LSHIFT( n_LTP_Q13, 1 );
+            shp_lag_ptr++;
+
+            tmp2 = silk_SUB32( LTP_pred_Q13, n_LTP_Q13 );                       /* Q13 */
+            tmp1 = silk_ADD_LSHIFT32( tmp2, tmp1, 1 );                          /* Q13 */
+            tmp1 = silk_RSHIFT_ROUND( tmp1, 3 );                                /* Q10 */
+        } else {
+            tmp1 = silk_RSHIFT_ROUND( tmp1, 2 );                                /* Q10 */
+        }
+
+        r_Q10 = silk_SUB32( x_sc_Q10[ i ], tmp1 );                              /* residual error Q10 */
+
+        /* Generate dither */
+        NSQ->rand_seed = silk_RAND( NSQ->rand_seed );
+
+        /* Flip sign depending on dither */
+        tmp2 = -r_Q10;
+        if ( NSQ->rand_seed < 0 ) r_Q10 = tmp2;
+
+        r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 );
+
+        /* Find two quantization level candidates and measure their rate-distortion */
+        q1_Q10 = silk_SUB32( r_Q10, offset_Q10 );
+        q1_Q0 = silk_RSHIFT( q1_Q10, 10 );
+
+        q1_Q10 = table[q1_Q0][0];
+        q2_Q10 = table[q1_Q0][1];
+        /* table columns 2 and 3 reduce the comparison of the two candidates to one multiply; a negative result selects q2_Q10 */
+        if (r_Q10 * table[q1_Q0][2] - table[q1_Q0][3] < 0)
+        {
+            q1_Q10 = q2_Q10;
+        }
+
+        pulses[ i ] = (opus_int8)silk_RSHIFT_ROUND( q1_Q10, 10 );
+
+        /* Excitation */
+        exc_Q14 = silk_LSHIFT( q1_Q10, 4 );
+
+        tmp2 = -exc_Q14;
+        if ( NSQ->rand_seed < 0 ) exc_Q14 = tmp2;
+
+        /* Add predictions */
+        LPC_exc_Q14 = silk_ADD_LSHIFT32( exc_Q14, LTP_pred_Q13, 1 );
+        xq_Q14      = silk_ADD_LSHIFT32( LPC_exc_Q14, LPC_pred_Q10, 4 );
+
+        /* Update states */
+        psLPC_Q14++;
+        *psLPC_Q14 = xq_Q14;
+        sLF_AR_shp_Q14 = silk_SUB_LSHIFT32( xq_Q14, n_AR_Q12, 2 );
+
+        NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx ] = silk_SUB_LSHIFT32( sLF_AR_shp_Q14, n_LF_Q12, 2 );
+        sLTP_Q15[ NSQ->sLTP_buf_idx ] = silk_LSHIFT( LPC_exc_Q14, 1 );
+        NSQ->sLTP_shp_buf_idx++;
+        NSQ->sLTP_buf_idx++;
+
+        /* Make dither dependent on quantized signal */
+        NSQ->rand_seed = silk_ADD32_ovflw( NSQ->rand_seed, pulses[ i ] );
+    }
+
+    NSQ->sLF_AR_shp_Q14 = sLF_AR_shp_Q14;
+
+    /* Scale XQ back to normal level before saving; psLPC_Q14 is reset to the first sample written by the loop above */
+    psLPC_Q14 = &NSQ->sLPC_Q14[ NSQ_LPC_BUF_LENGTH ];
+
+    /* write back sAR2_Q14[ 0..7 ] by re-interleaving the low and high 16-bit halves */
+    xmm_tempa = _mm_unpackhi_epi16( sAR2_Q14_lo_76543210, sAR2_Q14_hi_76543210 );
+    xmm_tempb = _mm_unpacklo_epi16( sAR2_Q14_lo_76543210, sAR2_Q14_hi_76543210 );
+    _mm_storeu_si128( (__m128i *)(&NSQ->sAR2_Q14[ 4 ]), xmm_tempa );
+    _mm_storeu_si128( (__m128i *)(&NSQ->sAR2_Q14[ 0 ]), xmm_tempb );
+
+    /* xq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( psLPC_Q14[ i ], Gain_Q10 ), 8 ) ); */
+    {
+        __m128i xmm_Gain_Q10;
+        __m128i xmm_xq_Q14_3210, xmm_xq_Q14_x3x1, xmm_xq_Q14_7654, xmm_xq_Q14_x7x5;
+
+        /* prepare (1 << 7) in packed 4 32-bits */
+        xmm_tempa = _mm_set1_epi32( (1 << 7) );
+
+        /* prepare Gain_Q10 in packed 4 32-bits */
+        xmm_Gain_Q10 = _mm_set1_epi32( Gain_Q10 );
+
+        /* process xq, eight samples per iteration */
+        for (i = 0; i < length - 7; i += 8)
+        {
+            xmm_xq_Q14_3210 = _mm_loadu_si128( (__m128i *)(&(psLPC_Q14[ i + 0 ] ) ) );
+            xmm_xq_Q14_7654 = _mm_loadu_si128( (__m128i *)(&(psLPC_Q14[ i + 4 ] ) ) );
+
+            /* rotate right by one 32-bit lane so elements 1 and 3 sit in the even lanes read by _mm_mul_epi32 */
+            xmm_xq_Q14_x3x1 = _mm_shuffle_epi32( xmm_xq_Q14_3210, _MM_SHUFFLE( 0, 3, 2, 1 ) );
+            /* same rotation for elements 5 and 7 */
+            xmm_xq_Q14_x7x5 = _mm_shuffle_epi32( xmm_xq_Q14_7654, _MM_SHUFFLE( 0, 3, 2, 1 ) );
+
+            xmm_xq_Q14_3210 = _mm_mul_epi32( xmm_xq_Q14_3210, xmm_Gain_Q10 );
+            xmm_xq_Q14_x3x1 = _mm_mul_epi32( xmm_xq_Q14_x3x1, xmm_Gain_Q10 );
+            xmm_xq_Q14_7654 = _mm_mul_epi32( xmm_xq_Q14_7654, xmm_Gain_Q10 );
+            xmm_xq_Q14_x7x5 = _mm_mul_epi32( xmm_xq_Q14_x7x5, xmm_Gain_Q10 );
+
+            xmm_xq_Q14_3210 = _mm_srli_epi64( xmm_xq_Q14_3210, 16 );
+            xmm_xq_Q14_x3x1 = _mm_slli_epi64( xmm_xq_Q14_x3x1, 16 );
+            xmm_xq_Q14_7654 = _mm_srli_epi64( xmm_xq_Q14_7654, 16 );
+            xmm_xq_Q14_x7x5 = _mm_slli_epi64( xmm_xq_Q14_x7x5, 16 );
+
+            xmm_xq_Q14_3210 = _mm_blend_epi16( xmm_xq_Q14_3210, xmm_xq_Q14_x3x1, 0xCC );
+            xmm_xq_Q14_7654 = _mm_blend_epi16( xmm_xq_Q14_7654, xmm_xq_Q14_x7x5, 0xCC );
+
+            /* silk_RSHIFT_ROUND(xq, 8) */
+            xmm_xq_Q14_3210 = _mm_add_epi32( xmm_xq_Q14_3210, xmm_tempa );
+            xmm_xq_Q14_7654 = _mm_add_epi32( xmm_xq_Q14_7654, xmm_tempa );
+
+            xmm_xq_Q14_3210 = _mm_srai_epi32( xmm_xq_Q14_3210, 8 );
+            xmm_xq_Q14_7654 = _mm_srai_epi32( xmm_xq_Q14_7654, 8 );
+
+            /* silk_SAT16 */
+            xmm_xq_Q14_3210 = _mm_packs_epi32( xmm_xq_Q14_3210, xmm_xq_Q14_7654 );
+
+            /* save to xq */
+            _mm_storeu_si128( (__m128i *)(&xq[ i ] ), xmm_xq_Q14_3210 );
+        }
+    }
+    for ( ; i < length; i++)
+    {
+        xq[i] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( psLPC_Q14[ i ], Gain_Q10 ), 8 ) );
+    }
+
+    /* Update LPC synth buffer: keep the last NSQ_LPC_BUF_LENGTH samples as history for the next call */
+    silk_memcpy( NSQ->sLPC_Q14, &NSQ->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
+}
+/* Scales one subframe of input by the inverse gain and, when the gain differs from the previous one, rescales the NSQ states */
+static OPUS_INLINE void silk_nsq_scale_states_sse4_1(
+    const silk_encoder_state *psEncC,           /* I    Encoder State                   */
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
+    const opus_int32    x_Q3[],                 /* I    input in Q3                     */
+    opus_int32          x_sc_Q10[],             /* O    input scaled with 1/Gain        */
+    const opus_int16    sLTP[],                 /* I    re-whitened LTP state in Q0     */
+    opus_int32          sLTP_Q15[],             /* O    LTP state matching scaled input */
+    opus_int            subfr,                  /* I    subframe number                 */
+    const opus_int      LTP_scale_Q14,          /* I    LTP state scaling               */
+    const opus_int32    Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes         */
+    const opus_int      pitchL[ MAX_NB_SUBFR ], /* I    Pitch lag                       */
+    const opus_int      signal_type             /* I    Signal type                     */
+)
+{
+    opus_int   i, lag;
+    opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23;
+    __m128i xmm_inv_gain_Q23, xmm_x_Q3_x2x0, xmm_x_Q3_x3x1;
+
+    lag          = pitchL[ subfr ];
+    inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 );
+    silk_assert( inv_gain_Q31 != 0 );
+
+    /* Calculate gain adjustment factor: previous gain / current gain, exactly 1.0 (Q16) when they are equal */
+    if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) {
+        gain_adj_Q16 =  silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 );
+    } else {
+        gain_adj_Q16 = (opus_int32)1 << 16;
+    }
+
+    /* Scale input */
+    inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 );
+
+    /* prepare inv_gain_Q23 in packed 4 32-bits */
+    xmm_inv_gain_Q23 = _mm_set1_epi32(inv_gain_Q23);
+    /* four samples per iteration: 64-bit products of the even and odd lanes, >> 16, then re-interleaved */
+    for( i = 0; i < psEncC->subfr_length - 3; i += 4 ) {
+        xmm_x_Q3_x2x0 = _mm_loadu_si128( (__m128i *)(&(x_Q3[ i ] ) ) );
+
+        /* rotate right by one 32-bit lane so elements 1 and 3 sit in the even lanes read by _mm_mul_epi32 */
+        xmm_x_Q3_x3x1 = _mm_shuffle_epi32( xmm_x_Q3_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) );
+
+        xmm_x_Q3_x2x0 = _mm_mul_epi32( xmm_x_Q3_x2x0, xmm_inv_gain_Q23 );
+        xmm_x_Q3_x3x1 = _mm_mul_epi32( xmm_x_Q3_x3x1, xmm_inv_gain_Q23 );
+
+        xmm_x_Q3_x2x0 = _mm_srli_epi64( xmm_x_Q3_x2x0, 16 );
+        xmm_x_Q3_x3x1 = _mm_slli_epi64( xmm_x_Q3_x3x1, 16 );
+
+        xmm_x_Q3_x2x0 = _mm_blend_epi16( xmm_x_Q3_x2x0, xmm_x_Q3_x3x1, 0xCC );
+
+        _mm_storeu_si128( (__m128i *)(&(x_sc_Q10[ i ] ) ), xmm_x_Q3_x2x0 );
+    }
+    /* scalar tail for the samples left over by the 4-wide loop */
+    for( ; i < psEncC->subfr_length; i++ ) {
+        x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 );
+    }
+
+    /* Save the gain (not its inverse); the next call compares against it to compute gain_adj_Q16 */
+    NSQ->prev_gain_Q16 = Gains_Q16[ subfr ];
+
+    /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q31 */
+    if( NSQ->rewhite_flag ) {
+        if( subfr == 0 ) {
+            /* Do LTP downscaling */
+            inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14 ), 2 );
+        }
+        for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) {
+            silk_assert( i < MAX_FRAME_LENGTH );
+            sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] );
+        }
+    }
+
+    /* Adjust for changing gain */
+    if( gain_adj_Q16 != (opus_int32)1 << 16 ) {
+        /* Scale long-term shaping state */
+        __m128i xmm_gain_adj_Q16, xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1;
+
+        /* prepare gain_adj_Q16 in packed 4 32-bits */
+        xmm_gain_adj_Q16 = _mm_set1_epi32(gain_adj_Q16);
+
+        for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx - 3; i += 4 )
+        {
+            xmm_sLTP_shp_Q14_x2x0 = _mm_loadu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ) );
+            /* rotate right by one 32-bit lane so elements 1 and 3 sit in the even lanes read by _mm_mul_epi32 */
+            xmm_sLTP_shp_Q14_x3x1 = _mm_shuffle_epi32( xmm_sLTP_shp_Q14_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) );
+
+            xmm_sLTP_shp_Q14_x2x0 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x2x0, xmm_gain_adj_Q16 );
+            xmm_sLTP_shp_Q14_x3x1 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x3x1, xmm_gain_adj_Q16 );
+
+            xmm_sLTP_shp_Q14_x2x0 = _mm_srli_epi64( xmm_sLTP_shp_Q14_x2x0, 16 );
+            xmm_sLTP_shp_Q14_x3x1 = _mm_slli_epi64( xmm_sLTP_shp_Q14_x3x1, 16 );
+
+            xmm_sLTP_shp_Q14_x2x0 = _mm_blend_epi16( xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1, 0xCC );
+
+            _mm_storeu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ), xmm_sLTP_shp_Q14_x2x0 );
+        }
+        /* scalar tail for the samples left over by the 4-wide loop */
+        for( ; i < NSQ->sLTP_shp_buf_idx; i++ ) {
+            NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] );
+        }
+
+        /* Scale long-term prediction state; skipped after re-whitening because that state was just rebuilt above */
+        if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) {
+            for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) {
+                sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] );
+            }
+        }
+
+        NSQ->sLF_AR_shp_Q14 = silk_SMULWW( gain_adj_Q16, NSQ->sLF_AR_shp_Q14 );
+
+        /* Scale short-term prediction and shaping states */
+        for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) {
+            NSQ->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLPC_Q14[ i ] );
+        }
+        for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) {
+            NSQ->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sAR2_Q14[ i ] );
+        }
+    }
+}
new file mode 100644
--- /dev/null
+++ b/media/libopus/silk/x86/VAD_sse4_1.c
@@ -0,0 +1,277 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+
+#include "main.h"
+#include "stack_alloc.h"
+
+/* Weighting factors for tilt measure */
+static const opus_int32 tiltWeights[ VAD_N_BANDS ] = { 30000, 6000, -12000, -12000 };
+
+/* SSE4.1 variant of the SILK VAD: splits the frame into VAD_N_BANDS bands,    */
+/* measures per-band energy and SNR, and stores the speech activity (Q8), the   */
+/* input tilt (Q15) and the per-band input quality (Q15) in psEncC.             */
+opus_int silk_VAD_GetSA_Q8_sse4_1(                  /* O    Return value, 0 if success                  */
+    silk_encoder_state          *psEncC,            /* I/O  Encoder state                               */
+    const opus_int16            pIn[]               /* I    PCM input                                   */
+)
+{
+    opus_int   SA_Q15, pSNR_dB_Q7, input_tilt;
+    opus_int   decimated_framelength1, decimated_framelength2;
+    opus_int   decimated_framelength;
+    opus_int   dec_subframe_length, dec_subframe_offset, SNR_Q7, i, b, s;
+    opus_int32 sumSquared, smooth_coef_Q16;
+    opus_int16 HPstateTmp;
+    VARDECL( opus_int16, X );
+    opus_int32 Xnrg[ VAD_N_BANDS ];
+    opus_int32 NrgToNoiseRatio_Q8[ VAD_N_BANDS ];
+    opus_int32 speech_nrg, x_tmp;
+    opus_int   X_offset[ VAD_N_BANDS ];
+    opus_int   ret = 0;                             /* never modified below: the function always returns 0 */
+    silk_VAD_state *psSilk_VAD = &psEncC->sVAD;
+
+    SAVE_STACK;
+
+    /* Safety checks */
+    silk_assert( VAD_N_BANDS == 4 );
+    celt_assert( MAX_FRAME_LENGTH >= psEncC->frame_length );
+    celt_assert( psEncC->frame_length <= 512 );
+    celt_assert( psEncC->frame_length == 8 * silk_RSHIFT( psEncC->frame_length, 3 ) );
+
+    /***********************/
+    /* Filter and Decimate */
+    /***********************/
+    decimated_framelength1 = silk_RSHIFT( psEncC->frame_length, 1 );
+    decimated_framelength2 = silk_RSHIFT( psEncC->frame_length, 2 );
+    decimated_framelength = silk_RSHIFT( psEncC->frame_length, 3 );
+    /* Decimate into 4 bands:
+       0       L      3L       L              3L                             5L
+               -      --       -              --                             --
+               8       8       2               4                              4
+
+       [0-1 kHz| temp. |1-2 kHz|    2-4 kHz    |            4-8 kHz           |
+
+       They're arranged to allow the minimal ( frame_length / 4 ) extra
+       scratch space during the downsampling process */
+    X_offset[ 0 ] = 0;
+    X_offset[ 1 ] = decimated_framelength + decimated_framelength2;
+    X_offset[ 2 ] = X_offset[ 1 ] + decimated_framelength;
+    X_offset[ 3 ] = X_offset[ 2 ] + decimated_framelength2;
+    ALLOC( X, X_offset[ 3 ] + decimated_framelength1, opus_int16 );
+
+    /* 0-8 kHz to 0-4 kHz and 4-8 kHz */
+    silk_ana_filt_bank_1( pIn, &psSilk_VAD->AnaState[  0 ],
+        X, &X[ X_offset[ 3 ] ], psEncC->frame_length );
+
+    /* 0-4 kHz to 0-2 kHz and 2-4 kHz */
+    silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState1[ 0 ],
+        X, &X[ X_offset[ 2 ] ], decimated_framelength1 );
+
+    /* 0-2 kHz to 0-1 kHz and 1-2 kHz */
+    silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState2[ 0 ],
+        X, &X[ X_offset[ 1 ] ], decimated_framelength2 );
+
+    /*********************************************/
+    /* HP filter on lowest band (differentiator) */
+    /*********************************************/
+    X[ decimated_framelength - 1 ] = silk_RSHIFT( X[ decimated_framelength - 1 ], 1 );
+    HPstateTmp = X[ decimated_framelength - 1 ];    /* halved last sample, stored as HPstate for the next call */
+    for( i = decimated_framelength - 1; i > 0; i-- ) {
+        X[ i - 1 ]  = silk_RSHIFT( X[ i - 1 ], 1 );
+        X[ i ]     -= X[ i - 1 ];
+    }
+    X[ 0 ] -= psSilk_VAD->HPstate;
+    psSilk_VAD->HPstate = HPstateTmp;
+
+    /*************************************/
+    /* Calculate the energy in each band */
+    /*************************************/
+    for( b = 0; b < VAD_N_BANDS; b++ ) {
+        /* Find the decimated framelength in the non-uniformly divided bands */
+        decimated_framelength = silk_RSHIFT( psEncC->frame_length, silk_min_int( VAD_N_BANDS - b, VAD_N_BANDS - 1 ) );
+
+        /* Split length into subframe lengths */
+        dec_subframe_length = silk_RSHIFT( decimated_framelength, VAD_INTERNAL_SUBFRAMES_LOG2 );
+        dec_subframe_offset = 0;
+
+        /* Compute energy per sub-frame */
+        /* Initialize with the last-subframe energy saved in XnrgSubfr[] by the previous call */
+        Xnrg[ b ] = psSilk_VAD->XnrgSubfr[ b ];
+        for( s = 0; s < VAD_INTERNAL_SUBFRAMES; s++ ) {
+            __m128i xmm_X, xmm_acc;
+            sumSquared = 0;
+
+            xmm_acc = _mm_setzero_si128();          /* four 32-bit partial sums */
+
+            for( i = 0; i < dec_subframe_length - 7; i += 8 )   /* vector part: 8 samples per iteration */
+            {
+                xmm_X   = _mm_loadu_si128( (__m128i *)&(X[ X_offset[ b ] + i + dec_subframe_offset ] ) );
+                xmm_X   = _mm_srai_epi16( xmm_X, 3 );           /* matches the silk_RSHIFT( ..., 3 ) in the scalar tail */
+                xmm_X   = _mm_madd_epi16( xmm_X, xmm_X );       /* squares, adjacent pairs summed into 32-bit lanes */
+                xmm_acc = _mm_add_epi32( xmm_acc, xmm_X );
+            }
+
+            xmm_acc = _mm_add_epi32( xmm_acc, _mm_unpackhi_epi64( xmm_acc, xmm_acc ) );     /* fold upper 64 bits onto lower */
+            xmm_acc = _mm_add_epi32( xmm_acc, _mm_shufflelo_epi16( xmm_acc, 0x0E ) );       /* fold lane 1 onto lane 0 */
+
+            sumSquared += _mm_cvtsi128_si32( xmm_acc );         /* lane 0 now holds the horizontal sum */
+
+            for( ; i < dec_subframe_length; i++ ) {             /* scalar tail: fewer than 8 samples left */
+                /* The energy will be less than dec_subframe_length * ( silk_int16_MIN / 8 ) ^ 2.            */
+                /* Therefore we can accumulate with no risk of overflow (unless dec_subframe_length > 128)  */
+                x_tmp = silk_RSHIFT(
+                    X[ X_offset[ b ] + i + dec_subframe_offset ], 3 );
+                sumSquared = silk_SMLABB( sumSquared, x_tmp, x_tmp );
+
+                /* Safety check */
+                silk_assert( sumSquared >= 0 );
+            }
+
+            /* Add/saturate summed energy of current subframe */
+            if( s < VAD_INTERNAL_SUBFRAMES - 1 ) {
+                Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], sumSquared );
+            } else {
+                /* Look-ahead subframe */
+                Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], silk_RSHIFT( sumSquared, 1 ) );
+            }
+
+            dec_subframe_offset += dec_subframe_length;
+        }
+        psSilk_VAD->XnrgSubfr[ b ] = sumSquared;    /* energy of the last (look-ahead) subframe, read back on the next call */
+    }
+
+    /********************/
+    /* Noise estimation */
+    /********************/
+    silk_VAD_GetNoiseLevels( &Xnrg[ 0 ], psSilk_VAD );
+
+    /***********************************************/
+    /* Signal-plus-noise to noise ratio estimation */
+    /***********************************************/
+    sumSquared = 0;
+    input_tilt = 0;
+    for( b = 0; b < VAD_N_BANDS; b++ ) {
+        speech_nrg = Xnrg[ b ] - psSilk_VAD->NL[ b ];
+        if( speech_nrg > 0 ) {
+            /* Divide, with sufficient resolution */
+            if( ( Xnrg[ b ] & 0xFF800000 ) == 0 ) {
+                NrgToNoiseRatio_Q8[ b ] = silk_DIV32( silk_LSHIFT( Xnrg[ b ], 8 ), psSilk_VAD->NL[ b ] + 1 );
+            } else {
+                NrgToNoiseRatio_Q8[ b ] = silk_DIV32( Xnrg[ b ], silk_RSHIFT( psSilk_VAD->NL[ b ], 8 ) + 1 );
+            }
+
+            /* Convert to log domain */
+            SNR_Q7 = silk_lin2log( NrgToNoiseRatio_Q8[ b ] ) - 8 * 128;
+
+            /* Sum-of-squares */
+            sumSquared = silk_SMLABB( sumSquared, SNR_Q7, SNR_Q7 );          /* Q14 */
+
+            /* Tilt measure */
+            if( speech_nrg < ( (opus_int32)1 << 20 ) ) {
+                /* Scale down SNR value for small subband speech energies */
+                SNR_Q7 = silk_SMULWB( silk_LSHIFT( silk_SQRT_APPROX( speech_nrg ), 6 ), SNR_Q7 );
+            }
+            input_tilt = silk_SMLAWB( input_tilt, tiltWeights[ b ], SNR_Q7 );
+        } else {
+            NrgToNoiseRatio_Q8[ b ] = 256;
+        }
+    }
+
+    /* Mean-of-squares */
+    sumSquared = silk_DIV32_16( sumSquared, VAD_N_BANDS ); /* Q14 */
+
+    /* Root-mean-square approximation, scaled to dB; kept in a local (this function has no SNR output pointer) */
+    pSNR_dB_Q7 = (opus_int16)( 3 * silk_SQRT_APPROX( sumSquared ) ); /* Q7 */
+
+    /*********************************/
+    /* Speech Probability Estimation */
+    /*********************************/
+    SA_Q15 = silk_sigm_Q15( silk_SMULWB( VAD_SNR_FACTOR_Q16, pSNR_dB_Q7 ) - VAD_NEGATIVE_OFFSET_Q5 );
+
+    /**************************/
+    /* Frequency Tilt Measure */
+    /**************************/
+    psEncC->input_tilt_Q15 = silk_LSHIFT( silk_sigm_Q15( input_tilt ) - 16384, 1 );
+
+    /**************************************************/
+    /* Scale the sigmoid output based on power levels */
+    /**************************************************/
+    speech_nrg = 0;
+    for( b = 0; b < VAD_N_BANDS; b++ ) {
+        /* Accumulate signal-without-noise energies, higher frequency bands have more weight */
+        speech_nrg += ( b + 1 ) * silk_RSHIFT( Xnrg[ b ] - psSilk_VAD->NL[ b ], 4 );
+    }
+
+    /* Power scaling */
+    if( speech_nrg <= 0 ) {
+        SA_Q15 = silk_RSHIFT( SA_Q15, 1 );
+    } else if( speech_nrg < 32768 ) {
+        if( psEncC->frame_length == 10 * psEncC->fs_kHz ) {
+            speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 16 );
+        } else {
+            speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 15 );
+        }
+
+        /* square-root */
+        speech_nrg = silk_SQRT_APPROX( speech_nrg );
+        SA_Q15 = silk_SMULWB( 32768 + speech_nrg, SA_Q15 );
+    }
+
+    /* Store the resulting speech activity in Q8, clamped to silk_uint8_MAX */
+    psEncC->speech_activity_Q8 = silk_min_int( silk_RSHIFT( SA_Q15, 7 ), silk_uint8_MAX );
+
+    /***********************************/
+    /* Energy Level and SNR estimation */
+    /***********************************/
+    /* Smoothing coefficient */
+    smooth_coef_Q16 = silk_SMULWB( VAD_SNR_SMOOTH_COEF_Q18, silk_SMULWB( (opus_int32)SA_Q15, SA_Q15 ) );
+
+    if( psEncC->frame_length == 10 * psEncC->fs_kHz ) {
+        smooth_coef_Q16 >>= 1;
+    }
+
+    for( b = 0; b < VAD_N_BANDS; b++ ) {
+        /* compute smoothed energy-to-noise ratio per band */
+        psSilk_VAD->NrgRatioSmth_Q8[ b ] = silk_SMLAWB( psSilk_VAD->NrgRatioSmth_Q8[ b ],
+            NrgToNoiseRatio_Q8[ b ] - psSilk_VAD->NrgRatioSmth_Q8[ b ], smooth_coef_Q16 );
+
+        /* signal to noise ratio in dB per band */
+        SNR_Q7 = 3 * ( silk_lin2log( psSilk_VAD->NrgRatioSmth_Q8[b] ) - 8 * 128 );
+        /* quality = sigmoid( 0.25 * ( SNR_dB - 16 ) ); */
+        psEncC->input_quality_bands_Q15[ b ] = silk_sigm_Q15( silk_RSHIFT( SNR_Q7 - 16 * 128, 4 ) );
+    }
+
+    RESTORE_STACK;
+    return( ret );
+}
new file mode 100644
--- /dev/null
+++ b/media/libopus/silk/x86/VQ_WMat_EC_sse4_1.c
@@ -0,0 +1,142 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+#include "main.h"
+#include "celt/x86/x86cpu.h"
+
+/* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector (SSE4.1 variant) */
+void silk_VQ_WMat_EC_sse4_1(
+    opus_int8                   *ind,                           /* O    index of best codebook vector               */
+    opus_int32                  *rate_dist_Q14,                 /* O    best weighted quant error + mu * rate       */
+    opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
+    const opus_int16            *in_Q14,                        /* I    input vector to be quantized                */
+    const opus_int32            *W_Q18,                         /* I    weighting matrix                            */
+    const opus_int8             *cb_Q7,                         /* I    codebook                                    */
+    const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
+    const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
+    const opus_int              mu_Q9,                          /* I    tradeoff betw. weighted error and rate      */
+    const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
+    opus_int                    L                               /* I    number of vectors in codebook               */
+)
+{
+    opus_int   k, gain_tmp_Q7;
+    const opus_int8 *cb_row_Q7;
+    opus_int16 diff_Q14[ 5 ];
+    opus_int32 sum1_Q14, sum2_Q16;
+
+    __m128i C_tmp1, C_tmp2, C_tmp3, C_tmp4, C_tmp5;
+    /* Loop over codebook; start from the largest possible cost so the first candidate always wins */
+    *rate_dist_Q14 = silk_int32_MAX;
+    cb_row_Q7 = cb_Q7;
+    for( k = 0; k < L; k++ ) {
+        gain_tmp_Q7 = cb_gain_Q7[k];
+
+        diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 );     /* element 0 is handled in scalar code */
+
+        C_tmp1 = OP_CVTEPI16_EPI32_M64( &in_Q14[ 1 ] );     /* elements 1..4; presumably widens 4 x int16 to 32-bit lanes (macro not shown here) */
+        C_tmp2 = OP_CVTEPI8_EPI32_M32( &cb_row_Q7[ 1 ] );   /* presumably widens 4 x int8 to 32-bit lanes (macro not shown here) */
+        C_tmp2 = _mm_slli_epi32( C_tmp2, 7 );
+        C_tmp1 = _mm_sub_epi32( C_tmp1, C_tmp2 );
+
+        diff_Q14[ 1 ] = _mm_extract_epi16( C_tmp1, 0 );     /* low 16 bits of 32-bit lane 0 */
+        diff_Q14[ 2 ] = _mm_extract_epi16( C_tmp1, 2 );     /* low 16 bits of 32-bit lane 1 */
+        diff_Q14[ 3 ] = _mm_extract_epi16( C_tmp1, 4 );     /* low 16 bits of 32-bit lane 2 */
+        diff_Q14[ 4 ] = _mm_extract_epi16( C_tmp1, 6 );     /* low 16 bits of 32-bit lane 3 */
+
+        /* Weighted rate */
+        sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] );
+
+        /* Penalty for too large gain */
+        sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 );
+
+        silk_assert( sum1_Q14 >= 0 );
+
+        /* first row of W_Q18 */
+        C_tmp3 = _mm_loadu_si128( (__m128i *)(&W_Q18[ 1 ] ) );
+        C_tmp4 = _mm_mul_epi32( C_tmp3, C_tmp1 );           /* 64-bit products of 32-bit lanes 0 and 2 */
+        C_tmp4 = _mm_srli_si128( C_tmp4, 2 );               /* whole-register byte shift: 2 bytes == 16 bits */
+
+        C_tmp1 = _mm_shuffle_epi32( C_tmp1, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* rotate 32-bit lanes down by one (4 bytes) */
+        C_tmp3 = _mm_shuffle_epi32( C_tmp3, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* rotate 32-bit lanes down by one (4 bytes) */
+
+        C_tmp5 = _mm_mul_epi32( C_tmp3, C_tmp1 );           /* after the rotate: products of the original lanes 1 and 3 */
+        C_tmp5 = _mm_srli_si128( C_tmp5, 2 );               /* whole-register byte shift: 2 bytes == 16 bits */
+
+        C_tmp5 = _mm_add_epi32( C_tmp4, C_tmp5 );
+        C_tmp5 = _mm_slli_epi32( C_tmp5, 1 );
+
+        C_tmp5 = _mm_add_epi32( C_tmp5, _mm_shuffle_epi32( C_tmp5, _MM_SHUFFLE( 0, 0, 0, 2 ) ) );   /* add lane 2 into lane 0 */
+        sum2_Q16 = _mm_cvtsi128_si32( C_tmp5 );             /* take lane 0 */
+
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  0 ], diff_Q14[ 0 ] );
+        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 0 ] );
+
+        /* second row of W_Q18 */
+        sum2_Q16 = silk_SMULWB(           W_Q18[  7 ], diff_Q14[ 2 ] );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  8 ], diff_Q14[ 3 ] );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  9 ], diff_Q14[ 4 ] );
+        sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  6 ], diff_Q14[ 1 ] );
+        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 1 ] );
+
+        /* third row of W_Q18 */
+        sum2_Q16 = silk_SMULWB(           W_Q18[ 13 ], diff_Q14[ 3 ] );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 14 ], diff_Q14[ 4 ] );
+        sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 12 ], diff_Q14[ 2 ] );
+        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 2 ] );
+
+        /* fourth row of W_Q18 */
+        sum2_Q16 = silk_SMULWB(           W_Q18[ 19 ], diff_Q14[ 4 ] );
+        sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 18 ], diff_Q14[ 3 ] );
+        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 3 ] );
+
+        /* last row of W_Q18 */
+        sum2_Q16 = silk_SMULWB(           W_Q18[ 24 ], diff_Q14[ 4 ] );
+        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 4 ] );
+
+        silk_assert( sum1_Q14 >= 0 );
+
+        /* find best: strict '<' keeps the earliest codebook entry on ties */
+        if( sum1_Q14 < *rate_dist_Q14 ) {
+            *rate_dist_Q14 = sum1_Q14;
+            *ind = (opus_int8)k;
+            *gain_Q7 = gain_tmp_Q7;
+        }
+
+        /* Go to next cbk vector */
+        cb_row_Q7 += LTP_ORDER;
+    }
+}
--- a/media/libopus/sources.mozbuild
+++ b/media/libopus/sources.mozbuild
@@ -34,17 +34,17 @@ celt_sources_sse = [
 ]
 
 celt_sources_sse2 = [
     'celt/x86/pitch_sse2.c',
     'celt/x86/vq_sse2.c',
 ]
 
 celt_sources_sse4_1 = [
-    'celt/x86/celt_lpc_sse.c',
+    'celt/x86/celt_lpc_sse4_1.c',
     'celt/x86/pitch_sse4_1.c',
 ]
 
 celt_sources_arm = [
     'celt/arm/arm_celt_map.c',
     'celt/arm/armcpu.c',
 ]
 
@@ -57,27 +57,30 @@ celt_am_sources_arm_asm = [
 ]
 
 celt_sources_arm_neon_intr = [
     'celt/arm/celt_neon_intr.c',
     'celt/arm/pitch_neon_intr.c',
 ]
 
 celt_sources_arm_ne10 = [
-    'celt/arm/celt_ne10_fft.c',
-    'celt/arm/celt_ne10_mdct.c',
+    'celt/arm/celt_fft_ne10.c',
+    'celt/arm/celt_mdct_ne10.c',
 ]
 
 opus_sources = [
+    'src/mapping_matrix.c',
     'src/opus.c',
     'src/opus_decoder.c',
     'src/opus_encoder.c',
     'src/opus_multistream.c',
     'src/opus_multistream_decoder.c',
     'src/opus_multistream_encoder.c',
+    'src/opus_projection_decoder.c',
+    'src/opus_projection_encoder.c',
     'src/repacketizer.c',
 ]
 
 opus_sources_float = [
     'src/analysis.c',
     'src/mlp.c',
     'src/mlp_data.c',
 ]
@@ -156,20 +159,20 @@ silk_sources = [
     'silk/tables_other.c',
     'silk/tables_pitch_lag.c',
     'silk/tables_pulses_per_block.c',
     'silk/VAD.c',
     'silk/VQ_WMat_EC.c',
 ]
 
 silk_sources_sse4_1 = [
-    'silk/x86/NSQ_del_dec_sse.c',
-    'silk/x86/NSQ_sse.c',
-    'silk/x86/VAD_sse.c',
-    'silk/x86/VQ_WMat_EC_sse.c',
+    'silk/x86/NSQ_del_dec_sse4_1.c',
+    'silk/x86/NSQ_sse4_1.c',
+    'silk/x86/VAD_sse4_1.c',
+    'silk/x86/VQ_WMat_EC_sse4_1.c',
     'silk/x86/x86_silk_map.c',
 ]
 
 silk_sources_arm_neon_intr = [
     'silk/arm/arm_silk_map.c',
     'silk/arm/biquad_alt_neon_intr.c',
     'silk/arm/LPC_inv_pred_gain_neon_intr.c',
     'silk/arm/NSQ_del_dec_neon_intr.c',
@@ -198,18 +201,18 @@ silk_sources_fixed = [
     'silk/fixed/residual_energy_FIX.c',
     'silk/fixed/schur64_FIX.c',
     'silk/fixed/schur_FIX.c',
     'silk/fixed/vector_ops_FIX.c',
     'silk/fixed/warped_autocorrelation_FIX.c',
 ]
 
 silk_sources_fixed_sse4_1 = [
-    'silk/fixed/x86/burg_modified_FIX_sse.c',
-    'silk/fixed/x86/vector_ops_FIX_sse.c',
+    'silk/fixed/x86/burg_modified_FIX_sse4_1.c',
+    'silk/fixed/x86/vector_ops_FIX_sse4_1.c',
 ]
 
 silk_sources_fixed_arm_neon_intr = [
     'silk/fixed/arm/warped_autocorrelation_FIX_neon_intr.c',
 ]
 
 silk_sources_float = [
     'silk/float/apply_sine_window_FLP.c',
--- a/media/libopus/src/analysis.c
+++ b/media/libopus/src/analysis.c
@@ -45,16 +45,18 @@
 #include "float_cast.h"
 
 #ifndef M_PI
 #define M_PI 3.141592653
 #endif
 
 #ifndef DISABLE_FLOAT_API
 
+#define TRANSITION_PENALTY 10
+
 static const float dct_table[128] = {
         0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
         0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
         0.351851f, 0.338330f, 0.311806f, 0.273300f, 0.224292f, 0.166664f, 0.102631f, 0.034654f,
        -0.034654f,-0.102631f,-0.166664f,-0.224292f,-0.273300f,-0.311806f,-0.338330f,-0.351851f,
         0.346760f, 0.293969f, 0.196424f, 0.068975f,-0.068975f,-0.196424f,-0.293969f,-0.346760f,
        -0.346760f,-0.293969f,-0.196424f,-0.068975f, 0.068975f, 0.196424f, 0.293969f, 0.346760f,
         0.338330f, 0.224292f, 0.034654f,-0.166664f,-0.311806f,-0.351851f,-0.273300f,-0.102631f,
@@ -219,29 +221,33 @@ void tonality_analysis_init(TonalityAnal
   tonality_analysis_reset(tonal);
 }
 
 void tonality_analysis_reset(TonalityAnalysisState *tonal)
 {
   /* Clear non-reusable fields. */
   char *start = (char*)&tonal->TONALITY_ANALYSIS_RESET_START;
   OPUS_CLEAR(start, sizeof(TonalityAnalysisState) - (start - (char*)tonal));
-  tonal->music_confidence = .9f;
-  tonal->speech_confidence = .1f;
 }
 
 void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len)
 {
    int pos;
    int curr_lookahead;
-   float psum;
    float tonality_max;
    float tonality_avg;
    int tonality_count;
    int i;
+   int pos0;
+   float prob_avg;
+   float prob_count;
+   float prob_min, prob_max;
+   float vad_prob;
+   int mpos, vpos;
+   int bandwidth_span;
 
    pos = tonal->read_pos;
    curr_lookahead = tonal->write_pos-tonal->read_pos;
    if (curr_lookahead<0)
       curr_lookahead += DETECT_SIZE;
 
    /* On long frames, look at the second analysis window rather than the first. */
    if (len > tonal->Fs/50 && pos != tonal->write_pos)
@@ -249,55 +255,157 @@ void tonality_get_info(TonalityAnalysisS
       pos++;
       if (pos==DETECT_SIZE)
          pos=0;
    }
    if (pos == tonal->write_pos)
       pos--;
    if (pos<0)
       pos = DETECT_SIZE-1;
+   pos0 = pos;
    OPUS_COPY(info_out, &tonal->info[pos], 1);
    tonality_max = tonality_avg = info_out->tonality;
    tonality_count = 1;
+   /* Look at the neighbouring frames and pick largest bandwidth found (to be safe). */
+   bandwidth_span = 6;
    /* If possible, look ahead for a tone to compensate for the delay in the tone detector. */
    for (i=0;i<3;i++)
    {
       pos++;
       if (pos==DETECT_SIZE)
          pos = 0;
       if (pos == tonal->write_pos)
          break;
       tonality_max = MAX32(tonality_max, tonal->info[pos].tonality);
       tonality_avg += tonal->info[pos].tonality;
       tonality_count++;
+      info_out->bandwidth = IMAX(info_out->bandwidth, tonal->info[pos].bandwidth);
+      bandwidth_span--;
+   }
+   pos = pos0;
+   /* Look back in time to see if any has a wider bandwidth than the current frame. */
+   for (i=0;i<bandwidth_span;i++)
+   {
+      pos--;
+      if (pos < 0)
+         pos = DETECT_SIZE-1;
+      if (pos == tonal->write_pos)
+         break;
+      info_out->bandwidth = IMAX(info_out->bandwidth, tonal->info[pos].bandwidth);
    }
    info_out->tonality = MAX32(tonality_avg/tonality_count, tonality_max-.2f);
+
+   mpos = vpos = pos0;
+   /* If we have enough look-ahead, compensate for the ~5-frame delay in the music prob and
+      ~1 frame delay in the VAD prob. */
+   if (curr_lookahead > 15)
+   {
+      mpos += 5;
+      if (mpos>=DETECT_SIZE)
+         mpos -= DETECT_SIZE;
+      vpos += 1;
+      if (vpos>=DETECT_SIZE)
+         vpos -= DETECT_SIZE;
+   }
+
+   /* The following calculations attempt to minimize a "badness function"
+      for the transition. When switching from speech to music, the badness
+      of switching at frame k is
+      b_k = S*v_k + \sum_{i=0}^{k-1} v_i*(p_i - T)
+      where
+      v_i is the activity probability (VAD) at frame i,
+      p_i is the music probability at frame i
+      T is the probability threshold for switching
+      S is the penalty for switching during active audio rather than silence
+      the current frame has index i=0
+
+      Rather than apply badness to directly decide when to switch, what we compute
+      instead is the threshold for which the optimal switching point is now. When
+      considering whether to switch now (frame 0) or at frame k, we have:
+      S*v_0 = S*v_k + \sum_{i=0}^{k-1} v_i*(p_i - T)
+      which gives us:
+      T = ( \sum_{i=0}^{k-1} v_i*p_i + S*(v_k-v_0) ) / ( \sum_{i=0}^{k-1} v_i )
+      We take the min threshold across all positive values of k (up to the maximum
+      amount of lookahead we have) to give us the threshold for which the current
+      frame is the optimal switch point.
+
+      The last step is that we need to consider whether we want to switch at all.
+      For that we use the average of the music probability over the entire window.
+      If the threshold is higher than that average we're not going to
+      switch, so we compute a min with the average as well. The result of all these
+      min operations is music_prob_min, which gives the threshold for switching to music
+      if we're currently encoding for speech.
+
+      We do the exact opposite to compute music_prob_max which is used for switching
+      from music to speech.
+    */
+   prob_min = 1.f;
+   prob_max = 0.f;
+   vad_prob = tonal->info[vpos].activity_probability;
+   prob_count = MAX16(.1f, vad_prob);
+   prob_avg = MAX16(.1f, vad_prob)*tonal->info[mpos].music_prob;
+   while (1)
+   {
+      float pos_vad;
+      mpos++;
+      if (mpos==DETECT_SIZE)
+         mpos = 0;
+      if (mpos == tonal->write_pos)
+         break;
+      vpos++;
+      if (vpos==DETECT_SIZE)
+         vpos = 0;
+      if (vpos == tonal->write_pos)
+         break;
+      pos_vad = tonal->info[vpos].activity_probability;
+      prob_min = MIN16((prob_avg - TRANSITION_PENALTY*(vad_prob - pos_vad))/prob_count, prob_min);
+      prob_max = MAX16((prob_avg + TRANSITION_PENALTY*(vad_prob - pos_vad))/prob_count, prob_max);
+      prob_count += MAX16(.1f, pos_vad);
+      prob_avg += MAX16(.1f, pos_vad)*tonal->info[mpos].music_prob;
+   }
+   info_out->music_prob = prob_avg/prob_count;
+   prob_min = MIN16(prob_avg/prob_count, prob_min);
+   prob_max = MAX16(prob_avg/prob_count, prob_max);
+   prob_min = MAX16(prob_min, 0.f);
+   prob_max = MIN16(prob_max, 1.f);
+
+   /* If we don't have enough look-ahead, do our best to make a decent decision. */
+   if (curr_lookahead < 10)
+   {
+      float pmin, pmax;
+      pmin = prob_min;
+      pmax = prob_max;
+      pos = pos0;
+      /* Look for min/max in the past. */
+      for (i=0;i<IMIN(tonal->count-1, 15);i++)
+      {
+         pos--;
+         if (pos < 0)
+            pos = DETECT_SIZE-1;
+         pmin = MIN16(pmin, tonal->info[pos].music_prob);
+         pmax = MAX16(pmax, tonal->info[pos].music_prob);
+      }
+      /* Bias against switching on active audio. */
+      pmin = MAX16(0.f, pmin - .1f*vad_prob);
+      pmax = MIN16(1.f, pmax + .1f*vad_prob);
+      prob_min += (1.f-.1f*curr_lookahead)*(pmin - prob_min);
+      prob_max += (1.f-.1f*curr_lookahead)*(pmax - prob_max);
+   }
+   info_out->music_prob_min = prob_min;
+   info_out->music_prob_max = prob_max;
+
+   /* printf("%f %f %f %f %f\n", prob_min, prob_max, prob_avg/prob_count, vad_prob, info_out->music_prob); */
    tonal->read_subframe += len/(tonal->Fs/400);
    while (tonal->read_subframe>=8)
    {
       tonal->read_subframe -= 8;
       tonal->read_pos++;
    }
    if (tonal->read_pos>=DETECT_SIZE)
       tonal->read_pos-=DETECT_SIZE;
-
-   /* The -1 is to compensate for the delay in the features themselves. */
-   curr_lookahead = IMAX(curr_lookahead-1, 0);
-
-   psum=0;
-   /* Summing the probability of transition patterns that involve music at
-      time (DETECT_SIZE-curr_lookahead-1) */
-   for (i=0;i<DETECT_SIZE-curr_lookahead;i++)
-      psum += tonal->pmusic[i];
-   for (;i<DETECT_SIZE;i++)
-      psum += tonal->pspeech[i];
-   psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence;
-   /*printf("%f %f %f %f %f\n", psum, info_out->music_prob, info_out->vad_prob, info_out->activity_probability, info_out->tonality);*/
-
-   info_out->music_prob = psum;
 }
 
 static const float std_feature_bias[9] = {
       5.684947f, 3.475288f, 1.770634f, 1.599784f, 3.773215f,
       2.163313f, 1.260756f, 1.116868f, 1.918795f
 };
 
 #define LEAKAGE_OFFSET 2.5f
@@ -335,50 +443,50 @@ static void tonality_analysis(TonalityAn
     const float pi4 = (float)(M_PI*M_PI*M_PI*M_PI);
     float slope=0;
     float frame_stationarity;
     float relativeE;
     float frame_probs[2];
     float alpha, alphaE, alphaE2;
     float frame_loudness;
     float bandwidth_mask;
+    int is_masked[NB_TBANDS+1];
     int bandwidth=0;
     float maxE = 0;
     float noise_floor;
     int remaining;
     AnalysisInfo *info;
     float hp_ener;
     float tonality2[240];
     float midE[8];
     float spec_variability=0;
     float band_log2[NB_TBANDS+1];
     float leakage_from[NB_TBANDS+1];
     float leakage_to[NB_TBANDS+1];
+    float layer_out[MAX_NEURONS];
+    float below_max_pitch;
+    float above_max_pitch;
     SAVE_STACK;
 
     alpha = 1.f/IMIN(10, 1+tonal->count);
     alphaE = 1.f/IMIN(25, 1+tonal->count);
-    alphaE2 = 1.f/IMIN(500, 1+tonal->count);
+    /* Noise floor related decay for bandwidth detection: -2.2 dB/second */
+    alphaE2 = 1.f/IMIN(100, 1+tonal->count);
+    if (tonal->count <= 1) alphaE2 = 1;
 
     if (tonal->Fs == 48000)
     {
        /* len and offset are now at 24 kHz. */
        len/= 2;
        offset /= 2;
     } else if (tonal->Fs == 16000) {
        len = 3*len/2;
        offset = 3*offset/2;
     }
 
-    if (tonal->count<4) {
-       if (tonal->application == OPUS_APPLICATION_VOIP)
-          tonal->music_prob = .1f;
-       else
-          tonal->music_prob = .625f;
-    }
     kfft = celt_mode->mdct.kfft[0];
     if (tonal->count==0)
        tonal->mem_fill = 240;
     tonal->hp_ener_accum += (float)downmix_and_resample(downmix, x,
           &tonal->inmem[tonal->mem_fill], tonal->downmix_state,
           IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C, tonal->Fs);
     if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)
     {
@@ -627,66 +735,87 @@ static void tonality_analysis(TonalityAn
        spec_variability += mindist;
     }
     spec_variability = (float)sqrt(spec_variability/NB_FRAMES/NB_TBANDS);
     bandwidth_mask = 0;
     bandwidth = 0;
     maxE = 0;
     noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8)));
     noise_floor *= noise_floor;
+    below_max_pitch=0;
+    above_max_pitch=0;
     for (b=0;b<NB_TBANDS;b++)
     {
        float E=0;
+       float Em;
        int band_start, band_end;
        /* Keep a margin of 300 Hz for aliasing */
        band_start = tbands[b];
        band_end = tbands[b+1];
        for (i=band_start;i<band_end;i++)
        {
           float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r
                      + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i;
           E += binE;
        }
        E = SCALE_ENER(E);
        maxE = MAX32(maxE, E);
+       if (band_start < 64)
+       {
+          below_max_pitch += E;
+       } else {
+          above_max_pitch += E;
+       }
        tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E);
-       E = MAX32(E, tonal->meanE[b]);
-       /* Use a simple follower with 13 dB/Bark slope for spreading function */
-       bandwidth_mask = MAX32(.05f*bandwidth_mask, E);
+       Em = MAX32(E, tonal->meanE[b]);
        /* Consider the band "active" only if all these conditions are met:
-          1) less than 10 dB below the simple follower
-          2) less than 90 dB below the peak band (maximal masking possible considering
+          1) less than 90 dB below the peak band (maximal masking possible considering
              both the ATH and the loudness-dependent slope of the spreading function)
-          3) above the PCM quantization noise floor
+          2) above the PCM quantization noise floor
           We use b+1 because the first CELT band isn't included in tbands[]
        */
-       if (E>.1*bandwidth_mask && E*1e9f > maxE && E > noise_floor*(band_end-band_start))
+       if (E*1e9f > maxE && (Em > 3*noise_floor*(band_end-band_start) || E > noise_floor*(band_end-band_start)))
           bandwidth = b+1;
+       /* Check if the band is masked (see below). */
+       is_masked[b] = E < (tonal->prev_bandwidth >= b+1  ? .01f : .05f)*bandwidth_mask;
+       /* Use a simple follower with 13 dB/Bark slope for spreading function. */
+       bandwidth_mask = MAX32(.05f*bandwidth_mask, E);
     }
     /* Special case for the last two bands, for which we don't have spectrum but only
-       the energy above 12 kHz. */
+       the energy above 12 kHz. The difficulty here is that the high-pass we use
+       leaks some LF energy, so we need to increase the threshold without accidentally cutting
+       off the band. */
     if (tonal->Fs == 48000) {
-       float ratio;
-       float E = hp_ener*(1.f/(240*240));
-       ratio = tonal->prev_bandwidth==20 ? 0.03f : 0.07f;
+       float noise_ratio;
+       float Em;
+       float E = hp_ener*(1.f/(60*60));
+       noise_ratio = tonal->prev_bandwidth==20 ? 10.f : 30.f;
+
 #ifdef FIXED_POINT
        /* silk_resampler_down2_hp() shifted right by an extra 8 bits. */
        E *= 256.f*(1.f/Q15ONE)*(1.f/Q15ONE);
 #endif
-       maxE = MAX32(maxE, E);
+       above_max_pitch += E;
        tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E);
-       E = MAX32(E, tonal->meanE[b]);
-       /* Use a simple follower with 13 dB/Bark slope for spreading function */
-       bandwidth_mask = MAX32(.05f*bandwidth_mask, E);
-       if (E>ratio*bandwidth_mask && E*1e9f > maxE && E > noise_floor*160)
+       Em = MAX32(E, tonal->meanE[b]);
+       if (Em > 3*noise_ratio*noise_floor*160 || E > noise_ratio*noise_floor*160)
           bandwidth = 20;
-       /* This detector is unreliable, so if the bandwidth is close to SWB, assume it's FB. */
-       if (bandwidth >= 17)
-          bandwidth = 20;
+       /* Check if the band is masked (see below). */
+       is_masked[b] = E < (tonal->prev_bandwidth == 20  ? .01f : .05f)*bandwidth_mask;
     }
+    if (above_max_pitch > below_max_pitch)
+       info->max_pitch_ratio = below_max_pitch/above_max_pitch;
+    else
+       info->max_pitch_ratio = 1;
+    /* In some cases, resampling aliasing can create a small amount of energy in the first band
+       being cut. So if the last band is masked, we don't include it.  */
+    if (bandwidth == 20 && is_masked[NB_TBANDS])
+       bandwidth-=2;
+    else if (bandwidth > 0 && bandwidth <= NB_TBANDS && is_masked[bandwidth-1])
+       bandwidth--;
     if (tonal->count<=2)
        bandwidth = 20;
     frame_loudness = 20*(float)log10(frame_loudness);
     tonal->Etracker = MAX32(tonal->Etracker-.003f, frame_loudness);
     tonal->lowECount *= (1-alphaE);
     if (frame_loudness < tonal->Etracker-30)
        tonal->lowECount += alphaE;
 
@@ -756,149 +885,27 @@ static void tonality_analysis(TonalityAn
        features[11+i] = (float)sqrt(tonal->std[i]) - std_feature_bias[i];
     features[18] = spec_variability - 0.78f;
     features[20] = info->tonality - 0.154723f;
     features[21] = info->activity - 0.724643f;
     features[22] = frame_stationarity - 0.743717f;
     features[23] = info->tonality_slope + 0.069216f;
     features[24] = tonal->lowECount - 0.067930f;
 
-    mlp_process(&net, features, frame_probs);
-    frame_probs[0] = .5f*(frame_probs[0]+1);
-    /* Curve fitting between the MLP probability and the actual probability */
-    /*frame_probs[0] = .01f + 1.21f*frame_probs[0]*frame_probs[0] - .23f*(float)pow(frame_probs[0], 10);*/
-    /* Probability of active audio (as opposed to silence) */
-    frame_probs[1] = .5f*frame_probs[1]+.5f;
-    frame_probs[1] *= frame_probs[1];
+    compute_dense(&layer0, layer_out, features);
+    compute_gru(&layer1, tonal->rnn_state, layer_out);
+    compute_dense(&layer2, frame_probs, tonal->rnn_state);
 
     /* Probability of speech or music vs noise */
     info->activity_probability = frame_probs[1];
-
-    /*printf("%f %f\n", frame_probs[0], frame_probs[1]);*/
-    {
-       /* Probability of state transition */
-       float tau;
-       /* Represents independence of the MLP probabilities, where
-          beta=1 means fully independent. */
-       float beta;
-       /* Denormalized probability of speech (p0) and music (p1) after update */
-       float p0, p1;
-       /* Probabilities for "all speech" and "all music" */
-       float s0, m0;
-       /* Probability sum for renormalisation */
-       float psum;
-       /* Instantaneous probability of speech and music, with beta pre-applied. */
-       float speech0;
-       float music0;
-       float p, q;
-
-       /* More silence transitions for speech than for music. */
-       tau = .001f*tonal->music_prob + .01f*(1-tonal->music_prob);
-       p = MAX16(.05f,MIN16(.95f,frame_probs[1]));
-       q = MAX16(.05f,MIN16(.95f,tonal->vad_prob));
-       beta = .02f+.05f*ABS16(p-q)/(p*(1-q)+q*(1-p));
-       /* p0 and p1 are the probabilities of speech and music at this frame
-          using only information from previous frame and applying the
-          state transition model */
-       p0 = (1-tonal->vad_prob)*(1-tau) +    tonal->vad_prob *tau;
-       p1 =    tonal->vad_prob *(1-tau) + (1-tonal->vad_prob)*tau;
-       /* We apply the current probability with exponent beta to work around
-          the fact that the probability estimates aren't independent. */
-       p0 *= (float)pow(1-frame_probs[1], beta);
-       p1 *= (float)pow(frame_probs[1], beta);
-       /* Normalise the probabilities to get the Marokv probability of music. */
-       tonal->vad_prob = p1/(p0+p1);
-       info->vad_prob = tonal->vad_prob;
-       /* Consider that silence has a 50-50 probability of being speech or music. */
-       frame_probs[0] = tonal->vad_prob*frame_probs[0] + (1-tonal->vad_prob)*.5f;
-
-       /* One transition every 3 minutes of active audio */
-       tau = .0001f;
-       /* Adapt beta based on how "unexpected" the new prob is */
-       p = MAX16(.05f,MIN16(.95f,frame_probs[0]));
-       q = MAX16(.05f,MIN16(.95f,tonal->music_prob));
-       beta = .02f+.05f*ABS16(p-q)/(p*(1-q)+q*(1-p));
-       /* p0 and p1 are the probabilities of speech and music at this frame
-          using only information from previous frame and applying the
-          state transition model */
-       p0 = (1-tonal->music_prob)*(1-tau) +    tonal->music_prob *tau;
-       p1 =    tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau;
-       /* We apply the current probability with exponent beta to work around
-          the fact that the probability estimates aren't independent. */
-       p0 *= (float)pow(1-frame_probs[0], beta);
-       p1 *= (float)pow(frame_probs[0], beta);
-       /* Normalise the probabilities to get the Marokv probability of music. */
-       tonal->music_prob = p1/(p0+p1);
-       info->music_prob = tonal->music_prob;
+    /* It seems like the RNN tends to have a bias towards speech and this
+       warping of the probabilities compensates for it. */
+    info->music_prob = MAX16(1.f-10.f*(1.f-frame_probs[0]), MIN16(10.f*frame_probs[0], .12f+.69f*frame_probs[0]*(2.f-frame_probs[0])));
 
-       /*printf("%f %f %f %f\n", frame_probs[0], frame_probs[1], tonal->music_prob, tonal->vad_prob);*/
-       /* This chunk of code deals with delayed decision. */
-       psum=1e-20f;
-       /* Instantaneous probability of speech and music, with beta pre-applied. */
-       speech0 = (float)pow(1-frame_probs[0], beta);
-       music0  = (float)pow(frame_probs[0], beta);
-       if (tonal->count==1)
-       {
-          if (tonal->application == OPUS_APPLICATION_VOIP)
-             tonal->pmusic[0] = .1f;
-          else
-             tonal->pmusic[0] = .625f;
-          tonal->pspeech[0] = 1-tonal->pmusic[0];
-       }
-       /* Updated probability of having only speech (s0) or only music (m0),
-          before considering the new observation. */
-       s0 = tonal->pspeech[0] + tonal->pspeech[1];
-       m0 = tonal->pmusic [0] + tonal->pmusic [1];
-       /* Updates s0 and m0 with instantaneous probability. */
-       tonal->pspeech[0] = s0*(1-tau)*speech0;
-       tonal->pmusic [0] = m0*(1-tau)*music0;
-       /* Propagate the transition probabilities */
-       for (i=1;i<DETECT_SIZE-1;i++)
-       {
-          tonal->pspeech[i] = tonal->pspeech[i+1]*speech0;
-          tonal->pmusic [i] = tonal->pmusic [i+1]*music0;
-       }