Bug 1288091 - Update to libopus 1.1.3. r=jmspeex
author Ralph Giles <giles@mozilla.com>
Tue, 19 Jul 2016 18:06:20 -0400
changeset 345954 2bf27d83f47943d10f2f22e332a4af5793cd3f5a
parent 345953 aa1eab6436badfd108056b392c06ee1c95e533dc
child 345955 e749fcac8cb38b6b1189cba8e29cd3d9f4e52474
push id 6389
push user raliiev@mozilla.com
push date Mon, 19 Sep 2016 13:38:22 +0000
treeherder mozilla-beta@01d67bfe6c81 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jmspeex
bugs 1288091
milestone 50.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1288091 - Update to libopus 1.1.3. r=jmspeex

New upstream release. This is a minor release focusing mainly on optimizations and bug fixes.

- Neon optimizations improving performance on ARMv7 and ARMv8 by up to 15%
- Fixes some issues with 16-bit platforms (e.g. TI C55x)
- Fixes to comfort noise generation (CNG)
- Documenting that PLC packets can also be 2 bytes
- Includes experimental ambisonics work (--enable-ambisonics)

MozReview-Commit-ID: IcdnCok500X
media/libopus/README_MOZILLA
media/libopus/celt/arch.h
media/libopus/celt/arm/arm_celt_map.c
media/libopus/celt/arm/armcpu.c
media/libopus/celt/arm/armcpu.h
media/libopus/celt/arm/celt_neon_intr.c
media/libopus/celt/arm/fixed_arm64.h
media/libopus/celt/arm/pitch_arm.h
media/libopus/celt/bands.c
media/libopus/celt/celt.h
media/libopus/celt/celt_decoder.c
media/libopus/celt/celt_encoder.c
media/libopus/celt/celt_lpc.c
media/libopus/celt/cwrs.c
media/libopus/celt/fixed_generic.h
media/libopus/celt/kiss_fft.c
media/libopus/celt/mathops.c
media/libopus/celt/pitch.c
media/libopus/celt/pitch.h
media/libopus/celt/rate.c
media/libopus/celt/vq.c
media/libopus/celt/x86/pitch_sse.h
media/libopus/celt/x86/x86_celt_map.c
media/libopus/celt/x86/x86cpu.c
media/libopus/include/opus.h
media/libopus/include/opus_defines.h
media/libopus/include/opus_multistream.h
media/libopus/moz.build
media/libopus/silk/CNG.c
media/libopus/silk/NLSF_del_dec_quant.c
media/libopus/silk/NLSF_encode.c
media/libopus/silk/NSQ.c
media/libopus/silk/NSQ.h
media/libopus/silk/NSQ_del_dec.c
media/libopus/silk/PLC.c
media/libopus/silk/arm/NSQ_neon.c
media/libopus/silk/arm/NSQ_neon.h
media/libopus/silk/arm/arm_silk_map.c
media/libopus/silk/arm/macros_arm64.h
media/libopus/silk/decode_core.c
media/libopus/silk/fixed/burg_modified_FIX.c
media/libopus/silk/fixed/x86/burg_modified_FIX_sse.c
media/libopus/silk/macros.h
media/libopus/silk/mips/NSQ_del_dec_mipsr1.h
media/libopus/silk/process_NLSFs.c
media/libopus/silk/sort.c
media/libopus/silk/stereo_LR_to_MS.c
media/libopus/silk/x86/NSQ_sse.c
media/libopus/silk/x86/main_sse.h
media/libopus/sources.mozbuild
media/libopus/src/analysis.c
media/libopus/src/opus.c
media/libopus/src/opus_encoder.c
media/libopus/src/opus_multistream_encoder.c
media/libopus/src/repacketizer.c
--- a/media/libopus/README_MOZILLA
+++ b/media/libopus/README_MOZILLA
@@ -3,9 +3,9 @@ IETF Opus audio codec reference implemen
 The source in this directory was copied from an opus
 repository checkout by running the ./update.sh script.
 Any changes made to this version of the source should
 be reflected in that script, e.g. by applying patch
 files after the copy step.
 
 The upstream repository is https://git.xiph.org/opus.git
 
-The git tag/revision used was v1.1.2.
+The git tag/revision used was v1.1.3.
--- a/media/libopus/celt/arch.h
+++ b/media/libopus/celt/arch.h
@@ -73,16 +73,25 @@ static OPUS_INLINE void _celt_fatal(cons
 #define MAX16(a,b) ((a) > (b) ? (a) : (b))   /**< Maximum 16-bit value.   */
 #define MIN32(a,b) ((a) < (b) ? (a) : (b))   /**< Minimum 32-bit value.   */
 #define MAX32(a,b) ((a) > (b) ? (a) : (b))   /**< Maximum 32-bit value.   */
 #define IMIN(a,b) ((a) < (b) ? (a) : (b))   /**< Minimum int value.   */
 #define IMAX(a,b) ((a) > (b) ? (a) : (b))   /**< Maximum int value.   */
 #define UADD32(a,b) ((a)+(b))
 #define USUB32(a,b) ((a)-(b))
 
+/* Set this if opus_int64 is a native type of the CPU. */
+/* Assume that all LP64 architectures have fast 64-bit types; also x86_64
+   (which can be ILP32 for x32) and Win64 (which is LLP64). */
+#if defined(__x86_64__) || defined(__LP64__) || defined(_WIN64)
+#define OPUS_FAST_INT64 1
+#else
+#define OPUS_FAST_INT64 0
+#endif
+
 #define PRINT_MIPS(file)
 
 #ifdef FIXED_POINT
 
 typedef opus_int16 opus_val16;
 typedef opus_int32 opus_val32;
 
 typedef opus_val32 celt_sig;
@@ -113,17 +122,19 @@ static OPUS_INLINE opus_int16 SAT16(opus
 }
 
 #ifdef FIXED_DEBUG
 #include "fixed_debug.h"
 #else
 
 #include "fixed_generic.h"
 
-#ifdef OPUS_ARM_INLINE_EDSP
+#ifdef OPUS_ARM_PRESUME_AARCH64_NEON_INTR
+#include "arm/fixed_arm64.h"
+#elif OPUS_ARM_INLINE_EDSP
 #include "arm/fixed_armv5e.h"
 #elif defined (OPUS_ARM_INLINE_ASM)
 #include "arm/fixed_armv4.h"
 #elif defined (BFIN_ASM)
 #include "fixed_bfin.h"
 #elif defined (TI_C5X_ASM)
 #include "fixed_c5x.h"
 #elif defined (TI_C6X_ASM)
--- a/media/libopus/celt/arm/arm_celt_map.c
+++ b/media/libopus/celt/arm/arm_celt_map.c
@@ -31,35 +31,57 @@
 
 #include "pitch.h"
 #include "kiss_fft.h"
 #include "mdct.h"
 
 #if defined(OPUS_HAVE_RTCD)
 
 # if defined(FIXED_POINT)
+#  if ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \
+    (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \
+    (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))
 opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
     const opus_val16 *, opus_val32 *, int , int) = {
   celt_pitch_xcorr_c,               /* ARMv4 */
   MAY_HAVE_EDSP(celt_pitch_xcorr),  /* EDSP */
   MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */
   MAY_HAVE_NEON(celt_pitch_xcorr)   /* NEON */
 };
+
+#  endif
 # else /* !FIXED_POINT */
-#  if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+#  if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
 void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
     const opus_val16 *, opus_val32 *, int, int) = {
   celt_pitch_xcorr_c,              /* ARMv4 */
   celt_pitch_xcorr_c,              /* EDSP */
   celt_pitch_xcorr_c,              /* Media */
   celt_pitch_xcorr_float_neon      /* Neon */
 };
 #  endif
 # endif /* FIXED_POINT */
 
+#if defined(FIXED_POINT) && defined(OPUS_HAVE_RTCD) && \
+ defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
+
+void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
+         const opus_val16 *x,
+         const opus_val16 *y,
+         opus_val32       sum[4],
+         int              len
+) = {
+  xcorr_kernel_c,                /* ARMv4 */
+  xcorr_kernel_c,                /* EDSP */
+  xcorr_kernel_c,                /* Media */
+  xcorr_kernel_neon_fixed,       /* Neon */
+};
+
+#endif
+
 # if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
 #  if defined(HAVE_ARM_NE10)
 #   if defined(CUSTOM_MODES)
 int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
    opus_fft_alloc_arch_c,        /* ARMv4 */
    opus_fft_alloc_arch_c,        /* EDSP */
    opus_fft_alloc_arch_c,        /* Media */
    opus_fft_alloc_arm_neon       /* Neon with NE10 library support */
--- a/media/libopus/celt/arm/armcpu.c
+++ b/media/libopus/celt/arm/armcpu.c
@@ -32,57 +32,60 @@
 #endif
 
 #ifdef OPUS_HAVE_RTCD
 
 #include "armcpu.h"
 #include "cpu_support.h"
 #include "os_support.h"
 #include "opus_types.h"
+#include "arch.h"
 
-#define OPUS_CPU_ARM_V4    (1)
-#define OPUS_CPU_ARM_EDSP  (1<<1)
-#define OPUS_CPU_ARM_MEDIA (1<<2)
-#define OPUS_CPU_ARM_NEON  (1<<3)
+#define OPUS_CPU_ARM_V4_FLAG    (1<<OPUS_ARCH_ARM_V4)
+#define OPUS_CPU_ARM_EDSP_FLAG  (1<<OPUS_ARCH_ARM_EDSP)
+#define OPUS_CPU_ARM_MEDIA_FLAG (1<<OPUS_ARCH_ARM_MEDIA)
+#define OPUS_CPU_ARM_NEON_FLAG  (1<<OPUS_ARCH_ARM_NEON)
 
 #if defined(_MSC_VER)
 /*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
 # define WIN32_LEAN_AND_MEAN
 # define WIN32_EXTRA_LEAN
 # include <windows.h>
 
 static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){
   opus_uint32 flags;
   flags=0;
   /* MSVC has no OPUS_INLINE __asm support for ARM, but it does let you __emit
    * instructions via their assembled hex code.
    * All of these instructions should be essentially nops. */
-# if defined(OPUS_ARM_MAY_HAVE_EDSP)
+# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \
+ || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
   __try{
     /*PLD [r13]*/
     __emit(0xF5DDF000);
-    flags|=OPUS_CPU_ARM_EDSP;
+    flags|=OPUS_CPU_ARM_EDSP_FLAG;
   }
   __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
     /*Ignore exception.*/
   }
-#  if defined(OPUS_ARM_MAY_HAVE_MEDIA)
+#  if defined(OPUS_ARM_MAY_HAVE_MEDIA) \
+ || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
   __try{
     /*SHADD8 r3,r3,r3*/
     __emit(0xE6333F93);
-    flags|=OPUS_CPU_ARM_MEDIA;
+    flags|=OPUS_CPU_ARM_MEDIA_FLAG;
   }
   __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
     /*Ignore exception.*/
   }
 #   if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
   __try{
     /*VORR q0,q0,q0*/
     __emit(0xF2200150);
-    flags|=OPUS_CPU_ARM_NEON;
+    flags|=OPUS_CPU_ARM_NEON_FLAG;
   }
   __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
     /*Ignore exception.*/
   }
 #   endif
 #  endif
 # endif
   return flags;
@@ -102,44 +105,44 @@ opus_uint32 opus_cpu_capabilities(void)
   if(cpuinfo != NULL)
   {
     /* 512 should be enough for anybody (it's even enough for all the flags that
      * x86 has accumulated... so far). */
     char buf[512];
 
     while(fgets(buf, 512, cpuinfo) != NULL)
     {
-# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \
+ || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
       /* Search for edsp and neon flag */
       if(memcmp(buf, "Features", 8) == 0)
       {
         char *p;
-#  if defined(OPUS_ARM_MAY_HAVE_EDSP)
         p = strstr(buf, " edsp");
         if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
-          flags |= OPUS_CPU_ARM_EDSP;
-#  endif
+          flags |= OPUS_CPU_ARM_EDSP_FLAG;
 
 #  if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
         p = strstr(buf, " neon");
         if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
-          flags |= OPUS_CPU_ARM_NEON;
+          flags |= OPUS_CPU_ARM_NEON_FLAG;
 #  endif
       }
 # endif
 
-# if defined(OPUS_ARM_MAY_HAVE_MEDIA)
+# if defined(OPUS_ARM_MAY_HAVE_MEDIA) \
+ || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
       /* Search for media capabilities (>= ARMv6) */
       if(memcmp(buf, "CPU architecture:", 17) == 0)
       {
         int version;
         version = atoi(buf+17);
 
         if(version >= 6)
-          flags |= OPUS_CPU_ARM_MEDIA;
+          flags |= OPUS_CPU_ARM_MEDIA_FLAG;
       }
 # endif
     }
 
     fclose(cpuinfo);
   }
   return flags;
 }
@@ -151,24 +154,32 @@ opus_uint32 opus_cpu_capabilities(void)
    "your platform.  Reconfigure with --disable-rtcd (or send patches)."
 #endif
 
 int opus_select_arch(void)
 {
   opus_uint32 flags = opus_cpu_capabilities();
   int arch = 0;
 
-  if(!(flags & OPUS_CPU_ARM_EDSP))
+  if(!(flags & OPUS_CPU_ARM_EDSP_FLAG)) {
+    /* Asserts ensure arch values are sequential */
+    celt_assert(arch == OPUS_ARCH_ARM_V4);
     return arch;
+  }
   arch++;
 
-  if(!(flags & OPUS_CPU_ARM_MEDIA))
+  if(!(flags & OPUS_CPU_ARM_MEDIA_FLAG)) {
+    celt_assert(arch == OPUS_ARCH_ARM_EDSP);
     return arch;
+  }
   arch++;
 
-  if(!(flags & OPUS_CPU_ARM_NEON))
+  if(!(flags & OPUS_CPU_ARM_NEON_FLAG)) {
+    celt_assert(arch == OPUS_ARCH_ARM_MEDIA);
     return arch;
+  }
   arch++;
 
+  celt_assert(arch == OPUS_ARCH_ARM_NEON);
   return arch;
 }
 
 #endif
--- a/media/libopus/celt/arm/armcpu.h
+++ b/media/libopus/celt/arm/armcpu.h
@@ -61,11 +61,17 @@
 # if defined(OPUS_ARM_PRESUME_NEON)
 #  define PRESUME_NEON(name) name ## _neon
 # else
 #  define PRESUME_NEON(name) PRESUME_MEDIA(name)
 # endif
 
 # if defined(OPUS_HAVE_RTCD)
 int opus_select_arch(void);
+
+#define OPUS_ARCH_ARM_V4    (0)
+#define OPUS_ARCH_ARM_EDSP  (1)
+#define OPUS_ARCH_ARM_MEDIA (2)
+#define OPUS_ARCH_ARM_NEON  (3)
+
 # endif
 
 #endif
--- a/media/libopus/celt/arm/celt_neon_intr.c
+++ b/media/libopus/celt/arm/celt_neon_intr.c
@@ -32,17 +32,76 @@
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 #include <arm_neon.h>
 #include "../pitch.h"
 
-#if !defined(FIXED_POINT)
+#if defined(FIXED_POINT)
+void xcorr_kernel_neon_fixed(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
+{
+   int j;
+   int32x4_t a = vld1q_s32(sum);
+   /* Load y[0...3] */
+   /* This requires len>0 to always be valid (which we assert in the C code). */
+   int16x4_t y0 = vld1_s16(y);
+   y += 4;
+
+   for (j = 0; j + 8 <= len; j += 8)
+   {
+      /* Load x[0...7] */
+      int16x8_t xx = vld1q_s16(x);
+      int16x4_t x0 = vget_low_s16(xx);
+      int16x4_t x4 = vget_high_s16(xx);
+      /* Load y[4...11] */
+      int16x8_t yy = vld1q_s16(y);
+      int16x4_t y4 = vget_low_s16(yy);
+      int16x4_t y8 = vget_high_s16(yy);
+      int32x4_t a0 = vmlal_lane_s16(a, y0, x0, 0);
+      int32x4_t a1 = vmlal_lane_s16(a0, y4, x4, 0);
+
+      int16x4_t y1 = vext_s16(y0, y4, 1);
+      int16x4_t y5 = vext_s16(y4, y8, 1);
+      int32x4_t a2 = vmlal_lane_s16(a1, y1, x0, 1);
+      int32x4_t a3 = vmlal_lane_s16(a2, y5, x4, 1);
+
+      int16x4_t y2 = vext_s16(y0, y4, 2);
+      int16x4_t y6 = vext_s16(y4, y8, 2);
+      int32x4_t a4 = vmlal_lane_s16(a3, y2, x0, 2);
+      int32x4_t a5 = vmlal_lane_s16(a4, y6, x4, 2);
+
+      int16x4_t y3 = vext_s16(y0, y4, 3);
+      int16x4_t y7 = vext_s16(y4, y8, 3);
+      int32x4_t a6 = vmlal_lane_s16(a5, y3, x0, 3);
+      int32x4_t a7 = vmlal_lane_s16(a6, y7, x4, 3);
+
+      y0 = y8;
+      a = a7;
+      x += 8;
+      y += 8;
+   }
+
+   for (; j < len; j++)
+   {
+      int16x4_t x0 = vld1_dup_s16(x);  /* load next x */
+      int32x4_t a0 = vmlal_s16(a, y0, x0);
+
+      int16x4_t y4 = vld1_dup_s16(y);  /* load next y */
+      y0 = vext_s16(y0, y4, 1);
+      a = a0;
+      x++;
+      y++;
+   }
+
+   vst1q_s32(sum, a);
+}
+
+#else
 /*
  * Function: xcorr_kernel_neon_float
  * ---------------------------------
  * Computes 4 correlation values and stores them in sum[4]
  */
 static void xcorr_kernel_neon_float(const float32_t *x, const float32_t *y,
       float32_t sum[4], int len) {
    float32x4_t YY[3];
new file mode 100644
--- /dev/null
+++ b/media/libopus/celt/arm/fixed_arm64.h
@@ -0,0 +1,35 @@
+/* Copyright (C) 2015 Vidyo */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FIXED_ARM64_H
+#define FIXED_ARM64_H
+
+#include <arm_neon.h>
+
+#undef SIG2WORD16
+#define SIG2WORD16(x) (vqmovns_s32(PSHR32((x), SIG_SHIFT)))
+
+#endif
--- a/media/libopus/celt/arm/pitch_arm.h
+++ b/media/libopus/celt/arm/pitch_arm.h
@@ -41,28 +41,86 @@ opus_val32 celt_pitch_xcorr_neon(const o
 #   define celt_pitch_xcorr_media MAY_HAVE_EDSP(celt_pitch_xcorr)
 #  endif
 
 #  if defined(OPUS_ARM_MAY_HAVE_EDSP)
 opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y,
     opus_val32 *xcorr, int len, int max_pitch);
 #  endif
 
-#  if !defined(OPUS_HAVE_RTCD)
+#  if defined(OPUS_HAVE_RTCD) && \
+    ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \
+     (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \
+     (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))
+extern opus_val32
+(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
+      const opus_val16 *, opus_val32 *, int, int);
+#   define OVERRIDE_PITCH_XCORR (1)
+#   define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+  ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
+        xcorr, len, max_pitch))
+
+#  elif defined(OPUS_ARM_PRESUME_EDSP) || \
+    defined(OPUS_ARM_PRESUME_MEDIA) || \
+    defined(OPUS_ARM_PRESUME_NEON)
 #   define OVERRIDE_PITCH_XCORR (1)
 #   define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
   ((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch))
+
+#  endif
+
+#  if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+void xcorr_kernel_neon_fixed(
+                    const opus_val16 *x,
+                    const opus_val16 *y,
+                    opus_val32       sum[4],
+                    int              len);
+#  endif
+
+#  if defined(OPUS_HAVE_RTCD) && \
+    (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
+
+extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
+                    const opus_val16 *x,
+                    const opus_val16 *y,
+                    opus_val32       sum[4],
+                    int              len);
+
+#   define OVERRIDE_XCORR_KERNEL (1)
+#   define xcorr_kernel(x, y, sum, len, arch) \
+     ((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len))
+
+#  elif defined(OPUS_ARM_PRESUME_NEON_INTR)
+#   define OVERRIDE_XCORR_KERNEL (1)
+#   define xcorr_kernel(x, y, sum, len, arch) \
+      ((void)arch, xcorr_kernel_neon_fixed(x, y, sum, len))
+
 #  endif
 
 #else /* Start !FIXED_POINT */
 /* Float case */
 #if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
 void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
                                  opus_val32 *xcorr, int len, int max_pitch);
-#if !defined(OPUS_HAVE_RTCD) || defined(OPUS_ARM_PRESUME_NEON_INTR)
-#define OVERRIDE_PITCH_XCORR (1)
+#endif
+
+#  if defined(OPUS_HAVE_RTCD) && \
+    (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
+extern void
+(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
+      const opus_val16 *, opus_val32 *, int, int);
+
+#  define OVERRIDE_PITCH_XCORR (1)
+#  define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+  ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
+        xcorr, len, max_pitch))
+
+#  elif defined(OPUS_ARM_PRESUME_NEON_INTR)
+
+#   define OVERRIDE_PITCH_XCORR (1)
 #   define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
    ((void)(arch),celt_pitch_xcorr_float_neon(_x, _y, xcorr, len, max_pitch))
-#endif
-#endif
+
+#  endif
 
 #endif /* end !FIXED_POINT */
+
 #endif
--- a/media/libopus/celt/bands.c
+++ b/media/libopus/celt/bands.c
@@ -409,17 +409,17 @@ static void stereo_merge(celt_norm * OPU
 #endif
    opus_val32 t, lgain, rgain;
 
    /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
    dual_inner_prod(Y, X, Y, N, &xp, &side, arch);
    /* Compensating for the mid normalization */
    xp = MULT16_32_Q15(mid, xp);
    /* mid and side are in Q15, not Q14 like X and Y */
-   mid2 = SHR32(mid, 1);
+   mid2 = SHR16(mid, 1);
    El = MULT16_16(mid2, mid2) + side - 2*xp;
    Er = MULT16_16(mid2, mid2) + side + 2*xp;
    if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28))
    {
       OPUS_COPY(Y, X, N);
       return;
    }
 
@@ -709,17 +709,17 @@ static void compute_theta(struct band_ct
          mid and side because we know that 1) they have unit norm and
          2) they are orthogonal. */
       itheta = stereo_itheta(X, Y, stereo, N, ctx->arch);
    }
    tell = ec_tell_frac(ec);
    if (qn!=1)
    {
       if (encode)
-         itheta = (itheta*qn+8192)>>14;
+         itheta = (itheta*(opus_int32)qn+8192)>>14;
 
       /* Entropy coding of the angle. We use a uniform pdf for the
          time split, a step for stereo, and a triangular one for the rest. */
       if (stereo && N>2)
       {
          int p0 = 3;
          int x = itheta;
          int x0 = qn/2;
--- a/media/libopus/celt/celt.h
+++ b/media/libopus/celt/celt.h
@@ -204,17 +204,17 @@ void comb_filter(opus_val32 *y, opus_val
       const opus_val16 *window, int overlap, int arch);
 
 #ifdef NON_STATIC_COMB_FILTER_CONST_C
 void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
                          opus_val16 g10, opus_val16 g11, opus_val16 g12);
 #endif
 
 #ifndef OVERRIDE_COMB_FILTER_CONST
-# define comb_filter_const(y, x, T, N, g10, g11, g12, arch)		\
+# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
     ((void)(arch),comb_filter_const_c(y, x, T, N, g10, g11, g12))
 #endif
 
 void init_caps(const CELTMode *m,int *cap,int LM,int C);
 
 #ifdef RESYNTH
 void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem);
 void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
--- a/media/libopus/celt/celt_decoder.c
+++ b/media/libopus/celt/celt_decoder.c
@@ -77,16 +77,17 @@ struct OpusCustomDecoder {
 
    /* Everything beyond this point gets cleared on a reset */
 #define DECODER_RESET_START rng
 
    opus_uint32 rng;
    int error;
    int last_pitch_index;
    int loss_count;
+   int skip_plc;
    int postfilter_period;
    int postfilter_period_old;
    opus_val16 postfilter_gain;
    opus_val16 postfilter_gain_old;
    int postfilter_tapset;
    int postfilter_tapset_old;
 
    celt_sig preemph_memD[2];
@@ -159,18 +160,16 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_dec
    st->stream_channels = st->channels = channels;
 
    st->downsample = 1;
    st->start = 0;
    st->end = st->mode->effEBands;
    st->signalling = 1;
    st->arch = opus_select_arch();
 
-   st->loss_count = 0;
-
    opus_custom_decoder_ctl(st, OPUS_RESET_STATE);
 
    return OPUS_OK;
 }
 
 #ifdef CUSTOM_MODES
 void opus_custom_decoder_destroy(CELTDecoder *st)
 {
@@ -442,17 +441,17 @@ static void celt_decode_lost(CELTDecoder
    lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*C);
    oldBandE = lpc+C*LPC_ORDER;
    oldLogE = oldBandE + 2*nbEBands;
    oldLogE2 = oldLogE + 2*nbEBands;
    backgroundLogE = oldLogE2  + 2*nbEBands;
 
    loss_count = st->loss_count;
    start = st->start;
-   noise_based = loss_count >= 5 || start != 0;
+   noise_based = loss_count >= 5 || start != 0 || st->skip_plc;
    if (noise_based)
    {
       /* Noise-based PLC/CNG */
 #ifdef NORM_ALIASING_HACK
       celt_norm *X;
 #else
       VARDECL(celt_norm, X);
 #endif
@@ -827,16 +826,20 @@ int celt_decode_with_ec(CELTDecoder * OP
    if (data == NULL || len<=1)
    {
       celt_decode_lost(st, N, LM);
       deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum);
       RESTORE_STACK;
       return frame_size/st->downsample;
    }
 
+   /* Check if there are at least two packets received consecutively before
+    * turning on the pitch-based PLC */
+   st->skip_plc = st->loss_count != 0;
+
    if (dec == NULL)
    {
       ec_dec_init(&_dec,(unsigned char*)data,len);
       dec = &_dec;
    }
 
    if (C==1)
    {
@@ -1193,16 +1196,17 @@ int opus_custom_decoder_ctl(CELTDecoder 
          oldBandE = lpc+st->channels*LPC_ORDER;
          oldLogE = oldBandE + 2*st->mode->nbEBands;
          oldLogE2 = oldLogE + 2*st->mode->nbEBands;
          OPUS_CLEAR((char*)&st->DECODER_RESET_START,
                opus_custom_decoder_get_size(st->mode, st->channels)-
                ((char*)&st->DECODER_RESET_START - (char*)st));
          for (i=0;i<2*st->mode->nbEBands;i++)
             oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT);
+         st->skip_plc = 1;
       }
       break;
       case OPUS_GET_PITCH_REQUEST:
       {
          opus_int32 *value = va_arg(ap, opus_int32*);
          if (value==NULL)
             goto bad_arg;
          *value = st->postfilter_period;
--- a/media/libopus/celt/celt_encoder.c
+++ b/media/libopus/celt/celt_encoder.c
@@ -1170,20 +1170,20 @@ static int run_prefilter(CELTEncoder *st
 
       comb_filter(in+c*(N+overlap)+overlap+offset, pre[c]+COMBFILTER_MAXPERIOD+offset,
             st->prefilter_period, pitch_index, N-offset, -st->prefilter_gain, -gain1,
             st->prefilter_tapset, prefilter_tapset, mode->window, overlap, st->arch);
       OPUS_COPY(st->in_mem+c*(overlap), in+c*(N+overlap)+N, overlap);
 
       if (N>COMBFILTER_MAXPERIOD)
       {
-         OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, pre[c]+N, COMBFILTER_MAXPERIOD);
+         OPUS_COPY(prefilter_mem+c*COMBFILTER_MAXPERIOD, pre[c]+N, COMBFILTER_MAXPERIOD);
       } else {
          OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, prefilter_mem+c*COMBFILTER_MAXPERIOD+N, COMBFILTER_MAXPERIOD-N);
-         OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N);
+         OPUS_COPY(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N);
       }
    } while (++c<CC);
 
    RESTORE_STACK;
    *gain = gain1;
    *pitch = pitch_index;
    *qgain = qg;
    return pf_on;
@@ -1276,22 +1276,25 @@ static int compute_vbr(const CELTMode *m
       floor_depth = (opus_int32)SHR32(MULT16_16((C*bins<<BITRES),maxDepth), DB_SHIFT);
       floor_depth = IMAX(floor_depth, target>>2);
       target = IMIN(target, floor_depth);
       /*printf("%f %d\n", maxDepth, floor_depth);*/
    }
 
    if ((!has_surround_mask||lfe) && (constrained_vbr || bitrate<64000))
    {
-      opus_val16 rate_factor;
+      opus_val16 rate_factor = Q15ONE;
+      if (bitrate < 64000)
+      {
 #ifdef FIXED_POINT
-      rate_factor = MAX16(0,(bitrate-32000));
+         rate_factor = MAX16(0,(bitrate-32000));
 #else
-      rate_factor = MAX16(0,(1.f/32768)*(bitrate-32000));
+         rate_factor = MAX16(0,(1.f/32768)*(bitrate-32000));
 #endif
+      }
       if (constrained_vbr)
          rate_factor = MIN16(rate_factor, QCONST16(0.67f, 15));
       target = base_target + (opus_int32)MULT16_32_Q15(rate_factor, target-base_target);
 
    }
 
    if (!has_surround_mask && tf_estimate < QCONST16(.2f, 14))
    {
--- a/media/libopus/celt/celt_lpc.c
+++ b/media/libopus/celt/celt_lpc.c
@@ -44,18 +44,17 @@ int          p
    opus_val32 r;
    opus_val32 error = ac[0];
 #ifdef FIXED_POINT
    opus_val32 lpc[LPC_ORDER];
 #else
    float *lpc = _lpc;
 #endif
 
-   for (i = 0; i < p; i++)
-      lpc[i] = 0;
+   OPUS_CLEAR(lpc, p);
    if (ac[0] != 0)
    {
       for (i = 0; i < p; i++) {
          /* Sum up this iteration's reflection coefficient */
          opus_val32 rr = 0;
          for (j = 0; j < i; j++)
             rr += MULT32_32_Q31(lpc[j],ac[i - j]);
          rr += SHR32(ac[i + 1],3);
--- a/media/libopus/celt/cwrs.c
+++ b/media/libopus/celt/cwrs.c
@@ -69,17 +69,17 @@ int log2_frac(opus_uint32 val, int frac)
   /*Exact powers of two require no rounding.*/
   else return (l-1)<<frac;
 }
 #endif
 
 /*Although derived separately, the pulse vector coding scheme is equivalent to
    a Pyramid Vector Quantizer \cite{Fis86}.
   Some additional notes about an early version appear at
-   http://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering
+   https://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering
    and the definitions of some terms have evolved since that was written.
 
   The conversion from a pulse vector to an integer index (encoding) and back
    (decoding) is governed by two related functions, V(N,K) and U(N,K).
 
   V(N,K) = the number of combinations, with replacement, of N items, taken K
    at a time, when a sign bit is added to each item taken at least once (i.e.,
    the number of N-dimensional unit pulse vectors with K pulses).
--- a/media/libopus/celt/fixed_generic.h
+++ b/media/libopus/celt/fixed_generic.h
@@ -32,26 +32,42 @@
 
 #ifndef FIXED_GENERIC_H
 #define FIXED_GENERIC_H
 
 /** Multiply a 16-bit signed value by a 16-bit unsigned value. The result is a 32-bit signed value */
 #define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b))
 
 /** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
+#if OPUS_FAST_INT64
+#define MULT16_32_Q16(a,b) ((opus_val32)SHR((opus_int64)((opus_val16)(a))*(b),16))
+#else
 #define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16))
+#endif
 
 /** 16x32 multiplication, followed by a 16-bit shift right (round-to-nearest). Results fits in 32 bits */
+#if OPUS_FAST_INT64
+#define MULT16_32_P16(a,b) ((opus_val32)PSHR((opus_int64)((opus_val16)(a))*(b),16))
+#else
 #define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16SU((a),((b)&0x0000ffff)),16))
+#endif
 
 /** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
+#if OPUS_FAST_INT64
+#define MULT16_32_Q15(a,b) ((opus_val32)SHR((opus_int64)((opus_val16)(a))*(b),15))
+#else
 #define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
+#endif
 
 /** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
+#if OPUS_FAST_INT64
+#define MULT32_32_Q31(a,b) ((opus_val32)SHR((opus_int64)(a)*(opus_int64)(b),31))
+#else
 #define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
+#endif
 
 /** Compile-time conversion of float constant to 16-bit value */
 #define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits))))
 
 /** Compile-time conversion of float constant to 32-bit value */
 #define QCONST32(x,bits) ((opus_val32)(.5+(x)*(((opus_val32)1)<<(bits))))
 
 /** Negate a 16-bit value */
--- a/media/libopus/celt/kiss_fft.c
+++ b/media/libopus/celt/kiss_fft.c
@@ -186,17 +186,17 @@ static void kf_bfly3(
    size_t k;
    const size_t m2 = 2*m;
    const kiss_twiddle_cpx *tw1,*tw2;
    kiss_fft_cpx scratch[5];
    kiss_twiddle_cpx epi3;
 
    kiss_fft_cpx * Fout_beg = Fout;
 #ifdef FIXED_POINT
-   epi3.r = -16384;
+   /*epi3.r = -16384;*/ /* Unused */
    epi3.i = -28378;
 #else
    epi3 = st->twiddles[fstride*m];
 #endif
    for (i=0;i<N;i++)
    {
       Fout = Fout_beg + i*mm;
       tw1=tw2=st->twiddles;
--- a/media/libopus/celt/mathops.c
+++ b/media/libopus/celt/mathops.c
@@ -159,17 +159,17 @@ opus_val16 celt_cos_norm(opus_val32 x)
    if (x>SHL32(EXTEND32(1), 16))
       x = SUB32(SHL32(EXTEND32(1), 17),x);
    if (x&0x00007fff)
    {
       if (x<SHL32(EXTEND32(1), 15))
       {
          return _celt_cos_pi_2(EXTRACT16(x));
       } else {
-         return NEG32(_celt_cos_pi_2(EXTRACT16(65536-x)));
+         return NEG16(_celt_cos_pi_2(EXTRACT16(65536-x)));
       }
    } else {
       if (x&0x0000ffff)
          return 0;
       else if (x&0x0001ffff)
          return -32767;
       else
          return 32767;
--- a/media/libopus/celt/pitch.c
+++ b/media/libopus/celt/pitch.c
@@ -407,16 +407,51 @@ void pitch_search(const opus_val16 * OPU
    } else {
       offset = 0;
    }
    *pitch = 2*best_pitch[0]-offset;
 
    RESTORE_STACK;
 }
 
+#ifdef FIXED_POINT /* Integer-only counterpart of the float compute_pitch_gain() in the #else branch. */
+static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
+{
+   opus_val32 x2y2;
+   int sx, sy, shift;
+   opus_val32 g;
+   opus_val16 den;
+   if (xy == 0 || xx == 0 || yy == 0) /* Early out: any zero input yields a gain of 0. */
+      return 0;
+   sx = celt_ilog2(xx)-14; /* Exponent of xx relative to bit 14 (assumes celt_ilog2 is floor(log2) - confirm). */
+   sy = celt_ilog2(yy)-14; /* Same for yy. */
+   shift = sx + sy; /* Total exponent taken out of xx and yy by the normalization below. */
+   x2y2 = MULT16_16_Q14(VSHR32(xx, sx), VSHR32(yy, sy)); /* Product of the normalized values; VSHR32 presumably shifts left for a negative count - confirm. */
+   if (shift & 1) { /* Odd exponent: move one bit between x2y2 and shift so that shift>>1 below is exact. */
+      if (x2y2 < 32768) /* Doubling keeps x2y2 below 65536 in this case. */
+      {
+         x2y2 <<= 1;
+         shift--;
+      } else {
+         x2y2 >>= 1;
+         shift++;
+      }
+   }
+   den = celt_rsqrt_norm(x2y2); /* Presumably the reciprocal square root of the normalized product; Q format per celt_rsqrt_norm - confirm. */
+   g = MULT16_32_Q15(den, xy);
+   g = VSHR32(g, (shift>>1)-1); /* Rescale by half the removed exponent, less one bit. */
+   return EXTRACT16(MIN32(g, Q15ONE)); /* Cap at Q15ONE before narrowing to 16 bits. */
+}
+#else /* Floating-point build. */
+static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
+{
+   return xy/celt_sqrt(1+xx*yy); /* With xx*yy >= 0 the +1 keeps the square-root argument at 1 or more, so the divisor is nonzero. */
+}
+#endif /* FIXED_POINT */
+
 static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2};
 opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
       int N, int *T0_, int prev_period, opus_val16 prev_gain, int arch)
 {
    int k, i, T, T0;
    opus_val16 g, g0;
    opus_val16 pg;
    opus_val32 xy,xx,yy,xy2;
@@ -445,28 +480,17 @@ opus_val16 remove_doubling(opus_val16 *x
    for (i=1;i<=maxperiod;i++)
    {
       yy = yy+MULT16_16(x[-i],x[-i])-MULT16_16(x[N-i],x[N-i]);
       yy_lookup[i] = MAX32(0, yy);
    }
    yy = yy_lookup[T0];
    best_xy = xy;
    best_yy = yy;
-#ifdef FIXED_POINT
-      {
-         opus_val32 x2y2;
-         int sh, t;
-         x2y2 = 1+HALF32(MULT32_32_Q31(xx,yy));
-         sh = celt_ilog2(x2y2)>>1;
-         t = VSHR32(x2y2, 2*(sh-7));
-         g = g0 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1);
-      }
-#else
-      g = g0 = xy/celt_sqrt(1+xx*yy);
-#endif
+   g = g0 = compute_pitch_gain(xy, xx, yy);
    /* Look for any pitch at T/k */
    for (k=2;k<=15;k++)
    {
       int T1, T1b;
       opus_val16 g1;
       opus_val16 cont=0;
       opus_val16 thresh;
       T1 = celt_udiv(2*T0+k, 2*k);
@@ -479,34 +503,23 @@ opus_val16 remove_doubling(opus_val16 *x
             T1b = T0;
          else
             T1b = T0+T1;
       } else
       {
          T1b = celt_udiv(2*second_check[k]*T0+k, 2*k);
       }
       dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2, arch);
-      xy += xy2;
-      yy = yy_lookup[T1] + yy_lookup[T1b];
-#ifdef FIXED_POINT
-      {
-         opus_val32 x2y2;
-         int sh, t;
-         x2y2 = 1+MULT32_32_Q31(xx,yy);
-         sh = celt_ilog2(x2y2)>>1;
-         t = VSHR32(x2y2, 2*(sh-7));
-         g1 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1);
-      }
-#else
-      g1 = xy/celt_sqrt(1+2.f*xx*1.f*yy);
-#endif
+      xy = HALF32(xy + xy2);
+      yy = HALF32(yy_lookup[T1] + yy_lookup[T1b]);
+      g1 = compute_pitch_gain(xy, xx, yy);
       if (abs(T1-prev_period)<=1)
          cont = prev_gain;
       else if (abs(T1-prev_period)<=2 && 5*k*k < T0)
-         cont = HALF32(prev_gain);
+         cont = HALF16(prev_gain);
       else
          cont = 0;
       thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont);
       /* Bias against very high pitch (very short period) to avoid false-positives
          due to short-term correlation */
       if (T1<3*minperiod)
          thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85f,15),g0)-cont);
       else if (T1<2*minperiod)
--- a/media/libopus/celt/pitch.h
+++ b/media/libopus/celt/pitch.h
@@ -182,39 +182,19 @@ void comb_filter_const_c(opus_val32 *y, 
 opus_val32
 #else
 void
 #endif
 celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
       opus_val32 *xcorr, int len, int max_pitch);
 
 #if !defined(OVERRIDE_PITCH_XCORR)
-/*Is run-time CPU detection enabled on this platform?*/
-# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_ASM) \
-   || (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) \
-   && !defined(OPUS_ARM_PRESUME_NEON_INTR)))
-extern
-#  if defined(FIXED_POINT)
-opus_val32
-#  else
-void
-#  endif
-(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
-      const opus_val16 *, opus_val32 *, int, int);
-
-#  define OVERRIDE_PITCH_XCORR
-#  define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
-  ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
-        xcorr, len, max_pitch))
-# else
-
 #ifdef FIXED_POINT
 opus_val32
 #else
 void
 #endif
 celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
       opus_val32 *xcorr, int len, int max_pitch, int arch);
 
-# endif
 #endif
 
 #endif
--- a/media/libopus/celt/rate.c
+++ b/media/libopus/celt/rate.c
@@ -291,17 +291,17 @@ static OPUS_INLINE int interp_bits2pulse
       else
          lo = mid;
    }
    psum = 0;
    /*printf ("interp bisection gave %d\n", lo);*/
    done = 0;
    for (j=end;j-->start;)
    {
-      int tmp = bits1[j] + (lo*bits2[j]>>ALLOC_STEPS);
+      int tmp = bits1[j] + ((opus_int32)lo*bits2[j]>>ALLOC_STEPS);
       if (tmp < thresh[j] && !done)
       {
          if (tmp >= alloc_floor)
             tmp = alloc_floor;
          else
             tmp = 0;
       } else
          done = 1;
--- a/media/libopus/celt/vq.c
+++ b/media/libopus/celt/vq.c
@@ -266,17 +266,17 @@ unsigned alg_quant(celt_norm *X, int N, 
       int rshift;
 #endif
 #ifdef FIXED_POINT
       rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
 #endif
       best_id = 0;
       /* The squared magnitude term gets added anyway, so we might as well
          add it outside the loop */
-      yy = ADD32(yy, 1);
+      yy = ADD16(yy, 1);
       j=0;
       do {
          opus_val16 Rxy, Ryy;
          /* Temporary sums of the new pulse(s) */
          Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
          /* We're multiplying y[j] by two so we don't have to do it here */
          Ryy = ADD16(yy, y[j]);
 
--- a/media/libopus/celt/x86/pitch_sse.h
+++ b/media/libopus/celt/x86/pitch_sse.h
@@ -97,31 +97,31 @@ opus_val32 celt_inner_prod_sse(
     const opus_val16 *y,
     int               N);
 #endif
 
 
 #if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT)
 #define OVERRIDE_CELT_INNER_PROD
 #define celt_inner_prod(x, y, N, arch) \
-	((void)arch, celt_inner_prod_sse4_1(x, y, N))
+    ((void)arch, celt_inner_prod_sse4_1(x, y, N))
 
 #elif defined(OPUS_X86_PRESUME_SSE2) && defined(FIXED_POINT) && !defined(OPUS_X86_MAY_HAVE_SSE4_1)
 #define OVERRIDE_CELT_INNER_PROD
 #define celt_inner_prod(x, y, N, arch) \
-	((void)arch, celt_inner_prod_sse2(x, y, N))
+    ((void)arch, celt_inner_prod_sse2(x, y, N))
 
 #elif defined(OPUS_X86_PRESUME_SSE) && !defined(FIXED_POINT)
 #define OVERRIDE_CELT_INNER_PROD
 #define celt_inner_prod(x, y, N, arch) \
-	((void)arch, celt_inner_prod_sse(x, y, N))
+    ((void)arch, celt_inner_prod_sse(x, y, N))
 
 
 #elif ((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT)) || \
-	(defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT))
+    (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT))
 
 extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
                     const opus_val16 *x,
                     const opus_val16 *y,
                     int               N);
 
 #define OVERRIDE_CELT_INNER_PROD
 #define celt_inner_prod(x, y, N, arch) \
@@ -133,29 +133,29 @@ extern opus_val32 (*const CELT_INNER_PRO
 
 #define OVERRIDE_DUAL_INNER_PROD
 #define OVERRIDE_COMB_FILTER_CONST
 
 #undef dual_inner_prod
 #undef comb_filter_const
 
 void dual_inner_prod_sse(const opus_val16 *x,
-	const opus_val16 *y01,
-	const opus_val16 *y02,
-	int               N,
-	opus_val32       *xy1,
-	opus_val32       *xy2);
+    const opus_val16 *y01,
+    const opus_val16 *y02,
+    int               N,
+    opus_val32       *xy1,
+    opus_val32       *xy2);
 
 void comb_filter_const_sse(opus_val32 *y,
-	opus_val32 *x,
-	int         T,
-	int         N,
-	opus_val16  g10,
-	opus_val16  g11,
-	opus_val16  g12);
+    opus_val32 *x,
+    int         T,
+    int         N,
+    opus_val16  g10,
+    opus_val16  g11,
+    opus_val16  g12);
 
 
 #if defined(OPUS_X86_PRESUME_SSE)
 # define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \
     ((void)(arch),dual_inner_prod_sse(x, y01, y02, N, xy1, xy2))
 
 # define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
     ((void)(arch),comb_filter_const_sse(y, x, T, N, g10, g11, g12))
@@ -164,29 +164,29 @@ void comb_filter_const_sse(opus_val32 *y
 extern void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
               const opus_val16 *x,
               const opus_val16 *y01,
               const opus_val16 *y02,
               int               N,
               opus_val32       *xy1,
               opus_val32       *xy2);
 
-#define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch)			\
+#define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \
     ((*DUAL_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y01, y02, N, xy1, xy2))
 
 extern void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
               opus_val32 *y,
               opus_val32 *x,
               int         T,
               int         N,
               opus_val16  g10,
               opus_val16  g11,
               opus_val16  g12);
 
-#define comb_filter_const(y, x, T, N, g10, g11, g12, arch)				\
+#define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
     ((*COMB_FILTER_CONST_IMPL[(arch) & OPUS_ARCHMASK])(y, x, T, N, g10, g11, g12))
 
 #define NON_STATIC_COMB_FILTER_CONST_C
 
 #endif
 #endif
 
 #endif
--- a/media/libopus/celt/x86/x86_celt_map.c
+++ b/media/libopus/celt/x86/x86_celt_map.c
@@ -67,17 +67,17 @@ void (*const XCORR_KERNEL_IMPL[OPUS_ARCH
   xcorr_kernel_c,
   MAY_HAVE_SSE4_1(xcorr_kernel), /* sse4.1  */
   MAY_HAVE_SSE4_1(xcorr_kernel)  /* avx  */
 };
 
 #endif
 
 #if (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) ||  \
-	(!defined(OPUS_X86_MAY_HAVE_SSE_4_1) && defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2))
+ (!defined(OPUS_X86_MAY_HAVE_SSE_4_1) && defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2))
 
 opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
          const opus_val16 *x,
          const opus_val16 *y,
          int              N
 ) = {
   celt_inner_prod_c,                /* non-sse */
   celt_inner_prod_c,
--- a/media/libopus/celt/x86/x86cpu.c
+++ b/media/libopus/celt/x86/x86cpu.c
@@ -41,17 +41,17 @@
   (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))
 
 
 #if defined(_MSC_VER)
 
 #include <intrin.h>
 static _inline void cpuid(unsigned int CPUInfo[4], unsigned int InfoType)
 {
-	__cpuid((int*)CPUInfo, InfoType);
+    __cpuid((int*)CPUInfo, InfoType);
 }
 
 #else
 
 #if defined(CPU_INFO_BY_C)
 #include <cpuid.h>
 #endif
 
--- a/media/libopus/include/opus.h
+++ b/media/libopus/include/opus.h
@@ -137,17 +137,17 @@ extern "C" {
   * <li>frame_size is the duration of the frame in samples (per channel)</li>
   * <li>packet is the byte array to which the compressed data is written</li>
   * <li>max_packet is the maximum number of bytes that can be written in the packet (4000 bytes is recommended).
   *     Do not use max_packet to control VBR target bitrate, instead use the #OPUS_SET_BITRATE CTL.</li>
   * </ul>
   *
   * opus_encode() and opus_encode_float() return the number of bytes actually written to the packet.
   * The return value <b>can be negative</b>, which indicates that an error has occurred. If the return value
-  * is 1 byte, then the packet does not need to be transmitted (DTX).
+  * is 2 bytes or less, then the packet does not need to be transmitted (DTX).
   *
   * Once the encoder state if no longer needed, it can be destroyed with
   *
   * @code
   * opus_encoder_destroy(enc);
   * @endcode
   *
   * If the encoder was created with opus_encoder_init() rather than opus_encoder_create(),
--- a/media/libopus/include/opus_defines.h
+++ b/media/libopus/include/opus_defines.h
@@ -60,17 +60,17 @@ extern "C" {
 #define OPUS_ALLOC_FAIL       -7
 /**@}*/
 
 /** @cond OPUS_INTERNAL_DOC */
 /**Export control for opus functions */
 
 #ifndef OPUS_EXPORT
 # if defined(WIN32)
-#  ifdef OPUS_BUILD
+#  if defined(OPUS_BUILD) && defined(DLL_EXPORT)
 #   define OPUS_EXPORT __declspec(dllexport)
 #  else
 #   define OPUS_EXPORT
 #  endif
 # elif defined(__GNUC__) && defined(OPUS_BUILD)
 #  define OPUS_EXPORT __attribute__ ((visibility ("default")))
 # else
 #  define OPUS_EXPORT
--- a/media/libopus/include/opus_multistream.h
+++ b/media/libopus/include/opus_multistream.h
@@ -105,20 +105,20 @@ extern "C" {
   *
   * The multistream API allows individual Opus streams to be combined into a
   * single packet, enabling support for up to 255 channels. Unlike an
   * elementary Opus stream, the encoder and decoder must negotiate the channel
   * configuration before the decoder can successfully interpret the data in the
   * packets produced by the encoder. Some basic information, such as packet
   * duration, can be computed without any special negotiation.
   *
-  * The format for multistream Opus packets is defined in the
-  * <a href="https://tools.ietf.org/html/draft-ietf-codec-oggopus">Ogg
-  * encapsulation specification</a> and is based on the self-delimited Opus
-  * framing described in Appendix B of <a href="https://tools.ietf.org/html/rfc6716">RFC 6716</a>.
+  * The format for multistream Opus packets is defined in
+  * <a href="https://tools.ietf.org/html/rfc7845">RFC 7845</a>
+  * and is based on the self-delimited Opus framing described in Appendix B of
+  * <a href="https://tools.ietf.org/html/rfc6716">RFC 6716</a>.
   * Normal Opus packets are just a degenerate case of multistream Opus packets,
   * and can be encoded or decoded with the multistream API by setting
   * <code>streams</code> to <code>1</code> when initializing the encoder or
   * decoder.
   *
   * Multistream Opus streams can contain up to 255 elementary Opus streams.
   * These may be either "uncoupled" or "coupled", indicating that the decoder
   * is configured to decode them to either 1 or 2 channels, respectively.
--- a/media/libopus/moz.build
+++ b/media/libopus/moz.build
@@ -15,17 +15,17 @@ EXPORTS.opus += [
 ]
 
 # We allow warnings for third-party code that can be updated from upstream.
 ALLOW_COMPILER_WARNINGS = True
 
 FINAL_LIBRARY = 'gkmedias'
 
 DEFINES['OPUS_BUILD'] = True
-DEFINES['OPUS_VERSION'] = '"v1.1.2-mozilla"'
+DEFINES['OPUS_VERSION'] = '"v1.1.3-mozilla"'
 DEFINES['USE_ALLOCA'] = True
 
 # Don't export symbols
 DEFINES['OPUS_EXPORT'] = ''
 
 if CONFIG['CPU_ARCH'] == 'arm' and CONFIG['GNU_AS']:
     DEFINES['OPUS_ARM_ASM'] = True
     DEFINES['OPUS_ARM_EXTERNAL_ASM'] = True
--- a/media/libopus/silk/CNG.c
+++ b/media/libopus/silk/CNG.c
@@ -29,19 +29,18 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "config.h"
 #endif
 
 #include "main.h"
 #include "stack_alloc.h"
 
 /* Generates excitation for CNG LPC synthesis */
 static OPUS_INLINE void silk_CNG_exc(
-    opus_int32                       exc_Q10[],          /* O    CNG excitation signal Q10                   */
+    opus_int32                       exc_Q14[],          /* O    CNG excitation signal Q14                   */
     opus_int32                       exc_buf_Q14[],      /* I    Random samples buffer Q10                   */
-    opus_int32                       Gain_Q16,           /* I    Gain to apply                               */
     opus_int                         length,             /* I    Length                                      */
     opus_int32                       *rand_seed          /* I/O  Seed to random index generator              */
 )
 {
     opus_int32 seed;
     opus_int   i, idx, exc_mask;
 
     exc_mask = CNG_BUF_MASK_MAX;
@@ -50,17 +49,17 @@ static OPUS_INLINE void silk_CNG_exc(
     }
 
     seed = *rand_seed;
     for( i = 0; i < length; i++ ) {
         seed = silk_RAND( seed );
         idx = (opus_int)( silk_RSHIFT( seed, 24 ) & exc_mask );
         silk_assert( idx >= 0 );
         silk_assert( idx <= CNG_BUF_MASK_MAX );
-        exc_Q10[ i ] = (opus_int16)silk_SAT16( silk_SMULWW( exc_buf_Q14[ idx ], Gain_Q16 >> 4 ) );
+        exc_Q14[ i ] = exc_buf_Q14[ idx ];
     }
     *rand_seed = seed;
 }
 
 void silk_CNG_Reset(
     silk_decoder_state          *psDec                          /* I/O  Decoder state                               */
 )
 {
@@ -80,17 +79,17 @@ void silk_CNG_Reset(
 void silk_CNG(
     silk_decoder_state          *psDec,                         /* I/O  Decoder state                               */
     silk_decoder_control        *psDecCtrl,                     /* I/O  Decoder control                             */
     opus_int16                  frame[],                        /* I/O  Signal                                      */
     opus_int                    length                          /* I    Length of residual                          */
 )
 {
     opus_int   i, subfr;
-    opus_int32 sum_Q6, max_Gain_Q16, gain_Q16;
+    opus_int32 LPC_pred_Q10, max_Gain_Q16, gain_Q16, gain_Q10;
     opus_int16 A_Q12[ MAX_LPC_ORDER ];
     silk_CNG_struct *psCNG = &psDec->sCNG;
     SAVE_STACK;
 
     if( psDec->fs_kHz != psCNG->fs_kHz ) {
         /* Reset state */
         silk_CNG_Reset( psDec );
 
@@ -119,63 +118,67 @@ void silk_CNG(
         /* Smooth gains */
         for( i = 0; i < psDec->nb_subfr; i++ ) {
             psCNG->CNG_smth_Gain_Q16 += silk_SMULWB( psDecCtrl->Gains_Q16[ i ] - psCNG->CNG_smth_Gain_Q16, CNG_GAIN_SMTH_Q16 );
         }
     }
 
     /* Add CNG when packet is lost or during DTX */
     if( psDec->lossCnt ) {
-        VARDECL( opus_int32, CNG_sig_Q10 );
-        ALLOC( CNG_sig_Q10, length + MAX_LPC_ORDER, opus_int32 );
+        VARDECL( opus_int32, CNG_sig_Q14 );
+        ALLOC( CNG_sig_Q14, length + MAX_LPC_ORDER, opus_int32 );
 
         /* Generate CNG excitation */
         gain_Q16 = silk_SMULWW( psDec->sPLC.randScale_Q14, psDec->sPLC.prevGain_Q16[1] );
         if( gain_Q16 >= (1 << 21) || psCNG->CNG_smth_Gain_Q16 > (1 << 23) ) {
             gain_Q16 = silk_SMULTT( gain_Q16, gain_Q16 );
             gain_Q16 = silk_SUB_LSHIFT32(silk_SMULTT( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 );
             gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 16 );
         } else {
             gain_Q16 = silk_SMULWW( gain_Q16, gain_Q16 );
             gain_Q16 = silk_SUB_LSHIFT32(silk_SMULWW( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 );
             gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 8 );
         }
-        silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, gain_Q16, length, &psCNG->rand_seed );
+        gain_Q10 = silk_RSHIFT( gain_Q16, 6 );
+        
+        silk_CNG_exc( CNG_sig_Q14 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, length, &psCNG->rand_seed );
 
         /* Convert CNG NLSF to filter representation */
         silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order );
 
         /* Generate CNG signal, by synthesis filtering */
-        silk_memcpy( CNG_sig_Q10, psCNG->CNG_synth_state, MAX_LPC_ORDER * sizeof( opus_int32 ) );
+        silk_memcpy( CNG_sig_Q14, psCNG->CNG_synth_state, MAX_LPC_ORDER * sizeof( opus_int32 ) );
         for( i = 0; i < length; i++ ) {
             silk_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 );
             /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
-            sum_Q6 = silk_RSHIFT( psDec->LPC_order, 1 );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  1 ], A_Q12[ 0 ] );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  2 ], A_Q12[ 1 ] );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  3 ], A_Q12[ 2 ] );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  4 ], A_Q12[ 3 ] );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  5 ], A_Q12[ 4 ] );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  6 ], A_Q12[ 5 ] );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  7 ], A_Q12[ 6 ] );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  8 ], A_Q12[ 7 ] );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  9 ], A_Q12[ 8 ] );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] );
+            LPC_pred_Q10 = silk_RSHIFT( psDec->LPC_order, 1 );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  1 ], A_Q12[ 0 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  2 ], A_Q12[ 1 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  3 ], A_Q12[ 2 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  4 ], A_Q12[ 3 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  5 ], A_Q12[ 4 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  6 ], A_Q12[ 5 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  7 ], A_Q12[ 6 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  8 ], A_Q12[ 7 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  9 ], A_Q12[ 8 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] );
             if( psDec->LPC_order == 16 ) {
-                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 11 ], A_Q12[ 10 ] );
-                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 12 ], A_Q12[ 11 ] );
-                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 13 ], A_Q12[ 12 ] );
-                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 14 ], A_Q12[ 13 ] );
-                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 15 ], A_Q12[ 14 ] );
-                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 16 ], A_Q12[ 15 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 11 ], A_Q12[ 10 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 12 ], A_Q12[ 11 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 13 ], A_Q12[ 12 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 14 ], A_Q12[ 13 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 15 ], A_Q12[ 14 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 16 ], A_Q12[ 15 ] );
             }
 
             /* Update states */
-            CNG_sig_Q10[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q10[ MAX_LPC_ORDER + i ], sum_Q6, 4 );
-
-            frame[ i ] = silk_ADD_SAT16( frame[ i ], silk_RSHIFT_ROUND( CNG_sig_Q10[ MAX_LPC_ORDER + i ], 10 ) );
+            CNG_sig_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q14[ MAX_LPC_ORDER + i ], LPC_pred_Q10, 4 );
+            
+            /* Scale with Gain and add to input signal */
+            frame[ i ] = (opus_int16)silk_ADD_SAT16( frame[ i ], silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( CNG_sig_Q14[ MAX_LPC_ORDER + i ], gain_Q10 ), 8 ) ) );
+            
         }
-        silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q10[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
+        silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q14[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
     } else {
         silk_memset( psCNG->CNG_synth_state, 0, psDec->LPC_order *  sizeof( opus_int32 ) );
     }
     RESTORE_STACK;
 }
--- a/media/libopus/silk/NLSF_del_dec_quant.c
+++ b/media/libopus/silk/NLSF_del_dec_quant.c
@@ -41,18 +41,19 @@ opus_int32 silk_NLSF_del_dec_quant(     
     const opus_uint8            ec_rates_Q5[],                  /* I    Rates []                                    */
     const opus_int              quant_step_size_Q16,            /* I    Quantization step size                      */
     const opus_int16            inv_quant_step_size_Q6,         /* I    Inverse quantization step size              */
     const opus_int32            mu_Q20,                         /* I    R/D tradeoff                                */
     const opus_int16            order                           /* I    Number of input values                      */
 )
 {
     opus_int         i, j, nStates, ind_tmp, ind_min_max, ind_max_min, in_Q10, res_Q10;
-    opus_int         pred_Q10, diff_Q10, out0_Q10, out1_Q10, rate0_Q5, rate1_Q5;
-    opus_int32       RD_tmp_Q25, min_Q25, min_max_Q25, max_min_Q25, pred_coef_Q16;
+    opus_int         pred_Q10, diff_Q10, rate0_Q5, rate1_Q5;
+    opus_int16       out0_Q10, out1_Q10;
+    opus_int32       RD_tmp_Q25, min_Q25, min_max_Q25, max_min_Q25;
     opus_int         ind_sort[         NLSF_QUANT_DEL_DEC_STATES ];
     opus_int8        ind[              NLSF_QUANT_DEL_DEC_STATES ][ MAX_LPC_ORDER ];
     opus_int16       prev_out_Q10[ 2 * NLSF_QUANT_DEL_DEC_STATES ];
     opus_int32       RD_Q25[       2 * NLSF_QUANT_DEL_DEC_STATES ];
     opus_int32       RD_min_Q25[       NLSF_QUANT_DEL_DEC_STATES ];
     opus_int32       RD_max_Q25[       NLSF_QUANT_DEL_DEC_STATES ];
     const opus_uint8 *rates_Q5;
 
@@ -69,33 +70,32 @@ opus_int32 silk_NLSF_del_dec_quant(     
         } else if( i == 0 ) {
             out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
         } else if( i == -1 ) {
             out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
         } else {
             out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
             out1_Q10 = silk_ADD16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
         }
-        out0_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_SMULWB( (opus_int32)out0_Q10, quant_step_size_Q16 );
-        out1_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_SMULWB( (opus_int32)out1_Q10, quant_step_size_Q16 );
+        out0_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_RSHIFT( silk_SMULBB( out0_Q10, quant_step_size_Q16 ), 16 );
+        out1_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_RSHIFT( silk_SMULBB( out1_Q10, quant_step_size_Q16 ), 16 );
     }
 
     silk_assert( (NLSF_QUANT_DEL_DEC_STATES & (NLSF_QUANT_DEL_DEC_STATES-1)) == 0 );     /* must be power of two */
 
     nStates = 1;
     RD_Q25[ 0 ] = 0;
     prev_out_Q10[ 0 ] = 0;
     for( i = order - 1; ; i-- ) {
         rates_Q5 = &ec_rates_Q5[ ec_ix[ i ] ];
-        pred_coef_Q16 = silk_LSHIFT( (opus_int32)pred_coef_Q8[ i ], 8 );
         in_Q10 = x_Q10[ i ];
         for( j = 0; j < nStates; j++ ) {
-            pred_Q10 = silk_SMULWB( pred_coef_Q16, prev_out_Q10[ j ] );
+            pred_Q10 = silk_RSHIFT( silk_SMULBB( (opus_int16)pred_coef_Q8[ i ], prev_out_Q10[ j ] ), 8 );
             res_Q10  = silk_SUB16( in_Q10, pred_Q10 );
-            ind_tmp  = silk_SMULWB( (opus_int32)inv_quant_step_size_Q6, res_Q10 );
+            ind_tmp  = silk_RSHIFT( silk_SMULBB( inv_quant_step_size_Q6, res_Q10 ), 16 );
             ind_tmp  = silk_LIMIT( ind_tmp, -NLSF_QUANT_MAX_AMPLITUDE_EXT, NLSF_QUANT_MAX_AMPLITUDE_EXT-1 );
             ind[ j ][ i ] = (opus_int8)ind_tmp;
 
             /* compute outputs for ind_tmp and ind_tmp + 1 */
             out0_Q10 = out0_Q10_table[ ind_tmp + NLSF_QUANT_MAX_AMPLITUDE_EXT ];
             out1_Q10 = out1_Q10_table[ ind_tmp + NLSF_QUANT_MAX_AMPLITUDE_EXT ];
 
             out0_Q10  = silk_ADD16( out0_Q10, pred_Q10 );
--- a/media/libopus/silk/NLSF_encode.c
+++ b/media/libopus/silk/NLSF_encode.c
@@ -41,17 +41,17 @@ opus_int32 silk_NLSF_encode(            
     const silk_NLSF_CB_struct   *psNLSF_CB,                     /* I    Codebook object                             */
     const opus_int16            *pW_QW,                         /* I    NLSF weight vector [ LPC_ORDER ]            */
     const opus_int              NLSF_mu_Q20,                    /* I    Rate weight for the RD optimization         */
     const opus_int              nSurvivors,                     /* I    Max survivors after first stage             */
     const opus_int              signalType                      /* I    Signal type: 0/1/2                          */
 )
 {
     opus_int         i, s, ind1, bestIndex, prob_Q8, bits_q7;
-    opus_int32       W_tmp_Q9;
+    opus_int32       W_tmp_Q9, ret;
     VARDECL( opus_int32, err_Q26 );
     VARDECL( opus_int32, RD_Q25 );
     VARDECL( opus_int, tempIndices1 );
     VARDECL( opus_int8, tempIndices2 );
     opus_int16       res_Q15[      MAX_LPC_ORDER ];
     opus_int16       res_Q10[      MAX_LPC_ORDER ];
     opus_int16       NLSF_tmp_Q15[ MAX_LPC_ORDER ];
     opus_int16       W_tmp_QW[     MAX_LPC_ORDER ];
@@ -126,11 +126,12 @@ opus_int32 silk_NLSF_encode(            
     silk_insertion_sort_increasing( RD_Q25, &bestIndex, nSurvivors, 1 );
 
     NLSFIndices[ 0 ] = (opus_int8)tempIndices1[ bestIndex ];
     silk_memcpy( &NLSFIndices[ 1 ], &tempIndices2[ bestIndex * MAX_LPC_ORDER ], psNLSF_CB->order * sizeof( opus_int8 ) );
 
     /* Decode */
     silk_NLSF_decode( pNLSF_Q15, NLSFIndices, psNLSF_CB );
 
+    ret = RD_Q25[ 0 ];
     RESTORE_STACK;
-    return RD_Q25[ 0 ];
+    return ret;
 }
--- a/media/libopus/silk/NSQ.c
+++ b/media/libopus/silk/NSQ.c
@@ -26,16 +26,18 @@ POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 #include "main.h"
 #include "stack_alloc.h"
+#include "NSQ.h"
+
 
 static OPUS_INLINE void silk_nsq_scale_states(
     const silk_encoder_state *psEncC,           /* I    Encoder State                   */
     silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
     const opus_int32    x_Q3[],                 /* I    input in Q3                     */
     opus_int32          x_sc_Q10[],             /* O    input scaled with 1/Gain        */
     const opus_int16    sLTP[],                 /* I    re-whitened LTP state in Q0     */
     opus_int32          sLTP_Q15[],             /* O    LTP state matching scaled input */
@@ -61,17 +63,18 @@ static OPUS_INLINE void silk_noise_shape
     opus_int32          HarmShapeFIRPacked_Q14, /* I                                    */
     opus_int            Tilt_Q14,               /* I    Spectral tilt                   */
     opus_int32          LF_shp_Q14,             /* I                                    */
     opus_int32          Gain_Q16,               /* I                                    */
     opus_int            Lambda_Q10,             /* I                                    */
     opus_int            offset_Q10,             /* I                                    */
     opus_int            length,                 /* I    Input length                    */
     opus_int            shapingLPCOrder,        /* I    Noise shaping AR filter order   */
-    opus_int            predictLPCOrder         /* I    Prediction filter order         */
+    opus_int            predictLPCOrder,        /* I    Prediction filter order         */
+    int                 arch                    /* I    Architecture                    */
 );
 #endif
 
 void silk_NSQ_c
 (
     const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */
     silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
     SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
@@ -150,17 +153,17 @@ void silk_NSQ_c
                 NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
             }
         }
 
         silk_nsq_scale_states( psEncC, NSQ, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType );
 
         silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14,
             AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10,
-            offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder );
+            offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder, psEncC->arch );
 
         x_Q3   += psEncC->subfr_length;
         pulses += psEncC->subfr_length;
         pxq    += psEncC->subfr_length;
     }
 
     /* Update lagPrev for next frame */
     NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ];
@@ -193,59 +196,47 @@ void silk_noise_shape_quantizer(
     opus_int32          HarmShapeFIRPacked_Q14, /* I                                    */
     opus_int            Tilt_Q14,               /* I    Spectral tilt                   */
     opus_int32          LF_shp_Q14,             /* I                                    */
     opus_int32          Gain_Q16,               /* I                                    */
     opus_int            Lambda_Q10,             /* I                                    */
     opus_int            offset_Q10,             /* I                                    */
     opus_int            length,                 /* I    Input length                    */
     opus_int            shapingLPCOrder,        /* I    Noise shaping AR filter order   */
-    opus_int            predictLPCOrder         /* I    Prediction filter order         */
+    opus_int            predictLPCOrder,        /* I    Prediction filter order         */
+    int                 arch                    /* I    Architecture                    */
 )
 {
-    opus_int     i, j;
+    opus_int     i;
     opus_int32   LTP_pred_Q13, LPC_pred_Q10, n_AR_Q12, n_LTP_Q13;
     opus_int32   n_LF_Q12, r_Q10, rr_Q10, q1_Q0, q1_Q10, q2_Q10, rd1_Q20, rd2_Q20;
     opus_int32   exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
     opus_int32   tmp1, tmp2, sLF_AR_shp_Q14;
     opus_int32   *psLPC_Q14, *shp_lag_ptr, *pred_lag_ptr;
+#ifdef silk_short_prediction_create_arch_coef
+    opus_int32   a_Q12_arch[MAX_LPC_ORDER];
+#endif
 
     shp_lag_ptr  = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
     pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
     Gain_Q10     = silk_RSHIFT( Gain_Q16, 6 );
 
     /* Set up short term AR state */
     psLPC_Q14 = &NSQ->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 ];
 
+#ifdef silk_short_prediction_create_arch_coef
+    silk_short_prediction_create_arch_coef(a_Q12_arch, a_Q12, predictLPCOrder);
+#endif
+
     for( i = 0; i < length; i++ ) {
         /* Generate dither */
         NSQ->rand_seed = silk_RAND( NSQ->rand_seed );
 
         /* Short-term prediction */
-        silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 );
-        /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
-        LPC_pred_Q10 = silk_RSHIFT( predictLPCOrder, 1 );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[  0 ], a_Q12[ 0 ] );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -1 ], a_Q12[ 1 ] );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -2 ], a_Q12[ 2 ] );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -3 ], a_Q12[ 3 ] );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -4 ], a_Q12[ 4 ] );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -5 ], a_Q12[ 5 ] );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -6 ], a_Q12[ 6 ] );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -7 ], a_Q12[ 7 ] );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -8 ], a_Q12[ 8 ] );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -9 ], a_Q12[ 9 ] );
-        if( predictLPCOrder == 16 ) {
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -10 ], a_Q12[ 10 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -11 ], a_Q12[ 11 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -12 ], a_Q12[ 12 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -13 ], a_Q12[ 13 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -14 ], a_Q12[ 14 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -15 ], a_Q12[ 15 ] );
-        }
+        LPC_pred_Q10 = silk_noise_shape_quantizer_short_prediction(psLPC_Q14, a_Q12, a_Q12_arch, predictLPCOrder, arch);
 
         /* Long-term prediction */
         if( signalType == TYPE_VOICED ) {
             /* Unrolled loop */
             /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
             LTP_pred_Q13 = 2;
             LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[  0 ], b_Q14[ 0 ] );
             LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -1 ], b_Q14[ 1 ] );
@@ -254,33 +245,18 @@ void silk_noise_shape_quantizer(
             LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -4 ], b_Q14[ 4 ] );
             pred_lag_ptr++;
         } else {
             LTP_pred_Q13 = 0;
         }
 
         /* Noise shape feedback */
         silk_assert( ( shapingLPCOrder & 1 ) == 0 );   /* check that order is even */
-        tmp2 = psLPC_Q14[ 0 ];
-        tmp1 = NSQ->sAR2_Q14[ 0 ];
-        NSQ->sAR2_Q14[ 0 ] = tmp2;
-        n_AR_Q12 = silk_RSHIFT( shapingLPCOrder, 1 );
-        n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp2, AR_shp_Q13[ 0 ] );
-        for( j = 2; j < shapingLPCOrder; j += 2 ) {
-            tmp2 = NSQ->sAR2_Q14[ j - 1 ];
-            NSQ->sAR2_Q14[ j - 1 ] = tmp1;
-            n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp1, AR_shp_Q13[ j - 1 ] );
-            tmp1 = NSQ->sAR2_Q14[ j + 0 ];
-            NSQ->sAR2_Q14[ j + 0 ] = tmp2;
-            n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp2, AR_shp_Q13[ j ] );
-        }
-        NSQ->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1;
-        n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] );
+        n_AR_Q12 = silk_NSQ_noise_shape_feedback_loop(psLPC_Q14, NSQ->sAR2_Q14, AR_shp_Q13, shapingLPCOrder, arch);
 
-        n_AR_Q12 = silk_LSHIFT32( n_AR_Q12, 1 );                                /* Q11 -> Q12 */
         n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sLF_AR_shp_Q14, Tilt_Q14 );
 
         n_LF_Q12 = silk_SMULWB( NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - 1 ], LF_shp_Q14 );
         n_LF_Q12 = silk_SMLAWT( n_LF_Q12, NSQ->sLF_AR_shp_Q14, LF_shp_Q14 );
 
         silk_assert( lag > 0 || signalType != TYPE_VOICED );
 
         /* Combine prediction and noise shaping signals */
new file mode 100644
--- /dev/null
+++ b/media/libopus/silk/NSQ.h
@@ -0,0 +1,101 @@
+/***********************************************************************
+Copyright (c) 2014 Vidyo.
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+#ifndef SILK_NSQ_H
+#define SILK_NSQ_H
+
+#include "SigProc_FIX.h"
+
+#undef silk_short_prediction_create_arch_coef
+
+static OPUS_INLINE opus_int32 silk_noise_shape_quantizer_short_prediction_c(const opus_int32 *buf32, const opus_int16 *coef16, opus_int order)
+{
+    opus_int32 out;
+    silk_assert( order == 10 || order == 16 );
+    /* Short-term prediction: accumulates, with silk_SMLAWB(), buf32[ -k ] against coef16[ k ], walking backwards in memory from buf32[ 0 ], and returns the total. */
+    /* The accumulator starts at order/2 instead of 0: avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+    out = silk_RSHIFT( order, 1 );
+    out = silk_SMLAWB( out, buf32[  0 ], coef16[ 0 ] );
+    out = silk_SMLAWB( out, buf32[ -1 ], coef16[ 1 ] );
+    out = silk_SMLAWB( out, buf32[ -2 ], coef16[ 2 ] );
+    out = silk_SMLAWB( out, buf32[ -3 ], coef16[ 3 ] );
+    out = silk_SMLAWB( out, buf32[ -4 ], coef16[ 4 ] );
+    out = silk_SMLAWB( out, buf32[ -5 ], coef16[ 5 ] );
+    out = silk_SMLAWB( out, buf32[ -6 ], coef16[ 6 ] );
+    out = silk_SMLAWB( out, buf32[ -7 ], coef16[ 7 ] );
+    out = silk_SMLAWB( out, buf32[ -8 ], coef16[ 8 ] );
+    out = silk_SMLAWB( out, buf32[ -9 ], coef16[ 9 ] );
+    /* Taps 10..15 are applied only when order == 16; for any other order just the ten taps above contribute. */
+    if( order == 16 )
+    {
+        out = silk_SMLAWB( out, buf32[ -10 ], coef16[ 10 ] );
+        out = silk_SMLAWB( out, buf32[ -11 ], coef16[ 11 ] );
+        out = silk_SMLAWB( out, buf32[ -12 ], coef16[ 12 ] );
+        out = silk_SMLAWB( out, buf32[ -13 ], coef16[ 13 ] );
+        out = silk_SMLAWB( out, buf32[ -14 ], coef16[ 14 ] );
+        out = silk_SMLAWB( out, buf32[ -15 ], coef16[ 15 ] );
+    }
+    return out;
+}
+/* Generic dispatch: arch is evaluated and discarded and the C version is always called. coefRev is never expanded here, which is what lets callers pass an identifier that is only declared when silk_short_prediction_create_arch_coef is defined. */
+#define silk_noise_shape_quantizer_short_prediction(in, coef, coefRev, order, arch)  ((void)(arch),silk_noise_shape_quantizer_short_prediction_c(in, coef, order))
+
+static OPUS_INLINE opus_int32 silk_NSQ_noise_shape_feedback_loop_c(const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef, opus_int order)
+{
+    opus_int32 out;
+    opus_int32 tmp1, tmp2;
+    opus_int j;
+    /* Pushes data0[ 0 ] into the delay line data1[ 0..order-1 ] (every old entry moves up one slot, the old last entry is dropped) while accumulating the filter output. */
+    tmp2 = data0[0];
+    tmp1 = data1[0];
+    data1[0] = tmp2;
+    /* Tap 0 uses the new sample; the accumulator starts at order/2 rather than 0. */
+    out = silk_RSHIFT(order, 1);
+    out = silk_SMLAWB(out, tmp2, coef[0]);
+    /* Two taps per iteration, so order is expected to be even and at least 2; tap k pairs coef[ k ] with the value data1[ k - 1 ] held on entry. */
+    for (j = 2; j < order; j += 2) {
+        tmp2 = data1[j - 1];
+        data1[j - 1] = tmp1;
+        out = silk_SMLAWB(out, tmp1, coef[j - 1]);
+        tmp1 = data1[j + 0];
+        data1[j + 0] = tmp2;
+        out = silk_SMLAWB(out, tmp2, coef[j]);
+    }
+    data1[order - 1] = tmp1;
+    out = silk_SMLAWB(out, tmp1, coef[order - 1]);
+    /* Q11 -> Q12 */
+    out = silk_LSHIFT32( out, 1 );
+    return out;
+}
+/* Generic dispatch: arch is evaluated and discarded and the C version is always called. arch is parenthesized so that any expression can be passed for it. */
+#define silk_NSQ_noise_shape_feedback_loop(data0, data1, coef, order, arch)  ((void)(arch),silk_NSQ_noise_shape_feedback_loop_c(data0, data1, coef, order))
+
+#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+#include "arm/NSQ_neon.h"
+#endif
+
+#endif /* SILK_NSQ_H */
--- a/media/libopus/silk/NSQ_del_dec.c
+++ b/media/libopus/silk/NSQ_del_dec.c
@@ -26,16 +26,18 @@ POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 #include "main.h"
 #include "stack_alloc.h"
+#include "NSQ.h"
+
 
 typedef struct {
     opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ];
     opus_int32 RandState[ DECISION_DELAY ];
     opus_int32 Q_Q10[     DECISION_DELAY ];
     opus_int32 Xq_Q14[    DECISION_DELAY ];
     opus_int32 Pred_Q15[  DECISION_DELAY ];
     opus_int32 Shape_Q14[ DECISION_DELAY ];
@@ -101,17 +103,18 @@ static OPUS_INLINE void silk_noise_shape
     opus_int            offset_Q10,             /* I                                        */
     opus_int            length,                 /* I    Input length                        */
     opus_int            subfr,                  /* I    Subframe number                     */
     opus_int            shapingLPCOrder,        /* I    Shaping LPC filter order            */
     opus_int            predictLPCOrder,        /* I    Prediction filter order             */
     opus_int            warping_Q16,            /* I                                        */
     opus_int            nStatesDelayedDecision, /* I    Number of states in decision tree   */
     opus_int            *smpl_buf_idx,          /* I    Index to newest samples in buffers  */
-    opus_int            decisionDelay           /* I                                        */
+    opus_int            decisionDelay,          /* I                                        */
+    int                 arch                    /* I                                        */
 );
 
 void silk_NSQ_del_dec_c(
     const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */
     silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
     SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
     const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */
     opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */
@@ -255,17 +258,17 @@ void silk_NSQ_del_dec_c(
         }
 
         silk_nsq_del_dec_scale_states( psEncC, NSQ, psDelDec, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k,
             psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType, decisionDelay );
 
         silk_noise_shape_quantizer_del_dec( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15,
             delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ],
             Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder,
-            psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay );
+            psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay, psEncC->arch );
 
         x_Q3   += psEncC->subfr_length;
         pulses += psEncC->subfr_length;
         pxq    += psEncC->subfr_length;
     }
 
     /* Find winner */
     RDmin_Q10 = psDelDec[ 0 ].RD_Q10;
@@ -328,38 +331,47 @@ static OPUS_INLINE void silk_noise_shape
     opus_int            offset_Q10,             /* I                                        */
     opus_int            length,                 /* I    Input length                        */
     opus_int            subfr,                  /* I    Subframe number                     */
     opus_int            shapingLPCOrder,        /* I    Shaping LPC filter order            */
     opus_int            predictLPCOrder,        /* I    Prediction filter order             */
     opus_int            warping_Q16,            /* I                                        */
     opus_int            nStatesDelayedDecision, /* I    Number of states in decision tree   */
     opus_int            *smpl_buf_idx,          /* I    Index to newest samples in buffers  */
-    opus_int            decisionDelay           /* I                                        */
+    opus_int            decisionDelay,          /* I                                        */
+    int                 arch                    /* I                                        */
 )
 {
     opus_int     i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx;
     opus_int32   Winner_rand_state;
     opus_int32   LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14;
     opus_int32   n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10;
     opus_int32   q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
     opus_int32   tmp1, tmp2, sLF_AR_shp_Q14;
     opus_int32   *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14;
+#ifdef silk_short_prediction_create_arch_coef
+    opus_int32   a_Q12_arch[MAX_LPC_ORDER];
+#endif
+
     VARDECL( NSQ_sample_pair, psSampleState );
     NSQ_del_dec_struct *psDD;
     NSQ_sample_struct  *psSS;
     SAVE_STACK;
 
     silk_assert( nStatesDelayedDecision > 0 );
     ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair );
 
     shp_lag_ptr  = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
     pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
     Gain_Q10     = silk_RSHIFT( Gain_Q16, 6 );
 
+#ifdef silk_short_prediction_create_arch_coef
+    silk_short_prediction_create_arch_coef(a_Q12_arch, a_Q12, predictLPCOrder);
+#endif
+
     for( i = 0; i < length; i++ ) {
         /* Perform common calculations used in all states */
 
         /* Long-term prediction */
         if( signalType == TYPE_VOICED ) {
             /* Unrolled loop */
             /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
             LTP_pred_Q14 = 2;
@@ -393,37 +405,17 @@ static OPUS_INLINE void silk_noise_shape
             psSS = psSampleState[ k ];
 
             /* Generate dither */
             psDD->Seed = silk_RAND( psDD->Seed );
 
             /* Pointer used in short term prediction and shaping */
             psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ];
             /* Short-term prediction */
-            silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 );
-            /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
-            LPC_pred_Q14 = silk_RSHIFT( predictLPCOrder, 1 );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[  0 ], a_Q12[ 0 ] );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -1 ], a_Q12[ 1 ] );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -2 ], a_Q12[ 2 ] );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -3 ], a_Q12[ 3 ] );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -4 ], a_Q12[ 4 ] );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -5 ], a_Q12[ 5 ] );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -6 ], a_Q12[ 6 ] );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -7 ], a_Q12[ 7 ] );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -8 ], a_Q12[ 8 ] );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -9 ], a_Q12[ 9 ] );
-            if( predictLPCOrder == 16 ) {
-                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -10 ], a_Q12[ 10 ] );
-                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -11 ], a_Q12[ 11 ] );
-                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -12 ], a_Q12[ 12 ] );
-                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -13 ], a_Q12[ 13 ] );
-                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -14 ], a_Q12[ 14 ] );
-                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -15 ], a_Q12[ 15 ] );
-            }
+            LPC_pred_Q14 = silk_noise_shape_quantizer_short_prediction(psLPC_Q14, a_Q12, a_Q12_arch, predictLPCOrder, arch);
             LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 );                              /* Q10 -> Q14 */
 
             /* Noise shape feedback */
             silk_assert( ( shapingLPCOrder & 1 ) == 0 );   /* check that order is even */
             /* Output of lowpass section */
             tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 );
             /* Output of allpass section */
             tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 );
--- a/media/libopus/silk/PLC.c
+++ b/media/libopus/silk/PLC.c
@@ -360,17 +360,18 @@ static OPUS_INLINE void silk_PLC_conceal
         LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  8 ], A_Q12[ 7 ] );
         LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  9 ], A_Q12[ 8 ] );
         LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] );
         for( j = 10; j < psDec->LPC_order; j++ ) {
             LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - j - 1 ], A_Q12[ j ] );
         }
 
         /* Add prediction to LPC excitation */
-        sLPC_Q14_ptr[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT32( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], LPC_pred_Q10, 4 );
+        sLPC_Q14_ptr[ MAX_LPC_ORDER + i ] = silk_ADD_SAT32( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ],
+                                            silk_LSHIFT_SAT32( LPC_pred_Q10, 4 ));
 
         /* Scale with Gain */
         frame[ i ] = (opus_int16)silk_SAT16( silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], prevGain_Q10[ 1 ] ), 8 ) ) );
     }
 
     /* Save LPC state */
     silk_memcpy( psDec->sLPC_Q14_buf, &sLPC_Q14_ptr[ psDec->frame_length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
 
new file mode 100644
--- /dev/null
+++ b/media/libopus/silk/arm/NSQ_neon.c
@@ -0,0 +1,112 @@
+/***********************************************************************
+Copyright (C) 2014 Vidyo
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <arm_neon.h>
+#include "main.h"
+#include "stack_alloc.h"
+#include "NSQ.h"
+#include "celt/cpu_support.h"
+#include "celt/arm/armcpu.h"
+
+opus_int32 silk_noise_shape_quantizer_short_prediction_neon(const opus_int32 *buf32, const opus_int32 *coef32, opus_int order)
+{
+    int32x4_t coef0 = vld1q_s32(coef32);
+    int32x4_t coef1 = vld1q_s32(coef32 + 4);
+    int32x4_t coef2 = vld1q_s32(coef32 + 8);
+    int32x4_t coef3 = vld1q_s32(coef32 + 12);
+    /* NEON short-term prediction: pairs coef32[ 0 ] .. coef32[ 15 ] with the samples buf32[ -15 ] .. buf32[ 0 ]; all 16 of each are read whatever the order. */
+    int32x4_t a0 = vld1q_s32(buf32 - 15);
+    int32x4_t a1 = vld1q_s32(buf32 - 11);
+    int32x4_t a2 = vld1q_s32(buf32 - 7);
+    int32x4_t a3 = vld1q_s32(buf32 - 3);
+    /* vqdmulhq_s32 is a saturating doubling multiply that keeps the high 32 bits of each 64-bit product. */
+    int32x4_t b0 = vqdmulhq_s32(coef0, a0);
+    int32x4_t b1 = vqdmulhq_s32(coef1, a1);
+    int32x4_t b2 = vqdmulhq_s32(coef2, a2);
+    int32x4_t b3 = vqdmulhq_s32(coef3, a3);
+    /* Reduce the sixteen products to a single value: lane-wise 32-bit adds first ... */
+    int32x4_t c0 = vaddq_s32(b0, b1);
+    int32x4_t c1 = vaddq_s32(b2, b3);
+
+    int32x4_t d = vaddq_s32(c0, c1);
+    /* ... then a widening pairwise add followed by one 64-bit add. */
+    int64x2_t e = vpaddlq_s32(d);
+
+    int64x1_t f = vadd_s64(vget_low_s64(e), vget_high_s64(e));
+    /* Keep 32 bits of the 64-bit sum: lane 0 of its 32-bit view (presumably the low half on a little-endian target -- confirm). */
+    opus_int32 out = vget_lane_s32(vreinterpret_s32_s64(f), 0);
+    /* order only contributes this order/2 offset; it does not limit how many taps were multiplied above. */
+    out += silk_RSHIFT( order, 1 );
+
+    return out;
+}
+
+
+opus_int32 silk_NSQ_noise_shape_feedback_loop_neon(const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef, opus_int order)
+{
+    opus_int32 out;
+    if (order == 8)  /* Only order 8 has a NEON path; every other order is handed to the C version at the bottom. */
+    {
+        int32x4_t a00 = vdupq_n_s32(data0[0]);
+        int32x4_t a01 = vld1q_s32(data1);  /* data1[0] ... [3] */
+        /* a0 and a1 together hold the delay line as it is stored back below: data0[0] followed by the old data1[0] ... [6]. */
+        int32x4_t a0 = vextq_s32 (a00, a01, 3); /* data0[0] data1[0] ...[2] */
+        int32x4_t a1 = vld1q_s32(data1 + 3);  /* data1[3] ... [6] */
+
+        /*TODO: Convert these once in advance instead of once per sample, like
+          silk_noise_shape_quantizer_short_prediction_neon() does.*/
+        int16x8_t coef16 = vld1q_s16(coef);
+        int32x4_t coef0 = vmovl_s16(vget_low_s16(coef16));
+        int32x4_t coef1 = vmovl_s16(vget_high_s16(coef16));
+
+        /*This is not bit-exact with the C version, since we do not drop the
+          lower 16 bits of each multiply, but wait until the end to truncate
+          precision. This is an encoder-specific calculation (and unlike
+          silk_noise_shape_quantizer_short_prediction_neon(), is not meant to
+          simulate what the decoder will do). We still could use vqdmulhq_s32()
+          like silk_noise_shape_quantizer_short_prediction_neon() and save
+          half the multiplies, but the speed difference is not large, since we
+          then need two extra adds.*/
+        int64x2_t b0 = vmull_s32(vget_low_s32(a0), vget_low_s32(coef0));
+        int64x2_t b1 = vmlal_s32(b0, vget_high_s32(a0), vget_high_s32(coef0));
+        int64x2_t b2 = vmlal_s32(b1, vget_low_s32(a1), vget_low_s32(coef1));
+        int64x2_t b3 = vmlal_s32(b2, vget_high_s32(a1), vget_high_s32(coef1));
+        /* Fold the two 64-bit partial sums into one, then apply a single rounding right shift by 15 to the total. */
+        int64x1_t c = vadd_s64(vget_low_s64(b3), vget_high_s64(b3));
+        int64x1_t cS = vrshr_n_s64(c, 15);
+        int32x2_t d = vreinterpret_s32_s64(cS);
+        /* Return lane 0 of the 32-bit view of the shifted sum and write the shifted delay line back to data1[0] ... [7]. */
+        out = vget_lane_s32(d, 0);
+        vst1q_s32(data1, a0);
+        vst1q_s32(data1 + 4, a1);
+        return out;
+    }
+    return silk_NSQ_noise_shape_feedback_loop_c(data0, data1, coef, order);
+}
new file mode 100644
--- /dev/null
+++ b/media/libopus/silk/arm/NSQ_neon.h
@@ -0,0 +1,113 @@
+/***********************************************************************
+Copyright (C) 2014 Vidyo
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+#ifndef SILK_NSQ_NEON_H
+#define SILK_NSQ_NEON_H
+
+#include "cpu_support.h"
+
+#undef silk_short_prediction_create_arch_coef
+/* For vectorized calc, reverse a_Q12 coefs, convert to 32-bit, and scale by 2^15 for vqdmulhq_s32.
+   out[ 15 - k ] = in[ k ] * 2^15 for the first 10 coefs, and for coefs 10..15 as well when order == 16;
+   for any other order out[ 0..5 ] are zeroed instead, so all 16 entries of out are always written.
+   The scaling is written as a multiplication instead of `<< 15`: coefs may be negative, and left-shifting
+   a negative signed value is undefined behavior in C. A 16-bit coef times 2^15 always fits in 32 bits. */
+static OPUS_INLINE void silk_short_prediction_create_arch_coef_neon(opus_int32 *out, const opus_int16 *in, opus_int order)
+{
+    out[15] = (opus_int32)in[0] * (1 << 15);
+    out[14] = (opus_int32)in[1] * (1 << 15);
+    out[13] = (opus_int32)in[2] * (1 << 15);
+    out[12] = (opus_int32)in[3] * (1 << 15);
+    out[11] = (opus_int32)in[4] * (1 << 15);
+    out[10] = (opus_int32)in[5] * (1 << 15);
+    out[9]  = (opus_int32)in[6] * (1 << 15);
+    out[8]  = (opus_int32)in[7] * (1 << 15);
+    out[7]  = (opus_int32)in[8] * (1 << 15);
+    out[6]  = (opus_int32)in[9] * (1 << 15);
+    if (order == 16) {
+        out[5] = (opus_int32)in[10] * (1 << 15);
+        out[4] = (opus_int32)in[11] * (1 << 15);
+        out[3] = (opus_int32)in[12] * (1 << 15);
+        out[2] = (opus_int32)in[13] * (1 << 15);
+        out[1] = (opus_int32)in[14] * (1 << 15);
+        out[0] = (opus_int32)in[15] * (1 << 15);
+    } else {
+        out[5] = 0;
+        out[4] = 0;
+        out[3] = 0;
+        out[2] = 0;
+        out[1] = 0;
+        out[0] = 0;
+    }
+}
+
+#if defined(OPUS_ARM_PRESUME_NEON_INTR)
+/* With OPUS_ARM_PRESUME_NEON_INTR defined, the NEON coefficient layout is built unconditionally. */
+#define silk_short_prediction_create_arch_coef(out, in, order) \
+    (silk_short_prediction_create_arch_coef_neon(out, in, order))
+
+#elif defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+/* Run-time detection: out is filled only when arch == OPUS_ARCH_ARM_NEON and left untouched otherwise. NOTE: arch is not a macro parameter -- a variable of that name must be in scope wherever this macro is expanded. */
+#define silk_short_prediction_create_arch_coef(out, in, order) \
+    do { if (arch == OPUS_ARCH_ARM_NEON) { silk_short_prediction_create_arch_coef_neon(out, in, order); } } while (0)
+
+#endif
+
+opus_int32 silk_noise_shape_quantizer_short_prediction_neon(const opus_int32 *buf32, const opus_int32 *coef32, opus_int order);
+
+opus_int32 silk_NSQ_noise_shape_feedback_loop_neon(const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef, opus_int order);
+
+#if defined(OPUS_ARM_PRESUME_NEON_INTR) /* Both helpers bind straight to their NEON versions; arch is evaluated and discarded. */
+#undef silk_noise_shape_quantizer_short_prediction
+#define silk_noise_shape_quantizer_short_prediction(in, coef, coefRev, order, arch) \
+    ((void)(arch),silk_noise_shape_quantizer_short_prediction_neon(in, coefRev, order))
+
+#undef silk_NSQ_noise_shape_feedback_loop
+#define silk_NSQ_noise_shape_feedback_loop(data0, data1, coef, order, arch)  ((void)(arch),silk_NSQ_noise_shape_feedback_loop_neon(data0, data1, coef, order))
+
+#elif defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+/* Run-time detection: arch selects the implementation on every call. arch is parenthesized in each expansion so that any expression can be passed for it. */
+/* silk_noise_shape_quantizer_short_prediction implementations take different parameters based on arch
+   (coef vs. coefRev) so can't use the usual IMPL table implementation */
+#undef silk_noise_shape_quantizer_short_prediction
+#define silk_noise_shape_quantizer_short_prediction(in, coef, coefRev, order, arch)  \
+    ((arch) == OPUS_ARCH_ARM_NEON ? \
+        silk_noise_shape_quantizer_short_prediction_neon(in, coefRev, order) : \
+        silk_noise_shape_quantizer_short_prediction_c(in, coef, order))
+/* Table of feedback-loop implementations, indexed by arch masked with OPUS_ARCHMASK; only declared here, not defined. */
+extern opus_int32
+ (*const SILK_NSQ_NOISE_SHAPE_FEEDBACK_LOOP_IMPL[OPUS_ARCHMASK+1])(
+ const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef,
+ opus_int order);
+
+#undef silk_NSQ_noise_shape_feedback_loop
+#define silk_NSQ_noise_shape_feedback_loop(data0, data1, coef, order, arch) \
+ (SILK_NSQ_NOISE_SHAPE_FEEDBACK_LOOP_IMPL[(arch)&OPUS_ARCHMASK](data0, data1, \
+ coef, order))
+
+#endif
+
+#endif /* SILK_NSQ_NEON_H */
new file mode 100644
--- /dev/null
+++ b/media/libopus/silk/arm/arm_silk_map.c
@@ -0,0 +1,55 @@
+/***********************************************************************
+Copyright (C) 2014 Vidyo
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "NSQ.h"
+
+#if defined(OPUS_HAVE_RTCD)
+
+# if (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && \
+ !defined(OPUS_ARM_PRESUME_NEON_INTR))
+
+/*There is no table for silk_noise_shape_quantizer_short_prediction because the
+   NEON version takes different parameters than the C version.
+  Instead RTCD is done by a conditional on arch inside the macro that replaces it.
+  See NSQ_neon.h for details.*/
+/*Run-time dispatch table for silk_NSQ_noise_shape_feedback_loop, indexed by arch&OPUS_ARCHMASK.*/
+opus_int32
+ (*const SILK_NSQ_NOISE_SHAPE_FEEDBACK_LOOP_IMPL[OPUS_ARCHMASK+1])(
+ const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef,
+ opus_int order) = {
+  silk_NSQ_noise_shape_feedback_loop_c,    /* ARMv4 */
+  silk_NSQ_noise_shape_feedback_loop_c,    /* EDSP */
+  silk_NSQ_noise_shape_feedback_loop_c,    /* Media */
+  silk_NSQ_noise_shape_feedback_loop_neon, /* NEON */
+};
+
+# endif
+
+#endif /* OPUS_HAVE_RTCD */
new file mode 100644
--- /dev/null
+++ b/media/libopus/silk/arm/macros_arm64.h
@@ -0,0 +1,39 @@
+/***********************************************************************
+Copyright (C) 2015 Vidyo
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_MACROS_ARM64_H
+#define SILK_MACROS_ARM64_H
+
+#include <arm_neon.h>
+/* Replace any earlier silk_ADD_SAT32 with the scalar saturating-add intrinsic from arm_neon.h. */
+#undef silk_ADD_SAT32
+#define silk_ADD_SAT32(a, b) (vqadds_s32((a), (b)))
+/* Likewise replace silk_SUB_SAT32 with the scalar saturating-subtract intrinsic. */
+#undef silk_SUB_SAT32
+#define silk_SUB_SAT32(a, b) (vqsubs_s32((a), (b)))
+
+#endif /* SILK_MACROS_ARM64_H */
--- a/media/libopus/silk/decode_core.c
+++ b/media/libopus/silk/decode_core.c
@@ -214,17 +214,17 @@ void silk_decode_core(
                 LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 12 ], A_Q12_tmp[ 11 ] );
                 LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 13 ], A_Q12_tmp[ 12 ] );
                 LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 14 ], A_Q12_tmp[ 13 ] );
                 LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 15 ], A_Q12_tmp[ 14 ] );
                 LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 16 ], A_Q12_tmp[ 15 ] );
             }
 
             /* Add prediction to LPC excitation */
-            sLPC_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT32( pres_Q14[ i ], LPC_pred_Q10, 4 );
+            sLPC_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_SAT32( pres_Q14[ i ], silk_LSHIFT_SAT32( LPC_pred_Q10, 4 ) );
 
             /* Scale with gain */
             pxq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14[ MAX_LPC_ORDER + i ], Gain_Q10 ), 8 ) );
         }
 
         /* DEBUG_STORE_DATA( dec.pcm, pxq, psDec->subfr_length * sizeof( opus_int16 ) ) */
 
         /* Update LPC filter state */
--- a/media/libopus/silk/fixed/burg_modified_FIX.c
+++ b/media/libopus/silk/fixed/burg_modified_FIX.c
@@ -145,18 +145,21 @@ void silk_burg_modified_c(
                 x1  = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    -rshifts );            /* Q( -rshifts ) */
                 x2  = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts );            /* Q( -rshifts ) */
                 tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    17 );                  /* Q17 */
                 tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 );                  /* Q17 */
                 for( k = 0; k < n; k++ ) {
                     C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); /* Q( -rshifts ) */
                     C_last_row[ k ]  = silk_MLA( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
                     Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 );                                   /* Q17 */
-                    tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ],            Atmp1 );                      /* Q17 */
-                    tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 );                      /* Q17 */
+                    /* We sometimes get overflows in the multiplications (even beyond +/- 2^32),
+                       but they cancel each other and the real result seems to always fit in a 32-bit
+                       signed integer. This was determined experimentally, not theoretically (unfortunately). */
+                    tmp1 = silk_MLA_ovflw( tmp1, x_ptr[ n - k - 1 ],            Atmp1 );                      /* Q17 */
+                    tmp2 = silk_MLA_ovflw( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 );                      /* Q17 */
                 }
                 tmp1 = -tmp1;                                                                           /* Q17 */
                 tmp2 = -tmp2;                                                                           /* Q17 */
                 for( k = 0; k <= n; k++ ) {
                     CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1,
                         silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) );                    /* Q( -rshift ) */
                     CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2,
                         silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift ) */
@@ -195,22 +198,24 @@ void silk_burg_modified_c(
 
         /* Update inverse prediction gain */
         tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
         tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 );
         if( tmp1 <= minInvGain_Q30 ) {
             /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
             tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 );            /* Q30 */
             rc_Q31 = silk_SQRT_APPROX( tmp2 );                                                  /* Q15 */
-            /* Newton-Raphson iteration */
-            rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 );                   /* Q15 */
-            rc_Q31 = silk_LSHIFT32( rc_Q31, 16 );                                               /* Q31 */
-            if( num < 0 ) {
-                /* Ensure adjusted reflection coefficients has the original sign */
-                rc_Q31 = -rc_Q31;
+            if( rc_Q31 > 0 ) {
+                /* Newton-Raphson iteration */
+                rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 );                       /* Q15 */
+                rc_Q31 = silk_LSHIFT32( rc_Q31, 16 );                                                   /* Q31 */
+                if( num < 0 ) {
+                    /* Ensure adjusted reflection coefficients has the original sign */
+                    rc_Q31 = -rc_Q31;
+                }
             }
             invGain_Q30 = minInvGain_Q30;
             reached_max_gain = 1;
         } else {
             invGain_Q30 = tmp1;
         }
 
         /* Update the AR coefficients */
--- a/media/libopus/silk/fixed/x86/burg_modified_FIX_sse.c
+++ b/media/libopus/silk/fixed/x86/burg_modified_FIX_sse.c
@@ -295,22 +295,24 @@ void silk_burg_modified_sse4_1(
 
         /* Update inverse prediction gain */
         tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
         tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 );
         if( tmp1 <= minInvGain_Q30 ) {
             /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
             tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 );            /* Q30 */
             rc_Q31 = silk_SQRT_APPROX( tmp2 );                                                  /* Q15 */
-            /* Newton-Raphson iteration */
-            rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 );                   /* Q15 */
-            rc_Q31 = silk_LSHIFT32( rc_Q31, 16 );                                               /* Q31 */
-            if( num < 0 ) {
-                /* Ensure adjusted reflection coefficients has the original sign */
-                rc_Q31 = -rc_Q31;
+            if( rc_Q31 > 0 ) {
+                /* Newton-Raphson iteration */
+                rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 );                   /* Q15 */
+                rc_Q31 = silk_LSHIFT32( rc_Q31, 16 );                                               /* Q31 */
+                if( num < 0 ) {
+                    /* Ensure adjusted reflection coefficients has the original sign */
+                    rc_Q31 = -rc_Q31;
+                }
             }
             invGain_Q30 = minInvGain_Q30;
             reached_max_gain = 1;
         } else {
             invGain_Q30 = tmp1;
         }
 
         /* Update the AR coefficients */
--- a/media/libopus/silk/macros.h
+++ b/media/libopus/silk/macros.h
@@ -29,50 +29,52 @@ POSSIBILITY OF SUCH DAMAGE.
 #define SILK_MACROS_H
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 #include "opus_types.h"
 #include "opus_defines.h"
+#include "arch.h"
 
 #if OPUS_GNUC_PREREQ(3, 0)
 #define opus_likely(x)       (__builtin_expect(!!(x), 1))
 #define opus_unlikely(x)     (__builtin_expect(!!(x), 0))
 #else
 #define opus_likely(x)       (!!(x))
 #define opus_unlikely(x)     (!!(x))
 #endif
 
-/* Set this if opus_int64 is a native type of the CPU. */
-#define OPUS_FAST_INT64 (defined(__x86_64__) || defined(__LP64__) || defined(_WIN64))
-
 /* This is an OPUS_INLINE header file for general platform. */
 
 /* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */
 #if OPUS_FAST_INT64
-#define silk_SMULWB(a32, b32)            (((a32) * (opus_int64)((opus_int16)(b32))) >> 16)
+#define silk_SMULWB(a32, b32)            ((opus_int32)(((a32) * (opus_int64)((opus_int16)(b32))) >> 16))
 #else
 #define silk_SMULWB(a32, b32)            ((((a32) >> 16) * (opus_int32)((opus_int16)(b32))) + ((((a32) & 0x0000FFFF) * (opus_int32)((opus_int16)(b32))) >> 16))
 #endif
 
 /* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */
 #if OPUS_FAST_INT64
-#define silk_SMLAWB(a32, b32, c32)       ((a32) + (((b32) * (opus_int64)((opus_int16)(c32))) >> 16))
+#define silk_SMLAWB(a32, b32, c32)       ((opus_int32)((a32) + (((b32) * (opus_int64)((opus_int16)(c32))) >> 16)))
 #else
 #define silk_SMLAWB(a32, b32, c32)       ((a32) + ((((b32) >> 16) * (opus_int32)((opus_int16)(c32))) + ((((b32) & 0x0000FFFF) * (opus_int32)((opus_int16)(c32))) >> 16)))
 #endif
 
 /* (a32 * (b32 >> 16)) >> 16 */
+#if OPUS_FAST_INT64
+#define silk_SMULWT(a32, b32)            ((opus_int32)(((a32) * (opus_int64)((b32) >> 16)) >> 16))
+#else
 #define silk_SMULWT(a32, b32)            (((a32) >> 16) * ((b32) >> 16) + ((((a32) & 0x0000FFFF) * ((b32) >> 16)) >> 16))
+#endif
 
 /* a32 + (b32 * (c32 >> 16)) >> 16 */
 #if OPUS_FAST_INT64
-#define silk_SMLAWT(a32, b32, c32)       ((a32) + (((b32) * ((opus_int64)(c32) >> 16)) >> 16))
+#define silk_SMLAWT(a32, b32, c32)       ((opus_int32)((a32) + (((b32) * ((opus_int64)(c32) >> 16)) >> 16)))
 #else
 #define silk_SMLAWT(a32, b32, c32)       ((a32) + (((b32) >> 16) * ((c32) >> 16)) + ((((b32) & 0x0000FFFF) * ((c32) >> 16)) >> 16))
 #endif
 
 /* (opus_int32)((opus_int16)(a3))) * (opus_int32)((opus_int16)(b32)) output have to be 32bit int */
 #define silk_SMULBB(a32, b32)            ((opus_int32)((opus_int16)(a32)) * (opus_int32)((opus_int16)(b32)))
 
 /* a32 + (opus_int32)((opus_int16)(b32)) * (opus_int32)((opus_int16)(c32)) output have to be 32bit int */
@@ -84,24 +86,24 @@ POSSIBILITY OF SUCH DAMAGE.
 /* a32 + (opus_int32)((opus_int16)(b32)) * (c32 >> 16) */
 #define silk_SMLABT(a32, b32, c32)       ((a32) + ((opus_int32)((opus_int16)(b32))) * ((c32) >> 16))
 
 /* a64 + (b32 * c32) */
 #define silk_SMLAL(a64, b32, c32)        (silk_ADD64((a64), ((opus_int64)(b32) * (opus_int64)(c32))))
 
 /* (a32 * b32) >> 16 */
 #if OPUS_FAST_INT64
-#define silk_SMULWW(a32, b32)            (((opus_int64)(a32) * (b32)) >> 16)
+#define silk_SMULWW(a32, b32)            ((opus_int32)(((opus_int64)(a32) * (b32)) >> 16))
 #else
 #define silk_SMULWW(a32, b32)            silk_MLA(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16))
 #endif
 
 /* a32 + ((b32 * c32) >> 16) */
 #if OPUS_FAST_INT64
-#define silk_SMLAWW(a32, b32, c32)       ((a32) + (((opus_int64)(b32) * (c32)) >> 16))
+#define silk_SMLAWW(a32, b32, c32)       ((opus_int32)((a32) + (((opus_int64)(b32) * (c32)) >> 16)))
 #else
 #define silk_SMLAWW(a32, b32, c32)       silk_MLA(silk_SMLAWB((a32), (b32), (c32)), (b32), silk_RSHIFT_ROUND((c32), 16))
 #endif
 
 /* add/subtract with output saturated */
 #define silk_ADD_SAT32(a, b)             ((((opus_uint32)(a) + (opus_uint32)(b)) & 0x80000000) == 0 ?                              \
                                         ((((a) & (b)) & 0x80000000) != 0 ? silk_int32_MIN : (a)+(b)) :   \
                                         ((((a) | (b)) & 0x80000000) == 0 ? silk_int32_MAX : (a)+(b)) )
@@ -144,10 +146,14 @@ static OPUS_INLINE opus_int32 silk_CLZ32
 #ifdef OPUS_ARM_INLINE_ASM
 #include "arm/macros_armv4.h"
 #endif
 
 #ifdef OPUS_ARM_INLINE_EDSP
 #include "arm/macros_armv5e.h"
 #endif
 
+#ifdef OPUS_ARM_PRESUME_AARCH64_NEON_INTR
+#include "arm/macros_arm64.h"
+#endif
+
 #endif /* SILK_MACROS_H */
 
--- a/media/libopus/silk/mips/NSQ_del_dec_mipsr1.h
+++ b/media/libopus/silk/mips/NSQ_del_dec_mipsr1.h
@@ -57,17 +57,18 @@ static inline void silk_noise_shape_quan
     opus_int            offset_Q10,             /* I                                        */
     opus_int            length,                 /* I    Input length                        */
     opus_int            subfr,                  /* I    Subframe number                     */
     opus_int            shapingLPCOrder,        /* I    Shaping LPC filter order            */
     opus_int            predictLPCOrder,        /* I    Prediction filter order             */
     opus_int            warping_Q16,            /* I                                        */
     opus_int            nStatesDelayedDecision, /* I    Number of states in decision tree   */
     opus_int            *smpl_buf_idx,          /* I    Index to newest samples in buffers  */
-    opus_int            decisionDelay           /* I                                        */
+    opus_int            decisionDelay,          /* I                                        */
+    int                 arch                    /* I                                        */
 )
 {
     opus_int     i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx;
     opus_int32   Winner_rand_state;
     opus_int32   LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14;
     opus_int32   n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10;
     opus_int32   q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
     opus_int32   tmp1, tmp2, sLF_AR_shp_Q14;
@@ -77,16 +78,19 @@ static inline void silk_noise_shape_quan
     NSQ_sample_struct  *psSS;
     opus_int16 b_Q14_0, b_Q14_1, b_Q14_2, b_Q14_3, b_Q14_4;
     opus_int16 a_Q12_0, a_Q12_1, a_Q12_2, a_Q12_3, a_Q12_4, a_Q12_5, a_Q12_6;
     opus_int16 a_Q12_7, a_Q12_8, a_Q12_9, a_Q12_10, a_Q12_11, a_Q12_12, a_Q12_13;
     opus_int16 a_Q12_14, a_Q12_15;
 
     opus_int32 cur, prev, next;
 
+    /*Unused.*/
+    (void)arch;
+
     //Intialize b_Q14 variables
     b_Q14_0 = b_Q14[ 0 ];
     b_Q14_1 = b_Q14[ 1 ];
     b_Q14_2 = b_Q14[ 2 ];
     b_Q14_3 = b_Q14[ 3 ];
     b_Q14_4 = b_Q14[ 4 ];
 
     //Intialize a_Q12 variables
--- a/media/libopus/silk/process_NLSFs.c
+++ b/media/libopus/silk/process_NLSFs.c
@@ -36,17 +36,17 @@ void silk_process_NLSFs(
     silk_encoder_state          *psEncC,                            /* I/O  Encoder state                               */
     opus_int16                  PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ], /* O    Prediction coefficients                     */
     opus_int16                  pNLSF_Q15[         MAX_LPC_ORDER ], /* I/O  Normalized LSFs (quant out) (0 - (2^15-1))  */
     const opus_int16            prev_NLSFq_Q15[    MAX_LPC_ORDER ]  /* I    Previous Normalized LSFs (0 - (2^15-1))     */
 )
 {
     opus_int     i, doInterpolate;
     opus_int     NLSF_mu_Q20;
-    opus_int32   i_sqr_Q15;
+    opus_int16   i_sqr_Q15;
     opus_int16   pNLSF0_temp_Q15[ MAX_LPC_ORDER ];
     opus_int16   pNLSFW_QW[ MAX_LPC_ORDER ];
     opus_int16   pNLSFW0_temp_QW[ MAX_LPC_ORDER ];
 
     silk_assert( psEncC->speech_activity_Q8 >=   0 );
     silk_assert( psEncC->speech_activity_Q8 <= SILK_FIX_CONST( 1.0, 8 ) );
     silk_assert( psEncC->useInterpolatedNLSFs == 1 || psEncC->indices.NLSFInterpCoef_Q2 == ( 1 << 2 ) );
 
@@ -74,17 +74,18 @@ void silk_process_NLSFs(
             psEncC->indices.NLSFInterpCoef_Q2, psEncC->predictLPCOrder );
 
         /* Calculate first half NLSF weights for the interpolated NLSFs */
         silk_NLSF_VQ_weights_laroia( pNLSFW0_temp_QW, pNLSF0_temp_Q15, psEncC->predictLPCOrder );
 
         /* Update NLSF weights with contribution from first half */
         i_sqr_Q15 = silk_LSHIFT( silk_SMULBB( psEncC->indices.NLSFInterpCoef_Q2, psEncC->indices.NLSFInterpCoef_Q2 ), 11 );
         for( i = 0; i < psEncC->predictLPCOrder; i++ ) {
-            pNLSFW_QW[ i ] = silk_SMLAWB( silk_RSHIFT( pNLSFW_QW[ i ], 1 ), (opus_int32)pNLSFW0_temp_QW[ i ], i_sqr_Q15 );
+            pNLSFW_QW[ i ] = silk_ADD16( silk_RSHIFT( pNLSFW_QW[ i ], 1 ), silk_RSHIFT(
+                  silk_SMULBB( pNLSFW0_temp_QW[ i ], i_sqr_Q15 ), 16) );
             silk_assert( pNLSFW_QW[ i ] >= 1 );
         }
     }
 
     silk_NLSF_encode( psEncC->indices.NLSFIndices, pNLSF_Q15, psEncC->psNLSF_CB, pNLSFW_QW,
         NLSF_mu_Q20, psEncC->NLSF_MSVQ_Survivors, psEncC->indices.signalType );
 
     /* Convert quantized NLSFs back to LPC coefficients */
@@ -95,11 +96,12 @@ void silk_process_NLSFs(
         silk_interpolate( pNLSF0_temp_Q15, prev_NLSFq_Q15, pNLSF_Q15,
             psEncC->indices.NLSFInterpCoef_Q2, psEncC->predictLPCOrder );
 
         /* Convert back to LPC coefficients */
         silk_NLSF2A( PredCoef_Q12[ 0 ], pNLSF0_temp_Q15, psEncC->predictLPCOrder );
 
     } else {
         /* Copy LPC coefficients for first half from second half */
+        silk_assert( psEncC->predictLPCOrder <= MAX_LPC_ORDER );
         silk_memcpy( PredCoef_Q12[ 0 ], PredCoef_Q12[ 1 ], psEncC->predictLPCOrder * sizeof( opus_int16 ) );
     }
 }
--- a/media/libopus/silk/sort.c
+++ b/media/libopus/silk/sort.c
@@ -28,17 +28,17 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 /* Insertion sort (fast for already almost sorted arrays):   */
 /* Best case:  O(n)   for an already sorted array            */
 /* Worst case: O(n^2) for an inversely sorted array          */
 /*                                                           */
-/* Shell short:    http://en.wikipedia.org/wiki/Shell_sort   */
+/* Shell sort:     https://en.wikipedia.org/wiki/Shell_sort  */
 
 #include "SigProc_FIX.h"
 
 void silk_insertion_sort_increasing(
     opus_int32           *a,             /* I/O   Unsorted / Sorted vector               */
     opus_int             *idx,           /* O     Index vector for the sorted elements   */
     const opus_int       L,              /* I     Vector length                          */
     const opus_int       K               /* I     Number of correctly sorted positions   */
--- a/media/libopus/silk/stereo_LR_to_MS.c
+++ b/media/libopus/silk/stereo_LR_to_MS.c
@@ -72,26 +72,26 @@ void silk_stereo_LR_to_MS(
     silk_memcpy( side, state->sSide, 2 * sizeof( opus_int16 ) );
     silk_memcpy( state->sMid,  &mid[  frame_length ], 2 * sizeof( opus_int16 ) );
     silk_memcpy( state->sSide, &side[ frame_length ], 2 * sizeof( opus_int16 ) );
 
     /* LP and HP filter mid signal */
     ALLOC( LP_mid, frame_length, opus_int16 );
     ALLOC( HP_mid, frame_length, opus_int16 );
     for( n = 0; n < frame_length; n++ ) {
-        sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 2 );
+        sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 2 );
         LP_mid[ n ] = sum;
         HP_mid[ n ] = mid[ n + 1 ] - sum;
     }
 
     /* LP and HP filter side signal */
     ALLOC( LP_side, frame_length, opus_int16 );
     ALLOC( HP_side, frame_length, opus_int16 );
     for( n = 0; n < frame_length; n++ ) {
-        sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( side[ n ] + side[ n + 2 ], side[ n + 1 ], 1 ), 2 );
+        sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( side[ n ] + (opus_int32)side[ n + 2 ], side[ n + 1 ], 1 ), 2 );
         LP_side[ n ] = sum;
         HP_side[ n ] = side[ n + 1 ] - sum;
     }
 
     /* Find energies and predictors */
     is10msFrame = frame_length == 10 * fs_kHz;
     smooth_coef_Q16 = is10msFrame ?
         SILK_FIX_CONST( STEREO_RATIO_SMOOTH_COEF / 2, 16 ) :
@@ -202,27 +202,27 @@ void silk_stereo_LR_to_MS(
     denom_Q16  = silk_DIV32_16( (opus_int32)1 << 16, STEREO_INTERP_LEN_MS * fs_kHz );
     delta0_Q13 = -silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 0 ] - state->pred_prev_Q13[ 0 ], denom_Q16 ), 16 );
     delta1_Q13 = -silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 1 ] - state->pred_prev_Q13[ 1 ], denom_Q16 ), 16 );
     deltaw_Q24 =  silk_LSHIFT( silk_SMULWB( width_Q14 - state->width_prev_Q14, denom_Q16 ), 10 );
     for( n = 0; n < STEREO_INTERP_LEN_MS * fs_kHz; n++ ) {
         pred0_Q13 += delta0_Q13;
         pred1_Q13 += delta1_Q13;
         w_Q24   += deltaw_Q24;
-        sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 );    /* Q11 */
+        sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 );    /* Q11 */
         sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 );               /* Q8  */
         sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 );       /* Q8  */
         x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) );
     }
 
     pred0_Q13 = -pred_Q13[ 0 ];
     pred1_Q13 = -pred_Q13[ 1 ];
     w_Q24     =  silk_LSHIFT( width_Q14, 10 );
     for( n = STEREO_INTERP_LEN_MS * fs_kHz; n < frame_length; n++ ) {
-        sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 );    /* Q11 */
+        sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 );    /* Q11 */
         sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 );               /* Q8  */
         sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 );       /* Q8  */
         x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) );
     }
     state->pred_prev_Q13[ 0 ] = (opus_int16)pred_Q13[ 0 ];
     state->pred_prev_Q13[ 1 ] = (opus_int16)pred_Q13[ 1 ];
     state->width_prev_Q14     = (opus_int16)width_Q14;
     RESTORE_STACK;
--- a/media/libopus/silk/x86/NSQ_sse.c
+++ b/media/libopus/silk/x86/NSQ_sse.c
@@ -216,17 +216,17 @@ void silk_NSQ_sse4_1(
             silk_noise_shape_quantizer_10_16_sse4_1( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14,
                 AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ],
                 offset_Q10, psEncC->subfr_length, &(table[32]) );
         }
         else
         {
             silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14,
                 AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10,
-                offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder );
+                offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder, psEncC->arch );
         }
 
         x_Q3   += psEncC->subfr_length;
         pulses += psEncC->subfr_length;
         pxq    += psEncC->subfr_length;
     }
 
     /* Update lagPrev for next frame */
--- a/media/libopus/silk/x86/main_sse.h
+++ b/media/libopus/silk/x86/main_sse.h
@@ -202,17 +202,18 @@ void silk_noise_shape_quantizer(
     opus_int32          HarmShapeFIRPacked_Q14, /* I                                    */
     opus_int            Tilt_Q14,               /* I    Spectral tilt                   */
     opus_int32          LF_shp_Q14,             /* I                                    */
     opus_int32          Gain_Q16,               /* I                                    */
     opus_int            Lambda_Q10,             /* I                                    */
     opus_int            offset_Q10,             /* I                                    */
     opus_int            length,                 /* I    Input length                    */
     opus_int            shapingLPCOrder,        /* I    Noise shaping AR filter order   */
-    opus_int            predictLPCOrder         /* I    Prediction filter order         */
+    opus_int            predictLPCOrder,        /* I    Prediction filter order         */
+    int                 arch                    /* I    Architecture                    */
 );
 
 /**************************/
 /* Noise level estimation */
 /**************************/
 void silk_VAD_GetNoiseLevels(
     const opus_int32            pX[ VAD_N_BANDS ],  /* I    subband energies                            */
     silk_VAD_state              *psSilk_VAD         /* I/O  Pointer to Silk VAD state                   */
--- a/media/libopus/sources.mozbuild
+++ b/media/libopus/sources.mozbuild
@@ -158,16 +158,21 @@ silk_sources = [
 silk_sources_sse4_1 = [
     'silk/x86/NSQ_del_dec_sse.c',
     'silk/x86/NSQ_sse.c',
     'silk/x86/VAD_sse.c',
     'silk/x86/VQ_WMat_EC_sse.c',
     'silk/x86/x86_silk_map.c',
 ]
 
+silk_sources_arm_neon_intr = [
+    'silk/arm/arm_silk_map.c',
+    'silk/arm/NSQ_neon.c',
+]
+
 silk_sources_fixed = [
     'silk/fixed/apply_sine_window_FIX.c',
     'silk/fixed/autocorr_FIX.c',
     'silk/fixed/burg_modified_FIX.c',
     'silk/fixed/corrMatrix_FIX.c',
     'silk/fixed/encode_frame_FIX.c',
     'silk/fixed/find_LPC_FIX.c',
     'silk/fixed/find_LTP_FIX.c',
--- a/media/libopus/src/analysis.c
+++ b/media/libopus/src/analysis.c
@@ -535,27 +535,24 @@ static void tonality_analysis(TonalityAn
        float p0, p1;
        /* Probabilities for "all speech" and "all music" */
        float s0, m0;
        /* Probability sum for renormalisation */
        float psum;
        /* Instantaneous probability of speech and music, with beta pre-applied. */
        float speech0;
        float music0;
+       float p, q;
 
        /* One transition every 3 minutes of active audio */
        tau = .00005f*frame_probs[1];
-       beta = .05f;
-       if (1) {
-          /* Adapt beta based on how "unexpected" the new prob is */
-          float p, q;
-          p = MAX16(.05f,MIN16(.95f,frame_probs[0]));
-          q = MAX16(.05f,MIN16(.95f,tonal->music_prob));
-          beta = .01f+.05f*ABS16(p-q)/(p*(1-q)+q*(1-p));
-       }
+       /* Adapt beta based on how "unexpected" the new prob is */
+       p = MAX16(.05f,MIN16(.95f,frame_probs[0]));
+       q = MAX16(.05f,MIN16(.95f,tonal->music_prob));
+       beta = .01f+.05f*ABS16(p-q)/(p*(1-q)+q*(1-p));
        /* p0 and p1 are the probabilities of speech and music at this frame
           using only information from previous frame and applying the
           state transition model */
        p0 = (1-tonal->music_prob)*(1-tau) +    tonal->music_prob *tau;
        p1 =    tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau;
        /* We apply the current probability with exponent beta to work around
           the fact that the probability estimates aren't independent. */
        p0 *= (float)pow(1-frame_probs[0], beta);
--- a/media/libopus/src/opus.c
+++ b/media/libopus/src/opus.c
@@ -99,16 +99,20 @@ OPUS_EXPORT void opus_pcm_soft_clip(floa
             }
             end++;
          }
          /* Detect the special case where we clip before the first zero crossing */
          special = (start==0 && x[i*C]*x[0]>=0);
 
          /* Compute a such that maxval + a*maxval^2 = 1 */
          a=(maxval-1)/(maxval*maxval);
+         /* Slightly boost "a" by 2^-22. This is just enough to ensure -ffast-math
+            does not cause output values larger than +/-1, but small enough not
+            to matter even for 24-bit output.  */
+         a += a*2.4e-7;
          if (x[i*C]>0)
             a = -a;
          /* Apply soft clipping */
          for (i=start;i<end;i++)
             x[i*C] = x[i*C]+a*x[i*C]*x[i*C];
 
          if (special && peak_pos>=2)
          {
@@ -196,18 +200,20 @@ int opus_packet_parse_impl(const unsigne
    int count;
    int cbr;
    unsigned char ch, toc;
    int framesize;
    opus_int32 last_size;
    opus_int32 pad = 0;
    const unsigned char *data0 = data;
 
-   if (size==NULL)
+   if (size==NULL || len<0)
       return OPUS_BAD_ARG;
+   if (len==0)
+      return OPUS_INVALID_PACKET;
 
    framesize = opus_packet_get_samples_per_frame(data, 48000);
 
    cbr = 0;
    toc = *data++;
    len--;
    last_size = len;
    switch (toc&0x3)
--- a/media/libopus/src/opus_encoder.c
+++ b/media/libopus/src/opus_encoder.c
@@ -855,30 +855,30 @@ opus_int32 compute_frame_size(const void
    }
    if (frame_size<0)
       return -1;
    return frame_size;
 }
 
 opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int32 Fs, StereoWidthState *mem)
 {
-   opus_val16 corr;
-   opus_val16 ldiff;
-   opus_val16 width;
    opus_val32 xx, xy, yy;
    opus_val16 sqrt_xx, sqrt_yy;
    opus_val16 qrrt_xx, qrrt_yy;
    int frame_rate;
    int i;
    opus_val16 short_alpha;
 
    frame_rate = Fs/frame_size;
-   short_alpha = Q15ONE - 25*Q15ONE/IMAX(50,frame_rate);
+   short_alpha = Q15ONE - MULT16_16(25, Q15ONE)/IMAX(50,frame_rate);
    xx=xy=yy=0;
-   for (i=0;i<frame_size;i+=4)
+   /* Unroll by 4. The frame size is always a multiple of 4 *except* for
+      2.5 ms frames at 12 kHz (30 samples). Since this setting is very rare,
+      we just drop the last two samples of each channel from the estimate. */
+   for (i=0;i<frame_size-3;i+=4)
    {
       opus_val32 pxx=0;
       opus_val32 pxy=0;
       opus_val32 pyy=0;
       opus_val16 x, y;
       x = pcm[2*i];
       y = pcm[2*i+1];
       pxx = SHR32(MULT16_16(x,x),2);
@@ -907,37 +907,36 @@ opus_val16 compute_stereo_width(const op
    mem->XX += MULT16_32_Q15(short_alpha, xx-mem->XX);
    mem->XY += MULT16_32_Q15(short_alpha, xy-mem->XY);
    mem->YY += MULT16_32_Q15(short_alpha, yy-mem->YY);
    mem->XX = MAX32(0, mem->XX);
    mem->XY = MAX32(0, mem->XY);
    mem->YY = MAX32(0, mem->YY);
    if (MAX32(mem->XX, mem->YY)>QCONST16(8e-4f, 18))
    {
+      opus_val16 corr;
+      opus_val16 ldiff;
+      opus_val16 width;
       sqrt_xx = celt_sqrt(mem->XX);
       sqrt_yy = celt_sqrt(mem->YY);
       qrrt_xx = celt_sqrt(sqrt_xx);
       qrrt_yy = celt_sqrt(sqrt_yy);
       /* Inter-channel correlation */
       mem->XY = MIN32(mem->XY, sqrt_xx*sqrt_yy);
       corr = SHR32(frac_div32(mem->XY,EPSILON+MULT16_16(sqrt_xx,sqrt_yy)),16);
       /* Approximate loudness difference */
-      ldiff = Q15ONE*ABS16(qrrt_xx-qrrt_yy)/(EPSILON+qrrt_xx+qrrt_yy);
+      ldiff = MULT16_16(Q15ONE, ABS16(qrrt_xx-qrrt_yy))/(EPSILON+qrrt_xx+qrrt_yy);
       width = MULT16_16_Q15(celt_sqrt(QCONST32(1.f,30)-MULT16_16(corr,corr)), ldiff);
       /* Smoothing over one second */
       mem->smoothed_width += (width-mem->smoothed_width)/frame_rate;
       /* Peak follower */
       mem->max_follower = MAX16(mem->max_follower-QCONST16(.02f,15)/frame_rate, mem->smoothed_width);
-   } else {
-      width = 0;
-      corr=Q15ONE;
-      ldiff=0;
    }
    /*printf("%f %f %f %f %f ", corr/(float)Q15ONE, ldiff/(float)Q15ONE, width/(float)Q15ONE, mem->smoothed_width/(float)Q15ONE, mem->max_follower/(float)Q15ONE);*/
-   return EXTRACT16(MIN32(Q15ONE,20*mem->max_follower));
+   return EXTRACT16(MIN32(Q15ONE, MULT16_16(20, mem->max_follower)));
 }
 
 opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
                 unsigned char *data, opus_int32 out_data_bytes, int lsb_depth,
                 const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2,
                 int analysis_channels, downmix_func downmix, int float_api)
 {
     void *silk_enc;
@@ -1045,44 +1044,54 @@ opus_int32 opus_encode_native(OpusEncode
     if (st->channels==2 && st->force_channels!=1)
        stereo_width = compute_stereo_width(pcm, frame_size, st->Fs, &st->width_mem);
     else
        stereo_width = 0;
     total_buffer = delay_compensation;
     st->bitrate_bps = user_bitrate_to_bitrate(st, frame_size, max_data_bytes);
 
     frame_rate = st->Fs/frame_size;
+    if (!st->use_vbr)
+    {
+       int cbrBytes;
+       /* Multiply by 3 to make sure the division is exact. */
+       int frame_rate3 = 3*st->Fs/frame_size;
+       /* We need to make sure that "int" values always fit in 16 bits. */
+       cbrBytes = IMIN( (3*st->bitrate_bps/8 + frame_rate3/2)/frame_rate3, max_data_bytes);
+       st->bitrate_bps = cbrBytes*(opus_int32)frame_rate3*8/3;
+       max_data_bytes = cbrBytes;
+    }
     if (max_data_bytes<3 || st->bitrate_bps < 3*frame_rate*8
        || (frame_rate<50 && (max_data_bytes*frame_rate<300 || st->bitrate_bps < 2400)))
     {
        /*If the space is too low to do something useful, emit 'PLC' frames.*/
        int tocmode = st->mode;
        int bw = st->bandwidth == 0 ? OPUS_BANDWIDTH_NARROWBAND : st->bandwidth;
        if (tocmode==0)
           tocmode = MODE_SILK_ONLY;
        if (frame_rate>100)
           tocmode = MODE_CELT_ONLY;
        if (frame_rate < 50)
           tocmode = MODE_SILK_ONLY;
        if(tocmode==MODE_SILK_ONLY&&bw>OPUS_BANDWIDTH_WIDEBAND)
           bw=OPUS_BANDWIDTH_WIDEBAND;
        else if (tocmode==MODE_CELT_ONLY&&bw==OPUS_BANDWIDTH_MEDIUMBAND)
           bw=OPUS_BANDWIDTH_NARROWBAND;
-       else if (bw<=OPUS_BANDWIDTH_SUPERWIDEBAND)
+       else if (tocmode==MODE_HYBRID&&bw<=OPUS_BANDWIDTH_SUPERWIDEBAND)
           bw=OPUS_BANDWIDTH_SUPERWIDEBAND;
        data[0] = gen_toc(tocmode, frame_rate, bw, st->stream_channels);
+       ret = 1;
+       if (!st->use_vbr)
+       {
+          ret = opus_packet_pad(data, ret, max_data_bytes);
+          if (ret == OPUS_OK)
+             ret = max_data_bytes;
+       }
        RESTORE_STACK;
-       return 1;
-    }
-    if (!st->use_vbr)
-    {
-       int cbrBytes;
-       cbrBytes = IMIN( (st->bitrate_bps + 4*frame_rate)/(8*frame_rate) , max_data_bytes);
-       st->bitrate_bps = cbrBytes * (8*frame_rate);
-       max_data_bytes = cbrBytes;
+       return ret;
     }
     max_rate = frame_rate*max_data_bytes*8;
 
     /* Equivalent 20-ms rate for mode/channel/bandwidth decisions */
     equiv_rate = st->bitrate_bps - (40*st->channels+20)*(st->Fs/frame_size - 50);
 
     if (st->signal_type == OPUS_SIGNAL_VOICE)
        voice_est = 127;
@@ -1508,17 +1517,17 @@ opus_int32 opus_encode_native(OpusEncode
                 st->silk_mode.bitRate = total_bitRate * 4/5;
             }
             if (!st->energy_masking)
             {
                /* Increasingly attenuate high band when it gets allocated fewer bits */
                celt_rate = total_bitRate - st->silk_mode.bitRate;
                HB_gain_ref = (curr_bandwidth == OPUS_BANDWIDTH_SUPERWIDEBAND) ? 3000 : 3600;
                HB_gain = SHL32((opus_val32)celt_rate, 9) / SHR32((opus_val32)celt_rate + st->stream_channels * HB_gain_ref, 6);
-               HB_gain = HB_gain < Q15ONE*6/7 ? HB_gain + Q15ONE/7 : Q15ONE;
+               HB_gain = HB_gain < (opus_val32)Q15ONE*6/7 ? HB_gain + Q15ONE/7 : Q15ONE;
             }
         } else {
             /* SILK gets all bits */
             st->silk_mode.bitRate = total_bitRate;
         }
 
         /* Surround masking for SILK */
         if (st->energy_masking && st->use_vbr && !st->lfe)
--- a/media/libopus/src/opus_multistream_encoder.c
+++ b/media/libopus/src/opus_multistream_encoder.c
@@ -65,23 +65,32 @@ typedef void (*opus_copy_channel_in_func
   opus_val16 *dst,
   int dst_stride,
   const void *src,
   int src_stride,
   int src_channel,
   int frame_size
 );
 
+typedef enum {
+  MAPPING_TYPE_NONE,
+  MAPPING_TYPE_SURROUND
+#ifdef ENABLE_EXPERIMENTAL_AMBISONICS
+  ,  /* Do not include comma at end of enumerator list */
+  MAPPING_TYPE_AMBISONICS
+#endif
+} MappingType;
+
 struct OpusMSEncoder {
    ChannelLayout layout;
    int arch;
    int lfe_stream;
    int application;
    int variable_duration;
-   int surround;
+   MappingType mapping_type;
    opus_int32 bitrate_bps;
    float subframe_mem[3];
    /* Encoder states go here */
    /* then opus_val32 window_mem[channels*120]; */
    /* then opus_val32 preemph_mem[channels]; */
 };
 
 static opus_val32 *ms_get_preemph_mem(OpusMSEncoder *st)
@@ -237,16 +246,17 @@ void surround_analysis(const CELTMode *c
    VARDECL(opus_val32, in);
    VARDECL(opus_val16, x);
    VARDECL(opus_val32, freq);
    SAVE_STACK;
 
    upsample = resampling_factor(rate);
    frame_size = len*upsample;
 
+   /* LM = log2(frame_size / 120) */
    for (LM=0;LM<celt_mode->maxLM;LM++)
       if (celt_mode->shortMdctSize<<LM==frame_size)
          break;
 
    ALLOC(in, frame_size+overlap, opus_val32);
    ALLOC(x, len, opus_val16);
    ALLOC(freq, frame_size, opus_val32);
 
@@ -393,53 +403,58 @@ opus_int32 opus_multistream_surround_enc
    } else if (mapping_family==1 && channels<=8 && channels>=1)
    {
       nb_streams=vorbis_mappings[channels-1].nb_streams;
       nb_coupled_streams=vorbis_mappings[channels-1].nb_coupled_streams;
    } else if (mapping_family==255)
    {
       nb_streams=channels;
       nb_coupled_streams=0;
+#ifdef ENABLE_EXPERIMENTAL_AMBISONICS
+   } else if (mapping_family==254)
+   {
+      nb_streams=channels;
+      nb_coupled_streams=0;
+#endif
    } else
       return 0;
    size = opus_multistream_encoder_get_size(nb_streams, nb_coupled_streams);
    if (channels>2)
    {
       size += channels*(120*sizeof(opus_val32) + sizeof(opus_val32));
    }
    return size;
 }
 
-
 static int opus_multistream_encoder_init_impl(
       OpusMSEncoder *st,
       opus_int32 Fs,
       int channels,
       int streams,
       int coupled_streams,
       const unsigned char *mapping,
       int application,
-      int surround
+      MappingType mapping_type
 )
 {
    int coupled_size;
    int mono_size;
    int i, ret;
    char *ptr;
 
    if ((channels>255) || (channels<1) || (coupled_streams>streams) ||
        (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams))
       return OPUS_BAD_ARG;
 
    st->arch = opus_select_arch();
    st->layout.nb_channels = channels;
    st->layout.nb_streams = streams;
    st->layout.nb_coupled_streams = coupled_streams;
    st->subframe_mem[0]=st->subframe_mem[1]=st->subframe_mem[2]=0;
-   if (!surround)
+   if (mapping_type != MAPPING_TYPE_SURROUND)
       st->lfe_stream = -1;
    st->bitrate_bps = OPUS_AUTO;
    st->application = application;
    st->variable_duration = OPUS_FRAMESIZE_ARG;
    for (i=0;i<st->layout.nb_channels;i++)
       st->layout.mapping[i] = mapping[i];
    if (!validate_layout(&st->layout) || !validate_encoder_layout(&st->layout))
       return OPUS_BAD_ARG;
@@ -458,49 +473,53 @@ static int opus_multistream_encoder_init
    for (;i<st->layout.nb_streams;i++)
    {
       ret = opus_encoder_init((OpusEncoder*)ptr, Fs, 1, application);
       if (i==st->lfe_stream)
          opus_encoder_ctl((OpusEncoder*)ptr, OPUS_SET_LFE(1));
       if(ret!=OPUS_OK)return ret;
       ptr += align(mono_size);
    }
-   if (surround)
+   if (mapping_type == MAPPING_TYPE_SURROUND)
    {
       OPUS_CLEAR(ms_get_preemph_mem(st), channels);
       OPUS_CLEAR(ms_get_window_mem(st), channels*120);
    }
-   st->surround = surround;
+   st->mapping_type = mapping_type;
    return OPUS_OK;
 }
 
 int opus_multistream_encoder_init(
       OpusMSEncoder *st,
       opus_int32 Fs,
       int channels,
       int streams,
       int coupled_streams,
       const unsigned char *mapping,
       int application
 )
 {
-   return opus_multistream_encoder_init_impl(st, Fs, channels, streams, coupled_streams, mapping, application, 0);
+   return opus_multistream_encoder_init_impl(st, Fs, channels, streams,
+                                             coupled_streams, mapping,
+                                             application, MAPPING_TYPE_NONE);
 }
 
 int opus_multistream_surround_encoder_init(
       OpusMSEncoder *st,
       opus_int32 Fs,
       int channels,
       int mapping_family,
       int *streams,
       int *coupled_streams,
       unsigned char *mapping,
       int application
 )
 {
+   MappingType mapping_type;
+
    if ((channels>255) || (channels<1))
       return OPUS_BAD_ARG;
    st->lfe_stream = -1;
    if (mapping_family==0)
    {
       if (channels==1)
       {
          *streams=1;
@@ -525,20 +544,42 @@ int opus_multistream_surround_encoder_in
          st->lfe_stream = *streams-1;
    } else if (mapping_family==255)
    {
       int i;
       *streams=channels;
       *coupled_streams=0;
       for(i=0;i<channels;i++)
          mapping[i] = i;
+#ifdef ENABLE_EXPERIMENTAL_AMBISONICS
+   } else if (mapping_family==254)
+   {
+      int i;
+      *streams=channels;
+      *coupled_streams=0;
+      for(i=0;i<channels;i++)
+         mapping[i] = i;
+#endif
    } else
       return OPUS_UNIMPLEMENTED;
-   return opus_multistream_encoder_init_impl(st, Fs, channels, *streams, *coupled_streams,
-         mapping, application, channels>2&&mapping_family==1);
+
+   if (channels>2 && mapping_family==1) {
+      mapping_type = MAPPING_TYPE_SURROUND;
+#ifdef ENABLE_EXPERIMENTAL_AMBISONICS
+   } else if (mapping_family==254)
+   {
+      mapping_type = MAPPING_TYPE_AMBISONICS;
+#endif
+   } else
+   {
+      mapping_type = MAPPING_TYPE_NONE;
+   }
+   return opus_multistream_encoder_init_impl(st, Fs, channels, *streams,
+                                             *coupled_streams, mapping,
+                                             application, mapping_type);
 }
 
 OpusMSEncoder *opus_multistream_encoder_create(
       opus_int32 Fs,
       int channels,
       int streams,
       int coupled_streams,
       const unsigned char *mapping,
@@ -613,34 +654,29 @@ OpusMSEncoder *opus_multistream_surround
       opus_free(st);
       st = NULL;
    }
    if (error)
       *error = ret;
    return st;
 }
 
-static opus_int32 surround_rate_allocation(
+static void surround_rate_allocation(
       OpusMSEncoder *st,
       opus_int32 *rate,
-      int frame_size
+      int frame_size,
+      opus_int32 Fs
       )
 {
    int i;
    opus_int32 channel_rate;
-   opus_int32 Fs;
-   char *ptr;
    int stream_offset;
    int lfe_offset;
    int coupled_ratio; /* Q8 */
    int lfe_ratio;     /* Q8 */
-   opus_int32 rate_sum=0;
-
-   ptr = (char*)st + align(sizeof(OpusMSEncoder));
-   opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs));
 
    if (st->bitrate_bps > st->layout.nb_channels*40000)
       stream_offset = 20000;
    else
       stream_offset = st->bitrate_bps/st->layout.nb_channels/2;
    stream_offset += 60*(Fs/frame_size-50);
    /* We start by giving each stream (coupled or uncoupled) the same bitrate.
       This models the main saving of coupled channels over uncoupled. */
@@ -683,16 +719,98 @@ static opus_int32 surround_rate_allocati
    for (i=0;i<st->layout.nb_streams;i++)
    {
       if (i<st->layout.nb_coupled_streams)
          rate[i] = stream_offset+(channel_rate*coupled_ratio>>8);
       else if (i!=st->lfe_stream)
          rate[i] = stream_offset+channel_rate;
       else
          rate[i] = lfe_offset+(channel_rate*lfe_ratio>>8);
+   }
+}
+
+#ifdef ENABLE_EXPERIMENTAL_AMBISONICS
+static void ambisonics_rate_allocation(
+      OpusMSEncoder *st,
+      opus_int32 *rate,
+      int frame_size,
+      opus_int32 Fs
+      )
+{
+   int i;
+   int non_mono_rate;
+   int total_rate;
+
+   /* The mono channel gets (rate_ratio_num / rate_ratio_den) times as many bits
+    * as each of the other channels */
+   const int rate_ratio_num = 4;
+   const int rate_ratio_den = 3;
+   const int num_channels = st->layout.nb_streams;
+
+   if (st->bitrate_bps==OPUS_AUTO)
+   {
+      total_rate = num_channels * (20000 + st->layout.nb_streams*(Fs+60*Fs/frame_size));
+   } else if (st->bitrate_bps==OPUS_BITRATE_MAX)
+   {
+      total_rate = num_channels * 320000;
+   } else {
+      total_rate = st->bitrate_bps;
+   }
+
+   /* Let y be the non-mono rate and let p, q be integers such that the mono
+    * channel rate is (p/q) * y.
+    * Also let T be the total bitrate to allocate. Then
+    *   (n - 1) y + (p/q) y = T
+    *   y = (T q) / (qn - q + p)
+    */
+   non_mono_rate =
+         total_rate * rate_ratio_den
+         / (rate_ratio_den*num_channels + rate_ratio_num - rate_ratio_den);
+
+#ifndef FIXED_POINT
+   if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != Fs/50)
+   {
+      opus_int32 bonus = 60*(Fs/frame_size-50);
+      non_mono_rate += bonus;
+   }
+#endif
+
+   rate[0] = total_rate - (num_channels - 1) * non_mono_rate;
+   for (i=1;i<st->layout.nb_streams;i++)
+   {
+      rate[i] = non_mono_rate;
+   }
+}
+#endif /* ENABLE_EXPERIMENTAL_AMBISONICS */
+
+static opus_int32 rate_allocation(
+      OpusMSEncoder *st,
+      opus_int32 *rate,
+      int frame_size
+      )
+{
+   int i;
+   opus_int32 rate_sum=0;
+   opus_int32 Fs;
+   char *ptr;
+
+   ptr = (char*)st + align(sizeof(OpusMSEncoder));
+   opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs));
+
+#ifdef ENABLE_EXPERIMENTAL_AMBISONICS
+   if (st->mapping_type == MAPPING_TYPE_AMBISONICS) {
+     ambisonics_rate_allocation(st, rate, frame_size, Fs);
+   } else
+#endif
+   {
+     surround_rate_allocation(st, rate, frame_size, Fs);
+   }
+
+   for (i=0;i<st->layout.nb_streams;i++)
+   {
       rate[i] = IMAX(rate[i], 500);
       rate_sum += rate[i];
    }
    return rate_sum;
 }
 
 /* Max size in case the encoder decides to return three frames */
 #define MS_FRAME_TMP (3*1275+7)
@@ -725,17 +843,17 @@ static int opus_multistream_encode_nativ
    opus_val16 bandLogE[42];
    opus_val32 *mem = NULL;
    opus_val32 *preemph_mem=NULL;
    int frame_size;
    opus_int32 rate_sum;
    opus_int32 smallest_packet;
    ALLOC_STACK;
 
-   if (st->surround)
+   if (st->mapping_type == MAPPING_TYPE_SURROUND)
    {
       preemph_mem = ms_get_preemph_mem(st);
       mem = ms_get_window_mem(st);
    }
 
    ptr = (char*)st + align(sizeof(OpusMSEncoder));
    opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs));
    opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_VBR(&vbr));
@@ -779,23 +897,23 @@ static int opus_multistream_encode_nativ
       RESTORE_STACK;
       return OPUS_BUFFER_TOO_SMALL;
    }
    ALLOC(buf, 2*frame_size, opus_val16);
    coupled_size = opus_encoder_get_size(2);
    mono_size = opus_encoder_get_size(1);
 
    ALLOC(bandSMR, 21*st->layout.nb_channels, opus_val16);
-   if (st->surround)
+   if (st->mapping_type == MAPPING_TYPE_SURROUND)
    {
       surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in, st->arch);
    }
 
    /* Compute bitrate allocation between streams (this could be a lot better) */
-   rate_sum = surround_rate_allocation(st, bitrates, frame_size);
+   rate_sum = rate_allocation(st, bitrates, frame_size);
 
    if (!vbr)
    {
       if (st->bitrate_bps == OPUS_AUTO)
       {
          max_data_bytes = IMIN(max_data_bytes, 3*rate_sum/(3*8*Fs/frame_size));
       } else if (st->bitrate_bps != OPUS_BITRATE_MAX)
       {
@@ -808,17 +926,17 @@ static int opus_multistream_encode_nativ
    {
       OpusEncoder *enc;
       enc = (OpusEncoder*)ptr;
       if (s < st->layout.nb_coupled_streams)
          ptr += align(coupled_size);
       else
          ptr += align(mono_size);
       opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrates[s]));
-      if (st->surround)
+      if (st->mapping_type == MAPPING_TYPE_SURROUND)
       {
          opus_int32 equiv_rate;
          equiv_rate = st->bitrate_bps;
          if (frame_size*50 < Fs)
             equiv_rate -= 60*(Fs/frame_size - 50)*st->layout.nb_channels;
          if (equiv_rate > 10000*st->layout.nb_channels)
             opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND));
          else if (equiv_rate > 7000*st->layout.nb_channels)
@@ -829,66 +947,72 @@ static int opus_multistream_encode_nativ
             opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_NARROWBAND));
          if (s < st->layout.nb_coupled_streams)
          {
             /* To preserve the spatial image, force stereo CELT on coupled streams */
             opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY));
             opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(2));
          }
       }
+#ifdef ENABLE_EXPERIMENTAL_AMBISONICS
+      else if (st->mapping_type == MAPPING_TYPE_AMBISONICS) {
+        opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY));
+      }
+#endif
    }
 
    ptr = (char*)st + align(sizeof(OpusMSEncoder));
    /* Counting ToC */
    tot_size = 0;
    for (s=0;s<st->layout.nb_streams;s++)
    {
       OpusEncoder *enc;
       int len;
       int curr_max;
       int c1, c2;
+      int ret;
 
       opus_repacketizer_init(&rp);
       enc = (OpusEncoder*)ptr;
       if (s < st->layout.nb_coupled_streams)
       {
          int i;
          int left, right;
          left = get_left_channel(&st->layout, s, -1);
          right = get_right_channel(&st->layout, s, -1);
          (*copy_channel_in)(buf, 2,
             pcm, st->layout.nb_channels, left, frame_size);
          (*copy_channel_in)(buf+1, 2,
             pcm, st->layout.nb_channels, right, frame_size);
          ptr += align(coupled_size);
-         if (st->surround)
+         if (st->mapping_type == MAPPING_TYPE_SURROUND)
          {
             for (i=0;i<21;i++)
             {
                bandLogE[i] = bandSMR[21*left+i];
                bandLogE[21+i] = bandSMR[21*right+i];
             }
          }
          c1 = left;
          c2 = right;
       } else {
          int i;
          int chan = get_mono_channel(&st->layout, s, -1);
          (*copy_channel_in)(buf, 1,
             pcm, st->layout.nb_channels, chan, frame_size);
          ptr += align(mono_size);
-         if (st->surround)
+         if (st->mapping_type == MAPPING_TYPE_SURROUND)
          {
             for (i=0;i<21;i++)
                bandLogE[i] = bandSMR[21*chan+i];
          }
          c1 = chan;
          c2 = -1;
       }
-      if (st->surround)
+      if (st->mapping_type == MAPPING_TYPE_SURROUND)
          opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE));
       /* number of bytes left (+Toc) */
       curr_max = max_data_bytes - tot_size;
       /* Reserve one byte for the last stream and two for the others */
       curr_max -= IMAX(0,2*(st->layout.nb_streams-s-1)-1);
       curr_max = IMIN(curr_max,MS_FRAME_TMP);
       /* Repacketizer will add one or two bytes for self-delimited frames */
       if (s != st->layout.nb_streams-1) curr_max -=  curr_max>253 ? 2 : 1;
@@ -899,17 +1023,24 @@ static int opus_multistream_encode_nativ
       if (len<0)
       {
          RESTORE_STACK;
          return len;
       }
       /* We need to use the repacketizer to add the self-delimiting lengths
          while taking into account the fact that the encoder can now return
          more than one frame at a time (e.g. 60 ms CELT-only) */
-      opus_repacketizer_cat(&rp, tmp_data, len);
+      ret = opus_repacketizer_cat(&rp, tmp_data, len);
+      /* If opus_repacketizer_cat() fails, then something is seriously wrong
+         with the encoder, so report an internal error. */
+      if (ret != OPUS_OK)
+      {
+         RESTORE_STACK;
+         return OPUS_INTERNAL_ERROR;
+      }
       len = opus_repacketizer_out_range_impl(&rp, 0, opus_repacketizer_get_nb_frames(&rp),
             data, max_data_bytes-tot_size, s != st->layout.nb_streams-1, !vbr && s == st->layout.nb_streams-1);
       data += len;
       tot_size += len;
    }
    /*printf("\n");*/
    RESTORE_STACK;
    return tot_size;
@@ -1178,17 +1309,17 @@ int opus_multistream_encoder_ctl(OpusMSE
        }
        *value = st->variable_duration;
    }
    break;
    case OPUS_RESET_STATE:
    {
       int s;
       st->subframe_mem[0] = st->subframe_mem[1] = st->subframe_mem[2] = 0;
-      if (st->surround)
+      if (st->mapping_type == MAPPING_TYPE_SURROUND)
       {
          OPUS_CLEAR(ms_get_preemph_mem(st), st->layout.nb_channels);
          OPUS_CLEAR(ms_get_window_mem(st), st->layout.nb_channels*120);
       }
       for (s=0;s<st->layout.nb_streams;s++)
       {
          OpusEncoder *enc;
          enc = (OpusEncoder*)ptr;
--- a/media/libopus/src/repacketizer.c
+++ b/media/libopus/src/repacketizer.c
@@ -244,17 +244,19 @@ int opus_packet_pad(unsigned char *data,
       return OPUS_BAD_ARG;
    if (len==new_len)
       return OPUS_OK;
    else if (len > new_len)
       return OPUS_BAD_ARG;
    opus_repacketizer_init(&rp);
    /* Moving payload to the end of the packet so we can do in-place padding */
    OPUS_MOVE(data+new_len-len, data, len);
-   opus_repacketizer_cat(&rp, data+new_len-len, len);
+   ret = opus_repacketizer_cat(&rp, data+new_len-len, len);
+   if (ret != OPUS_OK)
+      return ret;
    ret = opus_repacketizer_out_range_impl(&rp, 0, rp.nb_frames, data, new_len, 0, 1);
    if (ret > 0)
       return OPUS_OK;
    else
       return ret;
 }
 
 opus_int32 opus_packet_unpad(unsigned char *data, opus_int32 len)