Bug 894941 - Import SSE-optimized routines for the speex resampler. r=ehsan
author Paul Adenot <paul@paul.cx>
Fri, 26 Jul 2013 18:46:32 +0200
changeset 140178 f134b528c97d9f10ae10b0f2702f4cb66c95cec4
parent 140177 f6412c912847128e5aa1c8a7ced227356959fb84
child 140179 adeb3608cd4114359cc0eff753f704ed578deb11
push id 25016
push user ryanvm@gmail.com
push date Sat, 27 Jul 2013 02:25:56 +0000
treeherder mozilla-central@fb48c7d58b8b [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers ehsan
bugs 894941
milestone 25.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 894941 - Import SSE-optimized routines for the speex resampler. r=ehsan
media/libspeex_resampler/src/resample_sse.h
media/libspeex_resampler/update.sh
new file mode 100644
--- /dev/null
+++ b/media/libspeex_resampler/src/resample_sse.h
@@ -0,0 +1,128 @@
+/* Copyright (C) 2007-2008 Jean-Marc Valin
+ * Copyright (C) 2008 Thorvald Natvig
+ */
+/**
+   @file resample_sse.h
+   @brief Resampler functions (SSE version)
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+   
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+   
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+   
+   - Neither the name of the Xiph.org Foundation nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+   
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <xmmintrin.h>
+
+#define OVERRIDE_INNER_PRODUCT_SINGLE
+static inline float inner_product_single(const float *a, const float *b, unsigned int len) /* Single-precision dot product of a and b; returns the sum of a[i] * b[i] over the indices the loop below visits. */
+{
+   int i; /* NOTE(review): signed index; it is converted to unsigned for the i<len comparison */
+   float ret;
+   __m128 sum = _mm_setzero_ps(); /* four partial sums, one per lane, all starting at 0 */
+   for (i=0;i<len;i+=8) /* 8 elements per pass, so indices up to len rounded up to a multiple of 8 are read; presumably callers pass a multiple of 8 -- TODO confirm */
+   {
+      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i))); /* unaligned loads of elements i..i+3, multiplied lane-wise and accumulated */
+      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4))); /* same for elements i+4..i+7 */
+   }
+   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum)); /* horizontal reduce, step 1: lanes 0,1 += lanes 2,3 */
+   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55)); /* step 2: lane 0 += lane 1 (0x55 selects lane 1 for every lane) */
+   _mm_store_ss(&ret, sum); /* lane 0 now holds the total */
+   return ret;
+}
+
+#define OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
+static inline float interpolate_product_single(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) { /* Returns the sum over k=0..3 of frac[k] * S[k], where S[k] is the sum over visited i of a[i] * b[i*oversample + k]; all arithmetic is single precision. */
+  int i; /* NOTE(review): signed index; it is converted to unsigned for the i<len comparison */
+  float ret;
+  __m128 sum = _mm_setzero_ps(); /* lane k accumulates S[k] */
+  __m128 f = _mm_loadu_ps(frac); /* reads frac[0..3]; frac is only read, never written */
+  for(i=0;i<len;i+=2) /* two elements of a per pass, so index len is also processed when len is odd; presumably len is always even -- TODO confirm */
+  {
+    sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i), _mm_loadu_ps(b+i*oversample))); /* a[i] broadcast to all four lanes, times b[i*oversample .. i*oversample+3] */
+    sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i+1), _mm_loadu_ps(b+(i+1)*oversample))); /* same for element i+1 */
+  }
+   sum = _mm_mul_ps(f, sum); /* weight each lane k by frac[k] */
+   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum)); /* horizontal reduce, step 1: lanes 0,1 += lanes 2,3 */
+   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55)); /* step 2: lane 0 += lane 1 */
+   _mm_store_ss(&ret, sum); /* lane 0 now holds the total */
+   return ret;
+}
+
+#ifdef _USE_SSE2
+#include <emmintrin.h>
+#define OVERRIDE_INNER_PRODUCT_DOUBLE
+
+static inline double inner_product_double(const float *a, const float *b, unsigned int len) /* Dot product of a and b with a double-precision accumulator; each product a[i] * b[i] is still formed in single precision before being widened. */
+{
+   int i; /* NOTE(review): signed index; it is converted to unsigned for the i<len comparison */
+   double ret;
+   __m128d sum = _mm_setzero_pd(); /* two double-precision partial sums */
+   __m128 t; /* holds four single-precision products */
+   for (i=0;i<len;i+=8) /* 8 elements per pass, so indices up to len rounded up to a multiple of 8 are read; presumably callers pass a multiple of 8 -- TODO confirm */
+   {
+      t = _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i)); /* products for elements i..i+3 */
+      sum = _mm_add_pd(sum, _mm_cvtps_pd(t)); /* widen and add the low two products */
+      sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t))); /* move the high two products down, widen and add */
+
+      t = _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4)); /* products for elements i+4..i+7 */
+      sum = _mm_add_pd(sum, _mm_cvtps_pd(t)); /* low two products */
+      sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t))); /* high two products */
+   }
+   sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum)); /* horizontal reduce: lane 0 += lane 1 */
+   _mm_store_sd(&ret, sum); /* lane 0 now holds the total */
+   return ret;
+}
+
+#define OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
+static inline double interpolate_product_double(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) { /* Returns the sum over k=0..3 of frac[k] * S[k], where S[k] is the sum over visited i of a[i] * b[i*oversample + k]; products are single precision, accumulation and frac weighting are double precision. */
+  int i; /* NOTE(review): signed index; it is converted to unsigned for the i<len comparison */
+  double ret;
+  __m128d sum;
+  __m128d sum1 = _mm_setzero_pd(); /* accumulates S[0] and S[1] */
+  __m128d sum2 = _mm_setzero_pd(); /* accumulates S[2] and S[3] */
+  __m128 f = _mm_loadu_ps(frac); /* reads frac[0..3]; frac is only read, never written */
+  __m128d f1 = _mm_cvtps_pd(f); /* frac[0], frac[1] widened to double */
+  __m128d f2 = _mm_cvtps_pd(_mm_movehl_ps(f,f)); /* frac[2], frac[3] widened to double */
+  __m128 t; /* holds four single-precision products */
+  for(i=0;i<len;i+=2) /* two elements of a per pass, so index len is also processed when len is odd; presumably len is always even -- TODO confirm */
+  {
+    t = _mm_mul_ps(_mm_load1_ps(a+i), _mm_loadu_ps(b+i*oversample)); /* a[i] broadcast to all four lanes, times b[i*oversample .. i*oversample+3] */
+    sum1 = _mm_add_pd(sum1, _mm_cvtps_pd(t)); /* widen and add products for k=0,1 */
+    sum2 = _mm_add_pd(sum2, _mm_cvtps_pd(_mm_movehl_ps(t, t))); /* widen and add products for k=2,3 */
+
+    t = _mm_mul_ps(_mm_load1_ps(a+i+1), _mm_loadu_ps(b+(i+1)*oversample)); /* same for element i+1 */
+    sum1 = _mm_add_pd(sum1, _mm_cvtps_pd(t)); /* k=0,1 */
+    sum2 = _mm_add_pd(sum2, _mm_cvtps_pd(_mm_movehl_ps(t, t))); /* k=2,3 */
+  }
+  sum1 = _mm_mul_pd(f1, sum1); /* weight S[0], S[1] by frac[0], frac[1] */
+  sum2 = _mm_mul_pd(f2, sum2); /* weight S[2], S[3] by frac[2], frac[3] */
+  sum = _mm_add_pd(sum1, sum2); /* combine the two weighted pairs lane-wise */
+  sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum)); /* horizontal reduce: lane 0 += lane 1 */
+  _mm_store_sd(&ret, sum); /* lane 0 now holds the total */
+  return ret;
+}
+
+#endif
--- a/media/libspeex_resampler/update.sh
+++ b/media/libspeex_resampler/update.sh
@@ -1,18 +1,19 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 # Usage: ./update.sh <libspeex_src_directory>
 #
 # Copies the needed files from a directory containing the original
-# libspeex sources that we need for HTML5 media playback rate change.
+# libspeex sources.
 
 cp $1/libspeex/resample.c src
+cp $1/libspeex/resample_sse.h src
 cp $1/libspeex/arch.h src
 cp $1/libspeex/stack_alloc.h src
 cp $1/libspeex/fixed_generic.h src
 cp $1/include/speex/speex_resampler.h src
 cp $1/include/speex/speex_types.h src
 sed -e 's/unsigned @SIZE16@/uint16_t/g' -e 's/unsigned @SIZE32@/uint32_t/g' -e 's/@SIZE16@/int16_t/g' -e 's/@SIZE32@/int32_t/g' < $1/include/speex/speex_config_types.h.in > src/speex_config_types.h
 cp $1/AUTHORS .
 cp $1/COPYING .