Bug 894941 - Import SSE-optimized routines for the speex resampler. r=ehsan
authorPaul Adenot <paul@paul.cx>
Fri, 26 Jul 2013 18:46:32 +0200
changeset 140185 f134b528c97d9f10ae10b0f2702f4cb66c95cec4
parent 140184 f6412c912847128e5aa1c8a7ced227356959fb84
child 140186 adeb3608cd4114359cc0eff753f704ed578deb11
push id1945
push userryanvm@gmail.com
push dateSat, 27 Jul 2013 02:27:26 +0000
treeherderfx-team@4874fa438b1c [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersehsan
bugs894941
milestone25.0a1
Bug 894941 - Import SSE-optimized routines for the speex resampler. r=ehsan
media/libspeex_resampler/src/resample_sse.h
media/libspeex_resampler/update.sh
new file mode 100644
--- /dev/null
+++ b/media/libspeex_resampler/src/resample_sse.h
@@ -0,0 +1,128 @@
+/* Copyright (C) 2007-2008 Jean-Marc Valin
+ * Copyright (C) 2008 Thorvald Natvig
+ */
+/**
+   @file resample_sse.h
+   @brief Resampler functions (SSE version)
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+   
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+   
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+   
+   - Neither the name of the Xiph.org Foundation nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+   
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <xmmintrin.h>
+
+#define OVERRIDE_INNER_PRODUCT_SINGLE
+static inline float inner_product_single(const float *a, const float *b, unsigned int len)
+{
+   int i;
+   float ret;
+   __m128 sum = _mm_setzero_ps();
+   for (i=0;i<len;i+=8)
+   {
+      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i)));
+      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4)));
+   }
+   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
+   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
+   _mm_store_ss(&ret, sum);
+   return ret;
+}
+
+#define OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
+static inline float interpolate_product_single(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {
+  int i;
+  float ret;
+  __m128 sum = _mm_setzero_ps();
+  __m128 f = _mm_loadu_ps(frac);
+  for(i=0;i<len;i+=2)
+  {
+    sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i), _mm_loadu_ps(b+i*oversample)));
+    sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i+1), _mm_loadu_ps(b+(i+1)*oversample)));
+  }
+   sum = _mm_mul_ps(f, sum);
+   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
+   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
+   _mm_store_ss(&ret, sum);
+   return ret;
+}
+
+#ifdef _USE_SSE2
+#include <emmintrin.h>
+#define OVERRIDE_INNER_PRODUCT_DOUBLE
+
+static inline double inner_product_double(const float *a, const float *b, unsigned int len)
+{
+   int i;
+   double ret;
+   __m128d sum = _mm_setzero_pd();
+   __m128 t;
+   for (i=0;i<len;i+=8)
+   {
+      t = _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i));
+      sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
+      sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
+
+      t = _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4));
+      sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
+      sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
+   }
+   sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
+   _mm_store_sd(&ret, sum);
+   return ret;
+}
+
+#define OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
+static inline double interpolate_product_double(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {
+  int i;
+  double ret;
+  __m128d sum;
+  __m128d sum1 = _mm_setzero_pd();
+  __m128d sum2 = _mm_setzero_pd();
+  __m128 f = _mm_loadu_ps(frac);
+  __m128d f1 = _mm_cvtps_pd(f);
+  __m128d f2 = _mm_cvtps_pd(_mm_movehl_ps(f,f));
+  __m128 t;
+  for(i=0;i<len;i+=2)
+  {
+    t = _mm_mul_ps(_mm_load1_ps(a+i), _mm_loadu_ps(b+i*oversample));
+    sum1 = _mm_add_pd(sum1, _mm_cvtps_pd(t));
+    sum2 = _mm_add_pd(sum2, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
+
+    t = _mm_mul_ps(_mm_load1_ps(a+i+1), _mm_loadu_ps(b+(i+1)*oversample));
+    sum1 = _mm_add_pd(sum1, _mm_cvtps_pd(t));
+    sum2 = _mm_add_pd(sum2, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
+  }
+  sum1 = _mm_mul_pd(f1, sum1);
+  sum2 = _mm_mul_pd(f2, sum2);
+  sum = _mm_add_pd(sum1, sum2);
+  sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
+  _mm_store_sd(&ret, sum);
+  return ret;
+}
+
+#endif
--- a/media/libspeex_resampler/update.sh
+++ b/media/libspeex_resampler/update.sh
@@ -1,18 +1,19 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 # Usage: ./update.sh <libspeex_src_directory>
 #
 # Copies the needed files from a directory containing the original
-# libspeex sources that we need for HTML5 media playback rate change.
+# libspeex sources.
 
 cp $1/libspeex/resample.c src
+cp $1/libspeex/resample_sse.h src
 cp $1/libspeex/arch.h src
 cp $1/libspeex/stack_alloc.h src
 cp $1/libspeex/fixed_generic.h src
 cp $1/include/speex/speex_resampler.h src
 cp $1/include/speex/speex_types.h src
 sed -e 's/unsigned @SIZE16@/uint16_t/g' -e 's/unsigned @SIZE32@/uint32_t/g' -e 's/@SIZE16@/int16_t/g' -e 's/@SIZE32@/int32_t/g' < $1/include/speex/speex_config_types.h.in > src/speex_config_types.h
 cp $1/AUTHORS .
 cp $1/COPYING .