Bug 877662 - Add an SSE2 implementation of AudioNodeEngine.cpp functions. r=ehsan
author: Paul Adenot <paul@paul.cx>
Tue, 11 Jun 2013 02:01:54 +0200
changeset 331474 6e96c35c78bd475a0963decd32756e0df5854f07
parent 331473 5ae01cbc55491297156c97e1dea4c215add27d4f
child 331475 5f1d898a440e70f83e8ad12335785d092fc41d8b
push id: 6048
push user: kmoir@mozilla.com
push date: Mon, 06 Jun 2016 19:02:08 +0000
treeherder: mozilla-beta@46d72a56c57d [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: ehsan
bugs: 877662
milestone: 48.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 877662 - Add an SSE2 implementation of AudioNodeEngine.cpp functions. r=ehsan MozReview-Commit-ID: GhxFwFlmqPr
dom/media/webaudio/AudioNodeEngineSSE2.cpp
dom/media/webaudio/AudioNodeEngineSSE2.h
new file mode 100644
--- /dev/null
+++ b/dom/media/webaudio/AudioNodeEngineSSE2.cpp
@@ -0,0 +1,252 @@
+/* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "AudioNodeEngineSSE2.h"
+#include <emmintrin.h>
+
+#ifdef DEBUG
+  // Debug-only check that `ptr` is 16-byte aligned, which the aligned SSE
+  // load/store intrinsics used in this file (_mm_load_ps / _mm_store_ps)
+  // require.  The argument is parenthesized so that expressions can be
+  // passed safely, and the expansion carries no trailing semicolon: every
+  // call site already supplies its own.
+  #define ASSERT_ALIGNED(ptr)                                                  \
+            MOZ_ASSERT((((uintptr_t)(ptr)) & 0x0F) == 0,                       \
+                       #ptr " has to be 16-byte aligned.")
+#else
+  // Release builds: the check compiles away entirely.
+  #define ASSERT_ALIGNED(ptr)
+#endif
+
+namespace mozilla {
+/**
+ * Accumulates a scaled copy of aInput into aOutput:
+ *   aOutput[i] += aInput[i] * aScale   for i in [0, WEBAUDIO_BLOCK_SIZE).
+ *
+ * Both buffers go through aligned SSE loads/stores and therefore have to be
+ * 16-byte aligned (asserted in debug builds).  The loop advances 16 floats
+ * per iteration, so it assumes WEBAUDIO_BLOCK_SIZE is a multiple of 16.
+ */
+void
+AudioBlockAddChannelWithScale_SSE(const float aInput[WEBAUDIO_BLOCK_SIZE],
+                                  float aScale,
+                                  float aOutput[WEBAUDIO_BLOCK_SIZE])
+{
+  ASSERT_ALIGNED(aInput);
+  ASSERT_ALIGNED(aOutput);
+
+  /* Broadcast the scalar gain to all four lanes. */
+  const __m128 gain = _mm_load1_ps(&aScale);
+
+  for (unsigned offset = 0; offset < WEBAUDIO_BLOCK_SIZE; offset += 16) {
+    const float* src = aInput + offset;
+    float* dst = aOutput + offset;
+
+    /* Scale 16 input samples. */
+    const __m128 scaledA = _mm_mul_ps(_mm_load_ps(src), gain);
+    const __m128 scaledB = _mm_mul_ps(_mm_load_ps(src + 4), gain);
+    const __m128 scaledC = _mm_mul_ps(_mm_load_ps(src + 8), gain);
+    const __m128 scaledD = _mm_mul_ps(_mm_load_ps(src + 12), gain);
+
+    /* Fetch the current output samples; every load is issued before the
+     * first store of this iteration. */
+    const __m128 accA = _mm_load_ps(dst);
+    const __m128 accB = _mm_load_ps(dst + 4);
+    const __m128 accC = _mm_load_ps(dst + 8);
+    const __m128 accD = _mm_load_ps(dst + 12);
+
+    /* Mix and write back. */
+    _mm_store_ps(dst, _mm_add_ps(accA, scaledA));
+    _mm_store_ps(dst + 4, _mm_add_ps(accB, scaledB));
+    _mm_store_ps(dst + 8, _mm_add_ps(accC, scaledC));
+    _mm_store_ps(dst + 12, _mm_add_ps(accD, scaledD));
+  }
+}
+
+/**
+ * Mixes aInput into aOutput without any gain:
+ *   aOutput[i] += aInput[i]   for i in [0, WEBAUDIO_BLOCK_SIZE).
+ *
+ * Both buffers must be 16-byte aligned (asserted in debug builds) because
+ * aligned SSE loads/stores are used.  16 floats are handled per iteration,
+ * so WEBAUDIO_BLOCK_SIZE is assumed to be a multiple of 16.
+ */
+void
+AudioBlockAddChannel_SSE(const float aInput[WEBAUDIO_BLOCK_SIZE],
+                         float aOutput[WEBAUDIO_BLOCK_SIZE])
+{
+  ASSERT_ALIGNED(aInput);
+  ASSERT_ALIGNED(aOutput);
+
+  for (unsigned offset = 0; offset < WEBAUDIO_BLOCK_SIZE; offset += 16) {
+    const float* src = aInput + offset;
+    float* dst = aOutput + offset;
+
+    /* Incoming samples. */
+    const __m128 srcA = _mm_load_ps(src);
+    const __m128 srcB = _mm_load_ps(src + 4);
+    const __m128 srcC = _mm_load_ps(src + 8);
+    const __m128 srcD = _mm_load_ps(src + 12);
+
+    /* Samples already present in the destination. */
+    const __m128 dstA = _mm_load_ps(dst);
+    const __m128 dstB = _mm_load_ps(dst + 4);
+    const __m128 dstC = _mm_load_ps(dst + 8);
+    const __m128 dstD = _mm_load_ps(dst + 12);
+
+    /* Sum and write back. */
+    _mm_store_ps(dst, _mm_add_ps(srcA, dstA));
+    _mm_store_ps(dst + 4, _mm_add_ps(srcB, dstB));
+    _mm_store_ps(dst + 8, _mm_add_ps(srcC, dstC));
+    _mm_store_ps(dst + 12, _mm_add_ps(srcD, dstD));
+  }
+}
+
+/**
+ * Writes a uniformly scaled copy of aInput to aOutput:
+ *   aOutput[i] = aInput[i] * aScale   for i in [0, WEBAUDIO_BLOCK_SIZE).
+ *
+ * Both pointers must be 16-byte aligned (asserted in debug builds) because
+ * aligned SSE loads/stores are used.  16 floats are handled per iteration,
+ * so WEBAUDIO_BLOCK_SIZE is assumed to be a multiple of 16.
+ */
+void
+AudioBlockCopyChannelWithScale_SSE(const float* aInput,
+                                   float aScale,
+                                   float* aOutput)
+{
+  ASSERT_ALIGNED(aInput);
+  ASSERT_ALIGNED(aOutput);
+
+  /* Broadcast the scalar gain to all four lanes. */
+  const __m128 gain = _mm_load1_ps(&aScale);
+
+  for (unsigned offset = 0; offset < WEBAUDIO_BLOCK_SIZE; offset += 16) {
+    const float* src = aInput + offset;
+    float* dst = aOutput + offset;
+
+    /* All four loads happen before the first store of this iteration. */
+    const __m128 srcA = _mm_load_ps(src);
+    const __m128 srcB = _mm_load_ps(src + 4);
+    const __m128 srcC = _mm_load_ps(src + 8);
+    const __m128 srcD = _mm_load_ps(src + 12);
+
+    _mm_store_ps(dst, _mm_mul_ps(srcA, gain));
+    _mm_store_ps(dst + 4, _mm_mul_ps(srcB, gain));
+    _mm_store_ps(dst + 8, _mm_mul_ps(srcC, gain));
+    _mm_store_ps(dst + 12, _mm_mul_ps(srcD, gain));
+  }
+}
+
+/**
+ * Writes aInput multiplied sample-by-sample with a gain curve to aOutput:
+ *   aOutput[i] = aInput[i] * aScale[i]   for i in [0, WEBAUDIO_BLOCK_SIZE).
+ *
+ * All three buffers must be 16-byte aligned (asserted in debug builds)
+ * because aligned SSE loads/stores are used.  16 floats are handled per
+ * iteration, so WEBAUDIO_BLOCK_SIZE is assumed to be a multiple of 16.
+ */
+void
+AudioBlockCopyChannelWithScale_SSE(const float aInput[WEBAUDIO_BLOCK_SIZE],
+                                   const float aScale[WEBAUDIO_BLOCK_SIZE],
+                                   float aOutput[WEBAUDIO_BLOCK_SIZE])
+{
+  ASSERT_ALIGNED(aInput);
+  ASSERT_ALIGNED(aScale);
+  ASSERT_ALIGNED(aOutput);
+
+  for (unsigned offset = 0; offset < WEBAUDIO_BLOCK_SIZE; offset += 16) {
+    const float* gains = aScale + offset;
+    const float* src = aInput + offset;
+    float* dst = aOutput + offset;
+
+    /* Per-sample gains for this group of 16 samples. */
+    const __m128 gainA = _mm_load_ps(gains);
+    const __m128 gainB = _mm_load_ps(gains + 4);
+    const __m128 gainC = _mm_load_ps(gains + 8);
+    const __m128 gainD = _mm_load_ps(gains + 12);
+
+    /* Matching input samples. */
+    const __m128 srcA = _mm_load_ps(src);
+    const __m128 srcB = _mm_load_ps(src + 4);
+    const __m128 srcC = _mm_load_ps(src + 8);
+    const __m128 srcD = _mm_load_ps(src + 12);
+
+    /* Multiply and store. */
+    _mm_store_ps(dst, _mm_mul_ps(srcA, gainA));
+    _mm_store_ps(dst + 4, _mm_mul_ps(srcB, gainB));
+    _mm_store_ps(dst + 8, _mm_mul_ps(srcC, gainC));
+    _mm_store_ps(dst + 12, _mm_mul_ps(srcD, gainD));
+  }
+}
+
+/**
+ * Scales a buffer of audio samples in place:
+ *   aBlock[i] *= aScale   for i in [0, WEBAUDIO_BLOCK_SIZE * aChannelCount).
+ *
+ * aBlock is treated as aChannelCount consecutive blocks of
+ * WEBAUDIO_BLOCK_SIZE floats each.  Previously aChannelCount was accepted
+ * but ignored, so only the first block was ever scaled.
+ * NOTE(review): contiguous channel storage is assumed here, not shown by
+ * this file; confirm against the callers.
+ *
+ * @param aBlock        16-byte aligned sample buffer (asserted in debug
+ *                      builds); read and written with aligned SSE accesses.
+ * @param aChannelCount Number of WEBAUDIO_BLOCK_SIZE-sized blocks to scale.
+ *                      0 leaves the buffer untouched.
+ * @param aScale        Gain applied to every sample.
+ *
+ * 16 floats are handled per iteration, so WEBAUDIO_BLOCK_SIZE is assumed to
+ * be a multiple of 16.
+ */
+void
+AudioBlockInPlaceScale_SSE(float aBlock[WEBAUDIO_BLOCK_SIZE],
+                           uint32_t aChannelCount,
+                           float aScale)
+{
+  ASSERT_ALIGNED(aBlock);
+
+  /* Broadcast the scalar gain to all four lanes. */
+  const __m128 vgain = _mm_load1_ps(&aScale);
+
+  /* Total number of samples across every channel. */
+  const uint32_t sampleCount = WEBAUDIO_BLOCK_SIZE * aChannelCount;
+
+  for (uint32_t i = 0; i < sampleCount; i += 16) {
+    const __m128 vin0 = _mm_load_ps(&aBlock[i]);
+    const __m128 vin1 = _mm_load_ps(&aBlock[i + 4]);
+    const __m128 vin2 = _mm_load_ps(&aBlock[i + 8]);
+    const __m128 vin3 = _mm_load_ps(&aBlock[i + 12]);
+
+    _mm_store_ps(&aBlock[i], _mm_mul_ps(vin0, vgain));
+    _mm_store_ps(&aBlock[i + 4], _mm_mul_ps(vin1, vgain));
+    _mm_store_ps(&aBlock[i + 8], _mm_mul_ps(vin2, vgain));
+    _mm_store_ps(&aBlock[i + 12], _mm_mul_ps(vin3, vgain));
+  }
+}
+
+/**
+ * Pans a stereo block into a stereo block.
+ *
+ * When aIsOnTheLeft is true:
+ *   aOutputL[i] = aInputL[i] + aInputR[i] * aGainL
+ *   aOutputR[i] = aInputR[i] * aGainR
+ * otherwise:
+ *   aOutputL[i] = aInputL[i] * aGainL
+ *   aOutputR[i] = aInputR[i] + aInputL[i] * aGainR
+ *
+ * All four buffers must be 16-byte aligned (asserted in debug builds)
+ * because aligned SSE loads/stores are used.  8 floats per channel are
+ * handled per iteration, so WEBAUDIO_BLOCK_SIZE is assumed to be a multiple
+ * of 8.
+ */
+void
+AudioBlockPanStereoToStereo_SSE(const float aInputL[WEBAUDIO_BLOCK_SIZE],
+                                const float aInputR[WEBAUDIO_BLOCK_SIZE],
+                                float aGainL, float aGainR, bool aIsOnTheLeft,
+                                float aOutputL[WEBAUDIO_BLOCK_SIZE],
+                                float aOutputR[WEBAUDIO_BLOCK_SIZE])
+{
+  ASSERT_ALIGNED(aInputL);
+  ASSERT_ALIGNED(aInputR);
+  ASSERT_ALIGNED(aOutputL);
+  ASSERT_ALIGNED(aOutputR);
+
+  /* Broadcast both gains to all four lanes. */
+  const __m128 gainLeft = _mm_load1_ps(&aGainL);
+  const __m128 gainRight = _mm_load1_ps(&aGainR);
+
+  if (!aIsOnTheLeft) {
+    for (unsigned offset = 0; offset < WEBAUDIO_BLOCK_SIZE; offset += 8) {
+      /* Both channels are fully loaded before anything is stored. */
+      const __m128 leftLo = _mm_load_ps(aInputL + offset);
+      const __m128 rightLo = _mm_load_ps(aInputR + offset);
+      const __m128 leftHi = _mm_load_ps(aInputL + offset + 4);
+      const __m128 rightHi = _mm_load_ps(aInputR + offset + 4);
+
+      /* Left output: attenuated left input only. */
+      _mm_store_ps(aOutputL + offset, _mm_mul_ps(leftLo, gainLeft));
+      _mm_store_ps(aOutputL + offset + 4, _mm_mul_ps(leftHi, gainLeft));
+
+      /* Right output: right input plus the left input bleed. */
+      const __m128 bleedLo = _mm_mul_ps(leftLo, gainRight);
+      const __m128 bleedHi = _mm_mul_ps(leftHi, gainRight);
+      _mm_store_ps(aOutputR + offset, _mm_add_ps(bleedLo, rightLo));
+      _mm_store_ps(aOutputR + offset + 4, _mm_add_ps(bleedHi, rightHi));
+    }
+    return;
+  }
+
+  for (unsigned offset = 0; offset < WEBAUDIO_BLOCK_SIZE; offset += 8) {
+    /* Both channels are fully loaded before anything is stored. */
+    const __m128 leftLo = _mm_load_ps(aInputL + offset);
+    const __m128 rightLo = _mm_load_ps(aInputR + offset);
+    const __m128 leftHi = _mm_load_ps(aInputL + offset + 4);
+    const __m128 rightHi = _mm_load_ps(aInputR + offset + 4);
+
+    /* Left output: left input plus the right input bleed. */
+    const __m128 bleedLo = _mm_mul_ps(rightLo, gainLeft);
+    const __m128 bleedHi = _mm_mul_ps(rightHi, gainLeft);
+    _mm_store_ps(aOutputL + offset, _mm_add_ps(bleedLo, leftLo));
+    _mm_store_ps(aOutputL + offset + 4, _mm_add_ps(bleedHi, leftHi));
+
+    /* Right output: attenuated right input only. */
+    _mm_store_ps(aOutputR + offset, _mm_mul_ps(rightLo, gainRight));
+    _mm_store_ps(aOutputR + offset + 4, _mm_mul_ps(rightHi, gainRight));
+  }
+}
+}
new file mode 100644
--- /dev/null
+++ b/dom/media/webaudio/AudioNodeEngineSSE2.h
@@ -0,0 +1,39 @@
+/* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MOZILLA_AUDIONODEENGINESSE2_H_
+#define MOZILLA_AUDIONODEENGINESSE2_H_
+
+#include "AudioNodeEngine.h"
+
+/*
+ * SSE implementations of audio block helpers.
+ *
+ * Every buffer handed to these functions is accessed with aligned SSE
+ * loads/stores and must therefore be 16-byte aligned.
+ */
+namespace mozilla {
+/* aOutput[i] += aInput[i] * aScale over one block. */
+void
+AudioBlockAddChannelWithScale_SSE(const float aInput[WEBAUDIO_BLOCK_SIZE],
+                                  float aScale,
+                                  float aOutput[WEBAUDIO_BLOCK_SIZE]);
+
+/* aOutput[i] += aInput[i] over one block. */
+void
+AudioBlockAddChannel_SSE(const float aInput[WEBAUDIO_BLOCK_SIZE],
+                         float aOutput[WEBAUDIO_BLOCK_SIZE]);
+
+/* aOutput[i] = aInput[i] * aScale over one block (single gain value). */
+void
+AudioBlockCopyChannelWithScale_SSE(const float* aInput,
+                                   float aScale,
+                                   float* aOutput);
+
+/* aOutput[i] = aInput[i] * aScale[i] over one block (per-sample gain). */
+void
+AudioBlockCopyChannelWithScale_SSE(const float aInput[WEBAUDIO_BLOCK_SIZE],
+                                   const float aScale[WEBAUDIO_BLOCK_SIZE],
+                                   float aOutput[WEBAUDIO_BLOCK_SIZE]);
+
+/* Multiplies the samples of aBlock by aScale, in place. */
+void
+AudioBlockInPlaceScale_SSE(float aBlock[WEBAUDIO_BLOCK_SIZE],
+                           uint32_t aChannelCount,
+                           float aScale);
+
+/*
+ * Stereo-to-stereo panning.
+ * aIsOnTheLeft:  aOutputL = aInputL + aInputR * aGainL,
+ *                aOutputR = aInputR * aGainR.
+ * otherwise:     aOutputL = aInputL * aGainL,
+ *                aOutputR = aInputR + aInputL * aGainR.
+ */
+void
+AudioBlockPanStereoToStereo_SSE(const float aInputL[WEBAUDIO_BLOCK_SIZE],
+                                const float aInputR[WEBAUDIO_BLOCK_SIZE],
+                                float aGainL, float aGainR, bool aIsOnTheLeft,
+                                float aOutputL[WEBAUDIO_BLOCK_SIZE],
+                                float aOutputR[WEBAUDIO_BLOCK_SIZE]);
+}
+
+#endif /* MOZILLA_AUDIONODEENGINESSE2_H_ */