Bug 877662 - Update SSE2 versions of AudioNodeEngine functions r=padenot
author: Dan Minor <dminor@mozilla.com>
Mon, 11 Apr 2016 16:10:45 -0400
changeset 331475 5f1d898a440e70f83e8ad12335785d092fc41d8b
parent 331474 6e96c35c78bd475a0963decd32756e0df5854f07
child 331476 58f6d3815cacb8f88b39bf4b1226b49e82f8c276
push id: 6048
push user: kmoir@mozilla.com
push date: Mon, 06 Jun 2016 19:02:08 +0000
treeherder: mozilla-beta@46d72a56c57d [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: padenot
bugs: 877662
milestone: 48.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 877662 - Update SSE2 versions of AudioNodeEngine functions r=padenot

This updates the original SSE2 implementations to match the current AudioNodeEngine API and adds them to the build.

MozReview-Commit-ID: KULBD7KTr3n
dom/media/webaudio/AudioNodeEngineSSE2.cpp
dom/media/webaudio/AudioNodeEngineSSE2.h
dom/media/webaudio/blink/moz.build
dom/media/webaudio/moz.build
--- a/dom/media/webaudio/AudioNodeEngineSSE2.cpp
+++ b/dom/media/webaudio/AudioNodeEngineSSE2.cpp
@@ -1,41 +1,37 @@
 /* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* this source code form is subject to the terms of the mozilla public
  * license, v. 2.0. if a copy of the mpl was not distributed with this file,
  * You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#include "AlignmentUtils.h"
 #include "AudioNodeEngineSSE2.h"
 #include <emmintrin.h>
 
-#ifdef DEBUG
-  #define ASSERT_ALIGNED(ptr)                                                  \
-            MOZ_ASSERT((((uintptr_t)ptr + 15) & ~0x0F) == (uintptr_t)ptr,      \
-                       #ptr " has to be aligned 16-bytes aligned.");
-#else
-  #define ASSERT_ALIGNED(ptr)
-#endif
 
 namespace mozilla {
 void
-AudioBlockAddChannelWithScale_SSE(const float aInput[WEBAUDIO_BLOCK_SIZE],
-                                  float aScale,
-                                  float aOutput[WEBAUDIO_BLOCK_SIZE])
+AudioBufferAddWithScale_SSE(const float* aInput,
+                            float aScale,
+                            float* aOutput,
+                            uint32_t aSize)
 {
   __m128 vin0, vin1, vin2, vin3,
          vscaled0, vscaled1, vscaled2, vscaled3,
          vout0, vout1, vout2, vout3,
          vgain;
 
-  ASSERT_ALIGNED(aInput);
-  ASSERT_ALIGNED(aOutput);
+  ASSERT_ALIGNED16(aInput);
+  ASSERT_ALIGNED16(aOutput);
+  ASSERT_MULTIPLE16(aSize);
 
   vgain = _mm_load1_ps(&aScale);
 
-  for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=16) {
+  for (unsigned i = 0; i < aSize; i+=16) {
     vin0 = _mm_load_ps(&aInput[i]);
     vin1 = _mm_load_ps(&aInput[i + 4]);
     vin2 = _mm_load_ps(&aInput[i + 8]);
     vin3 = _mm_load_ps(&aInput[i + 12]);
 
     vscaled0 = _mm_mul_ps(vin0, vgain);
     vscaled1 = _mm_mul_ps(vin1, vgain);
     vscaled2 = _mm_mul_ps(vin2, vgain);
@@ -54,59 +50,25 @@ AudioBlockAddChannelWithScale_SSE(const 
     _mm_store_ps(&aOutput[i], vout0);
     _mm_store_ps(&aOutput[i + 4], vout1);
     _mm_store_ps(&aOutput[i + 8], vout2);
     _mm_store_ps(&aOutput[i + 12], vout3);
   }
 }
 
 void
-AudioBlockAddChannel_SSE(const float aInput[WEBAUDIO_BLOCK_SIZE],
-                         float aOutput[WEBAUDIO_BLOCK_SIZE])
-{
-  __m128 vin0, vin1, vin2, vin3,
-         vin4, vin5, vin6, vin7,
-         vout0, vout1, vout2, vout3;
-
-  ASSERT_ALIGNED(aInput);
-  ASSERT_ALIGNED(aOutput);
-
-  for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=16) {
-    vin0 = _mm_load_ps(&aInput[i]);
-    vin1 = _mm_load_ps(&aInput[i + 4]);
-    vin2 = _mm_load_ps(&aInput[i + 8]);
-    vin3 = _mm_load_ps(&aInput[i + 12]);
-
-    vin4 = _mm_load_ps(&aOutput[i]);
-    vin5 = _mm_load_ps(&aOutput[i + 4]);
-    vin6 = _mm_load_ps(&aOutput[i + 8]);
-    vin7 = _mm_load_ps(&aOutput[i + 12]);
-
-    vout0 = _mm_add_ps(vin0, vin4);
-    vout1 = _mm_add_ps(vin1, vin5);
-    vout2 = _mm_add_ps(vin2, vin6);
-    vout3 = _mm_add_ps(vin3, vin7);
-
-    _mm_store_ps(&aOutput[i], vout0);
-    _mm_store_ps(&aOutput[i + 4], vout1);
-    _mm_store_ps(&aOutput[i + 8], vout2);
-    _mm_store_ps(&aOutput[i + 12], vout3);
-  }
-}
-
-void
 AudioBlockCopyChannelWithScale_SSE(const float* aInput,
                                    float aScale,
                                    float* aOutput)
 {
   __m128 vin0, vin1, vin2, vin3,
          vout0, vout1, vout2, vout3;
 
-  ASSERT_ALIGNED(aInput);
-  ASSERT_ALIGNED(aOutput);
+  ASSERT_ALIGNED16(aInput);
+  ASSERT_ALIGNED16(aOutput);
 
   __m128 vgain = _mm_load1_ps(&aScale);
 
   for (unsigned i = 0 ; i < WEBAUDIO_BLOCK_SIZE; i+=16) {
     vin0 = _mm_load_ps(&aInput[i]);
     vin1 = _mm_load_ps(&aInput[i + 4]);
     vin2 = _mm_load_ps(&aInput[i + 8]);
     vin3 = _mm_load_ps(&aInput[i + 12]);
@@ -125,19 +87,19 @@ void
 AudioBlockCopyChannelWithScale_SSE(const float aInput[WEBAUDIO_BLOCK_SIZE],
                                    const float aScale[WEBAUDIO_BLOCK_SIZE],
                                    float aOutput[WEBAUDIO_BLOCK_SIZE])
 {
   __m128 vin0, vin1, vin2, vin3,
          vscaled0, vscaled1, vscaled2, vscaled3,
          vout0, vout1, vout2, vout3;
 
-  ASSERT_ALIGNED(aInput);
-  ASSERT_ALIGNED(aScale);
-  ASSERT_ALIGNED(aOutput);
+  ASSERT_ALIGNED16(aInput);
+  ASSERT_ALIGNED16(aScale);
+  ASSERT_ALIGNED16(aOutput);
 
   for (unsigned i = 0 ; i < WEBAUDIO_BLOCK_SIZE; i+=16) {
     vscaled0 = _mm_load_ps(&aScale[i]);
     vscaled1 = _mm_load_ps(&aScale[i+4]);
     vscaled2 = _mm_load_ps(&aScale[i+8]);
     vscaled3 = _mm_load_ps(&aScale[i+12]);
 
     vin0 = _mm_load_ps(&aInput[i]);
@@ -153,28 +115,29 @@ AudioBlockCopyChannelWithScale_SSE(const
     _mm_store_ps(&aOutput[i], vout0);
     _mm_store_ps(&aOutput[i + 4], vout1);
     _mm_store_ps(&aOutput[i + 8], vout2);
     _mm_store_ps(&aOutput[i + 12], vout3);
   }
 }
 
 void
-AudioBlockInPlaceScale_SSE(float aBlock[WEBAUDIO_BLOCK_SIZE],
-                           uint32_t aChannelCount,
-                           float aScale)
+AudioBufferInPlaceScale_SSE(float* aBlock,
+                            float aScale,
+                            uint32_t aSize)
 {
   __m128 vout0, vout1, vout2, vout3,
          vin0, vin1, vin2, vin3;
 
-  ASSERT_ALIGNED(aBlock);
+  ASSERT_ALIGNED16(aBlock);
+  ASSERT_MULTIPLE16(aSize);
 
   __m128 vgain = _mm_load1_ps(&aScale);
 
-  for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=16) {
+  for (unsigned i = 0; i < aSize; i+=16) {
     vin0 = _mm_load_ps(&aBlock[i]);
     vin1 = _mm_load_ps(&aBlock[i + 4]);
     vin2 = _mm_load_ps(&aBlock[i + 8]);
     vin3 = _mm_load_ps(&aBlock[i + 12]);
     vout0 = _mm_mul_ps(vin0, vgain);
     vout1 = _mm_mul_ps(vin1, vgain);
     vout2 = _mm_mul_ps(vin2, vgain);
     vout3 = _mm_mul_ps(vin3, vgain);
@@ -192,20 +155,20 @@ AudioBlockPanStereoToStereo_SSE(const fl
                                 float aOutputL[WEBAUDIO_BLOCK_SIZE],
                                 float aOutputR[WEBAUDIO_BLOCK_SIZE])
 {
   __m128 vinl0, vinr0, vinl1, vinr1,
          vout0, vout1,
          vscaled0, vscaled1,
          vgainl, vgainr;
 
-  ASSERT_ALIGNED(aInputL);
-  ASSERT_ALIGNED(aInputR);
-  ASSERT_ALIGNED(aOutputL);
-  ASSERT_ALIGNED(aOutputR);
+  ASSERT_ALIGNED16(aInputL);
+  ASSERT_ALIGNED16(aInputR);
+  ASSERT_ALIGNED16(aOutputL);
+  ASSERT_ALIGNED16(aOutputR);
 
   vgainl = _mm_load1_ps(&aGainL);
   vgainr = _mm_load1_ps(&aGainR);
 
   if (aIsOnTheLeft) {
     for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=8) {
       vinl0 = _mm_load_ps(&aInputL[i]);
       vinr0 = _mm_load_ps(&aInputR[i]);
--- a/dom/media/webaudio/AudioNodeEngineSSE2.h
+++ b/dom/media/webaudio/AudioNodeEngineSSE2.h
@@ -2,38 +2,35 @@
 /* this source code form is subject to the terms of the mozilla public
  * license, v. 2.0. if a copy of the mpl was not distributed with this file,
  * You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "AudioNodeEngine.h"
 
 namespace mozilla {
 void
-AudioBlockAddChannelWithScale_SSE(const float aInput[WEBAUDIO_BLOCK_SIZE],
-                                  float aScale,
-                                  float aOutput[WEBAUDIO_BLOCK_SIZE]);
-
-void
-AudioBlockAddChannel_SSE(const float aInput[WEBAUDIO_BLOCK_SIZE],
-                         float aOutput[WEBAUDIO_BLOCK_SIZE]);
+AudioBufferAddWithScale_SSE(const float* aInput,
+                            float aScale,
+                            float* aOutput,
+                            uint32_t aSize);
 
 void
 AudioBlockCopyChannelWithScale_SSE(const float* aInput,
                                    float aScale,
                                    float* aOutput);
 
 void
 AudioBlockCopyChannelWithScale_SSE(const float aInput[WEBAUDIO_BLOCK_SIZE],
                                    const float aScale[WEBAUDIO_BLOCK_SIZE],
                                    float aOutput[WEBAUDIO_BLOCK_SIZE]);
 
 void
-AudioBlockInPlaceScale_SSE(float aBlock[WEBAUDIO_BLOCK_SIZE],
-                           uint32_t aChannelCount,
-                           float aScale);
+AudioBufferInPlaceScale_SSE(float* aBlock,
+                            float aScale,
+                            uint32_t aSize);
 
 void
 AudioBlockPanStereoToStereo_SSE(const float aInputL[WEBAUDIO_BLOCK_SIZE],
                                 const float aInputR[WEBAUDIO_BLOCK_SIZE],
                                 float aGainL, float aGainR, bool aIsOnTheLeft,
                                 float aOutputL[WEBAUDIO_BLOCK_SIZE],
                                 float aOutputR[WEBAUDIO_BLOCK_SIZE]);
 }
--- a/dom/media/webaudio/blink/moz.build
+++ b/dom/media/webaudio/blink/moz.build
@@ -18,14 +18,18 @@ UNIFIED_SOURCES += [
     'Reverb.cpp',
     'ReverbAccumulationBuffer.cpp',
     'ReverbConvolver.cpp',
     'ReverbConvolverStage.cpp',
     'ReverbInputBuffer.cpp',
     'ZeroPole.cpp',
 ]
 
+# Are we targeting x86 or x64?  If so, build SSE2 files.
+if CONFIG['INTEL_ARCHITECTURE']:
+    DEFINES['USE_SSE2'] = True
+
 include('/ipc/chromium/chromium-config.mozbuild')
 
 FINAL_LIBRARY = 'xul'
 LOCAL_INCLUDES += [
     '/dom/media/webaudio',
 ]
--- a/dom/media/webaudio/moz.build
+++ b/dom/media/webaudio/moz.build
@@ -109,15 +109,22 @@ UNIFIED_SOURCES += [
 
 if CONFIG['CPU_ARCH'] == 'arm' and CONFIG['BUILD_ARM_NEON']:
     SOURCES += ['AudioNodeEngineNEON.cpp']
     SOURCES['AudioNodeEngineNEON.cpp'].flags += ['-mfpu=neon']
     LOCAL_INCLUDES += [
         '/media/openmax_dl/dl/api/'
     ]
 
+# Are we targeting x86 or x64?  If so, build SSE2 files.
+if CONFIG['INTEL_ARCHITECTURE']:
+    SOURCES += ['AudioNodeEngineSSE2.cpp']
+    DEFINES['USE_SSE2'] = True
+    SOURCES['AudioNodeEngineSSE2.cpp'].flags += CONFIG['SSE2_FLAGS']
+
+
 include('/ipc/chromium/chromium-config.mozbuild')
 
 FINAL_LIBRARY = 'xul'
 LOCAL_INCLUDES += [
     '..'
 ]