Bug 877662 - Use SSE2 versions of AudioNodeEngine functions r=padenot
author: Dan Minor <dminor@mozilla.com>
Fri, 18 Mar 2016 16:24:02 -0400
changeset 331476 58f6d3815cacb8f88b39bf4b1226b49e82f8c276
parent 331475 5f1d898a440e70f83e8ad12335785d092fc41d8b
child 331477 e719cc5de7b797b82d821cfd6f6c364a001fbea5
push id: 6048
push user: kmoir@mozilla.com
push date: Mon, 06 Jun 2016 19:02:08 +0000
treeherder: mozilla-beta@46d72a56c57d [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: padenot
bugs: 877662
milestone: 48.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 877662 - Use SSE2 versions of AudioNodeEngine functions r=padenot MozReview-Commit-ID: AJ2f5YBobPv
dom/media/webaudio/AudioNodeEngine.cpp
dom/media/webaudio/blink/ReverbAccumulationBuffer.cpp
--- a/dom/media/webaudio/AudioNodeEngine.cpp
+++ b/dom/media/webaudio/AudioNodeEngine.cpp
@@ -4,16 +4,19 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "AudioNodeEngine.h"
 #ifdef BUILD_ARM_NEON
 #include "mozilla/arm.h"
 #include "AudioNodeEngineNEON.h"
 #endif
+#ifdef USE_SSE2
+#include "AudioNodeEngineSSE2.h"
+#endif
 
 namespace mozilla {
 
 already_AddRefed<ThreadSharedFloatArrayBufferList>
 ThreadSharedFloatArrayBufferList::Create(uint32_t aChannelCount,
                                          size_t aLength,
                                          const mozilla::fallible_t&)
 {
@@ -66,16 +69,24 @@ void AudioBufferAddWithScale(const float
                              uint32_t aSize)
 {
 #ifdef BUILD_ARM_NEON
   if (mozilla::supports_neon()) {
     AudioBufferAddWithScale_NEON(aInput, aScale, aOutput, aSize);
     return;
   }
 #endif
+
+#ifdef USE_SSE2
+  if (mozilla::supports_sse2()) {
+    AudioBufferAddWithScale_SSE(aInput, aScale, aOutput, aSize);
+    return;
+  }
+#endif
+
   if (aScale == 1.0f) {
     for (uint32_t i = 0; i < aSize; ++i) {
       aOutput[i] += aInput[i];
     }
   } else {
     for (uint32_t i = 0; i < aSize; ++i) {
       aOutput[i] += aInput[i]*aScale;
     }
@@ -99,16 +110,24 @@ AudioBlockCopyChannelWithScale(const flo
     memcpy(aOutput, aInput, WEBAUDIO_BLOCK_SIZE*sizeof(float));
   } else {
 #ifdef BUILD_ARM_NEON
     if (mozilla::supports_neon()) {
       AudioBlockCopyChannelWithScale_NEON(aInput, aScale, aOutput);
       return;
     }
 #endif
+
+#ifdef USE_SSE2
+  if (mozilla::supports_sse2()) {
+    AudioBlockCopyChannelWithScale_SSE(aInput, aScale, aOutput);
+    return;
+  }
+#endif
+
     for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; ++i) {
       aOutput[i] = aInput[i]*aScale;
     }
   }
 }
 
 void
 BufferComplexMultiply(const float* aInput,
@@ -147,16 +166,24 @@ AudioBlockCopyChannelWithScale(const flo
                                float aOutput[WEBAUDIO_BLOCK_SIZE])
 {
 #ifdef BUILD_ARM_NEON
   if (mozilla::supports_neon()) {
     AudioBlockCopyChannelWithScale_NEON(aInput, aScale, aOutput);
     return;
   }
 #endif
+
+#ifdef USE_SSE2
+  if (mozilla::supports_sse2()) {
+    AudioBlockCopyChannelWithScale_SSE(aInput, aScale, aOutput);
+    return;
+  }
+#endif
+
   for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; ++i) {
     aOutput[i] = aInput[i]*aScale[i];
   }
 }
 
 void
 AudioBlockInPlaceScale(float aBlock[WEBAUDIO_BLOCK_SIZE],
                        float aScale)
@@ -173,16 +200,24 @@ AudioBufferInPlaceScale(float* aBlock,
     return;
   }
 #ifdef BUILD_ARM_NEON
   if (mozilla::supports_neon()) {
     AudioBufferInPlaceScale_NEON(aBlock, aScale, aSize);
     return;
   }
 #endif
+
+#ifdef USE_SSE2
+  if (mozilla::supports_sse2()) {
+    AudioBufferInPlaceScale_SSE(aBlock, aScale, aSize);
+    return;
+  }
+#endif
+
   for (uint32_t i = 0; i < aSize; ++i) {
     *aBlock++ *= aScale;
   }
 }
 
 void
 AudioBlockPanMonoToStereo(const float aInput[WEBAUDIO_BLOCK_SIZE],
                           float aGainL[WEBAUDIO_BLOCK_SIZE],
@@ -215,16 +250,25 @@ AudioBlockPanStereoToStereo(const float 
   if (mozilla::supports_neon()) {
     AudioBlockPanStereoToStereo_NEON(aInputL, aInputR,
                                      aGainL, aGainR, aIsOnTheLeft,
                                      aOutputL, aOutputR);
     return;
   }
 #endif
 
+#ifdef USE_SSE2
+  if (mozilla::supports_sse2()) {
+    AudioBlockPanStereoToStereo_SSE(aInputL, aInputR,
+                                    aGainL, aGainR, aIsOnTheLeft,
+                                    aOutputL, aOutputR);
+    return;
+  }
+#endif
+
   uint32_t i;
 
   if (aIsOnTheLeft) {
     for (i = 0; i < WEBAUDIO_BLOCK_SIZE; ++i) {
       aOutputL[i] = aInputL[i] + aInputR[i] * aGainL;
       aOutputR[i] = aInputR[i] * aGainR;
     }
   } else {
--- a/dom/media/webaudio/blink/ReverbAccumulationBuffer.cpp
+++ b/dom/media/webaudio/blink/ReverbAccumulationBuffer.cpp
@@ -91,22 +91,40 @@ int ReverbAccumulationBuffer::accumulate
 
     float* destination = m_buffer.Elements();
 
     bool isSafe = writeIndex <= bufferLength && numberOfFrames1 + writeIndex <= bufferLength && numberOfFrames2 <= bufferLength;
     MOZ_ASSERT(isSafe);
     if (!isSafe)
         return 0;
 
-    AudioBufferAddWithScale(source, 1.0f, destination + writeIndex, numberOfFrames1);
+#ifdef USE_SSE2
+    // It is unlikely either the source is aligned or the number of values
+    // is a multiple of 16, so we just add them here rather than calling
+    // AudioBufferAddWithScale.
+    //
+    // TODO: Ideally we would use scalar calls when necessary and switch
+    //       to vector calls when we have aligned sources and destinations.
+    //       See Bug 1263910.
+    for (uint32_t i = 0; i < numberOfFrames1; ++i) {
+      destination[writeIndex + i] += source[i];
+    }
 
-    // Handle wrap-around if necessary
+    // Handle wrap-around if necessary.
+    if (numberOfFrames2 > 0) {
+        for (uint32_t i = 0; i < numberOfFrames2; ++i) {
+          destination[i] += source[numberOfFrames1 + i];
+        }
+    }
+#else
+    AudioBufferAddWithScale(source, 1.0f, destination + writeIndex, numberOfFrames1);
     if (numberOfFrames2 > 0) {
         AudioBufferAddWithScale(source + numberOfFrames1, 1.0f, destination, numberOfFrames2);
     }
+#endif
 
     return writeIndex;
 }
 
 void ReverbAccumulationBuffer::reset()
 {
     PodZero(m_buffer.Elements(), m_buffer.Length());
     m_readIndex = 0;