Bug 877662 - Align audio buffer allocations to 16 byte boundaries r=padenot
author Dan Minor <dminor@mozilla.com>
Wed, 13 Apr 2016 15:31:50 -0400
changeset 331473 5ae01cbc55491297156c97e1dea4c215add27d4f
parent 331472 826d16396107def2873839344ebb6306832114d0
child 331474 6e96c35c78bd475a0963decd32756e0df5854f07
push id 6048
push user kmoir@mozilla.com
push date Mon, 06 Jun 2016 19:02:08 +0000
treeherder mozilla-beta@46d72a56c57d [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers padenot
bugs 877662
milestone 48.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 877662 - Align audio buffer allocations to 16 byte boundaries r=padenot To be able to use SSE2 routines, we need audio buffers to be allocated on 16 byte boundaries. MozReview-Commit-ID: 2mjxMWqysFd
dom/media/webaudio/AudioBlock.cpp
dom/media/webaudio/AudioDestinationNode.cpp
dom/media/webaudio/AudioNodeExternalInputStream.cpp
dom/media/webaudio/AudioNodeStream.cpp
dom/media/webaudio/AudioNodeStream.h
dom/media/webaudio/BiquadFilterNode.cpp
dom/media/webaudio/ConvolverNode.cpp
dom/media/webaudio/GainNode.cpp
dom/media/webaudio/StereoPannerNode.cpp
dom/media/webaudio/WaveShaperNode.cpp
dom/media/webaudio/blink/ReverbAccumulationBuffer.h
--- a/dom/media/webaudio/AudioBlock.cpp
+++ b/dom/media/webaudio/AudioBlock.cpp
@@ -1,50 +1,53 @@
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim:set ts=2 sw=2 sts=2 et cindent: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "AudioBlock.h"
+#include "AlignmentUtils.h"
 
 namespace mozilla {
 
 /**
  * Heap-allocated buffer of channels of 128-sample float arrays, with
  * threadsafe refcounting.  Typically you would allocate one of these, fill it
  * in, and then treat it as immutable while it's shared.
  *
  * Downstream references are accounted specially so that the creator of the
  * buffer can reuse and modify its contents next iteration if other references
  * are all downstream temporary references held by AudioBlock.
  *
- * This only guarantees 4-byte alignment of the data. For alignment we simply
- * assume that the memory from malloc is at least 4-byte aligned and that
- * AudioBlockBuffer's size is divisible by 4.
+ * We guarantee 16 byte alignment of the channel data.
  */
 class AudioBlockBuffer final : public ThreadSharedObject {
 public:
 
   virtual AudioBlockBuffer* AsAudioBlockBuffer() override { return this; };
 
   float* ChannelData(uint32_t aChannel)
   {
-    return reinterpret_cast<float*>(this + 1) + aChannel * WEBAUDIO_BLOCK_SIZE;
+    float* base = reinterpret_cast<float*>(((uintptr_t)(this + 1) + 15) & ~0x0F);
+    ASSERT_ALIGNED16(base);
+    return base + aChannel * WEBAUDIO_BLOCK_SIZE;
   }
 
   static already_AddRefed<AudioBlockBuffer> Create(uint32_t aChannelCount)
   {
     CheckedInt<size_t> size = WEBAUDIO_BLOCK_SIZE;
     size *= aChannelCount;
     size *= sizeof(float);
     size += sizeof(AudioBlockBuffer);
+    size += 15;  //padding for alignment
     if (!size.isValid()) {
       MOZ_CRASH();
     }
+
     void* m = moz_xmalloc(size.value());
     RefPtr<AudioBlockBuffer> p = new (m) AudioBlockBuffer();
     NS_ASSERTION((reinterpret_cast<char*>(p.get() + 1) - reinterpret_cast<char*>(p.get())) % 4 == 0,
                  "AudioBlockBuffers should be at least 4-byte aligned");
     return p.forget();
   }
 
   // Graph thread only.
@@ -145,18 +148,16 @@ AudioBlock::AllocateChannels(uint32_t aC
     if (buffer && !buffer->HasLastingShares()) {
       MOZ_ASSERT(mBufferFormat == AUDIO_FORMAT_FLOAT32);
       // No need to allocate again.
       mVolume = 1.0f;
       return;
     }
   }
 
-  // XXX for SIMD purposes we should do something here to make sure the
-  // channel buffers are 16-byte aligned.
   RefPtr<AudioBlockBuffer> buffer = AudioBlockBuffer::Create(aChannelCount);
   mChannelData.SetLength(aChannelCount);
   for (uint32_t i = 0; i < aChannelCount; ++i) {
     mChannelData[i] = buffer->ChannelData(i);
   }
   mBuffer = buffer.forget();
   mVolume = 1.0f;
   mBufferFormat = AUDIO_FORMAT_FLOAT32;
--- a/dom/media/webaudio/AudioDestinationNode.cpp
+++ b/dom/media/webaudio/AudioDestinationNode.cpp
@@ -1,15 +1,16 @@
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim:set ts=2 sw=2 sts=2 et cindent: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "AudioDestinationNode.h"
+#include "AlignmentUtils.h"
 #include "AudioContext.h"
 #include "mozilla/dom/AudioDestinationNodeBinding.h"
 #include "mozilla/dom/ScriptSettings.h"
 #include "mozilla/Preferences.h"
 #include "mozilla/Services.h"
 #include "AudioChannelAgent.h"
 #include "AudioChannelService.h"
 #include "AudioNodeEngine.h"
@@ -82,17 +83,17 @@ public:
     const uint32_t duration = std::min(WEBAUDIO_BLOCK_SIZE, mLength - mWriteIndex);
     const uint32_t inputChannelCount = aInput.ChannelCount();
     for (uint32_t i = 0; i < outputChannelCount; ++i) {
       float* outputData = mBuffer->GetDataForWrite(i) + mWriteIndex;
       if (aInput.IsNull() || i >= inputChannelCount) {
         PodZero(outputData, duration);
       } else {
         const float* inputBuffer = static_cast<const float*>(aInput.mChannelData[i]);
-        if (duration == WEBAUDIO_BLOCK_SIZE) {
+        if (duration == WEBAUDIO_BLOCK_SIZE && IS_ALIGNED16(inputBuffer)) {
           // Use the optimized version of the copy with scale operation
           AudioBlockCopyChannelWithScale(inputBuffer, aInput.mVolume,
                                          outputData);
         } else {
           if (aInput.mVolume == 1.0f) {
             PodCopy(outputData, inputBuffer, duration);
           } else {
             for (uint32_t j = 0; j < duration; ++j) {
--- a/dom/media/webaudio/AudioNodeExternalInputStream.cpp
+++ b/dom/media/webaudio/AudioNodeExternalInputStream.cpp
@@ -1,13 +1,15 @@
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
  * You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#include "AlignedTArray.h"
+#include "AlignmentUtils.h"
 #include "AudioNodeEngine.h"
 #include "AudioNodeExternalInputStream.h"
 #include "AudioChannelFormat.h"
 #include "mozilla/dom/MediaStreamAudioSourceNode.h"
 
 using namespace mozilla::dom;
 
 namespace mozilla {
@@ -85,19 +87,30 @@ static void ConvertSegmentToAudioBlock(A
 {
   NS_ASSERTION(aSegment->GetDuration() == WEBAUDIO_BLOCK_SIZE, "Bad segment duration");
 
   {
     AudioSegment::ChunkIterator ci(*aSegment);
     NS_ASSERTION(!ci.IsEnded(), "Should be at least one chunk!");
     if (ci->GetDuration() == WEBAUDIO_BLOCK_SIZE &&
         (ci->IsNull() || ci->mBufferFormat == AUDIO_FORMAT_FLOAT32)) {
+
+      bool aligned = true;
+      for (size_t i = 0; i < ci->mChannelData.Length(); ++i) {
+        if (!IS_ALIGNED16(ci->mChannelData[i])) {
+            aligned = false;
+            break;
+        }
+      }
+
       // Return this chunk directly to avoid copying data.
-      *aBlock = *ci;
-      return;
+      if (aligned) {
+        *aBlock = *ci;
+        return;
+      }
     }
   }
 
   aBlock->AllocateChannels(aFallbackChannelCount);
 
   uint32_t duration = 0;
   for (AudioSegment::ChunkIterator ci(*aSegment); !ci.IsEnded(); ci.Next()) {
     switch (ci->mBufferFormat) {
@@ -187,17 +200,20 @@ AudioNodeExternalInputStream::ProcessInp
 
     for (AudioSegment::ChunkIterator iter(segment); !iter.IsEnded(); iter.Next()) {
       inputChannels = GetAudioChannelsSuperset(inputChannels, iter->ChannelCount());
     }
   }
 
   uint32_t accumulateIndex = 0;
   if (inputChannels) {
-    AutoTArray<float,GUESS_AUDIO_CHANNELS*WEBAUDIO_BLOCK_SIZE> downmixBuffer;
+    // TODO: See Bug 1261168. Ideally we would use an aligned version of
+    // AutoTArray (of size GUESS_AUDIO_CHANNELS*WEBAUDIO_BLOCK_SIZE) here.
+    AlignedTArray<float,16> downmixBuffer;
+    downmixBuffer.SetLength(GUESS_AUDIO_CHANNELS*WEBAUDIO_BLOCK_SIZE);
     for (uint32_t i = 0; i < audioSegments.Length(); ++i) {
       AudioBlock tmpChunk;
       ConvertSegmentToAudioBlock(&audioSegments[i], &tmpChunk, inputChannels);
       if (!tmpChunk.IsNull()) {
         if (accumulateIndex == 0) {
           mLastChunks[0].AllocateChannels(inputChannels);
         }
         AccumulateInputChunk(accumulateIndex, tmpChunk, &mLastChunks[0], &downmixBuffer);
--- a/dom/media/webaudio/AudioNodeStream.cpp
+++ b/dom/media/webaudio/AudioNodeStream.cpp
@@ -448,29 +448,31 @@ AudioNodeStream::ObtainInputBlock(AudioB
   }
 
   if (outputChannelCount == 0) {
     aTmpChunk.SetNull(WEBAUDIO_BLOCK_SIZE);
     return;
   }
 
   aTmpChunk.AllocateChannels(outputChannelCount);
-  // The static storage here should be 1KB, so it's fine
-  AutoTArray<float, GUESS_AUDIO_CHANNELS*WEBAUDIO_BLOCK_SIZE> downmixBuffer;
+  // TODO: See Bug 1261168. Ideally we would use an aligned version of
+  // AutoTArray (of size GUESS_AUDIO_CHANNELS*WEBAUDIO_BLOCK_SIZE) here.
+  AlignedTArray<float, 16> downmixBuffer;
+  downmixBuffer.SetLength(GUESS_AUDIO_CHANNELS*WEBAUDIO_BLOCK_SIZE);
 
   for (uint32_t i = 0; i < inputChunkCount; ++i) {
     AccumulateInputChunk(i, *inputChunks[i], &aTmpChunk, &downmixBuffer);
   }
 }
 
 void
 AudioNodeStream::AccumulateInputChunk(uint32_t aInputIndex,
                                       const AudioBlock& aChunk,
                                       AudioBlock* aBlock,
-                                      nsTArray<float>* aDownmixBuffer)
+                                      AlignedTArray<float, 16>* aDownmixBuffer)
 {
   AutoTArray<const float*,GUESS_AUDIO_CHANNELS> channels;
   UpMixDownMixChunk(&aChunk, aBlock->ChannelCount(), channels, *aDownmixBuffer);
 
   for (uint32_t c = 0; c < channels.Length(); ++c) {
     const float* inputData = static_cast<const float*>(channels[c]);
     float* outputData = aBlock->ChannelFloatsForWrite(c);
     if (inputData) {
@@ -486,17 +488,17 @@ AudioNodeStream::AccumulateInputChunk(ui
     }
   }
 }
 
 void
 AudioNodeStream::UpMixDownMixChunk(const AudioBlock* aChunk,
                                    uint32_t aOutputChannelCount,
                                    nsTArray<const float*>& aOutputChannels,
-                                   nsTArray<float>& aDownmixBuffer)
+                                   AlignedTArray<float, 16>& aDownmixBuffer)
 {
   for (uint32_t i = 0; i < aChunk->ChannelCount(); i++) {
     aOutputChannels.AppendElement(static_cast<const float*>(aChunk->mChannelData[i]));
   }
   if (aOutputChannels.Length() < aOutputChannelCount) {
     if (mChannelInterpretation == ChannelInterpretation::Speakers) {
       AudioChannelsUpMix<float>(&aOutputChannels, aOutputChannelCount, nullptr);
       NS_ASSERTION(aOutputChannelCount == aOutputChannels.Length(),
--- a/dom/media/webaudio/AudioNodeStream.h
+++ b/dom/media/webaudio/AudioNodeStream.h
@@ -3,16 +3,17 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
  * You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifndef MOZILLA_AUDIONODESTREAM_H_
 #define MOZILLA_AUDIONODESTREAM_H_
 
 #include "MediaStreamGraph.h"
 #include "mozilla/dom/AudioNodeBinding.h"
+#include "AlignedTArray.h"
 #include "AudioBlock.h"
 
 namespace mozilla {
 
 namespace dom {
 struct ThreeDPoint;
 struct AudioTimelineEvent;
 class AudioContext;
@@ -185,20 +186,20 @@ protected:
    * again until SetActive() is called.
    */
   void CheckForInactive();
 
   void AdvanceOutputSegment();
   void FinishOutput();
   void AccumulateInputChunk(uint32_t aInputIndex, const AudioBlock& aChunk,
                             AudioBlock* aBlock,
-                            nsTArray<float>* aDownmixBuffer);
+                            AlignedTArray<float, 16>* aDownmixBuffer);
   void UpMixDownMixChunk(const AudioBlock* aChunk, uint32_t aOutputChannelCount,
                          nsTArray<const float*>& aOutputChannels,
-                         nsTArray<float>& aDownmixBuffer);
+                         AlignedTArray<float, 16>& aDownmixBuffer);
 
   uint32_t ComputedNumberOfChannels(uint32_t aInputChannelCount);
   void ObtainInputBlock(AudioBlock& aTmpChunk, uint32_t aPortIndex);
   void IncrementActiveInputCount();
   void DecrementActiveInputCount();
 
   // The engine that will generate output for this node.
   nsAutoPtr<AudioNodeEngine> mEngine;
--- a/dom/media/webaudio/BiquadFilterNode.cpp
+++ b/dom/media/webaudio/BiquadFilterNode.cpp
@@ -1,15 +1,16 @@
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim:set ts=2 sw=2 sts=2 et cindent: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "BiquadFilterNode.h"
+#include "AlignmentUtils.h"
 #include "AudioNodeEngine.h"
 #include "AudioNodeStream.h"
 #include "AudioDestinationNode.h"
 #include "PlayingRefChangeHandler.h"
 #include "WebAudioUtils.h"
 #include "blink/Biquad.h"
 #include "mozilla/Preferences.h"
 #include "mozilla/UniquePtr.h"
@@ -132,17 +133,19 @@ public:
   }
 
   void ProcessBlock(AudioNodeStream* aStream,
                     GraphTime aFrom,
                     const AudioBlock& aInput,
                     AudioBlock* aOutput,
                     bool* aFinished) override
   {
-    float inputBuffer[WEBAUDIO_BLOCK_SIZE];
+    float inputBuffer[WEBAUDIO_BLOCK_SIZE + 4];
+    float* alignedInputBuffer = ALIGNED16(inputBuffer);
+    ASSERT_ALIGNED16(alignedInputBuffer);
 
     if (aInput.IsNull()) {
       bool hasTail = false;
       for (uint32_t i = 0; i < mBiquads.Length(); ++i) {
         if (mBiquads[i].hasTail()) {
           hasTail = true;
           break;
         }
@@ -186,22 +189,22 @@ public:
     double freq = mFrequency.GetValueAtTime(pos);
     double q = mQ.GetValueAtTime(pos);
     double gain = mGain.GetValueAtTime(pos);
     double detune = mDetune.GetValueAtTime(pos);
 
     for (uint32_t i = 0; i < numberOfChannels; ++i) {
       const float* input;
       if (aInput.IsNull()) {
-        input = inputBuffer;
+        input = alignedInputBuffer;
       } else {
         input = static_cast<const float*>(aInput.mChannelData[i]);
         if (aInput.mVolume != 1.0) {
-          AudioBlockCopyChannelWithScale(input, aInput.mVolume, inputBuffer);
-          input = inputBuffer;
+          AudioBlockCopyChannelWithScale(input, aInput.mVolume, alignedInputBuffer);
+          input = alignedInputBuffer;
         }
       }
       SetParamsOnBiquad(mBiquads[i], aStream->SampleRate(), mType, freq, q, gain, detune);
 
       mBiquads[i].process(input,
                           aOutput->ChannelFloatsForWrite(i),
                           aInput.GetDuration());
     }
--- a/dom/media/webaudio/ConvolverNode.cpp
+++ b/dom/media/webaudio/ConvolverNode.cpp
@@ -1,16 +1,17 @@
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim:set ts=2 sw=2 sts=2 et cindent: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "ConvolverNode.h"
 #include "mozilla/dom/ConvolverNodeBinding.h"
+#include "AlignmentUtils.h"
 #include "AudioNodeEngine.h"
 #include "AudioNodeStream.h"
 #include "blink/Reverb.h"
 #include "PlayingRefChangeHandler.h"
 
 namespace mozilla {
 namespace dom {
 
@@ -256,21 +257,23 @@ ConvolverNode::SetBuffer(JSContext* aCx,
       mBuffer->GetThreadSharedChannelsForRate(aCx);
     if (data && length < WEBAUDIO_BLOCK_SIZE) {
       // For very small impulse response buffers, we need to pad the
       // buffer with 0 to make sure that the Reverb implementation
       // has enough data to compute FFTs from.
       length = WEBAUDIO_BLOCK_SIZE;
       RefPtr<ThreadSharedFloatArrayBufferList> paddedBuffer =
         new ThreadSharedFloatArrayBufferList(data->GetChannels());
-      float* channelData = (float*) malloc(sizeof(float) * length * data->GetChannels());
+      void* channelData = malloc(sizeof(float) * length * data->GetChannels() + 15);
+      float* alignedChannelData = ALIGNED16(channelData);
+      ASSERT_ALIGNED16(alignedChannelData);
       for (uint32_t i = 0; i < data->GetChannels(); ++i) {
-        PodCopy(channelData + length * i, data->GetData(i), mBuffer->Length());
-        PodZero(channelData + length * i + mBuffer->Length(), WEBAUDIO_BLOCK_SIZE - mBuffer->Length());
-        paddedBuffer->SetData(i, (i == 0) ? channelData : nullptr, free, channelData);
+        PodCopy(alignedChannelData + length * i, data->GetData(i), mBuffer->Length());
+        PodZero(alignedChannelData + length * i + mBuffer->Length(), WEBAUDIO_BLOCK_SIZE - mBuffer->Length());
+        paddedBuffer->SetData(i, (i == 0) ? channelData : nullptr, free, alignedChannelData);
       }
       data = paddedBuffer;
     }
     SendInt32ParameterToStream(ConvolverNodeEngine::BUFFER_LENGTH, length);
     SendDoubleParameterToStream(ConvolverNodeEngine::SAMPLE_RATE,
                                 mBuffer->SampleRate());
     ns->SetBuffer(data.forget());
   } else {
--- a/dom/media/webaudio/GainNode.cpp
+++ b/dom/media/webaudio/GainNode.cpp
@@ -1,16 +1,17 @@
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim:set ts=2 sw=2 sts=2 et cindent: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "GainNode.h"
 #include "mozilla/dom/GainNodeBinding.h"
+#include "AlignmentUtils.h"
 #include "AudioNodeEngine.h"
 #include "AudioNodeStream.h"
 #include "AudioDestinationNode.h"
 #include "WebAudioUtils.h"
 
 namespace mozilla {
 namespace dom {
 
@@ -74,28 +75,30 @@ public:
     } else {
       // First, compute a vector of gains for each track tick based on the
       // timeline at hand, and then for each channel, multiply the values
       // in the buffer with the gain vector.
       aOutput->AllocateChannels(aInput.ChannelCount());
 
       // Compute the gain values for the duration of the input AudioChunk
       StreamTime tick = mDestination->GraphTimeToStreamTime(aFrom);
-      float computedGain[WEBAUDIO_BLOCK_SIZE];
-      mGain.GetValuesAtTime(tick, computedGain, WEBAUDIO_BLOCK_SIZE);
+      float computedGain[WEBAUDIO_BLOCK_SIZE + 4];
+      float* alignedComputedGain = ALIGNED16(computedGain);
+      ASSERT_ALIGNED16(alignedComputedGain);
+      mGain.GetValuesAtTime(tick, alignedComputedGain, WEBAUDIO_BLOCK_SIZE);
 
       for (size_t counter = 0; counter < WEBAUDIO_BLOCK_SIZE; ++counter) {
-        computedGain[counter] *= aInput.mVolume;
+        alignedComputedGain[counter] *= aInput.mVolume;
       }
 
       // Apply the gain to the output buffer
       for (size_t channel = 0; channel < aOutput->ChannelCount(); ++channel) {
         const float* inputBuffer = static_cast<const float*> (aInput.mChannelData[channel]);
         float* buffer = aOutput->ChannelFloatsForWrite(channel);
-        AudioBlockCopyChannelWithScale(inputBuffer, computedGain, buffer);
+        AudioBlockCopyChannelWithScale(inputBuffer, alignedComputedGain, buffer);
       }
     }
   }
 
   size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const override
   {
     // Not owned:
     // - mDestination (probably)
--- a/dom/media/webaudio/StereoPannerNode.cpp
+++ b/dom/media/webaudio/StereoPannerNode.cpp
@@ -4,16 +4,17 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "StereoPannerNode.h"
 #include "mozilla/dom/StereoPannerNodeBinding.h"
 #include "AudioNodeEngine.h"
 #include "AudioNodeStream.h"
 #include "AudioDestinationNode.h"
+#include "AlignmentUtils.h"
 #include "WebAudioUtils.h"
 #include "PanningUtils.h"
 #include "AudioParamTimeline.h"
 #include "AudioParam.h"
 
 namespace mozilla {
 namespace dom {
 
@@ -132,34 +133,36 @@ public:
 
         GetGainValuesForPanning(panning, monoToStereo, gainL, gainR);
         ApplyStereoPanning(aInput, aOutput,
                            gainL * aInput.mVolume,
                            gainR * aInput.mVolume,
                            panning <= 0);
       }
     } else {
-      float computedGain[2][WEBAUDIO_BLOCK_SIZE];
+      float computedGain[2*WEBAUDIO_BLOCK_SIZE + 4];
       bool onLeft[WEBAUDIO_BLOCK_SIZE];
 
       float values[WEBAUDIO_BLOCK_SIZE];
       StreamTime tick = mDestination->GraphTimeToStreamTime(aFrom);
       mPan.GetValuesAtTime(tick, values, WEBAUDIO_BLOCK_SIZE);
 
+      float* alignedComputedGain = ALIGNED16(computedGain);
+      ASSERT_ALIGNED16(alignedComputedGain);
       for (size_t counter = 0; counter < WEBAUDIO_BLOCK_SIZE; ++counter) {
         float left, right;
         GetGainValuesForPanning(values[counter], monoToStereo, left, right);
 
-        computedGain[0][counter] = left * aInput.mVolume;
-        computedGain[1][counter] = right * aInput.mVolume;
+        alignedComputedGain[counter] = left * aInput.mVolume;
+        alignedComputedGain[WEBAUDIO_BLOCK_SIZE + counter] = right * aInput.mVolume;
         onLeft[counter] = values[counter] <= 0;
       }
 
       // Apply the gain to the output buffer
-      ApplyStereoPanning(aInput, aOutput, computedGain[0], computedGain[1], onLeft);
+      ApplyStereoPanning(aInput, aOutput, alignedComputedGain, &alignedComputedGain[WEBAUDIO_BLOCK_SIZE], onLeft);
     }
   }
 
   virtual size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override
   {
     return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
   }
 
--- a/dom/media/webaudio/WaveShaperNode.cpp
+++ b/dom/media/webaudio/WaveShaperNode.cpp
@@ -1,16 +1,17 @@
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim:set ts=2 sw=2 sts=2 et cindent: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "WaveShaperNode.h"
 #include "mozilla/dom/WaveShaperNodeBinding.h"
+#include "AlignmentUtils.h"
 #include "AudioNode.h"
 #include "AudioNodeEngine.h"
 #include "AudioNodeStream.h"
 #include "mozilla/PodOperations.h"
 
 namespace mozilla {
 namespace dom {
 
@@ -226,23 +227,25 @@ public:
       // or the input is null.
       *aOutput = aInput;
       return;
     }
 
     aOutput->AllocateChannels(channelCount);
     for (uint32_t i = 0; i < channelCount; ++i) {
       const float* inputSamples;
-      float scaledInput[WEBAUDIO_BLOCK_SIZE];
+      float scaledInput[WEBAUDIO_BLOCK_SIZE + 4];
+      float* alignedScaledInput = ALIGNED16(scaledInput);
+      ASSERT_ALIGNED16(alignedScaledInput);
       if (aInput.mVolume != 1.0f) {
         AudioBlockCopyChannelWithScale(
             static_cast<const float*>(aInput.mChannelData[i]),
                                       aInput.mVolume,
-                                      scaledInput);
-        inputSamples = scaledInput;
+                                      alignedScaledInput);
+        inputSamples = alignedScaledInput;
       } else {
         inputSamples = static_cast<const float*>(aInput.mChannelData[i]);
       }
       float* outputBuffer = aOutput->ChannelFloatsForWrite(i);
       float* sampleBuffer;
 
       switch (mType) {
       case OverSampleType::None:
--- a/dom/media/webaudio/blink/ReverbAccumulationBuffer.h
+++ b/dom/media/webaudio/blink/ReverbAccumulationBuffer.h
@@ -24,23 +24,21 @@
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #ifndef ReverbAccumulationBuffer_h
 #define ReverbAccumulationBuffer_h
 
-#include "nsTArray.h"
+#include "AlignedTArray.h"
 #include "mozilla/MemoryReporting.h"
 
 namespace WebCore {
 
-typedef nsTArray<float> AudioFloatArray;
-
 // ReverbAccumulationBuffer is a circular delay buffer with one client reading from it and multiple clients
 // writing/accumulating to it at different delay offsets from the read position.  The read operation will zero the memory
 // just read from the buffer, so it will be ready for accumulation the next time around.
 class ReverbAccumulationBuffer {
 public:
     explicit ReverbAccumulationBuffer(size_t length);
 
     // This will read from, then clear-out numberOfFrames
@@ -60,16 +58,16 @@ public:
     void reset();
 
     size_t sizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const
     {
         return m_buffer.ShallowSizeOfExcludingThis(aMallocSizeOf);
     }
 
 private:
-    AudioFloatArray m_buffer;
+    AlignedTArray<float, 16> m_buffer;
     size_t m_readIndex;
     size_t m_readTimeFrame; // for debugging (frame on continuous timeline)
 };
 
 } // namespace WebCore
 
 #endif // ReverbAccumulationBuffer_h