b=815643 Implement HRTF panner processing based on Blink implementation r=ehsan
author: Karl Tomlinson <karlt+@karlt.net>
date: Fri, 09 Aug 2013 10:08:06 +1200
changeset: 142367:62ad090a94a4
parent: 142366:746b2ba6cf30
child: 142368:a1d63e503ee9
push id: 32374
push user: ktomlinson@mozilla.com
push date: Tue, 13 Aug 2013 02:49:14 +0000
reviewers: ehsan
bugs: 815643
milestone: 26.0a1
content/media/webaudio/PannerNode.cpp
content/media/webaudio/blink/HRTFPanner.cpp
content/media/webaudio/blink/HRTFPanner.h
--- a/content/media/webaudio/PannerNode.cpp
+++ b/content/media/webaudio/PannerNode.cpp
@@ -4,16 +4,20 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "PannerNode.h"
 #include "AudioNodeEngine.h"
 #include "AudioNodeStream.h"
 #include "AudioListener.h"
 #include "AudioBufferSourceNode.h"
+#include "blink/HRTFPanner.h"
+
+using WebCore::HRTFDatabaseLoader;
+using WebCore::HRTFPanner;
 
 namespace mozilla {
 namespace dom {
 
 using namespace std;
 
 NS_IMPL_CYCLE_COLLECTION_CLASS(PannerNode)
 
@@ -51,16 +55,20 @@ public:
     , mConeInnerAngle(360.)
     , mConeOuterAngle(360.)
     , mConeOuterGain(0.)
     // These will be initialized when a PannerNode is created, so just initialize them
     // to some dummy values here.
     , mListenerDopplerFactor(0.)
     , mListenerSpeedOfSound(0.)
   {
+    // HRTFDatabaseLoader needs to be fetched on the main thread.
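+    // The loader begins reading the HRTF database asynchronously;
+    // HRTFPanner::pan() outputs silence until loading completes.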
+    TemporaryRef<HRTFDatabaseLoader> loader =
+      HRTFDatabaseLoader::createAndLoadAsynchronouslyIfNecessary(aNode->Context()->SampleRate());
+    mHRTFPanner = new HRTFPanner(aNode->Context()->SampleRate(), loader);
   }
 
   virtual void SetInt32Parameter(uint32_t aIndex, int32_t aParam) MOZ_OVERRIDE
   {
     switch (aIndex) {
     case PannerNode::PANNING_MODEL:
       mPanningModel = PanningModelType(aParam);
       switch (mPanningModel) {
@@ -126,39 +134,38 @@ public:
     }
   }
 
   virtual void ProduceAudioBlock(AudioNodeStream* aStream,
                                  const AudioChunk& aInput,
                                  AudioChunk* aOutput,
                                  bool *aFinished) MOZ_OVERRIDE
   {
-    if (aInput.IsNull()) {
-      *aOutput = aInput;
-      return;
-    }
     (this->*mPanningModelFunction)(aInput, aOutput);
   }
 
   void ComputeAzimuthAndElevation(float& aAzimuth, float& aElevation);
   void DistanceAndConeGain(AudioChunk* aChunk, float aGain);
   float ComputeConeGain();
+  // Compute how much the distance contributes to the gain reduction.
+  float ComputeDistanceGain();
 
   void GainMonoToStereo(const AudioChunk& aInput, AudioChunk* aOutput,
                         float aGainL, float aGainR);
   void GainStereoToStereo(const AudioChunk& aInput, AudioChunk* aOutput,
                           float aGainL, float aGainR, double aAzimuth);
 
   void EqualPowerPanningFunction(const AudioChunk& aInput, AudioChunk* aOutput);
   void HRTFPanningFunction(const AudioChunk& aInput, AudioChunk* aOutput);
 
   float LinearGainFunction(float aDistance);
   float InverseGainFunction(float aDistance);
   float ExponentialGainFunction(float aDistance);
 
+  nsAutoPtr<HRTFPanner> mHRTFPanner;
   PanningModelType mPanningModel;
   typedef void (PannerNodeEngine::*PanningModelFunction)(const AudioChunk& aInput, AudioChunk* aOutput);
   PanningModelFunction mPanningModelFunction;
   DistanceModelType mDistanceModel;
   typedef float (PannerNodeEngine::*DistanceModelFunction)(float aDistance);
   DistanceModelFunction mDistanceModelFunction;
   ThreeDPoint mPosition;
   ThreeDPoint mOrientation;
@@ -240,27 +247,53 @@ PannerNodeEngine::ExponentialGainFunctio
 {
   return pow(aDistance / mRefDistance, -mRolloffFactor);
 }
 
 void
 PannerNodeEngine::HRTFPanningFunction(const AudioChunk& aInput,
                                       AudioChunk* aOutput)
 {
-  // not implemented: noop
-  *aOutput = aInput;
+  int numChannels = aInput.mChannelData.Length();
+
+  // The output of this node is always stereo, no matter what the inputs are.
+  AllocateAudioBlock(2, aOutput);
+
+  float azimuth, elevation;
+  ComputeAzimuthAndElevation(azimuth, elevation);
+
+  AudioChunk input = aInput;
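+  // A null input block is still processed (no early return as in the
+  // equal-power path) so that the tails of the HRTF convolution and delay
+  // lines keep flowing after the input goes silent.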
+  // Gain is applied before the delay and convolution of the HRTF
+  if (!input.IsNull()) {
+    float gain = ComputeConeGain() * ComputeDistanceGain() * aInput.mVolume;
+    if (gain != 1.0f) {
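+      // Scale into a freshly allocated block so that the input buffers,
+      // which may be shared with other consumers, are not modified in place.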
+      AllocateAudioBlock(numChannels, &input);
+      for (int i = 0; i < numChannels; ++i) {
+        const float* src = static_cast<const float*>(aInput.mChannelData[i]);
+        float* dest =
+          static_cast<float*>(const_cast<void*>(input.mChannelData[i]));
+        AudioBlockCopyChannelWithScale(src, gain, dest);
+      }
+    }
+  }
+
+  mHRTFPanner->pan(azimuth, elevation, &input, aOutput, WEBAUDIO_BLOCK_SIZE);
 }
 
 void
 PannerNodeEngine::EqualPowerPanningFunction(const AudioChunk& aInput,
                                             AudioChunk* aOutput)
 {
-  float azimuth, elevation, gainL, gainR, normalizedAzimuth, distance, distanceGain, coneGain;
+  if (aInput.IsNull()) {
+    *aOutput = aInput;
+    return;
+  }
+
+  float azimuth, elevation, gainL, gainR, normalizedAzimuth, distanceGain, coneGain;
   int inputChannels = aInput.mChannelData.Length();
-  ThreeDPoint distanceVec;
 
   // If the listener and the panner are at the same position, and no cone
   // gain is specified, this node is a noop.
   if (mListenerPosition == mPosition &&
       mConeInnerAngle == 360 &&
       mConeOuterAngle == 360) {
     *aOutput = aInput;
     return;
@@ -289,20 +322,17 @@ PannerNodeEngine::EqualPowerPanningFunct
   } else {
     if (azimuth <= 0) {
       normalizedAzimuth = (azimuth + 90.f) / 90.f;
     } else {
       normalizedAzimuth = azimuth / 90.f;
     }
   }
 
-  // Compute how much the distance contributes to the gain reduction.
-  distanceVec = mPosition - mListenerPosition;
-  distance = sqrt(distanceVec.DotProduct(distanceVec));
-  distanceGain = (this->*mDistanceModelFunction)(distance);
+  distanceGain = ComputeDistanceGain();
 
   // Actually compute the left and right gain.
   gainL = cos(0.5 * M_PI * normalizedAzimuth) * aInput.mVolume;
   gainR = sin(0.5 * M_PI * normalizedAzimuth) * aInput.mVolume;
 
   // Compute the output.
   if (inputChannels == 1) {
     GainMonoToStereo(aInput, aOutput, gainL, gainR);
@@ -438,16 +468,24 @@ PannerNodeEngine::ComputeConeGain()
     double x = (absAngle - absInnerAngle) / (absOuterAngle - absInnerAngle);
     gain = (1 - x) + mConeOuterGain * x;
   }
 
   return gain;
 }
 
 float
+PannerNodeEngine::ComputeDistanceGain()
+{
+  ThreeDPoint distanceVec = mPosition - mListenerPosition;
+  float distance = sqrt(distanceVec.DotProduct(distanceVec));
+  return (this->*mDistanceModelFunction)(distance);
+}
+
+float
 PannerNode::ComputeDopplerShift()
 {
   double dopplerShift = 1.0; // Initialize to default value
 
   AudioListener* listener = Context()->Listener();
 
   if (listener->DopplerFactor() > 0) {
     // Don't bother if both source and listener have no velocity.
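
The equal-power pan law above keeps the summed power of the two output channels constant across the stereo field, since cos^2 + sin^2 = 1 for any normalized azimuth. A minimal standalone sketch of the law (hypothetical example, not part of this patch):

// Standalone sketch of the equal-power pan law used in
// EqualPowerPanningFunction (hypothetical example, not part of this patch).
#include <cmath>
#include <cstdio>

int main()
{
    // For any normalized azimuth x in [0, 1], cos^2 + sin^2 == 1, so the
    // summed power of the left and right gains is constant.
    for (double x = 0.0; x <= 1.0; x += 0.25) {
        double gainL = std::cos(0.5 * M_PI * x);
        double gainR = std::sin(0.5 * M_PI * x);
        std::printf("x=%.2f gainL=%.3f gainR=%.3f power=%.3f\n",
                    x, gainL, gainR, gainL * gainL + gainR * gainR);
    }
    return 0;
}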
--- a/content/media/webaudio/blink/HRTFPanner.cpp
+++ b/content/media/webaudio/blink/HRTFPanner.cpp
@@ -36,17 +36,17 @@ namespace WebCore {
 
 // The value of 2 milliseconds is larger than the largest delay which exists in any HRTFKernel from the default HRTFDatabase (0.00136 seconds).
 // The delay values used in pan() are ASSERTed against this value.
 const double MaxDelayTimeSeconds = 0.002;
 
 const int UninitializedAzimuth = -1;
 const unsigned RenderingQuantum = 128;
 
-HRTFPanner::HRTFPanner(float sampleRate, HRTFDatabaseLoader* databaseLoader)
+HRTFPanner::HRTFPanner(float sampleRate, mozilla::TemporaryRef<HRTFDatabaseLoader> databaseLoader)
     : m_databaseLoader(databaseLoader)
     , m_sampleRate(sampleRate)
     , m_crossfadeSelection(CrossfadeSelection1)
     , m_azimuthIndex1(UninitializedAzimuth)
     , m_elevation1(0)
     , m_azimuthIndex2(UninitializedAzimuth)
     , m_elevation2(0)
     , m_crossfadeX(0)
@@ -55,25 +55,28 @@ HRTFPanner::HRTFPanner(float sampleRate,
     , m_convolverR1(m_convolverL1.fftSize())
     , m_convolverL2(m_convolverL1.fftSize())
     , m_convolverR2(m_convolverL1.fftSize())
     , m_delayLineL(ceilf(MaxDelayTimeSeconds * sampleRate),
                    WebAudioUtils::ComputeSmoothingRate(0.02, sampleRate))
     , m_delayLineR(ceilf(MaxDelayTimeSeconds * sampleRate),
                    WebAudioUtils::ComputeSmoothingRate(0.02, sampleRate))
 {
-    MOZ_ASSERT(databaseLoader);
+    MOZ_ASSERT(m_databaseLoader);
+    MOZ_COUNT_CTOR(HRTFPanner);
+
     m_tempL1.SetLength(RenderingQuantum);
     m_tempR1.SetLength(RenderingQuantum);
     m_tempL2.SetLength(RenderingQuantum);
     m_tempR2.SetLength(RenderingQuantum);
 }
 
 HRTFPanner::~HRTFPanner()
 {
+    MOZ_COUNT_DTOR(HRTFPanner);
 }
 
 void HRTFPanner::reset()
 {
     m_convolverL1.reset();
     m_convolverR1.reset();
     m_convolverL2.reset();
     m_convolverR2.reset();
@@ -106,32 +109,31 @@ int HRTFPanner::calculateDesiredAzimuthI
     return desiredAzimuthIndex;
 }
 
 void HRTFPanner::pan(double desiredAzimuth, double elevation, const AudioChunk* inputBus, AudioChunk* outputBus, TrackTicks framesToProcess)
 {
     unsigned numInputChannels =
         inputBus->IsNull() ? 0 : inputBus->mChannelData.Length();
 
-    bool isInputGood = inputBus &&  numInputChannels >= 1 && numInputChannels <= 2;
+    bool isInputGood = inputBus && numInputChannels <= 2;
     MOZ_ASSERT(isInputGood);
     MOZ_ASSERT(framesToProcess <= inputBus->mDuration);
 
     bool isOutputGood = outputBus && outputBus->mChannelData.Length() == 2 && framesToProcess <= outputBus->mDuration;
     MOZ_ASSERT(isOutputGood);
 
     if (!isInputGood || !isOutputGood) {
         if (outputBus)
             outputBus->SetNull(outputBus->mDuration);
         return;
     }
 
     HRTFDatabase* database = m_databaseLoader->database();
-    MOZ_ASSERT(database);
-    if (!database) {
+    if (!database) { // not yet loaded
         outputBus->SetNull(outputBus->mDuration);
         return;
     }
 
     // IRCAM HRTF azimuth values from the loaded database are reversed from the panner's notion of azimuth.
     double azimuth = -desiredAzimuth;
 
     bool isAzimuthGood = azimuth >= -180.0 && azimuth <= 180.0;
@@ -140,17 +142,18 @@ void HRTFPanner::pan(double desiredAzimu
         outputBus->SetNull(outputBus->mDuration);
         return;
     }
 
     // Normally, we'll just be dealing with mono sources.
     // If we have a stereo input, implement stereo panning with left source processed by left HRTF, and right source by right HRTF.
 
     // Get source and destination pointers.
-    const float* sourceL = static_cast<const float*>(inputBus->mChannelData[0]);
+    const float* sourceL = numInputChannels > 0 ?
+        static_cast<const float*>(inputBus->mChannelData[0]) : nullptr;
     const float* sourceR = numInputChannels > 1 ?
         static_cast<const float*>(inputBus->mChannelData[1]) : sourceL;
     float* destinationL =
         static_cast<float*>(const_cast<void*>(outputBus->mChannelData[0]));
     float* destinationR =
         static_cast<float*>(const_cast<void*>(outputBus->mChannelData[1]));
 
     double azimuthBlend;
@@ -220,18 +223,18 @@ void HRTFPanner::pan(double desiredAzimu
         MOZ_ASSERT(frameDelayL2 / sampleRate() < MaxDelayTimeSeconds && frameDelayR2 / sampleRate() < MaxDelayTimeSeconds);
 
         // Crossfade inter-aural delays based on transitions.
         double frameDelayL = (1 - m_crossfadeX) * frameDelayL1 + m_crossfadeX * frameDelayL2;
         double frameDelayR = (1 - m_crossfadeX) * frameDelayR1 + m_crossfadeX * frameDelayR2;
 
         // Calculate the source and destination pointers for the current segment.
         unsigned offset = segment * framesPerSegment;
-        const float* segmentSourceL = sourceL + offset;
-        const float* segmentSourceR = sourceR + offset;
+        const float* segmentSourceL = sourceL ? sourceL + offset : nullptr;
+        const float* segmentSourceR = sourceR ? sourceR + offset : nullptr;
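+        // A null segment source represents silent input; the delay lines are
+        // expected to treat it as zeros while still emitting their tails.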
         float* segmentDestinationL = destinationL + offset;
         float* segmentDestinationR = destinationR + offset;
 
         // First run through delay lines for inter-aural time difference.
         m_delayLineL.Process(frameDelayL, &segmentSourceL, &segmentDestinationL, 1, framesPerSegment);
         m_delayLineR.Process(frameDelayR, &segmentSourceR, &segmentDestinationR, 1, framesPerSegment);
 
         bool needsCrossfading = m_crossfadeIncr;
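
The crossfading above moves from the signal rendered at the previous kernel position to the signal rendered at the target position with a per-sample linear ramp, the same weighting used for the frameDelayL/frameDelayR blend, so kernel changes do not click. A minimal sketch of that blend (hypothetical helper, not part of this patch):

// Sketch of the linear crossfade used to move between two HRTF kernel
// positions without clicks (hypothetical helper, not part of this patch).
static void CrossfadeBuffers(const float* aFrom, const float* aTo,
                             float aCrossfadeX, float aCrossfadeIncr,
                             float* aDest, unsigned aFrames)
{
    float x = aCrossfadeX;
    for (unsigned i = 0; i < aFrames; ++i) {
        // x ramps from 0 to 1 across the transition, weighting the output
        // away from aFrom and towards aTo one sample at a time.
        aDest[i] = (1.0f - x) * aFrom[i] + x * aTo[i];
        x += aCrossfadeIncr;
    }
}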
--- a/content/media/webaudio/blink/HRTFPanner.h
+++ b/content/media/webaudio/blink/HRTFPanner.h
@@ -34,17 +34,17 @@ struct AudioChunk;
 }
 
 namespace WebCore {
 
 using mozilla::AudioChunk;
 
 class HRTFPanner {
 public:
-    HRTFPanner(float sampleRate, HRTFDatabaseLoader*);
+    HRTFPanner(float sampleRate, mozilla::TemporaryRef<HRTFDatabaseLoader> databaseLoader);
     ~HRTFPanner();
 
     // framesToProcess must be a power of 2 and at least 128
     void pan(double azimuth, double elevation, const AudioChunk* inputBus, AudioChunk* outputBus, mozilla::TrackTicks framesToProcess);
     void reset();
 
     size_t fftSize() const { return m_convolverL1.fftSize(); }
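
For context, the new ComputeDistanceGain() in PannerNode.cpp dispatches through mDistanceModelFunction to one of the three Web Audio distance models; only the exponential body appears in the hunks above. A sketch of all three per the Web Audio specification (hypothetical free functions, not part of this patch; parameter clamping omitted for brevity):

#include <cmath>

// Linear model: gain falls off linearly between the reference and maximum
// distances (hypothetical sketch, not part of this patch).
float LinearGain(float d, float ref, float maxDistance, float rolloff)
{
    return 1.0f - rolloff * (d - ref) / (maxDistance - ref);
}

// Inverse model: gain falls off as 1/distance beyond the reference distance.
float InverseGain(float d, float ref, float rolloff)
{
    return ref / (ref + rolloff * (d - ref));
}

// Exponential model, matching ExponentialGainFunction above:
// (d / ref) ^ -rolloff.
float ExponentialGain(float d, float ref, float rolloff)
{
    return std::pow(d / ref, -rolloff);
}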