b=815643 Add code to interpret and resample impulse response data r=ehsan
author Karl Tomlinson <karlt+@karlt.net>
Thu, 08 Aug 2013 21:38:30 +1200
changeset 142359 9b01d3f61a1b
parent 142358 c56ecdd125ec
child 142360 5a05c2215091
push id 32374
push user ktomlinson@mozilla.com
push date Tue, 13 Aug 2013 02:49:14 +0000
treeherder mozilla-inbound@62ad090a94a4 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers ehsan
bugs 815643
milestone 26.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
b=815643 Add code to interpret and resample impulse response data r=ehsan
content/media/webaudio/blink/HRTFElevation.cpp
content/media/webaudio/blink/HRTFElevation.h
content/media/webaudio/blink/HRTFPanner.cpp
content/media/webaudio/blink/HRTFPanner.h
--- a/content/media/webaudio/blink/HRTFElevation.cpp
+++ b/content/media/webaudio/blink/HRTFElevation.cpp
@@ -27,84 +27,115 @@
  */
 
 #include "config.h"
 
 #if ENABLE(WEB_AUDIO)
 
 #include "core/platform/audio/HRTFElevation.h"
 
+#include "speex/speex_resampler.h"
+#include "mozilla/PodOperations.h"
+#include "AudioSampleFormat.h"
 #include <math.h>
 #include <algorithm>
 #include "core/platform/PlatformMemoryInstrumentation.h"
 #include "core/platform/audio/AudioBus.h"
 #include "core/platform/audio/HRTFPanner.h"
 #include <wtf/MemoryInstrumentationVector.h>
 #include <wtf/OwnPtr.h>
 
+#include "IRC_Composite_C_R0195-incl.cpp"
+
 using namespace std;
+using namespace mozilla;
  
 namespace WebCore {
 
-const unsigned HRTFElevation::AzimuthSpacing = 15;
-const unsigned HRTFElevation::NumberOfRawAzimuths = 360 / AzimuthSpacing;
-const unsigned HRTFElevation::InterpolationFactor = 8;
-const unsigned HRTFElevation::NumberOfTotalAzimuths = NumberOfRawAzimuths * InterpolationFactor;
+const int elevationSpacing = irc_composite_c_r0195_elevation_interval;
+const int firstElevation = irc_composite_c_r0195_first_elevation;
+const int numberOfElevations = MOZ_ARRAY_LENGTH(irc_composite_c_r0195);
+
+const unsigned HRTFElevation::NumberOfTotalAzimuths = 360 / 15 * 8;
+
+const int rawSampleRate = irc_composite_c_r0195_sample_rate;
 
 // Number of frames in an individual impulse response.
 const size_t ResponseFrameSize = 256;
 
-bool HRTFElevation::calculateKernelForAzimuthElevation(int azimuth, int elevation, float sampleRate, const String& subjectName,
+size_t HRTFElevation::fftSizeForSampleRate(float sampleRate)
+{
+    // The HRTF impulse responses (loaded as audio resources) are 512 sample-frames @44.1KHz.
+    // Currently, we truncate the impulse responses to half this size, but an FFT-size of twice impulse response size is needed (for convolution).
+    // So for sample rates around 44.1KHz an FFT size of 512 is good. We double the FFT-size only for sample rates at least double this.
+    ASSERT(sampleRate >= 44100 && sampleRate <= 96000.0);
+    return (sampleRate < 88200.0) ? 512 : 1024;
+}
+
+bool HRTFElevation::calculateKernelForAzimuthElevation(int azimuth, int elevation, SpeexResamplerState* resampler, float sampleRate,
                                                        RefPtr<HRTFKernel>& kernelL)
 {
-    // Valid values for azimuth are 0 -> 345 in 15 degree increments.
-    // Valid values for elevation are -45 -> +90 in 15 degree increments.
+    int elevationIndex = (elevation - firstElevation) / elevationSpacing;
+    MOZ_ASSERT(elevationIndex >= 0 && elevationIndex < numberOfElevations);
 
-    bool isAzimuthGood = azimuth >= 0 && azimuth <= 345 && (azimuth / 15) * 15 == azimuth;
-    ASSERT(isAzimuthGood);
-    if (!isAzimuthGood)
-        return false;
+    int numberOfAzimuths = irc_composite_c_r0195[elevationIndex].count;
+    int azimuthSpacing = 360 / numberOfAzimuths;
+    MOZ_ASSERT(numberOfAzimuths * azimuthSpacing == 360);
+
+    int azimuthIndex = azimuth / azimuthSpacing;
+    MOZ_ASSERT(azimuthIndex * azimuthSpacing == azimuth);
 
-    bool isElevationGood = elevation >= -45 && elevation <= 90 && (elevation / 15) * 15 == elevation;
-    ASSERT(isElevationGood);
-    if (!isElevationGood)
-        return false;
-    
-    // Construct the resource name from the subject name, azimuth, and elevation, for example:
-    // "IRC_Composite_C_R0195_T015_P000"
-    // Note: the passed in subjectName is not a string passed in via JavaScript or the web.
-    // It's passed in as an internal ASCII identifier and is an implementation detail.
-    int positiveElevation = elevation < 0 ? elevation + 360 : elevation;
+    const int16_t (&impulse_response_data)[ResponseFrameSize] =
+        irc_composite_c_r0195[elevationIndex].azimuths[azimuthIndex];
+    float floatResponse[ResponseFrameSize];
+    ConvertAudioSamples(impulse_response_data, floatResponse,
+                        ResponseFrameSize);
+
+    // Note that depending on the fftSize returned by the panner, we may be truncating the impulse response.
+    const size_t responseLength = fftSizeForSampleRate(sampleRate) / 2;
 
-    String resourceName = String::format("IRC_%s_C_R0195_T%03d_P%03d", subjectName.utf8().data(), azimuth, positiveElevation);
-
-    RefPtr<AudioBus> impulseResponse(AudioBus::loadPlatformResource(resourceName.utf8().data(), sampleRate));
+    float* response;
+    nsAutoTArray<float, 2 * ResponseFrameSize> resampled;
+    if (sampleRate == rawSampleRate) {
+        response = floatResponse;
+        MOZ_ASSERT(responseLength == ResponseFrameSize);
+    } else {
+        resampled.SetLength(responseLength);
+        response = resampled.Elements();
+        speex_resampler_skip_zeros(resampler);
 
-    ASSERT(impulseResponse.get());
-    if (!impulseResponse.get())
-        return false;
-    
-    size_t responseLength = impulseResponse->length();
-    size_t expectedLength = static_cast<size_t>(256 * (sampleRate / 44100.0));
+        // Feed the input buffer into the resampler.
+        spx_uint32_t in_len = ResponseFrameSize;
+        spx_uint32_t out_len = resampled.Length();
+        speex_resampler_process_float(resampler, 0, floatResponse, &in_len,
+                                      response, &out_len);
 
-    // Check number of channels and length.  For now these are fixed and known.
-    bool isBusGood = responseLength == expectedLength && impulseResponse->numberOfChannels() == 2;
-    ASSERT(isBusGood);
-    if (!isBusGood)
-        return false;
-    
-    AudioChannel* leftEarImpulseResponse = impulseResponse->channelByType(AudioBus::ChannelLeft);
+        if (out_len < resampled.Length()) {
+            // The input should have all been processed.
+            MOZ_ASSERT(in_len == ResponseFrameSize);
+            // Feed in zeros to get the data remaining in the resampler.
+            spx_uint32_t out_index = out_len;
+            in_len = speex_resampler_get_input_latency(resampler);
+            nsAutoTArray<float, 256> zeros;
+            zeros.SetLength(in_len);
+            PodZero(zeros.Elements(), in_len);
+            out_len = resampled.Length() - out_index;
+            speex_resampler_process_float(resampler, 0,
+                                          zeros.Elements(), &in_len,
+                                          response + out_index, &out_len);
+            out_index += out_len;
+            // There may be some uninitialized samples remaining for low
+            // sample rates.
+            PodZero(response + out_index, resampled.Length() - out_index);
+        }
 
-    // Note that depending on the fftSize returned by the panner, we may be truncating the impulse response we just loaded in.
-    const size_t fftSize = HRTFPanner::fftSizeForSampleRate(sampleRate);
-    MOZ_ASSERT(responseLength >= fftSize / 2);
-    if (responseLength < fftSize / 2)
-        return false;
+        speex_resampler_reset_mem(resampler);
+    }
 
-    kernelL = HRTFKernel::create(leftEarImpulseResponse, fftSize / 2, sampleRate);
+    kernelL = HRTFKernel::create(response, responseLength, sampleRate);
     
     return true;
 }
 
 // The range of elevations for the IRCAM impulse responses varies depending on azimuth, but the minimum elevation appears to always be -45.
 //
 // Here's how it goes:
 static int maxElevations[] = {
@@ -133,37 +164,56 @@ static int maxElevations[] = {
     75, // 300 
     45, // 315 
     60, // 330 
     45 //  345 
 };
 
 PassOwnPtr<HRTFElevation> HRTFElevation::createForSubject(const String& subjectName, int elevation, float sampleRate)
 {
-    bool isElevationGood = elevation >= -45 && elevation <= 90 && (elevation / 15) * 15 == elevation;
-    ASSERT(isElevationGood);
-    if (!isElevationGood)
+    if (elevation < firstElevation ||
+        elevation > firstElevation + (numberOfElevations - 1) * elevationSpacing ||
+        (elevation / elevationSpacing) * elevationSpacing != elevation)
         return nullptr;
         
+    // Spacing, in degrees, between every azimuth loaded from resource.
+    // Some elevations do not have data for all these intervals.
+    // See maxElevations.
+    static const unsigned AzimuthSpacing = 15;
+    static const unsigned NumberOfRawAzimuths = 360 / AzimuthSpacing;
+    static_assert(AzimuthSpacing * NumberOfRawAzimuths == 360,
+                  "Not a multiple");
+    static const unsigned InterpolationFactor =
+        NumberOfTotalAzimuths / NumberOfRawAzimuths;
+    static_assert(NumberOfTotalAzimuths ==
+                  NumberOfRawAzimuths * InterpolationFactor, "Not a multiple");
+
     OwnPtr<HRTFKernelList> kernelListL = adoptPtr(new HRTFKernelList(NumberOfTotalAzimuths));
 
+    SpeexResamplerState* resampler = sampleRate == rawSampleRate ? nullptr :
+        speex_resampler_init(1, rawSampleRate, sampleRate,
+                             SPEEX_RESAMPLER_QUALITY_DEFAULT, nullptr);
+
     // Load convolution kernels from HRTF files.
     int interpolatedIndex = 0;
     for (unsigned rawIndex = 0; rawIndex < NumberOfRawAzimuths; ++rawIndex) {
         // Don't let elevation exceed maximum for this azimuth.
         int maxElevation = maxElevations[rawIndex];
         int actualElevation = min(elevation, maxElevation);
 
-        bool success = calculateKernelForAzimuthElevation(rawIndex * AzimuthSpacing, actualElevation, sampleRate, subjectName, kernelListL->at(interpolatedIndex));
+        bool success = calculateKernelForAzimuthElevation(rawIndex * AzimuthSpacing, actualElevation, resampler, sampleRate, kernelListL->at(interpolatedIndex));
         if (!success)
             return nullptr;
             
         interpolatedIndex += InterpolationFactor;
     }
 
+    if (resampler)
+        speex_resampler_destroy(resampler);
+
     // Now go back and interpolate intermediate azimuth values.
     for (unsigned i = 0; i < NumberOfTotalAzimuths; i += InterpolationFactor) {
         int j = (i + InterpolationFactor) % NumberOfTotalAzimuths;
 
         // Create the interpolated convolution kernels and delays.
         for (unsigned jj = 1; jj < InterpolationFactor; ++jj) {
             float x = float(jj) / float(InterpolationFactor); // interpolate from 0 -> 1
 
--- a/content/media/webaudio/blink/HRTFElevation.h
+++ b/content/media/webaudio/blink/HRTFElevation.h
@@ -34,16 +34,19 @@
 #include <wtf/OwnPtr.h>
 #include <wtf/PassOwnPtr.h>
 #include <wtf/PassRefPtr.h>
 #include <wtf/RefCounted.h>
 #include <wtf/RefPtr.h>
 #include <wtf/text/CString.h>
 #include <wtf/text/WTFString.h>
 
+struct SpeexResamplerState_;
+typedef struct SpeexResamplerState_ SpeexResamplerState;
+
 namespace WebCore {
 
 // HRTFElevation contains all of the HRTFKernels (one left ear and one right ear per azimuth angle) for a particular elevation.
 
 class HRTFElevation {
     WTF_MAKE_NONCOPYABLE(HRTFElevation);
 public:
     // Loads and returns an HRTFElevation with the given HRTF database subject name and elevation from browser (or WebKit.framework) resources.
@@ -58,25 +61,16 @@ public:
     double elevationAngle() const { return m_elevationAngle; }
     unsigned numberOfAzimuths() const { return NumberOfTotalAzimuths; }
     float sampleRate() const { return m_sampleRate; }
     
     // Returns the left and right kernels for the given azimuth index.
     // The interpolated delays based on azimuthBlend: 0 -> 1 are returned in frameDelayL and frameDelayR.
     void getKernelsFromAzimuth(double azimuthBlend, unsigned azimuthIndex, HRTFKernel* &kernelL, HRTFKernel* &kernelR, double& frameDelayL, double& frameDelayR);
     
-    // Spacing, in degrees, between every azimuth loaded from resource.
-    static const unsigned AzimuthSpacing;
-    
-    // Number of azimuths loaded from resource.
-    static const unsigned NumberOfRawAzimuths;
-
-    // Interpolates by this factor to get the total number of azimuths from every azimuth loaded from resource.
-    static const unsigned InterpolationFactor;
-    
     // Total number of azimuths after interpolation.
     static const unsigned NumberOfTotalAzimuths;
 
     void reportMemoryUsage(MemoryObjectInfo*) const;
 
 private:
     HRTFElevation(PassOwnPtr<HRTFKernelList> kernelListL, int elevation, float sampleRate)
         : m_kernelListL(kernelListL)
@@ -84,20 +78,21 @@ private:
         , m_sampleRate(sampleRate)
     {
     }
 
     // Returns the list of left ear HRTFKernels for all the azimuths going from 0 to 360 degrees.
     HRTFKernelList* kernelListL() { return m_kernelListL.get(); }
 
     // Given a specific azimuth and elevation angle, returns the left HRTFKernel.
-    // Valid values for azimuth are 0 -> 345 in 15 degree increments.
+    // Values for azimuth must be multiples of 15 in 0 -> 345,
+    // but not all azimuths are available for elevations > +45.
     // Valid values for elevation are -45 -> +90 in 15 degree increments.
     // Returns true on success.
-    static bool calculateKernelForAzimuthElevation(int azimuth, int elevation, float sampleRate, const String& subjectName,
+    static bool calculateKernelForAzimuthElevation(int azimuth, int elevation, SpeexResamplerState* resampler, float sampleRate,
                                                    RefPtr<HRTFKernel>& kernelL);
 
     OwnPtr<HRTFKernelList> m_kernelListL;
     double m_elevationAngle;
     float m_sampleRate;
 };
 
 } // namespace WebCore
--- a/content/media/webaudio/blink/HRTFPanner.cpp
+++ b/content/media/webaudio/blink/HRTFPanner.cpp
@@ -52,43 +52,34 @@ HRTFPanner::HRTFPanner(float sampleRate,
     , m_sampleRate(sampleRate)
     , m_crossfadeSelection(CrossfadeSelection1)
     , m_azimuthIndex1(UninitializedAzimuth)
     , m_elevation1(0)
     , m_azimuthIndex2(UninitializedAzimuth)
     , m_elevation2(0)
     , m_crossfadeX(0)
     , m_crossfadeIncr(0)
-    , m_convolverL1(fftSizeForSampleRate(sampleRate))
-    , m_convolverR1(fftSizeForSampleRate(sampleRate))
-    , m_convolverL2(fftSizeForSampleRate(sampleRate))
-    , m_convolverR2(fftSizeForSampleRate(sampleRate))
+    , m_convolverL1(HRTFElevation::fftSizeForSampleRate(sampleRate))
+    , m_convolverR1(m_convolverL1.fftSize())
+    , m_convolverL2(m_convolverL1.fftSize())
+    , m_convolverR2(m_convolverL1.fftSize())
     , m_delayLineL(MaxDelayTimeSeconds, sampleRate)
     , m_delayLineR(MaxDelayTimeSeconds, sampleRate)
     , m_tempL1(RenderingQuantum)
     , m_tempR1(RenderingQuantum)
     , m_tempL2(RenderingQuantum)
     , m_tempR2(RenderingQuantum)
 {
     ASSERT(databaseLoader);
 }
 
 HRTFPanner::~HRTFPanner()
 {
 }
 
-size_t HRTFPanner::fftSizeForSampleRate(float sampleRate)
-{
-    // The HRTF impulse responses (loaded as audio resources) are 512 sample-frames @44.1KHz.
-    // Currently, we truncate the impulse responses to half this size, but an FFT-size of twice impulse response size is needed (for convolution).
-    // So for sample rates around 44.1KHz an FFT size of 512 is good. We double the FFT-size only for sample rates at least double this.
-    ASSERT(sampleRate >= 44100 && sampleRate <= 96000.0);
-    return (sampleRate < 88200.0) ? 512 : 1024;
-}
-
 void HRTFPanner::reset()
 {
     m_convolverL1.reset();
     m_convolverR1.reset();
     m_convolverL2.reset();
     m_convolverR2.reset();
     m_delayLineL.reset();
     m_delayLineR.reset();
--- a/content/media/webaudio/blink/HRTFPanner.h
+++ b/content/media/webaudio/blink/HRTFPanner.h
@@ -36,18 +36,17 @@ class HRTFPanner : public Panner {
 public:
     HRTFPanner(float sampleRate, HRTFDatabaseLoader*);
     virtual ~HRTFPanner();
 
     // Panner
     virtual void pan(double azimuth, double elevation, const AudioBus* inputBus, AudioBus* outputBus, size_t framesToProcess);
     virtual void reset();
 
-    size_t fftSize() const { return fftSizeForSampleRate(m_sampleRate); }
-    static size_t fftSizeForSampleRate(float sampleRate);
+    size_t fftSize() const { return m_convolverL1.fftSize(); }
 
     float sampleRate() const { return m_sampleRate; }
 
     virtual double tailTime() const OVERRIDE;
     virtual double latencyTime() const OVERRIDE;
 
 private:
     // Given an azimuth angle in the range -180 -> +180, returns the corresponding azimuth index for the database,