bug 1221833 reduce FFTConvolver latency by one block r=padenot
author Karl Tomlinson <karlt+@karlt.net>
Wed, 04 Nov 2015 06:32:49 +1300
changeset 271374 26eaa1b7e87850896d3f334ce385db11c49c46e3
parent 271373 c7695f2acc277311b283c4d9417a0089b9199941
child 271375 fe2266a55ca19d4270b91f3658786d5549267b09
push id 67632
push user ktomlinson@mozilla.com
push date Thu, 05 Nov 2015 21:42:01 +0000
treeherder mozilla-inbound@21612d903003 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers padenot
bugs 1221833
milestone 45.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
bug 1221833 reduce FFTConvolver latency by one block r=padenot
dom/media/webaudio/blink/FFTConvolver.cpp
dom/media/webaudio/blink/FFTConvolver.h
dom/media/webaudio/blink/HRTFPanner.cpp
dom/media/webaudio/blink/ReverbConvolver.cpp
dom/media/webaudio/blink/ReverbConvolverStage.cpp
--- a/dom/media/webaudio/blink/FFTConvolver.cpp
+++ b/dom/media/webaudio/blink/FFTConvolver.cpp
@@ -81,26 +81,17 @@ void FFTConvolver::process(FFTBlock* fft
         // Sanity check
         bool isCopyGood1 = sourceP && inputP && m_readWriteIndex + divisionSize <= m_inputBuffer.Length();
         MOZ_ASSERT(isCopyGood1);
         if (!isCopyGood1)
             return;
 
         memcpy(inputP + m_readWriteIndex, sourceP, sizeof(float) * divisionSize);
 
-        // Copy samples from output buffer
         float* outputP = m_outputBuffer.Elements();
-
-        // Sanity check
-        bool isCopyGood2 = destP && outputP && m_readWriteIndex + divisionSize <= m_outputBuffer.Length();
-        MOZ_ASSERT(isCopyGood2);
-        if (!isCopyGood2)
-            return;
-
-        memcpy(destP, outputP + m_readWriteIndex, sizeof(float) * divisionSize);
         m_readWriteIndex += divisionSize;
 
         // Check if it's time to perform the next FFT
         if (m_readWriteIndex == halfSize) {
             // The input buffer is now filled (get frequency-domain version)
             m_frame.PerformFFT(m_inputBuffer.Elements());
             m_frame.Multiply(*fftKernel);
             m_frame.GetInverseWithoutScaling(m_outputBuffer.Elements());
@@ -115,18 +106,33 @@ void FFTConvolver::process(FFTBlock* fft
             if (!isCopyGood3)
                 return;
 
             memcpy(m_lastOverlapBuffer.Elements(), m_outputBuffer.Elements() + halfSize, sizeof(float) * halfSize);
 
             // Reset index back to start for next time
             m_readWriteIndex = 0;
         }
+
+        // Sanity check
+        bool isCopyGood2 = destP && outputP && m_readWriteIndex + divisionSize <= m_outputBuffer.Length();
+        MOZ_ASSERT(isCopyGood2);
+        if (!isCopyGood2)
+            return;
+
+        // Copy samples from output buffer
+        memcpy(destP, outputP + m_readWriteIndex, sizeof(float) * divisionSize);
     }
 }
 
 void FFTConvolver::reset()
 {
     PodZero(m_lastOverlapBuffer.Elements(), m_lastOverlapBuffer.Length());
     m_readWriteIndex = 0;
 }
 
+size_t FFTConvolver::latencyFrames() const
+{
+    return std::max<size_t>(fftSize()/2, WEBAUDIO_BLOCK_SIZE) -
+        WEBAUDIO_BLOCK_SIZE;
+}
+
 } // namespace WebCore
--- a/dom/media/webaudio/blink/FFTConvolver.h
+++ b/dom/media/webaudio/blink/FFTConvolver.h
@@ -48,25 +48,27 @@ public:
     explicit FFTConvolver(size_t fftSize, size_t renderPhase = 0);
 
     // Process WEBAUDIO_BLOCK_SIZE elements of array |sourceP| to |destP|.
     //
     // |fftKernel| must be pre-scaled for FFTBlock::GetInverseWithoutScaling().
     //
     // FIXME: Later, we can do more sophisticated buffering to relax this requirement...
     //
-    // The input to output latency is equal to fftSize / 2
-    //
     // Processing in-place is allowed...
     void process(FFTBlock* fftKernel, const float* sourceP, float* destP);
 
     void reset();
 
     size_t fftSize() const { return m_frame.FFTSize(); }
 
+    // The input to output latency is up to fftSize / 2, but alignment of the
+    // FFTs with the blocks reduces this by one block.
+    size_t latencyFrames() const;
+
     size_t sizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const;
     size_t sizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const;
 
 private:
     FFTBlock m_frame;
 
     // Buffer input until we get fftSize / 2 samples then do an FFT
     size_t m_readWriteIndex;
--- a/dom/media/webaudio/blink/HRTFPanner.cpp
+++ b/dom/media/webaudio/blink/HRTFPanner.cpp
@@ -302,15 +302,17 @@ void HRTFPanner::pan(double desiredAzimu
 }
 
 int HRTFPanner::maxTailFrames() const
 {
     // Although the ideal tail time would be the length of the impulse
     // response, there is additional tail time from the approximations in the
     // implementation.  Because HRTFPanner is implemented with a DelayKernel
     // and a FFTConvolver, the tailTime of the HRTFPanner is the sum of the
-    // tailTime of the DelayKernel and the tailTime of the FFTConvolver.
-    // The FFTConvolver has a tail time of fftSize(), including latency of
-    // fftSize()/2.
-    return m_delayLine.MaxDelayTicks() + fftSize();
+    // tailTime of the DelayKernel and the tailTime of the FFTConvolver.  The
+    // FFTs of the convolver are fftSize(), half of which is latency, but this
+    // is aligned with blocks and so is reduced by the one block which is
+    // processed immediately.
+    return m_delayLine.MaxDelayTicks() +
+        m_convolverL1.fftSize()/2 + m_convolverL1.latencyFrames();
 }
 
 } // namespace WebCore
--- a/dom/media/webaudio/blink/ReverbConvolver.cpp
+++ b/dom/media/webaudio/blink/ReverbConvolver.cpp
@@ -44,18 +44,18 @@ const int InputBufferSize = 8 * 16384;
 
 // We only process the leading portion of the impulse response in the real-time thread.  We don't exceed this length.
 // It turns out then, that the background thread has about 278msec of scheduling slop.
 // Empirically, this has been found to be a good compromise between giving enough time for scheduling slop,
 // while still minimizing the amount of processing done in the primary (high-priority) thread.
 // This was found to be a good value on Mac OS X, and may work well on other platforms as well, assuming
 // the very rough scheduling latencies are similar on these time-scales.  Of course, this code may need to be
 // tuned for individual platforms if this assumption is found to be incorrect.
-const size_t RealtimeFrameLimit = 8192  + 4096; // ~278msec @ 44.1KHz
-
+const size_t RealtimeFrameLimit = 8192 + 4096 // ~278msec @ 44.1KHz
+                                  - WEBAUDIO_BLOCK_SIZE;
 const size_t MinFFTSize = 128;
 const size_t MaxRealtimeFFTSize = 4096;
 
 ReverbConvolver::ReverbConvolver(const float* impulseResponseData,
                                  size_t impulseResponseLength,
                                  size_t maxFFTSize,
                                  size_t convolverRenderPhase,
                                  bool useBackgroundThreads)
--- a/dom/media/webaudio/blink/ReverbConvolverStage.cpp
+++ b/dom/media/webaudio/blink/ReverbConvolverStage.cpp
@@ -62,22 +62,22 @@ ReverbConvolverStage::ReverbConvolverSta
         m_directConvolver = new DirectConvolver(WEBAUDIO_BLOCK_SIZE);
     }
     m_temporaryBuffer.SetLength(WEBAUDIO_BLOCK_SIZE);
     PodZero(m_temporaryBuffer.Elements(), m_temporaryBuffer.Length());
 
     // The convolution stage at offset stageOffset needs to have a corresponding delay to cancel out the offset.
     size_t totalDelay = stageOffset + reverbTotalLatency;
 
-    // But, the FFT convolution itself incurs fftSize / 2 latency, so subtract this out...
-    size_t halfSize = fftSize / 2;
+    // But, the FFT convolution itself incurs latency, so subtract this out...
     if (!m_directMode) {
-        MOZ_ASSERT(totalDelay >= halfSize);
-        if (totalDelay >= halfSize)
-            totalDelay -= halfSize;
+        size_t fftLatency = m_fftConvolver->latencyFrames();
+        MOZ_ASSERT(totalDelay >= fftLatency);
+        if (totalDelay >= fftLatency)
+            totalDelay -= fftLatency;
     }
 
     m_postDelayLength = totalDelay;
 }
 
 size_t ReverbConvolverStage::sizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const
 {
     size_t amount = aMallocSizeOf(this);