Bug 1264199: P6. Drain resampler when changing format or reaching the end. r=kinetik
☠☠ backed out by 95384829747b ☠ ☠
authorJean-Yves Avenard <jyavenard@mozilla.com>
Thu, 14 Apr 2016 15:44:02 +1000
changeset 332150 b83be2d0614b80d56b603d0f9311419d1edc78ad
parent 332149 93ab5d1b04b065e62e4e6364af7e0a124607ac58
child 332151 a9bfe66b235c0936cb34f53d24aee08d3814bdfd
push id6048
push userkmoir@mozilla.com
push dateMon, 06 Jun 2016 19:02:08 +0000
treeherdermozilla-beta@46d72a56c57d [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerskinetik
bugs1264199
milestone48.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1264199: P6. Drain resampler when changing format or reaching the end. r=kinetik MozReview-Commit-ID: KqcB0FYxNtC
dom/media/AudioConverter.cpp
dom/media/AudioConverter.h
dom/media/mediasink/DecodedAudioDataSink.cpp
dom/media/mediasink/DecodedAudioDataSink.h
--- a/dom/media/AudioConverter.cpp
+++ b/dom/media/AudioConverter.cpp
@@ -28,29 +28,17 @@ AudioConverter::AudioConverter(const Aud
                         aIn.Interleaved() == aOut.Interleaved(),
                         "No format or rate conversion is supported at this stage");
   MOZ_DIAGNOSTIC_ASSERT(aOut.Channels() <= 2 ||
                         aIn.Channels() == aOut.Channels(),
                         "Only down/upmixing to mono or stereo is supported at this stage");
   MOZ_DIAGNOSTIC_ASSERT(aOut.Interleaved(), "planar audio format not supported");
   mIn.Layout().MappingTable(mOut.Layout(), mChannelOrderMap);
   if (aIn.Rate() != aOut.Rate()) {
-    int error;
-    mResampler = speex_resampler_init(aOut.Channels(),
-                                      aIn.Rate(),
-                                      aOut.Rate(),
-                                      SPEEX_RESAMPLER_QUALITY_DEFAULT,
-                                      &error);
-
-    if (error == RESAMPLER_ERR_SUCCESS) {
-      speex_resampler_skip_zeros(mResampler);
-    } else {
-      NS_WARNING("Failed to initialize resampler.");
-      mResampler = nullptr;
-    }
+    RecreateResampler();
   }
 }
 
 AudioConverter::~AudioConverter()
 {
   if (mResampler) {
     speex_resampler_destroy(mResampler);
     mResampler = nullptr;
@@ -277,16 +265,56 @@ AudioConverter::ResampleAudio(void* aOut
                                             out, &outframes);
   } else {
     MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
   }
   MOZ_ASSERT(inframes == aFrames, "Some frames will be dropped");
   return outframes;
 }
 
+void
+AudioConverter::RecreateResampler()
+{
+  if (mResampler) {
+    speex_resampler_destroy(mResampler);
+  }
+  int error;
+  mResampler = speex_resampler_init(mOut.Channels(),
+                                    mIn.Rate(),
+                                    mOut.Rate(),
+                                    SPEEX_RESAMPLER_QUALITY_DEFAULT,
+                                    &error);
+
+  if (error == RESAMPLER_ERR_SUCCESS) {
+    speex_resampler_skip_zeros(mResampler);
+  } else {
+    NS_WARNING("Failed to initialize resampler.");
+    mResampler = nullptr;
+  }
+}
+
+size_t
+AudioConverter::DrainResampler(void* aOut)
+{
+  if (!mResampler) {
+    return 0;
+  }
+  int frames = speex_resampler_get_input_latency(mResampler);
+  AlignedByteBuffer buffer(FramesOutToSamples(frames) *
+                           AudioConfig::SampleSize(mOut.Format()));
+  if (!buffer) {
+    // OOM
+    return 0;
+  }
+  frames = ResampleAudio(aOut, buffer.Data(), frames);
+  // Tear down the resampler as that's easier than handling the follow-up state.
+  RecreateResampler();
+  return frames;
+}
+
 size_t
 AudioConverter::UpmixAudio(void* aOut, const void* aIn, size_t aFrames) const
 {
   MOZ_ASSERT(mIn.Format() == AudioConfig::FORMAT_S16 ||
              mIn.Format() == AudioConfig::FORMAT_FLT);
   MOZ_ASSERT(mIn.Channels() < mOut.Channels());
   MOZ_ASSERT(mIn.Channels() == 1, "Can only upmix mono for now");
   MOZ_ASSERT(mOut.Channels() == 2, "Can only upmix to stereo for now");
@@ -322,17 +350,23 @@ AudioConverter::UpmixAudio(void* aOut, c
   }
 
   return aFrames;
 }
 
 size_t
 AudioConverter::ResampleRecipientFrames(size_t aFrames) const
 {
-  return (uint64_t)aFrames * mOut.Rate() / mIn.Rate() + 1;
+  if (!aFrames && mIn.Rate() != mOut.Rate()) {
+    // The resampler will be drained, account for frames currently buffered
+    // in the resampler.
+    return speex_resampler_get_output_latency(mResampler);
+  } else {
+    return (uint64_t)aFrames * mOut.Rate() / mIn.Rate() + 1;
+  }
 }
 
 size_t
 AudioConverter::FramesOutToSamples(size_t aFrames) const
 {
   return aFrames * mOut.Channels();
 }
 
--- a/dom/media/AudioConverter.h
+++ b/dom/media/AudioConverter.h
@@ -118,16 +118,18 @@ typedef AudioDataBuffer<AudioConfig::FOR
 class AudioConverter {
 public:
   AudioConverter(const AudioConfig& aIn, const AudioConfig& aOut);
   ~AudioConverter();
 
   // Convert the AudioDataBuffer.
   // Conversion will be done in place if possible. Otherwise a new buffer will
   // be returned.
+  // If an empty buffer is provided and resampling is required, the
+  // resampler will be drained.
   template <AudioConfig::SampleFormat Format, typename Value>
   AudioDataBuffer<Format, Value> Process(AudioDataBuffer<Format, Value>&& aBuffer)
   {
     MOZ_DIAGNOSTIC_ASSERT(mIn.Format() == mOut.Format() && mIn.Format() == Format);
     AudioDataBuffer<Format, Value> buffer = Move(aBuffer);
     if (CanWorkInPlace()) {
       size_t frames = SamplesInToFrames(buffer.Length());
       frames = ProcessInternal(buffer.Data(), buffer.Data(), frames);
@@ -147,32 +149,36 @@ public:
     MOZ_DIAGNOSTIC_ASSERT(mIn.Format() == mOut.Format() && mIn.Format() == Format);
     // Perform the downmixing / reordering in temporary buffer.
     size_t frames = SamplesInToFrames(aBuffer.Length());
     AlignedBuffer<Value> temp1;
     if (!temp1.SetLength(FramesOutToSamples(frames))) {
       return AudioDataBuffer<Format, Value>(Move(temp1));
     }
     frames = ProcessInternal(temp1.Data(), aBuffer.Data(), frames);
-    if (!frames || mIn.Rate() == mOut.Rate()) {
+    if (mIn.Rate() == mOut.Rate()) {
       temp1.SetLength(FramesOutToSamples(frames));
       return AudioDataBuffer<Format, Value>(Move(temp1));
     }
 
     // At this point, temp1 contains the buffer reordered and downmixed.
     // If we are downsampling we can re-use it.
     AlignedBuffer<Value>* outputBuffer = &temp1;
     AlignedBuffer<Value> temp2;
-    if (mOut.Rate() > mIn.Rate()) {
-      // We are upsampling, we can't work in place. Allocate another temporary
-      // buffer where the upsampling will occur.
+    if (!frames || mOut.Rate() > mIn.Rate()) {
+      // We are upsampling or about to drain; we can't work in place.
+      // Allocate another temporary buffer where the upsampling will occur.
       temp2.SetLength(FramesOutToSamples(ResampleRecipientFrames(frames)));
       outputBuffer = &temp2;
     }
-    frames = ResampleAudio(outputBuffer->Data(), temp1.Data(), frames);
+    if (!frames) {
+      frames = DrainResampler(outputBuffer->Data());
+    } else {
+      frames = ResampleAudio(outputBuffer->Data(), temp1.Data(), frames);
+    }
     outputBuffer->SetLength(FramesOutToSamples(frames));
     return AudioDataBuffer<Format, Value>(Move(*outputBuffer));
   }
 
   // Attempt to convert the AudioDataBuffer in place.
   // Will return 0 if the conversion wasn't possible.
   template <typename Value>
   size_t Process(Value* aBuffer, size_t aFrames)
@@ -218,13 +224,15 @@ private:
   size_t FramesOutToSamples(size_t aFrames) const;
   size_t SamplesInToFrames(size_t aSamples) const;
   size_t FramesOutToBytes(size_t aFrames) const;
 
   // Resampler context.
   SpeexResamplerState* mResampler;
   size_t ResampleAudio(void* aOut, const void* aIn, size_t aFrames);
   size_t ResampleRecipientFrames(size_t aFrames) const;
+  void RecreateResampler();
+  size_t DrainResampler(void* aOut);
 };
 
 } // namespace mozilla
 
 #endif /* AudioConverter_h */
--- a/dom/media/mediasink/DecodedAudioDataSink.cpp
+++ b/dom/media/mediasink/DecodedAudioDataSink.cpp
@@ -318,16 +318,22 @@ DecodedAudioDataSink::OnAudioPushed(cons
 }
 
 void
 DecodedAudioDataSink::NotifyAudioNeeded()
 {
   MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn(),
              "Not called from the owner's thread");
 
+  if (AudioQueue().IsFinished() && !AudioQueue().GetSize()) {
+    // We have reached the end of the data, drain the resampler.
+    DrainConverter();
+    return;
+  }
+
   // Always ensure we have two processed frames pending to allow for processing
   // latency.
   while (AudioQueue().GetSize() && (mProcessedQueueLength < LOW_AUDIO_USECS ||
                                     mProcessedQueue.GetSize() < 2)) {
     RefPtr<AudioData> data =
       dont_AddRef(AudioQueue().PopFront().take()->As<AudioData>());
 
     // Ignore the element with 0 frames and try next.
@@ -338,16 +344,18 @@ DecodedAudioDataSink::NotifyAudioNeeded(
     if (!mConverter ||
         (data->mRate != mConverter->InputConfig().Rate() ||
          data->mChannels != mConverter->InputConfig().Channels())) {
       SINK_LOG_V("Audio format changed from %u@%uHz to %u@%uHz",
                  mConverter? mConverter->InputConfig().Channels() : 0,
                  mConverter ? mConverter->InputConfig().Rate() : 0,
                  data->mChannels, data->mRate);
 
+      DrainConverter();
+
       // mFramesParsed indicates the current playtime in frames at the current
       // input sampling rate. Recalculate it per the new sampling rate.
       if (mFramesParsed) {
         // We minimize overflow.
         uint32_t oldRate = mConverter->InputConfig().Rate();
         uint32_t newRate = data->mRate;
         int64_t major = mFramesParsed / oldRate;
         int64_t remainder = mFramesParsed % oldRate;
@@ -384,35 +392,42 @@ DecodedAudioDataSink::NotifyAudioNeeded(
 
     if (missingFrames.value() > AUDIO_FUZZ_FRAMES) {
       // The next audio packet begins some time after the end of the last packet
       // we pushed to the audio hardware. We must push silence into the audio
       // hardware so that the next audio packet begins playback at the correct
       // time.
       missingFrames = std::min<int64_t>(INT32_MAX, missingFrames.value());
       mFramesParsed += missingFrames.value();
-      AlignedAudioBuffer silenceData(missingFrames.value() * mOutputChannels);
-      if (!silenceData) {
-        NS_WARNING("OOM in DecodedAudioDataSink");
-        mErrored = true;
-        return;
+      // We need to insert silence, first use drained frames if any.
+      missingFrames -= DrainConverter(missingFrames.value());
+      // Insert silence if still needed.
+      if (missingFrames.value()) {
+        AlignedAudioBuffer silenceData(missingFrames.value() * mOutputChannels);
+        if (!silenceData) {
+          NS_WARNING("OOM in DecodedAudioDataSink");
+          mErrored = true;
+          return;
+        }
+        RefPtr<AudioData> silence = CreateAudioFromBuffer(Move(silenceData), data);
+        PushProcessedAudio(silence);
       }
-      RefPtr<AudioData> silence = CreateAudioFromBuffer(Move(silenceData), data);
-      PushProcessedAudio(silence);
     }
 
     mLastEndTime = data->GetEndTime();
     mFramesParsed += data->mFrames;
 
     if (mConverter->InputConfig() != mConverter->OutputConfig()) {
       AlignedAudioBuffer convertedData =
         mConverter->Process(AudioSampleBuffer(Move(data->mAudioData))).Forget();
       data = CreateAudioFromBuffer(Move(convertedData), data);
     }
-    PushProcessedAudio(data);
+    if (PushProcessedAudio(data)) {
+      mLastProcessedPacket = Some(data);
+    }
   }
 }
 
 uint32_t
 DecodedAudioDataSink::PushProcessedAudio(AudioData* aData)
 {
   if (!aData || !aData->mFrames) {
     return 0;
@@ -442,10 +457,43 @@ DecodedAudioDataSink::CreateAudioFromBuf
                   duration.value(),
                   frames,
                   Move(aBuffer),
                   mOutputChannels,
                   mOutputRate);
   return data.forget();
 }
 
+uint32_t
+DecodedAudioDataSink::DrainConverter(uint32_t aMaxFrames)
+{
+  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());
+
+  if (!mConverter || !mLastProcessedPacket) {
+    // nothing to drain.
+    return 0;
+  }
+
+  RefPtr<AudioData> lastPacket = mLastProcessedPacket.ref();
+  mLastProcessedPacket.reset();
+
+  // To drain we simply provide an empty packet to the audio converter.
+  AlignedAudioBuffer convertedData =
+    mConverter->Process(AudioSampleBuffer(AlignedAudioBuffer())).Forget();
+
+  uint32_t frames = convertedData.Length() / mOutputChannels;
+  if (!convertedData.SetLength(std::min(frames, aMaxFrames) * mOutputChannels)) {
+    // This can never happen as we were reducing the length of convertedData.
+    mErrored = true;
+    return 0;
+  }
+
+  RefPtr<AudioData> data =
+    CreateAudioFromBuffer(Move(convertedData), lastPacket);
+  if (!data) {
+    return 0;
+  }
+  mProcessedQueue.Push(data);
+  return data->mFrames;
+}
+
 } // namespace media
 } // namespace mozilla
--- a/dom/media/mediasink/DecodedAudioDataSink.h
+++ b/dom/media/mediasink/DecodedAudioDataSink.h
@@ -109,31 +109,35 @@ private:
   Atomic<bool> mPlaybackComplete;
 
   const RefPtr<AbstractThread> mOwnerThread;
 
   // Audio Processing objects and methods
   void OnAudioPopped(const RefPtr<MediaData>& aSample);
   void OnAudioPushed(const RefPtr<MediaData>& aSample);
   void NotifyAudioNeeded();
+  // Drain the converter and add the output to the processed audio queue.
+  // A maximum of aMaxFrames will be added.
+  uint32_t DrainConverter(uint32_t aMaxFrames = UINT32_MAX);
   already_AddRefed<AudioData> CreateAudioFromBuffer(AlignedAudioBuffer&& aBuffer,
                                                     AudioData* aReference);
   // Add data to the processsed queue, update mProcessedQueueLength and
   // return the number of frames added.
   uint32_t PushProcessedAudio(AudioData* aData);
   UniquePtr<AudioConverter> mConverter;
   MediaQueue<AudioData> mProcessedQueue;
   // Length in microseconds of the ProcessedQueue
   Atomic<int32_t> mProcessedQueueLength;
   MediaEventListener mAudioQueueListener;
   MediaEventListener mProcessedQueueListener;
   // Number of frames processed from AudioQueue(). Used to determine gaps in
   // the input stream. It indicates the time in frames since playback started
   // at the current input framerate.
   int64_t mFramesParsed;
+  Maybe<RefPtr<AudioData>> mLastProcessedPacket;
   int64_t mLastEndTime;
   // Never modifed after construction.
   uint32_t mOutputRate;
   uint32_t mOutputChannels;
 };
 
 } // namespace media
 } // namespace mozilla