Bug 1264199: P6. Drain resampler when changing format or reaching the end. r=kinetik
author Jean-Yves Avenard <jyavenard@mozilla.com>
Thu, 14 Apr 2016 15:44:02 +1000
changeset 294869 1435aa2cf1ca0c2009a513b4faf86497f42ec851
parent 294868 ca918dcf2dfc25ad92f4cbcafa290c25ca106599
child 294870 14e0996c0dc588e6b89df5a08e71820edf736720
push id 75709
push user jyavenard@mozilla.com
push date Tue, 26 Apr 2016 06:37:10 +0000
reviewers kinetik
bugs 1264199
milestone 49.0a1
Bug 1264199: P6. Drain resampler when changing format or reaching the end. r=kinetik MozReview-Commit-ID: KqcB0FYxNtC
dom/media/AudioConverter.cpp
dom/media/AudioConverter.h
dom/media/mediasink/DecodedAudioDataSink.cpp
dom/media/mediasink/DecodedAudioDataSink.h
--- a/dom/media/AudioConverter.cpp
+++ b/dom/media/AudioConverter.cpp
@@ -28,29 +28,17 @@ AudioConverter::AudioConverter(const Aud
                         aIn.Interleaved() == aOut.Interleaved(),
                         "No format or rate conversion is supported at this stage");
   MOZ_DIAGNOSTIC_ASSERT(aOut.Channels() <= 2 ||
                         aIn.Channels() == aOut.Channels(),
                         "Only down/upmixing to mono or stereo is supported at this stage");
   MOZ_DIAGNOSTIC_ASSERT(aOut.Interleaved(), "planar audio format not supported");
   mIn.Layout().MappingTable(mOut.Layout(), mChannelOrderMap);
   if (aIn.Rate() != aOut.Rate()) {
-    int error;
-    mResampler = speex_resampler_init(aOut.Channels(),
-                                      aIn.Rate(),
-                                      aOut.Rate(),
-                                      SPEEX_RESAMPLER_QUALITY_DEFAULT,
-                                      &error);
-
-    if (error == RESAMPLER_ERR_SUCCESS) {
-      speex_resampler_skip_zeros(mResampler);
-    } else {
-      NS_WARNING("Failed to initialize resampler.");
-      mResampler = nullptr;
-    }
+    RecreateResampler();
   }
 }
 
 AudioConverter::~AudioConverter()
 {
   if (mResampler) {
     speex_resampler_destroy(mResampler);
     mResampler = nullptr;
@@ -277,16 +265,56 @@ AudioConverter::ResampleAudio(void* aOut
                                             out, &outframes);
   } else {
     MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
   }
   MOZ_ASSERT(inframes == aFrames, "Some frames will be dropped");
   return outframes;
 }
 
+void
+AudioConverter::RecreateResampler()
+{
+  if (mResampler) {
+    speex_resampler_destroy(mResampler);
+  }
+  int error;
+  mResampler = speex_resampler_init(mOut.Channels(),
+                                    mIn.Rate(),
+                                    mOut.Rate(),
+                                    SPEEX_RESAMPLER_QUALITY_DEFAULT,
+                                    &error);
+
+  if (error == RESAMPLER_ERR_SUCCESS) {
+    speex_resampler_skip_zeros(mResampler);
+  } else {
+    NS_WARNING("Failed to initialize resampler.");
+    mResampler = nullptr;
+  }
+}
+
+size_t
+AudioConverter::DrainResampler(void* aOut)
+{
+  if (!mResampler) {
+    return 0;
+  }
+  int frames = speex_resampler_get_input_latency(mResampler);
+  AlignedByteBuffer buffer(FramesOutToSamples(frames) *
+                           AudioConfig::SampleSize(mOut.Format()));
+  if (!buffer) {
+    // OOM
+    return 0;
+  }
+  frames = ResampleAudio(aOut, buffer.Data(), frames);
+  // Tear down and recreate the resampler; easier than handling the follow-up.
+  RecreateResampler();
+  return frames;
+}
+
 size_t
 AudioConverter::UpmixAudio(void* aOut, const void* aIn, size_t aFrames) const
 {
   MOZ_ASSERT(mIn.Format() == AudioConfig::FORMAT_S16 ||
              mIn.Format() == AudioConfig::FORMAT_FLT);
   MOZ_ASSERT(mIn.Channels() < mOut.Channels());
   MOZ_ASSERT(mIn.Channels() == 1, "Can only upmix mono for now");
   MOZ_ASSERT(mOut.Channels() == 2, "Can only upmix to stereo for now");
@@ -322,17 +350,23 @@ AudioConverter::UpmixAudio(void* aOut, c
   }
 
   return aFrames;
 }
 
 size_t
 AudioConverter::ResampleRecipientFrames(size_t aFrames) const
 {
-  return (uint64_t)aFrames * mOut.Rate() / mIn.Rate() + 1;
+  if (!aFrames && mIn.Rate() != mOut.Rate()) {
+    // The resampler is about to be drained; account for the frames
+    // currently buffered in the resampler.
+    return speex_resampler_get_output_latency(mResampler);
+  } else {
+    return (uint64_t)aFrames * mOut.Rate() / mIn.Rate() + 1;
+  }
 }
 
 size_t
 AudioConverter::FramesOutToSamples(size_t aFrames) const
 {
   return aFrames * mOut.Channels();
 }
 
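For reference, the drain logic added above boils down to pushing the resampler's remaining input latency worth of silence through it and then starting from a fresh state. A minimal standalone sketch against the raw Speex resampler API (the rates, channel count and variable names are illustrative, not taken from the patch):

#include <speex/speex_resampler.h>
#include <cstdio>
#include <vector>

int main()
{
  int error = 0;
  const unsigned channels = 2;
  SpeexResamplerState* st =
    speex_resampler_init(channels, 48000, 44100,
                         SPEEX_RESAMPLER_QUALITY_DEFAULT, &error);
  if (error != RESAMPLER_ERR_SUCCESS) {
    return 1;
  }
  // Skip the initial zeros the resampler would otherwise emit, as the
  // AudioConverter constructor does.
  speex_resampler_skip_zeros(st);

  // ... audio would normally be fed through
  // speex_resampler_process_interleaved_float() here ...

  // Drain: the resampler still holds its input latency worth of frames, so
  // push that many frames of silence through it and keep whatever comes out.
  spx_uint32_t inFrames = speex_resampler_get_input_latency(st);
  spx_uint32_t outFrames = speex_resampler_get_output_latency(st);
  std::vector<float> silence(inFrames * channels, 0.0f);
  std::vector<float> drained(outFrames * channels, 0.0f);
  speex_resampler_process_interleaved_float(st, silence.data(), &inFrames,
                                            drained.data(), &outFrames);
  std::printf("drained %u frames\n", outFrames);

  // The internal state now contains the silence we just pushed, so the
  // simplest follow-up is to throw it away and start over, which is what
  // RecreateResampler() does after DrainResampler().
  speex_resampler_destroy(st);
  return 0;
}
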
--- a/dom/media/AudioConverter.h
+++ b/dom/media/AudioConverter.h
@@ -118,16 +118,18 @@ typedef AudioDataBuffer<AudioConfig::FOR
 class AudioConverter {
 public:
   AudioConverter(const AudioConfig& aIn, const AudioConfig& aOut);
   ~AudioConverter();
 
   // Convert the AudioDataBuffer.
   // Conversion will be done in place if possible. Otherwise a new buffer will
   // be returned.
+  // If an empty buffer is provided while resampling is in use, the
+  // resampler will be drained.
   template <AudioConfig::SampleFormat Format, typename Value>
   AudioDataBuffer<Format, Value> Process(AudioDataBuffer<Format, Value>&& aBuffer)
   {
     MOZ_DIAGNOSTIC_ASSERT(mIn.Format() == mOut.Format() && mIn.Format() == Format);
     AudioDataBuffer<Format, Value> buffer = Move(aBuffer);
     if (CanWorkInPlace()) {
       size_t frames = SamplesInToFrames(buffer.Length());
       frames = ProcessInternal(buffer.Data(), buffer.Data(), frames);
@@ -147,32 +149,36 @@ public:
     MOZ_DIAGNOSTIC_ASSERT(mIn.Format() == mOut.Format() && mIn.Format() == Format);
     // Perform the downmixing / reordering in temporary buffer.
     size_t frames = SamplesInToFrames(aBuffer.Length());
     AlignedBuffer<Value> temp1;
     if (!temp1.SetLength(FramesOutToSamples(frames))) {
       return AudioDataBuffer<Format, Value>(Move(temp1));
     }
     frames = ProcessInternal(temp1.Data(), aBuffer.Data(), frames);
-    if (!frames || mIn.Rate() == mOut.Rate()) {
+    if (mIn.Rate() == mOut.Rate()) {
       temp1.SetLength(FramesOutToSamples(frames));
       return AudioDataBuffer<Format, Value>(Move(temp1));
     }
 
     // At this point, temp1 contains the buffer reordered and downmixed.
     // If we are downsampling we can re-use it.
     AlignedBuffer<Value>* outputBuffer = &temp1;
     AlignedBuffer<Value> temp2;
-    if (mOut.Rate() > mIn.Rate()) {
-      // We are upsampling, we can't work in place. Allocate another temporary
-      // buffer where the upsampling will occur.
+    if (!frames || mOut.Rate() > mIn.Rate()) {
+      // We are upsampling or about to drain; we can't work in place.
+      // Allocate another temporary buffer where the upsampling will occur.
       temp2.SetLength(FramesOutToSamples(ResampleRecipientFrames(frames)));
       outputBuffer = &temp2;
     }
-    frames = ResampleAudio(outputBuffer->Data(), temp1.Data(), frames);
+    if (!frames) {
+      frames = DrainResampler(outputBuffer->Data());
+    } else {
+      frames = ResampleAudio(outputBuffer->Data(), temp1.Data(), frames);
+    }
     outputBuffer->SetLength(FramesOutToSamples(frames));
     return AudioDataBuffer<Format, Value>(Move(*outputBuffer));
   }
 
   // Attempt to convert the AudioDataBuffer in place.
   // Will return 0 if the conversion wasn't possible.
   template <typename Value>
   size_t Process(Value* aBuffer, size_t aFrames)
@@ -218,13 +224,15 @@ private:
   size_t FramesOutToSamples(size_t aFrames) const;
   size_t SamplesInToFrames(size_t aSamples) const;
   size_t FramesOutToBytes(size_t aFrames) const;
 
   // Resampler context.
   SpeexResamplerState* mResampler;
   size_t ResampleAudio(void* aOut, const void* aIn, size_t aFrames);
   size_t ResampleRecipientFrames(size_t aFrames) const;
+  void RecreateResampler();
+  size_t DrainResampler(void* aOut);
 };
 
 } // namespace mozilla
 
 #endif /* AudioConverter_h */
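The recipient-frame estimate declared above (ResampleRecipientFrames) reduces to a simple ratio with one frame of slack; a small worked example, with rates chosen purely for illustration:

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Same arithmetic as the non-draining branch of ResampleRecipientFrames():
// scale the input frame count by the rate ratio and add one frame of slack
// to absorb integer truncation.
static size_t RecipientFrames(size_t aFrames, uint32_t aInRate, uint32_t aOutRate)
{
  return (uint64_t)aFrames * aOutRate / aInRate + 1;
}

int main()
{
  // 1024 frames at 48000Hz resampled to 44100Hz need at most 941 frames of
  // output storage: 1024 * 44100 / 48000 = 940 (truncated), plus 1.
  std::printf("%zu\n", RecipientFrames(1024, 48000, 44100));
  return 0;
}

When draining (zero input frames with differing rates), the patch instead returns the resampler's output latency, since that is all the resampler can still produce.
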
--- a/dom/media/mediasink/DecodedAudioDataSink.cpp
+++ b/dom/media/mediasink/DecodedAudioDataSink.cpp
@@ -357,16 +357,18 @@ DecodedAudioDataSink::NotifyAudioNeeded(
     if (!mConverter ||
         (data->mRate != mConverter->InputConfig().Rate() ||
          data->mChannels != mConverter->InputConfig().Channels())) {
       SINK_LOG_V("Audio format changed from %u@%uHz to %u@%uHz",
                  mConverter? mConverter->InputConfig().Channels() : 0,
                  mConverter ? mConverter->InputConfig().Rate() : 0,
                  data->mChannels, data->mRate);
 
+      DrainConverter();
+
       // mFramesParsed indicates the current playtime in frames at the current
       // input sampling rate. Recalculate it per the new sampling rate.
       if (mFramesParsed) {
         // We minimize overflow.
         uint32_t oldRate = mConverter->InputConfig().Rate();
         uint32_t newRate = data->mRate;
         CheckedInt64 result = SaferMultDiv(mFramesParsed, newRate, oldRate);
         if (!result.isValid()) {
@@ -410,16 +412,19 @@ DecodedAudioDataSink::NotifyAudioNeeded(
       missingFrames =
         SaferMultDiv(missingFrames.value(), mOutputRate, data->mRate);
       if (!missingFrames.isValid()) {
         NS_WARNING("Int overflow in DecodedAudioDataSink");
         mErrored = true;
         return;
       }
 
+      // We need to insert silence; use frames drained from the converter first.
+      missingFrames -= DrainConverter(missingFrames.value());
+      // Insert silence if still needed.
       if (missingFrames.value()) {
         AlignedAudioBuffer silenceData(missingFrames.value() * mOutputChannels);
         if (!silenceData) {
           NS_WARNING("OOM in DecodedAudioDataSink");
           mErrored = true;
           return;
         }
         RefPtr<AudioData> silence = CreateAudioFromBuffer(Move(silenceData), data);
@@ -430,20 +435,24 @@ DecodedAudioDataSink::NotifyAudioNeeded(
     mLastEndTime = data->GetEndTime();
     mFramesParsed += data->mFrames;
 
     if (mConverter->InputConfig() != mConverter->OutputConfig()) {
       AlignedAudioBuffer convertedData =
         mConverter->Process(AudioSampleBuffer(Move(data->mAudioData))).Forget();
       data = CreateAudioFromBuffer(Move(convertedData), data);
     }
-    PushProcessedAudio(data);
+    if (PushProcessedAudio(data)) {
+      mLastProcessedPacket = Some(data);
+    }
   }
 
   if (AudioQueue().IsFinished()) {
+    // We have reached the end of the data; drain the resampler.
+    DrainConverter();
     mProcessedQueue.Finish();
   }
 }
 
 uint32_t
 DecodedAudioDataSink::PushProcessedAudio(AudioData* aData)
 {
   if (!aData || !aData->mFrames) {
@@ -474,10 +483,43 @@ DecodedAudioDataSink::CreateAudioFromBuf
                   duration.value(),
                   frames,
                   Move(aBuffer),
                   mOutputChannels,
                   mOutputRate);
   return data.forget();
 }
 
+uint32_t
+DecodedAudioDataSink::DrainConverter(uint32_t aMaxFrames)
+{
+  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());
+
+  if (!mConverter || !mLastProcessedPacket || !aMaxFrames) {
+    // nothing to drain.
+    return 0;
+  }
+
+  RefPtr<AudioData> lastPacket = mLastProcessedPacket.ref();
+  mLastProcessedPacket.reset();
+
+  // To drain we simply provide an empty packet to the audio converter.
+  AlignedAudioBuffer convertedData =
+    mConverter->Process(AudioSampleBuffer(AlignedAudioBuffer())).Forget();
+
+  uint32_t frames = convertedData.Length() / mOutputChannels;
+  if (!convertedData.SetLength(std::min(frames, aMaxFrames) * mOutputChannels)) {
+    // This can never happen as we are only shrinking convertedData.
+    mErrored = true;
+    return 0;
+  }
+
+  RefPtr<AudioData> data =
+    CreateAudioFromBuffer(Move(convertedData), lastPacket);
+  if (!data) {
+    return 0;
+  }
+  mProcessedQueue.Push(data);
+  return data->mFrames;
+}
+
 } // namespace media
 } // namespace mozilla
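The silence-insertion path above converts the gap from input frames to output frames, uses whatever DrainConverter() could recover first, and only then generates silence. A rough standalone sketch of that accounting (the helper name and values are illustrative, not the sink's API):

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Convert a gap measured at the input rate into output frames, then fill it:
// frames recovered by draining the converter are used first, and only the
// remainder becomes generated silence.
static void FillGap(uint64_t aGapInputFrames,
                    uint32_t aInputRate, uint32_t aOutputRate,
                    uint64_t aDrainedFrames)
{
  uint64_t missing = aGapInputFrames * aOutputRate / aInputRate;
  uint64_t fromDrain = std::min(missing, aDrainedFrames);
  uint64_t silence = missing - fromDrain;
  std::printf("%llu output frames missing: %llu from drain, %llu of silence\n",
              (unsigned long long)missing,
              (unsigned long long)fromDrain,
              (unsigned long long)silence);
}

int main()
{
  // e.g. a 4800-frame gap at 48kHz, played back at 44.1kHz, with 32 frames
  // still buffered in the resampler: 4410 frames missing, 32 drained,
  // 4378 of silence.
  FillGap(4800, 48000, 44100, 32);
  return 0;
}
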
--- a/dom/media/mediasink/DecodedAudioDataSink.h
+++ b/dom/media/mediasink/DecodedAudioDataSink.h
@@ -109,32 +109,36 @@ private:
   Atomic<bool> mPlaybackComplete;
 
   const RefPtr<AbstractThread> mOwnerThread;
 
   // Audio Processing objects and methods
   void OnAudioPopped(const RefPtr<MediaData>& aSample);
   void OnAudioPushed(const RefPtr<MediaData>& aSample);
   void NotifyAudioNeeded();
+  // Drain the converter and add the output to the processed audio queue.
+  // A maximum of aMaxFrames will be added.
+  uint32_t DrainConverter(uint32_t aMaxFrames = UINT32_MAX);
   already_AddRefed<AudioData> CreateAudioFromBuffer(AlignedAudioBuffer&& aBuffer,
                                                     AudioData* aReference);
   // Add data to the processed queue, update mProcessedQueueLength and
   // return the number of frames added.
   uint32_t PushProcessedAudio(AudioData* aData);
   UniquePtr<AudioConverter> mConverter;
   MediaQueue<AudioData> mProcessedQueue;
   // Length in microseconds of the ProcessedQueue
   Atomic<int32_t> mProcessedQueueLength;
   MediaEventListener mAudioQueueListener;
   MediaEventListener mAudioQueueFinishListener;
   MediaEventListener mProcessedQueueListener;
   // Number of frames processed from AudioQueue(). Used to determine gaps in
   // the input stream. It indicates the time in frames since playback started
   // at the current input framerate.
   int64_t mFramesParsed;
+  Maybe<RefPtr<AudioData>> mLastProcessedPacket;
   int64_t mLastEndTime;
   // Never modified after construction.
   uint32_t mOutputRate;
   uint32_t mOutputChannels;
 };
 
 } // namespace media
 } // namespace mozilla