Bug 1406503 - P1. Abstract FFmpeg decoding so that an av_parser can also be used for audio. r=jwwang
author Jean-Yves Avenard <jyavenard@mozilla.com>
Thu, 26 Oct 2017 22:08:33 +0200
changeset 389037 5017b988318884fbb07978d1ae512362f5fab3eb
parent 389036 47c479ea63f12372a50be8f8fa9e021ee31a5c26
child 389038 a1f28a42c07fb09a46ed6862749eb9a377e477f7
push id 32777
push user archaeopteryx@coole-files.de
push date Mon, 30 Oct 2017 22:44:45 +0000
treeherder mozilla-central@dd0f265a1300 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jwwang
bugs 1406503
milestone 58.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1406503 - P1. Abstract FFmpeg decoding so that an av_parser can also be used for audio. r=jwwang MozReview-Commit-ID: 4bNxLhYKqVG
dom/media/platforms/ffmpeg/FFmpegAudioDecoder.cpp
dom/media/platforms/ffmpeg/FFmpegAudioDecoder.h
dom/media/platforms/ffmpeg/FFmpegDataDecoder.cpp
dom/media/platforms/ffmpeg/FFmpegDataDecoder.h
dom/media/platforms/ffmpeg/FFmpegVideoDecoder.cpp
dom/media/platforms/ffmpeg/FFmpegVideoDecoder.h
--- a/dom/media/platforms/ffmpeg/FFmpegAudioDecoder.cpp
+++ b/dom/media/platforms/ffmpeg/FFmpegAudioDecoder.cpp
@@ -114,111 +114,110 @@ CopyAndPackAudio(AVFrame* aFrame, uint32
         *tmp++ = AudioSampleToFloat(data[channel][frame]);
       }
     }
   }
 
   return audio;
 }
 
-RefPtr<MediaDataDecoder::DecodePromise>
-FFmpegAudioDecoder<LIBAV_VER>::ProcessDecode(MediaRawData* aSample)
+MediaResult
+FFmpegAudioDecoder<LIBAV_VER>::DoDecode(MediaRawData* aSample,
+                                        uint8_t* aData,
+                                        int aSize,
+                                        bool* aGotFrame,
+                                        DecodedData& aResults)
 {
   AVPacket packet;
   mLib->av_init_packet(&packet);
 
-  packet.data = const_cast<uint8_t*>(aSample->Data());
-  packet.size = aSample->Size();
+  packet.data = const_cast<uint8_t*>(aData);
+  packet.size = aSize;
+
+  if (aGotFrame) {
+    *aGotFrame = false;
+  }
 
   if (!PrepareFrame()) {
-    return DecodePromise::CreateAndReject(
-      MediaResult(
-        NS_ERROR_OUT_OF_MEMORY,
-        RESULT_DETAIL("FFmpeg audio decoder failed to allocate frame")),
-      __func__);
+    return MediaResult(
+      NS_ERROR_OUT_OF_MEMORY,
+      RESULT_DETAIL("FFmpeg audio decoder failed to allocate frame"));
   }
 
   int64_t samplePosition = aSample->mOffset;
   media::TimeUnit pts = aSample->mTime;
 
-  DecodedData results;
   while (packet.size > 0) {
     int decoded;
     int bytesConsumed =
       mLib->avcodec_decode_audio4(mCodecContext, mFrame, &decoded, &packet);
 
     if (bytesConsumed < 0) {
       NS_WARNING("FFmpeg audio decoder error.");
-      return DecodePromise::CreateAndReject(
-        MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR,
-                    RESULT_DETAIL("FFmpeg audio error:%d", bytesConsumed)),
-        __func__);
+      return MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR,
+                         RESULT_DETAIL("FFmpeg audio error:%d", bytesConsumed));
     }
 
     if (decoded) {
       if (mFrame->format != AV_SAMPLE_FMT_FLT &&
           mFrame->format != AV_SAMPLE_FMT_FLTP &&
           mFrame->format != AV_SAMPLE_FMT_S16 &&
           mFrame->format != AV_SAMPLE_FMT_S16P &&
           mFrame->format != AV_SAMPLE_FMT_S32 &&
           mFrame->format != AV_SAMPLE_FMT_S32P) {
-        return DecodePromise::CreateAndReject(
-          MediaResult(
-            NS_ERROR_DOM_MEDIA_DECODE_ERR,
-            RESULT_DETAIL(
-              "FFmpeg audio decoder outputs unsupported audio format")),
-          __func__);
+        return MediaResult(
+          NS_ERROR_DOM_MEDIA_DECODE_ERR,
+          RESULT_DETAIL(
+            "FFmpeg audio decoder outputs unsupported audio format"));
       }
       uint32_t numChannels = mCodecContext->channels;
       AudioConfig::ChannelLayout layout(numChannels);
       if (!layout.IsValid()) {
-        return DecodePromise::CreateAndReject(
-          MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
-                      RESULT_DETAIL("Unsupported channel layout:%u", numChannels)),
-          __func__);
+        return MediaResult(
+          NS_ERROR_DOM_MEDIA_FATAL_ERR,
+          RESULT_DETAIL("Unsupported channel layout:%u", numChannels));
       }
 
       uint32_t samplingRate = mCodecContext->sample_rate;
 
       AlignedAudioBuffer audio =
         CopyAndPackAudio(mFrame, numChannels, mFrame->nb_samples);
       if (!audio) {
-        return DecodePromise::CreateAndReject(
-          MediaResult(NS_ERROR_OUT_OF_MEMORY, __func__), __func__);
+        return MediaResult(NS_ERROR_OUT_OF_MEMORY, __func__);
       }
 
       media::TimeUnit duration =
         FramesToTimeUnit(mFrame->nb_samples, samplingRate);
       if (!duration.IsValid()) {
-        return DecodePromise::CreateAndReject(
-          MediaResult(NS_ERROR_DOM_MEDIA_OVERFLOW_ERR,
-                      RESULT_DETAIL("Invalid sample duration")),
-          __func__);
+        return MediaResult(NS_ERROR_DOM_MEDIA_OVERFLOW_ERR,
+                           RESULT_DETAIL("Invalid sample duration"));
       }
 
       media::TimeUnit newpts = pts + duration;
       if (!newpts.IsValid()) {
-        return DecodePromise::CreateAndReject(
-          MediaResult(
-            NS_ERROR_DOM_MEDIA_OVERFLOW_ERR,
-            RESULT_DETAIL("Invalid count of accumulated audio samples")),
-          __func__);
+        return MediaResult(
+          NS_ERROR_DOM_MEDIA_OVERFLOW_ERR,
+          RESULT_DETAIL("Invalid count of accumulated audio samples"));
       }
 
-      results.AppendElement(new AudioData(
+      aResults.AppendElement(new AudioData(
         samplePosition, pts, duration,
         mFrame->nb_samples, Move(audio), numChannels, samplingRate));
 
       pts = newpts;
+
+      if (aGotFrame) {
+        *aGotFrame = true;
+      }
     }
     packet.data += bytesConsumed;
     packet.size -= bytesConsumed;
     samplePosition += bytesConsumed;
   }
-  return DecodePromise::CreateAndResolve(Move(results), __func__);
+  return NS_OK;
 }
 
 RefPtr<MediaDataDecoder::DecodePromise>
 FFmpegAudioDecoder<LIBAV_VER>::ProcessDrain()
 {
   ProcessFlush();
   return DecodePromise::CreateAndResolve(DecodedData(), __func__);
 }
--- a/dom/media/platforms/ffmpeg/FFmpegAudioDecoder.h
+++ b/dom/media/platforms/ffmpeg/FFmpegAudioDecoder.h
@@ -28,15 +28,19 @@ public:
   void InitCodecContext() override;
   static AVCodecID GetCodecId(const nsACString& aMimeType);
   nsCString GetDescriptionName() const override
   {
     return NS_LITERAL_CSTRING("ffmpeg audio decoder");
   }
 
 private:
-  RefPtr<DecodePromise> ProcessDecode(MediaRawData* aSample) override;
   RefPtr<DecodePromise> ProcessDrain() override;
+  MediaResult DoDecode(MediaRawData* aSample,
+                       uint8_t* aData,
+                       int aSize,
+                       bool* aGotFrame,
+                       DecodedData& aResults) override;
 };
 
 } // namespace mozilla
 
 #endif // __FFmpegAACDecoder_h__
--- a/dom/media/platforms/ffmpeg/FFmpegDataDecoder.cpp
+++ b/dom/media/platforms/ffmpeg/FFmpegDataDecoder.cpp
@@ -19,28 +19,33 @@ namespace mozilla {
 
 StaticMutex FFmpegDataDecoder<LIBAV_VER>::sMonitor;
 
 FFmpegDataDecoder<LIBAV_VER>::FFmpegDataDecoder(FFmpegLibWrapper* aLib,
                                                 TaskQueue* aTaskQueue,
                                                 AVCodecID aCodecID)
   : mLib(aLib)
   , mCodecContext(nullptr)
+  , mCodecParser(nullptr)
   , mFrame(NULL)
   , mExtraData(nullptr)
   , mCodecID(aCodecID)
   , mTaskQueue(aTaskQueue)
 {
   MOZ_ASSERT(aLib);
   MOZ_COUNT_CTOR(FFmpegDataDecoder);
 }
 
 FFmpegDataDecoder<LIBAV_VER>::~FFmpegDataDecoder()
 {
   MOZ_COUNT_DTOR(FFmpegDataDecoder);
+  if (mCodecParser) {
+    mLib->av_parser_close(mCodecParser);
+    mCodecParser = nullptr;
+  }
 }
 
 MediaResult
 FFmpegDataDecoder<LIBAV_VER>::InitDecoder()
 {
   FFMPEG_LOG("Initialising FFmpeg decoder.");
 
   AVCodec* codec = FindAVCodec(mLib, mCodecID);
@@ -51,16 +56,23 @@ FFmpegDataDecoder<LIBAV_VER>::InitDecode
 
   StaticMutexAutoLock mon(sMonitor);
 
   if (!(mCodecContext = mLib->avcodec_alloc_context3(codec))) {
     return MediaResult(NS_ERROR_OUT_OF_MEMORY,
                        RESULT_DETAIL("Couldn't init ffmpeg context"));
   }
 
+  if (NeedParser()) {
+    MOZ_ASSERT(mCodecParser == nullptr);
+    mCodecParser = mLib->av_parser_init(mCodecID);
+    if (mCodecParser) {
+      mCodecParser->flags |= ParserFlags();
+    }
+  }
   mCodecContext->opaque = this;
 
   InitCodecContext();
 
   if (mExtraData) {
     mCodecContext->extradata_size = mExtraData->Length();
     // FFmpeg may use SIMD instructions to access the data which reads the
     // data in 32 bytes block. Must ensure we have enough data to read.
@@ -101,16 +113,64 @@ FFmpegDataDecoder<LIBAV_VER>::Shutdown()
 
 RefPtr<MediaDataDecoder::DecodePromise>
 FFmpegDataDecoder<LIBAV_VER>::Decode(MediaRawData* aSample)
 {
   return InvokeAsync<MediaRawData*>(mTaskQueue, this, __func__,
                                     &FFmpegDataDecoder::ProcessDecode, aSample);
 }
 
+RefPtr<MediaDataDecoder::DecodePromise>
+FFmpegDataDecoder<LIBAV_VER>::ProcessDecode(MediaRawData* aSample)
+{
+  bool gotFrame = false;
+  DecodedData results;
+  MediaResult rv = DoDecode(aSample, &gotFrame, results);
+  if (NS_FAILED(rv)) {
+    return DecodePromise::CreateAndReject(rv, __func__);
+  }
+  return DecodePromise::CreateAndResolve(Move(results), __func__);
+}
+
+MediaResult
+FFmpegDataDecoder<LIBAV_VER>::DoDecode(MediaRawData* aSample, bool* aGotFrame,
+                                       MediaDataDecoder::DecodedData& aResults)
+{
+  uint8_t* inputData = const_cast<uint8_t*>(aSample->Data());
+  size_t inputSize = aSample->Size();
+
+  if (inputSize && mCodecParser) {
+    while (inputSize) {
+      uint8_t* data = inputData;
+      int size = inputSize;
+      int len = mLib->av_parser_parse2(
+        mCodecParser, mCodecContext, &data, &size, inputData, inputSize,
+        aSample->mTime.ToMicroseconds(), aSample->mTimecode.ToMicroseconds(),
+        aSample->mOffset);
+      if (size_t(len) > inputSize) {
+        return NS_ERROR_DOM_MEDIA_DECODE_ERR;
+      }
+      inputData += len;
+      inputSize -= len;
+      if (size) {
+        bool gotFrame = false;
+        MediaResult rv = DoDecode(aSample, data, size, &gotFrame, aResults);
+        if (NS_FAILED(rv)) {
+          return rv;
+        }
+        if (gotFrame && aGotFrame) {
+          *aGotFrame = true;
+        }
+      }
+    }
+    return NS_OK;
+  }
+  return DoDecode(aSample, inputData, inputSize, aGotFrame, aResults);
+}
+
 RefPtr<MediaDataDecoder::FlushPromise>
 FFmpegDataDecoder<LIBAV_VER>::Flush()
 {
   return InvokeAsync(mTaskQueue, this, __func__,
                      &FFmpegDataDecoder<LIBAV_VER>::ProcessFlush);
 }
 
 RefPtr<MediaDataDecoder::DecodePromise>
--- a/dom/media/platforms/ffmpeg/FFmpegDataDecoder.h
+++ b/dom/media/platforms/ffmpeg/FFmpegDataDecoder.h
@@ -37,29 +37,40 @@ public:
 
   static AVCodec* FindAVCodec(FFmpegLibWrapper* aLib, AVCodecID aCodec);
 
 protected:
   // Flush and Drain operation, always run
   virtual RefPtr<FlushPromise> ProcessFlush();
   virtual void ProcessShutdown();
   virtual void InitCodecContext() { }
-  AVFrame*        PrepareFrame();
-  MediaResult     InitDecoder();
+  AVFrame* PrepareFrame();
+  MediaResult InitDecoder();
+  MediaResult DoDecode(MediaRawData* aSample,
+                       bool* aGotFrame,
+                       DecodedData& aOutResults);
 
   FFmpegLibWrapper* mLib;
 
   AVCodecContext* mCodecContext;
-  AVFrame*        mFrame;
+  AVCodecParserContext* mCodecParser;
+  AVFrame* mFrame;
   RefPtr<MediaByteBuffer> mExtraData;
   AVCodecID mCodecID;
 
 private:
-  virtual RefPtr<DecodePromise> ProcessDecode(MediaRawData* aSample) = 0;
+  RefPtr<DecodePromise> ProcessDecode(MediaRawData* aSample);
   virtual RefPtr<DecodePromise> ProcessDrain() = 0;
+  virtual MediaResult DoDecode(MediaRawData* aSample,
+                               uint8_t* aData,
+                               int aSize,
+                               bool* aGotFrame,
+                               MediaDataDecoder::DecodedData& aOutResults) = 0;
+  virtual bool NeedParser() const { return false; }
+  virtual int ParserFlags() const { return PARSER_FLAG_COMPLETE_FRAMES; }
 
   static StaticMutex sMonitor;
   const RefPtr<TaskQueue> mTaskQueue;
   MozPromiseHolder<DecodePromise> mPromise;
 };
 
 } // namespace mozilla
 
--- a/dom/media/platforms/ffmpeg/FFmpegVideoDecoder.cpp
+++ b/dom/media/platforms/ffmpeg/FFmpegVideoDecoder.cpp
@@ -123,21 +123,19 @@ FFmpegVideoDecoder<LIBAV_VER>::PtsCorrec
 FFmpegVideoDecoder<LIBAV_VER>::FFmpegVideoDecoder(
   FFmpegLibWrapper* aLib, TaskQueue* aTaskQueue, const VideoInfo& aConfig,
   KnowsCompositor* aAllocator, ImageContainer* aImageContainer,
   bool aLowLatency)
   : FFmpegDataDecoder(aLib, aTaskQueue, GetCodecId(aConfig.mMimeType))
   , mImageAllocator(aAllocator)
   , mImageContainer(aImageContainer)
   , mInfo(aConfig)
-  , mCodecParser(nullptr)
   , mLastInputDts(INT64_MIN)
   , mLowLatency(aLowLatency)
 {
-  MOZ_COUNT_CTOR(FFmpegVideoDecoder);
   // Use a new MediaByteBuffer as the object will be modified during
   // initialization.
   mExtraData = new MediaByteBuffer;
   mExtraData->AppendElements(*aConfig.mExtraData);
 }
 
 RefPtr<MediaDataDecoder::InitPromise>
 FFmpegVideoDecoder<LIBAV_VER>::Init()
@@ -179,75 +177,16 @@ FFmpegVideoDecoder<LIBAV_VER>::InitCodec
     mCodecContext->thread_count = decode_threads;
     if (decode_threads > 1) {
       mCodecContext->thread_type = FF_THREAD_SLICE | FF_THREAD_FRAME;
     }
   }
 
   // FFmpeg will call back to this to negotiate a video pixel format.
   mCodecContext->get_format = ChoosePixelFormat;
-
-  mCodecParser = mLib->av_parser_init(mCodecID);
-  if (mCodecParser) {
-    mCodecParser->flags |= PARSER_FLAG_COMPLETE_FRAMES;
-  }
-}
-
-RefPtr<MediaDataDecoder::DecodePromise>
-FFmpegVideoDecoder<LIBAV_VER>::ProcessDecode(MediaRawData* aSample)
-{
-  bool gotFrame = false;
-  DecodedData results;
-  MediaResult rv = DoDecode(aSample, &gotFrame, results);
-  if (NS_FAILED(rv)) {
-    return DecodePromise::CreateAndReject(rv, __func__);
-  }
-  return DecodePromise::CreateAndResolve(Move(results), __func__);
-}
-
-MediaResult
-FFmpegVideoDecoder<LIBAV_VER>::DoDecode(MediaRawData* aSample, bool* aGotFrame,
-                                        MediaDataDecoder::DecodedData& aResults)
-{
-  uint8_t* inputData = const_cast<uint8_t*>(aSample->Data());
-  size_t inputSize = aSample->Size();
-
-#if LIBAVCODEC_VERSION_MAJOR >= 54
-  if (inputSize && mCodecParser && (mCodecID == AV_CODEC_ID_VP8
-#if LIBAVCODEC_VERSION_MAJOR >= 55
-      || mCodecID == AV_CODEC_ID_VP9
-#endif
-      )) {
-    while (inputSize) {
-      uint8_t* data = inputData;
-      int size = inputSize;
-      int len = mLib->av_parser_parse2(
-        mCodecParser, mCodecContext, &data, &size, inputData, inputSize,
-        aSample->mTime.ToMicroseconds(), aSample->mTimecode.ToMicroseconds(),
-        aSample->mOffset);
-      if (size_t(len) > inputSize) {
-        return NS_ERROR_DOM_MEDIA_DECODE_ERR;
-      }
-      inputData += len;
-      inputSize -= len;
-      if (size) {
-        bool gotFrame = false;
-        MediaResult rv = DoDecode(aSample, data, size, &gotFrame, aResults);
-        if (NS_FAILED(rv)) {
-          return rv;
-        }
-        if (gotFrame && aGotFrame) {
-          *aGotFrame = true;
-        }
-      }
-    }
-    return NS_OK;
-  }
-#endif
-  return DoDecode(aSample, inputData, inputSize, aGotFrame, aResults);
 }
 
 MediaResult
 FFmpegVideoDecoder<LIBAV_VER>::DoDecode(MediaRawData* aSample,
                                         uint8_t* aData, int aSize,
                                         bool* aGotFrame,
                                         MediaDataDecoder::DecodedData& aResults)
 {
@@ -421,38 +360,30 @@ FFmpegVideoDecoder<LIBAV_VER>::DoDecode(
 
 RefPtr<MediaDataDecoder::DecodePromise>
 FFmpegVideoDecoder<LIBAV_VER>::ProcessDrain()
 {
   RefPtr<MediaRawData> empty(new MediaRawData());
   empty->mTimecode = TimeUnit::FromMicroseconds(mLastInputDts);
   bool gotFrame = false;
   DecodedData results;
-  while (NS_SUCCEEDED(DoDecode(empty, &gotFrame, results)) && gotFrame) {
+  while (NS_SUCCEEDED(DoDecode(empty, nullptr, 0, &gotFrame, results)) &&
+         gotFrame) {
   }
   return DecodePromise::CreateAndResolve(Move(results), __func__);
 }
 
 RefPtr<MediaDataDecoder::FlushPromise>
 FFmpegVideoDecoder<LIBAV_VER>::ProcessFlush()
 {
   mPtsContext.Reset();
   mDurationMap.Clear();
   return FFmpegDataDecoder::ProcessFlush();
 }
 
-FFmpegVideoDecoder<LIBAV_VER>::~FFmpegVideoDecoder()
-{
-  MOZ_COUNT_DTOR(FFmpegVideoDecoder);
-  if (mCodecParser) {
-    mLib->av_parser_close(mCodecParser);
-    mCodecParser = nullptr;
-  }
-}
-
 AVCodecID
 FFmpegVideoDecoder<LIBAV_VER>::GetCodecId(const nsACString& aMimeType)
 {
   if (MP4Decoder::IsH264(aMimeType)) {
     return AV_CODEC_ID_H264;
   }
 
   if (aMimeType.EqualsLiteral("video/x-vnd.on2.vp6")) {
--- a/dom/media/platforms/ffmpeg/FFmpegVideoDecoder.h
+++ b/dom/media/platforms/ffmpeg/FFmpegVideoDecoder.h
@@ -28,17 +28,16 @@ class FFmpegVideoDecoder<LIBAV_VER> : pu
   typedef SimpleMap<int64_t> DurationMap;
 
 public:
   FFmpegVideoDecoder(FFmpegLibWrapper* aLib, TaskQueue* aTaskQueue,
                      const VideoInfo& aConfig,
                      KnowsCompositor* aAllocator,
                      ImageContainer* aImageContainer,
                      bool aLowLatency);
-  virtual ~FFmpegVideoDecoder();
 
   RefPtr<InitPromise> Init() override;
   void InitCodecContext() override;
   nsCString GetDescriptionName() const override
   {
 #ifdef USING_MOZFFVPX
     return NS_LITERAL_CSTRING("ffvpx video decoder");
 #else
@@ -48,41 +47,46 @@ public:
   ConversionRequired NeedsConversion() const override
   {
     return ConversionRequired::kNeedAVCC;
   }
 
   static AVCodecID GetCodecId(const nsACString& aMimeType);
 
 private:
-  RefPtr<DecodePromise> ProcessDecode(MediaRawData* aSample) override;
   RefPtr<DecodePromise> ProcessDrain() override;
   RefPtr<FlushPromise> ProcessFlush() override;
-  MediaResult DoDecode(MediaRawData* aSample, bool* aGotFrame,
-                       DecodedData& aResults);
-  MediaResult DoDecode(MediaRawData* aSample, uint8_t* aData, int aSize,
-                       bool* aGotFrame, DecodedData& aResults);
+  MediaResult DoDecode(MediaRawData* aSample,
+                       uint8_t* aData,
+                       int aSize,
+                       bool* aGotFrame,
+                       DecodedData& aResults) override;
   void OutputDelayedFrames();
+  bool NeedParser() const override
+  {
+    return
+#if LIBAVCODEC_VERSION_MAJOR >= 55
+      mCodecID == AV_CODEC_ID_VP9 ||
+#endif
+      mCodecID == AV_CODEC_ID_VP8;
+  }
 
   /**
    * This method allocates a buffer for FFmpeg's decoder, wrapped in an Image.
    * Currently it only supports Planar YUV420, which appears to be the only
    * non-hardware accelerated image format that FFmpeg's H264 decoder is
    * capable of outputting.
    */
   int AllocateYUV420PVideoBuffer(AVCodecContext* aCodecContext,
                                  AVFrame* aFrame);
 
   RefPtr<KnowsCompositor> mImageAllocator;
   RefPtr<ImageContainer> mImageContainer;
   VideoInfo mInfo;
 
-  // Parser used for VP8 and VP9 decoding.
-  AVCodecParserContext* mCodecParser;
-
   class PtsCorrectionContext
   {
   public:
     PtsCorrectionContext();
     int64_t GuessCorrectPts(int64_t aPts, int64_t aDts);
     void Reset();
     int64_t LastDts() const { return mLastDts; }