Bug 1513511 - P1. Use new FFmpeg decode API with recent FFmpeg version. r=bryce, a=jcristau
authorJean-Yves Avenard <jyavenard@mozilla.com>
Mon, 17 Dec 2018 17:29:13 +0000
changeset 501486 19efb724bf2aeb4190697c90e91b554744390ea5
parent 501485 308fd26a204ea6fbeb163b2f9c6d47b7387cb06e
child 501487 ceab3f2abadf7b06c997d6be2a58d3549af7834b
push id1891
push userjcristau@mozilla.com
push dateTue, 08 Jan 2019 16:05:30 +0000
treeherdermozilla-release@c58ea2229c33 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersbryce, jcristau
bugs1513511
milestone64.0.2
Bug 1513511 - P1. Use new FFmpeg decode API with recent FFmpeg version. r=bryce, a=jcristau In libavcodec 58 and later, the old avcodec_decode_video2 is broken and only return the first visible frame found after a VP9 super-frame. This resulted in some YouTube videos for about 10% of the frames to never be returned. Only the new API properly behaves so we upgrade our code to use it. Differential Revision: https://phabricator.services.mozilla.com/D14682
dom/media/platforms/ffmpeg/FFmpegDataDecoder.cpp
dom/media/platforms/ffmpeg/FFmpegLibWrapper.cpp
dom/media/platforms/ffmpeg/FFmpegLibWrapper.h
dom/media/platforms/ffmpeg/FFmpegVideoDecoder.cpp
dom/media/platforms/ffmpeg/FFmpegVideoDecoder.h
media/ffvpx/libavcodec/avcodec.symbols
--- a/dom/media/platforms/ffmpeg/FFmpegDataDecoder.cpp
+++ b/dom/media/platforms/ffmpeg/FFmpegDataDecoder.cpp
@@ -203,16 +203,19 @@ FFmpegDataDecoder<LIBAV_VER>::Drain()
 
 RefPtr<MediaDataDecoder::DecodePromise>
 FFmpegDataDecoder<LIBAV_VER>::ProcessDrain()
 {
   RefPtr<MediaRawData> empty(new MediaRawData());
   empty->mTimecode = mLastInputDts;
   bool gotFrame = false;
   DecodedData results;
+  // When draining the FFmpeg decoder will return either a single frame at a
+  // time until gotFrame is set to false; or return a block of frames with
+  // NS_ERROR_DOM_MEDIA_END_OF_STREAM
   while (NS_SUCCEEDED(DoDecode(empty, &gotFrame, results)) &&
          gotFrame) {
   }
   return DecodePromise::CreateAndResolve(std::move(results), __func__);
 }
 
 RefPtr<MediaDataDecoder::FlushPromise>
 FFmpegDataDecoder<LIBAV_VER>::ProcessFlush()
--- a/dom/media/platforms/ffmpeg/FFmpegLibWrapper.cpp
+++ b/dom/media/platforms/ffmpeg/FFmpegLibWrapper.cpp
@@ -133,16 +133,18 @@ FFmpegLibWrapper::Link()
   AV_FUNC(avcodec_register_all, AV_FUNC_AVCODEC_ALL)
   AV_FUNC(av_init_packet, AV_FUNC_AVCODEC_ALL)
   AV_FUNC(av_parser_init, AV_FUNC_AVCODEC_ALL)
   AV_FUNC(av_parser_close, AV_FUNC_AVCODEC_ALL)
   AV_FUNC(av_parser_parse2, AV_FUNC_AVCODEC_ALL)
   AV_FUNC(avcodec_alloc_frame, (AV_FUNC_53 | AV_FUNC_54))
   AV_FUNC(avcodec_get_frame_defaults, (AV_FUNC_53 | AV_FUNC_54))
   AV_FUNC(avcodec_free_frame, AV_FUNC_54)
+  AV_FUNC(avcodec_send_packet, AV_FUNC_58)
+  AV_FUNC(avcodec_receive_frame, AV_FUNC_58)
   AV_FUNC_OPTION(av_rdft_init, AV_FUNC_AVCODEC_ALL)
   AV_FUNC_OPTION(av_rdft_calc, AV_FUNC_AVCODEC_ALL)
   AV_FUNC_OPTION(av_rdft_end, AV_FUNC_AVCODEC_ALL)
   AV_FUNC(av_log_set_level, AV_FUNC_AVUTIL_ALL)
   AV_FUNC(av_malloc, AV_FUNC_AVUTIL_ALL)
   AV_FUNC(av_freep, AV_FUNC_AVUTIL_ALL)
   AV_FUNC(av_frame_alloc, (AV_FUNC_AVUTIL_55 | AV_FUNC_AVUTIL_56 | AV_FUNC_AVUTIL_57 | AV_FUNC_AVUTIL_58))
   AV_FUNC(av_frame_free, (AV_FUNC_AVUTIL_55 | AV_FUNC_AVUTIL_56 | AV_FUNC_AVUTIL_57 | AV_FUNC_AVUTIL_58))
--- a/dom/media/platforms/ffmpeg/FFmpegLibWrapper.h
+++ b/dom/media/platforms/ffmpeg/FFmpegLibWrapper.h
@@ -71,16 +71,20 @@ struct MOZ_ONLY_USED_TO_AVOID_STATIC_CON
   int (*av_parser_parse2)(AVCodecParserContext* s, AVCodecContext* avctx, uint8_t** poutbuf, int* poutbuf_size, const uint8_t* buf, int buf_size, int64_t pts, int64_t dts, int64_t pos);
 
   // only used in libavcodec <= 54
   AVFrame* (*avcodec_alloc_frame)();
   void (*avcodec_get_frame_defaults)(AVFrame* pic);
   // libavcodec v54 only
   void (*avcodec_free_frame)(AVFrame** frame);
 
+  // libavcodec v58 and later only
+  int (*avcodec_send_packet)(AVCodecContext* avctx, const AVPacket* avpkt);
+  int (*avcodec_receive_frame)(AVCodecContext* avctx, AVFrame* frame);
+
   // libavcodec optional
   AvRdftInitFn av_rdft_init;
   AvRdftCalcFn av_rdft_calc;
   AvRdftEndFn av_rdft_end;
 
   // libavutil
   void (*av_log_set_level)(int level);
   void*	(*av_malloc)(size_t size);
--- a/dom/media/platforms/ffmpeg/FFmpegVideoDecoder.cpp
+++ b/dom/media/platforms/ffmpeg/FFmpegVideoDecoder.cpp
@@ -2,18 +2,18 @@
 /* vim:set ts=2 sw=2 sts=2 et cindent: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "FFmpegVideoDecoder.h"
 #include "FFmpegLog.h"
 #include "ImageContainer.h"
+#include "MP4Decoder.h"
 #include "MediaInfo.h"
-#include "MP4Decoder.h"
 #include "VPXDecoder.h"
 #include "mozilla/layers/KnowsCompositor.h"
 
 #include "libavutil/pixfmt.h"
 #if LIBAVCODEC_VERSION_MAJOR < 54
 #define AVPixelFormat PixelFormat
 #define AV_PIX_FMT_YUV420P PIX_FMT_YUV420P
 #define AV_PIX_FMT_YUVJ420P PIX_FMT_YUVJ420P
@@ -204,16 +204,58 @@ FFmpegVideoDecoder<LIBAV_VER>::DoDecode(
 
   packet.data = aData;
   packet.size = aSize;
   packet.dts = aSample->mTimecode.ToMicroseconds();
   packet.pts = aSample->mTime.ToMicroseconds();
   packet.flags = aSample->mKeyframe ? AV_PKT_FLAG_KEY : 0;
   packet.pos = aSample->mOffset;
 
+#if LIBAVCODEC_VERSION_MAJOR >= 58
+  packet.duration = aSample->mDuration.ToMicroseconds();
+  int res = mLib->avcodec_send_packet(mCodecContext, &packet);
+  if (res < 0) {
+    // In theory, avcodec_send_packet could sent -EAGAIN should its internal
+    // buffers be full. In practice this can't happen as we only feed one frame
+    // at a time, and we immediately call avcodec_receive_frame right after.
+    FFMPEG_LOG("avcodec_send_packet error: %d", res);
+    return MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR,
+                       RESULT_DETAIL("avcodec_send_packet error: %d", res));
+  }
+
+  if (aGotFrame) {
+    *aGotFrame = false;
+  }
+  do {
+    if (!PrepareFrame()) {
+      NS_WARNING("FFmpeg h264 decoder failed to allocate frame.");
+      return MediaResult(NS_ERROR_OUT_OF_MEMORY, __func__);
+    }
+    res = mLib->avcodec_receive_frame(mCodecContext, mFrame);
+    if (res == int(AVERROR_EOF)) {
+      return NS_ERROR_DOM_MEDIA_END_OF_STREAM;
+    }
+    if (res == AVERROR(EAGAIN)) {
+      return NS_OK;
+    }
+    if (res < 0) {
+      FFMPEG_LOG("avcodec_receive_frame error: %d", res);
+      return MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR,
+                         RESULT_DETAIL("avcodec_receive_frame error: %d", res));
+    }
+    MediaResult rv = CreateImage(mFrame->pkt_pos, mFrame->pkt_pts,
+                                 mFrame->pkt_duration, aResults);
+    if (NS_FAILED(rv)) {
+      return rv;
+    }
+    if (aGotFrame) {
+      *aGotFrame = true;
+    }
+  } while (true);
+#else
   // LibAV provides no API to retrieve the decoded sample's duration.
   // (FFmpeg >= 1.0 provides av_frame_get_pkt_duration)
   // As such we instead use a map using the dts as key that we will retrieve
   // later.
   // The map will have a typical size of 16 entry.
   mDurationMap.Insert(
     aSample->mTimecode.ToMicroseconds(), aSample->mDuration.ToMicroseconds());
 
@@ -257,20 +299,32 @@ FFmpegVideoDecoder<LIBAV_VER>::DoDecode(
   if (!mDurationMap.Find(mFrame->pkt_dts, duration)) {
     NS_WARNING("Unable to retrieve duration from map");
     duration = aSample->mDuration.ToMicroseconds();
     // dts are probably incorrectly reported ; so clear the map as we're
     // unlikely to find them in the future anyway. This also guards
     // against the map becoming extremely big.
     mDurationMap.Clear();
   }
+
+  MediaResult rv = CreateImage(aSample->mOffset, pts, duration, aResults);
+  if (NS_SUCCEEDED(rv) && aGotFrame) {
+    *aGotFrame = true;
+  }
+  return rv;
+#endif
+}
+
+MediaResult FFmpegVideoDecoder<LIBAV_VER>::CreateImage(
+    int64_t aOffset, int64_t aPts, int64_t aDuration,
+    MediaDataDecoder::DecodedData& aResults) {
   FFMPEG_LOG(
     "Got one frame output with pts=%" PRId64 " dts=%" PRId64
     " duration=%" PRId64 " opaque=%" PRId64,
-    pts, mFrame->pkt_dts, duration, mCodecContext->reordered_opaque);
+    aPts, mFrame->pkt_dts, aDuration, mCodecContext->reordered_opaque);
 
   VideoData::YCbCrBuffer b;
   b.mPlanes[0].mData = mFrame->data[0];
   b.mPlanes[1].mData = mFrame->data[1];
   b.mPlanes[2].mData = mFrame->data[2];
 
   b.mPlanes[0].mStride = mFrame->linesize[0];
   b.mPlanes[1].mStride = mFrame->linesize[1];
@@ -344,34 +398,31 @@ FFmpegVideoDecoder<LIBAV_VER>::DoDecode(
         break;
       default:
         break;
     }
   }
   RefPtr<VideoData> v =
     VideoData::CreateAndCopyData(mInfo,
                                   mImageContainer,
-                                  aSample->mOffset,
-                                  TimeUnit::FromMicroseconds(pts),
-                                  TimeUnit::FromMicroseconds(duration),
+                                  aOffset,
+                                  TimeUnit::FromMicroseconds(aPts),
+                                  TimeUnit::FromMicroseconds(aDuration),
                                   b,
                                   !!mFrame->key_frame,
                                   TimeUnit::FromMicroseconds(-1),
                                   mInfo.ScaledImageRect(mFrame->width,
                                                         mFrame->height),
                                   mImageAllocator);
 
   if (!v) {
     return MediaResult(NS_ERROR_OUT_OF_MEMORY,
                        RESULT_DETAIL("image allocation error"));
   }
   aResults.AppendElement(std::move(v));
-  if (aGotFrame) {
-    *aGotFrame = true;
-  }
   return NS_OK;
 }
 
 RefPtr<MediaDataDecoder::FlushPromise>
 FFmpegVideoDecoder<LIBAV_VER>::ProcessFlush()
 {
   mPtsContext.Reset();
   mDurationMap.Clear();
--- a/dom/media/platforms/ffmpeg/FFmpegVideoDecoder.h
+++ b/dom/media/platforms/ffmpeg/FFmpegVideoDecoder.h
@@ -64,22 +64,29 @@ private:
                        uint8_t* aData,
                        int aSize,
                        bool* aGotFrame,
                        DecodedData& aResults) override;
   void OutputDelayedFrames();
   bool NeedParser() const override
   {
     return
+#if LIBAVCODEC_VERSION_MAJOR >= 58
+        false;
+#else
 #if LIBAVCODEC_VERSION_MAJOR >= 55
       mCodecID == AV_CODEC_ID_VP9 ||
 #endif
       mCodecID == AV_CODEC_ID_VP8;
+#endif
   }
 
+  MediaResult CreateImage(int64_t aOffset, int64_t aPts, int64_t aDuration,
+                          MediaDataDecoder::DecodedData& aResults);
+
   /**
    * This method allocates a buffer for FFmpeg's decoder, wrapped in an Image.
    * Currently it only supports Planar YUV420, which appears to be the only
    * non-hardware accelerated image format that FFmpeg's H264 decoder is
    * capable of outputting.
    */
   int AllocateYUV420PVideoBuffer(AVCodecContext* aCodecContext,
                                  AVFrame* aFrame);
--- a/media/ffvpx/libavcodec/avcodec.symbols
+++ b/media/ffvpx/libavcodec/avcodec.symbols
@@ -97,8 +97,10 @@ avcodec_get_type
 avcodec_is_open
 avcodec_license
 avcodec_open2
 avcodec_register
 avcodec_register_all
 avcodec_string
 avcodec_version
 avsubtitle_free
+avcodec_send_packet
+avcodec_receive_frame