1301869: P1. Do not rely on MFT passing the proper time values. r?cpearce draft
authorJean-Yves Avenard <jyavenard@mozilla.com>
Thu, 29 Sep 2016 18:57:43 +1000
changeset 420088 309104a1b4695c8e92f43a5799eb2cd65aa05c01
parent 420017 955840bfd3c20eb24dd5a01be27bdc55c489a285
child 420089 a471e4c5b910b4ce3e2f6dbb9413fb656d81e8c2
push id31087
push userbmo:jyavenard@mozilla.com
push dateMon, 03 Oct 2016 08:57:18 +0000
reviewerscpearce
bugs1301869
milestone52.0a1
1301869: P1. Do not rely on MFT passing the proper time values. r?cpearce The H264 decoding MFT often does not return the time originally set on the input sample. So instead we use the sample time field as a sample ID, which we then use to retrieve the original sample's time and duration. MozReview-Commit-ID: 5DNj24FzmiP
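In outline: Input() records each sample's real time and duration in a table keyed by an incrementing ID and sets that ID as the IMFSample time; Output() reads the (possibly rescaled) time back, corrects for any scaling the MFT applied, and looks the entry up to restore the true timestamps. A minimal standalone sketch of the lookup-table idea follows; the names (SampleEntry fields, RecordSample, RestoreTimestamps) are illustrative rather than the patch's exact code, std::vector stands in for nsTArray, and the mTimestampRatio correction is omitted.

// Sketch only: the ID-to-timestamp mapping described above, with assumed names.
#include <cstdint>
#include <vector>

struct SampleEntry
{
  uint32_t mId;      // value set as the IMFSample time on input
  int64_t mTime;     // original presentation time (microseconds)
  int64_t mDuration; // original duration (microseconds)
};

// Input side: remember the real timestamps; the returned ID is what gets set
// as the sample time on the IMFSample handed to the MFT.
uint32_t RecordSample(std::vector<SampleEntry>& aTable, uint32_t& aNextId,
                      int64_t aTime, int64_t aDuration)
{
  uint32_t id = aNextId++;
  aTable.push_back({id, aTime, aDuration});
  return id;
}

// Output side: map the ID the MFT returned as the sample time back to the
// original timestamps and drop the entry. Returns false if the ID is unknown,
// which corresponds to the mismatched-id failure path in Output().
bool RestoreTimestamps(std::vector<SampleEntry>& aTable, int64_t aReturnedId,
                       int64_t& aOutTime, int64_t& aOutDuration)
{
  for (size_t i = 0; i < aTable.size(); ++i) {
    if (aTable[i].mId == aReturnedId) {
      aOutTime = aTable[i].mTime;
      aOutDuration = aTable[i].mDuration;
      aTable.erase(aTable.begin() + i);
      return true;
    }
  }
  return false;
}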
dom/media/platforms/wmf/WMFVideoMFTManager.cpp
dom/media/platforms/wmf/WMFVideoMFTManager.h
--- a/dom/media/platforms/wmf/WMFVideoMFTManager.cpp
+++ b/dom/media/platforms/wmf/WMFVideoMFTManager.cpp
@@ -498,16 +498,23 @@ WMFVideoMFTManager::SetDecoderMediaTypes
   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
 
   hr = inputType->SetGUID(MF_MT_SUBTYPE, GetMediaSubtypeGUID());
   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
 
   hr = inputType->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_MixedInterlaceOrProgressive);
   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
 
+  // We set the frame rate to 1 fps so that the MFT will not attempt to
+  // estimate the duration of each frame. However, it still does so for some
+  // videos, or when low latency mode is enabled.
+  // In Output() we attempt to determine what that estimate was.
+  hr = MFSetAttributeRatio(inputType, MF_MT_FRAME_RATE, 1, 1);
+  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
+
   RefPtr<IMFMediaType> outputType;
   hr = wmf::MFCreateMediaType(getter_AddRefs(outputType));
   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
 
   hr = outputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
 
   GUID outputSubType = mUseHwAccel ? MFVideoFormat_NV12 : MFVideoFormat_YV12;
@@ -526,23 +533,23 @@ WMFVideoMFTManager::Input(MediaRawData* 
 
   if (!mDecoder) {
     // This can happen during shutdown.
     return E_FAIL;
   }
 
   HRESULT hr = mDecoder->CreateInputSample(aSample->Data(),
                                            uint32_t(aSample->Size()),
-                                           aSample->mTime,
+                                           mCurrentId,
                                            &mLastInput);
   NS_ENSURE_TRUE(SUCCEEDED(hr) && mLastInput != nullptr, hr);
 
+  mSamplesTable.AppendElement(
+    SampleEntry(mCurrentId++, aSample->mTime, aSample->mDuration));
+
   mLastDuration = aSample->mDuration;
-  mLastTime = aSample->mTime;
-  mSamplesCount++;
 
   // Forward sample data to the decoder.
   return mDecoder->Input(mLastInput);
 }
 
 class SupportsConfigEvent : public Runnable {
 public:
   SupportsConfigEvent(DXVA2Manager* aDXVA2Manager, IMFMediaType* aMediaType, float aFramerate)
@@ -740,30 +747,26 @@ WMFVideoMFTManager::CreateBasicVideoFram
   // V plane (Cr)
   b.mPlanes[2].mData = data + y_size;
   b.mPlanes[2].mStride = halfStride;
   b.mPlanes[2].mHeight = halfHeight;
   b.mPlanes[2].mWidth = halfWidth;
   b.mPlanes[2].mOffset = 0;
   b.mPlanes[2].mSkip = 0;
 
-  media::TimeUnit pts = GetSampleTime(aSample);
-  NS_ENSURE_TRUE(pts.IsValid(), E_FAIL);
-  media::TimeUnit duration = GetSampleDuration(aSample);
-  NS_ENSURE_TRUE(duration.IsValid(), E_FAIL);
   nsIntRect pictureRegion = mVideoInfo.ScaledImageRect(videoWidth, videoHeight);
 
   if (mLayersBackend != LayersBackend::LAYERS_D3D9 &&
       mLayersBackend != LayersBackend::LAYERS_D3D11) {
     RefPtr<VideoData> v =
       VideoData::CreateAndCopyData(mVideoInfo,
                                    mImageContainer,
                                    aStreamOffset,
-                                   pts.ToMicroseconds(),
-                                   duration.ToMicroseconds(),
+                                   0, // pts: set in Output()
+                                   0, // duration: set in Output()
                                    b,
                                    false,
                                    -1,
                                    pictureRegion);
     if (twoDBuffer) {
       twoDBuffer->Unlock2D();
     } else {
       buffer->Unlock();
@@ -779,18 +782,18 @@ WMFVideoMFTManager::CreateBasicVideoFram
                                  mVideoInfo,
                                  b,
                                  pictureRegion,
                                  false);
 
   RefPtr<VideoData> v =
     VideoData::CreateFromImage(mVideoInfo,
                                aStreamOffset,
-                               pts.ToMicroseconds(),
-                               duration.ToMicroseconds(),
+                               0, // pts: set in Output()
+                               0, // duration: set in Output()
                                image.forget(),
                                false,
                                -1,
                                pictureRegion);
 
   v.forget(aOutVideoData);
   return S_OK;
 }
@@ -812,24 +815,20 @@ WMFVideoMFTManager::CreateD3DVideoFrame(
     mVideoInfo.ScaledImageRect(mImageSize.width, mImageSize.height);
   RefPtr<Image> image;
   hr = mDXVA2Manager->CopyToImage(aSample,
                                   pictureRegion,
                                   getter_AddRefs(image));
   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
   NS_ENSURE_TRUE(image, E_FAIL);
 
-  media::TimeUnit pts = GetSampleTime(aSample);
-  NS_ENSURE_TRUE(pts.IsValid(), E_FAIL);
-  media::TimeUnit duration = GetSampleDuration(aSample);
-  NS_ENSURE_TRUE(duration.IsValid(), E_FAIL);
   RefPtr<VideoData> v = VideoData::CreateFromImage(mVideoInfo,
                                                    aStreamOffset,
-                                                   pts.ToMicroseconds(),
-                                                   duration.ToMicroseconds(),
+                                                   0, // pts: set in Output()
+                                                   0, // duration: set in Output()
                                                    image.forget(),
                                                    false,
                                                    -1,
                                                    pictureRegion);
 
   NS_ENSURE_TRUE(v, E_FAIL);
   v.forget(aOutVideoData);
 
@@ -840,31 +839,33 @@ WMFVideoMFTManager::CreateD3DVideoFrame(
 HRESULT
 WMFVideoMFTManager::Output(int64_t aStreamOffset,
                            RefPtr<MediaData>& aOutData)
 {
   RefPtr<IMFSample> sample;
   HRESULT hr;
   aOutData = nullptr;
   int typeChangeCount = 0;
-  bool wasDraining = mDraining;
-  int64_t sampleCount = mSamplesCount;
-  if (wasDraining) {
-    mSamplesCount = 0;
-    mDraining = false;
-  }
 
   media::TimeUnit pts;
   media::TimeUnit duration;
 
   // Loop until we decode a sample, or an unexpected error that we can't
   // handle occurs.
   while (true) {
     hr = mDecoder->Output(&sample);
     if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) {
+      if (mDraining) {
+        NS_ASSERTION(mSamplesTable.IsEmpty(),
+                     "We should have processed all samples");
+        mCurrentId = 0;
+        mDraining = false;
+        mTimestampRatio.reset();
+        mSamplesTable.Clear();
+      }
       return MF_E_TRANSFORM_NEED_MORE_INPUT;
     }
     if (hr == MF_E_TRANSFORM_STREAM_CHANGE) {
      // Video stream output type change. Probably a geometric aperture
       // change. Reconfigure the video geometry, so that we output the
       // correct size frames.
       MOZ_ASSERT(!sample);
       hr = ConfigureVideoFrameGeometry();
@@ -890,30 +891,55 @@ WMFVideoMFTManager::Output(int64_t aStre
         ++mNullOutputCount;
         if (mNullOutputCount > 250) {
           LOG("Excessive Video MFTDecoder returning success but no output; giving up");
           mGotExcessiveNullOutput = true;
           return E_FAIL;
         }
         continue;
       }
-      pts = GetSampleTime(sample);
-      duration = GetSampleDuration(sample);
-      if (!pts.IsValid() || !duration.IsValid()) {
-        return E_FAIL;
+      LONGLONG id = 0;
+      hr = sample->GetSampleTime(&id);
+      NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
+
+      if (id && !mTimestampRatio) {
+        // This is the first sample with a non-zero time. Attempt to
+        // determine the ratio by which the MFT has scaled the IDs we set
+        // as sample times.
+        int64_t lowestPts = INT64_MAX;
+        uint32_t index = 0;
+        for (uint32_t i = 0; i < mSamplesTable.Length(); i++) {
+          const SampleEntry& entry = mSamplesTable[i];
+          if (entry.mTime < lowestPts) {
+            lowestPts = entry.mTime;
+            index = i;
+          }
+        }
+        MOZ_ASSERT(mSamplesTable[index].mId);
+        mTimestampRatio = Some(id / mSamplesTable[index].mId);
       }
-      if (wasDraining && sampleCount == 1 && pts == media::TimeUnit()) {
-        // WMF is unable to calculate a duration if only a single sample
-        // was parsed. Additionally, the pts always comes out at 0 under those
-        // circumstances.
-        // Seeing that we've only fed the decoder a single frame, the pts
-        // and duration are known, it's of the last sample.
-        pts = media::TimeUnit::FromMicroseconds(mLastTime);
-        duration = media::TimeUnit::FromMicroseconds(mLastDuration);
+      id /= mTimestampRatio.valueOr(1);
+
+      // Look up the sample id in our table.
+      size_t i = 0;
+      for (; i < mSamplesTable.Length(); i++) {
+        const SampleEntry& entry = mSamplesTable[i];
+        if (entry.mId == id) {
+          pts = media::TimeUnit::FromMicroseconds(entry.mTime);
+          duration = media::TimeUnit::FromMicroseconds(entry.mDuration);
+          break;
+        }
       }
+      if (i == mSamplesTable.Length()) {
+        // Entry not found in table... We're doomed.
+        LOG("Unexpected sample id returned.");
+        // Return a unique error code that we can identify later.
+        return NAP_E_MISMATCHED_ID;
+      }
+      mSamplesTable.RemoveElementAt(i);
+
       if (mSeekTargetThreshold.isSome()) {
         if ((pts + duration) < mSeekTargetThreshold.ref()) {
           LOG("Dropping video frame which pts is smaller than seek target.");
           // It is necessary to clear the pointer to release the previous output
           // buffer.
           sample = nullptr;
           continue;
         }
@@ -932,20 +958,19 @@ WMFVideoMFTManager::Output(int64_t aStre
   } else {
     hr = CreateBasicVideoFrame(sample, aStreamOffset, getter_AddRefs(frame));
   }
   // Frame should be non null only when we succeeded.
   MOZ_ASSERT((frame != nullptr) == SUCCEEDED(hr));
   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
   NS_ENSURE_TRUE(frame, E_FAIL);
 
+  frame->mTime = pts.ToMicroseconds();
+  frame->mDuration = duration.ToMicroseconds();
   aOutData = frame;
-  // Set the potentially corrected pts and duration.
-  aOutData->mTime = pts.ToMicroseconds();
-  aOutData->mDuration = duration.ToMicroseconds();
 
   if (mNullOutputCount) {
     mGotValidOutputAfterNullOutput = true;
   }
 
   return S_OK;
 }
 
--- a/dom/media/platforms/wmf/WMFVideoMFTManager.h
+++ b/dom/media/platforms/wmf/WMFVideoMFTManager.h
@@ -45,18 +45,20 @@ public:
     nsCString failureReason;
     return IsHardwareAccelerated(failureReason)
       ? "wmf hardware video decoder" : "wmf software video decoder";
   }
 
   void Flush() override
   {
     MFTManager::Flush();
+    mSamplesTable.Clear();
+    mCurrentId = 0;
     mDraining = false;
-    mSamplesCount = 0;
+    mTimestampRatio.reset();
   }
 
   void Drain() override
   {
     MFTManager::Drain();
     mDraining = true;
   }
 
@@ -87,19 +89,16 @@ private:
   uint32_t mVideoStride;
   nsIntSize mImageSize;
 
   RefPtr<layers::ImageContainer> mImageContainer;
   nsAutoPtr<DXVA2Manager> mDXVA2Manager;
 
   RefPtr<IMFSample> mLastInput;
   float mLastDuration;
-  int64_t mLastTime = 0;
-  bool mDraining = false;
-  int64_t mSamplesCount = 0;
 
   bool mDXVAEnabled;
   const layers::LayersBackend mLayersBackend;
   bool mUseHwAccel;
 
   nsCString mDXVAFailureReason;
 
   enum StreamType {
@@ -113,13 +112,28 @@ private:
 
   const GUID& GetMFTGUID();
   const GUID& GetMediaSubtypeGUID();
 
   uint32_t mNullOutputCount;
   bool mGotValidOutputAfterNullOutput;
   bool mGotExcessiveNullOutput;
   bool mIsValid;
+
+  struct SampleEntry
+  {
+    SampleEntry(uint32_t aId, int64_t aTime, int64_t aDuration)
+      : mId(aId), mTime(aTime), mDuration(aDuration)
+    {
+    }
+    uint32_t mId;
+    int64_t mTime;
+    int64_t mDuration;
+  };
+  nsTArray<SampleEntry> mSamplesTable;
+  uint32_t mCurrentId = 0;
+  bool mDraining = false;
+  Maybe<int64_t> mTimestampRatio;
 };
 
 } // namespace mozilla
 
 #endif // WMFVideoMFTManager_h_