Bug 870400 - Calculate audio timestamps for WMF based on audio frames played, rather than trusting the container's timestamps. r=padenot, a=lsblakk
author: Chris Pearce <cpearce@mozilla.com>
Fri, 31 May 2013 11:34:52 +1200
changeset 142851 82a41a9e888a34deaa33bac6e211c891daa738f5
parent 142850 713a74635e1c00c38b06eddcf109c91a2b9d7860
child 142852 0e2ca88d53e2d7b369a3c614add927afb95ed81a
push id: 2579
push user: akeybl@mozilla.com
push date: Mon, 24 Jun 2013 18:52:47 +0000
treeherder: mozilla-beta@b69b7de8a05a [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: padenot, lsblakk
bugs: 870400
milestone: 23.0a2
Bug 870400 - Calculate audio timestamps for WMF based on audio frames played, rather than trusting the container's timestamps. r=padenot, a=lsblakk
content/media/wmf/WMFReader.cpp
content/media/wmf/WMFReader.h
--- a/content/media/wmf/WMFReader.cpp
+++ b/content/media/wmf/WMFReader.cpp
@@ -43,20 +43,23 @@ WMFReader::WMFReader(AbstractMediaDecode
   : MediaDecoderReader(aDecoder),
     mSourceReader(nullptr),
     mAudioChannels(0),
     mAudioBytesPerSample(0),
     mAudioRate(0),
     mVideoWidth(0),
     mVideoHeight(0),
     mVideoStride(0),
+    mAudioFrameSum(0),
+    mAudioFrameOffset(0),
     mHasAudio(false),
     mHasVideo(false),
     mCanSeek(false),
     mUseHwAccel(false),
+    mMustRecaptureAudioPosition(true),
     mIsMP3Enabled(WMFDecoder::IsMP3Supported())
 {
   NS_ASSERTION(NS_IsMainThread(), "Must be on main thread.");
   MOZ_COUNT_CTOR(WMFReader);
 }
 
 WMFReader::~WMFReader()
 {
@@ -600,16 +603,41 @@ WMFReader::ReadMetadata(VideoInfo* aInfo
 static int64_t
 GetSampleDuration(IMFSample* aSample)
 {
   int64_t duration = 0;
   aSample->GetSampleDuration(&duration);
   return HNsToUsecs(duration);
 }
 
+HRESULT
+HNsToFrames(int64_t aHNs, uint32_t aRate, int64_t* aOutFrames)
+{
+  MOZ_ASSERT(aOutFrames);
+  const int64_t HNS_PER_S = USECS_PER_S * 10;
+  CheckedInt<int64_t> i = aHNs;
+  i *= aRate;
+  i /= HNS_PER_S;
+  NS_ENSURE_TRUE(i.isValid(), E_FAIL);
+  *aOutFrames = i.value();
+  return S_OK;
+}
+
+HRESULT
+FramesToUsecs(int64_t aSamples, uint32_t aRate, int64_t* aOutUsecs)
+{
+  MOZ_ASSERT(aOutUsecs);
+  CheckedInt<int64_t> i = aSamples;
+  i *= USECS_PER_S;
+  i /= aRate;
+  NS_ENSURE_TRUE(i.isValid(), E_FAIL);
+  *aOutUsecs = i.value();
+  return S_OK;
+}
+
 bool
 WMFReader::DecodeAudioData()
 {
   NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread.");
 
   HRESULT hr;
   hr = mSourceReader->ReadSample(MF_SOURCE_READER_FIRST_AUDIO_STREAM,
                                  0, // control flags
@@ -655,21 +683,43 @@ WMFReader::DecodeAudioData()
   NS_ENSURE_TRUE(SUCCEEDED(hr), false);
 
   uint32_t numFrames = currentLength / mAudioBytesPerSample / mAudioChannels;
   NS_ASSERTION(sizeof(AudioDataValue) == mAudioBytesPerSample, "Size calculation is wrong");
   nsAutoArrayPtr<AudioDataValue> pcmSamples(new AudioDataValue[numFrames * mAudioChannels]);
   memcpy(pcmSamples.get(), data, currentLength);
   buffer->Unlock();
 
-  int64_t offset = mDecoder->GetResource()->Tell();
-  int64_t timestamp = HNsToUsecs(timestampHns);
-  int64_t duration = GetSampleDuration(sample);
+  // We calculate the timestamp and the duration based on the number of audio
+  // frames we've already played. We don't trust the timestamp stored on the
+  // IMFSample, as sometimes it's wrong, possibly due to buggy encoders?
 
-  mAudioQueue.Push(new AudioData(offset,
+  // If this sample block comes after a discontinuity (i.e. a gap or seek)
+  // reset the frame counters, and capture the timestamp. Future timestamps
+  // will be offset from this block's timestamp.
+  UINT32 discontinuity = false;
+  sample->GetUINT32(MFSampleExtension_Discontinuity, &discontinuity);
+  if (mMustRecaptureAudioPosition || discontinuity) {
+    mAudioFrameSum = 0;
+    hr = HNsToFrames(timestampHns, mAudioRate, &mAudioFrameOffset);
+    NS_ENSURE_TRUE(SUCCEEDED(hr), false);
+    mMustRecaptureAudioPosition = false;
+  }
+
+  int64_t timestamp;
+  hr = FramesToUsecs(mAudioFrameOffset + mAudioFrameSum, mAudioRate, &timestamp);
+  NS_ENSURE_TRUE(SUCCEEDED(hr), false);
+
+  mAudioFrameSum += numFrames;
+
+  int64_t duration;
+  hr = FramesToUsecs(numFrames, mAudioRate, &duration);
+  NS_ENSURE_TRUE(SUCCEEDED(hr), false);
+
+  mAudioQueue.Push(new AudioData(mDecoder->GetResource()->Tell(),
                                  timestamp,
                                  duration,
                                  numFrames,
                                  pcmSamples.forget(),
                                  mAudioChannels));
 
   #ifdef LOG_SAMPLE_DECODE
   LOG("Decoded audio sample! timestamp=%lld duration=%lld currentLength=%u",
@@ -896,17 +946,17 @@ WMFReader::DecodeVideoFrame(bool &aKeyfr
   NS_ENSURE_TRUE(SUCCEEDED(hr) && v, false);
 
   parsed++;
   decoded++;
   mVideoQueue.Push(v);
 
   #ifdef LOG_SAMPLE_DECODE
   LOG("Decoded video sample timestamp=%lld duration=%lld stride=%d height=%u flags=%u",
-      timestamp, duration, stride, mVideoHeight, flags);
+      timestamp, duration, mVideoStride, mVideoHeight, flags);
   #endif
 
   if ((flags & MF_SOURCE_READERF_ENDOFSTREAM)) {
     // End of stream.
     mVideoQueue.Finish();
     LOG("End of video stream");
     return false;
   }
@@ -925,16 +975,22 @@ WMFReader::Seek(int64_t aTargetUs,
   NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread.");
   if (!mCanSeek) {
     return NS_ERROR_FAILURE;
   }
 
   nsresult rv = ResetDecode();
   NS_ENSURE_SUCCESS(rv, rv);
 
+  // Mark that we must recapture the audio frame count from the next sample.
+  // WMF doesn't set a discontinuity marker when we seek to time 0, so we
+  // must remember to recapture the audio frame offset and reset the frame
+  // sum on the next audio packet we decode.
+  mMustRecaptureAudioPosition = true;
+
   AutoPropVar var;
   HRESULT hr = InitPropVariantFromInt64(UsecsToHNs(aTargetUs), &var);
   NS_ENSURE_TRUE(SUCCEEDED(hr), NS_ERROR_FAILURE);
 
   hr = mSourceReader->SetCurrentPosition(GUID_NULL, var);
   NS_ENSURE_TRUE(SUCCEEDED(hr), NS_ERROR_FAILURE);
 
   return DecodeToTarget(aTargetUs);
--- a/content/media/wmf/WMFReader.h
+++ b/content/media/wmf/WMFReader.h
@@ -86,16 +86,27 @@ private:
   uint32_t mAudioChannels;
   uint32_t mAudioBytesPerSample;
   uint32_t mAudioRate;
 
   uint32_t mVideoWidth;
   uint32_t mVideoHeight;
   uint32_t mVideoStride;
 
+  // The offset, in audio frames, at which playback started since the
+  // last discontinuity.
+  int64_t mAudioFrameOffset;
+  // The number of audio frames that we've played since the last
+  // discontinuity.
+  int64_t mAudioFrameSum;
+  // True if we need to re-initialize mAudioFrameOffset and mAudioFrameSum
+  // from the next audio packet we decode. This happens after a seek, since
+  // WMF doesn't mark a stream as having a discontinuity after a seek(0).
+  bool mMustRecaptureAudioPosition;
+
   bool mHasAudio;
   bool mHasVideo;
   bool mCanSeek;
   bool mUseHwAccel;
 
   // We can't call WMFDecoder::IsMP3Supported() on non-main threads, since it
   // checks a pref, so we cache its value in mIsMP3Enabled and use that on
   // the decode thread.