Bug 1530234 - P2. Don't recalculate audio sample start time. r=bryce
author Jean-Yves Avenard <jyavenard@mozilla.com>
Wed, 27 Feb 2019 14:19:00 +0000
changeset 461552 4e5ac450f2b6dd57e9264fddc97971f6b5d8dc18
parent 461551 0bd5d868443675304f64f1d85866338f651f5900
child 461553 f9e25c03e77aaccbb128ef0362e1538affb03fd7
push id 35625
push user csabou@mozilla.com
push date Thu, 28 Feb 2019 10:55:23 +0000
treeherder mozilla-central@fd53d5e80bca [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers bryce
bugs 1530234
milestone 67.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1530234 - P2. Don't recalculate audio sample start time. r=bryce The WMF audio decoder recalculated the timestamp of each audio sample according to the number of frames decoded so far. This is incompatible with the trimming mechanism, which relies on the audio timestamps matching what is found in the container. All the other audio decoders already do it that way. Depends on D20969 Differential Revision: https://phabricator.services.mozilla.com/D21305
dom/media/platforms/wmf/WMFAudioMFTManager.cpp
dom/media/platforms/wmf/WMFAudioMFTManager.h
--- a/dom/media/platforms/wmf/WMFAudioMFTManager.cpp
+++ b/dom/media/platforms/wmf/WMFAudioMFTManager.cpp
@@ -13,16 +13,18 @@
 #include "mozilla/Logging.h"
 #include "mozilla/Telemetry.h"
 #include "nsTArray.h"
 
 #define LOG(...) MOZ_LOG(sPDMLog, mozilla::LogLevel::Debug, (__VA_ARGS__))
 
 namespace mozilla {
 
+using media::TimeUnit;
+
 static void AACAudioSpecificConfigToUserData(uint8_t aAACProfileLevelIndication,
                                              const uint8_t* aAudioSpecConfig,
                                              uint32_t aConfigLength,
                                              nsTArray<BYTE>& aOutUserData) {
   MOZ_ASSERT(aOutUserData.IsEmpty());
 
   // The MF_MT_USER_DATA for AAC is defined here:
   // http://msdn.microsoft.com/en-us/library/windows/desktop/dd742784%28v=vs.85%29.aspx
@@ -249,60 +251,29 @@ WMFAudioMFTManager::Output(int64_t aStre
           LOG("Reporting telemetry AUDIO_MFT_OUTPUT_NULL_SAMPLES");
           Telemetry::Accumulate(
               Telemetry::HistogramID::AUDIO_MFT_OUTPUT_NULL_SAMPLES, 1);
         });
     SystemGroup::Dispatch(TaskCategory::Other, task.forget());
     return E_FAIL;
   }
 
+  TimeUnit pts = GetSampleTime(sample);
+  NS_ENSURE_TRUE(pts.IsValid(), E_FAIL);
+
   RefPtr<IMFMediaBuffer> buffer;
   hr = sample->ConvertToContiguousBuffer(getter_AddRefs(buffer));
   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
 
   BYTE* data = nullptr;  // Note: *data will be owned by the IMFMediaBuffer, we
                          // don't need to free it.
   DWORD maxLength = 0, currentLength = 0;
   hr = buffer->Lock(&data, &maxLength, &currentLength);
   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
 
-  // Sometimes when starting decoding, the AAC decoder gives us samples
-  // with a negative timestamp. AAC does usually have preroll (or encoder
-  // delay) encoded into its bitstream, but the amount encoded to the stream
-  // is variable, and it not signalled in-bitstream. There is sometimes
-  // signalling in the MP4 container what the preroll amount, but it's
-  // inconsistent. It looks like WMF's AAC encoder may take this into
-  // account, so strip off samples with a negative timestamp to get us
-  // to a 0-timestamp start. This seems to maintain A/V sync, so we can run
-  // with this until someone complains...
-
-  // We calculate the timestamp and the duration based on the number of audio
-  // frames we've already played. We don't trust the timestamp stored on the
-  // IMFSample, as sometimes it's wrong, possibly due to buggy encoders?
-
-  // If this sample block comes after a discontinuity (i.e. a gap or seek)
-  // reset the frame counters, and capture the timestamp. Future timestamps
-  // will be offset from this block's timestamp.
-  UINT32 discontinuity = false;
-  sample->GetUINT32(MFSampleExtension_Discontinuity, &discontinuity);
-  if (mMustRecaptureAudioPosition || discontinuity) {
-    // Update the output type, in case this segment has a different
-    // rate. This also triggers on the first sample, which can have a
-    // different rate than is advertised in the container, and sometimes we
-    // don't get a MF_E_TRANSFORM_STREAM_CHANGE when the rate changes.
-    hr = UpdateOutputType();
-    NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
-
-    mAudioFrameSum = 0;
-    LONGLONG timestampHns = 0;
-    hr = sample->GetSampleTime(&timestampHns);
-    NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
-    mAudioTimeOffset = media::TimeUnit::FromMicroseconds(timestampHns / 10);
-    mMustRecaptureAudioPosition = false;
-  }
   // Output is made of floats.
   int32_t numSamples = currentLength / sizeof(float);
   int32_t numFrames = numSamples / mAudioChannels;
   MOZ_ASSERT(numFrames >= 0);
   MOZ_ASSERT(numSamples >= 0);
   if (numFrames == 0) {
     // All data from this chunk stripped, loop back and try to output the next
     // frame, if possible.
@@ -313,32 +284,26 @@ WMFAudioMFTManager::Output(int64_t aStre
   if (!audioData) {
     return E_OUTOFMEMORY;
   }
 
   PodCopy(audioData.Data(), reinterpret_cast<float*>(data), numSamples);
 
   buffer->Unlock();
 
-  media::TimeUnit timestamp =
-      mAudioTimeOffset + FramesToTimeUnit(mAudioFrameSum, mAudioRate);
-  NS_ENSURE_TRUE(timestamp.IsValid(), E_FAIL);
-
-  mAudioFrameSum += numFrames;
-
-  media::TimeUnit duration = FramesToTimeUnit(numFrames, mAudioRate);
+  TimeUnit duration = FramesToTimeUnit(numFrames, mAudioRate);
   NS_ENSURE_TRUE(duration.IsValid(), E_FAIL);
 
-  aOutData = new AudioData(aStreamOffset, timestamp, std::move(audioData),
+  aOutData = new AudioData(aStreamOffset, pts, std::move(audioData),
                            mAudioChannels, mAudioRate, mChannelsMap);
   MOZ_DIAGNOSTIC_ASSERT(duration == aOutData->mDuration, "must be equal");
 
 #ifdef LOG_SAMPLE_DECODE
   LOG("Decoded audio sample! timestamp=%lld duration=%lld currentLength=%u",
-      timestamp.ToMicroseconds(), duration.ToMicroseconds(), currentLength);
+      pts.ToMicroseconds(), duration.ToMicroseconds(), currentLength);
 #endif
 
   return S_OK;
 }
 
 void WMFAudioMFTManager::Shutdown() { mDecoder = nullptr; }
 
 }  // namespace mozilla
--- a/dom/media/platforms/wmf/WMFAudioMFTManager.h
+++ b/dom/media/platforms/wmf/WMFAudioMFTManager.h
@@ -39,30 +39,18 @@ class WMFAudioMFTManager : public MFTMan
  private:
   HRESULT UpdateOutputType();
 
   uint32_t mAudioChannels;
   AudioConfig::ChannelLayout::ChannelMap mChannelsMap;
   uint32_t mAudioRate;
   nsTArray<BYTE> mUserData;
 
-  // The offset, at which playback started since the
-  // last discontinuity.
-  media::TimeUnit mAudioTimeOffset;
-  // The number of audio frames that we've played since the last
-  // discontinuity.
-  int64_t mAudioFrameSum = 0;
-
   enum StreamType { Unknown, AAC, MP3 };
   StreamType mStreamType;
 
   const GUID& GetMFTGUID();
   const GUID& GetMediaSubtypeGUID();
-
-  // True if we need to re-initialize mAudioTimeOffset and mAudioFrameSum
-  // from the next audio packet we decode. This happens after a seek, since
-  // WMF doesn't mark a stream as having a discontinuity after a seek(0).
-  bool mMustRecaptureAudioPosition = true;
 };
 
 }  // namespace mozilla
 
 #endif  // WMFAudioOutputSource_h_