Bug 1128411. Part 1 - fix some bugs in MediaDecoderStateMachine::SendStreamData(). r=roc. a=lmandel
author: JW Wang <jwwang@mozilla.com>
date: Mon, 02 Feb 2015 13:53:54 +0800
changeset: 249761 d89186268fb48a66a04d71f4a0a69c96a847eaa8
parent: 249760 8f0a84889c63aee5b0c05ec7a49c9cba1fa2be75
child: 249762 636a4a911276cd861b60eb6a2db87c9cc5769f53
push id: 4489
push user: raliiev@mozilla.com
push date: Mon, 23 Feb 2015 15:17:55 +0000
treeherder: mozilla-beta@fd7c3dc24146
reviewers: roc, lmandel
bugs: 1128411
milestone: 37.0a2
dom/media/MediaDecoder.cpp
dom/media/MediaDecoder.h
dom/media/MediaDecoderStateMachine.cpp
--- a/dom/media/MediaDecoder.cpp
+++ b/dom/media/MediaDecoder.cpp
@@ -274,21 +274,20 @@ void MediaDecoder::ConnectDecodedStreamT
   // Unblock the output stream now. While it's connected to mDecodedStream,
   // mDecodedStream is responsible for controlling blocking.
   aStream->mStream->ChangeExplicitBlockerCount(-1);
 }
 
 MediaDecoder::DecodedStreamData::DecodedStreamData(MediaDecoder* aDecoder,
                                                    int64_t aInitialTime,
                                                    SourceMediaStream* aStream)
-  : mLastAudioPacketTime(-1),
-    mLastAudioPacketEndTime(-1),
-    mAudioFramesWritten(0),
+  : mAudioFramesWritten(0),
     mInitialTime(aInitialTime),
-    mNextVideoTime(aInitialTime),
+    mNextVideoTime(-1),
+    mNextAudioTime(-1),
     mDecoder(aDecoder),
     mStreamInitialized(false),
     mHaveSentFinish(false),
     mHaveSentFinishAudio(false),
     mHaveSentFinishVideo(false),
     mStream(aStream),
     mHaveBlockedForPlayState(false),
     mHaveBlockedForStateMachineNotPlaying(false)
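
The initializer change above introduces a sentinel pattern. As a reduced, hypothetical sketch (not the real DecodedStreamData): the time cursors start at -1, meaning "not yet initialized", because their real values need mStartTime, which is only known later when SendStreamData() first sets up the stream.

#include <cstdint>

struct DecodedStreamDataSketch {
  int64_t mAudioFramesWritten = 0;
  int64_t mInitialTime;        // microseconds, saved aInitialTime
  int64_t mNextVideoTime = -1; // microseconds, -1 until the stream is set up
  int64_t mNextAudioTime = -1; // microseconds, -1 until the stream is set up
  explicit DecodedStreamDataSketch(int64_t aInitialTime)
    : mInitialTime(aInitialTime) {}
};
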
--- a/dom/media/MediaDecoder.h
+++ b/dom/media/MediaDecoder.h
@@ -400,27 +400,26 @@ public:
     ~DecodedStreamData();
 
     // microseconds
     int64_t GetLastOutputTime() { return mListener->GetLastOutputTime(); }
     bool IsFinished() { return mListener->IsFinishedOnMainThread(); }
 
     // The following group of fields are protected by the decoder's monitor
     // and can be read or written on any thread.
-    int64_t mLastAudioPacketTime; // microseconds
-    int64_t mLastAudioPacketEndTime; // microseconds
     // Count of audio frames written to the stream
     int64_t mAudioFramesWritten;
     // Saved value of aInitialTime. Timestamp of the first audio and/or
     // video packet written.
     int64_t mInitialTime; // microseconds
     // mNextVideoTime is the end timestamp for the last packet sent to the stream.
     // Therefore video packets starting at or after this time need to be copied
     // to the output stream.
     int64_t mNextVideoTime; // microseconds
+    int64_t mNextAudioTime; // microseconds
     MediaDecoder* mDecoder;
     // The last video image sent to the stream. Useful if we need to replicate
     // the image.
     nsRefPtr<layers::Image> mLastVideoImage;
     IntSize mLastVideoImageDisplaySize;
     // This is set to true when the stream is initialized (audio and
     // video tracks added).
     bool mStreamInitialized;
--- a/dom/media/MediaDecoderStateMachine.cpp
+++ b/dom/media/MediaDecoderStateMachine.cpp
@@ -321,73 +321,66 @@ int64_t MediaDecoderStateMachine::GetDec
   }
   return audioDecoded;
 }
 
 void MediaDecoderStateMachine::SendStreamAudio(AudioData* aAudio,
                                                DecodedStreamData* aStream,
                                                AudioSegment* aOutput)
 {
-  NS_ASSERTION(OnDecodeThread() ||
-               OnStateMachineThread(), "Should be on decode thread or state machine thread");
+  NS_ASSERTION(OnDecodeThread() || OnStateMachineThread(),
+               "Should be on decode thread or state machine thread");
   AssertCurrentThreadInMonitor();
 
-  if (aAudio->mTime <= aStream->mLastAudioPacketTime) {
-    // ignore packet that we've already processed
-    return;
-  }
-  aStream->mLastAudioPacketTime = aAudio->mTime;
-  aStream->mLastAudioPacketEndTime = aAudio->GetEndTime();
-
   // This logic has to mimic AudioSink closely to make sure we write
   // the exact same silences
-  CheckedInt64 audioWrittenOffset = UsecsToFrames(mInfo.mAudio.mRate,
-      aStream->mInitialTime + mStartTime) + aStream->mAudioFramesWritten;
+  CheckedInt64 audioWrittenOffset = aStream->mAudioFramesWritten +
+      UsecsToFrames(mInfo.mAudio.mRate, aStream->mInitialTime + mStartTime);
   CheckedInt64 frameOffset = UsecsToFrames(mInfo.mAudio.mRate, aAudio->mTime);
-  if (!audioWrittenOffset.isValid() || !frameOffset.isValid())
+
+  if (!audioWrittenOffset.isValid() ||
+      !frameOffset.isValid() ||
+      // ignore packet that we've already processed
+      frameOffset.value() + aAudio->mFrames <= audioWrittenOffset.value()) {
     return;
+  }
+
   if (audioWrittenOffset.value() < frameOffset.value()) {
+    int64_t silentFrames = frameOffset.value() - audioWrittenOffset.value();
     // Write silence to catch up
-    VERBOSE_LOG("writing %d frames of silence to MediaStream",
-                int32_t(frameOffset.value() - audioWrittenOffset.value()));
+    VERBOSE_LOG("writing %lld frames of silence to MediaStream", silentFrames);
     AudioSegment silence;
-    silence.InsertNullDataAtStart(frameOffset.value() - audioWrittenOffset.value());
-    aStream->mAudioFramesWritten += silence.GetDuration();
+    StreamTime duration = aStream->mStream->TicksToTimeRoundDown(
+        mInfo.mAudio.mRate, silentFrames);
+    silence.InsertNullDataAtStart(duration);
+    aStream->mAudioFramesWritten += silentFrames;
+    audioWrittenOffset += silentFrames;
     aOutput->AppendFrom(&silence);
   }
 
-  int64_t offset;
-  if (aStream->mAudioFramesWritten == 0) {
-    NS_ASSERTION(frameOffset.value() <= audioWrittenOffset.value(),
-                 "Otherwise we'd have taken the write-silence path");
-    // We're starting in the middle of a packet. Split the packet.
-    offset = audioWrittenOffset.value() - frameOffset.value();
-  } else {
-    // Write the entire packet.
-    offset = 0;
-  }
-
-  if (offset >= aAudio->mFrames)
-    return;
-
+  MOZ_ASSERT(audioWrittenOffset.value() >= frameOffset.value());
+
+  int64_t offset = audioWrittenOffset.value() - frameOffset.value();
   size_t framesToWrite = aAudio->mFrames - offset;
 
   aAudio->EnsureAudioBuffer();
   nsRefPtr<SharedBuffer> buffer = aAudio->mAudioBuffer;
   AudioDataValue* bufferData = static_cast<AudioDataValue*>(buffer->Data());
   nsAutoTArray<const AudioDataValue*,2> channels;
   for (uint32_t i = 0; i < aAudio->mChannels; ++i) {
     channels.AppendElement(bufferData + i*aAudio->mFrames + offset);
   }
   aOutput->AppendFrames(buffer.forget(), channels, framesToWrite);
   VERBOSE_LOG("writing %u frames of data to MediaStream for AudioData at %lld",
               static_cast<unsigned>(framesToWrite),
               aAudio->mTime);
   aStream->mAudioFramesWritten += framesToWrite;
   aOutput->ApplyVolume(mVolume);
+
+  aStream->mNextAudioTime = aAudio->GetEndTime();
 }
 
 static void WriteVideoToMediaStream(MediaStream* aStream,
                                     layers::Image* aImage,
                                     int64_t aEndMicroseconds,
                                     int64_t aStartMicroseconds,
                                     const IntSize& aIntrinsicSize,
                                     VideoSegment* aOutput)
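
To make the offset arithmetic in the new SendStreamAudio() easier to follow, here is a standalone sketch under simplifying assumptions: CheckedInt64 is replaced with plain int64_t (so the overflow checks are dropped), UsecsToFrames is inlined, and aInitialTimeUsecs stands for aStream->mInitialTime + mStartTime. The helper names are illustrative, not Mozilla APIs.

#include <cstdint>

// Illustrative only: the real code uses CheckedInt64 to catch overflow.
static int64_t usecsToFrames(int64_t aRate, int64_t aUsecs) {
  return aUsecs * aRate / 1000000;
}

// Returns how many frames of the packet to append, updating *aFramesWritten.
static int64_t framesToAppend(int64_t aRate, int64_t aInitialTimeUsecs,
                              int64_t aPacketTimeUsecs, int64_t aPacketFrames,
                              int64_t* aFramesWritten) {
  int64_t writtenOffset =
      *aFramesWritten + usecsToFrames(aRate, aInitialTimeUsecs);
  int64_t frameOffset = usecsToFrames(aRate, aPacketTimeUsecs);
  if (frameOffset + aPacketFrames <= writtenOffset) {
    return 0; // the whole packet was already processed; skip it
  }
  if (writtenOffset < frameOffset) {
    // Write silence to catch up, mimicking AudioSink.
    int64_t silentFrames = frameOffset - writtenOffset;
    *aFramesWritten += silentFrames;
    writtenOffset += silentFrames;
  }
  // writtenOffset >= frameOffset here, matching the MOZ_ASSERT in the patch.
  int64_t offset = writtenOffset - frameOffset; // split point inside the packet
  int64_t frames = aPacketFrames - offset;      // > 0 thanks to the early return
  *aFramesWritten += frames;
  return frames;
}
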
@@ -432,61 +425,79 @@ void MediaDecoderStateMachine::SendStrea
     StreamTime endPosition = 0;
 
     if (!stream->mStreamInitialized) {
       if (mInfo.HasAudio()) {
         AudioSegment* audio = new AudioSegment();
         mediaStream->AddAudioTrack(kAudioTrack, mInfo.mAudio.mRate, 0, audio);
         stream->mStream->DispatchWhenNotEnoughBuffered(kAudioTrack,
             GetStateMachineThread(), GetWakeDecoderRunnable());
+        stream->mNextAudioTime = mStartTime + stream->mInitialTime;
       }
       if (mInfo.HasVideo()) {
         VideoSegment* video = new VideoSegment();
         mediaStream->AddTrack(kVideoTrack, 0, video);
         stream->mStream->DispatchWhenNotEnoughBuffered(kVideoTrack,
             GetStateMachineThread(), GetWakeDecoderRunnable());
+
+        // TODO: We can't initialize |mNextVideoTime| until |mStartTime|
+        // is set. This is a good indication that DecodedStreamData is in
+        // deep coupling with the state machine and we should move the class
+        // into MediaDecoderStateMachine.
+        stream->mNextVideoTime = mStartTime + stream->mInitialTime;
       }
       stream->mStreamInitialized = true;
     }
 
     if (mInfo.HasAudio()) {
+      MOZ_ASSERT(stream->mNextAudioTime != -1, "Should've been initialized");
       nsAutoTArray<nsRefPtr<AudioData>,10> audio;
-      // It's OK to hold references to the AudioData because while audio
-      // is captured, only the decoder thread pops from the queue (see below).
-      AudioQueue().GetElementsAfter(stream->mLastAudioPacketTime, &audio);
+      // It's OK to hold references to the AudioData because AudioData
+      // is ref-counted.
+      AudioQueue().GetElementsAfter(stream->mNextAudioTime, &audio);
       AudioSegment output;
       for (uint32_t i = 0; i < audio.Length(); ++i) {
         SendStreamAudio(audio[i], stream, &output);
       }
+      // |mNextAudioTime| is updated as we process each audio sample in
+      // SendStreamAudio(). This is consistent with how |mNextVideoTime|
+      // is updated for video samples.
       if (output.GetDuration() > 0) {
         mediaStream->AppendToTrack(kAudioTrack, &output);
       }
       if (AudioQueue().IsFinished() && !stream->mHaveSentFinishAudio) {
         mediaStream->EndTrack(kAudioTrack);
         stream->mHaveSentFinishAudio = true;
       }
-      minLastAudioPacketTime = std::min(minLastAudioPacketTime, stream->mLastAudioPacketTime);
       endPosition = std::max(endPosition,
           mediaStream->TicksToTimeRoundDown(mInfo.mAudio.mRate,
                                             stream->mAudioFramesWritten));
     }
 
     if (mInfo.HasVideo()) {
+      MOZ_ASSERT(stream->mNextVideoTime != -1, "Should've been initialized");
       nsAutoTArray<nsRefPtr<VideoData>,10> video;
-      // It's OK to hold references to the VideoData only the decoder thread
-      // pops from the queue.
+      // It's OK to hold references to the VideoData because VideoData
+      // is ref-counted.
       VideoQueue().GetElementsAfter(stream->mNextVideoTime, &video);
       VideoSegment output;
       for (uint32_t i = 0; i < video.Length(); ++i) {
         VideoData* v = video[i];
         if (stream->mNextVideoTime < v->mTime) {
           VERBOSE_LOG("writing last video to MediaStream %p for %lldus",
                       mediaStream, v->mTime - stream->mNextVideoTime);
           // Write last video frame to catch up. mLastVideoImage can be null here
           // which is fine, it just means there's no video.
+
+          // TODO: |mLastVideoImage| should come from the last image rendered
+          // by the state machine. This will avoid the black frame when capture
+          // happens in the middle of playback (especially in the middle of a
+          // video frame). E.g. if we have a video frame that is 30 sec long
+          // and capture happens at 15 sec, we'll have to append a black frame
+          // that is 15 sec long.
           WriteVideoToMediaStream(mediaStream, stream->mLastVideoImage,
             v->mTime, stream->mNextVideoTime, stream->mLastVideoImageDisplaySize,
               &output);
           stream->mNextVideoTime = v->mTime;
         }
         if (stream->mNextVideoTime < v->GetEndTime()) {
           VERBOSE_LOG("writing video frame %lldus to MediaStream %p for %lldus",
                       v->mTime, mediaStream, v->GetEndTime() - stream->mNextVideoTime);
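
The video path above treats mNextVideoTime as a write cursor. A minimal sketch of that bookkeeping, with a hypothetical Frame type standing in for VideoData: any gap before a new frame is filled by replaying the last image, then the cursor advances to the frame's end time so the same data is never copied twice.

#include <cstddef>
#include <cstdint>

struct Frame { int64_t startUs; int64_t endUs; }; // stand-in for VideoData

static void advanceVideoCursor(const Frame* aFrames, size_t aCount,
                               int64_t* aNextVideoTimeUs) {
  for (size_t i = 0; i < aCount; ++i) {
    const Frame& f = aFrames[i];
    if (*aNextVideoTimeUs < f.startUs) {
      // Gap before this frame: replay the last image over
      // [*aNextVideoTimeUs, f.startUs), then move the cursor up.
      *aNextVideoTimeUs = f.startUs;
    }
    if (*aNextVideoTimeUs < f.endUs) {
      // Write the new frame over [*aNextVideoTimeUs, f.endUs).
      *aNextVideoTimeUs = f.endUs;
    }
  }
}
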
@@ -518,36 +529,36 @@ void MediaDecoderStateMachine::SendStrea
     }
 
     if (finished && !stream->mHaveSentFinish) {
       stream->mHaveSentFinish = true;
       stream->mStream->Finish();
     }
   }
 
-  if (mAudioCaptured) {
-    // Discard audio packets that are no longer needed.
-    while (true) {
-      const AudioData* a = AudioQueue().PeekFront();
-      // Packet times are not 100% reliable so this may discard packets that
-      // actually contain data for mCurrentFrameTime. This means if someone might
-      // create a new output stream and we actually don't have the audio for the
-      // very start. That's OK, we'll play silence instead for a brief moment.
-      // That's OK. Seeking to this time would have a similar issue for such
-      // badly muxed resources.
-      if (!a || a->GetEndTime() >= minLastAudioPacketTime)
-        break;
+  const auto clockTime = GetClock();
+  while (true) {
+    const AudioData* a = AudioQueue().PeekFront();
+    // If we discard audio samples fed to the stream immediately, we will
+    // keep decoding audio samples till the end and consume a lot of memory.
+    // Therefore we only discard those behind the stream clock to throttle
+    // the decoding speed.
+    if (a && a->mTime <= clockTime) {
       OnAudioEndTimeUpdate(std::max(mAudioEndTime, a->GetEndTime()));
       nsRefPtr<AudioData> releaseMe = AudioQueue().PopFront();
+      continue;
     }
-
-    if (finished) {
-      mAudioCompleted = true;
-      UpdateReadyState();
-    }
+    break;
+  }
+
+  // To be consistent with AudioSink, |mAudioCompleted| is not set
+  // until all samples are drained.
+  if (finished && AudioQueue().GetSize() == 0) {
+    mAudioCompleted = true;
+    UpdateReadyState();
   }
 }
 
 MediaDecoderStateMachine::WakeDecoderRunnable*
 MediaDecoderStateMachine::GetWakeDecoderRunnable()
 {
   AssertCurrentThreadInMonitor();
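
Finally, the changed discard loop in SendStreamData() can be summarized with a small sketch, using a hypothetical std::deque in place of the real audio queue: only samples whose start time is at or behind the stream clock are popped, so the queue keeps backpressure on the decoder instead of letting it decode to the end and buffer unbounded audio.

#include <algorithm>
#include <cstdint>
#include <deque>

struct Sample { int64_t timeUs; int64_t endUs; }; // stand-in for AudioData

static void discardPlayedAudio(std::deque<Sample>& aQueue, int64_t aClockTimeUs,
                               int64_t* aAudioEndTimeUs) {
  // Pop only samples already behind the stream clock; samples ahead of the
  // clock stay queued, which throttles the decoding speed.
  while (!aQueue.empty() && aQueue.front().timeUs <= aClockTimeUs) {
    *aAudioEndTimeUs = std::max(*aAudioEndTimeUs, aQueue.front().endUs);
    aQueue.pop_front(); // drops the reference, as PopFront() does in the patch
  }
}
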