Bug 1134434 - Fire loadedmetadata before encrypted event on encrypted MP4s - r=cpearce
authorEdwin Flores <eflores@mozilla.com>
Mon, 23 Mar 2015 15:31:15 +1300
changeset 235020 706ee8458ffb6f06da7874cb870d02c3f5696f54
parent 235019 b7e5ddf82a1abe1d1b2840b86992f5653ac92676
child 235021 56f07a799bd170e3ad0b48f338bd8b2351809c59
push id28462
push usercbook@mozilla.com
push dateMon, 23 Mar 2015 12:19:34 +0000
treeherdermozilla-central@bc85c479668a [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerscpearce
bugs1134434
milestone39.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1134434 - Fire loadedmetadata before encrypted event on encrypted MP4s - r=cpearce
dom/html/HTMLMediaElement.cpp
dom/media/MediaDecoderStateMachine.cpp
dom/media/MediaDecoderStateMachine.h
dom/media/MediaInfo.h
dom/media/fmp4/MP4Reader.cpp
dom/media/fmp4/MP4Reader.h
dom/media/mediasource/MediaSourceReader.cpp
dom/media/mediasource/MediaSourceReader.h
dom/media/mediasource/TrackBuffer.cpp
dom/media/mediasource/TrackBuffer.h
--- a/dom/html/HTMLMediaElement.cpp
+++ b/dom/html/HTMLMediaElement.cpp
@@ -3061,17 +3061,17 @@ void HTMLMediaElement::ProcessMediaFragm
     mFragmentStart = parser.GetStartTime();
   }
 }
 
 void HTMLMediaElement::MetadataLoaded(const MediaInfo* aInfo,
                                       nsAutoPtr<const MetadataTags> aTags)
 {
   mMediaInfo = *aInfo;
-  mIsEncrypted = aInfo->mIsEncrypted;
+  mIsEncrypted = aInfo->IsEncrypted();
   mTags = aTags.forget();
   mLoadedDataFired = false;
   ChangeReadyState(nsIDOMHTMLMediaElement::HAVE_METADATA);
 
   if (mIsEncrypted) {
     nsCOMPtr<nsIObserverService> obs = services::GetObserverService();
     obs->NotifyObservers(static_cast<nsIContent*>(this), "media-eme-metadataloaded", nullptr);
   }
@@ -3081,16 +3081,24 @@ void HTMLMediaElement::MetadataLoaded(co
     mMediaSize = aInfo->mVideo.mDisplay;
     DispatchAsyncEvent(NS_LITERAL_STRING("resize"));
   }
   DispatchAsyncEvent(NS_LITERAL_STRING("loadedmetadata"));
   if (mDecoder && mDecoder->IsTransportSeekable() && mDecoder->IsMediaSeekable()) {
     ProcessMediaFragmentURI();
     mDecoder->SetFragmentEndTime(mFragmentEnd);
   }
+  if (mIsEncrypted) {
+    if (!mMediaSource) {
+      DecodeError();
+      return;
+    }
+
+    DispatchEncrypted(aInfo->mCrypto.mInitData, aInfo->mCrypto.mType);
+  }
 
   // Expose the tracks to JS directly.
   for (OutputMediaStream& out : mOutputStreams) {
     if (aInfo->HasAudio()) {
       TrackID audioTrackId = aInfo->mAudio.mTrackInfo.mOutputId;
       out.mStream->CreateDOMTrack(audioTrackId, MediaSegment::AUDIO);
     }
     if (aInfo->HasVideo()) {
--- a/dom/media/MediaDecoderStateMachine.cpp
+++ b/dom/media/MediaDecoderStateMachine.cpp
@@ -1277,16 +1277,17 @@ MediaDecoderOwner::NextFrameStatus Media
   }
   return MediaDecoderOwner::NEXT_FRAME_UNAVAILABLE;
 }
 
 static const char* const gMachineStateStr[] = {
   "NONE",
   "DECODING_METADATA",
   "WAIT_FOR_RESOURCES",
+  "WAIT_FOR_CDM",
   "DECODING_FIRSTFRAME",
   "DORMANT",
   "DECODING",
   "SEEKING",
   "BUFFERING",
   "COMPLETED",
   "SHUTDOWN"
 };
@@ -1577,23 +1578,29 @@ void MediaDecoderStateMachine::NotifyWai
       &MediaDecoderStateMachine::DoNotifyWaitingForResourcesStatusChanged));
   DecodeTaskQueue()->Dispatch(task);
 }
 
 void MediaDecoderStateMachine::DoNotifyWaitingForResourcesStatusChanged()
 {
   NS_ASSERTION(OnDecodeThread(), "Should be on decode thread.");
   ReentrantMonitorAutoEnter mon(mDecoder->GetReentrantMonitor());
-  if (mState != DECODER_STATE_WAIT_FOR_RESOURCES) {
-    return;
-  }
+
   DECODER_LOG("DoNotifyWaitingForResourcesStatusChanged");
-  // The reader is no longer waiting for resources (say a hardware decoder),
-  // we can now proceed to decode metadata.
-  SetState(DECODER_STATE_DECODING_NONE);
+
+  if (mState == DECODER_STATE_WAIT_FOR_RESOURCES) {
+    // The reader is no longer waiting for resources (say a hardware decoder),
+    // we can now proceed to decode metadata.
+    SetState(DECODER_STATE_DECODING_NONE);
+  } else if (mState == DECODER_STATE_WAIT_FOR_CDM &&
+             !mReader->IsWaitingOnCDMResource()) {
+    SetState(DECODER_STATE_DECODING_FIRSTFRAME);
+    EnqueueDecodeFirstFrameTask();
+  }
+
   ScheduleStateMachine();
 }
 
 void MediaDecoderStateMachine::PlayInternal()
 {
   NS_ASSERTION(OnStateMachineThread(), "Should be on state machine thread.");
   ReentrantMonitorAutoEnter mon(mDecoder->GetReentrantMonitor());
 
@@ -2264,16 +2271,23 @@ nsresult MediaDecoderStateMachine::Decod
 
   if (mGotDurationFromMetaData) {
     // We have all the information required: duration and size
     // Inform the element that we've loaded the metadata.
     EnqueueLoadedMetadataEvent();
   }
 
   if (mState == DECODER_STATE_DECODING_METADATA) {
+    if (mReader->IsWaitingOnCDMResource()) {
+      // Metadata parsing was successful but we're still waiting for CDM caps
+      // to become available so that we can build the correct decryptor/decoder.
+      SetState(DECODER_STATE_WAIT_FOR_CDM);
+      return NS_OK;
+    }
+
     SetState(DECODER_STATE_DECODING_FIRSTFRAME);
     res = EnqueueDecodeFirstFrameTask();
     if (NS_FAILED(res)) {
       return NS_ERROR_FAILURE;
     }
   }
   ScheduleStateMachine();
 
@@ -2673,16 +2687,17 @@ nsresult MediaDecoderStateMachine::RunSt
       DECODER_LOG("Shutdown started");
       return NS_OK;
     }
 
     case DECODER_STATE_DORMANT: {
       return NS_OK;
     }
 
+    case DECODER_STATE_WAIT_FOR_CDM:
     case DECODER_STATE_WAIT_FOR_RESOURCES: {
       return NS_OK;
     }
 
     case DECODER_STATE_DECODING_NONE: {
       SetState(DECODER_STATE_DECODING_METADATA);
       // Ensure we have a decode thread to decode metadata.
       return EnqueueDecodeMetadataTask();
--- a/dom/media/MediaDecoderStateMachine.h
+++ b/dom/media/MediaDecoderStateMachine.h
@@ -134,16 +134,17 @@ public:
 
   nsresult Init(MediaDecoderStateMachine* aCloneDonor);
 
   // Enumeration for the valid decoding states
   enum State {
     DECODER_STATE_DECODING_NONE,
     DECODER_STATE_DECODING_METADATA,
     DECODER_STATE_WAIT_FOR_RESOURCES,
+    DECODER_STATE_WAIT_FOR_CDM,
     DECODER_STATE_DECODING_FIRSTFRAME,
     DECODER_STATE_DORMANT,
     DECODER_STATE_DECODING,
     DECODER_STATE_SEEKING,
     DECODER_STATE_BUFFERING,
     DECODER_STATE_COMPLETED,
     DECODER_STATE_SHUTDOWN
   };
--- a/dom/media/MediaInfo.h
+++ b/dom/media/MediaInfo.h
@@ -5,16 +5,17 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 #if !defined(MediaInfo_h)
 #define MediaInfo_h
 
 #include "nsSize.h"
 #include "nsRect.h"
 #include "ImageTypes.h"
 #include "nsString.h"
+#include "nsTArray.h"
 #include "StreamBuffer.h" // for TrackID
 
 namespace mozilla {
 
 struct TrackInfo {
   void Init(const nsAString& aId,
             const nsAString& aKind,
             const nsAString& aLabel,
@@ -97,37 +98,54 @@ public:
   uint32_t mChannels;
 
   // True if we have an active audio bitstream.
   bool mHasAudio;
 
   TrackInfo mTrackInfo;
 };
 
+class EncryptionInfo {
+public:
+  EncryptionInfo() : mIsEncrypted(false) {}
+
+  // Encryption type to be passed to JS. Usually `cenc'.
+  nsString mType;
+
+  // Encryption data.
+  nsTArray<uint8_t> mInitData;
+
+  // True if the stream has encryption metadata
+  bool mIsEncrypted;
+};
+
 class MediaInfo {
 public:
-  MediaInfo() : mIsEncrypted(false) {}
-
   bool HasVideo() const
   {
     return mVideo.mHasVideo;
   }
 
   bool HasAudio() const
   {
     return mAudio.mHasAudio;
   }
 
+  bool IsEncrypted() const
+  {
+    return mCrypto.mIsEncrypted;
+  }
+
   bool HasValidMedia() const
   {
     return HasVideo() || HasAudio();
   }
 
-  bool mIsEncrypted;
-
   // TODO: Store VideoInfo and AudioIndo in arrays to support multi-tracks.
   VideoInfo mVideo;
   AudioInfo mAudio;
+
+  EncryptionInfo mCrypto;
 };
 
 } // namespace mozilla
 
 #endif // MediaInfo_h
--- a/dom/media/fmp4/MP4Reader.cpp
+++ b/dom/media/fmp4/MP4Reader.cpp
@@ -151,16 +151,17 @@ MP4Reader::MP4Reader(AbstractMediaDecode
   : MediaDecoderReader(aDecoder)
   , mAudio(MediaData::AUDIO_DATA, Preferences::GetUint("media.mp4-audio-decode-ahead", 2))
   , mVideo(MediaData::VIDEO_DATA, Preferences::GetUint("media.mp4-video-decode-ahead", 2))
   , mLastReportedNumDecodedFrames(0)
   , mLayersBackendType(layers::LayersBackend::LAYERS_NONE)
   , mDemuxerInitialized(false)
   , mFoundSPSForTelemetry(false)
   , mIsEncrypted(false)
+  , mAreDecodersSetup(false)
   , mIndexReady(false)
   , mDemuxerMonitor("MP4 Demuxer")
 #if defined(MP4_READER_DORMANT_HEURISTIC)
   , mDormantEnabled(Preferences::GetBool("media.decoder.heuristic.dormant.enabled", false))
 #endif
 {
   MOZ_ASSERT(NS_IsMainThread(), "Must be on main thread.");
   MOZ_COUNT_CTOR(MP4Reader);
@@ -258,51 +259,23 @@ MP4Reader::Init(MediaDecoderReader* aClo
     sSetupPrefCache = true;
     Preferences::AddBoolVarCache(&sIsEMEEnabled, "media.eme.enabled", false);
     Preferences::AddBoolVarCache(&sDemuxSkipToNextKeyframe, "media.fmp4.demux-skip", true);
   }
 
   return NS_OK;
 }
 
-#ifdef MOZ_EME
-class DispatchKeyNeededEvent : public nsRunnable {
-public:
-  DispatchKeyNeededEvent(AbstractMediaDecoder* aDecoder,
-                         nsTArray<uint8_t>& aInitData,
-                         const nsString& aInitDataType)
-    : mDecoder(aDecoder)
-    , mInitData(aInitData)
-    , mInitDataType(aInitDataType)
-  {
-  }
-  NS_IMETHOD Run() {
-    // Note: Null check the owner, as the decoder could have been shutdown
-    // since this event was dispatched.
-    MediaDecoderOwner* owner = mDecoder->GetOwner();
-    if (owner) {
-      owner->DispatchEncrypted(mInitData, mInitDataType);
-    }
-    mDecoder = nullptr;
-    return NS_OK;
-  }
-private:
-  nsRefPtr<AbstractMediaDecoder> mDecoder;
-  nsTArray<uint8_t> mInitData;
-  nsString mInitDataType;
-};
-#endif
-
 void MP4Reader::RequestCodecResource() {
   if (mVideo.mDecoder) {
     mVideo.mDecoder->AllocateMediaResources();
   }
 }
 
-bool MP4Reader::IsWaitingOnCodecResource() {
+bool MP4Reader::IsWaitingMediaResources() {
   return mVideo.mDecoder && mVideo.mDecoder->IsWaitingMediaResources();
 }
 
 bool MP4Reader::IsWaitingOnCDMResource() {
 #ifdef MOZ_EME
   nsRefPtr<CDMProxy> proxy;
   {
     ReentrantMonitorAutoEnter mon(mDecoder->GetReentrantMonitor());
@@ -322,25 +295,16 @@ bool MP4Reader::IsWaitingOnCDMResource()
     LOG("capsKnown=%d", caps.AreCapsKnown());
     return !caps.AreCapsKnown();
   }
 #else
   return false;
 #endif
 }
 
-bool MP4Reader::IsWaitingMediaResources()
-{
-  // IsWaitingOnCDMResource() *must* come first, because we don't know whether
-  // we can create a decoder until the CDM is initialized and it has told us
-  // whether *it* will decode, or whether we need to create a PDM to do the
-  // decoding
-  return IsWaitingOnCDMResource() || IsWaitingOnCodecResource();
-}
-
 void
 MP4Reader::ExtractCryptoInitData(nsTArray<uint8_t>& aInitData)
 {
   MOZ_ASSERT(mCrypto.valid);
   const nsTArray<mp4_demuxer::PsshInfo>& psshs = mCrypto.pssh;
   for (uint32_t i = 0; i < psshs.Length(); i++) {
     aInitData.AppendElements(psshs[i].data);
   }
@@ -399,140 +363,163 @@ MP4Reader::ReadMetadata(MediaInfo* aInfo
     if (mAudio.mActive) {
       mAudio.mTrackDemuxer = new MP4AudioDemuxer(mDemuxer);
     }
     mCrypto = mDemuxer->Crypto();
 
     {
       MonitorAutoUnlock unlock(mDemuxerMonitor);
       ReentrantMonitorAutoEnter mon(mDecoder->GetReentrantMonitor());
-      mInfo.mIsEncrypted = mIsEncrypted = mCrypto.valid;
+      mInfo.mCrypto.mIsEncrypted = mIsEncrypted = mCrypto.valid;
     }
 
     // Remember that we've initialized the demuxer, so that if we're decoding
     // an encrypted stream and we need to wait for a CDM to be set, we don't
     // need to reinit the demuxer.
     mDemuxerInitialized = true;
   } else if (mPlatform && !IsWaitingMediaResources()) {
     *aInfo = mInfo;
     *aTags = nullptr;
     return NS_OK;
   }
 
-  if (mCrypto.valid) {
-#ifdef MOZ_EME
-    // We have encrypted audio or video. We'll need a CDM to decrypt and
-    // possibly decode this. Wait until we've received a CDM from the
-    // JavaScript player app. Note: we still go through the motions here
-    // even if EME is disabled, so that if script tries and fails to create
-    // a CDM, we can detect that and notify chrome and show some UI explaining
-    // that we failed due to EME being disabled.
-    nsRefPtr<CDMProxy> proxy;
+  if (HasAudio()) {
+    const AudioDecoderConfig& audio = mDemuxer->AudioConfig();
+    mInfo.mAudio.mRate = audio.samples_per_second;
+    mInfo.mAudio.mChannels = audio.channel_count;
+    mAudio.mCallback = new DecoderCallback(this, kAudio);
+  }
+
+  if (HasVideo()) {
+    const VideoDecoderConfig& video = mDemuxer->VideoConfig();
+    mInfo.mVideo.mDisplay =
+      nsIntSize(video.display_width, video.display_height);
+    mVideo.mCallback = new DecoderCallback(this, kVideo);
+
+    // Collect telemetry from h264 AVCC SPS.
+    if (!mFoundSPSForTelemetry) {
+      mFoundSPSForTelemetry = AccumulateSPSTelemetry(video.extra_data);
+    }
+  }
+
+  if (mIsEncrypted) {
     nsTArray<uint8_t> initData;
     ExtractCryptoInitData(initData);
     if (initData.Length() == 0) {
       return NS_ERROR_FAILURE;
     }
-    if (!mInitDataEncountered.Contains(initData)) {
-      mInitDataEncountered.AppendElement(initData);
-      NS_DispatchToMainThread(new DispatchKeyNeededEvent(mDecoder, initData, NS_LITERAL_STRING("cenc")));
-    }
-    if (IsWaitingMediaResources()) {
-      return NS_OK;
-    }
-    MOZ_ASSERT(!IsWaitingMediaResources());
 
-    {
-      ReentrantMonitorAutoEnter mon(mDecoder->GetReentrantMonitor());
-      proxy = mDecoder->GetCDMProxy();
-    }
-    MOZ_ASSERT(proxy);
-
-    mPlatform = PlatformDecoderModule::CreateCDMWrapper(proxy,
-                                                        HasAudio(),
-                                                        HasVideo());
-    NS_ENSURE_TRUE(mPlatform, NS_ERROR_FAILURE);
-#else
-    // EME not supported.
-    return NS_ERROR_FAILURE;
-#endif
-  } else {
-    mPlatform = PlatformDecoderModule::Create();
-    NS_ENSURE_TRUE(mPlatform, NS_ERROR_FAILURE);
-  }
-
-  if (HasAudio()) {
-    const AudioDecoderConfig& audio = mDemuxer->AudioConfig();
-    if (mInfo.mAudio.mHasAudio && !IsSupportedAudioMimeType(audio.mime_type)) {
-      return NS_ERROR_FAILURE;
-    }
-    mInfo.mAudio.mRate = audio.samples_per_second;
-    mInfo.mAudio.mChannels = audio.channel_count;
-    mAudio.mCallback = new DecoderCallback(this, kAudio);
-    mAudio.mDecoder = mPlatform->CreateAudioDecoder(audio,
-                                                    mAudio.mTaskQueue,
-                                                    mAudio.mCallback);
-    NS_ENSURE_TRUE(mAudio.mDecoder != nullptr, NS_ERROR_FAILURE);
-    nsresult rv = mAudio.mDecoder->Init();
-    NS_ENSURE_SUCCESS(rv, rv);
-  }
-
-  if (HasVideo()) {
-    const VideoDecoderConfig& video = mDemuxer->VideoConfig();
-    if (mInfo.mVideo.mHasVideo && !IsSupportedVideoMimeType(video.mime_type)) {
-      return NS_ERROR_FAILURE;
-    }
-    mInfo.mVideo.mDisplay =
-      nsIntSize(video.display_width, video.display_height);
-    mVideo.mCallback = new DecoderCallback(this, kVideo);
-    if (mSharedDecoderManager && mPlatform->SupportsSharedDecoders(video)) {
-      mVideo.mDecoder =
-        mSharedDecoderManager->CreateVideoDecoder(mPlatform,
-                                                  video,
-                                                  mLayersBackendType,
-                                                  mDecoder->GetImageContainer(),
-                                                  mVideo.mTaskQueue,
-                                                  mVideo.mCallback);
-    } else {
-      mVideo.mDecoder = mPlatform->CreateVideoDecoder(video,
-                                                      mLayersBackendType,
-                                                      mDecoder->GetImageContainer(),
-                                                      mVideo.mTaskQueue,
-                                                      mVideo.mCallback);
-    }
-    NS_ENSURE_TRUE(mVideo.mDecoder != nullptr, NS_ERROR_FAILURE);
-    nsresult rv = mVideo.mDecoder->Init();
-    NS_ENSURE_SUCCESS(rv, rv);
-
-    // Collect telemetry from h264 AVCC SPS.
-    if (!mFoundSPSForTelemetry) {
-      mFoundSPSForTelemetry = AccumulateSPSTelemetry(video.extra_data);
-    }
+    mInfo.mCrypto.mInitData = initData;
+    mInfo.mCrypto.mType = NS_LITERAL_STRING("cenc");
   }
 
   // Get the duration, and report it to the decoder if we have it.
   Microseconds duration;
   {
     MonitorAutoLock lock(mDemuxerMonitor);
     duration = mDemuxer->Duration();
   }
   if (duration != -1) {
     ReentrantMonitorAutoEnter mon(mDecoder->GetReentrantMonitor());
     mDecoder->SetMediaDuration(duration);
   }
 
   *aInfo = mInfo;
   *aTags = nullptr;
 
+  if (!IsWaitingMediaResources() && !IsWaitingOnCDMResource()) {
+    NS_ENSURE_TRUE(EnsureDecodersSetup(), NS_ERROR_FAILURE);
+  }
+
   MonitorAutoLock mon(mDemuxerMonitor);
   UpdateIndex();
 
   return NS_OK;
 }
 
+bool
+MP4Reader::EnsureDecodersSetup()
+{
+  if (mAreDecodersSetup) {
+    return !!mPlatform;
+  }
+
+  if (mIsEncrypted) {
+#ifdef MOZ_EME
+    // We have encrypted audio or video. We'll need a CDM to decrypt and
+    // possibly decode this. Wait until we've received a CDM from the
+    // JavaScript player app. Note: we still go through the motions here
+    // even if EME is disabled, so that if script tries and fails to create
+    // a CDM, we can detect that and notify chrome and show some UI explaining
+    // that we failed due to EME being disabled.
+    nsRefPtr<CDMProxy> proxy;
+    if (IsWaitingMediaResources()) {
+      return true;
+    }
+    MOZ_ASSERT(!IsWaitingMediaResources());
+
+    {
+      ReentrantMonitorAutoEnter mon(mDecoder->GetReentrantMonitor());
+      proxy = mDecoder->GetCDMProxy();
+    }
+    MOZ_ASSERT(proxy);
+
+    mPlatform = PlatformDecoderModule::CreateCDMWrapper(proxy,
+                                                        HasAudio(),
+                                                        HasVideo());
+    NS_ENSURE_TRUE(mPlatform, false);
+#else
+    // EME not supported.
+    return false;
+#endif
+  } else {
+    mPlatform = PlatformDecoderModule::Create();
+    NS_ENSURE_TRUE(mPlatform, false);
+  }
+
+  if (HasAudio()) {
+    NS_ENSURE_TRUE(IsSupportedAudioMimeType(mDemuxer->AudioConfig().mime_type),
+                   false);
+
+    mAudio.mDecoder = mPlatform->CreateAudioDecoder(mDemuxer->AudioConfig(),
+                                                    mAudio.mTaskQueue,
+                                                    mAudio.mCallback);
+    NS_ENSURE_TRUE(mAudio.mDecoder != nullptr, false);
+    nsresult rv = mAudio.mDecoder->Init();
+    NS_ENSURE_SUCCESS(rv, false);
+  }
+
+  if (HasVideo()) {
+    NS_ENSURE_TRUE(IsSupportedVideoMimeType(mDemuxer->VideoConfig().mime_type),
+                   false);
+
+    if (mSharedDecoderManager && mPlatform->SupportsSharedDecoders(mDemuxer->VideoConfig())) {
+      mVideo.mDecoder =
+        mSharedDecoderManager->CreateVideoDecoder(mPlatform,
+                                                  mDemuxer->VideoConfig(),
+                                                  mLayersBackendType,
+                                                  mDecoder->GetImageContainer(),
+                                                  mVideo.mTaskQueue,
+                                                  mVideo.mCallback);
+    } else {
+      mVideo.mDecoder = mPlatform->CreateVideoDecoder(mDemuxer->VideoConfig(),
+                                                      mLayersBackendType,
+                                                      mDecoder->GetImageContainer(),
+                                                      mVideo.mTaskQueue,
+                                                      mVideo.mCallback);
+    }
+    NS_ENSURE_TRUE(mVideo.mDecoder != nullptr, false);
+    nsresult rv = mVideo.mDecoder->Init();
+    NS_ENSURE_SUCCESS(rv, false);
+  }
+
+  mAreDecodersSetup = true;
+  return true;
+}
+
 void
 MP4Reader::ReadUpdatedMetadata(MediaInfo* aInfo)
 {
   *aInfo = mInfo;
 }
 
 bool
 MP4Reader::IsMediaSeekable()
@@ -610,16 +597,21 @@ MP4Reader::ShouldSkip(bool aSkipToNextKe
 
 nsRefPtr<MediaDecoderReader::VideoDataPromise>
 MP4Reader::RequestVideoData(bool aSkipToNextKeyframe,
                             int64_t aTimeThreshold)
 {
   MOZ_ASSERT(GetTaskQueue()->IsCurrentThreadIn());
   VLOG("skip=%d time=%lld", aSkipToNextKeyframe, aTimeThreshold);
 
+  if (!EnsureDecodersSetup()) {
+    NS_WARNING("Error constructing MP4 decoders");
+    return VideoDataPromise::CreateAndReject(DECODE_ERROR, __func__);
+  }
+
   if (mShutdown) {
     NS_WARNING("RequestVideoData on shutdown MP4Reader!");
     return VideoDataPromise::CreateAndReject(CANCELED, __func__);
   }
 
   MOZ_ASSERT(HasVideo() && mPlatform && mVideo.mDecoder);
 
   bool eos = false;
@@ -645,16 +637,22 @@ MP4Reader::RequestVideoData(bool aSkipTo
   return p;
 }
 
 nsRefPtr<MediaDecoderReader::AudioDataPromise>
 MP4Reader::RequestAudioData()
 {
   MOZ_ASSERT(GetTaskQueue()->IsCurrentThreadIn());
   VLOG("");
+
+  if (!EnsureDecodersSetup()) {
+    NS_WARNING("Error constructing MP4 decoders");
+    return AudioDataPromise::CreateAndReject(DECODE_ERROR, __func__);
+  }
+
   if (mShutdown) {
     NS_WARNING("RequestAudioData on shutdown MP4Reader!");
     return AudioDataPromise::CreateAndReject(CANCELED, __func__);
   }
 
   MonitorAutoLock lock(mAudio.mMonitor);
   nsRefPtr<AudioDataPromise> p = mAudio.mPromise.Ensure(__func__);
   ScheduleUpdate(kAudio);
--- a/dom/media/fmp4/MP4Reader.h
+++ b/dom/media/fmp4/MP4Reader.h
@@ -98,16 +98,18 @@ public:
 
   virtual void DisableHardwareAcceleration() override;
 
 private:
 
   bool InitDemuxer();
   void ReturnOutput(MediaData* aData, TrackType aTrack);
 
+  bool EnsureDecodersSetup();
+
   // Sends input to decoder for aTrack, and output to the state machine,
   // if necessary.
   void Update(TrackType aTrack);
 
   // Enqueues a task to call Update(aTrack) on the decoder task queue.
   // Lock for corresponding track must be held.
   void ScheduleUpdate(TrackType aTrack);
 
@@ -130,17 +132,16 @@ private:
   void Error(mp4_demuxer::TrackType aTrack);
   void Flush(mp4_demuxer::TrackType aTrack);
   void DrainComplete(mp4_demuxer::TrackType aTrack);
   void UpdateIndex();
   bool IsSupportedAudioMimeType(const char* aMimeType);
   bool IsSupportedVideoMimeType(const char* aMimeType);
   void NotifyResourcesStatusChanged();
   void RequestCodecResource();
-  bool IsWaitingOnCodecResource();
   virtual bool IsWaitingOnCDMResource() override;
 
   Microseconds GetNextKeyframeTime();
   bool ShouldSkip(bool aSkipToNextKeyframe, int64_t aTimeThreshold);
 
   size_t SizeOfQueue(TrackType aTrack);
 
   nsRefPtr<MP4Stream> mStream;
@@ -280,16 +281,18 @@ private:
   bool mDemuxerInitialized;
 
   // True if we've gathered telemetry from an SPS.
   bool mFoundSPSForTelemetry;
 
   // Synchronized by decoder monitor.
   bool mIsEncrypted;
 
+  bool mAreDecodersSetup;
+
   bool mIndexReady;
   Monitor mDemuxerMonitor;
   nsRefPtr<SharedDecoderManager> mSharedDecoderManager;
 
 #if defined(MP4_READER_DORMANT_HEURISTIC)
   const bool mDormantEnabled;
 #endif
 };
--- a/dom/media/mediasource/MediaSourceReader.cpp
+++ b/dom/media/mediasource/MediaSourceReader.cpp
@@ -88,16 +88,31 @@ MediaSourceReader::IsWaitingMediaResourc
     if (!mEssentialTrackBuffers[i]->IsReady()) {
       return true;
     }
   }
 
   return !mHasEssentialTrackBuffers;
 }
 
+bool
+MediaSourceReader::IsWaitingOnCDMResource()
+{
+  ReentrantMonitorAutoEnter mon(mDecoder->GetReentrantMonitor());
+  MOZ_ASSERT(!IsWaitingMediaResources());
+
+  for (auto& trackBuffer : mTrackBuffers) {
+    if (trackBuffer->IsWaitingOnCDMResource()) {
+      return true;
+    }
+  }
+
+  return mInfo.IsEncrypted() && !mCDMProxy;
+}
+
 size_t
 MediaSourceReader::SizeOfVideoQueueInFrames()
 {
   if (!GetVideoReader()) {
     MSE_DEBUG("called with no video reader");
     return 0;
   }
   return GetVideoReader()->SizeOfVideoQueueInFrames();
@@ -1015,16 +1030,32 @@ MediaSourceReader::MaybeNotifyHaveData()
   if (!IsSeeking() && mVideoTrack && HaveData(mLastVideoTime, MediaData::VIDEO_DATA)) {
     haveVideo = true;
     WaitPromise(MediaData::VIDEO_DATA).ResolveIfExists(MediaData::VIDEO_DATA, __func__);
   }
   MSE_DEBUG("isSeeking=%d haveAudio=%d, haveVideo=%d",
             IsSeeking(), haveAudio, haveVideo);
 }
 
+static void
+CombineEncryptionData(EncryptionInfo& aTo, const EncryptionInfo& aFrom)
+{
+  if (!aFrom.mIsEncrypted) {
+    return;
+  }
+  aTo.mIsEncrypted = true;
+
+  if (!aTo.mType.IsEmpty() && !aTo.mType.Equals(aFrom.mType)) {
+    NS_WARNING("mismatched encryption types");
+  }
+
+  aTo.mType = aFrom.mType;
+  aTo.mInitData.AppendElements(aFrom.mInitData);
+}
+
 nsresult
 MediaSourceReader::ReadMetadata(MediaInfo* aInfo, MetadataTags** aTags)
 {
   ReentrantMonitorAutoEnter mon(mDecoder->GetReentrantMonitor());
 
   MSE_DEBUG("tracks=%u/%u audio=%p video=%p",
             mEssentialTrackBuffers.Length(), mTrackBuffers.Length(),
             mAudioTrack.get(), mVideoTrack.get());
@@ -1038,30 +1069,30 @@ MediaSourceReader::ReadMetadata(MediaInf
 
   if (mAudioTrack) {
     MOZ_ASSERT(mAudioTrack->IsReady());
     mAudioSourceDecoder = mAudioTrack->Decoders()[0];
 
     const MediaInfo& info = GetAudioReader()->GetMediaInfo();
     MOZ_ASSERT(info.HasAudio());
     mInfo.mAudio = info.mAudio;
-    mInfo.mIsEncrypted = mInfo.mIsEncrypted || info.mIsEncrypted;
+    CombineEncryptionData(mInfo.mCrypto, info.mCrypto);
     MSE_DEBUG("audio reader=%p duration=%lld",
               mAudioSourceDecoder.get(),
               mAudioSourceDecoder->GetReader()->GetDecoder()->GetMediaDuration());
   }
 
   if (mVideoTrack) {
     MOZ_ASSERT(mVideoTrack->IsReady());
     mVideoSourceDecoder = mVideoTrack->Decoders()[0];
 
     const MediaInfo& info = GetVideoReader()->GetMediaInfo();
     MOZ_ASSERT(info.HasVideo());
     mInfo.mVideo = info.mVideo;
-    mInfo.mIsEncrypted = mInfo.mIsEncrypted || info.mIsEncrypted;
+    CombineEncryptionData(mInfo.mCrypto, info.mCrypto);
     MSE_DEBUG("video reader=%p duration=%lld",
               GetVideoReader(),
               GetVideoReader()->GetDecoder()->GetMediaDuration());
   }
 
   *aInfo = mInfo;
   *aTags = nullptr; // TODO: Handle metadata.
 
--- a/dom/media/mediasource/MediaSourceReader.h
+++ b/dom/media/mediasource/MediaSourceReader.h
@@ -40,16 +40,17 @@ public:
     return NS_OK;
   }
 
   // Indicates the point in time at which the reader should consider
   // registered TrackBuffers essential for initialization.
   void PrepareInitialization();
 
   bool IsWaitingMediaResources() override;
+  bool IsWaitingOnCDMResource() override;
 
   nsRefPtr<AudioDataPromise> RequestAudioData() override;
   nsRefPtr<VideoDataPromise>
   RequestVideoData(bool aSkipToNextKeyframe, int64_t aTimeThreshold) override;
 
   virtual size_t SizeOfVideoQueueInFrames() override;
   virtual size_t SizeOfAudioQueueInFrames() override;
 
--- a/dom/media/mediasource/TrackBuffer.cpp
+++ b/dom/media/mediasource/TrackBuffer.cpp
@@ -42,16 +42,17 @@ extern PRLogModuleInfo* GetMediaSourceLo
 namespace mozilla {
 
 TrackBuffer::TrackBuffer(MediaSourceDecoder* aParentDecoder, const nsACString& aType)
   : mParentDecoder(aParentDecoder)
   , mType(aType)
   , mLastStartTimestamp(0)
   , mLastTimestampOffset(0)
   , mAdjustedTimestamp(0)
+  , mIsWaitingOnCDM(false)
   , mShutdown(false)
 {
   MOZ_COUNT_CTOR(TrackBuffer);
   mParser = ContainerParser::CreateForMIMEType(aType);
   mTaskQueue = new MediaTaskQueue(GetMediaDecodeThreadPool());
   aParentDecoder->AddTrackBuffer(this);
   mDecoderPerSegment = Preferences::GetBool("media.mediasource.decoder-per-segment", false);
   MSE_DEBUG("TrackBuffer created for parent decoder %p", aParentDecoder);
@@ -640,18 +641,17 @@ TrackBuffer::InitializeDecoder(SourceBuf
     return;
   }
   if (mCurrentDecoder != aDecoder) {
     MSE_DEBUG("append was cancelled. Aborting initialization.");
     return;
   }
 
   if (NS_SUCCEEDED(rv) && reader->IsWaitingOnCDMResource()) {
-    mWaitingDecoders.AppendElement(aDecoder);
-    return;
+    mIsWaitingOnCDM = true;
   }
 
   aDecoder->SetTaskQueue(nullptr);
 
   if (NS_FAILED(rv) || (!mi.HasVideo() && !mi.HasAudio())) {
     // XXX: Need to signal error back to owning SourceBuffer.
     MSE_DEBUG("Reader %p failed to initialize rv=%x audio=%d video=%d",
               reader, rv, mi.HasAudio(), mi.HasVideo());
@@ -803,16 +803,22 @@ bool
 TrackBuffer::IsReady()
 {
   ReentrantMonitorAutoEnter mon(mParentDecoder->GetReentrantMonitor());
   MOZ_ASSERT((mInfo.HasAudio() || mInfo.HasVideo()) || mInitializedDecoders.IsEmpty());
   return mInfo.HasAudio() || mInfo.HasVideo();
 }
 
 bool
+TrackBuffer::IsWaitingOnCDMResource()
+{
+  return mIsWaitingOnCDM;
+}
+
+bool
 TrackBuffer::ContainsTime(int64_t aTime, int64_t aTolerance)
 {
   ReentrantMonitorAutoEnter mon(mParentDecoder->GetReentrantMonitor());
   for (uint32_t i = 0; i < mInitializedDecoders.Length(); ++i) {
     nsRefPtr<dom::TimeRanges> r = new dom::TimeRanges();
     mInitializedDecoders[i]->GetBuffered(r);
     if (r->Find(double(aTime) / USECS_PER_S,
                 double(aTolerance) / USECS_PER_S) != dom::TimeRanges::NoIndex) {
@@ -870,31 +876,22 @@ TrackBuffer::Decoders()
 }
 
 #ifdef MOZ_EME
 nsresult
 TrackBuffer::SetCDMProxy(CDMProxy* aProxy)
 {
   ReentrantMonitorAutoEnter mon(mParentDecoder->GetReentrantMonitor());
 
-  for (uint32_t i = 0; i < mDecoders.Length(); ++i) {
-    nsresult rv = mDecoders[i]->SetCDMProxy(aProxy);
-    NS_ENSURE_SUCCESS(rv, rv);
+  for (auto& decoder : mDecoders) {
+    decoder->SetCDMProxy(aProxy);
   }
 
-  for (uint32_t i = 0; i < mWaitingDecoders.Length(); ++i) {
-    CDMCaps::AutoLock caps(aProxy->Capabilites());
-    caps.CallOnMainThreadWhenCapsAvailable(
-      NS_NewRunnableMethodWithArg<SourceBufferDecoder*>(this,
-                                                        &TrackBuffer::QueueInitializeDecoder,
-                                                        mWaitingDecoders[i]));
-  }
-
-  mWaitingDecoders.Clear();
-
+  mIsWaitingOnCDM = false;
+  mParentDecoder->NotifyWaitingForResourcesStatusChanged();
   return NS_OK;
 }
 #endif
 
 #if defined(DEBUG)
 void
 TrackBuffer::Dump(const char* aPath)
 {
--- a/dom/media/mediasource/TrackBuffer.h
+++ b/dom/media/mediasource/TrackBuffer.h
@@ -71,16 +71,18 @@ public:
 
   // Returns true if an init segment has been appended.
   bool HasInitSegment();
 
   // Returns true iff mParser->HasInitData() and the decoder using that init
   // segment has successfully initialized by setting mHas{Audio,Video}..
   bool IsReady();
 
+  bool IsWaitingOnCDMResource();
+
   // Returns true if any of the decoders managed by this track buffer
   // contain aTime in their buffered ranges.
   bool ContainsTime(int64_t aTime, int64_t aTolerance);
 
   void BreakCycles();
 
   // Run MSE Reset Parser State Algorithm.
   // 3.5.2 Reset Parser State
@@ -180,20 +182,16 @@ private:
   // invoking Shutdown. This is all so that we can avoid destroying the decoders
   // off-main-thread. :-(
   nsTArray<nsRefPtr<SourceBufferDecoder>> mShutdownDecoders;
 
   // Contains only the initialized decoders managed by this TrackBuffer.
   // Access protected by mParentDecoder's monitor.
   nsTArray<nsRefPtr<SourceBufferDecoder>> mInitializedDecoders;
 
-  // Decoders which are waiting on a Content Decryption Module to be able to
-  // finish ReadMetadata.
-  nsTArray<nsRefPtr<SourceBufferDecoder>> mWaitingDecoders;
-
   // The decoder that the owning SourceBuffer is currently appending data to.
   // Modified on the main thread only.
   nsRefPtr<SourceBufferDecoder> mCurrentDecoder;
 
   nsRefPtr<MediaSourceDecoder> mParentDecoder;
   const nsCString mType;
 
   // The last start and end timestamps added to the TrackBuffer via
@@ -201,16 +199,19 @@ private:
   int64_t mLastStartTimestamp;
   Maybe<int64_t> mLastEndTimestamp;
   void AdjustDecodersTimestampOffset(int32_t aOffset);
 
   // The timestamp offset used by our current decoder, in microseconds.
   int64_t mLastTimestampOffset;
   int64_t mAdjustedTimestamp;
 
+  // True if at least one of our decoders has encrypted content.
+  bool mIsWaitingOnCDM;
+
   // Set when the first decoder used by this TrackBuffer is initialized.
   // Protected by mParentDecoder's monitor.
   MediaInfo mInfo;
 
   void ContinueShutdown();
   MediaPromiseHolder<ShutdownPromise> mShutdownPromise;
   bool mDecoderPerSegment;
   bool mShutdown;