Bug 1351124 - Detect MP4 PSSH boxes in MOOF boxes and dispatch those in 'encrypted' events to content. r?jya draft
author: Chris Pearce <cpearce@mozilla.com>
Wed, 12 Jul 2017 22:07:15 +1200
changeset: 608569 413d490f9a02eae80485893e34da69912e39c357
parent: 608564 29cd7ea0e3b76935d76e376ee0f88f4c93aca85d
child: 637353 3228c1e4db46ad34360a6cbf1f8217c78dcec633
push id: 68330
push user: cpearce@mozilla.com
push date: Thu, 13 Jul 2017 21:31:39 +0000
reviewers: jya
bugs: 1351124
milestone: 56.0a1
Bug 1351124 - Detect MP4 PSSH boxes in MOOF boxes and dispatch those in 'encrypted' events to content. r?jya We detect when PSSH boxes are contained in a MOOF and stash them in the mp4_demuxer::Moof object. When the mp4_demuxer::SampleIterator returns a sample, we check whether it's the first sample from its MOOF, and if so, we attach any PSSH boxes from that MOOF to the sample. The TrackBuffersManager checks samples upon demux to see whether they have any EME init data attached, and if so dispatches those to the HTMLMediaElement in 'encrypted' events. MozReview-Commit-ID: F8GobKOr96F
dom/media/MediaData.h
dom/media/mediasource/TrackBuffersManager.cpp
dom/media/mediasource/TrackBuffersManager.h
media/libstagefright/binding/Box.cpp
media/libstagefright/binding/Index.cpp
media/libstagefright/binding/MoofParser.cpp
media/libstagefright/binding/include/mp4_demuxer/Box.h
media/libstagefright/binding/include/mp4_demuxer/MoofParser.h
--- a/dom/media/MediaData.h
+++ b/dom/media/MediaData.h
@@ -586,16 +586,18 @@ public:
 
 class CryptoSample : public CryptoTrack
 {
 public:
   nsTArray<uint16_t> mPlainSizes;
   nsTArray<uint32_t> mEncryptedSizes;
   nsTArray<uint8_t> mIV;
   nsTArray<nsCString> mSessionIds;
+  nsTArray<nsTArray<uint8_t>> mInitDatas;
+  nsString mInitDataType;
 };
 
 // MediaRawData is a MediaData container used to store demuxed, still compressed
 // samples.
 // Use MediaRawData::CreateWriter() to obtain a MediaRawDataWriter object that
 // provides methods to modify and manipulate the data.
 // Memory allocations are fallible. Methods return a boolean indicating if
 // memory allocations were successful. Return values should always be checked.
--- a/dom/media/mediasource/TrackBuffersManager.cpp
+++ b/dom/media/mediasource/TrackBuffersManager.cpp
@@ -58,17 +58,17 @@ AppendStateToStr(SourceBufferAttributes:
   }
 }
 
 static Atomic<uint32_t> sStreamSourceID(0u);
 
 class DispatchKeyNeededEvent : public Runnable {
 public:
   DispatchKeyNeededEvent(AbstractMediaDecoder* aDecoder,
-                         nsTArray<uint8_t>& aInitData,
+                         const nsTArray<uint8_t>& aInitData,
                          const nsString& aInitDataType)
     : Runnable("DispatchKeyNeededEvent")
     , mDecoder(aDecoder)
     , mInitData(aInitData)
     , mInitDataType(aInitDataType)
   {
   }
   NS_IMETHOD Run() override {
@@ -1267,22 +1267,39 @@ TrackBuffersManager::DoDemuxVideo()
   mVideoTracks.mDemuxer->GetSamples(-1)
     ->Then(GetTaskQueue(), __func__, this,
            &TrackBuffersManager::OnVideoDemuxCompleted,
            &TrackBuffersManager::OnVideoDemuxFailed)
     ->Track(mVideoTracks.mDemuxRequest);
 }
 
 void
-TrackBuffersManager::OnVideoDemuxCompleted(RefPtr<MediaTrackDemuxer::SamplesHolder> aSamples)
+TrackBuffersManager::MaybeDispatchEncryptedEvent(
+  const nsTArray<RefPtr<MediaRawData>>& aSamples)
+{
+  // Try and dispatch 'encrypted'. Won't go if ready state still HAVE_NOTHING.
+  for (const RefPtr<MediaRawData>& sample : aSamples) {
+    for (const nsTArray<uint8_t>& initData : sample->mCrypto.mInitDatas) {
+      nsCOMPtr<nsIRunnable> r = new DispatchKeyNeededEvent(
+        mParentDecoder, initData, sample->mCrypto.mInitDataType);
+      mAbstractMainThread->Dispatch(r.forget());
+    }
+  }
+}
+
+void
+TrackBuffersManager::OnVideoDemuxCompleted(
+  RefPtr<MediaTrackDemuxer::SamplesHolder> aSamples)
 {
   MOZ_ASSERT(OnTaskQueue());
   MSE_DEBUG("%" PRIuSIZE " video samples demuxed", aSamples->mSamples.Length());
   mVideoTracks.mDemuxRequest.Complete();
   mVideoTracks.mQueuedSamples.AppendElements(aSamples->mSamples);
+
+  MaybeDispatchEncryptedEvent(aSamples->mSamples);
   DoDemuxAudio();
 }
 
 void
 TrackBuffersManager::DoDemuxAudio()
 {
   MOZ_ASSERT(OnTaskQueue());
   if (!HasAudio()) {
@@ -1299,16 +1316,18 @@ TrackBuffersManager::DoDemuxAudio()
 void
 TrackBuffersManager::OnAudioDemuxCompleted(RefPtr<MediaTrackDemuxer::SamplesHolder> aSamples)
 {
   MOZ_ASSERT(OnTaskQueue());
   MSE_DEBUG("%" PRIuSIZE " audio samples demuxed", aSamples->mSamples.Length());
   mAudioTracks.mDemuxRequest.Complete();
   mAudioTracks.mQueuedSamples.AppendElements(aSamples->mSamples);
   CompleteCodedFrameProcessing();
+
+  MaybeDispatchEncryptedEvent(aSamples->mSamples);
 }
 
 void
 TrackBuffersManager::CompleteCodedFrameProcessing()
 {
   MOZ_ASSERT(OnTaskQueue());
 
   // 1. For each coded frame in the media segment run the following steps:
--- a/dom/media/mediasource/TrackBuffersManager.h
+++ b/dom/media/mediasource/TrackBuffersManager.h
@@ -253,16 +253,21 @@ private:
   void DoDemuxAudio();
   void OnAudioDemuxCompleted(RefPtr<MediaTrackDemuxer::SamplesHolder> aSamples);
   void OnAudioDemuxFailed(const MediaResult& aError)
   {
     mAudioTracks.mDemuxRequest.Complete();
     OnDemuxFailed(TrackType::kAudioTrack, aError);
   }
 
+  // Dispatches an "encrypted" event for each initData entry attached to a
+  // sample in the array.
+  void MaybeDispatchEncryptedEvent(
+    const nsTArray<RefPtr<MediaRawData>>& aSamples);
+
   void DoEvictData(const media::TimeUnit& aPlaybackTime, int64_t aSizeToEvict);
 
   struct TrackData
   {
     TrackData()
       : mNumTracks(0)
       , mNeedRandomAccessPoint(true)
       , mSizeBuffer(0)
--- a/media/libstagefright/binding/Box.cpp
+++ b/media/libstagefright/binding/Box.cpp
@@ -68,16 +68,17 @@ Box::Box(BoxContext* aContext, uint64_t 
   }
 
   size_t bytes;
   if (!mContext->mSource->CachedReadAt(aOffset, header, sizeof(header),
                                        &bytes) ||
       bytes != sizeof(header)) {
     return;
   }
+  mHeader.AppendElements(header, sizeof(header));
 
   uint64_t size = BigEndian::readUint32(header);
   if (size == 1) {
     uint8_t bigLength[8];
     if (aOffset > INT64_MAX - sizeof(header) - sizeof(bigLength)) {
       return;
     }
     MediaByteRange bigLengthRange(headerRange.mEnd,
@@ -86,16 +87,17 @@ Box::Box(BoxContext* aContext, uint64_t 
         !byteRange->Contains(bigLengthRange) ||
         !mContext->mSource->CachedReadAt(aOffset + sizeof(header), bigLength,
                                          sizeof(bigLength), &bytes) ||
         bytes != sizeof(bigLength)) {
       return;
     }
     size = BigEndian::readUint64(bigLength);
     mBodyOffset = bigLengthRange.mEnd;
+    mHeader.AppendElements(bigLength, sizeof(bigLength));
   } else if (size == 0) {
     // box extends to end of file.
     size = mContext->mByteRanges.LastInterval().mEnd - aOffset;
     mBodyOffset = headerRange.mEnd;
   } else {
     mBodyOffset = headerRange.mEnd;
   }
 
--- a/media/libstagefright/binding/Index.cpp
+++ b/media/libstagefright/binding/Index.cpp
@@ -119,16 +119,30 @@ already_AddRefed<MediaRawData> SampleIte
   }
 
   size_t bytesRead;
   if (!mIndex->mSource->ReadAt(sample->mOffset, writer->Data(), sample->Size(),
                                &bytesRead) || bytesRead != sample->Size()) {
     return nullptr;
   }
 
+  if (mCurrentSample == 0 && mIndex->mMoofParser) {
+    const nsTArray<Moof>& moofs = mIndex->mMoofParser->Moofs();
+    MOZ_ASSERT(mCurrentMoof < moofs.Length());
+    const Moof* currentMoof = &moofs[mCurrentMoof];
+    if (!currentMoof->mPsshes.IsEmpty()) {
+      // This Moof contained crypto init data. Report that. We only report
+      // the init data on the Moof's first sample, to avoid reporting it more
+      // than once per Moof.
+      writer->mCrypto.mValid = true;
+      writer->mCrypto.mInitDatas.AppendElements(currentMoof->mPsshes);
+      writer->mCrypto.mInitDataType = NS_LITERAL_STRING("cenc");
+    }
+  }
+
   if (!s->mCencRange.IsEmpty()) {
     MoofParser* parser = mIndex->mMoofParser.get();
 
     if (!parser || !parser->mSinf.IsValid()) {
       return nullptr;
     }
 
     uint8_t ivSize = parser->mSinf.mDefaultIVSize;
--- a/media/libstagefright/binding/MoofParser.cpp
+++ b/media/libstagefright/binding/MoofParser.cpp
@@ -389,21 +389,39 @@ public:
     return aA->mCompositionRange.start < aB->mCompositionRange.start;
   }
 };
 
 Moof::Moof(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, uint64_t* aDecodeTime, bool aIsAudio)
   : mRange(aBox.Range())
   , mMaxRoundingError(35000)
 {
+  nsTArray<Box> psshBoxes;
   for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
     if (box.IsType("traf")) {
       ParseTraf(box, aTrex, aMvhd, aMdhd, aEdts, aSinf, aDecodeTime, aIsAudio);
     }
+    if (box.IsType("pssh")) {
+      psshBoxes.AppendElement(box);
+    }
   }
+
+  // The EME spec requires that PSSH boxes which are contiguous in the
+  // file are dispatched to the media element in a single "encrypted" event.
+  // So append contiguous boxes here.
+  for (size_t i = 0; i < psshBoxes.Length(); ++i) {
+    Box box = psshBoxes[i];
+    if (i == 0 || box.Offset() != psshBoxes[i - 1].NextOffset()) {
+      mPsshes.AppendElement();
+    }
+    nsTArray<uint8_t>& pssh = mPsshes.LastElement();
+    pssh.AppendElements(box.Header());
+    pssh.AppendElements(box.Read());
+  }
+
   if (IsValid()) {
     if (mIndex.Length()) {
       // Ensure the samples are contiguous with no gaps.
       nsTArray<Sample*> ctsOrder;
       for (auto& sample : mIndex) {
         ctsOrder.AppendElement(&sample);
       }
       ctsOrder.Sort(CtsComparator());
--- a/media/libstagefright/binding/include/mp4_demuxer/Box.h
+++ b/media/libstagefright/binding/include/mp4_demuxer/Box.h
@@ -48,23 +48,26 @@ public:
 
   Box Next() const;
   Box FirstChild() const;
   nsTArray<uint8_t> Read();
   bool Read(nsTArray<uint8_t>* aDest, const MediaByteRange& aRange);
 
   static const uint64_t kMAX_BOX_READ;
 
+  const nsTArray<uint8_t>& Header() const { return mHeader; }
+
 private:
   bool Contains(MediaByteRange aRange) const;
   BoxContext* mContext;
   mozilla::MediaByteRange mRange;
   uint64_t mBodyOffset;
   uint64_t mChildOffset;
   AtomType mType;
+  nsTArray<uint8_t> mHeader;
   const Box* mParent;
 };
 
 // BoxReader takes a copy of a box contents and serves through an AutoByteReader.
 class MOZ_RAII BoxReader
 {
 public:
   explicit BoxReader(Box& aBox)
--- a/media/libstagefright/binding/include/mp4_demuxer/MoofParser.h
+++ b/media/libstagefright/binding/include/mp4_demuxer/MoofParser.h
@@ -230,16 +230,17 @@ public:
   Interval<Microseconds> mTimeRange;
   FallibleTArray<Sample> mIndex;
 
   nsTArray<CencSampleEncryptionInfoEntry> mFragmentSampleEncryptionInfoEntries;
   nsTArray<SampleToGroupEntry> mFragmentSampleToGroupEntries;
 
   nsTArray<Saiz> mSaizs;
   nsTArray<Saio> mSaios;
+  nsTArray<nsTArray<uint8_t>> mPsshes;
 
 private:
     // aDecodeTime is updated to the end of the parsed TRAF on return.
   void ParseTraf(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, uint64_t* aDecodeTime, bool aIsAudio);
   // aDecodeTime is updated to the end of the parsed TRUN on return.
   bool ParseTrun(Box& aBox, Tfhd& aTfhd, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, uint64_t* aDecodeTime, bool aIsAudio);
   void ParseSaiz(Box& aBox);
   void ParseSaio(Box& aBox);