Bug 1519617 - Update MoofParser to handle tracks using track_id 0. r=jya
author: Bryce Van Dyk <bvandyk@mozilla.com>
Wed, 16 Jan 2019 11:45:42 +0000
changeset 514061 4c800571b02d794cd54b02a2a2a3e0e4c21c5a0b
parent 514060 475348be3c9463b96ee614c44afe33e3db17e960
child 514062 19efbb6ade6a2bc0736639c2325fa325e9641523
push id: 1953
push user: ffxbld-merge
push date: Mon, 11 Mar 2019 12:10:20 +0000
treeherdermozilla-release@9c35dcbaa899 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: jya
bugs: 1519617
milestone: 66.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1519617 - Update MoofParser to handle tracks using track_id 0. r=jya Using track_id 0 is forbidden by the mp4 spec; however, some sites still serve media using this track_id. We've been using the 0 track ID to trigger special handling in the MoofParser where we will parse multiple tracks, and this led us to be tolerant of tracks using this reserved id (though we likely had some bugs due to this). Since sites are using this track_id, and as other browsers (and Firefox until I broke this) tolerate such media, we should too. In order to do so correctly, we should no longer use track_id=0 as a special case in the MoofParser, and instead have an explicit flag, which is what this patch does. Differential Revision: https://phabricator.services.mozilla.com/D16428
dom/media/gtest/mp4_demuxer/TestParser.cpp
dom/media/mediasource/ContainerParser.cpp
dom/media/mp4/MP4Metadata.cpp
dom/media/mp4/MoofParser.cpp
dom/media/mp4/MoofParser.h
--- a/dom/media/gtest/mp4_demuxer/TestParser.cpp
+++ b/dom/media/gtest/mp4_demuxer/TestParser.cpp
@@ -89,17 +89,17 @@ TEST(MP4Metadata, EmptyStream) {
   // We can seek anywhere in any MPEG4.
   EXPECT_TRUE(metadata.CanSeek());
   EXPECT_FALSE(metadata.Crypto().Ref()->valid);
 }
 
 TEST(MoofParser, EmptyStream) {
   RefPtr<ByteStream> stream = new TestStream(nullptr, 0);
 
-  MoofParser parser(stream, 0, false);
+  MoofParser parser(stream, 0, false, true);
   EXPECT_EQ(0u, parser.mOffset);
   EXPECT_TRUE(parser.ReachedEnd());
 
   MediaByteRangeSet byteRanges;
   EXPECT_FALSE(parser.RebuildFragmentedIndex(byteRanges));
 
   EXPECT_TRUE(parser.GetCompositionRange(byteRanges).IsNull());
   EXPECT_TRUE(parser.mInitRange.IsEmpty());
@@ -399,17 +399,17 @@ TEST(MoofParser, test_case_mp4) {
   length = ArrayLength(testFiles);
 
   for (size_t test = 0; test < length; ++test) {
     nsTArray<uint8_t> buffer = ReadTestFile(tests[test].mFilename);
     ASSERT_FALSE(buffer.IsEmpty());
     RefPtr<ByteStream> stream =
         new TestStream(buffer.Elements(), buffer.Length());
 
-    MoofParser parser(stream, 0, false);
+    MoofParser parser(stream, 0, false, true);
     EXPECT_EQ(0u, parser.mOffset) << tests[test].mFilename;
     EXPECT_FALSE(parser.ReachedEnd()) << tests[test].mFilename;
     EXPECT_TRUE(parser.mInitRange.IsEmpty()) << tests[test].mFilename;
 
     RefPtr<MediaByteBuffer> metadataBuffer = parser.Metadata();
     EXPECT_TRUE(metadataBuffer) << tests[test].mFilename;
 
     EXPECT_FALSE(parser.mInitRange.IsEmpty()) << tests[test].mFilename;
--- a/dom/media/mediasource/ContainerParser.cpp
+++ b/dom/media/mediasource/ContainerParser.cpp
@@ -521,17 +521,18 @@ class MP4ContainerParser : public Contai
     if (initSegment) {
       mResource = new SourceBufferResource();
       DDLINKCHILD("resource", mResource.get());
       mStream = new MP4Stream(mResource);
       // We use a timestampOffset of 0 for ContainerParser, and require
       // consumers of ParseStartAndEndTimestamps to add their timestamp offset
       // manually. This allows the ContainerParser to be shared across different
       // timestampOffsets.
-      mParser = new MoofParser(mStream, 0, /* aIsAudio = */ false);
+      mParser = new MoofParser(mStream, 0, /* aIsAudio = */ false,
+                               /* aIsMultitrackParser */ true);
       DDLINKCHILD("parser", mParser.get());
       mInitData = new MediaByteBuffer();
       mCompleteInitSegmentRange = MediaByteRange();
       mCompleteMediaHeaderRange = MediaByteRange();
       mCompleteMediaSegmentRange = MediaByteRange();
       mGlobalOffset = mTotalParsed;
     } else if (!mStream || !mParser) {
       mTotalParsed += aData->Length();
--- a/dom/media/mp4/MP4Metadata.cpp
+++ b/dom/media/mp4/MP4Metadata.cpp
@@ -455,17 +455,17 @@ MP4Metadata::ResultAndIndice MP4Metadata
   UniquePtr<IndiceWrapper> indice;
   indice = mozilla::MakeUnique<IndiceWrapper>(indiceRawData);
 
   return {NS_OK, std::move(indice)};
 }
 
 /*static*/ MP4Metadata::ResultAndByteBuffer MP4Metadata::Metadata(
     ByteStream* aSource) {
-  auto parser = mozilla::MakeUnique<MoofParser>(aSource, 0, false);
+  auto parser = mozilla::MakeUnique<MoofParser>(aSource, 0, false, true);
   RefPtr<mozilla::MediaByteBuffer> buffer = parser->Metadata();
   if (!buffer) {
     return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR,
                         RESULT_DETAIL("Cannot parse metadata")),
             nullptr};
   }
   return {NS_OK, std::move(buffer)};
 }
--- a/dom/media/mp4/MoofParser.cpp
+++ b/dom/media/mp4/MoofParser.cpp
@@ -50,17 +50,17 @@ bool MoofParser::RebuildFragmentedIndex(
   bool foundValidMoof = false;
 
   for (Box box(&aContext, mOffset); box.IsAvailable(); box = box.Next()) {
     if (box.IsType("moov") && mInitRange.IsEmpty()) {
       mInitRange = MediaByteRange(0, box.Range().mEnd);
       ParseMoov(box);
     } else if (box.IsType("moof")) {
       Moof moof(box, mTrex, mMvhd, mMdhd, mEdts, mSinf, &mLastDecodeTime,
-                mIsAudio);
+                mIsAudio, mIsMultitrackParser);
 
       if (!moof.IsValid() && !box.Next().IsAvailable()) {
         // Moof isn't valid abort search for now.
         break;
       }
 
       if (!mMoofs.IsEmpty()) {
         // Stitch time ranges together in the case of a (hopefully small) time
@@ -222,21 +222,21 @@ void MoofParser::ParseMoov(Box& aBox) {
 }
 
 void MoofParser::ParseTrak(Box& aBox) {
   Tkhd tkhd;
   for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
     if (box.IsType("tkhd")) {
       tkhd = Tkhd(box);
     } else if (box.IsType("mdia")) {
-      if (!mTrex.mTrackId || tkhd.mTrackId == mTrex.mTrackId) {
+      if (mIsMultitrackParser || tkhd.mTrackId == mTrex.mTrackId) {
         ParseMdia(box, tkhd);
       }
     } else if (box.IsType("edts") &&
-               (!mTrex.mTrackId || tkhd.mTrackId == mTrex.mTrackId)) {
+               (mIsMultitrackParser || tkhd.mTrackId == mTrex.mTrackId)) {
       mEdts = Edts(box);
     }
   }
 }
 
 void MoofParser::ParseMdia(Box& aBox, Tkhd& aTkhd) {
   for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
     if (box.IsType("mdhd")) {
@@ -246,22 +246,18 @@ void MoofParser::ParseMdia(Box& aBox, Tk
     }
   }
 }
 
 void MoofParser::ParseMvex(Box& aBox) {
   for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
     if (box.IsType("trex")) {
       Trex trex = Trex(box);
-      if (!mTrex.mTrackId || trex.mTrackId == mTrex.mTrackId) {
-        auto trackId = mTrex.mTrackId;
+      if (mIsMultitrackParser || trex.mTrackId == mTrex.mTrackId) {
         mTrex = trex;
-        // Keep the original trackId, as should it be 0 we want to continue
-        // parsing all tracks.
-        mTrex.mTrackId = trackId;
       }
     }
   }
 }
 
 void MoofParser::ParseMinf(Box& aBox) {
   for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
     if (box.IsType("stbl")) {
@@ -294,18 +290,18 @@ void MoofParser::ParseStbl(Box& aBox) {
           return;
         }
       }
     }
   }
 }
 
 void MoofParser::ParseStsd(Box& aBox) {
-  if (mTrex.mTrackId == 0) {
-    // If mTrex.mTrackId is 0, then the parser is being used to read multiple
+  if (mIsMultitrackParser) {
+    // If mIsMultitrackParser, then the parser is being used to read multiple
     // tracks metadata, and it is not a sane operation to try and map multiple
     // sample description boxes, from different tracks, onto the parser, which
     // is modeled around storing metadata for a single track.
     return;
   }
   MOZ_ASSERT(
       mSampleDescriptions.IsEmpty(),
       "Shouldn't have any sample descriptions when starting to parse stsd");
@@ -348,22 +344,24 @@ class CtsComparator {
     return aA->mCompositionRange.start == aB->mCompositionRange.start;
   }
   bool LessThan(Sample* const aA, Sample* const aB) const {
     return aA->mCompositionRange.start < aB->mCompositionRange.start;
   }
 };
 
 Moof::Moof(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts,
-           Sinf& aSinf, uint64_t* aDecodeTime, bool aIsAudio)
+           Sinf& aSinf, uint64_t* aDecodeTime, bool aIsAudio,
+           bool aIsMultitrackParser)
     : mRange(aBox.Range()), mTfhd(aTrex), mMaxRoundingError(35000) {
   nsTArray<Box> psshBoxes;
   for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
     if (box.IsType("traf")) {
-      ParseTraf(box, aTrex, aMvhd, aMdhd, aEdts, aSinf, aDecodeTime, aIsAudio);
+      ParseTraf(box, aTrex, aMvhd, aMdhd, aEdts, aSinf, aDecodeTime, aIsAudio,
+                aIsMultitrackParser);
     }
     if (box.IsType("pssh")) {
       psshBoxes.AppendElement(box);
     }
   }
 
   // The EME spec requires that PSSH boxes which are contiguous in the
   // file are dispatched to the media element in a single "encrypted" event.
@@ -501,24 +499,24 @@ bool Moof::ProcessCencAuxInfo(AtomType a
   for (int i = 0; i < cencRanges.Length(); i++) {
     mIndex[i].mCencRange = cencRanges[i];
   }
   return true;
 }
 
 void Moof::ParseTraf(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd,
                      Edts& aEdts, Sinf& aSinf, uint64_t* aDecodeTime,
-                     bool aIsAudio) {
+                     bool aIsAudio, bool aIsMultitrackParser) {
   MOZ_ASSERT(aDecodeTime);
   Tfdt tfdt;
 
   for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
     if (box.IsType("tfhd")) {
       mTfhd = Tfhd(box, aTrex);
-    } else if (!aTrex.mTrackId || mTfhd.mTrackId == aTrex.mTrackId) {
+    } else if (aIsMultitrackParser || mTfhd.mTrackId == aTrex.mTrackId) {
       if (box.IsType("tfdt")) {
         tfdt = Tfdt(box);
       } else if (box.IsType("sgpd")) {
         Sgpd sgpd(box);
         if (sgpd.IsValid() && sgpd.mGroupingType == "seig") {
           mFragmentSampleEncryptionInfoEntries.Clear();
           if (!mFragmentSampleEncryptionInfoEntries.AppendElements(
                   sgpd.mEntries, mozilla::fallible)) {
@@ -546,17 +544,17 @@ void Moof::ParseTraf(Box& aBox, Trex& aT
         if (!mSaios.AppendElement(Saio(box, aSinf.mDefaultEncryptionType),
                                   mozilla::fallible)) {
           LOG(Moof, "OOM");
           return;
         }
       }
     }
   }
-  if (aTrex.mTrackId && mTfhd.mTrackId != aTrex.mTrackId) {
+  if (!aIsMultitrackParser && mTfhd.mTrackId != aTrex.mTrackId) {
     return;
   }
   // Now search for TRUN boxes.
   uint64_t decodeTime =
       tfdt.IsValid() ? tfdt.mBaseMediaDecodeTime : *aDecodeTime;
   for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
     if (box.IsType("trun")) {
       if (ParseTrun(box, aMvhd, aMdhd, aEdts, &decodeTime, aIsAudio).isOk()) {
--- a/dom/media/mp4/MoofParser.h
+++ b/dom/media/mp4/MoofParser.h
@@ -222,17 +222,18 @@ class Sgpd final : public Atom  // Sampl
 // these are encrypted when parsing the track fragment header (tfhd).
 struct SampleDescriptionEntry {
   bool mIsEncryptedEntry = false;
 };
 
 class Moof final : public Atom {
  public:
   Moof(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts,
-       Sinf& aSinf, uint64_t* aDecoderTime, bool aIsAudio);
+       Sinf& aSinf, uint64_t* aDecoderTime, bool aIsAudio,
+       bool aIsMultitrackParser);
   bool GetAuxInfo(AtomType aType, FallibleTArray<MediaByteRange>* aByteRanges);
   void FixRounding(const Moof& aMoof);
 
   mozilla::MediaByteRange mRange;
   mozilla::MediaByteRange mMdatRange;
   MP4Interval<Microseconds> mTimeRange;
   FallibleTArray<Sample> mIndex;
 
@@ -243,17 +244,18 @@ class Moof final : public Atom {
   Tfhd mTfhd;
   FallibleTArray<Saiz> mSaizs;
   FallibleTArray<Saio> mSaios;
   nsTArray<nsTArray<uint8_t>> mPsshes;
 
  private:
   // aDecodeTime is updated to the end of the parsed TRAF on return.
   void ParseTraf(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts,
-                 Sinf& aSinf, uint64_t* aDecodeTime, bool aIsAudio);
+                 Sinf& aSinf, uint64_t* aDecodeTime, bool aIsAudio,
+                 bool aIsMultitrackParser);
   // aDecodeTime is updated to the end of the parsed TRUN on return.
   Result<Ok, nsresult> ParseTrun(Box& aBox, Mvhd& aMvhd, Mdhd& aMdhd,
                                  Edts& aEdts, uint64_t* aDecodeTime,
                                  bool aIsAudio);
   // Process the sample auxiliary information used by common encryption.
   // aScheme is used to select the appropriate auxiliary information and should
   // be set based on the encryption scheme used by the track being processed.
   // Note, the term cenc here refers to the standard, not the specific scheme
@@ -262,24 +264,28 @@ class Moof final : public Atom {
   bool ProcessCencAuxInfo(AtomType aScheme);
   uint64_t mMaxRoundingError;
 };
 
 DDLoggedTypeDeclName(MoofParser);
 
 class MoofParser : public DecoderDoctorLifeLogger<MoofParser> {
  public:
-  MoofParser(ByteStream* aSource, uint32_t aTrackId, bool aIsAudio)
+  MoofParser(ByteStream* aSource, uint32_t aTrackId, bool aIsAudio,
+             bool aIsMultitrackParser = false)
       : mSource(aSource),
         mOffset(0),
         mTrex(aTrackId),
         mIsAudio(aIsAudio),
-        mLastDecodeTime(0) {
-    // Setting the mTrex.mTrackId to 0 is a nasty work around for calculating
-    // the composition range for MSE. We need an array of tracks.
+        mLastDecodeTime(0),
+        mIsMultitrackParser(aIsMultitrackParser) {
+    // Setting mIsMultitrackParser is a nasty work around for calculating
+    // the composition range for MSE that causes the parser to parse multiple
+    // tracks. Ideally we'd store an array of tracks with different metadata
+    // for each.
     DDLINKCHILD("source", aSource);
   }
   bool RebuildFragmentedIndex(const mozilla::MediaByteRangeSet& aByteRanges);
   // If *aCanEvict is set to true. then will remove all moofs already parsed
   // from index then rebuild the index. *aCanEvict is set to true upon return if
   // some moofs were removed.
   bool RebuildFragmentedIndex(const mozilla::MediaByteRangeSet& aByteRanges,
                               bool* aCanEvict);
@@ -321,12 +327,13 @@ class MoofParser : public DecoderDoctorL
   nsTArray<Moof>& Moofs() { return mMoofs; }
 
  private:
   void ScanForMetadata(mozilla::MediaByteRange& aMoov);
   nsTArray<Moof> mMoofs;
   nsTArray<MediaByteRange> mMediaRanges;
   bool mIsAudio;
   uint64_t mLastDecodeTime;
+  bool mIsMultitrackParser;
 };
 }  // namespace mozilla
 
 #endif