Bug 1523563 - Make sense of EbmlComposer. r=bryce
author Andreas Pehrson <apehrson@mozilla.com>
Wed, 03 Jul 2019 09:25:03 +0000
changeset 481090 6c52ecd21b3033b903d1689eecd6134ab28d5aed
parent 481089 f5a1371cca6e8a7af4c65745489dbb0bba00a2f1
child 481091 d9f2bc3dd281e95f1711f327e8d45649aa2f8077
push id 89078
push user pehrsons@gmail.com
push date Wed, 03 Jul 2019 09:31:19 +0000
treeherder autoland@d9f2bc3dd281 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers bryce
bugs 1523563
milestone 69.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1523563 - Make sense of EbmlComposer. r=bryce

This does two things:
- Makes the code for how flushing works actually readable (seriously).
- Allows audio-only webms.

Differential Revision: https://phabricator.services.mozilla.com/D35173
dom/media/webm/EbmlComposer.cpp
dom/media/webm/EbmlComposer.h
--- a/dom/media/webm/EbmlComposer.cpp
+++ b/dom/media/webm/EbmlComposer.cpp
@@ -13,18 +13,23 @@
 #include "limits.h"
 
 namespace mozilla {
 
 // Timecode scale in nanoseconds
 static const unsigned long TIME_CODE_SCALE = 1000000;
 // The WebM header size without audio CodecPrivateData
 static const int32_t DEFAULT_HEADER_SIZE = 1024;
+// Number of milliseconds after which we flush audio-only clusters
+static const int32_t FLUSH_AUDIO_ONLY_AFTER_MS = 1000;
 
 void EbmlComposer::GenerateHeader() {
+  MOZ_RELEASE_ASSERT(!mMetadataFinished);
+  MOZ_RELEASE_ASSERT(mHasAudio || mHasVideo);
+
   // Write the EBML header.
   EbmlGlobal ebml;
   // The WEbM header default size usually smaller than 1k.
   auto buffer =
       MakeUnique<uint8_t[]>(DEFAULT_HEADER_SIZE + mCodecPrivateData.Length());
   ebml.buf = buffer.get();
   ebml.offset = 0;
   writeHeader(&ebml);
@@ -65,160 +70,153 @@ void EbmlComposer::GenerateHeader() {
         Ebml_EndSubElement(&ebml, &trackLoc);
       }
     }
     // The Recording length is unknown and
     // ignore write the whole Segment element size
   }
   MOZ_ASSERT(ebml.offset <= DEFAULT_HEADER_SIZE + mCodecPrivateData.Length(),
              "write more data > EBML_BUFFER_SIZE");
-  auto block = mClusterBuffs.AppendElement();
+  auto block = mFinishedClusters.AppendElement();
   block->SetLength(ebml.offset);
   memcpy(block->Elements(), ebml.buf, ebml.offset);
-  mFlushState |= FLUSH_METADATA;
-}
-
-void EbmlComposer::FinishMetadata() {
-  if (mFlushState & FLUSH_METADATA) {
-    // We don't remove the first element of mClusterBuffs because the
-    // |mClusterHeaderIndex| may have value.
-    mClusterCanFlushBuffs.AppendElement()->SwapElements(mClusterBuffs[0]);
-    mFlushState &= ~FLUSH_METADATA;
-  }
+  mMetadataFinished = true;
 }
 
 void EbmlComposer::FinishCluster() {
-  FinishMetadata();
-  if (!(mFlushState & FLUSH_CLUSTER)) {
-    // No completed cluster available.
+  if (!mWritingCluster) {
     return;
   }
 
   MOZ_ASSERT(mClusterLengthLoc > 0);
   EbmlGlobal ebml;
   EbmlLoc ebmlLoc;
   ebmlLoc.offset = mClusterLengthLoc;
   ebml.offset = 0;
-  for (uint32_t i = mClusterHeaderIndex; i < mClusterBuffs.Length(); i++) {
-    ebml.offset += mClusterBuffs[i].Length();
+  for (uint32_t i = mClusterHeaderIndex; i < mClusters.Length(); i++) {
+    ebml.offset += mClusters[i].Length();
   }
-  ebml.buf = mClusterBuffs[mClusterHeaderIndex].Elements();
+  ebml.buf = mClusters[mClusterHeaderIndex].Elements();
   Ebml_EndSubElement(&ebml, &ebmlLoc);
-  // Move the mClusterBuffs data from mClusterHeaderIndex that we can skip
+  // Move the mClusters data from mClusterHeaderIndex that we can skip
   // the metadata and the rest P-frames after ContainerWriter::FLUSH_NEEDED.
-  for (uint32_t i = mClusterHeaderIndex; i < mClusterBuffs.Length(); i++) {
-    mClusterCanFlushBuffs.AppendElement()->SwapElements(mClusterBuffs[i]);
+  for (uint32_t i = mClusterHeaderIndex; i < mClusters.Length(); i++) {
+    mFinishedClusters.AppendElement()->SwapElements(mClusters[i]);
   }
 
   mClusterHeaderIndex = 0;
   mClusterLengthLoc = 0;
-  mClusterBuffs.Clear();
-  mFlushState &= ~FLUSH_CLUSTER;
+  mClusters.Clear();
+  mWritingCluster = false;
 }
 
 void EbmlComposer::WriteSimpleBlock(EncodedFrame* aFrame) {
+  MOZ_RELEASE_ASSERT(mMetadataFinished);
+
   EbmlGlobal ebml;
   ebml.offset = 0;
 
   auto frameType = aFrame->GetFrameType();
-  bool flush = false;
-  bool isVP8IFrame = (frameType == EncodedFrame::FrameType::VP8_I_FRAME);
+  const bool isVP8IFrame = (frameType == EncodedFrame::FrameType::VP8_I_FRAME);
+  const bool isVP8PFrame = (frameType == EncodedFrame::FrameType::VP8_P_FRAME);
+  const bool isOpus = (frameType == EncodedFrame::FrameType::OPUS_AUDIO_FRAME);
   if (isVP8IFrame) {
+    MOZ_ASSERT(mHasVideo);
     FinishCluster();
-    flush = true;
-  } else {
-    // Force it to calculate timecode using signed math via cast
-    int64_t timeCode =
-        (aFrame->GetTimeStamp() / ((int)PR_USEC_PER_MSEC) - mClusterTimecode) +
-        (mCodecDelay / PR_NSEC_PER_MSEC);
-    if (timeCode < SHRT_MIN || timeCode > SHRT_MAX) {
-      // We're probably going to overflow (or underflow) the timeCode value
-      // later!
-      FinishCluster();
-      flush = true;
-    }
   }
 
-  auto block = mClusterBuffs.AppendElement();
-  block->SetLength(aFrame->GetFrameData().Length() + DEFAULT_HEADER_SIZE);
-  ebml.buf = block->Elements();
-
-  if (flush) {
-    EbmlLoc ebmlLoc;
-    Ebml_StartSubElement(&ebml, &ebmlLoc, Cluster);
-    MOZ_ASSERT(mClusterBuffs.Length() > 0);
-    // current cluster header array index
-    mClusterHeaderIndex = mClusterBuffs.Length() - 1;
-    mClusterLengthLoc = ebmlLoc.offset;
-    // if timeCode didn't under/overflow before, it shouldn't after this
-    mClusterTimecode = aFrame->GetTimeStamp() / PR_USEC_PER_MSEC;
-    Ebml_SerializeUnsigned(&ebml, Timecode, mClusterTimecode);
-    mFlushState |= FLUSH_CLUSTER;
+  if (isVP8PFrame && !mWritingCluster) {
+    // We ensure that clusters start with I-frames.
+    return;
   }
 
-  bool isOpus = (frameType == EncodedFrame::FrameType::OPUS_AUDIO_FRAME);
-  // Can't underflow/overflow now
   int64_t timeCode =
       aFrame->GetTimeStamp() / ((int)PR_USEC_PER_MSEC) - mClusterTimecode;
   if (isOpus) {
     timeCode += mCodecDelay / PR_NSEC_PER_MSEC;
   }
-  MOZ_ASSERT(timeCode >= SHRT_MIN && timeCode <= SHRT_MAX);
+
+  if (!mHasVideo && timeCode >= FLUSH_AUDIO_ONLY_AFTER_MS) {
+    MOZ_ASSERT(mHasAudio);
+    MOZ_ASSERT(isOpus);
+    // Audio-only, we'll still have to flush every now and then.
+    // We do it every second for now.
+    FinishCluster();
+  } else if (timeCode < SHRT_MIN || timeCode > SHRT_MAX) {
+    // This would overflow when writing the block below.
+    FinishCluster();
+  }
+
+  auto block = mClusters.AppendElement();
+  block->SetLength(aFrame->GetFrameData().Length() + DEFAULT_HEADER_SIZE);
+  ebml.buf = block->Elements();
+
+  if (!mWritingCluster) {
+    EbmlLoc ebmlLoc;
+    Ebml_StartSubElement(&ebml, &ebmlLoc, Cluster);
+    MOZ_ASSERT(mClusters.Length() > 0);
+    // current cluster header array index
+    mClusterHeaderIndex = mClusters.Length() - 1;
+    mClusterLengthLoc = ebmlLoc.offset;
+    // if timeCode didn't under/overflow before, it shouldn't after this
+    mClusterTimecode = aFrame->GetTimeStamp() / PR_USEC_PER_MSEC;
+    Ebml_SerializeUnsigned(&ebml, Timecode, mClusterTimecode);
+
+    // Can't under-/overflow now
+    timeCode =
+        aFrame->GetTimeStamp() / ((int)PR_USEC_PER_MSEC) - mClusterTimecode;
+    if (isOpus) {
+      timeCode += mCodecDelay / PR_NSEC_PER_MSEC;
+    }
+
+    mWritingCluster = true;
+  }
+
   writeSimpleBlock(&ebml, isOpus ? 0x2 : 0x1, static_cast<short>(timeCode),
                    isVP8IFrame, 0, 0,
                    (unsigned char*)aFrame->GetFrameData().Elements(),
                    aFrame->GetFrameData().Length());
   MOZ_ASSERT(
       ebml.offset <= DEFAULT_HEADER_SIZE + aFrame->GetFrameData().Length(),
       "write more data > EBML_BUFFER_SIZE");
   block->SetLength(ebml.offset);
 }
 
 void EbmlComposer::SetVideoConfig(uint32_t aWidth, uint32_t aHeight,
                                   uint32_t aDisplayWidth,
                                   uint32_t aDisplayHeight) {
+  MOZ_RELEASE_ASSERT(!mMetadataFinished);
   MOZ_ASSERT(aWidth > 0, "Width should > 0");
   MOZ_ASSERT(aHeight > 0, "Height should > 0");
   MOZ_ASSERT(aDisplayWidth > 0, "DisplayWidth should > 0");
   MOZ_ASSERT(aDisplayHeight > 0, "DisplayHeight should > 0");
   mWidth = aWidth;
   mHeight = aHeight;
   mDisplayWidth = aDisplayWidth;
   mDisplayHeight = aDisplayHeight;
+  mHasVideo = true;
 }
 
 void EbmlComposer::SetAudioConfig(uint32_t aSampleFreq, uint32_t aChannels) {
+  MOZ_RELEASE_ASSERT(!mMetadataFinished);
   MOZ_ASSERT(aSampleFreq > 0, "SampleFreq should > 0");
   MOZ_ASSERT(aChannels > 0, "Channels should > 0");
   mSampleFreq = aSampleFreq;
   mChannels = aChannels;
+  mHasAudio = true;
 }
 
 void EbmlComposer::ExtractBuffer(nsTArray<nsTArray<uint8_t> >* aDestBufs,
                                  uint32_t aFlag) {
-  if ((aFlag & ContainerWriter::FLUSH_NEEDED) ||
-      (aFlag & ContainerWriter::GET_HEADER)) {
-    FinishMetadata();
+  if (!mMetadataFinished) {
+    return;
   }
   if (aFlag & ContainerWriter::FLUSH_NEEDED) {
     FinishCluster();
   }
   // aDestBufs may have some element
-  for (uint32_t i = 0; i < mClusterCanFlushBuffs.Length(); i++) {
-    aDestBufs->AppendElement()->SwapElements(mClusterCanFlushBuffs[i]);
+  for (uint32_t i = 0; i < mFinishedClusters.Length(); i++) {
+    aDestBufs->AppendElement()->SwapElements(mFinishedClusters[i]);
   }
-  mClusterCanFlushBuffs.Clear();
+  mFinishedClusters.Clear();
 }
 
-EbmlComposer::EbmlComposer()
-    : mFlushState(FLUSH_NONE),
-      mClusterHeaderIndex(0),
-      mClusterLengthLoc(0),
-      mCodecDelay(0),
-      mClusterTimecode(0),
-      mWidth(0),
-      mHeight(0),
-      mDisplayWidth(0),
-      mDisplayHeight(0),
-      mSampleFreq(0),
-      mChannels(0) {}
-
 }  // namespace mozilla
--- a/dom/media/webm/EbmlComposer.h
+++ b/dom/media/webm/EbmlComposer.h
@@ -10,76 +10,82 @@
 
 namespace mozilla {
 
 /*
  * A WebM muxer helper for package the valid WebM format.
  */
 class EbmlComposer {
  public:
-  EbmlComposer();
+  EbmlComposer() = default;
   /*
-   * Assign the parameter which header required.
+   * Assign the parameters which the header requires. These can be called
+   * multiple times to change parameter values until GenerateHeader() is
+   * called, after which calling them again is illegal.
    */
   void SetVideoConfig(uint32_t aWidth, uint32_t aHeight, uint32_t aDisplayWidth,
                       uint32_t aDisplayHeight);
-
   void SetAudioConfig(uint32_t aSampleFreq, uint32_t aChannels);
   /*
    * Set the CodecPrivateData for writing in header.
    */
   void SetAudioCodecPrivateData(nsTArray<uint8_t>& aBufs) {
     mCodecPrivateData.AppendElements(aBufs);
   }
   /*
-   * Generate the whole WebM header and output to mBuff.
+   * Generate the whole WebM header with the configured tracks, and make
+   * available to ExtractBuffer. Must only be called once.
    */
   void GenerateHeader();
   /*
    * Insert media encoded buffer into muxer and it would be package
    * into SimpleBlock. If no cluster is opened, new cluster will start for
    * writing.
    */
   void WriteSimpleBlock(EncodedFrame* aFrame);
   /*
    * Get valid cluster data.
    */
   void ExtractBuffer(nsTArray<nsTArray<uint8_t> >* aDestBufs,
                      uint32_t aFlag = 0);
 
  private:
-  // Move the metadata data to mClusterCanFlushBuffs.
-  void FinishMetadata();
-  // Close current cluster and move data to mClusterCanFlushBuffs.
+  // Close current cluster and move data to mFinishedClusters. Idempotent.
   void FinishCluster();
-  // The temporary storage for cluster data.
-  nsTArray<nsTArray<uint8_t> > mClusterBuffs;
-  // The storage which contain valid cluster data.
-  nsTArray<nsTArray<uint8_t> > mClusterCanFlushBuffs;
+  // Buffers of the cluster currently being written. Each element in the outer
+  // array holds one serialized block; the one at mClusterHeaderIndex also
+  // starts with the cluster header. FinishCluster() moves the payloads to
+  // mFinishedClusters and then clears this array.
+  nsTArray<nsTArray<uint8_t> > mClusters;
+  // Finished clusters to be flushed out by ExtractBuffer().
+  nsTArray<nsTArray<uint8_t> > mFinishedClusters;
 
-  // Indicate the data types in mClusterBuffs.
-  enum { FLUSH_NONE = 0, FLUSH_METADATA = 1 << 0, FLUSH_CLUSTER = 1 << 1 };
-  uint32_t mFlushState;
-  // Indicate the cluster header index in mClusterBuffs.
-  uint32_t mClusterHeaderIndex;
+  // Metadata has been serialized.
+  bool mMetadataFinished = false;
+  // True if we have an open cluster.
+  bool mWritingCluster = false;
+  // Indicate the current cluster's header index in mClusters.
+  size_t mClusterHeaderIndex = 0;
   // The cluster length position.
-  uint64_t mClusterLengthLoc;
+  uint64_t mClusterLengthLoc = 0;
   // Audio codec specific header data.
   nsTArray<uint8_t> mCodecPrivateData;
   // Codec delay in nanoseconds.
-  uint64_t mCodecDelay;
+  uint64_t mCodecDelay = 0;
 
   // The timecode of the cluster.
-  uint64_t mClusterTimecode;
+  uint64_t mClusterTimecode = 0;
 
   // Video configuration
-  int mWidth;
-  int mHeight;
-  int mDisplayWidth;
-  int mDisplayHeight;
+  int mWidth = 0;
+  int mHeight = 0;
+  int mDisplayWidth = 0;
+  int mDisplayHeight = 0;
+  bool mHasVideo = false;
   // Audio configuration
-  float mSampleFreq;
-  int mChannels;
+  float mSampleFreq = 0;
+  int mChannels = 0;
+  bool mHasAudio = false;
 };
 
 }  // namespace mozilla
 
 #endif