Bug 964197 - Add frame duration for synchronization. r=cpearce
author Alfredo Yang <ayang@mozilla.com>
Sun, 02 Feb 2014 20:57:41 -0500
changeset 182591 d8a9d899c0d33b33a2bc1778732ab0d6d36044c3
parent 182590 543e8c09e7b073c4b7aa14d1d44d16a6e5e86281
child 182592 fd79a8abbbf89f9dcc9349ec417154946ba010a6
push id 3343
push user ffxbld
push date Mon, 17 Mar 2014 21:55:32 +0000
treeherder mozilla-beta@2f7d3415f79f [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers cpearce
bugs 964197
milestone 29.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 964197 - Add frame duration for synchronization. r=cpearce
content/media/encoder/fmp4_muxer/ISOControl.cpp
content/media/encoder/fmp4_muxer/ISOControl.h
content/media/encoder/fmp4_muxer/ISOMediaBoxes.cpp
content/media/encoder/fmp4_muxer/ISOMediaBoxes.h
content/media/encoder/fmp4_muxer/ISOMediaWriter.cpp
--- a/content/media/encoder/fmp4_muxer/ISOControl.cpp
+++ b/content/media/encoder/fmp4_muxer/ISOControl.cpp
@@ -16,16 +16,17 @@ namespace mozilla {
 #define iso_time_offset 2082844800
 
 FragmentBuffer::FragmentBuffer(uint32_t aTrackType, uint32_t aFragDuration,
                                TrackMetadataBase* aMetadata)
   : mTrackType(aTrackType)
   , mFragDuration(aFragDuration)
   , mMediaStartTime(0)
   , mFragmentNumber(0)
+  , mLastFrameTimeOfLastFragment(0)
   , mEOS(false)
 {
   mFragArray.AppendElement();
   if (mTrackType == Audio_Track) {
     nsRefPtr<AACTrackMetadata> audMeta = static_cast<AACTrackMetadata*>(aMetadata);
     MOZ_ASSERT(audMeta);
   } else {
     nsRefPtr<AVCTrackMetadata> vidMeta = static_cast<AVCTrackMetadata*>(aMetadata);
--- a/content/media/encoder/fmp4_muxer/ISOControl.h
+++ b/content/media/encoder/fmp4_muxer/ISOControl.h
@@ -67,16 +67,24 @@ public:
   // on codec type. This data will be sent as a special frame from encoder to
   // ISOMediaWriter and pass to this class via AddFrame().
   nsresult GetCSD(nsTArray<uint8_t>& aCSD);
 
   bool HasCSD() { return mCSDFrame; }
 
   uint32_t GetType() { return mTrackType; }
 
+  void SetLastFragmentLastFrameTime(uint32_t aTime) {
+    mLastFrameTimeOfLastFragment = aTime;
+  }
+
+  uint32_t GetLastFragmentLastFrameTime() {
+    return mLastFrameTimeOfLastFragment;
+  }
+
 private:
   uint32_t mTrackType;
 
   // Fragment duration, microsecond per unit.
   uint32_t mFragDuration;
 
   // Media start time, microsecond per unit.
   // Together with mFragDuration, mFragmentNumber and EncodedFrame->GetTimeStamp(),
@@ -87,16 +95,22 @@ private:
 
   // Current fragment number. It will be increase when a new element of
   // mFragArray is created.
   // Note:
   //   It only means the fragment number of current accumulated frames, not
   //   the current 'creating' fragment mFragNum in ISOControl.
   uint32_t mFragmentNumber;
 
+  // The timestamp of the last frame in the previous fragment. It is used to
+  // calculate the play duration of the first frame in the current fragment.
+  // Frame duration is defined here as "current frame timestamp - last frame
+  // timestamp", so the last timestamp of the previous fragment must be kept.
+  uint32_t mLastFrameTimeOfLastFragment;
+
   // Array of fragments, each element has enough samples to form a
   // complete fragment.
   nsTArray<nsTArray<nsRefPtr<EncodedFrame>>> mFragArray;
 
   // Codec specific data frame, it will be generated by encoder and send to
   // ISOMediaWriter through WriteEncodedTrack(). The data will be vary depends
   // on codec type.
   nsRefPtr<EncodedFrame> mCSDFrame;
--- a/content/media/encoder/fmp4_muxer/ISOMediaBoxes.cpp
+++ b/content/media/encoder/fmp4_muxer/ISOMediaBoxes.cpp
@@ -6,16 +6,17 @@
 #include <climits>
 #include "TrackMetadataBase.h"
 #include "ISOMediaBoxes.h"
 #include "ISOControl.h"
 #include "EncodedFrameContainer.h"
 #include "ISOTrackMetadata.h"
 #include "MP4ESDS.h"
 #include "AVCBox.h"
+#include "VideoUtils.h"
 
 namespace mozilla {
 
 // 14496-12 6.2.2 'Data Types and fields'
 const uint32_t iso_matrix[] = { 0x00010000, 0,          0,
                                 0,          0x00010000, 0,
                                 0,          0,          0x40000000 };
 
@@ -121,29 +122,58 @@ TrackRunBox::fillSampleTable()
   FragmentBuffer* frag = mControl->GetFragment(mTrackType);
 
   rv = frag->GetFirstFragment(frames);
   if (NS_FAILED(rv)) {
     return 0;
   }
   uint32_t len = frames.Length();
   sample_info_table = new tbl[len];
+  // Create sample table according to 14496-12 8.8.8.2.
   for (uint32_t i = 0; i < len; i++) {
-    sample_info_table[i].sample_duration = 0;
-    sample_info_table[i].sample_size = frames.ElementAt(i)->GetFrameData().Length();
-    mAllSampleSize += sample_info_table[i].sample_size;
-    table_size += sizeof(uint32_t);
+    // Sample size.
+    sample_info_table[i].sample_size = 0;
+    if (flags.to_ulong() & flags_sample_size_present) {
+      sample_info_table[i].sample_size = frames.ElementAt(i)->GetFrameData().Length();
+      mAllSampleSize += sample_info_table[i].sample_size;
+      table_size += sizeof(uint32_t);
+    }
+
+    // Sample flags.
+    sample_info_table[i].sample_flags = 0;
     if (flags.to_ulong() & flags_sample_flags_present) {
       sample_info_table[i].sample_flags =
         set_sample_flags(
           (frames.ElementAt(i)->GetFrameType() == EncodedFrame::I_FRAME));
       table_size += sizeof(uint32_t);
-    } else {
-      sample_info_table[i].sample_flags = 0;
     }
+
+    // Sample duration.
+    sample_info_table[i].sample_duration = 0;
+    if (flags.to_ulong() & flags_sample_duration_present) {
+      // Calculate each frame's duration, it is decided by "current frame
+      // timestamp - last frame timestamp".
+      uint64_t frame_time = 0;
+      if (i == 0) {
+        frame_time = frames.ElementAt(i)->GetTimeStamp() -
+                     frag->GetLastFragmentLastFrameTime();
+      } else {
+        frame_time = frames.ElementAt(i)->GetTimeStamp() -
+                     frames.ElementAt(i - 1)->GetTimeStamp();
+        // Keep the last frame time of the current fragment; it will be used to
+        // calculate the first frame duration of the next fragment.
+        if ((len - 1) == i) {
+          frag->SetLastFragmentLastFrameTime(frames.ElementAt(i)->GetTimeStamp());
+        }
+      }
+      sample_info_table[i].sample_duration =
+        frame_time * mMeta.mVidMeta->VideoFrequency / USECS_PER_S;
+      table_size += sizeof(uint32_t);
+    }
+
     sample_info_table[i].sample_composition_time_offset = 0;
   }
   return table_size;
 }
 
 nsresult
 TrackRunBox::Generate(uint32_t* aBoxSize)
 {
@@ -175,33 +205,39 @@ nsresult
 TrackRunBox::Write()
 {
   WRITE_FULLBOX(mControl, size)
   mControl->Write(sample_count);
   if (flags.to_ulong() & flags_data_offset_present) {
     mControl->Write(data_offset);
   }
   for (uint32_t i = 0; i < sample_count; i++) {
-    mControl->Write(sample_info_table[i].sample_size);
+    if (flags.to_ulong() & flags_sample_duration_present) {
+      mControl->Write(sample_info_table[i].sample_duration);
+    }
+    if (flags.to_ulong() & flags_sample_size_present) {
+      mControl->Write(sample_info_table[i].sample_size);
+    }
     if (flags.to_ulong() & flags_sample_flags_present) {
       mControl->Write(sample_info_table[i].sample_flags);
     }
   }
 
   return NS_OK;
 }
 
 TrackRunBox::TrackRunBox(uint32_t aType, uint32_t aFlags, ISOControl* aControl)
   : FullBox(NS_LITERAL_CSTRING("trun"), 0, aFlags, aControl)
   , sample_count(0)
   , data_offset(0)
   , first_sample_flags(0)
   , mAllSampleSize(0)
   , mTrackType(aType)
 {
+  mMeta.Init(aControl);
   MOZ_COUNT_CTOR(TrackRunBox);
 }
 
 TrackRunBox::~TrackRunBox()
 {
   MOZ_COUNT_DTOR(TrackRunBox);
 }
 
@@ -213,25 +249,32 @@ TrackFragmentHeaderBox::UpdateBaseDataOf
 }
 
 nsresult
 TrackFragmentHeaderBox::Generate(uint32_t* aBoxSize)
 {
   track_ID = mControl->GetTrackID(mTrackType);
   size += sizeof(track_ID);
 
-  if (flags.to_ulong() | base_data_offset_present) {
+  if (flags.to_ulong() & base_data_offset_present) {
     // base_data_offset needs to add size of 'trun', 'tfhd' and
-    // header of 'mdat 'later.
+    // header of 'mdat' later.
     base_data_offset = 0;
     size += sizeof(base_data_offset);
   }
-  if (flags.to_ulong() | default_sample_duration_present) {
+  if (flags.to_ulong() & default_sample_duration_present) {
     if (mTrackType == Video_Track) {
-      default_sample_duration = mMeta.mVidMeta->VideoFrequency / mMeta.mVidMeta->FrameRate;
+      if (!mMeta.mVidMeta->FrameRate) {
+        // 0 means the frame rate is variable, so it is wrong to write
+        // default_sample_duration.
+        MOZ_ASSERT(0);
+        default_sample_duration = 0;
+      } else {
+        default_sample_duration = mMeta.mVidMeta->VideoFrequency / mMeta.mVidMeta->FrameRate;
+      }
     } else if (mTrackType == Audio_Track) {
       default_sample_duration = mMeta.mAudMeta->FrameDuration;
     } else {
       MOZ_ASSERT(0);
       return NS_ERROR_FAILURE;
     }
     size += sizeof(default_sample_duration);
   }
@@ -239,56 +282,70 @@ TrackFragmentHeaderBox::Generate(uint32_
   return NS_OK;
 }
 
 nsresult
 TrackFragmentHeaderBox::Write()
 {
   WRITE_FULLBOX(mControl, size)
   mControl->Write(track_ID);
-  if (flags.to_ulong() | base_data_offset_present) {
+  if (flags.to_ulong() & base_data_offset_present) {
     mControl->Write(base_data_offset);
   }
-  if (flags.to_ulong() | default_sample_duration_present) {
+  if (flags.to_ulong() & default_sample_duration_present) {
     mControl->Write(default_sample_duration);
   }
   return NS_OK;
 }
 
 TrackFragmentHeaderBox::TrackFragmentHeaderBox(uint32_t aType,
+                                               uint32_t aFlags,
                                                ISOControl* aControl)
-  // TODO: tf_flags, we may need to customize it from caller
-  : FullBox(NS_LITERAL_CSTRING("tfhd"),
-            0,
-            base_data_offset_present | default_sample_duration_present,
-            aControl)
+  : FullBox(NS_LITERAL_CSTRING("tfhd"), 0, aFlags, aControl)
   , track_ID(0)
   , base_data_offset(0)
   , default_sample_duration(0)
 {
   mTrackType = aType;
   mMeta.Init(mControl);
   MOZ_COUNT_CTOR(TrackFragmentHeaderBox);
 }
 
 TrackFragmentHeaderBox::~TrackFragmentHeaderBox()
 {
   MOZ_COUNT_DTOR(TrackFragmentHeaderBox);
 }
 
-TrackFragmentBox::TrackFragmentBox(uint32_t aType, uint32_t aFlags,
-                                   ISOControl* aControl)
+TrackFragmentBox::TrackFragmentBox(uint32_t aType, ISOControl* aControl)
   : DefaultContainerImpl(NS_LITERAL_CSTRING("traf"), aControl)
   , mTrackType(aType)
 {
-  boxes.AppendElement(new TrackFragmentHeaderBox(aType, aControl));
+  // Flags in TrackFragmentHeaderBox.
+  uint32_t tf_flags = base_data_offset_present;
+
+  // Audio frame rate should be fixed; otherwise it will cause noise during
+  // playback. So the duration of each audio frame does not need to be kept in
+  // TrackRunBox; the default sample duration is kept in TrackFragmentHeaderBox.
+  tf_flags |= (mTrackType & Audio_Track ? default_sample_duration_present : 0);
+
+  boxes.AppendElement(new TrackFragmentHeaderBox(aType, tf_flags, aControl));
+
+  // Always adds flags_data_offset_present in each TrackRunBox, Android
+  // parser requires this flag to calculate the correct bitstream offset.
+  uint32_t tr_flags = flags_sample_size_present | flags_data_offset_present;
+
+  // Flags in TrackRunBox.
+  // If no default sample duration exists, each frame's duration needs to
+  // be recorded in the TrackRunBox.
+  tr_flags |= (tf_flags & default_sample_duration_present ? 0 : flags_sample_duration_present);
 
   // For video, add sample_flags to record I frame.
-  aFlags |= (mTrackType & Video_Track ? flags_sample_flags_present : 0);
-  boxes.AppendElement(new TrackRunBox(mTrackType, aFlags, aControl));
+  tr_flags |= (mTrackType & Video_Track ? flags_sample_flags_present : 0);
+
+  boxes.AppendElement(new TrackRunBox(mTrackType, tr_flags, aControl));
   MOZ_COUNT_CTOR(TrackFragmentBox);
 }
 
 TrackFragmentBox::~TrackFragmentBox()
 {
   MOZ_COUNT_DTOR(TrackFragmentBox);
 }
 
@@ -324,29 +381,23 @@ MovieFragmentHeaderBox::~MovieFragmentHe
 }
 
 MovieFragmentBox::MovieFragmentBox(uint32_t aType, ISOControl* aControl)
   : DefaultContainerImpl(NS_LITERAL_CSTRING("moof"), aControl)
   , mTrackType(aType)
 {
   boxes.AppendElement(new MovieFragmentHeaderBox(mTrackType, aControl));
 
-  // Always adds flags_data_offset_present in each TrackFragmentBox, Android
-  // parser requires this flag to calculate the correct bitstream offset.
   if (mTrackType & Audio_Track) {
     boxes.AppendElement(
-      new TrackFragmentBox(Audio_Track,
-                           flags_sample_size_present | flags_data_offset_present,
-                           aControl));
+      new TrackFragmentBox(Audio_Track, aControl));
   }
   if (mTrackType & Video_Track) {
     boxes.AppendElement(
-      new TrackFragmentBox(Video_Track,
-                           flags_sample_size_present | flags_data_offset_present,
-                           aControl));
+      new TrackFragmentBox(Video_Track, aControl));
   }
   MOZ_COUNT_CTOR(MovieFragmentBox);
 }
 
 MovieFragmentBox::~MovieFragmentBox()
 {
   MOZ_COUNT_DTOR(MovieFragmentBox);
 }
@@ -383,18 +434,22 @@ TrackExtendsBox::Generate(uint32_t* aBox
 
   if (mTrackType == Audio_Track) {
     default_sample_description_index = 1;
     default_sample_duration = mMeta.mAudMeta->FrameDuration;
     default_sample_size = mMeta.mAudMeta->FrameSize;
     default_sample_flags = set_sample_flags(1);
   } else if (mTrackType == Video_Track) {
     default_sample_description_index = 1;
-    default_sample_duration =
-      mMeta.mVidMeta->VideoFrequency / mMeta.mVidMeta->FrameRate;
+    // If the video metadata has an assigned (nonzero) frame rate, it implies
+    // that this video's frame rate should be fixed.
+    if (mMeta.mVidMeta->FrameRate) {
+      default_sample_duration =
+        mMeta.mVidMeta->VideoFrequency / mMeta.mVidMeta->FrameRate;
+    }
     default_sample_size = 0;
     default_sample_flags = set_sample_flags(0);
   } else {
     MOZ_ASSERT(0);
     return NS_ERROR_FAILURE;
   }
 
   size += sizeof(track_ID) +
--- a/content/media/encoder/fmp4_muxer/ISOMediaBoxes.h
+++ b/content/media/encoder/fmp4_muxer/ISOMediaBoxes.h
@@ -294,16 +294,17 @@ public:
   TrackRunBox(uint32_t aType, uint32_t aFlags, ISOControl* aControl);
   ~TrackRunBox();
 
 protected:
   uint32_t fillSampleTable();
 
   uint32_t mAllSampleSize;
   uint32_t mTrackType;
+  MetaHelper mMeta;
 };
 
 // tf_flags in TrackFragmentHeaderBox, 14496-12 8.8.7.1.
 #define base_data_offset_present         0x000001
 #define sample_description_index_present 0x000002
 #define default_sample_duration_present  0x000008
 #define default_sample_size_present      0x000010
 #define default_sample_flags_present     0x000020
@@ -322,30 +323,30 @@ public:
   // MuxerOperation methods
   nsresult Generate(uint32_t* aBoxSize) MOZ_OVERRIDE;
   nsresult Write() MOZ_OVERRIDE;
 
   // TrackFragmentHeaderBox methods
   nsresult UpdateBaseDataOffset(uint64_t aOffset); // The offset of the first
                                                    // sample in file.
 
-  TrackFragmentHeaderBox(uint32_t aType, ISOControl* aControl);
+  TrackFragmentHeaderBox(uint32_t aType, uint32_t aFlags, ISOControl* aControl);
   ~TrackFragmentHeaderBox();
 
 protected:
   uint32_t mTrackType;
   MetaHelper mMeta;
 };
 
 // 14496-12 8.8.6 'Track Fragment Box'
 // Box type: 'traf'
 // TrackFragmentBox cotains TrackFragmentHeaderBox and TrackRunBox.
 class TrackFragmentBox : public DefaultContainerImpl {
 public:
-  TrackFragmentBox(uint32_t aType, uint32_t aFlags, ISOControl* aControl);
+  TrackFragmentBox(uint32_t aType, ISOControl* aControl);
   ~TrackFragmentBox();
 
 protected:
   uint32_t mTrackType;
 };
 
 // 14496-12 8.8.5 'Movie Fragment Header Box'
 // Box type: 'mfhd'
--- a/content/media/encoder/fmp4_muxer/ISOMediaWriter.cpp
+++ b/content/media/encoder/fmp4_muxer/ISOMediaWriter.cpp
@@ -4,28 +4,29 @@
  * You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "ISOMediaWriter.h"
 #include "ISOControl.h"
 #include "ISOMediaBoxes.h"
 #include "ISOTrackMetadata.h"
 #include "nsThreadUtils.h"
 #include "MediaEncoder.h"
+#include "VideoUtils.h"
 
 #undef LOG
 #ifdef MOZ_WIDGET_GONK
 #include <android/log.h>
 #define LOG(args...) __android_log_print(ANDROID_LOG_INFO, "MediaEncoder", ## args);
 #else
 #define LOG(args, ...)
 #endif
 
 namespace mozilla {
 
-const static uint32_t FRAG_DURATION = 2000000;    // microsecond per unit
+const static uint32_t FRAG_DURATION = 2 * USECS_PER_S;    // microsecond per unit
 
 ISOMediaWriter::ISOMediaWriter(uint32_t aType)
   : ContainerWriter()
   , mState(MUXING_HEAD)
   , mBlobReady(false)
   , mType(0)
 {
   if (aType & CREATE_AUDIO_TRACK) {