Bug 1294753: encode all available audio on each cycle instead of one 'packet' r=rillian
author Randell Jesup <rjesup@jesup.org>
Thu, 18 Aug 2016 00:31:11 -0400
changeset 336694 4c9b550ac7d63a9196884f4e965b26bc4be0ddc2
parent 336693 a2cfcd0a8f9bb765cd3a8c2047d1cdbc5abfaa83
child 336695 38b376e74c6e1a1ccab3c447df3d6687b811c8b5
push id 10033
push user raliiev@mozilla.com
push date Mon, 19 Sep 2016 13:50:26 +0000
treeherder mozilla-aurora@5dddbefdf759
reviewers rillian
bugs 1294753
milestone 51.0a1
Bug 1294753: encode all available audio on each cycle instead of one 'packet' r=rillian
Also fixes some accesses to mEndOfStream outside the monitor
dom/media/encoder/MediaEncoder.cpp
dom/media/encoder/OpusTrackEncoder.cpp
dom/media/ogg/OggWriter.cpp
--- a/dom/media/encoder/MediaEncoder.cpp
+++ b/dom/media/encoder/MediaEncoder.cpp
@@ -303,27 +303,29 @@ MediaEncoder::GetEncodedData(nsTArray<ns
       mState = ENCODE_TRACK;
       break;
     }
 
     case ENCODE_TRACK: {
       LOG(LogLevel::Debug, ("ENCODE_TRACK TimeStamp = %f", GetEncodeTimeStamp()));
       EncodedFrameContainer encodedData;
       nsresult rv = NS_OK;
+      // We're most likely to actually wait for a video frame, so do that first to minimize
+      // capture offset/lipsync issues.
+      rv = WriteEncodedDataToMuxer(mVideoEncoder.get());
+      if (NS_FAILED(rv)) {
+        LOG(LogLevel::Error, ("Fail to write video encoder data to muxer"));
+        break;
+      }
       rv = WriteEncodedDataToMuxer(mAudioEncoder.get());
       if (NS_FAILED(rv)) {
         LOG(LogLevel::Error, ("Error! Fail to write audio encoder data to muxer"));
         break;
       }
       LOG(LogLevel::Debug, ("Audio encoded TimeStamp = %f", GetEncodeTimeStamp()));
-      rv = WriteEncodedDataToMuxer(mVideoEncoder.get());
-      if (NS_FAILED(rv)) {
-        LOG(LogLevel::Error, ("Fail to write video encoder data to muxer"));
-        break;
-      }
       LOG(LogLevel::Debug, ("Video encoded TimeStamp = %f", GetEncodeTimeStamp()));
       // In audio only or video only case, let unavailable track's flag to be true.
       bool isAudioCompleted = (mAudioEncoder && mAudioEncoder->IsEncodingComplete()) || !mAudioEncoder;
       bool isVideoCompleted = (mVideoEncoder && mVideoEncoder->IsEncodingComplete()) || !mVideoEncoder;
       rv = mWriter->GetContainerData(aOutputBufs,
                                      isAudioCompleted && isVideoCompleted ?
                                      ContainerWriter::FLUSH_NEEDED : 0);
       if (aOutputBufs != nullptr) {
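
The OpusTrackEncoder.cpp hunk below restructures GetEncodedTrack() so that a single call blocks at most once for input and then keeps encoding until no complete packet of audio remains queued. A minimal, hypothetical sketch of that "wait at most once, then drain" control flow follows; it uses std::mutex/std::condition_variable in place of Gecko's ReentrantMonitor and plain samples in place of AudioSegment, so the names (PacketQueue, DrainPackets) are illustrative only.

    #include <algorithm>
    #include <condition_variable>
    #include <cstdint>
    #include <deque>
    #include <mutex>
    #include <vector>

    struct PacketQueue {
      std::mutex lock;
      std::condition_variable haveData;
      std::deque<int16_t> samples;   // stand-in for the queued raw audio
      bool endOfStream = false;
    };

    // Encode every complete packet that is currently available. Blocks at
    // most once if no full packet is ready yet, then loops until the queued
    // input runs out (or is exhausted at end of stream).
    std::vector<std::vector<int16_t>>
    DrainPackets(PacketQueue& q, size_t packetSize)
    {
      std::vector<std::vector<int16_t>> encoded;
      bool waited = false;
      for (;;) {
        std::unique_lock<std::mutex> guard(q.lock);
        while (q.samples.size() < packetSize && !q.endOfStream) {
          if (waited) {
            return encoded;          // already waited once: stop instead of blocking again
          }
          q.haveData.wait(guard);
          waited = true;
        }
        if (q.samples.empty() && q.endOfStream) {
          return encoded;            // everything drained, the producer is done
        }
        size_t n = std::min(packetSize, q.samples.size());
        std::vector<int16_t> packet(q.samples.begin(), q.samples.begin() + n);
        q.samples.erase(q.samples.begin(), q.samples.begin() + n);
        guard.unlock();
        encoded.push_back(std::move(packet));  // the "encode" step happens outside the lock
      }
    }
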
--- a/dom/media/encoder/OpusTrackEncoder.cpp
+++ b/dom/media/encoder/OpusTrackEncoder.cpp
@@ -270,181 +270,193 @@ OpusTrackEncoder::GetEncodedTrack(Encode
     if (mCanceled || mEncodingComplete) {
       return NS_ERROR_FAILURE;
     }
   }
 
   // calculation below depends on the truth that mInitialized is true.
   MOZ_ASSERT(mInitialized);
 
-  // re-sampled frames left last time which didn't fit into an Opus packet duration.
-  const int framesLeft = mResampledLeftover.Length() / mChannels;
-  // When framesLeft is 0, (GetPacketDuration() - framesLeft) is a multiple
-  // of kOpusSamplingRate. There is not precision loss in the integer division
-  // in computing framesToFetch. If frameLeft > 0, we need to add 1 to
-  // framesToFetch to ensure there will be at least n frames after re-sampling.
-  const int frameRoundUp = framesLeft ? 1 : 0;
+  bool wait = true;
+  int result = 0;
+  // Only wait once, then loop until we run out of packets of input data
+  while (result >= 0 && !mEncodingComplete) {
+    // re-sampled frames left last time which didn't fit into an Opus packet duration.
+    const int framesLeft = mResampledLeftover.Length() / mChannels;
+    // When framesLeft is 0, (GetPacketDuration() - framesLeft) is a multiple
+    // of kOpusSamplingRate. There is no precision loss in the integer division
+    // in computing framesToFetch. If framesLeft > 0, we need to add 1 to
+    // framesToFetch to ensure there will be at least n frames after re-sampling.
+    const int frameRoundUp = framesLeft ? 1 : 0;
+
+    MOZ_ASSERT(GetPacketDuration() >= framesLeft);
+    // Try to fetch m frames such that there will be n frames
+    // where (n + framesLeft) >= GetPacketDuration() after re-sampling.
+    const int framesToFetch = !mResampler ? GetPacketDuration()
+                              : (GetPacketDuration() - framesLeft) * mSamplingRate / kOpusSamplingRate
+                              + frameRoundUp;
+    {
+      // Move all the samples from mRawSegment to mSourceSegment. We only hold
+      // the monitor in this block.
+      ReentrantMonitorAutoEnter mon(mReentrantMonitor);
 
-  MOZ_ASSERT(GetPacketDuration() >= framesLeft);
-  // Try to fetch m frames such that there will be n frames
-  // where (n + frameLeft) >= GetPacketDuration() after re-sampling.
-  const int framesToFetch = !mResampler ? GetPacketDuration()
-    : (GetPacketDuration() - framesLeft) * mSamplingRate / kOpusSamplingRate
-      + frameRoundUp;
-  {
-    // Move all the samples from mRawSegment to mSourceSegment. We only hold
-    // the monitor in this block.
-    ReentrantMonitorAutoEnter mon(mReentrantMonitor);
+      // Wait until there is enough raw data, the end of the stream, or cancellation.
+      while (!mCanceled && mRawSegment.GetDuration() +
+             mSourceSegment.GetDuration() < framesToFetch &&
+             !mEndOfStream) {
+        if (wait) {
+          mReentrantMonitor.Wait();
+          wait = false;
+        } else {
+          goto done; // escape the nested while loops
+        }
+      }
 
-    // Wait until enough raw data, end of stream or cancelled.
-    while (!mCanceled && mRawSegment.GetDuration() +
-        mSourceSegment.GetDuration() < framesToFetch &&
-        !mEndOfStream) {
-      mReentrantMonitor.Wait();
-    }
+      if (mCanceled) {
+        return NS_ERROR_FAILURE;
+      }
+
+      mSourceSegment.AppendFrom(&mRawSegment);
 
-    if (mCanceled || mEncodingComplete) {
-      return NS_ERROR_FAILURE;
+      // Pad |mLookahead| samples to the end of the source stream to prevent loss of
+      // original data; the pcm duration will be calculated at the 48 kHz rate later.
+      if (mEndOfStream && !mEosSetInEncoder) {
+        mEosSetInEncoder = true;
+        mSourceSegment.AppendNullData(mLookahead);
+      }
     }
 
-    mSourceSegment.AppendFrom(&mRawSegment);
+    // Start encoding data.
+    AutoTArray<AudioDataValue, 9600> pcm;
+    pcm.SetLength(GetPacketDuration() * mChannels);
+    AudioSegment::ChunkIterator iter(mSourceSegment);
+    int frameCopied = 0;
 
-    // Pad |mLookahead| samples to the end of source stream to prevent lost of
-    // original data, the pcm duration will be calculated at rate 48K later.
-    if (mEndOfStream && !mEosSetInEncoder) {
-      mEosSetInEncoder = true;
-      mSourceSegment.AppendNullData(mLookahead);
-    }
-  }
+    while (!iter.IsEnded() && frameCopied < framesToFetch) {
+      AudioChunk chunk = *iter;
 
-  // Start encoding data.
-  AutoTArray<AudioDataValue, 9600> pcm;
-  pcm.SetLength(GetPacketDuration() * mChannels);
-  AudioSegment::ChunkIterator iter(mSourceSegment);
-  int frameCopied = 0;
+      // Chunk to the required frame size.
+      int frameToCopy = chunk.GetDuration();
+      if (frameCopied + frameToCopy > framesToFetch) {
+        frameToCopy = framesToFetch - frameCopied;
+      }
 
-  while (!iter.IsEnded() && frameCopied < framesToFetch) {
-    AudioChunk chunk = *iter;
+      if (!chunk.IsNull()) {
+        // Append the interleaved data to the end of pcm buffer.
+        AudioTrackEncoder::InterleaveTrackData(chunk, frameToCopy, mChannels,
+                                               pcm.Elements() + frameCopied * mChannels);
+      } else {
+        memset(pcm.Elements() + frameCopied * mChannels, 0,
+               frameToCopy * mChannels * sizeof(AudioDataValue));
+      }
 
-    // Chunk to the required frame size.
-    int frameToCopy = chunk.GetDuration();
-    if (frameCopied + frameToCopy > framesToFetch) {
-      frameToCopy = framesToFetch - frameCopied;
+      frameCopied += frameToCopy;
+      iter.Next();
     }
 
-    if (!chunk.IsNull()) {
-      // Append the interleaved data to the end of pcm buffer.
-      AudioTrackEncoder::InterleaveTrackData(chunk, frameToCopy, mChannels,
-        pcm.Elements() + frameCopied * mChannels);
-    } else {
-      memset(pcm.Elements() + frameCopied * mChannels, 0,
-             frameToCopy * mChannels * sizeof(AudioDataValue));
-    }
-
-    frameCopied += frameToCopy;
-    iter.Next();
-  }
+    RefPtr<EncodedFrame> audiodata = new EncodedFrame();
+    audiodata->SetFrameType(EncodedFrame::OPUS_AUDIO_FRAME);
+    int framesInPCM = frameCopied;
+    if (mResampler) {
+      AutoTArray<AudioDataValue, 9600> resamplingDest;
+      // We want to consume all the input data, so we slightly oversize the
+      // resampled data buffer so we can fit the output data in. We cannot really
+      // predict the output frame count at each call.
+      uint32_t outframes = frameCopied * kOpusSamplingRate / mSamplingRate + 1;
+      uint32_t inframes = frameCopied;
 
-  RefPtr<EncodedFrame> audiodata = new EncodedFrame();
-  audiodata->SetFrameType(EncodedFrame::OPUS_AUDIO_FRAME);
-  int framesInPCM = frameCopied;
-  if (mResampler) {
-    AutoTArray<AudioDataValue, 9600> resamplingDest;
-    // We want to consume all the input data, so we slightly oversize the
-    // resampled data buffer so we can fit the output data in. We cannot really
-    // predict the output frame count at each call.
-    uint32_t outframes = frameCopied * kOpusSamplingRate / mSamplingRate + 1;
-    uint32_t inframes = frameCopied;
-
-    resamplingDest.SetLength(outframes * mChannels);
+      resamplingDest.SetLength(outframes * mChannels);
 
 #if MOZ_SAMPLE_TYPE_S16
-    short* in = reinterpret_cast<short*>(pcm.Elements());
-    short* out = reinterpret_cast<short*>(resamplingDest.Elements());
-    speex_resampler_process_interleaved_int(mResampler, in, &inframes,
-                                                        out, &outframes);
+      short* in = reinterpret_cast<short*>(pcm.Elements());
+      short* out = reinterpret_cast<short*>(resamplingDest.Elements());
+      speex_resampler_process_interleaved_int(mResampler, in, &inframes,
+                                              out, &outframes);
 #else
-    float* in = reinterpret_cast<float*>(pcm.Elements());
-    float* out = reinterpret_cast<float*>(resamplingDest.Elements());
-    speex_resampler_process_interleaved_float(mResampler, in, &inframes,
-                                                          out, &outframes);
+      float* in = reinterpret_cast<float*>(pcm.Elements());
+      float* out = reinterpret_cast<float*>(resamplingDest.Elements());
+      speex_resampler_process_interleaved_float(mResampler, in, &inframes,
+                                                out, &outframes);
 #endif
 
-    MOZ_ASSERT(pcm.Length() >= mResampledLeftover.Length());
-    PodCopy(pcm.Elements(), mResampledLeftover.Elements(),
-        mResampledLeftover.Length());
+      MOZ_ASSERT(pcm.Length() >= mResampledLeftover.Length());
+      PodCopy(pcm.Elements(), mResampledLeftover.Elements(),
+              mResampledLeftover.Length());
 
-    uint32_t outframesToCopy = std::min(outframes,
-        static_cast<uint32_t>(GetPacketDuration() - framesLeft));
+      uint32_t outframesToCopy = std::min(outframes,
+                                          static_cast<uint32_t>(GetPacketDuration() - framesLeft));
 
-    MOZ_ASSERT(pcm.Length() - mResampledLeftover.Length() >=
-        outframesToCopy * mChannels);
-    PodCopy(pcm.Elements() + mResampledLeftover.Length(),
-        resamplingDest.Elements(), outframesToCopy * mChannels);
-    int frameLeftover = outframes - outframesToCopy;
-    mResampledLeftover.SetLength(frameLeftover * mChannels);
-    PodCopy(mResampledLeftover.Elements(),
-        resamplingDest.Elements() + outframesToCopy * mChannels,
-        mResampledLeftover.Length());
-    // This is always at 48000Hz.
-    framesInPCM = framesLeft + outframesToCopy;
-    audiodata->SetDuration(framesInPCM);
-  } else {
-    // The ogg time stamping and pre-skip is always timed at 48000.
-    audiodata->SetDuration(frameCopied * (kOpusSamplingRate / mSamplingRate));
-  }
+      MOZ_ASSERT(pcm.Length() - mResampledLeftover.Length() >=
+                 outframesToCopy * mChannels);
+      PodCopy(pcm.Elements() + mResampledLeftover.Length(),
+              resamplingDest.Elements(), outframesToCopy * mChannels);
+      int frameLeftover = outframes - outframesToCopy;
+      mResampledLeftover.SetLength(frameLeftover * mChannels);
+      PodCopy(mResampledLeftover.Elements(),
+              resamplingDest.Elements() + outframesToCopy * mChannels,
+              mResampledLeftover.Length());
+      // This is always at 48000Hz.
+      framesInPCM = framesLeft + outframesToCopy;
+      audiodata->SetDuration(framesInPCM);
+    } else {
+      // The ogg time stamping and pre-skip is always timed at 48000.
+      audiodata->SetDuration(frameCopied * (kOpusSamplingRate / mSamplingRate));
+    }
 
-  // Remove the raw data which has been pulled to pcm buffer.
-  // The value of frameCopied should equal to (or smaller than, if eos)
-  // GetPacketDuration().
-  mSourceSegment.RemoveLeading(frameCopied);
+    // Remove the raw data which has been pulled to pcm buffer.
+    // The value of frameCopied should be equal to (or smaller than, if eos)
+    // GetPacketDuration().
+    mSourceSegment.RemoveLeading(frameCopied);
 
-  // Has reached the end of input stream and all queued data has pulled for
-  // encoding.
-  if (mSourceSegment.GetDuration() == 0 && mEndOfStream) {
-    mEncodingComplete = true;
-    LOG("[Opus] Done encoding.");
-  }
+    // We have reached the end of the input stream and all queued data has been
+    // pulled for encoding.
+    if (mSourceSegment.GetDuration() == 0 && mEosSetInEncoder) {
+      mEncodingComplete = true;
+      LOG("[Opus] Done encoding.");
+    }
 
-  MOZ_ASSERT(mEndOfStream || framesInPCM == GetPacketDuration());
+    MOZ_ASSERT(mEosSetInEncoder || framesInPCM == GetPacketDuration());
 
-  // Append null data to pcm buffer if the leftover data is not enough for
-  // opus encoder.
-  if (framesInPCM < GetPacketDuration() && mEndOfStream) {
-    PodZero(pcm.Elements() + framesInPCM * mChannels,
-        (GetPacketDuration() - framesInPCM) * mChannels);
-  }
-  nsTArray<uint8_t> frameData;
-  // Encode the data with Opus Encoder.
-  frameData.SetLength(MAX_DATA_BYTES);
-  // result is returned as opus error code if it is negative.
-  int result = 0;
+    // Append null data to pcm buffer if the leftover data is not enough for
+    // opus encoder.
+    if (framesInPCM < GetPacketDuration() && mEosSetInEncoder) {
+      PodZero(pcm.Elements() + framesInPCM * mChannels,
+              (GetPacketDuration() - framesInPCM) * mChannels);
+    }
+    nsTArray<uint8_t> frameData;
+    // Encode the data with Opus Encoder.
+    frameData.SetLength(MAX_DATA_BYTES);
+    // result is returned as opus error code if it is negative.
+    result = 0;
 #ifdef MOZ_SAMPLE_TYPE_S16
-  const opus_int16* pcmBuf = static_cast<opus_int16*>(pcm.Elements());
-  result = opus_encode(mEncoder, pcmBuf, GetPacketDuration(),
-                       frameData.Elements(), MAX_DATA_BYTES);
+    const opus_int16* pcmBuf = static_cast<opus_int16*>(pcm.Elements());
+    result = opus_encode(mEncoder, pcmBuf, GetPacketDuration(),
+                         frameData.Elements(), MAX_DATA_BYTES);
 #else
-  const float* pcmBuf = static_cast<float*>(pcm.Elements());
-  result = opus_encode_float(mEncoder, pcmBuf, GetPacketDuration(),
-                             frameData.Elements(), MAX_DATA_BYTES);
+    const float* pcmBuf = static_cast<float*>(pcm.Elements());
+    result = opus_encode_float(mEncoder, pcmBuf, GetPacketDuration(),
+                               frameData.Elements(), MAX_DATA_BYTES);
 #endif
-  frameData.SetLength(result >= 0 ? result : 0);
+    frameData.SetLength(result >= 0 ? result : 0);
 
-  if (result < 0) {
-    LOG("[Opus] Fail to encode data! Result: %s.", opus_strerror(result));
+    if (result < 0) {
+      LOG("[Opus] Fail to encode data! Result: %s.", opus_strerror(result));
+    }
+    if (mEncodingComplete) {
+      if (mResampler) {
+        speex_resampler_destroy(mResampler);
+        mResampler = nullptr;
+      }
+      mResampledLeftover.SetLength(0);
+    }
+
+    audiodata->SwapInFrameData(frameData);
+    // timestamp should be the time of the first sample
+    audiodata->SetTimeStamp(mOutputTimeStamp);
+    mOutputTimeStamp += FramesToUsecs(GetPacketDuration(), kOpusSamplingRate).value();
+    LOG("[Opus] mOutputTimeStamp %lld.",mOutputTimeStamp);
+    aData.AppendEncodedFrame(audiodata);
   }
-  if (mEncodingComplete) {
-    if (mResampler) {
-      speex_resampler_destroy(mResampler);
-      mResampler = nullptr;
-    }
-    mResampledLeftover.SetLength(0);
-  }
-
-  audiodata->SwapInFrameData(frameData);
-  mOutputTimeStamp += FramesToUsecs(GetPacketDuration(), kOpusSamplingRate).value();
-  audiodata->SetTimeStamp(mOutputTimeStamp);
-  LOG("[Opus] mOutputTimeStamp %lld.",mOutputTimeStamp);
-  aData.AppendEncodedFrame(audiodata);
+done:
   return result >= 0 ? NS_OK : NS_ERROR_FAILURE;
 }
 
 } // namespace mozilla
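
The hunk above also addresses the mEndOfStream accesses mentioned in the commit message: mEndOfStream is shared with the producer thread and may only be read while the monitor is held, so the encoder latches it into mEosSetInEncoder inside the locked block and only tests the latched copy afterwards (the mEosSetInEncoder checks in the new code). A minimal sketch of that latch pattern, with illustrative names and std::mutex standing in for the ReentrantMonitor:

    #include <mutex>

    class SketchEncoder {
    public:
      // Producer thread signals that no more audio will arrive.
      void NotifyEndOfStream() {
        std::lock_guard<std::mutex> guard(mLock);
        mEndOfStream = true;
      }

      // Encoder thread: runs once per encode cycle.
      void EncodeCycle() {
        {
          std::lock_guard<std::mutex> guard(mLock);
          if (mEndOfStream && !mEosSeen) {
            mEosSeen = true;         // latch the shared flag while locked
            // (the real encoder pads mLookahead samples here)
          }
        }
        // Outside the lock, only the encoder-owned copy is consulted.
        if (mEosSeen /* && no queued source data left */) {
          mEncodingComplete = true;
        }
      }

    private:
      std::mutex mLock;
      bool mEndOfStream = false;       // shared state: guarded by mLock
      bool mEosSeen = false;           // touched by the encoder thread only
      bool mEncodingComplete = false;  // encoder thread only
    };
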
--- a/dom/media/ogg/OggWriter.cpp
+++ b/dom/media/ogg/OggWriter.cpp
@@ -56,24 +56,27 @@ OggWriter::Init()
 
 nsresult
 OggWriter::WriteEncodedTrack(const EncodedFrameContainer& aData,
                              uint32_t aFlags)
 {
   PROFILER_LABEL("OggWriter", "WriteEncodedTrack",
     js::ProfileEntry::Category::OTHER);
 
-  for (uint32_t i = 0; i < aData.GetEncodedFrames().Length(); i++) {
+  uint32_t len = aData.GetEncodedFrames().Length();
+  for (uint32_t i = 0; i < len; i++) {
     if (aData.GetEncodedFrames()[i]->GetFrameType() != EncodedFrame::OPUS_AUDIO_FRAME) {
       LOG("[OggWriter] wrong encoded data type!");
       return NS_ERROR_FAILURE;
     }
 
+    // only pass END_OF_STREAM on the last frame!
     nsresult rv = WriteEncodedData(aData.GetEncodedFrames()[i]->GetFrameData(),
                                    aData.GetEncodedFrames()[i]->GetDuration(),
+                                   i < len-1 ? (aFlags & ~ContainerWriter::END_OF_STREAM) :
                                    aFlags);
     if (NS_FAILED(rv)) {
       LOG("%p Failed to WriteEncodedTrack!", this);
       return rv;
     }
   }
   return NS_OK;
 }
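
The OggWriter.cpp change above keeps ContainerWriter::END_OF_STREAM off every frame except the last one in the batch, since writing several frames per call is now the normal case. A small, self-contained sketch of that flag-masking idiom; the flag value and helper names here are illustrative, not the real ContainerWriter API.

    #include <cstdint>
    #include <vector>

    namespace sketch {

    constexpr uint32_t END_OF_STREAM = 1u << 0;   // illustrative flag value

    // Write a batch of encoded frames, passing END_OF_STREAM only with the
    // last one so the container is not finalized early.
    void WriteFrames(const std::vector<std::vector<uint8_t>>& frames, uint32_t flags)
    {
      const size_t len = frames.size();
      for (size_t i = 0; i < len; ++i) {
        // Strip END_OF_STREAM unless this is the final frame of the batch.
        uint32_t frameFlags = (i + 1 < len) ? (flags & ~END_OF_STREAM) : flags;
        // WriteEncodedData(frames[i], frameFlags);  // container write would go here
        (void)frameFlags;
      }
    }

    } // namespace sketch
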