Bug 1271585 - Part 2 - Synchronously insert audio frames from the microphone in the MSG if possible. r=pehrsons,jesup
author Paul Adenot <paul@paul.cx>
Mon, 30 May 2016 15:24:19 +0200
changeset 340590 999676db297b68200d58c0d086e2a21a0b4856df
parent 340589 466534e08cbc970c40d81488c488c4ee847c784f
child 340591 062aa1bd5487d268078b031b75335c106ae40480
push id 1183
push user raliiev@mozilla.com
push date Mon, 05 Sep 2016 20:01:49 +0000
treeherder mozilla-release@3148731bed45
reviewers pehrsons, jesup
bugs 1271585
milestone 49.0a1
Bug 1271585 - Part 2 - Synchronously insert audio frames from the microphone in the MSG if possible. r=pehrsons,jesup MozReview-Commit-ID: Fm2woel600v
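In short, this patch adds a pass-through mode to the microphone source: when no audio processing (AEC, AGC, noise suppression) is requested, input frames are appended to the MediaStreamGraph straight from the audio callback instead of going through the webrtc.org packetizing/processing chain. The hunks below have the real code; what follows is only a rough sketch of the control flow, using hypothetical stand-in types and signatures rather than the actual Gecko ones:

#include <cstddef>
#include <cstdio>

// Sketch only -- MicSourceSketch is a made-up type, not Gecko's
// MediaEngineWebRTCMicrophoneSource.
struct MicSourceSketch {
  bool mSkipProcessing = false;  // set from prefs/constraints in Restart()

  bool PassThrough() const { return mSkipProcessing; }

  void InsertInGraph(const float*, size_t aFrames, unsigned) {
    std::printf("direct insert of %zu frames\n", aFrames);
  }
  void PacketizeAndProcess(const float*, size_t aFrames, int aRate, unsigned) {
    std::printf("packetizing %zu frames into %d-frame packets\n",
                aFrames, aRate / 100);
  }

  void NotifyInputData(const float* aBuffer, size_t aFrames,
                       int aRate, unsigned aChannels) {
    if (PassThrough()) {
      InsertInGraph(aBuffer, aFrames, aChannels);   // no AEC/AGC/NS wanted
    } else {
      PacketizeAndProcess(aBuffer, aFrames, aRate, aChannels);  // 10 ms chain
    }
  }
};

int main() {
  float silence[480] = {};
  MicSourceSketch source;
  source.mSkipProcessing = true;  // e.g. all processing passes disabled
  source.NotifyInputData(silence, 480, 48000, 1);
  return 0;
}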
dom/media/webrtc/MediaEngine.h
dom/media/webrtc/MediaEngineWebRTC.h
dom/media/webrtc/MediaEngineWebRTCAudio.cpp
--- a/dom/media/webrtc/MediaEngine.h
+++ b/dom/media/webrtc/MediaEngine.h
@@ -53,16 +53,20 @@ public:
   static const int DEFAULT_169_VIDEO_WIDTH = 1280;
   static const int DEFAULT_169_VIDEO_HEIGHT = 720;
 
 #ifndef MOZ_B2G
   static const int DEFAULT_SAMPLE_RATE = 32000;
 #else
   static const int DEFAULT_SAMPLE_RATE = 16000;
 #endif
+  // This allows using whatever rate the graph is using for the
+  // MediaStreamTrack. This is useful for microphone data, because we know it
+  // is already at the correct rate for insertion in the MSG.
+  static const int USE_GRAPH_RATE = -1;
 
   /* Populate an array of video sources in the nsTArray. Also include devices
    * that are currently unavailable. */
   virtual void EnumerateVideoDevices(dom::MediaSourceEnum,
                                      nsTArray<RefPtr<MediaEngineVideoSource> >*) = 0;
 
   /* Populate an array of audio sources in the nsTArray. Also include devices
    * that are currently unavailable. */
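The sentinel above only defers the rate decision; Start() resolves it against the stream's graph, as the MediaEngineWebRTCAudio.cpp hunk further down shows. A minimal sketch of the pattern, assuming a stand-in stream type and a 48 kHz graph rate:

#include <cassert>
#include <cstdint>

using TrackRate = int32_t;
static const int USE_GRAPH_RATE = -1;  // sentinel: adopt the graph's rate

// Sketch only -- StreamSketch stands in for SourceMediaStream.
struct StreamSketch {
  TrackRate GraphRate() const { return 48000; }  // assumed graph rate
};

// Resolve a configured sample frequency, turning the sentinel into the
// actual graph rate so no resampling is needed on insertion.
TrackRate ResolveRate(TrackRate aConfigured, const StreamSketch& aStream) {
  return aConfigured == USE_GRAPH_RATE ? aStream.GraphRate() : aConfigured;
}

int main() {
  StreamSketch stream;
  assert(ResolveRate(USE_GRAPH_RATE, stream) == 48000);  // mic pass-through
  assert(ResolveRate(32000, stream) == 32000);           // fixed engine rate
  return 0;
}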
--- a/dom/media/webrtc/MediaEngineWebRTC.h
+++ b/dom/media/webrtc/MediaEngineWebRTC.h
@@ -429,16 +429,17 @@ public:
     , mThread(aThread)
     , mCapIndex(aIndex)
     , mChannel(-1)
     , mNrAllocations(0)
     , mStarted(false)
     , mSampleFrequency(MediaEngine::DEFAULT_SAMPLE_RATE)
     , mPlayoutDelay(0)
     , mNullTransport(nullptr)
+    , mSkipProcessing(false)
   {
     MOZ_ASSERT(aVoiceEnginePtr);
     MOZ_ASSERT(aAudioInput);
     mDeviceName.Assign(NS_ConvertUTF8toUTF16(name));
     mDeviceUUID.Assign(uuid);
     mListener = new mozilla::WebRTCAudioDataListener(this);
     // We'll init lazily as needed
   }
@@ -510,16 +511,32 @@ protected:
 private:
   // These allocate/configure and release the channel
   bool AllocChannel();
   void FreeChannel();
   // These start/stop VoEBase and associated interfaces
   bool InitEngine();
   void DeInitEngine();
 
+  // This is true when all processing is disabled, so we can skip
+  // packetization, resampling and other processing passes.
+  bool PassThrough() {
+    return mSkipProcessing;
+  }
+  template<typename T>
+  void InsertInGraph(const T* aBuffer,
+                     size_t aFrames,
+                     uint32_t aChannels);
+
+  void PacketizeAndProcess(MediaStreamGraph* aGraph,
+                           const AudioDataValue* aBuffer,
+                           size_t aFrames,
+                           TrackRate aRate,
+                           uint32_t aChannels);
+
   webrtc::VoiceEngine* mVoiceEngine;
   RefPtr<mozilla::AudioInput> mAudioInput;
   RefPtr<WebRTCAudioDataListener> mListener;
 
   // Note: shared across all microphone sources - we don't want to Terminate()
   // the VoEBase until there are no active captures
   static int sChannelsOpen;
   static ScopedCustomReleasePtr<webrtc::VoEBase> mVoEBase;
@@ -550,16 +567,20 @@ private:
   nsCString mDeviceUUID;
 
   uint32_t mSampleFrequency;
   int32_t mPlayoutDelay;
 
   NullTransport *mNullTransport;
 
   nsTArray<int16_t> mInputBuffer;
+  // mSkipProcessing is true if none of the processing passes are enabled,
+  // because of prefs or constraints. This allows simply copying the audio into
+  // the MSG, skipping resampling and the whole webrtc.org processing pipeline.
+  bool mSkipProcessing;
 };
 
 class MediaEngineWebRTC : public MediaEngine
 {
 public:
   explicit MediaEngineWebRTC(MediaEnginePrefs& aPrefs);
 
   // Clients should ensure to clean-up sources video/audio sources
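For reference, the packetizer sizing used by PacketizeAndProcess in the next file follows webrtc.org's 10 ms processing quantum: a packet holds aRate/100 frames, and its sample count is that multiplied by the channel count. A tiny self-contained sketch of the arithmetic, assuming 48 kHz mono input:

#include <cstdio>

int main() {
  const unsigned rate = 48000;  // assumed input rate, matching the graph
  const unsigned channels = 1;  // capture is mono here (see bug 971528)
  const unsigned packetFrames = rate / 100;             // 480 frames per 10 ms
  const unsigned samplesPerPacket = packetFrames * channels;
  std::printf("10 ms packet at %u Hz: %u frames, %u samples\n",
              rate, packetFrames, samplesPerPacket);
  return 0;
}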
--- a/dom/media/webrtc/MediaEngineWebRTCAudio.cpp
+++ b/dom/media/webrtc/MediaEngineWebRTCAudio.cpp
@@ -293,16 +293,22 @@ MediaEngineWebRTCMicrophoneSource::Resta
     }
     if (0 != (error = mVoEProcessing->SetAgcStatus(agc_on, (webrtc::AgcModes) aPrefs.mAgc))) {
       LOG(("%s Error setting AGC Status: %d ",__FUNCTION__, error));
     }
     if (0 != (error = mVoEProcessing->SetNsStatus(noise_on, (webrtc::NsModes) aPrefs.mNoise))) {
       LOG(("%s Error setting NoiseSuppression Status: %d ",__FUNCTION__, error));
     }
   }
+
+  mSkipProcessing = !(aec_on || agc_on || noise_on);
+  if (mSkipProcessing) {
+    mSampleFrequency = MediaEngine::USE_GRAPH_RATE;
+  }
+
   return NS_OK;
 }
 
 nsresult
 MediaEngineWebRTCMicrophoneSource::Deallocate()
 {
   AssertIsOnOwningThread();
   --mNrAllocations;
@@ -339,16 +345,19 @@ MediaEngineWebRTCMicrophoneSource::Start
   {
     MonitorAutoLock lock(mMonitor);
     mSources.AppendElement(aStream);
     mPrincipalHandles.AppendElement(aPrincipalHandle);
     MOZ_ASSERT(mSources.Length() == mPrincipalHandles.Length());
   }
 
   AudioSegment* segment = new AudioSegment();
+  if (mSampleFrequency == MediaEngine::USE_GRAPH_RATE) {
+    mSampleFrequency = aStream->GraphRate();
+  }
   aStream->AddAudioTrack(aID, mSampleFrequency, 0, segment, SourceMediaStream::ADDTRACK_QUEUED);
 
   // XXX Make this based on the pref.
   aStream->RegisterForAudioMixing();
   LOG(("Start audio for stream %p", aStream));
 
   if (!mListener) {
     mListener = new mozilla::WebRTCAudioDataListener(this);
@@ -452,45 +461,107 @@ void
 MediaEngineWebRTCMicrophoneSource::NotifyOutputData(MediaStreamGraph* aGraph,
                                                     AudioDataValue* aBuffer,
                                                     size_t aFrames,
                                                     TrackRate aRate,
                                                     uint32_t aChannels)
 {
 }
 
+void
+MediaEngineWebRTCMicrophoneSource::PacketizeAndProcess(MediaStreamGraph* aGraph,
+                                                       const AudioDataValue* aBuffer,
+                                                       size_t aFrames,
+                                                       TrackRate aRate,
+                                                       uint32_t aChannels)
+{
+  // This will call Process() with data coming out of the AEC/NS/AGC/etc chain
+  if (!mPacketizer ||
+      mPacketizer->PacketSize() != aRate/100u ||
+      mPacketizer->Channels() != aChannels) {
+    // It's ok to drop the audio still in the packetizer here.
+    mPacketizer =
+      new AudioPacketizer<AudioDataValue, int16_t>(aRate/100, aChannels);
+  }
+
+  mPacketizer->Input(aBuffer, static_cast<uint32_t>(aFrames));
+
+  while (mPacketizer->PacketsAvailable()) {
+    uint32_t samplesPerPacket = mPacketizer->PacketSize() *
+      mPacketizer->Channels();
+    if (mInputBuffer.Length() < samplesPerPacket) {
+      mInputBuffer.SetLength(samplesPerPacket);
+    }
+    int16_t* packet = mInputBuffer.Elements();
+    mPacketizer->Output(packet);
+
+    mVoERender->ExternalRecordingInsertData(packet, samplesPerPacket, aRate, 0);
+  }
+}
+
+template<typename T>
+void
+MediaEngineWebRTCMicrophoneSource::InsertInGraph(const T* aBuffer,
+                                                 size_t aFrames,
+                                                 uint32_t aChannels)
+{
+  if (mState != kStarted) {
+    return;
+  }
+
+  size_t len = mSources.Length();
+  for (size_t i = 0; i < len; i++) {
+    if (!mSources[i]) {
+      continue;
+    }
+    RefPtr<SharedBuffer> buffer =
+      SharedBuffer::Create(aFrames * aChannels * sizeof(T));
+    PodCopy(static_cast<T*>(buffer->Data()),
+            aBuffer, aFrames * aChannels);
+
+    TimeStamp insertTime;
+    // Make sure we include the stream and the track.
+    // The 0:1 is a flag to note when we've done the final insert for a given input block.
+    LogTime(AsyncLatencyLogger::AudioTrackInsertion,
+            LATENCY_STREAM_ID(mSources[i].get(), mTrackID),
+            (i+1 < len) ? 0 : 1, insertTime);
+
+    nsAutoPtr<AudioSegment> segment(new AudioSegment());
+    AutoTArray<const T*, 1> channels;
+    // XXX Bug 971528 - Support stereo capture in gUM
+    MOZ_ASSERT(aChannels == 1,
+        "GraphDriver only supports us stereo audio for now");
+    channels.AppendElement(static_cast<T*>(buffer->Data()));
+    segment->AppendFrames(buffer.forget(), channels, aFrames,
+                         mPrincipalHandles[i]);
+    segment->GetStartTime(insertTime);
+
+    RUN_ON_THREAD(mThread,
+                  WrapRunnable(mSources[i], &SourceMediaStream::AppendToTrack,
+                               mTrackID, segment,
+                               static_cast<AudioSegment*>(nullptr)),
+                  NS_DISPATCH_NORMAL);
+  }
+}
+
 // Called back on GraphDriver thread!
 // Note this can be called back after ::Shutdown()
 void
 MediaEngineWebRTCMicrophoneSource::NotifyInputData(MediaStreamGraph* aGraph,
                                                    const AudioDataValue* aBuffer,
                                                    size_t aFrames,
                                                    TrackRate aRate,
                                                    uint32_t aChannels)
 {
-  // This will call Process() with data coming out of the AEC/NS/AGC/etc chain
-  if (!mPacketizer ||
-      mPacketizer->PacketSize() != aRate/100u ||
-      mPacketizer->Channels() != aChannels) {
-    // It's ok to drop the audio still in the packetizer here.
-    mPacketizer = new AudioPacketizer<AudioDataValue, int16_t>(aRate/100, aChannels);
-  }
-
-  mPacketizer->Input(aBuffer, static_cast<uint32_t>(aFrames));
-
-  while (mPacketizer->PacketsAvailable()) {
-    uint32_t samplesPerPacket = mPacketizer->PacketSize() *
-                                mPacketizer->Channels();
-    if (mInputBuffer.Length() < samplesPerPacket) {
-      mInputBuffer.SetLength(samplesPerPacket);
-    }
-    int16_t* packet = mInputBuffer.Elements();
-    mPacketizer->Output(packet);
-
-    mVoERender->ExternalRecordingInsertData(packet, samplesPerPacket, aRate, 0);
+  // If some processing is necessary, packetize and insert the data into the
+  // webrtc.org pipeline. Otherwise, insert the mic data directly into the MSG, bypassing all processing.
+  if (PassThrough()) {
+    InsertInGraph<AudioDataValue>(aBuffer, aFrames, aChannels);
+  } else {
+    PacketizeAndProcess(aGraph, aBuffer, aFrames, aRate, aChannels);
   }
 }
 
 #define ResetProcessingIfNeeded(_processing)                        \
 do {                                                                \
   webrtc::_processing##Modes mode;                                  \
   int rv = mVoEProcessing->Get##_processing##Status(enabled, mode); \
   if (rv) {                                                         \
@@ -673,16 +744,17 @@ MediaEngineWebRTCMicrophoneSource::Shutd
 typedef int16_t sample;
 
 void
 MediaEngineWebRTCMicrophoneSource::Process(int channel,
                                            webrtc::ProcessingTypes type,
                                            sample *audio10ms, int length,
                                            int samplingFreq, bool isStereo)
 {
+  MOZ_ASSERT(!PassThrough(), "This should be bypassed when in PassThrough mode.");
   // On initial capture, throw away all far-end data except the most recent sample
   // since it's already irrelevant and we want to keep avoid confusing the AEC far-end
   // input code with "old" audio.
   if (!mStarted) {
     mStarted  = true;
     while (gFarendObserver->Size() > 1) {
       free(gFarendObserver->Pop()); // only call if size() > 0
     }
@@ -705,43 +777,18 @@ MediaEngineWebRTCMicrophoneSource::Proce
   }
 
   MonitorAutoLock lock(mMonitor);
   if (mState != kStarted)
     return;
 
   uint32_t len = mSources.Length();
   for (uint32_t i = 0; i < len; i++) {
-    RefPtr<SharedBuffer> buffer = SharedBuffer::Create(length * sizeof(sample));
-
-    sample* dest = static_cast<sample*>(buffer->Data());
-    memcpy(dest, audio10ms, length * sizeof(sample));
-
-    nsAutoPtr<AudioSegment> segment(new AudioSegment());
-    AutoTArray<const sample*,1> channels;
-    channels.AppendElement(dest);
-    segment->AppendFrames(buffer.forget(), channels, length,
-                          mPrincipalHandles[i]);
-    TimeStamp insertTime;
-    segment->GetStartTime(insertTime);
-
-    if (mSources[i]) {
-      // Make sure we include the stream and the track.
-      // The 0:1 is a flag to note when we've done the final insert for a given input block.
-      LogTime(AsyncLatencyLogger::AudioTrackInsertion, LATENCY_STREAM_ID(mSources[i].get(), mTrackID),
-              (i+1 < len) ? 0 : 1, insertTime);
-
-      // This is safe from any thread, and is safe if the track is Finished
-      // or Destroyed.
-      // Note: due to evil magic, the nsAutoPtr<AudioSegment>'s ownership transfers to
-      // the Runnable (AutoPtr<> = AutoPtr<>)
-      RUN_ON_THREAD(mThread, WrapRunnable(mSources[i], &SourceMediaStream::AppendToTrack,
-                                          mTrackID, segment, (AudioSegment *) nullptr),
-                    NS_DISPATCH_NORMAL);
-    }
+    MOZ_ASSERT(!isStereo);
+    InsertInGraph<int16_t>(audio10ms, length, 1);
   }
 
   return;
 }
 
 void
 MediaEngineWebRTCAudioCaptureSource::GetName(nsAString &aName)
 {