Bug 1531833 - Add a way to tell the graph driver that the audio input is voice. r=pehrsons
author Paul Adenot <paul@paul.cx>
Tue, 16 Apr 2019 15:42:38 +0000
changeset 469693 d47604394f78
parent 469692 45f499ef4b62
child 469694 0a351368e1f7
push id 35879
push user nerli@mozilla.com
push date Tue, 16 Apr 2019 22:01:48 +0000
treeherder mozilla-central@12a60898fdc1 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers pehrsons
bugs 1531833
milestone 68.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1531833 - Add a way to tell the graph driver that the audio input is voice. r=pehrsons Differential Revision: https://phabricator.services.mozilla.com/D21737
dom/media/GraphDriver.cpp
dom/media/GraphDriver.h
dom/media/MediaStreamGraph.cpp
dom/media/MediaStreamGraph.h
dom/media/MediaStreamGraphImpl.h
dom/media/webrtc/MediaEngineWebRTCAudio.h
--- a/dom/media/GraphDriver.cpp
+++ b/dom/media/GraphDriver.cpp
@@ -464,17 +464,18 @@ StreamAndPromiseForOperation::StreamAndP
     MediaStream* aStream, void* aPromise, dom::AudioContextOperation aOperation,
     dom::AudioContextOperationFlags aFlags)
     : mStream(aStream),
       mPromise(aPromise),
       mOperation(aOperation),
       mFlags(aFlags) {}
 
 AudioCallbackDriver::AudioCallbackDriver(MediaStreamGraphImpl* aGraphImpl,
-                                         uint32_t aInputChannelCount)
+                                         uint32_t aInputChannelCount,
+                                         AudioInputType aAudioInputType)
     : GraphDriver(aGraphImpl),
       mOutputChannels(0),
       mSampleRate(0),
       mInputChannelCount(aInputChannelCount),
       mIterationDurationMS(MEDIA_GRAPH_TARGET_PERIOD_MS),
       mStarted(false),
       mInitShutdownThread(
           SharedThreadPool::Get(NS_LITERAL_CSTRING("CubebOperation"), 1)),
--- a/dom/media/GraphDriver.h
+++ b/dom/media/GraphDriver.h
@@ -318,16 +318,17 @@ struct StreamAndPromiseForOperation {
                                dom::AudioContextOperationFlags aFlags);
   RefPtr<MediaStream> mStream;
   void* mPromise;
   dom::AudioContextOperation mOperation;
   dom::AudioContextOperationFlags mFlags;
 };
 
 enum AsyncCubebOperation { INIT, SHUTDOWN };
+enum class AudioInputType { Unknown, Voice };
 
 /**
  * This is a graph driver that is based on callback functions called by the
  * audio api. This ensures minimal audio latency, because it means there is no
  * buffering happening: the audio is generated inside the callback.
  *
  * This design is less flexible than running our own thread:
  * - We have no control over the thread:
@@ -349,17 +350,18 @@ class AudioCallbackDriver : public Graph
 #if defined(XP_WIN)
     ,
                             public audio::DeviceChangeListener
 #endif
 {
  public:
   /** If aInputChannelCount is zero, then this driver is output-only. */
   AudioCallbackDriver(MediaStreamGraphImpl* aGraphImpl,
-                      uint32_t aInputChannelCount);
+                      uint32_t aInputChannelCount,
+                      AudioInputType aAudioInputType);
   virtual ~AudioCallbackDriver();
 
   void Start() override;
   void Revive() override;
   void WaitForNextIteration() override;
   void WakeUp() override;
   void Shutdown() override;
 #if defined(XP_WIN)
@@ -397,16 +399,23 @@ class AudioCallbackDriver : public Graph
 
   uint32_t OutputChannelCount() {
     MOZ_ASSERT(mOutputChannels != 0 && mOutputChannels <= 8);
     return mOutputChannels;
   }
 
   uint32_t InputChannelCount() { return mInputChannelCount; }
 
+  /* Maps the stored cubeb device preference to an AudioInputType: Voice for
+   * CUBEB_DEVICE_PREF_VOICE, Unknown for anything else. */
+  AudioInputType InputDevicePreference() {
+    const bool isVoice = mInputDevicePreference == CUBEB_DEVICE_PREF_VOICE;
+    return isVoice ? AudioInputType::Voice : AudioInputType::Unknown;
+  }
+
   /* Enqueue a promise that is going to be resolved when a specific operation
    * occurs on the cubeb stream. */
   void EnqueueStreamAndPromiseForOperation(
       MediaStream* aStream, void* aPromise,
       dom::AudioContextOperation aOperation,
       dom::AudioContextOperationFlags aFlags);
 
   std::thread::id ThreadId() { return mAudioThreadId.load(); }
@@ -498,22 +507,22 @@ class AudioCallbackDriver : public Graph
     AudioCallbackDriver* mDriver;
   };
 
   /* Shared thread pool with up to one thread for off-main-thread
    * initialization and shutdown of the audio stream via AsyncCubebTask. */
   const RefPtr<SharedThreadPool> mInitShutdownThread;
   /* This must be accessed with the graph monitor held. */
   AutoTArray<StreamAndPromiseForOperation, 1> mPromisesForOperation;
+  cubeb_device_pref mInputDevicePreference;
   /* This is used to signal adding the mixer callback on first run
    * of audio callback. This is atomic because it is touched from different
    * threads, the audio callback thread and the state change thread. However,
    * the order of the threads does not allow concurent access. */
   Atomic<bool> mAddedMixer;
-
   /* Contains the id of the audio thread for as long as the callback
    * is taking place, after that it is reseted to an invalid value. */
   std::atomic<std::thread::id> mAudioThreadId;
   /* True when audio thread is running. False before
    * starting and after stopping it the audio thread. */
   Atomic<bool> mAudioThreadRunning;
   /* Indication of whether a fallback SystemClockDriver should be started if
    * StateCallback() receives an error.  No mutex need be held during access.
--- a/dom/media/MediaStreamGraph.cpp
+++ b/dom/media/MediaStreamGraph.cpp
@@ -363,18 +363,18 @@ void MediaStreamGraphImpl::UpdateStreamO
     MonitorAutoLock mon(mMonitor);
     switching = CurrentDriver()->Switching();
   }
 
   if (audioTrackPresent && mRealtime &&
       !CurrentDriver()->AsAudioCallbackDriver() && !switching) {
     MonitorAutoLock mon(mMonitor);
     if (LifecycleStateRef() == LIFECYCLE_RUNNING) {
-      AudioCallbackDriver* driver =
-          new AudioCallbackDriver(this, AudioInputChannelCount());
+      AudioCallbackDriver* driver = new AudioCallbackDriver(
+          this, AudioInputChannelCount(), AudioInputDevicePreference());
       CurrentDriver()->SwitchAtNextIteration(driver);
     }
   }
 
   if (!mStreamOrderDirty) {
     return;
   }
 
@@ -607,18 +607,18 @@ void MediaStreamGraphImpl::CreateOrDestr
       {
         MonitorAutoLock lock(mMonitor);
         switching = CurrentDriver()->Switching();
       }
 
       if (!CurrentDriver()->AsAudioCallbackDriver() && !switching) {
         MonitorAutoLock mon(mMonitor);
         if (LifecycleStateRef() == LIFECYCLE_RUNNING) {
-          AudioCallbackDriver* driver =
-              new AudioCallbackDriver(this, AudioInputChannelCount());
+          AudioCallbackDriver* driver = new AudioCallbackDriver(
+              this, AudioInputChannelCount(), AudioInputDevicePreference());
           CurrentDriver()->SwitchAtNextIteration(driver);
         }
       }
     }
   }
 
   for (int32_t i = audioOutputStreamsFound.Length() - 1; i >= 0; --i) {
     if (!audioOutputStreamsFound[i]) {
@@ -741,18 +741,18 @@ void MediaStreamGraphImpl::OpenAudioInpu
 
   listeners.AppendElement(aListener);
 
   if (listeners.Length() == 1) {  // first open for this device
     mInputDeviceID = aID;
     // Switch Drivers since we're adding input (to input-only or full-duplex)
     MonitorAutoLock mon(mMonitor);
     if (LifecycleStateRef() == LIFECYCLE_RUNNING) {
-      AudioCallbackDriver* driver =
-          new AudioCallbackDriver(this, AudioInputChannelCount());
+      AudioCallbackDriver* driver = new AudioCallbackDriver(
+          this, AudioInputChannelCount(), AudioInputDevicePreference());
       LOG(LogLevel::Debug,
           ("%p OpenAudioInput: starting new AudioCallbackDriver(input) %p",
            this, driver));
       CurrentDriver()->SwitchAtNextIteration(driver);
     } else {
       LOG(LogLevel::Error, ("OpenAudioInput in shutdown!"));
       MOZ_ASSERT_UNREACHABLE("Can't open cubeb inputs in shutdown");
     }
@@ -825,17 +825,18 @@ void MediaStreamGraphImpl::CloseAudioInp
   MonitorAutoLock mon(mMonitor);
   if (LifecycleStateRef() == LIFECYCLE_RUNNING) {
     GraphDriver* driver;
     if (audioTrackPresent) {
       // We still have audio output
       LOG(LogLevel::Debug,
           ("%p: CloseInput: output present (AudioCallback)", this));
 
-      driver = new AudioCallbackDriver(this, AudioInputChannelCount());
+      driver = new AudioCallbackDriver(this, AudioInputChannelCount(),
+                                       AudioInputDevicePreference());
       CurrentDriver()->SwitchAtNextIteration(driver);
     } else if (CurrentDriver()->AsAudioCallbackDriver()) {
       LOG(LogLevel::Debug,
           ("%p: CloseInput: no output present (SystemClockCallback)", this));
 
       driver = new SystemClockDriver(this);
       CurrentDriver()->SwitchAtNextIteration(driver);
     }  // else SystemClockDriver->SystemClockDriver, no switch
@@ -976,30 +977,34 @@ void MediaStreamGraphImpl::ReevaluateInp
   bool needToSwitch = false;
 
   if (CurrentDriver()->AsAudioCallbackDriver()) {
     AudioCallbackDriver* audioCallbackDriver =
         CurrentDriver()->AsAudioCallbackDriver();
     if (audioCallbackDriver->InputChannelCount() != AudioInputChannelCount()) {
       needToSwitch = true;
     }
+    if (audioCallbackDriver->InputDevicePreference() !=
+        AudioInputDevicePreference()) {
+      needToSwitch = true;
+    }
   } else {
     // We're already in the process of switching to a audio callback driver,
     // which will happen at the next iteration.
     // However, maybe it's not the correct number of channels. Re-query the
     // correct channel amount at this time.
 #ifdef DEBUG
     MonitorAutoLock lock(mMonitor);
     MOZ_ASSERT(CurrentDriver()->Switching());
 #endif
     needToSwitch = true;
   }
   if (needToSwitch) {
-    AudioCallbackDriver* newDriver =
-        new AudioCallbackDriver(this, AudioInputChannelCount());
+    AudioCallbackDriver* newDriver = new AudioCallbackDriver(
+        this, AudioInputChannelCount(), AudioInputDevicePreference());
     {
       MonitorAutoLock lock(mMonitor);
       CurrentDriver()->SwitchAtNextIteration(newDriver);
     }
   }
 }
 
 bool MediaStreamGraphImpl::OnGraphThreadOrNotRunning() const {
@@ -3148,18 +3153,19 @@ MediaStreamGraphImpl::MediaStreamGraphIm
 #ifdef DEBUG
       ,
       mCanRunMessagesSynchronously(false)
 #endif
       ,
       mMainThreadGraphTime(0, "MediaStreamGraphImpl::mMainThreadGraphTime") {
   if (mRealtime) {
     if (aDriverRequested == AUDIO_THREAD_DRIVER) {
-      // Always start with zero input channels.
-      mDriver = new AudioCallbackDriver(this, 0);
+      // Always start with zero input channels, and no particular preferences
+      // for the input channel.
+      mDriver = new AudioCallbackDriver(this, 0, AudioInputType::Unknown);
     } else {
       mDriver = new SystemClockDriver(this);
     }
 
 #ifdef TRACING
     // This is a noop if the logger has not been enabled.
     gMSGTraceLogger.Start();
     gMSGTraceLogger.Log("[");
@@ -3640,17 +3646,18 @@ void MediaStreamGraphImpl::ApplyAudioCon
   // anyways, but doing this now save some time.
   if (aOperation == AudioContextOperation::Resume) {
     if (!CurrentDriver()->AsAudioCallbackDriver()) {
       AudioCallbackDriver* driver;
       if (switching) {
         MOZ_ASSERT(nextDriver->AsAudioCallbackDriver());
         driver = nextDriver->AsAudioCallbackDriver();
       } else {
-        driver = new AudioCallbackDriver(this, AudioInputChannelCount());
+        driver = new AudioCallbackDriver(this, AudioInputChannelCount(),
+                                         AudioInputDevicePreference());
         MonitorAutoLock lock(mMonitor);
         CurrentDriver()->SwitchAtNextIteration(driver);
       }
       driver->EnqueueStreamAndPromiseForOperation(aDestinationStream, aPromise,
                                                   aOperation, aFlags);
     } else {
       // We are resuming a context, but we are already using an
       // AudioCallbackDriver, we can resolve the promise now.
--- a/dom/media/MediaStreamGraph.h
+++ b/dom/media/MediaStreamGraph.h
@@ -118,16 +118,20 @@ class AudioDataListenerInterface {
                                TrackRate aRate, uint32_t aChannels) = 0;
 
   /**
    * Number of audio input channels.
    */
   virtual uint32_t RequestedInputChannelCount(MediaStreamGraphImpl* aGraph) = 0;
 
   /**
+   * Whether the underlying audio device is used for voice input.
+   */
+  virtual bool IsVoiceInput(MediaStreamGraphImpl* aGraph) const = 0;
+  /**
    * Called when the underlying audio device has changed.
    */
   virtual void DeviceChanged(MediaStreamGraphImpl* aGraph) = 0;
 
   /**
    * Called when the underlying audio device is being closed.
    */
   virtual void Disconnect(MediaStreamGraphImpl* aGraph) = 0;
--- a/dom/media/MediaStreamGraphImpl.h
+++ b/dom/media/MediaStreamGraphImpl.h
@@ -393,16 +393,17 @@ class MediaStreamGraphImpl : public Medi
    * anymore, for a particular stream. It can be that other streams still need
    * audio from this audio input device. */
   virtual void CloseAudioInput(Maybe<CubebUtils::AudioDeviceID>& aID,
                                AudioDataListener* aListener) override;
   /* Called on the graph thread when the input device settings should be
    * reevaluated, for example, if the channel count of the input stream should
    * be changed. */
   void ReevaluateInputDevice();
+
   /* Called on the graph thread when there is new output data for listeners.
    * This is the mixed audio output of this MediaStreamGraph. */
   void NotifyOutputData(AudioDataValue* aBuffer, size_t aFrames,
                         TrackRate aRate, uint32_t aChannels);
   /* Called on the graph thread when there is new input data for listeners. This
    * is the raw audio input for this MediaStreamGraph. */
   void NotifyInputData(const AudioDataValue* aBuffer, size_t aFrames,
                        TrackRate aRate, uint32_t aChannels);
@@ -480,16 +481,39 @@ class MediaStreamGraphImpl : public Medi
     MOZ_ASSERT(listeners);
     for (const auto& listener : *listeners) {
       maxInputChannels = std::max(maxInputChannels,
                                   listener->RequestedInputChannelCount(this));
     }
     return maxInputChannels;
   }
 
+  /* Returns AudioInputType::Voice when at least one listener of the current
+   * input device reports voice input, and AudioInputType::Unknown otherwise,
+   * including when no listener is registered for mInputDeviceID. */
+  AudioInputType AudioInputDevicePreference() {
+    MOZ_ASSERT(OnGraphThreadOrNotRunning());
+
+    // When/if we decide to support multiple input devices per graph, this
+    // needs to loop over them.
+    nsTArray<RefPtr<AudioDataListener>>* listeners =
+        mInputDeviceUsers.GetValue(mInputDeviceID);
+    if (!listeners) {
+      return AudioInputType::Unknown;
+    }
+
+    // Every listener is queried (no early exit); a single voice listener is
+    // enough for the device to be treated as voice input.
+    bool voiceInput = false;
+    for (const auto& listener : *listeners) {
+      voiceInput |= listener->IsVoiceInput(this);
+    }
+    return voiceInput ? AudioInputType::Voice : AudioInputType::Unknown;
+  }
+
   CubebUtils::AudioDeviceID InputDeviceID() { return mInputDeviceID; }
 
   double MediaTimeToSeconds(GraphTime aTime) const {
     NS_ASSERTION(aTime > -STREAM_TIME_MAX && aTime <= STREAM_TIME_MAX,
                  "Bad time");
     return static_cast<double>(aTime) / GraphRate();
   }
 
--- a/dom/media/webrtc/MediaEngineWebRTCAudio.h
+++ b/dom/media/webrtc/MediaEngineWebRTCAudio.h
@@ -155,16 +155,22 @@ class AudioInputProcessing : public Audi
   void Pull(StreamTime aEndOfAppendedData, StreamTime aDesiredTime);
 
   void NotifyOutputData(MediaStreamGraphImpl* aGraph, AudioDataValue* aBuffer,
                         size_t aFrames, TrackRate aRate,
                         uint32_t aChannels) override;
   void NotifyInputData(MediaStreamGraphImpl* aGraph,
                        const AudioDataValue* aBuffer, size_t aFrames,
                        TrackRate aRate, uint32_t aChannels) override;
+  bool IsVoiceInput(MediaStreamGraphImpl* aGraph) const override {
+    // Pass-through means all voice processing (AEC and the like) has been
+    // disabled on purpose, so such an input is not treated as voice input.
+    const bool passThrough = PassThrough(aGraph);
+    return !passThrough;
+  }
 
   void Start();
   void Stop();
 
   void DeviceChanged(MediaStreamGraphImpl* aGraph) override;
 
   uint32_t RequestedInputChannelCount(MediaStreamGraphImpl* aGraph) override {
     return GetRequestedInputChannelCount(aGraph);