Bug 1156472 - Part 3 - Implement AudioCaptureStream. r=roc
authorPaul Adenot <paul@paul.cx>
Fri, 24 Jul 2015 14:28:16 +0200
changeset 286417 865d6aa9cc6732e074d656872ed85bd50fc7d716
parent 286416 778457f7bae7eef97125f8931c0a63f29612f4cc
child 286418 244d8d88808e1a363a93a3dc35f7c454ad5c6f47
push id5067
push userraliiev@mozilla.com
push dateMon, 21 Sep 2015 14:04:52 +0000
treeherdermozilla-beta@14221ffe5b2f [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersroc
bugs1156472
milestone42.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1156472 - Part 3 - Implement AudioCaptureStream. r=roc It is a ProcessMediaStream that simply mixes its inputs into a mono stream, up/down mixing appropriately.
dom/media/AudioCaptureStream.cpp
dom/media/AudioCaptureStream.h
dom/media/AudioMixer.h
dom/media/AudioSegment.cpp
dom/media/AudioSegment.h
dom/media/DOMMediaStream.cpp
dom/media/DOMMediaStream.h
dom/media/moz.build
new file mode 100644
--- /dev/null
+++ b/dom/media/AudioCaptureStream.cpp
@@ -0,0 +1,133 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "MediaStreamGraphImpl.h"
+#include "mozilla/MathAlgorithms.h"
+#include "mozilla/unused.h"
+
+#include "AudioSegment.h"
+#include "mozilla/Logging.h"
+#include "mozilla/Attributes.h"
+#include "AudioCaptureStream.h"
+#include "ImageContainer.h"
+#include "AudioNodeEngine.h"
+#include "AudioNodeStream.h"
+#include "AudioNodeExternalInputStream.h"
+#include "webaudio/MediaStreamAudioDestinationNode.h"
+#include <algorithm>
+#include "DOMMediaStream.h"
+
+using namespace mozilla::layers;
+using namespace mozilla::dom;
+using namespace mozilla::gfx;
+
+namespace mozilla
+{
+
+// We are mixing to mono until PeerConnection can accept stereo
+static const uint32_t MONO = 1;
+
+AudioCaptureStream::AudioCaptureStream(DOMMediaStream* aWrapper)
+  : ProcessedMediaStream(aWrapper), mTrackCreated(false)
+{
+  MOZ_ASSERT(NS_IsMainThread());
+  MOZ_COUNT_CTOR(AudioCaptureStream);
+  mMixer.AddCallback(this);
+}
+
+AudioCaptureStream::~AudioCaptureStream()
+{
+  MOZ_COUNT_DTOR(AudioCaptureStream);
+  mMixer.RemoveCallback(this);
+}
+
+void
+AudioCaptureStream::ProcessInput(GraphTime aFrom, GraphTime aTo,
+                                 uint32_t aFlags)
+{
+  uint32_t inputCount = mInputs.Length();
+  StreamBuffer::Track* track = EnsureTrack(AUDIO_TRACK);
+  // Notify the DOM everything is in order.
+  if (!mTrackCreated) {
+    for (uint32_t i = 0; i < mListeners.Length(); i++) {
+      MediaStreamListener* l = mListeners[i];
+      AudioSegment tmp;
+      l->NotifyQueuedTrackChanges(
+        Graph(), AUDIO_TRACK, 0, MediaStreamListener::TRACK_EVENT_CREATED, tmp);
+      l->NotifyFinishedTrackCreation(Graph());
+    }
+    mTrackCreated = true;
+  }
+
+  // If the captured stream is connected back to an object on the page (be it
+  // an HTMLMediaElement with a stream as source, or an AudioContext), a cycle
+  // situation occurs. This can work if it's an AudioContext with at least one
+  // DelayNode, but the MSG will mute the whole cycle otherwise.
+  bool blocked = mFinished || mBlocked.GetAt(aFrom);
+  if (blocked || InMutedCycle() || inputCount == 0) {
+    track->Get<AudioSegment>()->AppendNullData(aTo - aFrom);
+  } else {
+    // We mix down all the tracks of all inputs to a single mono track, as
+    // everything is {up,down}-mixed to mono (see the MONO constant above).
+    mMixer.StartMixing();
+    AudioSegment output;
+    for (uint32_t i = 0; i < inputCount; i++) {
+      MediaStream* s = mInputs[i]->GetSource();
+      StreamBuffer::TrackIter tracks(s->GetStreamBuffer(), MediaSegment::AUDIO);
+      while (!tracks.IsEnded()) {
+        AudioSegment* inputSegment = tracks->Get<AudioSegment>();
+        StreamTime inputStart = s->GraphTimeToStreamTime(aFrom);
+        StreamTime inputEnd = s->GraphTimeToStreamTime(aTo);
+        AudioSegment toMix;
+        toMix.AppendSlice(*inputSegment, inputStart, inputEnd);
+        // Care for streams blocked in the [aFrom, aTo] range.
+        if (inputEnd - inputStart < aTo - aFrom) {
+          toMix.AppendNullData((aTo - aFrom) - (inputEnd - inputStart));
+        }
+        toMix.Mix(mMixer, MONO, Graph()->GraphRate());
+        tracks.Next();
+      }
+    }
+    // This calls MixerCallback below
+    mMixer.FinishMixing();
+  }
+
+  // Regardless of the status of the input tracks, we go forward.
+  mBuffer.AdvanceKnownTracksTime(GraphTimeToStreamTime((aTo)));
+}
+
+void
+AudioCaptureStream::MixerCallback(AudioDataValue* aMixedBuffer,
+                                  AudioSampleFormat aFormat, uint32_t aChannels,
+                                  uint32_t aFrames, uint32_t aSampleRate)
+{
+  nsAutoTArray<nsTArray<AudioDataValue>, MONO> output;
+  nsAutoTArray<const AudioDataValue*, MONO> bufferPtrs;
+  output.SetLength(aChannels);
+  bufferPtrs.SetLength(aChannels);
+
+  uint32_t written = 0;
+  // We need to copy here, because the mixer will reuse the storage, we should
+  // not hold onto it. Buffers are in planar format.
+  for (uint32_t channel = 0; channel < aChannels; channel++) {
+    AudioDataValue* out = output[channel].AppendElements(aFrames);
+    PodCopy(out, aMixedBuffer + written, aFrames);
+    bufferPtrs[channel] = out;
+    written += aFrames;
+  }
+  AudioChunk chunk;
+  chunk.mBuffer = new mozilla::SharedChannelArrayBuffer<AudioDataValue>(&output);
+  chunk.mDuration = aFrames;
+  chunk.mBufferFormat = aFormat;
+  chunk.mVolume = 1.0f;
+  chunk.mChannelData.SetLength(aChannels);
+  for (uint32_t channel = 0; channel < aChannels; channel++) {
+    chunk.mChannelData[channel] = bufferPtrs[channel];
+  }
+
+  // Now we have mixed data, simply append it to our track.
+  EnsureTrack(AUDIO_TRACK)->Get<AudioSegment>()->AppendAndConsumeChunk(&chunk);
+}
+}
new file mode 100644
--- /dev/null
+++ b/dom/media/AudioCaptureStream.h
@@ -0,0 +1,40 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MOZILLA_AUDIOCAPTURESTREAM_H_
+#define MOZILLA_AUDIOCAPTURESTREAM_H_
+
+#include "MediaStreamGraph.h"
+#include "AudioMixer.h"
+#include <algorithm>
+
+namespace mozilla
+{
+
+class DOMMediaStream;
+
+/**
+ * See MediaStreamGraph::CreateAudioCaptureStream.
+ */
+class AudioCaptureStream : public ProcessedMediaStream,
+                           public MixerCallbackReceiver
+{
+public:
+  explicit AudioCaptureStream(DOMMediaStream* aWrapper);
+  virtual ~AudioCaptureStream();
+
+  void ProcessInput(GraphTime aFrom, GraphTime aTo, uint32_t aFlags) override;
+
+protected:
+  enum { AUDIO_TRACK = 1 };
+  void MixerCallback(AudioDataValue* aMixedBuffer, AudioSampleFormat aFormat,
+                     uint32_t aChannels, uint32_t aFrames,
+                     uint32_t aSampleRate) override;
+  AudioMixer mMixer;
+  bool mTrackCreated;
+};
+}
+
+#endif /* MOZILLA_AUDIOCAPTURESTREAM_H_ */
--- a/dom/media/AudioMixer.h
+++ b/dom/media/AudioMixer.h
@@ -21,17 +21,19 @@ struct MixerCallbackReceiver {
                              uint32_t aFrames,
                              uint32_t aSampleRate) = 0;
 };
 /**
  * This class mixes multiple streams of audio together to output a single audio
  * stream.
  *
  * AudioMixer::Mix is to be called repeatedly with buffers that have the same
- * length, sample rate, sample format and channel count.
+ * length, sample rate, sample format and channel count. This class works with
+ * interleaved and planar buffers, but the buffers mixed must be of the same
+ * type during a mixing cycle.
  *
  * When all the tracks have been mixed, calling FinishMixing will call back with
  * a buffer containing the mixed audio data.
  *
  * This class is not thread safe.
  */
 class AudioMixer
 {
@@ -66,17 +68,17 @@ public:
                                    mChannels,
                                    mFrames,
                                    mSampleRate);
     }
     PodZero(mMixedAudio.Elements(), mMixedAudio.Length());
     mSampleRate = mChannels = mFrames = 0;
   }
 
-  /* Add a buffer to the mix. aSamples is interleaved. */
+  /* Add a buffer to the mix. */
   void Mix(AudioDataValue* aSamples,
            uint32_t aChannels,
            uint32_t aFrames,
            uint32_t aSampleRate) {
     if (!mFrames && !mChannels) {
       mFrames = aFrames;
       mChannels = aChannels;
       mSampleRate = aSampleRate;
--- a/dom/media/AudioSegment.cpp
+++ b/dom/media/AudioSegment.cpp
@@ -141,16 +141,113 @@ void AudioSegment::ResampleChunks(SpeexR
       Resample<int16_t>(aResampler, aInRate, aOutRate);
     break;
     default:
       MOZ_ASSERT(false);
     break;
   }
 }
 
+// This helps to safely get a pointer to the position we want to start
+// writing a planar audio buffer, depending on the channel and the offset in the
+// buffer.
+static AudioDataValue*
+PointerForOffsetInChannel(AudioDataValue* aData, size_t aLengthSamples,
+                          uint32_t aChannelCount, uint32_t aChannel,
+                          uint32_t aOffsetSamples)
+{
+  size_t samplesPerChannel = aLengthSamples / aChannelCount;
+  size_t beginningOfChannel = samplesPerChannel * aChannel;
+  MOZ_ASSERT(aChannel * samplesPerChannel + aOffsetSamples < aLengthSamples,
+             "Offset request out of bounds.");
+  return aData + beginningOfChannel + aOffsetSamples;
+}
+
+void
+AudioSegment::Mix(AudioMixer& aMixer, uint32_t aOutputChannels,
+                  uint32_t aSampleRate)
+{
+  nsAutoTArray<AudioDataValue, AUDIO_PROCESSING_FRAMES* GUESS_AUDIO_CHANNELS>
+  buf;
+  nsAutoTArray<const void*, GUESS_AUDIO_CHANNELS> channelData;
+  uint32_t offsetSamples = 0;
+  uint32_t duration = GetDuration();
+
+  if (duration <= 0) {
+    MOZ_ASSERT(duration == 0);
+    return;
+  }
+
+  uint32_t outBufferLength = duration * aOutputChannels;
+  buf.SetLength(outBufferLength);
+
+  for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
+    AudioChunk& c = *ci;
+    uint32_t frames = c.mDuration;
+
+    // If the chunk is silent, simply write the right number of silence in the
+    // buffers.
+    if (c.mBufferFormat == AUDIO_FORMAT_SILENCE) {
+      for (uint32_t channel = 0; channel < aOutputChannels; channel++) {
+        AudioDataValue* ptr =
+          PointerForOffsetInChannel(buf.Elements(), outBufferLength,
+                                    aOutputChannels, channel, offsetSamples);
+        PodZero(ptr, frames);
+      }
+    } else {
+      // Otherwise, we need to upmix or downmix appropriately, depending on the
+      // desired input and output channels.
+      channelData.SetLength(c.mChannelData.Length());
+      for (uint32_t i = 0; i < channelData.Length(); ++i) {
+        channelData[i] = c.mChannelData[i];
+      }
+      if (channelData.Length() < aOutputChannels) {
+        // Up-mix.
+        AudioChannelsUpMix(&channelData, aOutputChannels, gZeroChannel);
+        for (uint32_t channel = 0; channel < aOutputChannels; channel++) {
+          AudioDataValue* ptr =
+            PointerForOffsetInChannel(buf.Elements(), outBufferLength,
+                                      aOutputChannels, channel, offsetSamples);
+          PodCopy(ptr, reinterpret_cast<const float*>(channelData[channel]),
+                  frames);
+        }
+        MOZ_ASSERT(channelData.Length() == aOutputChannels);
+      } else if (channelData.Length() > aOutputChannels) {
+        // Down mix. Use the running offsetSamples from the outer loop so this
+        // chunk lands right after the previous ones in each output channel.
+        nsAutoTArray<float*, GUESS_AUDIO_CHANNELS> outChannelPtrs;
+        outChannelPtrs.SetLength(aOutputChannels);
+        for (uint32_t channel = 0; channel < aOutputChannels; channel++) {
+          outChannelPtrs[channel] =
+            PointerForOffsetInChannel(buf.Elements(), outBufferLength,
+                                      aOutputChannels, channel, offsetSamples);
+        }
+        AudioChannelsDownMix(channelData, outChannelPtrs.Elements(),
+                             aOutputChannels, frames);
+      } else {
+        // The channel count is already what we want, just copy it over.
+        for (uint32_t channel = 0; channel < aOutputChannels; channel++) {
+          AudioDataValue* ptr =
+            PointerForOffsetInChannel(buf.Elements(), outBufferLength,
+                                      aOutputChannels, channel, offsetSamples);
+          PodCopy(ptr, reinterpret_cast<const float*>(channelData[channel]),
+                  frames);
+        }
+      }
+    }
+    offsetSamples += frames;
+  }
+
+  if (offsetSamples) {
+    MOZ_ASSERT(offsetSamples == outBufferLength / aOutputChannels,
+               "We forgot to write some samples?");
+    aMixer.Mix(buf.Elements(), aOutputChannels, offsetSamples, aSampleRate);
+  }
+}
+
 void
 AudioSegment::WriteTo(uint64_t aID, AudioMixer& aMixer, uint32_t aOutputChannels, uint32_t aSampleRate)
 {
   nsAutoTArray<AudioDataValue,AUDIO_PROCESSING_FRAMES*GUESS_AUDIO_CHANNELS> buf;
   nsAutoTArray<const void*,GUESS_AUDIO_CHANNELS> channelData;
   // Offset in the buffer that will end up sent to the AudioStream, in samples.
   uint32_t offset = 0;
 
--- a/dom/media/AudioSegment.h
+++ b/dom/media/AudioSegment.h
@@ -294,17 +294,24 @@ public:
     chunk->mVolume = aChunk->mVolume;
     chunk->mBufferFormat = aChunk->mBufferFormat;
 #ifdef MOZILLA_INTERNAL_API
     chunk->mTimeStamp = TimeStamp::Now();
 #endif
     return chunk;
   }
   void ApplyVolume(float aVolume);
-  void WriteTo(uint64_t aID, AudioMixer& aMixer, uint32_t aChannelCount, uint32_t aSampleRate);
+  // Mix the segment into a mixer, interleaved. This is useful to output a
+  // segment to a system audio callback. It up or down mixes to aChannelCount
+  // channels.
+  void WriteTo(uint64_t aID, AudioMixer& aMixer, uint32_t aChannelCount,
+               uint32_t aSampleRate);
+  // Mix the segment into a mixer, keeping it planar, up or down mixing to
+  // aChannelCount channels.
+  void Mix(AudioMixer& aMixer, uint32_t aChannelCount, uint32_t aSampleRate);
 
   int ChannelCount() {
     NS_WARN_IF_FALSE(!mChunks.IsEmpty(),
         "Cannot query channel count on a AudioSegment with no chunks.");
     // Find the first chunk that has non-zero channels. A chunk that hs zero
     // channels is just silence and we can simply discard it.
     for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
       if (ci->ChannelCount()) {
--- a/dom/media/DOMMediaStream.cpp
+++ b/dom/media/DOMMediaStream.cpp
@@ -297,16 +297,28 @@ DOMMediaStream::InitTrackUnionStream(nsI
 
   if (!aGraph) {
     aGraph = MediaStreamGraph::GetInstance();
   }
   InitStreamCommon(aGraph->CreateTrackUnionStream(this));
 }
 
 void
+DOMMediaStream::InitAudioCaptureStream(nsIDOMWindow* aWindow,
+                                       MediaStreamGraph* aGraph)
+{
+  mWindow = aWindow;
+
+  if (!aGraph) {
+    aGraph = MediaStreamGraph::GetInstance();
+  }
+  InitStreamCommon(aGraph->CreateAudioCaptureStream(this));
+}
+
+void
 DOMMediaStream::InitStreamCommon(MediaStream* aStream)
 {
   mStream = aStream;
 
   // Setup track listener
   mListener = new StreamListener(this);
   aStream->AddListener(mListener);
 }
@@ -324,16 +336,25 @@ already_AddRefed<DOMMediaStream>
 DOMMediaStream::CreateTrackUnionStream(nsIDOMWindow* aWindow,
                                        MediaStreamGraph* aGraph)
 {
   nsRefPtr<DOMMediaStream> stream = new DOMMediaStream();
   stream->InitTrackUnionStream(aWindow, aGraph);
   return stream.forget();
 }
 
+already_AddRefed<DOMMediaStream>
+DOMMediaStream::CreateAudioCaptureStream(nsIDOMWindow* aWindow,
+                                         MediaStreamGraph* aGraph)
+{
+  nsRefPtr<DOMMediaStream> stream = new DOMMediaStream();
+  stream->InitAudioCaptureStream(aWindow, aGraph);
+  return stream.forget();
+}
+
 void
 DOMMediaStream::SetTrackEnabled(TrackID aTrackID, bool aEnabled)
 {
   if (mStream) {
     mStream->SetTrackEnabled(aTrackID, aEnabled);
   }
 }
 
@@ -648,16 +669,25 @@ already_AddRefed<DOMLocalMediaStream>
 DOMLocalMediaStream::CreateTrackUnionStream(nsIDOMWindow* aWindow,
                                             MediaStreamGraph* aGraph)
 {
   nsRefPtr<DOMLocalMediaStream> stream = new DOMLocalMediaStream();
   stream->InitTrackUnionStream(aWindow, aGraph);
   return stream.forget();
 }
 
+already_AddRefed<DOMLocalMediaStream>
+DOMLocalMediaStream::CreateAudioCaptureStream(nsIDOMWindow* aWindow,
+                                              MediaStreamGraph* aGraph)
+{
+  nsRefPtr<DOMLocalMediaStream> stream = new DOMLocalMediaStream();
+  stream->InitAudioCaptureStream(aWindow, aGraph);
+  return stream.forget();
+}
+
 DOMAudioNodeMediaStream::DOMAudioNodeMediaStream(AudioNode* aNode)
 : mStreamNode(aNode)
 {
 }
 
 DOMAudioNodeMediaStream::~DOMAudioNodeMediaStream()
 {
 }
--- a/dom/media/DOMMediaStream.h
+++ b/dom/media/DOMMediaStream.h
@@ -193,16 +193,23 @@ public:
                                                              MediaStreamGraph* aGraph = nullptr);
 
   /**
    * Create an nsDOMMediaStream whose underlying stream is a TrackUnionStream.
    */
   static already_AddRefed<DOMMediaStream> CreateTrackUnionStream(nsIDOMWindow* aWindow,
                                                                  MediaStreamGraph* aGraph = nullptr);
 
+  /**
+   * Create an nsDOMMediaStream whose underlying stream is an
+   * AudioCaptureStream
+   */
+  static already_AddRefed<DOMMediaStream> CreateAudioCaptureStream(
+    nsIDOMWindow* aWindow, MediaStreamGraph* aGraph = nullptr);
+
   void SetLogicalStreamStartTime(StreamTime aTime)
   {
     mLogicalStreamStartTime = aTime;
   }
 
   // Notifications from StreamListener.
   // BindDOMTrack should only be called when it's safe to run script.
   MediaStreamTrack* BindDOMTrack(TrackID aTrackID, MediaSegment::Type aType);
@@ -256,16 +263,18 @@ public:
 protected:
   virtual ~DOMMediaStream();
 
   void Destroy();
   void InitSourceStream(nsIDOMWindow* aWindow,
                         MediaStreamGraph* aGraph = nullptr);
   void InitTrackUnionStream(nsIDOMWindow* aWindow,
                             MediaStreamGraph* aGraph = nullptr);
+  void InitAudioCaptureStream(nsIDOMWindow* aWindow,
+                              MediaStreamGraph* aGraph = nullptr);
   void InitStreamCommon(MediaStream* aStream);
   already_AddRefed<AudioTrack> CreateAudioTrack(AudioStreamTrack* aStreamTrack);
   already_AddRefed<VideoTrack> CreateVideoTrack(VideoStreamTrack* aStreamTrack);
 
   // Called when MediaStreamGraph has finished an iteration where tracks were
   // created.
   void TracksCreated();
 
@@ -346,16 +355,22 @@ public:
 
   /**
    * Create an nsDOMLocalMediaStream whose underlying stream is a TrackUnionStream.
    */
   static already_AddRefed<DOMLocalMediaStream>
   CreateTrackUnionStream(nsIDOMWindow* aWindow,
                          MediaStreamGraph* aGraph = nullptr);
 
+  /**
+   * Create an nsDOMLocalMediaStream whose underlying stream is an
+   * AudioCaptureStream. */
+  static already_AddRefed<DOMLocalMediaStream> CreateAudioCaptureStream(
+    nsIDOMWindow* aWindow, MediaStreamGraph* aGraph = nullptr);
+
 protected:
   virtual ~DOMLocalMediaStream();
 };
 
 NS_DEFINE_STATIC_IID_ACCESSOR(DOMLocalMediaStream,
                               NS_DOMLOCALMEDIASTREAM_IID)
 
 class DOMAudioNodeMediaStream : public DOMMediaStream
--- a/dom/media/moz.build
+++ b/dom/media/moz.build
@@ -191,16 +191,17 @@ EXPORTS.mozilla.dom += [
     'VideoPlaybackQuality.h',
     'VideoStreamTrack.h',
     'VideoTrack.h',
     'VideoTrackList.h',
 ]
 
 UNIFIED_SOURCES += [
     'AbstractThread.cpp',
+    'AudioCaptureStream.cpp',
     'AudioChannelFormat.cpp',
     'AudioCompactor.cpp',
     'AudioSegment.cpp',
     'AudioSink.cpp',
     'AudioStream.cpp',
     'AudioStreamTrack.cpp',
     'AudioTrack.cpp',
     'AudioTrackList.cpp',