Bug 982490 - Ensure, for each MSG cycle, that every MediaStream writes the same number of frames to its AudioStream. r=jesup,roc
☠☠ backed out by 5d7494ed030d ☠☠
author     Paul Adenot <paul@paul.cx>
date       Mon, 24 Mar 2014 11:06:06 +0100
changeset  195787 87f437be7de56c59093e4ba8c0104dce735a2e3e
parent     195786 33072f5b4c66d01bffd982dbc9c3e4fc6e615803
child      195788 89a615263614916da84dbe1625d1e0244ad3668d
push id    3624
push user  asasaki@mozilla.com
push date  Mon, 09 Jun 2014 21:49:01 +0000
treeherder mozilla-beta@b1a5da15899a
reviewers  jesup, roc
bugs       982490
milestone  31.0a1
Bug 982490 - Ensure, for each MSG cycle, that every MediaStream writes the same number of frames to its AudioStream. r=jesup,roc
content/media/AudioMixer.h
content/media/AudioSampleFormat.h
content/media/AudioSegment.cpp
content/media/AudioSegment.h
content/media/MediaSegment.h
content/media/MediaStreamGraph.cpp
content/media/MediaStreamGraph.h
content/media/MediaStreamGraphImpl.h
content/media/compiledtest/TestAudioMixer.cpp
content/media/compiledtest/moz.build
content/media/moz.build
content/media/webrtc/MediaEngineWebRTCAudio.cpp
new file mode 100644
--- /dev/null
+++ b/content/media/AudioMixer.h
@@ -0,0 +1,85 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MOZILLA_AUDIOMIXER_H_
+#define MOZILLA_AUDIOMIXER_H_
+
+#include "AudioSampleFormat.h"
+#include "nsTArray.h"
+#include "mozilla/PodOperations.h"
+
+namespace mozilla {
+typedef void(*MixerFunc)(AudioDataValue* aMixedBuffer,
+                         AudioSampleFormat aFormat,
+                         uint32_t aChannels,
+                         uint32_t aFrames);
+
+/**
+ * This class mixes multiple streams of audio together to output a single audio
+ * stream.
+ *
+ * Within one mixing cycle, AudioMixer::Mix is to be called repeatedly with
+ * buffers that have the same length, sample rate, sample format and channel count.
+ *
+ * When all the tracks have been mixed, calling FinishMixing will call back with
+ * a buffer containing the mixed audio data.
+ *
+ * This class is not thread safe.
+ */
+class AudioMixer
+{
+public:
+  AudioMixer(MixerFunc aCallback)
+    : mCallback(aCallback),
+      mFrames(0),
+      mChannels(0)
+  { }
+
+  /* Deliver the mixed data through the callback. This is to be called once
+   * all the tracks have been mixed in. The callback must not hold onto the data. */
+  void FinishMixing() {
+    mCallback(mMixedAudio.Elements(),
+              AudioSampleTypeToFormat<AudioDataValue>::Format,
+              mChannels,
+              mFrames);
+    PodZero(mMixedAudio.Elements(), mMixedAudio.Length());
+    mChannels = mFrames = 0;
+  }
+
+  /* Add a buffer to the mix. aSamples is interleaved. */
+  void Mix(AudioDataValue* aSamples, uint32_t aChannels, uint32_t aFrames) {
+    if (!mFrames && !mChannels) {
+      mFrames = aFrames;
+      mChannels = aChannels;
+      EnsureCapacityAndSilence();
+    }
+
+    MOZ_ASSERT(aFrames == mFrames);
+    MOZ_ASSERT(aChannels == mChannels);
+
+    for (uint32_t i = 0; i < aFrames * aChannels; i++) {
+      mMixedAudio[i] += aSamples[i];
+    }
+  }
+private:
+  void EnsureCapacityAndSilence() {
+    if (mFrames * mChannels > mMixedAudio.Length()) {
+      mMixedAudio.SetLength(mFrames * mChannels);
+    }
+    PodZero(mMixedAudio.Elements(), mMixedAudio.Length());
+  }
+
+  /* Function that is called when the mixing is done. */
+  MixerFunc mCallback;
+  /* Number of frames for this mixing block. */
+  uint32_t mFrames;
+  /* Number of channels for this mixing block. */
+  uint32_t mChannels;
+  /* Buffer containing the mixed audio data. */
+  nsTArray<AudioDataValue> mMixedAudio;
+};
+}
+
+#endif // MOZILLA_AUDIOMIXER_H_
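
For reference (not part of the patch): a minimal sketch of how the class above is meant to be driven each graph iteration. Everything here besides AudioMixer's own API is illustrative.

    #include "AudioMixer.h"
    #include <cstdint>

    // Illustrative callback: a real consumer would hand the mixed,
    // interleaved buffer to the audio backend here.
    static void MixedAudioCallback(mozilla::AudioDataValue* aMixedBuffer,
                                   mozilla::AudioSampleFormat aFormat,
                                   uint32_t aChannels,
                                   uint32_t aFrames)
    {
    }

    void MixOneIteration(mozilla::AudioDataValue* aStreamA,
                         mozilla::AudioDataValue* aStreamB,
                         uint32_t aChannels, uint32_t aFrames)
    {
      mozilla::AudioMixer mixer(MixedAudioCallback);
      // Within one cycle, every Mix() call must pass the same frame and
      // channel counts; the buffers are summed sample by sample.
      mixer.Mix(aStreamA, aChannels, aFrames);
      mixer.Mix(aStreamB, aChannels, aFrames);
      // Fires the callback with the mix, then resets for the next cycle.
      mixer.FinishMixing();
    }
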
--- a/content/media/AudioSampleFormat.h
+++ b/content/media/AudioSampleFormat.h
@@ -44,17 +44,29 @@ public:
 };
 template <> class AudioSampleTraits<AUDIO_FORMAT_S16> {
 public:
   typedef int16_t Type;
 };
 
 typedef AudioSampleTraits<AUDIO_OUTPUT_FORMAT>::Type AudioDataValue;
 
-// Single-sample conversion 
+template<typename T> class AudioSampleTypeToFormat;
+
+template <> class AudioSampleTypeToFormat<float> {
+public:
+  static const AudioSampleFormat Format = AUDIO_FORMAT_FLOAT32;
+};
+
+template <> class AudioSampleTypeToFormat<short> {
+public:
+  static const AudioSampleFormat Format = AUDIO_FORMAT_S16;
+};
+
+// Single-sample conversion
 /*
  * Use "2^N" conversion since it's simple, fast, "bit transparent", used by
  * many other libraries and apparently behaves reasonably.
  * http://blog.bjornroche.com/2009/12/int-float-int-its-jungle-out-there.html
  * http://blog.bjornroche.com/2009/12/linearity-and-dynamic-range-in-int.html
  */
 inline float
 AudioSampleToFloat(float aValue)
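
AudioSampleTypeToFormat is the compile-time inverse of AudioSampleTraits: it maps a C++ sample type back to its AudioSampleFormat tag, which is what lets AudioMixer::FinishMixing report the format of whatever AudioDataValue is on a given build. A sketch of the round trip (assuming C++11 static_assert):

    #include "AudioSampleFormat.h"

    static_assert(mozilla::AudioSampleTypeToFormat<float>::Format ==
                  mozilla::AUDIO_FORMAT_FLOAT32,
                  "float samples map to AUDIO_FORMAT_FLOAT32");
    static_assert(mozilla::AudioSampleTypeToFormat<short>::Format ==
                  mozilla::AUDIO_FORMAT_S16,
                  "short samples map to AUDIO_FORMAT_S16");

    // Round trip: format tag -> sample type -> format tag.
    static_assert(mozilla::AudioSampleTypeToFormat<
                    mozilla::AudioSampleTraits<mozilla::AUDIO_FORMAT_S16>::Type
                  >::Format == mozilla::AUDIO_FORMAT_S16,
                  "the trait and its inverse agree");
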
--- a/content/media/AudioSegment.cpp
+++ b/content/media/AudioSegment.cpp
@@ -1,16 +1,17 @@
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
  * You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "AudioSegment.h"
 
 #include "AudioStream.h"
+#include "AudioMixer.h"
 #include "AudioChannelFormat.h"
 #include "Latency.h"
 #include "speex/speex_resampler.h"
 
 namespace mozilla {
 
 template <class SrcT, class DestT>
 static void
@@ -129,76 +130,81 @@ void AudioSegment::ResampleChunks(SpeexR
     break;
     default:
       MOZ_ASSERT(false);
     break;
   }
 }
 
 void
-AudioSegment::WriteTo(uint64_t aID, AudioStream* aOutput)
+AudioSegment::WriteTo(uint64_t aID, AudioStream* aOutput, AudioMixer* aMixer)
 {
   uint32_t outputChannels = aOutput->GetChannels();
   nsAutoTArray<AudioDataValue,AUDIO_PROCESSING_FRAMES*GUESS_AUDIO_CHANNELS> buf;
   nsAutoTArray<const void*,GUESS_AUDIO_CHANNELS> channelData;
 
+  if (!GetDuration()) {
+    return;
+  }
+
+  uint32_t outBufferLength = GetDuration() * outputChannels;
+  buf.SetLength(outBufferLength);
+
+  // Offset in the buffer that will end up sent to the AudioStream.
+  uint32_t offset = 0;
+
   for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
     AudioChunk& c = *ci;
-    TrackTicks offset = 0;
-    while (offset < c.mDuration) {
-      TrackTicks durationTicks =
-        std::min<TrackTicks>(c.mDuration - offset, AUDIO_PROCESSING_FRAMES);
-      if (uint64_t(outputChannels)*durationTicks > INT32_MAX || offset > INT32_MAX) {
-        NS_ERROR("Buffer overflow");
-        return;
-      }
-
-      uint32_t duration = uint32_t(durationTicks);
+    uint32_t frames = c.mDuration;
 
-      // If we have written data in the past, or we have real (non-silent) data
-      // to write, we can proceed. Otherwise, it means we just started the
-      // AudioStream, and we don't have real data to write to it (just silence).
-      // To avoid overbuffering in the AudioStream, we simply drop the silence,
-      // here. The stream will underrun and output silence anyways.
-      if (c.mBuffer || aOutput->GetWritten()) {
-        buf.SetLength(outputChannels*duration);
-        if (c.mBuffer) {
-          channelData.SetLength(c.mChannelData.Length());
-          for (uint32_t i = 0; i < channelData.Length(); ++i) {
-            channelData[i] =
-              AddAudioSampleOffset(c.mChannelData[i], c.mBufferFormat, int32_t(offset));
-          }
+    // If we have written data in the past, or we have real (non-silent) data
+    // to write, we can proceed. Otherwise, it means we just started the
+    // AudioStream, and we don't have real data to write to it (just silence).
+    // To avoid overbuffering in the AudioStream, we simply drop the silence,
+    // here. The stream will underrun and output silence anyways.
+    if (c.mBuffer || aOutput->GetWritten()) {
+      if (c.mBuffer) {
+        channelData.SetLength(c.mChannelData.Length());
+        for (uint32_t i = 0; i < channelData.Length(); ++i) {
+          channelData[i] = c.mChannelData[i];
+        }
+
+        if (channelData.Length() < outputChannels) {
+          // Up-mix. Note that this might actually make channelData have more
+          // than outputChannels temporarily.
+          AudioChannelsUpMix(&channelData, outputChannels, gZeroChannel);
+        }
 
-          if (channelData.Length() < outputChannels) {
-            // Up-mix. Note that this might actually make channelData have more
-            // than outputChannels temporarily.
-            AudioChannelsUpMix(&channelData, outputChannels, gZeroChannel);
-          }
+        if (channelData.Length() > outputChannels) {
+          // Down-mix.
+          DownmixAndInterleave(channelData, c.mBufferFormat, frames,
+                               c.mVolume, outputChannels, buf.Elements() + offset);
+        } else {
+          InterleaveAndConvertBuffer(channelData.Elements(), c.mBufferFormat,
+                                     frames, c.mVolume,
+                                     outputChannels,
+                                     buf.Elements() + offset);
+        }
+      } else {
+        // Assumes that a bit pattern of zeroes == 0.0f
+        memset(buf.Elements() + offset, 0, outputChannels * frames * sizeof(AudioDataValue));
+      }
+    }
 
-          if (channelData.Length() > outputChannels) {
-            // Down-mix.
-            DownmixAndInterleave(channelData, c.mBufferFormat, duration,
-                                 c.mVolume, outputChannels, buf.Elements());
-          } else {
-            InterleaveAndConvertBuffer(channelData.Elements(), c.mBufferFormat,
-                                       duration, c.mVolume,
-                                       outputChannels,
-                                       buf.Elements());
-          }
-        } else {
-          // Assumes that a bit pattern of zeroes == 0.0f
-          memset(buf.Elements(), 0, buf.Length()*sizeof(AudioDataValue));
-        }
-        aOutput->Write(buf.Elements(), int32_t(duration), &(c.mTimeStamp));
-      }
-      if(!c.mTimeStamp.IsNull()) {
-        TimeStamp now = TimeStamp::Now();
-        // would be more efficient to c.mTimeStamp to ms on create time then pass here
-        LogTime(AsyncLatencyLogger::AudioMediaStreamTrack, aID,
-                (now - c.mTimeStamp).ToMilliseconds(), c.mTimeStamp);
-      }
-      offset += duration;
+    offset += frames * outputChannels;
+
+    if (!c.mTimeStamp.IsNull()) {
+      TimeStamp now = TimeStamp::Now();
+      // It would be more efficient to convert c.mTimeStamp to ms at creation time and pass that here.
+      LogTime(AsyncLatencyLogger::AudioMediaStreamTrack, aID,
+              (now - c.mTimeStamp).ToMilliseconds(), c.mTimeStamp);
     }
   }
+
+  aOutput->Write(buf.Elements(), GetDuration(), &(mChunks[mChunks.Length() - 1].mTimeStamp));
+
+  if (aMixer) {
+    aMixer->Mix(buf.Elements(), outputChannels, GetDuration());
+  }
   aOutput->Start();
 }
 
 }
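
The rewritten WriteTo above sizes one interleaved buffer for the whole segment up front and advances a sample offset chunk by chunk, so the AudioStream and the mixer each see exactly one buffer per cycle. The offset arithmetic, reduced to a standalone sketch with simplified stand-in types:

    #include <cstdint>
    #include <vector>

    struct Chunk { uint32_t mDuration; }; // duration in frames

    // Lay a segment's chunks out back to back in one interleaved buffer.
    std::vector<float> LayoutSegment(const std::vector<Chunk>& aChunks,
                                     uint32_t aOutputChannels)
    {
      uint64_t totalFrames = 0;
      for (const Chunk& c : aChunks) {
        totalFrames += c.mDuration;
      }
      // One allocation for the whole segment: duration * channels samples.
      std::vector<float> buf(totalFrames * aOutputChannels);

      uint32_t offset = 0; // offset in samples, not frames
      for (const Chunk& c : aChunks) {
        // Convert/up-mix/down-mix this chunk into &buf[offset] here; a
        // silent chunk just leaves (or memsets) zeroes in place.
        offset += c.mDuration * aOutputChannels;
      }
      return buf; // written to the AudioStream once, then Mix()ed
    }
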
--- a/content/media/AudioSegment.h
+++ b/content/media/AudioSegment.h
@@ -22,16 +22,17 @@ public:
   SharedChannelArrayBuffer(nsTArray<nsTArray<T>>* aBuffers)
   {
     mBuffers.SwapElements(*aBuffers);
   }
   nsTArray<nsTArray<T>> mBuffers;
 };
 
 class AudioStream;
+class AudioMixer;
 
 /**
  * For auto-arrays etc, guess this as the common number of channels.
  */
 const int GUESS_AUDIO_CHANNELS = 2;
 
 // We ensure that the graph advances in steps that are multiples of the Web
 // Audio block size
@@ -210,17 +211,17 @@ public:
     chunk->mVolume = aChunk->mVolume;
     chunk->mBufferFormat = aChunk->mBufferFormat;
 #ifdef MOZILLA_INTERNAL_API
     chunk->mTimeStamp = TimeStamp::Now();
 #endif
     return chunk;
   }
   void ApplyVolume(float aVolume);
-  void WriteTo(uint64_t aID, AudioStream* aOutput);
+  void WriteTo(uint64_t aID, AudioStream* aOutput, AudioMixer* aMixer = nullptr);
 
   int ChannelCount() {
     NS_WARN_IF_FALSE(!mChunks.IsEmpty(),
         "Cannot query channel count on a AudioSegment with no chunks.");
     return mChunks.IsEmpty() ? 0 : mChunks[0].mChannelData.Length();
   }
 
   static Type StaticType() { return AUDIO; }
--- a/content/media/MediaSegment.h
+++ b/content/media/MediaSegment.h
@@ -262,19 +262,18 @@ protected:
       aSource->mChunks.RemoveElementAt(0);
     }
     mChunks.MoveElementsFrom(aSource->mChunks);
   }
 
   void AppendSliceInternal(const MediaSegmentBase<C, Chunk>& aSource,
                            TrackTicks aStart, TrackTicks aEnd)
   {
-    NS_ASSERTION(aStart <= aEnd, "Endpoints inverted");
-    NS_WARN_IF_FALSE(aStart >= 0 && aEnd <= aSource.mDuration,
-                     "Slice out of range");
+    MOZ_ASSERT(aStart <= aEnd, "Endpoints inverted");
+    MOZ_ASSERT(aStart >= 0 && aEnd <= aSource.mDuration, "Slice out of range");
     mDuration += aEnd - aStart;
     TrackTicks offset = 0;
     for (uint32_t i = 0; i < aSource.mChunks.Length() && offset < aEnd; ++i) {
       const Chunk& c = aSource.mChunks[i];
       TrackTicks start = std::max(aStart, offset);
       TrackTicks nextOffset = offset + c.GetDuration();
       TrackTicks end = std::min(aEnd, nextOffset);
       if (start < end) {
--- a/content/media/MediaStreamGraph.cpp
+++ b/content/media/MediaStreamGraph.cpp
@@ -572,37 +572,56 @@ MediaStreamGraphImpl::UpdateStreamOrderF
     aStack->popLast();
     stream->mIsOnOrderingStack = false;
   }
 
   stream->mHasBeenOrdered = true;
   *mStreams.AppendElement() = stream.forget();
 }
 
+static void AudioMixerCallback(AudioDataValue* aMixedBuffer,
+                               AudioSampleFormat aFormat,
+                               uint32_t aChannels,
+                               uint32_t aFrames)
+{
+  // Need an API to register mixer callbacks; see bug 989921.
+}
+
 void
 MediaStreamGraphImpl::UpdateStreamOrder()
 {
   mOldStreams.SwapElements(mStreams);
   mStreams.ClearAndRetainStorage();
+  bool shouldMix = false;
   for (uint32_t i = 0; i < mOldStreams.Length(); ++i) {
     MediaStream* stream = mOldStreams[i];
     stream->mHasBeenOrdered = false;
     stream->mIsConsumed = false;
     stream->mIsOnOrderingStack = false;
     stream->mInBlockingSet = false;
+    if (stream->AsSourceStream() &&
+        stream->AsSourceStream()->NeedsMixing()) {
+      shouldMix = true;
+    }
     ProcessedMediaStream* ps = stream->AsProcessedStream();
     if (ps) {
       ps->mInCycle = false;
       AudioNodeStream* ns = ps->AsAudioNodeStream();
       if (ns) {
         ns->Unmute();
       }
     }
   }
 
+  if (!mMixer && shouldMix) {
+    mMixer = new AudioMixer(AudioMixerCallback);
+  } else if (mMixer && !shouldMix) {
+    mMixer = nullptr;
+  }
+
   mozilla::LinkedList<MediaStream> stack;
   for (uint32_t i = 0; i < mOldStreams.Length(); ++i) {
     nsRefPtr<MediaStream>& s = mOldStreams[i];
     if (s->IsIntrinsicallyConsumed()) {
       MarkConsumed(s);
     }
     if (!s->mHasBeenOrdered) {
       UpdateStreamOrderForStream(&stack, s.forget());
@@ -805,16 +824,17 @@ MediaStreamGraphImpl::CreateOrDestroyAud
 
         // XXX allocating a AudioStream could be slow so we're going to have to do
         // something here ... preallocation, async allocation, multiplexing onto a single
         // stream ...
         MediaStream::AudioOutputStream* audioOutputStream =
           aStream->mAudioOutputStreams.AppendElement();
         audioOutputStream->mAudioPlaybackStartTime = aAudioOutputStartTime;
         audioOutputStream->mBlockedAudioTime = 0;
+        audioOutputStream->mLastTickWritten = 0;
         audioOutputStream->mStream = new AudioStream();
         // XXX for now, allocate stereo output. But we need to fix this to
         // match the system's ideal channel configuration.
         audioOutputStream->mStream->Init(2, IdealAudioRate(), AUDIO_CHANNEL_NORMAL, AudioStream::LowLatency);
         audioOutputStream->mTrackID = tracks->GetID();
 
         LogLatency(AsyncLatencyLogger::AudioStreamCreate,
                    reinterpret_cast<uint64_t>(aStream),
@@ -826,92 +846,125 @@ MediaStreamGraphImpl::CreateOrDestroyAud
   for (int32_t i = audioOutputStreamsFound.Length() - 1; i >= 0; --i) {
     if (!audioOutputStreamsFound[i]) {
       aStream->mAudioOutputStreams[i].mStream->Shutdown();
       aStream->mAudioOutputStreams.RemoveElementAt(i);
     }
   }
 }
 
-void
+TrackTicks
 MediaStreamGraphImpl::PlayAudio(MediaStream* aStream,
                                 GraphTime aFrom, GraphTime aTo)
 {
   MOZ_ASSERT(mRealtime, "Should only attempt to play audio in realtime mode");
 
+  TrackTicks ticksWritten = 0;
+  // We compute the number of needed ticks by converting a difference of graph
+  // times rather than by subtracting two converted stream times, to ensure that
+  // the rounding between {Graph,Stream}Time and track ticks does not depend on
+  // the absolute value of the {Graph,Stream}Time, and so that the number of
+  // ticks to play is the same for each cycle.
+  TrackTicks ticksNeeded = TimeToTicksRoundDown(IdealAudioRate(), aTo) - TimeToTicksRoundDown(IdealAudioRate(), aFrom);
+
   if (aStream->mAudioOutputStreams.IsEmpty()) {
-    return;
+    return 0;
   }
 
   // When we're playing multiple copies of this stream at the same time, they're
   // perfectly correlated so adding volumes is the right thing to do.
   float volume = 0.0f;
   for (uint32_t i = 0; i < aStream->mAudioOutputs.Length(); ++i) {
     volume += aStream->mAudioOutputs[i].mVolume;
   }
 
   for (uint32_t i = 0; i < aStream->mAudioOutputStreams.Length(); ++i) {
     MediaStream::AudioOutputStream& audioOutput = aStream->mAudioOutputStreams[i];
     StreamBuffer::Track* track = aStream->mBuffer.FindTrack(audioOutput.mTrackID);
     AudioSegment* audio = track->Get<AudioSegment>();
+    AudioSegment output;
+    MOZ_ASSERT(track->GetRate() == IdealAudioRate());
+
+    // offset and audioOutput.mLastTickWritten can differ by at most one sample,
+    // because of the rounding issue. We track that to ensure we don't skip a
+    // sample, or play a sample twice.
+    TrackTicks offset = track->TimeToTicksRoundDown(GraphTimeToStreamTime(aStream, aFrom));
+    if (!audioOutput.mLastTickWritten) {
+      audioOutput.mLastTickWritten = offset;
+    }
+    if (audioOutput.mLastTickWritten != offset) {
+      // If there is a global underrun of the MSG, this property won't hold, and
+      // we reset the sample count tracking.
+      if (std::abs(audioOutput.mLastTickWritten - offset) != 1) {
+        audioOutput.mLastTickWritten = offset;
+      } else {
+        offset = audioOutput.mLastTickWritten;
+      }
+    }
 
     // We don't update aStream->mBufferStartTime here to account for
     // time spent blocked. Instead, we'll update it in UpdateCurrentTime after the
     // blocked period has completed. But we do need to make sure we play from the
     // right offsets in the stream buffer, even if we've already written silence for
     // some amount of blocked time after the current time.
     GraphTime t = aFrom;
-    while (t < aTo) {
+    while (ticksNeeded) {
       GraphTime end;
       bool blocked = aStream->mBlocked.GetAt(t, &end);
       end = std::min(end, aTo);
 
-      AudioSegment output;
-      if (blocked) {
-        // Track total blocked time in aStream->mBlockedAudioTime so that
-        // the amount of silent samples we've inserted for blocking never gets
-        // more than one sample away from the ideal amount.
-        TrackTicks startTicks =
-            TimeToTicksRoundDown(IdealAudioRate(), audioOutput.mBlockedAudioTime);
-        audioOutput.mBlockedAudioTime += end - t;
-        TrackTicks endTicks =
-            TimeToTicksRoundDown(IdealAudioRate(), audioOutput.mBlockedAudioTime);
-
-        output.InsertNullDataAtStart(endTicks - startTicks);
-        STREAM_LOG(PR_LOG_DEBUG+1, ("MediaStream %p writing blocking-silence samples for %f to %f",
-                                    aStream, MediaTimeToSeconds(t), MediaTimeToSeconds(end)));
+      // Check how many ticks of sound we can provide if we are blocked some
+      // time in the middle of this cycle.
+      TrackTicks toWrite = 0;
+      if (end >= aTo) {
+        toWrite = ticksNeeded;
       } else {
-        TrackTicks startTicks =
-            track->TimeToTicksRoundDown(GraphTimeToStreamTime(aStream, t));
-        TrackTicks endTicks =
-            track->TimeToTicksRoundDown(GraphTimeToStreamTime(aStream, end));
+        toWrite = TimeToTicksRoundDown(IdealAudioRate(), end - aFrom);
+      }
 
-        // If startTicks is before the track start, then that part of 'audio'
-        // will just be silence, which is fine here. But if endTicks is after
-        // the track end, then 'audio' won't be long enough, so we'll need
-        // to explicitly play silence.
-        TrackTicks sliceEnd = std::min(endTicks, audio->GetDuration());
-        if (sliceEnd > startTicks) {
-          output.AppendSlice(*audio, startTicks, sliceEnd);
+      if (blocked) {
+        output.InsertNullDataAtStart(toWrite);
+        STREAM_LOG(PR_LOG_DEBUG+1, ("MediaStream %p writing %ld blocking-silence samples for %f to %f (%ld to %ld)\n",
+                                    aStream, toWrite, MediaTimeToSeconds(t), MediaTimeToSeconds(end),
+                                    offset, offset + toWrite));
+        ticksNeeded -= toWrite;
+      } else {
+        TrackTicks endTicksNeeded = offset + toWrite;
+        TrackTicks endTicksAvailable = audio->GetDuration();
+        if (endTicksNeeded <= endTicksAvailable) {
+          output.AppendSlice(*audio, offset, endTicksNeeded);
+        } else {
+          MOZ_ASSERT(track->IsEnded(), "Not enough data, and track not ended.");
+          // If we are at the end of the track, write out the remaining
+          // samples and pad the rest with silence.
+          if (endTicksNeeded > endTicksAvailable &&
+              offset < endTicksAvailable) {
+            output.AppendSlice(*audio, offset, endTicksAvailable);
+            ticksNeeded -= endTicksAvailable - offset;
+            toWrite -= endTicksAvailable - offset;
+          }
+          output.AppendNullData(toWrite);
         }
-        // Play silence where the track has ended
-        output.AppendNullData(endTicks - sliceEnd);
-        NS_ASSERTION(endTicks == sliceEnd || track->IsEnded(),
-                     "Ran out of data but track not ended?");
         output.ApplyVolume(volume);
-        STREAM_LOG(PR_LOG_DEBUG+1, ("MediaStream %p writing samples for %f to %f (samples %lld to %lld)",
-                                    aStream, MediaTimeToSeconds(t), MediaTimeToSeconds(end),
-                                    startTicks, endTicks));
+        STREAM_LOG(PR_LOG_DEBUG+1, ("MediaStream %p writing %ld samples for %f to %f (samples %ld to %ld)\n",
+                                     aStream, toWrite, MediaTimeToSeconds(t), MediaTimeToSeconds(end),
+                                     offset, endTicksNeeded));
+        ticksNeeded -= toWrite;
       }
-      // Need unique id for stream & track - and we want it to match the inserter
-      output.WriteTo(LATENCY_STREAM_ID(aStream, track->GetID()),
-                     audioOutput.mStream);
       t = end;
+      offset += toWrite;
+      audioOutput.mLastTickWritten += toWrite;
+      ticksWritten += toWrite;
     }
+
+    // Need unique id for stream & track - and we want it to match the inserter
+    output.WriteTo(LATENCY_STREAM_ID(aStream, track->GetID()),
+                   audioOutput.mStream, mMixer);
   }
+  return ticksWritten;
 }
 
 static void
 SetImageToBlackPixel(PlanarYCbCrImage* aImage)
 {
   uint8_t blackPixel[] = { 0x10, 0x80, 0x80 };
 
   PlanarYCbCrData data;
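
The tick-accounting comment above is easiest to see with numbers: the same duration, floor-converted to ticks from different absolute start offsets, can yield counts that differ by one, which is exactly the off-by-one that mLastTickWritten absorbs. A standalone sketch, assuming a hypothetical fixed-point unit of 1<<20 per second and a 48 kHz rate (the real media-time unit and IdealAudioRate() may differ):

    #include <cstdio>

    const long long UNITS_PER_SECOND = 1 << 20; // assumed time unit
    const long long RATE = 48000;               // stand-in for IdealAudioRate()

    long long TimeToTicksRoundDown(long long aTime)
    {
      return aTime * RATE / UNITS_PER_SECOND; // floor for non-negative input
    }

    int main()
    {
      const long long duration = 10923; // an arbitrary interval, about 10 ms
      // floor() eats a different fraction at each offset: this prints 500
      // ticks for starts 0 and 7, but 501 ticks for start 109.
      for (long long start : { 0LL, 7LL, 109LL }) {
        long long ticks = TimeToTicksRoundDown(start + duration) -
                          TimeToTicksRoundDown(start);
        printf("start=%lld ticks=%lld\n", start, ticks);
      }
      return 0;
    }
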
@@ -1236,16 +1288,19 @@ MediaStreamGraphImpl::RunThread()
     // Figure out which streams are blocked and when.
     GraphTime prevComputedTime = mStateComputedTime;
     RecomputeBlocking(endBlockingDecisions);
 
     // Play stream contents.
     bool allBlockedForever = true;
     // True when we've done ProcessInput for all processed streams.
     bool doneAllProducing = false;
+    // This is the number of frames that are written to the AudioStreams
+    // during this cycle.
+    TrackTicks ticksPlayed = 0;
     // Figure out what each stream wants to do
     for (uint32_t i = 0; i < mStreams.Length(); ++i) {
       MediaStream* stream = mStreams[i];
       if (!doneAllProducing) {
         ProcessedMediaStream* ps = stream->AsProcessedStream();
         if (ps) {
           AudioNodeStream* n = stream->AsAudioNodeStream();
           if (n) {
@@ -1272,28 +1327,39 @@ MediaStreamGraphImpl::RunThread()
                              "Stream did not produce enough data");
           }
         }
       }
       NotifyHasCurrentData(stream);
       if (mRealtime) {
         // Only playback audio and video in real-time mode
         CreateOrDestroyAudioStreams(prevComputedTime, stream);
-        PlayAudio(stream, prevComputedTime, mStateComputedTime);
+        TrackTicks ticksPlayedForThisStream = PlayAudio(stream, prevComputedTime, mStateComputedTime);
+        if (!ticksPlayed) {
+          ticksPlayed = ticksPlayedForThisStream;
+        } else {
+          MOZ_ASSERT(!ticksPlayedForThisStream || ticksPlayedForThisStream == ticksPlayed,
+              "Each stream should have written the same number of frames.");
+        }
         PlayVideo(stream);
       }
       SourceMediaStream* is = stream->AsSourceStream();
       if (is) {
         UpdateBufferSufficiencyState(is);
       }
       GraphTime end;
       if (!stream->mBlocked.GetAt(mCurrentTime, &end) || end < GRAPH_TIME_MAX) {
         allBlockedForever = false;
       }
     }
+
+    if (mMixer) {
+      mMixer->FinishMixing();
+    }
+
     if (ensureNextIteration || !allBlockedForever) {
       EnsureNextIteration();
     }
 
     // Send updates to the main thread and wait for the next control loop
     // iteration.
     {
       MonitorAutoLock lock(mMonitor);
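
The assertion pattern this hunk adds to the loop, isolated as a runnable sketch: the first stream that actually writes audio fixes the per-cycle tick count, and every later stream must either match it or have written nothing. Names here are stand-ins for the real graph types.

    #include <cassert>
    #include <cstdint>
    #include <vector>

    typedef int64_t TrackTicks;

    // One entry per stream: how many ticks its PlayAudio call reported.
    void CheckTicksInvariant(const std::vector<TrackTicks>& aTicksPerStream)
    {
      TrackTicks ticksPlayed = 0;
      for (TrackTicks t : aTicksPerStream) {
        if (!ticksPlayed) {
          ticksPlayed = t;
        } else {
          assert(!t || t == ticksPlayed); // equal frame counts, or silent
        }
      }
    }

    int main()
    {
      CheckTicksInvariant({ 441, 441, 0, 441 }); // OK: equal or nothing
      return 0;
    }
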
@@ -2313,16 +2379,30 @@ SourceMediaStream::GetBufferedTicks(Trac
         track->TimeToTicksRoundDown(
           GraphTimeToStreamTime(GraphImpl()->mStateComputedTime));
     }
   }
   return 0;
 }
 
 void
+SourceMediaStream::RegisterForAudioMixing()
+{
+  MutexAutoLock lock(mMutex);
+  mNeedsMixing = true;
+}
+
+bool
+SourceMediaStream::NeedsMixing()
+{
+  MutexAutoLock lock(mMutex);
+  return mNeedsMixing;
+}
+
+void
 MediaInputPort::Init()
 {
   STREAM_LOG(PR_LOG_DEBUG, ("Adding MediaInputPort %p (from %p to %p) to the graph",
              this, mSource, mDest));
   mSource->AddConsumer(this);
   mDest->AddInput(this);
   // mPortCount decremented via MediaInputPort::Destroy's message
   ++mDest->GraphImpl()->mPortCount;
@@ -2496,16 +2576,17 @@ MediaStreamGraphImpl::MediaStreamGraphIm
   , mForceShutDown(false)
   , mPostedRunInStableStateEvent(false)
   , mDetectedNotRunning(false)
   , mPostedRunInStableState(false)
   , mRealtime(aRealtime)
   , mNonRealtimeProcessing(false)
   , mStreamOrderDirty(false)
   , mLatencyLog(AsyncLatencyLogger::Get())
+  , mMixer(nullptr)
 {
 #ifdef PR_LOGGING
   if (!gMediaStreamGraphLog) {
     gMediaStreamGraphLog = PR_NewLogModule("MediaStreamGraph");
   }
 #endif
 
   mCurrentTimeStamp = mInitialTimeStamp = mLastMainThreadUpdate = TimeStamp::Now();
--- a/content/media/MediaStreamGraph.h
+++ b/content/media/MediaStreamGraph.h
@@ -13,16 +13,17 @@
 #include "nsIRunnable.h"
 #include "StreamBuffer.h"
 #include "TimeVarying.h"
 #include "VideoFrameContainer.h"
 #include "VideoSegment.h"
 #include "MainThreadUtils.h"
 #include "nsAutoRef.h"
 #include "speex/speex_resampler.h"
+#include "AudioMixer.h"
 
 class nsIRunnable;
 
 template <>
 class nsAutoRefTraits<SpeexResamplerState> : public nsPointerRefTraits<SpeexResamplerState>
 {
   public:
   static void Release(SpeexResamplerState* aState) { speex_resampler_destroy(aState); }
@@ -567,16 +568,18 @@ protected:
   // audio track.
   struct AudioOutputStream {
     // When we started audio playback for this track.
     // Add mStream->GetPosition() to find the current audio playback position.
     GraphTime mAudioPlaybackStartTime;
     // Amount of time that we've wanted to play silence because of the stream
     // blocking.
     MediaTime mBlockedAudioTime;
+    // Last tick written to the audio output.
+    TrackTicks mLastTickWritten;
     nsAutoPtr<AudioStream> mStream;
     TrackID mTrackID;
   };
   nsTArray<AudioOutputStream> mAudioOutputStreams;
 
   /**
    * When true, this means the stream will be finished once all
    * buffered data has been consumed.
@@ -777,16 +780,19 @@ public:
     uint32_t mCommands;
     // Each time the track updates are flushed to the media graph thread,
     // the segment buffer is emptied.
     nsAutoPtr<MediaSegment> mData;
     nsTArray<ThreadAndRunnable> mDispatchWhenNotEnough;
     bool mHaveEnough;
   };
 
+  void RegisterForAudioMixing();
+  bool NeedsMixing();
+
 protected:
   TrackData* FindDataForTrack(TrackID aID)
   {
     for (uint32_t i = 0; i < mUpdateTracks.Length(); ++i) {
       if (mUpdateTracks[i].mID == aID) {
         return &mUpdateTracks[i];
       }
     }
@@ -810,16 +816,17 @@ protected:
   Mutex mMutex;
   // protected by mMutex
   StreamTime mUpdateKnownTracksTime;
   nsTArray<TrackData> mUpdateTracks;
   nsTArray<nsRefPtr<MediaStreamDirectListener> > mDirectListeners;
   bool mPullEnabled;
   bool mUpdateFinished;
   bool mDestroyed;
+  bool mNeedsMixing;
 };
 
 /**
  * Represents a connection between a ProcessedMediaStream and one of its
  * input streams.
  * We make these refcounted so that stream-related messages with MediaInputPort*
  * pointers can be sent to the main thread safely.
  *
--- a/content/media/MediaStreamGraphImpl.h
+++ b/content/media/MediaStreamGraphImpl.h
@@ -8,22 +8,25 @@
 
 #include "MediaStreamGraph.h"
 
 #include "mozilla/Monitor.h"
 #include "mozilla/TimeStamp.h"
 #include "nsIThread.h"
 #include "nsIRunnable.h"
 #include "Latency.h"
+#include "mozilla/WeakPtr.h"
 
 namespace mozilla {
 
 template <typename T>
 class LinkedList;
 
+class AudioMixer;
+
 /**
  * Assume we can run an iteration of the MediaStreamGraph loop in this much time
  * or less.
  * We try to run the control loop at this rate.
  */
 static const int MEDIA_GRAPH_TARGET_PERIOD_MS = 10;
 
 /**
@@ -48,20 +51,16 @@ static const int AUDIO_TARGET_MS = 2*MED
  * near the end of the iteration of the control loop. The maximum delay
  * to the setting of the next video frame is 2*MEDIA_GRAPH_TARGET_PERIOD_MS +
  * SCHEDULE_SAFETY_MARGIN_MS. This is not optimal yet.
  */
 static const int VIDEO_TARGET_MS = 2*MEDIA_GRAPH_TARGET_PERIOD_MS +
     SCHEDULE_SAFETY_MARGIN_MS;
 
 /**
- * Rate at which we run the video tracks.
- */
-
-/**
  * A per-stream update message passed from the media graph thread to the
  * main thread.
  */
 struct StreamUpdate {
   int64_t mGraphUpdateIndex;
   nsRefPtr<MediaStream> mStream;
   StreamTime mNextMainThreadCurrentTime;
   bool mNextMainThreadFinished;
@@ -322,19 +321,19 @@ public:
   /**
    * If aStream needs an audio stream but doesn't have one, create it.
    * If aStream doesn't need an audio stream but has one, destroy it.
    */
   void CreateOrDestroyAudioStreams(GraphTime aAudioOutputStartTime,
                                    MediaStream* aStream);
   /**
    * Queue audio (mix of stream audio and silence for blocked intervals)
-   * to the audio output stream.
+   * to the audio output stream. Returns the number of frames played.
    */
-  void PlayAudio(MediaStream* aStream, GraphTime aFrom, GraphTime aTo);
+  TrackTicks PlayAudio(MediaStream* aStream, GraphTime aFrom, GraphTime aTo);
   /**
    * Set the correct current video frame for stream aStream.
    */
   void PlayVideo(MediaStream* aStream);
   /**
    * No more data will be forthcoming for aStream. The stream will end
    * at the current buffer end point. The StreamBuffer's tracks must be
    * explicitly set to finished by the caller.
@@ -570,13 +569,17 @@ public:
    * True when a change has happened which requires us to recompute the stream
    * blocking order.
    */
   bool mStreamOrderDirty;
   /**
    * Hold a ref to the Latency logger
    */
   nsRefPtr<AsyncLatencyLogger> mLatencyLog;
+  /**
+   * If this is not null, all the audio output for the MSG will be mixed down.
+   */
+  nsAutoPtr<AudioMixer> mMixer;
 };
 
 }
 
 #endif /* MEDIASTREAMGRAPHIMPL_H_ */
new file mode 100644
--- /dev/null
+++ b/content/media/compiledtest/TestAudioMixer.cpp
@@ -0,0 +1,155 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "AudioMixer.h"
+#include <assert.h>
+
+using mozilla::AudioDataValue;
+using mozilla::AudioSampleFormat;
+
+/* In this test, the different audio streams and channels are always created
+ * so that they cancel each other out. */
+void MixingDone(AudioDataValue* aData, AudioSampleFormat aFormat, uint32_t aChannels, uint32_t aFrames)
+{
+  bool silent = true;
+  for (uint32_t i = 0; i < aChannels * aFrames; i++) {
+    if (aData[i] != 0.0) {
+      if (aFormat == mozilla::AUDIO_FORMAT_S16) {
+        fprintf(stderr, "Sample at %d is not silent: %d\n", i, (short)aData[i]);
+      } else {
+        fprintf(stderr, "Sample at %d is not silent: %f\n", i, (float)aData[i]);
+      }
+      silent = false;
+    }
+  }
+  if (!silent) {
+    MOZ_CRASH();
+  }
+}
+
+/* Helper functions to give us the maximum and minimum values that don't
+ * clip, for a given sample format (integer or floating-point). */
+template<typename T>
+T GetLowValue();
+
+template<typename T>
+T GetHighValue();
+
+template<>
+float GetLowValue<float>() {
+  return -1.0;
+}
+
+template<>
+short GetLowValue<short>() {
+  return -INT16_MAX;
+}
+
+template<>
+float GetHighValue<float>() {
+  return 1.0;
+}
+
+template<>
+short GetHighValue<short>() {
+  return INT16_MAX;
+}
+
+void FillBuffer(AudioDataValue* aBuffer, uint32_t aLength, AudioDataValue aValue)
+{
+  AudioDataValue* end = aBuffer + aLength;
+  while (aBuffer != end) {
+    *aBuffer++ = aValue;
+  }
+}
+
+int main(int argc, char* argv[]) {
+  const uint32_t CHANNEL_LENGTH = 256;
+  AudioDataValue a[CHANNEL_LENGTH * 2];
+  AudioDataValue b[CHANNEL_LENGTH * 2];
+  FillBuffer(a, CHANNEL_LENGTH, GetLowValue<AudioDataValue>());
+  FillBuffer(a + CHANNEL_LENGTH, CHANNEL_LENGTH, GetHighValue<AudioDataValue>());
+  FillBuffer(b, CHANNEL_LENGTH, GetHighValue<AudioDataValue>());
+  FillBuffer(b + CHANNEL_LENGTH, CHANNEL_LENGTH, GetLowValue<AudioDataValue>());
+
+  {
+    int iterations = 2;
+    mozilla::AudioMixer mixer(MixingDone);
+
+    fprintf(stderr, "Test AudioMixer constant buffer length.\n");
+
+    while (iterations--) {
+      mixer.Mix(a, 2, CHANNEL_LENGTH);
+      mixer.Mix(b, 2, CHANNEL_LENGTH);
+      mixer.FinishMixing();
+    }
+  }
+
+  {
+    mozilla::AudioMixer mixer(MixingDone);
+
+    fprintf(stderr, "Test AudioMixer variable buffer length.\n");
+
+    FillBuffer(a, CHANNEL_LENGTH / 2, GetLowValue<AudioDataValue>());
+    FillBuffer(a + CHANNEL_LENGTH / 2, CHANNEL_LENGTH / 2, GetLowValue<AudioDataValue>());
+    FillBuffer(b, CHANNEL_LENGTH / 2, GetHighValue<AudioDataValue>());
+    FillBuffer(b + CHANNEL_LENGTH / 2, CHANNEL_LENGTH / 2, GetHighValue<AudioDataValue>());
+    mixer.Mix(a, 2, CHANNEL_LENGTH / 2);
+    mixer.Mix(b, 2, CHANNEL_LENGTH / 2);
+    mixer.FinishMixing();
+    FillBuffer(a, CHANNEL_LENGTH, GetLowValue<AudioDataValue>());
+    FillBuffer(a + CHANNEL_LENGTH, CHANNEL_LENGTH, GetHighValue<AudioDataValue>());
+    FillBuffer(b, CHANNEL_LENGTH, GetHighValue<AudioDataValue>());
+    FillBuffer(b + CHANNEL_LENGTH, CHANNEL_LENGTH, GetLowValue<AudioDataValue>());
+    mixer.Mix(a, 2, CHANNEL_LENGTH);
+    mixer.Mix(b, 2, CHANNEL_LENGTH);
+    mixer.FinishMixing();
+    FillBuffer(a, CHANNEL_LENGTH / 2, GetLowValue<AudioDataValue>());
+    FillBuffer(a + CHANNEL_LENGTH / 2, CHANNEL_LENGTH / 2, GetLowValue<AudioDataValue>());
+    FillBuffer(b, CHANNEL_LENGTH / 2, GetHighValue<AudioDataValue>());
+    FillBuffer(b + CHANNEL_LENGTH / 2, CHANNEL_LENGTH / 2, GetHighValue<AudioDataValue>());
+    mixer.Mix(a, 2, CHANNEL_LENGTH / 2);
+    mixer.Mix(b, 2, CHANNEL_LENGTH / 2);
+    mixer.FinishMixing();
+  }
+
+  FillBuffer(a, CHANNEL_LENGTH, GetLowValue<AudioDataValue>());
+  FillBuffer(b, CHANNEL_LENGTH, GetHighValue<AudioDataValue>());
+
+  {
+    mozilla::AudioMixer mixer(MixingDone);
+    fprintf(stderr, "Test AudioMixer variable channel count.\n");
+
+    mixer.Mix(a, 1, CHANNEL_LENGTH);
+    mixer.Mix(b, 1, CHANNEL_LENGTH);
+    mixer.FinishMixing();
+    mixer.Mix(a, 1, CHANNEL_LENGTH);
+    mixer.Mix(b, 1, CHANNEL_LENGTH);
+    mixer.FinishMixing();
+    mixer.Mix(a, 1, CHANNEL_LENGTH);
+    mixer.Mix(b, 1, CHANNEL_LENGTH);
+    mixer.FinishMixing();
+  }
+
+  {
+    mozilla::AudioMixer mixer(MixingDone);
+    fprintf(stderr, "Test AudioMixer variable stream count.\n");
+
+    mixer.Mix(a, 2, CHANNEL_LENGTH);
+    mixer.Mix(b, 2, CHANNEL_LENGTH);
+    mixer.FinishMixing();
+    mixer.Mix(a, 2, CHANNEL_LENGTH);
+    mixer.Mix(b, 2, CHANNEL_LENGTH);
+    mixer.Mix(a, 2, CHANNEL_LENGTH);
+    mixer.Mix(b, 2, CHANNEL_LENGTH);
+    mixer.FinishMixing();
+    mixer.Mix(a, 2, CHANNEL_LENGTH);
+    mixer.Mix(b, 2, CHANNEL_LENGTH);
+    mixer.FinishMixing();
+  }
+
+  return 0;
+}
+
new file mode 100644
--- /dev/null
+++ b/content/media/compiledtest/moz.build
@@ -0,0 +1,16 @@
+# -*- Mode: python; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+CPP_UNIT_TESTS += [
+    'TestAudioMixer.cpp',
+]
+
+FAIL_ON_WARNINGS = True
+
+LOCAL_INCLUDES += [
+    '..',
+]
+
--- a/content/media/moz.build
+++ b/content/media/moz.build
@@ -7,16 +7,18 @@
 PARALLEL_DIRS += [
   'encoder',
   'mediasource',
   'ogg',
   'webaudio',
   'webvtt'
 ]
 
+TEST_TOOL_DIRS += ['compiledtest']
+
 if CONFIG['MOZ_RAW']:
     PARALLEL_DIRS += ['raw']
 
 if CONFIG['MOZ_WAVE']:
     PARALLEL_DIRS += ['wave']
 
 if CONFIG['MOZ_WEBM']:
     PARALLEL_DIRS += ['webm']
@@ -52,16 +54,17 @@ TEST_DIRS += [
     'gtest',
 ]
 
 EXPORTS += [
     'AbstractMediaDecoder.h',
     'AudioChannelFormat.h',
     'AudioCompactor.h',
     'AudioEventTimeline.h',
+    'AudioMixer.h',
     'AudioNodeEngine.h',
     'AudioNodeExternalInputStream.h',
     'AudioNodeStream.h',
     'AudioSampleFormat.h',
     'AudioSegment.h',
     'AudioStream.h',
     'BufferDecoder.h',
     'BufferMediaResource.h',
--- a/content/media/webrtc/MediaEngineWebRTCAudio.cpp
+++ b/content/media/webrtc/MediaEngineWebRTCAudio.cpp
@@ -153,16 +153,18 @@ MediaEngineWebRTCAudioSource::Start(Sour
   {
     MonitorAutoLock lock(mMonitor);
     mSources.AppendElement(aStream);
   }
 
   AudioSegment* segment = new AudioSegment();
   aStream->AddTrack(aID, SAMPLE_FREQUENCY, 0, segment);
   aStream->AdvanceKnownTracksTime(STREAM_TIME_MAX);
+  // XXX Make this based on the pref.
+  aStream->RegisterForAudioMixing();
   LOG(("Start audio for stream %p", aStream));
 
   if (mState == kStarted) {
     MOZ_ASSERT(aID == mTrackID);
     return NS_OK;
   }
   mState = kStarted;
   mTrackID = aID;