Bug 664918. Part 8: Add mozCaptureStream()/mozCaptureStreamUntilEnded() APIs to HTML media elements, returning a MediaStream representing the contents of the media element. r=cpearce,jesup
author: Robert O'Callahan <robert@ocallahan.org>
date: Mon, 30 Apr 2012 15:12:42 +1200
changeset: 96750 5f6acbc53709de1b86fb2cd7b27a4755efcf54c8
parent: 96749 b214aadcd5807b943c52763f184c53b0816e1036
child: 96751 25d07f14ecb2635c4296d21fbf91cfccc0e9ae7f
push id: 1116
push user: lsblakk@mozilla.com
push date: Mon, 16 Jul 2012 19:38:18 +0000
reviewers: cpearce, jesup
bugs: 664918
milestone: 15.0a1
Bug 664918. Part 8: Add mozCaptureStream()/mozCaptureStreamUntilEnded() APIs to HTML media elements, returning a MediaStream representing the contents of the media element. r=cpearce,jesup

This is currently not fully functional:
- The MediaStream always ends when the underlying resource ends.
- These APIs can't be used on a media element whose src is a MediaStream.
- Seeking or pausing the resource will cause problems.
- The media element does not play back in sync with the MediaStream.
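To make the new entry points concrete, here is a minimal caller sketch (illustration only, not part of the patch; the helper name CaptureElementOutput is hypothetical, but the two methods are the ones added to nsIDOMHTMLMediaElement below). From content JavaScript the same methods surface as video.mozCaptureStream() and video.mozCaptureStreamUntilEnded(), each returning a MediaStream.

```cpp
#include "nsIDOMHTMLMediaElement.h"
#include "nsIDOMMediaStream.h"

// Hypothetical helper: capture an element's output through the new XPCOM
// methods. MozCaptureStream() returns a stream that never finishes;
// MozCaptureStreamUntilEnded() returns one that finishes when the current
// resource ends.
nsresult CaptureElementOutput(nsIDOMHTMLMediaElement* aElement,
                              bool aUntilEnded,
                              nsIDOMMediaStream** aResult)
{
  // After either call succeeds, the element's audio is routed into the
  // captured stream(s) rather than the audio hardware (see mozAudioCaptured).
  return aUntilEnded ? aElement->MozCaptureStreamUntilEnded(aResult)
                     : aElement->MozCaptureStream(aResult);
}
```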
content/html/content/public/nsHTMLMediaElement.h
content/html/content/src/nsHTMLMediaElement.cpp
content/media/MediaResource.cpp
content/media/MediaResource.h
content/media/nsAudioAvailableEventManager.cpp
content/media/nsBuiltinDecoder.cpp
content/media/nsBuiltinDecoder.h
content/media/nsBuiltinDecoderReader.cpp
content/media/nsBuiltinDecoderReader.h
content/media/nsBuiltinDecoderStateMachine.cpp
content/media/nsBuiltinDecoderStateMachine.h
content/media/nsMediaCache.cpp
content/media/nsMediaCache.h
content/media/nsMediaDecoder.h
dom/interfaces/html/nsIDOMHTMLAudioElement.idl
dom/interfaces/html/nsIDOMHTMLMediaElement.idl
dom/interfaces/html/nsIDOMHTMLVideoElement.idl
--- a/content/html/content/public/nsHTMLMediaElement.h
+++ b/content/html/content/public/nsHTMLMediaElement.h
@@ -260,16 +260,19 @@ public:
   // http://www.whatwg.org/specs/web-apps/current-work/#ended
   bool IsPlaybackEnded() const;
 
   // principal of the currently playing resource. Anything accessing the contents
   // of this element must have a principal that subsumes this principal.
   // Returns null if nothing is playing.
   already_AddRefed<nsIPrincipal> GetCurrentPrincipal();
 
+  // Called to notify that the principal of the decoder's media resource has changed.
+  void NotifyDecoderPrincipalChanged();
+
   // Update the visual size of the media. Called from the decoder on the
   // main thread when/if the size changes.
   void UpdateMediaSize(nsIntSize size);
 
   // Returns the CanPlayStatus indicating if we can handle this
   // MIME type. The MIME type should not include the codecs parameter.
   // If it returns anything other than CANPLAY_NO then it also
   // returns a null-terminated list of supported codecs
@@ -412,16 +415,25 @@ protected:
    */
   void SetupMediaStreamPlayback();
   /**
    * Stop playback on mStream.
    */
   void EndMediaStreamPlayback();
 
   /**
+   * Returns an nsDOMMediaStream containing the played contents of this
+   * element. When aFinishWhenEnded is true, we finish the stream (and write
+   * no more data into it) once this element ends playback.
+   * When aFinishWhenEnded is false, ending playback does not finish the
+   * stream; the stream never finishes.
+   */
+  already_AddRefed<nsDOMMediaStream> CaptureStreamInternal(bool aFinishWhenEnded);
+
+  /**
    * Create a decoder for the given aMIMEType. Returns null if we
    * were unable to create the decoder.
    */
   already_AddRefed<nsMediaDecoder> CreateDecoder(const nsACString& aMIMEType);
 
   /**
    * Initialize a decoder as a clone of an existing decoder in another
    * element.
@@ -628,16 +640,24 @@ protected:
   // set in the src attribute.
   nsRefPtr<nsDOMMediaStream> mSrcAttrStream;
 
   // Holds a reference to the DOM wrapper for the MediaStream that we're
   // actually playing.
   // At most one of mDecoder and mStream can be non-null.
   nsRefPtr<nsDOMMediaStream> mStream;
 
+  // Holds references to the DOM wrappers for the MediaStreams that we're
+  // writing to.
+  struct OutputMediaStream {
+    nsRefPtr<nsDOMMediaStream> mStream;
+    bool mFinishWhenEnded;
+  };
+  nsTArray<OutputMediaStream> mOutputStreams;
+
   // Holds a reference to the MediaStreamListener attached to mStream. STRONG!
   StreamListener* mStreamListener;
 
   // Holds a reference to the first channel we open to the media resource.
   // Once the decoder is created, control over the channel passes to the
   // decoder, and we null out this reference. We must store this in case
   // we need to cancel the channel before control of it passes to the decoder.
   nsCOMPtr<nsIChannel> mChannel;
@@ -764,19 +784,22 @@ protected:
   // Indicates whether |autoplay| will actually autoplay based on the pref
   // media.autoplay.enabled
   bool mAutoplayEnabled;
 
   // Playback of the video is paused either due to calling the
   // 'Pause' method, or playback not yet having started.
   bool mPaused;
 
-  // True if the sound is muted
+  // True if the sound is muted.
   bool mMuted;
 
+  // True if the sound is being captured.
+  bool mAudioCaptured;
+
   // If TRUE then the media element was actively playing before the currently
   // in progress seeking. If FALSE then the media element is either not seeking
   // or was not actively playing before the current seek. Used to decide whether
   // to raise the 'waiting' event as per 4.7.1.8 in HTML 5 specification.
   bool mPlayingBeforeSeek;
 
   // True iff this element is paused because the document is inactive
   bool mPausedForInactiveDocument;
--- a/content/html/content/src/nsHTMLMediaElement.cpp
+++ b/content/html/content/src/nsHTMLMediaElement.cpp
@@ -422,28 +422,34 @@ NS_IMPL_RELEASE_INHERITED(nsHTMLMediaEle
 NS_IMPL_CYCLE_COLLECTION_CLASS(nsHTMLMediaElement)
 
 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN_INHERITED(nsHTMLMediaElement, nsGenericHTMLElement)
   NS_IMPL_CYCLE_COLLECTION_TRAVERSE_NSCOMPTR(mStream)
   NS_IMPL_CYCLE_COLLECTION_TRAVERSE_NSCOMPTR(mSrcAttrStream)
   NS_IMPL_CYCLE_COLLECTION_TRAVERSE_NSCOMPTR(mSourcePointer)
   NS_IMPL_CYCLE_COLLECTION_TRAVERSE_NSCOMPTR(mLoadBlockedDoc)
   NS_IMPL_CYCLE_COLLECTION_TRAVERSE_NSCOMPTR(mSourceLoadCandidate)
+  for (PRUint32 i = 0; i < tmp->mOutputStreams.Length(); ++i) {
+    NS_IMPL_CYCLE_COLLECTION_TRAVERSE_NSCOMPTR(mOutputStreams[i].mStream);
+  }
 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
 
 NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN_INHERITED(nsHTMLMediaElement, nsGenericHTMLElement)
   if (tmp->mStream) {
     // Need to EndMediaStreamPlayback to clear mStream and make sure everything
     // gets unhooked correctly.
     tmp->EndMediaStreamPlayback();
   }
   NS_IMPL_CYCLE_COLLECTION_UNLINK_NSCOMPTR(mSrcAttrStream)
   NS_IMPL_CYCLE_COLLECTION_UNLINK_NSCOMPTR(mSourcePointer)
   NS_IMPL_CYCLE_COLLECTION_UNLINK_NSCOMPTR(mLoadBlockedDoc)
   NS_IMPL_CYCLE_COLLECTION_UNLINK_NSCOMPTR(mSourceLoadCandidate)
+  for (PRUint32 i = 0; i < tmp->mOutputStreams.Length(); ++i) {
+    NS_IMPL_CYCLE_COLLECTION_UNLINK_NSCOMPTR(mOutputStreams[i].mStream);
+  }
 NS_IMPL_CYCLE_COLLECTION_UNLINK_END
 
 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION_INHERITED(nsHTMLMediaElement)
   NS_INTERFACE_MAP_ENTRY(nsIObserver)
 NS_INTERFACE_MAP_END_INHERITING(nsGenericHTMLElement)
 
 // nsIDOMHTMLMediaElement
 NS_IMPL_BOOL_ATTR(nsHTMLMediaElement, Controls, controls)
@@ -1417,16 +1423,53 @@ NS_IMETHODIMP nsHTMLMediaElement::SetMut
     GetMediaStream()->SetAudioOutputVolume(this, effectiveVolume);
   }
 
   DispatchAsyncEvent(NS_LITERAL_STRING("volumechange"));
 
   return NS_OK;
 }
 
+already_AddRefed<nsDOMMediaStream>
+nsHTMLMediaElement::CaptureStreamInternal(bool aFinishWhenEnded)
+{
+  OutputMediaStream* out = mOutputStreams.AppendElement();
+  out->mStream = nsDOMMediaStream::CreateInputStream();
+  nsRefPtr<nsIPrincipal> principal = GetCurrentPrincipal();
+  out->mStream->CombineWithPrincipal(principal);
+  out->mFinishWhenEnded = aFinishWhenEnded;
+
+  mAudioCaptured = true;
+  if (mDecoder) {
+    mDecoder->SetAudioCaptured(true);
+    mDecoder->AddOutputStream(
+        out->mStream->GetStream()->AsSourceStream(), aFinishWhenEnded);
+  }
+  nsRefPtr<nsDOMMediaStream> result = out->mStream;
+  return result.forget();
+}
+
+NS_IMETHODIMP nsHTMLMediaElement::MozCaptureStream(nsIDOMMediaStream** aStream)
+{
+  *aStream = CaptureStreamInternal(false).get();
+  return NS_OK;
+}
+
+NS_IMETHODIMP nsHTMLMediaElement::MozCaptureStreamUntilEnded(nsIDOMMediaStream** aStream)
+{
+  *aStream = CaptureStreamInternal(true).get();
+  return NS_OK;
+}
+
+NS_IMETHODIMP nsHTMLMediaElement::GetMozAudioCaptured(bool *aCaptured)
+{
+  *aCaptured = mAudioCaptured;
+  return NS_OK;
+}
+
 class MediaElementSetForURI : public nsURIHashKey {
 public:
   MediaElementSetForURI(const nsIURI* aKey) : nsURIHashKey(aKey) {}
   MediaElementSetForURI(const MediaElementSetForURI& toCopy)
     : nsURIHashKey(toCopy), mElements(toCopy.mElements) {}
   nsTArray<nsHTMLMediaElement*> mElements;
 };
 
@@ -1543,16 +1586,17 @@ nsHTMLMediaElement::nsHTMLMediaElement(a
     mFragmentEnd(-1.0),
     mAllowAudioData(false),
     mBegun(false),
     mLoadedFirstFrame(false),
     mAutoplaying(true),
     mAutoplayEnabled(true),
     mPaused(true),
     mMuted(false),
+    mAudioCaptured(false),
     mPlayingBeforeSeek(false),
     mPausedForInactiveDocument(false),
     mWaitingFired(false),
     mIsRunningLoadMethod(false),
     mIsLoadingFromSourceChildren(false),
     mDelayingLoadEvent(false),
     mIsRunningSelectResource(false),
     mHaveQueuedSelectResource(false),
@@ -2262,30 +2306,37 @@ nsresult nsHTMLMediaElement::FinishDecod
   mNetworkState = nsIDOMHTMLMediaElement::NETWORK_LOADING;
 
   // Force a same-origin check before allowing events for this media resource.
   mMediaSecurityVerified = false;
 
   // The new stream has not been suspended by us.
   mPausedForInactiveDocument = false;
 
+  aDecoder->SetAudioCaptured(mAudioCaptured);
   aDecoder->SetVolume(mMuted ? 0.0 : mVolume);
+  for (PRUint32 i = 0; i < mOutputStreams.Length(); ++i) {
+    OutputMediaStream* ms = &mOutputStreams[i];
+    aDecoder->AddOutputStream(ms->mStream->GetStream()->AsSourceStream(),
+        ms->mFinishWhenEnded);
+  }
 
   nsresult rv = aDecoder->Load(aStream, aListener, aCloneDonor);
   if (NS_FAILED(rv)) {
     LOG(PR_LOG_DEBUG, ("%p Failed to load for decoder %p", this, aDecoder));
     return rv;
   }
 
   // Decoder successfully created, the decoder now owns the MediaResource
   // which owns the channel.
   mChannel = nsnull;
 
   mDecoder = aDecoder;
   AddMediaElementToURITable();
+  NotifyDecoderPrincipalChanged();
 
   // We may want to suspend the new stream now.
   // This will also do an AddRemoveSelfReference.
   NotifyOwnerDocumentActivityChanged();
 
   if (!mPaused) {
     SetPlayedOrSeeked(true);
     if (!mPausedForInactiveDocument) {
@@ -2385,16 +2436,18 @@ private:
   bool mPendingNotifyOutput;
 };
 
 void nsHTMLMediaElement::SetupMediaStreamPlayback()
 {
   NS_ASSERTION(!mStream && !mStreamListener, "Should have been ended already");
 
   mStream = mSrcAttrStream;
+  // XXX if we ever support capturing the output of a media element which is
+  // playing a stream, we'll need to add a CombineWithPrincipal call here.
   mStreamListener = new StreamListener(this);
   NS_ADDREF(mStreamListener);
   GetMediaStream()->AddListener(mStreamListener);
   if (mPaused) {
     GetMediaStream()->ChangeExplicitBlockerCount(1);
   }
   if (mPausedForInactiveDocument) {
     GetMediaStream()->ChangeExplicitBlockerCount(1);
@@ -2906,16 +2959,25 @@ already_AddRefed<nsIPrincipal> nsHTMLMed
   }
   if (mStream) {
     nsRefPtr<nsIPrincipal> principal = mStream->GetPrincipal();
     return principal.forget();
   }
   return nsnull;
 }
 
+void nsHTMLMediaElement::NotifyDecoderPrincipalChanged()
+{
+  for (PRUint32 i = 0; i < mOutputStreams.Length(); ++i) {
+    OutputMediaStream* ms = &mOutputStreams[i];
+    nsRefPtr<nsIPrincipal> principal = GetCurrentPrincipal();
+    ms->mStream->CombineWithPrincipal(principal);
+  }
+}
+
 void nsHTMLMediaElement::UpdateMediaSize(nsIntSize size)
 {
   mMediaSize = size;
 }
 
 void nsHTMLMediaElement::NotifyOwnerDocumentActivityChanged()
 {
   nsIDocument* ownerDoc = OwnerDoc();
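The security-sensitive piece of the element changes is the principal bookkeeping: a captured stream must be readable only by consumers allowed to read everything that has played into it, so whenever the playing resource's principal changes, the element folds the current principal into every output stream. A standalone analogue of that pattern (string origins stand in for nsIPrincipal; all names here are illustrative):

```cpp
#include <set>
#include <string>
#include <vector>

struct CapturedStream {
  std::set<std::string> mOrigins;  // the combined principal so far
  void CombineWithPrincipal(const std::string& aOrigin) {
    mOrigins.insert(aOrigin);      // combining only ever widens the principal
  }
};

struct Element {
  std::string mCurrentOrigin;                   // GetCurrentPrincipal() analogue
  std::vector<CapturedStream*> mOutputStreams;
  void NotifyDecoderPrincipalChanged() {
    for (CapturedStream* s : mOutputStreams) {  // fold the new principal into
      s->CombineWithPrincipal(mCurrentOrigin);  // every captured stream
    }
  }
};
```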
--- a/content/media/MediaResource.cpp
+++ b/content/media/MediaResource.cpp
@@ -757,16 +757,24 @@ ChannelMediaResource::CacheClientNotifyD
   NS_ASSERTION(NS_IsMainThread(), "Don't call on non-main thread");
   // NOTE: this can be called with the media cache lock held, so don't
   // block or do anything which might try to acquire a lock!
 
   nsCOMPtr<nsIRunnable> event = new DataEnded(mDecoder, aStatus);
   NS_DispatchToMainThread(event, NS_DISPATCH_NORMAL);
 }
 
+void
+ChannelMediaResource::CacheClientNotifyPrincipalChanged()
+{
+  NS_ASSERTION(NS_IsMainThread(), "Don't call on non-main thread");
+
+  mDecoder->NotifyPrincipalChanged();
+}
+
 nsresult
 ChannelMediaResource::CacheClientSeek(PRInt64 aOffset, bool aResume)
 {
   NS_ASSERTION(NS_IsMainThread(), "Don't call on non-main thread");
 
   CloseChannel();
 
   if (aResume) {
--- a/content/media/MediaResource.h
+++ b/content/media/MediaResource.h
@@ -370,16 +370,18 @@ public:
   // Notify that data is available from the cache. This can happen even
   // if this stream didn't read any data, since another stream might have
   // received data for the same resource.
   void CacheClientNotifyDataReceived();
   // Notify that we reached the end of the stream. This can happen even
   // if this stream didn't read any data, since another stream might have
   // received data for the same resource.
   void CacheClientNotifyDataEnded(nsresult aStatus);
+  // Notify that the principal for the cached resource changed.
+  void CacheClientNotifyPrincipalChanged();
 
   // These are called on the main thread by nsMediaCache. These shouldn't block,
   // but they may grab locks --- the media cache is not holding its lock
   // when these are called.
   // Start a new load at the given aOffset. The old load is cancelled
   // and no more data from the old load will be notified via
   // nsMediaCacheStream::NotifyDataReceived/Ended.
   // This can fail.
--- a/content/media/nsAudioAvailableEventManager.cpp
+++ b/content/media/nsAudioAvailableEventManager.cpp
@@ -156,20 +156,26 @@ void nsAudioAvailableEventManager::Queue
     if (aEndTimeSampleOffset > mSignalBufferPosition + audioDataLength) {
       time = (aEndTimeSampleOffset - mSignalBufferPosition - audioDataLength) / 
              mSamplesPerSecond;
     }
 
     // Fill the signalBuffer.
     PRUint32 i;
     float *signalBuffer = mSignalBuffer.get() + mSignalBufferPosition;
-    for (i = 0; i < signalBufferTail; ++i) {
-      signalBuffer[i] = MOZ_CONVERT_AUDIO_SAMPLE(audioData[i]);
+    if (audioData) {
+      for (i = 0; i < signalBufferTail; ++i) {
+        signalBuffer[i] = MOZ_CONVERT_AUDIO_SAMPLE(audioData[i]);
+      }
+    } else {
+      memset(signalBuffer, 0, signalBufferTail*sizeof(signalBuffer[0]));
     }
-    audioData += signalBufferTail;
+    if (audioData) {
+      audioData += signalBufferTail;
+    }
 
     NS_ASSERTION(audioDataLength >= signalBufferTail,
                  "audioDataLength about to wrap past zero to +infinity!");
     audioDataLength -= signalBufferTail;
 
     if (mPendingEvents.Length() > 0) {
       // Check last event timecode to make sure that all queued events
       // are in non-descending sequence.
@@ -199,18 +205,22 @@ void nsAudioAvailableEventManager::Queue
 
   NS_ASSERTION(mSignalBufferPosition + audioDataLength < mSignalBufferLength,
                "Intermediate signal buffer must fit at least one more item.");
 
   if (audioDataLength > 0) {
     // Add data to the signalBuffer.
     PRUint32 i;
     float *signalBuffer = mSignalBuffer.get() + mSignalBufferPosition;
-    for (i = 0; i < audioDataLength; ++i) {
-      signalBuffer[i] = MOZ_CONVERT_AUDIO_SAMPLE(audioData[i]);
+    if (audioData) {
+      for (i = 0; i < audioDataLength; ++i) {
+        signalBuffer[i] = MOZ_CONVERT_AUDIO_SAMPLE(audioData[i]);
+      }
+    } else {
+      memset(signalBuffer, 0, audioDataLength*sizeof(signalBuffer[0]));
     }
     mSignalBufferPosition += audioDataLength;
   }
 }
 
 void nsAudioAvailableEventManager::Clear()
 {
   ReentrantMonitorAutoEnter mon(mReentrantMonitor);
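The nsAudioAvailableEventManager change above makes the queueing paths tolerate a null sample pointer (which PlaySilence now passes) by zero-filling the destination instead of copying. A self-contained sketch of that pattern, assuming float32 samples where MOZ_CONVERT_AUDIO_SAMPLE is the identity:

```cpp
#include <cstring>

// Copy samples into the signal buffer when a source is present; a null
// source means silence, so zero-fill the same region instead.
void FillSignalBuffer(float* aDest, const float* aSrc, unsigned aCount)
{
  if (aSrc) {
    for (unsigned i = 0; i < aCount; ++i) {
      aDest[i] = aSrc[i];
    }
  } else {
    std::memset(aDest, 0, aCount * sizeof(*aDest));
  }
}
```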
--- a/content/media/nsBuiltinDecoder.cpp
+++ b/content/media/nsBuiltinDecoder.cpp
@@ -77,16 +77,40 @@ void nsBuiltinDecoder::SetVolume(double 
 {
   NS_ASSERTION(NS_IsMainThread(), "Should be on main thread.");
   mInitialVolume = aVolume;
   if (mDecoderStateMachine) {
     mDecoderStateMachine->SetVolume(aVolume);
   }
 }
 
+void nsBuiltinDecoder::SetAudioCaptured(bool aCaptured)
+{
+  NS_ASSERTION(NS_IsMainThread(), "Should be on main thread.");
+  mInitialAudioCaptured = aCaptured;
+  if (mDecoderStateMachine) {
+    mDecoderStateMachine->SetAudioCaptured(aCaptured);
+  }
+}
+
+void nsBuiltinDecoder::AddOutputStream(SourceMediaStream* aStream, bool aFinishWhenEnded)
+{
+  NS_ASSERTION(NS_IsMainThread(), "Should be on main thread.");
+
+  {
+    ReentrantMonitorAutoEnter mon(mReentrantMonitor);
+    OutputMediaStream* ms = mOutputStreams.AppendElement();
+    ms->Init(PRInt64(mCurrentTime*USECS_PER_S), aStream, aFinishWhenEnded);
+  }
+
+  // Make sure the state machine thread runs so that any buffered data
+  // is fed into our stream.
+  ScheduleStateMachineThread();
+}
+
 double nsBuiltinDecoder::GetDuration()
 {
   NS_ASSERTION(NS_IsMainThread(), "Should be on main thread.");
   if (mInfiniteStream) {
     return std::numeric_limits<double>::infinity();
   }
   if (mDuration >= 0) {
      return static_cast<double>(mDuration) / static_cast<double>(USECS_PER_S);
@@ -687,16 +711,23 @@ void nsBuiltinDecoder::NotifyDownloadEnd
     ResourceLoaded();
   }
   else if (aStatus != NS_BASE_STREAM_CLOSED) {
     NetworkError();
   }
   UpdateReadyStateForData();
 }
 
+void nsBuiltinDecoder::NotifyPrincipalChanged()
+{
+  if (mElement) {
+    mElement->NotifyDecoderPrincipalChanged();
+  }
+}
+
 void nsBuiltinDecoder::NotifyBytesConsumed(PRInt64 aBytes)
 {
   ReentrantMonitorAutoEnter mon(mReentrantMonitor);
   NS_ASSERTION(OnStateMachineThread() || mDecoderStateMachine->OnDecodeThread(),
                "Should be on play state machine or decode thread.");
   if (!mIgnoreProgressData) {
     mDecoderPosition += aBytes;
     mPlaybackStatistics.AddBytes(aBytes);
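AddOutputStream records the decoder's current position (converted to microseconds) as the new stream's base time, mutating the shared list only under the decoder monitor and then kicking the state machine so buffered data starts flowing into the new stream. A minimal standalone analogue, with std::mutex standing in for Gecko's ReentrantMonitor:

```cpp
#include <cstdint>
#include <mutex>
#include <vector>

static const int64_t USECS_PER_S = 1000000;

struct SourceStream {};

struct Decoder {
  std::mutex mMonitor;       // stands in for mReentrantMonitor
  double mCurrentTime = 0;   // seconds

  struct Output {
    int64_t mInitialTimeUsecs;
    SourceStream* mStream;
    bool mFinishWhenEnded;
  };
  std::vector<Output> mOutputStreams;

  void ScheduleStateMachineThread() {}  // stub: wakes the state machine

  void AddOutputStream(SourceStream* aStream, bool aFinishWhenEnded) {
    {
      std::lock_guard<std::mutex> lock(mMonitor);
      mOutputStreams.push_back(
          {int64_t(mCurrentTime * USECS_PER_S), aStream, aFinishWhenEnded});
    }
    // Kick the state machine outside the lock so buffered data starts
    // flowing into the new stream.
    ScheduleStateMachineThread();
  }
};
```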
--- a/content/media/nsBuiltinDecoder.h
+++ b/content/media/nsBuiltinDecoder.h
@@ -257,16 +257,17 @@ public:
 
   // Return the current decode state. The decoder monitor must be
   // obtained before calling this.
   virtual State GetState() = 0;
 
   // Set the audio volume. The decoder monitor must be obtained before
   // calling this.
   virtual void SetVolume(double aVolume) = 0;
+  virtual void SetAudioCaptured(bool aCapture) = 0;
 
   virtual void Shutdown() = 0;
 
   // Called from the main thread to get the duration. The decoder monitor
   // must be obtained before calling this. It is in units of microseconds.
   virtual PRInt64 GetDuration() = 0;
 
   // Called from the main thread to set the duration of the media resource
@@ -392,27 +393,76 @@ public:
 
   // Seek to the time position in (seconds) from the start of the video.
   virtual nsresult Seek(double aTime);
 
   virtual nsresult PlaybackRateChanged();
 
   virtual void Pause();
   virtual void SetVolume(double aVolume);
+  virtual void SetAudioCaptured(bool aCaptured);
+
+  virtual void AddOutputStream(SourceMediaStream* aStream, bool aFinishWhenEnded);
+  // Protected by mReentrantMonitor. All decoder output is copied to these streams.
+  struct OutputMediaStream {
+    void Init(PRInt64 aInitialTime, SourceMediaStream* aStream, bool aFinishWhenEnded)
+    {
+      mLastAudioPacketTime = -1;
+      mLastAudioPacketEndTime = -1;
+      mAudioFramesWrittenBaseTime = aInitialTime;
+      mAudioFramesWritten = 0;
+      mNextVideoTime = aInitialTime;
+      mStream = aStream;
+      mStreamInitialized = false;
+      mFinishWhenEnded = aFinishWhenEnded;
+      mHaveSentFinish = false;
+      mHaveSentFinishAudio = false;
+      mHaveSentFinishVideo = false;
+    }
+    PRInt64 mLastAudioPacketTime; // microseconds
+    PRInt64 mLastAudioPacketEndTime; // microseconds
+    // Count of audio frames written to the stream
+    PRInt64 mAudioFramesWritten;
+    // Timestamp of the first audio packet whose frames we wrote.
+    PRInt64 mAudioFramesWrittenBaseTime; // microseconds
+    // mNextVideoTime is the end timestamp for the last packet sent to the stream.
+    // Therefore video packets starting at or after this time need to be copied
+    // to the output stream.
+    PRInt64 mNextVideoTime; // microseconds
+    // The last video image sent to the stream. Useful if we need to replicate
+    // the image.
+    nsRefPtr<Image> mLastVideoImage;
+    nsRefPtr<SourceMediaStream> mStream;
+    gfxIntSize mLastVideoImageDisplaySize;
+    // This is set to true when the stream is initialized (audio and
+    // video tracks added).
+    bool mStreamInitialized;
+    bool mFinishWhenEnded;
+    bool mHaveSentFinish;
+    bool mHaveSentFinishAudio;
+    bool mHaveSentFinishVideo;
+  };
+  nsTArray<OutputMediaStream>& OutputStreams()
+  {
+    GetReentrantMonitor().AssertCurrentThreadIn();
+    return mOutputStreams;
+  }
+
   virtual double GetDuration();
 
   virtual void SetInfinite(bool aInfinite);
   virtual bool IsInfinite();
 
   virtual MediaResource* GetResource() { return mResource; }
   virtual already_AddRefed<nsIPrincipal> GetCurrentPrincipal();
 
   virtual void NotifySuspendedStatusChanged();
   virtual void NotifyBytesDownloaded();
   virtual void NotifyDownloadEnded(nsresult aStatus);
+  virtual void NotifyPrincipalChanged();
   // Called by the decode thread to keep track of the number of bytes read
   // from the resource.
   void NotifyBytesConsumed(PRInt64 aBytes);
 
   // Called when the video file has completed downloading.
   // Call on the main thread only.
   void ResourceLoaded();
 
@@ -658,16 +708,19 @@ public:
   // started this is reset to negative.
   double mRequestedSeekTime;
 
   // Duration of the media resource. Set to -1 if unknown.
   // Set when the metadata is loaded. Accessed on the main thread
   // only.
   PRInt64 mDuration;
 
+  // True when playback should start with audio captured (routed to the
+  // output MediaStreams rather than played to the audio hardware).
+
   // True if the media resource is seekable (server supports byte range
   // requests).
   bool mSeekable;
 
   /******
    * The following member variables can be accessed from any thread.
    ******/
 
@@ -681,16 +734,19 @@ public:
   // Media data resource.
   nsAutoPtr<MediaResource> mResource;
 
   // ReentrantMonitor for detecting when the video play state changes. A call
   // to Wait on this monitor will block the thread until the next
   // state change.
   ReentrantMonitor mReentrantMonitor;
 
+  // Data about MediaStreams that are being fed by this decoder.
+  nsTArray<OutputMediaStream> mOutputStreams;
+
   // Set to one of the valid play states. It is protected by the
   // monitor mReentrantMonitor. This monitor must be acquired when reading or
   // writing the state. Any change to the state on the main thread
   // must call NotifyAll on the monitor so the decode thread can wake up.
   PlayState mPlayState;
 
   // The state to change to after a seek or load operation. It must only
   // be changed from the main thread. The decoder monitor must be acquired
--- a/content/media/nsBuiltinDecoderReader.cpp
+++ b/content/media/nsBuiltinDecoderReader.cpp
@@ -66,16 +66,31 @@ extern PRLogModuleInfo* gBuiltinDecoderL
 #else
 #define SEEK_LOG(type, msg)
 #endif
 #else
 #define LOG(type, msg)
 #define SEEK_LOG(type, msg)
 #endif
 
+void
+AudioData::EnsureAudioBuffer()
+{
+  if (mAudioBuffer)
+    return;
+  mAudioBuffer = SharedBuffer::Create(mFrames*mChannels*sizeof(AudioDataValue));
+
+  AudioDataValue* data = static_cast<AudioDataValue*>(mAudioBuffer->Data());
+  for (PRUint32 i = 0; i < mFrames; ++i) {
+    for (PRUint32 j = 0; j < mChannels; ++j) {
+      data[j*mFrames + i] = mAudioData[i*mChannels + j];
+    }
+  }
+}
+
 static bool
 ValidatePlane(const VideoData::YCbCrBuffer::Plane& aPlane)
 {
   return aPlane.mWidth <= PlanarYCbCrImage::MAX_DIMENSION &&
          aPlane.mHeight <= PlanarYCbCrImage::MAX_DIMENSION &&
          aPlane.mWidth * aPlane.mHeight < MAX_VIDEO_WIDTH * MAX_VIDEO_HEIGHT &&
          aPlane.mStride > 0;
 }
@@ -110,17 +125,25 @@ VideoData* VideoData::Create(nsVideoInfo
                              PRInt64 aTime,
                              PRInt64 aEndTime,
                              const YCbCrBuffer& aBuffer,
                              bool aKeyframe,
                              PRInt64 aTimecode,
                              nsIntRect aPicture)
 {
   if (!aContainer) {
-    return nsnull;
+    // Create a dummy VideoData with no image. This gives us something to
+    // send to media streams if necessary.
+    nsAutoPtr<VideoData> v(new VideoData(aOffset,
+                                         aTime,
+                                         aEndTime,
+                                         aKeyframe,
+                                         aTimecode,
+                                         aInfo.mDisplay));
+    return v.forget();
   }
 
   // The following situation should never happen unless there is a bug
   // in the decoder
   if (aBuffer.mPlanes[1].mWidth != aBuffer.mPlanes[2].mWidth ||
       aBuffer.mPlanes[1].mHeight != aBuffer.mPlanes[2].mHeight) {
     NS_ERROR("C planes with different sizes");
     return nsnull;
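EnsureAudioBuffer converts the decoder's interleaved samples (frame-major: frame i, channel j lives at index i*mChannels + j) into the channel-major layout handed to MediaStreams (j*mFrames + i). A standalone sketch of exactly that copy, with a plain vector in place of Gecko's SharedBuffer:

```cpp
#include <cstdint>
#include <vector>

// Transpose frame-major (interleaved) samples into channel-major (planar)
// order, mirroring the loop in AudioData::EnsureAudioBuffer above.
std::vector<float> InterleavedToPlanar(const float* aInterleaved,
                                       uint32_t aFrames, uint32_t aChannels)
{
  std::vector<float> planar(size_t(aFrames) * aChannels);
  for (uint32_t i = 0; i < aFrames; ++i) {
    for (uint32_t j = 0; j < aChannels; ++j) {
      planar[size_t(j) * aFrames + i] = aInterleaved[size_t(i) * aChannels + j];
    }
  }
  return planar;
}
```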
--- a/content/media/nsBuiltinDecoderReader.h
+++ b/content/media/nsBuiltinDecoderReader.h
@@ -38,23 +38,25 @@
  * ***** END LICENSE BLOCK ***** */
 #if !defined(nsBuiltinDecoderReader_h_)
 #define nsBuiltinDecoderReader_h_
 
 #include <nsDeque.h>
 #include "ImageLayers.h"
 #include "nsSize.h"
 #include "mozilla/ReentrantMonitor.h"
+#include "MediaStreamGraph.h"
+#include "SharedBuffer.h"
 
 // Stores info relevant to presenting media frames.
 class nsVideoInfo {
 public:
   nsVideoInfo()
-    : mAudioRate(0),
-      mAudioChannels(0),
+    : mAudioRate(44100),
+      mAudioChannels(2),
       mDisplay(0,0),
       mStereoMode(mozilla::layers::STEREO_MODE_MONO),
       mHasAudio(false),
       mHasVideo(false)
   {}
 
   // Returns true if it's safe to use aPicture as the picture to be
   // extracted inside a frame of size aFrame, and scaled up to and displayed
@@ -108,16 +110,18 @@ typedef float AudioDataValue;
 #define MOZ_CONVERT_AUDIO_SAMPLE(x) (x)
 #define MOZ_SAMPLE_TYPE_FLOAT32 1
 
 #endif
 
 // Holds a chunk of decoded audio frames.
 class AudioData {
 public:
+  typedef mozilla::SharedBuffer SharedBuffer;
+
   AudioData(PRInt64 aOffset,
             PRInt64 aTime,
             PRInt64 aDuration,
             PRUint32 aFrames,
             AudioDataValue* aData,
             PRUint32 aChannels)
   : mOffset(aOffset),
     mTime(aTime),
@@ -129,24 +133,33 @@ public:
     MOZ_COUNT_CTOR(AudioData);
   }
 
   ~AudioData()
   {
     MOZ_COUNT_DTOR(AudioData);
   }
 
+  // If mAudioBuffer is null, creates it from mAudioData.
+  void EnsureAudioBuffer();
+
+  PRInt64 GetEnd() { return mTime + mDuration; }
+
   // Approximate byte offset of the end of the page on which this chunk
   // ends.
   const PRInt64 mOffset;
 
   PRInt64 mTime; // Start time of data in usecs.
   const PRInt64 mDuration; // In usecs.
   const PRUint32 mFrames;
   const PRUint32 mChannels;
+  // At least one of mAudioBuffer/mAudioData must be non-null.
+  // mChannels channels, each with mFrames frames
+  nsRefPtr<SharedBuffer> mAudioBuffer;
+  // mFrames frames, each with mChannels values
   nsAutoArrayPtr<AudioDataValue> mAudioData;
 };
 
 // Holds a decoded video frame, in YCbCr format. These are queued in the reader.
 class VideoData {
 public:
   typedef mozilla::layers::ImageContainer ImageContainer;
   typedef mozilla::layers::Image Image;
@@ -193,16 +206,18 @@ public:
     return new VideoData(aOffset, aTime, aEndTime, aTimecode);
   }
 
   ~VideoData()
   {
     MOZ_COUNT_DTOR(VideoData);
   }
 
+  PRInt64 GetEnd() { return mEndTime; }
+
   // Dimensions at which to display the video frame. The picture region
   // will be scaled to this size. This should be the picture region's
   // dimensions scaled with respect to its aspect ratio.
   nsIntSize mDisplay;
 
   // Approximate byte offset of the end of the frame in the media.
   PRInt64 mOffset;
 
@@ -365,16 +380,35 @@ template <class T> class MediaQueue : pr
     return last->mTime - first->mTime;
   }
 
   void LockedForEach(nsDequeFunctor& aFunctor) const {
     ReentrantMonitorAutoEnter mon(mReentrantMonitor);
     ForEach(aFunctor);
   }
 
+  // Extracts elements from the queue into aResult, in order.
+  // Elements whose end time is before aTime are ignored.
+  void GetElementsAfter(PRInt64 aTime, nsTArray<T*>* aResult) {
+    ReentrantMonitorAutoEnter mon(mReentrantMonitor);
+    if (!GetSize())
+      return;
+    PRInt32 i;
+    for (i = GetSize() - 1; i > 0; --i) {
+      T* v = static_cast<T*>(ObjectAt(i));
+      if (v->GetEnd() < aTime)
+        break;
+    }
+    // Elements at indices less than i have an end time before aTime. It's
+    // also possible that the element at i has an end time before aTime, but
+    // that's OK.
+    for (; i < GetSize(); ++i) {
+      aResult->AppendElement(static_cast<T*>(ObjectAt(i)));
+    }
+  }
+
 private:
   mutable ReentrantMonitor mReentrantMonitor;
 
   // True when we've decoded the last frame of data in the
   // bitstream for which we're queueing frame data.
   bool mEndOfStream;
 };
 
@@ -403,17 +437,17 @@ public:
   // false if the audio is finished, end of file has been reached,
   // or an unrecoverable read error has occurred.
   virtual bool DecodeAudioData() = 0;
 
   // Reads and decodes one video frame. Packets with a timestamp less
   // than aTimeThreshold will be decoded (unless they're not keyframes
   // and aKeyframeSkip is true), but will not be added to the queue.
   virtual bool DecodeVideoFrame(bool &aKeyframeSkip,
-                                  PRInt64 aTimeThreshold) = 0;
+                                PRInt64 aTimeThreshold) = 0;
 
   virtual bool HasAudio() = 0;
   virtual bool HasVideo() = 0;
 
   // Read header data for all bitstreams in the file. Fills mInfo with
   // the data required to present the media. Returns NS_OK on success,
   // or NS_ERROR_FAILURE on failure.
   virtual nsresult ReadMetadata(nsVideoInfo* aInfo) = 0;
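MediaQueue::GetElementsAfter relies on the queue being time-ordered: it scans backwards for the first element whose end time precedes aTime and returns everything from that index on (the element right at the split may itself end before aTime, which callers tolerate). A standalone sketch over std::deque:

```cpp
#include <cstdint>
#include <deque>
#include <vector>

struct Packet {
  int64_t mTime;     // start time, microseconds
  int64_t mEndTime;  // end time, microseconds
  int64_t GetEnd() const { return mEndTime; }
};

void GetElementsAfter(const std::deque<Packet>& aQueue, int64_t aTime,
                      std::vector<const Packet*>* aResult)
{
  if (aQueue.empty())
    return;
  // Walk backwards until we find an element ending before aTime.
  size_t i;
  for (i = aQueue.size() - 1; i > 0; --i) {
    if (aQueue[i].GetEnd() < aTime)
      break;
  }
  // Return everything from the split point onwards, in order.
  for (; i < aQueue.size(); ++i) {
    aResult->push_back(&aQueue[i]);
  }
}
```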
--- a/content/media/nsBuiltinDecoderStateMachine.cpp
+++ b/content/media/nsBuiltinDecoderStateMachine.cpp
@@ -41,16 +41,18 @@
 #include "nsTArray.h"
 #include "nsBuiltinDecoder.h"
 #include "nsBuiltinDecoderReader.h"
 #include "nsBuiltinDecoderStateMachine.h"
 #include "mozilla/mozalloc.h"
 #include "VideoUtils.h"
 #include "nsTimeRanges.h"
 #include "nsDeque.h"
+#include "AudioSegment.h"
+#include "VideoSegment.h"
 
 #include "mozilla/Preferences.h"
 #include "mozilla/StandardInteger.h"
 #include "mozilla/Util.h"
 
 using namespace mozilla;
 using namespace mozilla::layers;
 
@@ -415,30 +417,33 @@ nsBuiltinDecoderStateMachine::nsBuiltinD
   mSeekTime(0),
   mFragmentEndTime(-1),
   mReader(aReader),
   mCurrentFrameTime(0),
   mAudioStartTime(-1),
   mAudioEndTime(-1),
   mVideoFrameEndTime(-1),
   mVolume(1.0),
+  mAudioCaptured(false),
   mSeekable(true),
   mPositionChangeQueued(false),
   mAudioCompleted(false),
   mGotDurationFromMetaData(false),
   mStopDecodeThread(true),
   mDecodeThreadIdle(false),
   mStopAudioThread(true),
   mQuickBuffering(false),
   mIsRunning(false),
   mRunAgain(false),
   mDispatchedRunEvent(false),
   mDecodeThreadWaiting(false),
   mRealTime(aRealTime),
   mRequestedNewDecodeThread(false),
+  mDidThrottleAudioDecoding(false),
+  mDidThrottleVideoDecoding(false),
   mEventManager(aDecoder)
 {
   MOZ_COUNT_CTOR(nsBuiltinDecoderStateMachine);
   NS_ASSERTION(NS_IsMainThread(), "Should be on main thread.");
 
   StateMachineTracker::Instance().EnsureGlobalStateMachine();
 
   // only enable realtime mode when "media.realtime_decoder.enabled" is true.
@@ -516,16 +521,286 @@ void nsBuiltinDecoderStateMachine::Decod
       DecodeSeek();
     }
   }
 
   mDecodeThreadIdle = true;
   LOG(PR_LOG_DEBUG, ("%p Decode thread finished", mDecoder.get()));
 }
 
+void nsBuiltinDecoderStateMachine::SendOutputStreamAudio(AudioData* aAudio,
+                                                         OutputMediaStream* aStream,
+                                                         AudioSegment* aOutput)
+{
+  mDecoder->GetReentrantMonitor().AssertCurrentThreadIn();
+
+  if (aAudio->mTime <= aStream->mLastAudioPacketTime) {
+    // ignore packet that we've already processed
+    return;
+  }
+  aStream->mLastAudioPacketTime = aAudio->mTime;
+  aStream->mLastAudioPacketEndTime = aAudio->GetEnd();
+
+  NS_ASSERTION(aOutput->GetChannels() == aAudio->mChannels,
+               "Wrong number of channels");
+
+  // This logic has to mimic AudioLoop closely to make sure we write
+  // the exact same silences
+  CheckedInt64 audioWrittenOffset = UsecsToFrames(mInfo.mAudioRate,
+      aStream->mAudioFramesWrittenBaseTime + mStartTime) + aStream->mAudioFramesWritten;
+  CheckedInt64 frameOffset = UsecsToFrames(mInfo.mAudioRate, aAudio->mTime);
+  if (!audioWrittenOffset.valid() || !frameOffset.valid())
+    return;
+  if (audioWrittenOffset.value() < frameOffset.value()) {
+    // Write silence to catch up
+    LOG(PR_LOG_DEBUG, ("%p Decoder writing %d frames of silence to MediaStream",
+                       mDecoder.get(), PRInt32(frameOffset.value() - audioWrittenOffset.value())));
+    AudioSegment silence;
+    silence.InitFrom(*aOutput);
+    silence.InsertNullDataAtStart(frameOffset.value() - audioWrittenOffset.value());
+    aStream->mAudioFramesWritten += silence.GetDuration();
+    aOutput->AppendFrom(&silence);
+  }
+
+  PRInt64 offset;
+  if (aStream->mAudioFramesWritten == 0) {
+    NS_ASSERTION(frameOffset.value() <= audioWrittenOffset.value(),
+                 "Otherwise we'd have taken the write-silence path");
+    // We're starting in the middle of a packet. Split the packet.
+    offset = audioWrittenOffset.value() - frameOffset.value();
+  } else {
+    // Write the entire packet.
+    offset = 0;
+  }
+
+  if (offset >= aAudio->mFrames)
+    return;
+
+  aAudio->EnsureAudioBuffer();
+  nsRefPtr<SharedBuffer> buffer = aAudio->mAudioBuffer;
+  aOutput->AppendFrames(buffer.forget(), aAudio->mFrames, PRInt32(offset), aAudio->mFrames,
+                        MOZ_AUDIO_DATA_FORMAT);
+  LOG(PR_LOG_DEBUG, ("%p Decoder writing %d frames of data to MediaStream for AudioData at %lld",
+                     mDecoder.get(), aAudio->mFrames - PRInt32(offset), aAudio->mTime));
+  aStream->mAudioFramesWritten += aAudio->mFrames - PRInt32(offset);
+}
+
+static void WriteVideoToMediaStream(Image* aImage,
+                                    PRInt64 aDuration, const gfxIntSize& aIntrinsicSize,
+                                    VideoSegment* aOutput)
+{
+  nsRefPtr<Image> image = aImage;
+  aOutput->AppendFrame(image.forget(), aDuration, aIntrinsicSize);
+}
+
+static const TrackID TRACK_AUDIO = 1;
+static const TrackID TRACK_VIDEO = 2;
+static const TrackRate RATE_VIDEO = USECS_PER_S;
+
+void nsBuiltinDecoderStateMachine::SendOutputStreamData()
+{
+  mDecoder->GetReentrantMonitor().AssertCurrentThreadIn();
+
+  if (mState == DECODER_STATE_DECODING_METADATA)
+    return;
+
+  nsTArray<OutputMediaStream>& streams = mDecoder->OutputStreams();
+  PRInt64 minLastAudioPacketTime = PR_INT64_MAX;
+
+  bool finished =
+      (!mInfo.mHasAudio || mReader->mAudioQueue.IsFinished()) &&
+      (!mInfo.mHasVideo || mReader->mVideoQueue.IsFinished());
+
+  for (PRUint32 i = 0; i < streams.Length(); ++i) {
+    OutputMediaStream* stream = &streams[i];
+    SourceMediaStream* mediaStream = stream->mStream;
+    StreamTime endPosition = 0;
+
+    if (!stream->mStreamInitialized) {
+      if (mInfo.mHasAudio) {
+        AudioSegment* audio = new AudioSegment();
+        audio->Init(mInfo.mAudioChannels);
+        mediaStream->AddTrack(TRACK_AUDIO, mInfo.mAudioRate, 0, audio);
+      }
+      if (mInfo.mHasVideo) {
+        VideoSegment* video = new VideoSegment();
+        mediaStream->AddTrack(TRACK_VIDEO, RATE_VIDEO, 0, video);
+      }
+      stream->mStreamInitialized = true;
+    }
+
+    if (mInfo.mHasAudio) {
+      nsAutoTArray<AudioData*,10> audio;
+      // It's OK to hold references to the AudioData because while audio
+      // is captured, only the decoder thread pops from the queue (see below).
+      mReader->mAudioQueue.GetElementsAfter(stream->mLastAudioPacketTime, &audio);
+      AudioSegment output;
+      output.Init(mInfo.mAudioChannels);
+      for (PRUint32 i = 0; i < audio.Length(); ++i) {
+        AudioData* a = audio[i];
+        SendOutputStreamAudio(a, stream, &output);
+      }
+      if (output.GetDuration() > 0) {
+        mediaStream->AppendToTrack(TRACK_AUDIO, &output);
+      }
+      if (mReader->mAudioQueue.IsFinished() && !stream->mHaveSentFinishAudio) {
+        mediaStream->EndTrack(TRACK_AUDIO);
+        stream->mHaveSentFinishAudio = true;
+      }
+      minLastAudioPacketTime = NS_MIN(minLastAudioPacketTime, stream->mLastAudioPacketTime);
+      endPosition = NS_MAX(endPosition,
+          TicksToTimeRoundDown(mInfo.mAudioRate, stream->mAudioFramesWritten));
+    }
+
+    if (mInfo.mHasVideo) {
+      nsAutoTArray<VideoData*,10> video;
+      // It's OK to hold references to the VideoData because only the decoder
+      // thread pops from the queue.
+      mReader->mVideoQueue.GetElementsAfter(stream->mNextVideoTime + mStartTime, &video);
+      VideoSegment output;
+      for (PRUint32 i = 0; i < video.Length(); ++i) {
+        VideoData* v = video[i];
+        if (stream->mNextVideoTime + mStartTime < v->mTime) {
+          LOG(PR_LOG_DEBUG, ("%p Decoder writing last video to MediaStream for %lld ms",
+                             mDecoder.get(), v->mTime - (stream->mNextVideoTime + mStartTime)));
+          // Write last video frame to catch up. mLastVideoImage can be null here
+          // which is fine, it just means there's no video.
+          WriteVideoToMediaStream(stream->mLastVideoImage,
+              v->mTime - (stream->mNextVideoTime + mStartTime), stream->mLastVideoImageDisplaySize,
+              &output);
+          stream->mNextVideoTime = v->mTime - mStartTime;
+        }
+        if (stream->mNextVideoTime + mStartTime < v->mEndTime) {
+          LOG(PR_LOG_DEBUG, ("%p Decoder writing video frame %lld to MediaStream",
+                             mDecoder.get(), v->mTime));
+          WriteVideoToMediaStream(v->mImage,
+              v->mEndTime - (stream->mNextVideoTime + mStartTime), v->mDisplay,
+              &output);
+          stream->mNextVideoTime = v->mEndTime - mStartTime;
+          stream->mLastVideoImage = v->mImage;
+          stream->mLastVideoImageDisplaySize = v->mDisplay;
+        } else {
+          LOG(PR_LOG_DEBUG, ("%p Decoder skipping writing video frame %lld to MediaStream",
+                             mDecoder.get(), v->mTime));
+        }
+      }
+      if (output.GetDuration() > 0) {
+        mediaStream->AppendToTrack(TRACK_VIDEO, &output);
+      }
+      if (mReader->mVideoQueue.IsFinished() && !stream->mHaveSentFinishVideo) {
+        mediaStream->EndTrack(TRACK_VIDEO);
+        stream->mHaveSentFinishVideo = true;
+      }
+      endPosition = NS_MAX(endPosition,
+          TicksToTimeRoundDown(RATE_VIDEO, stream->mNextVideoTime));
+    }
+
+    if (!stream->mHaveSentFinish) {
+      stream->mStream->AdvanceKnownTracksTime(endPosition);
+    }
+
+    if (finished && !stream->mHaveSentFinish) {
+      stream->mHaveSentFinish = true;
+      stream->mStream->Finish();
+    }
+  }
+
+  if (mAudioCaptured) {
+    // Discard audio packets that are no longer needed.
+    PRInt64 audioPacketTimeToDiscard =
+        NS_MIN(minLastAudioPacketTime, mStartTime + mCurrentFrameTime);
+    while (true) {
+      nsAutoPtr<AudioData> a(mReader->mAudioQueue.PopFront());
+      if (!a)
+        break;
+      // Packet times are not 100% reliable, so this may discard packets that
+      // actually contain data for mCurrentFrameTime. This means that if
+      // someone creates a new output stream, we may not have the audio for
+      // its very start; that's OK, we'll play silence instead for a brief
+      // moment. Seeking to this time would have a similar issue for such
+      // badly muxed resources.
+      if (a->GetEnd() >= audioPacketTimeToDiscard) {
+        mReader->mAudioQueue.PushFront(a.forget());
+        break;
+      }
+    }
+
+    if (finished) {
+      mAudioCompleted = true;
+      UpdateReadyState();
+    }
+  }
+}
+
+bool nsBuiltinDecoderStateMachine::HaveEnoughDecodedAudio(PRInt64 aAmpleAudioUSecs)
+{
+  mDecoder->GetReentrantMonitor().AssertCurrentThreadIn();
+
+  if (mReader->mAudioQueue.GetSize() == 0 ||
+      GetDecodedAudioDuration() < aAmpleAudioUSecs) {
+    return false;
+  }
+  if (!mAudioCaptured) {
+    return true;
+  }
+
+  nsTArray<OutputMediaStream>& streams = mDecoder->OutputStreams();
+  for (PRUint32 i = 0; i < streams.Length(); ++i) {
+    OutputMediaStream* stream = &streams[i];
+    if (!stream->mHaveSentFinishAudio &&
+        !stream->mStream->HaveEnoughBuffered(TRACK_AUDIO)) {
+      return false;
+    }
+  }
+
+  nsIThread* thread = GetStateMachineThread();
+  nsCOMPtr<nsIRunnable> callback = NS_NewRunnableMethod(this,
+      &nsBuiltinDecoderStateMachine::ScheduleStateMachineWithLockAndWakeDecoder);
+  for (PRUint32 i = 0; i < streams.Length(); ++i) {
+    OutputMediaStream* stream = &streams[i];
+    if (!stream->mHaveSentFinishAudio) {
+      stream->mStream->DispatchWhenNotEnoughBuffered(TRACK_AUDIO, thread, callback);
+    }
+  }
+  return true;
+}
+
+bool nsBuiltinDecoderStateMachine::HaveEnoughDecodedVideo()
+{
+  mDecoder->GetReentrantMonitor().AssertCurrentThreadIn();
+
+  if (static_cast<PRUint32>(mReader->mVideoQueue.GetSize()) < AMPLE_VIDEO_FRAMES) {
+    return false;
+  }
+
+  nsTArray<OutputMediaStream>& streams = mDecoder->OutputStreams();
+  if (streams.IsEmpty()) {
+    return true;
+  }
+
+  for (PRUint32 i = 0; i < streams.Length(); ++i) {
+    OutputMediaStream* stream = &streams[i];
+    if (!stream->mHaveSentFinishVideo &&
+        !stream->mStream->HaveEnoughBuffered(TRACK_VIDEO)) {
+      return false;
+    }
+  }
+
+  nsIThread* thread = GetStateMachineThread();
+  nsCOMPtr<nsIRunnable> callback = NS_NewRunnableMethod(this,
+      &nsBuiltinDecoderStateMachine::ScheduleStateMachineWithLockAndWakeDecoder);
+  for (PRUint32 i = 0; i < streams.Length(); ++i) {
+    OutputMediaStream* stream = &streams[i];
+    if (!stream->mHaveSentFinishVideo) {
+      stream->mStream->DispatchWhenNotEnoughBuffered(TRACK_VIDEO, thread, callback);
+    }
+  }
+  return true;
+}
+
 void nsBuiltinDecoderStateMachine::DecodeLoop()
 {
   LOG(PR_LOG_DEBUG, ("%p Start DecodeLoop()", mDecoder.get()));
 
   mDecoder->GetReentrantMonitor().AssertCurrentThreadIn();
   NS_ASSERTION(OnDecodeThread(), "Should be on decode thread.");
 
   // We want to "pump" the decode until we've got a few frames decoded
@@ -553,17 +828,16 @@ void nsBuiltinDecoderStateMachine::Decod
   PRInt64 lowAudioThreshold = LOW_AUDIO_USECS;
 
   // Our local ample audio threshold. If we increase lowAudioThreshold, we'll
   // also increase this too appropriately (we don't want lowAudioThreshold to
   // be greater than ampleAudioThreshold, else we'd stop decoding!).
   PRInt64 ampleAudioThreshold = AMPLE_AUDIO_USECS;
 
   MediaQueue<VideoData>& videoQueue = mReader->mVideoQueue;
-  MediaQueue<AudioData>& audioQueue = mReader->mAudioQueue;
 
   // Main decode loop.
   bool videoPlaying = HasVideo();
   bool audioPlaying = HasAudio();
   while ((mState == DECODER_STATE_DECODING || mState == DECODER_STATE_BUFFERING) &&
          !mStopDecodeThread &&
          (videoPlaying || audioPlaying))
   {
@@ -587,30 +861,33 @@ void nsBuiltinDecoderStateMachine::Decod
     // audio, or if we're low on video, provided we're not running low on
     // data to decode. If we're running low on downloaded data to decode,
     // we won't start keyframe skipping, as we'll be pausing playback to buffer
     // soon anyway and we'll want to be able to display frames immediately
     // after buffering finishes.
     if (mState == DECODER_STATE_DECODING &&
         !skipToNextKeyframe &&
         videoPlaying &&
-        ((!audioPump && audioPlaying && GetDecodedAudioDuration() < lowAudioThreshold) ||
-         (!videoPump &&
-           videoPlaying &&
-           static_cast<PRUint32>(videoQueue.GetSize()) < LOW_VIDEO_FRAMES)) &&
+        ((!audioPump && audioPlaying && !mDidThrottleAudioDecoding && GetDecodedAudioDuration() < lowAudioThreshold) ||
+         (!videoPump && videoPlaying && !mDidThrottleVideoDecoding &&
+          static_cast<PRUint32>(videoQueue.GetSize()) < LOW_VIDEO_FRAMES)) &&
         !HasLowUndecodedData())
 
     {
       skipToNextKeyframe = true;
       LOG(PR_LOG_DEBUG, ("%p Skipping video decode to the next keyframe", mDecoder.get()));
     }
 
     // Video decode.
-    if (videoPlaying &&
-        static_cast<PRUint32>(videoQueue.GetSize()) < AMPLE_VIDEO_FRAMES)
+    bool throttleVideoDecoding = !videoPlaying || HaveEnoughDecodedVideo();
+    if (mDidThrottleVideoDecoding && !throttleVideoDecoding) {
+      videoPump = true;
+    }
+    mDidThrottleVideoDecoding = throttleVideoDecoding;
+    if (!throttleVideoDecoding)
     {
       // Time the video decode, so that if it's slow, we can increase our low
       // audio threshold to reduce the chance of an audio underrun while we're
       // waiting for a video decode to complete.
       TimeDuration decodeTime;
       {
         PRInt64 currentTime = GetMediaTime();
         ReentrantMonitorAutoExit exitMon(mDecoder->GetReentrantMonitor());
@@ -627,39 +904,40 @@ void nsBuiltinDecoderStateMachine::Decod
                                      ampleAudioThreshold);
         LOG(PR_LOG_DEBUG,
             ("Slow video decode, set lowAudioThreshold=%lld ampleAudioThreshold=%lld",
              lowAudioThreshold, ampleAudioThreshold));
       }
     }
 
     // Audio decode.
-    if (audioPlaying &&
-        (GetDecodedAudioDuration() < ampleAudioThreshold || audioQueue.GetSize() == 0))
-    {
+    bool throttleAudioDecoding = !audioPlaying || HaveEnoughDecodedAudio(ampleAudioThreshold);
+    if (mDidThrottleAudioDecoding && !throttleAudioDecoding) {
+      audioPump = true;
+    }
+    mDidThrottleAudioDecoding = throttleAudioDecoding;
+    if (!mDidThrottleAudioDecoding) {
       ReentrantMonitorAutoExit exitMon(mDecoder->GetReentrantMonitor());
       audioPlaying = mReader->DecodeAudioData();
     }
 
+    SendOutputStreamData();
+
     // Notify to ensure that the AudioLoop() is not waiting, in case it was
     // waiting for more audio to be decoded.
     mDecoder->GetReentrantMonitor().NotifyAll();
 
     // The ready state can change when we've decoded data, so update the
     // ready state, so that DOM events can fire.
     UpdateReadyState();
 
     if ((mState == DECODER_STATE_DECODING || mState == DECODER_STATE_BUFFERING) &&
         !mStopDecodeThread &&
         (videoPlaying || audioPlaying) &&
-        (!audioPlaying || (GetDecodedAudioDuration() >= ampleAudioThreshold &&
-                           audioQueue.GetSize() > 0))
-        &&
-        (!videoPlaying ||
-          static_cast<PRUint32>(videoQueue.GetSize()) >= AMPLE_VIDEO_FRAMES))
+        throttleAudioDecoding && throttleVideoDecoding)
     {
       // All active bitstreams' decode is well ahead of the playback
       // position, we may as well wait for the playback to catch up. Note the
       // audio push thread acquires and notifies the decoder monitor every time
       // it pops AudioData off the audio queue. So if the audio push thread pops
       // the last AudioData off the audio queue right after that queue reported
       // it was non-empty here, we'll receive a notification on the decoder
       // monitor which will wake us up shortly after we sleep, thus preventing
@@ -692,16 +970,25 @@ void nsBuiltinDecoderStateMachine::Decod
 
 bool nsBuiltinDecoderStateMachine::IsPlaying()
 {
   mDecoder->GetReentrantMonitor().AssertCurrentThreadIn();
 
   return !mPlayStartTime.IsNull();
 }
 
+static void WriteSilence(nsAudioStream* aStream, PRUint32 aFrames)
+{
+  PRUint32 numSamples = aFrames * aStream->GetChannels();
+  nsAutoTArray<AudioDataValue, 1000> buf;
+  buf.SetLength(numSamples);
+  memset(buf.Elements(), 0, numSamples * sizeof(AudioDataValue));
+  aStream->Write(buf.Elements(), aFrames);
+}
+
 void nsBuiltinDecoderStateMachine::AudioLoop()
 {
   NS_ASSERTION(OnAudioThread(), "Should be on audio thread.");
   LOG(PR_LOG_DEBUG, ("%p Begun audio thread/loop", mDecoder.get()));
   PRInt64 audioDuration = 0;
   PRInt64 audioStartTime = -1;
   PRUint32 channels, rate;
   double volume = -1;
@@ -755,16 +1042,17 @@ void nsBuiltinDecoderStateMachine::Audio
       {
         if (!IsPlaying() && !mAudioStream->IsPaused()) {
           mAudioStream->Pause();
         }
         mon.Wait();
       }
 
       // If we're shutting down, break out and exit the audio thread.
+      // Also break out if audio is being captured.
       if (mState == DECODER_STATE_SHUTDOWN ||
           mStopAudioThread ||
           mReader->mAudioQueue.AtEndOfStream())
       {
         break;
       }
 
       // We only want to go to the expense of changing the volume if
@@ -808,16 +1096,18 @@ void nsBuiltinDecoderStateMachine::Audio
     PRInt64 framesWritten = 0;
     if (missingFrames.value() > 0) {
       // The next audio chunk begins some time after the end of the last chunk
       // we pushed to the audio hardware. We must push silence into the audio
       // hardware so that the next audio chunk begins playback at the correct
       // time.
       missingFrames = NS_MIN(static_cast<PRInt64>(PR_UINT32_MAX),
                              missingFrames.value());
+      LOG(PR_LOG_DEBUG, ("%p Decoder playing %d frames of silence",
+                         mDecoder.get(), PRInt32(missingFrames.value())));
       framesWritten = PlaySilence(static_cast<PRUint32>(missingFrames.value()),
                                   channels, playedFrames.value());
     } else {
       framesWritten = PlayFromAudioQueue(sampleTime.value(), channels);
     }
     audioDuration += framesWritten;
     {
       ReentrantMonitorAutoEnter mon(mDecoder->GetReentrantMonitor());
@@ -845,20 +1135,17 @@ void nsBuiltinDecoderStateMachine::Audio
         // written isn't an exact multiple of minWriteFrames, we'll have
         // left over audio data which hasn't yet been written to the hardware,
         // and so that audio will not start playing. Write silence to ensure
         // the last block gets pushed to hardware, so that playback starts.
         PRInt64 framesToWrite = minWriteFrames - unplayedFrames;
         if (framesToWrite < PR_UINT32_MAX / channels) {
           // Write silence manually rather than using PlaySilence(), so that
           // the AudioAPI doesn't get a copy of the audio frames.
-          PRUint32 numSamples = framesToWrite * channels;
-          nsAutoArrayPtr<AudioDataValue> buf(new AudioDataValue[numSamples]);
-          memset(buf.get(), 0, numSamples * sizeof(AudioDataValue));
-          mAudioStream->Write(buf, framesToWrite);
+          WriteSilence(mAudioStream, framesToWrite);
         }
       }
 
       PRInt64 oldPosition = -1;
       PRInt64 position = GetMediaTime();
       while (oldPosition != position &&
              mAudioEndTime - position > 0 &&
              mState != DECODER_STATE_SEEKING &&
@@ -880,20 +1167,22 @@ void nsBuiltinDecoderStateMachine::Audio
   }
   LOG(PR_LOG_DEBUG, ("%p Reached audio stream end.", mDecoder.get()));
   {
    // Must hold lock while nulling out the audio stream to prevent
     // state machine thread trying to use it while we're destroying it.
     ReentrantMonitorAutoEnter mon(mDecoder->GetReentrantMonitor());
     mAudioStream = nsnull;
     mEventManager.Clear();
-    mAudioCompleted = true;
-    UpdateReadyState();
-    // Kick the decode thread; it may be sleeping waiting for this to finish.
-    mDecoder->GetReentrantMonitor().NotifyAll();
+    if (!mAudioCaptured) {
+      mAudioCompleted = true;
+      UpdateReadyState();
+      // Kick the decode thread; it may be sleeping waiting for this to finish.
+      mDecoder->GetReentrantMonitor().NotifyAll();
+    }
   }
 
   // Must not hold the decoder monitor while we shutdown the audio stream, as
   // it makes a synchronous dispatch on Android.
   audioStream->Shutdown();
   audioStream = nsnull;
 
   LOG(PR_LOG_DEBUG, ("%p Audio stream finished playing, audio thread exit", mDecoder.get()));
@@ -903,49 +1192,49 @@ PRUint32 nsBuiltinDecoderStateMachine::P
                                                    PRUint32 aChannels,
                                                    PRUint64 aFrameOffset)
 
 {
   NS_ASSERTION(OnAudioThread(), "Only call on audio thread.");
   NS_ASSERTION(!mAudioStream->IsPaused(), "Don't play when paused");
   PRUint32 maxFrames = SILENCE_BYTES_CHUNK / aChannels / sizeof(AudioDataValue);
   PRUint32 frames = NS_MIN(aFrames, maxFrames);
-  PRUint32 numSamples = frames * aChannels;
-  nsAutoArrayPtr<AudioDataValue> buf(new AudioDataValue[numSamples]);
-  memset(buf.get(), 0, numSamples * sizeof(AudioDataValue));
-  mAudioStream->Write(buf, frames);
+  WriteSilence(mAudioStream, frames);
   // Dispatch events to the DOM for the audio just written.
-  mEventManager.QueueWrittenAudioData(buf.get(), frames * aChannels,
+  mEventManager.QueueWrittenAudioData(nsnull, frames * aChannels,
                                       (aFrameOffset + frames) * aChannels);
   return frames;
 }
 
 PRUint32 nsBuiltinDecoderStateMachine::PlayFromAudioQueue(PRUint64 aFrameOffset,
                                                           PRUint32 aChannels)
 {
   NS_ASSERTION(OnAudioThread(), "Only call on audio thread.");
   NS_ASSERTION(!mAudioStream->IsPaused(), "Don't play when paused");
   nsAutoPtr<AudioData> audio(mReader->mAudioQueue.PopFront());
   {
     ReentrantMonitorAutoEnter mon(mDecoder->GetReentrantMonitor());
     NS_WARN_IF_FALSE(IsPlaying(), "Should be playing");
+    NS_ASSERTION(!mAudioCaptured, "Audio cannot be captured here!");
     // Awaken the decode loop if it's waiting for space to free up in the
     // audio queue.
     mDecoder->GetReentrantMonitor().NotifyAll();
   }
   PRInt64 offset = -1;
   PRUint32 frames = 0;
   // The state machine could have paused since we've released the decoder
   // monitor and acquired the audio monitor. Rather than acquire both
   // monitors, the audio stream also maintains whether its paused or not.
   // This prevents us from doing a blocking write while holding the audio
   // monitor while paused; we would block, and the state machine won't be
   // able to acquire the audio monitor in order to resume or destroy the
   // audio stream.
   if (!mAudioStream->IsPaused()) {
+    LOG(PR_LOG_DEBUG, ("%p Decoder playing %d frames of data to stream for AudioData at %lld",
+                       mDecoder.get(), audio->mFrames, audio->mTime));
     mAudioStream->Write(audio->mAudioData,
                         audio->mFrames);
 
     offset = audio->mOffset;
     frames = audio->mFrames;
 
     // Dispatch events to the DOM for the audio just written.
     mEventManager.QueueWrittenAudioData(audio->mAudioData.get(),
@@ -1072,16 +1361,26 @@ nsHTMLMediaElement::NextFrameStatus nsBu
 
 void nsBuiltinDecoderStateMachine::SetVolume(double volume)
 {
   NS_ASSERTION(NS_IsMainThread(), "Should be on main thread.");
   ReentrantMonitorAutoEnter mon(mDecoder->GetReentrantMonitor());
   mVolume = volume;
 }
 
+void nsBuiltinDecoderStateMachine::SetAudioCaptured(bool aCaptured)
+{
+  NS_ASSERTION(NS_IsMainThread(), "Should be on main thread.");
+  ReentrantMonitorAutoEnter mon(mDecoder->GetReentrantMonitor());
+  if (!mAudioCaptured && aCaptured) {
+    StopAudioThread();
+  }
+  mAudioCaptured = aCaptured;
+}
+
 double nsBuiltinDecoderStateMachine::GetCurrentTime() const
 {
   NS_ASSERTION(NS_IsMainThread() ||
                OnStateMachineThread() ||
                OnDecodeThread(),
                "Should be on main, decode, or state machine thread.");
 
   return static_cast<double>(mCurrentFrameTime) / static_cast<double>(USECS_PER_S);
@@ -1358,17 +1657,17 @@ nsBuiltinDecoderStateMachine::StartDecod
 
 nsresult
 nsBuiltinDecoderStateMachine::StartAudioThread()
 {
   NS_ASSERTION(OnStateMachineThread() || OnDecodeThread(),
                "Should be on state machine or decode thread.");
   mDecoder->GetReentrantMonitor().AssertCurrentThreadIn();
   mStopAudioThread = false;
-  if (HasAudio() && !mAudioThread) {
+  if (HasAudio() && !mAudioThread && !mAudioCaptured) {
     nsresult rv = NS_NewThread(getter_AddRefs(mAudioThread),
                                nsnull,
                                MEDIA_THREAD_STACK_SIZE);
     if (NS_FAILED(rv)) {
       LOG(PR_LOG_DEBUG, ("%p Changed state to SHUTDOWN because failed to create audio thread", mDecoder.get()));
       mState = DECODER_STATE_SHUTDOWN;
       return rv;
     }
@@ -1537,16 +1836,19 @@ nsresult nsBuiltinDecoderStateMachine::D
 
 void nsBuiltinDecoderStateMachine::DecodeSeek()
 {
   NS_ASSERTION(OnDecodeThread(), "Should be on decode thread.");
   mDecoder->GetReentrantMonitor().AssertCurrentThreadIn();
   NS_ASSERTION(mState == DECODER_STATE_SEEKING,
                "Only call when in seeking state");
 
+  mDidThrottleAudioDecoding = false;
+  mDidThrottleVideoDecoding = false;
+
   // During the seek we don't hold the decoder monitor; otherwise long
   // seek operations could block the main thread.
   // The events dispatched to the main thread are SYNC calls.
   // These calls are made outside of the decode monitor lock so
   // it is safe for the main thread to make calls that acquire
   // the lock since it won't deadlock. We check the state when
   // acquiring the lock again in case shutdown has occurred
   // during the time when we didn't have the lock.
@@ -1591,25 +1893,24 @@ void nsBuiltinDecoderStateMachine::Decod
       AudioData* audio = HasAudio() ? mReader->mAudioQueue.PeekFront() : nsnull;
       NS_ASSERTION(!audio || (audio->mTime <= seekTime &&
                               seekTime <= audio->mTime + audio->mDuration),
                     "Seek target should lie inside the first audio block after seek");
       PRInt64 startTime = (audio && audio->mTime < seekTime) ? audio->mTime : seekTime;
       mAudioStartTime = startTime;
       mPlayDuration = startTime - mStartTime;
       if (HasVideo()) {
-        nsAutoPtr<VideoData> video(mReader->mVideoQueue.PeekFront());
+        VideoData* video = mReader->mVideoQueue.PeekFront();
         if (video) {
           NS_ASSERTION(video->mTime <= seekTime && seekTime <= video->mEndTime,
                         "Seek target should lie inside the first frame after seek");
           {
             ReentrantMonitorAutoExit exitMon(mDecoder->GetReentrantMonitor());
             RenderVideoFrame(video, TimeStamp::Now());
           }
-          mReader->mVideoQueue.PopFront();
           nsCOMPtr<nsIRunnable> event =
             NS_NewRunnableMethod(mDecoder, &nsBuiltinDecoder::Invalidate);
           NS_DispatchToMainThread(event, NS_DISPATCH_NORMAL);
         }
       }
     }
   }
   mDecoder->StartProgressUpdates();
@@ -1877,28 +2178,31 @@ void nsBuiltinDecoderStateMachine::Rende
   NS_ASSERTION(OnStateMachineThread() || OnDecodeThread(),
                "Should be on state machine or decode thread.");
   mDecoder->GetReentrantMonitor().AssertNotCurrentThreadIn();
 
   if (aData->mDuplicate) {
     return;
   }
 
+  LOG(PR_LOG_DEBUG, ("%p Decoder playing video frame %lld",
+                     mDecoder.get(), aData->mTime));
+
   VideoFrameContainer* container = mDecoder->GetVideoFrameContainer();
   if (container) {
     container->SetCurrentFrame(aData->mDisplay, aData->mImage, aTarget);
   }
 }
 
 PRInt64
 nsBuiltinDecoderStateMachine::GetAudioClock()
 {
   NS_ASSERTION(OnStateMachineThread(), "Should be on state machine thread.");
   mDecoder->GetReentrantMonitor().AssertCurrentThreadIn();
-  if (!HasAudio())
+  if (!HasAudio() || mAudioCaptured)
     return -1;
   // We must hold the decoder monitor while using the audio stream off the
   // audio thread to ensure that it doesn't get destroyed on the audio thread
   // while we're using it.
   if (!mAudioStream) {
     // Audio thread hasn't played any data yet.
     return mAudioStartTime;
   }
@@ -1948,16 +2252,17 @@ void nsBuiltinDecoderStateMachine::Advan
   PRInt64 remainingTime = AUDIO_DURATION_USECS;
   NS_ASSERTION(clock_time >= mStartTime, "Should have positive clock time.");
   nsAutoPtr<VideoData> currentFrame;
   if (mReader->mVideoQueue.GetSize() > 0) {
     VideoData* frame = mReader->mVideoQueue.PeekFront();
     while (mRealTime || clock_time >= frame->mTime) {
       mVideoFrameEndTime = frame->mEndTime;
       currentFrame = frame;
+      LOG(PR_LOG_DEBUG, ("%p Decoder discarding video frame %lld", mDecoder.get(), frame->mTime));
       mReader->mVideoQueue.PopFront();
       // Notify the decode thread that the video queue's buffers may have
   // freed up space for more frames.
       mDecoder->GetReentrantMonitor().NotifyAll();
       mDecoder->UpdatePlaybackOffset(frame->mOffset);
       if (mReader->mVideoQueue.GetSize() == 0)
         break;
       frame = mReader->mVideoQueue.PeekFront();
@@ -2235,16 +2540,22 @@ void nsBuiltinDecoderStateMachine::Timeo
   // as soon as possible. Nothing else needs to be done; the state machine is
   // going to run anyway.
 }
 
 nsresult nsBuiltinDecoderStateMachine::ScheduleStateMachine() {
   return ScheduleStateMachine(0);
 }
 
+void nsBuiltinDecoderStateMachine::ScheduleStateMachineWithLockAndWakeDecoder() {
+  ReentrantMonitorAutoEnter mon(mDecoder->GetReentrantMonitor());
+  mon.NotifyAll();
+  ScheduleStateMachine(0);
+}
+
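
Because this helper takes the decoder monitor itself, callers on other
threads can reach it with the same runnable-dispatch pattern this file
already uses elsewhere, for example:

  // Sketch: bounce to the main thread, then reschedule and wake the decoder.
  nsCOMPtr<nsIRunnable> event = NS_NewRunnableMethod(
    this, &nsBuiltinDecoderStateMachine::ScheduleStateMachineWithLockAndWakeDecoder);
  NS_DispatchToMainThread(event, NS_DISPATCH_NORMAL);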
 nsresult nsBuiltinDecoderStateMachine::ScheduleStateMachine(PRInt64 aUsecs) {
   mDecoder->GetReentrantMonitor().AssertCurrentThreadIn();
   NS_ABORT_IF_FALSE(GetStateMachineThread(),
     "Must have a state machine thread to schedule");
 
   if (mState == DECODER_STATE_SHUTDOWN) {
     return NS_ERROR_FAILURE;
   }
--- a/content/media/nsBuiltinDecoderStateMachine.h
+++ b/content/media/nsBuiltinDecoderStateMachine.h
@@ -112,16 +112,18 @@ hardware (via nsAudioStream and libsydne
 #include "prmem.h"
 #include "nsThreadUtils.h"
 #include "nsBuiltinDecoder.h"
 #include "nsBuiltinDecoderReader.h"
 #include "nsAudioAvailableEventManager.h"
 #include "nsHTMLMediaElement.h"
 #include "mozilla/ReentrantMonitor.h"
 #include "nsITimer.h"
+#include "AudioSegment.h"
+#include "VideoSegment.h"
 
 /*
   The state machine class. This manages the decoding and seeking in the
   nsBuiltinDecoderReader on the decode thread, and A/V sync on the shared
   state machine thread, and controls the audio "push" thread.
 
   All internal state is synchronised via the decoder monitor. State changes
   are either propagated by NotifyAll on the monitor (typically when state
@@ -132,28 +134,33 @@ hardware (via nsAudioStream and libsydne
 */
 class nsBuiltinDecoderStateMachine : public nsDecoderStateMachine
 {
 public:
   typedef mozilla::ReentrantMonitor ReentrantMonitor;
   typedef mozilla::TimeStamp TimeStamp;
   typedef mozilla::TimeDuration TimeDuration;
   typedef mozilla::VideoFrameContainer VideoFrameContainer;
+  typedef nsBuiltinDecoder::OutputMediaStream OutputMediaStream;
+  typedef mozilla::SourceMediaStream SourceMediaStream;
+  typedef mozilla::AudioSegment AudioSegment;
+  typedef mozilla::VideoSegment VideoSegment;
 
   nsBuiltinDecoderStateMachine(nsBuiltinDecoder* aDecoder, nsBuiltinDecoderReader* aReader, bool aRealTime = false);
   ~nsBuiltinDecoderStateMachine();
 
   // nsDecoderStateMachine interface
   virtual nsresult Init(nsDecoderStateMachine* aCloneDonor);
   State GetState()
   { 
     mDecoder->GetReentrantMonitor().AssertCurrentThreadIn();
     return mState; 
   }
   virtual void SetVolume(double aVolume);
+  virtual void SetAudioCaptured(bool aCapture);
   virtual void Shutdown();
   virtual PRInt64 GetDuration();
   virtual void SetDuration(PRInt64 aDuration);
   void SetEndTime(PRInt64 aEndTime);
   virtual bool OnDecodeThread() const {
     return IsCurrentThread(mDecodeThread);
   }
 
@@ -244,16 +251,20 @@ public:
   static nsIThread* GetStateMachineThread();
 
   // Schedules the shared state machine thread to run the state machine.
   // If the state machine thread is currently running the state machine,
   // we wait until that has completely finished before running the state
   // machine again.
   nsresult ScheduleStateMachine();
 
+  // Calls ScheduleStateMachine() after taking the decoder lock. Also
+  // notifies the decoder thread in case it's waiting on the decoder lock.
+  void ScheduleStateMachineWithLockAndWakeDecoder();
+
   // Schedules the shared state machine thread to run the state machine
   // in aUsecs microseconds from now, if it's not already scheduled to run
   // earlier, in which case the request is discarded.
   nsresult ScheduleStateMachine(PRInt64 aUsecs);
 
   // Creates and starts a new decode thread. Don't call this directly,
   // request a new decode thread by calling
   // StateMachineTracker::RequestCreateDecodeThread().
@@ -268,16 +279,22 @@ public:
 
   // Drop reference to decoder.  Only called during shutdown dance.
   void ReleaseDecoder() { mDecoder = nsnull; }
 
   // Called when a "MozAudioAvailable" event listener is added to the media
   // element. Called on the main thread.
   void NotifyAudioAvailableListener();
 
+  // Copy queued audio/video data in the reader to any output MediaStreams that
+  // need it.
+  void SendOutputStreamData();
+  bool HaveEnoughDecodedAudio(PRInt64 aAmpleAudioUSecs);
+  bool HaveEnoughDecodedVideo();
+
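
These predicates pair with the mDidThrottleAudioDecoding and
mDidThrottleVideoDecoding flags declared further down: when a queue drops
from "ample" back to "needs data", the decode loop must be pumped rather
than left waiting. A sketch of the intended shape inside DecodeLoop (the
local names are illustrative):

  // Detect the throttled -> not-throttled edge for audio.
  bool throttleAudio = HaveEnoughDecodedAudio(ampleAudioUSecs);
  if (mDidThrottleAudioDecoding && !throttleAudio) {
    // The audio queue has drained below the ample threshold; resume
    // decoding instead of waiting to be notified.
  }
  mDidThrottleAudioDecoding = throttleAudio;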
 protected:
 
   // Returns true if we've got less than aAudioUsecs microseconds of decoded
   // and playable data. The decoder monitor must be held.
   bool HasLowDecodedData(PRInt64 aAudioUsecs) const;
 
   // Returns true if we're running low on data which is not yet decoded.
   // The decoder monitor must be held.
@@ -431,16 +448,21 @@ protected:
   // Decode loop, decodes data until EOF or shutdown.
   // Called on the decode thread.
   void DecodeLoop();
 
   // Decode thread run function. Determines which of the Decode*() functions
   // to call.
   void DecodeThreadRun();
 
+  // Copy audio from an AudioData packet to aOutput. This may require
+  // inserting silence depending on the timing of the audio packet.
+  void SendOutputStreamAudio(AudioData* aAudio, OutputMediaStream* aStream,
+                             AudioSegment* aOutput);
+
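
Silence is needed when an audio packet begins after the point the output
stream has been written up to, e.g. across a gap in the decoded data. A
sketch of the gap computation; the bookkeeping variable and the exact
AudioSegment call are assumptions, not this patch's code:

  // Convert the packet's start time to a frame count and pad any gap with
  // silence so stream time stays aligned with packet timestamps.
  // framesWrittenSoFar is hypothetical; error checking omitted.
  CheckedInt64 packetStartFrame = UsecsToFrames(aAudio->mTime, mInfo.mAudioRate);
  PRInt64 gap = packetStartFrame.value() - framesWrittenSoFar;
  if (gap > 0) {
    aOutput->InsertNullDataAtStart(gap);  // assumed AudioSegment API
  }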
   // State machine thread run function. Defers to RunStateMachine().
   nsresult CallRunStateMachine();
 
   // Performs one "cycle" of the state machine. Polls the state, may send
   // a video frame to be displayed, and generally manages the decode. Called
   // periodically via timer to ensure the video stays in sync.
   nsresult RunStateMachine();
 
@@ -564,16 +586,20 @@ protected:
   // Volume of playback. 0.0 = muted. 1.0 = full volume. Read/Written
   // from the state machine and main threads. Synchronised via decoder
   // monitor.
   double mVolume;
 
   // Time at which we started decoding. Synchronised via decoder monitor.
   TimeStamp mDecodeStartTime;
 
+  // True if we shouldn't play our audio (but still write it to any capturing
+  // streams).
+  bool mAudioCaptured;
+
   // True if the media resource can be seeked. Accessed from the state
   // machine and main threads. Synchronised via decoder monitor.
   bool mSeekable;
 
   // True if an event to notify about a change in the playback
   // position has been queued, but not yet run. It is set to false when
   // the event is run. This allows coalescing of these events as they can be
   // produced many times per second. Synchronised via decoder monitor.
@@ -631,16 +657,22 @@ protected:
   // True if the decode thread has filled its buffers and is now
   // waiting to be awakened before it continues decoding. Synchronized
   // by the decoder monitor.
   bool mDecodeThreadWaiting;
 
   // True if we are decoding a realtime stream, like a camera stream
   bool mRealTime;
 
+  // Record whether audio and video decoding were throttled during the
+  // previous iteration of DecodeLooop. When we transition from
+  // throttled to not-throttled we need to pump decoding.
+  bool mDidThrottleAudioDecoding;
+  bool mDidThrottleVideoDecoding;
+
   // True if we've requested a new decode thread, but it has not yet been
   // created. Synchronized by the decoder monitor.
   bool mRequestedNewDecodeThread;
   
   PRUint32 mBufferingWait;
   PRInt64  mLowDataThresholdUsecs;
 
 private:
--- a/content/media/nsMediaCache.cpp
+++ b/content/media/nsMediaCache.cpp
@@ -234,16 +234,17 @@ public:
   void Verify() {}
 #endif
 
   ReentrantMonitor& GetReentrantMonitor() { return mReentrantMonitor; }
 
   /**
    * An iterator that makes it easy to iterate through all streams that
    * have a given resource ID and are not closed.
+   * Can be used on the main thread or while holding the media cache lock.
    */
   class ResourceStreamIterator {
   public:
     ResourceStreamIterator(PRInt64 aResourceID) :
       mResourceID(aResourceID), mNext(0) {}
     nsMediaCacheStream* Next()
     {
       while (mNext < gMediaCache->mStreams.Length()) {
@@ -346,23 +347,24 @@ protected:
 
   // Truncate the file and index array if there are free blocks at the
   // end
   void Truncate();
 
   // This member is main-thread only. It's used to allocate unique
   // resource IDs to streams.
   PRInt64                       mNextResourceID;
-  // This member is main-thread only. It contains all the streams.
-  nsTArray<nsMediaCacheStream*> mStreams;
 
   // The monitor protects all the data members here. Also, off-main-thread
   // readers that need to block will Wait() on this monitor. When new
   // data becomes available in the cache, we NotifyAll() on this monitor.
   ReentrantMonitor         mReentrantMonitor;
+  // This is only written while on the main thread and the monitor is held.
+  // Thus, it can be safely read from the main thread or while holding the monitor.
+  nsTArray<nsMediaCacheStream*> mStreams;
   // The Blocks describing the cache entries.
   nsTArray<Block> mIndex;
   // Writer which performs IO, asynchronously writing cache blocks.
   nsRefPtr<FileBlockCache> mFileCache;
   // The list of free blocks; they are not ordered.
   BlockList       mFreeBlocks;
   // True if an event to run Update() has been queued but not processed
   bool            mUpdateQueued;
@@ -1698,28 +1700,42 @@ nsMediaCacheStream::NotifyDataStarted(PR
   mChannelOffset = aOffset;
   if (mStreamLength >= 0) {
     // If we started reading at a certain offset, then for sure
     // the stream is at least that long.
     mStreamLength = NS_MAX(mStreamLength, mChannelOffset);
   }
 }
 
-void
+bool
 nsMediaCacheStream::UpdatePrincipal(nsIPrincipal* aPrincipal)
 {
-  nsContentUtils::CombineResourcePrincipals(&mPrincipal, aPrincipal);
+  return nsContentUtils::CombineResourcePrincipals(&mPrincipal, aPrincipal);
 }
 
 void
 nsMediaCacheStream::NotifyDataReceived(PRInt64 aSize, const char* aData,
     nsIPrincipal* aPrincipal)
 {
   NS_ASSERTION(NS_IsMainThread(), "Only call on main thread");
 
+  // Update principals before putting the data in the cache. This is important:
+  // we want to make sure all principals are updated before any consumer
+  // can see the new data.
+  // We do this without holding the cache monitor, in case the client wants
+  // to do something that takes a lock.
+  {
+    nsMediaCache::ResourceStreamIterator iter(mResourceID);
+    while (nsMediaCacheStream* stream = iter.Next()) {
+      if (stream->UpdatePrincipal(aPrincipal)) {
+        stream->mClient->CacheClientNotifyPrincipalChanged();
+      }
+    }
+  }
+
   ReentrantMonitorAutoEnter mon(gMediaCache->GetReentrantMonitor());
   PRInt64 size = aSize;
   const char* data = aData;
 
   LOG(PR_LOG_DEBUG, ("Stream %p DataReceived at %lld count=%lld",
       this, (long long)mChannelOffset, (long long)aSize));
 
   // We process the data one block (or part of a block) at a time
@@ -1764,17 +1780,16 @@ nsMediaCacheStream::NotifyDataReceived(P
   }
 
   nsMediaCache::ResourceStreamIterator iter(mResourceID);
   while (nsMediaCacheStream* stream = iter.Next()) {
     if (stream->mStreamLength >= 0) {
       // The stream is at least as long as what we've read
       stream->mStreamLength = NS_MAX(stream->mStreamLength, mChannelOffset);
     }
-    stream->UpdatePrincipal(aPrincipal);
     stream->mClient->CacheClientNotifyDataReceived();
   }
 
   // Notify in case there's a waiting reader
   // XXX it would be fairly easy to optimize things a lot more to
   // avoid waking up reader threads unnecessarily
   mon.NotifyAll();
 }
--- a/content/media/nsMediaCache.h
+++ b/content/media/nsMediaCache.h
@@ -225,20 +225,20 @@ public:
   enum ReadMode {
     MODE_METADATA,
     MODE_PLAYBACK
   };
 
   // aClient provides the underlying transport that cache will use to read
   // data for this stream.
   nsMediaCacheStream(ChannelMediaResource* aClient)
-    : mClient(aClient), mResourceID(0), mInitialized(false),
+    : mClient(aClient), mInitialized(false),
       mHasHadUpdate(false),
       mClosed(false),
-      mDidNotifyDataEnded(false),
+      mDidNotifyDataEnded(false), mResourceID(0),
       mIsSeekable(false), mCacheSuspended(false),
       mChannelEnded(false),
       mChannelOffset(0), mStreamLength(-1),  
       mStreamOffset(0), mPlaybackBytesPerSecond(10000),
       mPinCount(0), mCurrentMode(MODE_PLAYBACK),
       mMetadataInPartialBlockBuffer(false) {}
   ~nsMediaCacheStream();
 
@@ -321,17 +321,18 @@ public:
   void Unpin();
   // See comments above for NotifyDataLength about how the length
   // can vary over time. Returns -1 if no length is known. Returns the
   // reported length if we haven't got any better information. If
   // the stream ended normally we return the length we actually got.
   // If we've successfully read data beyond the originally reported length,
   // we return the end of the data we've read.
   PRInt64 GetLength();
-  // Returns the unique resource ID
+  // Returns the unique resource ID. Call only on the main thread or while
+  // holding the media cache lock.
   PRInt64 GetResourceID() { return mResourceID; }
   // Returns the end of the bytes starting at the given offset
   // which are in cache.
   PRInt64 GetCachedDataEnd(PRInt64 aOffset);
   // Returns the offset of the first byte of cached data at or after aOffset,
   // or -1 if there is no such cached data.
   PRInt64 GetNextCachedData(PRInt64 aOffset);
   // Fills aRanges with the ByteRanges representing the data which is currently
@@ -454,39 +455,40 @@ private:
   PRInt64 GetNextCachedDataInternal(PRInt64 aOffset);
   // A helper function to do the work of closing the stream. Assumes
   // that the cache monitor is held. Main thread only.
   // aReentrantMonitor is the ReentrantMonitorAutoEnter wrapper holding the cache monitor.
   // This is used to NotifyAll to wake up threads that might be
   // blocked on reading from this stream.
   void CloseInternal(ReentrantMonitorAutoEnter& aReentrantMonitor);
   // Update mPrincipal given that data has been received from aPrincipal
-  void UpdatePrincipal(nsIPrincipal* aPrincipal);
+  bool UpdatePrincipal(nsIPrincipal* aPrincipal);
 
   // These fields are main-thread-only.
   ChannelMediaResource*  mClient;
   nsCOMPtr<nsIPrincipal> mPrincipal;
-  // This is a unique ID representing the resource we're loading.
-  // All streams with the same mResourceID are loading the same
-  // underlying resource and should share data.
-  PRInt64                mResourceID;
   // Set to true when Init or InitAsClone has been called
   bool                   mInitialized;
   // Set to true when nsMediaCache::Update() has finished while this stream
   // was present.
   bool                   mHasHadUpdate;
   // Set to true when the stream has been closed either explicitly or
   // due to an internal cache error
   bool                   mClosed;
   // True if CacheClientNotifyDataEnded has been called for this stream.
   bool                   mDidNotifyDataEnded;
 
-  // The following fields are protected by the cache's monitor but are
-  // only written on the main thread. 
+  // The following fields are written only on the main thread while holding
+  // the cache's monitor, so they can be read either on the main thread or
+  // while holding the cache's monitor.
 
+  // This is a unique ID representing the resource we're loading.
+  // All streams with the same mResourceID are loading the same
+  // underlying resource and should share data.
+  PRInt64 mResourceID;
   // The last reported seekability state for the underlying channel
   bool mIsSeekable;
   // True if the cache has suspended our channel because the cache is
   // full and the priority of the data that would be received is lower
   // than the priority of the data already in the cache
   bool mCacheSuspended;
   // True if the channel ended and we haven't seeked it again.
   bool mChannelEnded;
--- a/content/media/nsMediaDecoder.h
+++ b/content/media/nsMediaDecoder.h
@@ -36,16 +36,17 @@
  *
  * ***** END LICENSE BLOCK ***** */
 #if !defined(nsMediaDecoder_h_)
 #define nsMediaDecoder_h_
 
 #include "ImageLayers.h"
 #include "mozilla/ReentrantMonitor.h"
 #include "VideoFrameContainer.h"
+#include "MediaStreamGraph.h"
 
 class nsHTMLMediaElement;
 class nsIStreamListener;
 class nsTimeRanges;
 class nsIMemoryReporter;
 class nsIPrincipal;
 class nsITimer;
 
@@ -64,23 +65,24 @@ static const PRUint32 FRAMEBUFFER_LENGTH
 static const PRUint32 FRAMEBUFFER_LENGTH_MAX = 16384;
 
 // All methods of nsMediaDecoder must be called from the main thread only
 // with the exception of GetVideoFrameContainer and GetStatistics,
 // which can be called from any thread.
 class nsMediaDecoder : public nsIObserver
 {
 public:
+  typedef mozilla::layers::Image Image;
+  typedef mozilla::layers::ImageContainer ImageContainer;
   typedef mozilla::MediaResource MediaResource;
   typedef mozilla::ReentrantMonitor ReentrantMonitor;
+  typedef mozilla::SourceMediaStream SourceMediaStream;
   typedef mozilla::TimeStamp TimeStamp;
   typedef mozilla::TimeDuration TimeDuration;
   typedef mozilla::VideoFrameContainer VideoFrameContainer;
-  typedef mozilla::layers::Image Image;
-  typedef mozilla::layers::ImageContainer ImageContainer;
 
   nsMediaDecoder();
   virtual ~nsMediaDecoder();
 
   // Create a new decoder of the same type as this one.
   virtual nsMediaDecoder* Clone() = 0;
 
   // Perform any initialization required for the decoder.
@@ -124,16 +126,23 @@ public:
   virtual bool IsInfinite() = 0;
 
   // Pause video playback.
   virtual void Pause() = 0;
 
   // Set the audio volume. It should be a value from 0 to 1.0.
   virtual void SetVolume(double aVolume) = 0;
 
+  // Sets whether audio is being captured. If it is, we won't play any
+  // of our audio.
+  virtual void SetAudioCaptured(bool aCaptured) = 0;
+
+  // Add an output stream. All decoder output will be sent to the stream.
+  virtual void AddOutputStream(SourceMediaStream* aStream, bool aFinishWhenEnded) = 0;
+
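
Together these two virtuals form the capture handshake: local audio
playback is switched off first, then decoder output is routed into the
stream. A sketch of the expected call order on the element side (the
variables are illustrative; the real caller is in nsHTMLMediaElement.cpp):

  // Sketch of a caller wiring up capture.
  decoder->SetAudioCaptured(true);                         // mute local playback
  decoder->AddOutputStream(sourceStream, finishWhenEnded); // route A/V to the stream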
   // Start playback of a video. 'Load' must have previously been
   // called.
   virtual nsresult Play() = 0;
 
   // Start downloading the media. Decode the downloaded data up to the
   // point of the first frame of data.
   // aResource is the media stream to use. Ownership of aResource passes to
   // the decoder, even if Load returns an error.
@@ -326,16 +335,20 @@ public:
   // Call on the main thread only.
   virtual void NotifyBytesDownloaded() = 0;
 
   // Called by nsChannelToPipeListener or MediaResource when the
   // download has ended. Called on the main thread only. aStatus is
   // the result from OnStopRequest.
   virtual void NotifyDownloadEnded(nsresult aStatus) = 0;
 
+  // Called by MediaResource when the principal of the resource has
+  // changed. Called on main thread only.
+  virtual void NotifyPrincipalChanged() = 0;
+
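
This is the decoder end of the chain started in nsMediaCache.cpp above:
CacheClientNotifyPrincipalChanged() lands here, and the decoder in turn
tells its media element. A plausible concrete override, assuming the
inherited mElement member (the real code is in files not excerpted):

  void nsBuiltinDecoder::NotifyPrincipalChanged()
  {
    if (mElement) {
      mElement->NotifyDecoderPrincipalChanged();
    }
  }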
   // Called as data arrives on the stream and is read into the cache.  Called
   // on the main thread only.
   virtual void NotifyDataArrived(const char* aBuffer, PRUint32 aLength, PRInt64 aOffset) = 0;
 
   // Cleanup internal data structures. Must be called on the main
   // thread by the owning object before that object disposes of this object.
   virtual void Shutdown();
 
--- a/dom/interfaces/html/nsIDOMHTMLAudioElement.idl
+++ b/dom/interfaces/html/nsIDOMHTMLAudioElement.idl
@@ -47,17 +47,17 @@
  * <audio> element.
  *
  * For more information on this interface, please see
  * http://www.whatwg.org/specs/web-apps/current-work/#audio
  *
  * @status UNDER_DEVELOPMENT
  */
 
-[scriptable, uuid(e1a11e83-255b-4350-81cf-f1f3e7d59712)]
+[scriptable, uuid(32c54e30-5063-4e35-8fc9-890e50fed147)]
 interface nsIDOMHTMLAudioElement : nsIDOMHTMLMediaElement
 {
   // Setup the audio stream for writing
   void mozSetup(in PRUint32 channels, in PRUint32 rate);
 
   // Write audio to the audio stream
   [implicit_jscontext]
   unsigned long mozWriteAudio(in jsval data);
--- a/dom/interfaces/html/nsIDOMHTMLMediaElement.idl
+++ b/dom/interfaces/html/nsIDOMHTMLMediaElement.idl
@@ -35,16 +35,18 @@
  * the terms of any one of the MPL, the GPL or the LGPL.
  *
  * ***** END LICENSE BLOCK ***** */
 
 #include "nsIDOMHTMLElement.idl"
 #include "nsIDOMMediaError.idl"
 #include "nsIDOMTimeRanges.idl"
 
+interface nsIDOMMediaStream;
+
 /**
  * The nsIDOMHTMLMediaElement interface is an interface to be implemented by the HTML
  * <audio> and <video> elements.
  *
  * For more information on this interface, please see
  * http://www.whatwg.org/specs/web-apps/current-work/#htmlmediaelement
  *
  * @status UNDER_DEVELOPMENT
@@ -52,17 +54,17 @@
 
 // undef the GetCurrentTime macro defined in WinBase.h from the MS Platform SDK
 %{C++
 #ifdef GetCurrentTime
 #undef GetCurrentTime
 #endif
 %}
 
-[scriptable, uuid(3e672e79-a0ea-45ef-87de-828402f1f6d7)]
+[scriptable, uuid(6b938133-a8c2-424a-9401-a631f74aeff5)]
 interface nsIDOMHTMLMediaElement : nsIDOMHTMLElement
 {
   // error state
   readonly attribute nsIDOMMediaError error;
 
   // network state
   [implicit_jscontext] attribute jsval src;
   readonly attribute DOMString currentSrc;
@@ -100,16 +102,21 @@ interface nsIDOMHTMLMediaElement : nsIDO
   void pause();
 
   // controls
            attribute boolean controls;
            attribute double volume;
            attribute boolean muted;
            attribute boolean defaultMuted;
 
+  // Mozilla extension: stream capture
+  nsIDOMMediaStream mozCaptureStream();
+  nsIDOMMediaStream mozCaptureStreamUntilEnded();
+  readonly attribute boolean mozAudioCaptured;
+
   // Mozilla extension: extra stream metadata information, used as part
   // of MozAudioAvailable events and the mozWriteAudio() method.  The
   // mozFrameBufferLength method allows for the size of the framebuffer
   // used within MozAudioAvailable events to be changed.  The new size must
   // be between 512 and 16384.  The default size for a media element with
   // audio is (mozChannels * 1024).
   readonly attribute unsigned long mozChannels;
   readonly attribute unsigned long mozSampleRate;
--- a/dom/interfaces/html/nsIDOMHTMLVideoElement.idl
+++ b/dom/interfaces/html/nsIDOMHTMLVideoElement.idl
@@ -43,17 +43,17 @@
  * <video> element.
  *
  * For more information on this interface, please see
  * http://www.whatwg.org/specs/web-apps/current-work/#video
  *
  * @status UNDER_DEVELOPMENT
  */
 
-[scriptable, uuid(e1a11e83-255b-4350-81cf-f1f3e7d59712)]
+[scriptable, uuid(e43f61e3-9c67-4e78-8534-3399d7f192b9)]
 interface nsIDOMHTMLVideoElement : nsIDOMHTMLMediaElement
 {
            attribute long width; 
            attribute long height;
   readonly attribute unsigned long videoWidth;
   readonly attribute unsigned long videoHeight;
            attribute DOMString poster;