Bug 1188099 - (Part 2) Introduce global queue and track speaking state across windows. r=smaug r=kdavis
author Eitan Isaacson <eitan@monotonous.org>
Sat, 08 Aug 2015 10:30:46 -0700
changeset 256989 51b76297c3d33f0cc40ec95a887b141a7f89d678
parent 256988 21fc5e503f210cd57244ecaca45d90d2d9fb9388
child 256990 f83dae195fa43d99ed11bff30e9c3d81a7164b42
push id 14559
push user philringnalda@gmail.com
push date Sun, 09 Aug 2015 23:41:14 +0000
treeherder fx-team@0e269a1f1beb [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers smaug, kdavis
bugs 1188099
milestone 42.0a1
Bug 1188099 - (Part 2) Introduce global queue and track speaking state across windows. r=smaug r=kdavis
dom/media/webspeech/synth/SpeechSynthesis.cpp
dom/media/webspeech/synth/SpeechSynthesis.h
dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl
dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp
dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h
dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp
dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h
dom/media/webspeech/synth/nsSpeechTask.cpp
dom/media/webspeech/synth/nsSpeechTask.h
dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp
dom/media/webspeech/synth/nsSynthVoiceRegistry.h
dom/webidl/SpeechSynthesis.webidl
--- a/dom/media/webspeech/synth/SpeechSynthesis.cpp
+++ b/dom/media/webspeech/synth/SpeechSynthesis.cpp
@@ -106,27 +106,29 @@ SpeechSynthesis::Pending() const
   default:
     return true;
   }
 }
 
 bool
 SpeechSynthesis::Speaking() const
 {
-  if (mSpeechQueue.IsEmpty()) {
-    return false;
+  if (!mSpeechQueue.IsEmpty() &&
+      mSpeechQueue.ElementAt(0)->GetState() == SpeechSynthesisUtterance::STATE_SPEAKING) {
+    return true;
   }
 
-  return mSpeechQueue.ElementAt(0)->GetState() == SpeechSynthesisUtterance::STATE_SPEAKING;
+  // Returns global speaking state if global queue is enabled. Or false.
+  return nsSynthVoiceRegistry::GetInstance()->IsSpeaking();
 }
 
 bool
 SpeechSynthesis::Paused() const
 {
-  return mHoldQueue ||
+  return mHoldQueue || (mCurrentTask && mCurrentTask->IsPrePaused()) ||
          (!mSpeechQueue.IsEmpty() && mSpeechQueue.ElementAt(0)->IsPaused());
 }
 
 void
 SpeechSynthesis::Speak(SpeechSynthesisUtterance& aUtterance)
 {
   if (aUtterance.mState != SpeechSynthesisUtterance::STATE_NONE) {
     // XXX: Should probably raise an error
@@ -173,25 +175,28 @@ SpeechSynthesis::AdvanceQueue()
   }
 
   return;
 }
 
 void
 SpeechSynthesis::Cancel()
 {
-  if (mCurrentTask) {
-    if (mSpeechQueue.Length() > 1) {
-      // Remove all queued utterances except for current one.
-      mSpeechQueue.RemoveElementsAt(1, mSpeechQueue.Length() - 1);
-    }
-    mCurrentTask->Cancel();
+  if (!mSpeechQueue.IsEmpty() &&
+      mSpeechQueue.ElementAt(0)->GetState() == SpeechSynthesisUtterance::STATE_SPEAKING) {
+    // Remove all queued utterances except for current one, we will remove it
+    // in OnEnd
+    mSpeechQueue.RemoveElementsAt(1, mSpeechQueue.Length() - 1);
   } else {
     mSpeechQueue.Clear();
   }
+
+  if (mCurrentTask) {
+    mCurrentTask->Cancel();
+  }
 }
 
 void
 SpeechSynthesis::Pause()
 {
   if (Paused()) {
     return;
   }
@@ -264,10 +269,18 @@ SpeechSynthesis::GetVoices(nsTArray< nsR
   mVoiceCache.Clear();
 
   for (uint32_t i = 0; i < aResult.Length(); i++) {
     SpeechSynthesisVoice* voice = aResult[i];
     mVoiceCache.Put(voice->mUri, voice);
   }
 }
 
+// For testing purposes, allows us to drop anything in the global queue from
+// content, and bring the browser to initial state.
+void
+SpeechSynthesis::DropGlobalQueue()
+{
+  nsSynthVoiceRegistry::GetInstance()->DropGlobalQueue();
+}
+
 } // namespace dom
 } // namespace mozilla
--- a/dom/media/webspeech/synth/SpeechSynthesis.h
+++ b/dom/media/webspeech/synth/SpeechSynthesis.h
@@ -49,16 +49,18 @@ public:
   void Pause();
 
   void Resume();
 
   void OnEnd(const nsSpeechTask* aTask);
 
   void GetVoices(nsTArray< nsRefPtr<SpeechSynthesisVoice> >& aResult);
 
+  void DropGlobalQueue();
+
 private:
   virtual ~SpeechSynthesis();
 
   void AdvanceQueue();
 
   nsCOMPtr<nsPIDOMWindow> mParent;
 
   nsTArray<nsRefPtr<SpeechSynthesisUtterance> > mSpeechQueue;
--- a/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl
+++ b/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl
@@ -26,18 +26,24 @@ sync protocol PSpeechSynthesis
 child:
 
     VoiceAdded(RemoteVoice aVoice);
 
     VoiceRemoved(nsString aUri);
 
     SetDefaultVoice(nsString aUri, bool aIsDefault);
 
+    IsSpeakingChanged(bool aIsSpeaking);
+
 parent:
     __delete__();
 
     PSpeechSynthesisRequest(nsString aText, nsString aUri, nsString aLang,
                             float aVolume, float aRate, float aPitch);
-    sync ReadVoiceList() returns (RemoteVoice[] aVoices, nsString[] aDefaults);
+
+    sync ReadVoicesAndState() returns (RemoteVoice[] aVoices,
+                                       nsString[] aDefaults, bool aIsSpeaking);
+
+    DropGlobalQueue();
 };
 
 } // namespace dom
 } // namespace mozilla
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp
@@ -35,16 +35,23 @@ SpeechSynthesisChild::RecvVoiceRemoved(c
 bool
 SpeechSynthesisChild::RecvSetDefaultVoice(const nsString& aUri,
                                           const bool& aIsDefault)
 {
   nsSynthVoiceRegistry::RecvSetDefaultVoice(aUri, aIsDefault);
   return true;
 }
 
+bool
+SpeechSynthesisChild::RecvIsSpeakingChanged(const bool& aIsSpeaking)
+{
+  nsSynthVoiceRegistry::RecvIsSpeakingChanged(aIsSpeaking);
+  return true;
+}
+
 PSpeechSynthesisRequestChild*
 SpeechSynthesisChild::AllocPSpeechSynthesisRequestChild(const nsString& aText,
                                                         const nsString& aLang,
                                                         const nsString& aUri,
                                                         const float& aVolume,
                                                         const float& aRate,
                                                         const float& aPitch)
 {
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h
@@ -23,16 +23,18 @@ class SpeechSynthesisChild : public PSpe
 
 public:
   bool RecvVoiceAdded(const RemoteVoice& aVoice) override;
 
   bool RecvVoiceRemoved(const nsString& aUri) override;
 
   bool RecvSetDefaultVoice(const nsString& aUri, const bool& aIsDefault) override;
 
+  bool RecvIsSpeakingChanged(const bool& aIsSpeaking) override;
+
 protected:
   SpeechSynthesisChild();
   virtual ~SpeechSynthesisChild();
 
   PSpeechSynthesisRequestChild* AllocPSpeechSynthesisRequestChild(const nsString& aLang,
                                                                   const nsString& aUri,
                                                                   const nsString& aText,
                                                                   const float& aVolume,
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp
@@ -20,20 +20,30 @@ SpeechSynthesisParent::~SpeechSynthesisP
 
 void
 SpeechSynthesisParent::ActorDestroy(ActorDestroyReason aWhy)
 {
   // Implement me! Bug 1005141
 }
 
 bool
-SpeechSynthesisParent::RecvReadVoiceList(InfallibleTArray<RemoteVoice>* aVoices,
-                                         InfallibleTArray<nsString>* aDefaults)
+SpeechSynthesisParent::RecvReadVoicesAndState(InfallibleTArray<RemoteVoice>* aVoices,
+                                              InfallibleTArray<nsString>* aDefaults,
+                                              bool* aIsSpeaking)
 {
-  nsSynthVoiceRegistry::GetInstance()->SendVoices(aVoices, aDefaults);
+  nsSynthVoiceRegistry::GetInstance()->SendVoicesAndState(aVoices, aDefaults,
+                                                          aIsSpeaking);
+  return true;
+}
+
+bool
+SpeechSynthesisParent::RecvDropGlobalQueue()
+{
+  nsSynthVoiceRegistry::GetInstance()->DropGlobalQueue();
+
   return true;
 }
 
 PSpeechSynthesisRequestParent*
 SpeechSynthesisParent::AllocPSpeechSynthesisRequestParent(const nsString& aText,
                                                           const nsString& aLang,
                                                           const nsString& aUri,
                                                           const float& aVolume,
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h
@@ -19,18 +19,21 @@ class SpeechSynthesisRequestParent;
 class SpeechSynthesisParent : public PSpeechSynthesisParent
 {
   friend class ContentParent;
   friend class SpeechSynthesisRequestParent;
 
 public:
   virtual void ActorDestroy(ActorDestroyReason aWhy) override;
 
-  bool RecvReadVoiceList(InfallibleTArray<RemoteVoice>* aVoices,
-                         InfallibleTArray<nsString>* aDefaults) override;
+  bool RecvReadVoicesAndState(InfallibleTArray<RemoteVoice>* aVoices,
+                              InfallibleTArray<nsString>* aDefaults,
+                              bool* aIsSpeaking) override;
+
+  bool RecvDropGlobalQueue() override;
 
 protected:
   SpeechSynthesisParent();
   virtual ~SpeechSynthesisParent();
   PSpeechSynthesisRequestParent* AllocPSpeechSynthesisRequestParent(const nsString& aText,
                                                                     const nsString& aLang,
                                                                     const nsString& aUri,
                                                                     const float& aVolume,
--- a/dom/media/webspeech/synth/nsSpeechTask.cpp
+++ b/dom/media/webspeech/synth/nsSpeechTask.cpp
@@ -2,16 +2,17 @@
 /* vim:set ts=2 sw=2 sts=2 et cindent: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "AudioSegment.h"
 #include "nsSpeechTask.h"
 #include "SpeechSynthesis.h"
+#include "nsSynthVoiceRegistry.h"
 
 // GetCurrentTime is defined in winbase.h as zero argument macro forwarding to
 // GetTickCount() and conflicts with nsSpeechTask::GetCurrentTime().
 #ifdef GetCurrentTime
 #undef GetCurrentTime
 #endif
 
 #undef LOG
@@ -28,25 +29,25 @@ public:
     mSpeechTask(aSpeechTask),
     mStarted(false)
   {
   }
 
   void DoNotifyStarted()
   {
     if (mSpeechTask) {
-      mSpeechTask->DispatchStartImpl();
+      mSpeechTask->DispatchStartInner();
     }
   }
 
   void DoNotifyFinished()
   {
     if (mSpeechTask) {
-      mSpeechTask->DispatchEndImpl(mSpeechTask->GetCurrentTime(),
-                                   mSpeechTask->GetCurrentCharOffset());
+      mSpeechTask->DispatchEndInner(mSpeechTask->GetCurrentTime(),
+                                    mSpeechTask->GetCurrentCharOffset());
     }
   }
 
   virtual void NotifyEvent(MediaStreamGraph* aGraph,
                            MediaStreamListener::MediaStreamGraphEvent event) override
   {
     switch (event) {
       case EVENT_FINISHED:
@@ -91,27 +92,33 @@ NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(
   NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTask)
 NS_INTERFACE_MAP_END
 
 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsSpeechTask)
 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsSpeechTask)
 
 nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance)
   : mUtterance(aUtterance)
+  , mInited(false)
+  , mPrePaused(false)
+  , mPreCanceled(false)
   , mCallback(nullptr)
   , mIndirectAudio(false)
 {
   mText = aUtterance->mText;
   mVolume = aUtterance->Volume();
 }
 
 nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText)
   : mUtterance(nullptr)
   , mVolume(aVolume)
   , mText(aText)
+  , mInited(false)
+  , mPrePaused(false)
+  , mPreCanceled(false)
   , mCallback(nullptr)
   , mIndirectAudio(false)
 {
 }
 
 nsSpeechTask::~nsSpeechTask()
 {
   LOG(LogLevel::Debug, ("~nsSpeechTask"));
@@ -125,20 +132,26 @@ nsSpeechTask::~nsSpeechTask()
 
   if (mPort) {
     mPort->Destroy();
     mPort = nullptr;
   }
 }
 
 void
-nsSpeechTask::BindStream(ProcessedMediaStream* aStream)
+nsSpeechTask::Init(ProcessedMediaStream* aStream)
 {
-  mStream = MediaStreamGraph::GetInstance()->CreateSourceStream(nullptr);
-  mPort = aStream->AllocateInputPort(mStream, 0);
+  if (aStream) {
+    mStream = MediaStreamGraph::GetInstance()->CreateSourceStream(nullptr);
+    mPort = aStream->AllocateInputPort(mStream, 0);
+    mIndirectAudio = false;
+  } else {
+    mIndirectAudio = true;
+  }
+  mInited = true;
 }
 
 void
 nsSpeechTask::SetChosenVoiceURI(const nsAString& aUri)
 {
   mChosenVoiceURI = aUri;
 }
 
@@ -148,23 +161,24 @@ nsSpeechTask::Setup(nsISpeechTaskCallbac
 {
   MOZ_ASSERT(XRE_IsParentProcess());
 
   LOG(LogLevel::Debug, ("nsSpeechTask::Setup"));
 
   mCallback = aCallback;
 
   if (mIndirectAudio) {
+    MOZ_ASSERT(!mStream);
     if (argc > 0) {
       NS_WARNING("Audio info arguments in Setup() are ignored for indirect audio services.");
     }
     return NS_OK;
   }
 
-  // mStream is set up in BindStream() that should be called before this.
+  // mStream is set up in Init() that should be called before this.
   MOZ_ASSERT(mStream);
 
   mStream->AddListener(new SynthStreamListener(this));
 
   // XXX: Support more than one channel
   if(NS_WARN_IF(!(aChannels == 1))) {
     return NS_ERROR_FAILURE;
   }
@@ -289,16 +303,23 @@ nsSpeechTask::SendAudioImpl(nsRefPtr<moz
 NS_IMETHODIMP
 nsSpeechTask::DispatchStart()
 {
   if (!mIndirectAudio) {
     NS_WARNING("Can't call DispatchStart() from a direct audio speech service");
     return NS_ERROR_FAILURE;
   }
 
+  return DispatchStartInner();
+}
+
+nsresult
+nsSpeechTask::DispatchStartInner()
+{
+  nsSynthVoiceRegistry::GetInstance()->SetIsSpeaking(true);
   return DispatchStartImpl();
 }
 
 nsresult
 nsSpeechTask::DispatchStartImpl()
 {
   return DispatchStartImpl(mChosenVoiceURI);
 }
@@ -324,16 +345,26 @@ nsSpeechTask::DispatchStartImpl(const ns
 NS_IMETHODIMP
 nsSpeechTask::DispatchEnd(float aElapsedTime, uint32_t aCharIndex)
 {
   if (!mIndirectAudio) {
     NS_WARNING("Can't call DispatchEnd() from a direct audio speech service");
     return NS_ERROR_FAILURE;
   }
 
+  return DispatchEndInner(aElapsedTime, aCharIndex);
+}
+
+nsresult
+nsSpeechTask::DispatchEndInner(float aElapsedTime, uint32_t aCharIndex)
+{
+  if (!mPreCanceled) {
+    nsSynthVoiceRegistry::GetInstance()->SpeakNext();
+  }
+
   return DispatchEndImpl(aElapsedTime, aCharIndex);
 }
 
 nsresult
 nsSpeechTask::DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex)
 {
   LOG(LogLevel::Debug, ("nsSpeechTask::DispatchEnd\n"));
 
@@ -384,19 +415,21 @@ nsSpeechTask::DispatchPauseImpl(float aE
   if(NS_WARN_IF(mUtterance->mPaused)) {
     return NS_ERROR_NOT_AVAILABLE;
   }
   if(NS_WARN_IF(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED)) {
     return NS_ERROR_NOT_AVAILABLE;
   }
 
   mUtterance->mPaused = true;
-  mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("pause"),
-                                           aCharIndex, aElapsedTime,
-                                           EmptyString());
+  if (mUtterance->mState == SpeechSynthesisUtterance::STATE_SPEAKING) {
+    mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("pause"),
+                                             aCharIndex, aElapsedTime,
+                                             EmptyString());
+  }
   return NS_OK;
 }
 
 NS_IMETHODIMP
 nsSpeechTask::DispatchResume(float aElapsedTime, uint32_t aCharIndex)
 {
   if (!mIndirectAudio) {
     NS_WARNING("Can't call DispatchResume() from a direct audio speech service");
@@ -414,19 +447,22 @@ nsSpeechTask::DispatchResumeImpl(float a
   if(NS_WARN_IF(!(mUtterance->mPaused))) {
     return NS_ERROR_NOT_AVAILABLE;
   }
   if(NS_WARN_IF(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED)) {
     return NS_ERROR_NOT_AVAILABLE;
   }
 
   mUtterance->mPaused = false;
-  mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("resume"),
-                                           aCharIndex, aElapsedTime,
-                                           EmptyString());
+  if (mUtterance->mState == SpeechSynthesisUtterance::STATE_SPEAKING) {
+    mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("resume"),
+                                             aCharIndex, aElapsedTime,
+                                             EmptyString());
+  }
+
   return NS_OK;
 }
 
 NS_IMETHODIMP
 nsSpeechTask::DispatchError(float aElapsedTime, uint32_t aCharIndex)
 {
   if (!mIndirectAudio) {
     NS_WARNING("Can't call DispatchError() from a direct audio speech service");
@@ -512,32 +548,47 @@ nsSpeechTask::Pause()
 
   if (mCallback) {
     DebugOnly<nsresult> rv = mCallback->OnPause();
     NS_WARN_IF_FALSE(NS_SUCCEEDED(rv), "Unable to call onPause() callback");
   }
 
   if (mStream) {
     mStream->ChangeExplicitBlockerCount(1);
+  }
+
+  if (!mInited) {
+    mPrePaused = true;
+  }
+
+  if (!mIndirectAudio) {
     DispatchPauseImpl(GetCurrentTime(), GetCurrentCharOffset());
   }
 }
 
 void
 nsSpeechTask::Resume()
 {
   MOZ_ASSERT(XRE_IsParentProcess());
 
   if (mCallback) {
     DebugOnly<nsresult> rv = mCallback->OnResume();
     NS_WARN_IF_FALSE(NS_SUCCEEDED(rv), "Unable to call onResume() callback");
   }
 
   if (mStream) {
     mStream->ChangeExplicitBlockerCount(-1);
+  }
+
+  if (mPrePaused) {
+    mPrePaused = false;
+    nsSynthVoiceRegistry::GetInstance()->ResumeQueue();
+  }
+
+  if (!mIndirectAudio) {
     DispatchResumeImpl(GetCurrentTime(), GetCurrentCharOffset());
   }
 }
 
 void
 nsSpeechTask::Cancel()
 {
   MOZ_ASSERT(XRE_IsParentProcess());
@@ -546,17 +597,24 @@ nsSpeechTask::Cancel()
 
   if (mCallback) {
     DebugOnly<nsresult> rv = mCallback->OnCancel();
     NS_WARN_IF_FALSE(NS_SUCCEEDED(rv), "Unable to call onCancel() callback");
   }
 
   if (mStream) {
     mStream->ChangeExplicitBlockerCount(1);
-    DispatchEndImpl(GetCurrentTime(), GetCurrentCharOffset());
+  }
+
+  if (!mInited) {
+    mPreCanceled = true;
+  }
+
+  if (!mIndirectAudio) {
+    DispatchEndInner(GetCurrentTime(), GetCurrentCharOffset());
   }
 }
 
 float
 nsSpeechTask::GetCurrentTime()
 {
   return mStream ? (float)(mStream->GetCurrentTime() / 1000000.0) : 0;
 }
--- a/dom/media/webspeech/synth/nsSpeechTask.h
+++ b/dom/media/webspeech/synth/nsSpeechTask.h
@@ -38,22 +38,30 @@ public:
   virtual void Cancel();
 
   float GetCurrentTime();
 
   uint32_t GetCurrentCharOffset();
 
   void SetSpeechSynthesis(SpeechSynthesis* aSpeechSynthesis);
 
-  void SetIndirectAudio(bool aIndirectAudio) { mIndirectAudio = aIndirectAudio; }
-
-  void BindStream(ProcessedMediaStream* aStream);
+  void Init(ProcessedMediaStream* aStream);
 
   void SetChosenVoiceURI(const nsAString& aUri);
 
+  bool IsPreCanceled()
+  {
+    return mPreCanceled;
+  };
+
+  bool IsPrePaused()
+  {
+    return mPrePaused;
+  }
+
 protected:
   virtual ~nsSpeechTask();
 
   nsresult DispatchStartImpl();
 
   virtual nsresult DispatchStartImpl(const nsAString& aUri);
 
   virtual nsresult DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex);
@@ -72,21 +80,31 @@ protected:
                                     float aElapsedTime, uint32_t aCharIndex);
 
   nsRefPtr<SpeechSynthesisUtterance> mUtterance;
 
   float mVolume;
 
   nsString mText;
 
+  bool mInited;
+
+  bool mPrePaused;
+
+  bool mPreCanceled;
+
 private:
   void End();
 
   void SendAudioImpl(nsRefPtr<mozilla::SharedBuffer>& aSamples, uint32_t aDataLen);
 
+  nsresult DispatchStartInner();
+
+  nsresult DispatchEndInner(float aElapsedTime, uint32_t aCharIndex);
+
   nsRefPtr<SourceMediaStream> mStream;
 
   nsRefPtr<MediaInputPort> mPort;
 
   nsCOMPtr<nsISpeechTaskCallback> mCallback;
 
   uint32_t mChannels;
 
--- a/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp
+++ b/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp
@@ -92,63 +92,102 @@ public:
 
   nsString mLang;
 
   bool mIsLocal;
 
   bool mIsQueued;
 };
 
+// GlobalQueueItem
+
+class GlobalQueueItem final
+{
+private:
+  // Private destructor, to discourage deletion outside of Release():
+  ~GlobalQueueItem() {}
+
+public:
+  GlobalQueueItem(VoiceData* aVoice, nsSpeechTask* aTask, const nsAString& aText,
+                  const float& aVolume, const float& aRate, const float& aPitch)
+    : mVoice(aVoice)
+    , mTask(aTask)
+    , mText(aText)
+    , mVolume(aVolume)
+    , mRate(aRate)
+    , mPitch(aPitch) {}
+
+  NS_INLINE_DECL_REFCOUNTING(GlobalQueueItem)
+
+  nsRefPtr<VoiceData> mVoice;
+
+  nsRefPtr<nsSpeechTask> mTask;
+
+  nsString mText;
+
+  float mVolume;
+
+  float mRate;
+
+  float mPitch;
+
+  bool mIsLocal;
+};
+
 // nsSynthVoiceRegistry
 
 static StaticRefPtr<nsSynthVoiceRegistry> gSynthVoiceRegistry;
 static bool sForceGlobalQueue = false;
 
 NS_IMPL_ISUPPORTS(nsSynthVoiceRegistry, nsISynthVoiceRegistry)
 
 nsSynthVoiceRegistry::nsSynthVoiceRegistry()
   : mSpeechSynthChild(nullptr)
   , mUseGlobalQueue(false)
+  , mIsSpeaking(false)
 {
   if (XRE_IsContentProcess()) {
 
     mSpeechSynthChild = new SpeechSynthesisChild();
     ContentChild::GetSingleton()->SendPSpeechSynthesisConstructor(mSpeechSynthChild);
 
     InfallibleTArray<RemoteVoice> voices;
     InfallibleTArray<nsString> defaults;
+    bool isSpeaking;
 
-    mSpeechSynthChild->SendReadVoiceList(&voices, &defaults);
+    mSpeechSynthChild->SendReadVoicesAndState(&voices, &defaults, &isSpeaking);
 
     for (uint32_t i = 0; i < voices.Length(); ++i) {
       RemoteVoice voice = voices[i];
       AddVoiceImpl(nullptr, voice.voiceURI(),
                    voice.name(), voice.lang(),
                    voice.localService(), voice.queued());
     }
 
     for (uint32_t i = 0; i < defaults.Length(); ++i) {
       SetDefaultVoice(defaults[i], true);
     }
+
+    mIsSpeaking = isSpeaking;
   }
 }
 
 nsSynthVoiceRegistry::~nsSynthVoiceRegistry()
 {
   LOG(LogLevel::Debug, ("~nsSynthVoiceRegistry"));
 
   // mSpeechSynthChild's lifecycle is managed by the Content protocol.
   mSpeechSynthChild = nullptr;
 
   if (mStream) {
     if (!mStream->IsDestroyed()) {
-     mStream->Destroy();
-   }
+      mStream->Destroy();
+    }
 
-   mStream = nullptr;
+    mStream = nullptr;
   }
 
   mUriVoiceMap.Clear();
 }
 
 nsSynthVoiceRegistry*
 nsSynthVoiceRegistry::GetInstance()
 {
@@ -170,34 +209,37 @@ nsSynthVoiceRegistry::GetInstanceForServ
 
   return registry.forget();
 }
 
 void
 nsSynthVoiceRegistry::Shutdown()
 {
   LOG(LogLevel::Debug, ("[%s] nsSynthVoiceRegistry::Shutdown()",
-                     (XRE_IsContentProcess()) ? "Content" : "Default"));
+                        (XRE_IsContentProcess()) ? "Content" : "Default"));
   gSynthVoiceRegistry = nullptr;
 }
 
 void
-nsSynthVoiceRegistry::SendVoices(InfallibleTArray<RemoteVoice>* aVoices,
-                                 InfallibleTArray<nsString>* aDefaults)
+nsSynthVoiceRegistry::SendVoicesAndState(InfallibleTArray<RemoteVoice>* aVoices,
+                                         InfallibleTArray<nsString>* aDefaults,
+                                         bool* aIsSpeaking)
 {
   for (uint32_t i=0; i < mVoices.Length(); ++i) {
     nsRefPtr<VoiceData> voice = mVoices[i];
 
     aVoices->AppendElement(RemoteVoice(voice->mUri, voice->mName, voice->mLang,
                                        voice->mIsLocal, voice->mIsQueued));
   }
 
   for (uint32_t i=0; i < mDefaultVoices.Length(); ++i) {
     aDefaults->AppendElement(mDefaultVoices[i]->mUri);
   }
+
+  *aIsSpeaking = IsSpeaking();
 }
 
 void
 nsSynthVoiceRegistry::RecvRemoveVoice(const nsAString& aUri)
 {
   // If we dont have a local instance of the registry yet, we will recieve current
   // voices at contruction time.
   if(!gSynthVoiceRegistry) {
@@ -228,16 +270,28 @@ nsSynthVoiceRegistry::RecvSetDefaultVoic
   // voices at contruction time.
   if(!gSynthVoiceRegistry) {
     return;
   }
 
   gSynthVoiceRegistry->SetDefaultVoice(aUri, aIsDefault);
 }
 
+void
+nsSynthVoiceRegistry::RecvIsSpeakingChanged(bool aIsSpeaking)
+{
+  // If we don't have a local instance of the registry yet, we will get the
+  // speaking state on construction.
+  if(!gSynthVoiceRegistry) {
+    return;
+  }
+
+  gSynthVoiceRegistry->mIsSpeaking = aIsSpeaking;
+}
+
 NS_IMETHODIMP
 nsSynthVoiceRegistry::AddVoice(nsISpeechService* aService,
                                const nsAString& aUri,
                                const nsAString& aName,
                                const nsAString& aLang,
                                bool aLocalService,
                                bool aQueuesUtterances)
 {
@@ -598,45 +652,150 @@ void
 nsSynthVoiceRegistry::Speak(const nsAString& aText,
                             const nsAString& aLang,
                             const nsAString& aUri,
                             const float& aVolume,
                             const float& aRate,
                             const float& aPitch,
                             nsSpeechTask* aTask)
 {
-  LOG(LogLevel::Debug,
-      ("nsSynthVoiceRegistry::Speak text='%s' lang='%s' uri='%s' rate=%f pitch=%f",
-       NS_ConvertUTF16toUTF8(aText).get(), NS_ConvertUTF16toUTF8(aLang).get(),
-       NS_ConvertUTF16toUTF8(aUri).get(), aRate, aPitch));
+  MOZ_ASSERT(XRE_IsParentProcess());
 
   VoiceData* voice = FindBestMatch(aUri, aLang);
 
   if (!voice) {
     NS_WARNING("No voices found.");
     aTask->DispatchError(0, 0);
     return;
   }
 
   aTask->SetChosenVoiceURI(voice->mUri);
 
-  LOG(LogLevel::Debug, ("nsSynthVoiceRegistry::Speak - Using voice URI: %s",
-                     NS_ConvertUTF16toUTF8(voice->mUri).get()));
+  if (mUseGlobalQueue || sForceGlobalQueue) {
+    LOG(LogLevel::Debug,
+        ("nsSynthVoiceRegistry::Speak queueing text='%s' lang='%s' uri='%s' rate=%f pitch=%f",
+         NS_ConvertUTF16toUTF8(aText).get(), NS_ConvertUTF16toUTF8(aLang).get(),
+         NS_ConvertUTF16toUTF8(aUri).get(), aRate, aPitch));
+    nsRefPtr<GlobalQueueItem> item = new GlobalQueueItem(voice, aTask, aText,
+                                                         aVolume, aRate, aPitch);
+    mGlobalQueue.AppendElement(item);
+
+    if (mGlobalQueue.Length() == 1) {
+      SpeakImpl(item->mVoice, item->mTask, item->mText, item->mVolume, item->mRate,
+                item->mPitch);
+    }
+  } else {
+    SpeakImpl(voice, aTask, aText, aVolume, aRate, aPitch);
+  }
+}
+
+void
+nsSynthVoiceRegistry::SpeakNext()
+{
+  MOZ_ASSERT(XRE_IsParentProcess());
+
+  LOG(LogLevel::Debug,
+      ("nsSynthVoiceRegistry::SpeakNext %d", mGlobalQueue.IsEmpty()));
+
+  SetIsSpeaking(false);
+
+  if (mGlobalQueue.IsEmpty()) {
+    return;
+  }
+
+  mGlobalQueue.RemoveElementAt(0);
+
+  while (!mGlobalQueue.IsEmpty()) {
+    nsRefPtr<GlobalQueueItem> item = mGlobalQueue.ElementAt(0);
+    if (item->mTask->IsPreCanceled()) {
+      mGlobalQueue.RemoveElementAt(0);
+      continue;
+    }
+    if (!item->mTask->IsPrePaused()) {
+      SpeakImpl(item->mVoice, item->mTask, item->mText, item->mVolume,
+                item->mRate, item->mPitch);
+    }
+    break;
+  }
+}
+
+void
+nsSynthVoiceRegistry::ResumeQueue()
+{
+  MOZ_ASSERT(XRE_IsParentProcess());
+  LOG(LogLevel::Debug,
+      ("nsSynthVoiceRegistry::ResumeQueue %d", mGlobalQueue.IsEmpty()));
+
+  if (mGlobalQueue.IsEmpty()) {
+    return;
+  }
+
+  nsRefPtr<GlobalQueueItem> item = mGlobalQueue.ElementAt(0);
+  if (!item->mTask->IsPrePaused()) {
+    SpeakImpl(item->mVoice, item->mTask, item->mText, item->mVolume,
+              item->mRate, item->mPitch);
+  }
+}
+
+bool
+nsSynthVoiceRegistry::IsSpeaking()
+{
+  return mIsSpeaking;
+}
+
+void
+nsSynthVoiceRegistry::SetIsSpeaking(bool aIsSpeaking)
+{
+  MOZ_ASSERT(XRE_IsParentProcess());
+
+  // Only set to 'true' if global queue is enabled.
+  mIsSpeaking = aIsSpeaking && (mUseGlobalQueue || sForceGlobalQueue);
+
+  nsTArray<SpeechSynthesisParent*> ssplist;
+  GetAllSpeechSynthActors(ssplist);
+  for (uint32_t i = 0; i < ssplist.Length(); ++i) {
+    unused << ssplist[i]->SendIsSpeakingChanged(aIsSpeaking);
+  }
+}
+
+void
+nsSynthVoiceRegistry::DropGlobalQueue()
+{
+  if (XRE_IsParentProcess()) {
+    mGlobalQueue.Clear();
+    SetIsSpeaking(false);
+  } else {
+    mSpeechSynthChild->SendDropGlobalQueue();
+  }
+}
+
+void
+nsSynthVoiceRegistry::SpeakImpl(VoiceData* aVoice,
+                                nsSpeechTask* aTask,
+                                const nsAString& aText,
+                                const float& aVolume,
+                                const float& aRate,
+                                const float& aPitch)
+{
+  LOG(LogLevel::Debug,
+      ("nsSynthVoiceRegistry::SpeakImpl queueing text='%s' uri='%s' rate=%f pitch=%f",
+       NS_ConvertUTF16toUTF8(aText).get(), NS_ConvertUTF16toUTF8(aVoice->mUri).get(),
+       aRate, aPitch));
 
   SpeechServiceType serviceType;
 
-  DebugOnly<nsresult> rv = voice->mService->GetServiceType(&serviceType);
+  DebugOnly<nsresult> rv = aVoice->mService->GetServiceType(&serviceType);
   NS_WARN_IF_FALSE(NS_SUCCEEDED(rv), "Failed to get speech service type");
 
   if (serviceType == nsISpeechService::SERVICETYPE_INDIRECT_AUDIO) {
-    aTask->SetIndirectAudio(true);
+    aTask->Init(nullptr);
   } else {
     if (!mStream) {
       mStream = MediaStreamGraph::GetInstance()->CreateTrackUnionStream(nullptr);
     }
-    aTask->BindStream(mStream);
+    aTask->Init(mStream);
   }
 
-  voice->mService->Speak(aText, voice->mUri, aVolume, aRate, aPitch, aTask);
+  aVoice->mService->Speak(aText, aVoice->mUri, aVolume, aRate, aPitch, aTask);
 }
 
 } // namespace dom
 } // namespace mozilla
--- a/dom/media/webspeech/synth/nsSynthVoiceRegistry.h
+++ b/dom/media/webspeech/synth/nsSynthVoiceRegistry.h
@@ -18,70 +18,95 @@ class nsISpeechService;
 namespace mozilla {
 namespace dom {
 
 class RemoteVoice;
 class SpeechSynthesisUtterance;
 class SpeechSynthesisChild;
 class nsSpeechTask;
 class VoiceData;
+class GlobalQueueItem;
 
 class nsSynthVoiceRegistry final : public nsISynthVoiceRegistry
 {
 public:
   NS_DECL_ISUPPORTS
   NS_DECL_NSISYNTHVOICEREGISTRY
 
   nsSynthVoiceRegistry();
 
   already_AddRefed<nsSpeechTask> SpeakUtterance(SpeechSynthesisUtterance& aUtterance,
                                                 const nsAString& aDocLang);
 
   void Speak(const nsAString& aText, const nsAString& aLang,
              const nsAString& aUri, const float& aVolume,  const float& aRate,
              const float& aPitch, nsSpeechTask* aTask);
 
-  void SendVoices(InfallibleTArray<RemoteVoice>* aVoices,
-                  InfallibleTArray<nsString>* aDefaults);
+  void SendVoicesAndState(InfallibleTArray<RemoteVoice>* aVoices,
+                          InfallibleTArray<nsString>* aDefaults,
+                          bool* aIsSpeaking);
+
+  void SpeakNext();
+
+  void ResumeQueue();
+
+  bool IsSpeaking();
+
+  void SetIsSpeaking(bool aIsSpeaking);
+
+  void DropGlobalQueue();
 
   static nsSynthVoiceRegistry* GetInstance();
 
   static already_AddRefed<nsSynthVoiceRegistry> GetInstanceForService();
 
   static void RecvRemoveVoice(const nsAString& aUri);
 
   static void RecvAddVoice(const RemoteVoice& aVoice);
 
   static void RecvSetDefaultVoice(const nsAString& aUri, bool aIsDefault);
 
+  static void RecvIsSpeakingChanged(bool aIsSpeaking);
+
   static void Shutdown();
 
 private:
   virtual ~nsSynthVoiceRegistry();
 
   VoiceData* FindBestMatch(const nsAString& aUri, const nsAString& lang);
 
   bool FindVoiceByLang(const nsAString& aLang, VoiceData** aRetval);
 
   nsresult AddVoiceImpl(nsISpeechService* aService,
                         const nsAString& aUri,
                         const nsAString& aName,
                         const nsAString& aLang,
                         bool aLocalService,
                         bool aQueuesUtterances);
 
-  nsTArray<nsRefPtr<VoiceData> > mVoices;
+  void SpeakImpl(VoiceData* aVoice,
+                 nsSpeechTask* aTask,
+                 const nsAString& aText,
+                 const float& aVolume,
+                 const float& aRate,
+                 const float& aPitch);
 
-  nsTArray<nsRefPtr<VoiceData> > mDefaultVoices;
+  nsTArray<nsRefPtr<VoiceData>> mVoices;
+
+  nsTArray<nsRefPtr<VoiceData>> mDefaultVoices;
 
   nsRefPtrHashtable<nsStringHashKey, VoiceData> mUriVoiceMap;
 
   SpeechSynthesisChild* mSpeechSynthChild;
 
   nsRefPtr<ProcessedMediaStream> mStream;
 
   bool mUseGlobalQueue;
+
+  nsTArray<nsRefPtr<GlobalQueueItem>> mGlobalQueue;
+
+  bool mIsSpeaking;
 };
 
 } // namespace dom
 } // namespace mozilla
 
 #endif
--- a/dom/webidl/SpeechSynthesis.webidl
+++ b/dom/webidl/SpeechSynthesis.webidl
@@ -18,9 +18,12 @@ interface SpeechSynthesis {
 
   [UnsafeInPrerendering]
   void speak(SpeechSynthesisUtterance utterance);
   void cancel();
   void pause();
   [UnsafeInPrerendering]
   void resume();
   sequence<SpeechSynthesisVoice> getVoices();
+
+  [ChromeOnly]
+  void dropGlobalQueue();
 };