Backed out 3 changesets (bug 1331696) for failing browser chrome on toolkit/components/narrate/test/browser_narrate.js r=backout on a CLOSED TREE
author arthur.iakab <aiakab@mozilla.com>
Thu, 07 Dec 2017 00:25:35 +0200
changeset 395305 efde0448d7bb7e5a7bef4ac6a9c734f87f2981a3
parent 395304 e82fc38fe40de7de72e9172ee66d4646e30818e1
child 395391 f19a94ec336a442baf631c7e325b605225c710b6
push id 56676
push user aiakab@mozilla.com
push date Wed, 06 Dec 2017 22:26:17 +0000
treeherder autoland@efde0448d7bb
reviewers backout
bugs 1331696
milestone 59.0a1
backs out 4f5d0c5f191b55fbc33bb3b41762cd60b8ed73d0
ce87e2d2f1db99745ff8f0aaf7bd28e3608c8a37
e2c5f5afb6fcf566c90360449c3072940c4e58cd
Backed out 3 changesets (bug 1331696) for failing browser chrome on toolkit/components/narrate/test/browser_narrate.js r=backout on a CLOSED TREE

Backed out changeset 4f5d0c5f191b (bug 1331696)
Backed out changeset ce87e2d2f1db (bug 1331696)
Backed out changeset e2c5f5afb6fc (bug 1331696)
dom/media/webspeech/synth/SpeechSynthesis.cpp
dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm
dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp
dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h
dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h
dom/media/webspeech/synth/moz.build
dom/media/webspeech/synth/nsISpeechService.idl
dom/media/webspeech/synth/nsSpeechTask.cpp
dom/media/webspeech/synth/nsSpeechTask.h
dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp
dom/media/webspeech/synth/nsSynthVoiceRegistry.h
dom/media/webspeech/synth/pico/PicoModule.cpp
dom/media/webspeech/synth/pico/moz.build
dom/media/webspeech/synth/pico/nsPicoService.cpp
dom/media/webspeech/synth/pico/nsPicoService.h
dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp
dom/media/webspeech/synth/test/file_global_queue_pause.html
dom/media/webspeech/synth/test/file_speech_queue.html
dom/media/webspeech/synth/test/nsFakeSynthServices.cpp
dom/media/webspeech/synth/test/nsFakeSynthServices.h
dom/media/webspeech/synth/windows/SapiService.cpp
old-configure.in
--- a/dom/media/webspeech/synth/SpeechSynthesis.cpp
+++ b/dom/media/webspeech/synth/SpeechSynthesis.cpp
@@ -204,35 +204,34 @@ SpeechSynthesis::Cancel()
 void
 SpeechSynthesis::Pause()
 {
   if (Paused()) {
     return;
   }
 
   if (mCurrentTask && !mSpeechQueue.IsEmpty() &&
-      mSpeechQueue.ElementAt(0)->GetState() == SpeechSynthesisUtterance::STATE_SPEAKING) {
+      mSpeechQueue.ElementAt(0)->GetState() != SpeechSynthesisUtterance::STATE_ENDED) {
     mCurrentTask->Pause();
   } else {
     mHoldQueue = true;
   }
 }
 
 void
 SpeechSynthesis::Resume()
 {
   if (!Paused()) {
     return;
   }
 
-  mHoldQueue = false;
-
   if (mCurrentTask) {
     mCurrentTask->Resume();
   } else {
+    mHoldQueue = false;
     AdvanceQueue();
   }
 }
 
 void
 SpeechSynthesis::OnEnd(const nsSpeechTask* aTask)
 {
   MOZ_ASSERT(mCurrentTask == aTask);
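The restored Pause()/Resume() logic above reduces to a small state machine: pausing either pauses the task driving the head utterance (as long as that utterance has not already ended) or holds the queue, and resuming only releases the hold when no task is active. A distilled sketch of that behavior, with illustrative names rather than actual Gecko code:

// Illustrative sketch of the restored queue semantics; not Gecko code.
struct SpeechQueueSketch {
  bool holdQueue = false;   // mHoldQueue
  bool taskActive = false;  // mCurrentTask != nullptr
  bool headEnded = false;   // head utterance reached STATE_ENDED

  void Pause() {
    if (taskActive && !headEnded) {
      // Pause the current task.
    } else {
      holdQueue = true;
    }
  }

  void Resume() {
    if (taskActive) {
      // Resume the current task; a held queue stays held.
    } else {
      holdQueue = false;
      // Advance the queue.
    }
  }
};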
--- a/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm
+++ b/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm
@@ -434,30 +434,37 @@ OSXSpeechSynthesizerService::Speak(const
       offsets.AppendElements(strlen(DLIM_ESCAPE_END));
     } else {
       escapedText.Append(aText[i]);
       offsets.AppendElement(i);
     }
   }
 
   RefPtr<SpeechTaskCallback> callback = new SpeechTaskCallback(aTask, synth, offsets);
-  nsresult rv = aTask->Setup(callback);
+  nsresult rv = aTask->Setup(callback, 0, 0, 0);
   NS_ENSURE_SUCCESS(rv, rv);
 
   NSString* text = nsCocoaUtils::ToNSString(escapedText);
   BOOL success = [synth startSpeakingString:text];
   NS_ENSURE_TRUE(success, NS_ERROR_FAILURE);
 
   aTask->DispatchStart();
   return NS_OK;
 
   NS_OBJC_END_TRY_ABORT_BLOCK_NSRESULT;
 }
 
 NS_IMETHODIMP
+OSXSpeechSynthesizerService::GetServiceType(SpeechServiceType* aServiceType)
+{
+  *aServiceType = nsISpeechService::SERVICETYPE_INDIRECT_AUDIO;
+  return NS_OK;
+}
+
+NS_IMETHODIMP
 OSXSpeechSynthesizerService::Observe(nsISupports* aSubject, const char* aTopic,
                                      const char16_t* aData)
 {
   return NS_OK;
 }
 
 OSXSpeechSynthesizerService*
 OSXSpeechSynthesizerService::GetInstance()
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp
@@ -161,17 +161,31 @@ SpeechSynthesisRequestChild::RecvOnMark(
 // SpeechTaskChild
 
 SpeechTaskChild::SpeechTaskChild(SpeechSynthesisUtterance* aUtterance, bool aIsChrome)
   : nsSpeechTask(aUtterance, aIsChrome)
 {
 }
 
 NS_IMETHODIMP
-SpeechTaskChild::Setup(nsISpeechTaskCallback* aCallback)
+SpeechTaskChild::Setup(nsISpeechTaskCallback* aCallback,
+                       uint32_t aChannels, uint32_t aRate, uint8_t argc)
+{
+  MOZ_CRASH("Should never be called from child");
+}
+
+NS_IMETHODIMP
+SpeechTaskChild::SendAudio(JS::Handle<JS::Value> aData, JS::Handle<JS::Value> aLandmarks,
+                           JSContext* aCx)
+{
+  MOZ_CRASH("Should never be called from child");
+}
+
+NS_IMETHODIMP
+SpeechTaskChild::SendAudioNative(int16_t* aData, uint32_t aDataLen)
 {
   MOZ_CRASH("Should never be called from child");
 }
 
 void
 SpeechTaskChild::Pause()
 {
   MOZ_ASSERT(mActor);
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h
@@ -80,17 +80,23 @@ protected:
 
 class SpeechTaskChild : public nsSpeechTask
 {
   friend class SpeechSynthesisRequestChild;
 public:
 
   explicit SpeechTaskChild(SpeechSynthesisUtterance* aUtterance, bool aIsChrome);
 
-  NS_IMETHOD Setup(nsISpeechTaskCallback* aCallback) override;
+  NS_IMETHOD Setup(nsISpeechTaskCallback* aCallback,
+                   uint32_t aChannels, uint32_t aRate, uint8_t argc) override;
+
+  NS_IMETHOD SendAudio(JS::Handle<JS::Value> aData, JS::Handle<JS::Value> aLandmarks,
+                       JSContext* aCx) override;
+
+  NS_IMETHOD SendAudioNative(int16_t* aData, uint32_t aDataLen) override;
 
   void Pause() override;
 
   void Resume() override;
 
   void Cancel() override;
 
   void ForceEnd() override;
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h
@@ -77,32 +77,32 @@ protected:
 
 class SpeechTaskParent : public nsSpeechTask
 {
   friend class SpeechSynthesisRequestParent;
 public:
   SpeechTaskParent(float aVolume, const nsAString& aUtterance, bool aIsChrome)
     : nsSpeechTask(aVolume, aUtterance, aIsChrome) {}
 
-  nsresult DispatchStartImpl(const nsAString& aUri) override;
+  nsresult DispatchStartImpl(const nsAString& aUri);
 
-  nsresult DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex) override;
+  nsresult DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex);
 
-  nsresult DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex) override;
+  nsresult DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex);
 
-  nsresult DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex) override;
+  nsresult DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex);
 
-  nsresult DispatchErrorImpl(float aElapsedTime, uint32_t aCharIndex) override;
+  nsresult DispatchErrorImpl(float aElapsedTime, uint32_t aCharIndex);
 
   nsresult DispatchBoundaryImpl(const nsAString& aName,
                                 float aElapsedTime, uint32_t aCharIndex,
-                                uint32_t aCharLength, uint8_t argc) override;
+                                uint32_t aCharLength, uint8_t argc);
 
   nsresult DispatchMarkImpl(const nsAString& aName,
-                            float aElapsedTime, uint32_t aCharIndex) override;
+                            float aElapsedTime, uint32_t aCharIndex);
 
 private:
   SpeechSynthesisRequestParent* mActor;
 };
 
 } // namespace dom
 } // namespace mozilla
 
--- a/dom/media/webspeech/synth/moz.build
+++ b/dom/media/webspeech/synth/moz.build
@@ -46,16 +46,19 @@ if CONFIG['MOZ_WEBSPEECH']:
         DIRS += ['windows']
 
     if CONFIG['MOZ_WIDGET_TOOLKIT'] == 'cocoa':
         DIRS += ['cocoa']
 
     if CONFIG['MOZ_SYNTH_SPEECHD']:
         DIRS += ['speechd']
 
+    if CONFIG['MOZ_SYNTH_PICO']:
+        DIRS += ['pico']
+
 IPDL_SOURCES += [
     'ipc/PSpeechSynthesis.ipdl',
     'ipc/PSpeechSynthesisRequest.ipdl',
 ]
 
 include('/ipc/chromium/chromium-config.mozbuild')
 
 FINAL_LIBRARY = 'xul'
--- a/dom/media/webspeech/synth/nsISpeechService.idl
+++ b/dom/media/webspeech/synth/nsISpeechService.idl
@@ -1,17 +1,21 @@
 /* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
  * You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "nsISupports.idl"
 
+typedef unsigned short SpeechServiceType;
+
 /**
- * A callback is implemented by the service.
+ * A callback is implemented by the service. For direct audio services,
+ * implementing these is required; the cancel method in particular is
+ * useful for shutting down the speech resources.
  */
 [scriptable, uuid(c576de0c-8a3d-4570-be7e-9876d3e5bed2)]
 interface nsISpeechTaskCallback : nsISupports
 {
   /**
    * The user or application has paused the speech.
    */
   void onPause();
@@ -23,34 +27,53 @@ interface nsISpeechTaskCallback : nsISup
 
   /**
    * The user or application has canceled the speech.
    */
   void onCancel();
 
   /**
    * The user or application has changed the volume of this speech.
+   * This is only used with the indirect audio service type.
    */
   void onVolumeChanged(in float aVolume);
 };
 
 
 /**
  * A task is associated with a single utterance. It is provided by the browser
  * to the service in the speak() method.
  */
 [scriptable, builtinclass, uuid(ad59949c-2437-4b35-8eeb-d760caab75c5)]
 interface nsISpeechTask : nsISupports
 {
   /**
    * Prepare browser for speech.
    *
    * @param aCallback callback object for mid-speech operations.
+   * @param aChannels number of audio channels. Only required
+   *                    in direct audio services
+   * @param aRate     audio rate. Only required in direct audio services
    */
-  void setup(in nsISpeechTaskCallback aCallback);
+  [optional_argc] void setup(in nsISpeechTaskCallback aCallback,
+                               [optional] in uint32_t aChannels,
+                               [optional] in uint32_t aRate);
+
+  /**
+   * Send audio data to browser.
+   *
+   * @param aData     an Int16Array with PCM-16 audio data.
+   * @param aLandmarks an array of sample offset and landmark pairs.
+   *                     Used for emitting boundary and mark events.
+   */
+  [implicit_jscontext]
+  void sendAudio(in jsval aData, in jsval aLandmarks);
+
+  [noscript]
+  void sendAudioNative([array, size_is(aDataLen)] in short aData, in unsigned long aDataLen);
 
   /**
    * Dispatch start event.
    */
   void dispatchStart();
 
   /**
    * Dispatch end event.
@@ -104,21 +127,25 @@ interface nsISpeechTask : nsISupports
    * @param aCharIndex   offset of spoken characters.
    */
   void dispatchMark(in DOMString aName, in float aElapsedTime, in unsigned long aCharIndex);
 };
 
 /**
  * The main interface of a speech synthesis service.
  *
- * A service is responsible for outputting audio.
- * The service dispatches events, starting with dispatchStart() and ending with
- * dispatchEnd or dispatchError().
- * A service must also respond with the correct actions and events in response
- * to implemented callback methods.
+ * A service's speak method could be implemented in two ways:
+ *  1. Indirect audio - the service is responsible for outputting audio.
+ *    The service calls the nsISpeechTask.dispatch* methods directly. Starting
+ *    with dispatchStart() and ending with dispatchEnd or dispatchError().
+ *
+ *  2. Direct audio - the service provides us with PCM-16 data, and we output it.
+ *    The service does not call the dispatch task methods directly. Instead,
+ *    audio information is provided at setup(), and audio data is sent with
+ *    sendAudio(). The utterance is terminated with an empty sendAudio().
  */
 [scriptable, uuid(9b7d59db-88ff-43d0-b6ee-9f63d042d08f)]
 interface nsISpeechService : nsISupports
 {
   /**
 * Speak the given text using the voice identified by the given uri. See
    * W3C Speech API spec for information about pitch and rate.
    * https://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html#utterance-attributes
@@ -129,15 +156,20 @@ interface nsISpeechService : nsISupports
    * @param aRate   rate to speak voice in.
    * @param aPitch  pitch to speak voice in.
    * @param aTask  task instance for utterance, used for sending events or audio
    *                 data back to browser.
    */
   void speak(in DOMString aText, in DOMString aUri,
              in float aVolume, in float aRate, in float aPitch,
              in nsISpeechTask aTask);
+
+  const SpeechServiceType SERVICETYPE_DIRECT_AUDIO = 1;
+  const SpeechServiceType SERVICETYPE_INDIRECT_AUDIO = 2;
+
+  readonly attribute SpeechServiceType serviceType;
 };
 
 %{C++
 // This is the service category speech services could use to start up as
 // a component.
 #define NS_SPEECH_SYNTH_STARTED "speech-synth-started"
 %}
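To make the two service types above concrete, here is a minimal sketch of a direct-audio speak() implementation. The MyService and MyCallback names and the one-second silent buffer are illustrative, but the task calls are exactly the ones declared above: Setup() with the audio parameters, SendAudioNative() for PCM-16 chunks, and a zero-length send to terminate the utterance.

// Hypothetical direct-audio service; it would report
// SERVICETYPE_DIRECT_AUDIO from its serviceType attribute.
NS_IMETHODIMP
MyService::Speak(const nsAString& aText, const nsAString& aUri,
                 float aVolume, float aRate, float aPitch,
                 nsISpeechTask* aTask)
{
  RefPtr<MyCallback> callback = new MyCallback();
  // Mono PCM-16 at 16000 Hz; argc == 2 because both optional
  // arguments are supplied.
  nsresult rv = aTask->Setup(callback, 1, 16000, 2);
  NS_ENSURE_SUCCESS(rv, rv);

  int16_t samples[16000] = {};  // one second of silence, for illustration
  rv = aTask->SendAudioNative(samples, 16000);
  NS_ENSURE_SUCCESS(rv, rv);

  // A zero-length send ends the utterance; the browser then fires "end".
  return aTask->SendAudioNative(nullptr, 0);
}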
--- a/dom/media/webspeech/synth/nsSpeechTask.cpp
+++ b/dom/media/webspeech/synth/nsSpeechTask.cpp
@@ -2,30 +2,141 @@
 /* vim:set ts=2 sw=2 sts=2 et cindent: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "AudioChannelAgent.h"
 #include "AudioChannelService.h"
 #include "AudioSegment.h"
+#include "MediaStreamListener.h"
 #include "nsSpeechTask.h"
 #include "nsSynthVoiceRegistry.h"
 #include "SharedBuffer.h"
 #include "SpeechSynthesis.h"
 
+// GetCurrentTime is defined in winbase.h as a zero-argument macro forwarding
+// to GetTickCount(), and conflicts with nsSpeechTask::GetCurrentTime().
+#ifdef GetCurrentTime
+#undef GetCurrentTime
+#endif
+
 #undef LOG
 extern mozilla::LogModule* GetSpeechSynthLog();
 #define LOG(type, msg) MOZ_LOG(GetSpeechSynthLog(), type, msg)
 
 #define AUDIO_TRACK 1
 
 namespace mozilla {
 namespace dom {
 
+class SynthStreamListener : public MediaStreamListener
+{
+public:
+  SynthStreamListener(nsSpeechTask* aSpeechTask,
+                      MediaStream* aStream,
+                      AbstractThread* aMainThread)
+    : mSpeechTask(aSpeechTask)
+    , mStream(aStream)
+    , mStarted(false)
+  {
+  }
+
+  void DoNotifyStarted()
+  {
+    if (mSpeechTask) {
+      mSpeechTask->DispatchStartInner();
+    }
+  }
+
+  void DoNotifyFinished()
+  {
+    if (mSpeechTask) {
+      mSpeechTask->DispatchEndInner(mSpeechTask->GetCurrentTime(),
+                                    mSpeechTask->GetCurrentCharOffset());
+    }
+  }
+
+  void NotifyEvent(MediaStreamGraph* aGraph,
+                   MediaStreamGraphEvent event) override
+  {
+    switch (event) {
+      case MediaStreamGraphEvent::EVENT_FINISHED:
+        {
+          RefPtr<SynthStreamListener> self = this;
+          if (!mStarted) {
+            mStarted = true;
+            aGraph->DispatchToMainThreadAfterStreamStateUpdate(
+              NS_NewRunnableFunction(
+                "dom::SynthStreamListener::NotifyEvent",
+                [self] {
+                  // "start" event will be fired in DoNotifyStarted() which is
+                  // not allowed in stable state, so we do it asynchronously in
+                  // next run.
+                  NS_DispatchToMainThread(NewRunnableMethod(
+                    "dom::SynthStreamListener::DoNotifyStarted",
+                    self,
+                    &SynthStreamListener::DoNotifyStarted));
+                }));
+          }
+
+          aGraph->DispatchToMainThreadAfterStreamStateUpdate(
+            NS_NewRunnableFunction(
+              "dom::SynthStreamListener::NotifyEvent",
+              [self] {
+                // "end" event will be fired in DoNotifyFinished() which is
+                // not allowed in stable state, so we do it asynchronously in
+                // next run.
+                NS_DispatchToMainThread(NewRunnableMethod(
+                  "dom::SynthStreamListener::DoNotifyFinished",
+                  self,
+                  &SynthStreamListener::DoNotifyFinished));
+              }));
+        }
+        break;
+      case MediaStreamGraphEvent::EVENT_REMOVED:
+        mSpeechTask = nullptr;
+        // Drop the MediaStream reference so it can be destroyed safely.
+        mStream = nullptr;
+        break;
+      default:
+        break;
+    }
+  }
+
+  void NotifyBlockingChanged(MediaStreamGraph* aGraph, Blocking aBlocked) override
+  {
+    if (aBlocked == MediaStreamListener::UNBLOCKED && !mStarted) {
+      mStarted = true;
+      RefPtr<SynthStreamListener> self = this;
+      aGraph->DispatchToMainThreadAfterStreamStateUpdate(
+        NS_NewRunnableFunction(
+          "dom::SynthStreamListener::NotifyBlockingChanged",
+          [self] {
+            // "start" event will be fired in DoNotifyStarted() which is
+            // not allowed in stable state, so we do it asynchronously in
+            // next run.
+            NS_DispatchToMainThread(NewRunnableMethod(
+              "dom::SynthStreamListener::DoNotifyStarted",
+              self,
+              &SynthStreamListener::DoNotifyStarted));
+          }));
+    }
+  }
+
+private:
+  // Raw pointer; if we exist, the stream exists,
+  // and 'mSpeechTask' exclusively owns it and therefore exists as well.
+  nsSpeechTask* mSpeechTask;
+  // This is a KungFuDeathGrip for the MediaStream.
+  RefPtr<MediaStream> mStream;
+
+  bool mStarted;
+};
+
 // nsSpeechTask
 
 NS_IMPL_CYCLE_COLLECTION(nsSpeechTask, mSpeechSynthesis, mUtterance, mCallback);
 
 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsSpeechTask)
   NS_INTERFACE_MAP_ENTRY(nsISpeechTask)
   NS_INTERFACE_MAP_ENTRY(nsIAudioChannelAgentCallback)
   NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
@@ -36,80 +147,268 @@ NS_IMPL_CYCLE_COLLECTING_ADDREF(nsSpeech
 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsSpeechTask)
 
 nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance, bool aIsChrome)
   : mUtterance(aUtterance)
   , mInited(false)
   , mPrePaused(false)
   , mPreCanceled(false)
   , mCallback(nullptr)
+  , mIndirectAudio(false)
   , mIsChrome(aIsChrome)
 {
   mText = aUtterance->mText;
   mVolume = aUtterance->Volume();
 }
 
 nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText, bool aIsChrome)
   : mUtterance(nullptr)
   , mVolume(aVolume)
   , mText(aText)
   , mInited(false)
   , mPrePaused(false)
   , mPreCanceled(false)
   , mCallback(nullptr)
+  , mIndirectAudio(false)
   , mIsChrome(aIsChrome)
 {
 }
 
 nsSpeechTask::~nsSpeechTask()
 {
   LOG(LogLevel::Debug, ("~nsSpeechTask"));
+  if (mStream) {
+    if (!mStream->IsDestroyed()) {
+      mStream->Destroy();
+    }
+
+    // This will finally be destroyed by SynthStreamListener because
+    // MediaStream::Destroy() is async.
+    mStream = nullptr;
+  }
+
+  if (mPort) {
+    mPort->Destroy();
+    mPort = nullptr;
+  }
 }
 
 void
-nsSpeechTask::Init()
+nsSpeechTask::InitDirectAudio()
 {
+  // nullptr as final argument here means that this is not tied to a window.
+  // This is a global MSG.
+  mStream = MediaStreamGraph::GetInstance(MediaStreamGraph::AUDIO_THREAD_DRIVER,
+                                          nullptr)->
+    CreateSourceStream();
+  mIndirectAudio = false;
+  mInited = true;
+}
+
+void
+nsSpeechTask::InitIndirectAudio()
+{
+  mIndirectAudio = true;
   mInited = true;
 }
 
 void
 nsSpeechTask::SetChosenVoiceURI(const nsAString& aUri)
 {
   mChosenVoiceURI = aUri;
 }
 
 NS_IMETHODIMP
-nsSpeechTask::Setup(nsISpeechTaskCallback* aCallback)
+nsSpeechTask::Setup(nsISpeechTaskCallback* aCallback,
+                    uint32_t aChannels, uint32_t aRate, uint8_t argc)
 {
   MOZ_ASSERT(XRE_IsParentProcess());
 
   LOG(LogLevel::Debug, ("nsSpeechTask::Setup"));
 
   mCallback = aCallback;
 
+  if (mIndirectAudio) {
+    MOZ_ASSERT(!mStream);
+    if (argc > 0) {
+      NS_WARNING("Audio info arguments in Setup() are ignored for indirect audio services.");
+    }
+    return NS_OK;
+  }
+
+  // mStream is set up in InitDirectAudio(), which should be called before this.
+  MOZ_ASSERT(mStream);
+
+  mStream->AddListener(
+    // Non-DocGroup version of AbstractThread::MainThread for the task in the parent.
+    new SynthStreamListener(this, mStream, AbstractThread::MainThread()));
+
+  // XXX: Support more than one channel
+  if (NS_WARN_IF(aChannels != 1)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  mChannels = aChannels;
+
+  AudioSegment* segment = new AudioSegment();
+  mStream->AddAudioTrack(AUDIO_TRACK, aRate, 0, segment);
+  mStream->AddAudioOutput(this);
+  mStream->SetAudioOutputVolume(this, mVolume);
+
+  return NS_OK;
+}
+
+static RefPtr<mozilla::SharedBuffer>
+makeSamples(int16_t* aData, uint32_t aDataLen)
+{
+  RefPtr<mozilla::SharedBuffer> samples =
+    SharedBuffer::Create(aDataLen * sizeof(int16_t));
+  int16_t* frames = static_cast<int16_t*>(samples->Data());
+
+  for (uint32_t i = 0; i < aDataLen; i++) {
+    frames[i] = aData[i];
+  }
+
+  return samples;
+}
+
+NS_IMETHODIMP
+nsSpeechTask::SendAudio(JS::Handle<JS::Value> aData, JS::Handle<JS::Value> aLandmarks,
+                        JSContext* aCx)
+{
+  MOZ_ASSERT(XRE_IsParentProcess());
+
+  if(NS_WARN_IF(!(mStream))) {
+    return NS_ERROR_NOT_AVAILABLE;
+  }
+  if(NS_WARN_IF(mStream->IsDestroyed())) {
+    return NS_ERROR_NOT_AVAILABLE;
+  }
+  if(NS_WARN_IF(!(mChannels))) {
+    return NS_ERROR_FAILURE;
+  }
+  if(NS_WARN_IF(!(aData.isObject()))) {
+    return NS_ERROR_INVALID_ARG;
+  }
+
+  if (mIndirectAudio) {
+    NS_WARNING("Can't call SendAudio from an indirect audio speech service.");
+    return NS_ERROR_FAILURE;
+  }
+
+  JS::Rooted<JSObject*> darray(aCx, &aData.toObject());
+  JSAutoCompartment ac(aCx, darray);
+
+  JS::Rooted<JSObject*> tsrc(aCx, nullptr);
+
+  // Allow either Int16Array or plain JS Array
+  if (JS_IsInt16Array(darray)) {
+    tsrc = darray;
+  } else {
+    bool isArray;
+    if (!JS_IsArrayObject(aCx, darray, &isArray)) {
+      return NS_ERROR_UNEXPECTED;
+    }
+    if (isArray) {
+      tsrc = JS_NewInt16ArrayFromArray(aCx, darray);
+    }
+  }
+
+  if (!tsrc) {
+    return NS_ERROR_DOM_TYPE_MISMATCH_ERR;
+  }
+
+  uint32_t dataLen = JS_GetTypedArrayLength(tsrc);
+  RefPtr<mozilla::SharedBuffer> samples;
+  {
+    JS::AutoCheckCannotGC nogc;
+    bool isShared;
+    int16_t* data = JS_GetInt16ArrayData(tsrc, &isShared, nogc);
+    if (isShared) {
+      // Must opt in to using shared data.
+      return NS_ERROR_DOM_TYPE_MISMATCH_ERR;
+    }
+    samples = makeSamples(data, dataLen);
+  }
+  SendAudioImpl(samples, dataLen);
+
   return NS_OK;
 }
 
 NS_IMETHODIMP
+nsSpeechTask::SendAudioNative(int16_t* aData, uint32_t aDataLen)
+{
+  MOZ_ASSERT(XRE_IsParentProcess());
+
+  if(NS_WARN_IF(!(mStream))) {
+    return NS_ERROR_NOT_AVAILABLE;
+  }
+  if(NS_WARN_IF(mStream->IsDestroyed())) {
+    return NS_ERROR_NOT_AVAILABLE;
+  }
+  if(NS_WARN_IF(!(mChannels))) {
+    return NS_ERROR_FAILURE;
+  }
+
+  if (mIndirectAudio) {
+    NS_WARNING("Can't call SendAudio from an indirect audio speech service.");
+    return NS_ERROR_FAILURE;
+  }
+
+  RefPtr<mozilla::SharedBuffer> samples = makeSamples(aData, aDataLen);
+  SendAudioImpl(samples, aDataLen);
+
+  return NS_OK;
+}
+
+void
+nsSpeechTask::SendAudioImpl(RefPtr<mozilla::SharedBuffer>& aSamples, uint32_t aDataLen)
+{
+  if (aDataLen == 0) {
+    mStream->EndAllTrackAndFinish();
+    return;
+  }
+
+  AudioSegment segment;
+  AutoTArray<const int16_t*, 1> channelData;
+  channelData.AppendElement(static_cast<int16_t*>(aSamples->Data()));
+  segment.AppendFrames(aSamples.forget(), channelData, aDataLen,
+                       PRINCIPAL_HANDLE_NONE);
+  mStream->AppendToTrack(AUDIO_TRACK, &segment);
+  mStream->AdvanceKnownTracksTime(STREAM_TIME_MAX);
+}
+
+NS_IMETHODIMP
 nsSpeechTask::DispatchStart()
 {
+  if (!mIndirectAudio) {
+    NS_WARNING("Can't call DispatchStart() from a direct audio speech service");
+    return NS_ERROR_FAILURE;
+  }
+
+  return DispatchStartInner();
+}
+
+nsresult
+nsSpeechTask::DispatchStartInner()
+{
   nsSynthVoiceRegistry::GetInstance()->SetIsSpeaking(true);
   return DispatchStartImpl();
 }
 
 nsresult
 nsSpeechTask::DispatchStartImpl()
 {
   return DispatchStartImpl(mChosenVoiceURI);
 }
 
 nsresult
 nsSpeechTask::DispatchStartImpl(const nsAString& aUri)
 {
-  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchStartImpl"));
+  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchStart"));
 
   MOZ_ASSERT(mUtterance);
   if(NS_WARN_IF(!(mUtterance->mState == SpeechSynthesisUtterance::STATE_PENDING))) {
     return NS_ERROR_NOT_AVAILABLE;
   }
 
   CreateAudioChannelAgent();
 
@@ -119,38 +418,51 @@ nsSpeechTask::DispatchStartImpl(const ns
                                            nullptr, 0, EmptyString());
 
   return NS_OK;
 }
 
 NS_IMETHODIMP
 nsSpeechTask::DispatchEnd(float aElapsedTime, uint32_t aCharIndex)
 {
-  // After we end, no callback functions should go through.
-  mCallback = nullptr;
+  if (!mIndirectAudio) {
+    NS_WARNING("Can't call DispatchEnd() from a direct audio speech service");
+    return NS_ERROR_FAILURE;
+  }
 
+  return DispatchEndInner(aElapsedTime, aCharIndex);
+}
+
+nsresult
+nsSpeechTask::DispatchEndInner(float aElapsedTime, uint32_t aCharIndex)
+{
   if (!mPreCanceled) {
     nsSynthVoiceRegistry::GetInstance()->SpeakNext();
   }
 
   return DispatchEndImpl(aElapsedTime, aCharIndex);
 }
 
 nsresult
 nsSpeechTask::DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex)
 {
-  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchEndImpl"));
+  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchEnd\n"));
 
   DestroyAudioChannelAgent();
 
   MOZ_ASSERT(mUtterance);
   if(NS_WARN_IF(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED)) {
     return NS_ERROR_NOT_AVAILABLE;
   }
 
+  // XXX: This should not be here, but it prevents a crash in MSG.
+  if (mStream) {
+    mStream->Destroy();
+  }
+
   RefPtr<SpeechSynthesisUtterance> utterance = mUtterance;
 
   if (mSpeechSynthesis) {
     mSpeechSynthesis->OnEnd(this);
   }
 
   if (utterance->mState == SpeechSynthesisUtterance::STATE_PENDING) {
     utterance->mState = SpeechSynthesisUtterance::STATE_NONE;
@@ -162,23 +474,28 @@ nsSpeechTask::DispatchEndImpl(float aEla
   }
 
   return NS_OK;
 }
 
 NS_IMETHODIMP
 nsSpeechTask::DispatchPause(float aElapsedTime, uint32_t aCharIndex)
 {
+  if (!mIndirectAudio) {
+    NS_WARNING("Can't call DispatchPause() from a direct audio speech service");
+    return NS_ERROR_FAILURE;
+  }
+
   return DispatchPauseImpl(aElapsedTime, aCharIndex);
 }
 
 nsresult
 nsSpeechTask::DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex)
 {
-  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchPauseImpl"));
+  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchPause"));
   MOZ_ASSERT(mUtterance);
   if(NS_WARN_IF(mUtterance->mPaused)) {
     return NS_ERROR_NOT_AVAILABLE;
   }
   if(NS_WARN_IF(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED)) {
     return NS_ERROR_NOT_AVAILABLE;
   }
 
@@ -189,23 +506,28 @@ nsSpeechTask::DispatchPauseImpl(float aE
                                              EmptyString());
   }
   return NS_OK;
 }
 
 NS_IMETHODIMP
 nsSpeechTask::DispatchResume(float aElapsedTime, uint32_t aCharIndex)
 {
+  if (!mIndirectAudio) {
+    NS_WARNING("Can't call DispatchResume() from a direct audio speech service");
+    return NS_ERROR_FAILURE;
+  }
+
   return DispatchResumeImpl(aElapsedTime, aCharIndex);
 }
 
 nsresult
 nsSpeechTask::DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex)
 {
-  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchResumeImpl"));
+  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchResume"));
   MOZ_ASSERT(mUtterance);
   if(NS_WARN_IF(!(mUtterance->mPaused))) {
     return NS_ERROR_NOT_AVAILABLE;
   }
   if(NS_WARN_IF(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED)) {
     return NS_ERROR_NOT_AVAILABLE;
   }
 
@@ -217,24 +539,35 @@ nsSpeechTask::DispatchResumeImpl(float a
   }
 
   return NS_OK;
 }
 
 void
 nsSpeechTask::ForceError(float aElapsedTime, uint32_t aCharIndex)
 {
-  DispatchError(aElapsedTime, aCharIndex);
+  DispatchErrorInner(aElapsedTime, aCharIndex);
 }
 
 NS_IMETHODIMP
 nsSpeechTask::DispatchError(float aElapsedTime, uint32_t aCharIndex)
 {
   LOG(LogLevel::Debug, ("nsSpeechTask::DispatchError"));
 
+  if (!mIndirectAudio) {
+    NS_WARNING("Can't call DispatchError() from a direct audio speech service");
+    return NS_ERROR_FAILURE;
+  }
+
+  return DispatchErrorInner(aElapsedTime, aCharIndex);
+}
+
+nsresult
+nsSpeechTask::DispatchErrorInner(float aElapsedTime, uint32_t aCharIndex)
+{
   if (!mPreCanceled) {
     nsSynthVoiceRegistry::GetInstance()->SpeakNext();
   }
 
   return DispatchErrorImpl(aElapsedTime, aCharIndex);
 }
 
 nsresult
@@ -256,16 +589,21 @@ nsSpeechTask::DispatchErrorImpl(float aE
   return NS_OK;
 }
 
 NS_IMETHODIMP
 nsSpeechTask::DispatchBoundary(const nsAString& aName,
                                float aElapsedTime, uint32_t aCharIndex,
                                uint32_t aCharLength, uint8_t argc)
 {
+  if (!mIndirectAudio) {
+    NS_WARNING("Can't call DispatchBoundary() from a direct audio speech service");
+    return NS_ERROR_FAILURE;
+  }
+
   return DispatchBoundaryImpl(aName, aElapsedTime, aCharIndex, aCharLength, argc);
 }
 
 nsresult
 nsSpeechTask::DispatchBoundaryImpl(const nsAString& aName,
                                    float aElapsedTime, uint32_t aCharIndex,
                                    uint32_t aCharLength, uint8_t argc)
 {
@@ -280,16 +618,21 @@ nsSpeechTask::DispatchBoundaryImpl(const
 
   return NS_OK;
 }
 
 NS_IMETHODIMP
 nsSpeechTask::DispatchMark(const nsAString& aName,
                            float aElapsedTime, uint32_t aCharIndex)
 {
+  if (!mIndirectAudio) {
+    NS_WARNING("Can't call DispatchMark() from a direct audio speech service");
+    return NS_ERROR_FAILURE;
+  }
+
   return DispatchMarkImpl(aName, aElapsedTime, aCharIndex);
 }
 
 nsresult
 nsSpeechTask::DispatchMarkImpl(const nsAString& aName,
                                float aElapsedTime, uint32_t aCharIndex)
 {
   MOZ_ASSERT(mUtterance);
@@ -308,64 +651,104 @@ nsSpeechTask::Pause()
 {
   MOZ_ASSERT(XRE_IsParentProcess());
 
   if (mCallback) {
     DebugOnly<nsresult> rv = mCallback->OnPause();
     NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Unable to call onPause() callback");
   }
 
+  if (mStream) {
+    mStream->Suspend();
+  }
+
   if (!mInited) {
     mPrePaused = true;
   }
+
+  if (!mIndirectAudio) {
+    DispatchPauseImpl(GetCurrentTime(), GetCurrentCharOffset());
+  }
 }
 
 void
 nsSpeechTask::Resume()
 {
   MOZ_ASSERT(XRE_IsParentProcess());
 
   if (mCallback) {
     DebugOnly<nsresult> rv = mCallback->OnResume();
     NS_WARNING_ASSERTION(NS_SUCCEEDED(rv),
                          "Unable to call onResume() callback");
   }
 
+  if (mStream) {
+    mStream->Resume();
+  }
+
   if (mPrePaused) {
     mPrePaused = false;
     nsSynthVoiceRegistry::GetInstance()->ResumeQueue();
   }
+
+  if (!mIndirectAudio) {
+    DispatchResumeImpl(GetCurrentTime(), GetCurrentCharOffset());
+  }
 }
 
 void
 nsSpeechTask::Cancel()
 {
   MOZ_ASSERT(XRE_IsParentProcess());
 
   LOG(LogLevel::Debug, ("nsSpeechTask::Cancel"));
 
   if (mCallback) {
     DebugOnly<nsresult> rv = mCallback->OnCancel();
     NS_WARNING_ASSERTION(NS_SUCCEEDED(rv),
                          "Unable to call onCancel() callback");
   }
 
+  if (mStream) {
+    mStream->Suspend();
+  }
+
   if (!mInited) {
     mPreCanceled = true;
   }
+
+  if (!mIndirectAudio) {
+    DispatchEndInner(GetCurrentTime(), GetCurrentCharOffset());
+  }
 }
 
 void
 nsSpeechTask::ForceEnd()
 {
+  if (mStream) {
+    mStream->Suspend();
+  }
+
   if (!mInited) {
     mPreCanceled = true;
   }
 
-  DispatchEnd(0, 0);
+  DispatchEndInner(GetCurrentTime(), GetCurrentCharOffset());
+}
+
+float
+nsSpeechTask::GetCurrentTime()
+{
+  return mStream ? (float)(mStream->GetCurrentTime() / 1000000.0) : 0;
+}
+
+uint32_t
+nsSpeechTask::GetCurrentCharOffset()
+{
+  return mStream && mStream->IsFinished() ? mText.Length() : 0;
 }
 
 void
 nsSpeechTask::SetSpeechSynthesis(SpeechSynthesis* aSpeechSynthesis)
 {
   mSpeechSynthesis = aSpeechSynthesis;
 }
 
@@ -432,15 +815,18 @@ nsSpeechTask::WindowAudioCaptureChanged(
 {
   // This is not supported yet.
   return NS_OK;
 }
 
 void
 nsSpeechTask::SetAudioOutputVolume(float aVolume)
 {
-  if (mCallback) {
+  if (mStream && !mStream->IsDestroyed()) {
+    mStream->SetAudioOutputVolume(this, aVolume);
+  }
+  if (mIndirectAudio && mCallback) {
     mCallback->OnVolumeChanged(aVolume);
   }
 }
 
 } // namespace dom
 } // namespace mozilla
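For contrast with the direct-audio sketch earlier, an indirect-audio service passes no audio information to Setup() and reports progress itself through the dispatch* methods, which the mIndirectAudio guards added above reserve for that service type. A hypothetical sketch (MyEngineService, MyCallback, and the timing values are illustrative; the shape matches the OSX service in this diff):

NS_IMETHODIMP
MyEngineService::Speak(const nsAString& aText, const nsAString& aUri,
                       float aVolume, float aRate, float aPitch,
                       nsISpeechTask* aTask)
{
  RefPtr<MyCallback> callback = new MyCallback();
  nsresult rv = aTask->Setup(callback, 0, 0, 0);  // no audio info needed
  NS_ENSURE_SUCCESS(rv, rv);

  // The platform engine outputs the audio; we only report events.
  aTask->DispatchStart();
  // argc == 1: the optional aCharLength argument is supplied.
  aTask->DispatchBoundary(NS_LITERAL_STRING("word"), 0.1f, 0, 4, 1);
  return aTask->DispatchEnd(1.0f, aText.Length());
}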
--- a/dom/media/webspeech/synth/nsSpeechTask.h
+++ b/dom/media/webspeech/synth/nsSpeechTask.h
@@ -2,16 +2,17 @@
 /* vim:set ts=2 sw=2 sts=2 et cindent: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifndef mozilla_dom_nsSpeechTask_h
 #define mozilla_dom_nsSpeechTask_h
 
+#include "MediaStreamGraph.h"
 #include "SpeechSynthesisUtterance.h"
 #include "nsIAudioChannelAgent.h"
 #include "nsISpeechService.h"
 
 namespace mozilla {
 
 class SharedBuffer;
 
@@ -40,31 +41,45 @@ public:
   virtual void Pause();
 
   virtual void Resume();
 
   virtual void Cancel();
 
   virtual void ForceEnd();
 
+  float GetCurrentTime();
+
+  uint32_t GetCurrentCharOffset();
+
   void SetSpeechSynthesis(SpeechSynthesis* aSpeechSynthesis);
 
-  void Init();
+  void InitDirectAudio();
+  void InitIndirectAudio();
 
   void SetChosenVoiceURI(const nsAString& aUri);
 
   virtual void SetAudioOutputVolume(float aVolume);
 
   void ForceError(float aElapsedTime, uint32_t aCharIndex);
 
-  bool IsPreCanceled() { return mPreCanceled; };
+  bool IsPreCanceled()
+  {
+    return mPreCanceled;
+  };
 
-  bool IsPrePaused() { return mPrePaused; }
+  bool IsPrePaused()
+  {
+    return mPrePaused;
+  }
 
-  bool IsChrome() { return mIsChrome; }
+  bool IsChrome()
+  {
+    return mIsChrome;
+  }
 
 protected:
   virtual ~nsSpeechTask();
 
   nsresult DispatchStartImpl();
 
   virtual nsresult DispatchStartImpl(const nsAString& aUri);
 
@@ -95,26 +110,41 @@ protected:
 
   bool mPrePaused;
 
   bool mPreCanceled;
 
 private:
   void End();
 
+  void SendAudioImpl(RefPtr<mozilla::SharedBuffer>& aSamples, uint32_t aDataLen);
+
+  nsresult DispatchStartInner();
+
+  nsresult DispatchErrorInner(float aElapsedTime, uint32_t aCharIndex);
+  nsresult DispatchEndInner(float aElapsedTime, uint32_t aCharIndex);
+
   void CreateAudioChannelAgent();
 
   void DestroyAudioChannelAgent();
 
+  RefPtr<SourceMediaStream> mStream;
+
+  RefPtr<MediaInputPort> mPort;
+
   nsCOMPtr<nsISpeechTaskCallback> mCallback;
 
   nsCOMPtr<nsIAudioChannelAgent> mAudioChannelAgent;
 
+  uint32_t mChannels;
+
   RefPtr<SpeechSynthesis> mSpeechSynthesis;
 
+  bool mIndirectAudio;
+
   nsString mChosenVoiceURI;
 
   bool mIsChrome;
 };
 
 } // namespace dom
 } // namespace mozilla
 
--- a/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp
+++ b/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp
@@ -819,18 +819,30 @@ nsSynthVoiceRegistry::SpeakImpl(VoiceDat
                                 const float& aRate,
                                 const float& aPitch)
 {
   LOG(LogLevel::Debug,
       ("nsSynthVoiceRegistry::SpeakImpl queueing text='%s' uri='%s' rate=%f pitch=%f",
        NS_ConvertUTF16toUTF8(aText).get(), NS_ConvertUTF16toUTF8(aVoice->mUri).get(),
        aRate, aPitch));
 
-  aTask->Init();
+  SpeechServiceType serviceType;
+
+  DebugOnly<nsresult> rv = aVoice->mService->GetServiceType(&serviceType);
+  NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Failed to get speech service type");
+
+  if (serviceType == nsISpeechService::SERVICETYPE_INDIRECT_AUDIO) {
+    aTask->InitIndirectAudio();
+  } else {
+    aTask->InitDirectAudio();
+  }
 
   if (NS_FAILED(aVoice->mService->Speak(aText, aVoice->mUri, aVolume, aRate,
                                         aPitch, aTask))) {
-    aTask->DispatchError(0, 0);
+    if (serviceType == nsISpeechService::SERVICETYPE_INDIRECT_AUDIO) {
+      aTask->DispatchError(0, 0);
+    }
+    // XXX When using direct audio, no way to dispatch error
   }
 }
 
 } // namespace dom
 } // namespace mozilla
--- a/dom/media/webspeech/synth/nsSynthVoiceRegistry.h
+++ b/dom/media/webspeech/synth/nsSynthVoiceRegistry.h
@@ -5,16 +5,17 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifndef mozilla_dom_nsSynthVoiceRegistry_h
 #define mozilla_dom_nsSynthVoiceRegistry_h
 
 #include "nsISynthVoiceRegistry.h"
 #include "nsRefPtrHashtable.h"
 #include "nsTArray.h"
+#include "MediaStreamGraph.h"
 
 class nsISpeechService;
 
 namespace mozilla {
 namespace dom {
 
 class RemoteVoice;
 class SpeechSynthesisUtterance;
new file mode 100644
--- /dev/null
+++ b/dom/media/webspeech/synth/pico/PicoModule.cpp
@@ -0,0 +1,58 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/ModuleUtils.h"
+#include "nsIClassInfoImpl.h"
+
+#ifdef MOZ_WEBRTC
+
+#include "nsPicoService.h"
+
+using namespace mozilla::dom;
+
+#define PICOSERVICE_CID \
+  {0x346c4fc8, 0x12fe, 0x459c, {0x81, 0x19, 0x9a, 0xa7, 0x73, 0x37, 0x7f, 0xf4}}
+
+#define PICOSERVICE_CONTRACTID "@mozilla.org/synthpico;1"
+
+// Defines nsPicoServiceConstructor
+NS_GENERIC_FACTORY_SINGLETON_CONSTRUCTOR(nsPicoService,
+                                         nsPicoService::GetInstanceForService)
+
+// Defines kPICOSERVICE_CID
+NS_DEFINE_NAMED_CID(PICOSERVICE_CID);
+
+static const mozilla::Module::CIDEntry kCIDs[] = {
+  { &kPICOSERVICE_CID, true, nullptr, nsPicoServiceConstructor },
+  { nullptr }
+};
+
+static const mozilla::Module::ContractIDEntry kContracts[] = {
+  { PICOSERVICE_CONTRACTID, &kPICOSERVICE_CID },
+  { nullptr }
+};
+
+static const mozilla::Module::CategoryEntry kCategories[] = {
+  { "profile-after-change", "Pico Speech Synth", PICOSERVICE_CONTRACTID },
+  { nullptr }
+};
+
+static void
+UnloadPicoModule()
+{
+  nsPicoService::Shutdown();
+}
+
+static const mozilla::Module kModule = {
+  mozilla::Module::kVersion,
+  kCIDs,
+  kContracts,
+  kCategories,
+  nullptr,
+  nullptr,
+  UnloadPicoModule
+};
+
+NSMODULE_DEFN(synthpico) = &kModule;
+#endif
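The category entry above gets the service instantiated at "profile-after-change", which is when nsPicoService::Observe() (below) spins up its worker thread and enumerates voices. A consumer could also obtain it through the component manager; a usage sketch, not part of this changeset:

#include "nsServiceManagerUtils.h"

nsCOMPtr<nsISpeechService> pico = do_GetService("@mozilla.org/synthpico;1");
if (pico) {
  SpeechServiceType type;
  pico->GetServiceType(&type);  // SERVICETYPE_DIRECT_AUDIO for Pico
}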
new file mode 100644
--- /dev/null
+++ b/dom/media/webspeech/synth/pico/moz.build
@@ -0,0 +1,13 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+UNIFIED_SOURCES += [
+    'nsPicoService.cpp',
+    'PicoModule.cpp'
+]
+include('/ipc/chromium/chromium-config.mozbuild')
+
+FINAL_LIBRARY = 'xul'
new file mode 100644
--- /dev/null
+++ b/dom/media/webspeech/synth/pico/nsPicoService.cpp
@@ -0,0 +1,762 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsISupports.h"
+#include "nsPicoService.h"
+#include "nsPrintfCString.h"
+#include "nsIWeakReferenceUtils.h"
+#include "SharedBuffer.h"
+#include "nsISimpleEnumerator.h"
+
+#include "mozilla/dom/nsSynthVoiceRegistry.h"
+#include "mozilla/dom/nsSpeechTask.h"
+
+#include "nsIFile.h"
+#include "nsThreadUtils.h"
+#include "prenv.h"
+#include "mozilla/Preferences.h"
+#include "mozilla/DebugOnly.h"
+#include <dlfcn.h>
+
+// Pico API constants
+
+// Size of memory allocated for pico engine and voice resources.
+// We only have one voice and its resources loaded at once, so this
+// should always be enough.
+#define PICO_MEM_SIZE 2500000
+
+// Max length of returned strings. Pico will never return longer strings,
+// so this amount should be good enough for preallocating.
+#define PICO_RETSTRINGSIZE 200
+
+// Max amount we want from a single call of pico_getData
+#define PICO_MAX_CHUNK_SIZE 128
+
+// Arbitrary name for the loaded voice; it doesn't mean anything outside of Pico
+#define PICO_VOICE_NAME "pico"
+
+// Return status from pico_getData meaning there is more data in the pipeline
+// to fetch with further calls to pico_getData.
+#define PICO_STEP_BUSY 201
+
+// For performing a "soft" reset between utterances. This is used when one
+// utterance is interrupted by a new one.
+#define PICO_RESET_SOFT 0x10
+
+// Currently, Pico only provides mono output.
+#define PICO_CHANNELS_NUM 1
+
+// Pico's sample rate is always 16000
+#define PICO_SAMPLE_RATE 16000
+
+// The path to the language files in Android
+#define PICO_LANG_PATH "/system/tts/lang_pico"
+
+namespace mozilla {
+namespace dom {
+
+StaticRefPtr<nsPicoService> nsPicoService::sSingleton;
+
+class PicoApi
+{
+public:
+
+  PicoApi() : mInitialized(false) {}
+
+  bool Init()
+  {
+    if (mInitialized) {
+      return true;
+    }
+
+    void* handle = dlopen("libttspico.so", RTLD_LAZY);
+
+    if (!handle) {
+      NS_WARNING("Failed to open libttspico.so, pico cannot run");
+      return false;
+    }
+
+    pico_initialize =
+      (pico_Status (*)(void*, uint32_t, pico_System*))dlsym(
+        handle, "pico_initialize");
+
+    pico_terminate =
+      (pico_Status (*)(pico_System*))dlsym(handle, "pico_terminate");
+
+    pico_getSystemStatusMessage =
+      (pico_Status (*)(pico_System, pico_Status, pico_Retstring))dlsym(
+        handle, "pico_getSystemStatusMessage");;
+
+    pico_loadResource =
+      (pico_Status (*)(pico_System, const char*, pico_Resource*))dlsym(
+        handle, "pico_loadResource");
+
+    pico_unloadResource =
+      (pico_Status (*)(pico_System, pico_Resource*))dlsym(
+        handle, "pico_unloadResource");
+
+    pico_getResourceName =
+      (pico_Status (*)(pico_System, pico_Resource, pico_Retstring))dlsym(
+        handle, "pico_getResourceName");
+
+    pico_createVoiceDefinition =
+      (pico_Status (*)(pico_System, const char*))dlsym(
+        handle, "pico_createVoiceDefinition");
+
+    pico_addResourceToVoiceDefinition =
+      (pico_Status (*)(pico_System, const char*, const char*))dlsym(
+        handle, "pico_addResourceToVoiceDefinition");
+
+    pico_releaseVoiceDefinition =
+      (pico_Status (*)(pico_System, const char*))dlsym(
+        handle, "pico_releaseVoiceDefinition");
+
+    pico_newEngine =
+      (pico_Status (*)(pico_System, const char*, pico_Engine*))dlsym(
+        handle, "pico_newEngine");
+
+    pico_disposeEngine =
+      (pico_Status (*)(pico_System, pico_Engine*))dlsym(
+        handle, "pico_disposeEngine");
+
+    pico_resetEngine =
+      (pico_Status (*)(pico_Engine, int32_t))dlsym(handle, "pico_resetEngine");
+
+    pico_putTextUtf8 =
+      (pico_Status (*)(pico_Engine, const char*, const int16_t, int16_t*))dlsym(
+        handle, "pico_putTextUtf8");
+
+    pico_getData =
+      (pico_Status (*)(pico_Engine, void*, int16_t, int16_t*, int16_t*))dlsym(
+        handle, "pico_getData");
+
+    mInitialized = true;
+    return true;
+  }
+
+  typedef signed int pico_Status;
+  typedef char pico_Retstring[PICO_RETSTRINGSIZE];
+
+  pico_Status (* pico_initialize)(void*, uint32_t, pico_System*);
+  pico_Status (* pico_terminate)(pico_System*);
+  pico_Status (* pico_getSystemStatusMessage)(
+    pico_System, pico_Status, pico_Retstring);
+
+  pico_Status (* pico_loadResource)(pico_System, const char*, pico_Resource*);
+  pico_Status (* pico_unloadResource)(pico_System, pico_Resource*);
+  pico_Status (* pico_getResourceName)(
+    pico_System, pico_Resource, pico_Retstring);
+  pico_Status (* pico_createVoiceDefinition)(pico_System, const char*);
+  pico_Status (* pico_addResourceToVoiceDefinition)(
+    pico_System, const char*, const char*);
+  pico_Status (* pico_releaseVoiceDefinition)(pico_System, const char*);
+  pico_Status (* pico_newEngine)(pico_System, const char*, pico_Engine*);
+  pico_Status (* pico_disposeEngine)(pico_System, pico_Engine*);
+
+  pico_Status (* pico_resetEngine)(pico_Engine, int32_t);
+  pico_Status (* pico_putTextUtf8)(
+    pico_Engine, const char*, const int16_t, int16_t*);
+  pico_Status (* pico_getData)(
+    pico_Engine, void*, const int16_t, int16_t*, int16_t*);
+
+private:
+
+  bool mInitialized;
+
+} sPicoApi;
+
+#define PICO_ENSURE_SUCCESS_VOID(_funcName, _status)                      \
+  if (_status < 0) {                                                      \
+    PicoApi::pico_Retstring message;                                      \
+    sPicoApi.pico_getSystemStatusMessage(                                 \
+      nsPicoService::sSingleton->mPicoSystem, _status, message);          \
+    NS_WARNING(                                                           \
+      nsPrintfCString("Error running %s: %s", _funcName, message).get()); \
+    return;                                                               \
+  }
+
+#define PICO_ENSURE_SUCCESS(_funcName, _status, _rv)                      \
+  if (_status < 0) {                                                      \
+    PicoApi::pico_Retstring message;                                      \
+    sPicoApi.pico_getSystemStatusMessage(                                 \
+      nsPicoService::sSingleton->mPicoSystem, _status, message);          \
+    NS_WARNING(                                                           \
+      nsPrintfCString("Error running %s: %s", _funcName, message).get()); \
+    return _rv;                                                           \
+  }
+
+class PicoVoice
+{
+public:
+
+  PicoVoice(const nsAString& aLanguage)
+    : mLanguage(aLanguage) {}
+
+  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(PicoVoice)
+
+  // Voice language, in BCP-47 syntax
+  nsString mLanguage;
+
+  // Language resource file
+  nsCString mTaFile;
+
+  // Speaker resource file
+  nsCString mSgFile;
+
+private:
+    ~PicoVoice() {}
+};
+
+class PicoCallbackRunnable : public Runnable,
+                             public nsISpeechTaskCallback
+{
+  friend class PicoSynthDataRunnable;
+
+public:
+  PicoCallbackRunnable(const nsAString& aText, PicoVoice* aVoice,
+                       float aRate, float aPitch, nsISpeechTask* aTask,
+                       nsPicoService* aService)
+    : mText(NS_ConvertUTF16toUTF8(aText))
+    , mRate(aRate)
+    , mPitch(aPitch)
+    , mFirstData(true)
+    , mTask(aTask)
+    , mVoice(aVoice)
+    , mService(aService) { }
+
+  NS_DECL_ISUPPORTS_INHERITED
+  NS_DECL_NSISPEECHTASKCALLBACK
+
+  NS_IMETHOD Run() override;
+
+  bool IsCurrentTask() { return mService->mCurrentTask == mTask; }
+
+private:
+  ~PicoCallbackRunnable() { }
+
+  void DispatchSynthDataRunnable(already_AddRefed<SharedBuffer>&& aBuffer,
+                                 size_t aBufferSize);
+
+  nsCString mText;
+
+  float mRate;
+
+  float mPitch;
+
+  bool mFirstData;
+
+  // We use this pointer to compare it with the current service task.
+  // If they differ, this runnable should stop.
+  nsISpeechTask* mTask;
+
+  // We hold a strong reference to the service, which in turn holds
+  // a strong reference to this voice.
+  PicoVoice* mVoice;
+
+  // By holding a strong reference to the service we guarantee that it won't be
+  // destroyed before this runnable.
+  RefPtr<nsPicoService> mService;
+};
+
+NS_IMPL_ISUPPORTS_INHERITED(PicoCallbackRunnable, Runnable, nsISpeechTaskCallback)
+
+// Runnable
+
+NS_IMETHODIMP
+PicoCallbackRunnable::Run()
+{
+  MOZ_ASSERT(!NS_IsMainThread());
+  PicoApi::pico_Status status = 0;
+
+  if (mService->CurrentVoice() != mVoice) {
+    mService->LoadEngine(mVoice);
+  } else {
+    status = sPicoApi.pico_resetEngine(mService->mPicoEngine, PICO_RESET_SOFT);
+    PICO_ENSURE_SUCCESS("pico_unloadResource", status, NS_ERROR_FAILURE);
+  }
+
+  // Add SSML markup for pitch and rate. Pico uses a minimal parser,
+  // so no namespace is needed.
+  nsPrintfCString markedUpText(
+    "<pitch level=\"%0.0f\"><speed level=\"%0.0f\">%s</speed></pitch>",
+    std::min(std::max(50.0f, mPitch * 100), 200.0f),
+    std::min(std::max(20.0f, mRate * 100), 500.0f),
+    mText.get());
+
+  const char* text = markedUpText.get();
+  size_t buffer_size = 512, buffer_offset = 0;
+  RefPtr<SharedBuffer> buffer = SharedBuffer::Create(buffer_size);
+  int16_t text_offset = 0, bytes_recv = 0, bytes_sent = 0, out_data_type = 0;
+  int16_t text_remaining = markedUpText.Length() + 1;
+
+  // Run this loop while this is the current task
+  while (IsCurrentTask()) {
+    if (text_remaining) {
+      status = sPicoApi.pico_putTextUtf8(mService->mPicoEngine,
+                                         text + text_offset, text_remaining,
+                                         &bytes_sent);
+      PICO_ENSURE_SUCCESS("pico_putTextUtf8", status, NS_ERROR_FAILURE);
+      // XXX: End speech task on error
+      text_remaining -= bytes_sent;
+      text_offset += bytes_sent;
+    } else {
+      // If we already fed all the text to the engine, send a zero length buffer
+      // and quit.
+      DispatchSynthDataRunnable(already_AddRefed<SharedBuffer>(), 0);
+      break;
+    }
+
+    do {
+      // Run this loop while the result of getData is STEP_BUSY; when it finishes
+      // synthesizing audio for the given text, it returns STEP_IDLE. We then
+      // break to the outer loop and feed more text, if there is any left.
+      if (!IsCurrentTask()) {
+        // If the task has changed, quit.
+        break;
+      }
+
+      if (buffer_size - buffer_offset < PICO_MAX_CHUNK_SIZE) {
+        // The next audio chunk retrieved may be bigger than our buffer,
+        // so send the data and flush the buffer.
+        DispatchSynthDataRunnable(buffer.forget(), buffer_offset);
+        buffer_offset = 0;
+        buffer = SharedBuffer::Create(buffer_size);
+      }
+
+      status = sPicoApi.pico_getData(mService->mPicoEngine,
+                                     (uint8_t*)buffer->Data() + buffer_offset,
+                                     PICO_MAX_CHUNK_SIZE,
+                                     &bytes_recv, &out_data_type);
+      PICO_ENSURE_SUCCESS("pico_getData", status, NS_ERROR_FAILURE);
+      buffer_offset += bytes_recv;
+    } while (status == PICO_STEP_BUSY);
+  }
+
+  return NS_OK;
+}
+
+void
+PicoCallbackRunnable::DispatchSynthDataRunnable(
+  already_AddRefed<SharedBuffer>&& aBuffer, size_t aBufferSize)
+{
+  class PicoSynthDataRunnable final : public Runnable
+  {
+  public:
+    PicoSynthDataRunnable(already_AddRefed<SharedBuffer>& aBuffer,
+                          size_t aBufferSize, bool aFirstData,
+                          PicoCallbackRunnable* aCallback)
+      : mBuffer(aBuffer)
+      , mBufferSize(aBufferSize)
+      , mFirstData(aFirstData)
+      , mCallback(aCallback) {
+    }
+
+    NS_IMETHOD Run() override
+    {
+      MOZ_ASSERT(NS_IsMainThread());
+
+      if (!mCallback->IsCurrentTask()) {
+        return NS_ERROR_NOT_AVAILABLE;
+      }
+
+      nsISpeechTask* task = mCallback->mTask;
+
+      if (mFirstData) {
+        task->Setup(mCallback, PICO_CHANNELS_NUM, PICO_SAMPLE_RATE, 2);
+      }
+
+      return task->SendAudioNative(
+        mBufferSize ? static_cast<short*>(mBuffer->Data()) : nullptr, mBufferSize / 2);
+    }
+
+  private:
+    RefPtr<SharedBuffer> mBuffer;
+
+    size_t mBufferSize;
+
+    bool mFirstData;
+
+    RefPtr<PicoCallbackRunnable> mCallback;
+  };
+
+  nsCOMPtr<nsIRunnable> sendEvent =
+    new PicoSynthDataRunnable(aBuffer, aBufferSize, mFirstData, this);
+  NS_DispatchToMainThread(sendEvent);
+  mFirstData = false;
+}
+
+// nsISpeechTaskCallback
+
+NS_IMETHODIMP
+PicoCallbackRunnable::OnPause()
+{
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+PicoCallbackRunnable::OnResume()
+{
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+PicoCallbackRunnable::OnCancel()
+{
+  mService->mCurrentTask = nullptr;
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+PicoCallbackRunnable::OnVolumeChanged(float aVolume)
+{
+  return NS_OK;
+}
+
+NS_INTERFACE_MAP_BEGIN(nsPicoService)
+  NS_INTERFACE_MAP_ENTRY(nsISpeechService)
+  NS_INTERFACE_MAP_ENTRY(nsIObserver)
+  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIObserver)
+NS_INTERFACE_MAP_END
+
+NS_IMPL_ADDREF(nsPicoService)
+NS_IMPL_RELEASE(nsPicoService)
+
+nsPicoService::nsPicoService()
+  : mInitialized(false)
+  , mVoicesMonitor("nsPicoService::mVoices")
+  , mCurrentTask(nullptr)
+  , mPicoSystem(nullptr)
+  , mPicoEngine(nullptr)
+  , mSgResource(nullptr)
+  , mTaResource(nullptr)
+  , mPicoMemArea(nullptr)
+{
+}
+
+nsPicoService::~nsPicoService()
+{
+  // We don't worry about removing the voices because this gets
+  // destructed at shutdown along with the voice registry.
+  MonitorAutoLock autoLock(mVoicesMonitor);
+  mVoices.Clear();
+
+  if (mThread) {
+    mThread->Shutdown();
+  }
+
+  UnloadEngine();
+}
+
+// nsIObserver
+
+NS_IMETHODIMP
+nsPicoService::Observe(nsISupports* aSubject, const char* aTopic,
+                       const char16_t* aData)
+{
+  MOZ_ASSERT(NS_IsMainThread());
+  if (NS_WARN_IF(strcmp(aTopic, "profile-after-change") != 0)) {
+    return NS_ERROR_UNEXPECTED;
+  }
+
+  if (!Preferences::GetBool("media.webspeech.synth.enabled") ||
+      Preferences::GetBool("media.webspeech.synth.test")) {
+    return NS_OK;
+  }
+
+  DebugOnly<nsresult> rv = NS_NewNamedThread("Pico Worker", getter_AddRefs(mThread));
+  MOZ_ASSERT(NS_SUCCEEDED(rv));
+  return mThread->Dispatch(
+    NewRunnableMethod("nsPicoService::Init", this, &nsPicoService::Init), NS_DISPATCH_NORMAL);
+}
+// nsISpeechService
+
+NS_IMETHODIMP
+nsPicoService::Speak(const nsAString& aText, const nsAString& aUri,
+                     float aVolume, float aRate, float aPitch,
+                     nsISpeechTask* aTask)
+{
+  if(NS_WARN_IF(!(mInitialized))) {
+    return NS_ERROR_NOT_AVAILABLE;
+  }
+
+  MonitorAutoLock autoLock(mVoicesMonitor);
+  bool found = false;
+  PicoVoice* voice = mVoices.GetWeak(aUri, &found);
+  if(NS_WARN_IF(!(found))) {
+    return NS_ERROR_NOT_AVAILABLE;
+  }
+
+  mCurrentTask = aTask;
+  RefPtr<PicoCallbackRunnable> cb = new PicoCallbackRunnable(aText, voice, aRate, aPitch, aTask, this);
+  return mThread->Dispatch(cb, NS_DISPATCH_NORMAL);
+}
+
+NS_IMETHODIMP
+nsPicoService::GetServiceType(SpeechServiceType* aServiceType)
+{
+  *aServiceType = nsISpeechService::SERVICETYPE_DIRECT_AUDIO;
+  return NS_OK;
+}
+
+// private methods
+
+void
+nsPicoService::Init()
+{
+  MOZ_ASSERT(!NS_IsMainThread());
+  MOZ_ASSERT(!mInitialized);
+
+  if (!sPicoApi.Init()) {
+    NS_WARNING("Failed to initialize pico library");
+    return;
+  }
+
+  // Use environment variable, or default android path
+  nsAutoCString langPath(PR_GetEnv("PICO_LANG_PATH"));
+
+  if (langPath.IsEmpty()) {
+    langPath.AssignLiteral(PICO_LANG_PATH);
+  }
+
+  nsCOMPtr<nsIFile> voicesDir;
+  NS_NewNativeLocalFile(langPath, true, getter_AddRefs(voicesDir));
+
+  nsCOMPtr<nsISimpleEnumerator> dirIterator;
+  nsresult rv = voicesDir->GetDirectoryEntries(getter_AddRefs(dirIterator));
+
+  if (NS_FAILED(rv)) {
+    NS_WARNING(nsPrintfCString("Failed to get contents of directory: %s", langPath.get()).get());
+    return;
+  }
+
+  bool hasMoreElements = false;
+  rv = dirIterator->HasMoreElements(&hasMoreElements);
+  MOZ_ASSERT(NS_SUCCEEDED(rv));
+
+  MonitorAutoLock autoLock(mVoicesMonitor);
+
+  while (hasMoreElements && NS_SUCCEEDED(rv)) {
+    nsCOMPtr<nsISupports> supports;
+    rv = dirIterator->GetNext(getter_AddRefs(supports));
+    MOZ_ASSERT(NS_SUCCEEDED(rv));
+
+    nsCOMPtr<nsIFile> voiceFile = do_QueryInterface(supports);
+    MOZ_ASSERT(voiceFile);
+
+    nsAutoCString leafName;
+    voiceFile->GetNativeLeafName(leafName);
+
+    nsAutoString lang;
+
+    if (GetVoiceFileLanguage(leafName, lang)) {
+      nsAutoString uri;
+      uri.AssignLiteral("urn:moz-tts:pico:");
+      uri.Append(lang);
+
+      bool found = false;
+      PicoVoice* voice = mVoices.GetWeak(uri, &found);
+
+      if (!found) {
+        voice = new PicoVoice(lang);
+        mVoices.Put(uri, voice);
+      }
+
+      // Each voice consists of two lingware files: a language resource file,
+      // suffixed by _ta.bin, and a speaker resource file, suffixed by _sg.bin.
+      // We currently assume that there is a pair of files for each language.
+      if (StringEndsWith(leafName, NS_LITERAL_CSTRING("_ta.bin"))) {
+        rv = voiceFile->GetPersistentDescriptor(voice->mTaFile);
+        MOZ_ASSERT(NS_SUCCEEDED(rv));
+      } else if (StringEndsWith(leafName, NS_LITERAL_CSTRING("_sg.bin"))) {
+        rv = voiceFile->GetPersistentDescriptor(voice->mSgFile);
+        MOZ_ASSERT(NS_SUCCEEDED(rv));
+      }
+    }
+
+    rv = dirIterator->HasMoreElements(&hasMoreElements);
+  }
+
+  NS_DispatchToMainThread(NewRunnableMethod("nsPicoService::RegisterVoices",
+                                            this, &nsPicoService::RegisterVoices));
+}
+
+void
+nsPicoService::RegisterVoices()
+{
+  nsSynthVoiceRegistry* registry = nsSynthVoiceRegistry::GetInstance();
+
+  for (auto iter = mVoices.Iter(); !iter.Done(); iter.Next()) {
+    const nsAString& uri = iter.Key();
+    RefPtr<PicoVoice>& voice = iter.Data();
+
+    // If we are missing either a language or a voice resource, it is invalid.
+    if (voice->mTaFile.IsEmpty() || voice->mSgFile.IsEmpty()) {
+      iter.Remove();
+      continue;
+    }
+
+    nsAutoString name;
+    name.AssignLiteral("Pico ");
+    name.Append(voice->mLanguage);
+
+    // This service is multi-threaded and can handle more than one utterance at
+    // a time before previous utterances end, so aQueuesUtterances == false.
+    DebugOnly<nsresult> rv =
+      registry->AddVoice(this, uri, name, voice->mLanguage, true, false);
+    NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Failed to add voice");
+  }
+
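+  // Only mark the service initialized once all voices are registered;
+  // Speak() refuses to run before this flag is set.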
+  mInitialized = true;
+}
+
+bool
+nsPicoService::GetVoiceFileLanguage(const nsACString& aFileName, nsAString& aLang)
+{
+  nsACString::const_iterator start, end;
+  aFileName.BeginReading(start);
+  aFileName.EndReading(end);
+
+  // The lingware filename syntax is language_(ta/sg).bin;
+  // we extract the language prefix here.
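+  // e.g. a leaf name of "en-GB_ta.bin" yields the language code "en-GB".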
+  if (FindInReadable(NS_LITERAL_CSTRING("_"), start, end)) {
+    end = start;
+    aFileName.BeginReading(start);
+    aLang.Assign(NS_ConvertUTF8toUTF16(Substring(start, end)));
+    return true;
+  }
+
+  return false;
+}
+
+void
+nsPicoService::LoadEngine(PicoVoice* aVoice)
+{
+  PicoApi::pico_Status status = 0;
+
+  if (mPicoSystem) {
+    UnloadEngine();
+  }
+
+  if (!mPicoMemArea) {
+    mPicoMemArea = MakeUnique<uint8_t[]>(PICO_MEM_SIZE);
+  }
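+
+  // Bring the engine up in the order the Pico API expects: initialize the
+  // system, load the language (_ta) and speaker (_sg) resources, build a
+  // named voice definition from them, then create the engine.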
+
+  status = sPicoApi.pico_initialize(mPicoMemArea.get(),
+                                    PICO_MEM_SIZE, &mPicoSystem);
+  PICO_ENSURE_SUCCESS_VOID("pico_initialize", status);
+
+  status = sPicoApi.pico_loadResource(mPicoSystem, aVoice->mTaFile.get(), &mTaResource);
+  PICO_ENSURE_SUCCESS_VOID("pico_loadResource", status);
+
+  status = sPicoApi.pico_loadResource(mPicoSystem, aVoice->mSgFile.get(), &mSgResource);
+  PICO_ENSURE_SUCCESS_VOID("pico_loadResource", status);
+
+  status = sPicoApi.pico_createVoiceDefinition(mPicoSystem, PICO_VOICE_NAME);
+  PICO_ENSURE_SUCCESS_VOID("pico_createVoiceDefinition", status);
+
+  char taName[PICO_RETSTRINGSIZE];
+  status = sPicoApi.pico_getResourceName(mPicoSystem, mTaResource, taName);
+  PICO_ENSURE_SUCCESS_VOID("pico_getResourceName", status);
+
+  status = sPicoApi.pico_addResourceToVoiceDefinition(
+    mPicoSystem, PICO_VOICE_NAME, taName);
+  PICO_ENSURE_SUCCESS_VOID("pico_addResourceToVoiceDefinition", status);
+
+  char sgName[PICO_RETSTRINGSIZE];
+  status = sPicoApi.pico_getResourceName(mPicoSystem, mSgResource, sgName);
+  PICO_ENSURE_SUCCESS_VOID("pico_getResourceName", status);
+
+  status = sPicoApi.pico_addResourceToVoiceDefinition(
+    mPicoSystem, PICO_VOICE_NAME, sgName);
+  PICO_ENSURE_SUCCESS_VOID("pico_addResourceToVoiceDefinition", status);
+
+  status = sPicoApi.pico_newEngine(mPicoSystem, PICO_VOICE_NAME, &mPicoEngine);
+  PICO_ENSURE_SUCCESS_VOID("pico_newEngine", status);
+
+  if (sSingleton) {
+    sSingleton->mCurrentVoice = aVoice;
+  }
+}
+
+void
+nsPicoService::UnloadEngine()
+{
+  PicoApi::pico_Status status = 0;
+
+  if (mPicoEngine) {
+    status = sPicoApi.pico_disposeEngine(mPicoSystem, &mPicoEngine);
+    PICO_ENSURE_SUCCESS_VOID("pico_disposeEngine", status);
+    status = sPicoApi.pico_releaseVoiceDefinition(mPicoSystem, PICO_VOICE_NAME);
+    PICO_ENSURE_SUCCESS_VOID("pico_releaseVoiceDefinition", status);
+    mPicoEngine = nullptr;
+  }
+
+  if (mSgResource) {
+    status = sPicoApi.pico_unloadResource(mPicoSystem, &mSgResource);
+    PICO_ENSURE_SUCCESS_VOID("pico_unloadResource", status);
+    mSgResource = nullptr;
+  }
+
+  if (mTaResource) {
+    status = sPicoApi.pico_unloadResource(mPicoSystem, &mTaResource);
+    PICO_ENSURE_SUCCESS_VOID("pico_unloadResource", status);
+    mTaResource = nullptr;
+  }
+
+  if (mPicoSystem) {
+    status = sPicoApi.pico_terminate(&mPicoSystem);
+    PICO_ENSURE_SUCCESS_VOID("pico_terminate", status);
+    mPicoSystem = nullptr;
+  }
+}
+
+PicoVoice*
+nsPicoService::CurrentVoice()
+{
+  MOZ_ASSERT(!NS_IsMainThread());
+
+  return mCurrentVoice;
+}
+
+// static methods
+
+nsPicoService*
+nsPicoService::GetInstance()
+{
+  MOZ_ASSERT(NS_IsMainThread());
+  if (!XRE_IsParentProcess()) {
+    MOZ_ASSERT(false, "nsPicoService can only be started in the main Gecko process");
+    return nullptr;
+  }
+
+  if (!sSingleton) {
+    sSingleton = new nsPicoService();
+  }
+
+  return sSingleton;
+}
+
+already_AddRefed<nsPicoService>
+nsPicoService::GetInstanceForService()
+{
+  RefPtr<nsPicoService> picoService = GetInstance();
+  return picoService.forget();
+}
+
+void
+nsPicoService::Shutdown()
+{
+  if (!sSingleton) {
+    return;
+  }
+
+  sSingleton->mCurrentTask = nullptr;
+
+  sSingleton = nullptr;
+}
+
+} // namespace dom
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/dom/media/webspeech/synth/pico/nsPicoService.h
@@ -0,0 +1,93 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsPicoService_h
+#define nsPicoService_h
+
+#include "mozilla/Mutex.h"
+#include "nsTArray.h"
+#include "nsIObserver.h"
+#include "nsIThread.h"
+#include "nsISpeechService.h"
+#include "nsRefPtrHashtable.h"
+#include "mozilla/StaticPtr.h"
+#include "mozilla/Monitor.h"
+#include "mozilla/UniquePtr.h"
+
+namespace mozilla {
+namespace dom {
+
+class PicoVoice;
+class PicoCallbackRunnable;
+
+typedef void* pico_System;
+typedef void* pico_Resource;
+typedef void* pico_Engine;
+
+class nsPicoService : public nsIObserver,
+                      public nsISpeechService
+{
+  friend class PicoCallbackRunnable;
+  friend class PicoInitRunnable;
+
+public:
+  NS_DECL_THREADSAFE_ISUPPORTS
+  NS_DECL_NSISPEECHSERVICE
+  NS_DECL_NSIOBSERVER
+
+  nsPicoService();
+
+  static nsPicoService* GetInstance();
+
+  static already_AddRefed<nsPicoService> GetInstanceForService();
+
+  static void Shutdown();
+
+private:
+
+  virtual ~nsPicoService();
+
+  void Init();
+
+  void RegisterVoices();
+
+  bool GetVoiceFileLanguage(const nsACString& aFileName, nsAString& aLang);
+
+  void LoadEngine(PicoVoice* aVoice);
+
+  void UnloadEngine();
+
+  PicoVoice* CurrentVoice();
+
+  bool mInitialized;
+
+  nsCOMPtr<nsIThread> mThread;
+
+  nsRefPtrHashtable<nsStringHashKey, PicoVoice> mVoices;
+
+  Monitor mVoicesMonitor;
+
+  PicoVoice* mCurrentVoice;
+
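+  // Set on the main thread; Atomic so the worker thread can read it safely.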
+  Atomic<nsISpeechTask*> mCurrentTask;
+
+  pico_System mPicoSystem;
+
+  pico_Engine mPicoEngine;
+
+  pico_Resource mSgResource;
+
+  pico_Resource mTaResource;
+
+  mozilla::UniquePtr<uint8_t[]> mPicoMemArea;
+
+  static StaticRefPtr<nsPicoService> sSingleton;
+};
+
+} // namespace dom
+} // namespace mozilla
+
+#endif
--- a/dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp
+++ b/dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp
@@ -508,17 +508,18 @@ SpeechDispatcherService::Speak(const nsA
   }
 
   spd_set_voice_rate(mSpeechdClient, static_cast<int>(rate));
 
   // We provide a pitch of 0 to 2 with 1 being the default.
   // speech-dispatcher expects -100 to 100 with 0 being default.
   spd_set_voice_pitch(mSpeechdClient, static_cast<int>((aPitch - 1) * 100));
 
-  nsresult rv = aTask->Setup(callback);
+  // The last three parameters don't matter for an indirect service
+  nsresult rv = aTask->Setup(callback, 0, 0, 0);
 
   if (NS_FAILED(rv)) {
     return rv;
   }
 
   if (aText.Length()) {
     int msg_id = spd_say(
       mSpeechdClient, SPD_MESSAGE, NS_ConvertUTF16toUTF8(aText).get());
@@ -543,16 +544,23 @@ SpeechDispatcherService::Speak(const nsA
       callback,
       &SpeechDispatcherCallback::OnSpeechEvent,
       SPD_EVENT_END));
   }
 
   return NS_OK;
 }
 
+NS_IMETHODIMP
+SpeechDispatcherService::GetServiceType(SpeechServiceType* aServiceType)
+{
+  *aServiceType = nsISpeechService::SERVICETYPE_INDIRECT_AUDIO;
+  return NS_OK;
+}
+
 SpeechDispatcherService*
 SpeechDispatcherService::GetInstance(bool create)
 {
   if (XRE_GetProcessType() != GeckoProcessType_Default) {
     MOZ_ASSERT(false,
                "SpeechDispatcherService can only be started on main gecko process");
     return nullptr;
   }
--- a/dom/media/webspeech/synth/test/file_global_queue_pause.html
+++ b/dom/media/webspeech/synth/test/file_global_queue_pause.html
@@ -54,17 +54,18 @@ https://bugzilla.mozilla.org/show_bug.cg
     utterance1.addEventListener('resume', function(e) {
       is(eventOrder.shift(), 'resume1', 'resume1');
       testSynthState(win1, { speaking: true, pending: false, paused: false});
       testSynthState(win2, { speaking: true, pending: true, paused: false});
 
       win2.speechSynthesis.pause();
 
       testSynthState(win1, { speaking: true, pending: false, paused: false});
-      testSynthState(win2, { speaking: true, pending: true, paused: true });
+      // Bug 1188099: currently, the paused state is not guaranteed to be immediate.
+      testSynthState(win2, { speaking: true, pending: true });
 
       // We now make the utterance end.
       SpecialPowers.wrap(win1.speechSynthesis).forceEnd();
     });
     utterance1.addEventListener('end', function(e) {
       is(eventOrder.shift(), 'end1', 'end1');
       testSynthState(win1, { speaking: false, pending: false, paused: false});
       testSynthState(win2, { speaking: false, pending: true, paused: true});
--- a/dom/media/webspeech/synth/test/file_speech_queue.html
+++ b/dom/media/webspeech/synth/test/file_speech_queue.html
@@ -54,18 +54,17 @@ function testFunc(done_cb) {
      [{text: "Come stai?", args: { lang: "it-IT-fail" } },
       { rate: 1, pitch: 1, uri: langUriMap['it-IT-fail'], err: true }],
      [{text: "¡hasta mañana!", args: { lang: "es-MX" } },
       { uri: langUriMap['es-MX'] }]],
     function () {
       var test_data = [];
       var voices = speechSynthesis.getVoices();
       for (var voice of voices) {
-        if (voice.lang.split("-").length > 2) {
-          // Skip voices that don't automatically end with success
+        if (voice.voiceURI.indexOf('urn:moz-tts:fake-direct') < 0) {
           continue;
         }
         test_data.push([{text: "Hello world", args: { voice: voice} },
                         {uri: voice.voiceURI}]);
       }
 
       synthTestQueue(test_data, done_cb);
     });
--- a/dom/media/webspeech/synth/test/nsFakeSynthServices.cpp
+++ b/dom/media/webspeech/synth/test/nsFakeSynthServices.cpp
@@ -39,27 +39,30 @@ struct VoiceDetails
 {
   const char* uri;
   const char* name;
   const char* lang;
   bool defaultVoice;
   uint32_t flags;
 };
 
-static const VoiceDetails sVoices[] = {
-  {"urn:moz-tts:fake:bob", "Bob Marley", "en-JM", true, 0},
-  {"urn:moz-tts:fake:amy", "Amy Winehouse", "en-GB", false, 0},
-  {"urn:moz-tts:fake:lenny", "Leonard Cohen", "en-CA", false, 0},
-  {"urn:moz-tts:fake:celine", "Celine Dion", "fr-CA", false, 0},
-  {"urn:moz-tts:fake:julie", "Julieta Venegas", "es-MX", false, },
-  {"urn:moz-tts:fake:zanetta", "Zanetta Farussi", "it-IT", false, 0},
-  {"urn:moz-tts:fake:margherita", "Margherita Durastanti", "it-IT-noevents-noend", false, eSuppressEvents | eSuppressEnd},
-  {"urn:moz-tts:fake:teresa", "Teresa Cornelys", "it-IT-noend", false, eSuppressEnd},
-  {"urn:moz-tts:fake:cecilia", "Cecilia Bartoli", "it-IT-failatstart", false, eFailAtStart},
-  {"urn:moz-tts:fake:gottardo", "Gottardo Aldighieri", "it-IT-fail", false, eFail},
+static const VoiceDetails sDirectVoices[] = {
+  {"urn:moz-tts:fake-direct:bob", "Bob Marley", "en-JM", true, 0},
+  {"urn:moz-tts:fake-direct:amy", "Amy Winehouse", "en-GB", false, 0},
+  {"urn:moz-tts:fake-direct:lenny", "Leonard Cohen", "en-CA", false, 0},
+  {"urn:moz-tts:fake-direct:celine", "Celine Dion", "fr-CA", false, 0},
+  {"urn:moz-tts:fake-direct:julie", "Julieta Venegas", "es-MX", false, },
+};
+
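+// The language-tag suffixes (-noevents-noend, -noend, -failatstart, -fail)
+// let a test select a failure mode simply by requesting that language.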
+static const VoiceDetails sIndirectVoices[] = {
+  {"urn:moz-tts:fake-indirect:zanetta", "Zanetta Farussi", "it-IT", false, 0},
+  {"urn:moz-tts:fake-indirect:margherita", "Margherita Durastanti", "it-IT-noevents-noend", false, eSuppressEvents | eSuppressEnd},
+  {"urn:moz-tts:fake-indirect:teresa", "Teresa Cornelys", "it-IT-noend", false, eSuppressEnd},
+  {"urn:moz-tts:fake-indirect:cecilia", "Cecilia Bartoli", "it-IT-failatstart", false, eFailAtStart},
+  {"urn:moz-tts:fake-indirect:gottardo", "Gottardo Aldighieri", "it-IT-fail", false, eFail},
 };
 
 // FakeSynthCallback
 class FakeSynthCallback : public nsISpeechTaskCallback
 {
 public:
   explicit FakeSynthCallback(nsISpeechTask* aTask) : mTask(aTask) { }
   NS_DECL_CYCLE_COLLECTING_ISUPPORTS
@@ -108,35 +111,100 @@ NS_IMPL_CYCLE_COLLECTION(FakeSynthCallba
 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(FakeSynthCallback)
   NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback)
   NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback)
 NS_INTERFACE_MAP_END
 
 NS_IMPL_CYCLE_COLLECTING_ADDREF(FakeSynthCallback)
 NS_IMPL_CYCLE_COLLECTING_RELEASE(FakeSynthCallback)
 
-// FakeSpeechSynth
+// FakeDirectAudioSynth
 
-class FakeSpeechSynth : public nsISpeechService
+class FakeDirectAudioSynth : public nsISpeechService
 {
 
 public:
-  FakeSpeechSynth() {}
+  FakeDirectAudioSynth() { }
 
   NS_DECL_ISUPPORTS
   NS_DECL_NSISPEECHSERVICE
 
 private:
-  virtual ~FakeSpeechSynth() { }
+  virtual ~FakeDirectAudioSynth() { }
 };
 
-NS_IMPL_ISUPPORTS(FakeSpeechSynth, nsISpeechService)
+NS_IMPL_ISUPPORTS(FakeDirectAudioSynth, nsISpeechService)
 
 NS_IMETHODIMP
-FakeSpeechSynth::Speak(const nsAString& aText, const nsAString& aUri,
+FakeDirectAudioSynth::Speak(const nsAString& aText, const nsAString& aUri,
+                            float aVolume, float aRate, float aPitch,
+                            nsISpeechTask* aTask)
+{
+  class Runnable final : public mozilla::Runnable
+  {
+  public:
+    Runnable(nsISpeechTask* aTask, const nsAString& aText)
+      : mozilla::Runnable("Runnable")
+      , mTask(aTask)
+      , mText(aText)
+    {
+    }
+
+    NS_IMETHOD Run() override
+    {
+      RefPtr<FakeSynthCallback> cb = new FakeSynthCallback(nullptr);
+      mTask->Setup(cb, CHANNELS, SAMPLERATE, 2);
+
+      // Just an arbitrary multiplier. Pretend that each character is
+      // synthesized to 40 frames.
+      uint32_t frames_length = 40 * mText.Length();
+      auto frames = MakeUnique<int16_t[]>(frames_length);
+      mTask->SendAudioNative(frames.get(), frames_length);
+
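+      // An empty buffer signals end-of-stream to the task.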
+      mTask->SendAudioNative(nullptr, 0);
+
+      return NS_OK;
+    }
+
+  private:
+    nsCOMPtr<nsISpeechTask> mTask;
+    nsString mText;
+  };
+
+  nsCOMPtr<nsIRunnable> runnable = new Runnable(aTask, aText);
+  NS_DispatchToMainThread(runnable);
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+FakeDirectAudioSynth::GetServiceType(SpeechServiceType* aServiceType)
+{
+  *aServiceType = nsISpeechService::SERVICETYPE_DIRECT_AUDIO;
+  return NS_OK;
+}
+
+// FakeIndirectAudioSynth
+
+class FakeIndirectAudioSynth : public nsISpeechService
+{
+
+public:
+  FakeIndirectAudioSynth() {}
+
+  NS_DECL_ISUPPORTS
+  NS_DECL_NSISPEECHSERVICE
+
+private:
+  virtual ~FakeIndirectAudioSynth() { }
+};
+
+NS_IMPL_ISUPPORTS(FakeIndirectAudioSynth, nsISpeechService)
+
+NS_IMETHODIMP
+FakeIndirectAudioSynth::Speak(const nsAString& aText, const nsAString& aUri,
                               float aVolume, float aRate, float aPitch,
                               nsISpeechTask* aTask)
 {
   class DispatchStart final : public Runnable
   {
   public:
     explicit DispatchStart(nsISpeechTask* aTask)
       : mozilla::Runnable("DispatchStart")
@@ -195,56 +263,70 @@ FakeSpeechSynth::Speak(const nsAString& 
     }
 
   private:
     nsCOMPtr<nsISpeechTask> mTask;
     nsString mText;
   };
 
   uint32_t flags = 0;
-  for (VoiceDetails voice : sVoices) {
-    if (aUri.EqualsASCII(voice.uri)) {
-      flags = voice.flags;
-      break;
+  for (uint32_t i = 0; i < ArrayLength(sIndirectVoices); i++) {
+    if (aUri.EqualsASCII(sIndirectVoices[i].uri)) {
+      flags = sIndirectVoices[i].flags;
+      break;
     }
   }
 
   if (flags & eFailAtStart) {
     return NS_ERROR_FAILURE;
   }
 
   RefPtr<FakeSynthCallback> cb = new FakeSynthCallback(
     (flags & eSuppressEvents) ? nullptr : aTask);
 
-  aTask->Setup(cb);
+  aTask->Setup(cb, 0, 0, 0);
 
   nsCOMPtr<nsIRunnable> runnable = new DispatchStart(aTask);
   NS_DispatchToMainThread(runnable);
 
   if (flags & eFail) {
     runnable = new DispatchError(aTask, aText);
     NS_DispatchToMainThread(runnable);
   } else if ((flags & eSuppressEnd) == 0) {
     runnable = new DispatchEnd(aTask, aText);
     NS_DispatchToMainThread(runnable);
   }
 
   return NS_OK;
 }
 
+NS_IMETHODIMP
+FakeIndirectAudioSynth::GetServiceType(SpeechServiceType* aServiceType)
+{
+  *aServiceType = nsISpeechService::SERVICETYPE_INDIRECT_AUDIO;
+  return NS_OK;
+}
+
 // nsFakeSynthService
 
 NS_INTERFACE_MAP_BEGIN(nsFakeSynthServices)
   NS_INTERFACE_MAP_ENTRY(nsIObserver)
   NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIObserver)
 NS_INTERFACE_MAP_END
 
 NS_IMPL_ADDREF(nsFakeSynthServices)
 NS_IMPL_RELEASE(nsFakeSynthServices)
 
+nsFakeSynthServices::nsFakeSynthServices()
+{
+}
+
+nsFakeSynthServices::~nsFakeSynthServices()
+{
+}
+
 static void
 AddVoices(nsISpeechService* aService, const VoiceDetails* aVoices, uint32_t aLength)
 {
   RefPtr<nsSynthVoiceRegistry> registry = nsSynthVoiceRegistry::GetInstance();
   for (uint32_t i = 0; i < aLength; i++) {
     NS_ConvertUTF8toUTF16 name(aVoices[i].name);
     NS_ConvertUTF8toUTF16 uri(aVoices[i].uri);
     NS_ConvertUTF8toUTF16 lang(aVoices[i].lang);
@@ -257,18 +339,21 @@ AddVoices(nsISpeechService* aService, co
   }
 
   registry->NotifyVoicesChanged();
 }
 
 void
 nsFakeSynthServices::Init()
 {
-  mSynthService = new FakeSpeechSynth();
-  AddVoices(mSynthService, sVoices, ArrayLength(sVoices));
+  mDirectService = new FakeDirectAudioSynth();
+  AddVoices(mDirectService, sDirectVoices, ArrayLength(sDirectVoices));
+
+  mIndirectService = new FakeIndirectAudioSynth();
+  AddVoices(mIndirectService, sIndirectVoices, ArrayLength(sIndirectVoices));
 }
 
 // nsIObserver
 
 NS_IMETHODIMP
 nsFakeSynthServices::Observe(nsISupports* aSubject, const char* aTopic,
                              const char16_t* aData)
 {
--- a/dom/media/webspeech/synth/test/nsFakeSynthServices.h
+++ b/dom/media/webspeech/synth/test/nsFakeSynthServices.h
@@ -20,31 +20,33 @@ namespace dom {
 
 class nsFakeSynthServices : public nsIObserver
 {
 
 public:
   NS_DECL_ISUPPORTS
   NS_DECL_NSIOBSERVER
 
-  nsFakeSynthServices() = default;
+  nsFakeSynthServices();
 
   static nsFakeSynthServices* GetInstance();
 
   static already_AddRefed<nsFakeSynthServices> GetInstanceForService();
 
   static void Shutdown();
 
 private:
 
-  virtual ~nsFakeSynthServices() = default;
+  virtual ~nsFakeSynthServices();
 
   void Init();
 
-  nsCOMPtr<nsISpeechService> mSynthService;
+  nsCOMPtr<nsISpeechService> mDirectService;
+
+  nsCOMPtr<nsISpeechService> mIndirectService;
 
   static StaticRefPtr<nsFakeSynthServices> sSingleton;
 };
 
 } // namespace dom
 } // namespace mozilla
 
 #endif
--- a/dom/media/webspeech/synth/windows/SapiService.cpp
+++ b/dom/media/webspeech/synth/windows/SapiService.cpp
@@ -397,37 +397,44 @@ SapiService::Speak(const nsAString& aTex
   }
 
   xml.AppendLiteral("</pitch>");
 
   RefPtr<SapiCallback> callback =
     new SapiCallback(aTask, spVoice, textOffset, aText.Length());
 
   // The last three parameters don't matter for an indirect service
-  nsresult rv = aTask->Setup(callback);
+  nsresult rv = aTask->Setup(callback, 0, 0, 0);
   if (NS_FAILED(rv)) {
     return rv;
   }
 
   ULONG streamNum;
   if (FAILED(spVoice->Speak(xml.get(), SPF_ASYNC, &streamNum))) {
-    aTask->Setup(nullptr);
+    aTask->Setup(nullptr, 0, 0, 0);
     return NS_ERROR_FAILURE;
   }
 
   callback->SetStreamNum(streamNum);
   // streamNum is reassigned the same value when the last stream finishes, even
   // if the stream-end callback was never called, so we cannot key a hashtable
   // on it and instead have to append each callback to a vector.
   mCallbacks.AppendElement(callback);
 
   return NS_OK;
 }
 
 NS_IMETHODIMP
+SapiService::GetServiceType(SpeechServiceType* aServiceType)
+{
+  *aServiceType = nsISpeechService::SERVICETYPE_INDIRECT_AUDIO;
+  return NS_OK;
+}
+
+NS_IMETHODIMP
 SapiService::Observe(nsISupports* aSubject, const char* aTopic,
                      const char16_t* aData)
 {
   return NS_OK;
 }
 
 SapiService*
 SapiService::GetInstance()
--- a/old-configure.in
+++ b/old-configure.in
@@ -2567,16 +2567,18 @@ if test -n "$MOZ_WEBRTC"; then
     MOZ_RAW=1
     MOZ_SCTP=1
     MOZ_SRTP=1
     AC_DEFINE(MOZ_SCTP)
     AC_DEFINE(MOZ_SRTP)
     if test -n "$MOZ_X11"; then
       MOZ_WEBRTC_X11_LIBS="-lXext -lXdamage -lXfixes -lXcomposite"
     fi
+else
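+    dnl Presumably the Pico backend leans on WebRTC-only media support,
+    dnl so it is forced off when WebRTC is disabled.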
+    MOZ_SYNTH_PICO=
 fi
 
 dnl ========================================================
 dnl = Force hardware AEC, disable webrtc.org AEC
 dnl ========================================================
 MOZ_ARG_ENABLE_BOOL(hardware-aec-ns,
 [  --enable-hardware-aec-ns   Enable support for hardware AEC and noise suppression],
     MOZ_WEBRTC_HARDWARE_AEC_NS=1,
@@ -3959,16 +3961,28 @@ MOZ_ARG_DISABLE_BOOL(startupcache,
     MOZ_DISABLE_STARTUPCACHE=)
 
 if test -n "$MOZ_DISABLE_STARTUPCACHE"; then
   AC_DEFINE(MOZ_DISABLE_STARTUPCACHE)
 fi
 AC_SUBST(MOZ_DISABLE_STARTUPCACHE)
 
 dnl ========================================================
+dnl = Enable Pico Speech Synthesis
+dnl ========================================================
+MOZ_ARG_ENABLE_BOOL(synth-pico,
+[  --enable-synth-pico  Enable the Pico speech synthesis backend for the Web Speech API ],
+    MOZ_SYNTH_PICO=1,
+    MOZ_SYNTH_PICO= )
+if test -n "$MOZ_SYNTH_PICO"; then
+    AC_DEFINE(MOZ_SYNTH_PICO)
+fi
+AC_SUBST(MOZ_SYNTH_PICO)
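+dnl A typical mozconfig opt-in would be: ac_add_options --enable-synth-pico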
+
+dnl ========================================================
 dnl = Enable Support for Time Manager API
 dnl ========================================================
 if test -n "$MOZ_TIME_MANAGER"; then
     AC_DEFINE(MOZ_TIME_MANAGER)
 fi
 AC_SUBST(MOZ_TIME_MANAGER)
 
 dnl ========================================================