Bug 1333641 - Part 1: Making the speechSynthesis API unfingerprintable when 'privacy.resistFingerprinting' is true. r=arthuredelstein,smaug
authorTim Huang <tihuang@mozilla.com>
Thu, 20 Jul 2017 16:07:32 +0800
changeset 419770 543ace8b7216f5cb3e6dd5a85ef1efe3eca97811
parent 419769 16d3556f49050cd42087f4dbfbe924f50a71dab7
child 419771 3b4442aa9040dd915b73dd7ef3639f822208675e
push id7566
push usermtabara@mozilla.com
push dateWed, 02 Aug 2017 08:25:16 +0000
treeherdermozilla-beta@86913f512c3c [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersarthuredelstein, smaug
bugs1333641
milestone56.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1333641 - Part 1: Making the speechSynthesis API unfingerprintable when 'privacy.resistFingerprinting' is true. r=arthuredelstein,smaug The patch will change the behavior of speechSynthesis API when fingerprinting resistance is enabled. First, the speechSynthesis.getVoices() will always report an empty list and the speechSynthesis.onvoiceschanged event will be blocked. And it will immediately fail when using the speechSynthesis.speak() to speak an utterance. By doing so, websites can no longer fingerprint users through this speechSynthesis API. In addition, this only affect contents, so the chrome can still use this API even the fingerprinting resistance is enabled. MozReview-Commit-ID: KxJX8fo30WS
dom/media/webspeech/synth/SpeechSynthesis.cpp
dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl
dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp
dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h
dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp
dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h
dom/media/webspeech/synth/nsSpeechTask.cpp
dom/media/webspeech/synth/nsSpeechTask.h
dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp
--- a/dom/media/webspeech/synth/SpeechSynthesis.cpp
+++ b/dom/media/webspeech/synth/SpeechSynthesis.cpp
@@ -9,18 +9,20 @@
 #include "mozilla/Logging.h"
 #include "mozilla/SizePrintfMacros.h"
 
 #include "mozilla/dom/ContentChild.h"
 #include "mozilla/dom/Element.h"
 
 #include "mozilla/dom/SpeechSynthesisBinding.h"
 #include "SpeechSynthesis.h"
+#include "nsContentUtils.h"
 #include "nsSynthVoiceRegistry.h"
 #include "nsIDocument.h"
+#include "nsIDocShell.h"
 
 #undef LOG
 mozilla::LogModule*
 GetSpeechSynthLog()
 {
   static mozilla::LazyLogModule sLog("SpeechSynthesis");
 
   return sLog;
@@ -244,16 +246,23 @@ SpeechSynthesis::OnEnd(const nsSpeechTas
   AdvanceQueue();
 }
 
 void
 SpeechSynthesis::GetVoices(nsTArray< RefPtr<SpeechSynthesisVoice> >& aResult)
 {
   aResult.Clear();
   uint32_t voiceCount = 0;
+  nsCOMPtr<nsPIDOMWindowInner> window = GetOwner();
+  nsCOMPtr<nsIDocShell> docShell = window ? window->GetDocShell() : nullptr;
+
+
+  if (nsContentUtils::ShouldResistFingerprinting(docShell)) {
+    return;
+  }
 
   nsresult rv = nsSynthVoiceRegistry::GetInstance()->GetVoiceCount(&voiceCount);
   if(NS_WARN_IF(NS_FAILED(rv))) {
     return;
   }
 
   nsISupports* voiceParent = NS_ISUPPORTS_CAST(nsIObserver*, this);
 
@@ -313,20 +322,25 @@ SpeechSynthesis::Observe(nsISupports* aS
 
       nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService();
       if (obs) {
         obs->RemoveObserver(this, "inner-window-destroyed");
       }
     }
   } else if (strcmp(aTopic, "synth-voices-changed") == 0) {
     LOG(LogLevel::Debug, ("SpeechSynthesis::onvoiceschanged"));
-    DispatchTrustedEvent(NS_LITERAL_STRING("voiceschanged"));
-    // If we have a pending item, and voices become available, speak it.
-    if (!mCurrentTask && !mHoldQueue && HasVoices()) {
-      AdvanceQueue();
+    nsCOMPtr<nsPIDOMWindowInner> window = GetOwner();
+    nsCOMPtr<nsIDocShell> docShell = window ? window->GetDocShell() : nullptr;
+
+    if (!nsContentUtils::ShouldResistFingerprinting(docShell)) {
+      DispatchTrustedEvent(NS_LITERAL_STRING("voiceschanged"));
+      // If we have a pending item, and voices become available, speak it.
+      if (!mCurrentTask && !mHoldQueue && HasVoices()) {
+        AdvanceQueue();
+      }
     }
   }
 
   return NS_OK;
 }
 
 } // namespace dom
 } // namespace mozilla
--- a/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl
+++ b/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl
@@ -37,13 +37,13 @@ child:
 
     async InitialVoicesAndState(RemoteVoice[] aVoices, nsString[] aDefaults,
                                 bool aIsSpeaking);
 
 parent:
     async __delete__();
 
     async PSpeechSynthesisRequest(nsString aText, nsString aUri, nsString aLang,
-                                  float aVolume, float aRate, float aPitch);
+                                  float aVolume, float aRate, float aPitch, bool aIsChrome);
 };
 
 } // namespace dom
 } // namespace mozilla
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp
@@ -64,17 +64,18 @@ SpeechSynthesisChild::RecvNotifyVoicesCh
 }
 
 PSpeechSynthesisRequestChild*
 SpeechSynthesisChild::AllocPSpeechSynthesisRequestChild(const nsString& aText,
                                                         const nsString& aLang,
                                                         const nsString& aUri,
                                                         const float& aVolume,
                                                         const float& aRate,
-                                                        const float& aPitch)
+                                                        const float& aPitch,
+                                                        const bool& aIsChrome)
 {
   MOZ_CRASH("Caller is supposed to manually construct a request!");
 }
 
 bool
 SpeechSynthesisChild::DeallocPSpeechSynthesisRequestChild(PSpeechSynthesisRequestChild* aActor)
 {
   delete aActor;
@@ -154,18 +155,18 @@ SpeechSynthesisRequestChild::RecvOnMark(
                                         const uint32_t& aCharIndex)
 {
   mTask->DispatchMarkImpl(aName, aElapsedTime, aCharIndex);
   return IPC_OK();
 }
 
 // SpeechTaskChild
 
-SpeechTaskChild::SpeechTaskChild(SpeechSynthesisUtterance* aUtterance)
-  : nsSpeechTask(aUtterance)
+SpeechTaskChild::SpeechTaskChild(SpeechSynthesisUtterance* aUtterance, bool aIsChrome)
+  : nsSpeechTask(aUtterance, aIsChrome)
 {
 }
 
 NS_IMETHODIMP
 SpeechTaskChild::Setup(nsISpeechTaskCallback* aCallback,
                        uint32_t aChannels, uint32_t aRate, uint8_t argc)
 {
   MOZ_CRASH("Should never be called from child");
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h
@@ -40,17 +40,18 @@ protected:
   SpeechSynthesisChild();
   virtual ~SpeechSynthesisChild();
 
   PSpeechSynthesisRequestChild* AllocPSpeechSynthesisRequestChild(const nsString& aLang,
                                                                   const nsString& aUri,
                                                                   const nsString& aText,
                                                                   const float& aVolume,
                                                                   const float& aPitch,
-                                                                  const float& aRate) override;
+                                                                  const float& aRate,
+                                                                  const bool& aIsChrome) override;
   bool DeallocPSpeechSynthesisRequestChild(PSpeechSynthesisRequestChild* aActor) override;
 };
 
 class SpeechSynthesisRequestChild : public PSpeechSynthesisRequestChild
 {
 public:
   explicit SpeechSynthesisRequestChild(SpeechTaskChild* aTask);
   virtual ~SpeechSynthesisRequestChild();
@@ -77,17 +78,17 @@ protected:
   RefPtr<SpeechTaskChild> mTask;
 };
 
 class SpeechTaskChild : public nsSpeechTask
 {
   friend class SpeechSynthesisRequestChild;
 public:
 
-  explicit SpeechTaskChild(SpeechSynthesisUtterance* aUtterance);
+  explicit SpeechTaskChild(SpeechSynthesisUtterance* aUtterance, bool aIsChrome);
 
   NS_IMETHOD Setup(nsISpeechTaskCallback* aCallback,
                    uint32_t aChannels, uint32_t aRate, uint8_t argc) override;
 
   NS_IMETHOD SendAudio(JS::Handle<JS::Value> aData, JS::Handle<JS::Value> aLandmarks,
                        JSContext* aCx) override;
 
   NS_IMETHOD SendAudioNative(int16_t* aData, uint32_t aDataLen) override;
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp
@@ -31,19 +31,20 @@ SpeechSynthesisParent::SendInit()
 }
 
 PSpeechSynthesisRequestParent*
 SpeechSynthesisParent::AllocPSpeechSynthesisRequestParent(const nsString& aText,
                                                           const nsString& aLang,
                                                           const nsString& aUri,
                                                           const float& aVolume,
                                                           const float& aRate,
-                                                          const float& aPitch)
+                                                          const float& aPitch,
+                                                          const bool& aIsChrome)
 {
-  RefPtr<SpeechTaskParent> task = new SpeechTaskParent(aVolume, aText);
+  RefPtr<SpeechTaskParent> task = new SpeechTaskParent(aVolume, aText, aIsChrome);
   SpeechSynthesisRequestParent* actor = new SpeechSynthesisRequestParent(task);
   return actor;
 }
 
 bool
 SpeechSynthesisParent::DeallocPSpeechSynthesisRequestParent(PSpeechSynthesisRequestParent* aActor)
 {
   delete aActor;
@@ -52,17 +53,18 @@ SpeechSynthesisParent::DeallocPSpeechSyn
 
 mozilla::ipc::IPCResult
 SpeechSynthesisParent::RecvPSpeechSynthesisRequestConstructor(PSpeechSynthesisRequestParent* aActor,
                                                               const nsString& aText,
                                                               const nsString& aLang,
                                                               const nsString& aUri,
                                                               const float& aVolume,
                                                               const float& aRate,
-                                                              const float& aPitch)
+                                                              const float& aPitch,
+                                                              const bool& aIsChrome)
 {
   MOZ_ASSERT(aActor);
   SpeechSynthesisRequestParent* actor =
     static_cast<SpeechSynthesisRequestParent*>(aActor);
   nsSynthVoiceRegistry::GetInstance()->Speak(aText, aLang, aUri, aVolume, aRate,
                                              aPitch, actor->mTask);
   return IPC_OK();
 }
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h
@@ -29,28 +29,30 @@ public:
 protected:
   SpeechSynthesisParent();
   virtual ~SpeechSynthesisParent();
   PSpeechSynthesisRequestParent* AllocPSpeechSynthesisRequestParent(const nsString& aText,
                                                                     const nsString& aLang,
                                                                     const nsString& aUri,
                                                                     const float& aVolume,
                                                                     const float& aRate,
-                                                                    const float& aPitch)
+                                                                    const float& aPitch,
+                                                                    const bool& aIsChrome)
                                                                     override;
 
   bool DeallocPSpeechSynthesisRequestParent(PSpeechSynthesisRequestParent* aActor) override;
 
   mozilla::ipc::IPCResult RecvPSpeechSynthesisRequestConstructor(PSpeechSynthesisRequestParent* aActor,
                                                                  const nsString& aText,
                                                                  const nsString& aLang,
                                                                  const nsString& aUri,
                                                                  const float& aVolume,
                                                                  const float& aRate,
-                                                                 const float& aPitch) override;
+                                                                 const float& aPitch,
+                                                                 const bool& aIsChrome) override;
 };
 
 class SpeechSynthesisRequestParent : public PSpeechSynthesisRequestParent
 {
 public:
   explicit SpeechSynthesisRequestParent(SpeechTaskParent* aTask);
   virtual ~SpeechSynthesisRequestParent();
 
@@ -72,18 +74,18 @@ protected:
 
   mozilla::ipc::IPCResult Recv__delete__() override;
 };
 
 class SpeechTaskParent : public nsSpeechTask
 {
   friend class SpeechSynthesisRequestParent;
 public:
-  SpeechTaskParent(float aVolume, const nsAString& aUtterance)
-    : nsSpeechTask(aVolume, aUtterance) {}
+  SpeechTaskParent(float aVolume, const nsAString& aUtterance, bool aIsChrome)
+    : nsSpeechTask(aVolume, aUtterance, aIsChrome) {}
 
   nsresult DispatchStartImpl(const nsAString& aUri);
 
   nsresult DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex);
 
   nsresult DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex);
 
   nsresult DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex);
--- a/dom/media/webspeech/synth/nsSpeechTask.cpp
+++ b/dom/media/webspeech/synth/nsSpeechTask.cpp
@@ -115,37 +115,39 @@ NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(
   NS_INTERFACE_MAP_ENTRY(nsIAudioChannelAgentCallback)
   NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
   NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTask)
 NS_INTERFACE_MAP_END
 
 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsSpeechTask)
 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsSpeechTask)
 
-nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance)
+nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance, bool aIsChrome)
   : mUtterance(aUtterance)
   , mInited(false)
   , mPrePaused(false)
   , mPreCanceled(false)
   , mCallback(nullptr)
   , mIndirectAudio(false)
+  , mIsChrome(aIsChrome)
 {
   mText = aUtterance->mText;
   mVolume = aUtterance->Volume();
 }
 
-nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText)
+nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText, bool aIsChrome)
   : mUtterance(nullptr)
   , mVolume(aVolume)
   , mText(aText)
   , mInited(false)
   , mPrePaused(false)
   , mPreCanceled(false)
   , mCallback(nullptr)
   , mIndirectAudio(false)
+  , mIsChrome(aIsChrome)
 {
 }
 
 nsSpeechTask::~nsSpeechTask()
 {
   LOG(LogLevel::Debug, ("~nsSpeechTask"));
   if (mStream) {
     if (!mStream->IsDestroyed()) {
@@ -508,26 +510,38 @@ nsSpeechTask::DispatchResumeImpl(float a
     mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("resume"),
                                              aCharIndex, nullptr, aElapsedTime,
                                              EmptyString());
   }
 
   return NS_OK;
 }
 
+void
+nsSpeechTask::ForceError(float aElapsedTime, uint32_t aCharIndex)
+{
+  DispatchErrorInner(aElapsedTime, aCharIndex);
+}
+
 NS_IMETHODIMP
 nsSpeechTask::DispatchError(float aElapsedTime, uint32_t aCharIndex)
 {
   LOG(LogLevel::Debug, ("nsSpeechTask::DispatchError"));
 
   if (!mIndirectAudio) {
     NS_WARNING("Can't call DispatchError() from a direct audio speech service");
     return NS_ERROR_FAILURE;
   }
 
+  return DispatchErrorInner(aElapsedTime, aCharIndex);
+}
+
+nsresult
+nsSpeechTask::DispatchErrorInner(float aElapsedTime, uint32_t aCharIndex)
+{
   if (!mPreCanceled) {
     nsSynthVoiceRegistry::GetInstance()->SpeakNext();
   }
 
   return DispatchErrorImpl(aElapsedTime, aCharIndex);
 }
 
 nsresult
--- a/dom/media/webspeech/synth/nsSpeechTask.h
+++ b/dom/media/webspeech/synth/nsSpeechTask.h
@@ -30,18 +30,18 @@ class nsSpeechTask : public nsISpeechTas
 
 public:
   NS_DECL_CYCLE_COLLECTING_ISUPPORTS
   NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsSpeechTask, nsISpeechTask)
 
   NS_DECL_NSISPEECHTASK
   NS_DECL_NSIAUDIOCHANNELAGENTCALLBACK
 
-  explicit nsSpeechTask(SpeechSynthesisUtterance* aUtterance);
-  nsSpeechTask(float aVolume, const nsAString& aText);
+  explicit nsSpeechTask(SpeechSynthesisUtterance* aUtterance, bool aIsChrome);
+  nsSpeechTask(float aVolume, const nsAString& aText, bool aIsChrome);
 
   virtual void Pause();
 
   virtual void Resume();
 
   virtual void Cancel();
 
   virtual void ForceEnd();
@@ -54,26 +54,33 @@ public:
 
   void InitDirectAudio();
   void InitIndirectAudio();
 
   void SetChosenVoiceURI(const nsAString& aUri);
 
   virtual void SetAudioOutputVolume(float aVolume);
 
+  void ForceError(float aElapsedTime, uint32_t aCharIndex);
+
   bool IsPreCanceled()
   {
     return mPreCanceled;
   };
 
   bool IsPrePaused()
   {
     return mPrePaused;
   }
 
+  bool IsChrome()
+  {
+    return mIsChrome;
+  }
+
 protected:
   virtual ~nsSpeechTask();
 
   nsresult DispatchStartImpl();
 
   virtual nsresult DispatchStartImpl(const nsAString& aUri);
 
   virtual nsresult DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex);
@@ -107,16 +114,17 @@ protected:
 
 private:
   void End();
 
   void SendAudioImpl(RefPtr<mozilla::SharedBuffer>& aSamples, uint32_t aDataLen);
 
   nsresult DispatchStartInner();
 
+  nsresult DispatchErrorInner(float aElapsedTime, uint32_t aCharIndex);
   nsresult DispatchEndInner(float aElapsedTime, uint32_t aCharIndex);
 
   void CreateAudioChannelAgent();
 
   void DestroyAudioChannelAgent();
 
   RefPtr<SourceMediaStream> mStream;
 
@@ -128,14 +136,16 @@ private:
 
   uint32_t mChannels;
 
   RefPtr<SpeechSynthesis> mSpeechSynthesis;
 
   bool mIndirectAudio;
 
   nsString mChosenVoiceURI;
+
+  bool mIsChrome;
 };
 
 } // namespace dom
 } // namespace mozilla
 
 #endif
--- a/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp
+++ b/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp
@@ -674,30 +674,36 @@ nsSynthVoiceRegistry::SpeakUtterance(Spe
       // TODO : use audio channel agent, open new bug to fix it.
       uint32_t channel = static_cast<uint32_t>(AudioChannelService::GetDefaultAudioChannel());
       AudioPlaybackConfig config = service->GetMediaConfig(topWindow->GetOuterWindow(),
                                                            channel);
       volume = config.mMuted ? 0.0f : config.mVolume * volume;
     }
   }
 
+  nsCOMPtr<nsPIDOMWindowInner> window = aUtterance.GetOwner();
+  nsCOMPtr<nsIDocument> doc = window ? window->GetDoc() : nullptr;
+
+  bool isChrome = nsContentUtils::IsChromeDoc(doc);
+
   RefPtr<nsSpeechTask> task;
   if (XRE_IsContentProcess()) {
-    task = new SpeechTaskChild(&aUtterance);
+    task = new SpeechTaskChild(&aUtterance, isChrome);
     SpeechSynthesisRequestChild* actor =
       new SpeechSynthesisRequestChild(static_cast<SpeechTaskChild*>(task.get()));
     mSpeechSynthChild->SendPSpeechSynthesisRequestConstructor(actor,
                                                               aUtterance.mText,
                                                               lang,
                                                               uri,
                                                               volume,
                                                               aUtterance.Rate(),
-                                                              aUtterance.Pitch());
+                                                              aUtterance.Pitch(),
+                                                              isChrome);
   } else {
-    task = new nsSpeechTask(&aUtterance);
+    task = new nsSpeechTask(&aUtterance, isChrome);
     Speak(aUtterance.mText, lang, uri,
           volume, aUtterance.Rate(), aUtterance.Pitch(), task);
   }
 
   return task.forget();
 }
 
 void
@@ -706,21 +712,26 @@ nsSynthVoiceRegistry::Speak(const nsAStr
                             const nsAString& aUri,
                             const float& aVolume,
                             const float& aRate,
                             const float& aPitch,
                             nsSpeechTask* aTask)
 {
   MOZ_ASSERT(XRE_IsParentProcess());
 
+  if (!aTask->IsChrome() && nsContentUtils::ShouldResistFingerprinting()) {
+    aTask->ForceError(0, 0);
+    return;
+  }
+
   VoiceData* voice = FindBestMatch(aUri, aLang);
 
   if (!voice) {
     NS_WARNING("No voices found.");
-    aTask->DispatchError(0, 0);
+    aTask->ForceError(0, 0);
     return;
   }
 
   aTask->SetChosenVoiceURI(voice->mUri);
 
   if (mUseGlobalQueue || MediaPrefs::WebSpeechForceGlobal()) {
     LOG(LogLevel::Debug,
         ("nsSynthVoiceRegistry::Speak queueing text='%s' lang='%s' uri='%s' rate=%f pitch=%f",