Bug 650295 - Implement main state machine for speech recognition. r=smaug
author Guilherme Goncalves <ggoncalves@mozilla.com>
Wed, 27 Mar 2013 14:08:17 -0700
changeset 126474 dfd7768f8f48c7318d108d696beda993958ebb37
parent 126473 f22ec75a02d742937450bdef3e845598f10992de
child 126475 c5e0abff4496dfcce425c8ff8828090b26b1abb7
push id 1458
push user ryanvm@gmail.com
push date Thu, 28 Mar 2013 02:22:47 +0000
treeherder fx-team@962f5293f87f
reviewers smaug
bugs 650295
milestone 22.0a1
Bug 650295 - Implement main state machine for speech recognition. r=smaug
b2g/installer/package-manifest.in
browser/installer/package-manifest.in
configure.in
content/base/src/nsGkAtomList.h
content/media/moz.build
content/media/webspeech/Makefile.in
content/media/webspeech/moz.build
content/media/webspeech/recognition/EnableWebSpeechRecognitionCheck.cpp
content/media/webspeech/recognition/EnableWebSpeechRecognitionCheck.h
content/media/webspeech/recognition/SpeechGrammar.cpp
content/media/webspeech/recognition/SpeechGrammar.h
content/media/webspeech/recognition/SpeechGrammarList.cpp
content/media/webspeech/recognition/SpeechGrammarList.h
content/media/webspeech/recognition/SpeechRecognition.cpp
content/media/webspeech/recognition/SpeechRecognition.h
content/media/webspeech/recognition/SpeechRecognitionAlternative.cpp
content/media/webspeech/recognition/SpeechRecognitionAlternative.h
content/media/webspeech/recognition/SpeechRecognitionResult.cpp
content/media/webspeech/recognition/SpeechRecognitionResult.h
content/media/webspeech/recognition/SpeechRecognitionResultList.cpp
content/media/webspeech/recognition/SpeechRecognitionResultList.h
content/media/webspeech/recognition/SpeechStreamListener.cpp
content/media/webspeech/recognition/SpeechStreamListener.h
content/media/webspeech/recognition/endpointer.cc
content/media/webspeech/recognition/endpointer.h
content/media/webspeech/recognition/energy_endpointer.cc
content/media/webspeech/recognition/energy_endpointer.h
content/media/webspeech/recognition/energy_endpointer_params.cc
content/media/webspeech/recognition/energy_endpointer_params.h
content/media/webspeech/recognition/nsIDOMSpeechRecognitionError.idl
content/media/webspeech/recognition/nsIDOMSpeechRecognitionEvent.idl
content/media/webspeech/recognition/nsISpeechRecognitionService.idl
dom/bindings/Makefile.in
dom/dom-config.mk
dom/webidl/SpeechGrammar.webidl
dom/webidl/SpeechGrammarList.webidl
dom/webidl/SpeechRecognition.webidl
dom/webidl/SpeechRecognitionAlternative.webidl
dom/webidl/SpeechRecognitionResult.webidl
dom/webidl/SpeechRecognitionResultList.webidl
dom/webidl/WebIDL.mk
js/xpconnect/src/event_impl_gen.conf.in
layout/build/Makefile.in
mobile/android/installer/package-manifest.in
modules/libpref/src/init/all.js
toolkit/content/license.html
--- a/b2g/installer/package-manifest.in
+++ b/b2g/installer/package-manifest.in
@@ -207,16 +207,19 @@
 @BINPATH@/components/dom_permissionsettings.xpt
 @BINPATH@/components/dom_sidebar.xpt
 @BINPATH@/components/dom_mobilemessage.xpt
 @BINPATH@/components/dom_storage.xpt
 @BINPATH@/components/dom_stylesheets.xpt
 @BINPATH@/components/dom_threads.xpt
 @BINPATH@/components/dom_traversal.xpt
 @BINPATH@/components/dom_views.xpt
+#ifdef MOZ_WEBSPEECH
+@BINPATH@/components/dom_webspeech.xpt
+#endif
 @BINPATH@/components/dom_xbl.xpt
 @BINPATH@/components/dom_xpath.xpt
 @BINPATH@/components/dom_xul.xpt
 @BINPATH@/components/dom_time.xpt
 @BINPATH@/components/downloads.xpt
 @BINPATH@/components/editor.xpt
 @BINPATH@/components/embed_base.xpt
 @BINPATH@/components/extensions.xpt
--- a/browser/installer/package-manifest.in
+++ b/browser/installer/package-manifest.in
@@ -206,16 +206,19 @@
 @BINPATH@/components/dom_range.xpt
 @BINPATH@/components/dom_settings.xpt
 @BINPATH@/components/dom_permissionsettings.xpt
 @BINPATH@/components/dom_sidebar.xpt
 @BINPATH@/components/dom_mobilemessage.xpt
 @BINPATH@/components/dom_storage.xpt
 @BINPATH@/components/dom_stylesheets.xpt
 @BINPATH@/components/dom_traversal.xpt
+#ifdef MOZ_WEBSPEECH
+@BINPATH@/components/dom_webspeech.xpt
+#endif
 @BINPATH@/components/dom_xbl.xpt
 @BINPATH@/components/dom_xpath.xpt
 @BINPATH@/components/dom_xul.xpt
 #ifdef MOZ_GAMEPAD
 @BINPATH@/components/dom_gamepad.xpt
 #endif
 @BINPATH@/components/downloads.xpt
 @BINPATH@/components/editor.xpt
--- a/configure.in
+++ b/configure.in
@@ -4245,16 +4245,17 @@ MOZ_WEBRTC_IN_LIBXUL=
 MOZ_SCTP=
 MOZ_MEDIA_PLUGINS=
 MOZ_MEDIA_NAVIGATOR=
 MOZ_OMX_PLUGIN=
 MOZ_VP8=
 MOZ_VP8_ERROR_CONCEALMENT=
 MOZ_VP8_ENCODER=
 MOZ_WEBVTT=1
+MOZ_WEBSPEECH=1
 VPX_AS=
 VPX_ASFLAGS=
 VPX_AS_DASH_C_FLAG=
 VPX_AS_CONVERSION=
 VPX_ASM_SUFFIX=
 VPX_X86_ASM=
 VPX_ARM_ASM=
 LIBJPEG_TURBO_AS=
@@ -5363,16 +5364,31 @@ arm*)
 *)
     MOZ_SAMPLE_TYPE_FLOAT32=1
     AC_DEFINE(MOZ_SAMPLE_TYPE_FLOAT32)
     AC_SUBST(MOZ_SAMPLE_TYPE_FLOAT32)
 ;;
 esac
 
 dnl ========================================================
+dnl = Disable Speech API code
+dnl ========================================================
+MOZ_ARG_DISABLE_BOOL(webspeech,
+[  --disable-webspeech        Disable support for HTML Speech API],
+    MOZ_WEBSPEECH=,
+    MOZ_WEBSPEECH=1)
+
+if test -n "$MOZ_WEBSPEECH"; then
+    AC_DEFINE(MOZ_WEBSPEECH)
+    MOZ_MEDIA=1
+fi
+
+AC_SUBST(MOZ_WEBSPEECH)
+
+dnl ========================================================
 dnl = Enable Raw Codecs
 dnl ========================================================
 MOZ_ARG_ENABLE_BOOL(raw,
 [  --enable-raw           Enable support for RAW media],
     MOZ_RAW=1,
     MOZ_RAW=)
 
 if test -n "$MOZ_RAW"; then
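The configure block above both defines MOZ_WEBSPEECH for the preprocessor (AC_DEFINE) and exports it to the build system (AC_SUBST); the #ifdef MOZ_WEBSPEECH guards in the package manifests and nsGkAtomList.h below rely on the former. A minimal sketch of the compile-time side — the helper name is hypothetical, not part of this patch:

// Illustrative only: once AC_DEFINE(MOZ_WEBSPEECH) lands in mozilla-config.h,
// speech code can be compiled out when --disable-webspeech is used.
bool WebSpeechCompiledIn()
{
#ifdef MOZ_WEBSPEECH
  return true;   // built with the Web Speech backend (the default above)
#else
  return false;  // configure was run with --disable-webspeech
#endif
}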
--- a/content/base/src/nsGkAtomList.h
+++ b/content/base/src/nsGkAtomList.h
@@ -2156,8 +2156,20 @@ GK_ATOM(timer, "timer")
 GK_ATOM(toolbarname, "toolbarname")
 GK_ATOM(toolbarseparator, "toolbarseparator")
 GK_ATOM(toolbarspacer, "toolbarspacer")
 GK_ATOM(toolbarspring, "toolbarspring")
 GK_ATOM(treegrid, "treegrid")
 GK_ATOM(_undefined, "undefined")
 GK_ATOM(xmlroles, "xml-roles")
 #endif
+
+#ifdef MOZ_WEBSPEECH
+GK_ATOM(onaudiostart, "onaudiostart")
+GK_ATOM(onaudioend, "onaudioend")
+GK_ATOM(onsoundstart, "onsoundstart")
+GK_ATOM(onsoundend, "onsoundend")
+GK_ATOM(onspeechstart, "onspeechstart")
+GK_ATOM(onspeechend, "onspeechend")
+GK_ATOM(onresult, "onresult")
+GK_ATOM(onnomatch, "onnomatch")
+GK_ATOM(onstart, "onstart")
+#endif
--- a/content/media/moz.build
+++ b/content/media/moz.build
@@ -29,12 +29,15 @@ if CONFIG['MOZ_MEDIA_PLUGINS']:
 if CONFIG['MOZ_WMF']:
     PARALLEL_DIRS += ['wmf']
 
 PARALLEL_DIRS += ['webrtc']
 
 if CONFIG['MOZ_WIDGET_TOOLKIT'] == 'gonk':
     PARALLEL_DIRS += ['omx']
 
+if CONFIG['MOZ_WEBSPEECH']:
+    PARALLEL_DIRS += ['webspeech']
+
 TEST_DIRS += ['test']
 
 MODULE = 'content'
 
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/Makefile.in
@@ -0,0 +1,51 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+DEPTH            := @DEPTH@
+topsrcdir        := @top_srcdir@
+srcdir           := @srcdir@
+VPATH            := @srcdir@
+
+include $(DEPTH)/config/autoconf.mk
+include $(topsrcdir)/dom/dom-config.mk
+
+VPATH += \
+  $(srcdir)/recognition \
+  $(NULL)
+
+LIBRARY_NAME   := gkconwebspeech_s
+LIBXUL_LIBRARY := 1
+
+LOCAL_INCLUDES += $(VPATH:%=-I%)
+
+CPPSRCS := \
+  EnableWebSpeechRecognitionCheck.cpp \
+  SpeechGrammar.cpp \
+  SpeechGrammarList.cpp \
+  SpeechRecognitionAlternative.cpp \
+  SpeechRecognition.cpp \
+  SpeechRecognitionResult.cpp \
+  SpeechRecognitionResultList.cpp \
+  SpeechStreamListener.cpp \
+  endpointer.cc \
+  energy_endpointer.cc \
+  energy_endpointer_params.cc \
+  $(NULL)
+
+EXPORTS_NAMESPACES := mozilla/dom
+EXPORTS_mozilla/dom := \
+  SpeechGrammar.h \
+  SpeechGrammarList.h \
+  SpeechRecognitionAlternative.h \
+  SpeechRecognition.h \
+  SpeechRecognitionResult.h \
+  SpeechRecognitionResultList.h \
+  SpeechStreamListener.h \
+  $(NULL)
+
+FORCE_STATIC_LIB := 1
+
+include $(topsrcdir)/config/config.mk
+include $(topsrcdir)/ipc/chromium/chromium-config.mk
+include $(topsrcdir)/config/rules.mk
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/moz.build
@@ -0,0 +1,14 @@
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+MODULE = 'content'
+
+XPIDL_MODULE = 'dom_webspeech'
+
+XPIDL_SOURCES = [
+  'nsIDOMSpeechRecognitionEvent.idl',
+  'nsIDOMSpeechRecognitionError.idl',
+  'nsISpeechRecognitionService.idl'
+]
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/EnableWebSpeechRecognitionCheck.cpp
@@ -0,0 +1,32 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "EnableWebSpeechRecognitionCheck.h"
+#include "mozilla/Preferences.h"
+
+namespace {
+
+bool gPrefInitialized = false;
+bool gWebSpeechEnabled = false;
+
+}
+
+namespace mozilla {
+namespace dom {
+
+/* static */ bool
+EnableWebSpeechRecognitionCheck::PrefEnabled()
+{
+  if (!gPrefInitialized) {
+    Preferences::AddBoolVarCache(&gWebSpeechEnabled, "media.webspeech.recognition.enable");
+    gPrefInitialized = true;
+  }
+  return gWebSpeechEnabled;
+}
+
+} // namespace dom
+} // namespace mozilla
+
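PrefEnabled uses Preferences::AddBoolVarCache, so only the first call pays for a pref lookup; afterwards reads are a plain boolean load, and the cache also tracks later pref flips. A standalone sketch of the idiom — PrefStoreGetBool is a hypothetical stand-in for Preferences::GetBool, and the real cache is updated by the prefs service rather than read once:

#include <cstdio>

namespace {
bool gPrefInitialized = false;
bool gWebSpeechEnabled = false;

// Hypothetical stand-in for the prefs backend.
bool PrefStoreGetBool(const char* /* aName */) { return true; }
}

bool PrefEnabled()
{
  if (!gPrefInitialized) {
    // First call: read the pref once and remember that we did.
    gWebSpeechEnabled = PrefStoreGetBool("media.webspeech.recognition.enable");
    gPrefInitialized = true;
  }
  return gWebSpeechEnabled;  // later calls: no lookup, just a load
}

int main()
{
  std::printf("webspeech enabled: %d\n", PrefEnabled());
}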
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/EnableWebSpeechRecognitionCheck.h
@@ -0,0 +1,19 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#pragma once
+
+namespace mozilla {
+namespace dom {
+
+class EnableWebSpeechRecognitionCheck
+{
+public:
+  static bool PrefEnabled();
+};
+
+} // namespace dom
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechGrammar.cpp
@@ -0,0 +1,82 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "SpeechGrammar.h"
+
+#include "nsContentUtils.h"
+
+#include "mozilla/Preferences.h"
+#include "mozilla/dom/SpeechGrammarBinding.h"
+
+namespace mozilla {
+namespace dom {
+
+NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE_1(SpeechGrammar, mParent)
+NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechGrammar)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechGrammar)
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechGrammar)
+  NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY
+  NS_INTERFACE_MAP_ENTRY(nsISupports)
+NS_INTERFACE_MAP_END
+
+SpeechGrammar::SpeechGrammar(nsISupports* aParent)
+  : mParent(aParent)
+{
+  SetIsDOMBinding();
+}
+
+SpeechGrammar::~SpeechGrammar()
+{
+}
+
+SpeechGrammar*
+SpeechGrammar::Constructor(const GlobalObject& aGlobal, ErrorResult& aRv)
+{
+  return new SpeechGrammar(aGlobal.Get());
+}
+
+nsISupports*
+SpeechGrammar::GetParentObject() const
+{
+  return mParent;
+}
+
+JSObject*
+SpeechGrammar::WrapObject(JSContext* aCx, JSObject* aScope)
+{
+  return SpeechGrammarBinding::Wrap(aCx, aScope, this);
+}
+
+void
+SpeechGrammar::GetSrc(nsString& aRetVal, ErrorResult& aRv) const
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return;
+}
+
+void
+SpeechGrammar::SetSrc(const nsAString& aArg, ErrorResult& aRv)
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return;
+}
+
+float
+SpeechGrammar::GetWeight(ErrorResult& aRv) const
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return 0;
+}
+
+void
+SpeechGrammar::SetWeight(float aArg, ErrorResult& aRv)
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return;
+}
+
+} // namespace dom
+} // namespace mozilla
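All of the src/weight accessors above are stubs that report failure through the ErrorResult out-parameter instead of a return code or exception. A standalone model of that idiom, with hypothetical names (the real NS_ERROR_NOT_IMPLEMENTED is an nsresult; the value below is just for the sketch):

#include <cstdio>

struct ErrorResultModel {
  unsigned code = 0;
  void Throw(unsigned aCode) { code = aCode; }
  bool Failed() const { return code != 0; }
};

constexpr unsigned NS_ERROR_NOT_IMPLEMENTED_MODEL = 0x80004001;

float GetWeight(ErrorResultModel& aRv)
{
  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED_MODEL); // stub: attribute not wired up yet
  return 0.0f;  // return value is meaningless once aRv has failed
}

int main()
{
  ErrorResultModel rv;
  GetWeight(rv);
  if (rv.Failed()) std::printf("getter failed: 0x%x\n", rv.code);
}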
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechGrammar.h
@@ -0,0 +1,56 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#pragma once
+
+#include "nsCOMPtr.h"
+#include "nsCycleCollectionParticipant.h"
+#include "nsString.h"
+#include "nsWrapperCache.h"
+
+#include "mozilla/Attributes.h"
+#include "mozilla/ErrorResult.h"
+
+#include "EnableWebSpeechRecognitionCheck.h"
+
+struct JSContext;
+
+namespace mozilla {
+namespace dom {
+
+class GlobalObject;
+
+class SpeechGrammar MOZ_FINAL : public nsISupports,
+                                public nsWrapperCache,
+                                public EnableWebSpeechRecognitionCheck
+{
+public:
+  SpeechGrammar(nsISupports* aParent);
+  ~SpeechGrammar();
+
+  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+  NS_DECL_CYCLE_COLLECTION_SCRIPT_HOLDER_CLASS(SpeechGrammar)
+
+  nsISupports* GetParentObject() const;
+
+  virtual JSObject* WrapObject(JSContext* aCx, JSObject* aScope);
+
+  static SpeechGrammar* Constructor(const GlobalObject& aGlobal, ErrorResult& aRv);
+
+  void GetSrc(nsString& aRetVal, ErrorResult& aRv) const;
+
+  void SetSrc(const nsAString& aArg, ErrorResult& aRv);
+
+  float GetWeight(ErrorResult& aRv) const;
+
+  void SetWeight(float aArg, ErrorResult& aRv);
+
+private:
+  nsCOMPtr<nsISupports> mParent;
+};
+
+} // namespace dom
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechGrammarList.cpp
@@ -0,0 +1,91 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "SpeechGrammarList.h"
+
+#include "nsContentUtils.h"
+
+#include "mozilla/dom/SpeechGrammarListBinding.h"
+
+namespace mozilla {
+namespace dom {
+
+NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE_1(SpeechGrammarList, mParent)
+NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechGrammarList)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechGrammarList)
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechGrammarList)
+  NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY
+  NS_INTERFACE_MAP_ENTRY(nsISupports)
+NS_INTERFACE_MAP_END
+
+SpeechGrammarList::SpeechGrammarList(nsISupports* aParent)
+  : mParent(aParent)
+{
+  SetIsDOMBinding();
+}
+
+SpeechGrammarList::~SpeechGrammarList()
+{
+}
+
+SpeechGrammarList*
+SpeechGrammarList::Constructor(const GlobalObject& aGlobal, ErrorResult& aRv)
+{
+  return new SpeechGrammarList(aGlobal.Get());
+}
+
+JSObject*
+SpeechGrammarList::WrapObject(JSContext* aCx, JSObject* aScope)
+{
+  return SpeechGrammarListBinding::Wrap(aCx, aScope, this);
+}
+
+nsISupports*
+SpeechGrammarList::GetParentObject() const
+{
+  return mParent;
+}
+
+uint32_t
+SpeechGrammarList::Length() const
+{
+  return 0;
+}
+
+already_AddRefed<SpeechGrammar>
+SpeechGrammarList::Item(uint32_t aIndex, ErrorResult& aRv)
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return nullptr;
+}
+
+void
+SpeechGrammarList::AddFromURI(const nsAString& aSrc,
+                              const Optional<float>& aWeight,
+                              ErrorResult& aRv)
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return;
+}
+
+void
+SpeechGrammarList::AddFromString(const nsAString& aString,
+                                 const Optional<float>& aWeight,
+                                 ErrorResult& aRv)
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return;
+}
+
+already_AddRefed<SpeechGrammar>
+SpeechGrammarList::IndexedGetter(uint32_t aIndex, bool& aPresent,
+                                 ErrorResult& aRv)
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return nullptr;
+}
+} // namespace dom
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechGrammarList.h
@@ -0,0 +1,57 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#pragma once
+
+#include "nsCOMPtr.h"
+#include "nsCycleCollectionParticipant.h"
+#include "nsWrapperCache.h"
+
+#include "mozilla/Attributes.h"
+#include "mozilla/dom/BindingUtils.h"
+
+#include "EnableWebSpeechRecognitionCheck.h"
+#include "SpeechGrammar.h"
+
+struct JSContext;
+
+namespace mozilla {
+namespace dom {
+
+class GlobalObject;
+class SpeechGrammarList MOZ_FINAL : public nsISupports,
+                                    public nsWrapperCache,
+                                    public EnableWebSpeechRecognitionCheck
+{
+public:
+  SpeechGrammarList(nsISupports* aParent);
+  ~SpeechGrammarList();
+
+  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+  NS_DECL_CYCLE_COLLECTION_SCRIPT_HOLDER_CLASS(SpeechGrammarList)
+
+  SpeechGrammarList* Constructor(const GlobalObject& aGlobal, ErrorResult& aRv);
+
+  nsISupports* GetParentObject() const;
+
+  virtual JSObject* WrapObject(JSContext* aCx, JSObject* aScope);
+
+  uint32_t Length() const;
+
+  already_AddRefed<SpeechGrammar> Item(uint32_t aIndex, ErrorResult& aRv);
+
+  void AddFromURI(const nsAString& aSrc, const Optional<float>& aWeight, ErrorResult& aRv);
+
+  void AddFromString(const nsAString& aString, const Optional<float>& aWeight, ErrorResult& aRv);
+
+  already_AddRefed<SpeechGrammar> IndexedGetter(uint32_t aIndex, bool& aPresent, ErrorResult& aRv);
+
+private:
+  nsCOMPtr<nsISupports> mParent;
+};
+
+} // namespace dom
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechRecognition.cpp
@@ -0,0 +1,739 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "SpeechRecognition.h"
+
+#include "nsCOMPtr.h"
+#include "nsContentUtils.h"
+#include "nsCycleCollectionParticipant.h"
+#include "mozilla/Preferences.h"
+
+#include "mozilla/dom/SpeechRecognitionBinding.h"
+
+#include "AudioSegment.h"
+#include "SpeechStreamListener.h"
+#include "endpointer.h"
+
+#include "GeneratedEvents.h"
+#include "nsIDOMSpeechRecognitionEvent.h"
+
+namespace mozilla {
+namespace dom {
+
+#define PREFERENCE_DEFAULT_RECOGNITION_SERVICE "media.webspeech.service.default"
+#define DEFAULT_RECOGNITION_SERVICE "google"
+
+#define PREFERENCE_ENDPOINTER_SILENCE_LENGTH "media.webspeech.silence_length"
+#define PREFERENCE_ENDPOINTER_LONG_SILENCE_LENGTH "media.webspeech.long_silence_length"
+#define PREFERENCE_ENDPOINTER_LONG_SPEECH_LENGTH "media.webspeech.long_speech_length"
+
+static const uint32_t kSAMPLE_RATE = 16000;
+static const uint32_t kSPEECH_DETECTION_TIMEOUT_MS = 10000;
+
+// number of samples corresponding to 300ms of audio to send to the endpointer
+// while it's in environment estimation mode
+// kSAMPLE_RATE samples = 1s, so kESTIMATION_SAMPLES = 300 * 16000 / 1000 = 4800
+static const uint32_t kESTIMATION_SAMPLES = 300 * kSAMPLE_RATE / 1000;
+
+#define STATE_EQUALS(state) (mCurrentState == state)
+#define STATE_BETWEEN(state1, state2) \
+  (mCurrentState >= (state1) && mCurrentState <= (state2))
+
+#ifdef PR_LOGGING
+PRLogModuleInfo*
+GetSpeechRecognitionLog()
+{
+  static PRLogModuleInfo* sLog;
+  if (!sLog) {
+    sLog = PR_NewLogModule("SpeechRecognition");
+  }
+
+  return sLog;
+}
+#define SR_LOG(...) PR_LOG(GetSpeechRecognitionLog(), PR_LOG_DEBUG, (__VA_ARGS__))
+#else
+#define SR_LOG(...)
+#endif
+
+NS_INTERFACE_MAP_BEGIN(SpeechRecognition)
+  NS_INTERFACE_MAP_ENTRY(nsIObserver)
+NS_INTERFACE_MAP_END_INHERITING(nsDOMEventTargetHelper)
+
+NS_IMPL_ADDREF_INHERITED(SpeechRecognition, nsDOMEventTargetHelper)
+NS_IMPL_RELEASE_INHERITED(SpeechRecognition, nsDOMEventTargetHelper)
+
+SpeechRecognition::SpeechRecognition()
+  : mProcessingEvent(false)
+  , mEndpointer(kSAMPLE_RATE)
+  , mSpeechDetectionTimer(do_CreateInstance(NS_TIMER_CONTRACTID))
+{
+  SR_LOG("created SpeechRecognition");
+  SetIsDOMBinding();
+  mEndpointer.set_speech_input_complete_silence_length(
+      Preferences::GetInt(PREFERENCE_ENDPOINTER_SILENCE_LENGTH, 500000));
+  mEndpointer.set_long_speech_input_complete_silence_length(
+      Preferences::GetInt(PREFERENCE_ENDPOINTER_LONG_SILENCE_LENGTH, 1000000));
+  mEndpointer.set_long_speech_length(
+      Preferences::GetInt(PREFERENCE_ENDPOINTER_LONG_SPEECH_LENGTH, 3 * 1000000));
+  mCurrentState = Reset();
+}
+
+JSObject*
+SpeechRecognition::WrapObject(JSContext* aCx, JSObject* aScope)
+{
+  return SpeechRecognitionBinding::Wrap(aCx, aScope, this);
+}
+
+already_AddRefed<SpeechRecognition>
+SpeechRecognition::Constructor(const GlobalObject& aGlobal, ErrorResult& aRv)
+{
+  nsCOMPtr<nsPIDOMWindow> win = do_QueryInterface(aGlobal.Get());
+  if (!win) {
+    aRv.Throw(NS_ERROR_FAILURE);
+    return nullptr;
+  }
+
+  MOZ_ASSERT(win->IsInnerWindow());
+  nsRefPtr<SpeechRecognition> object = new SpeechRecognition();
+  object->BindToOwner(win);
+  return object.forget();
+}
+
+nsISupports*
+SpeechRecognition::GetParentObject() const
+{
+  return GetOwner();
+}
+
+void
+SpeechRecognition::ProcessEvent(SpeechEvent* aEvent)
+{
+  SR_LOG("Processing event %d", aEvent->mType);
+
+  MOZ_ASSERT(!mProcessingEvent, "Event dispatch should be sequential!");
+  mProcessingEvent = true;
+
+  SR_LOG("Current state: %d", mCurrentState);
+  mCurrentState = TransitionAndGetNextState(aEvent);
+  SR_LOG("Transitioned to state: %d", mCurrentState);
+  mProcessingEvent = false;
+}
+
+SpeechRecognition::FSMState
+SpeechRecognition::TransitionAndGetNextState(SpeechEvent* aEvent)
+{
+  switch (mCurrentState) {
+    case STATE_IDLE:
+      switch (aEvent->mType) {
+        case EVENT_START:
+          // TODO: may want to time out if we wait too long
+          // for user to approve
+          return STATE_STARTING;
+        case EVENT_STOP:
+        case EVENT_ABORT:
+        case EVENT_AUDIO_DATA:
+        case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT:
+        case EVENT_RECOGNITIONSERVICE_FINAL_RESULT:
+          return DoNothing(aEvent);
+        case EVENT_AUDIO_ERROR:
+        case EVENT_RECOGNITIONSERVICE_ERROR:
+          return AbortError(aEvent);
+      }
+    case STATE_STARTING:
+      switch (aEvent->mType) {
+        case EVENT_AUDIO_DATA:
+          return StartedAudioCapture(aEvent);
+        case EVENT_AUDIO_ERROR:
+        case EVENT_RECOGNITIONSERVICE_ERROR:
+          return AbortError(aEvent);
+        case EVENT_ABORT:
+          return AbortSilently(aEvent);
+        case EVENT_STOP:
+          return Reset();
+        case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT:
+        case EVENT_RECOGNITIONSERVICE_FINAL_RESULT:
+          return DoNothing(aEvent);
+        case EVENT_START:
+          SR_LOG("STATE_STARTING: Unhandled event %d", aEvent->mType);
+          MOZ_NOT_REACHED("");
+      }
+    case STATE_ESTIMATING:
+      switch (aEvent->mType) {
+        case EVENT_AUDIO_DATA:
+          return WaitForEstimation(aEvent);
+        case EVENT_STOP:
+          return StopRecordingAndRecognize(aEvent);
+        case EVENT_ABORT:
+          return AbortSilently(aEvent);
+        case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT:
+        case EVENT_RECOGNITIONSERVICE_FINAL_RESULT:
+        case EVENT_RECOGNITIONSERVICE_ERROR:
+          return DoNothing(aEvent);
+        case EVENT_START:
+        case EVENT_AUDIO_ERROR:
+          SR_LOG("STATE_ESTIMATING: Unhandled event %d", aEvent->mType);
+          MOZ_NOT_REACHED("");
+      }
+    case STATE_WAITING_FOR_SPEECH:
+      switch (aEvent->mType) {
+        case EVENT_AUDIO_DATA:
+          return DetectSpeech(aEvent);
+        case EVENT_STOP:
+          return StopRecordingAndRecognize(aEvent);
+        case EVENT_ABORT:
+          return AbortSilently(aEvent);
+        case EVENT_AUDIO_ERROR:
+          return AbortError(aEvent);
+        case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT:
+        case EVENT_RECOGNITIONSERVICE_FINAL_RESULT:
+        case EVENT_RECOGNITIONSERVICE_ERROR:
+          return DoNothing(aEvent);
+        case EVENT_START:
+          SR_LOG("STATE_STARTING: Unhandled event %d", aEvent->mType);
+          MOZ_NOT_REACHED("");
+      }
+    case STATE_RECOGNIZING:
+      switch (aEvent->mType) {
+        case EVENT_AUDIO_DATA:
+          return WaitForSpeechEnd(aEvent);
+        case EVENT_STOP:
+          return StopRecordingAndRecognize(aEvent);
+        case EVENT_AUDIO_ERROR:
+        case EVENT_RECOGNITIONSERVICE_ERROR:
+          return AbortError(aEvent);
+        case EVENT_ABORT:
+          return AbortSilently(aEvent);
+        case EVENT_RECOGNITIONSERVICE_FINAL_RESULT:
+        case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT:
+          return DoNothing(aEvent);
+        case EVENT_START:
+          SR_LOG("STATE_RECOGNIZING: Unhandled aEvent %d", aEvent->mType);
+          MOZ_NOT_REACHED("");
+      }
+    case STATE_WAITING_FOR_RESULT:
+      switch (aEvent->mType) {
+        case EVENT_STOP:
+          return DoNothing(aEvent);
+        case EVENT_AUDIO_ERROR:
+        case EVENT_RECOGNITIONSERVICE_ERROR:
+          return AbortError(aEvent);
+        case EVENT_RECOGNITIONSERVICE_FINAL_RESULT:
+          return NotifyFinalResult(aEvent);
+        case EVENT_AUDIO_DATA:
+          return DoNothing(aEvent);
+        case EVENT_ABORT:
+          return AbortSilently(aEvent);
+        case EVENT_START:
+        case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT:
+          SR_LOG("STATE_WAITING_FOR_RESULT: Unhandled aEvent %d", aEvent->mType);
+          MOZ_NOT_REACHED("");
+      }
+  }
+  SR_LOG("Unhandled state %d", mCurrentState);
+  MOZ_NOT_REACHED("");
+  return mCurrentState;
+}
+
+/*
+ * Handle a segment of recorded audio data.
+ * Returns the number of samples that were processed.
+ */
+uint32_t
+SpeechRecognition::ProcessAudioSegment(AudioSegment* aSegment)
+{
+  AudioSegment::ChunkIterator iterator(*aSegment);
+  uint32_t samples = 0;
+  while (!iterator.IsEnded()) {
+    float out;
+    mEndpointer.ProcessAudio(*iterator, &out);
+    samples += iterator->GetDuration();
+    iterator.Next();
+  }
+
+  mRecognitionService->ProcessAudioSegment(aSegment);
+  return samples;
+}
+
+void
+SpeechRecognition::GetRecognitionServiceCID(nsACString& aResultCID)
+{
+  nsAdoptingCString prefValue =
+    Preferences::GetCString(PREFERENCE_DEFAULT_RECOGNITION_SERVICE);
+
+  nsAutoCString speechRecognitionService;
+  if (!prefValue.get() || prefValue.IsEmpty()) {
+    speechRecognitionService = DEFAULT_RECOGNITION_SERVICE;
+  } else {
+    speechRecognitionService = prefValue;
+  }
+
+  aResultCID =
+    NS_LITERAL_CSTRING(NS_SPEECH_RECOGNITION_SERVICE_CONTRACTID_PREFIX) +
+    speechRecognitionService;
+
+  return;
+}
+
+/****************************
+ * FSM Transition functions *
+ ****************************/
+
+SpeechRecognition::FSMState
+SpeechRecognition::Reset()
+{
+  mRecognitionService = nullptr;
+  mEstimationSamples = 0;
+  mSpeechDetectionTimer->Cancel();
+
+  return STATE_IDLE;
+}
+
+/*
+ * Since the handler for "end" may call
+ * start(), we want to fully reset before dispatching
+ * the event.
+ */
+SpeechRecognition::FSMState
+SpeechRecognition::ResetAndEnd()
+{
+  mCurrentState = Reset();
+  DispatchTrustedEvent(NS_LITERAL_STRING("end"));
+  return mCurrentState;
+}
+
+SpeechRecognition::FSMState
+SpeechRecognition::StartedAudioCapture(SpeechEvent* aEvent)
+{
+  mEndpointer.SetEnvironmentEstimationMode();
+  mEstimationSamples += ProcessAudioSegment(aEvent->mAudioSegment);
+
+  DispatchTrustedEvent(NS_LITERAL_STRING("start"));
+  DispatchTrustedEvent(NS_LITERAL_STRING("audiostart"));
+
+  return STATE_ESTIMATING;
+}
+
+SpeechRecognition::FSMState
+SpeechRecognition::StopRecordingAndRecognize(SpeechEvent* aEvent)
+{
+  StopRecording();
+  MOZ_ASSERT(mRecognitionService, "Service deleted before recording done");
+  mRecognitionService->SoundEnd();
+
+  return STATE_WAITING_FOR_RESULT;
+}
+
+SpeechRecognition::FSMState
+SpeechRecognition::WaitForEstimation(SpeechEvent* aEvent)
+{
+  mEstimationSamples += ProcessAudioSegment(aEvent->mAudioSegment);
+
+  if (mEstimationSamples > kESTIMATION_SAMPLES) {
+    mEndpointer.SetUserInputMode();
+    return STATE_WAITING_FOR_SPEECH;
+  }
+
+  return STATE_ESTIMATING;
+}
+
+SpeechRecognition::FSMState
+SpeechRecognition::DetectSpeech(SpeechEvent* aEvent)
+{
+  ProcessAudioSegment(aEvent->mAudioSegment);
+
+  if (mEndpointer.DidStartReceivingSpeech()) {
+    mSpeechDetectionTimer->Cancel();
+    DispatchTrustedEvent(NS_LITERAL_STRING("speechstart"));
+    return STATE_RECOGNIZING;
+  }
+
+  return STATE_WAITING_FOR_SPEECH;
+}
+
+SpeechRecognition::FSMState
+SpeechRecognition::WaitForSpeechEnd(SpeechEvent* aEvent)
+{
+  ProcessAudioSegment(aEvent->mAudioSegment);
+
+  if (mEndpointer.speech_input_complete()) {
+    // FIXME: StopRecordingAndRecognize should only be called for single-shot
+    // services; for continuous ones we should just inform the service
+    DispatchTrustedEvent(NS_LITERAL_STRING("speechend"));
+    return StopRecordingAndRecognize(aEvent);
+  }
+
+  return STATE_RECOGNIZING;
+}
+
+SpeechRecognition::FSMState
+SpeechRecognition::NotifyFinalResult(SpeechEvent* aEvent)
+{
+  nsCOMPtr<nsIDOMEvent> domEvent;
+  NS_NewDOMSpeechRecognitionEvent(getter_AddRefs(domEvent), nullptr, nullptr, nullptr);
+
+  nsCOMPtr<nsIDOMSpeechRecognitionEvent> srEvent = do_QueryInterface(domEvent);
+  nsRefPtr<SpeechRecognitionResultList> rlist = aEvent->mRecognitionResultList;
+  nsCOMPtr<nsISupports> ilist = do_QueryInterface(rlist);
+  srEvent->InitSpeechRecognitionEvent(NS_LITERAL_STRING("result"),
+                                      true, false, 0, ilist,
+                                      NS_LITERAL_STRING("NOT_IMPLEMENTED"),
+                                      nullptr);
+  domEvent->SetTrusted(true);
+
+  bool defaultActionEnabled;
+  this->DispatchEvent(domEvent, &defaultActionEnabled);
+  return ResetAndEnd();
+}
+
+SpeechRecognition::FSMState
+SpeechRecognition::DoNothing(SpeechEvent* aEvent)
+{
+  return mCurrentState;
+}
+
+SpeechRecognition::FSMState
+SpeechRecognition::AbortSilently(SpeechEvent* aEvent)
+{
+  if (mRecognitionService) {
+    mRecognitionService->Abort();
+  }
+
+  if (STATE_BETWEEN(STATE_ESTIMATING, STATE_RECOGNIZING)) {
+    StopRecording();
+  }
+
+  return ResetAndEnd();
+}
+
+SpeechRecognition::FSMState
+SpeechRecognition::AbortError(SpeechEvent* aEvent)
+{
+  FSMState nextState = AbortSilently(aEvent);
+  NotifyError(aEvent);
+  return nextState;
+}
+
+void
+SpeechRecognition::NotifyError(SpeechEvent* aEvent)
+{
+  nsCOMPtr<nsIDOMEvent> domEvent = do_QueryInterface(aEvent->mError);
+  domEvent->SetTrusted(true);
+
+  bool defaultActionEnabled;
+  this->DispatchEvent(domEvent, &defaultActionEnabled);
+
+  return;
+}
+
+/**************************************
+ * Event triggers and other functions *
+ **************************************/
+NS_IMETHODIMP
+SpeechRecognition::StartRecording(DOMLocalMediaStream* aDOMStream)
+{
+  // hold a reference so that the underlying stream
+  // doesn't get Destroy()'ed
+  mDOMStream = aDOMStream;
+
+  NS_ENSURE_STATE(mDOMStream->GetStream());
+  mDOMStream->GetStream()->AddListener(new SpeechStreamListener(this));
+
+  mEndpointer.StartSession();
+
+  return mSpeechDetectionTimer->Init(this, kSPEECH_DETECTION_TIMEOUT_MS,
+                                     nsITimer::TYPE_ONE_SHOT);
+}
+
+NS_IMETHODIMP
+SpeechRecognition::StopRecording()
+{
+  mDOMStream = nullptr;
+
+  mEndpointer.EndSession();
+  DispatchTrustedEvent(NS_LITERAL_STRING("audioend"));
+
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+SpeechRecognition::Observe(nsISupports* aSubject, const char* aTopic,
+                           const PRUnichar* aData)
+{
+  MOZ_ASSERT(NS_IsMainThread(), "Observer invoked off the main thread");
+
+  if (!strcmp(aTopic, NS_TIMER_CALLBACK_TOPIC) &&
+      STATE_BETWEEN(STATE_IDLE, STATE_WAITING_FOR_SPEECH)) {
+
+    DispatchError(SpeechRecognition::EVENT_AUDIO_ERROR,
+                  nsIDOMSpeechRecognitionError::NO_SPEECH,
+                  NS_LITERAL_STRING("No speech detected (timeout)"));
+  }
+
+  return NS_OK;
+}
+
+already_AddRefed<SpeechGrammarList>
+SpeechRecognition::GetGrammars(ErrorResult& aRv) const
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return nullptr;
+}
+
+void
+SpeechRecognition::SetGrammars(mozilla::dom::SpeechGrammarList& aArg,
+                               ErrorResult& aRv)
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return;
+}
+
+void
+SpeechRecognition::GetLang(nsString& aRetVal, ErrorResult& aRv) const
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return;
+}
+
+void
+SpeechRecognition::SetLang(const nsAString& aArg, ErrorResult& aRv)
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return;
+}
+
+bool
+SpeechRecognition::GetContinuous(ErrorResult& aRv) const
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return false;
+}
+
+void
+SpeechRecognition::SetContinuous(bool aArg, ErrorResult& aRv)
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return;
+}
+
+bool
+SpeechRecognition::GetInterimResults(ErrorResult& aRv) const
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return false;
+}
+
+void
+SpeechRecognition::SetInterimResults(bool aArg, ErrorResult& aRv)
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return;
+}
+
+uint32_t
+SpeechRecognition::GetMaxAlternatives(ErrorResult& aRv) const
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return 0;
+}
+
+void
+SpeechRecognition::SetMaxAlternatives(uint32_t aArg, ErrorResult& aRv)
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return;
+}
+
+void
+SpeechRecognition::GetServiceURI(nsString& aRetVal, ErrorResult& aRv) const
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return;
+}
+
+void
+SpeechRecognition::SetServiceURI(const nsAString& aArg, ErrorResult& aRv)
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return;
+}
+
+void
+SpeechRecognition::Start(ErrorResult& aRv)
+{
+  if (!STATE_EQUALS(STATE_IDLE)) {
+    aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR);
+    return;
+  }
+
+  nsAutoCString speechRecognitionServiceCID;
+  GetRecognitionServiceCID(speechRecognitionServiceCID);
+
+  nsresult rv;
+  mRecognitionService = do_GetService(speechRecognitionServiceCID.get(), &rv);
+  MOZ_ASSERT(mRecognitionService.get(),
+             "failed to instantiate recognition service");
+
+  rv = mRecognitionService->Initialize(this->asWeakPtr());
+  NS_ENSURE_SUCCESS_VOID(rv);
+
+  MediaManager* manager = MediaManager::Get();
+  manager->GetUserMedia(false,
+                        GetOwner(),
+                        new GetUserMediaStreamOptions(),
+                        new GetUserMediaSuccessCallback(this),
+                        new GetUserMediaErrorCallback(this));
+
+  nsRefPtr<SpeechEvent> event = new SpeechEvent(this, EVENT_START);
+  NS_DispatchToMainThread(event);
+}
+
+void
+SpeechRecognition::Stop()
+{
+  nsRefPtr<SpeechEvent> event = new SpeechEvent(this, EVENT_STOP);
+  NS_DispatchToMainThread(event);
+}
+
+void
+SpeechRecognition::Abort()
+{
+  nsRefPtr<SpeechEvent> event = new SpeechEvent(this, EVENT_ABORT);
+  NS_DispatchToMainThread(event);
+}
+
+void
+SpeechRecognition::DispatchError(EventType aErrorType, int aErrorCode,
+                                 const nsAString& aMessage)
+{
+  MOZ_ASSERT(NS_IsMainThread());
+  MOZ_ASSERT(aErrorType == EVENT_RECOGNITIONSERVICE_ERROR ||
+             aErrorType == EVENT_AUDIO_ERROR, "Invalid error type!");
+
+  nsCOMPtr<nsIDOMEvent> domEvent;
+  NS_NewDOMSpeechRecognitionError(getter_AddRefs(domEvent), nullptr, nullptr, nullptr);
+
+  nsCOMPtr<nsIDOMSpeechRecognitionError> srError = do_QueryInterface(domEvent);
+  srError->InitSpeechRecognitionError(NS_LITERAL_STRING("error"), true, false,
+                                      aErrorCode, aMessage);
+  nsRefPtr<SpeechEvent> event = new SpeechEvent(this, aErrorType);
+  event->mError = srError;
+  NS_DispatchToMainThread(event);
+}
+
+void
+SpeechRecognition::FeedAudioData(already_AddRefed<SharedBuffer> aSamples,
+                                 uint32_t aDuration,
+                                 MediaStreamListener* aProvider)
+{
+  MOZ_ASSERT(!NS_IsMainThread(),
+             "FeedAudioData should not be called in the main thread");
+
+  AudioSegment* segment = new AudioSegment();
+
+  nsAutoTArray<const int16_t*, 1> channels;
+  channels.AppendElement(static_cast<const int16_t*>(aSamples.get()->Data()));
+  segment->AppendFrames(aSamples, channels, aDuration);
+
+  nsRefPtr<SpeechEvent> event = new SpeechEvent(this, EVENT_AUDIO_DATA);
+  event->mAudioSegment = segment;
+  event->mProvider = aProvider;
+  NS_DispatchToMainThread(event);
+
+  return;
+}
+
+NS_IMPL_ISUPPORTS1(SpeechRecognition::GetUserMediaStreamOptions, nsIMediaStreamOptions)
+
+NS_IMETHODIMP
+SpeechRecognition::GetUserMediaStreamOptions::GetFake(bool* aFake)
+{
+  *aFake = false;
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+SpeechRecognition::GetUserMediaStreamOptions::GetAudio(bool* aAudio)
+{
+  *aAudio = true;
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+SpeechRecognition::GetUserMediaStreamOptions::GetVideo(bool* aVideo)
+{
+  *aVideo = false;
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+SpeechRecognition::GetUserMediaStreamOptions::GetPicture(bool* aPicture)
+{
+  *aPicture = false;
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+SpeechRecognition::GetUserMediaStreamOptions::GetCamera(nsAString& aCamera)
+{
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+SpeechRecognition::GetUserMediaStreamOptions::GetAudioDevice(nsIMediaDevice** aAudioDevice)
+{
+  *aAudioDevice = nullptr;
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+SpeechRecognition::GetUserMediaStreamOptions::GetVideoDevice(nsIMediaDevice** aVideoDevice)
+{
+  *aVideoDevice = nullptr;
+  return NS_OK;
+}
+
+SpeechEvent::~SpeechEvent()
+{
+  delete mAudioSegment;
+}
+
+NS_IMETHODIMP
+SpeechEvent::Run()
+{
+  mRecognition->ProcessEvent(this);
+  return NS_OK;
+}
+
+NS_IMPL_ISUPPORTS1(SpeechRecognition::GetUserMediaSuccessCallback, nsIDOMGetUserMediaSuccessCallback)
+
+NS_IMETHODIMP
+SpeechRecognition::GetUserMediaSuccessCallback::OnSuccess(nsISupports* aStream)
+{
+  nsCOMPtr<nsIDOMLocalMediaStream> localStream = do_QueryInterface(aStream);
+  mRecognition->StartRecording(static_cast<DOMLocalMediaStream*>(localStream.get()));
+  return NS_OK;
+}
+
+NS_IMPL_ISUPPORTS1(SpeechRecognition::GetUserMediaErrorCallback, nsIDOMGetUserMediaErrorCallback)
+
+NS_IMETHODIMP
+SpeechRecognition::GetUserMediaErrorCallback::OnError(const nsAString& aError)
+{
+  int errorCode;
+
+  if (aError.Equals(NS_LITERAL_STRING("PERMISSION_DENIED"))) {
+    errorCode = nsIDOMSpeechRecognitionError::NOT_ALLOWED;
+  } else {
+    errorCode = nsIDOMSpeechRecognitionError::AUDIO_CAPTURE;
+  }
+
+  mRecognition->DispatchError(SpeechRecognition::EVENT_AUDIO_ERROR, errorCode,
+                              aError);
+
+  return NS_OK;
+}
+
+} // namespace dom
+} // namespace mozilla
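Since TransitionAndGetNextState is the heart of this patch, a compact standalone model may help when reading the nested switches above: the same six states and eight events, with each handler reduced to the state it returns. The two data-dependent transitions (estimation finished, endpointer speech boundary) are passed in as booleans, and events the original marks MOZ_NOT_REACHED simply keep the state. This is a reading aid under those simplifications, not the shipped logic, which also dispatches DOM events, feeds the endpointer, and aborts the recognition service:

#include <cassert>

enum State { IDLE, STARTING, ESTIMATING, WAITING_FOR_SPEECH,
             RECOGNIZING, WAITING_FOR_RESULT };
enum Event { EV_START, EV_STOP, EV_ABORT, EV_AUDIO_DATA, EV_AUDIO_ERROR,
             EV_SVC_INTERMEDIATE, EV_SVC_FINAL, EV_SVC_ERROR };

State Transition(State s, Event e,
                 bool estimationDone = false,  // >= kESTIMATION_SAMPLES seen
                 bool speechBoundary = false)  // endpointer start/end fired
{
  switch (s) {
  case IDLE:
    return e == EV_START ? STARTING : IDLE;     // errors just reset to IDLE
  case STARTING:
    if (e == EV_AUDIO_DATA) return ESTIMATING;  // StartedAudioCapture
    if (e == EV_SVC_INTERMEDIATE || e == EV_SVC_FINAL) return STARTING;
    return IDLE;                                // stop, abort, and errors reset
  case ESTIMATING:
    if (e == EV_AUDIO_DATA)
      return estimationDone ? WAITING_FOR_SPEECH : ESTIMATING;
    if (e == EV_STOP) return WAITING_FOR_RESULT; // StopRecordingAndRecognize
    if (e == EV_ABORT) return IDLE;              // AbortSilently
    return ESTIMATING;                           // service chatter ignored
  case WAITING_FOR_SPEECH:
    if (e == EV_AUDIO_DATA)
      return speechBoundary ? RECOGNIZING : WAITING_FOR_SPEECH; // DetectSpeech
    if (e == EV_STOP) return WAITING_FOR_RESULT;
    if (e == EV_ABORT || e == EV_AUDIO_ERROR) return IDLE;
    return WAITING_FOR_SPEECH;
  case RECOGNIZING:
    if (e == EV_AUDIO_DATA)
      return speechBoundary ? WAITING_FOR_RESULT : RECOGNIZING; // speech ended
    if (e == EV_STOP) return WAITING_FOR_RESULT;
    if (e == EV_ABORT || e == EV_AUDIO_ERROR || e == EV_SVC_ERROR) return IDLE;
    return RECOGNIZING;
  case WAITING_FOR_RESULT:
    if (e == EV_SVC_FINAL) return IDLE;  // NotifyFinalResult, then "end" event
    if (e == EV_ABORT || e == EV_AUDIO_ERROR || e == EV_SVC_ERROR) return IDLE;
    return WAITING_FOR_RESULT;           // stop and late audio are ignored
  }
  return s;
}

int main()
{
  State s = IDLE;
  s = Transition(s, EV_START);      assert(s == STARTING);
  s = Transition(s, EV_AUDIO_DATA); assert(s == ESTIMATING);
  s = Transition(s, EV_AUDIO_DATA, /*estimationDone=*/true);
  assert(s == WAITING_FOR_SPEECH);
  s = Transition(s, EV_AUDIO_DATA, false, /*speechBoundary=*/true);
  assert(s == RECOGNIZING);
  s = Transition(s, EV_SVC_ERROR);  assert(s == IDLE);
  return 0;
}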
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechRecognition.h
@@ -0,0 +1,249 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#pragma once
+
+#include "nsCOMPtr.h"
+#include "nsDOMEventTargetHelper.h"
+#include "nsString.h"
+#include "nsWrapperCache.h"
+#include "nsIDOMNavigatorUserMedia.h"
+#include "nsTArray.h"
+
+#include "MediaManager.h"
+#include "MediaEngine.h"
+#include "MediaStreamGraph.h"
+#include "AudioSegment.h"
+#include "mozilla/WeakPtr.h"
+
+#include "EnableWebSpeechRecognitionCheck.h"
+#include "SpeechGrammarList.h"
+#include "SpeechRecognitionResultList.h"
+#include "nsISpeechRecognitionService.h"
+#include "endpointer.h"
+
+#include "nsIDOMSpeechRecognitionError.h"
+
+struct JSContext;
+class nsIDOMWindow;
+
+namespace mozilla {
+
+namespace dom {
+
+class GlobalObject;
+class SpeechEvent;
+
+#ifdef PR_LOGGING
+PRLogModuleInfo* GetSpeechRecognitionLog();
+#define SR_LOG(...) PR_LOG(GetSpeechRecognitionLog(), PR_LOG_DEBUG, (__VA_ARGS__))
+#else
+#define SR_LOG(...)
+#endif
+
+class SpeechRecognition MOZ_FINAL : public nsDOMEventTargetHelper,
+                                    public nsIObserver,
+                                    public EnableWebSpeechRecognitionCheck,
+                                    public SupportsWeakPtr<SpeechRecognition>
+{
+public:
+  SpeechRecognition();
+  virtual ~SpeechRecognition() {}
+
+  NS_DECL_ISUPPORTS_INHERITED
+
+  NS_DECL_NSIOBSERVER
+
+  nsISupports* GetParentObject() const;
+
+  virtual JSObject* WrapObject(JSContext* aCx, JSObject* aScope);
+
+  static already_AddRefed<SpeechRecognition> Constructor(const GlobalObject& aGlobal, ErrorResult& aRv);
+
+  already_AddRefed<SpeechGrammarList> GetGrammars(ErrorResult& aRv) const;
+
+  void SetGrammars(mozilla::dom::SpeechGrammarList& aArg, ErrorResult& aRv);
+
+  void GetLang(nsString& aRetVal, ErrorResult& aRv) const;
+
+  void SetLang(const nsAString& aArg, ErrorResult& aRv);
+
+  bool GetContinuous(ErrorResult& aRv) const;
+
+  void SetContinuous(bool aArg, ErrorResult& aRv);
+
+  bool GetInterimResults(ErrorResult& aRv) const;
+
+  void SetInterimResults(bool aArg, ErrorResult& aRv);
+
+  uint32_t GetMaxAlternatives(ErrorResult& aRv) const;
+
+  void SetMaxAlternatives(uint32_t aArg, ErrorResult& aRv);
+
+  void GetServiceURI(nsString& aRetVal, ErrorResult& aRv) const;
+
+  void SetServiceURI(const nsAString& aArg, ErrorResult& aRv);
+
+  void Start(ErrorResult& aRv);
+
+  void Stop();
+
+  void Abort();
+
+  IMPL_EVENT_HANDLER(audiostart)
+  IMPL_EVENT_HANDLER(soundstart)
+  IMPL_EVENT_HANDLER(speechstart)
+  IMPL_EVENT_HANDLER(speechend)
+  IMPL_EVENT_HANDLER(soundend)
+  IMPL_EVENT_HANDLER(audioend)
+  IMPL_EVENT_HANDLER(result)
+  IMPL_EVENT_HANDLER(nomatch)
+  IMPL_EVENT_HANDLER(error)
+  IMPL_EVENT_HANDLER(start)
+  IMPL_EVENT_HANDLER(end)
+
+  enum EventType {
+    EVENT_START,
+    EVENT_STOP,
+    EVENT_ABORT,
+    EVENT_AUDIO_DATA,
+    EVENT_AUDIO_ERROR,
+    EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT,
+    EVENT_RECOGNITIONSERVICE_FINAL_RESULT,
+    EVENT_RECOGNITIONSERVICE_ERROR
+  };
+
+  void DispatchError(EventType aErrorType, int aErrorCode, const nsAString& aMessage);
+  void FeedAudioData(already_AddRefed<SharedBuffer> aSamples, uint32_t aDuration, MediaStreamListener* aProvider);
+
+  friend class SpeechEvent;
+private:
+  enum FSMState {
+    STATE_IDLE,
+    STATE_STARTING,
+    STATE_ESTIMATING,
+    STATE_WAITING_FOR_SPEECH,
+    STATE_RECOGNIZING,
+    STATE_WAITING_FOR_RESULT,
+  };
+
+  class GetUserMediaStreamOptions : public nsIMediaStreamOptions
+  {
+  public:
+    NS_DECL_ISUPPORTS
+    NS_DECL_NSIMEDIASTREAMOPTIONS
+
+    GetUserMediaStreamOptions() {}
+    virtual ~GetUserMediaStreamOptions() {}
+  };
+
+  class GetUserMediaSuccessCallback : public nsIDOMGetUserMediaSuccessCallback
+  {
+  public:
+    NS_DECL_ISUPPORTS
+    NS_DECL_NSIDOMGETUSERMEDIASUCCESSCALLBACK
+
+    GetUserMediaSuccessCallback(SpeechRecognition* aRecognition)
+      : mRecognition(aRecognition)
+    {}
+
+    virtual ~GetUserMediaSuccessCallback() {}
+
+  private:
+    nsRefPtr<SpeechRecognition> mRecognition;
+  };
+
+  class GetUserMediaErrorCallback : public nsIDOMGetUserMediaErrorCallback
+  {
+  public:
+    NS_DECL_ISUPPORTS
+    NS_DECL_NSIDOMGETUSERMEDIAERRORCALLBACK
+
+    GetUserMediaErrorCallback(SpeechRecognition* aRecognition)
+      : mRecognition(aRecognition)
+    {}
+
+    virtual ~GetUserMediaErrorCallback() {}
+
+  private:
+    nsRefPtr<SpeechRecognition> mRecognition;
+  };
+
+  NS_IMETHOD StartRecording(DOMLocalMediaStream* aDOMStream);
+  NS_IMETHOD StopRecording();
+
+  uint32_t ProcessAudioSegment(AudioSegment* aSegment);
+  void NotifyError(SpeechEvent* aEvent);
+
+  void ProcessEvent(SpeechEvent* aEvent);
+  FSMState TransitionAndGetNextState(SpeechEvent* aEvent);
+
+  FSMState Reset();
+  FSMState ResetAndEnd();
+  FSMState StartedAudioCapture(SpeechEvent* aEvent);
+  FSMState StopRecordingAndRecognize(SpeechEvent* aEvent);
+  FSMState WaitForEstimation(SpeechEvent* aEvent);
+  FSMState DetectSpeech(SpeechEvent* aEvent);
+  FSMState WaitForSpeechEnd(SpeechEvent* aEvent);
+  FSMState NotifyFinalResult(SpeechEvent* aEvent);
+  FSMState DoNothing(SpeechEvent* aEvent);
+  FSMState AbortSilently(SpeechEvent* aEvent);
+  FSMState AbortError(SpeechEvent* aEvent);
+
+  nsRefPtr<DOMLocalMediaStream> mDOMStream;
+  nsCOMPtr<nsISpeechRecognitionService> mRecognitionService;
+
+  void GetRecognitionServiceCID(nsACString& aResultCID);
+
+  FSMState mCurrentState;
+  bool mProcessingEvent;
+
+  Endpointer mEndpointer;
+  uint32_t mEstimationSamples;
+
+  nsCOMPtr<nsITimer> mSpeechDetectionTimer;
+
+};
+
+class SpeechEvent : public nsRunnable
+{
+public:
+  SpeechEvent(SpeechRecognition* aRecognition, SpeechRecognition::EventType aType)
+  : mAudioSegment(nullptr)
+  , mRecognitionResultList(nullptr)
+  , mError(nullptr)
+  , mRecognition(aRecognition)
+  , mType(aType)
+  {
+  }
+
+  ~SpeechEvent();
+
+  NS_IMETHOD Run();
+  AudioSegment* mAudioSegment;
+  nsRefPtr<SpeechRecognitionResultList> mRecognitionResultList; // TODO: pass a session object that also carries the result index
+  nsCOMPtr<nsIDOMSpeechRecognitionError> mError;
+
+  friend class SpeechRecognition;
+private:
+  SpeechRecognition* mRecognition;
+
+  // for AUDIO_DATA events, keep a reference to the provider
+  // of the data (i.e., the SpeechStreamListener) to ensure it
+  // is kept alive (and keeps SpeechRecognition alive) until this
+  // event gets processed.
+  nsRefPtr<MediaStreamListener> mProvider;
+  SpeechRecognition::EventType mType;
+};
+
+} // namespace dom
+
+inline nsISupports*
+ToSupports(dom::SpeechRecognition* aRec)
+{
+  return static_cast<nsIObserver*>(aRec);
+}
+} // namespace mozilla
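As the comment on mProvider notes, FeedAudioData is the one entry point called off the main thread; everything else funnels a SpeechEvent runnable through NS_DispatchToMainThread, so ProcessEvent runs strictly sequentially (hence the mProcessingEvent assertion). A standalone model of that contract, using std::thread and a queue in place of Gecko's runnable machinery — the type and class names are illustrative only:

#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <queue>
#include <thread>

struct SpeechEventModel { int type; };

// Stand-in for NS_DispatchToMainThread + the main-thread event loop: a single
// consumer drains the queue, so state transitions can never interleave.
class MainThreadQueue {
public:
  void Dispatch(SpeechEventModel e) {
    std::lock_guard<std::mutex> lock(mMutex);
    mQueue.push(e);
    mCond.notify_one();
  }
  SpeechEventModel Take() {
    std::unique_lock<std::mutex> lock(mMutex);
    mCond.wait(lock, [this] { return !mQueue.empty(); });
    SpeechEventModel e = mQueue.front();
    mQueue.pop();
    return e;
  }
private:
  std::mutex mMutex;
  std::condition_variable mCond;
  std::queue<SpeechEventModel> mQueue;
};

int main() {
  MainThreadQueue mainThread;
  // "Audio thread": produces EVENT_AUDIO_DATA but never touches the FSM.
  std::thread audio([&] { mainThread.Dispatch({3}); });
  // "Main thread": the only place ProcessEvent-style transitions happen.
  SpeechEventModel e = mainThread.Take();
  std::printf("processing event %d on the main thread\n", e.type);
  audio.join();
}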
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechRecognitionAlternative.cpp
@@ -0,0 +1,62 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "SpeechRecognitionAlternative.h"
+
+#include "nsContentUtils.h"
+
+#include "mozilla/dom/SpeechRecognitionAlternativeBinding.h"
+
+#include "SpeechRecognition.h"
+
+namespace mozilla {
+namespace dom {
+
+NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE_1(SpeechRecognitionAlternative, mParent)
+NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechRecognitionAlternative)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechRecognitionAlternative)
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechRecognitionAlternative)
+  NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY
+  NS_INTERFACE_MAP_ENTRY(nsISupports)
+NS_INTERFACE_MAP_END
+
+SpeechRecognitionAlternative::SpeechRecognitionAlternative(SpeechRecognition* aParent)
+  : mTranscript(NS_LITERAL_STRING(""))
+  , mConfidence(0)
+  , mParent(aParent)
+{
+  SetIsDOMBinding();
+}
+
+SpeechRecognitionAlternative::~SpeechRecognitionAlternative()
+{
+}
+
+JSObject*
+SpeechRecognitionAlternative::WrapObject(JSContext* aCx, JSObject* aScope)
+{
+  return SpeechRecognitionAlternativeBinding::Wrap(aCx, aScope, this);
+}
+
+nsISupports*
+SpeechRecognitionAlternative::GetParentObject() const
+{
+  return static_cast<nsDOMEventTargetHelper*>(mParent.get());
+}
+
+void
+SpeechRecognitionAlternative::GetTranscript(nsString& aRetVal) const
+{
+  aRetVal = mTranscript;
+}
+
+float
+SpeechRecognitionAlternative::Confidence() const
+{
+  return mConfidence;
+}
+} // namespace dom
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechRecognitionAlternative.h
@@ -0,0 +1,51 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#pragma once
+
+#include "nsCycleCollectionParticipant.h"
+#include "nsString.h"
+#include "nsWrapperCache.h"
+#include "nsAutoPtr.h"
+
+#include "mozilla/Attributes.h"
+
+#include "EnableWebSpeechRecognitionCheck.h"
+
+struct JSContext;
+
+namespace mozilla {
+namespace dom {
+
+class SpeechRecognition;
+
+class SpeechRecognitionAlternative MOZ_FINAL : public nsISupports,
+                                               public nsWrapperCache,
+                                               public EnableWebSpeechRecognitionCheck
+{
+public:
+  SpeechRecognitionAlternative(SpeechRecognition* aParent);
+  ~SpeechRecognitionAlternative();
+
+  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+  NS_DECL_CYCLE_COLLECTION_SCRIPT_HOLDER_CLASS(SpeechRecognitionAlternative)
+
+  nsISupports* GetParentObject() const;
+
+  virtual JSObject* WrapObject(JSContext* aCx, JSObject* aScope);
+
+  void GetTranscript(nsString& aRetVal) const;
+
+  float Confidence() const;
+
+  nsString mTranscript;
+  float mConfidence;
+private:
+  nsRefPtr<SpeechRecognition> mParent;
+};
+
+} // namespace dom
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechRecognitionResult.cpp
@@ -0,0 +1,78 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsContentUtils.h"
+
+#include "SpeechRecognitionResult.h"
+#include "mozilla/dom/SpeechRecognitionResultBinding.h"
+
+#include "SpeechRecognition.h"
+
+namespace mozilla {
+namespace dom {
+
+NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE_1(SpeechRecognitionResult, mParent)
+NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechRecognitionResult)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechRecognitionResult)
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechRecognitionResult)
+  NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY
+  NS_INTERFACE_MAP_ENTRY(nsISupports)
+NS_INTERFACE_MAP_END
+
+SpeechRecognitionResult::SpeechRecognitionResult(SpeechRecognition* aParent)
+  : mParent(aParent)
+{
+  SetIsDOMBinding();
+}
+
+SpeechRecognitionResult::~SpeechRecognitionResult()
+{
+}
+
+JSObject*
+SpeechRecognitionResult::WrapObject(JSContext* aCx, JSObject* aScope)
+{
+  return SpeechRecognitionResultBinding::Wrap(aCx, aScope, this);
+}
+
+nsISupports*
+SpeechRecognitionResult::GetParentObject() const
+{
+  return static_cast<nsDOMEventTargetHelper*>(mParent.get());
+}
+
+already_AddRefed<SpeechRecognitionAlternative>
+SpeechRecognitionResult::IndexedGetter(uint32_t aIndex, bool& aPresent)
+{
+  if (aIndex >= Length()) {
+    aPresent = false;
+    return nullptr;
+  }
+
+  aPresent = true;
+  return Item(aIndex);
+}
+
+uint32_t
+SpeechRecognitionResult::Length() const
+{
+  return mItems.Length();
+}
+
+already_AddRefed<SpeechRecognitionAlternative>
+SpeechRecognitionResult::Item(uint32_t aIndex)
+{
+  nsRefPtr<SpeechRecognitionAlternative> alternative = mItems.ElementAt(aIndex);
+  return alternative.forget();
+}
+
+bool
+SpeechRecognitionResult::Final() const
+{
+  return true; // TODO
+}
+} // namespace dom
+} // namespace mozilla
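IndexedGetter here (and the identical one in SpeechRecognitionResultList below) implements the WebIDL indexed-getter contract: out-of-range access reports absence through the aPresent out-parameter rather than throwing, and Item is only called once the index is known to be valid. A standalone model of the same bounds discipline, using std::vector in place of nsTArray:

#include <cassert>
#include <vector>

template <typename T>
const T* IndexedGetter(const std::vector<T>& items, size_t index, bool& present)
{
  if (index >= items.size()) {
    present = false;   // absent, not an error: the binding maps this to undefined
    return nullptr;
  }
  present = true;
  return &items[index];
}

int main()
{
  std::vector<int> items{42};
  bool present = false;
  assert(IndexedGetter(items, 0, present) != nullptr && present);
  assert(IndexedGetter(items, 5, present) == nullptr && !present);
  return 0;
}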
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechRecognitionResult.h
@@ -0,0 +1,54 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#pragma once
+
+#include "nsCOMPtr.h"
+#include "nsCycleCollectionParticipant.h"
+#include "nsWrapperCache.h"
+#include "nsAutoPtr.h"
+#include "nsTArray.h"
+
+#include "mozilla/Attributes.h"
+
+#include "EnableWebSpeechRecognitionCheck.h"
+#include "SpeechRecognitionAlternative.h"
+
+struct JSContext;
+
+namespace mozilla {
+namespace dom {
+
+class SpeechRecognitionResult MOZ_FINAL : public nsISupports,
+                                          public nsWrapperCache,
+                                          public EnableWebSpeechRecognitionCheck
+{
+public:
+  SpeechRecognitionResult(SpeechRecognition* aParent);
+  ~SpeechRecognitionResult();
+
+  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+  NS_DECL_CYCLE_COLLECTION_SCRIPT_HOLDER_CLASS(SpeechRecognitionResult)
+
+  nsISupports* GetParentObject() const;
+
+  virtual JSObject* WrapObject(JSContext* aCx, JSObject* aScope);
+
+  uint32_t Length() const;
+
+  already_AddRefed<SpeechRecognitionAlternative> Item(uint32_t aIndex);
+
+  bool Final() const;
+
+  already_AddRefed<SpeechRecognitionAlternative> IndexedGetter(uint32_t aIndex, bool& aPresent);
+
+  nsTArray<nsRefPtr<SpeechRecognitionAlternative> > mItems;
+private:
+  nsRefPtr<SpeechRecognition> mParent;
+};
+
+} // namespace dom
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechRecognitionResultList.cpp
@@ -0,0 +1,73 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "SpeechRecognitionResultList.h"
+
+#include "nsContentUtils.h"
+
+#include "mozilla/dom/SpeechRecognitionResultListBinding.h"
+
+#include "SpeechRecognition.h"
+
+namespace mozilla {
+namespace dom {
+
+NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE_2(SpeechRecognitionResultList, mParent, mItems)
+NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechRecognitionResultList)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechRecognitionResultList)
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechRecognitionResultList)
+  NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY
+  NS_INTERFACE_MAP_ENTRY(nsISupports)
+NS_INTERFACE_MAP_END
+
+SpeechRecognitionResultList::SpeechRecognitionResultList(SpeechRecognition* aParent)
+  : mParent(aParent)
+{
+  SetIsDOMBinding();
+}
+
+SpeechRecognitionResultList::~SpeechRecognitionResultList()
+{
+}
+
+nsISupports*
+SpeechRecognitionResultList::GetParentObject() const
+{
+  return static_cast<nsDOMEventTargetHelper*>(mParent.get());
+}
+
+JSObject*
+SpeechRecognitionResultList::WrapObject(JSContext* aCx, JSObject* aScope)
+{
+  return SpeechRecognitionResultListBinding::Wrap(aCx, aScope, this);
+}
+
+already_AddRefed<SpeechRecognitionResult>
+SpeechRecognitionResultList::IndexedGetter(uint32_t aIndex, bool& aPresent)
+{
+  if (aIndex >= Length()) {
+    aPresent = false;
+    return nullptr;
+  }
+
+  aPresent = true;
+  return Item(aIndex);
+}
+
+uint32_t
+SpeechRecognitionResultList::Length() const
+{
+  return mItems.Length();
+}
+
+already_AddRefed<SpeechRecognitionResult>
+SpeechRecognitionResultList::Item(uint32_t aIndex)
+{
+  nsRefPtr<SpeechRecognitionResult> result = mItems.ElementAt(aIndex);
+  return result.forget();
+}
+} // namespace dom
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechRecognitionResultList.h
@@ -0,0 +1,53 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#pragma once
+
+#include "nsCycleCollectionParticipant.h"
+#include "nsWrapperCache.h"
+#include "nsAutoPtr.h"
+#include "nsTArray.h"
+
+#include "mozilla/Attributes.h"
+
+#include "EnableWebSpeechRecognitionCheck.h"
+#include "SpeechRecognitionResult.h"
+
+struct JSContext;
+
+namespace mozilla {
+namespace dom {
+
+class SpeechRecognition;
+
+class SpeechRecognitionResultList MOZ_FINAL : public nsISupports,
+                                              public nsWrapperCache,
+                                              public EnableWebSpeechRecognitionCheck
+{
+public:
+  SpeechRecognitionResultList(SpeechRecognition* aParent);
+  ~SpeechRecognitionResultList();
+
+  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+  NS_DECL_CYCLE_COLLECTION_SCRIPT_HOLDER_CLASS(SpeechRecognitionResultList)
+
+  nsISupports* GetParentObject() const;
+
+  virtual JSObject* WrapObject(JSContext* aCx, JSObject* aScope);
+
+  uint32_t Length() const;
+
+  already_AddRefed<SpeechRecognitionResult> Item(uint32_t aIndex);
+
+  already_AddRefed<SpeechRecognitionResult> IndexedGetter(uint32_t aIndex, bool& aPresent);
+
+  nsTArray<nsRefPtr<SpeechRecognitionResult> > mItems;
+private:
+  nsRefPtr<SpeechRecognition> mParent;
+};
+
+} // namespace dom
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechStreamListener.cpp
@@ -0,0 +1,81 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "SpeechStreamListener.h"
+
+#include "SpeechRecognition.h"
+
+namespace mozilla {
+namespace dom {
+
+SpeechStreamListener::SpeechStreamListener(SpeechRecognition* aRecognition)
+  : mRecognition(aRecognition)
+{
+}
+
+SpeechStreamListener::~SpeechStreamListener()
+{
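+  // This listener may be destroyed off the main thread (e.g. on a media
+  // graph thread), but mRecognition is a main-thread DOM object, so hand
+  // the reference to the main thread for release instead of dropping it
+  // here.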
+  nsCOMPtr<nsIThread> mainThread;
+  NS_GetMainThread(getter_AddRefs(mainThread));
+
+  SpeechRecognition* forgottenRecognition = nullptr;
+  mRecognition.swap(forgottenRecognition);
+  NS_ProxyRelease(mainThread,
+                  static_cast<nsDOMEventTargetHelper*>(forgottenRecognition));
+}
+
+void
+SpeechStreamListener::NotifyQueuedTrackChanges(MediaStreamGraph* aGraph,
+                                               TrackID aID,
+                                               TrackRate aTrackRate,
+                                               TrackTicks aTrackOffset,
+                                               uint32_t aTrackEvents,
+                                               const MediaSegment& aQueuedMedia)
+{
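+  // This listener is only attached to audio streams, so the queued media
+  // is assumed to be an AudioSegment (hence the cast below).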
+  AudioSegment* audio = const_cast<AudioSegment*>(
+    static_cast<const AudioSegment*>(&aQueuedMedia));
+
+  AudioSegment::ChunkIterator iterator(*audio);
+  while (!iterator.IsEnded()) {
+    AudioSampleFormat format = iterator->mBufferFormat;
+
+    MOZ_ASSERT(format == AUDIO_FORMAT_S16 || format == AUDIO_FORMAT_FLOAT32);
+
+    if (format == AUDIO_FORMAT_S16) {
+      ConvertAndDispatchAudioChunk<int16_t>(*iterator);
+    } else if (format == AUDIO_FORMAT_FLOAT32) {
+      ConvertAndDispatchAudioChunk<float>(*iterator);
+    }
+
+    iterator.Next();
+  }
+}
+
+template<typename SampleFormatType> void
+SpeechStreamListener::ConvertAndDispatchAudioChunk(AudioChunk& aChunk)
+{
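+  // The recognition pipeline consumes mono 16-bit PCM: allocate a buffer
+  // for a single channel of int16_t samples regardless of the chunk's
+  // source format.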
+  nsRefPtr<SharedBuffer> samples(SharedBuffer::Create(aChunk.mDuration *
+                                                      1 * // channel
+                                                      sizeof(int16_t)));
+
+  const SampleFormatType* from =
+    static_cast<const SampleFormatType*>(aChunk.mChannelData[0]);
+
+  int16_t* to = static_cast<int16_t*>(samples->Data());
+  ConvertAudioSamplesWithScale(from, to, aChunk.mDuration, aChunk.mVolume);
+
+  mRecognition->FeedAudioData(samples.forget(), aChunk.mDuration, this);
+}
+
+void
+SpeechStreamListener::NotifyFinished(MediaStreamGraph* aGraph)
+{
+  // TODO: Dispatch a SpeechEnd event so services can be informed.
+}
+
+} // namespace dom
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechStreamListener.h
@@ -0,0 +1,40 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#pragma once
+
+#include "MediaStreamGraph.h"
+#include "AudioSegment.h"
+
+namespace mozilla {
+
+class AudioSegment;
+
+namespace dom {
+
+class SpeechRecognition;
+
+class SpeechStreamListener : public MediaStreamListener
+{
+public:
+  SpeechStreamListener(SpeechRecognition* aRecognition);
+  ~SpeechStreamListener();
+
+  void NotifyQueuedTrackChanges(MediaStreamGraph* aGraph, TrackID aID,
+                                TrackRate aTrackRate,
+                                TrackTicks aTrackOffset,
+                                uint32_t aTrackEvents,
+                                const MediaSegment& aQueuedMedia);
+
+  void NotifyFinished(MediaStreamGraph* aGraph);
+
+private:
+  template<typename SampleFormatType> void ConvertAndDispatchAudioChunk(AudioChunk& aChunk);
+  nsRefPtr<SpeechRecognition> mRecognition;
+};
+
+} // namespace dom
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/endpointer.cc
@@ -0,0 +1,193 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "endpointer.h"
+
+#include "AudioSegment.h"
+
+namespace {
+const int kFrameRate = 200;  // 1 frame = 5ms of audio.
+}
+
+namespace mozilla {
+
+Endpointer::Endpointer(int sample_rate)
+    : speech_input_possibly_complete_silence_length_us_(-1),
+      speech_input_complete_silence_length_us_(-1),
+      audio_frame_time_us_(0),
+      sample_rate_(sample_rate),
+      frame_size_(0) {
+  Reset();
+
+  frame_size_ = static_cast<int>(sample_rate / static_cast<float>(kFrameRate));
+
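+  // Default timeouts, in microseconds: at least 1.7 s of input is required,
+  // 0.5 s of trailing silence marks input complete, and 1 s marks it
+  // possibly complete; the long-speech variants are disabled (-1) until set.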
+  speech_input_minimum_length_us_ =
+      static_cast<int64_t>(1.7 * 1000000);
+  speech_input_complete_silence_length_us_ =
+      static_cast<int64_t>(0.5 * 1000000);
+  long_speech_input_complete_silence_length_us_ = -1;
+  long_speech_length_us_ = -1;
+  speech_input_possibly_complete_silence_length_us_ =
+      1 * 1000000;
+
+  // Set the default configuration for Push To Talk mode.
+  EnergyEndpointerParams ep_config;
+  ep_config.set_frame_period(1.0f / static_cast<float>(kFrameRate));
+  ep_config.set_frame_duration(1.0f / static_cast<float>(kFrameRate));
+  ep_config.set_endpoint_margin(0.2f);
+  ep_config.set_onset_window(0.15f);
+  ep_config.set_speech_on_window(0.4f);
+  ep_config.set_offset_window(0.15f);
+  ep_config.set_onset_detect_dur(0.09f);
+  ep_config.set_onset_confirm_dur(0.075f);
+  ep_config.set_on_maintain_dur(0.10f);
+  ep_config.set_offset_confirm_dur(0.12f);
+  ep_config.set_decision_threshold(1000.0f);
+  ep_config.set_min_decision_threshold(50.0f);
+  ep_config.set_fast_update_dur(0.2f);
+  ep_config.set_sample_rate(static_cast<float>(sample_rate));
+  ep_config.set_min_fundamental_frequency(57.143f);
+  ep_config.set_max_fundamental_frequency(400.0f);
+  ep_config.set_contamination_rejection_period(0.25f);
+  energy_endpointer_.Init(ep_config);
+}
+
+void Endpointer::Reset() {
+  old_ep_status_ = EP_PRE_SPEECH;
+  waiting_for_speech_possibly_complete_timeout_ = false;
+  waiting_for_speech_complete_timeout_ = false;
+  speech_previously_detected_ = false;
+  speech_input_complete_ = false;
+  audio_frame_time_us_ = 0; // Reset time for packets sent to endpointer.
+  speech_end_time_us_ = -1;
+  speech_start_time_us_ = -1;
+}
+
+void Endpointer::StartSession() {
+  Reset();
+  energy_endpointer_.StartSession();
+}
+
+void Endpointer::EndSession() {
+  energy_endpointer_.EndSession();
+}
+
+void Endpointer::SetEnvironmentEstimationMode() {
+  Reset();
+  energy_endpointer_.SetEnvironmentEstimationMode();
+}
+
+void Endpointer::SetUserInputMode() {
+  energy_endpointer_.SetUserInputMode();
+}
+
+EpStatus Endpointer::Status(int64_t *time) {
+  return energy_endpointer_.Status(time);
+}
+
+EpStatus Endpointer::ProcessAudio(const AudioChunk& raw_audio, float* rms_out) {
+  MOZ_ASSERT(raw_audio.mBufferFormat == AUDIO_FORMAT_S16, "Audio is not in 16 bit format");
+  const int16_t* audio_data = static_cast<const int16_t*>(raw_audio.mChannelData[0]);
+  const int num_samples = raw_audio.mDuration;
+  EpStatus ep_status = EP_PRE_SPEECH;
+
+  // Process the input data in blocks of frame_size_, dropping any incomplete
+  // frames at the end (which is ok since typically the caller will be recording
+  // audio in multiples of our frame size).
+  int sample_index = 0;
+  while (sample_index + frame_size_ <= num_samples) {
+    // Have the endpointer process the frame.
+    energy_endpointer_.ProcessAudioFrame(audio_frame_time_us_,
+                                         audio_data + sample_index,
+                                         frame_size_,
+                                         rms_out);
+    sample_index += frame_size_;
+    audio_frame_time_us_ += (frame_size_ * 1000000) / sample_rate_;
+
+    // Get the status of the endpointer.
+    int64_t ep_time;
+    ep_status = energy_endpointer_.Status(&ep_time);
+    if (old_ep_status_ != ep_status)
+      fprintf(stderr, "Status changed old= %d, new= %d\n", old_ep_status_, ep_status);
+
+    // Handle state changes.
+    if ((EP_SPEECH_PRESENT == ep_status) &&
+        (EP_POSSIBLE_ONSET == old_ep_status_)) {
+      speech_end_time_us_ = -1;
+      waiting_for_speech_possibly_complete_timeout_ = false;
+      waiting_for_speech_complete_timeout_ = false;
+      // Trigger SpeechInputDidStart event on first detection.
+      if (false == speech_previously_detected_) {
+        speech_previously_detected_ = true;
+        speech_start_time_us_ = ep_time;
+      }
+    }
+    if ((EP_PRE_SPEECH == ep_status) &&
+        (EP_POSSIBLE_OFFSET == old_ep_status_)) {
+      speech_end_time_us_ = ep_time;
+      waiting_for_speech_possibly_complete_timeout_ = true;
+      waiting_for_speech_complete_timeout_ = true;
+    }
+    if (ep_time > speech_input_minimum_length_us_) {
+      // Speech possibly complete timeout.
+      if ((waiting_for_speech_possibly_complete_timeout_) &&
+          (ep_time - speech_end_time_us_ >
+              speech_input_possibly_complete_silence_length_us_)) {
+        waiting_for_speech_possibly_complete_timeout_ = false;
+      }
+      if (waiting_for_speech_complete_timeout_) {
+        // The length of the silence timeout period can be held constant, or it
+        // can be changed after a fixed amount of time from the beginning of
+        // speech.
+        bool has_stepped_silence =
+            (long_speech_length_us_ > 0) &&
+            (long_speech_input_complete_silence_length_us_ > 0);
+        int64_t requested_silence_length;
+        if (has_stepped_silence &&
+            (ep_time - speech_start_time_us_) > long_speech_length_us_) {
+          requested_silence_length =
+              long_speech_input_complete_silence_length_us_;
+        } else {
+          requested_silence_length =
+              speech_input_complete_silence_length_us_;
+        }
+
+        // Speech complete timeout.
+        if ((ep_time - speech_end_time_us_) > requested_silence_length) {
+          waiting_for_speech_complete_timeout_ = false;
+          speech_input_complete_ = true;
+        }
+      }
+    }
+    old_ep_status_ = ep_status;
+  }
+  return ep_status;
+}
+
+}  // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/endpointer.h
@@ -0,0 +1,173 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef CONTENT_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
+#define CONTENT_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
+
+#include "energy_endpointer.h"
+
+namespace mozilla {
+
+struct AudioChunk;
+
+// A simple interface to the underlying energy-endpointer implementation: this
+// class lets callers provide audio as it is being recorded and poll to find
+// out when the user has stopped speaking.
+//
+// There are two events that may trigger the end of speech:
+//
+// speechInputPossiblyComplete event:
+//
+// Signals that silence/noise has been detected for a *short* amount of
+// time after some speech has been detected. It can be used for low latency
+// UI feedback. To disable it, set its silence length to a large value.
+//
+// speechInputComplete event:
+//
+// This event is intended to signal end of input and to stop recording.
+// The amount of time to wait after speech is set by
+// speech_input_complete_silence_length_ and optionally two other
+// parameters (see below).
+// This time can be held constant, or can change as more speech is detected.
+// In the latter case, the time changes after a set amount of time from the
+// *beginning* of speech.  This is motivated by the expectation that there
+// will be two distinct types of inputs: short search queries and longer
+// dictation style input.
+//
+// Three parameters are used to define the piecewise constant timeout function.
+// The timeout length is speech_input_complete_silence_length until
+// long_speech_length, when it changes to
+// long_speech_input_complete_silence_length.
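+//
+// A minimal usage sketch (assuming 16 kHz mono input; HaveAudio() and
+// NextChunk() below are hypothetical helpers, everything else is the
+// Endpointer API):
+//
+//   Endpointer endpointer(16000);
+//   endpointer.StartSession();
+//   endpointer.SetUserInputMode();
+//   while (HaveAudio()) {
+//     float rms_db;
+//     endpointer.ProcessAudio(NextChunk(), &rms_db);
+//     if (endpointer.speech_input_complete())
+//       break;  // Silence timeout reached; stop recording.
+//   }
+//   endpointer.EndSession();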
+class Endpointer {
+ public:
+  explicit Endpointer(int sample_rate);
+
+  // Start the endpointer. This should be called at the beginning of a session.
+  void StartSession();
+
+  // Stop the endpointer.
+  void EndSession();
+
+  // Start environment estimation. Audio will be used for environment
+  // estimation, i.e. noise level estimation.
+  void SetEnvironmentEstimationMode();
+
+  // Start user input. This should be called when the user indicates start of
+  // input, e.g. by pressing a button.
+  void SetUserInputMode();
+
+  // Process a segment of audio, which may be more than one frame.
+  // The status of the last frame will be returned.
+  EpStatus ProcessAudio(const AudioChunk& raw_audio, float* rms_out);
+
+  // Get the status of the endpointer.
+  EpStatus Status(int64_t *time_us);
+
+  // Returns true if the endpointer detected reasonable audio levels above
+  // background noise which could be user speech, false if not.
+  bool DidStartReceivingSpeech() const {
+    return speech_previously_detected_;
+  }
+
+  bool IsEstimatingEnvironment() const {
+    return energy_endpointer_.estimating_environment();
+  }
+
+  void set_speech_input_complete_silence_length(int64_t time_us) {
+    speech_input_complete_silence_length_us_ = time_us;
+  }
+
+  void set_long_speech_input_complete_silence_length(int64_t time_us) {
+    long_speech_input_complete_silence_length_us_ = time_us;
+  }
+
+  void set_speech_input_possibly_complete_silence_length(int64_t time_us) {
+    speech_input_possibly_complete_silence_length_us_ = time_us;
+  }
+
+  void set_long_speech_length(int64_t time_us) {
+    long_speech_length_us_ = time_us;
+  }
+
+  bool speech_input_complete() const {
+    return speech_input_complete_;
+  }
+
+  // RMS background noise level in dB.
+  float NoiseLevelDb() const { return energy_endpointer_.GetNoiseLevelDb(); }
+
+ private:
+  // Reset internal states. Helper method common to initial input utterance
+  // and following input utterances.
+  void Reset();
+
+  // Minimum allowable length of speech input.
+  int64_t speech_input_minimum_length_us_;
+
+  // The speechInputPossiblyComplete event signals that silence/noise has been
+  // detected for a *short* amount of time after some speech has been detected.
+  // This property specifies the time period.
+  int64_t speech_input_possibly_complete_silence_length_us_;
+
+  // The speechInputComplete event signals that silence/noise has been
+  // detected for a *long* amount of time after some speech has been detected.
+  // This property specifies the time period.
+  int64_t speech_input_complete_silence_length_us_;
+
+  // Same as above, this specifies the required silence period after speech
+  // detection. This period is used instead of
+  // speech_input_complete_silence_length_ when the utterance is longer than
+  // long_speech_length_. This parameter is optional.
+  int64_t long_speech_input_complete_silence_length_us_;
+
+  // The period of time after which the endpointer should consider
+  // long_speech_input_complete_silence_length_ as a valid silence period
+  // instead of speech_input_complete_silence_length_. This parameter is
+  // optional.
+  int64_t long_speech_length_us_;
+
+  // First speech onset time, used in determination of speech complete timeout.
+  int64_t speech_start_time_us_;
+
+  // Most recent end time, used in determination of speech complete timeout.
+  int64_t speech_end_time_us_;
+
+  int64_t audio_frame_time_us_;
+  EpStatus old_ep_status_;
+  bool waiting_for_speech_possibly_complete_timeout_;
+  bool waiting_for_speech_complete_timeout_;
+  bool speech_previously_detected_;
+  bool speech_input_complete_;
+  EnergyEndpointer energy_endpointer_;
+  int sample_rate_;
+  int32_t frame_size_;
+};
+
+}  // namespace mozilla
+
+#endif  // CONTENT_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/energy_endpointer.cc
@@ -0,0 +1,393 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "energy_endpointer.h"
+
+#include <math.h>
+
+namespace {
+
+// Returns the RMS (quadratic mean) of the input signal with its mean (DC
+// offset) removed, i.e. sqrt(E[x^2] - E[x]^2).
+float RMS(const int16_t* samples, int num_samples) {
+  int64_t ssq_int64_t = 0;
+  int64_t sum_int64_t = 0;
+  for (int i = 0; i < num_samples; ++i) {
+    sum_int64_t += samples[i];
+    ssq_int64_t += samples[i] * samples[i];
+  }
+  // Now convert to floats.
+  double sum = static_cast<double>(sum_int64_t);
+  sum /= num_samples;
+  double ssq = static_cast<double>(ssq_int64_t);
+  return static_cast<float>(sqrt((ssq / num_samples) - (sum * sum)));
+}
+
+int64_t Secs2Usecs(float seconds) {
+  return static_cast<int64_t>(0.5 + (1.0e6 * seconds));
+}
+
+float GetDecibel(float value) {
+  if (value > 1.0e-100)
+    return 20 * log10(value);
+  return -2000.0;
+}
+
+}  // namespace
+
+namespace mozilla {
+
+// Stores threshold-crossing histories for making decisions about the speech
+// state.
+class EnergyEndpointer::HistoryRing {
+ public:
+  HistoryRing() : insertion_index_(0) {}
+
+  // Resets the ring to |size| elements each with state |initial_state|.
+  void SetRing(int size, bool initial_state);
+
+  // Inserts a new entry into the ring and drops the oldest entry.
+  void Insert(int64_t time_us, bool decision);
+
+  // Returns the time in microseconds of the most recently added entry.
+  int64_t EndTime() const;
+
+  // Returns the sum of all intervals during which 'decision' was true
+  // within the last |duration_sec| seconds. The returned total is in
+  // seconds.
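+  // (Each inserted decision contributes the interval back to the previous
+  // entry when its flag is true, so the result is the total "on" time in
+  // the window.)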
+  float RingSum(float duration_sec);
+
+ private:
+  struct DecisionPoint {
+    int64_t time_us;
+    bool decision;
+  };
+
+  std::vector<DecisionPoint> decision_points_;
+  int insertion_index_;  // Index at which the next item gets added/inserted.
+
+  HistoryRing(const HistoryRing&);
+  void operator=(const HistoryRing&);
+};
+
+void EnergyEndpointer::HistoryRing::SetRing(int size, bool initial_state) {
+  insertion_index_ = 0;
+  decision_points_.clear();
+  DecisionPoint init = { -1, initial_state };
+  decision_points_.resize(size, init);
+}
+
+void EnergyEndpointer::HistoryRing::Insert(int64_t time_us, bool decision) {
+  decision_points_[insertion_index_].time_us = time_us;
+  decision_points_[insertion_index_].decision = decision;
+  insertion_index_ = (insertion_index_ + 1) % decision_points_.size();
+}
+
+int64_t EnergyEndpointer::HistoryRing::EndTime() const {
+  int ind = insertion_index_ - 1;
+  if (ind < 0)
+    ind = decision_points_.size() - 1;
+  return decision_points_[ind].time_us;
+}
+
+float EnergyEndpointer::HistoryRing::RingSum(float duration_sec) {
+  if (!decision_points_.size())
+    return 0.0;
+
+  int64_t sum_us = 0;
+  int ind = insertion_index_ - 1;
+  if (ind < 0)
+    ind = decision_points_.size() - 1;
+  int64_t end_us = decision_points_[ind].time_us;
+  bool is_on = decision_points_[ind].decision;
+  int64_t start_us = end_us - static_cast<int64_t>(0.5 + (1.0e6 * duration_sec));
+  if (start_us < 0)
+    start_us = 0;
+  size_t n_summed = 1;  // n points ==> (n-1) intervals
+  while ((decision_points_[ind].time_us > start_us) &&
+         (n_summed < decision_points_.size())) {
+    --ind;
+    if (ind < 0)
+      ind = decision_points_.size() - 1;
+    if (is_on)
+      sum_us += end_us - decision_points_[ind].time_us;
+    is_on = decision_points_[ind].decision;
+    end_us = decision_points_[ind].time_us;
+    n_summed++;
+  }
+
+  return 1.0e-6f * sum_us;  //  Returns total time that was super threshold.
+}
+
+EnergyEndpointer::EnergyEndpointer()
+    : status_(EP_PRE_SPEECH),
+      offset_confirm_dur_sec_(0),
+      endpointer_time_us_(0),
+      fast_update_frames_(0),
+      frame_counter_(0),
+      max_window_dur_(4.0),
+      sample_rate_(0),
+      history_(new HistoryRing()),
+      decision_threshold_(0),
+      estimating_environment_(false),
+      noise_level_(0),
+      rms_adapt_(0),
+      start_lag_(0),
+      end_lag_(0),
+      user_input_start_time_us_(0) {
+}
+
+EnergyEndpointer::~EnergyEndpointer() {
+}
+
+int EnergyEndpointer::TimeToFrame(float time) const {
+  return static_cast<int32_t>(0.5 + (time / params_.frame_period()));
+}
+
+void EnergyEndpointer::Restart(bool reset_threshold) {
+  status_ = EP_PRE_SPEECH;
+  user_input_start_time_us_ = 0;
+
+  if (reset_threshold) {
+    decision_threshold_ = params_.decision_threshold();
+    rms_adapt_ = decision_threshold_;
+    noise_level_ = params_.decision_threshold() / 2.0f;
+    frame_counter_ = 0;  // Used for rapid initial update of levels.
+  }
+
+  // Set up the memories to hold the history windows.
+  history_->SetRing(TimeToFrame(max_window_dur_), false);
+
+  // Flag that indicates that current input should be used for
+  // estimating the environment. The user has not yet started input
+  // by e.g. pressing the push-to-talk button. By default, this is
+  // false for backward compatibility.
+  estimating_environment_ = false;
+}
+
+void EnergyEndpointer::Init(const EnergyEndpointerParams& params) {
+  params_ = params;
+
+  // Find the longest history interval to be used, and make the ring
+  // large enough to accommodate that number of frames.  NOTE: This
+  // depends upon ep_frame_period being set correctly in the factory
+  // that did this instantiation.
+  max_window_dur_ = params_.onset_window();
+  if (params_.speech_on_window() > max_window_dur_)
+    max_window_dur_ = params_.speech_on_window();
+  if (params_.offset_window() > max_window_dur_)
+    max_window_dur_ = params_.offset_window();
+  Restart(true);
+
+  offset_confirm_dur_sec_ = params_.offset_window() -
+                            params_.offset_confirm_dur();
+  if (offset_confirm_dur_sec_ < 0.0)
+    offset_confirm_dur_sec_ = 0.0;
+
+  user_input_start_time_us_ = 0;
+
+  // Flag that indicates that current input should be used for
+  // estimating the environment. The user has not yet started input
+  // by e.g. pressing the push-to-talk button. By default, this is
+  // false for backward compatibility.
+  estimating_environment_ = false;
+  // The initial values of the noise and speech levels are inconsequential.
+  // The level of the first frame will overwrite these values.
+  noise_level_ = params_.decision_threshold() / 2.0f;
+  fast_update_frames_ =
+      static_cast<int64_t>(params_.fast_update_dur() / params_.frame_period());
+
+  frame_counter_ = 0;  // Used for rapid initial update of levels.
+
+  sample_rate_ = params_.sample_rate();
+  start_lag_ = static_cast<int>(sample_rate_ /
+                                params_.max_fundamental_frequency());
+  end_lag_ = static_cast<int>(sample_rate_ /
+                              params_.min_fundamental_frequency());
+}
+
+void EnergyEndpointer::StartSession() {
+  Restart(true);
+}
+
+void EnergyEndpointer::EndSession() {
+  status_ = EP_POST_SPEECH;
+}
+
+void EnergyEndpointer::SetEnvironmentEstimationMode() {
+  Restart(true);
+  estimating_environment_ = true;
+}
+
+void EnergyEndpointer::SetUserInputMode() {
+  estimating_environment_ = false;
+  user_input_start_time_us_ = endpointer_time_us_;
+}
+
+void EnergyEndpointer::ProcessAudioFrame(int64_t time_us,
+                                         const int16_t* samples,
+                                         int num_samples,
+                                         float* rms_out) {
+  endpointer_time_us_ = time_us;
+  float rms = RMS(samples, num_samples);
+
+  // Check that this is user input audio vs. pre-input adaptation audio.
+  // Input audio starts when the user indicates start of input, by e.g.
+  // pressing push-to-talk. Audio received prior to that is used to update
+  // noise and speech level estimates.
+  if (!estimating_environment_) {
+    bool decision = false;
+    if ((endpointer_time_us_ - user_input_start_time_us_) <
+        Secs2Usecs(params_.contamination_rejection_period())) {
+      decision = false;
+      //PR_LOG(GetSpeechRecognitionLog(), PR_LOG_DEBUG, ("decision: forced to false, time: %d", endpointer_time_us_));
+    } else {
+      decision = (rms > decision_threshold_);
+    }
+
+    history_->Insert(endpointer_time_us_, decision);
+
+    switch (status_) {
+      case EP_PRE_SPEECH:
+        if (history_->RingSum(params_.onset_window()) >
+            params_.onset_detect_dur()) {
+          status_ = EP_POSSIBLE_ONSET;
+        }
+        break;
+
+      case EP_POSSIBLE_ONSET: {
+        float tsum = history_->RingSum(params_.onset_window());
+        if (tsum > params_.onset_confirm_dur()) {
+          status_ = EP_SPEECH_PRESENT;
+        } else {  // If signal is not maintained, drop back to pre-speech.
+          if (tsum <= params_.onset_detect_dur())
+            status_ = EP_PRE_SPEECH;
+        }
+        break;
+      }
+
+      case EP_SPEECH_PRESENT: {
+        // To induce hysteresis in the state residency, we allow a
+        // smaller residency time in the on_ring than was required to
+        // enter the SPEECH_PRESENT state.
+        float on_time = history_->RingSum(params_.speech_on_window());
+        if (on_time < params_.on_maintain_dur())
+          status_ = EP_POSSIBLE_OFFSET;
+        break;
+      }
+
+      case EP_POSSIBLE_OFFSET:
+        if (history_->RingSum(params_.offset_window()) <=
+            offset_confirm_dur_sec_) {
+          // Note that this offset time may be beyond the end
+          // of the input buffer in a real-time system.  It will be up
+          // to the RecognizerSession to decide what to do.
+          status_ = EP_PRE_SPEECH;  // Automatically reset for next utterance.
+        } else {  // If speech picks up again we allow return to SPEECH_PRESENT.
+          if (history_->RingSum(params_.speech_on_window()) >=
+              params_.on_maintain_dur())
+            status_ = EP_SPEECH_PRESENT;
+        }
+        break;
+
+      default:
+        break;
+    }
+
+    // If this is a quiet, non-speech region, slowly adapt the detection
+    // threshold to be about 6dB above the average RMS.
+    if ((!decision) && (status_ == EP_PRE_SPEECH)) {
+      decision_threshold_ = (0.98f * decision_threshold_) + (0.02f * 2 * rms);
+      rms_adapt_ = decision_threshold_;
+    } else {
+      // If this is in a speech region, adapt the decision threshold to
+      // be about 10dB below the average RMS. If the noise level is high,
+      // the threshold is pushed up.
+      // Adaptation up to a higher level is 5 times faster than decay to
+      // a lower level.
+      if ((status_ == EP_SPEECH_PRESENT) && decision) {
+        if (rms_adapt_ > rms) {
+          rms_adapt_ = (0.99f * rms_adapt_) + (0.01f * rms);
+        } else {
+          rms_adapt_ = (0.95f * rms_adapt_) + (0.05f * rms);
+        }
+        float target_threshold = 0.3f * rms_adapt_ + noise_level_;
+        decision_threshold_ = (.90f * decision_threshold_) +
+                              (0.10f * target_threshold);
+      }
+    }
+
+    // Set a floor
+    if (decision_threshold_ < params_.min_decision_threshold())
+      decision_threshold_ = params_.min_decision_threshold();
+  }
+
+  // Update speech and noise levels.
+  UpdateLevels(rms);
+  ++frame_counter_;
+
+  if (rms_out)
+    *rms_out = GetDecibel(rms);
+}
+
+float EnergyEndpointer::GetNoiseLevelDb() const {
+  return GetDecibel(noise_level_);
+}
+
+void EnergyEndpointer::UpdateLevels(float rms) {
+  // Update quickly initially. We assume this is noise and that
+  // speech is 6dB above the noise.
+  if (frame_counter_ < fast_update_frames_) {
+    // Alpha increases from 0 to (k-1)/k where k is the number of time
+    // steps in the initial adaptation period.
+    float alpha = static_cast<float>(frame_counter_) /
+        static_cast<float>(fast_update_frames_);
+    noise_level_ = (alpha * noise_level_) + ((1 - alpha) * rms);
+    //PR_LOG(GetSpeechRecognitionLog(), PR_LOG_DEBUG, ("FAST UPDATE, frame_counter_ %d, fast_update_frames_ %d", frame_counter_, fast_update_frames_));
+  } else {
+    // Update Noise level. The noise level adapts quickly downward, but
+    // slowly upward. The noise_level_ parameter is not currently used
+    // for threshold adaptation. It is used for UI feedback.
+    if (noise_level_ < rms)
+      noise_level_ = (0.999f * noise_level_) + (0.001f * rms);
+    else
+      noise_level_ = (0.95f * noise_level_) + (0.05f * rms);
+  }
+  if (estimating_environment_ || (frame_counter_ < fast_update_frames_)) {
+    decision_threshold_ = noise_level_ * 2; // 6dB above noise level.
+    // Set a floor
+    if (decision_threshold_ < params_.min_decision_threshold())
+      decision_threshold_ = params_.min_decision_threshold();
+  }
+}
+
+EpStatus EnergyEndpointer::Status(int64_t* status_time) const {
+  *status_time = history_->EndTime();
+  return status_;
+}
+
+}  // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/energy_endpointer.h
@@ -0,0 +1,180 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The EnergyEndpointer class finds likely speech onset and offset points.
+//
+// The implementation described here is about the simplest possible.
+// It is based on timings of threshold crossings for overall signal
+// RMS. It is suitable for lightweight applications.
+//
+// As written, the basic idea is that one specifies intervals that
+// must be occupied by super- and sub-threshold energy levels, and
+// defers decisions re onset and offset times until these
+// specifications have been met.  Three basic intervals are tested: an
+// onset window, a speech-on window, and an offset window.  We require
+// super-threshold to exceed some minimum total durations in the onset
+// and speech-on windows before declaring the speech onset time, and
+// we specify a required sub-threshold residency in the offset window
+// before declaring speech offset. As the various residency requirements are
+// met, the EnergyEndpointer instance assumes various states, and can return the
+// ID of these states to the client (see EpStatus below).
+//
+// The levels of the speech and background noise are continuously updated. It is
+// important that the background noise level be estimated initially for
+// robustness in noisy conditions. The first frames are assumed to be background
+// noise and a fast update rate is used for the noise level. The duration for
+// fast update is controlled by the fast_update_dur_ parameter.
+//
+// If used in noisy conditions, the endpointer should be started and run in the
+// EnvironmentEstimation mode, for at least 200ms, before switching to
+// UserInputMode.
+// Audio feedback contamination can appear in the input audio if it is not
+// cut out or handled by echo cancellation. Audio feedback can trigger false
+// accepts, which can be suppressed by setting
+// ep_contamination_rejection_period.
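+//
+// A minimal sketch of that mode sequence (durations are illustrative):
+//
+//   EnergyEndpointerParams params;
+//   params.SetDefaults();
+//   EnergyEndpointer endpointer;
+//   endpointer.Init(params);
+//   endpointer.StartSession();
+//   endpointer.SetEnvironmentEstimationMode();
+//   // ... feed >= 200 ms of frames through ProcessAudioFrame() ...
+//   endpointer.SetUserInputMode();
+//   // ... feed user audio; poll Status() for EP_SPEECH_PRESENT, etc. ...
+//   endpointer.EndSession();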
+
+#ifndef CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_
+#define CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_
+
+#include <vector>
+
+#include "nsAutoPtr.h"
+
+#include "energy_endpointer_params.h"
+
+namespace mozilla {
+
+// Endpointer status codes
+enum EpStatus {
+  EP_PRE_SPEECH = 10,
+  EP_POSSIBLE_ONSET,
+  EP_SPEECH_PRESENT,
+  EP_POSSIBLE_OFFSET,
+  EP_POST_SPEECH,
+};
+
+class EnergyEndpointer {
+ public:
+  // The default construction MUST be followed by Init(), before any
+  // other use can be made of the instance.
+  EnergyEndpointer();
+  virtual ~EnergyEndpointer();
+
+  void Init(const EnergyEndpointerParams& params);
+
+  // Start the endpointer. This should be called at the beginning of a session.
+  void StartSession();
+
+  // Stop the endpointer.
+  void EndSession();
+
+  // Start environment estimation. Audio will be used for environment
+  // estimation, i.e. noise level estimation.
+  void SetEnvironmentEstimationMode();
+
+  // Start user input. This should be called when the user indicates start of
+  // input, e.g. by pressing a button.
+  void SetUserInputMode();
+
+  // Computes the next input frame and modifies EnergyEndpointer status as
+  // appropriate based on the computation.
+  void ProcessAudioFrame(int64_t time_us,
+                         const int16_t* samples, int num_samples,
+                         float* rms_out);
+
+  // Returns the current state of the EnergyEndpointer and the time
+  // corresponding to the most recently computed frame.
+  EpStatus Status(int64_t* status_time_us) const;
+
+  bool estimating_environment() const {
+    return estimating_environment_;
+  }
+
+  // Returns estimated noise level in dB.
+  float GetNoiseLevelDb() const;
+
+ private:
+  class HistoryRing;
+
+  // Resets the endpointer internal state.  If reset_threshold is true, the
+  // state will be reset completely, including adaptive thresholds and the
+  // removal of all history information.
+  void Restart(bool reset_threshold);
+
+  // Update internal speech and noise levels.
+  void UpdateLevels(float rms);
+
+  // Returns the number of frames (or frame number) corresponding to
+  // the 'time' (in seconds).
+  int TimeToFrame(float time) const;
+
+  EpStatus status_;  // The current state of this instance.
+  float offset_confirm_dur_sec_;  // Max on-time allowed to confirm POST_SPEECH.
+  int64_t endpointer_time_us_;  // Time of the most recently received audio frame.
+  int64_t fast_update_frames_; // Number of frames for initial level adaptation.
+  int64_t frame_counter_;  // Number of frames seen. Used for initial adaptation.
+  float max_window_dur_;  // Largest search window size (seconds)
+  float sample_rate_;  // Sampling rate.
+
+  // Ring buffers to hold the speech activity history.
+  nsAutoPtr<HistoryRing> history_;
+
+  // Configuration parameters.
+  EnergyEndpointerParams params_;
+
+  // RMS which must be exceeded to conclude frame is speech.
+  float decision_threshold_;
+
+  // Flag to indicate that audio should be used to estimate environment, prior
+  // to receiving user input.
+  bool estimating_environment_;
+
+  // Estimate of the background noise level. Used externally for UI feedback.
+  float noise_level_;
+
+  // An adaptive threshold used to update decision_threshold_ when appropriate.
+  float rms_adapt_;
+
+  // Start lag corresponds to the highest fundamental frequency.
+  int start_lag_;
+
+  // End lag corresponds to the lowest fundamental frequency.
+  int end_lag_;
+
+  // Time when mode switched from environment estimation to user input. This
+  // is used to time forced rejection of audio feedback contamination.
+  int64_t user_input_start_time_us_;
+
+  // Prevent copy construction and assignment.
+  EnergyEndpointer(const EnergyEndpointer&);
+  void operator=(const EnergyEndpointer&);
+};
+
+}  // namespace mozilla
+
+#endif  // CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/energy_endpointer_params.cc
@@ -0,0 +1,77 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "energy_endpointer_params.h"
+
+namespace mozilla {
+
+EnergyEndpointerParams::EnergyEndpointerParams() {
+  SetDefaults();
+}
+
+void EnergyEndpointerParams::SetDefaults() {
+  frame_period_ = 0.01f;
+  frame_duration_ = 0.01f;
+  endpoint_margin_ = 0.2f;
+  onset_window_ = 0.15f;
+  speech_on_window_ = 0.4f;
+  offset_window_ = 0.15f;
+  onset_detect_dur_ = 0.09f;
+  onset_confirm_dur_ = 0.075f;
+  on_maintain_dur_ = 0.10f;
+  offset_confirm_dur_ = 0.12f;
+  decision_threshold_ = 150.0f;
+  min_decision_threshold_ = 50.0f;
+  fast_update_dur_ = 0.2f;
+  sample_rate_ = 8000.0f;
+  min_fundamental_frequency_ = 57.143f;
+  max_fundamental_frequency_ = 400.0f;
+  contamination_rejection_period_ = 0.25f;
+}
+
+void EnergyEndpointerParams::operator=(const EnergyEndpointerParams& source) {
+  frame_period_ = source.frame_period();
+  frame_duration_ = source.frame_duration();
+  endpoint_margin_ = source.endpoint_margin();
+  onset_window_ = source.onset_window();
+  speech_on_window_ = source.speech_on_window();
+  offset_window_ = source.offset_window();
+  onset_detect_dur_ = source.onset_detect_dur();
+  onset_confirm_dur_ = source.onset_confirm_dur();
+  on_maintain_dur_ = source.on_maintain_dur();
+  offset_confirm_dur_ = source.offset_confirm_dur();
+  decision_threshold_ = source.decision_threshold();
+  min_decision_threshold_ = source.min_decision_threshold();
+  fast_update_dur_ = source.fast_update_dur();
+  sample_rate_ = source.sample_rate();
+  min_fundamental_frequency_ = source.min_fundamental_frequency();
+  max_fundamental_frequency_ = source.max_fundamental_frequency();
+  contamination_rejection_period_ = source.contamination_rejection_period();
+}
+
+}  //  namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/energy_endpointer_params.h
@@ -0,0 +1,159 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_PARAMS_H_
+#define CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_PARAMS_H_
+
+namespace mozilla {
+
+// Input parameters for the EnergyEndpointer class.
+class EnergyEndpointerParams {
+ public:
+  EnergyEndpointerParams();
+
+  void SetDefaults();
+
+  void operator=(const EnergyEndpointerParams& source);
+
+  // Accessors and mutators
+  float frame_period() const { return frame_period_; }
+  void set_frame_period(float frame_period) {
+    frame_period_ = frame_period;
+  }
+
+  float frame_duration() const { return frame_duration_; }
+  void set_frame_duration(float frame_duration) {
+    frame_duration_ = frame_duration;
+  }
+
+  float endpoint_margin() const { return endpoint_margin_; }
+  void set_endpoint_margin(float endpoint_margin) {
+    endpoint_margin_ = endpoint_margin;
+  }
+
+  float onset_window() const { return onset_window_; }
+  void set_onset_window(float onset_window) { onset_window_ = onset_window; }
+
+  float speech_on_window() const { return speech_on_window_; }
+  void set_speech_on_window(float speech_on_window) {
+    speech_on_window_ = speech_on_window;
+  }
+
+  float offset_window() const { return offset_window_; }
+  void set_offset_window(float offset_window) {
+    offset_window_ = offset_window;
+  }
+
+  float onset_detect_dur() const { return onset_detect_dur_; }
+  void set_onset_detect_dur(float onset_detect_dur) {
+    onset_detect_dur_ = onset_detect_dur;
+  }
+
+  float onset_confirm_dur() const { return onset_confirm_dur_; }
+  void set_onset_confirm_dur(float onset_confirm_dur) {
+    onset_confirm_dur_ = onset_confirm_dur;
+  }
+
+  float on_maintain_dur() const { return on_maintain_dur_; }
+  void set_on_maintain_dur(float on_maintain_dur) {
+    on_maintain_dur_ = on_maintain_dur;
+  }
+
+  float offset_confirm_dur() const { return offset_confirm_dur_; }
+  void set_offset_confirm_dur(float offset_confirm_dur) {
+    offset_confirm_dur_ = offset_confirm_dur;
+  }
+
+  float decision_threshold() const { return decision_threshold_; }
+  void set_decision_threshold(float decision_threshold) {
+    decision_threshold_ = decision_threshold;
+  }
+
+  float min_decision_threshold() const { return min_decision_threshold_; }
+  void set_min_decision_threshold(float min_decision_threshold) {
+    min_decision_threshold_ = min_decision_threshold;
+  }
+
+  float fast_update_dur() const { return fast_update_dur_; }
+  void set_fast_update_dur(float fast_update_dur) {
+    fast_update_dur_ = fast_update_dur;
+  }
+
+  float sample_rate() const { return sample_rate_; }
+  void set_sample_rate(float sample_rate) { sample_rate_ = sample_rate; }
+
+  float min_fundamental_frequency() const { return min_fundamental_frequency_; }
+  void set_min_fundamental_frequency(float min_fundamental_frequency) {
+    min_fundamental_frequency_ = min_fundamental_frequency;
+  }
+
+  float max_fundamental_frequency() const { return max_fundamental_frequency_; }
+  void set_max_fundamental_frequency(float max_fundamental_frequency) {
+    max_fundamental_frequency_ = max_fundamental_frequency;
+  }
+
+  float contamination_rejection_period() const {
+    return contamination_rejection_period_;
+  }
+  void set_contamination_rejection_period(
+      float contamination_rejection_period) {
+    contamination_rejection_period_ = contamination_rejection_period;
+  }
+
+ private:
+  float frame_period_;  // Frame period
+  float frame_duration_;  // Window size
+  float onset_window_;  // Interval scanned for onset activity
+  float speech_on_window_;  // Interval scanned for ongoing speech
+  float offset_window_;  // Interval scanned for offset evidence
+  float offset_confirm_dur_;  // Silence duration required to confirm offset
+  float decision_threshold_;  // Initial rms detection threshold
+  float min_decision_threshold_;  // Minimum rms detection threshold
+  float fast_update_dur_;  // Period for initial estimation of levels.
+  float sample_rate_;  // Expected sample rate.
+
+  // Time to add on either side of endpoint threshold crossings
+  float endpoint_margin_;
+  // Total dur within onset_window required to enter ONSET state
+  float onset_detect_dur_;
+  // Total on time within onset_window required to enter SPEECH_ON state
+  float onset_confirm_dur_;
+  // Minimum dur in SPEECH_ON state required to maintain ON state
+  float on_maintain_dur_;
+  // Minimum fundamental frequency for autocorrelation.
+  float min_fundamental_frequency_;
+  // Maximum fundamental frequency for autocorrelation.
+  float max_fundamental_frequency_;
+  // Period after start of user input that above threshold values are ignored.
+  // This is to reject audio feedback contamination.
+  float contamination_rejection_period_;
+};
+
+}  //  namespace mozilla
+
+#endif  // CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_PARAMS_H_
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/nsIDOMSpeechRecognitionError.idl
@@ -0,0 +1,39 @@
+/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * The origin of this IDL file is
+ * http://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html
+ *
+ * Copyright © 2012 W3C® (MIT, ERCIM, Keio), All Rights Reserved. W3C
+ * liability, trademark and document use rules apply.
+ */
+
+#include "nsIDOMEvent.idl"
+
+[scriptable, builtinclass, uuid(5ddc5a46-e7db-4c5c-8ed4-80cf5d88fca3)]
+interface nsIDOMSpeechRecognitionError : nsIDOMEvent {
+    const unsigned long NO_SPEECH = 0;
+    const unsigned long ABORTED = 1;
+    const unsigned long AUDIO_CAPTURE = 2;
+    const unsigned long NETWORK = 3;
+    const unsigned long NOT_ALLOWED = 4;
+    const unsigned long SERVICE_NOT_ALLOWED = 5;
+    const unsigned long BAD_GRAMMAR = 6;
+    const unsigned long LANGUAGE_NOT_SUPPORTED = 7;
+
+    [noscript] void initSpeechRecognitionError(in DOMString eventTypeArg,
+                                               in boolean canBubbleArg,
+                                               in boolean cancelableArg,
+                                               in unsigned long error,
+                                               in DOMString message);
+
+    readonly attribute unsigned long error;
+    readonly attribute DOMString message;
+};
+
+dictionary SpeechRecognitionErrorInit : EventInit {
+    unsigned long error;
+    DOMString message;
+};
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/nsIDOMSpeechRecognitionEvent.idl
@@ -0,0 +1,38 @@
+/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * The origin of this IDL file is
+ * http://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html
+ *
+ * Copyright © 2012 W3C® (MIT, ERCIM, Keio), All Rights Reserved. W3C
+ * liability, trademark and document use rules apply.
+ */
+
+#include "nsIDOMEvent.idl"
+
+interface SpeechRecognitionResultList;
+
+[scriptable, builtinclass, uuid(98dded70-33af-42d5-819d-e15b6f4a3aba)]
+interface nsIDOMSpeechRecognitionEvent : nsIDOMEvent {
+    [noscript] void initSpeechRecognitionEvent(in DOMString eventTypeArg,
+                                               in boolean canBubbleArg,
+                                               in boolean cancelableArg,
+                                               in unsigned long resultIndex,
+                                               in nsISupports results,
+                                               in DOMString interpretation,
+                                               in nsIDOMDocument emma);
+
+    readonly attribute unsigned long resultIndex;
+    readonly attribute nsISupports results;
+    readonly attribute DOMString interpretation;
+    readonly attribute nsIDOMDocument emma;
+};
+
+dictionary SpeechRecognitionEventInit : EventInit {
+    unsigned long resultIndex;
+    nsISupports results;
+    DOMString interpretation;
+    nsIDOMDocument emma;
+};
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/nsISpeechRecognitionService.idl
@@ -0,0 +1,25 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsISupports.idl"
+
+%{C++
+#include "mozilla/WeakPtr.h"
+%}
+
+native SpeechRecognitionWeakPtr(mozilla::WeakPtr<mozilla::dom::SpeechRecognition>);
+[ptr] native AudioSegmentPtr(mozilla::AudioSegment);
+
+[uuid(cb98d929-81cd-4a51-a214-80d3e6281d24)]
+interface nsISpeechRecognitionService : nsISupports {
+    void initialize(in SpeechRecognitionWeakPtr aSpeechRecognition);
+    void processAudioSegment(in AudioSegmentPtr aAudioSegment);
+    void soundEnd();
+    void abort();
+};
+
+%{C++
+#define NS_SPEECH_RECOGNITION_SERVICE_CONTRACTID_PREFIX "@mozilla.org/webspeech/service;1?name="
+%}
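+
+// A concrete recognition service is looked up under a contract ID built
+// from the prefix above plus a service name, e.g.
+// "@mozilla.org/webspeech/service;1?name=myservice" (the name here is
+// illustrative).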
--- a/dom/bindings/Makefile.in
+++ b/dom/bindings/Makefile.in
@@ -84,16 +84,17 @@ LOCAL_INCLUDES += -I$(topsrcdir)/js/xpco
   -I$(topsrcdir)/dom/base \
   -I$(topsrcdir)/dom/battery \
   -I$(topsrcdir)/dom/indexedDB \
   -I$(topsrcdir)/content/xslt/src/base \
   -I$(topsrcdir)/content/xslt/src/xpath \
   -I$(topsrcdir)/content/xml/content/src \
   -I$(topsrcdir)/content/xul/content/src \
   -I$(topsrcdir)/content/xul/document/src \
+  -I$(topsrcdir)/content/media/webspeech/recognition \
   $(NULL)
 
 ifdef MOZ_AUDIO_CHANNEL_MANAGER
 LOCAL_INCLUDES += \
   -I$(topsrcdir)/dom/system/gonk \
   $(NULL)
 endif
 
--- a/dom/dom-config.mk
+++ b/dom/dom-config.mk
@@ -52,10 +52,14 @@ DOM_SRCDIRS += \
   dom/fm \
   $(NULL)
 endif
 
 ifdef MOZ_B2G_BT
 DOM_SRCDIRS += dom/bluetooth
 endif
 
+ifdef MOZ_WEBSPEECH
+DOM_SRCDIRS += content/media/webspeech
+endif
+
 LOCAL_INCLUDES += $(DOM_SRCDIRS:%=-I$(topsrcdir)/%)
 DEFINES += -D_IMPL_NS_LAYOUT
new file mode 100644
--- /dev/null
+++ b/dom/webidl/SpeechGrammar.webidl
@@ -0,0 +1,19 @@
+/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * The origin of this IDL file is
+ * http://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html
+ *
+ * Copyright © 2012 W3C® (MIT, ERCIM, Keio), All Rights Reserved. W3C
+ * liability, trademark and document use rules apply.
+ */
+
+[Constructor, PrefControlled]
+interface SpeechGrammar {
+    [Throws]
+    attribute DOMString src;
+    [Throws]
+    attribute float weight;
+};
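A usage sketch; the grammar URI is a hypothetical SRGS resource:

  var grammar = new SpeechGrammar();
  grammar.src = "http://example.com/colors.grxml"; // hypothetical grammar URI
  grammar.weight = 0.8; // relative weight, per the Web Speech API draft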
new file mode 100644
--- /dev/null
+++ b/dom/webidl/SpeechGrammarList.webidl
@@ -0,0 +1,22 @@
+/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * The origin of this IDL file is
+ * http://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html
+ *
+ * Copyright © 2012 W3C® (MIT, ERCIM, Keio), All Rights Reserved. W3C
+ * liability, trademark and document use rules apply.
+ */
+
+[PrefControlled]
+interface SpeechGrammarList {
+    readonly attribute unsigned long length;
+    [Throws]
+    getter SpeechGrammar item(unsigned long index);
+    [Throws]
+    void addFromURI(DOMString src, optional float weight);
+    [Throws]
+    void addFromString(DOMString string, optional float weight);
+};
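Note that this patch exposes no constructor for SpeechGrammarList, so a sketch has to reach it through a SpeechRecognition instance (assuming grammars returns a usable list):

  var list = recognition.grammars;
  list.addFromString("#JSGF V1.0; grammar colors; public <color> = red | green | blue;", 1.0);
  console.log(list.length + " grammar(s); first src: " + list.item(0).src);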
new file mode 100644
--- /dev/null
+++ b/dom/webidl/SpeechRecognition.webidl
@@ -0,0 +1,58 @@
+/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * The origin of this IDL file is
+ * http://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html
+ *
+ * Copyright © 2012 W3C® (MIT, ERCIM, Keio), All Rights Reserved. W3C
+ * liability, trademark and document use rules apply.
+ */
+
+[Constructor, PrefControlled]
+interface SpeechRecognition : EventTarget {
+    // recognition parameters
+    [Throws]
+    attribute SpeechGrammarList grammars;
+    [Throws]
+    attribute DOMString lang;
+    [Throws]
+    attribute boolean continuous;
+    [Throws]
+    attribute boolean interimResults;
+    [Throws]
+    attribute unsigned long maxAlternatives;
+    [Throws]
+    attribute DOMString serviceURI;
+
+    // methods to drive the speech interaction
+    [Throws]
+    void start();
+    void stop();
+    void abort();
+
+    // event methods
+    [SetterThrows]
+    attribute EventHandler onaudiostart;
+    [SetterThrows]
+    attribute EventHandler onsoundstart;
+    [SetterThrows]
+    attribute EventHandler onspeechstart;
+    [SetterThrows]
+    attribute EventHandler onspeechend;
+    [SetterThrows]
+    attribute EventHandler onsoundend;
+    [SetterThrows]
+    attribute EventHandler onaudioend;
+    [SetterThrows]
+    attribute EventHandler onresult;
+    [SetterThrows]
+    attribute EventHandler onnomatch;
+    [SetterThrows]
+    attribute EventHandler onerror;
+    [SetterThrows]
+    attribute EventHandler onstart;
+    [SetterThrows]
+    attribute EventHandler onend;
+};
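Putting the pieces together, a minimal end-to-end sketch of driving a session (assuming the pref added below is enabled and a recognition service is registered):

  var recognition = new SpeechRecognition();
  recognition.lang = "en-US";
  recognition.maxAlternatives = 3;
  recognition.onstart = function() { console.log("session started"); };
  recognition.onend = function() { console.log("session ended"); };
  recognition.start(); // [Throws]: e.g. if a session is already in progress
  // Per the draft spec: stop() ends capture but lets pending results arrive;
  // abort() tears the session down and discards them.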
new file mode 100644
--- /dev/null
+++ b/dom/webidl/SpeechRecognitionAlternative.webidl
@@ -0,0 +1,17 @@
+/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * The origin of this IDL file is
+ * http://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html
+ *
+ * Copyright © 2012 W3C® (MIT, ERCIM, Keio), All Rights Reserved. W3C
+ * liability, trademark and document use rules apply.
+ */
+
+[PrefControlled]
+interface SpeechRecognitionAlternative {
+    readonly attribute DOMString transcript;
+    readonly attribute float confidence;
+};
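A small helper sketch for picking the alternative the recognizer is most confident about:

  function bestAlternative(result) {
    var top = result.item(0);
    for (var i = 1; i < result.length; i++) {
      if (result.item(i).confidence > top.confidence) {
        top = result.item(i);
      }
    }
    return top;
  }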
new file mode 100644
--- /dev/null
+++ b/dom/webidl/SpeechRecognitionResult.webidl
@@ -0,0 +1,18 @@
+/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * The origin of this IDL file is
+ * http://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html
+ *
+ * Copyright © 2012 W3C® (MIT, ERCIM, Keio), All Rights Reserved. W3C
+ * liability, trademark and document use rules apply.
+ */
+
+[PrefControlled]
+interface SpeechRecognitionResult {
+    readonly attribute unsigned long length;
+    getter SpeechRecognitionAlternative item(unsigned long index);
+    readonly attribute boolean final;
+};
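The final flag distinguishes settled results from interim ones; a sketch using the bestAlternative() helper above:

  recognition.onresult = function(event) {
    var result = event.results.item(event.resultIndex);
    if (result.final) {
      // No further updates will arrive for this entry.
      console.log(bestAlternative(result).transcript);
    }
  };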
new file mode 100644
--- /dev/null
+++ b/dom/webidl/SpeechRecognitionResultList.webidl
@@ -0,0 +1,17 @@
+/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * The origin of this IDL file is
+ * http://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html
+ *
+ * Copyright © 2012 W3C® (MIT, ERCIM, Keio), All Rights Reserved. W3C
+ * liability, trademark and document use rules apply.
+ */
+
+[PrefControlled]
+interface SpeechRecognitionResultList {
+    readonly attribute unsigned long length;
+    getter SpeechRecognitionResult item(unsigned long index);
+};
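And a sketch walking the whole list, as a continuous session would accumulate it:

  function dumpResults(results) {
    for (var i = 0; i < results.length; i++) {
      console.log(i + ": " + results.item(i).item(0).transcript);
    }
  }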
--- a/dom/webidl/WebIDL.mk
+++ b/dom/webidl/WebIDL.mk
@@ -306,16 +306,27 @@ webidl_files += \
 endif
 
 ifdef MOZ_WEBRTC
 webidl_files += \
   MediaStreamList.webidl \
   $(NULL)
 endif
 
+ifdef MOZ_WEBSPEECH
+webidl_files += \
+  SpeechGrammar.webidl \
+  SpeechGrammarList.webidl \
+  SpeechRecognition.webidl \
+  SpeechRecognitionAlternative.webidl \
+  SpeechRecognitionResult.webidl \
+  SpeechRecognitionResultList.webidl \
+    $(NULL)
+endif
+
 ifdef ENABLE_TESTS
 test_webidl_files := \
   TestCodeGen.webidl \
   TestDictionary.webidl \
   TestExampleGen.webidl \
   TestJSImplGen.webidl \
   TestTypedef.webidl \
   $(NULL)
--- a/js/xpconnect/src/event_impl_gen.conf.in
+++ b/js/xpconnect/src/event_impl_gen.conf.in
@@ -45,16 +45,20 @@ simple_events = [
     'DeviceStorageChangeEvent',
     'PopupBlockedEvent',
     'BlobEvent',
 #ifdef MOZ_GAMEPAD
     'GamepadEvent',
     'GamepadButtonEvent',
     'GamepadAxisMoveEvent',
 #endif
+#ifdef MOZ_WEBSPEECH
+    'SpeechRecognitionEvent',
+    'SpeechRecognitionError',
+#endif
   ]
 
 """ include file names """
 special_includes = [
     'DictionaryHelpers.h',
     'nsContentUtils.h',
     'nsIDOMApplicationRegistry.h',
     'nsIDOMFile.h'
--- a/layout/build/Makefile.in
+++ b/layout/build/Makefile.in
@@ -203,16 +203,22 @@ SHARED_LIBRARY_LIBS 	+= \
 endif
 
 ifdef MOZ_DASH
 SHARED_LIBRARY_LIBS += \
   $(DEPTH)/content/media/dash/$(LIB_PREFIX)gkcondash_s.$(LIB_SUFFIX) \
   $(NULL)
 endif
 
+ifdef MOZ_WEBSPEECH
+SHARED_LIBRARY_LIBS += \
+  $(DEPTH)/content/media/webspeech/$(LIB_PREFIX)gkconwebspeech_s.$(LIB_SUFFIX) \
+  $(NULL)
+endif
+
 ifeq (gonk,$(MOZ_WIDGET_TOOLKIT))
 INCLUDES	+= \
 		-I$(srcdir)/../../base/src \
 		-I$(srcdir)/../../html/content/src \
 		-I$(ANDROID_SOURCE)/dalvik/libnativehelper/include/nativehelper \
 		-I$(ANDROID_SOURCE)/frameworks/base/include/ \
 		-I$(ANDROID_SOURCE)/frameworks/base/include/binder/ \
 		-I$(ANDROID_SOURCE)/frameworks/base/include/utils/ \
--- a/mobile/android/installer/package-manifest.in
+++ b/mobile/android/installer/package-manifest.in
@@ -136,16 +136,19 @@
 @BINPATH@/components/dom_sidebar.xpt
 @BINPATH@/components/dom_mobilemessage.xpt
 @BINPATH@/components/dom_storage.xpt
 @BINPATH@/components/dom_stylesheets.xpt
 @BINPATH@/components/dom_system.xpt
 @BINPATH@/components/dom_threads.xpt
 @BINPATH@/components/dom_traversal.xpt
 @BINPATH@/components/dom_views.xpt
+#ifdef MOZ_WEBSPEECH
+@BINPATH@/components/dom_webspeech.xpt
+#endif
 @BINPATH@/components/dom_xbl.xpt
 @BINPATH@/components/dom_xpath.xpt
 @BINPATH@/components/dom_xul.xpt
 @BINPATH@/components/downloads.xpt
 @BINPATH@/components/editor.xpt
 @BINPATH@/components/embed_base.xpt
 @BINPATH@/components/extensions.xpt
 @BINPATH@/components/exthandler.xpt
--- a/modules/libpref/src/init/all.js
+++ b/modules/libpref/src/init/all.js
@@ -198,16 +198,20 @@ pref("media.peerconnection.agc", 1);
 pref("media.peerconnection.noise_enabled", false);
 pref("media.peerconnection.noise", 1);
 #else
 #ifdef ANDROID
 pref("media.navigator.enabled", true);
 #endif
 #endif
 
+#ifdef MOZ_WEBSPEECH
+pref("media.webspeech.recognition.enable", false);
+#endif
+
 // Whether to enable Web Audio support
 pref("media.webaudio.enabled", false);
 
 // Whether to autostart a media element with an |autoplay| attribute
 pref("media.autoplay.enabled", true);
 
 // The default number of decoded video frames that are enqueued in
 // MediaDecoderReader's mVideoQueue.
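The new media.webspeech.recognition.enable pref defaults to false, so even MOZ_WEBSPEECH builds keep the API dormant; to exercise it, e.g. in a test profile's user.js:

  user_pref("media.webspeech.recognition.enable", true);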
--- a/toolkit/content/license.html
+++ b/toolkit/content/license.html
@@ -1064,18 +1064,19 @@ WITH THE USE OR PERFORMANCE OF THIS SOFT
 
     <p>This license applies to parts of the code in
       <span class="path">editor/libeditor/base/nsEditorEventListener.cpp</span>,
       <span class="path">widget/cocoa/GfxInfo.mm</span>
       and also some files in the directories
       <span class="path">ipc/chromium/</span>,
       <span class="path">dom/plugins/</span>,
       <span class="path">tools/profiler/sps/</span>,
-      <span class="path">gfx/ots/</span> and
-      <span class="path">gfx/ycbcr</span>.
+      <span class="path">gfx/ots/</span>,
+      <span class="path">gfx/ycbcr</span> and
+      <span class="path">content/media/webspeech/recognition/</span>.
     </p>
 
 <pre>
 Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are
 met: