Bug 650295 - Implement main state machine for speech recognition. r=smaug
authorGuilherme Goncalves <ggoncalves@mozilla.com>
Wed, 27 Mar 2013 14:08:17 -0700
changeset 136862 dfd7768f8f48c7318d108d696beda993958ebb37
parent 136861 f22ec75a02d742937450bdef3e845598f10992de
child 136863 c5e0abff4496dfcce425c8ff8828090b26b1abb7
push id2452
push userlsblakk@mozilla.com
push dateMon, 13 May 2013 16:59:38 +0000
treeherdermozilla-beta@d4b152d29d8d [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerssmaug
bugs650295
milestone22.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 650295 - Implement main state machine for speech recognition. r=smaug
b2g/installer/package-manifest.in
browser/installer/package-manifest.in
configure.in
content/base/src/nsGkAtomList.h
content/media/moz.build
content/media/webspeech/Makefile.in
content/media/webspeech/moz.build
content/media/webspeech/recognition/EnableWebSpeechRecognitionCheck.cpp
content/media/webspeech/recognition/EnableWebSpeechRecognitionCheck.h
content/media/webspeech/recognition/SpeechGrammar.cpp
content/media/webspeech/recognition/SpeechGrammar.h
content/media/webspeech/recognition/SpeechGrammarList.cpp
content/media/webspeech/recognition/SpeechGrammarList.h
content/media/webspeech/recognition/SpeechRecognition.cpp
content/media/webspeech/recognition/SpeechRecognition.h
content/media/webspeech/recognition/SpeechRecognitionAlternative.cpp
content/media/webspeech/recognition/SpeechRecognitionAlternative.h
content/media/webspeech/recognition/SpeechRecognitionResult.cpp
content/media/webspeech/recognition/SpeechRecognitionResult.h
content/media/webspeech/recognition/SpeechRecognitionResultList.cpp
content/media/webspeech/recognition/SpeechRecognitionResultList.h
content/media/webspeech/recognition/SpeechStreamListener.cpp
content/media/webspeech/recognition/SpeechStreamListener.h
content/media/webspeech/recognition/endpointer.cc
content/media/webspeech/recognition/endpointer.h
content/media/webspeech/recognition/energy_endpointer.cc
content/media/webspeech/recognition/energy_endpointer.h
content/media/webspeech/recognition/energy_endpointer_params.cc
content/media/webspeech/recognition/energy_endpointer_params.h
content/media/webspeech/recognition/nsIDOMSpeechRecognitionError.idl
content/media/webspeech/recognition/nsIDOMSpeechRecognitionEvent.idl
content/media/webspeech/recognition/nsISpeechRecognitionService.idl
dom/bindings/Makefile.in
dom/dom-config.mk
dom/webidl/SpeechGrammar.webidl
dom/webidl/SpeechGrammarList.webidl
dom/webidl/SpeechRecognition.webidl
dom/webidl/SpeechRecognitionAlternative.webidl
dom/webidl/SpeechRecognitionResult.webidl
dom/webidl/SpeechRecognitionResultList.webidl
dom/webidl/WebIDL.mk
js/xpconnect/src/event_impl_gen.conf.in
layout/build/Makefile.in
mobile/android/installer/package-manifest.in
modules/libpref/src/init/all.js
toolkit/content/license.html
--- a/b2g/installer/package-manifest.in
+++ b/b2g/installer/package-manifest.in
@@ -207,16 +207,19 @@
 @BINPATH@/components/dom_permissionsettings.xpt
 @BINPATH@/components/dom_sidebar.xpt
 @BINPATH@/components/dom_mobilemessage.xpt
 @BINPATH@/components/dom_storage.xpt
 @BINPATH@/components/dom_stylesheets.xpt
 @BINPATH@/components/dom_threads.xpt
 @BINPATH@/components/dom_traversal.xpt
 @BINPATH@/components/dom_views.xpt
+#ifdef MOZ_WEBSPEECH
+@BINPATH@/components/dom_webspeech.xpt
+#endif
 @BINPATH@/components/dom_xbl.xpt
 @BINPATH@/components/dom_xpath.xpt
 @BINPATH@/components/dom_xul.xpt
 @BINPATH@/components/dom_time.xpt
 @BINPATH@/components/downloads.xpt
 @BINPATH@/components/editor.xpt
 @BINPATH@/components/embed_base.xpt
 @BINPATH@/components/extensions.xpt
--- a/browser/installer/package-manifest.in
+++ b/browser/installer/package-manifest.in
@@ -206,16 +206,19 @@
 @BINPATH@/components/dom_range.xpt
 @BINPATH@/components/dom_settings.xpt
 @BINPATH@/components/dom_permissionsettings.xpt
 @BINPATH@/components/dom_sidebar.xpt
 @BINPATH@/components/dom_mobilemessage.xpt
 @BINPATH@/components/dom_storage.xpt
 @BINPATH@/components/dom_stylesheets.xpt
 @BINPATH@/components/dom_traversal.xpt
+#ifdef MOZ_WEBSPEECH
+@BINPATH@/components/dom_webspeech.xpt
+#endif
 @BINPATH@/components/dom_xbl.xpt
 @BINPATH@/components/dom_xpath.xpt
 @BINPATH@/components/dom_xul.xpt
 #ifdef MOZ_GAMEPAD
 @BINPATH@/components/dom_gamepad.xpt
 #endif
 @BINPATH@/components/downloads.xpt
 @BINPATH@/components/editor.xpt
--- a/configure.in
+++ b/configure.in
@@ -4245,16 +4245,17 @@ MOZ_WEBRTC_IN_LIBXUL=
 MOZ_SCTP=
 MOZ_MEDIA_PLUGINS=
 MOZ_MEDIA_NAVIGATOR=
 MOZ_OMX_PLUGIN=
 MOZ_VP8=
 MOZ_VP8_ERROR_CONCEALMENT=
 MOZ_VP8_ENCODER=
 MOZ_WEBVTT=1
+MOZ_WEBSPEECH=1
 VPX_AS=
 VPX_ASFLAGS=
 VPX_AS_DASH_C_FLAG=
 VPX_AS_CONVERSION=
 VPX_ASM_SUFFIX=
 VPX_X86_ASM=
 VPX_ARM_ASM=
 LIBJPEG_TURBO_AS=
@@ -5363,16 +5364,31 @@ arm*)
 *)
     MOZ_SAMPLE_TYPE_FLOAT32=1
     AC_DEFINE(MOZ_SAMPLE_TYPE_FLOAT32)
     AC_SUBST(MOZ_SAMPLE_TYPE_FLOAT32)
 ;;
 esac
 
 dnl ========================================================
+dnl = Disable Speech API code
+dnl ========================================================
+MOZ_ARG_DISABLE_BOOL(webspeech,
+[  --disable-webspeech        Disable support for HTML Speech API],
+    MOZ_WEBSPEECH=,
+    MOZ_WEBSPEECH=1)
+
+if test -n "$MOZ_WEBSPEECH"; then
+    AC_DEFINE(MOZ_WEBSPEECH)
+    MOZ_MEDIA=1
+fi
+
+AC_SUBST(MOZ_WEBSPEECH)
+
+dnl ========================================================
 dnl = Enable Raw Codecs
 dnl ========================================================
 MOZ_ARG_ENABLE_BOOL(raw,
 [  --enable-raw           Enable support for RAW media],
     MOZ_RAW=1,
     MOZ_RAW=)
 
 if test -n "$MOZ_RAW"; then
--- a/content/base/src/nsGkAtomList.h
+++ b/content/base/src/nsGkAtomList.h
@@ -2156,8 +2156,20 @@ GK_ATOM(timer, "timer")
 GK_ATOM(toolbarname, "toolbarname")
 GK_ATOM(toolbarseparator, "toolbarseparator")
 GK_ATOM(toolbarspacer, "toolbarspacer")
 GK_ATOM(toolbarspring, "toolbarspring")
 GK_ATOM(treegrid, "treegrid")
 GK_ATOM(_undefined, "undefined")
 GK_ATOM(xmlroles, "xml-roles")
 #endif
+
+#ifdef MOZ_WEBSPEECH
+GK_ATOM(onaudiostart, "onaudiostart")
+GK_ATOM(onaudioend, "onaudioend")
+GK_ATOM(onsoundstart, "onsoundstart")
+GK_ATOM(onsoundend, "onsoundend")
+GK_ATOM(onspeechstart, "onspeechstart")
+GK_ATOM(onspeechend, "onspeechend")
+GK_ATOM(onresult, "onresult")
+GK_ATOM(onnomatch, "onnomatch")
+GK_ATOM(onstart, "onstart")
+#endif
--- a/content/media/moz.build
+++ b/content/media/moz.build
@@ -29,12 +29,15 @@ if CONFIG['MOZ_MEDIA_PLUGINS']:
 if CONFIG['MOZ_WMF']:
     PARALLEL_DIRS += ['wmf']
 
 PARALLEL_DIRS += ['webrtc']
 
 if CONFIG['MOZ_WIDGET_TOOLKIT'] == 'gonk':
     PARALLEL_DIRS += ['omx']
 
+if CONFIG['MOZ_WEBSPEECH']:
+    PARALLEL_DIRS += ['webspeech']
+
 TEST_DIRS += ['test']
 
 MODULE = 'content'
 
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/Makefile.in
@@ -0,0 +1,51 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+DEPTH            := @DEPTH@
+topsrcdir        := @top_srcdir@
+srcdir           := @srcdir@
+VPATH            := @srcdir@
+
+include $(DEPTH)/config/autoconf.mk
+include $(topsrcdir)/dom/dom-config.mk
+
+VPATH += \
+  $(srcdir)/recognition \
+  $(NULL)
+
+LIBRARY_NAME   := gkconwebspeech_s
+LIBXUL_LIBRARY := 1
+
+LOCAL_INCLUDES += $(VPATH:%=-I%)
+
+CPPSRCS := \
+  EnableWebSpeechRecognitionCheck.cpp \
+  SpeechGrammar.cpp \
+  SpeechGrammarList.cpp \
+  SpeechRecognitionAlternative.cpp \
+  SpeechRecognition.cpp \
+  SpeechRecognitionResult.cpp \
+  SpeechRecognitionResultList.cpp \
+  SpeechStreamListener.cpp \
+  endpointer.cc \
+  energy_endpointer.cc \
+  energy_endpointer_params.cc \
+  $(NULL)
+
+EXPORTS_NAMESPACES := mozilla/dom
+EXPORTS_mozilla/dom := \
+  SpeechGrammar.h \
+  SpeechGrammarList.h \
+  SpeechRecognitionAlternative.h \
+  SpeechRecognition.h \
+  SpeechRecognitionResult.h \
+  SpeechRecognitionResultList.h \
+  SpeechStreamListener.h \
+  $(NULL)
+
+FORCE_STATIC_LIB := 1
+
+include $(topsrcdir)/config/config.mk
+include $(topsrcdir)/ipc/chromium/chromium-config.mk
+include $(topsrcdir)/config/rules.mk
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/moz.build
@@ -0,0 +1,14 @@
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+MODULE = 'content'
+
+XPIDL_MODULE = 'dom_webspeech'
+
+XPIDL_SOURCES = [
+  'nsIDOMSpeechRecognitionEvent.idl',
+  'nsIDOMSpeechRecognitionError.idl',
+  'nsISpeechRecognitionService.idl'
+]
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/EnableWebSpeechRecognitionCheck.cpp
@@ -0,0 +1,32 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "EnableWebSpeechRecognitionCheck.h"
+#include "mozilla/Preferences.h"
+
+namespace {
+
+bool gPrefInitialized = false;
+bool gWebSpeechEnabled = false;
+
+}
+
+namespace mozilla {
+namespace dom {
+
+/* static */ bool
+EnableWebSpeechRecognitionCheck::PrefEnabled()
+{
+  if (!gPrefInitialized) {
+    Preferences::AddBoolVarCache(&gWebSpeechEnabled, "media.webspeech.recognition.enable");
+    gPrefInitialized = true;
+  }
+  return gWebSpeechEnabled;
+}
+
+}
+}
+
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/EnableWebSpeechRecognitionCheck.h
@@ -0,0 +1,19 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#pragma once
+
+namespace mozilla {
+namespace dom {
+
+class EnableWebSpeechRecognitionCheck
+{
+public:
+  static bool PrefEnabled();
+};
+
+}
+}
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechGrammar.cpp
@@ -0,0 +1,82 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "SpeechGrammar.h"
+
+#include "nsContentUtils.h"
+
+#include "mozilla/Preferences.h"
+#include "mozilla/dom/SpeechGrammarBinding.h"
+
+namespace mozilla {
+namespace dom {
+
+NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE_1(SpeechGrammar, mParent)
+NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechGrammar)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechGrammar)
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechGrammar)
+  NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY
+  NS_INTERFACE_MAP_ENTRY(nsISupports)
+NS_INTERFACE_MAP_END
+
+SpeechGrammar::SpeechGrammar(nsISupports* aParent)
+  : mParent(aParent)
+{
+  SetIsDOMBinding();
+}
+
+SpeechGrammar::~SpeechGrammar()
+{
+}
+
+SpeechGrammar*
+SpeechGrammar::Constructor(const GlobalObject& aGlobal, ErrorResult& aRv)
+{
+  return new SpeechGrammar(aGlobal.Get());
+}
+
+nsISupports*
+SpeechGrammar::GetParentObject() const
+{
+  return mParent;
+}
+
+JSObject*
+SpeechGrammar::WrapObject(JSContext* aCx, JSObject* aScope)
+{
+  return SpeechGrammarBinding::Wrap(aCx, aScope, this);
+}
+
+void
+SpeechGrammar::GetSrc(nsString& aRetVal, ErrorResult& aRv) const
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return;
+}
+
+void
+SpeechGrammar::SetSrc(const nsAString& aArg, ErrorResult& aRv)
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return;
+}
+
+float
+SpeechGrammar::GetWeight(ErrorResult& aRv) const
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return 0;
+}
+
+void
+SpeechGrammar::SetWeight(float aArg, ErrorResult& aRv)
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return;
+}
+
+} // namespace dom
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechGrammar.h
@@ -0,0 +1,56 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#pragma once
+
+#include "nsCOMPtr.h"
+#include "nsCycleCollectionParticipant.h"
+#include "nsString.h"
+#include "nsWrapperCache.h"
+
+#include "mozilla/Attributes.h"
+#include "mozilla/ErrorResult.h"
+
+#include "EnableWebSpeechRecognitionCheck.h"
+
+struct JSContext;
+
+namespace mozilla {
+namespace dom {
+
+class GlobalObject;
+
+class SpeechGrammar MOZ_FINAL : public nsISupports,
+                                public nsWrapperCache,
+                                public EnableWebSpeechRecognitionCheck
+{
+public:
+  SpeechGrammar(nsISupports* aParent);
+  ~SpeechGrammar();
+
+  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+  NS_DECL_CYCLE_COLLECTION_SCRIPT_HOLDER_CLASS(SpeechGrammar)
+
+  nsISupports* GetParentObject() const;
+
+  virtual JSObject* WrapObject(JSContext* aCx, JSObject* aScope);
+
+  static SpeechGrammar* Constructor(const GlobalObject& aGlobal, ErrorResult& aRv);
+
+  void GetSrc(nsString& aRetVal, ErrorResult& aRv) const;
+
+  void SetSrc(const nsAString& aArg, ErrorResult& aRv);
+
+  float GetWeight(ErrorResult& aRv) const;
+
+  void SetWeight(float aArg, ErrorResult& aRv);
+
+private:
+  nsCOMPtr<nsISupports> mParent;
+};
+
+} // namespace dom
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechGrammarList.cpp
@@ -0,0 +1,91 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "SpeechGrammarList.h"
+
+#include "nsContentUtils.h"
+
+#include "mozilla/dom/SpeechGrammarListBinding.h"
+
+namespace mozilla {
+namespace dom {
+
+NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE_1(SpeechGrammarList, mParent)
+NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechGrammarList)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechGrammarList)
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechGrammarList)
+  NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY
+  NS_INTERFACE_MAP_ENTRY(nsISupports)
+NS_INTERFACE_MAP_END
+
+SpeechGrammarList::SpeechGrammarList(nsISupports* aParent)
+  : mParent(aParent)
+{
+  SetIsDOMBinding();
+}
+
+SpeechGrammarList::~SpeechGrammarList()
+{
+}
+
+SpeechGrammarList*
+SpeechGrammarList::Constructor(const GlobalObject& aGlobal, ErrorResult& aRv)
+{
+  return new SpeechGrammarList(aGlobal.Get());
+}
+
+JSObject*
+SpeechGrammarList::WrapObject(JSContext* aCx, JSObject* aScope)
+{
+  return SpeechGrammarListBinding::Wrap(aCx, aScope, this);
+}
+
+nsISupports*
+SpeechGrammarList::GetParentObject() const
+{
+  return mParent;
+}
+
+uint32_t
+SpeechGrammarList::Length() const
+{
+  return 0;
+}
+
+already_AddRefed<SpeechGrammar>
+SpeechGrammarList::Item(uint32_t aIndex, ErrorResult& aRv)
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return nullptr;
+}
+
+void
+SpeechGrammarList::AddFromURI(const nsAString& aSrc,
+                              const Optional<float>& aWeight,
+                              ErrorResult& aRv)
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return;
+}
+
+void
+SpeechGrammarList::AddFromString(const nsAString& aString,
+                                 const Optional<float>& aWeight,
+                                 ErrorResult& aRv)
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return;
+}
+
+already_AddRefed<SpeechGrammar>
+SpeechGrammarList::IndexedGetter(uint32_t aIndex, bool& aPresent,
+                                 ErrorResult& aRv)
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return nullptr;
+}
+} // namespace dom
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechGrammarList.h
@@ -0,0 +1,57 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#pragma once
+
+#include "nsCOMPtr.h"
+#include "nsCycleCollectionParticipant.h"
+#include "nsWrapperCache.h"
+
+#include "mozilla/Attributes.h"
+#include "mozilla/dom/BindingUtils.h"
+
+#include "EnableWebSpeechRecognitionCheck.h"
+#include "SpeechGrammar.h"
+
+struct JSContext;
+
+namespace mozilla {
+namespace dom {
+
+class GlobalObject;
+class SpeechGrammarList MOZ_FINAL : public nsISupports,
+                                    public nsWrapperCache,
+                                    public EnableWebSpeechRecognitionCheck
+{
+public:
+  SpeechGrammarList(nsISupports* aParent);
+  ~SpeechGrammarList();
+
+  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+  NS_DECL_CYCLE_COLLECTION_SCRIPT_HOLDER_CLASS(SpeechGrammarList)
+
+  SpeechGrammarList* Constructor(const GlobalObject& aGlobal, ErrorResult& aRv);
+
+  nsISupports* GetParentObject() const;
+
+  virtual JSObject* WrapObject(JSContext* aCx, JSObject* aScope);
+
+  uint32_t Length() const;
+
+  already_AddRefed<SpeechGrammar> Item(uint32_t aIndex, ErrorResult& aRv);
+
+  void AddFromURI(const nsAString& aSrc, const Optional<float>& aWeight, ErrorResult& aRv);
+
+  void AddFromString(const nsAString& aString, const Optional<float>& aWeight, ErrorResult& aRv);
+
+  already_AddRefed<SpeechGrammar> IndexedGetter(uint32_t aIndex, bool& aPresent, ErrorResult& aRv);
+
+private:
+  nsCOMPtr<nsISupports> mParent;
+};
+
+} // namespace dom
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechRecognition.cpp
@@ -0,0 +1,739 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "SpeechRecognition.h"
+
+#include "nsCOMPtr.h"
+#include "nsContentUtils.h"
+#include "nsCycleCollectionParticipant.h"
+#include "mozilla/Preferences.h"
+
+#include "mozilla/dom/SpeechRecognitionBinding.h"
+
+#include "AudioSegment.h"
+#include "SpeechStreamListener.h"
+#include "endpointer.h"
+
+#include "GeneratedEvents.h"
+#include "nsIDOMSpeechRecognitionEvent.h"
+
+namespace mozilla {
+namespace dom {
+
+#define PREFERENCE_DEFAULT_RECOGNITION_SERVICE "media.webspeech.service.default"
+#define DEFAULT_RECOGNITION_SERVICE "google"
+
+#define PREFERENCE_ENDPOINTER_SILENCE_LENGTH "media.webspeech.silence_length"
+#define PREFERENCE_ENDPOINTER_LONG_SILENCE_LENGTH "media.webspeech.long_silence_length"
+#define PREFERENCE_ENDPOINTER_LONG_SPEECH_LENGTH "media.webspeech.long_speech_length"
+
+static const uint32_t kSAMPLE_RATE = 16000;
+static const uint32_t kSPEECH_DETECTION_TIMEOUT_MS = 10000;
+
+// number of frames corresponding to 300ms of audio to send to endpointer while
+// it's in environment estimation mode
+// kSAMPLE_RATE frames = 1s, kESTIMATION_FRAMES frames = 300ms
+static const uint32_t kESTIMATION_SAMPLES = 300 * kSAMPLE_RATE / 1000;
+
+#define STATE_EQUALS(state) (mCurrentState == state)
+#define STATE_BETWEEN(state1, state2) \
+  (mCurrentState >= (state1) && mCurrentState <= (state2))
+
+#ifdef PR_LOGGING
+PRLogModuleInfo*
+GetSpeechRecognitionLog()
+{
+  static PRLogModuleInfo* sLog;
+  if (!sLog) {
+    sLog = PR_NewLogModule("SpeechRecognition");
+  }
+
+  return sLog;
+}
+#define SR_LOG(...) PR_LOG(GetSpeechRecognitionLog(), PR_LOG_DEBUG, (__VA_ARGS__))
+#else
+#define SR_LOG(...)
+#endif
+
+NS_INTERFACE_MAP_BEGIN(SpeechRecognition)
+  NS_INTERFACE_MAP_ENTRY(nsIObserver)
+NS_INTERFACE_MAP_END_INHERITING(nsDOMEventTargetHelper)
+
+NS_IMPL_ADDREF_INHERITED(SpeechRecognition, nsDOMEventTargetHelper)
+NS_IMPL_RELEASE_INHERITED(SpeechRecognition, nsDOMEventTargetHelper)
+
+SpeechRecognition::SpeechRecognition()
+  : mProcessingEvent(false)
+  , mEndpointer(kSAMPLE_RATE)
+  , mSpeechDetectionTimer(do_CreateInstance(NS_TIMER_CONTRACTID))
+{
+  SR_LOG("created SpeechRecognition");
+  SetIsDOMBinding();
+  mEndpointer.set_speech_input_complete_silence_length(
+      Preferences::GetInt(PREFERENCE_ENDPOINTER_SILENCE_LENGTH, 500000));
+  mEndpointer.set_long_speech_input_complete_silence_length(
+      Preferences::GetInt(PREFERENCE_ENDPOINTER_LONG_SILENCE_LENGTH, 1000000));
+  mEndpointer.set_long_speech_length(
+      Preferences::GetInt(PREFERENCE_ENDPOINTER_SILENCE_LENGTH, 3 * 1000000));
+  mCurrentState = Reset();
+}
+
+JSObject*
+SpeechRecognition::WrapObject(JSContext* aCx, JSObject* aScope)
+{
+  return SpeechRecognitionBinding::Wrap(aCx, aScope, this);
+}
+
+already_AddRefed<SpeechRecognition>
+SpeechRecognition::Constructor(const GlobalObject& aGlobal, ErrorResult& aRv)
+{
+  nsCOMPtr<nsPIDOMWindow> win = do_QueryInterface(aGlobal.Get());
+  if (!win) {
+    aRv.Throw(NS_ERROR_FAILURE);
+  }
+
+  MOZ_ASSERT(win->IsInnerWindow());
+  nsRefPtr<SpeechRecognition> object = new SpeechRecognition();
+  object->BindToOwner(win);
+  return object.forget();
+}
+
+nsISupports*
+SpeechRecognition::GetParentObject() const
+{
+  return GetOwner();
+}
+
+void
+SpeechRecognition::ProcessEvent(SpeechEvent* aEvent)
+{
+  SR_LOG("Processing event %d", aEvent->mType);
+
+  MOZ_ASSERT(!mProcessingEvent, "Event dispatch should be sequential!");
+  mProcessingEvent = true;
+
+  SR_LOG("Current state: %d", mCurrentState);
+  mCurrentState = TransitionAndGetNextState(aEvent);
+  SR_LOG("Transitioned to state: %d", mCurrentState);
+  mProcessingEvent = false;
+}
+
+SpeechRecognition::FSMState
+SpeechRecognition::TransitionAndGetNextState(SpeechEvent* aEvent)
+{
+  switch (mCurrentState) {
+    case STATE_IDLE:
+      switch (aEvent->mType) {
+        case EVENT_START:
+          // TODO: may want to time out if we wait too long
+          // for user to approve
+          return STATE_STARTING;
+        case EVENT_STOP:
+        case EVENT_ABORT:
+        case EVENT_AUDIO_DATA:
+        case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT:
+        case EVENT_RECOGNITIONSERVICE_FINAL_RESULT:
+          return DoNothing(aEvent);
+        case EVENT_AUDIO_ERROR:
+        case EVENT_RECOGNITIONSERVICE_ERROR:
+          return AbortError(aEvent);
+      }
+    case STATE_STARTING:
+      switch (aEvent->mType) {
+        case EVENT_AUDIO_DATA:
+          return StartedAudioCapture(aEvent);
+        case EVENT_AUDIO_ERROR:
+        case EVENT_RECOGNITIONSERVICE_ERROR:
+          return AbortError(aEvent);
+        case EVENT_ABORT:
+          return AbortSilently(aEvent);
+        case EVENT_STOP:
+          return Reset();
+        case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT:
+        case EVENT_RECOGNITIONSERVICE_FINAL_RESULT:
+          return DoNothing(aEvent);
+        case EVENT_START:
+          SR_LOG("STATE_STARTING: Unhandled event %d", aEvent->mType);
+          MOZ_NOT_REACHED("");
+      }
+    case STATE_ESTIMATING:
+      switch (aEvent->mType) {
+        case EVENT_AUDIO_DATA:
+          return WaitForEstimation(aEvent);
+        case EVENT_STOP:
+          return StopRecordingAndRecognize(aEvent);
+        case EVENT_ABORT:
+          return AbortSilently(aEvent);
+        case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT:
+        case EVENT_RECOGNITIONSERVICE_FINAL_RESULT:
+        case EVENT_RECOGNITIONSERVICE_ERROR:
+          return DoNothing(aEvent);
+        case EVENT_START:
+        case EVENT_AUDIO_ERROR:
+          SR_LOG("STATE_ESTIMATING: Unhandled event %d", aEvent->mType);
+          MOZ_NOT_REACHED("");
+      }
+    case STATE_WAITING_FOR_SPEECH:
+      switch (aEvent->mType) {
+        case EVENT_AUDIO_DATA:
+          return DetectSpeech(aEvent);
+        case EVENT_STOP:
+          return StopRecordingAndRecognize(aEvent);
+        case EVENT_ABORT:
+          return AbortSilently(aEvent);
+        case EVENT_AUDIO_ERROR:
+          return AbortError(aEvent);
+        case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT:
+        case EVENT_RECOGNITIONSERVICE_FINAL_RESULT:
+        case EVENT_RECOGNITIONSERVICE_ERROR:
+          return DoNothing(aEvent);
+        case EVENT_START:
+          SR_LOG("STATE_STARTING: Unhandled event %d", aEvent->mType);
+          MOZ_NOT_REACHED("");
+      }
+    case STATE_RECOGNIZING:
+      switch (aEvent->mType) {
+        case EVENT_AUDIO_DATA:
+          return WaitForSpeechEnd(aEvent);
+        case EVENT_STOP:
+          return StopRecordingAndRecognize(aEvent);
+        case EVENT_AUDIO_ERROR:
+        case EVENT_RECOGNITIONSERVICE_ERROR:
+          return AbortError(aEvent);
+        case EVENT_ABORT:
+          return AbortSilently(aEvent);
+        case EVENT_RECOGNITIONSERVICE_FINAL_RESULT:
+        case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT:
+          return DoNothing(aEvent);
+        case EVENT_START:
+          SR_LOG("STATE_RECOGNIZING: Unhandled aEvent %d", aEvent->mType);
+          MOZ_NOT_REACHED("");
+      }
+    case STATE_WAITING_FOR_RESULT:
+      switch (aEvent->mType) {
+        case EVENT_STOP:
+          return DoNothing(aEvent);
+        case EVENT_AUDIO_ERROR:
+        case EVENT_RECOGNITIONSERVICE_ERROR:
+          return AbortError(aEvent);
+        case EVENT_RECOGNITIONSERVICE_FINAL_RESULT:
+          return NotifyFinalResult(aEvent);
+        case EVENT_AUDIO_DATA:
+          return DoNothing(aEvent);
+        case EVENT_ABORT:
+          return AbortSilently(aEvent);
+        case EVENT_START:
+        case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT:
+          SR_LOG("STATE_WAITING_FOR_RESULT: Unhandled aEvent %d", aEvent->mType);
+          MOZ_NOT_REACHED("");
+      }
+  }
+  SR_LOG("Unhandled state %d", mCurrentState);
+  MOZ_NOT_REACHED("");
+  return mCurrentState;
+}
+
+/*
+ * Handle a segment of recorded audio data.
+ * Returns the number of samples that were processed.
+ */
+uint32_t
+SpeechRecognition::ProcessAudioSegment(AudioSegment* aSegment)
+{
+  AudioSegment::ChunkIterator iterator(*aSegment);
+  uint32_t samples = 0;
+  while (!iterator.IsEnded()) {
+    float out;
+    mEndpointer.ProcessAudio(*iterator, &out);
+    samples += iterator->GetDuration();
+    iterator.Next();
+  }
+
+  mRecognitionService->ProcessAudioSegment(aSegment);
+  return samples;
+}
+
+void
+SpeechRecognition::GetRecognitionServiceCID(nsACString& aResultCID)
+{
+  nsAdoptingCString prefValue =
+    Preferences::GetCString(PREFERENCE_DEFAULT_RECOGNITION_SERVICE);
+
+  nsAutoCString speechRecognitionService;
+  if (!prefValue.get() || prefValue.IsEmpty()) {
+    speechRecognitionService = DEFAULT_RECOGNITION_SERVICE;
+  } else {
+    speechRecognitionService = prefValue;
+  }
+
+  aResultCID =
+    NS_LITERAL_CSTRING(NS_SPEECH_RECOGNITION_SERVICE_CONTRACTID_PREFIX) +
+    speechRecognitionService;
+
+  return;
+}
+
+/****************************
+ * FSM Transition functions *
+ ****************************/
+
+SpeechRecognition::FSMState
+SpeechRecognition::Reset()
+{
+  mRecognitionService = nullptr;
+  mEstimationSamples = 0;
+  mSpeechDetectionTimer->Cancel();
+
+  return STATE_IDLE;
+}
+
+/*
+ * Since the handler for "end" may call
+ * start(), we want to fully reset before dispatching
+ * the event.
+ */
+SpeechRecognition::FSMState
+SpeechRecognition::ResetAndEnd()
+{
+  mCurrentState = Reset();
+  DispatchTrustedEvent(NS_LITERAL_STRING("end"));
+  return mCurrentState;
+}
+
+SpeechRecognition::FSMState
+SpeechRecognition::StartedAudioCapture(SpeechEvent* aEvent)
+{
+  mEndpointer.SetEnvironmentEstimationMode();
+  mEstimationSamples += ProcessAudioSegment(aEvent->mAudioSegment);
+
+  DispatchTrustedEvent(NS_LITERAL_STRING("start"));
+  DispatchTrustedEvent(NS_LITERAL_STRING("audiostart"));
+
+  return STATE_ESTIMATING;
+}
+
+SpeechRecognition::FSMState
+SpeechRecognition::StopRecordingAndRecognize(SpeechEvent* aEvent)
+{
+  StopRecording();
+  MOZ_ASSERT(mRecognitionService, "Service deleted before recording done");
+  mRecognitionService->SoundEnd();
+
+  return STATE_WAITING_FOR_RESULT;
+}
+
+SpeechRecognition::FSMState
+SpeechRecognition::WaitForEstimation(SpeechEvent* aEvent)
+{
+  mEstimationSamples += ProcessAudioSegment(aEvent->mAudioSegment);
+
+  if (mEstimationSamples > kESTIMATION_SAMPLES) {
+    mEndpointer.SetUserInputMode();
+    return STATE_WAITING_FOR_SPEECH;
+  }
+
+  return STATE_ESTIMATING;
+}
+
+SpeechRecognition::FSMState
+SpeechRecognition::DetectSpeech(SpeechEvent* aEvent)
+{
+  ProcessAudioSegment(aEvent->mAudioSegment);
+
+  if (mEndpointer.DidStartReceivingSpeech()) {
+    mSpeechDetectionTimer->Cancel();
+    DispatchTrustedEvent(NS_LITERAL_STRING("speechstart"));
+    return STATE_RECOGNIZING;
+  }
+
+  return STATE_WAITING_FOR_SPEECH;
+}
+
+SpeechRecognition::FSMState
+SpeechRecognition::WaitForSpeechEnd(SpeechEvent* aEvent)
+{
+  ProcessAudioSegment(aEvent->mAudioSegment);
+
+  if (mEndpointer.speech_input_complete()) {
+    // FIXME: StopRecordingAndRecognize should only be called for single
+    // shot services for continous we should just inform the service
+    DispatchTrustedEvent(NS_LITERAL_STRING("speechend"));
+    return StopRecordingAndRecognize(aEvent);
+  }
+
+   return STATE_RECOGNIZING;
+}
+
+SpeechRecognition::FSMState
+SpeechRecognition::NotifyFinalResult(SpeechEvent* aEvent)
+{
+  nsCOMPtr<nsIDOMEvent> domEvent;
+  NS_NewDOMSpeechRecognitionEvent(getter_AddRefs(domEvent), nullptr, nullptr, nullptr);
+
+  nsCOMPtr<nsIDOMSpeechRecognitionEvent> srEvent = do_QueryInterface(domEvent);
+  nsRefPtr<SpeechRecognitionResultList> rlist = aEvent->mRecognitionResultList;
+  nsCOMPtr<nsISupports> ilist = do_QueryInterface(rlist);
+  srEvent->InitSpeechRecognitionEvent(NS_LITERAL_STRING("result"),
+                                      true, false, 0, ilist,
+                                      NS_LITERAL_STRING("NOT_IMPLEMENTED"),
+                                      NULL);
+  domEvent->SetTrusted(true);
+
+  bool defaultActionEnabled;
+  this->DispatchEvent(domEvent, &defaultActionEnabled);
+  return ResetAndEnd();
+}
+
+SpeechRecognition::FSMState
+SpeechRecognition::DoNothing(SpeechEvent* aEvent)
+{
+  return mCurrentState;
+}
+
+SpeechRecognition::FSMState
+SpeechRecognition::AbortSilently(SpeechEvent* aEvent)
+{
+  if (mRecognitionService) {
+    mRecognitionService->Abort();
+  }
+
+  if (STATE_BETWEEN(STATE_ESTIMATING, STATE_RECOGNIZING)) {
+    StopRecording();
+  }
+
+  return ResetAndEnd();
+}
+
+SpeechRecognition::FSMState
+SpeechRecognition::AbortError(SpeechEvent* aEvent)
+{
+  FSMState nextState = AbortSilently(aEvent);
+  NotifyError(aEvent);
+  return nextState;
+}
+
+void
+SpeechRecognition::NotifyError(SpeechEvent* aEvent)
+{
+  nsCOMPtr<nsIDOMEvent> domEvent = do_QueryInterface(aEvent->mError);
+  domEvent->SetTrusted(true);
+
+  bool defaultActionEnabled;
+  this->DispatchEvent(domEvent, &defaultActionEnabled);
+
+  return;
+}
+
+/**************************************
+ * Event triggers and other functions *
+ **************************************/
+NS_IMETHODIMP
+SpeechRecognition::StartRecording(DOMLocalMediaStream* aDOMStream)
+{
+  // hold a reference so that the underlying stream
+  // doesn't get Destroy()'ed
+  mDOMStream = aDOMStream;
+
+  NS_ENSURE_STATE(mDOMStream->GetStream());
+  mDOMStream->GetStream()->AddListener(new SpeechStreamListener(this));
+
+  mEndpointer.StartSession();
+
+  return mSpeechDetectionTimer->Init(this, kSPEECH_DETECTION_TIMEOUT_MS,
+                                     nsITimer::TYPE_ONE_SHOT);
+}
+
+NS_IMETHODIMP
+SpeechRecognition::StopRecording()
+{
+  mDOMStream = nullptr;
+
+  mEndpointer.EndSession();
+  DispatchTrustedEvent(NS_LITERAL_STRING("audioend"));
+
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+SpeechRecognition::Observe(nsISupports* aSubject, const char* aTopic,
+                           const PRUnichar* aData)
+{
+  MOZ_ASSERT(NS_IsMainThread(), "Observer invoked off the main thread");
+
+  if (!strcmp(aTopic, NS_TIMER_CALLBACK_TOPIC) &&
+      STATE_BETWEEN(STATE_IDLE, STATE_WAITING_FOR_SPEECH)) {
+
+    DispatchError(SpeechRecognition::EVENT_AUDIO_ERROR,
+                  nsIDOMSpeechRecognitionError::NO_SPEECH,
+                  NS_LITERAL_STRING("No speech detected (timeout)"));
+  }
+
+  return NS_OK;
+}
+
+already_AddRefed<SpeechGrammarList>
+SpeechRecognition::GetGrammars(ErrorResult& aRv) const
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return nullptr;
+}
+
+void
+SpeechRecognition::SetGrammars(mozilla::dom::SpeechGrammarList& aArg,
+                               ErrorResult& aRv)
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return;
+}
+
+void
+SpeechRecognition::GetLang(nsString& aRetVal, ErrorResult& aRv) const
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return;
+}
+
+void
+SpeechRecognition::SetLang(const nsAString& aArg, ErrorResult& aRv)
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return;
+}
+
+bool
+SpeechRecognition::GetContinuous(ErrorResult& aRv) const
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return false;
+}
+
+void
+SpeechRecognition::SetContinuous(bool aArg, ErrorResult& aRv)
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return;
+}
+
+bool
+SpeechRecognition::GetInterimResults(ErrorResult& aRv) const
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return false;
+}
+
+void
+SpeechRecognition::SetInterimResults(bool aArg, ErrorResult& aRv)
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return;
+}
+
+uint32_t
+SpeechRecognition::GetMaxAlternatives(ErrorResult& aRv) const
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return 0;
+}
+
+void
+SpeechRecognition::SetMaxAlternatives(uint32_t aArg, ErrorResult& aRv)
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return;
+}
+
+void
+SpeechRecognition::GetServiceURI(nsString& aRetVal, ErrorResult& aRv) const
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return;
+}
+
+void
+SpeechRecognition::SetServiceURI(const nsAString& aArg, ErrorResult& aRv)
+{
+  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  return;
+}
+
+void
+SpeechRecognition::Start(ErrorResult& aRv)
+{
+  if (!STATE_EQUALS(STATE_IDLE)) {
+    aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR);
+    return;
+  }
+
+  nsAutoCString speechRecognitionServiceCID;
+  GetRecognitionServiceCID(speechRecognitionServiceCID);
+
+  nsresult rv;
+  mRecognitionService = do_GetService(speechRecognitionServiceCID.get(), &rv);
+  MOZ_ASSERT(mRecognitionService.get(),
+             "failed to instantiate recognition service");
+
+  rv = mRecognitionService->Initialize(this->asWeakPtr());
+  NS_ENSURE_SUCCESS_VOID(rv);
+
+  MediaManager* manager = MediaManager::Get();
+  manager->GetUserMedia(false,
+                        GetOwner(),
+                        new GetUserMediaStreamOptions(),
+                        new GetUserMediaSuccessCallback(this),
+                        new GetUserMediaErrorCallback(this));
+
+  nsRefPtr<SpeechEvent> event = new SpeechEvent(this, EVENT_START);
+  NS_DispatchToMainThread(event);
+}
+
+void
+SpeechRecognition::Stop()
+{
+  nsRefPtr<SpeechEvent> event = new SpeechEvent(this, EVENT_STOP);
+  NS_DispatchToMainThread(event);
+}
+
+void
+SpeechRecognition::Abort()
+{
+  nsRefPtr<SpeechEvent> event = new SpeechEvent(this, EVENT_ABORT);
+  NS_DispatchToMainThread(event);
+}
+
+void
+SpeechRecognition::DispatchError(EventType aErrorType, int aErrorCode,
+                                 const nsAString& aMessage)
+{
+  MOZ_ASSERT(NS_IsMainThread());
+  MOZ_ASSERT(aErrorType == EVENT_RECOGNITIONSERVICE_ERROR ||
+             aErrorType == EVENT_AUDIO_ERROR, "Invalid error type!");
+
+  nsCOMPtr<nsIDOMEvent> domEvent;
+  NS_NewDOMSpeechRecognitionError(getter_AddRefs(domEvent), nullptr, nullptr, nullptr);
+
+  nsCOMPtr<nsIDOMSpeechRecognitionError> srError = do_QueryInterface(domEvent);
+  srError->InitSpeechRecognitionError(NS_LITERAL_STRING("error"), true, false,
+                                      aErrorCode, aMessage);
+  nsRefPtr<SpeechEvent> event = new SpeechEvent(this, aErrorType);
+  event->mError = srError;
+  NS_DispatchToMainThread(event);
+}
+
+void
+SpeechRecognition::FeedAudioData(already_AddRefed<SharedBuffer> aSamples,
+                                 uint32_t aDuration,
+                                 MediaStreamListener* aProvider)
+{
+  MOZ_ASSERT(!NS_IsMainThread(),
+             "FeedAudioData should not be called in the main thread");
+
+  AudioSegment* segment = new AudioSegment();
+
+  nsAutoTArray<const int16_t*, 1> channels;
+  channels.AppendElement(static_cast<const int16_t*>(aSamples.get()->Data()));
+  segment->AppendFrames(aSamples, channels, aDuration);
+
+  nsRefPtr<SpeechEvent> event = new SpeechEvent(this, EVENT_AUDIO_DATA);
+  event->mAudioSegment = segment;
+  event->mProvider = aProvider;
+  NS_DispatchToMainThread(event);
+
+  return;
+}
+
+NS_IMPL_ISUPPORTS1(SpeechRecognition::GetUserMediaStreamOptions, nsIMediaStreamOptions)
+
+NS_IMETHODIMP
+SpeechRecognition::GetUserMediaStreamOptions::GetFake(bool* aFake)
+{
+  *aFake = false;
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+SpeechRecognition::GetUserMediaStreamOptions::GetAudio(bool* aAudio)
+{
+  *aAudio = true;
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+SpeechRecognition::GetUserMediaStreamOptions::GetVideo(bool* aVideo)
+{
+  *aVideo = false;
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+SpeechRecognition::GetUserMediaStreamOptions::GetPicture(bool* aPicture)
+{
+  *aPicture = false;
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+SpeechRecognition::GetUserMediaStreamOptions::GetCamera(nsAString& aCamera)
+{
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+SpeechRecognition::GetUserMediaStreamOptions::GetAudioDevice(nsIMediaDevice** aAudioDevice)
+{
+  *aAudioDevice = nullptr;
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+SpeechRecognition::GetUserMediaStreamOptions::GetVideoDevice(nsIMediaDevice** aVideoDevice)
+{
+  *aVideoDevice = nullptr;
+  return NS_OK;
+}
+
+SpeechEvent::~SpeechEvent()
+{
+  delete mAudioSegment;
+}
+
+NS_IMETHODIMP
+SpeechEvent::Run()
+{
+  mRecognition->ProcessEvent(this);
+  return NS_OK;
+}
+
+NS_IMPL_ISUPPORTS1(SpeechRecognition::GetUserMediaSuccessCallback, nsIDOMGetUserMediaSuccessCallback)
+
+NS_IMETHODIMP
+SpeechRecognition::GetUserMediaSuccessCallback::OnSuccess(nsISupports* aStream)
+{
+  nsCOMPtr<nsIDOMLocalMediaStream> localStream = do_QueryInterface(aStream);
+  mRecognition->StartRecording(static_cast<DOMLocalMediaStream*>(localStream.get()));
+  return NS_OK;
+}
+
+NS_IMPL_ISUPPORTS1(SpeechRecognition::GetUserMediaErrorCallback, nsIDOMGetUserMediaErrorCallback)
+
+NS_IMETHODIMP
+SpeechRecognition::GetUserMediaErrorCallback::OnError(const nsAString& aError)
+{
+  int errorCode;
+
+  if (aError.Equals(NS_LITERAL_STRING("PERMISSION_DENIED"))) {
+    errorCode = nsIDOMSpeechRecognitionError::NOT_ALLOWED;
+  } else {
+    errorCode = nsIDOMSpeechRecognitionError::AUDIO_CAPTURE;
+  }
+
+  mRecognition->DispatchError(SpeechRecognition::EVENT_AUDIO_ERROR, errorCode,
+                              aError);
+
+  return NS_OK;
+}
+
+} // namespace dom
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechRecognition.h
@@ -0,0 +1,249 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#pragma once
+
+#include "nsCOMPtr.h"
+#include "nsDOMEventTargetHelper.h"
+#include "nsString.h"
+#include "nsWrapperCache.h"
+#include "nsIDOMNavigatorUserMedia.h"
+#include "nsTArray.h"
+
+#include "MediaManager.h"
+#include "MediaEngine.h"
+#include "MediaStreamGraph.h"
+#include "AudioSegment.h"
+#include "mozilla/WeakPtr.h"
+
+#include "EnableWebSpeechRecognitionCheck.h"
+#include "SpeechGrammarList.h"
+#include "SpeechRecognitionResultList.h"
+#include "nsISpeechRecognitionService.h"
+#include "endpointer.h"
+
+#include "nsIDOMSpeechRecognitionError.h"
+
+struct JSContext;
+class nsIDOMWindow;
+
+namespace mozilla {
+
+namespace dom {
+
+class GlobalObject;
+class SpeechEvent;
+
+#ifdef PR_LOGGING
+PRLogModuleInfo* GetSpeechRecognitionLog();
+#define SR_LOG(...) PR_LOG(GetSpeechRecognitionLog(), PR_LOG_DEBUG, (__VA_ARGS__))
+#else
+#define SR_LOG(...)
+#endif
+
+class SpeechRecognition MOZ_FINAL : public nsDOMEventTargetHelper,
+                                    public nsIObserver,
+                                    public EnableWebSpeechRecognitionCheck,
+                                    public SupportsWeakPtr<SpeechRecognition>
+{
+public:
+  SpeechRecognition();
+  virtual ~SpeechRecognition() {};
+
+  NS_DECL_ISUPPORTS_INHERITED
+
+  NS_DECL_NSIOBSERVER
+
+  nsISupports* GetParentObject() const;
+
+  virtual JSObject* WrapObject(JSContext* aCx, JSObject* aScope);
+
+  static already_AddRefed<SpeechRecognition> Constructor(const GlobalObject& aGlobal, ErrorResult& aRv);
+
+  already_AddRefed<SpeechGrammarList> GetGrammars(ErrorResult& aRv) const;
+
+  void SetGrammars(mozilla::dom::SpeechGrammarList& aArg, ErrorResult& aRv);
+
+  void GetLang(nsString& aRetVal, ErrorResult& aRv) const;
+
+  void SetLang(const nsAString& aArg, ErrorResult& aRv);
+
+  bool GetContinuous(ErrorResult& aRv) const;
+
+  void SetContinuous(bool aArg, ErrorResult& aRv);
+
+  bool GetInterimResults(ErrorResult& aRv) const;
+
+  void SetInterimResults(bool aArg, ErrorResult& aRv);
+
+  uint32_t GetMaxAlternatives(ErrorResult& aRv) const;
+
+  void SetMaxAlternatives(uint32_t aArg, ErrorResult& aRv);
+
+  void GetServiceURI(nsString& aRetVal, ErrorResult& aRv) const;
+
+  void SetServiceURI(const nsAString& aArg, ErrorResult& aRv);
+
+  void Start(ErrorResult& aRv);
+
+  void Stop();
+
+  void Abort();
+
+  IMPL_EVENT_HANDLER(audiostart)
+  IMPL_EVENT_HANDLER(soundstart)
+  IMPL_EVENT_HANDLER(speechstart)
+  IMPL_EVENT_HANDLER(speechend)
+  IMPL_EVENT_HANDLER(soundend)
+  IMPL_EVENT_HANDLER(audioend)
+  IMPL_EVENT_HANDLER(result)
+  IMPL_EVENT_HANDLER(nomatch)
+  IMPL_EVENT_HANDLER(error)
+  IMPL_EVENT_HANDLER(start)
+  IMPL_EVENT_HANDLER(end)
+
+  enum EventType {
+    EVENT_START,
+    EVENT_STOP,
+    EVENT_ABORT,
+    EVENT_AUDIO_DATA,
+    EVENT_AUDIO_ERROR,
+    EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT,
+    EVENT_RECOGNITIONSERVICE_FINAL_RESULT,
+    EVENT_RECOGNITIONSERVICE_ERROR
+  };
+
+  void DispatchError(EventType aErrorType, int aErrorCode, const nsAString& aMessage);
+  void FeedAudioData(already_AddRefed<SharedBuffer> aSamples, uint32_t aDuration, MediaStreamListener* aProvider);
+
+  friend class SpeechEvent;
+private:
+  enum FSMState {
+    STATE_IDLE,
+    STATE_STARTING,
+    STATE_ESTIMATING,
+    STATE_WAITING_FOR_SPEECH,
+    STATE_RECOGNIZING,
+    STATE_WAITING_FOR_RESULT,
+  };
+
+  class GetUserMediaStreamOptions : public nsIMediaStreamOptions
+  {
+  public:
+    NS_DECL_ISUPPORTS
+    NS_DECL_NSIMEDIASTREAMOPTIONS
+
+    GetUserMediaStreamOptions() {}
+    virtual ~GetUserMediaStreamOptions() {}
+  };
+
+  class GetUserMediaSuccessCallback : public nsIDOMGetUserMediaSuccessCallback
+  {
+  public:
+    NS_DECL_ISUPPORTS
+    NS_DECL_NSIDOMGETUSERMEDIASUCCESSCALLBACK
+
+    GetUserMediaSuccessCallback(SpeechRecognition* aRecognition)
+      : mRecognition(aRecognition)
+    {}
+
+    virtual ~GetUserMediaSuccessCallback() {}
+
+  private:
+    nsRefPtr<SpeechRecognition> mRecognition;
+  };
+
+  class GetUserMediaErrorCallback : public nsIDOMGetUserMediaErrorCallback
+  {
+  public:
+    NS_DECL_ISUPPORTS
+    NS_DECL_NSIDOMGETUSERMEDIAERRORCALLBACK
+
+    GetUserMediaErrorCallback(SpeechRecognition* aRecognition)
+      : mRecognition(aRecognition)
+    {}
+
+    virtual ~GetUserMediaErrorCallback() {}
+
+  private:
+    nsRefPtr<SpeechRecognition> mRecognition;
+  };
+
+  NS_IMETHOD StartRecording(DOMLocalMediaStream* aDOMStream);
+  NS_IMETHOD StopRecording();
+
+  uint32_t ProcessAudioSegment(AudioSegment* aSegment);
+  void NotifyError(SpeechEvent* aEvent);
+
+  void ProcessEvent(SpeechEvent* aEvent);
+  FSMState TransitionAndGetNextState(SpeechEvent* aEvent);
+
+  FSMState Reset();
+  FSMState ResetAndEnd();
+  FSMState StartedAudioCapture(SpeechEvent* aEvent);
+  FSMState StopRecordingAndRecognize(SpeechEvent* aEvent);
+  FSMState WaitForEstimation(SpeechEvent* aEvent);
+  FSMState DetectSpeech(SpeechEvent* aEvent);
+  FSMState WaitForSpeechEnd(SpeechEvent* aEvent);
+  FSMState NotifyFinalResult(SpeechEvent* aEvent);
+  FSMState DoNothing(SpeechEvent* aEvent);
+  FSMState AbortSilently(SpeechEvent* aEvent);
+  FSMState AbortError(SpeechEvent* aEvent);
+
+  nsRefPtr<DOMLocalMediaStream> mDOMStream;
+  nsCOMPtr<nsISpeechRecognitionService> mRecognitionService;
+
+  void GetRecognitionServiceCID(nsACString& aResultCID);
+
+  FSMState mCurrentState;
+  bool mProcessingEvent;
+
+  Endpointer mEndpointer;
+  uint32_t mEstimationSamples;
+
+  nsCOMPtr<nsITimer> mSpeechDetectionTimer;
+
+};
+
+class SpeechEvent : public nsRunnable
+{
+public:
+  SpeechEvent(SpeechRecognition* aRecognition, SpeechRecognition::EventType aType)
+  : mAudioSegment(0)
+  , mRecognitionResultList(0)
+  , mError(0)
+  , mRecognition(aRecognition)
+  , mType(aType)
+  {
+  }
+
+  ~SpeechEvent();
+
+  NS_IMETHOD Run();
+  AudioSegment* mAudioSegment;
+  nsRefPtr<SpeechRecognitionResultList> mRecognitionResultList; // TODO: make this a session being passed which also has index and stuff
+  nsCOMPtr<nsIDOMSpeechRecognitionError> mError;
+
+  friend class SpeechRecognition;
+private:
+  SpeechRecognition* mRecognition;
+
+  // for AUDIO_DATA events, keep a reference to the provider
+  // of the data (i.e., the SpeechStreamListener) to ensure it
+  // is kept alive (and keeps SpeechRecognition alive) until this
+  // event gets processed.
+  nsRefPtr<MediaStreamListener> mProvider;
+  SpeechRecognition::EventType mType;
+};
+
+} // namespace dom
+
+inline nsISupports*
+ToSupports(dom::SpeechRecognition* aRec)
+{
+  return static_cast<nsIObserver*>(aRec);
+}
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechRecognitionAlternative.cpp
@@ -0,0 +1,62 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "SpeechRecognitionAlternative.h"
+
+#include "nsContentUtils.h"
+
+#include "mozilla/dom/SpeechRecognitionAlternativeBinding.h"
+
+#include "SpeechRecognition.h"
+
+namespace mozilla {
+namespace dom {
+
+NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE_1(SpeechRecognitionAlternative, mParent)
+NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechRecognitionAlternative)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechRecognitionAlternative)
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechRecognitionAlternative)
+  NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY
+  NS_INTERFACE_MAP_ENTRY(nsISupports)
+NS_INTERFACE_MAP_END
+
+SpeechRecognitionAlternative::SpeechRecognitionAlternative(SpeechRecognition* aParent)
+  : mTranscript(NS_LITERAL_STRING(""))
+  , mConfidence(0)
+  , mParent(aParent)
+{
+  SetIsDOMBinding();
+}
+
+SpeechRecognitionAlternative::~SpeechRecognitionAlternative()
+{
+}
+
+JSObject*
+SpeechRecognitionAlternative::WrapObject(JSContext* aCx, JSObject* aScope)
+{
+  return SpeechRecognitionAlternativeBinding::Wrap(aCx, aScope, this);
+}
+
+nsISupports*
+SpeechRecognitionAlternative::GetParentObject() const
+{
+  return static_cast<nsDOMEventTargetHelper*>(mParent.get());
+}
+
+void
+SpeechRecognitionAlternative::GetTranscript(nsString& aRetVal) const
+{
+  aRetVal = mTranscript;
+}
+
+float
+SpeechRecognitionAlternative::Confidence() const
+{
+  return mConfidence;
+}
+} // namespace dom
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechRecognitionAlternative.h
@@ -0,0 +1,51 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#pragma once
+
+#include "nsCycleCollectionParticipant.h"
+#include "nsString.h"
+#include "nsWrapperCache.h"
+#include "nsAutoPtr.h"
+
+#include "mozilla/Attributes.h"
+
+#include "EnableWebSpeechRecognitionCheck.h"
+
+struct JSContext;
+
+namespace mozilla {
+namespace dom {
+
+class SpeechRecognition;
+
+class SpeechRecognitionAlternative MOZ_FINAL : public nsISupports,
+                                               public nsWrapperCache,
+                                               public EnableWebSpeechRecognitionCheck
+{
+public:
+  SpeechRecognitionAlternative(SpeechRecognition* aParent);
+  ~SpeechRecognitionAlternative();
+
+  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+  NS_DECL_CYCLE_COLLECTION_SCRIPT_HOLDER_CLASS(SpeechRecognitionAlternative)
+
+  nsISupports* GetParentObject() const;
+
+  virtual JSObject* WrapObject(JSContext* aCx, JSObject* aScope);
+
+  void GetTranscript(nsString& aRetVal) const;
+
+  float Confidence() const;
+
+  nsString mTranscript;
+  float mConfidence;
+private:
+  nsRefPtr<SpeechRecognition> mParent;
+};
+
+} // namespace dom
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechRecognitionResult.cpp
@@ -0,0 +1,78 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsContentUtils.h"
+
+#include "SpeechRecognitionResult.h"
+#include "mozilla/dom/SpeechRecognitionResultBinding.h"
+
+#include "SpeechRecognition.h"
+
+namespace mozilla {
+namespace dom {
+
+NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE_1(SpeechRecognitionResult, mParent)
+NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechRecognitionResult)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechRecognitionResult)
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechRecognitionResult)
+  NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY
+  NS_INTERFACE_MAP_ENTRY(nsISupports)
+NS_INTERFACE_MAP_END
+
+SpeechRecognitionResult::SpeechRecognitionResult(SpeechRecognition* aParent)
+  : mParent(aParent)
+{
+  SetIsDOMBinding();
+}
+
+SpeechRecognitionResult::~SpeechRecognitionResult()
+{
+}
+
+JSObject*
+SpeechRecognitionResult::WrapObject(JSContext* aCx, JSObject* aScope)
+{
+  return SpeechRecognitionResultBinding::Wrap(aCx, aScope, this);
+}
+
+nsISupports*
+SpeechRecognitionResult::GetParentObject() const
+{
+  return static_cast<nsDOMEventTargetHelper*>(mParent.get());
+}
+
+already_AddRefed<SpeechRecognitionAlternative>
+SpeechRecognitionResult::IndexedGetter(uint32_t aIndex, bool& aPresent)
+{
+  if (aIndex >= Length()) {
+    aPresent = false;
+    return nullptr;
+  }
+
+  aPresent = true;
+  return Item(aIndex);
+}
+
+uint32_t
+SpeechRecognitionResult::Length() const
+{
+  return mItems.Length();
+}
+
+already_AddRefed<SpeechRecognitionAlternative>
+SpeechRecognitionResult::Item(uint32_t aIndex)
+{
+  nsRefPtr<SpeechRecognitionAlternative> alternative = mItems.ElementAt(aIndex);
+  return alternative.forget();
+}
+
+bool
+SpeechRecognitionResult::Final() const
+{
+  return true; // TODO
+}
+} // namespace dom
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechRecognitionResult.h
@@ -0,0 +1,54 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#pragma once
+
+#include "nsCOMPtr.h"
+#include "nsCycleCollectionParticipant.h"
+#include "nsWrapperCache.h"
+#include "nsAutoPtr.h"
+#include "nsTArray.h"
+
+#include "mozilla/Attributes.h"
+
+#include "EnableWebSpeechRecognitionCheck.h"
+#include "SpeechRecognitionAlternative.h"
+
+struct JSContext;
+
+namespace mozilla {
+namespace dom {
+
+class SpeechRecognitionResult MOZ_FINAL : public nsISupports,
+                                          public nsWrapperCache,
+                                          public EnableWebSpeechRecognitionCheck
+{
+public:
+  SpeechRecognitionResult(SpeechRecognition* aParent);
+  ~SpeechRecognitionResult();
+
+  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+  NS_DECL_CYCLE_COLLECTION_SCRIPT_HOLDER_CLASS(SpeechRecognitionResult)
+
+  nsISupports* GetParentObject() const;
+
+  virtual JSObject* WrapObject(JSContext* aCx, JSObject* aScope);
+
+  uint32_t Length() const;
+
+  already_AddRefed<SpeechRecognitionAlternative> Item(uint32_t aIndex);
+
+  bool Final() const;
+
+  already_AddRefed<SpeechRecognitionAlternative> IndexedGetter(uint32_t aIndex, bool& aPresent);
+
+  nsTArray<nsRefPtr<SpeechRecognitionAlternative> > mItems;
+private:
+  nsRefPtr<SpeechRecognition> mParent;
+};
+
+} // namespace dom
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechRecognitionResultList.cpp
@@ -0,0 +1,73 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "SpeechRecognitionResultList.h"
+
+#include "nsContentUtils.h"
+
+#include "mozilla/dom/SpeechRecognitionResultListBinding.h"
+
+#include "SpeechRecognition.h"
+
+namespace mozilla {
+namespace dom {
+
+NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE_2(SpeechRecognitionResultList, mParent, mItems)
+NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechRecognitionResultList)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechRecognitionResultList)
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechRecognitionResultList)
+  NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY
+  NS_INTERFACE_MAP_ENTRY(nsISupports)
+NS_INTERFACE_MAP_END
+
+SpeechRecognitionResultList::SpeechRecognitionResultList(SpeechRecognition* aParent)
+  : mParent(aParent)
+{
+  SetIsDOMBinding();
+}
+
+SpeechRecognitionResultList::~SpeechRecognitionResultList()
+{
+}
+
+nsISupports*
+SpeechRecognitionResultList::GetParentObject() const
+{
+  return static_cast<nsDOMEventTargetHelper*>(mParent.get());
+}
+
+JSObject*
+SpeechRecognitionResultList::WrapObject(JSContext* aCx, JSObject* aScope)
+{
+  return SpeechRecognitionResultListBinding::Wrap(aCx, aScope, this);
+}
+
+already_AddRefed<SpeechRecognitionResult>
+SpeechRecognitionResultList::IndexedGetter(uint32_t aIndex, bool& aPresent)
+{
+  if (aIndex >= Length()) {
+    aPresent = false;
+    return nullptr;
+  }
+
+  aPresent = true;
+  return Item(aIndex);
+}
+
+uint32_t
+SpeechRecognitionResultList::Length() const
+{
+  return mItems.Length();
+}
+
+already_AddRefed<SpeechRecognitionResult>
+SpeechRecognitionResultList::Item(uint32_t aIndex)
+{
+  nsRefPtr<SpeechRecognitionResult> result = mItems.ElementAt(aIndex);
+  return result.forget();
+}
+} // namespace dom
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechRecognitionResultList.h
@@ -0,0 +1,53 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#pragma once
+
+#include "nsCycleCollectionParticipant.h"
+#include "nsWrapperCache.h"
+#include "nsAutoPtr.h"
+#include "nsTArray.h"
+
+#include "mozilla/Attributes.h"
+
+#include "EnableWebSpeechRecognitionCheck.h"
+#include "SpeechRecognitionResult.h"
+
+struct JSContext;
+
+namespace mozilla {
+namespace dom {
+
+class SpeechRecognition;
+
+class SpeechRecognitionResultList MOZ_FINAL : public nsISupports,
+                                              public nsWrapperCache,
+                                              public EnableWebSpeechRecognitionCheck
+{
+public:
+  SpeechRecognitionResultList(SpeechRecognition* aParent);
+  ~SpeechRecognitionResultList();
+
+  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+  NS_DECL_CYCLE_COLLECTION_SCRIPT_HOLDER_CLASS(SpeechRecognitionResultList)
+
+  nsISupports* GetParentObject() const;
+
+  virtual JSObject* WrapObject(JSContext* aCx, JSObject* aScope);
+
+  uint32_t Length() const;
+
+  already_AddRefed<SpeechRecognitionResult> Item(uint32_t aIndex);
+
+  already_AddRefed<SpeechRecognitionResult> IndexedGetter(uint32_t aIndex, bool& aPresent);
+
+  nsTArray<nsRefPtr<SpeechRecognitionResult> > mItems;
+private:
+  nsRefPtr<SpeechRecognition> mParent;
+};
+
+} // namespace dom
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechStreamListener.cpp
@@ -0,0 +1,81 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "SpeechStreamListener.h"
+
+#include "SpeechRecognition.h"
+
+namespace mozilla {
+namespace dom {
+
+SpeechStreamListener::SpeechStreamListener(SpeechRecognition* aRecognition)
+  : mRecognition(aRecognition)
+{
+}
+
+SpeechStreamListener::~SpeechStreamListener()
+{
+  nsCOMPtr<nsIThread> mainThread;
+  NS_GetMainThread(getter_AddRefs(mainThread));
+
+  SpeechRecognition* forgottenRecognition = nullptr;
+  mRecognition.swap(forgottenRecognition);
+  NS_ProxyRelease(mainThread,
+                  static_cast<nsDOMEventTargetHelper*>(forgottenRecognition));
+}
+
+void
+SpeechStreamListener::NotifyQueuedTrackChanges(MediaStreamGraph* aGraph,
+                                               TrackID aID,
+                                               TrackRate aTrackRate,
+                                               TrackTicks aTrackOffset,
+                                               uint32_t aTrackEvents,
+                                               const MediaSegment& aQueuedMedia)
+{
+  AudioSegment* audio = const_cast<AudioSegment*>(
+    static_cast<const AudioSegment*>(&aQueuedMedia));
+
+  AudioSegment::ChunkIterator iterator(*audio);
+  while (!iterator.IsEnded()) {
+    AudioSampleFormat format = iterator->mBufferFormat;
+
+    MOZ_ASSERT(format == AUDIO_FORMAT_S16 || format == AUDIO_FORMAT_FLOAT32);
+
+    if (format == AUDIO_FORMAT_S16) {
+      ConvertAndDispatchAudioChunk<int16_t>(*iterator);
+    } else if (format == AUDIO_FORMAT_FLOAT32) {
+      ConvertAndDispatchAudioChunk<float>(*iterator);
+    }
+
+    iterator.Next();
+  }
+}
+
+template<typename SampleFormatType> void
+SpeechStreamListener::ConvertAndDispatchAudioChunk(AudioChunk& aChunk)
+{
+  nsRefPtr<SharedBuffer> samples(SharedBuffer::Create(aChunk.mDuration *
+                                                      1 * // channel
+                                                      sizeof(int16_t)));
+
+  const SampleFormatType* from =
+    static_cast<const SampleFormatType*>(aChunk.mChannelData[0]);
+
+  int16_t* to = static_cast<int16_t*>(samples->Data());
+  ConvertAudioSamplesWithScale(from, to, aChunk.mDuration, aChunk.mVolume);
+
+  mRecognition->FeedAudioData(samples.forget(), aChunk.mDuration, this);
+  return;
+}
+
+void
+SpeechStreamListener::NotifyFinished(MediaStreamGraph* aGraph)
+{
+  // TODO dispatch SpeechEnd event so services can be informed
+}
+
+} // namespace dom
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/SpeechStreamListener.h
@@ -0,0 +1,40 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#pragma once
+
+#include "MediaStreamGraph.h"
+#include "AudioSegment.h"
+
+namespace mozilla {
+
+class AudioSegment;
+
+namespace dom {
+
+class SpeechRecognition;
+
+class SpeechStreamListener : public MediaStreamListener
+{
+public:
+  SpeechStreamListener(SpeechRecognition* aRecognition);
+  ~SpeechStreamListener();
+
+  void NotifyQueuedTrackChanges(MediaStreamGraph* aGraph, TrackID aID,
+                                TrackRate aTrackRate,
+                                TrackTicks aTrackOffset,
+                                uint32_t aTrackEvents,
+                                const MediaSegment& aQueuedMedia);
+
+  void NotifyFinished(MediaStreamGraph* aGraph);
+
+private:
+  template<typename SampleFormatType> void ConvertAndDispatchAudioChunk(AudioChunk& aChunk);
+  nsRefPtr<SpeechRecognition> mRecognition;
+};
+
+} // namespace dom
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/endpointer.cc
@@ -0,0 +1,193 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "endpointer.h"
+
+#include "AudioSegment.h"
+
+namespace {
+const int kFrameRate = 200;  // 1 frame = 5ms of audio.
+}
+
+namespace mozilla {
+
+Endpointer::Endpointer(int sample_rate)
+    : speech_input_possibly_complete_silence_length_us_(-1),
+      speech_input_complete_silence_length_us_(-1),
+      audio_frame_time_us_(0),
+      sample_rate_(sample_rate),
+      frame_size_(0) {
+  Reset();
+
+  frame_size_ = static_cast<int>(sample_rate / static_cast<float>(kFrameRate));
+
+  speech_input_minimum_length_us_ =
+      static_cast<int64_t>(1.7 * 1000000);
+  speech_input_complete_silence_length_us_ =
+      static_cast<int64_t>(0.5 * 1000000);
+  long_speech_input_complete_silence_length_us_ = -1;
+  long_speech_length_us_ = -1;
+  speech_input_possibly_complete_silence_length_us_ =
+      1 * 1000000;
+
+  // Set the default configuration for Push To Talk mode.
+  EnergyEndpointerParams ep_config;
+  ep_config.set_frame_period(1.0f / static_cast<float>(kFrameRate));
+  ep_config.set_frame_duration(1.0f / static_cast<float>(kFrameRate));
+  ep_config.set_endpoint_margin(0.2f);
+  ep_config.set_onset_window(0.15f);
+  ep_config.set_speech_on_window(0.4f);
+  ep_config.set_offset_window(0.15f);
+  ep_config.set_onset_detect_dur(0.09f);
+  ep_config.set_onset_confirm_dur(0.075f);
+  ep_config.set_on_maintain_dur(0.10f);
+  ep_config.set_offset_confirm_dur(0.12f);
+  ep_config.set_decision_threshold(1000.0f);
+  ep_config.set_min_decision_threshold(50.0f);
+  ep_config.set_fast_update_dur(0.2f);
+  ep_config.set_sample_rate(static_cast<float>(sample_rate));
+  ep_config.set_min_fundamental_frequency(57.143f);
+  ep_config.set_max_fundamental_frequency(400.0f);
+  ep_config.set_contamination_rejection_period(0.25f);
+  energy_endpointer_.Init(ep_config);
+}
+
+void Endpointer::Reset() {
+  old_ep_status_ = EP_PRE_SPEECH;
+  waiting_for_speech_possibly_complete_timeout_ = false;
+  waiting_for_speech_complete_timeout_ = false;
+  speech_previously_detected_ = false;
+  speech_input_complete_ = false;
+  audio_frame_time_us_ = 0; // Reset time for packets sent to endpointer.
+  speech_end_time_us_ = -1;
+  speech_start_time_us_ = -1;
+}
+
+void Endpointer::StartSession() {
+  Reset();
+  energy_endpointer_.StartSession();
+}
+
+void Endpointer::EndSession() {
+  energy_endpointer_.EndSession();
+}
+
+void Endpointer::SetEnvironmentEstimationMode() {
+  Reset();
+  energy_endpointer_.SetEnvironmentEstimationMode();
+}
+
+void Endpointer::SetUserInputMode() {
+  energy_endpointer_.SetUserInputMode();
+}
+
+EpStatus Endpointer::Status(int64_t *time) {
+  return energy_endpointer_.Status(time);
+}
+
+EpStatus Endpointer::ProcessAudio(const AudioChunk& raw_audio, float* rms_out) {
+  MOZ_ASSERT(raw_audio.mBufferFormat == AUDIO_FORMAT_S16, "Audio is not in 16 bit format");
+  const int16_t* audio_data = static_cast<const int16_t*>(raw_audio.mChannelData[0]);
+  const int num_samples = raw_audio.mDuration;
+  EpStatus ep_status = EP_PRE_SPEECH;
+
+  // Process the input data in blocks of frame_size_, dropping any incomplete
+  // frames at the end (which is ok since typically the caller will be recording
+  // audio in multiples of our frame size).
+  int sample_index = 0;
+  while (sample_index + frame_size_ <= num_samples) {
+    // Have the endpointer process the frame.
+    energy_endpointer_.ProcessAudioFrame(audio_frame_time_us_,
+                                         audio_data + sample_index,
+                                         frame_size_,
+                                         rms_out);
+    sample_index += frame_size_;
+    audio_frame_time_us_ += (frame_size_ * 1000000) /
+                         sample_rate_;
+
+    // Get the status of the endpointer.
+    int64_t ep_time;
+    ep_status = energy_endpointer_.Status(&ep_time);
+    if (old_ep_status_ != ep_status)
+        fprintf(stderr, "Status changed old= %d, new= %d\n", old_ep_status_, ep_status);
+
+    // Handle state changes.
+    if ((EP_SPEECH_PRESENT == ep_status) &&
+        (EP_POSSIBLE_ONSET == old_ep_status_)) {
+      speech_end_time_us_ = -1;
+      waiting_for_speech_possibly_complete_timeout_ = false;
+      waiting_for_speech_complete_timeout_ = false;
+      // Trigger SpeechInputDidStart event on first detection.
+      if (false == speech_previously_detected_) {
+        speech_previously_detected_ = true;
+        speech_start_time_us_ = ep_time;
+      }
+    }
+    if ((EP_PRE_SPEECH == ep_status) &&
+        (EP_POSSIBLE_OFFSET == old_ep_status_)) {
+      speech_end_time_us_ = ep_time;
+      waiting_for_speech_possibly_complete_timeout_ = true;
+      waiting_for_speech_complete_timeout_ = true;
+    }
+    if (ep_time > speech_input_minimum_length_us_) {
+      // Speech possibly complete timeout.
+      if ((waiting_for_speech_possibly_complete_timeout_) &&
+          (ep_time - speech_end_time_us_ >
+              speech_input_possibly_complete_silence_length_us_)) {
+        waiting_for_speech_possibly_complete_timeout_ = false;
+      }
+      if (waiting_for_speech_complete_timeout_) {
+        // The length of the silence timeout period can be held constant, or it
+        // can be changed after a fixed amount of time from the beginning of
+        // speech.
+        bool has_stepped_silence =
+            (long_speech_length_us_ > 0) &&
+            (long_speech_input_complete_silence_length_us_ > 0);
+        int64_t requested_silence_length;
+        if (has_stepped_silence &&
+            (ep_time - speech_start_time_us_) > long_speech_length_us_) {
+          requested_silence_length =
+              long_speech_input_complete_silence_length_us_;
+        } else {
+          requested_silence_length =
+              speech_input_complete_silence_length_us_;
+        }
+
+        // Speech complete timeout.
+        if ((ep_time - speech_end_time_us_) > requested_silence_length) {
+          waiting_for_speech_complete_timeout_ = false;
+          speech_input_complete_ = true;
+        }
+      }
+    }
+    old_ep_status_ = ep_status;
+  }
+  return ep_status;
+}
+
+}  // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/endpointer.h
@@ -0,0 +1,173 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef CONTENT_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
+#define CONTENT_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
+
+#include "energy_endpointer.h"
+
+namespace mozilla {
+
+struct AudioChunk;
+
+// A simple interface to the underlying energy-endpointer implementation, this
+// class lets callers provide audio as being recorded and let them poll to find
+// when the user has stopped speaking.
+//
+// There are two events that may trigger the end of speech:
+//
+// speechInputPossiblyComplete event:
+//
+// Signals that silence/noise has  been detected for a *short* amount of
+// time after some speech has been detected. It can be used for low latency
+// UI feedback. To disable it, set it to a large amount.
+//
+// speechInputComplete event:
+//
+// This event is intended to signal end of input and to stop recording.
+// The amount of time to wait after speech is set by
+// speech_input_complete_silence_length_ and optionally two other
+// parameters (see below).
+// This time can be held constant, or can change as more speech is detected.
+// In the latter case, the time changes after a set amount of time from the
+// *beginning* of speech.  This is motivated by the expectation that there
+// will be two distinct types of inputs: short search queries and longer
+// dictation style input.
+//
+// Three parameters are used to define the piecewise constant timeout function.
+// The timeout length is speech_input_complete_silence_length until
+// long_speech_length, when it changes to
+// long_speech_input_complete_silence_length.
+class Endpointer {
+ public:
+  explicit Endpointer(int sample_rate);
+
+  // Start the endpointer. This should be called at the beginning of a session.
+  void StartSession();
+
+  // Stop the endpointer.
+  void EndSession();
+
+  // Start environment estimation. Audio will be used for environment estimation
+  // i.e. noise level estimation.
+  void SetEnvironmentEstimationMode();
+
+  // Start user input. This should be called when the user indicates start of
+  // input, e.g. by pressing a button.
+  void SetUserInputMode();
+
+  // Process a segment of audio, which may be more than one frame.
+  // The status of the last frame will be returned.
+  EpStatus ProcessAudio(const AudioChunk& raw_audio, float* rms_out);
+
+  // Get the status of the endpointer.
+  EpStatus Status(int64_t *time_us);
+
+  // Returns true if the endpointer detected reasonable audio levels above
+  // background noise which could be user speech, false if not.
+  bool DidStartReceivingSpeech() const {
+    return speech_previously_detected_;
+  }
+
+  bool IsEstimatingEnvironment() const {
+    return energy_endpointer_.estimating_environment();
+  }
+
+  void set_speech_input_complete_silence_length(int64_t time_us) {
+    speech_input_complete_silence_length_us_ = time_us;
+  }
+
+  void set_long_speech_input_complete_silence_length(int64_t time_us) {
+    long_speech_input_complete_silence_length_us_ = time_us;
+  }
+
+  void set_speech_input_possibly_complete_silence_length(int64_t time_us) {
+    speech_input_possibly_complete_silence_length_us_ = time_us;
+  }
+
+  void set_long_speech_length(int64_t time_us) {
+    long_speech_length_us_ = time_us;
+  }
+
+  bool speech_input_complete() const {
+    return speech_input_complete_;
+  }
+
+  // RMS background noise level in dB.
+  float NoiseLevelDb() const { return energy_endpointer_.GetNoiseLevelDb(); }
+
+ private:
+  // Reset internal states. Helper method common to initial input utterance
+  // and following input utternaces.
+  void Reset();
+
+  // Minimum allowable length of speech input.
+  int64_t speech_input_minimum_length_us_;
+
+  // The speechInputPossiblyComplete event signals that silence/noise has been
+  // detected for a *short* amount of time after some speech has been detected.
+  // This proporty specifies the time period.
+  int64_t speech_input_possibly_complete_silence_length_us_;
+
+  // The speechInputComplete event signals that silence/noise has been
+  // detected for a *long* amount of time after some speech has been detected.
+  // This property specifies the time period.
+  int64_t speech_input_complete_silence_length_us_;
+
+  // Same as above, this specifies the required silence period after speech
+  // detection. This period is used instead of
+  // speech_input_complete_silence_length_ when the utterance is longer than
+  // long_speech_length_. This parameter is optional.
+  int64_t long_speech_input_complete_silence_length_us_;
+
+  // The period of time after which the endpointer should consider
+  // long_speech_input_complete_silence_length_ as a valid silence period
+  // instead of speech_input_complete_silence_length_. This parameter is
+  // optional.
+  int64_t long_speech_length_us_;
+
+  // First speech onset time, used in determination of speech complete timeout.
+  int64_t speech_start_time_us_;
+
+  // Most recent end time, used in determination of speech complete timeout.
+  int64_t speech_end_time_us_;
+
+  int64_t audio_frame_time_us_;
+  EpStatus old_ep_status_;
+  bool waiting_for_speech_possibly_complete_timeout_;
+  bool waiting_for_speech_complete_timeout_;
+  bool speech_previously_detected_;
+  bool speech_input_complete_;
+  EnergyEndpointer energy_endpointer_;
+  int sample_rate_;
+  int32_t frame_size_;
+};
+
+}  // namespace mozilla
+
+#endif  // CONTENT_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/energy_endpointer.cc
@@ -0,0 +1,393 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "energy_endpointer.h"
+
+#include <math.h>
+
+namespace {
+
+// Returns the RMS (quadratic mean) of the input signal.
+float RMS(const int16_t* samples, int num_samples) {
+  int64_t ssq_int64_t = 0;
+  int64_t sum_int64_t = 0;
+  for (int i = 0; i < num_samples; ++i) {
+    sum_int64_t += samples[i];
+    ssq_int64_t += samples[i] * samples[i];
+  }
+  // now convert to floats.
+  double sum = static_cast<double>(sum_int64_t);
+  sum /= num_samples;
+  double ssq = static_cast<double>(ssq_int64_t);
+  return static_cast<float>(sqrt((ssq / num_samples) - (sum * sum)));
+}
+
+int64_t Secs2Usecs(float seconds) {
+  return static_cast<int64_t>(0.5 + (1.0e6 * seconds));
+}
+
+float GetDecibel(float value) {
+  if (value > 1.0e-100)
+    return 20 * log10(value);
+  return -2000.0;
+}
+
+}  // namespace
+
+namespace mozilla {
+
+// Stores threshold-crossing histories for making decisions about the speech
+// state.
+class EnergyEndpointer::HistoryRing {
+ public:
+  HistoryRing() : insertion_index_(0) {}
+
+  // Resets the ring to |size| elements each with state |initial_state|
+  void SetRing(int size, bool initial_state);
+
+  // Inserts a new entry into the ring and drops the oldest entry.
+  void Insert(int64_t time_us, bool decision);
+
+  // Returns the time in microseconds of the most recently added entry.
+  int64_t EndTime() const;
+
+  // Returns the sum of all intervals during which 'decision' is true within
+  // the time in seconds specified by 'duration'. The returned interval is
+  // in seconds.
+  float RingSum(float duration_sec);
+
+ private:
+  struct DecisionPoint {
+    int64_t time_us;
+    bool decision;
+  };
+
+  std::vector<DecisionPoint> decision_points_;
+  int insertion_index_;  // Index at which the next item gets added/inserted.
+
+  HistoryRing(const HistoryRing&);
+  void operator=(const HistoryRing&);
+};
+
+void EnergyEndpointer::HistoryRing::SetRing(int size, bool initial_state) {
+  insertion_index_ = 0;
+  decision_points_.clear();
+  DecisionPoint init = { -1, initial_state };
+  decision_points_.resize(size, init);
+}
+
+void EnergyEndpointer::HistoryRing::Insert(int64_t time_us, bool decision) {
+  decision_points_[insertion_index_].time_us = time_us;
+  decision_points_[insertion_index_].decision = decision;
+  insertion_index_ = (insertion_index_ + 1) % decision_points_.size();
+}
+
+int64_t EnergyEndpointer::HistoryRing::EndTime() const {
+  int ind = insertion_index_ - 1;
+  if (ind < 0)
+    ind = decision_points_.size() - 1;
+  return decision_points_[ind].time_us;
+}
+
+float EnergyEndpointer::HistoryRing::RingSum(float duration_sec) {
+  if (!decision_points_.size())
+    return 0.0;
+
+  int64_t sum_us = 0;
+  int ind = insertion_index_ - 1;
+  if (ind < 0)
+    ind = decision_points_.size() - 1;
+  int64_t end_us = decision_points_[ind].time_us;
+  bool is_on = decision_points_[ind].decision;
+  int64_t start_us = end_us - static_cast<int64_t>(0.5 + (1.0e6 * duration_sec));
+  if (start_us < 0)
+    start_us = 0;
+  size_t n_summed = 1;  // n points ==> (n-1) intervals
+  while ((decision_points_[ind].time_us > start_us) &&
+         (n_summed < decision_points_.size())) {
+    --ind;
+    if (ind < 0)
+      ind = decision_points_.size() - 1;
+    if (is_on)
+      sum_us += end_us - decision_points_[ind].time_us;
+    is_on = decision_points_[ind].decision;
+    end_us = decision_points_[ind].time_us;
+    n_summed++;
+  }
+
+  return 1.0e-6f * sum_us;  //  Returns total time that was super threshold.
+}
+
+EnergyEndpointer::EnergyEndpointer()
+    : status_(EP_PRE_SPEECH),
+      offset_confirm_dur_sec_(0),
+      endpointer_time_us_(0),
+      fast_update_frames_(0),
+      frame_counter_(0),
+      max_window_dur_(4.0),
+      sample_rate_(0),
+      history_(new HistoryRing()),
+      decision_threshold_(0),
+      estimating_environment_(false),
+      noise_level_(0),
+      rms_adapt_(0),
+      start_lag_(0),
+      end_lag_(0),
+      user_input_start_time_us_(0) {
+}
+
+EnergyEndpointer::~EnergyEndpointer() {
+}
+
+int EnergyEndpointer::TimeToFrame(float time) const {
+  return static_cast<int32_t>(0.5 + (time / params_.frame_period()));
+}
+
+void EnergyEndpointer::Restart(bool reset_threshold) {
+  status_ = EP_PRE_SPEECH;
+  user_input_start_time_us_ = 0;
+
+  if (reset_threshold) {
+    decision_threshold_ = params_.decision_threshold();
+    rms_adapt_ = decision_threshold_;
+    noise_level_ = params_.decision_threshold() / 2.0f;
+    frame_counter_ = 0;  // Used for rapid initial update of levels.
+  }
+
+  // Set up the memories to hold the history windows.
+  history_->SetRing(TimeToFrame(max_window_dur_), false);
+
+  // Flag that indicates that current input should be used for
+  // estimating the environment. The user has not yet started input
+  // by e.g. pressed the push-to-talk button. By default, this is
+  // false for backward compatibility.
+  estimating_environment_ = false;
+}
+
+void EnergyEndpointer::Init(const EnergyEndpointerParams& params) {
+  params_ = params;
+
+  // Find the longest history interval to be used, and make the ring
+  // large enough to accommodate that number of frames.  NOTE: This
+  // depends upon ep_frame_period being set correctly in the factory
+  // that did this instantiation.
+  max_window_dur_ = params_.onset_window();
+  if (params_.speech_on_window() > max_window_dur_)
+    max_window_dur_ = params_.speech_on_window();
+  if (params_.offset_window() > max_window_dur_)
+    max_window_dur_ = params_.offset_window();
+  Restart(true);
+
+  offset_confirm_dur_sec_ = params_.offset_window() -
+                            params_.offset_confirm_dur();
+  if (offset_confirm_dur_sec_ < 0.0)
+    offset_confirm_dur_sec_ = 0.0;
+
+  user_input_start_time_us_ = 0;
+
+  // Flag that indicates that  current input should be used for
+  // estimating the environment. The user has not yet started input
+  // by e.g. pressed the push-to-talk button. By default, this is
+  // false for backward compatibility.
+  estimating_environment_ = false;
+  // The initial value of the noise and speech levels is inconsequential.
+  // The level of the first frame will overwrite these values.
+  noise_level_ = params_.decision_threshold() / 2.0f;
+  fast_update_frames_ =
+      static_cast<int64_t>(params_.fast_update_dur() / params_.frame_period());
+
+  frame_counter_ = 0;  // Used for rapid initial update of levels.
+
+  sample_rate_ = params_.sample_rate();
+  start_lag_ = static_cast<int>(sample_rate_ /
+                                params_.max_fundamental_frequency());
+  end_lag_ = static_cast<int>(sample_rate_ /
+                              params_.min_fundamental_frequency());
+}
+
+void EnergyEndpointer::StartSession() {
+  Restart(true);
+}
+
+void EnergyEndpointer::EndSession() {
+  status_ = EP_POST_SPEECH;
+}
+
+void EnergyEndpointer::SetEnvironmentEstimationMode() {
+  Restart(true);
+  estimating_environment_ = true;
+}
+
+void EnergyEndpointer::SetUserInputMode() {
+  estimating_environment_ = false;
+  user_input_start_time_us_ = endpointer_time_us_;
+}
+
+void EnergyEndpointer::ProcessAudioFrame(int64_t time_us,
+                                         const int16_t* samples,
+                                         int num_samples,
+                                         float* rms_out) {
+  endpointer_time_us_ = time_us;
+  float rms = RMS(samples, num_samples);
+
+  // Check that this is user input audio vs. pre-input adaptation audio.
+  // Input audio starts when the user indicates start of input, by e.g.
+  // pressing push-to-talk. Audio recieved prior to that is used to update
+  // noise and speech level estimates.
+  if (!estimating_environment_) {
+    bool decision = false;
+    if ((endpointer_time_us_ - user_input_start_time_us_) <
+        Secs2Usecs(params_.contamination_rejection_period())) {
+      decision = false;
+      //PR_LOG(GetSpeechRecognitionLog(), PR_LOG_DEBUG, ("decision: forced to false, time: %d", endpointer_time_us_));
+    } else {
+      decision = (rms > decision_threshold_);
+    }
+
+    history_->Insert(endpointer_time_us_, decision);
+
+    switch (status_) {
+      case EP_PRE_SPEECH:
+        if (history_->RingSum(params_.onset_window()) >
+            params_.onset_detect_dur()) {
+          status_ = EP_POSSIBLE_ONSET;
+        }
+        break;
+
+      case EP_POSSIBLE_ONSET: {
+        float tsum = history_->RingSum(params_.onset_window());
+        if (tsum > params_.onset_confirm_dur()) {
+          status_ = EP_SPEECH_PRESENT;
+        } else {  // If signal is not maintained, drop back to pre-speech.
+          if (tsum <= params_.onset_detect_dur())
+            status_ = EP_PRE_SPEECH;
+        }
+        break;
+      }
+
+      case EP_SPEECH_PRESENT: {
+        // To induce hysteresis in the state residency, we allow a
+        // smaller residency time in the on_ring, than was required to
+        // enter the SPEECH_PERSENT state.
+        float on_time = history_->RingSum(params_.speech_on_window());
+        if (on_time < params_.on_maintain_dur())
+          status_ = EP_POSSIBLE_OFFSET;
+        break;
+      }
+
+      case EP_POSSIBLE_OFFSET:
+        if (history_->RingSum(params_.offset_window()) <=
+            offset_confirm_dur_sec_) {
+          // Note that this offset time may be beyond the end
+          // of the input buffer in a real-time system.  It will be up
+          // to the RecognizerSession to decide what to do.
+          status_ = EP_PRE_SPEECH;  // Automatically reset for next utterance.
+        } else {  // If speech picks up again we allow return to SPEECH_PRESENT.
+          if (history_->RingSum(params_.speech_on_window()) >=
+              params_.on_maintain_dur())
+            status_ = EP_SPEECH_PRESENT;
+        }
+        break;
+
+      default:
+        break;
+    }
+
+    // If this is a quiet, non-speech region, slowly adapt the detection
+    // threshold to be about 6dB above the average RMS.
+    if ((!decision) && (status_ == EP_PRE_SPEECH)) {
+      decision_threshold_ = (0.98f * decision_threshold_) + (0.02f * 2 * rms);
+      rms_adapt_ = decision_threshold_;
+    } else {
+      // If this is in a speech region, adapt the decision threshold to
+      // be about 10dB below the average RMS. If the noise level is high,
+      // the threshold is pushed up.
+      // Adaptation up to a higher level is 5 times faster than decay to
+      // a lower level.
+      if ((status_ == EP_SPEECH_PRESENT) && decision) {
+        if (rms_adapt_ > rms) {
+          rms_adapt_ = (0.99f * rms_adapt_) + (0.01f * rms);
+        } else {
+          rms_adapt_ = (0.95f * rms_adapt_) + (0.05f * rms);
+        }
+        float target_threshold = 0.3f * rms_adapt_ +  noise_level_;
+        decision_threshold_ = (.90f * decision_threshold_) +
+                              (0.10f * target_threshold);
+      }
+    }
+
+    // Set a floor
+    if (decision_threshold_ < params_.min_decision_threshold())
+      decision_threshold_ = params_.min_decision_threshold();
+  }
+
+  // Update speech and noise levels.
+  UpdateLevels(rms);
+  ++frame_counter_;
+
+  if (rms_out)
+    *rms_out = GetDecibel(rms);
+}
+
+float EnergyEndpointer::GetNoiseLevelDb() const {
+  return GetDecibel(noise_level_);
+}
+
+void EnergyEndpointer::UpdateLevels(float rms) {
+  // Update quickly initially. We assume this is noise and that
+  // speech is 6dB above the noise.
+  if (frame_counter_ < fast_update_frames_) {
+    // Alpha increases from 0 to (k-1)/k where k is the number of time
+    // steps in the initial adaptation period.
+    float alpha = static_cast<float>(frame_counter_) /
+        static_cast<float>(fast_update_frames_);
+    noise_level_ = (alpha * noise_level_) + ((1 - alpha) * rms);
+    //PR_LOG(GetSpeechRecognitionLog(), PR_LOG_DEBUG, ("FAST UPDATE, frame_counter_ %d, fast_update_frames_ %d", frame_counter_, fast_update_frames_));
+  } else {
+    // Update Noise level. The noise level adapts quickly downward, but
+    // slowly upward. The noise_level_ parameter is not currently used
+    // for threshold adaptation. It is used for UI feedback.
+    if (noise_level_ < rms)
+      noise_level_ = (0.999f * noise_level_) + (0.001f * rms);
+    else
+      noise_level_ = (0.95f * noise_level_) + (0.05f * rms);
+  }
+  if (estimating_environment_ || (frame_counter_ < fast_update_frames_)) {
+    decision_threshold_ = noise_level_ * 2; // 6dB above noise level.
+    // Set a floor
+    if (decision_threshold_ < params_.min_decision_threshold())
+      decision_threshold_ = params_.min_decision_threshold();
+  }
+}
+
+EpStatus EnergyEndpointer::Status(int64_t* status_time)  const {
+  *status_time = history_->EndTime();
+  return status_;
+}
+
+}  // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/energy_endpointer.h
@@ -0,0 +1,180 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The EnergyEndpointer class finds likely speech onset and offset points.
+//
+// The implementation described here is about the simplest possible.
+// It is based on timings of threshold crossings for overall signal
+// RMS. It is suitable for light weight applications.
+//
+// As written, the basic idea is that one specifies intervals that
+// must be occupied by super- and sub-threshold energy levels, and
+// defers decisions re onset and offset times until these
+// specifications have been met.  Three basic intervals are tested: an
+// onset window, a speech-on window, and an offset window.  We require
+// super-threshold to exceed some mimimum total durations in the onset
+// and speech-on windows before declaring the speech onset time, and
+// we specify a required sub-threshold residency in the offset window
+// before declaring speech offset. As the various residency requirements are
+// met, the EnergyEndpointer instance assumes various states, and can return the
+// ID of these states to the client (see EpStatus below).
+//
+// The levels of the speech and background noise are continuously updated. It is
+// important that the background noise level be estimated initially for
+// robustness in noisy conditions. The first frames are assumed to be background
+// noise and a fast update rate is used for the noise level. The duration for
+// fast update is controlled by the fast_update_dur_ paramter.
+//
+// If used in noisy conditions, the endpointer should be started and run in the
+// EnvironmentEstimation mode, for at least 200ms, before switching to
+// UserInputMode.
+// Audio feedback contamination can appear in the input audio, if not cut
+// out or handled by echo cancellation. Audio feedback can trigger a false
+// accept. The false accepts can be ignored by setting
+// ep_contamination_rejection_period.
+
+#ifndef CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_
+#define CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_
+
+#include <vector>
+
+#include "nsAutoPtr.h"
+
+#include "energy_endpointer_params.h"
+
+namespace mozilla {
+
+// Endpointer status codes
+enum EpStatus {
+  EP_PRE_SPEECH = 10,
+  EP_POSSIBLE_ONSET,
+  EP_SPEECH_PRESENT,
+  EP_POSSIBLE_OFFSET,
+  EP_POST_SPEECH,
+};
+
+class EnergyEndpointer {
+ public:
+  // The default construction MUST be followed by Init(), before any
+  // other use can be made of the instance.
+  EnergyEndpointer();
+  virtual ~EnergyEndpointer();
+
+  void Init(const EnergyEndpointerParams& params);
+
+  // Start the endpointer. This should be called at the beginning of a session.
+  void StartSession();
+
+  // Stop the endpointer.
+  void EndSession();
+
+  // Start environment estimation. Audio will be used for environment estimation
+  // i.e. noise level estimation.
+  void SetEnvironmentEstimationMode();
+
+  // Start user input. This should be called when the user indicates start of
+  // input, e.g. by pressing a button.
+  void SetUserInputMode();
+
+  // Computes the next input frame and modifies EnergyEndpointer status as
+  // appropriate based on the computation.
+  void ProcessAudioFrame(int64_t time_us,
+                         const int16_t* samples, int num_samples,
+                         float* rms_out);
+
+  // Returns the current state of the EnergyEndpointer and the time
+  // corresponding to the most recently computed frame.
+  EpStatus Status(int64_t* status_time_us) const;
+
+  bool estimating_environment() const {
+    return estimating_environment_;
+  }
+
+  // Returns estimated noise level in dB.
+  float GetNoiseLevelDb() const;
+
+ private:
+  class HistoryRing;
+
+  // Resets the endpointer internal state.  If reset_threshold is true, the
+  // state will be reset completely, including adaptive thresholds and the
+  // removal of all history information.
+  void Restart(bool reset_threshold);
+
+  // Update internal speech and noise levels.
+  void UpdateLevels(float rms);
+
+  // Returns the number of frames (or frame number) corresponding to
+  // the 'time' (in seconds).
+  int TimeToFrame(float time) const;
+
+  EpStatus status_;  // The current state of this instance.
+  float offset_confirm_dur_sec_;  // max on time allowed to confirm POST_SPEECH
+  int64_t endpointer_time_us_;  // Time of the most recently received audio frame.
+  int64_t fast_update_frames_; // Number of frames for initial level adaptation.
+  int64_t frame_counter_;  // Number of frames seen. Used for initial adaptation.
+  float max_window_dur_;  // Largest search window size (seconds)
+  float sample_rate_;  // Sampling rate.
+
+  // Ring buffers to hold the speech activity history.
+  nsAutoPtr<HistoryRing> history_;
+
+  // Configuration parameters.
+  EnergyEndpointerParams params_;
+
+  // RMS which must be exceeded to conclude frame is speech.
+  float decision_threshold_;
+
+  // Flag to indicate that audio should be used to estimate environment, prior
+  // to receiving user input.
+  bool estimating_environment_;
+
+  // Estimate of the background noise level. Used externally for UI feedback.
+  float noise_level_;
+
+  // An adaptive threshold used to update decision_threshold_ when appropriate.
+  float rms_adapt_;
+
+  // Start lag corresponds to the highest fundamental frequency.
+  int start_lag_;
+
+  // End lag corresponds to the lowest fundamental frequency.
+  int end_lag_;
+
+  // Time when mode switched from environment estimation to user input. This
+  // is used to time forced rejection of audio feedback contamination.
+  int64_t user_input_start_time_us_;
+
+  // prevent copy constructor and assignment
+  EnergyEndpointer(const EnergyEndpointer&);
+  void operator=(const EnergyEndpointer&);
+};
+
+}  // namespace mozilla
+
+#endif  // CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/energy_endpointer_params.cc
@@ -0,0 +1,77 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "energy_endpointer_params.h"
+
+namespace mozilla {
+
+EnergyEndpointerParams::EnergyEndpointerParams() {
+  SetDefaults();
+}
+
+void EnergyEndpointerParams::SetDefaults() {
+  frame_period_ = 0.01f;
+  frame_duration_ = 0.01f;
+  endpoint_margin_ = 0.2f;
+  onset_window_ = 0.15f;
+  speech_on_window_ = 0.4f;
+  offset_window_ = 0.15f;
+  onset_detect_dur_ = 0.09f;
+  onset_confirm_dur_ = 0.075f;
+  on_maintain_dur_ = 0.10f;
+  offset_confirm_dur_ = 0.12f;
+  decision_threshold_ = 150.0f;
+  min_decision_threshold_ = 50.0f;
+  fast_update_dur_ = 0.2f;
+  sample_rate_ = 8000.0f;
+  min_fundamental_frequency_ = 57.143f;
+  max_fundamental_frequency_ = 400.0f;
+  contamination_rejection_period_ = 0.25f;
+}
+
+void EnergyEndpointerParams::operator=(const EnergyEndpointerParams& source) {
+  frame_period_ = source.frame_period();
+  frame_duration_ = source.frame_duration();
+  endpoint_margin_ = source.endpoint_margin();
+  onset_window_ = source.onset_window();
+  speech_on_window_ = source.speech_on_window();
+  offset_window_ = source.offset_window();
+  onset_detect_dur_ = source.onset_detect_dur();
+  onset_confirm_dur_ = source.onset_confirm_dur();
+  on_maintain_dur_ = source.on_maintain_dur();
+  offset_confirm_dur_ = source.offset_confirm_dur();
+  decision_threshold_ = source.decision_threshold();
+  min_decision_threshold_ = source.min_decision_threshold();
+  fast_update_dur_ = source.fast_update_dur();
+  sample_rate_ = source.sample_rate();
+  min_fundamental_frequency_ = source.min_fundamental_frequency();
+  max_fundamental_frequency_ = source.max_fundamental_frequency();
+  contamination_rejection_period_ = source.contamination_rejection_period();
+}
+
+}  //  namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/energy_endpointer_params.h
@@ -0,0 +1,159 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_PARAMS_H_
+#define CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_PARAMS_H_
+
+namespace mozilla {
+
+// Input parameters for the EnergyEndpointer class.
+class EnergyEndpointerParams {
+ public:
+  EnergyEndpointerParams();
+
+  void SetDefaults();
+
+  void operator=(const EnergyEndpointerParams& source);
+
+  // Accessors and mutators
+  float frame_period() const { return frame_period_; }
+  void set_frame_period(float frame_period) {
+    frame_period_ = frame_period;
+  }
+
+  float frame_duration() const { return frame_duration_; }
+  void set_frame_duration(float frame_duration) {
+    frame_duration_ = frame_duration;
+  }
+
+  float endpoint_margin() const { return endpoint_margin_; }
+  void set_endpoint_margin(float endpoint_margin) {
+    endpoint_margin_ = endpoint_margin;
+  }
+
+  float onset_window() const { return onset_window_; }
+  void set_onset_window(float onset_window) { onset_window_ = onset_window; }
+
+  float speech_on_window() const { return speech_on_window_; }
+  void set_speech_on_window(float speech_on_window) {
+    speech_on_window_ = speech_on_window;
+  }
+
+  float offset_window() const { return offset_window_; }
+  void set_offset_window(float offset_window) {
+    offset_window_ = offset_window;
+  }
+
+  float onset_detect_dur() const { return onset_detect_dur_; }
+  void set_onset_detect_dur(float onset_detect_dur) {
+    onset_detect_dur_ = onset_detect_dur;
+  }
+
+  float onset_confirm_dur() const { return onset_confirm_dur_; }
+  void set_onset_confirm_dur(float onset_confirm_dur) {
+    onset_confirm_dur_ = onset_confirm_dur;
+  }
+
+  float on_maintain_dur() const { return on_maintain_dur_; }
+  void set_on_maintain_dur(float on_maintain_dur) {
+    on_maintain_dur_ = on_maintain_dur;
+  }
+
+  float offset_confirm_dur() const { return offset_confirm_dur_; }
+  void set_offset_confirm_dur(float offset_confirm_dur) {
+    offset_confirm_dur_ = offset_confirm_dur;
+  }
+
+  float decision_threshold() const { return decision_threshold_; }
+  void set_decision_threshold(float decision_threshold) {
+    decision_threshold_ = decision_threshold;
+  }
+
+  float min_decision_threshold() const { return min_decision_threshold_; }
+  void set_min_decision_threshold(float min_decision_threshold) {
+    min_decision_threshold_ = min_decision_threshold;
+  }
+
+  float fast_update_dur() const { return fast_update_dur_; }
+  void set_fast_update_dur(float fast_update_dur) {
+    fast_update_dur_ = fast_update_dur;
+  }
+
+  float sample_rate() const { return sample_rate_; }
+  void set_sample_rate(float sample_rate) { sample_rate_ = sample_rate; }
+
+  float min_fundamental_frequency() const { return min_fundamental_frequency_; }
+  void set_min_fundamental_frequency(float min_fundamental_frequency) {
+    min_fundamental_frequency_ = min_fundamental_frequency;
+  }
+
+  float max_fundamental_frequency() const { return max_fundamental_frequency_; }
+  void set_max_fundamental_frequency(float max_fundamental_frequency) {
+    max_fundamental_frequency_ = max_fundamental_frequency;
+  }
+
+  float contamination_rejection_period() const {
+    return contamination_rejection_period_;
+  }
+  void set_contamination_rejection_period(
+      float contamination_rejection_period) {
+    contamination_rejection_period_ = contamination_rejection_period;
+  }
+
+ private:
+  float frame_period_;  // Frame period
+  float frame_duration_;  // Window size
+  float onset_window_;  // Interval scanned for onset activity
+  float speech_on_window_;  // Inverval scanned for ongoing speech
+  float offset_window_;  // Interval scanned for offset evidence
+  float offset_confirm_dur_;  // Silence duration required to confirm offset
+  float decision_threshold_;  // Initial rms detection threshold
+  float min_decision_threshold_;  // Minimum rms detection threshold
+  float fast_update_dur_;  // Period for initial estimation of levels.
+  float sample_rate_;  // Expected sample rate.
+
+  // Time to add on either side of endpoint threshold crossings
+  float endpoint_margin_;
+  // Total dur within onset_window required to enter ONSET state
+  float onset_detect_dur_;
+  // Total on time within onset_window required to enter SPEECH_ON state
+  float onset_confirm_dur_;
+  // Minimum dur in SPEECH_ON state required to maintain ON state
+  float on_maintain_dur_;
+  // Minimum fundamental frequency for autocorrelation.
+  float min_fundamental_frequency_;
+  // Maximum fundamental frequency for autocorrelation.
+  float max_fundamental_frequency_;
+  // Period after start of user input that above threshold values are ignored.
+  // This is to reject audio feedback contamination.
+  float contamination_rejection_period_;
+};
+
+}  //  namespace mozilla
+
+#endif  // CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_PARAMS_H_
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/nsIDOMSpeechRecognitionError.idl
@@ -0,0 +1,39 @@
+/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * The origin of this IDL file is
+ * http://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html
+ *
+ * Copyright © 2012 W3C® (MIT, ERCIM, Keio), All Rights Reserved. W3C
+ * liability, trademark and document use rules apply.
+ */
+
+#include "nsIDOMEvent.idl"
+
+[scriptable, builtinclass, uuid(5ddc5a46-e7db-4c5c-8ed4-80cf5d88fca3)]
+interface nsIDOMSpeechRecognitionError : nsIDOMEvent {
+    const unsigned long NO_SPEECH = 0;
+    const unsigned long ABORTED = 1;
+    const unsigned long AUDIO_CAPTURE = 2;
+    const unsigned long NETWORK = 3;
+    const unsigned long NOT_ALLOWED = 4;
+    const unsigned long SERVICE_NOT_ALLOWED = 5;
+    const unsigned long BAD_GRAMMAR = 6;
+    const unsigned long LANGUAGE_NOT_SUPPORTED = 7;
+
+    [noscript] void initSpeechRecognitionError(in DOMString eventTypeArg,
+                                               in boolean canBubbleArg,
+                                               in boolean cancelableArg,
+                                               in unsigned long error,
+                                               in DOMString message);
+
+    readonly attribute unsigned long error;
+    readonly attribute DOMString message;
+};
+
+dictionary SpeechRecognitionErrorInit : EventInit {
+    unsigned long error;
+    DOMString message;
+};
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/nsIDOMSpeechRecognitionEvent.idl
@@ -0,0 +1,38 @@
+/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * The origin of this IDL file is
+ * http://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html
+ *
+ * Copyright © 2012 W3C® (MIT, ERCIM, Keio), All Rights Reserved. W3C
+ * liability, trademark and document use rules apply.
+ */
+
+#include "nsIDOMEvent.idl"
+
+interface SpeechRecognitionResultList;
+
+[scriptable, builtinclass, uuid(98dded70-33af-42d5-819d-e15b6f4a3aba)]
+interface nsIDOMSpeechRecognitionEvent : nsIDOMEvent {
+    [noscript] void initSpeechRecognitionEvent(in DOMString eventTypeArg,
+                                               in boolean canBubbleArg,
+                                               in boolean cancelableArg,
+                                               in unsigned long resultIndex,
+                                               in nsISupports results,
+                                               in DOMString interpretation,
+                                               in nsIDOMDocument emma);
+
+    readonly attribute unsigned long resultIndex;
+    readonly attribute nsISupports results;
+    readonly attribute DOMString interpretation;
+    readonly attribute nsIDOMDocument emma;
+};
+
+dictionary SpeechRecognitionEventInit : EventInit {
+    unsigned long resultIndex;
+    nsISupports results;
+    DOMString interpretation;
+    nsIDOMDocument emma;
+};
new file mode 100644
--- /dev/null
+++ b/content/media/webspeech/recognition/nsISpeechRecognitionService.idl
@@ -0,0 +1,25 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsISupports.idl"
+
+%{C++
+#include "mozilla/WeakPtr.h"
+%}
+
+native SpeechRecognitionWeakPtr(mozilla::WeakPtr<mozilla::dom::SpeechRecognition>);
+[ptr] native AudioSegmentPtr(mozilla::AudioSegment);
+
+[uuid(cb98d929-81cd-4a51-a214-80d3e6281d24)]
+interface nsISpeechRecognitionService : nsISupports {
+    void initialize(in SpeechRecognitionWeakPtr aSpeechRecognition);
+    void processAudioSegment(in AudioSegmentPtr aAudioSegment);
+    void soundEnd();
+    void abort();
+};
+
+%{C++
+#define NS_SPEECH_RECOGNITION_SERVICE_CONTRACTID_PREFIX "@mozilla.org/webspeech/service;1?name="
+%}
--- a/dom/bindings/Makefile.in
+++ b/dom/bindings/Makefile.in
@@ -84,16 +84,17 @@ LOCAL_INCLUDES += -I$(topsrcdir)/js/xpco
   -I$(topsrcdir)/dom/base \
   -I$(topsrcdir)/dom/battery \
   -I$(topsrcdir)/dom/indexedDB \
   -I$(topsrcdir)/content/xslt/src/base \
   -I$(topsrcdir)/content/xslt/src/xpath \
   -I$(topsrcdir)/content/xml/content/src \
   -I$(topsrcdir)/content/xul/content/src \
   -I$(topsrcdir)/content/xul/document/src \
+  -I$(topsrcdir)/content/media/webspeech/recognition \
   $(NULL)
 
 ifdef MOZ_AUDIO_CHANNEL_MANAGER
 LOCAL_INCLUDES += \
   -I$(topsrcdir)/dom/system/gonk \
   $(NULL)
 endif
 
--- a/dom/dom-config.mk
+++ b/dom/dom-config.mk
@@ -52,10 +52,14 @@ DOM_SRCDIRS += \
   dom/fm \
   $(NULL)
 endif
 
 ifdef MOZ_B2G_BT
 DOM_SRCDIRS += dom/bluetooth
 endif
 
+ifdef MOZ_WEBSPEECH
+DOM_SRCDIRS += content/media/webspeech
+endif
+
 LOCAL_INCLUDES += $(DOM_SRCDIRS:%=-I$(topsrcdir)/%)
 DEFINES += -D_IMPL_NS_LAYOUT
new file mode 100644
--- /dev/null
+++ b/dom/webidl/SpeechGrammar.webidl
@@ -0,0 +1,20 @@
+/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * The origin of this IDL file is
+ * http://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html
+ *
+ * Copyright © 2012 W3C® (MIT, ERCIM, Keio), All Rights Reserved. W3C
+ * liability, trademark and document use rules apply.
+ */
+
+[Constructor, PrefControlled]
+interface SpeechGrammar {
+    [Throws]
+    attribute DOMString src;
+    [Throws]
+    attribute float weight;
+};
+
new file mode 100644
--- /dev/null
+++ b/dom/webidl/SpeechGrammarList.webidl
@@ -0,0 +1,22 @@
+/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * The origin of this IDL file is
+ * http://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html
+ *
+ * Copyright © 2012 W3C® (MIT, ERCIM, Keio), All Rights Reserved. W3C
+ * liability, trademark and document use rules apply.
+ */
+
+[PrefControlled]
+interface SpeechGrammarList {
+    readonly attribute unsigned long length;
+    [Throws]
+    getter SpeechGrammar item(unsigned long index);
+    [Throws]
+    void addFromURI(DOMString src, optional float weight);
+    [Throws]
+    void addFromString(DOMString string, optional float weight);
+};
new file mode 100644
--- /dev/null
+++ b/dom/webidl/SpeechRecognition.webidl
@@ -0,0 +1,58 @@
+/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * The origin of this IDL file is
+ * http://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html
+ *
+ * Copyright © 2012 W3C® (MIT, ERCIM, Keio), All Rights Reserved. W3C
+ * liability, trademark and document use rules apply.
+ */
+
+[Constructor, PrefControlled]
+interface SpeechRecognition : EventTarget {
+    // recognition parameters
+    [Throws]
+    attribute SpeechGrammarList grammars;
+    [Throws]
+    attribute DOMString lang;
+    [Throws]
+    attribute boolean continuous;
+    [Throws]
+    attribute boolean interimResults;
+    [Throws]
+    attribute unsigned long maxAlternatives;
+    [Throws]
+    attribute DOMString serviceURI;
+
+    // methods to drive the speech interaction
+    [Throws]
+    void start();
+    void stop();
+    void abort();
+
+    // event methods
+    [SetterThrows]
+    attribute EventHandler onaudiostart;
+    [SetterThrows]
+    attribute EventHandler onsoundstart;
+    [SetterThrows]
+    attribute EventHandler onspeechstart;
+    [SetterThrows]
+    attribute EventHandler onspeechend;
+    [SetterThrows]
+    attribute EventHandler onsoundend;
+    [SetterThrows]
+    attribute EventHandler onaudioend;
+    [SetterThrows]
+    attribute EventHandler onresult;
+    [SetterThrows]
+    attribute EventHandler onnomatch;
+    [SetterThrows]
+    attribute EventHandler onerror;
+    [SetterThrows]
+    attribute EventHandler onstart;
+    [SetterThrows]
+    attribute EventHandler onend;
+};
new file mode 100644
--- /dev/null
+++ b/dom/webidl/SpeechRecognitionAlternative.webidl
@@ -0,0 +1,17 @@
+/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * The origin of this IDL file is
+ * http://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html
+ *
+ * Copyright © 2012 W3C® (MIT, ERCIM, Keio), All Rights Reserved. W3C
+ * liability, trademark and document use rules apply.
+ */
+
+[PrefControlled]
+interface SpeechRecognitionAlternative {
+    readonly attribute DOMString transcript;
+    readonly attribute float confidence;
+};
new file mode 100644
--- /dev/null
+++ b/dom/webidl/SpeechRecognitionResult.webidl
@@ -0,0 +1,18 @@
+/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * The origin of this IDL file is
+ * http://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html
+ *
+ * Copyright © 2012 W3C® (MIT, ERCIM, Keio), All Rights Reserved. W3C
+ * liability, trademark and document use rules apply.
+ */
+
+[PrefControlled]
+interface SpeechRecognitionResult {
+    readonly attribute unsigned long length;
+    getter SpeechRecognitionAlternative item(unsigned long index);
+    readonly attribute boolean final;
+};
new file mode 100644
--- /dev/null
+++ b/dom/webidl/SpeechRecognitionResultList.webidl
@@ -0,0 +1,17 @@
+/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * The origin of this IDL file is
+ * http://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html
+ *
+ * Copyright © 2012 W3C® (MIT, ERCIM, Keio), All Rights Reserved. W3C
+ * liability, trademark and document use rules apply.
+ */
+
+[PrefControlled]
+interface SpeechRecognitionResultList {
+    readonly attribute unsigned long length;
+    getter SpeechRecognitionResult item(unsigned long index);
+};
--- a/dom/webidl/WebIDL.mk
+++ b/dom/webidl/WebIDL.mk
@@ -306,16 +306,27 @@ webidl_files += \
 endif
 
 ifdef MOZ_WEBRTC
 webidl_files += \
   MediaStreamList.webidl \
   $(NULL)
 endif
 
+ifdef MOZ_WEBSPEECH
+webidl_files += \
+    SpeechGrammar.webidl \
+    SpeechGrammarList.webidl \
+    SpeechRecognitionAlternative.webidl \
+    SpeechRecognitionResultList.webidl \
+    SpeechRecognitionResult.webidl \
+    SpeechRecognition.webidl \
+    $(NULL)
+endif
+
 ifdef ENABLE_TESTS
 test_webidl_files := \
   TestCodeGen.webidl \
   TestDictionary.webidl \
   TestExampleGen.webidl \
   TestJSImplGen.webidl \
   TestTypedef.webidl \
   $(NULL)
--- a/js/xpconnect/src/event_impl_gen.conf.in
+++ b/js/xpconnect/src/event_impl_gen.conf.in
@@ -45,16 +45,20 @@ simple_events = [
     'DeviceStorageChangeEvent',
     'PopupBlockedEvent',
     'BlobEvent',
 #ifdef MOZ_GAMEPAD
     'GamepadEvent',
     'GamepadButtonEvent',
     'GamepadAxisMoveEvent',
 #endif
+#ifdef MOZ_WEBSPEECH
+    'SpeechRecognitionEvent',
+    'SpeechRecognitionError',
+#endif
   ]
 
 """ include file names """
 special_includes = [
     'DictionaryHelpers.h',
     'nsContentUtils.h',
     'nsIDOMApplicationRegistry.h',
     'nsIDOMFile.h'
--- a/layout/build/Makefile.in
+++ b/layout/build/Makefile.in
@@ -203,16 +203,22 @@ SHARED_LIBRARY_LIBS 	+= \
 endif
 
 ifdef MOZ_DASH
 SHARED_LIBRARY_LIBS += \
   $(DEPTH)/content/media/dash/$(LIB_PREFIX)gkcondash_s.$(LIB_SUFFIX) \
   $(NULL)
 endif
 
+ifdef MOZ_WEBSPEECH
+SHARED_LIBRARY_LIBS += \
+  $(DEPTH)/content/media/webspeech/$(LIB_PREFIX)gkconwebspeech_s.$(LIB_SUFFIX) \
+  $(NULL)
+endif
+
 ifeq (gonk,$(MOZ_WIDGET_TOOLKIT))
 INCLUDES	+= \
 		-I$(srcdir)/../../base/src \
 		-I$(srcdir)/../../html/content/src \
 		-I$(ANDROID_SOURCE)/dalvik/libnativehelper/include/nativehelper \
 		-I$(ANDROID_SOURCE)/frameworks/base/include/ \
 		-I$(ANDROID_SOURCE)/frameworks/base/include/binder/ \
 		-I$(ANDROID_SOURCE)/frameworks/base/include/utils/ \
--- a/mobile/android/installer/package-manifest.in
+++ b/mobile/android/installer/package-manifest.in
@@ -136,16 +136,19 @@
 @BINPATH@/components/dom_sidebar.xpt
 @BINPATH@/components/dom_mobilemessage.xpt
 @BINPATH@/components/dom_storage.xpt
 @BINPATH@/components/dom_stylesheets.xpt
 @BINPATH@/components/dom_system.xpt
 @BINPATH@/components/dom_threads.xpt
 @BINPATH@/components/dom_traversal.xpt
 @BINPATH@/components/dom_views.xpt
+#ifdef MOZ_WEBSPEECH
+@BINPATH@/components/dom_webspeech.xpt
+#endif
 @BINPATH@/components/dom_xbl.xpt
 @BINPATH@/components/dom_xpath.xpt
 @BINPATH@/components/dom_xul.xpt
 @BINPATH@/components/downloads.xpt
 @BINPATH@/components/editor.xpt
 @BINPATH@/components/embed_base.xpt
 @BINPATH@/components/extensions.xpt
 @BINPATH@/components/exthandler.xpt
--- a/modules/libpref/src/init/all.js
+++ b/modules/libpref/src/init/all.js
@@ -198,16 +198,20 @@ pref("media.peerconnection.agc", 1);
 pref("media.peerconnection.noise_enabled", false);
 pref("media.peerconnection.noise", 1);
 #else
 #ifdef ANDROID
 pref("media.navigator.enabled", true);
 #endif
 #endif
 
+#ifdef MOZ_WEBSPEECH
+pref("media.webspeech.recognition.enable", false);
+#endif
+
 // Whether to enable Web Audio support
 pref("media.webaudio.enabled", false);
 
 // Whether to autostart a media element with an |autoplay| attribute
 pref("media.autoplay.enabled", true);
 
 // The default number of decoded video frames that are enqueued in
 // MediaDecoderReader's mVideoQueue.
--- a/toolkit/content/license.html
+++ b/toolkit/content/license.html
@@ -1064,18 +1064,19 @@ WITH THE USE OR PERFORMANCE OF THIS SOFT
 
     <p>This license applies to parts of the code in
       <span class="path">editor/libeditor/base/nsEditorEventListener.cpp</span>,
       <span class="path">widget/cocoa/GfxInfo.mm</span>
       and also some files in the directories
       <span class="path">ipc/chromium/</span>,
       <span class="path">dom/plugins/</span>,
       <span class="path">tools/profiler/sps/</span>,
-      <span class="path">gfx/ots/</span> and
-      <span class="path">gfx/ycbcr</span>.
+      <span class="path">gfx/ots/</span>,
+      <span class="path">gfx/ycbcr</span> and
+      <span class="path">content/media/webspeech/recognition/</span>.
     </p>
 
 <pre>
 Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are
 met: