Bug 1251502 - Add a generic duplex resampler and a duplex WASAPI implementation. r=kinetik
authorPaul Adenot <paul@paul.cx>
Fri, 25 Mar 2016 17:44:02 +0100
changeset 290641 54534d5c938247df4873aa8c37116b8c5de42a52
parent 290640 ca8f6e82829b6b904bdb6e95e88cfef95161aa74
child 290642 5e1216b00e35c597cfe721677d4f0392262a64e3
push id19656
push usergwagner@mozilla.com
push dateMon, 04 Apr 2016 13:43:23 +0000
treeherderb2g-inbound@e99061fde28a [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerskinetik
bugs1251502
milestone48.0a1
Bug 1251502 - Add a generic duplex resampler and a duplex WASAPI implementation. r=kinetik This also imports various other commits from cubeb. MozReview-Commit-ID: 4oUwHR8EUvd
media/libcubeb/README_MOZILLA
media/libcubeb/include/cubeb.h
media/libcubeb/src/android/audiotrack_definitions.h
media/libcubeb/src/cubeb.c
media/libcubeb/src/cubeb_opensl.c
media/libcubeb/src/cubeb_panner.cpp
media/libcubeb/src/cubeb_pulse.c
media/libcubeb/src/cubeb_resampler.cpp
media/libcubeb/src/cubeb_resampler.h
media/libcubeb/src/cubeb_resampler_internal.h
media/libcubeb/src/cubeb_ring_array.h
media/libcubeb/src/cubeb_sndio.c
media/libcubeb/src/cubeb_utils.h
media/libcubeb/src/cubeb_wasapi.cpp
media/libcubeb/tests/common.h
media/libcubeb/tests/test_devices.cpp
media/libcubeb/tests/test_duplex.cpp
media/libcubeb/tests/test_record.cpp
media/libcubeb/tests/test_resampler.cpp
media/libcubeb/tests/test_utils.cpp
--- a/media/libcubeb/README_MOZILLA
+++ b/media/libcubeb/README_MOZILLA
@@ -1,8 +1,8 @@
 The source from this directory was copied from the cubeb 
 git repository using the update.sh script.  The only changes
 made were those applied by update.sh and the addition of
 Makefile.in build files for the Mozilla build system.
 
 The cubeb git repository is: git://github.com/kinetiknz/cubeb.git
 
-The git commit ID used was c438f775a69cdc8ba6d7d543073c3ccf982050b8.
+The git commit ID used was f3470c48b362c9f74c1aeb55a6ed440d0509dfa1.
--- a/media/libcubeb/include/cubeb.h
+++ b/media/libcubeb/include/cubeb.h
@@ -2,17 +2,17 @@
  * Copyright © 2011 Mozilla Foundation
  *
  * This program is made available under an ISC-style license.  See the
  * accompanying file LICENSE for details.
  */
 #if !defined(CUBEB_c2f983e9_c96f_e71c_72c3_bbf62992a382)
 #define CUBEB_c2f983e9_c96f_e71c_72c3_bbf62992a382
 
-#include <cubeb/cubeb-stdint.h>
+#include <stdint.h>
 
 #if defined(__cplusplus)
 extern "C" {
 #endif
 
 /** @mainpage
 
     @section intro Introduction
--- a/media/libcubeb/src/android/audiotrack_definitions.h
+++ b/media/libcubeb/src/android/audiotrack_definitions.h
@@ -9,17 +9,17 @@
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 
-#include <cubeb/cubeb-stdint.h>
+#include <stdint.h>
 
 /*
  * The following definitions are copied from the android sources. Only the
  * relevant enum member and values needed are copied.
  */
 
 /*
  * From https://android.googlesource.com/platform/frameworks/base/+/android-2.2.3_r2.1/include/utils/Errors.h
--- a/media/libcubeb/src/cubeb.c
+++ b/media/libcubeb/src/cubeb.c
@@ -420,16 +420,20 @@ int cubeb_device_collection_destroy(cube
     cubeb_device_info_destroy(collection->device[i]);
 
   free(collection);
   return CUBEB_OK;
 }
 
 int cubeb_device_info_destroy(cubeb_device_info * info)
 {
+  if (info == NULL) {
+    return CUBEB_ERROR_INVALID_PARAMETER;
+  }
+
   free(info->device_id);
   free(info->friendly_name);
   free(info->group_id);
   free(info->vendor_name);
 
   free(info);
   return CUBEB_OK;
 }
--- a/media/libcubeb/src/cubeb_opensl.c
+++ b/media/libcubeb/src/cubeb_opensl.c
@@ -12,16 +12,17 @@
 #include <SLES/OpenSLES.h>
 #include <math.h>
 #include <time.h>
 #if defined(__ANDROID__)
 #include <sys/system_properties.h>
 #include "android/sles_definitions.h"
 #include <SLES/OpenSLES_Android.h>
 #include <android/log.h>
+#include <android/api-level.h>
 #define LOG(args...)  __android_log_print(ANDROID_LOG_INFO, "Cubeb_OpenSL" , ## args)
 #define ANDROID_VERSION_GINGERBREAD_MR1 10
 #endif
 #include "cubeb/cubeb.h"
 #include "cubeb-internal.h"
 #include "cubeb_resampler.h"
 #include "cubeb-sles.h"
 
@@ -115,18 +116,19 @@ bufferqueue_callback(SLBufferQueueItf ca
   for (i = state.count; i < NBUFS; i++) {
     uint8_t *buf = stm->queuebuf[stm->queuebuf_idx];
     long written = 0;
     pthread_mutex_lock(&stm->mutex);
     int draining = stm->draining;
     pthread_mutex_unlock(&stm->mutex);
 
     if (!draining) {
-      written = cubeb_resampler_fill(stm->resampler, NULL, buf,
-                                     stm->queuebuf_len / stm->framesize);
+      written = cubeb_resampler_fill(stm->resampler,
+                                     NULL, NULL,
+                                     buf, stm->queuebuf_len / stm->framesize);
       if (written < 0 || written * stm->framesize > stm->queuebuf_len) {
         (*stm->play)->SetPlayState(stm->play, SL_PLAYSTATE_PAUSED);
         return;
       }
     }
 
     // Keep sending silent data even in draining mode to prevent the audio
     // back-end from being stopped automatically by OpenSL/ES.
@@ -175,17 +177,17 @@ convert_stream_type_to_sl_stream(cubeb_s
   default:
     return 0xFFFFFFFF;
   }
 }
 #endif
 
 static void opensl_destroy(cubeb * ctx);
 
-#if defined(__ANDROID__)
+#if defined(__ANDROID__) && (__ANDROID_API__ <= ANDROID_VERSION_GINGERBREAD_MR1)
 
 static int
 get_android_version(void)
 {
   char version_string[PROP_VALUE_MAX];
 
   memset(version_string, 0, PROP_VALUE_MAX);
 
@@ -199,17 +201,17 @@ get_android_version(void)
 }
 #endif
 
 /*static*/ int
 opensl_init(cubeb ** context, char const * context_name)
 {
   cubeb * ctx;
 
-#if defined(__ANDROID__)
+#if defined(__ANDROID__) && (__ANDROID_API__ <= ANDROID_VERSION_GINGERBREAD_MR1)
   int android_version = get_android_version();
   if (android_version > 0 && android_version <= ANDROID_VERSION_GINGERBREAD_MR1) {
     // Don't even attempt to run on Gingerbread and lower
     return CUBEB_ERROR;
   }
 #endif
 
   *context = NULL;
@@ -335,17 +337,16 @@ opensl_get_preferred_sample_rate(cubeb *
 {
   /* https://android.googlesource.com/platform/ndk.git/+/master/docs/opensles/index.html
    * We don't want to deal with JNI here (and we don't have Java on b2g anyways),
    * so we just dlopen the library and get the two symbols we need. */
   int r;
   void * libmedia;
   uint32_t (*get_primary_output_samplingrate)();
   uint32_t (*get_output_samplingrate)(int * samplingRate, int streamType);
-  uint32_t primary_sampling_rate;
 
   libmedia = dlopen("libmedia.so", RTLD_LAZY);
   if (!libmedia) {
     return CUBEB_ERROR;
   }
 
   /* uint32_t AudioSystem::getPrimaryOutputSamplingRate(void) */
   get_primary_output_samplingrate =
@@ -579,20 +580,19 @@ opensl_stream_init(cubeb * ctx, cubeb_st
   stm->outputrate = preferred_sampling_rate;
   stm->bytespersec = preferred_sampling_rate * stm->framesize;
   stm->queuebuf_len = (stm->bytespersec * latency) / (1000 * NBUFS);
   // round up to the next multiple of stm->framesize, if needed.
   if (stm->queuebuf_len % stm->framesize) {
     stm->queuebuf_len += stm->framesize - (stm->queuebuf_len % stm->framesize);
   }
 
-  stm->resampler = cubeb_resampler_create(stm, *output_stream_params,
+  stm->resampler = cubeb_resampler_create(stm, NULL, output_stream_params,
                                           preferred_sampling_rate,
                                           data_callback,
-                                          stm->queuebuf_len / stm->framesize,
                                           user_ptr,
                                           CUBEB_RESAMPLER_QUALITY_DEFAULT);
 
   if (!stm->resampler) {
     opensl_stream_destroy(stm);
     return CUBEB_ERROR;
   }
 
--- a/media/libcubeb/src/cubeb_panner.cpp
+++ b/media/libcubeb/src/cubeb_panner.cpp
@@ -2,20 +2,24 @@
  * Copyright © 2014 Mozilla Foundation
  *
  * This program is made available under an ISC-style license.  See the
  * accompanying file LICENSE for details.
  */
 
 #define _USE_MATH_DEFINES
 #include <math.h>
-#include <cubeb/cubeb-stdint.h>
+#include <stdint.h>
 
 #include "cubeb_panner.h"
 
+#ifndef M_PI
+#define M_PI 3.14159263
+#endif
+
 /**
  * We use a cos/sin law.
  */
 
 namespace {
 template<typename T>
 void cubeb_pan_stereo_buffer(T * buf, uint32_t frames, float pan)
 {
--- a/media/libcubeb/src/cubeb_pulse.c
+++ b/media/libcubeb/src/cubeb_pulse.c
@@ -1031,17 +1031,17 @@ pulse_sink_info_cb(pa_context * context,
   (void)context;
 
   if (eol || info == NULL)
     return;
 
   devinfo = calloc(1, sizeof(cubeb_device_info));
 
   devinfo->device_id = strdup(info->name);
-  devinfo->devid = (cubeb_devid)devinfo->device_id;
+  devinfo->devid = devinfo->device_id;
   devinfo->friendly_name = strdup(info->description);
   prop = WRAP(pa_proplist_gets)(info->proplist, "sysfs.path");
   if (prop)
     devinfo->group_id = strdup(prop);
   prop = WRAP(pa_proplist_gets)(info->proplist, "device.vendor.name");
   if (prop)
     devinfo->vendor_name = strdup(prop);
 
@@ -1091,17 +1091,17 @@ pulse_source_info_cb(pa_context * contex
   (void)context;
 
   if (eol)
     return;
 
   devinfo = calloc(1, sizeof(cubeb_device_info));
 
   devinfo->device_id = strdup(info->name);
-  devinfo->devid = (cubeb_devid)devinfo->device_id;
+  devinfo->devid = devinfo->device_id;
   devinfo->friendly_name = strdup(info->description);
   prop = WRAP(pa_proplist_gets)(info->proplist, "sysfs.path");
   if (prop)
     devinfo->group_id = strdup(prop);
   prop = WRAP(pa_proplist_gets)(info->proplist, "device.vendor.name");
   if (prop)
     devinfo->vendor_name = strdup(prop);
 
--- a/media/libcubeb/src/cubeb_resampler.cpp
+++ b/media/libcubeb/src/cubeb_resampler.cpp
@@ -1,256 +1,281 @@
 /*
  * Copyright © 2014 Mozilla Foundation
  *
  * This program is made available under an ISC-style license.  See the
  * accompanying file LICENSE for details.
  */
+#include <algorithm>
 #include <cmath>
 #include <cassert>
 #include <cstring>
 #include <cstddef>
 #include <cstdio>
 #if defined(HAVE_CONFIG_H)
 #include "config.h"
 #endif
 #include "cubeb_resampler.h"
 #include "cubeb-speex-resampler.h"
-
-namespace {
-
-template<typename T>
-class auto_array
-{
-public:
-  auto_array(uint32_t size)
-    : data(new T[size])
-  {}
-
-  ~auto_array()
-  {
-    delete [] data;
-  }
-
-  T * get() const
-  {
-    return data;
-  }
-
-private:
-  T * data;
-};
-
-long
-frame_count_at_rate(long frame_count, float rate)
-{
-  return static_cast<long>(ceilf(rate * frame_count) + 1);
-}
-
-size_t
-frames_to_bytes(cubeb_stream_params params, size_t frames)
-{
-  assert(params.format == CUBEB_SAMPLE_S16NE || params.format == CUBEB_SAMPLE_FLOAT32NE);
-  size_t sample_size = params.format == CUBEB_SAMPLE_S16NE ? sizeof(short) : sizeof(float);
-  size_t frame_size = params.channels * sample_size;
-  return frame_size * frames;
-}
+#include "cubeb_resampler_internal.h"
+#include "cubeb_utils.h"
 
 int
 to_speex_quality(cubeb_resampler_quality q)
 {
   switch(q) {
   case CUBEB_RESAMPLER_QUALITY_VOIP:
     return SPEEX_RESAMPLER_QUALITY_VOIP;
   case CUBEB_RESAMPLER_QUALITY_DEFAULT:
     return SPEEX_RESAMPLER_QUALITY_DEFAULT;
   case CUBEB_RESAMPLER_QUALITY_DESKTOP:
     return SPEEX_RESAMPLER_QUALITY_DESKTOP;
   default:
     assert(false);
     return 0XFFFFFFFF;
   }
 }
-} // end of anonymous namespace
-
-struct cubeb_resampler {
-  virtual long fill(void * input_buffer, void * output_buffer, long frames_needed) = 0;
-  virtual ~cubeb_resampler() {}
-};
 
-class noop_resampler : public cubeb_resampler {
-public:
-  noop_resampler(cubeb_stream * s,
-                 cubeb_data_callback cb,
-                 void * ptr)
-    : stream(s)
-    , data_callback(cb)
-    , user_ptr(ptr)
-  {
+long noop_resampler::fill(void * input_buffer, long * input_frames_count,
+                          void * output_buffer, long output_frames)
+{
+  assert((input_buffer && output_buffer &&
+         *input_frames_count >= output_frames) ||
+         (!input_buffer && (!input_frames_count || *input_frames_count == 0)) ||
+         (!output_buffer && output_frames == 0));
+
+  if (output_buffer == nullptr) {
+    output_frames = *input_frames_count;
   }
 
-  virtual long fill(void * input_buffer, void * output_buffer, long frames_needed)
-  {
-    long got = data_callback(stream, user_ptr, input_buffer, output_buffer, frames_needed);
-    assert(got <= frames_needed);
-    return got;
+  if (input_buffer && *input_frames_count != output_frames) {
+    assert(*input_frames_count > output_frames);
+    *input_frames_count = output_frames;
   }
 
-private:
-  cubeb_stream * const stream;
-  const cubeb_data_callback data_callback;
-  void * const user_ptr;
-};
-
-class cubeb_resampler_speex : public cubeb_resampler {
-public:
-  cubeb_resampler_speex(SpeexResamplerState * r, cubeb_stream * s,
-                        cubeb_stream_params params, uint32_t out_rate,
-                        cubeb_data_callback cb, long max_count,
-                        void * ptr);
-
-  virtual ~cubeb_resampler_speex();
-
-  virtual long fill(void * input_buffer, void * output_buffer, long frames_needed);
-
-private:
-  SpeexResamplerState * const speex_resampler;
-  cubeb_stream * const stream;
-  const cubeb_stream_params stream_params;
-  const cubeb_data_callback data_callback;
-  void * const user_ptr;
+  return data_callback(stream, user_ptr,
+                       input_buffer, output_buffer, output_frames);
+}
 
-  // Maximum number of frames we can be requested in a callback.
-  const long buffer_frame_count;
-  // input rate / output rate
-  const float resampling_ratio;
-  // Maximum frames that can be stored in |leftover_frames_buffer|.
-  const uint32_t leftover_frame_size;
-  // Number of leftover frames stored in |leftover_frames_buffer|.
-  uint32_t leftover_frame_count;
-
-  // A little buffer to store the leftover frames,
-  // that is, the samples not consumed by the resampler that we will end up
-  // using next time fill() is called.
-  auto_array<uint8_t> leftover_frames_buffer;
-  // A buffer to store frames that will be consumed by the resampler.
-  auto_array<uint8_t> resampling_src_buffer;
-};
-
-cubeb_resampler_speex::cubeb_resampler_speex(SpeexResamplerState * r,
-                                             cubeb_stream * s,
-                                             cubeb_stream_params params,
-                                             uint32_t out_rate,
-                                             cubeb_data_callback cb,
-                                             long max_count,
-                                             void * ptr)
-  : speex_resampler(r)
+template<typename T, typename InputProcessor, typename OutputProcessor>
+cubeb_resampler_speex<T, InputProcessor, OutputProcessor>
+  ::cubeb_resampler_speex(InputProcessor * input_processor,
+                          OutputProcessor * output_processor,
+                          cubeb_stream * s,
+                          cubeb_data_callback cb,
+                          void * ptr)
+  : input_processor(input_processor)
+  , output_processor(output_processor)
   , stream(s)
-  , stream_params(params)
   , data_callback(cb)
   , user_ptr(ptr)
-  , buffer_frame_count(max_count)
-  , resampling_ratio(static_cast<float>(params.rate) / out_rate)
-  , leftover_frame_size(static_cast<uint32_t>(ceilf(1 / resampling_ratio * 2) + 1))
-  , leftover_frame_count(0)
-  , leftover_frames_buffer(auto_array<uint8_t>(frames_to_bytes(params, leftover_frame_size)))
-  , resampling_src_buffer(auto_array<uint8_t>(frames_to_bytes(params,
-        frame_count_at_rate(buffer_frame_count, resampling_ratio))))
 {
-  assert(r);
+  if (input_processor && output_processor) {
+    // Add some delay on the processor that has the lowest delay so that the
+    // streams are synchronized.
+    uint32_t in_latency = input_processor->latency();
+    uint32_t out_latency = output_processor->latency();
+    if (in_latency > out_latency) {
+      uint32_t latency_diff = in_latency - out_latency;
+      output_processor->add_latency(latency_diff);
+    } else if (in_latency < out_latency) {
+      uint32_t latency_diff = out_latency - in_latency;
+      input_processor->add_latency(latency_diff);
+    }
+    fill_internal = &cubeb_resampler_speex::fill_internal_duplex;
+  }  else if (input_processor) {
+    fill_internal = &cubeb_resampler_speex::fill_internal_input;
+  }  else if (output_processor) {
+    fill_internal = &cubeb_resampler_speex::fill_internal_output;
+  }
 }
 
-cubeb_resampler_speex::~cubeb_resampler_speex()
+template<typename T, typename InputProcessor, typename OutputProcessor>
+cubeb_resampler_speex<T, InputProcessor, OutputProcessor>
+  ::~cubeb_resampler_speex()
+{ }
+
+template<typename T, typename InputProcessor, typename OutputProcessor>
+long
+cubeb_resampler_speex<T, InputProcessor, OutputProcessor>
+::fill(void * input_buffer, long * input_frames_count,
+       void * output_buffer, long output_frames_needed)
+{
+  /* Input and output buffers, typed */
+  T * in_buffer = reinterpret_cast<T*>(input_buffer);
+  T * out_buffer = reinterpret_cast<T*>(output_buffer);
+  return (this->*fill_internal)(in_buffer, input_frames_count,
+                                out_buffer, output_frames_needed);
+}
+
+template<typename T, typename InputProcessor, typename OutputProcessor>
+long
+cubeb_resampler_speex<T, InputProcessor, OutputProcessor>
+::fill_internal_output(T * input_buffer, long * input_frames_count,
+                       T * output_buffer, long output_frames_needed)
 {
-  speex_resampler_destroy(speex_resampler);
+  assert(!input_buffer && (!input_frames_count || *input_frames_count == 0) &&
+         output_buffer && output_frames_needed);
+
+  long got = 0;
+  T * out_unprocessed = nullptr;
+  long output_frames_before_processing = 0;
+
+
+  /* fill directly the input buffer of the output processor to save a copy */
+  output_frames_before_processing =
+    output_processor->input_needed_for_output(output_frames_needed);
+
+  out_unprocessed =
+    output_processor->input_buffer(output_frames_before_processing);
+
+  got = data_callback(stream, user_ptr,
+                      nullptr, out_unprocessed,
+                      output_frames_before_processing);
+
+  output_processor->written(got);
+
+  /* Process the output. If not enough frames have been returned from the
+  * callback, drain the processors. */
+  return output_processor->output(output_buffer, output_frames_needed);
 }
 
+template<typename T, typename InputProcessor, typename OutputProcessor>
 long
-cubeb_resampler_speex::fill(void * input_buffer, void * output_buffer, long frames_needed)
+cubeb_resampler_speex<T, InputProcessor, OutputProcessor>
+::fill_internal_input(T * input_buffer, long * input_frames_count,
+                      T * output_buffer, long output_frames_needed)
 {
-  // Use more input frames than strictly necessary, so in the worst case,
-  // we have leftover unresampled frames at the end, that we can use
-  // during the next iteration.
-  assert(frames_needed <= buffer_frame_count);
-  long before_resampling = frame_count_at_rate(frames_needed, resampling_ratio);
-  long frames_requested = before_resampling - leftover_frame_count;
+  assert(input_buffer && input_frames_count && *input_frames_count &&
+         !output_buffer);
+
+  /* The input data, after eventual resampling. This is passed to the callback. */
+  T * resampled_input = nullptr;
+  uint32_t resampled_frame_count = input_processor->output_for_input(*input_frames_count);
+
+  /* process the input, and present exactly `output_frames_needed` in the
+  * callback. */
+  input_processor->input(input_buffer, *input_frames_count);
+  resampled_input = input_processor->output(resampled_frame_count);
+
+  return data_callback(stream, user_ptr,
+                       resampled_input, nullptr, resampled_frame_count);
+}
+
 
-  // Copy the previous leftover frames to the front of the buffer.
-  size_t leftover_bytes = frames_to_bytes(stream_params, leftover_frame_count);
-  memcpy(resampling_src_buffer.get(), leftover_frames_buffer.get(), leftover_bytes);
-  uint8_t * buffer_start = resampling_src_buffer.get() + leftover_bytes;
+template<typename T, typename InputProcessor, typename OutputProcessor>
+long
+cubeb_resampler_speex<T, InputProcessor, OutputProcessor>
+::fill_internal_duplex(T * in_buffer, long * input_frames_count,
+                       T * out_buffer, long output_frames_needed)
+{
+  /* The input data, after eventual resampling. This is passed to the callback. */
+  T * resampled_input = nullptr;
+  /* The output buffer passed down in the callback, that might be resampled. */
+  T * out_unprocessed = nullptr;
+  size_t output_frames_before_processing = 0;
+  /* The number of frames returned from the callback. */
+  long got = 0;
 
-  long got = data_callback(stream, user_ptr, NULL, buffer_start, frames_requested);
-  assert(got <= frames_requested);
+  /* We need to determine how much frames to present to the consumer.
+   * - If we have a two way stream, but we're only resampling input, we resample
+   * the input to the number of output frames.
+   * - If we have a two way stream, but we're only resampling the output, we
+   * resize the input buffer of the output resampler to the number of input
+   * frames, and we resample it afterwards.
+   * - If we resample both ways, we resample the input to the number of frames
+   * we would need to pass down to the consumer (before resampling the output),
+   * get the output data, and resample it to the number of frames needed by the
+   * caller. */
 
-  if (got < 0) {
-    return CUBEB_ERROR;
+  output_frames_before_processing =
+    output_processor->input_needed_for_output(output_frames_needed);
+   /* fill directly the input buffer of the output processor to save a copy */
+  out_unprocessed =
+    output_processor->input_buffer(output_frames_before_processing);
+
+  if (in_buffer) {
+    /* process the input, and present exactly `output_frames_needed` in the
+    * callback. */
+    input_processor->input(in_buffer, *input_frames_count);
+    resampled_input =
+      input_processor->output(output_frames_before_processing);
+  } else {
+    resampled_input = nullptr;
   }
 
-  uint32_t in_frames = leftover_frame_count + got;
-  uint32_t out_frames = frames_needed;
-  uint32_t old_in_frames = in_frames;
+  got = data_callback(stream, user_ptr,
+                      resampled_input, out_unprocessed,
+                      output_frames_before_processing);
+
+  output_processor->written(got);
 
-  if (stream_params.format == CUBEB_SAMPLE_FLOAT32NE) {
-    float * in_buffer = reinterpret_cast<float *>(resampling_src_buffer.get());
-    float * out_buffer = reinterpret_cast<float *>(output_buffer);
-    speex_resampler_process_interleaved_float(speex_resampler, in_buffer, &in_frames,
-                                              out_buffer, &out_frames);
-  } else {
-    short * in_buffer = reinterpret_cast<short *>(resampling_src_buffer.get());
-    short * out_buffer = reinterpret_cast<short *>(output_buffer);
-    speex_resampler_process_interleaved_int(speex_resampler, in_buffer, &in_frames,
-                                            out_buffer, &out_frames);
-  }
+  /* Process the output. If not enough frames have been returned from the
+   * callback, drain the processors. */
+  return output_processor->output(out_buffer, output_frames_needed);
+}
 
-  // Copy the leftover frames to buffer for the next time.
-  leftover_frame_count = old_in_frames - in_frames;
-  assert(leftover_frame_count <= leftover_frame_size);
-
-  size_t unresampled_bytes = frames_to_bytes(stream_params, leftover_frame_count);
-  uint8_t * leftover_frames_start = resampling_src_buffer.get();
-  leftover_frames_start += frames_to_bytes(stream_params, in_frames);
-  memcpy(leftover_frames_buffer.get(), leftover_frames_start, unresampled_bytes);
-
-  return out_frames;
-}
+/* Resampler C API */
 
 cubeb_resampler *
 cubeb_resampler_create(cubeb_stream * stream,
-                       cubeb_stream_params params,
-                       unsigned int out_rate,
+                       cubeb_stream_params * input_params,
+                       cubeb_stream_params * output_params,
+                       unsigned int target_rate,
                        cubeb_data_callback callback,
-                       long buffer_frame_count,
                        void * user_ptr,
                        cubeb_resampler_quality quality)
 {
-  if (params.rate != out_rate) {
-    SpeexResamplerState * resampler = NULL;
-    resampler = speex_resampler_init(params.channels,
-                                     params.rate,
-                                     out_rate,
-                                     to_speex_quality(quality),
-                                     NULL);
-    if (!resampler) {
-      return NULL;
-    }
+  cubeb_sample_format format;
+
+  assert(input_params || output_params);
 
-    return new cubeb_resampler_speex(resampler, stream, params, out_rate,
-                                     callback, buffer_frame_count, user_ptr);
+  if (input_params) {
+    format = input_params->format;
+  } else {
+    format = output_params->format;
   }
 
-  return new noop_resampler(stream, callback, user_ptr);
+  switch(format) {
+    case CUBEB_SAMPLE_S16NE:
+      return cubeb_resampler_create_internal<short>(stream,
+                                                    input_params,
+                                                    output_params,
+                                                    target_rate,
+                                                    callback,
+                                                    user_ptr,
+                                                    quality);
+    case CUBEB_SAMPLE_FLOAT32NE:
+      return cubeb_resampler_create_internal<float>(stream,
+                                                    input_params,
+                                                    output_params,
+                                                    target_rate,
+                                                    callback,
+                                                    user_ptr,
+                                                    quality);
+    default:
+      assert(false);
+      return nullptr;
+  }
 }
 
 long
 cubeb_resampler_fill(cubeb_resampler * resampler,
                      void * input_buffer,
+                     long * input_frames_count,
                      void * output_buffer,
-                     long frames_needed)
+                     long output_frames_needed)
 {
-  return resampler->fill(input_buffer, output_buffer, frames_needed);
+  return resampler->fill(input_buffer, input_frames_count,
+                         output_buffer, output_frames_needed);
 }
 
 void
 cubeb_resampler_destroy(cubeb_resampler * resampler)
 {
   delete resampler;
 }
+
+long
+cubeb_resampler_latency(cubeb_resampler * resampler)
+{
+  return resampler->latency();
+}
--- a/media/libcubeb/src/cubeb_resampler.h
+++ b/media/libcubeb/src/cubeb_resampler.h
@@ -21,49 +21,58 @@ typedef enum {
   CUBEB_RESAMPLER_QUALITY_DESKTOP
 } cubeb_resampler_quality;
 
 /**
  * Create a resampler to adapt the requested sample rate into something that
  * is accepted by the audio backend.
  * @param stream A cubeb_stream instance supplied to the data callback.
  * @param params Used to calculate bytes per frame and buffer size for resampling.
- * @param out_rate The sampling rate after resampling.
+ * @param target_rate The sampling rate after resampling.
  * @param callback A callback to request data for resampling.
- * @param buffer_frame_count Maximum number of frames passed to cubeb_resampler_fill
- *                           as |frames_needed|. This is also used to calculate
- *                           the size of buffer allocated for resampling.
  * @param user_ptr User data supplied to the data callback.
  * @param quality Quality of the resampler.
  * @retval A non-null pointer if success.
  */
 cubeb_resampler * cubeb_resampler_create(cubeb_stream * stream,
-                                         cubeb_stream_params params,
-                                         unsigned int out_rate,
+                                         cubeb_stream_params * input_params,
+                                         cubeb_stream_params * output_params,
+                                         unsigned int target_rate,
                                          cubeb_data_callback callback,
-                                         long buffer_frame_count,
                                          void * user_ptr,
                                          cubeb_resampler_quality quality);
 
 /**
  * Fill the buffer with frames acquired using the data callback. Resampling will
  * happen if necessary.
  * @param resampler A cubeb_resampler instance.
+ * @param input_buffer A buffer of input samples
+ * @param input_frame_count The size of the buffer. Returns the number of frames
+ * consumed.
  * @param buffer The buffer to be filled.
  * @param frames_needed Number of frames that should be produced.
  * @retval Number of frames that are actually produced.
  * @retval CUBEB_ERROR on error.
  */
 long cubeb_resampler_fill(cubeb_resampler * resampler,
-                          void * input_buffer, 
-						  void * output_buffer, long frames_needed);
+                          void * input_buffer,
+                          long * input_frame_count,
+                          void * output_buffer,
+                          long output_frames_needed);
 
 /**
  * Destroy a cubeb_resampler.
  * @param resampler A cubeb_resampler instance.
  */
 void cubeb_resampler_destroy(cubeb_resampler * resampler);
 
+/**
+ * Returns the latency, in frames, of the resampler.
+ * @param resampler A cubeb resampler instance.
+ * @retval The latency, in frames, induced by the resampler.
+ */
+long cubeb_resampler_latency(cubeb_resampler * resampler);
+
 #if defined(__cplusplus)
 }
 #endif
 
 #endif /* CUBEB_RESAMPLER_H */
new file mode 100644
--- /dev/null
+++ b/media/libcubeb/src/cubeb_resampler_internal.h
@@ -0,0 +1,535 @@
+/*
+ * Copyright © 2016 Mozilla Foundation
+ *
+ * This program is made available under an ISC-style license.  See the
+ * accompanying file LICENSE for details.
+ */
+
+#if !defined(CUBEB_RESAMPLER_INTERNAL)
+#define CUBEB_RESAMPLER_INTERNAL
+
+#include <cmath>
+#include <cassert>
+#include <algorithm>
+#include <memory>
+#ifdef CUBEB_GECKO_BUILD
+#include "mozilla/UniquePtr.h"
+namespace std
+{
+  using mozilla::DefaultDelete;
+  using mozilla::UniquePtr;
+  #define default_delete DefaultDelete
+  #define unique_ptr UniquePtr
+}
+#endif
+#include "cubeb/cubeb.h"
+#include "cubeb_utils.h"
+#include "cubeb-speex-resampler.h"
+#include "cubeb_resampler.h"
+#include <stdio.h>
+
+/* This header file contains the internal C++ API of the resamplers, for testing. */
+
+int to_speex_quality(cubeb_resampler_quality q);
+
+struct cubeb_resampler {
+  virtual long fill(void * input_buffer, long * input_frames_count,
+                    void * output_buffer, long frames_needed) = 0;
+  virtual long latency() = 0;
+  virtual ~cubeb_resampler() {}
+};
+
+class noop_resampler : public cubeb_resampler {
+public:
+  noop_resampler(cubeb_stream * s,
+                 cubeb_data_callback cb,
+                 void * ptr)
+    : stream(s)
+    , data_callback(cb)
+    , user_ptr(ptr)
+  {
+  }
+
+  virtual long fill(void * input_buffer, long * input_frames_count,
+                    void * output_buffer, long output_frames);
+
+  virtual long latency()
+  {
+    return 0;
+  }
+
+private:
+  cubeb_stream * const stream;
+  const cubeb_data_callback data_callback;
+  void * const user_ptr;
+};
+
+/** Base class for processors. This is just used to share methods for now. */
+class processor {
+public:
+  explicit processor(uint32_t channels)
+    : channels(channels)
+  {}
+protected:
+  size_t frames_to_samples(size_t frames)
+  {
+    return frames * channels;
+  }
+  size_t samples_to_frames(size_t samples)
+  {
+    assert(!(samples % channels));
+    return samples / channels;
+  }
+  /** The number of channel of the audio buffers to be resampled. */
+  const uint32_t channels;
+};
+
+/** Bidirectional resampler, can resample an input and an output stream, or just
+ * an input stream or output stream. In this case a delay is inserted in the
+ * opposite direction to keep the streams synchronized. */
+template<typename T, typename InputProcessing, typename OutputProcessing>
+class cubeb_resampler_speex : public cubeb_resampler {
+public:
+  cubeb_resampler_speex(InputProcessing * input_processor,
+                        OutputProcessing * output_processor,
+                        cubeb_stream * s,
+                        cubeb_data_callback cb,
+                        void * ptr);
+
+  virtual ~cubeb_resampler_speex();
+
+  virtual long fill(void * input_buffer, long * input_frames_count,
+                    void * output_buffer, long output_frames_needed);
+
+  virtual long latency()
+  {
+    if (input_processor && output_processor) {
+      assert(input_processor->latency() == output_processor->latency());
+      return input_processor->latency();
+    } else if (input_processor) {
+      return input_processor->latency();
+    } else {
+      return output_processor->latency();
+    }
+  }
+
+private:
+  typedef long(cubeb_resampler_speex::*processing_callback)(T * input_buffer, long * input_frames_count, T * output_buffer, long output_frames_needed);
+
+  long fill_internal_duplex(T * input_buffer, long * input_frames_count,
+                            T * output_buffer, long output_frames_needed);
+  long fill_internal_input(T * input_buffer, long * input_frames_count,
+                           T * output_buffer, long output_frames_needed);
+  long fill_internal_output(T * input_buffer, long * input_frames_count,
+                            T * output_buffer, long output_frames_needed);
+
+  std::unique_ptr<InputProcessing> input_processor;
+  std::unique_ptr<OutputProcessing> output_processor;
+  processing_callback fill_internal;
+  cubeb_stream * const stream;
+  const cubeb_data_callback data_callback;
+  void * const user_ptr;
+};
+
+/** Handles one way of a (possibly) duplex resampler, working on interleaved
+ * audio buffers of type T. This class is designed so that the number of frames
+ * coming out of the resampler can be precisely controled. It manages its own
+ * input buffer, and can use the caller's output buffer, or allocate its own. */
+template<typename T>
+class cubeb_resampler_speex_one_way : public processor {
+public:
+  /** The sample type of this resampler, either 16-bit integers or 32-bit
+   * floats. */
+  typedef T sample_type;
+  /** Construct a resampler resampling from #source_rate to #target_rate, that
+   * can be arbitrary, strictly positive number.
+   * @parameter channels The number of channels this resampler will resample.
+   * @parameter source_rate The sample-rate of the audio input.
+   * @parameter target_rate The sample-rate of the audio output.
+   * @parameter quality A number between 0 (fast, low quality) and 10 (slow,
+   * high quality). */
+  cubeb_resampler_speex_one_way(uint32_t channels,
+                                uint32_t source_rate,
+                                uint32_t target_rate,
+                                int quality)
+  : processor(channels)
+  , resampling_ratio(static_cast<float>(source_rate) / target_rate)
+  , additional_latency(0)
+  , leftover_samples(0)
+  {
+    int r;
+    speex_resampler = speex_resampler_init(channels, source_rate,
+                                           target_rate, quality, &r);
+    assert(r == RESAMPLER_ERR_SUCCESS && "resampler allocation failure");
+  }
+
+  /** Destructor, deallocate the resampler */
+  virtual ~cubeb_resampler_speex_one_way()
+  {
+    speex_resampler_destroy(speex_resampler);
+  }
+
+  /** Sometimes, it is necessary to add latency on one way of a two-way
+   * resampler so that the stream are synchronized. This must be called only on
+   * a fresh resampler, otherwise, silent samples will be inserted in the
+   * stream.
+   * @param frames the number of frames of latency to add. */
+  void add_latency(size_t frames)
+  {
+    additional_latency += frames;
+    resampling_in_buffer.push_silence(frames_to_samples(frames));
+  }
+
+  /* Fill the resampler with `input_frame_count` frames. */
+  void input(T * input_buffer, size_t input_frame_count)
+  {
+    resampling_in_buffer.push(input_buffer,
+                              frames_to_samples(input_frame_count));
+  }
+
+  /** Outputs exactly `output_frame_count` into `output_buffer`.
+    * `output_buffer` has to be at least `output_frame_count` long. */
+  size_t output(T * output_buffer, size_t output_frame_count)
+  {
+    uint32_t in_len = samples_to_frames(resampling_in_buffer.length());
+    uint32_t out_len = output_frame_count;
+
+    speex_resample(resampling_in_buffer.data(), &in_len,
+                   output_buffer, &out_len);
+
+    /* This shifts back any unresampled samples to the beginning of the input
+       buffer. */
+    resampling_in_buffer.pop(nullptr, frames_to_samples(in_len));
+
+    return out_len;
+  }
+
+  size_t output_for_input(uint32_t input_frames)
+  {
+    return ceilf(input_frames * resampling_ratio) + 1
+           - resampling_in_buffer.length() / channels;
+  }
+
+  /** Returns a buffer containing exactly `output_frame_count` resampled frames.
+    * The consumer should not hold onto the pointer. */
+  T * output(size_t output_frame_count)
+  {
+    if (resampling_out_buffer.capacity() < frames_to_samples(output_frame_count)) {
+      resampling_out_buffer.reserve(frames_to_samples(output_frame_count));
+    }
+
+    uint32_t in_len = samples_to_frames(resampling_in_buffer.length());
+    uint32_t out_len = output_frame_count;
+
+    speex_resample(resampling_in_buffer.data(), &in_len,
+                   resampling_out_buffer.data(), &out_len);
+
+    assert(out_len == output_frame_count);
+
+    /* This shifts back any unresampled samples to the beginning of the input
+       buffer. */
+    resampling_in_buffer.pop(nullptr, frames_to_samples(in_len));
+
+    return resampling_out_buffer.data();
+  }
+
+  /** Get the latency of the resampler, in output frames. */
+  uint32_t latency() const
+  {
+    /* The documentation of the resampler talks about "samples" here, but it
+     * only consider a single channel here so it's the same number of frames. */
+    int latency = 0;
+
+    latency =
+      speex_resampler_get_output_latency(speex_resampler) + additional_latency;
+
+    assert(latency >= 0);
+
+    return latency;
+  }
+
+  /** Returns the number of frames to pass in the input of the resampler to have
+   * exactly `output_frame_count` resampled frames. This can return a number
+   * slightly bigger than what is strictly necessary, but it guaranteed that the
+   * number of output frames will be exactly equal. */
+  uint32_t input_needed_for_output(uint32_t output_frame_count)
+  {
+    return ceilf(output_frame_count * resampling_ratio) + 1
+           - samples_to_frames(resampling_in_buffer.length());
+  }
+
+  /** Returns a pointer to the input buffer, that contains empty space for at
+   * least `frame_count` elements. This is useful so that consumer can directly
+   * write into the input buffer of the resampler. The pointer returned is
+   * adjusted so that leftover data are not overwritten.
+   */
+  T * input_buffer(size_t frame_count)
+  {
+    leftover_samples = resampling_in_buffer.length();
+    resampling_in_buffer.reserve(leftover_samples +
+                                 frames_to_samples(frame_count));
+    return resampling_in_buffer.data() + leftover_samples;
+  }
+
+  /** This method works with `input_buffer`, and allows to inform the processor
+      how much frames have been written in the provided buffer. */
+  void written(size_t written_frames)
+  {
+    resampling_in_buffer.set_length(leftover_samples +
+                                    frames_to_samples(written_frames));
+  }
+private:
+  /** Wrapper for the speex resampling functions to have a typed
+    * interface. */
+  void speex_resample(float * input_buffer, uint32_t * input_frame_count,
+                      float * output_buffer, uint32_t * output_frame_count)
+  {
+#ifndef NDEBUG
+    int rv;
+    rv =
+#endif
+      speex_resampler_process_interleaved_float(speex_resampler,
+                                                input_buffer,
+                                                input_frame_count,
+                                                output_buffer,
+                                                output_frame_count);
+    assert(rv == RESAMPLER_ERR_SUCCESS);
+  }
+
+  void speex_resample(short * input_buffer, uint32_t * input_frame_count,
+                      short * output_buffer, uint32_t * output_frame_count)
+  {
+#ifndef NDEBUG
+    int rv;
+    rv =
+#endif
+      speex_resampler_process_interleaved_int(speex_resampler,
+                                              input_buffer,
+                                              input_frame_count,
+                                              output_buffer,
+                                              output_frame_count);
+    assert(rv == RESAMPLER_ERR_SUCCESS);
+  }
+  /** The state for the speex resampler used internaly. */
+  SpeexResamplerState * speex_resampler;
+  /** Source rate / target rate. */
+  const float resampling_ratio;
+  /** Storage for the input frames, to be resampled. Also contains
+   * any unresampled frames after resampling. */
+  auto_array<T> resampling_in_buffer;
+  /* Storage for the resampled frames, to be passed back to the caller. */
+  auto_array<T> resampling_out_buffer;
+  /** Additional latency inserted into the pipeline for synchronisation. */
+  uint32_t additional_latency;
+  /** When `input_buffer` is called, this allows tracking the number of samples
+      that where in the buffer. */
+  uint32_t leftover_samples;
+};
+
+/** This class allows delaying an audio stream by `frames` frames. */
+template<typename T>
+class delay_line : public processor {
+public:
+  /** Constructor
+   * @parameter frames the number of frames of delay.
+   * @parameter channels the number of channels of this delay line. */
+  delay_line(uint32_t frames, uint32_t channels)
+    : processor(channels)
+    , length(frames)
+    , leftover_samples(0)
+  {
+    /* Fill the delay line with some silent frames to add latency. */
+    delay_input_buffer.push_silence(frames * channels);
+  }
+  /* Add some latency to the delay line.
+   * @param frames the number of frames of latency to add. */
+  void add_latency(size_t frames)
+  {
+    length += frames;
+    delay_input_buffer.push_silence(frames_to_samples(frames));
+  }
+  /** Push some frames into the delay line.
+   * @parameter buffer the frames to push.
+   * @parameter frame_count the number of frames in #buffer. */
+  void input(T * buffer, uint32_t frame_count)
+  {
+    delay_input_buffer.push(buffer, frames_to_samples(frame_count));
+  }
+  /** Pop some frames from the internal buffer, into a internal output buffer.
+   * @parameter frames_needed the number of frames to be returned.
+   * @return a buffer containing the delayed frames. The consumer should not
+   * hold onto the pointer. */
+  T * output(uint32_t frames_needed)
+  {
+    if (delay_output_buffer.capacity() < frames_to_samples(frames_needed)) {
+      delay_output_buffer.reserve(frames_to_samples(frames_needed));
+    }
+
+    delay_output_buffer.clear();
+    delay_output_buffer.push(delay_input_buffer.data(),
+                             frames_to_samples(frames_needed));
+    delay_input_buffer.pop(nullptr, frames_to_samples(frames_needed));
+
+    return delay_output_buffer.data();
+  }
+  /** Get a pointer to the first writable location in the input buffer>
+   * @parameter frames_needed the number of frames the user needs to write into
+   * the buffer.
+   * @returns a pointer to a location in the input buffer where #frames_needed
+   * can be writen. */
+  T * input_buffer(uint32_t frames_needed)
+  {
+    leftover_samples = delay_input_buffer.length();
+    delay_input_buffer.reserve(leftover_samples + frames_to_samples(frames_needed));
+    return delay_input_buffer.data() + leftover_samples;
+  }
+  /** This method works with `input_buffer`, and allows to inform the processor
+      how much frames have been written in the provided buffer. */
+  void written(size_t frames_written)
+  {
+    delay_input_buffer.set_length(leftover_samples +
+                                  frames_to_samples(frames_written));
+  }
+  /** Drains the delay line, emptying the buffer.
+   * @parameter output_buffer the buffer in which the frames are written.
+   * @parameter frames_needed the maximum number of frames to write.
+   * @return the actual number of frames written. */
+  size_t output(T * output_buffer, uint32_t frames_needed)
+  {
+    uint32_t in_len = samples_to_frames(delay_input_buffer.length());
+    uint32_t out_len = frames_needed;
+
+    uint32_t to_pop = std::min(in_len, out_len);
+
+    delay_input_buffer.pop(output_buffer, frames_to_samples(to_pop));
+
+    return to_pop;
+  }
+  /** Returns the number of frames one needs to input into the delay line to get
+   * #frames_needed frames back.
+   * @parameter frames_needed the number of frames one want to write into the
+   * delay_line
+   * @returns the number of frames one will get. */
+  size_t input_needed_for_output(uint32_t frames_needed)
+  {
+    return frames_needed;
+  }
+  /** Returns the number of frames produces for `input_frames` frames in input */
+  size_t output_for_input(uint32_t input_frames)
+  {
+    return input_frames;
+  }
+  /** The number of frames this delay line delays the stream by.
+   * @returns The number of frames of delay. */
+  size_t latency()
+  {
+    return length;
+  }
+private:
+  /** The length, in frames, of this delay line */
+  uint32_t length;
+  /** When `input_buffer` is called, this allows tracking the number of samples
+      that where in the buffer. */
+  uint32_t leftover_samples;
+  /** The input buffer, where the delay is applied. */
+  auto_array<T> delay_input_buffer;
+  /** The output buffer. This is only ever used if using the ::output with a
+   * single argument. */
+  auto_array<T> delay_output_buffer;
+};
+
+/** This sits behind the C API and is more typed. */
+template<typename T>
+cubeb_resampler *
+cubeb_resampler_create_internal(cubeb_stream * stream,
+                                cubeb_stream_params * input_params,
+                                cubeb_stream_params * output_params,
+                                unsigned int target_rate,
+                                cubeb_data_callback callback,
+                                void * user_ptr,
+                                cubeb_resampler_quality quality)
+{
+  std::unique_ptr<cubeb_resampler_speex_one_way<T>> input_resampler = nullptr;
+  std::unique_ptr<cubeb_resampler_speex_one_way<T>> output_resampler = nullptr;
+  std::unique_ptr<delay_line<T>> input_delay = nullptr;
+  std::unique_ptr<delay_line<T>> output_delay = nullptr;
+
+  assert((input_params || output_params) &&
+         "need at least one valid parameter pointer.");
+
+  /* All the streams we have have a sample rate that matches the target
+     sample rate, use a no-op resampler, that simply forwards the buffers to the
+     callback. */
+  if (((input_params && input_params->rate == target_rate) &&
+      (output_params && output_params->rate == target_rate)) ||
+      (input_params && !output_params && (input_params->rate == target_rate)) ||
+      (output_params && !input_params && (output_params->rate == target_rate))) {
+    return new noop_resampler(stream, callback, user_ptr);
+  }
+
+  /* Determine if we need to resampler one or both directions, and create the
+     resamplers. */
+  if (output_params && (output_params->rate != target_rate)) {
+    output_resampler.reset(
+        new cubeb_resampler_speex_one_way<T>(output_params->channels,
+                                             target_rate,
+                                             output_params->rate,
+                                             to_speex_quality(quality)));
+    if (!output_resampler) {
+      return NULL;
+    }
+  }
+
+  if (input_params && (input_params->rate != target_rate)) {
+    input_resampler.reset(
+        new cubeb_resampler_speex_one_way<T>(input_params->channels,
+                                             input_params->rate,
+                                             target_rate,
+                                             to_speex_quality(quality)));
+    if (!input_resampler) {
+      return NULL;
+    }
+  }
+
+  /* If we resample only one direction but we have a duplex stream, insert a
+   * delay line with a length equal to the resampler latency of the
+   * other direction so that the streams are synchronized. */
+  if (input_resampler && !output_resampler && input_params && output_params) {
+    output_delay.reset(new delay_line<T>(input_resampler->latency(),
+                                         output_params->channels));
+    if (!output_delay) {
+      return NULL;
+    }
+  } else if (output_resampler && !input_resampler && input_params && output_params) {
+    input_delay.reset(new delay_line<T>(output_resampler->latency(),
+                                        input_params->channels));
+    if (!input_delay) {
+      return NULL;
+    }
+  }
+
+  if (input_resampler && output_resampler) {
+    return new cubeb_resampler_speex<T,
+                                     cubeb_resampler_speex_one_way<T>,
+                                     cubeb_resampler_speex_one_way<T>>
+                                       (input_resampler.release(),
+                                        output_resampler.release(),
+                                        stream, callback, user_ptr);
+  } else if (input_resampler) {
+    return new cubeb_resampler_speex<T,
+                                     cubeb_resampler_speex_one_way<T>,
+                                     delay_line<T>>
+                                      (input_resampler.release(),
+                                       output_delay.release(),
+                                       stream, callback, user_ptr);
+  } else {
+    return new cubeb_resampler_speex<T,
+                                     delay_line<T>,
+                                     cubeb_resampler_speex_one_way<T>>
+                                      (input_delay.release(),
+                                       output_resampler.release(),
+                                       stream, callback, user_ptr);
+  }
+}
+
+#endif /* CUBEB_RESAMPLER_INTERNAL */
new file mode 100644
--- /dev/null
+++ b/media/libcubeb/src/cubeb_ring_array.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright © 2016 Mozilla Foundation
+ *
+ * This program is made available under an ISC-style license.  See the
+ * accompanying file LICENSE for details.
+ */
+
+#ifndef CUBEB_RING_ARRAY_H
+#define CUBEB_RING_ARRAY_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/** Ring array of pointers is used to hold buffers. In case that
+    asynchronous producer/consumer callbacks do not arrive in a
+    repeated order the ring array stores the buffers and fetch
+    them in the correct order. */
+
+typedef struct {
+  AudioBuffer * buffer_array;   /**< Array that hold pointers of the allocated space for the buffers. */
+  unsigned int tail;            /**< Index of the last element (first to deliver). */
+  unsigned int count;           /**< Number of elements in the array. */
+  unsigned int capacity;        /**< Total length of the array. */
+} ring_array;
+
+static int
+single_audiobuffer_init(AudioBuffer * buffer,
+                        uint32_t bytesPerFrame,
+                        uint32_t channelsPerFrame,
+                        uint32_t frames)
+{
+  assert(buffer);
+  assert(bytesPerFrame > 0 && channelsPerFrame && frames > 0);
+
+  size_t size = bytesPerFrame * frames;
+  buffer->mData = calloc(1, size);
+  if (buffer->mData == NULL) {
+    return CUBEB_ERROR;
+  }
+
+  buffer->mNumberChannels = channelsPerFrame;
+  buffer->mDataByteSize = size;
+
+  return CUBEB_OK;
+}
+
+/** Initialize the ring array.
+    @param ra The ring_array pointer of allocated structure.
+    @retval 0 on success. */
+int
+ring_array_init(ring_array * ra,
+                uint32_t capacity,
+                uint32_t bytesPerFrame,
+                uint32_t channelsPerFrame,
+                uint32_t framesPerBuffer)
+{
+  assert(ra);
+  if (capacity == 0 || bytesPerFrame == 0 ||
+      channelsPerFrame == 0 || framesPerBuffer == 0) {
+    return CUBEB_ERROR_INVALID_PARAMETER;
+  }
+  ra->capacity = capacity;
+  ra->tail = 0;
+  ra->count = 0;
+
+  ra->buffer_array = calloc(ra->capacity, sizeof(AudioBuffer));
+  if (ra->buffer_array == NULL) {
+    return CUBEB_ERROR;
+  }
+
+  for (unsigned int i = 0; i < ra->capacity; ++i) {
+    if (single_audiobuffer_init(&ra->buffer_array[i],
+                                bytesPerFrame,
+                                channelsPerFrame,
+                                framesPerBuffer) != CUBEB_OK) {
+      return CUBEB_ERROR;
+    }
+  }
+
+  return CUBEB_OK;
+}
+
+/** Destroy the ring array.
+    @param ra The ring_array pointer.*/
+void
+ring_array_destroy(ring_array * ra)
+{
+  assert(ra);
+  if (ra->buffer_array == NULL){
+    return;
+  }
+  for (unsigned int i = 0; i < ra->capacity; ++i) {
+    if (ra->buffer_array[i].mData) {
+      free(ra->buffer_array[i].mData);
+    }
+  }
+  free(ra->buffer_array);
+}
+
+/** Get the allocated buffer to be stored with fresh data.
+    @param ra The ring_array pointer.
+    @retval Pointer of the allocated space to be stored with fresh data or NULL if full. */
+AudioBuffer *
+ring_array_get_free_buffer(ring_array * ra)
+{
+  assert(ra && ra->buffer_array);
+  assert(ra->buffer_array[0].mData != NULL);
+  if (ra->count == ra->capacity) {
+    return NULL;
+  }
+
+  assert(ra->count == 0 || (ra->tail + ra->count) % ra->capacity != ra->tail);
+  void * ret = &ra->buffer_array[(ra->tail + ra->count) % ra->capacity];
+
+  ++ra->count;
+  assert(ra->count <= ra->capacity);
+
+  return ret;
+}
+
+/** Get the next available buffer with data.
+    @param ra The ring_array pointer.
+    @retval Pointer of the next in order data buffer or NULL if empty. */
+AudioBuffer *
+ring_array_get_data_buffer(ring_array * ra)
+{
+  assert(ra && ra->buffer_array);
+  assert(ra->buffer_array[0].mData != NULL);
+
+  if (ra->count == 0) {
+    return NULL;
+  }
+  void * ret = &ra->buffer_array[ra->tail];
+
+  ra->tail = (ra->tail + 1) % ra->capacity;
+  assert(ra->tail < ra->capacity);
+
+  assert(ra->count > 0);
+  --ra->count;
+
+  return ret;
+}
+
+/** When array is empty get the first allocated buffer in the array.
+    @param ra The ring_array pointer.
+    @retval If arrays is empty, pointer of the allocated space else NULL. */
+AudioBuffer *
+ring_array_get_dummy_buffer(ring_array * ra)
+{
+  assert(ra && ra->buffer_array);
+  assert(ra->capacity > 0);
+  if (ra->count > 0) {
+    return NULL;
+  }
+  return &ra->buffer_array[0];
+}
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif //CUBEB_RING_ARRAY_H
--- a/media/libcubeb/src/cubeb_sndio.c
+++ b/media/libcubeb/src/cubeb_sndio.c
@@ -62,17 +62,17 @@ float_to_s16(void *ptr, long nsamp)
   }
 }
 
 static void
 sndio_onmove(void *arg, int delta)
 {
   cubeb_stream *s = (cubeb_stream *)arg;
 
-  s->rdpos += delta;
+  s->rdpos += delta * s->bpf;
 }
 
 static void *
 sndio_mainloop(void *arg)
 {
 #define MAXFDS 8
   struct pollfd pfds[MAXFDS];
   cubeb_stream *s = arg;
@@ -130,17 +130,17 @@ sndio_mainloop(void *arg)
       break;
     if (revents & POLLOUT) {
       n = sio_write(s->hdl, s->buf + start, end - start);
       if (n == 0) {
         DPR("sndio_mainloop() werr\n");
         state = CUBEB_STATE_ERROR;
         break;
       }
-      s->wrpos = 0;
+      s->wrpos += n;
       start += n;
     }
   }
   sio_stop(s->hdl);
   s->rdpos = s->wrpos;
   pthread_mutex_unlock(&s->mtx);
   s->state_cb(s, s->arg, state);
   return NULL;
@@ -192,17 +192,17 @@ sndio_stream_init(cubeb * context,
     /* Device selection not yet implemented. */
     return CUBEB_ERROR_DEVICE_UNAVAILABLE;
   }
 
   s = malloc(sizeof(cubeb_stream));
   if (s == NULL)
     return CUBEB_ERROR;
   s->context = context;
-  s->hdl = sio_open(NULL, SIO_PLAY, 0);
+  s->hdl = sio_open(NULL, SIO_PLAY, 1);
   if (s->hdl == NULL) {
     free(s);
     DPR("sndio_stream_init(), sio_open() failed\n");
     return CUBEB_ERROR;
   }
   sio_initpar(&wpar);
   wpar.sig = 1;
   wpar.bits = 16;
@@ -331,17 +331,17 @@ sndio_stream_stop(cubeb_stream *s)
   return CUBEB_OK;
 }
 
 static int
 sndio_stream_get_position(cubeb_stream *s, uint64_t *p)
 {
   pthread_mutex_lock(&s->mtx);
   DPR("sndio_stream_get_position() %lld\n", s->rdpos);
-  *p = s->rdpos;
+  *p = s->rdpos / s->bpf;
   pthread_mutex_unlock(&s->mtx);
   return CUBEB_OK;
 }
 
 static int
 sndio_stream_set_volume(cubeb_stream *s, float volume)
 {
   DPR("sndio_stream_set_volume(%f)\n", volume);
@@ -351,17 +351,17 @@ sndio_stream_set_volume(cubeb_stream *s,
   return CUBEB_OK;
 }
 
 int
 sndio_stream_get_latency(cubeb_stream * stm, uint32_t * latency)
 {
   // http://www.openbsd.org/cgi-bin/man.cgi?query=sio_open
   // in the "Measuring the latency and buffers usage" paragraph.
-  *latency = stm->wrpos - stm->rdpos;
+  *latency = (stm->wrpos - stm->rdpos) / stm->bpf;
   return CUBEB_OK;
 }
 
 static struct cubeb_ops const sndio_ops = {
   .init = sndio_init,
   .get_backend_id = sndio_get_backend_id,
   .get_max_channel_count = sndio_get_max_channel_count,
   .get_min_latency = sndio_get_min_latency,
new file mode 100644
--- /dev/null
+++ b/media/libcubeb/src/cubeb_utils.h
@@ -0,0 +1,178 @@
+/*
+ * Copyright © 2016 Mozilla Foundation
+ *
+ * This program is made available under an ISC-style license.  See the
+ * accompanying file LICENSE for details.
+ */
+
+#if !defined(CUBEB_UTILS)
+#define CUBEB_UTILS
+
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+
+/** Similar to memcpy, but accounts for the size of an element. */
+template<typename T>
+void PodCopy(T * destination, const T * source, size_t count)
+{
+  memcpy(destination, source, count * sizeof(T));
+}
+
+/** Similar to memmove, but accounts for the size of an element. */
+template<typename T>
+void PodMove(T * destination, const T * source, size_t count)
+{
+  memmove(destination, source, count * sizeof(T));
+}
+
+/** Similar to a memset to zero, but accounts for the size of an element. */
+template<typename T>
+void PodZero(T * destination, size_t count)
+{
+  memset(destination, 0,  count * sizeof(T));
+}
+
+template<typename T>
+class auto_array
+{
+public:
+  explicit auto_array(uint32_t capacity = 0)
+    : data_(capacity ? new T[capacity] : nullptr)
+    , capacity_(capacity)
+    , length_(0)
+  {}
+
+  ~auto_array()
+  {
+    delete [] data_;
+  }
+
+  /** Get a constant pointer to the underlying data. */
+  T * data() const
+  {
+    return data_;
+  }
+
+  const T& at(size_t index) const
+  {
+    assert(index < length_ && "out of range");
+    return data_[index];
+  }
+
+  T& at(size_t index)
+  {
+    assert(index < length_ && "out of range");
+    return data_[index];
+  }
+
+  /** Get how much underlying storage this auto_array has. */
+  size_t capacity() const
+  {
+    return capacity_;
+  }
+
+  /** Get how much elements this auto_array contains. */
+  size_t length() const
+  {
+    return length_;
+  }
+
+  /** Keeps the storage, but removes all the elements from the array. */
+  void clear()
+  {
+    length_ = 0;
+  }
+
+   /** Change the storage of this auto array, copying the elements to the new
+    * storage.
+    * @returns true in case of success
+    * @returns false if the new capacity is not big enough to accomodate for the
+    *                elements in the array.
+    */
+  bool reserve(size_t new_capacity)
+  {
+    if (new_capacity < length_) {
+      return false;
+    }
+    T * new_data = new T[new_capacity];
+    if (data_ && length_) {
+      PodCopy(new_data, data_, length_);
+    }
+    capacity_ = new_capacity;
+    delete [] data_;
+    data_ = new_data;
+
+    return true;
+  }
+
+   /** Append `length` elements to the end of the array, resizing the array if
+    * needed.
+    * @parameter elements the elements to append to the array.
+    * @parameter length the number of elements to append to the array.
+    */
+  void push(const T * elements, size_t length)
+  {
+    if (length_ + length > capacity_) {
+      reserve(length_ + length);
+    }
+    PodCopy(data_ + length_, elements, length);
+    length_ += length;
+  }
+
+  /** Append `length` zero-ed elements to the end of the array, resizing the
+   * array if needed.
+   * @parameter length the number of elements to append to the array.
+   */
+  void push_silence(size_t length)
+  {
+    if (length_ + length > capacity_) {
+      reserve(length + length_);
+    }
+    PodZero(data_ + length_, length);
+    length_ += length;
+  }
+
+  /** Return the number of free elements in the array. */
+  size_t available() const
+  {
+    return capacity_ - length_;
+  }
+
+  /** Copies `length` elements to `elements` if it is not null, and shift
+    * the remaining elements of the `auto_array` to the beginning.
+    * @parameter elements a buffer to copy the elements to, or nullptr.
+    * @parameter length the number of elements to copy.
+    * @returns true in case of success.
+    * @returns false if the auto_array contains less than `length` elements. */
+  bool pop(T * elements, size_t length)
+  {
+    if (length > length_) {
+      return false;
+    }
+    if (elements) {
+      PodCopy(elements, data_, length);
+    }
+    PodMove(data_, data_ + length, length_ - length);
+
+    length_ -= length;
+
+    return true;
+  }
+
+  void set_length(size_t length)
+  {
+    assert(length <= capacity_);
+    length_ = length;
+  }
+
+private:
+  /** The underlying storage */
+  T * data_;
+  /** The size, in number of elements, of the storage. */
+  size_t capacity_;
+  /** The number of elements the array contains. */
+  size_t length_;
+};
+
+#endif /* CUBEB_UTILS */
--- a/media/libcubeb/src/cubeb_wasapi.cpp
+++ b/media/libcubeb/src/cubeb_wasapi.cpp
@@ -1,32 +1,39 @@
 /*
  * Copyright © 2013 Mozilla Foundation
  *
  * This program is made available under an ISC-style license.  See the
  * accompanying file LICENSE for details.
  */
+#define NOMINMAX
+
 #if defined(HAVE_CONFIG_H)
 #include "config.h"
 #endif
 #include <initguid.h>
 #include <windows.h>
 #include <mmdeviceapi.h>
 #include <windef.h>
 #include <audioclient.h>
 #include <devicetopology.h>
 #include <process.h>
 #include <avrt.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <cmath>
+#include <algorithm>
+#include <memory>
+
 #include "cubeb/cubeb.h"
 #include "cubeb-internal.h"
-#include "cubeb/cubeb-stdint.h"
 #include "cubeb_resampler.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <cmath>
+#include "cubeb_utils.h"
 
 /* devicetopology.h missing in MinGW. */
 #ifndef __devicetopology_h__
 #include "cubeb_devicetopology.h"
 #endif
 
 /* Taken from winbase.h, Not in MinGW. */
 #ifndef STACK_SIZE_PARAM_IS_A_RESERVATION
@@ -58,17 +65,17 @@ uint32_t
 ms_to_hns(uint32_t ms)
 {
   return ms * 10000;
 }
 
 uint32_t
 hns_to_ms(REFERENCE_TIME hns)
 {
-  return hns / 10000;
+  return static_cast<uint32_t>(hns / 10000);
 }
 
 double
 hns_to_s(REFERENCE_TIME hns)
 {
   return static_cast<double>(hns) / 10000000;
 }
 
@@ -186,53 +193,71 @@ typedef HANDLE (WINAPI *set_mm_thread_ch
 typedef BOOL (WINAPI *revert_mm_thread_characteristics_function)(HANDLE handle);
 
 extern cubeb_ops const wasapi_ops;
 
 int wasapi_stream_stop(cubeb_stream * stm);
 int wasapi_stream_start(cubeb_stream * stm);
 void close_wasapi_stream(cubeb_stream * stm);
 int setup_wasapi_stream(cubeb_stream * stm);
+static char * wstr_to_utf8(const wchar_t * str);
+static const wchar_t * utf8_to_wstr(char* str);
 
 }
 
 struct cubeb
 {
   cubeb_ops const * ops;
   /* Library dynamically opened to increase the render thread priority, and
      the two function pointers we need. */
   HMODULE mmcss_module;
   set_mm_thread_characteristics_function set_mm_thread_characteristics;
   revert_mm_thread_characteristics_function revert_mm_thread_characteristics;
 };
 
 class wasapi_endpoint_notification_client;
 
+/* We have three possible callbacks we can use with a stream:
+ * - input only
+ * - output only
+ * - synchronized input and output
+ *
+ * Returns true when we should continue to play, false otherwise.
+ */
+typedef bool (*wasapi_refill_callback)(cubeb_stream * stm);
+
 struct cubeb_stream
 {
   cubeb * context;
   /* Mixer pameters. We need to convert the input stream to this
-     samplerate/channel layout, as WASAPI * does not resample nor upmix
+     samplerate/channel layout, as WASAPI does not resample nor upmix
      itself. */
-  cubeb_stream_params mix_params;
-  cubeb_stream_params stream_params;
+  cubeb_stream_params input_mix_params;
+  cubeb_stream_params output_mix_params;
+  /* Stream parameters. This is what the client requested,
+   * and what will be presented in the callback. */
+  cubeb_stream_params input_stream_params;
+  cubeb_stream_params output_stream_params;
+  /* The input and output device, or NULL for default. */
+  cubeb_devid input_device;
+  cubeb_devid output_device;
   /* The latency initially requested for this stream. */
   unsigned latency;
   cubeb_state_callback state_callback;
   cubeb_data_callback data_callback;
+  wasapi_refill_callback refill_callback;
   void * user_ptr;
-
   /* Lifetime considerations:
      - client, render_client, audio_clock and audio_stream_volume are interface
        pointer to the IAudioClient.
      - The lifetime for device_enumerator and notification_client, resampler,
        mix_buffer are the same as the cubeb_stream instance. */
 
   /* Main handle on the WASAPI stream. */
-  IAudioClient * client;
+  IAudioClient * output_client;
   /* Interface pointer to use the event-driven interface. */
   IAudioRenderClient * render_client;
   /* Interface pointer to use the volume facilities. */
   IAudioStreamVolume * audio_stream_volume;
   /* Interface pointer to use the stream audio clock. */
   IAudioClock * audio_clock;
   /* Frames written to the stream since it was opened. Reset on device
      change. Uses mix_params.rate. */
@@ -245,37 +270,49 @@ struct cubeb_stream
   UINT64 prev_position;
   /* Device enumerator to be able to be notified when the default
      device change. */
   IMMDeviceEnumerator * device_enumerator;
   /* Device notification client, to be able to be notified when the default
      audio device changes and route the audio to the new default audio output
      device */
   wasapi_endpoint_notification_client * notification_client;
+  /* Main andle to the WASAPI capture stream. */
+  IAudioClient * input_client;
+  /* Interface to use the event driven capture interface */
+  IAudioCaptureClient * capture_client;
   /* This event is set by the stream_stop and stream_destroy
      function, so the render loop can exit properly. */
   HANDLE shutdown_event;
   /* Set by OnDefaultDeviceChanged when a stream reconfiguration is required.
      The reconfiguration is handled by the render loop thread. */
   HANDLE reconfigure_event;
   /* This is set by WASAPI when we should refill the stream. */
   HANDLE refill_event;
+  /* This is set by WASAPI when we should read from the input stream. In
+   * practice, we read from the input stream in the output callback, so
+   * this is not used, but it is necessary to start getting input data. */
+  HANDLE input_available_event;
   /* Each cubeb_stream has its own thread. */
   HANDLE thread;
   /* The lock protects all members that are touched by the render thread or
      change during a device reset, including: audio_clock, audio_stream_volume,
      client, frames_written, mix_params, total_frames_written, prev_position. */
   owned_critical_section * stream_reset_lock;
-  /* Maximum number of frames we can be requested in a callback. */
-  uint32_t buffer_frame_count;
+  /* Maximum number of frames that can be passed down in a callback. */
+  uint32_t input_buffer_frame_count;
+  /* Maximum number of frames that can be requested in a callback. */
+  uint32_t output_buffer_frame_count;
   /* Resampler instance. Resampling will only happen if necessary. */
   cubeb_resampler * resampler;
-  /* Buffer used to downmix or upmix to the number of channels the mixer has.
-     its size is |frames_to_bytes_before_mix(buffer_frame_count)|. */
+  /* A buffer for up/down mixing multi-channel audio. */
   float * mix_buffer;
+  /* WASAPI input works in "packets". We re-linearize the audio packets
+   * into this buffer before handing it to the resampler. */
+  auto_array<float> linear_input_buffer;
   /* Stream volume.  Set via stream_set_volume and used to reset volume on
      device changes. */
   float volume;
   /* True if the stream is draining. */
   bool draining;
 };
 
 
@@ -370,30 +407,39 @@ public:
   }
 private:
   /* refcount for this instance, necessary to implement MSCOM semantics. */
   LONG ref_count;
   HANDLE reconfigure_event;
 };
 
 namespace {
-bool should_upmix(cubeb_stream * stream)
+bool has_input(cubeb_stream * stm)
 {
-  return stream->mix_params.channels > stream->stream_params.channels;
+  return stm->input_stream_params.rate != 0;
+}
+
+bool has_output(cubeb_stream * stm)
+{
+  return stm->output_stream_params.rate != 0;
 }
 
-bool should_downmix(cubeb_stream * stream)
+bool should_upmix(cubeb_stream_params & stream, cubeb_stream_params & mixer)
 {
-  return stream->mix_params.channels < stream->stream_params.channels;
+  return mixer.channels > stream.channels;
 }
 
-double stream_to_mix_samplerate_ratio(cubeb_stream * stream)
+bool should_downmix(cubeb_stream_params & stream, cubeb_stream_params & mixer)
 {
-  stream->stream_reset_lock->assert_current_thread_owns();
-  return double(stream->stream_params.rate) / stream->mix_params.rate;
+  return mixer.channels < stream.channels;
+}
+
+double stream_to_mix_samplerate_ratio(cubeb_stream_params & stream, cubeb_stream_params & mixer)
+{
+  return double(stream.rate) / mixer.rate;
 }
 
 /* Upmix function, copies a mono channel into L and R */
 template<typename T>
 void
 mono_to_stereo(T * in, long insamples, T * out, int32_t out_channels)
 {
   for (int i = 0, j = 0; i < insamples; ++i, j += out_channels) {
@@ -451,69 +497,303 @@ downmix(T * in, long inframes, T * out, 
   }
 }
 
 /* This returns the size of a frame in the stream, before the eventual upmix
    occurs. */
 static size_t
 frames_to_bytes_before_mix(cubeb_stream * stm, size_t frames)
 {
-  size_t stream_frame_size = stm->stream_params.channels * sizeof(float);
+  size_t stream_frame_size = stm->output_stream_params.channels * sizeof(float);
   return stream_frame_size * frames;
 }
 
+/* This function handles the processing of the input and output audio,
+ * converting it to rate and channel layout specified at initialization.
+ * It then calls the data callback, via the resampler. */
 long
-refill(cubeb_stream * stm, float * data, long frames_needed)
+refill(cubeb_stream * stm, float * input_buffer, long input_frames_count,
+       float * output_buffer, long output_frames_needed)
 {
   /* If we need to upmix after resampling, resample into the mix buffer to
      avoid a copy. */
-  float * dest;
-  if (should_upmix(stm) || should_downmix(stm)) {
-    dest = stm->mix_buffer;
-  } else {
-    dest = data;
+  float * dest = nullptr;
+  if (has_output(stm)) {
+    if (should_upmix(stm->output_stream_params, stm->output_mix_params) ||
+        should_downmix(stm->output_stream_params, stm->output_mix_params)) {
+      dest = stm->mix_buffer;
+    } else {
+      dest = output_buffer;
+    }
   }
 
-  long out_frames = cubeb_resampler_fill(stm->resampler, NULL, dest, frames_needed);
+  long out_frames = cubeb_resampler_fill(stm->resampler,
+                                         input_buffer,
+                                         &input_frames_count,
+                                         dest,
+                                         output_frames_needed);
   /* TODO: Report out_frames < 0 as an error via the API. */
   XASSERT(out_frames >= 0);
 
   {
     auto_lock lock(stm->stream_reset_lock);
     stm->frames_written += out_frames;
   }
 
   /* Go in draining mode if we got fewer frames than requested. */
-  if (out_frames < frames_needed) {
+  if (out_frames < output_frames_needed) {
     LOG("start draining.\n");
     stm->draining = true;
   }
 
   /* If this is not true, there will be glitches.
      It is alright to have produced less frames if we are draining, though. */
-  XASSERT(out_frames == frames_needed || stm->draining);
+  XASSERT(out_frames == output_frames_needed || stm->draining || !has_output(stm));
 
-  if (should_upmix(stm)) {
-    upmix(dest, out_frames, data,
-          stm->stream_params.channels, stm->mix_params.channels);
-  } else if (should_downmix(stm)) {
-    downmix(dest, out_frames, data,
-            stm->stream_params.channels, stm->mix_params.channels);
+  if (has_output(stm)) {
+    if (should_upmix(stm->output_stream_params, stm->output_mix_params)) {
+      upmix(dest, out_frames, output_buffer,
+            stm->output_stream_params.channels, stm->output_mix_params.channels);
+    } else if (should_downmix(stm->output_stream_params, stm->output_mix_params)) {
+      downmix(dest, out_frames, output_buffer,
+              stm->output_stream_params.channels, stm->output_mix_params.channels);
+    }
   }
 
   return out_frames;
 }
 
+/* This helper grabs all the frames available from a capture client, put them in
+ * linear_input_buffer. linear_input_buffer should be cleared before the
+ * callback exits. */
+bool get_input_buffer(cubeb_stream * stm)
+{
+  HRESULT hr;
+  UINT32 padding_in;
+
+  XASSERT(has_input(stm));
+
+  hr = stm->input_client->GetCurrentPadding(&padding_in);
+  if (FAILED(hr)) {
+    LOG("Failed to get padding\n");
+    return false;
+  }
+  XASSERT(padding_in <= stm->input_buffer_frame_count);
+  UINT32 total_available_input = padding_in;
+
+  BYTE * input_packet = NULL;
+  DWORD flags;
+  UINT64 dev_pos;
+  UINT32 next;
+  /* Get input packets until we have captured enough frames, and put them in a
+   * contiguous buffer. */
+  uint32_t offset = 0;
+  uint32_t input_channel_count = stm->input_stream_params.channels;
+  while (offset != total_available_input * input_channel_count &&
+      total_available_input) {
+    hr = stm->capture_client->GetNextPacketSize(&next);
+    if (FAILED(hr)) {
+      LOG("cannot get next packet size: %x\n", hr);
+      return false;
+    }
+    /* This can happen if the capture stream has stopped. Just return in this
+     * case. */
+    if (!next) {
+      break;
+    }
+
+    UINT32 packet_size;
+    hr = stm->capture_client->GetBuffer(&input_packet,
+        &packet_size,
+        &flags,
+        &dev_pos,
+        NULL);
+    if (FAILED(hr)) {
+      LOG("GetBuffer failed for capture: %x\n", hr);
+      return false;
+    }
+    XASSERT(packet_size == next);
+    if (flags & AUDCLNT_BUFFERFLAGS_SILENT) {
+      stm->linear_input_buffer.push_silence(packet_size * input_channel_count);
+    } else {
+      stm->linear_input_buffer.push(reinterpret_cast<float*>(input_packet),
+          packet_size * input_channel_count);
+    }
+    hr = stm->capture_client->ReleaseBuffer(packet_size);
+    if (FAILED(hr)) {
+      LOG("FAILED to release intput buffer");
+      return false;
+    }
+    offset += packet_size * input_channel_count;
+  }
+
+  assert(stm->linear_input_buffer.length() == total_available_input);
+
+  return true;
+}
+
+/* Get an output buffer from the render_client. It has to be released before
+ * exiting the callback. */
+bool get_output_buffer(cubeb_stream * stm, float *& buffer, size_t & frame_count)
+{
+  UINT32 padding_out;
+  HRESULT hr;
+
+  XASSERT(has_output(stm));
+
+  hr = stm->output_client->GetCurrentPadding(&padding_out);
+  if (FAILED(hr)) {
+    LOG("Failed to get padding: %x\n", hr);
+    return false;
+  }
+  XASSERT(padding_out <= stm->output_buffer_frame_count);
+
+  if (stm->draining) {
+    if (padding_out == 0) {
+      stm->state_callback(stm, stm->user_ptr, CUBEB_STATE_DRAINED);
+      return false;
+    }
+    return true;
+  }
+
+  frame_count = stm->output_buffer_frame_count - padding_out;
+  BYTE * output_buffer;
+
+  hr = stm->render_client->GetBuffer(frame_count, &output_buffer);
+  if (FAILED(hr)) {
+    LOG("cannot get render buffer\n");
+    return false;
+  }
+
+  buffer = reinterpret_cast<float*>(output_buffer);
+
+  return true;
+}
+
+/**
+ * This function gets input data from a input device, and pass it along with an
+ * output buffer to the resamplers.  */
+bool
+refill_callback_duplex(cubeb_stream * stm)
+{
+  HRESULT hr;
+  float * output_buffer;
+  size_t output_frames;
+  bool rv;
+
+  XASSERT(has_input(stm) && has_output(stm));
+
+  rv = get_input_buffer(stm);
+  if (!rv) {
+    return rv;
+  }
+
+  rv = get_output_buffer(stm, output_buffer, output_frames);
+  if (!rv) {
+    return rv;
+  }
+
+  /* This can only happen when debugging, and having breakpoints set in the
+   * callback in a way that it makes the stream underrun. */
+  if (output_frames == 0) {
+    return true;
+  }
+
+  // When WASAPI has not filled the input buffer yet, send silence.
+  if (stm->linear_input_buffer.length() == 0) {
+    stm->linear_input_buffer.push_silence(output_frames* stm->output_mix_params.channels);
+  }
+
+  refill(stm,
+         stm->linear_input_buffer.data(),
+         stm->linear_input_buffer.length(),
+         output_buffer,
+         output_frames);
+
+  stm->linear_input_buffer.clear();
+
+  hr = stm->render_client->ReleaseBuffer(output_frames, 0);
+  if (FAILED(hr)) {
+    LOG("failed to release buffer: %x\n", hr);
+    return false;
+  }
+  return true;
+}
+
+bool
+refill_callback_input(cubeb_stream * stm)
+{
+  bool rv, consumed_all_buffer;
+
+  XASSERT(has_input(stm) && !has_output(stm));
+
+  rv = get_input_buffer(stm);
+  if (!rv) {
+    return rv;
+  }
+
+  long read = refill(stm,
+                     stm->linear_input_buffer.data(),
+                     stm->linear_input_buffer.length(),
+                     nullptr,
+                     0);
+
+  consumed_all_buffer = read == stm->linear_input_buffer.length();
+
+  stm->linear_input_buffer.clear();
+
+  return consumed_all_buffer;
+}
+
+bool
+refill_callback_output(cubeb_stream * stm)
+{
+  bool rv;
+  HRESULT hr;
+  float * output_buffer;
+  size_t output_frames;
+
+  XASSERT(!has_input(stm) && has_output(stm));
+
+  rv = get_output_buffer(stm, output_buffer, output_frames);
+  if (!rv) {
+    return rv;
+  }
+  if (stm->draining || output_frames == 0) {
+    return true;
+  }
+
+
+  long got = refill(stm,
+                    nullptr,
+                    0,
+                    output_buffer,
+                    output_frames);
+
+  hr = stm->render_client->ReleaseBuffer(output_frames, 0);
+  if (FAILED(hr)) {
+    LOG("failed to release buffer: %x\n", hr);
+    return false;
+  }
+
+  return got == output_frames || stm->draining;
+}
+
 static unsigned int __stdcall
 wasapi_stream_render_loop(LPVOID stream)
 {
   cubeb_stream * stm = static_cast<cubeb_stream *>(stream);
 
   bool is_playing = true;
-  HANDLE wait_array[3] = {stm->shutdown_event, stm->reconfigure_event, stm->refill_event};
+  HANDLE wait_array[4] = {
+    stm->shutdown_event,
+    stm->reconfigure_event,
+    stm->refill_event,
+    stm->input_available_event
+  };
   HANDLE mmcss_handle = NULL;
   HRESULT hr = 0;
   DWORD mmcss_task_index = 0;
   auto_com com;
   if (!com.ok()) {
     LOG("COM initialization failed on render_loop thread.\n");
     stm->state_callback(stm, stm->user_ptr, CUBEB_STATE_ERROR);
     return 0;
@@ -550,75 +830,52 @@ wasapi_stream_render_loop(LPVOID stream)
          shutdown. */
       if (stm->draining) {
         stm->state_callback(stm, stm->user_ptr, CUBEB_STATE_DRAINED);
       }
       continue;
     }
     case WAIT_OBJECT_0 + 1: { /* reconfigure */
       /* Close the stream */
-      stm->client->Stop();
+      if (stm->output_client) {
+        stm->output_client->Stop();
+      }
+      if (stm->input_client) {
+        stm->input_client->Stop();
+      }
       {
         auto_lock lock(stm->stream_reset_lock);
         close_wasapi_stream(stm);
         /* Reopen a stream and start it immediately. This will automatically pick the
            new default device for this role. */
         int r = setup_wasapi_stream(stm);
         if (r != CUBEB_OK) {
           /* Don't destroy the stream here, since we expect the caller to do
              so after the error has propagated via the state callback. */
           is_playing = false;
           hr = E_FAIL;
           continue;
         }
       }
-      stm->client->Start();
+      if (stm->output_client) {
+        stm->output_client->Start();
+      }
+      if (stm->input_client) {
+        stm->input_client->Start();
+      }
       break;
     }
-    case WAIT_OBJECT_0 + 2: { /* refill */
-      UINT32 padding;
-
-      hr = stm->client->GetCurrentPadding(&padding);
-      if (FAILED(hr)) {
-        LOG("Failed to get padding: %x\n", hr);
-        is_playing = false;
-        continue;
-      }
-      XASSERT(padding <= stm->buffer_frame_count);
-
-      if (stm->draining) {
-        if (padding == 0) {
-          stm->state_callback(stm, stm->user_ptr, CUBEB_STATE_DRAINED);
-          is_playing = false;
-        }
-        continue;
-      }
-
-      long available = stm->buffer_frame_count - padding;
-
-      if (available == 0) {
-        continue;
-      }
-
-      BYTE * data;
-      hr = stm->render_client->GetBuffer(available, &data);
-      if (SUCCEEDED(hr)) {
-        long wrote = refill(stm, reinterpret_cast<float *>(data), available);
-        XASSERT(wrote == available || stm->draining);
-
-        hr = stm->render_client->ReleaseBuffer(wrote, 0);
-        if (FAILED(hr)) {
-          LOG("failed to release buffer: %x\n", hr);
-          is_playing = false;
-        }
-      } else {
-        LOG("failed to get buffer: %x\n", hr);
-        is_playing = false;
-      }
-    }
+    case WAIT_OBJECT_0 + 2:  /* refill */
+      XASSERT(has_input(stm) && has_output(stm) ||
+              !has_input(stm) && has_output(stm));
+      is_playing = stm->refill_callback(stm);
+      break;
+    case WAIT_OBJECT_0 + 3: /* input available */
+      if (has_input(stm) && has_output(stm)) { continue; }
+      is_playing = stm->refill_callback(stm);
       break;
     case WAIT_TIMEOUT:
       XASSERT(stm->shutdown_event == wait_array[0]);
       if (++timeout_count >= timeout_limit) {
         is_playing = false;
         hr = E_FAIL;
       }
       break;
@@ -681,31 +938,54 @@ HRESULT unregister_notification_client(c
   stm->device_enumerator->UnregisterEndpointNotificationCallback(stm->notification_client);
 
   SafeRelease(stm->notification_client);
   SafeRelease(stm->device_enumerator);
 
   return S_OK;
 }
 
-HRESULT get_default_endpoint(IMMDevice ** device)
+HRESULT get_endpoint(IMMDevice ** device, LPCWSTR devid)
+{
+  IMMDeviceEnumerator * enumerator;
+  HRESULT hr = CoCreateInstance(__uuidof(MMDeviceEnumerator),
+                                NULL, CLSCTX_INPROC_SERVER,
+                                IID_PPV_ARGS(&enumerator));
+  if (FAILED(hr)) {
+    LOG("Could not get device enumerator: %x\n", hr);
+    return hr;
+  }
+
+  hr = enumerator->GetDevice(devid, device);
+  if (FAILED(hr)) {
+    LOG("Could not get device: %x\n", hr);
+    SafeRelease(enumerator);
+    return hr;
+  }
+
+  SafeRelease(enumerator);
+
+  return S_OK;
+}
+
+HRESULT get_default_endpoint(IMMDevice ** device, EDataFlow direction)
 {
   IMMDeviceEnumerator * enumerator;
   HRESULT hr = CoCreateInstance(__uuidof(MMDeviceEnumerator),
                                 NULL, CLSCTX_INPROC_SERVER,
                                 IID_PPV_ARGS(&enumerator));
   if (FAILED(hr)) {
     LOG("Could not get device enumerator: %x\n", hr);
     return hr;
   }
   /* eMultimedia is okay for now ("Music, movies, narration, [...]").
      We will need to change this when we distinguish streams by use-case, other
      possible values being eConsole ("Games, system notification sounds [...]")
      and eCommunication ("Voice communication"). */
-  hr = enumerator->GetDefaultAudioEndpoint(eRender, eMultimedia, device);
+  hr = enumerator->GetDefaultAudioEndpoint(direction, eMultimedia, device);
   if (FAILED(hr)) {
     LOG("Could not get default audio endpoint: %x\n", hr);
     SafeRelease(enumerator);
     return hr;
   }
 
   SafeRelease(enumerator);
 
@@ -734,19 +1014,19 @@ current_stream_delay(cubeb_stream * stm)
   UINT64 pos;
   hr = stm->audio_clock->GetPosition(&pos, NULL);
   if (FAILED(hr)) {
     LOG("GetPosition failed: %x\n", hr);
     return 0;
   }
 
   double cur_pos = static_cast<double>(pos) / freq;
-  double max_pos = static_cast<double>(stm->frames_written)  / stm->mix_params.rate;
+  double max_pos = static_cast<double>(stm->frames_written)  / stm->output_mix_params.rate;
   double delay = max_pos - cur_pos;
-  XASSERT(delay >= 0);
+  XASSERT(delay >= 0 || stm->draining);
 
   return delay;
 }
 
 int
 stream_set_volume(cubeb_stream * stm, float volume)
 {
   stm->stream_reset_lock->assert_current_thread_owns();
@@ -790,17 +1070,17 @@ int wasapi_init(cubeb ** context, char c
   if (!com.ok()) {
     return CUBEB_ERROR;
   }
 
   /* We don't use the device yet, but need to make sure we can initialize one
      so that this backend is not incorrectly enabled on platforms that don't
      support WASAPI. */
   IMMDevice * device;
-  hr = get_default_endpoint(&device);
+  hr = get_default_endpoint(&device, eRender);
   if (FAILED(hr)) {
     LOG("Could not get device: %x\n", hr);
     return CUBEB_ERROR;
   }
   SafeRelease(device);
 
   cubeb * ctx = (cubeb *)calloc(1, sizeof(cubeb));
   if (!ctx) {
@@ -882,17 +1162,17 @@ wasapi_get_max_channel_count(cubeb * ctx
   auto_com com;
   if (!com.ok()) {
     return CUBEB_ERROR;
   }
 
   XASSERT(ctx && max_channels);
 
   IMMDevice * device;
-  hr = get_default_endpoint(&device);
+  hr = get_default_endpoint(&device, eRender);
   if (FAILED(hr)) {
     return CUBEB_ERROR;
   }
 
   hr = device->Activate(__uuidof(IAudioClient),
                         CLSCTX_INPROC_SERVER,
                         NULL, (void **)&client);
   SafeRelease(device);
@@ -925,17 +1205,17 @@ wasapi_get_min_latency(cubeb * ctx, cube
     return CUBEB_ERROR;
   }
 
   if (params.format != CUBEB_SAMPLE_FLOAT32NE) {
     return CUBEB_ERROR_INVALID_FORMAT;
   }
 
   IMMDevice * device;
-  hr = get_default_endpoint(&device);
+  hr = get_default_endpoint(&device, eRender);
   if (FAILED(hr)) {
     LOG("Could not get default endpoint: %x\n", hr);
     return CUBEB_ERROR;
   }
 
   hr = device->Activate(__uuidof(IAudioClient),
                         CLSCTX_INPROC_SERVER,
                         NULL, (void **)&client);
@@ -972,17 +1252,17 @@ wasapi_get_preferred_sample_rate(cubeb *
   IAudioClient * client;
   WAVEFORMATEX * mix_format;
   auto_com com;
   if (!com.ok()) {
     return CUBEB_ERROR;
   }
 
   IMMDevice * device;
-  hr = get_default_endpoint(&device);
+  hr = get_default_endpoint(&device, eRender);
   if (FAILED(hr)) {
     return CUBEB_ERROR;
   }
 
   hr = device->Activate(__uuidof(IAudioClient),
                         CLSCTX_INPROC_SERVER,
                         NULL, (void **)&client);
   SafeRelease(device);
@@ -1048,20 +1328,19 @@ handle_channel_layout(cubeb_stream * stm
   (*mix_format)->nBlockAlign = ((*mix_format)->wBitsPerSample * (*mix_format)->nChannels) / 8;
   (*mix_format)->nAvgBytesPerSec = (*mix_format)->nSamplesPerSec * (*mix_format)->nBlockAlign;
   format_pcm->SubFormat = KSDATAFORMAT_SUBTYPE_IEEE_FLOAT;
   (*mix_format)->wBitsPerSample = 32;
   format_pcm->Samples.wValidBitsPerSample = (*mix_format)->wBitsPerSample;
 
   /* Check if wasapi will accept our channel layout request. */
   WAVEFORMATEX * closest;
-  HRESULT hr = stm->client->IsFormatSupported(AUDCLNT_SHAREMODE_SHARED,
-                                              *mix_format,
-                                              &closest);
-
+  HRESULT hr = stm->output_client->IsFormatSupported(AUDCLNT_SHAREMODE_SHARED,
+                                                     *mix_format,
+                                                     &closest);
   if (hr == S_FALSE) {
     /* Not supported, but WASAPI gives us a suggestion. Use it, and handle the
        eventual upmix/downmix ourselves */
     LOG("Using WASAPI suggested format: channels: %d\n", closest->nChannels);
     WAVEFORMATEXTENSIBLE * closest_pcm = reinterpret_cast<WAVEFORMATEXTENSIBLE *>(closest);
     XASSERT(closest_pcm->SubFormat == format_pcm->SubFormat);
     CoTaskMemFree(*mix_format);
     *mix_format = closest;
@@ -1072,133 +1351,234 @@ handle_channel_layout(cubeb_stream * stm
     *reinterpret_cast<WAVEFORMATEXTENSIBLE *>(*mix_format) = hw_mix_format;
   } else if (hr == S_OK) {
     LOG("Requested format accepted by WASAPI.\n");
   } else {
     LOG("IsFormatSupported unhandled error: %x\n", hr);
   }
 }
 
+#define DIRECTION_NAME (direction == eCapture ? "capture" : "render")
+
+template<typename T>
+int setup_wasapi_stream_one_side(cubeb_stream * stm,
+                                 cubeb_stream_params * stream_params,
+                                 cubeb_devid devid,
+                                 EDataFlow direction,
+                                 REFIID riid,
+                                 IAudioClient ** audio_client,
+                                 uint32_t * buffer_frame_count,
+                                 HANDLE & event,
+                                 T ** render_or_capture_client,
+                                 cubeb_stream_params * mix_params)
+{
+  IMMDevice * device;
+  WAVEFORMATEX * mix_format;
+  HRESULT hr;
+
+  stm->stream_reset_lock->assert_current_thread_owns();
+
+  if (devid) {
+    std::unique_ptr<const wchar_t> id;
+    id.reset(utf8_to_wstr(reinterpret_cast<char*>(devid)));
+    hr = get_endpoint(&device, id.get());
+    if (FAILED(hr)) {
+      LOG("Could not get %s endpoint, error: %x\n", DIRECTION_NAME, hr);
+      return CUBEB_ERROR;
+    }
+  } else {
+    hr = get_default_endpoint(&device, direction);
+    if (FAILED(hr)) {
+      LOG("Could not get default %s endpoint, error: %x\n", DIRECTION_NAME, hr);
+      return CUBEB_ERROR;
+    }
+  }
+
+  /* Get a client. We will get all other interfaces we need from
+   * this pointer. */
+  hr = device->Activate(__uuidof(IAudioClient),
+                        CLSCTX_INPROC_SERVER,
+                        NULL, (void **)audio_client);
+  SafeRelease(device);
+  if (FAILED(hr)) {
+    LOG("Could not activate the device to get an audio"
+        " client for %s: error: %x\n", DIRECTION_NAME, hr);
+    return CUBEB_ERROR;
+  }
+
+  /* We have to distinguish between the format the mixer uses,
+   * and the format the stream we want to play uses. */
+  hr = (*audio_client)->GetMixFormat(&mix_format);
+  if (FAILED(hr)) {
+    LOG("Could not fetch current mix format from the audio"
+        " client for %s: error: %x\n", DIRECTION_NAME, hr);
+    return CUBEB_ERROR;
+  }
+
+  handle_channel_layout(stm, &mix_format, stream_params);
+
+  /* Shared mode WASAPI always supports float32 sample format, so this
+   * is safe. */
+  mix_params->format = CUBEB_SAMPLE_FLOAT32NE;
+  mix_params->rate = mix_format->nSamplesPerSec;
+  mix_params->channels = mix_format->nChannels;
+
+  hr = (*audio_client)->Initialize(AUDCLNT_SHAREMODE_SHARED,
+                                   AUDCLNT_STREAMFLAGS_EVENTCALLBACK |
+                                   AUDCLNT_STREAMFLAGS_NOPERSIST,
+                                   ms_to_hns(stm->latency),
+                                   0,
+                                   mix_format,
+                                   NULL);
+  if (FAILED(hr)) {
+    LOG("Unable to initialize audio client for %s: %x.\n", DIRECTION_NAME, hr);
+    return CUBEB_ERROR;
+  }
+
+  CoTaskMemFree(mix_format);
+
+  hr = (*audio_client)->GetBufferSize(buffer_frame_count);
+  if (FAILED(hr)) {
+    LOG("Could not get the buffer size from the client"
+        " for %s %x.\n", DIRECTION_NAME, hr);
+    return CUBEB_ERROR;
+  }
+
+  if (should_upmix(*stream_params, *mix_params) ||
+      should_downmix(*stream_params, *mix_params)) {
+    stm->mix_buffer = (float *)malloc(frames_to_bytes_before_mix(stm, *buffer_frame_count));
+  }
+
+  hr = (*audio_client)->SetEventHandle(event);
+  if (FAILED(hr)) {
+    LOG("Could set the event handle for the %s client %x.\n",
+        DIRECTION_NAME, hr);
+    return CUBEB_ERROR;
+  }
+
+  hr = (*audio_client)->GetService(riid, (void **)render_or_capture_client);
+  if (FAILED(hr)) {
+    LOG("Could not get the %s client %x.\n", DIRECTION_NAME, hr);
+    return CUBEB_ERROR;
+  }
+
+  return CUBEB_OK;
+}
+
+#undef DIRECTION_NAME
+
 int setup_wasapi_stream(cubeb_stream * stm)
 {
   HRESULT hr;
-  IMMDevice * device;
-  WAVEFORMATEX * mix_format;
+  int rv;
 
   stm->stream_reset_lock->assert_current_thread_owns();
 
   auto_com com;
   if (!com.ok()) {
     return CUBEB_ERROR;
   }
 
-  XASSERT(!stm->client && "WASAPI stream already setup, close it first.");
-
-  hr = get_default_endpoint(&device);
-  if (FAILED(hr)) {
-    LOG("Could not get default endpoint, error: %x\n", hr);
-    return CUBEB_ERROR;
-  }
+  XASSERT(!stm->output_client && "WASAPI stream already setup, close it first.");
 
-  /* Get a client. We will get all other interfaces we need from
-     this pointer. */
-  hr = device->Activate(__uuidof(IAudioClient),
-                        CLSCTX_INPROC_SERVER,
-                        NULL, (void **)&stm->client);
-  SafeRelease(device);
-  if (FAILED(hr)) {
-    LOG("Could not activate the device to get an audio client: error: %x\n", hr);
-    return CUBEB_ERROR;
+  if (has_input(stm)) {
+    rv = setup_wasapi_stream_one_side(stm,
+                                      &stm->input_stream_params,
+                                      stm->input_device,
+                                      eCapture,
+                                      __uuidof(IAudioCaptureClient),
+                                      &stm->input_client,
+                                      &stm->input_buffer_frame_count,
+                                      stm->input_available_event,
+                                      &stm->capture_client,
+                                      &stm->input_mix_params);
+    if (rv != CUBEB_OK) {
+      return rv;
+    }
   }
 
-  /* We have to distinguish between the format the mixer uses,
-     and the format the stream we want to play uses. */
-  hr = stm->client->GetMixFormat(&mix_format);
-  if (FAILED(hr)) {
-    LOG("Could not fetch current mix format from the audio client: error: %x\n", hr);
-    return CUBEB_ERROR;
+  if (has_output(stm)) {
+    rv = setup_wasapi_stream_one_side(stm,
+                                      &stm->output_stream_params,
+                                      stm->output_device,
+                                      eRender,
+                                      __uuidof(IAudioRenderClient),
+                                      &stm->output_client,
+                                      &stm->output_buffer_frame_count,
+                                      stm->refill_event,
+                                      &stm->render_client,
+                                      &stm->output_mix_params);
+    if (rv != CUBEB_OK) {
+      return rv;
+    }
+
+    hr = stm->output_client->GetService(__uuidof(IAudioStreamVolume),
+                                        (void **)&stm->audio_stream_volume);
+    if (FAILED(hr)) {
+      LOG("Could not get the IAudioStreamVolume: %x\n", hr);
+      return CUBEB_ERROR;
+    }
+
+    XASSERT(stm->frames_written == 0);
+    hr = stm->output_client->GetService(__uuidof(IAudioClock),
+                                        (void **)&stm->audio_clock);
+    if (FAILED(hr)) {
+      LOG("Could not get the IAudioClock: %x\n", hr);
+      return CUBEB_ERROR;
+    }
+
+    /* Restore the stream volume over a device change. */
+    if (stream_set_volume(stm, stm->volume) != CUBEB_OK) {
+      return CUBEB_ERROR;
+    }
   }
 
-  handle_channel_layout(stm, &mix_format, &stm->stream_params);
-
-  /* Shared mode WASAPI always supports float32 sample format, so this
-     is safe. */
-  stm->mix_params.format = CUBEB_SAMPLE_FLOAT32NE;
-  stm->mix_params.rate = mix_format->nSamplesPerSec;
-  stm->mix_params.channels = mix_format->nChannels;
+  /* If we have both input and output, we resample to
+   * the highest sample rate available. */
+  int32_t target_sample_rate;
+  if (has_input(stm) && has_output(stm)) {
+    assert(stm->input_stream_params.rate == stm->output_stream_params.rate);
+    target_sample_rate = stm->input_stream_params.rate;
+  }  else if (has_input(stm)) {
+    target_sample_rate = stm->input_stream_params.rate;
+  } else {
+    XASSERT(has_output(stm));
+    target_sample_rate = stm->output_stream_params.rate;
+  }
 
-  hr = stm->client->Initialize(AUDCLNT_SHAREMODE_SHARED,
-                               AUDCLNT_STREAMFLAGS_EVENTCALLBACK |
-                               AUDCLNT_STREAMFLAGS_NOPERSIST,
-                               ms_to_hns(stm->latency),
-                               0,
-                               mix_format,
-                               NULL);
-  CoTaskMemFree(mix_format);
-  if (FAILED(hr)) {
-    LOG("Unable to initialize audio client: %x\n", hr);
+  /* If we are playing/capturing a mono stream, we only resample one channel,
+   and copy it over, so we are always resampling the number
+   of channels of the stream, not the number of channels
+   that WASAPI wants. */
+  cubeb_stream_params input_params = stm->input_mix_params;
+  input_params.channels = stm->input_stream_params.channels;
+  cubeb_stream_params output_params = stm->output_mix_params;
+  output_params.channels = stm->output_stream_params.channels;
+
+  stm->resampler =
+    cubeb_resampler_create(stm,
+                           has_input(stm) ? &input_params : nullptr,
+                           has_output(stm) ? &output_params : nullptr,
+                           target_sample_rate,
+                           stm->data_callback,
+                           stm->user_ptr,
+                           CUBEB_RESAMPLER_QUALITY_DESKTOP);
+  if (!stm->resampler) {
+    LOG("Could not get a resampler\n");
     return CUBEB_ERROR;
   }
 
-  hr = stm->client->GetBufferSize(&stm->buffer_frame_count);
-  if (FAILED(hr)) {
-    LOG("Could not get the buffer size from the client: %x\n", hr);
-    return CUBEB_ERROR;
-  }
-
-  if (should_upmix(stm) || should_downmix(stm)) {
-    stm->mix_buffer = (float *) malloc(frames_to_bytes_before_mix(stm, stm->buffer_frame_count));
-  }
-
-  hr = stm->client->SetEventHandle(stm->refill_event);
-  if (FAILED(hr)) {
-    LOG("Could set the event handle for the client: %x\n", hr);
-    return CUBEB_ERROR;
-  }
-
-  hr = stm->client->GetService(__uuidof(IAudioRenderClient),
-                               (void **)&stm->render_client);
-  if (FAILED(hr)) {
-    LOG("Could not get the render client: %x\n", hr);
-    return CUBEB_ERROR;
-  }
+  XASSERT(has_input(stm) || has_output(stm));
 
-  hr = stm->client->GetService(__uuidof(IAudioStreamVolume),
-                               (void **)&stm->audio_stream_volume);
-  if (FAILED(hr)) {
-    LOG("Could not get the IAudioStreamVolume: %x\n", hr);
-    return CUBEB_ERROR;
-  }
-
-  XASSERT(stm->frames_written == 0);
-  hr = stm->client->GetService(__uuidof(IAudioClock),
-                               (void **)&stm->audio_clock);
-  if (FAILED(hr)) {
-    LOG("Could not get the IAudioClock: %x\n", hr);
-    return CUBEB_ERROR;
-  }
-
-  /* Restore the stream volume over a device change. */
-  if (stream_set_volume(stm, stm->volume) != CUBEB_OK) {
-    return CUBEB_ERROR;
-  }
-
-  /* If we are playing a mono stream, we only resample one channel,
-     and copy it over, so we are always resampling the number
-     of channels of the stream, not the number of channels
-     that WASAPI wants. */
-  stm->resampler = cubeb_resampler_create(stm, stm->stream_params,
-                                          stm->mix_params.rate,
-                                          stm->data_callback,
-                                          stm->buffer_frame_count,
-                                          stm->user_ptr,
-                                          CUBEB_RESAMPLER_QUALITY_DESKTOP);
-  if (!stm->resampler) {
-    LOG("Could not get a resampler\n");
-    return CUBEB_ERROR;
+  if (has_input(stm) && has_output(stm)) {
+    stm->refill_callback = refill_callback_duplex;
+  } else if (has_input(stm)) {
+    stm->refill_callback = refill_callback_input;
+  } else if (has_output(stm)) {
+    stm->refill_callback = refill_callback_output;
   }
 
   return CUBEB_OK;
 }
 
 int
 wasapi_stream_init(cubeb * context, cubeb_stream ** stream,
                    char const * stream_name,
@@ -1211,57 +1591,71 @@ wasapi_stream_init(cubeb * context, cube
 {
   HRESULT hr;
   int rv;
   auto_com com;
   if (!com.ok()) {
     return CUBEB_ERROR;
   }
 
-  XASSERT(!input_stream_params && "not supported.");
   if (input_device || output_device) {
     /* Device selection not yet implemented. */
     return CUBEB_ERROR_DEVICE_UNAVAILABLE;
   }
 
   XASSERT(context && stream);
 
-  if (output_stream_params->format != CUBEB_SAMPLE_FLOAT32NE) {
+  if (output_stream_params && output_stream_params->format != CUBEB_SAMPLE_FLOAT32NE ||
+      input_stream_params && input_stream_params->format != CUBEB_SAMPLE_FLOAT32NE) {
     return CUBEB_ERROR_INVALID_FORMAT;
   }
 
   cubeb_stream * stm = (cubeb_stream *)calloc(1, sizeof(cubeb_stream));
 
   XASSERT(stm);
 
   stm->context = context;
   stm->data_callback = data_callback;
   stm->state_callback = state_callback;
   stm->user_ptr = user_ptr;
-  stm->stream_params = *output_stream_params;
   stm->draining = false;
+  if (input_stream_params) {
+    stm->input_stream_params = *input_stream_params;
+  }
+  if (output_stream_params) {
+    stm->output_stream_params = *output_stream_params;
+  }
   stm->latency = latency;
   stm->volume = 1.0;
 
   stm->stream_reset_lock = new owned_critical_section();
 
   stm->reconfigure_event = CreateEvent(NULL, 0, 0, NULL);
   if (!stm->reconfigure_event) {
     LOG("Can't create the reconfigure event, error: %x\n", GetLastError());
     wasapi_stream_destroy(stm);
     return CUBEB_ERROR;
   }
 
+  /* Unconditionally create the two events so that the wait logic is simpler. */
   stm->refill_event = CreateEvent(NULL, 0, 0, NULL);
   if (!stm->refill_event) {
     LOG("Can't create the refill event, error: %x\n", GetLastError());
     wasapi_stream_destroy(stm);
     return CUBEB_ERROR;
   }
 
+  stm->input_available_event = CreateEvent(NULL, 0, 0, NULL);
+  if (!stm->input_available_event) {
+    LOG("Can't create the input available event , error: %x\n", GetLastError());
+    wasapi_stream_destroy(stm);
+    return CUBEB_ERROR;
+  }
+
+
   {
     /* Locking here is not strictly necessary, because we don't have a
        notification client that can reset the stream yet, but it lets us
        assert that the lock is held in the function. */
     auto_lock lock(stm->stream_reset_lock);
     rv = setup_wasapi_stream(stm);
   }
   if (rv != CUBEB_OK) {
@@ -1282,28 +1676,30 @@ wasapi_stream_init(cubeb * context, cube
 }
 
 void close_wasapi_stream(cubeb_stream * stm)
 {
   XASSERT(stm);
 
   stm->stream_reset_lock->assert_current_thread_owns();
 
-  SafeRelease(stm->client);
-  stm->client = NULL;
+  SafeRelease(stm->output_client);
+  stm->output_client = NULL;
+  SafeRelease(stm->input_client);
+  stm->capture_client = NULL;
 
   SafeRelease(stm->render_client);
   stm->render_client = NULL;
 
   SafeRelease(stm->audio_stream_volume);
   stm->audio_stream_volume = NULL;
 
   SafeRelease(stm->audio_clock);
   stm->audio_clock = NULL;
-  stm->total_frames_written += round(stm->frames_written * stream_to_mix_samplerate_ratio(stm));
+  stm->total_frames_written += static_cast<UINT64>(round(stm->frames_written * stream_to_mix_samplerate_ratio(stm->output_stream_params, stm->output_mix_params)));
   stm->frames_written = 0;
 
   if (stm->resampler) {
     cubeb_resampler_destroy(stm->resampler);
     stm->resampler = NULL;
   }
 
   free(stm->mix_buffer);
@@ -1315,63 +1711,86 @@ void wasapi_stream_destroy(cubeb_stream 
   XASSERT(stm);
 
   unregister_notification_client(stm);
 
   stop_and_join_render_thread(stm);
 
   SafeRelease(stm->reconfigure_event);
   SafeRelease(stm->refill_event);
+  SafeRelease(stm->input_available_event);
 
   {
     auto_lock lock(stm->stream_reset_lock);
     close_wasapi_stream(stm);
   }
 
   delete stm->stream_reset_lock;
 
   free(stm);
 }
 
-int wasapi_stream_start(cubeb_stream * stm)
+int stream_start_one_side(cubeb_stream * stm, IAudioClient * client)
 {
-  auto_lock lock(stm->stream_reset_lock);
-
-  XASSERT(stm && !stm->thread && !stm->shutdown_event);
+  XASSERT(stm->output_client == client || stm->input_client == client);
 
-  if (!stm->client) {
-    return CUBEB_ERROR;
-  }
-
-  HRESULT hr = stm->client->Start();
+  HRESULT hr = client->Start();
   if (hr == AUDCLNT_E_DEVICE_INVALIDATED) {
-    LOG("audioclient invalid device, reconfiguring\n", hr);
+    LOG("audioclient invalidated for %s device, reconfiguring\n",
+        stm->output_client == client ? "output" : "input", hr);
 
     BOOL ok = ResetEvent(stm->reconfigure_event);
     if (!ok) {
-      LOG("resetting reconfig event failed: %x\n", GetLastError());
+      LOG("resetting reconfig event failed for %s stream: %x\n",
+          stm->output_client == client ? "output" : "input", GetLastError());
     }
 
     close_wasapi_stream(stm);
     int r = setup_wasapi_stream(stm);
     if (r != CUBEB_OK) {
       LOG("reconfigure failed\n");
       return r;
     }
 
-    HRESULT hr = stm->client->Start();
+    HRESULT hr = client->Start();
     if (FAILED(hr)) {
-      LOG("could not start the stream after reconfig: %x\n", hr);
+      LOG("could not start the %s stream after reconfig: %x (%s)\n",
+        stm->output_client == client ? "output" : "input", hr);
       return CUBEB_ERROR;
     }
   } else if (FAILED(hr)) {
-    LOG("could not start the stream.\n");
+    LOG("could not start the %s stream: %x.\n",
+        stm->output_client == client ? "output" : "input", hr);
     return CUBEB_ERROR;
   }
 
+  return CUBEB_OK;
+}
+
+int wasapi_stream_start(cubeb_stream * stm)
+{
+  int rv;
+  auto_lock lock(stm->stream_reset_lock);
+
+  XASSERT(stm && !stm->thread && !stm->shutdown_event);
+
+  if (has_output(stm)) {
+    rv = stream_start_one_side(stm, stm->output_client);
+    if (rv != CUBEB_OK) {
+      return rv;
+    }
+  }
+
+  if (stm->input_client) {
+    rv = stream_start_one_side(stm, stm->input_client);
+    if (rv != CUBEB_OK) {
+      return rv;
+    }
+  }
+
   stm->shutdown_event = CreateEvent(NULL, 0, 0, NULL);
   if (!stm->shutdown_event) {
     LOG("Can't create the shutdown event, error: %x\n", GetLastError());
     return CUBEB_ERROR;
   }
 
   stm->thread = (HANDLE) _beginthreadex(NULL, 256 * 1024, wasapi_stream_render_loop, stm, STACK_SIZE_PARAM_IS_A_RESERVATION, NULL);
   if (stm->thread == NULL) {
@@ -1382,47 +1801,61 @@ int wasapi_stream_start(cubeb_stream * s
   stm->state_callback(stm, stm->user_ptr, CUBEB_STATE_STARTED);
 
   return CUBEB_OK;
 }
 
 int wasapi_stream_stop(cubeb_stream * stm)
 {
   XASSERT(stm);
+  HRESULT hr;
 
   {
     auto_lock lock(stm->stream_reset_lock);
 
-    if (stm->client) {
-      HRESULT hr = stm->client->Stop();
+    if (stm->output_client) {
+      hr = stm->output_client->Stop();
       if (FAILED(hr)) {
-        LOG("could not stop AudioClient\n");
+        LOG("could not stop AudioClient (output)\n");
         return CUBEB_ERROR;
       }
     }
 
+    if (stm->input_client) {
+      hr = stm->input_client->Stop();
+      if (FAILED(hr)) {
+        LOG("could not stop AudioClient (input)\n");
+        return CUBEB_ERROR;
+      }
+    }
+
+
     stm->state_callback(stm, stm->user_ptr, CUBEB_STATE_STOPPED);
   }
 
   stop_and_join_render_thread(stm);
 
   return CUBEB_OK;
 }
 
 int wasapi_stream_get_position(cubeb_stream * stm, uint64_t * position)
 {
   XASSERT(stm && position);
   auto_lock lock(stm->stream_reset_lock);
 
+  if (!has_output(stm)) {
+    return CUBEB_ERROR;
+  }
+
   /* Calculate how far behind the current stream head the playback cursor is. */
-  uint64_t stream_delay = current_stream_delay(stm) * stm->stream_params.rate;
+  uint64_t stream_delay = static_cast<uint64_t>(current_stream_delay(stm) * stm->output_stream_params.rate);
 
   /* Calculate the logical stream head in frames at the stream sample rate. */
   uint64_t max_pos = stm->total_frames_written +
-                     round(stm->frames_written * stream_to_mix_samplerate_ratio(stm));
+                     static_cast<uint64_t>(round(stm->frames_written * stream_to_mix_samplerate_ratio(stm->output_stream_params, stm->output_mix_params)));
 
   *position = max_pos;
   if (stream_delay <= *position) {
     *position -= stream_delay;
   }
 
   if (*position < stm->prev_position) {
     *position = stm->prev_position;
@@ -1431,39 +1864,47 @@ int wasapi_stream_get_position(cubeb_str
 
   return CUBEB_OK;
 }
 
 int wasapi_stream_get_latency(cubeb_stream * stm, uint32_t * latency)
 {
   XASSERT(stm && latency);
 
+  if (!has_output(stm)) {
+    return CUBEB_ERROR;
+  }
+
   auto_lock lock(stm->stream_reset_lock);
 
   /* The GetStreamLatency method only works if the
      AudioClient has been initialized. */
-  if (!stm->client) {
+  if (!stm->output_client) {
     return CUBEB_ERROR;
   }
 
   REFERENCE_TIME latency_hns;
-  HRESULT hr = stm->client->GetStreamLatency(&latency_hns);
+  HRESULT hr = stm->output_client->GetStreamLatency(&latency_hns);
   if (FAILED(hr)) {
     return CUBEB_ERROR;
   }
   double latency_s = hns_to_s(latency_hns);
-  *latency = static_cast<uint32_t>(latency_s * stm->stream_params.rate);
+  *latency = static_cast<uint32_t>(latency_s * stm->output_stream_params.rate);
 
   return CUBEB_OK;
 }
 
 int wasapi_stream_set_volume(cubeb_stream * stm, float volume)
 {
   auto_lock lock(stm->stream_reset_lock);
 
+  if (!has_output(stm)) {
+    return CUBEB_ERROR;
+  }
+
   if (stream_set_volume(stm, volume) != CUBEB_OK) {
     return CUBEB_ERROR;
   }
 
   stm->volume = volume;
 
   return CUBEB_OK;
 }
@@ -1471,23 +1912,38 @@ int wasapi_stream_set_volume(cubeb_strea
 static char *
 wstr_to_utf8(LPCWSTR str)
 {
   char * ret = NULL;
   int size;
 
   size = ::WideCharToMultiByte(CP_UTF8, 0, str, -1, ret, 0, NULL, NULL);
   if (size > 0) {
-    ret = (char *) malloc(size);
+    ret =  new char[size];
     ::WideCharToMultiByte(CP_UTF8, 0, str, -1, ret, size, NULL, NULL);
   }
 
   return ret;
 }
 
+static const wchar_t *
+utf8_to_wstr(char* str)
+{
+  wchar_t * ret = nullptr;
+  int size;
+
+  size = ::MultiByteToWideChar(CP_UTF8, 0, str, -1, ret, 0);
+  if (size > 0) {
+    ret = new wchar_t[size];
+    ::MultiByteToWideChar(CP_UTF8, 0, str, -1, ret, size);
+  }
+
+  return ret;
+}
+
 static IMMDevice *
 wasapi_get_device_node(IMMDeviceEnumerator * enumerator, IMMDevice * dev)
 {
   IMMDevice * ret = NULL;
   IDeviceTopology * devtopo = NULL;
   IConnector * connector = NULL;
 
   if (SUCCEEDED(dev->Activate(__uuidof(IDeviceTopology), CLSCTX_ALL, NULL, (void**)&devtopo)) &&
--- a/media/libcubeb/tests/common.h
+++ b/media/libcubeb/tests/common.h
@@ -12,19 +12,50 @@
 #include <windows.h>
 #else
 #include <unistd.h>
 #endif
 
 void delay(unsigned int ms)
 {
 #if defined(_WIN32)
-	Sleep(ms);
+  Sleep(ms);
 #else
-	sleep(ms / 1000);
-	usleep(ms % 1000 * 1000);
+  sleep(ms / 1000);
+  usleep(ms % 1000 * 1000);
 #endif
 }
 
 #if !defined(M_PI)
 #define M_PI 3.14159265358979323846
 #endif
 
+int has_available_input_device(cubeb * ctx)
+{
+  cubeb_device_collection * devices;
+  int input_device_available = 0;
+  int r;
+  /* Bail out early if the host does not have input devices. */
+  r = cubeb_enumerate_devices(ctx, CUBEB_DEVICE_TYPE_INPUT, &devices);
+  if (r != CUBEB_OK) {
+    fprintf(stderr, "error enumerating devices.");
+    return 0;
+  }
+
+  if (devices->count == 0) {
+    fprintf(stderr, "no input device available, skipping test.\n");
+    return 0;
+  }
+
+  for (uint32_t i = 0; i < devices->count; i++) {
+    input_device_available |= (devices->device[i]->state ==
+                               CUBEB_DEVICE_STATE_ENABLED);
+  }
+
+  if (!input_device_available) {
+    fprintf(stderr, "there are input devices, but they are not "
+        "available, skipping\n");
+    return 0;
+  }
+
+  return 1;
+}
+
new file mode 100644
--- /dev/null
+++ b/media/libcubeb/tests/test_devices.cpp
@@ -0,0 +1,162 @@
+/*
+ * Copyright © 2015 Haakon Sporsheim <haakon.sporsheim@telenordigital.com>
+ *
+ * This program is made available under an ISC-style license.  See the
+ * accompanying file LICENSE for details.
+ */
+
+/* libcubeb enumerate device test/example.
+ * Prints out a list of devices enumerated. */
+#ifdef NDEBUG
+#undef NDEBUG
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "cubeb/cubeb.h"
+
+
+static void
+print_device_info(cubeb_device_info * info, FILE * f)
+{
+  char devfmts[64] = "";
+  const char * devtype, * devstate, * devdeffmt;
+
+  switch (info->type) {
+    case CUBEB_DEVICE_TYPE_INPUT:
+      devtype = "input";
+      break;
+    case CUBEB_DEVICE_TYPE_OUTPUT:
+      devtype = "output";
+      break;
+    case CUBEB_DEVICE_TYPE_UNKNOWN:
+    default:
+      devtype = "unknown?";
+      break;
+  };
+
+  switch (info->state) {
+    case CUBEB_DEVICE_STATE_DISABLED:
+      devstate = "disabled";
+      break;
+    case CUBEB_DEVICE_STATE_UNPLUGGED:
+      devstate = "unplugged";
+      break;
+    case CUBEB_DEVICE_STATE_ENABLED:
+      devstate = "enabled";
+      break;
+    default:
+      devstate = "unknown?";
+      break;
+  };
+
+  switch (info->default_format) {
+    case CUBEB_DEVICE_FMT_S16LE:
+      devdeffmt = "S16LE";
+      break;
+    case CUBEB_DEVICE_FMT_S16BE:
+      devdeffmt = "S16BE";
+      break;
+    case CUBEB_DEVICE_FMT_F32LE:
+      devdeffmt = "F32LE";
+      break;
+    case CUBEB_DEVICE_FMT_F32BE:
+      devdeffmt = "F32BE";
+      break;
+    default:
+      devdeffmt = "unknown?";
+      break;
+  };
+
+  if (info->format & CUBEB_DEVICE_FMT_S16LE)
+    strcat(devfmts, " S16LE");
+  if (info->format & CUBEB_DEVICE_FMT_S16BE)
+    strcat(devfmts, " S16BE");
+  if (info->format & CUBEB_DEVICE_FMT_F32LE)
+    strcat(devfmts, " F32LE");
+  if (info->format & CUBEB_DEVICE_FMT_F32BE)
+    strcat(devfmts, " F32BE");
+
+  fprintf(f,
+      "dev: \"%s\"%s\n"
+      "\tName:    \"%s\"\n"
+      "\tGroup:   \"%s\"\n"
+      "\tVendor:  \"%s\"\n"
+      "\tType:    %s\n"
+      "\tState:   %s\n"
+      "\tCh:      %u\n"
+      "\tFormat:  %s (0x%x) (default: %s)\n"
+      "\tRate:    %u - %u (default: %u)\n"
+      "\tLatency: lo %ums, hi %ums\n",
+      info->device_id, info->preferred ? " (PREFERRED)" : "",
+      info->friendly_name, info->group_id, info->vendor_name,
+      devtype, devstate, info->max_channels,
+      (devfmts[0] == ' ') ? &devfmts[1] : devfmts,
+      (unsigned int)info->format, devdeffmt,
+      info->min_rate, info->max_rate, info->default_rate,
+      info->latency_lo_ms, info->latency_hi_ms);
+}
+
+static void
+print_device_collection(cubeb_device_collection * collection, FILE * f)
+{
+  uint32_t i;
+
+  for (i = 0; i < collection->count; i++)
+    print_device_info(collection->device[i], f);
+}
+
+static int
+run_enumerate_devices(void)
+{
+  int r = CUBEB_OK;
+  cubeb * ctx = NULL;
+  cubeb_device_collection * collection = NULL;
+
+  r = cubeb_init(&ctx, "Cubeb audio test");
+  if (r != CUBEB_OK) {
+    fprintf(stderr, "Error initializing cubeb library\n");
+    return r;
+  }
+
+  fprintf(stdout, "Enumerating input devices for backend %s\n",
+      cubeb_get_backend_id(ctx));
+
+  r = cubeb_enumerate_devices(ctx, CUBEB_DEVICE_TYPE_INPUT, &collection);
+  if (r != CUBEB_OK) {
+    fprintf(stderr, "Error enumerating devices %d\n", r);
+    goto cleanup;
+  }
+
+  fprintf(stdout, "Found %u input devices\n", collection->count);
+  print_device_collection(collection, stdout);
+  cubeb_device_collection_destroy(collection);
+
+  fprintf(stdout, "Enumerating output devices for backend %s\n",
+          cubeb_get_backend_id(ctx));
+
+  r = cubeb_enumerate_devices(ctx, CUBEB_DEVICE_TYPE_OUTPUT, &collection);
+  if (r != CUBEB_OK) {
+    fprintf(stderr, "Error enumerating devices %d\n", r);
+    goto cleanup;
+  }
+
+  fprintf(stdout, "Found %u output devices\n", collection->count);
+  print_device_collection(collection, stdout);
+  cubeb_device_collection_destroy(collection);
+
+cleanup:
+  cubeb_destroy(ctx);
+  return r;
+}
+
+int main(int argc, char *argv[])
+{
+  int ret;
+
+  ret = run_enumerate_devices();
+
+  return ret;
+}
new file mode 100644
--- /dev/null
+++ b/media/libcubeb/tests/test_duplex.cpp
@@ -0,0 +1,151 @@
+/*
+ * Copyright © 2016 Mozilla Foundation
+ *
+ * This program is made available under an ISC-style license.  See the
+ * accompanying file LICENSE for details.
+ */
+
+/* libcubeb api/function test. Loops input back to output and check audio
+ * is flowing. */
+#ifdef NDEBUG
+#undef NDEBUG
+#endif
+#define _XOPEN_SOURCE 500
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <assert.h>
+
+#include "cubeb/cubeb.h"
+#include "common.h"
+#ifdef CUBEB_GECKO_BUILD
+#include "TestHarness.h"
+#endif
+
+#define SAMPLE_FREQUENCY 48000
+#if (defined(_WIN32) || defined(__WIN32__))
+#define STREAM_FORMAT CUBEB_SAMPLE_FLOAT32LE
+#define SILENT_SAMPLE 0.0f
+#else
+#define STREAM_FORMAT CUBEB_SAMPLE_S16LE
+#define SILENT_SAMPLE 0
+#endif
+
+struct user_state
+{
+  bool seen_noise;
+};
+
+
+
+long data_cb(cubeb_stream *stream, void *user, const void * inputbuffer, void *outputbuffer, long nframes)
+{
+  user_state * u = reinterpret_cast<user_state*>(user);
+#if (defined(_WIN32) || defined(__WIN32__))
+  float *ib = (float *)inputbuffer;
+  float *ob = (float *)outputbuffer;
+#else
+  short *ib = (short *)inputbuffer;
+  short *ob = (short *)outputbuffer;
+#endif
+  bool seen_noise = false;
+
+  if (stream == NULL || inputbuffer == NULL || outputbuffer == NULL) {
+    return CUBEB_ERROR;
+  }
+
+  // Loop back: upmix the single input channel to the two output channels,
+  // checking if there is noise in the process.
+  long output_index = 0;
+  for (long i = 0; i < nframes; i++) {
+    if (ib[i] != SILENT_SAMPLE) {
+      seen_noise = true;
+    }
+    ob[output_index] = ob[output_index + 1] = ib[i];
+    output_index += 2;
+  }
+
+  u->seen_noise |= seen_noise;
+
+  return nframes;
+}
+
+void state_cb(cubeb_stream *stream, void *user, cubeb_state state)
+{
+  if (stream == NULL)
+    return;
+
+  switch (state) {
+  case CUBEB_STATE_STARTED:
+    printf("stream started\n"); break;
+  case CUBEB_STATE_STOPPED:
+    printf("stream stopped\n"); break;
+  case CUBEB_STATE_DRAINED:
+    printf("stream drained\n"); break;
+  default:
+    printf("unknown stream state %d\n", state);
+  }
+
+  return;
+}
+
+int main(int argc, char *argv[])
+{
+#ifdef CUBEB_GECKO_BUILD
+  ScopedXPCOM xpcom("test_record");
+#endif
+
+  cubeb *ctx;
+  cubeb_stream *stream;
+  cubeb_stream_params input_params;
+  cubeb_stream_params output_params;
+  int r;
+  user_state stream_state = { false };
+  uint32_t latency_ms = 0;
+
+  r = cubeb_init(&ctx, "Cubeb duplex example");
+  if (r != CUBEB_OK) {
+    fprintf(stderr, "Error initializing cubeb library\n");
+    return r;
+  }
+
+  /* This test needs an available input device, skip it if this host does not
+   * have one. */
+  if (!has_available_input_device(ctx)) {
+    return 0;
+  }
+
+  /* typical user-case: mono input, stereo output, low latency. */
+  input_params.format = STREAM_FORMAT;
+  input_params.rate = 48000;
+  input_params.channels = 1;
+  output_params.format = STREAM_FORMAT;
+  output_params.rate = 48000;
+  output_params.channels = 2;
+
+  r = cubeb_get_min_latency(ctx, output_params, &latency_ms);
+
+  if (r != CUBEB_OK) {
+    fprintf(stderr, "Could not get minimal latency\n");
+    return r;
+  }
+
+  r = cubeb_stream_init(ctx, &stream, "Cubeb duplex",
+                        NULL, &input_params, NULL, &output_params,
+                        latency_ms, data_cb, state_cb, &stream_state);
+  if (r != CUBEB_OK) {
+    fprintf(stderr, "Error initializing cubeb stream\n");
+    return r;
+  }
+
+  cubeb_stream_start(stream);
+  delay(500);
+  cubeb_stream_stop(stream);
+
+  cubeb_stream_destroy(stream);
+  cubeb_destroy(ctx);
+
+  assert(stream_state.seen_noise);
+
+  return CUBEB_OK;
+}
new file mode 100644
--- /dev/null
+++ b/media/libcubeb/tests/test_record.cpp
@@ -0,0 +1,125 @@
+/*
+ * Copyright © 2016 Mozilla Foundation
+ *
+ * This program is made available under an ISC-style license.  See the
+ * accompanying file LICENSE for details.
+ */
+
+/* libcubeb api/function test. Record the mic and check there is sound. */
+#ifdef NDEBUG
+#undef NDEBUG
+#endif
+#define _XOPEN_SOURCE 500
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <assert.h>
+
+#include "cubeb/cubeb.h"
+#include "common.h"
+#ifdef CUBEB_GECKO_BUILD
+#include "TestHarness.h"
+#endif
+
+#define SAMPLE_FREQUENCY 48000
+#if (defined(_WIN32) || defined(__WIN32__))
+#define STREAM_FORMAT CUBEB_SAMPLE_FLOAT32LE
+#else
+#define STREAM_FORMAT CUBEB_SAMPLE_S16LE
+#endif
+
+struct user_state
+{
+  bool seen_noise;
+};
+
+long data_cb(cubeb_stream *stream, void *user, const void * inputbuffer, void *outputbuffer, long nframes)
+{
+  user_state * u = reinterpret_cast<user_state*>(user);
+#if STREAM_FORMAT != CUBEB_SAMPLE_FLOAT32LE
+  short *b = (short *)inputbuffer;
+#else
+  float *b = (float *)inputbuffer;
+#endif
+
+  if (stream == NULL  || inputbuffer == NULL || outputbuffer != NULL) {
+    return CUBEB_ERROR;
+  }
+
+  bool seen_noise = false;
+  for (long i = 0; i < nframes; i++) {
+    if (b[i] != 0.0) {
+      seen_noise = true;
+    }
+  }
+
+  u->seen_noise |= seen_noise;
+
+  return nframes;
+}
+
+void state_cb(cubeb_stream *stream, void *user, cubeb_state state)
+{
+  if (stream == NULL)
+    return;
+
+  switch (state) {
+  case CUBEB_STATE_STARTED:
+    printf("stream started\n"); break;
+  case CUBEB_STATE_STOPPED:
+    printf("stream stopped\n"); break;
+  case CUBEB_STATE_DRAINED:
+    printf("stream drained\n"); break;
+  default:
+    printf("unknown stream state %d\n", state);
+  }
+
+  return;
+}
+
+int main(int argc, char *argv[])
+{
+#ifdef CUBEB_GECKO_BUILD
+  ScopedXPCOM xpcom("test_record");
+#endif
+
+  cubeb *ctx;
+  cubeb_stream *stream;
+  cubeb_stream_params params;
+  int r;
+  user_state stream_state = { false };
+
+  r = cubeb_init(&ctx, "Cubeb record example");
+  if (r != CUBEB_OK) {
+    fprintf(stderr, "Error initializing cubeb library\n");
+    return r;
+  }
+
+  /* This test needs an available input device, skip it if this host does not
+   * have one. */
+  if (!has_available_input_device(ctx)) {
+    return 0;
+  }
+
+  params.format = STREAM_FORMAT;
+  params.rate = SAMPLE_FREQUENCY;
+  params.channels = 1;
+
+  r = cubeb_stream_init(ctx, &stream, "Cubeb record (mono)", NULL, &params, NULL, nullptr,
+                        250, data_cb, state_cb, &stream_state);
+  if (r != CUBEB_OK) {
+    fprintf(stderr, "Error initializing cubeb stream\n");
+    return r;
+  }
+
+  cubeb_stream_start(stream);
+  delay(500);
+  cubeb_stream_stop(stream);
+
+  cubeb_stream_destroy(stream);
+  cubeb_destroy(ctx);
+
+  assert(stream_state.seen_noise);
+
+  return CUBEB_OK;
+}
new file mode 100644
--- /dev/null
+++ b/media/libcubeb/tests/test_resampler.cpp
@@ -0,0 +1,555 @@
+/*
+ * Copyright © 2016 Mozilla Foundation
+ *
+ * This program is made available under an ISC-style license.  See the
+ * accompanying file LICENSE for details.
+ */
+
+#ifdef NDEBUG
+#undef NDEBUG
+#endif
+#ifndef CUBEB_GECKO_BUILD
+#include "config.h"
+#endif
+#include "cubeb_resampler_internal.h"
+#include <assert.h>
+#include <stdio.h>
+#include <algorithm>
+#include <iostream>
+
+/* Windows cmath USE_MATH_DEFINE thing... */
+const float PI = 3.14159265359f;
+
+/* Testing all sample rates is very long, so if THOROUGH_TESTING is not defined,
+ * only part of the test suite is ran. */
+#ifdef THOROUGH_TESTING
+/* Some standard sample rates we're testing with. */
+const uint32_t sample_rates[] = {
+    8000,
+   16000,
+   32000,
+   44100,
+   48000,
+   88200,
+   96000,
+  192000
+};
+/* The maximum number of channels we're resampling. */
+const uint32_t max_channels = 2;
+/* The minimum an maximum number of milliseconds we're resampling for. This is
+ * used to simulate the fact that the audio stream is resampled in chunks,
+ * because audio is delivered using callbacks. */
+const uint32_t min_chunks = 10; /* ms */
+const uint32_t max_chunks = 30; /* ms */
+const uint32_t chunk_increment = 1;
+
+#else
+
+const uint32_t sample_rates[] = {
+    8000,
+   44100,
+   48000,
+};
+const uint32_t max_channels = 2;
+const uint32_t min_chunks = 10; /* ms */
+const uint32_t max_chunks = 30; /* ms */
+const uint32_t chunk_increment = 10;
+#endif
+
+#define DUMP_ARRAYS
+#ifdef DUMP_ARRAYS
+/**
+ * Files produced by dump(...) can be converted to .wave files using:
+ *
+ * sox -c <channel_count> -r <rate> -e float -b 32  file.raw file.wav
+ *
+ * for floating-point audio, or:
+ *
+ * sox -c <channel_count> -r <rate> -e unsigned -b 16  file.raw file.wav
+ *
+ * for 16bit integer audio.
+ */
+
+/* Use the correct implementation of fopen, depending on the platform. */
+void fopen_portable(FILE ** f, const char * name, const char * mode)
+{
+#ifdef WIN32
+  fopen_s(f, name, mode);
+#else
+  *f = fopen(name, mode);
+#endif
+}
+
+template<typename T>
+void dump(const char * name, T * frames, size_t count)
+{
+  FILE * file;
+  fopen_portable(&file, name, "wb");
+
+  if (!file) {
+    fprintf(stderr, "error opening %s\n", name);
+    return;
+  }
+
+  if (count != fwrite(frames, sizeof(T), count, file)) {
+    fprintf(stderr, "error writing to %s\n", name);
+    return;
+  }
+  fclose(file);
+}
+#else
+template<typename T>
+void dump(const char * name, T * frames, size_t count)
+{ }
+#endif
+
+// The more the ratio is far from 1, the more we accept a big error.
+float epsilon_tweak_ratio(float ratio)
+{
+  return ratio >= 1 ? ratio : 1 / ratio;
+}
+
+// Epsilon values for comparing resampled data to expected data.
+// The bigger the resampling ratio is, the more lax we are about errors.
+template<typename T>
+T epsilon(float ratio);
+
+template<>
+float epsilon(float ratio) {
+  return 0.08f * epsilon_tweak_ratio(ratio);
+}
+
+template<>
+int16_t epsilon(float ratio) {
+  return static_cast<int16_t>(10 * epsilon_tweak_ratio(ratio));
+}
+
+void test_delay_lines(uint32_t delay_frames, uint32_t channels, uint32_t chunk_ms)
+{
+  const size_t length_s = 2;
+  const size_t rate = 44100;
+  const size_t length_frames = rate * length_s;
+  delay_line<float> delay(delay_frames, channels);
+  auto_array<float> input;
+  auto_array<float> output;
+  uint32_t chunk_length = channels * chunk_ms * rate / 1000;
+  uint32_t output_offset = 0;
+  uint32_t channel = 0;
+
+  /** Generate diracs every 100 frames, and check they are delayed. */
+  input.push_silence(length_frames * channels);
+  for (uint32_t i = 0; i < input.length() - 1; i+=100) {
+    input.data()[i + channel] = 0.5;
+    channel = (channel + 1) % channels;
+  }
+  dump("input.raw", input.data(), input.length());
+  while(input.length()) {
+    uint32_t to_pop = std::min<uint32_t>(input.length(), chunk_length * channels);
+    float * in = delay.input_buffer(to_pop / channels);
+    input.pop(in, to_pop);
+    delay.written(to_pop / channels);
+    output.push_silence(to_pop);
+    delay.output(output.data() + output_offset, to_pop / channels);
+    output_offset += to_pop;
+  }
+
+  // Check the diracs have been shifted by `delay_frames` frames.
+  for (uint32_t i = 0; i < output.length() - delay_frames * channels + 1; i+=100) {
+    assert(output.data()[i + channel + delay_frames * channels] == 0.5);
+    channel = (channel + 1) % channels;
+  }
+
+  dump("output.raw", output.data(), output.length());
+}
+/**
+ * This takes sine waves with a certain `channels` count, `source_rate`, and
+ * resample them, by chunk of `chunk_duration` milliseconds, to `target_rate`.
+ * Then a sample-wise comparison is performed against a sine wave generated at
+ * the correct rate.
+ */
+template<typename T>
+void test_resampler_one_way(uint32_t channels, uint32_t source_rate, uint32_t target_rate, float chunk_duration)
+{
+  size_t chunk_duration_in_source_frames = static_cast<uint32_t>(ceil(chunk_duration * source_rate / 1000.));
+  float resampling_ratio = static_cast<float>(source_rate) / target_rate;
+  cubeb_resampler_speex_one_way<T> resampler(channels, source_rate, target_rate, 3);
+  auto_array<T> source(channels * source_rate * 10);
+  auto_array<T> destination(channels * target_rate * 10);
+  auto_array<T> expected(channels * target_rate * 10);
+  uint32_t phase_index = 0;
+  uint32_t offset = 0;
+  const uint32_t buf_len = 2; /* seconds */
+
+  // generate a sine wave in each channel, at the source sample rate
+  source.push_silence(channels * source_rate * buf_len);
+  while(offset != source.length()) {
+    float  p = phase_index++ / static_cast<float>(source_rate);
+    for (uint32_t j = 0; j < channels; j++) {
+      source.data()[offset++] = 0.5 * sin(440. * 2 * PI * p);
+    }
+  }
+
+  dump("input.raw", source.data(), source.length());
+
+  expected.push_silence(channels * target_rate * buf_len);
+  // generate a sine wave in each channel, at the target sample rate.
+  // Insert silent samples at the beginning to account for the resampler latency.
+  offset = resampler.latency() * channels;
+  for (uint32_t i = 0; i < offset; i++) {
+    expected.data()[i] = 0.0f;
+  }
+  phase_index = 0;
+  while (offset != expected.length()) {
+    float  p = phase_index++ / static_cast<float>(target_rate);
+    for (uint32_t j = 0; j < channels; j++) {
+      expected.data()[offset++] = 0.5 * sin(440. * 2 * PI * p);
+    }
+  }
+
+  dump("expected.raw", expected.data(), expected.length());
+
+  // resample by chunk
+  uint32_t write_offset = 0;
+  destination.push_silence(channels * target_rate * buf_len);
+  while (write_offset < destination.length())
+  {
+    size_t output_frames = static_cast<uint32_t>(floor(chunk_duration_in_source_frames / resampling_ratio));
+    uint32_t input_frames = resampler.input_needed_for_output(output_frames);
+    resampler.input(source.data(), input_frames);
+    source.pop(nullptr, input_frames * channels);
+    resampler.output(destination.data() + write_offset,
+                     std::min(output_frames, (destination.length() - write_offset) / channels));
+    write_offset += output_frames * channels;
+  }
+
+  dump("output.raw", destination.data(), expected.length());
+
+  // compare, taking the latency into account
+  bool fuzzy_equal = true;
+  for (uint32_t i = resampler.latency() + 1; i < expected.length(); i++) {
+    float diff = fabs(expected.data()[i] - destination.data()[i]);
+    if (diff > epsilon<T>(resampling_ratio)) {
+      fprintf(stderr, "divergence at %d: %f %f (delta %f)\n", i, expected.data()[i], destination.data()[i], diff);
+      fuzzy_equal = false;
+    }
+  }
+  assert(fuzzy_equal);
+}
+
+template<typename T>
+cubeb_sample_format cubeb_format();
+
+template<>
+cubeb_sample_format cubeb_format<float>()
+{
+  return CUBEB_SAMPLE_FLOAT32NE;
+}
+
+template<>
+cubeb_sample_format cubeb_format<short>()
+{
+  return CUBEB_SAMPLE_S16NE;
+}
+
+struct osc_state {
+  osc_state()
+    : input_phase_index(0)
+    , output_phase_index(0)
+    , output_offset(0)
+    , input_channels(0)
+    , output_channels(0)
+  {}
+  uint32_t input_phase_index;
+  uint32_t max_output_phase_index;
+  uint32_t output_phase_index;
+  uint32_t output_offset;
+  uint32_t input_channels;
+  uint32_t output_channels;
+  uint32_t output_rate;
+  uint32_t target_rate;
+  auto_array<float> input;
+  auto_array<float> output;
+};
+
+uint32_t fill_with_sine(float * buf, uint32_t rate, uint32_t channels,
+                        uint32_t frames, uint32_t initial_phase)
+{
+  uint32_t offset = 0;
+  for (uint32_t i = 0; i < frames; i++) {
+    float  p = initial_phase++ / static_cast<float>(rate);
+    for (uint32_t j = 0; j < channels; j++) {
+      buf[offset++] = 0.5 * sin(440. * 2 * PI * p);
+    }
+  }
+  return initial_phase;
+}
+
+long data_cb(cubeb_stream * stm, void * user_ptr,
+             const void * input_buffer, void * output_buffer, long frame_count)
+{
+  osc_state * state = reinterpret_cast<osc_state*>(user_ptr);
+  const float * in = reinterpret_cast<const float*>(input_buffer);
+  float * out = reinterpret_cast<float*>(output_buffer);
+
+
+  state->input.push(in, frame_count * state->input_channels);
+
+  /* Check how much output frames we need to write */
+  uint32_t remaining = state->max_output_phase_index - state->output_phase_index;
+  uint32_t to_write = std::min<uint32_t>(remaining, frame_count);
+  state->output_phase_index = fill_with_sine(out,
+                                             state->target_rate,
+                                             state->output_channels,
+                                             to_write,
+                                             state->output_phase_index);
+
+  return to_write;
+}
+
+template<typename T>
+bool array_fuzzy_equal(const auto_array<T>& lhs, const auto_array<T>& rhs, T epsi)
+{
+  uint32_t len = std::min(lhs.length(), rhs.length());
+
+  for (uint32_t i = 0; i < len; i++) {
+    if (fabs(lhs.at(i) - rhs.at(i)) > epsi) {
+      std::cout << "not fuzzy equal at index: " << i
+                << " lhs: " << lhs.at(i) <<  " rhs: " << rhs.at(i)
+                << " delta: " << fabs(lhs.at(i) - rhs.at(i))
+                << " epsilon: "<< epsi << std::endl;
+      return false;
+    }
+  }
+  return true;
+}
+
+template<typename T>
+void test_resampler_duplex(uint32_t input_channels, uint32_t output_channels,
+                           uint32_t input_rate, uint32_t output_rate,
+                           uint32_t target_rate, float chunk_duration)
+{
+  cubeb_stream_params input_params;
+  cubeb_stream_params output_params;
+  osc_state state;
+
+  input_params.format = output_params.format = cubeb_format<T>();
+  state.input_channels = input_params.channels = input_channels;
+  state.output_channels = output_params.channels = output_channels;
+  input_params.rate = input_rate;
+  state.output_rate = output_params.rate = output_rate;
+  state.target_rate = target_rate;
+  long got;
+
+  cubeb_resampler * resampler =
+    cubeb_resampler_create((cubeb_stream*)nullptr, &input_params, &output_params, target_rate,
+                           data_cb, (void*)&state, CUBEB_RESAMPLER_QUALITY_VOIP);
+
+  long latency = cubeb_resampler_latency(resampler);
+
+  const uint32_t duration_s = 2;
+  int32_t duration_frames = duration_s * target_rate;
+  uint32_t input_array_frame_count = ceil(chunk_duration * input_rate / 1000) + ceilf(static_cast<float>(input_rate) / target_rate) * 2;
+  uint32_t output_array_frame_count = chunk_duration * output_rate / 1000;
+  auto_array<float> input_buffer(input_channels * input_array_frame_count);
+  auto_array<float> output_buffer(output_channels * output_array_frame_count);
+  auto_array<float> expected_resampled_input(input_channels * duration_frames);
+  auto_array<float> expected_resampled_output(output_channels * output_rate * duration_s);
+
+  state.max_output_phase_index = duration_s * target_rate;
+
+  expected_resampled_input.push_silence(input_channels * duration_frames);
+  expected_resampled_output.push_silence(output_channels * output_rate * duration_s);
+
+  /* expected output is a 440Hz sine wave at 16kHz */
+  fill_with_sine(expected_resampled_input.data() + latency,
+                 target_rate, input_channels, duration_frames - latency, 0);
+  /* expected output is a 440Hz sine wave at 32kHz */
+  fill_with_sine(expected_resampled_output.data() + latency,
+                 output_rate, output_channels, output_rate * duration_s - latency, 0);
+
+
+  while (state.output_phase_index != state.max_output_phase_index) {
+    uint32_t leftover_samples = input_buffer.length() * input_channels;
+    input_buffer.reserve(input_array_frame_count);
+    state.input_phase_index = fill_with_sine(input_buffer.data() + leftover_samples,
+                                             input_rate,
+                                             input_channels,
+                                             input_array_frame_count - leftover_samples,
+                                             state.input_phase_index);
+    long input_consumed = input_array_frame_count;
+    input_buffer.set_length(input_array_frame_count);
+
+    got = cubeb_resampler_fill(resampler,
+                               input_buffer.data(), &input_consumed,
+                               output_buffer.data(), output_array_frame_count);
+
+    /* handle leftover input */
+    if (input_array_frame_count != static_cast<uint32_t>(input_consumed)) {
+      input_buffer.pop(nullptr, input_consumed * input_channels);
+    } else {
+      input_buffer.clear();
+    }
+
+    state.output.push(output_buffer.data(), got * state.output_channels);
+  }
+
+  dump("input_expected.raw", expected_resampled_input.data(), expected_resampled_input.length());
+  dump("output_expected.raw", expected_resampled_output.data(), expected_resampled_output.length());
+  dump("input.raw", state.input.data(), state.input.length());
+  dump("output.raw", state.output.data(), state.output.length());
+
+  assert(array_fuzzy_equal(state.input, expected_resampled_input, epsilon<T>(input_rate/target_rate)));
+  assert(array_fuzzy_equal(state.output, expected_resampled_output, epsilon<T>(output_rate/target_rate)));
+
+  cubeb_resampler_destroy(resampler);
+}
+
+#define array_size(x) (sizeof(x) / sizeof(x[0]))
+
+void test_resamplers_one_way()
+{
+  /* Test one way resamplers */
+  for (uint32_t channels = 1; channels <= max_channels; channels++) {
+    for (uint32_t source_rate = 0; source_rate < array_size(sample_rates); source_rate++) {
+      for (uint32_t dest_rate = 0; dest_rate < array_size(sample_rates); dest_rate++) {
+        for (uint32_t chunk_duration = min_chunks; chunk_duration < max_chunks; chunk_duration+=chunk_increment) {
+          printf("one_way: channels: %d, source_rate: %d, dest_rate: %d, chunk_duration: %d\n",
+                  channels, sample_rates[source_rate], sample_rates[dest_rate], chunk_duration);
+          test_resampler_one_way<float>(channels, sample_rates[source_rate],
+                                        sample_rates[dest_rate], chunk_duration);
+        }
+      }
+    }
+  }
+}
+
+void test_resamplers_duplex()
+{
+  /* Test duplex resamplers */
+  for (uint32_t input_channels = 1; input_channels <= max_channels; input_channels++) {
+    for (uint32_t output_channels = 1; output_channels <= max_channels; output_channels++) {
+      for (uint32_t source_rate_input = 0; source_rate_input < array_size(sample_rates); source_rate_input++) {
+        for (uint32_t source_rate_output = 0; source_rate_output < array_size(sample_rates); source_rate_output++) {
+          for (uint32_t dest_rate = 0; dest_rate < array_size(sample_rates); dest_rate++) {
+            for (uint32_t chunk_duration = min_chunks; chunk_duration < max_chunks; chunk_duration+=chunk_increment) {
+              printf("input channels:%d output_channels:%d input_rate:%d "
+                     "output_rate:%d target_rate:%d chunk_ms:%d\n",
+                     input_channels, output_channels,
+                     sample_rates[source_rate_input],
+                     sample_rates[source_rate_output],
+                     sample_rates[dest_rate],
+                     chunk_duration);
+              test_resampler_duplex<float>(input_channels, output_channels,
+                                           sample_rates[source_rate_input],
+                                           sample_rates[source_rate_output],
+                                           sample_rates[dest_rate],
+                                           chunk_duration);
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+void test_delay_line()
+{
+  for (uint32_t channel = 1; channel <= 2; channel++) {
+    for (uint32_t delay_frames = 4; delay_frames <= 40; delay_frames+=chunk_increment) {
+      for (uint32_t chunk_size = 10; chunk_size <= 30; chunk_size++) {
+       printf("channel: %d, delay_frames: %d, chunk_size: %d\n",
+              channel, delay_frames, chunk_size);
+        test_delay_lines(delay_frames, channel, chunk_size);
+      }
+    }
+  }
+}
+
+long test_output_only_noop_data_cb(cubeb_stream * stm, void * user_ptr,
+                                   const void * input_buffer,
+                                   void * output_buffer, long frame_count)
+{
+  assert(output_buffer);
+  assert(!input_buffer);
+  return frame_count;
+}
+
+void test_output_only_noop()
+{
+  cubeb_stream_params output_params;
+  int target_rate;
+
+  output_params.rate = 44100;
+  output_params.channels = 1;
+  output_params.format = CUBEB_SAMPLE_FLOAT32NE;
+  target_rate = output_params.rate;
+
+  cubeb_resampler * resampler =
+    cubeb_resampler_create((cubeb_stream*)nullptr, nullptr, &output_params, target_rate,
+                           test_output_only_noop_data_cb, nullptr,
+                           CUBEB_RESAMPLER_QUALITY_VOIP);
+
+  const long out_frames = 128;
+  float out_buffer[out_frames];
+  long got;
+
+  got = cubeb_resampler_fill(resampler, nullptr, nullptr,
+                             out_buffer, out_frames);
+
+  assert(got == out_frames);
+
+  cubeb_resampler_destroy(resampler);
+}
+
+long test_drain_data_cb(cubeb_stream * stm, void * user_ptr,
+                        const void * input_buffer,
+                        void * output_buffer, long frame_count)
+{
+  assert(output_buffer);
+  assert(!input_buffer);
+  return frame_count - 10;
+}
+
+void test_resampler_drain()
+{
+  cubeb_stream_params output_params;
+  int target_rate;
+
+  output_params.rate = 44100;
+  output_params.channels = 1;
+  output_params.format = CUBEB_SAMPLE_FLOAT32NE;
+  target_rate = 48000;
+
+  cubeb_resampler * resampler =
+    cubeb_resampler_create((cubeb_stream*)nullptr, nullptr, &output_params, target_rate,
+                           test_drain_data_cb, nullptr,
+                           CUBEB_RESAMPLER_QUALITY_VOIP);
+
+  const long out_frames = 128;
+  float out_buffer[out_frames];
+  long got;
+
+  do {
+    got = cubeb_resampler_fill(resampler, nullptr, nullptr,
+                               out_buffer, out_frames);
+  } while (got == out_frames);
+
+  /* If the above is not an infinite loop, the drain was a success, just mark
+   * this test as such. */
+  assert(true);
+
+  cubeb_resampler_destroy(resampler);
+}
+
+int main()
+{
+  test_resamplers_one_way();
+  test_delay_line();
+  // This is disabled because the latency estimation in the resampler code is
+  // slightly off so we can generate expected vectors.
+  // test_resamplers_duplex();
+  test_output_only_noop();
+  test_resampler_drain();
+
+  return 0;
+}
new file mode 100644
--- /dev/null
+++ b/media/libcubeb/tests/test_utils.cpp
@@ -0,0 +1,80 @@
+#include <cassert>
+#include "cubeb_utils.h"
+
+int test_auto_array()
+{
+  auto_array<uint32_t> array;
+  auto_array<uint32_t> array2(10);
+  uint32_t a[10];
+
+  assert(array2.length() == 0);
+  assert(array2.capacity() == 10);
+
+
+  for (uint32_t i = 0; i < 10; i++) {
+    a[i] = i;
+  }
+
+  assert(array.capacity() == 0);
+  assert(array.length() == 0);
+
+  array.push(a, 10);
+
+  assert(!array.reserve(9));
+
+  for (uint32_t i = 0; i < 10; i++) {
+    assert(array.data()[i] == i);
+  }
+
+  assert(array.capacity() == 10);
+  assert(array.length() == 10);
+
+  uint32_t b[10];
+
+  array.pop(b, 5);
+
+  assert(array.capacity() == 10);
+  assert(array.length() == 5);
+  for (uint32_t i = 0; i < 5; i++) {
+    assert(b[i] == i);
+    assert(array.data()[i] == 5 + i);
+  }
+  uint32_t* bb = b + 5;
+  array.pop(bb, 5);
+
+  assert(array.capacity() == 10);
+  assert(array.length() == 0);
+  for (uint32_t i = 0; i < 5; i++) {
+    assert(bb[i] == 5 + i);
+  }
+
+  assert(!array.pop(nullptr, 1));
+
+  array.push(a, 10);
+  array.push(a, 10);
+
+  for (uint32_t j = 0; j < 2; j++) {
+    for (uint32_t i = 0; i < 10; i++) {
+      assert(array.data()[10 * j + i] == i);
+    }
+  }
+  assert(array.length() == 20);
+  assert(array.capacity() == 20);
+  array.pop(nullptr, 5);
+
+  for (uint32_t i = 0; i < 5; i++) {
+    assert(array.data()[i] == 5 + i);
+  }
+
+  assert(array.length() == 15);
+  assert(array.capacity() == 20);
+
+  return 0;
+}
+
+
+int main()
+{
+  test_auto_array();
+  return 0;
+}