Bug 1083101 - Add a task scheduler to Moz2D. r=jrmuizel
authorNicolas Silva <nsilva@mozilla.com>
Fri, 04 Sep 2015 14:27:26 +0200
changeset 260840 6ad2ac42107c8764ea6042ca87ea4d5757f7b524
parent 260839 022810f9a65a812bdf49e62e5ba1fe6edb512458
child 260841 9fe51adf6a70eb99f92b0747f7f1485d1059842f
push id64613
push usernsilva@mozilla.com
push dateFri, 04 Sep 2015 12:28:52 +0000
treeherdermozilla-inbound@c8de1f3f0bf3 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersjrmuizel
bugs1083101
milestone43.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1083101 - Add a task scheduler to Moz2D. r=jrmuizel
gfx/2d/TaskScheduler.cpp
gfx/2d/TaskScheduler.h
gfx/2d/TaskScheduler_posix.cpp
gfx/2d/TaskScheduler_posix.h
gfx/2d/TaskScheduler_win32.h
gfx/2d/Types.h
gfx/2d/moz.build
gfx/tests/gtest/TestTaskScheduler.cpp
gfx/tests/gtest/moz.build
new file mode 100644
--- /dev/null
+++ b/gfx/2d/TaskScheduler.cpp
@@ -0,0 +1,234 @@
+/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "TaskScheduler.h"
+
+namespace mozilla {
+namespace gfx {
+
+TaskScheduler* TaskScheduler::sSingleton = nullptr;
+
+bool TaskScheduler::Init(uint32_t aNumThreads, uint32_t aNumQueues)
+{
+  MOZ_ASSERT(!sSingleton);
+  MOZ_ASSERT(aNumThreads >= aNumQueues);
+
+  sSingleton = new TaskScheduler();
+  sSingleton->mNextQueue = 0;
+
+  for (uint32_t i = 0; i < aNumQueues; ++i) {
+    sSingleton->mDrawingQueues.push_back(new MultiThreadedTaskQueue());
+  }
+
+  for (uint32_t i = 0; i < aNumThreads; ++i) {
+    sSingleton->mWorkerThreads.push_back(new WorkerThread(sSingleton->mDrawingQueues[i%aNumQueues]));
+  }
+  return true;
+}
+
+void TaskScheduler::ShutDown()
+{
+  MOZ_ASSERT(IsEnabled());
+  if (!IsEnabled()) {
+    return;
+  }
+
+  for (auto queue : sSingleton->mDrawingQueues) {
+    queue->ShutDown();
+    delete queue;
+  }
+
+  for (WorkerThread* thread : sSingleton->mWorkerThreads) {
+    // this will block until the thread is joined.
+    delete thread;
+  }
+
+  sSingleton->mWorkerThreads.clear();
+  delete sSingleton;
+  sSingleton = nullptr;
+}
+
+TaskStatus
+TaskScheduler::ProcessTask(Task* aTask)
+{
+  MOZ_ASSERT(aTask);
+  auto status = aTask->Run();
+  if (status == TaskStatus::Error || status == TaskStatus::Complete) {
+    delete aTask;
+  }
+  return status;
+}
+
+void
+TaskScheduler::SubmitTask(Task* aTask)
+{
+  MOZ_ASSERT(aTask);
+  RefPtr<SyncObject> start = aTask->GetStartSync();
+  if (start && start->Register(aTask)) {
+    // The Task buffer starts with a non-signaled sync object, it
+    // is now registered in the list of task buffers waiting on the
+    // sync object, so we should not place it in the queue.
+    return;
+  }
+
+  aTask->GetTaskQueue()->SubmitTask(aTask);
+}
+
+Task::Task(MultiThreadedTaskQueue* aQueue, SyncObject* aStart, SyncObject* aCompletion)
+: mQueue(aQueue)
+, mStartSync(aStart)
+, mCompletionSync(aCompletion)
+{
+  if (mStartSync) {
+    mStartSync->AddSubsequent(this);
+  }
+  if (mCompletionSync) {
+    mCompletionSync->AddPrerequisite(this);
+  }
+}
+
+Task::~Task()
+{
+  if (mCompletionSync) {
+    //printf(" -- Task %p dtor completion %p\n", this, mCompletionSync);
+    mCompletionSync->Signal();
+    mCompletionSync = nullptr;
+  }
+}
+
+TaskStatus
+SetEventTask::Run()
+{
+  mEvent->Set();
+  return TaskStatus::Complete;
+}
+
+SetEventTask::SetEventTask(MultiThreadedTaskQueue* aQueue,
+                           SyncObject* aStart, SyncObject* aCompletion)
+: Task(aQueue, aStart, aCompletion)
+{
+  mEvent = new EventObject();
+}
+
+SetEventTask::~SetEventTask()
+{}
+
+
+SyncObject::SyncObject()
+: mSignals(0)
+, mHasSubmittedSubsequent(false)
+{}
+
+SyncObject::~SyncObject()
+{
+  MOZ_ASSERT(mWaitingTasks.size() == 0);
+}
+
+bool
+SyncObject::Register(Task* aTask)
+{
+  MOZ_ASSERT(aTask);
+
+  mHasSubmittedSubsequent = true;
+
+  int32_t signals = mSignals;
+
+  if (signals > 0) {
+    AddWaitingTask(aTask);
+    // Since Register and Signal can be called concurrently, it can happen that
+    // reading mSignals in Register happens before decrementing mSignals in Signal,
+    // but SubmitWaitingTasks happens before AddWaitingTask. This ordering means
+    // the SyncObject ends up in the signaled state with a task sitting in the
+    // waiting list. To prevent that we check mSignals a second time and submit
+    // again if signals reached zero in the mean time.
+    // We do this instead of holding a mutex around mSignals+mTasks to reduce
+    // lock contention.
+    int32_t signals2 = mSignals;
+    if (signals2 == 0) {
+      SubmitWaitingTasks();
+    }
+    return true;
+  }
+
+  return false;
+}
+
+void
+SyncObject::Signal()
+{
+  // Fetch mHasSubmittedSubsequent *before* mSignals to avoid a race condition
+  // where signals reach zero before we have created all of the prerequisites
+  // which can lead to SubmitTasks being called with subsequents added in the
+  // mean time if the thread is interrupted between the read from mSignals and
+  // the read from mHasSubmittedSubsequents.
+  bool hasSubmittedSubsequent = mHasSubmittedSubsequent;
+  int32_t signals = --mSignals;
+  MOZ_ASSERT(signals >= 0);
+
+  if (hasSubmittedSubsequent && signals == 0) {
+    SubmitWaitingTasks();
+  }
+}
+
+void
+SyncObject::AddWaitingTask(Task* aTask)
+{
+  MutexAutoLock lock(&mMutex);
+  mWaitingTasks.push_back(aTask);
+}
+
+void SyncObject::SubmitWaitingTasks()
+{
+  std::vector<Task*> tasksToSubmit;
+  {
+    // Scheduling the tasks can cause code that modifies <this>'s reference
+    // count to run concurrently, and cause the caller of this function to
+    // be owned by another thread. We need to make sure the reference count
+    // does not reach 0 on another thread before mWaitingTasks.clear(), so
+    // hold a strong ref to prevent that!
+    RefPtr<SyncObject> kungFuDeathGrip(this);
+
+    MutexAutoLock lock(&mMutex);
+    tasksToSubmit = Move(mWaitingTasks);
+    mWaitingTasks.clear();
+  }
+
+  for (Task* task : tasksToSubmit) {
+    task->GetTaskQueue()->SubmitTask(task);
+  }
+}
+
+bool
+SyncObject::IsSignaled()
+{
+  return mSignals == 0;
+}
+
+void
+SyncObject::AddPrerequisite(Task* aTask)
+{
+#ifdef DEBUG
+  mPrerequisites.push_back(aTask);
+#endif
+  // If this assertion blows up it means that a Task that depends on this sync
+  // object has been submitted before we declared all of the prerequisites.
+  // This is racy because if mSignals reaches zero before all prerequisites
+  // have been declared, a subsequent can be scheduled before the completion
+  // of the undeclared prerequisites.
+  MOZ_ASSERT(!mHasSubmittedSubsequent);
+
+  mSignals++;
+}
+
+void
+SyncObject::AddSubsequent(Task* aTask)
+{
+#ifdef DEBUG
+  mSubsequents.push_back(aTask);
+#endif
+}
+
+} //namespace
+} //namespace
new file mode 100644
--- /dev/null
+++ b/gfx/2d/TaskScheduler.h
@@ -0,0 +1,216 @@
+/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MOZILLA_GFX_TASKSCHEDULER_H_
+#define MOZILLA_GFX_TASKSCHEDULER_H_
+
+#include "mozilla/RefPtr.h"
+#include "mozilla/gfx/Types.h"
+
+#ifdef WIN32
+#include "mozilla/gfx/TaskScheduler_win32.h"
+#else
+#include "mozilla/gfx/TaskScheduler_posix.h"
+#endif
+
+namespace mozilla {
+namespace gfx {
+
+class MultiThreadedTaskQueue;
+class SyncObject;
+
+class TaskScheduler {
+public:
+  /// Return one of the queues that the drawing worker threads pull from, chosen
+  /// pseudo-randomly.
+  static MultiThreadedTaskQueue* GetDrawingQueue()
+  {
+    return sSingleton->mDrawingQueues[
+      sSingleton->mNextQueue++ % sSingleton->mDrawingQueues.size()
+    ];
+  }
+
+  /// Return one of the queues that the drawing worker threads pull from with a
+  /// hash to choose the queue.
+  ///
+  /// Calling this function several times with the same hash will yield the same queue.
+  static MultiThreadedTaskQueue* GetDrawingQueue(uint32_t aHash)
+  {
+    return sSingleton->mDrawingQueues[
+      aHash % sSingleton->mDrawingQueues.size()
+    ];
+  }
+
+  /// Initialize the task scheduler with aNumThreads worker threads for drawing
+  /// and aNumQueues task queues.
+  ///
+  /// The number of threads must be superior or equal to the number of queues
+  /// (since for now a worker thread only pulls from one queue).
+  static bool Init(uint32_t aNumThreads, uint32_t aNumQueues);
+
+  /// Shut the scheduler down.
+  ///
+  /// This will block until worker threads are joined and deleted.
+  static void ShutDown();
+
+  /// Returns true if there is a successfully initialized TaskScheduler singleton.
+  static bool IsEnabled() { return !!sSingleton; }
+
+  /// Submit a task buffer to its associated queue.
+  ///
+  /// The caller looses ownership of the task buffer.
+  static void SubmitTask(Task* aTasks);
+
+  /// Process commands until the command buffer needs to block on a sync object,
+  /// completes, yields, or encounters an error.
+  ///
+  /// Can be used on any thread. Worker threads basically loop over this, but the
+  /// main thread can also dequeue pending task buffers and process them alongside
+  /// the worker threads if it is about to block until completion anyway.
+  ///
+  /// The caller looses ownership of the task buffer.
+  static TaskStatus ProcessTask(Task* aTasks);
+
+protected:
+  static TaskScheduler* sSingleton;
+
+  // queues of Task that are ready to be processed
+  std::vector<MultiThreadedTaskQueue*> mDrawingQueues;
+  std::vector<WorkerThread*> mWorkerThreads;
+  Atomic<uint32_t> mNextQueue;
+};
+
+/// Tasks are not reference-counted because they don't have shared ownership.
+/// The ownership of tasks can change when they are passed to certain methods
+/// of TaskScheduler and SyncObject. See the docuumentaion of these classes.
+class Task {
+public:
+  Task(MultiThreadedTaskQueue* aQueue, SyncObject* aStart = nullptr, SyncObject* aCompletion = nullptr);
+
+  virtual ~Task();
+
+  virtual TaskStatus Run() = 0;
+
+  /// For use in TaskScheduler::SubmitTask. Don't use it anywhere else.
+  //already_AddRefed<SyncObject> GetAndResetStartSync();
+  SyncObject* GetStartSync() { return mStartSync; }
+
+  MultiThreadedTaskQueue* GetTaskQueue() { return mQueue; }
+
+protected:
+
+  MultiThreadedTaskQueue* mQueue;
+  RefPtr<SyncObject> mStartSync;
+  RefPtr<SyncObject> mCompletionSync;
+};
+
+class EventObject;
+
+/// This task will set an EventObject.
+///
+/// Typically used as the final task, so that the main thread can block on the
+/// corresponfing EventObject until all of the tasks are processed.
+class SetEventTask : public Task
+{
+public:
+  SetEventTask(MultiThreadedTaskQueue* aQueue, SyncObject* aStart = nullptr, SyncObject* aCompletion = nullptr);
+
+  ~SetEventTask();
+
+  TaskStatus Run() override;
+
+  EventObject* GetEvent() { return mEvent; }
+
+protected:
+  RefPtr<EventObject> mEvent;
+};
+
+/// A synchronization object that can be used to express dependencies and ordering between
+/// tasks.
+///
+/// Tasks can register to SyncObjects in order to asynchronously wait for a signal.
+/// In practice, Task objects usually start with a sync object (startSyc) and end
+/// with another one (completionSync).
+/// a Task never gets processed before its startSync is in the signaled state, and
+/// signals its completionSync as soon as it finishes. This is how dependencies
+/// between tasks is expressed.
+class SyncObject final : public external::AtomicRefCounted<SyncObject> {
+public:
+  MOZ_DECLARE_REFCOUNTED_TYPENAME(SyncObject)
+
+  /// Create a synchronization object.
+  SyncObject();
+
+  ~SyncObject();
+
+  /// Attempt to register a task.
+  ///
+  /// If the sync object is already in the signaled state, the buffer is *not*
+  /// registered and the sync object does not take ownership of the task.
+  /// If the object is not yet in the signaled state, it takes ownership of
+  /// the task and places it in a list of pending tasks.
+  /// Pending tasks will not be processed by the worker thread.
+  /// When the SyncObject reaches the signaled state, it places the pending
+  /// tasks back in the available buffer queue, so that they can be
+  /// scheduled again.
+  ///
+  /// Returns true if the SyncOject is not already in the signaled state.
+  /// This means that if this method returns true, the SyncObject has taken
+  /// ownership of the Task.
+  bool Register(Task* aTask);
+
+  /// Signal the SyncObject.
+  ///
+  /// This decrements an internal counter. The sync object reaches the signaled
+  /// state when the counter gets to zero.
+  /// calling Signal on a SyncObject that is already in the signaled state has
+  /// no effect.
+  void Signal();
+
+  /// Returns true if mSignals is equal to zero. In other words, returns true
+  /// if all subsequent tasks have already signaled the sync object.
+  ///
+  /// Note that this means SyncObject are initially in the signaled state, until
+  /// a Task is created with and declares the sync objects as its "completion sync"
+  bool IsSignaled();
+
+private:
+  // Called by Task's constructor
+  void AddSubsequent(Task* aTask);
+  void AddPrerequisite(Task* aTask);
+
+  void AddWaitingTask(Task* aTask);
+
+  void SubmitWaitingTasks();
+
+#ifdef DEBUG
+  // For debugging purposes only.
+  std::vector<Task*> mPrerequisites;
+  std::vector<Task*> mSubsequents;
+#endif
+
+  std::vector<Task*> mWaitingTasks;
+  Mutex mMutex; // for concurrent access to mWaintingTasks
+  Atomic<uint32_t> mSignals;
+  Atomic<bool> mHasSubmittedSubsequent;
+
+  friend class Task;
+  friend class TaskScheduler;
+};
+
+
+/// RAII helper.
+struct MutexAutoLock {
+    MutexAutoLock(Mutex* aMutex) : mMutex(aMutex) { mMutex->Lock(); }
+    ~MutexAutoLock() { mMutex->Unlock(); }
+protected:
+    Mutex* mMutex;
+};
+
+
+} // namespace
+} // namespace
+
+#endif
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/gfx/2d/TaskScheduler_posix.cpp
@@ -0,0 +1,183 @@
+/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "TaskScheduler.h"
+#include "mozilla/gfx/Logging.h"
+
+using namespace std;
+
+namespace mozilla {
+namespace gfx {
+
+MultiThreadedTaskQueue::MultiThreadedTaskQueue()
+: mThreadsCount(0)
+, mShuttingDown(false)
+{}
+
+MultiThreadedTaskQueue::~MultiThreadedTaskQueue()
+{
+  MOZ_ASSERT(mTasks.empty());
+}
+
+bool
+MultiThreadedTaskQueue::WaitForTask(Task*& aOutTask)
+{
+  return PopTask(aOutTask, BLOCKING);
+}
+
+bool
+MultiThreadedTaskQueue::PopTask(Task*& aOutTasks, AccessType aAccess)
+{
+  for (;;) {
+    MutexAutoLock lock(&mMutex);
+
+    while (aAccess == BLOCKING && !mShuttingDown && mTasks.empty()) {
+      mAvailableCondvar.Wait(&mMutex);
+    }
+
+    if (mShuttingDown) {
+      return false;
+    }
+
+    if (mTasks.empty()) {
+      return false;
+    }
+
+    Task* task = mTasks.front();
+    MOZ_ASSERT(task);
+
+    mTasks.pop_front();
+
+    aOutTasks = task;
+    return true;
+  }
+}
+
+void
+MultiThreadedTaskQueue::SubmitTask(Task* aTasks)
+{
+  MOZ_ASSERT(aTasks);
+  MOZ_ASSERT(aTasks->GetTaskQueue() == this);
+  MutexAutoLock lock(&mMutex);
+  mTasks.push_back(aTasks);
+  mAvailableCondvar.Broadcast();
+}
+
+size_t
+MultiThreadedTaskQueue::NumTasks()
+{
+  MutexAutoLock lock(&mMutex);
+  return mTasks.size();
+}
+
+bool
+MultiThreadedTaskQueue::IsEmpty()
+{
+  MutexAutoLock lock(&mMutex);
+  return mTasks.empty();
+}
+
+void
+MultiThreadedTaskQueue::ShutDown()
+{
+  MutexAutoLock lock(&mMutex);
+  mShuttingDown = true;
+  while (mThreadsCount) {
+    mAvailableCondvar.Broadcast();
+    mShutdownCondvar.Wait(&mMutex);
+  }
+}
+
+void
+MultiThreadedTaskQueue::RegisterThread()
+{
+  mThreadsCount += 1;
+}
+
+void
+MultiThreadedTaskQueue::UnregisterThread()
+{
+  MutexAutoLock lock(&mMutex);
+  mThreadsCount -= 1;
+  if (mThreadsCount == 0) {
+    mShutdownCondvar.Broadcast();
+  }
+}
+
+void* ThreadCallback(void* threadData)
+{
+  WorkerThread* thread = (WorkerThread*)threadData;
+  thread->Run();
+  return nullptr;
+}
+
+WorkerThread::WorkerThread(MultiThreadedTaskQueue* aTaskQueue)
+: mQueue(aTaskQueue)
+{
+  aTaskQueue->RegisterThread();
+  pthread_create(&mThread, nullptr, ThreadCallback, this);
+}
+
+WorkerThread::~WorkerThread()
+{
+  pthread_join(mThread, nullptr);
+}
+
+void
+WorkerThread::Run()
+{
+  for (;;) {
+    Task* commands = nullptr;
+    if (!mQueue->WaitForTask(commands)) {
+      mQueue->UnregisterThread();
+      return;
+    }
+
+    TaskStatus status = TaskScheduler::ProcessTask(commands);
+
+    if (status == TaskStatus::Error) {
+      // Don't try to handle errors for now, but that's open to discussions.
+      // I expect errors to be mostly OOM issues.
+      MOZ_CRASH();
+    }
+  }
+}
+
+EventObject::EventObject()
+: mIsSet(false)
+{}
+
+EventObject::~EventObject()
+{}
+
+bool
+EventObject::Peak()
+{
+  MutexAutoLock lock(&mMutex);
+  return mIsSet;
+}
+
+void
+EventObject::Set()
+{
+  MutexAutoLock lock(&mMutex);
+  if (!mIsSet) {
+    mIsSet = true;
+    mCond.Broadcast();
+  }
+}
+
+void
+EventObject::Wait()
+{
+  MutexAutoLock lock(&mMutex);
+  if (mIsSet) {
+    return;
+  }
+  mCond.Wait(&mMutex);
+}
+
+} // namespce
+} // namespce
new file mode 100644
--- /dev/null
+++ b/gfx/2d/TaskScheduler_posix.h
@@ -0,0 +1,183 @@
+/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef WIN32
+#ifndef MOZILLA_GFX_TASKSCHEDULER_POSIX_H_
+#define MOZILLA_GFX_TASKSCHEDULER_POSIX_H_
+
+#include <string>
+#include <vector>
+#include <list>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "mozilla/RefPtr.h"
+#include "mozilla/DebugOnly.h"
+
+namespace mozilla {
+namespace gfx {
+
+class Task;
+class PosixCondVar;
+
+class Mutex {
+public:
+  Mutex() {
+    DebugOnly<int> err = pthread_mutex_init(&mMutex, nullptr);
+    MOZ_ASSERT(!err);
+  }
+
+  ~Mutex() {
+    DebugOnly<int> err = pthread_mutex_destroy(&mMutex);
+    MOZ_ASSERT(!err);
+  }
+
+  void Lock() {
+    DebugOnly<int> err = pthread_mutex_lock(&mMutex);
+    MOZ_ASSERT(!err);
+  }
+
+  void Unlock() {
+    DebugOnly<int> err = pthread_mutex_unlock(&mMutex);
+    MOZ_ASSERT(!err);
+  }
+
+protected:
+  pthread_mutex_t mMutex;
+  friend class PosixCondVar;
+};
+
+// posix platforms only!
+class PosixCondVar {
+public:
+  PosixCondVar() {
+    DebugOnly<int> err = pthread_cond_init(&mCond, nullptr);
+    MOZ_ASSERT(!err);
+  }
+
+  ~PosixCondVar() {
+    DebugOnly<int> err = pthread_cond_destroy(&mCond);
+    MOZ_ASSERT(!err);
+  }
+
+  void Wait(Mutex* aMutex) {
+    DebugOnly<int> err = pthread_cond_wait(&mCond, &aMutex->mMutex);
+    MOZ_ASSERT(!err);
+  }
+
+  void Broadcast() {
+    DebugOnly<int> err = pthread_cond_broadcast(&mCond);
+    MOZ_ASSERT(!err);
+  }
+
+protected:
+  pthread_cond_t mCond;
+};
+
+
+/// A simple and naive multithreaded task queue
+///
+/// The public interface of this class must remain identical to its equivalent
+/// in TaskScheduler_win32.h
+class MultiThreadedTaskQueue {
+public:
+  enum AccessType {
+    BLOCKING,
+    NON_BLOCKING
+  };
+
+  // Producer thread
+  MultiThreadedTaskQueue();
+
+  // Producer thread
+  ~MultiThreadedTaskQueue();
+
+  // Worker threads
+  bool WaitForTask(Task*& aOutTask);
+
+  // Any thread
+  bool PopTask(Task*& aOutTask, AccessType aAccess);
+
+  // Any threads
+  void SubmitTask(Task* aTask);
+
+  // Producer thread
+  void ShutDown();
+
+  // Any thread
+  size_t NumTasks();
+
+  // Any thread
+  bool IsEmpty();
+
+  // Producer thread
+  void RegisterThread();
+
+  // Worker threads
+  void UnregisterThread();
+
+protected:
+
+  std::list<Task*> mTasks;
+  Mutex mMutex;
+  PosixCondVar mAvailableCondvar;
+  PosixCondVar mShutdownCondvar;
+  int32_t mThreadsCount;
+  bool mShuttingDown;
+
+  friend class WorkerThread;
+};
+
+/// Worker thread that continuously dequeues Tasks from a MultiThreadedTaskQueue
+/// and process them.
+///
+/// The public interface of this class must remain identical to its equivalent
+/// in TaskScheduler_win32.h
+class WorkerThread {
+public:
+  WorkerThread(MultiThreadedTaskQueue* aTaskQueue);
+
+  ~WorkerThread();
+
+  void Run();
+protected:
+  MultiThreadedTaskQueue* mQueue;
+  pthread_t mThread;
+};
+
+/// An object that a thread can synchronously wait on.
+/// Usually set by a SetEventTask.
+class EventObject : public external::AtomicRefCounted<EventObject>
+{
+public:
+  MOZ_DECLARE_REFCOUNTED_TYPENAME(EventObject)
+
+  EventObject();
+
+  ~EventObject();
+
+  /// Synchronously wait until the event is set.
+  void Wait();
+
+  /// Return true if the event is set, without blocking.
+  bool Peak();
+
+  /// Set the event.
+  void Set();
+
+protected:
+  Mutex mMutex;
+  PosixCondVar mCond;
+  bool mIsSet;
+};
+
+} // namespace
+} // namespace
+
+#include "TaskScheduler.h"
+
+#endif
+#endif
new file mode 100644
--- /dev/null
+++ b/gfx/2d/TaskScheduler_win32.h
@@ -0,0 +1,76 @@
+/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef WIN32
+#ifndef MOZILLA_GFX_TASKSCHEDULER_WIN32_H_
+#define MOZILLA_GFX_TASKSCHEDULER_WIN32_H_
+
+#define NOT_IMPLEMENTED MOZ_CRASH("Not implemented")
+
+#include "mozilla/RefPtr.h"
+
+namespace mozilla {
+namespace gfx {
+
+class WorkerThread;
+class Task;
+
+class Mutex {
+public:
+  Mutex() { NOT_IMPLEMENTED; }
+  ~Mutex() { NOT_IMPLEMENTED; }
+  void Lock() { NOT_IMPLEMENTED; }
+  void Unlock() { NOT_IMPLEMENTED; }
+};
+
+// The public interface of this class must remain identical to its equivalent
+// in TaskScheduler_posix.h
+class MultiThreadedTaskQueue {
+public:
+  enum AccessType {
+    BLOCKING,
+    NON_BLOCKING
+  };
+
+  bool WaitForTask(Task*& aOutCommands) { NOT_IMPLEMENTED; }
+  bool PopTask(Task*& aOutCommands, AccessType aAccess) { NOT_IMPLEMENTED; }
+  void SubmitTask(Task* aCommands) { NOT_IMPLEMENTED; }
+  void ShutDown() { NOT_IMPLEMENTED; }
+  size_t NumTasks() { NOT_IMPLEMENTED;  }
+  bool IsEmpty() { NOT_IMPLEMENTED; }
+  void RegisterThread() { NOT_IMPLEMENTED; }
+  void UnregisterThread() { NOT_IMPLEMENTED; }
+
+  friend class WorkerThread;
+};
+
+
+// The public interface of this class must remain identical to its equivalent
+// in TaskScheduler_posix.h
+class EventObject : public external::AtomicRefCounted<EventObject>
+{
+public:
+  MOZ_DECLARE_REFCOUNTED_TYPENAME(EventObject)
+
+  EventObject() { NOT_IMPLEMENTED; }
+  ~EventObject() { NOT_IMPLEMENTED; }
+  void Wait() { NOT_IMPLEMENTED; }
+  bool Peak() { NOT_IMPLEMENTED; }
+  void Set() { NOT_IMPLEMENTED; }
+};
+
+// The public interface of this class must remain identical to its equivalent
+// in TaskScheduler_posix.h
+class WorkerThread {
+public:
+  WorkerThread(MultiThreadedTaskQueue* aTaskQueue) { NOT_IMPLEMENTED; }
+  void Run();
+};
+
+} // namespace
+} // namespace
+
+#endif
+#endif
--- a/gfx/2d/Types.h
+++ b/gfx/2d/Types.h
@@ -284,16 +284,23 @@ struct GradientStop
   bool operator<(const GradientStop& aOther) const {
     return offset < aOther.offset;
   }
 
   Float offset;
   Color color;
 };
 
+enum class TaskStatus {
+    Complete,
+    Wait,
+    Yield,
+    Error
+};
+
 } // namespace gfx
 } // namespace mozilla
 
 #if defined(XP_WIN) && defined(MOZ_GFX)
 #ifdef GFX2D_INTERNAL
 #define GFX2D_API __declspec(dllexport)
 #else
 #define GFX2D_API __declspec(dllimport)
--- a/gfx/2d/moz.build
+++ b/gfx/2d/moz.build
@@ -34,16 +34,19 @@ EXPORTS.mozilla.gfx += [
     'Point.h',
     'Quaternion.h',
     'Rect.h',
     'Scale.h',
     'ScaleFactor.h',
     'ScaleFactors2D.h',
     'SourceSurfaceCairo.h',
     'StackArray.h',
+    'TaskScheduler.h',
+    'TaskScheduler_posix.h',
+    'TaskScheduler_win32.h',
     'Tools.h',
     'Types.h',
     'UserData.h',
 ]
 
 if CONFIG['MOZ_WIDGET_TOOLKIT'] == 'cocoa':
     EXPORTS.mozilla.gfx += [
         'MacIOSurface.h',
@@ -66,16 +69,21 @@ elif CONFIG['MOZ_WIDGET_TOOLKIT'] == 'wi
         'ScaledFontDWrite.cpp',
         'ScaledFontWin.cpp',
         'SourceSurfaceD2D.cpp',
         'SourceSurfaceD2D1.cpp',
         'SourceSurfaceD2DTarget.cpp',
     ]
     DEFINES['WIN32'] = True
 
+if CONFIG['MOZ_WIDGET_TOOLKIT'] != 'windows':
+    SOURCES += [
+        'TaskScheduler_posix.cpp',
+    ]
+
 if CONFIG['MOZ_ENABLE_SKIA']:
     UNIFIED_SOURCES += [
         'convolver.cpp',
         'DrawTargetSkia.cpp',
         'PathSkia.cpp',
         'SourceSurfaceSkia.cpp',
     ]
     SOURCES += [
@@ -136,16 +144,17 @@ UNIFIED_SOURCES += [
     'PathRecording.cpp',
     'Quaternion.cpp',
     'RecordedEvent.cpp',
     'Scale.cpp',
     'ScaledFontBase.cpp',
     'ScaledFontCairo.cpp',
     'SourceSurfaceCairo.cpp',
     'SourceSurfaceRawData.cpp',
+    'TaskScheduler.cpp',
 ]
 
 SOURCES += [
     'PathHelpers.cpp', # Uses _USE_MATH_DEFINES
 ]
 
 if CONFIG['MOZ_WIDGET_TOOLKIT'] == 'cocoa':
     SOURCES += [
new file mode 100644
--- /dev/null
+++ b/gfx/tests/gtest/TestTaskScheduler.cpp
@@ -0,0 +1,246 @@
+/* vim:set ts=2 sw=2 sts=2 et: */
+/* Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+#ifndef WIN32
+
+#include "gtest/gtest.h"
+#include "gmock/gmock.h"
+
+#include "mozilla/gfx/TaskScheduler.h"
+
+#include <pthread.h>
+#include <sched.h>
+#include <stdlib.h>
+#include <time.h>
+
+namespace test_scheduler {
+
+using namespace mozilla::gfx;
+using namespace mozilla;
+
+// Artificially cause threads to yield randomly in an attempt to make racy
+// things more apparent (if any).
+void MaybeYieldThread()
+{
+  if (rand() % 5 == 0) {
+    sched_yield();
+  }
+}
+
+/// Used by the TestCommand to check that tasks are processed in the right order.
+struct SanityChecker {
+  std::vector<uint64_t> mAdvancements;
+  mozilla::gfx::Mutex mMutex;
+
+  explicit SanityChecker(uint64_t aNumCmdBuffers)
+  {
+    for (uint32_t i = 0; i < aNumCmdBuffers; ++i) {
+      mAdvancements.push_back(0);
+    }
+  }
+
+  virtual void Check(uint64_t aTaskId, uint64_t aCmdId)
+  {
+    MaybeYieldThread();
+    MutexAutoLock lock(&mMutex);
+    ASSERT_EQ(mAdvancements[aTaskId], aCmdId-1);
+    mAdvancements[aTaskId] = aCmdId;
+  }
+};
+
+/// Run checks that are specific to TestSchulerJoin.
+struct JoinTestSanityCheck : public SanityChecker {
+  bool mSpecialTaskHasRun;
+
+  explicit JoinTestSanityCheck(uint64_t aNumCmdBuffers)
+  : SanityChecker(aNumCmdBuffers)
+  , mSpecialTaskHasRun(false)
+  {}
+
+  virtual void Check(uint64_t aTaskId, uint64_t aCmdId) override
+  {
+    // Task 0 is the special task executed when everything is joined after task 1
+    if (aCmdId == 0) {
+      ASSERT_FALSE(mSpecialTaskHasRun);
+      mSpecialTaskHasRun = true;
+      for (auto advancement : mAdvancements) {
+        // Because of the synchronization point (beforeFilter), all
+        // task buffers should have run task 1 when task 0 is run.
+        ASSERT_EQ(advancement, (uint32_t)1);
+      }
+    } else {
+      // This check does not apply to task 0.
+      SanityChecker::Check(aTaskId, aCmdId);
+    }
+
+    if (aCmdId == 2) {
+      ASSERT_TRUE(mSpecialTaskHasRun);
+    }
+  }
+};
+
+class TestTask : public Task
+{
+public:
+  TestTask(uint64_t aCmdId, uint64_t aTaskId, SanityChecker* aChecker,
+           MultiThreadedTaskQueue* aQueue,
+           SyncObject* aStart, SyncObject* aCompletion)
+  : Task(aQueue, aStart, aCompletion)
+  , mCmdId(aCmdId)
+  , mCmdBufferId(aTaskId)
+  , mSanityChecker(aChecker)
+  {}
+
+  TaskStatus Run()
+  {
+    MaybeYieldThread();
+    mSanityChecker->Check(mCmdBufferId, mCmdId);
+    MaybeYieldThread();
+    return TaskStatus::Complete;
+  }
+
+  uint64_t mCmdId;
+  uint64_t mCmdBufferId;
+  SanityChecker* mSanityChecker;
+};
+
+/// This test creates aNumCmdBuffers task buffers with sync objects set up
+/// so that all tasks will join after command 5 before a task buffer runs
+/// a special task (task 0) after which all task buffers fork again.
+/// This simulates the kind of scenario where all tiles must join at
+/// a certain point to execute, say, a filter, and fork again after the filter
+/// has been processed.
+/// The main thread is only blocked when waiting for the completion of the entire
+/// task stream (it doesn't have to wait at the filter's sync points to orchestrate it).
+void TestSchedulerJoin(uint32_t aNumThreads, uint32_t aNumCmdBuffers)
+{
+  JoinTestSanityCheck check(aNumCmdBuffers);
+
+  RefPtr<SyncObject> beforeFilter = new SyncObject();
+  RefPtr<SyncObject> afterFilter = new SyncObject();
+  RefPtr<SyncObject> completion = new SyncObject();
+
+
+  for (uint32_t i = 0; i < aNumCmdBuffers; ++i) {
+    Task* t1 = new TestTask(1, i, &check, TaskScheduler::GetDrawingQueue(),
+                            nullptr, beforeFilter);
+    TaskScheduler::SubmitTask(t1);
+    MaybeYieldThread();
+  }
+
+  // This task buffer is executed when all other tasks have joined after task 1
+  TaskScheduler::SubmitTask(
+    new TestTask(0, 0, &check, TaskScheduler::GetDrawingQueue(), beforeFilter, afterFilter)
+  );
+
+  for (uint32_t i = 0; i < aNumCmdBuffers; ++i) {
+    Task* t2 = new TestTask(2, i, &check, TaskScheduler::GetDrawingQueue(),
+                            afterFilter, completion);
+    TaskScheduler::SubmitTask(t2);
+    MaybeYieldThread();
+  }
+
+  auto evtTask = new SetEventTask(TaskScheduler::GetDrawingQueue(), completion);
+  RefPtr<EventObject> waitForCompletion = evtTask->GetEvent();
+  TaskScheduler::SubmitTask(evtTask);
+
+  MaybeYieldThread();
+
+  waitForCompletion->Wait();
+
+  MaybeYieldThread();
+
+  for (auto advancement : check.mAdvancements) {
+    ASSERT_TRUE(advancement == 2);
+  }
+}
+
+/// This test creates several chains of 10 task, tasks of a given chain are executed
+/// sequentially, and chains are exectuted in parallel.
+/// This simulates the typical scenario where we want to process sequences of drawing
+/// commands for several tiles in parallel.
+void TestSchedulerChain(uint32_t aNumThreads, uint32_t aNumCmdBuffers)
+{
+  SanityChecker check(aNumCmdBuffers);
+
+  RefPtr<SyncObject> completion = new SyncObject();
+
+  uint32_t numTasks = 10;
+
+  for (uint32_t i = 0; i < aNumCmdBuffers; ++i) {
+
+    std::vector<RefPtr<SyncObject>> syncs;
+    std::vector<Task*> tasks;
+    syncs.reserve(numTasks);
+    tasks.reserve(numTasks);
+
+    for (uint32_t t = 0; t < numTasks-1; ++t) {
+      syncs.push_back(new SyncObject());
+      tasks.push_back(new TestTask(t+1, i, &check, TaskScheduler::GetDrawingQueue(),
+                                   t == 0 ? nullptr : syncs[t-1].get(),
+                                   syncs[t]));
+    }
+
+    tasks.push_back(new TestTask(numTasks, i, &check,
+                    TaskScheduler::GetDrawingQueue(),
+                    syncs.back(), completion));
+
+    if (i % 2 == 0) {
+      // submit half of the tasks in order
+      for (Task* task : tasks) {
+        TaskScheduler::SubmitTask(task);
+        MaybeYieldThread();
+      }
+    } else {
+      // ... and submit the other half in reverse order
+      for (int32_t reverse = numTasks-1; reverse >= 0; --reverse) {
+        TaskScheduler::SubmitTask(tasks[reverse]);
+        MaybeYieldThread();
+      }
+    }
+  }
+
+  auto evtTask = new SetEventTask(TaskScheduler::GetDrawingQueue(), completion);
+  RefPtr<EventObject> waitForCompletion = evtTask->GetEvent();
+  TaskScheduler::SubmitTask(evtTask);
+
+  MaybeYieldThread();
+
+  waitForCompletion->Wait();
+
+  for (auto advancement : check.mAdvancements) {
+    ASSERT_TRUE(advancement == numTasks);
+  }
+}
+
+} // namespace test_scheduler
+
+TEST(Moz2D, TaskScheduler_Join) {
+  srand(time(nullptr));
+  for (uint32_t threads = 1; threads < 16; ++threads) {
+    for (uint32_t queues = 1; queues < threads; ++queues) {
+      for (uint32_t buffers = 1; buffers < 100; buffers += 3) {
+        mozilla::gfx::TaskScheduler::Init(threads, queues);
+        test_scheduler::TestSchedulerJoin(threads, buffers);
+        mozilla::gfx::TaskScheduler::ShutDown();
+      }
+    }
+  }
+}
+
+TEST(Moz2D, TaskScheduler_Chain) {
+  srand(time(nullptr));
+  for (uint32_t threads = 1; threads < 16; ++threads) {
+    for (uint32_t queues = 1; queues < threads; ++queues) {
+      for (uint32_t buffers = 1; buffers < 50; buffers += 3) {
+        mozilla::gfx::TaskScheduler::Init(threads, queues);
+        test_scheduler::TestSchedulerChain(threads, buffers);
+        mozilla::gfx::TaskScheduler::ShutDown();
+      }
+    }
+  }
+}
+
+#endif
--- a/gfx/tests/gtest/moz.build
+++ b/gfx/tests/gtest/moz.build
@@ -16,16 +16,17 @@ UNIFIED_SOURCES += [
     'TestGfxPrefs.cpp',
     'TestGfxWidgets.cpp',
     'TestLayers.cpp',
     'TestMoz2D.cpp',
     'TestQcms.cpp',
     'TestRect.cpp',
     'TestRegion.cpp',
     'TestSkipChars.cpp',
+    'TestTaskScheduler.cpp',
     # Hangs on linux in ApplyGdkScreenFontOptions
     #'gfxFontSelectionTest.cpp',
     'TestTextures.cpp',
     # Test works but it doesn't assert anything
     #'gfxTextRunPerfTest.cpp',
     # Bug 1179287 - PGO bustage on Linux
     #'TestTiledLayerBuffer.cpp',
     'TestVsync.cpp',