Bug 1151607 - Step 2: Apply net/ipc namespace separation and chroot to media plugins. r=kang
author: Jed Davis <jld@mozilla.com>
Fri, 10 Apr 2015 18:05:19 -0700
changeset 256787 acc410f0b28ca4affaed71fd1bfb0330a3c33072
parent 256786 9a186f904c5d4ed858f28ccb9129f501143991a1
child 256788 a4dd555969f339e5ed66c6f19b1a89e0028998ab
push id: 1509
push user: programfox@hotmail.be
push date: Mon, 13 Apr 2015 10:33:09 +0000
reviewers: kang
bugs: 1151607
milestone: 40.0a1
Bug 1151607 - Step 2: Apply net/ipc namespace separation and chroot to media plugins. r=kang This needs more unit tests for the various pieces of what's going on here (LinuxCapabilities, SandboxChroot, UnshareUserNamespace()) but that's nontrivial due to needing a single-threaded process -- and currently they can't be run on Mozilla's CI anyway due to needing user namespaces, and local testing can just try using GMP and manually inspecting the child process. So that will be a followup.
security/sandbox/linux/LinuxCapabilities.cpp
security/sandbox/linux/LinuxCapabilities.h
security/sandbox/linux/Sandbox.cpp
security/sandbox/linux/SandboxChroot.cpp
security/sandbox/linux/SandboxChroot.h
security/sandbox/linux/SandboxUtil.cpp
security/sandbox/linux/SandboxUtil.h
security/sandbox/linux/gtest/moz.build
security/sandbox/linux/moz.build
new file mode 100644
--- /dev/null
+++ b/security/sandbox/linux/LinuxCapabilities.cpp
@@ -0,0 +1,28 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "LinuxCapabilities.h"
+
+#include <unistd.h>
+#include <sys/syscall.h>
+
+namespace mozilla {
+
+// Fills mBits with the capability sets the kernel reports through the
+// raw capget syscall, using the version-3 header.  Returns false if the
+// syscall fails or if the header no longer carries the version-3 tag
+// afterwards.
+bool
+LinuxCapabilities::GetCurrent() {
+  __user_cap_header_struct hdr = { _LINUX_CAPABILITY_VERSION_3, 0 };
+  if (syscall(__NR_capget, &hdr, &mBits) != 0) {
+    return false;
+  }
+  return hdr.version == _LINUX_CAPABILITY_VERSION_3;
+}
+
+// Passes mBits, exactly as stored, to the raw capset syscall using the
+// version-3 header.  Returns false if the syscall fails or if the
+// header no longer carries the version-3 tag afterwards.
+bool
+LinuxCapabilities::SetCurrentRaw() const {
+  __user_cap_header_struct hdr = { _LINUX_CAPABILITY_VERSION_3, 0 };
+  if (syscall(__NR_capset, &hdr, &mBits) != 0) {
+    return false;
+  }
+  return hdr.version == _LINUX_CAPABILITY_VERSION_3;
+}
+
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/security/sandbox/linux/LinuxCapabilities.h
@@ -0,0 +1,119 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_LinuxCapabilities_h
+#define mozilla_LinuxCapabilities_h
+
+#include <linux/capability.h>
+#include <stdint.h>
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/PodOperations.h"
+
+// This class is a relatively simple interface to manipulating the
+// capabilities of a Linux process/thread; see the capabilities(7) man
+// page for background information.
+
+// Unfortunately, Android's kernel headers omit some definitions
+// needed for the low-level capability interface.  They're part of the
+// stable syscall ABI, so it's safe to include them here.
+#ifndef _LINUX_CAPABILITY_VERSION_3
+#define _LINUX_CAPABILITY_VERSION_3  0x20080522
+#define _LINUX_CAPABILITY_U32S_3     2
+#endif
+#ifndef CAP_TO_INDEX
+// Word index and bit mask of capability number x within the 32-bit
+// words of a capability set.  The mask is built from an unsigned
+// literal so that capability 31 of a word doesn't shift a 1 into the
+// sign bit of an int.
+#define CAP_TO_INDEX(x)     ((x) >> 5)
+#define CAP_TO_MASK(x)      (1u << ((x) & 31))
+#endif
+
+namespace mozilla {
+
+// Holds the three capability sets (effective, permitted, inheritable)
+// as an array of __user_cap_data_struct words, with helpers to load
+// them from and apply them to the current thread.
+class LinuxCapabilities final
+{
+public:
+  // A class to represent a bit within the capability sets as an lvalue.
+  // Its constructors are private, so instances only come from the
+  // accessors below and refer to a word inside that object's mBits.
+  class BitRef {
+    __u32& mWord;
+    __u32 mMask;
+    friend class LinuxCapabilities;
+    BitRef(__u32& aWord, uint32_t aMask) : mWord(aWord), mMask(aMask) { }
+    BitRef(const BitRef& aBit) : mWord(aBit.mWord), mMask(aBit.mMask) { }
+  public:
+    // True if the referenced capability bit is set.
+    operator bool() const {
+      return mWord & mMask;
+    }
+    // Sets or clears the referenced bit; the other bits of the word
+    // are left untouched.
+    BitRef& operator=(bool aSetTo) {
+      if (aSetTo) {
+        mWord |= mMask;
+      } else {
+        // Clear only this bit.  (Masking with mMask itself would keep
+        // this bit and wipe every other capability in the word.)
+        mWord &= ~mMask;
+      }
+      return *this;
+    }
+  };
+
+  // The default value is the empty set.
+  LinuxCapabilities() { PodArrayZero(mBits); }
+
+  // Get the current thread's capability sets and assign them to this
+  // object.  Returns whether it succeeded and sets errno on failure.
+  // Shouldn't fail unless the kernel is very old.
+  bool GetCurrent();
+
+  // Try to set the current thread's capability sets to those
+  // specified in this object.  Returns whether it succeeded and sets
+  // errno on failure.
+  bool SetCurrentRaw() const;
+
+  // The capability model requires that the permitted set always be a
+  // superset of the effective and inheritable sets.  This method
+  // expands the permitted set as needed and then sets the current
+  // thread's capabilities, as described above.
+  bool SetCurrent() {
+    Normalize();
+    return SetCurrentRaw();
+  }
+
+  // Adds every effective or inheritable capability to the permitted
+  // set, so that the object satisfies the superset rule noted above.
+  void Normalize() {
+    for (size_t i = 0; i < _LINUX_CAPABILITY_U32S_3; ++i) {
+      mBits[i].permitted |= mBits[i].effective | mBits[i].inheritable;
+    }
+  }
+
+  // These three methods expose individual bits in the three
+  // capability sets as objects that can be used as bool lvalues.
+  // The argument is the capability number, as defined in
+  // the <linux/capability.h> header.
+  BitRef Effective(unsigned aCap)
+  {
+    return GenericBitRef(&__user_cap_data_struct::effective, aCap);
+  }
+
+  BitRef Permitted(unsigned aCap)
+  {
+    return GenericBitRef(&__user_cap_data_struct::permitted, aCap);
+  }
+
+  BitRef Inheritable(unsigned aCap)
+  {
+    return GenericBitRef(&__user_cap_data_struct::inheritable, aCap);
+  }
+
+private:
+  // One entry per 32 capabilities, passed directly to capget/capset.
+  __user_cap_data_struct mBits[_LINUX_CAPABILITY_U32S_3];
+
+  // Builds a BitRef for capability aCap in the set selected by the
+  // pointer-to-member aField.
+  BitRef GenericBitRef(__u32 __user_cap_data_struct::* aField, unsigned aCap)
+  {
+    // Please don't pass untrusted data as the capability number.
+    MOZ_ASSERT(CAP_TO_INDEX(aCap) < _LINUX_CAPABILITY_U32S_3);
+    return BitRef(mBits[CAP_TO_INDEX(aCap)].*aField, CAP_TO_MASK(aCap));
+  }
+};
+
+} // namespace mozilla
+
+#endif // mozilla_LinuxCapabilities_h
--- a/security/sandbox/linux/Sandbox.cpp
+++ b/security/sandbox/linux/Sandbox.cpp
@@ -1,15 +1,19 @@
 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
  * You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "Sandbox.h"
+
+#include "LinuxCapabilities.h"
+#include "LinuxSched.h"
+#include "SandboxChroot.h"
 #include "SandboxFilter.h"
 #include "SandboxInternal.h"
 #include "SandboxLogging.h"
 #include "SandboxUtil.h"
 
 #include <dirent.h>
 #include <errno.h>
 #include <fcntl.h>
@@ -22,21 +26,23 @@
 #include <sys/prctl.h>
 #include <sys/ptrace.h>
 #include <sys/syscall.h>
 #include <sys/time.h>
 #include <unistd.h>
 
 #include "mozilla/Atomics.h"
 #include "mozilla/SandboxInfo.h"
+#include "mozilla/UniquePtr.h"
 #include "mozilla/unused.h"
 #include "sandbox/linux/seccomp-bpf/linux_seccomp.h"
 #if defined(ANDROID)
 #include "sandbox/linux/services/android_ucontext.h"
 #endif
+#include "sandbox/linux/services/linux_syscalls.h"
 
 #ifdef MOZ_ASAN
 // Copy libsanitizer declarations to avoid depending on ASAN headers.
 // See also bug 1081242 comment #4.
 extern "C" {
 namespace __sanitizer {
 // Win64 uses long long, but this is Linux.
 typedef signed long sptr;
@@ -62,16 +68,18 @@ SandboxCrashFunc gSandboxCrashFunc;
 #ifdef MOZ_GMP_SANDBOX
 // For media plugins, we can start the sandbox before we dlopen the
 // module, so we have to pre-open the file and simulate the sandboxed
 // open().
 static int gMediaPluginFileDesc = -1;
 static const char *gMediaPluginFilePath;
 #endif
 
+static UniquePtr<SandboxChroot> gChrootHelper;
+
 /**
  * This is the SIGSYS handler function. It is used to report to the user
  * which system call has been denied by Seccomp.
  * This function also makes the process exit as denying the system call
  * will otherwise generally lead to unexpected behavior from the process,
  * since we don't know if all functions will handle such denials gracefully.
  *
  * @see InstallSyscallReporter() function.
@@ -280,16 +288,22 @@ BroadcastSetThreadSandbox(SandboxType aT
                 "mozilla::Atomic<int> isn't represented by an int");
   pid = getpid();
   myTid = syscall(__NR_gettid);
   taskdp = opendir("/proc/self/task");
   if (taskdp == nullptr) {
     SANDBOX_LOG_ERROR("opendir /proc/self/task: %s\n", strerror(errno));
     MOZ_CRASH();
   }
+
+  if (gChrootHelper) {
+    gChrootHelper->Invoke();
+    gChrootHelper = nullptr;
+  }
+
   signum = FindFreeSignalNumber();
   if (signum == 0) {
     SANDBOX_LOG_ERROR("No available signal numbers!");
     MOZ_CRASH();
   }
   void (*oldHandler)(int);
   oldHandler = signal(signum, SetThreadSandboxHandler);
   if (oldHandler != SIG_DFL) {
@@ -421,16 +435,90 @@ SetCurrentProcessSandbox(SandboxType aTy
 
   BroadcastSetThreadSandbox(aType);
 }
 
+// Early, single-threaded setup of the resource isolation that needs
+// capabilities: unshares the user namespace, then (for media plugin
+// processes) the IPC and network namespaces, prepares the chroot
+// helper, and finally drops all capabilities again.  Does nothing for
+// process types with no applicable isolation, or when user namespaces
+// aren't available.
 void
 SandboxEarlyInit(GeckoProcessType aType, bool aIsNuwa)
 {
   MOZ_RELEASE_ASSERT(IsSingleThreaded());
+
+  // Which kinds of resource isolation (of those that need to be set
+  // up at this point) can be used by this process?
+  bool canChroot = false;
+  bool canUnshareNet = false;
+  bool canUnshareIPC = false;
+
+  switch (aType) {
+  case GeckoProcessType_Default:
+    MOZ_ASSERT(false, "SandboxEarlyInit in parent process");
+    return;
+#ifdef MOZ_GMP_SANDBOX
+  case GeckoProcessType_GMPlugin:
+    canUnshareNet = true;
+    canUnshareIPC = true;
+    canChroot = true;
+    break;
+#endif
+    // In the future, content processes will be able to use some of
+    // these.
+  default:
+    // Other cases intentionally left blank.
+    break;
+  }
+
+  // If there's nothing to do, then we're done.
+  if (!canChroot && !canUnshareNet && !canUnshareIPC) {
+    return;
+  }
+
+  // If capabilities can't be gained, then nothing can be done.
+  const SandboxInfo info = SandboxInfo::Get();
+  if (!info.Test(SandboxInfo::kHasUserNamespaces)) {
+    return;
+  }
+
+  // The failure cases for the various unshares, and setting up the
+  // chroot helper, don't strictly need to be fatal -- but they also
+  // shouldn't fail on any reasonable system, so let's take the small
+  // risk of breakage over the small risk of quietly providing less
+  // security than we expect.  (Unlike in SandboxInfo, this is in the
+  // child process, so crashing here isn't as severe a response to the
+  // unexpected.)
+  if (!UnshareUserNamespace()) {
+    SANDBOX_LOG_ERROR("unshare(CLONE_NEWUSER): %s", strerror(errno));
+    // If CanCreateUserNamespace (SandboxInfo.cpp) returns true, then
+    // the unshare shouldn't have failed.
+    MOZ_CRASH("unshare(CLONE_NEWUSER)");
+  }
+  // No early returns after this point!  We need to drop the
+  // capabilities that were gained by unsharing the user namespace.
+
+  if (canUnshareIPC && syscall(__NR_unshare, CLONE_NEWIPC) != 0) {
+    SANDBOX_LOG_ERROR("unshare(CLONE_NEWIPC): %s", strerror(errno));
+    MOZ_CRASH("unshare(CLONE_NEWIPC)");
+  }
+
+  if (canUnshareNet && syscall(__NR_unshare, CLONE_NEWNET) != 0) {
+    SANDBOX_LOG_ERROR("unshare(CLONE_NEWNET): %s", strerror(errno));
+    MOZ_CRASH("unshare(CLONE_NEWNET)");
+  }
+
+  if (canChroot) {
+    gChrootHelper = MakeUnique<SandboxChroot>();
+    if (!gChrootHelper->Prepare()) {
+      SANDBOX_LOG_ERROR("failed to set up chroot helper");
+      MOZ_CRASH("SandboxChroot::Prepare");
+    }
+  }
+
+  // A default-constructed LinuxCapabilities is the empty set, so this
+  // drops every capability held by the calling thread.
+  if (!LinuxCapabilities().SetCurrent()) {
+    SANDBOX_LOG_ERROR("dropping capabilities: %s", strerror(errno));
+    MOZ_CRASH("can't drop capabilities");
+  }
 }
 
 #ifdef MOZ_CONTENT_SANDBOX
 /**
  * Starts the seccomp sandbox for a content process.  Should be called
  * only once, and before any potentially harmful content is loaded.
  *
  * Will normally make the process exit on failure.
new file mode 100644
--- /dev/null
+++ b/security/sandbox/linux/SandboxChroot.cpp
@@ -0,0 +1,200 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "SandboxChroot.h"
+
+#include "SandboxLogging.h"
+#include "LinuxCapabilities.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "base/posix/eintr_wrapper.h"
+#include "mozilla/Assertions.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/NullPtr.h"
+
+#define MOZ_ALWAYS_ZERO(e) MOZ_ALWAYS_TRUE((e) == 0)
+
+namespace mozilla {
+
+// Sets up the mutex (error-checking type) and condition variable.  No
+// helper thread exists and no directory is open until Prepare() runs.
+SandboxChroot::SandboxChroot()
+  : mCommand(NO_THREAD)
+  , mFd(-1)
+{
+  pthread_mutexattr_t attr;
+  MOZ_ALWAYS_ZERO(pthread_mutexattr_init(&attr));
+  MOZ_ALWAYS_ZERO(pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK));
+  MOZ_ALWAYS_ZERO(pthread_mutex_init(&mMutex, &attr));
+  // The attribute object is only needed while initializing the mutex;
+  // release it now instead of leaking it.
+  MOZ_ALWAYS_ZERO(pthread_mutexattr_destroy(&attr));
+  MOZ_ALWAYS_ZERO(pthread_cond_init(&mWakeup, nullptr));
+}
+
+// Asks the helper thread, if there is one, to exit without chrooting
+// and waits for it; then destroys the synchronization primitives.
+SandboxChroot::~SandboxChroot()
+{
+  // SendCommand returns false when no thread exists; that's fine here.
+  SendCommand(JUST_EXIT);
+  MOZ_ALWAYS_ZERO(pthread_cond_destroy(&mWakeup));
+  MOZ_ALWAYS_ZERO(pthread_mutex_destroy(&mMutex));
+}
+
+// Hands aComm to the helper thread and blocks until that thread has
+// terminated.  Returns false, without doing anything else, if there is
+// no helper thread to receive the command.
+bool
+SandboxChroot::SendCommand(Command aComm)
+{
+  MOZ_ALWAYS_ZERO(pthread_mutex_lock(&mMutex));
+  const bool haveThread = mCommand != NO_THREAD;
+  if (haveThread) {
+    // The thread takes exactly one command, so none may be pending.
+    MOZ_ASSERT(mCommand == NO_COMMAND);
+    mCommand = aComm;
+  }
+  MOZ_ALWAYS_ZERO(pthread_mutex_unlock(&mMutex));
+  if (!haveThread) {
+    return false;
+  }
+
+  MOZ_ALWAYS_ZERO(pthread_cond_signal(&mWakeup));
+  void* exitValue;
+  if (pthread_join(mThread, &exitValue) != 0 || exitValue != nullptr) {
+    MOZ_CRASH("Failed to stop privileged chroot thread");
+  }
+  // ThreadMain resets the state before it returns.
+  MOZ_ASSERT(mCommand == NO_THREAD);
+  return true;
+}
+
+// Closes fd; if the close (as filtered by IGNORE_EINTR) reports an
+// error, logs it and crashes.
+static void
+AlwaysClose(int fd)
+{
+  const int rv = IGNORE_EINTR(close(fd));
+  if (rv == 0) {
+    return;
+  }
+  SANDBOX_LOG_ERROR("close: %s", strerror(errno));
+  MOZ_CRASH("failed to close()");
+}
+
+// Creates a fresh directory under /tmp, opens it, and removes it
+// again, returning a descriptor for the now-removed directory.
+// Returns -1, after logging, if any of those steps fails.
+static int
+OpenDeletedDirectory()
+{
+  // The directory needn't outlive this run of the program, and a
+  // leftover from a failure here is harmless because mkdtemp picks a
+  // fresh name each time; so plain /tmp, as specified by FHS, will do.
+  char tmpl[] = "/tmp/mozsandbox.XXXXXX";
+  if (mkdtemp(tmpl) == nullptr) {
+    SANDBOX_LOG_ERROR("mkdtemp: %s", strerror(errno));
+    return -1;
+  }
+  const int dirFd = HANDLE_EINTR(open(tmpl, O_RDONLY | O_DIRECTORY));
+  if (dirFd < 0) {
+    SANDBOX_LOG_ERROR("open %s: %s", tmpl, strerror(errno));
+    // Best-effort cleanup; it shouldn't fail, but we can live with it.
+    DebugOnly<bool> removed = HANDLE_EINTR(rmdir(tmpl)) == 0;
+    MOZ_ASSERT(removed);
+    return -1;
+  }
+  if (HANDLE_EINTR(rmdir(tmpl)) == 0) {
+    return dirFd;
+  }
+  SANDBOX_LOG_ERROR("rmdir %s: %s", tmpl, strerror(errno));
+  AlwaysClose(dirFd);
+  return -1;
+}
+
+// Checks for effective CAP_SYS_CHROOT, opens the empty directory to
+// chroot into, and starts the helper thread, returning once that
+// thread has signalled that it is ready for a command.  Returns false
+// (after logging) if any step fails; no thread is left running and no
+// descriptor is left open in that case.
+bool
+SandboxChroot::Prepare()
+{
+  LinuxCapabilities caps;
+  if (!caps.GetCurrent() || !caps.Effective(CAP_SYS_CHROOT)) {
+    SANDBOX_LOG_ERROR("don't have permission to chroot");
+    return false;
+  }
+  mFd = OpenDeletedDirectory();
+  if (mFd < 0) {
+    SANDBOX_LOG_ERROR("failed to create empty directory for chroot");
+    return false;
+  }
+  MOZ_ALWAYS_ZERO(pthread_mutex_lock(&mMutex));
+  MOZ_ASSERT(mCommand == NO_THREAD);
+  // pthread_create reports failure through its return value and does
+  // not set errno, so that value is what must be logged.
+  const int err = pthread_create(&mThread, nullptr, StaticThreadMain, this);
+  if (err != 0) {
+    MOZ_ALWAYS_ZERO(pthread_mutex_unlock(&mMutex));
+    SANDBOX_LOG_ERROR("pthread_create: %s", strerror(err));
+    // Normally the helper thread closes the directory; since there is
+    // no thread, close it here so the descriptor isn't leaked.
+    AlwaysClose(mFd);
+    mFd = -1;
+    return false;
+  }
+  // Wait for ThreadMain to move the state from NO_THREAD to NO_COMMAND.
+  while (mCommand != NO_COMMAND) {
+    MOZ_ASSERT(mCommand == NO_THREAD);
+    MOZ_ALWAYS_ZERO(pthread_cond_wait(&mWakeup, &mMutex));
+  }
+  MOZ_ALWAYS_ZERO(pthread_mutex_unlock(&mMutex));
+  return true;
+}
+
+// Orders the helper thread to chroot and waits for it to finish.  The
+// command must be deliverable, i.e. a helper thread must exist.
+void
+SandboxChroot::Invoke()
+{
+  const bool delivered = SendCommand(DO_CHROOT);
+  MOZ_ALWAYS_TRUE(delivered);
+}
+
+// Makes the directory open on fd the working directory and then the
+// root directory.  Logs and returns false if either call fails.
+static bool
+ChrootToFileDesc(int fd)
+{
+  if (fchdir(fd) == 0) {
+    if (chroot(".") == 0) {
+      return true;
+    }
+    SANDBOX_LOG_ERROR("chroot: %s", strerror(errno));
+  } else {
+    SANDBOX_LOG_ERROR("fchdir: %s", strerror(errno));
+  }
+  return false;
+}
+
+// pthread entry point: aVoidPtr is the SandboxChroot whose ThreadMain
+// should run.  Always returns nullptr.
+/* static */ void*
+SandboxChroot::StaticThreadMain(void* aVoidPtr)
+{
+  SandboxChroot* self = static_cast<SandboxChroot*>(aVoidPtr);
+  self->ThreadMain();
+  return nullptr;
+}
+
+// Body of the helper thread started by Prepare().  Narrows this
+// thread's capabilities to CAP_SYS_CHROOT, signals readiness, waits
+// for a single command, chroots if told to, closes the directory, and
+// gives up its remaining capabilities before returning.
+void
+SandboxChroot::ThreadMain()
+{
+  // First, drop everything that isn't CAP_SYS_CHROOT.  (This code
+  // assumes that this thread already has effective CAP_SYS_CHROOT,
+  // because Prepare() checked for it before creating this thread.)
+  LinuxCapabilities chrootOnly;
+  chrootOnly.Effective(CAP_SYS_CHROOT) = true;
+  if (!chrootOnly.SetCurrent()) {
+    SANDBOX_LOG_ERROR("capset: %s", strerror(errno));
+    MOZ_CRASH("Can't limit chroot thread's capabilities");
+  }
+
+  MOZ_ALWAYS_ZERO(pthread_mutex_lock(&mMutex));
+  MOZ_ASSERT(mCommand == NO_THREAD);
+  // Let Prepare(), which is waiting on mWakeup, know we're ready.
+  mCommand = NO_COMMAND;
+  MOZ_ALWAYS_ZERO(pthread_cond_signal(&mWakeup));
+  // Sleep until SendCommand() stores a command.
+  while (mCommand == NO_COMMAND) {
+    MOZ_ALWAYS_ZERO(pthread_cond_wait(&mWakeup, &mMutex));
+  }
+  switch (mCommand) {
+  case DO_CHROOT:
+    MOZ_ASSERT(mFd >= 0);
+    if (!ChrootToFileDesc(mFd)) {
+      MOZ_CRASH("Failed to chroot");
+    }
+    break;
+  default:
+    MOZ_ASSERT(mCommand == JUST_EXIT);
+    break;
+  }
+  // Whichever command arrived, the directory is no longer needed.
+  if (mFd >= 0) {
+    AlwaysClose(mFd);
+    mFd = -1;
+  }
+  mCommand = NO_THREAD;
+  MOZ_ALWAYS_ZERO(pthread_mutex_unlock(&mMutex));
+  // Drop the remaining capabilities; see note in SandboxChroot.h
+  // about the potential unreliability of pthread_join.
+  if (!LinuxCapabilities().SetCurrent()) {
+    MOZ_CRASH("can't drop capabilities");
+  }
+}
+
+} // namespace mozilla
+
+#undef MOZ_ALWAYS_ZERO
new file mode 100644
--- /dev/null
+++ b/security/sandbox/linux/SandboxChroot.h
@@ -0,0 +1,66 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_SandboxChroot_h
+#define mozilla_SandboxChroot_h
+
+#include <pthread.h>
+
+#include "mozilla/Attributes.h"
+
+// This class uses the chroot(2) system call and Linux namespaces to
+// revoke the process's access to the filesystem.  It requires that
+// the process be able to create user namespaces; this is the
+// kHasUserNamespaces in common/SandboxInfo.h.
+//
+// Usage: call Prepare() from a thread with CAP_SYS_CHROOT in its
+// effective capability set, then later call Invoke() when ready to
+// drop filesystem access.  Prepare() creates a thread to do the
+// chrooting, so the caller can (and should!) drop its own
+// capabilities afterwards.  When Invoke() returns, the thread will
+// have exited.
+//
+// (Exception: on Android/B2G <= KitKat, because of how pthread_join
+// is implemented, the thread may still exist, but it will not have
+// capabilities.  Accordingly, on such systems, be careful about
+// namespaces or other resources the thread might have inherited.)
+//
+// Prepare() can fail (return false); for example, if it doesn't have
+// CAP_SYS_CHROOT or if it can't create a directory to chroot into.
+//
+// The root directory will be empty and deleted, so the process will
+// not be able to create new entries in it regardless of permissions.
+
+namespace mozilla {
+
+class SandboxChroot final {
+public:
+  SandboxChroot();
+  ~SandboxChroot();
+
+  // This object owns a mutex, a condition variable, and possibly a
+  // running thread that holds a pointer to it; copying it would never
+  // be correct.
+  SandboxChroot(const SandboxChroot&) = delete;
+  SandboxChroot& operator=(const SandboxChroot&) = delete;
+
+  // Opens the directory to chroot into and starts the helper thread.
+  // Returns false on failure.
+  bool Prepare();
+  // Tells the helper thread to chroot and waits for it to exit.
+  void Invoke();
+private:
+  // State shared with the helper thread; doubles as the command slot.
+  enum Command {
+    NO_THREAD,   // no helper thread is running
+    NO_COMMAND,  // the thread is running and waiting for a command
+    DO_CHROOT,   // the thread should chroot, then exit
+    JUST_EXIT,   // the thread should exit without chrooting
+  };
+
+  pthread_t mThread;       // the helper thread, once Prepare() succeeds
+  pthread_mutex_t mMutex;  // held while reading or writing mCommand
+  pthread_cond_t mWakeup;  // signalled whenever mCommand changes hands
+  Command mCommand;
+  int mFd;                 // the directory to chroot into
+
+  void ThreadMain();
+  static void* StaticThreadMain(void* aVoidPtr);
+  bool SendCommand(Command aComm);
+};
+
+} // namespace mozilla
+
+#endif // mozilla_SandboxChroot_h
--- a/security/sandbox/linux/SandboxUtil.cpp
+++ b/security/sandbox/linux/SandboxUtil.cpp
@@ -1,22 +1,29 @@
 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
  * You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "SandboxUtil.h"
+
+#include "LinuxCapabilities.h"
+#include "LinuxSched.h"
 #include "SandboxLogging.h"
 
+#include <fcntl.h>
 #include <sys/stat.h>
+#include <sys/syscall.h>
 #include <sys/types.h>
 #include <unistd.h>
 
 #include "mozilla/Assertions.h"
+#include "mozilla/unused.h"
+#include "sandbox/linux/services/linux_syscalls.h"
 
 namespace mozilla {
 
 bool
 IsSingleThreaded()
 {
   // This detects the thread count indirectly.  /proc/<pid>/task has a
   // subdirectory for each thread in <pid>'s thread group, and the
@@ -27,9 +34,69 @@ IsSingleThreaded()
   if (stat("/proc/self/task", &sb) < 0) {
     MOZ_DIAGNOSTIC_ASSERT(false, "Couldn't access /proc/self/task!");
     return false;
   }
   MOZ_DIAGNOSTIC_ASSERT(sb.st_nlink >= 3);
   return sb.st_nlink == 3;
 }
 
+// Opens the existing file aPath for writing and writes the aLen bytes
+// at aStr with a single write().  Returns true only if the whole
+// string was written and the file was closed without error.
+static bool
+WriteStringToFile(const char* aPath, const char* aStr, const size_t aLen)
+{
+  const int fd = open(aPath, O_WRONLY);
+  if (fd < 0) {
+    return false;
+  }
+  const ssize_t written = write(fd, aStr, aLen);
+  // Close unconditionally, even if the write came up short.
+  const bool closed = close(fd) == 0;
+  return closed && written == ssize_t(aLen);
+}
+
+// Moves the process into a new user namespace and maps its uid and gid
+// onto themselves.  Returns false if the unshare fails; crashes if an
+// id map can't be written afterwards.
+bool
+UnshareUserNamespace()
+{
+  // The uid and gid need to be retrieved before the unshare; see
+  // below.
+  const uid_t uid = getuid();
+  const gid_t gid = getgid();
+
+  if (syscall(__NR_unshare, CLONE_NEWUSER) != 0) {
+    return false;
+  }
+
+  // As mentioned in the header, this function sets up uid/gid
+  // mappings that preserve the process's previous ids.  Some mapping
+  // is required in order to nest user namespaces (not used yet, but
+  // needed in the future for pid namespace support), and keeping the
+  // ids unchanged is the least confusing choice.
+  //
+  // In recent kernels (3.19, 3.18.2, 3.17.8), for security reasons,
+  // establishing gid mappings will fail unless the process first
+  // revokes its ability to call setgroups() by using a /proc node
+  // added in the same set of patches.
+  //
+  // Note that /proc/self points to the thread group leader, not the
+  // current thread.  However, CLONE_NEWUSER can be unshared only in a
+  // single-threaded process, so those are equivalent if we reach this
+  // point.
+  char buf[80];
+  size_t len = size_t(snprintf(buf, sizeof(buf), "%u %u 1\n", uid, uid));
+  MOZ_ASSERT(len < sizeof(buf));
+  if (!WriteStringToFile("/proc/self/uid_map", buf, len)) {
+    MOZ_CRASH("Failed to write /proc/self/uid_map");
+  }
+
+  // Best effort: the result of this write is deliberately ignored.
+  unused << WriteStringToFile("/proc/self/setgroups", "deny", 4);
+
+  len = size_t(snprintf(buf, sizeof(buf), "%u %u 1\n", gid, gid));
+  MOZ_ASSERT(len < sizeof(buf));
+  if (!WriteStringToFile("/proc/self/gid_map", buf, len)) {
+    MOZ_CRASH("Failed to write /proc/self/gid_map");
+  }
+  return true;
+}
+
 } // namespace mozilla
--- a/security/sandbox/linux/SandboxUtil.h
+++ b/security/sandbox/linux/SandboxUtil.h
@@ -6,11 +6,16 @@
 
 #ifndef mozilla_SandboxUtil_h
 #define mozilla_SandboxUtil_h
 
 namespace mozilla {
 
 bool IsSingleThreaded();
 
+// Unshare the user namespace, and set up id mappings so that the
+// process's subjective uid and gid are unchanged.  This will always
+// fail if the process is multithreaded.
+//
+// Returns false if the unshare itself fails.  Once the namespace has
+// been unshared, failure to write the uid or gid mapping crashes the
+// process instead of returning.
+bool UnshareUserNamespace();
+
 } // namespace mozilla
 
 #endif // mozilla_SandboxUtil_h
--- a/security/sandbox/linux/gtest/moz.build
+++ b/security/sandbox/linux/gtest/moz.build
@@ -10,11 +10,15 @@ Library('sandboxtest')
 
 SOURCES = [
     '../SandboxUtil.cpp',
     'TestSandboxUtil.cpp',
 ]
 
 LOCAL_INCLUDES += [
     '/security/sandbox/linux',
+    '/security/sandbox/linux/common',
+]
+LOCAL_INCLUDES += [
+    '/security/sandbox/chromium',
 ]
 
 FINAL_LIBRARY = 'xul-gtest'
--- a/security/sandbox/linux/moz.build
+++ b/security/sandbox/linux/moz.build
@@ -49,31 +49,34 @@ SOURCES += [
     '../chromium/sandbox/linux/bpf_dsl/dump_bpf.cc',
     '../chromium/sandbox/linux/bpf_dsl/policy_compiler.cc',
     '../chromium/sandbox/linux/seccomp-bpf/basicblock.cc',
     '../chromium/sandbox/linux/seccomp-bpf/codegen.cc',
     '../chromium/sandbox/linux/seccomp-bpf/die.cc',
     '../chromium/sandbox/linux/seccomp-bpf/errorcode.cc',
     '../chromium/sandbox/linux/seccomp-bpf/syscall.cc',
     '../chromium/sandbox/linux/seccomp-bpf/syscall_iterator.cc',
+    'LinuxCapabilities.cpp',
     'Sandbox.cpp',
     'SandboxAssembler.cpp',
+    'SandboxChroot.cpp',
     'SandboxFilter.cpp',
     'SandboxUtil.cpp',
 ]
 
 # gcc lto likes to put the top level asm in syscall.cc in a different partition
 # from the function using it which breaks the build.  Work around that by
 # forcing there to be only one partition.
 if '-flto' in CONFIG['OS_CXXFLAGS'] and not CONFIG['CLANG_CXX']:
     LDFLAGS += ['--param lto-partitions=1']
 
 DEFINES['NS_NO_XPCOM'] = True
 DISABLE_STL_WRAPPING = True
 
+LOCAL_INCLUDES += ['/security/sandbox/linux/common']
 LOCAL_INCLUDES += ['/security/sandbox/chromium-shim']
 LOCAL_INCLUDES += ['/security/sandbox/chromium']
 LOCAL_INCLUDES += ['/nsprpub']
 
 
 if CONFIG['OS_TARGET'] != 'Android':
     # Needed for clock_gettime with glibc < 2.17:
     OS_LIBS += [