Bug 1277704 - Update jemalloc 4 to version 4.3.1. r=glandium
authorRyan VanderMeulen <ryanvm@gmail.com>
Tue, 12 Jul 2016 10:37:04 -0400
changeset 321731 c6d5c06685a248fd8eaa01d695e4509392db9209
parent 321730 151ddb5b85a1661a59ed528ccab7e6fe2bbad9de
child 321732 95ab9f05b980662a420d6d664c1996c0dfb8e4c8
push id83660
push userryanvm@gmail.com
push dateWed, 09 Nov 2016 02:29:47 +0000
treeherdermozilla-inbound@c6d5c06685a2 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersglandium
bugs1277704
milestone52.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1277704 - Update jemalloc 4 to version 4.3.1. r=glandium
memory/jemalloc/moz.build
memory/jemalloc/src/.appveyor.yml
memory/jemalloc/src/.travis.yml
memory/jemalloc/src/COPYING
memory/jemalloc/src/ChangeLog
memory/jemalloc/src/INSTALL
memory/jemalloc/src/Makefile.in
memory/jemalloc/src/README
memory/jemalloc/src/VERSION
memory/jemalloc/src/configure
memory/jemalloc/src/configure.ac
memory/jemalloc/src/doc/html.xsl.in
memory/jemalloc/src/doc/jemalloc.xml.in
memory/jemalloc/src/doc/stylesheet.xsl
memory/jemalloc/src/include/jemalloc/internal/arena.h
memory/jemalloc/src/include/jemalloc/internal/base.h
memory/jemalloc/src/include/jemalloc/internal/bitmap.h
memory/jemalloc/src/include/jemalloc/internal/chunk.h
memory/jemalloc/src/include/jemalloc/internal/chunk_dss.h
memory/jemalloc/src/include/jemalloc/internal/ctl.h
memory/jemalloc/src/include/jemalloc/internal/extent.h
memory/jemalloc/src/include/jemalloc/internal/huge.h
memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal.h.in
memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal_decls.h
memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal_defs.h.in
memory/jemalloc/src/include/jemalloc/internal/mb.h
memory/jemalloc/src/include/jemalloc/internal/mutex.h
memory/jemalloc/src/include/jemalloc/internal/nstime.h
memory/jemalloc/src/include/jemalloc/internal/pages.h
memory/jemalloc/src/include/jemalloc/internal/ph.h
memory/jemalloc/src/include/jemalloc/internal/private_symbols.txt
memory/jemalloc/src/include/jemalloc/internal/prng.h
memory/jemalloc/src/include/jemalloc/internal/prof.h
memory/jemalloc/src/include/jemalloc/internal/rtree.h
memory/jemalloc/src/include/jemalloc/internal/size_classes.sh
memory/jemalloc/src/include/jemalloc/internal/spin.h
memory/jemalloc/src/include/jemalloc/internal/stats.h
memory/jemalloc/src/include/jemalloc/internal/tcache.h
memory/jemalloc/src/include/jemalloc/internal/tsd.h
memory/jemalloc/src/include/jemalloc/internal/util.h
memory/jemalloc/src/include/jemalloc/internal/valgrind.h
memory/jemalloc/src/include/jemalloc/internal/witness.h
memory/jemalloc/src/include/jemalloc/jemalloc_macros.h.in
memory/jemalloc/src/jemalloc.pc.in
memory/jemalloc/src/msvc/ReadMe.txt
memory/jemalloc/src/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
memory/jemalloc/src/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
memory/jemalloc/src/src/arena.c
memory/jemalloc/src/src/base.c
memory/jemalloc/src/src/bitmap.c
memory/jemalloc/src/src/chunk.c
memory/jemalloc/src/src/chunk_dss.c
memory/jemalloc/src/src/chunk_mmap.c
memory/jemalloc/src/src/ckh.c
memory/jemalloc/src/src/ctl.c
memory/jemalloc/src/src/huge.c
memory/jemalloc/src/src/jemalloc.c
memory/jemalloc/src/src/mutex.c
memory/jemalloc/src/src/nstime.c
memory/jemalloc/src/src/pages.c
memory/jemalloc/src/src/prof.c
memory/jemalloc/src/src/quarantine.c
memory/jemalloc/src/src/rtree.c
memory/jemalloc/src/src/spin.c
memory/jemalloc/src/src/stats.c
memory/jemalloc/src/src/tcache.c
memory/jemalloc/src/src/tsd.c
memory/jemalloc/src/src/util.c
memory/jemalloc/src/src/witness.c
memory/jemalloc/src/src/zone.c
memory/jemalloc/src/test/include/test/jemalloc_test.h.in
memory/jemalloc/src/test/include/test/mtx.h
memory/jemalloc/src/test/include/test/test.h
memory/jemalloc/src/test/integration/aligned_alloc.c
memory/jemalloc/src/test/integration/mallocx.c
memory/jemalloc/src/test/integration/posix_memalign.c
memory/jemalloc/src/test/integration/xallocx.c
memory/jemalloc/src/test/src/mtx.c
memory/jemalloc/src/test/src/test.c
memory/jemalloc/src/test/src/timer.c
memory/jemalloc/src/test/stress/microbench.c
memory/jemalloc/src/test/unit/a0.c
memory/jemalloc/src/test/unit/arena_reset.c
memory/jemalloc/src/test/unit/bitmap.c
memory/jemalloc/src/test/unit/ckh.c
memory/jemalloc/src/test/unit/decay.c
memory/jemalloc/src/test/unit/fork.c
memory/jemalloc/src/test/unit/junk.c
memory/jemalloc/src/test/unit/junk_alloc.c
memory/jemalloc/src/test/unit/junk_free.c
memory/jemalloc/src/test/unit/math.c
memory/jemalloc/src/test/unit/nstime.c
memory/jemalloc/src/test/unit/ph.c
memory/jemalloc/src/test/unit/prng.c
memory/jemalloc/src/test/unit/prof_reset.c
memory/jemalloc/src/test/unit/run_quantize.c
memory/jemalloc/src/test/unit/size_classes.c
memory/jemalloc/src/test/unit/stats.c
memory/jemalloc/src/test/unit/tsd.c
memory/jemalloc/src/test/unit/util.c
memory/jemalloc/src/test/unit/witness.c
memory/jemalloc/src/test/unit/zero.c
memory/jemalloc/upstream.info
--- a/memory/jemalloc/moz.build
+++ b/memory/jemalloc/moz.build
@@ -20,23 +20,23 @@ UNIFIED_SOURCES += [
     'src/src/mb.c',
     'src/src/mutex.c',
     'src/src/nstime.c',
     'src/src/pages.c',
     'src/src/prng.c',
     'src/src/prof.c',
     'src/src/quarantine.c',
     'src/src/rtree.c',
+    'src/src/spin.c',
     'src/src/stats.c',
     'src/src/tcache.c',
     'src/src/ticker.c',
     'src/src/tsd.c',
     'src/src/util.c',
-    # FIXME do we ever want valgrind.c?
-    # 'src/src/valgrind.c',
+    'src/src/witness.c',
 ]
 
 SOURCES += [
     # This file cannot be built in unified mode because of symbol clash on arena_purge.
     'src/src/ctl.c',
 ]
 
 # Only OSX needs the zone allocation implementation,
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/.appveyor.yml
@@ -0,0 +1,28 @@
+version: '{build}'
+
+environment:
+  matrix:
+  - MSYSTEM: MINGW64
+    CPU: x86_64
+    MSVC: amd64
+  - MSYSTEM: MINGW32
+    CPU: i686
+    MSVC: x86
+  - MSYSTEM: MINGW64
+    CPU: x86_64
+  - MSYSTEM: MINGW32
+    CPU: i686
+
+install:
+  - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH%
+  - if defined MSVC call "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %MSVC%
+  - if defined MSVC pacman --noconfirm -Rsc mingw-w64-%CPU%-gcc gcc
+  - pacman --noconfirm -Suy mingw-w64-%CPU%-make
+
+build_script:
+  - bash -c "autoconf"
+  - bash -c "./configure"
+  - mingw32-make -j3
+  - file lib/jemalloc.dll
+  - mingw32-make -j3 tests
+  - mingw32-make -k check
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/.travis.yml
@@ -0,0 +1,29 @@
+language: c
+
+matrix:
+  include:
+    - os: linux
+      compiler: gcc
+    - os: linux
+      compiler: gcc
+      env:
+        - EXTRA_FLAGS=-m32
+      addons:
+        apt:
+          packages:
+          - gcc-multilib
+    - os: osx
+      compiler: clang
+    - os: osx
+      compiler: clang
+      env:
+        - EXTRA_FLAGS=-m32
+
+before_script:
+  - autoconf
+  - ./configure${EXTRA_FLAGS:+ CC="$CC $EXTRA_FLAGS"}
+  - make -j3
+  - make -j3 tests
+
+script:
+  - make check
--- a/memory/jemalloc/src/COPYING
+++ b/memory/jemalloc/src/COPYING
@@ -1,15 +1,15 @@
 Unless otherwise specified, files in the jemalloc source distribution are
 subject to the following license:
 --------------------------------------------------------------------------------
-Copyright (C) 2002-2015 Jason Evans <jasone@canonware.com>.
+Copyright (C) 2002-2016 Jason Evans <jasone@canonware.com>.
 All rights reserved.
 Copyright (C) 2007-2012 Mozilla Foundation.  All rights reserved.
-Copyright (C) 2009-2015 Facebook, Inc.  All rights reserved.
+Copyright (C) 2009-2016 Facebook, Inc.  All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 1. Redistributions of source code must retain the above copyright notice(s),
    this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright notice(s),
    this list of conditions and the following disclaimer in the documentation
    and/or other materials provided with the distribution.
--- a/memory/jemalloc/src/ChangeLog
+++ b/memory/jemalloc/src/ChangeLog
@@ -1,14 +1,101 @@
 Following are change highlights associated with official releases.  Important
 bug fixes are all mentioned, but some internal enhancements are omitted here for
 brevity.  Much more detail can be found in the git revision history:
 
     https://github.com/jemalloc/jemalloc
 
+* 4.3.1 (November 7, 2016)
+
+  Bug fixes:
+  - Fix a severe virtual memory leak.  This regression was first released in
+    4.3.0.  (@interwq, @jasone)
+  - Refactor atomic and prng APIs to restore support for 32-bit platforms that
+    use pre-C11 toolchains, e.g. FreeBSD's mips.  (@jasone)
+
+* 4.3.0 (November 4, 2016)
+
+  This is the first release that passes the test suite for multiple Windows
+  configurations, thanks in large part to @glandium setting up continuous
+  integration via AppVeyor (and Travis CI for Linux and OS X).
+
+  New features:
+  - Add "J" (JSON) support to malloc_stats_print().  (@jasone)
+  - Add Cray compiler support.  (@ronawho)
+
+  Optimizations:
+  - Add/use adaptive spinning for bootstrapping and radix tree node
+    initialization.  (@jasone)
+
+  Bug fixes:
+  - Fix large allocation to search starting in the optimal size class heap,
+    which can substantially reduce virtual memory churn and fragmentation.  This
+    regression was first released in 4.0.0.  (@mjp41, @jasone)
+  - Fix stats.arenas.<i>.nthreads accounting.  (@interwq)
+  - Fix and simplify decay-based purging.  (@jasone)
+  - Make DSS (sbrk(2)-related) operations lockless, which resolves potential
+    deadlocks during thread exit.  (@jasone)
+  - Fix over-sized allocation of radix tree leaf nodes.  (@mjp41, @ogaun,
+    @jasone)
+  - Fix over-sized allocation of arena_t (plus associated stats) data
+    structures.  (@jasone, @interwq)
+  - Fix EXTRA_CFLAGS to not affect configuration.  (@jasone)
+  - Fix a Valgrind integration bug.  (@ronawho)
+  - Disallow 0x5a junk filling when running in Valgrind.  (@jasone)
+  - Fix a file descriptor leak on Linux.  This regression was first released in
+    4.2.0.  (@vsarunas, @jasone)
+  - Fix static linking of jemalloc with glibc.  (@djwatson)
+  - Use syscall(2) rather than {open,read,close}(2) during boot on Linux.  This
+    works around other libraries' system call wrappers performing reentrant
+    allocation.  (@kspinka, @Whissi, @jasone)
+  - Fix OS X default zone replacement to work with OS X 10.12.  (@glandium,
+    @jasone)
+  - Fix cached memory management to avoid needless commit/decommit operations
+    during purging, which resolves permanent virtual memory map fragmentation
+    issues on Windows.  (@mjp41, @jasone)
+  - Fix TSD fetches to avoid (recursive) allocation.  This is relevant to
+    non-TLS and Windows configurations.  (@jasone)
+  - Fix malloc_conf overriding to work on Windows.  (@jasone)
+  - Forcibly disable lazy-lock on Windows (was forcibly *enabled*).  (@jasone)
+
+* 4.2.1 (June 8, 2016)
+
+  Bug fixes:
+  - Fix bootstrapping issues for configurations that require allocation during
+    tsd initialization (e.g. --disable-tls).  (@cferris1000, @jasone)
+  - Fix gettimeofday() version of nstime_update().  (@ronawho)
+  - Fix Valgrind regressions in calloc() and chunk_alloc_wrapper().  (@ronawho)
+  - Fix potential VM map fragmentation regression.  (@jasone)
+  - Fix opt_zero-triggered in-place huge reallocation zeroing.  (@jasone)
+  - Fix heap profiling context leaks in reallocation edge cases.  (@jasone)
+
+* 4.2.0 (May 12, 2016)
+
+  New features:
+  - Add the arena.<i>.reset mallctl, which makes it possible to discard all of
+    an arena's allocations in a single operation.  (@jasone)
+  - Add the stats.retained and stats.arenas.<i>.retained statistics.  (@jasone)
+  - Add the --with-version configure option.  (@jasone)
+  - Support --with-lg-page values larger than actual page size.  (@jasone)
+
+  Optimizations:
+  - Use pairing heaps rather than red-black trees for various hot data
+    structures.  (@djwatson, @jasone)
+  - Streamline fast paths of rtree operations.  (@jasone)
+  - Optimize the fast paths of calloc() and [m,d,sd]allocx().  (@jasone)
+  - Decommit unused virtual memory if the OS does not overcommit.  (@jasone)
+  - Specify MAP_NORESERVE on Linux if [heuristic] overcommit is active, in order
+    to avoid unfortunate interactions during fork(2).  (@jasone)
+
+  Bug fixes:
+  - Fix chunk accounting related to triggering gdump profiles.  (@jasone)
+  - Link against librt for clock_gettime(2) if glibc < 2.17.  (@jasone)
+  - Scale leak report summary according to sampling probability.  (@jasone)
+
 * 4.1.1 (May 3, 2016)
 
   This bugfix release resolves a variety of mostly minor issues, though the
   bitmap fix is critical for 64-bit Windows.
 
   Bug fixes:
   - Fix the linear scan version of bitmap_sfu() to shift by the proper amount
     even when sizeof(long) is not the same as sizeof(void *), as on 64-bit
@@ -16,17 +103,17 @@ brevity.  Much more detail can be found 
   - Fix hashing functions to avoid unaligned memory accesses (and resulting
     crashes).  This is relevant at least to some ARM-based platforms.
     (@rkmisra)
   - Fix fork()-related lock rank ordering reversals.  These reversals were
     unlikely to cause deadlocks in practice except when heap profiling was
     enabled and active.  (@jasone)
   - Fix various chunk leaks in OOM code paths.  (@jasone)
   - Fix malloc_stats_print() to print opt.narenas correctly.  (@jasone)
-  - Fix MSVC-specific build/test issues.  (@rustyx, yuslepukhin)
+  - Fix MSVC-specific build/test issues.  (@rustyx, @yuslepukhin)
   - Fix a variety of test failures that were due to test fragility rather than
     core bugs.  (@jasone)
 
 * 4.1.0 (February 28, 2016)
 
   This release is primarily about optimizations, but it also incorporates a lot
   of portability-motivated refactoring and enhancements.  Many people worked on
   this release, to an extent that even with the omission here of minor changes
@@ -75,24 +162,24 @@ brevity.  Much more detail can be found 
     up incremental huge reallocation.  (@jasone)
 
   Incompatible changes:
   - Make opt.narenas unsigned rather than size_t.  (@jasone)
 
   Bug fixes:
   - Fix stats.cactive accounting regression.  (@rustyx, @jasone)
   - Handle unaligned keys in hash().  This caused problems for some ARM systems.
-    (@jasone, Christopher Ferris)
+    (@jasone, @cferris1000)
   - Refactor arenas array.  In addition to fixing a fork-related deadlock, this
     makes arena lookups faster and simpler.  (@jasone)
   - Move retained memory allocation out of the default chunk allocation
     function, to a location that gets executed even if the application installs
     a custom chunk allocation function.  This resolves a virtual memory leak.
     (@buchgr)
-  - Fix a potential tsd cleanup leak.  (Christopher Ferris, @jasone)
+  - Fix a potential tsd cleanup leak.  (@cferris1000, @jasone)
   - Fix run quantization.  In practice this bug had no impact unless
     applications requested memory with alignment exceeding one page.
     (@jasone, @djwatson)
   - Fix LinuxThreads-specific bootstrapping deadlock.  (Cosmin Paraschiv)
   - jeprof:
     + Don't discard curl options if timeout is not defined.  (@djwatson)
     + Detect failed profile fetches.  (@djwatson)
   - Fix stats.arenas.<i>.{dss,lg_dirty_mult,decay_time,pactive,pdirty} for
--- a/memory/jemalloc/src/INSTALL
+++ b/memory/jemalloc/src/INSTALL
@@ -30,16 +30,20 @@ any of the following arguments (not a de
 --prefix=<install-root-dir>
     Set the base directory in which to install.  For example:
 
         ./configure --prefix=/usr/local
 
     will cause files to be installed into /usr/local/include, /usr/local/lib,
     and /usr/local/man.
 
+--with-version=<major>.<minor>.<bugfix>-<nrev>-g<gid>
+    Use the specified version string rather than trying to generate one (if in
+    a git repository) or use existing the VERSION file (if present).
+
 --with-rpath=<colon-separated-rpath>
     Embed one or more library paths, so that libjemalloc can find the libraries
     it is linked to.  This works only on ELF-based systems.
 
 --with-mangling=<map>
     Mangle public symbols specified in <map> which is a comma-separated list of
     name:mangled pairs.
 
--- a/memory/jemalloc/src/Makefile.in
+++ b/memory/jemalloc/src/Makefile.in
@@ -19,21 +19,21 @@ DATADIR := $(DESTDIR)@DATADIR@
 MANDIR := $(DESTDIR)@MANDIR@
 srcroot := @srcroot@
 objroot := @objroot@
 abs_srcroot := @abs_srcroot@
 abs_objroot := @abs_objroot@
 
 # Build parameters.
 CPPFLAGS := @CPPFLAGS@ -I$(srcroot)include -I$(objroot)include
-CFLAGS := @CFLAGS@
+EXTRA_CFLAGS := @EXTRA_CFLAGS@
+CFLAGS := @CFLAGS@ $(EXTRA_CFLAGS)
 LDFLAGS := @LDFLAGS@
 EXTRA_LDFLAGS := @EXTRA_LDFLAGS@
 LIBS := @LIBS@
-TESTLIBS := @TESTLIBS@
 RPATH_EXTRA := @RPATH_EXTRA@
 SO := @so@
 IMPORTLIB := @importlib@
 O := @o@
 A := @a@
 EXE := @exe@
 LIBPREFIX := @libprefix@
 REV := @rev@
@@ -48,25 +48,29 @@ cfghdrs_out := @cfghdrs_out@
 cfgoutputs_in := $(addprefix $(srcroot),@cfgoutputs_in@)
 cfgoutputs_out := @cfgoutputs_out@
 enable_autogen := @enable_autogen@
 enable_code_coverage := @enable_code_coverage@
 enable_prof := @enable_prof@
 enable_valgrind := @enable_valgrind@
 enable_zone_allocator := @enable_zone_allocator@
 MALLOC_CONF := @JEMALLOC_CPREFIX@MALLOC_CONF
+link_whole_archive := @link_whole_archive@
 DSO_LDFLAGS = @DSO_LDFLAGS@
 SOREV = @SOREV@
 PIC_CFLAGS = @PIC_CFLAGS@
 CTARGET = @CTARGET@
 LDTARGET = @LDTARGET@
+TEST_LD_MODE = @TEST_LD_MODE@
 MKLIB = @MKLIB@
 AR = @AR@
 ARFLAGS = @ARFLAGS@
 CC_MM = @CC_MM@
+LM := @LM@
+INSTALL = @INSTALL@
 
 ifeq (macho, $(ABI))
 TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH="$(objroot)lib"
 else
 ifeq (pecoff, $(ABI))
 TEST_LIBRARY_PATH := PATH="$(PATH):$(objroot)lib"
 else
 TEST_LIBRARY_PATH :=
@@ -95,20 +99,22 @@ C_SRCS := $(srcroot)src/jemalloc.c \
 	$(srcroot)src/mutex.c \
 	$(srcroot)src/nstime.c \
 	$(srcroot)src/pages.c \
 	$(srcroot)src/prng.c \
 	$(srcroot)src/prof.c \
 	$(srcroot)src/quarantine.c \
 	$(srcroot)src/rtree.c \
 	$(srcroot)src/stats.c \
+	$(srcroot)src/spin.c \
 	$(srcroot)src/tcache.c \
 	$(srcroot)src/ticker.c \
 	$(srcroot)src/tsd.c \
-	$(srcroot)src/util.c
+	$(srcroot)src/util.c \
+	$(srcroot)src/witness.c
 ifeq ($(enable_valgrind), 1)
 C_SRCS += $(srcroot)src/valgrind.c
 endif
 ifeq ($(enable_zone_allocator), 1)
 C_SRCS += $(srcroot)src/zone.c
 endif
 ifeq ($(IMPORTLIB),$(SO))
 STATIC_LIBS := $(objroot)lib/$(LIBJEMALLOC).$(A)
@@ -117,42 +123,55 @@ ifdef PIC_CFLAGS
 STATIC_LIBS += $(objroot)lib/$(LIBJEMALLOC)_pic.$(A)
 else
 STATIC_LIBS += $(objroot)lib/$(LIBJEMALLOC)_s.$(A)
 endif
 DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV)
 ifneq ($(SOREV),$(SO))
 DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO)
 endif
+ifeq (1, $(link_whole_archive))
+LJEMALLOC := -Wl,--whole-archive -L$(objroot)lib -l$(LIBJEMALLOC) -Wl,--no-whole-archive
+else
+LJEMALLOC := $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB)
+endif
 PC := $(objroot)jemalloc.pc
 MAN3 := $(objroot)doc/jemalloc$(install_suffix).3
 DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml
 DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.html)
 DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.3)
 DOCS := $(DOCS_HTML) $(DOCS_MAN3)
 C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \
 	$(srcroot)test/src/btalloc_1.c $(srcroot)test/src/math.c \
 	$(srcroot)test/src/mtx.c $(srcroot)test/src/mq.c \
 	$(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \
 	$(srcroot)test/src/thd.c $(srcroot)test/src/timer.c
+ifeq (1, $(link_whole_archive))
+C_UTIL_INTEGRATION_SRCS :=
+else
 C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/util.c
-TESTS_UNIT := $(srcroot)test/unit/atomic.c \
+endif
+TESTS_UNIT := \
+	$(srcroot)test/unit/a0.c \
+	$(srcroot)test/unit/arena_reset.c \
+	$(srcroot)test/unit/atomic.c \
 	$(srcroot)test/unit/bitmap.c \
 	$(srcroot)test/unit/ckh.c \
 	$(srcroot)test/unit/decay.c \
 	$(srcroot)test/unit/fork.c \
 	$(srcroot)test/unit/hash.c \
 	$(srcroot)test/unit/junk.c \
 	$(srcroot)test/unit/junk_alloc.c \
 	$(srcroot)test/unit/junk_free.c \
 	$(srcroot)test/unit/lg_chunk.c \
 	$(srcroot)test/unit/mallctl.c \
 	$(srcroot)test/unit/math.c \
 	$(srcroot)test/unit/mq.c \
 	$(srcroot)test/unit/mtx.c \
+	$(srcroot)test/unit/ph.c \
 	$(srcroot)test/unit/prng.c \
 	$(srcroot)test/unit/prof_accum.c \
 	$(srcroot)test/unit/prof_active.c \
 	$(srcroot)test/unit/prof_gdump.c \
 	$(srcroot)test/unit/prof_idump.c \
 	$(srcroot)test/unit/prof_reset.c \
 	$(srcroot)test/unit/prof_thread_name.c \
 	$(srcroot)test/unit/ql.c \
@@ -164,16 +183,17 @@ TESTS_UNIT := $(srcroot)test/unit/atomic
 	$(srcroot)test/unit/SFMT.c \
 	$(srcroot)test/unit/size_classes.c \
 	$(srcroot)test/unit/smoothstep.c \
 	$(srcroot)test/unit/stats.c \
 	$(srcroot)test/unit/ticker.c \
 	$(srcroot)test/unit/nstime.c \
 	$(srcroot)test/unit/tsd.c \
 	$(srcroot)test/unit/util.c \
+	$(srcroot)test/unit/witness.c \
 	$(srcroot)test/unit/zero.c
 TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \
 	$(srcroot)test/integration/allocated.c \
 	$(srcroot)test/integration/sdallocx.c \
 	$(srcroot)test/integration/mallocx.c \
 	$(srcroot)test/integration/MALLOCX_ARENA.c \
 	$(srcroot)test/integration/overflow.c \
 	$(srcroot)test/integration/posix_memalign.c \
@@ -285,79 +305,79 @@ endif
 $(objroot)lib/$(LIBJEMALLOC)_s.$(A) : $(C_OBJS)
 
 $(STATIC_LIBS):
 	@mkdir -p $(@D)
 	$(AR) $(ARFLAGS)@AROUT@ $+
 
 $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS)
 	@mkdir -p $(@D)
-	$(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(TESTLIBS) $(EXTRA_LDFLAGS)
+	$(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS)
 
 $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB)
 	@mkdir -p $(@D)
-	$(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(TESTLIBS) $(EXTRA_LDFLAGS)
+	$(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LJEMALLOC) $(LDFLAGS) $(filter-out -lm,$(filter -lrt -lpthread,$(LIBS))) $(LM) $(EXTRA_LDFLAGS)
 
 $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB)
 	@mkdir -p $(@D)
-	$(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(TESTLIBS) $(EXTRA_LDFLAGS)
+	$(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS)
 
 build_lib_shared: $(DSOS)
 build_lib_static: $(STATIC_LIBS)
 build_lib: build_lib_shared build_lib_static
 
 install_bin:
-	install -d $(BINDIR)
+	$(INSTALL) -d $(BINDIR)
 	@for b in $(BINS); do \
-	echo "install -m 755 $$b $(BINDIR)"; \
-	install -m 755 $$b $(BINDIR); \
+	echo "$(INSTALL) -m 755 $$b $(BINDIR)"; \
+	$(INSTALL) -m 755 $$b $(BINDIR); \
 done
 
 install_include:
-	install -d $(INCLUDEDIR)/jemalloc
+	$(INSTALL) -d $(INCLUDEDIR)/jemalloc
 	@for h in $(C_HDRS); do \
-	echo "install -m 644 $$h $(INCLUDEDIR)/jemalloc"; \
-	install -m 644 $$h $(INCLUDEDIR)/jemalloc; \
+	echo "$(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc"; \
+	$(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc; \
 done
 
 install_lib_shared: $(DSOS)
-	install -d $(LIBDIR)
-	install -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR)
+	$(INSTALL) -d $(LIBDIR)
+	$(INSTALL) -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR)
 ifneq ($(SOREV),$(SO))
 	ln -sf $(LIBJEMALLOC).$(SOREV) $(LIBDIR)/$(LIBJEMALLOC).$(SO)
 endif
 
 install_lib_static: $(STATIC_LIBS)
-	install -d $(LIBDIR)
+	$(INSTALL) -d $(LIBDIR)
 	@for l in $(STATIC_LIBS); do \
-	echo "install -m 755 $$l $(LIBDIR)"; \
-	install -m 755 $$l $(LIBDIR); \
+	echo "$(INSTALL) -m 755 $$l $(LIBDIR)"; \
+	$(INSTALL) -m 755 $$l $(LIBDIR); \
 done
 
 install_lib_pc: $(PC)
-	install -d $(LIBDIR)/pkgconfig
+	$(INSTALL) -d $(LIBDIR)/pkgconfig
 	@for l in $(PC); do \
-	echo "install -m 644 $$l $(LIBDIR)/pkgconfig"; \
-	install -m 644 $$l $(LIBDIR)/pkgconfig; \
+	echo "$(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig"; \
+	$(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig; \
 done
 
 install_lib: install_lib_shared install_lib_static install_lib_pc
 
 install_doc_html:
-	install -d $(DATADIR)/doc/jemalloc$(install_suffix)
+	$(INSTALL) -d $(DATADIR)/doc/jemalloc$(install_suffix)
 	@for d in $(DOCS_HTML); do \
-	echo "install -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \
-	install -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \
+	echo "$(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \
+	$(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \
 done
 
 install_doc_man:
-	install -d $(MANDIR)/man3
+	$(INSTALL) -d $(MANDIR)/man3
 	@for d in $(DOCS_MAN3); do \
-	echo "install -m 644 $$d $(MANDIR)/man3"; \
-	install -m 644 $$d $(MANDIR)/man3; \
+	echo "$(INSTALL) -m 644 $$d $(MANDIR)/man3"; \
+	$(INSTALL) -m 644 $$d $(MANDIR)/man3; \
 done
 
 install_doc: install_doc_html install_doc_man
 
 install: install_bin install_include install_lib install_doc
 
 tests_unit: $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%$(EXE))
 tests_integration: $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%$(EXE))
--- a/memory/jemalloc/src/README
+++ b/memory/jemalloc/src/README
@@ -12,9 +12,9 @@ repercussions for real world application
 
 The COPYING file contains copyright and licensing information.
 
 The INSTALL file contains information on how to configure, build, and install
 jemalloc.
 
 The ChangeLog file contains a brief summary of changes for each release.
 
-URL: http://www.canonware.com/jemalloc/
+URL: http://jemalloc.net/
--- a/memory/jemalloc/src/VERSION
+++ b/memory/jemalloc/src/VERSION
@@ -1,1 +1,1 @@
-4.1.1-0-ge02b83cc5e3c4d30f93dba945162e3aa58d962d6
+4.3.1-0-g0110fa8451af905affd77c3bea0d545fee2251b2
--- a/memory/jemalloc/src/configure
+++ b/memory/jemalloc/src/configure
@@ -623,17 +623,16 @@ ac_subst_vars='LTLIBOBJS
 LIBOBJS
 cfgoutputs_out
 cfgoutputs_in
 cfghdrs_out
 cfghdrs_in
 enable_zone_allocator
 enable_tls
 enable_lazy_lock
-TESTLIBS
 jemalloc_version_gid
 jemalloc_version_nrev
 jemalloc_version_bugfix
 jemalloc_version_minor
 jemalloc_version_major
 jemalloc_version
 enable_cache_oblivious
 enable_xmalloc
@@ -653,26 +652,29 @@ enable_code_coverage
 AUTOCONF
 LD
 RANLIB
 INSTALL_DATA
 INSTALL_SCRIPT
 INSTALL_PROGRAM
 enable_autogen
 RPATH_EXTRA
+LM
 CC_MM
 AROUT
 ARFLAGS
 MKLIB
+TEST_LD_MODE
 LDTARGET
 CTARGET
 PIC_CFLAGS
 SOREV
 EXTRA_LDFLAGS
 DSO_LDFLAGS
+link_whole_archive
 libprefix
 exe
 a
 o
 importlib
 so
 LD_PRELOAD_VAR
 RPATH
@@ -684,16 +686,17 @@ host_cpu
 host
 build_os
 build_vendor
 build_cpu
 build
 EGREP
 GREP
 CPP
+EXTRA_CFLAGS
 OBJEXT
 EXEEXT
 ac_ct_CC
 CPPFLAGS
 LDFLAGS
 CFLAGS
 CC
 XSLROOT
@@ -778,16 +781,17 @@ enable_utrace
 enable_valgrind
 enable_xmalloc
 enable_cache_oblivious
 with_lg_tiny_min
 with_lg_quantum
 with_lg_page
 with_lg_page_sizes
 with_lg_size_class_group
+with_version
 enable_lazy_lock
 enable_tls
 enable_zone_allocator
 '
       ac_precious_vars='build_alias
 host_alias
 target_alias
 CC
@@ -1466,16 +1470,18 @@ Optional Packages:
   --with-lg-quantum=<lg-quantum>
                           Base 2 log of minimum allocation alignment
   --with-lg-page=<lg-page>
                           Base 2 log of system page size
   --with-lg-page-sizes=<lg-page-sizes>
                           Base 2 logs of system page sizes to support
   --with-lg-size-class-group=<lg-size-class-group>
                           Base 2 log of size classes per doubling
+  --with-version=<major>.<minor>.<bugfix>-<nrev>-g<gid>
+                          Version string
 
 Some influential environment variables:
   CC          C compiler command
   CFLAGS      C compiler flags
   LDFLAGS     linker flags, e.g. -L<lib dir> if you have libraries in a
               nonstandard directory <lib dir>
   LIBS        libraries to pass to the linker, e.g. -l<library>
   CPPFLAGS    (Objective) C/C++ preprocessor flags, e.g. -I<include dir> if
@@ -3430,16 +3436,17 @@ if test "x$ac_cv_prog_cc_c89" != xno; th
 fi
 
 ac_ext=c
 ac_cpp='$CPP $CPPFLAGS'
 ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
 ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
 ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
+
 if test "x$GCC" != "xyes" ; then
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler is MSVC" >&5
 $as_echo_n "checking whether compiler is MSVC... " >&6; }
 if ${je_cv_msvc+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -3463,20 +3470,135 @@ else
   je_cv_msvc=no
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_msvc" >&5
 $as_echo "$je_cv_msvc" >&6; }
 fi
 
+je_cv_cray_prgenv_wrapper=""
+if test "x${PE_ENV}" != "x" ; then
+  case "${CC}" in
+    CC|cc)
+	je_cv_cray_prgenv_wrapper="yes"
+	;;
+    *)
+       ;;
+  esac
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler is cray" >&5
+$as_echo_n "checking whether compiler is cray... " >&6; }
+if ${je_cv_cray+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+#ifndef _CRAYC
+  int fail-1;
+#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_cray=yes
+else
+  je_cv_cray=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_cray" >&5
+$as_echo "$je_cv_cray" >&6; }
+
+if test "x${je_cv_cray}" = "xyes" ; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether cray compiler version is 8.4" >&5
+$as_echo_n "checking whether cray compiler version is 8.4... " >&6; }
+if ${je_cv_cray_84+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+#if !(_RELEASE_MAJOR == 8 && _RELEASE_MINOR == 4)
+  int fail-1;
+#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_cray_84=yes
+else
+  je_cv_cray_84=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_cray_84" >&5
+$as_echo "$je_cv_cray_84" >&6; }
+fi
+
 if test "x$CFLAGS" = "x" ; then
   no_CFLAGS="yes"
   if test "x$GCC" = "xyes" ; then
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -std=gnu11" >&5
+$as_echo_n "checking whether compiler supports -std=gnu11... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-std=gnu11"
+else
+  CFLAGS="${CFLAGS} -std=gnu11"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_cflags_appended=-std=gnu11
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  je_cv_cflags_appended=
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+    if test "x$je_cv_cflags_appended" = "x-std=gnu11" ; then
+      cat >>confdefs.h <<_ACEOF
+#define JEMALLOC_HAS_RESTRICT 1
+_ACEOF
+
+    else
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -std=gnu99" >&5
 $as_echo_n "checking whether compiler supports -std=gnu99... " >&6; }
 TCFLAGS="${CFLAGS}"
 if test "x${CFLAGS}" = "x" ; then
   CFLAGS="-std=gnu99"
 else
   CFLAGS="${CFLAGS} -std=gnu99"
 fi
@@ -3502,21 +3624,22 @@ else
   je_cv_cflags_appended=
               { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
 $as_echo "no" >&6; }
               CFLAGS="${TCFLAGS}"
 
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
-    if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then
-      cat >>confdefs.h <<_ACEOF
+      if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then
+        cat >>confdefs.h <<_ACEOF
 #define JEMALLOC_HAS_RESTRICT 1
 _ACEOF
 
+      fi
     fi
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Wall" >&5
 $as_echo_n "checking whether compiler supports -Wall... " >&6; }
 TCFLAGS="${CFLAGS}"
 if test "x${CFLAGS}" = "x" ; then
   CFLAGS="-Wall"
 else
@@ -3617,16 +3740,52 @@ else
               { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
 $as_echo "no" >&6; }
               CFLAGS="${TCFLAGS}"
 
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Wsign-compare" >&5
+$as_echo_n "checking whether compiler supports -Wsign-compare... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-Wsign-compare"
+else
+  CFLAGS="${CFLAGS} -Wsign-compare"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_cflags_appended=-Wsign-compare
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  je_cv_cflags_appended=
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -pipe" >&5
 $as_echo_n "checking whether compiler supports -pipe... " >&6; }
 TCFLAGS="${CFLAGS}"
 if test "x${CFLAGS}" = "x" ; then
   CFLAGS="-pipe"
 else
   CFLAGS="${CFLAGS} -pipe"
 fi
@@ -3836,55 +3995,168 @@ else
 $as_echo "no" >&6; }
               CFLAGS="${TCFLAGS}"
 
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
     CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat"
   fi
-fi
-if test "x$EXTRA_CFLAGS" != "x" ; then
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports $EXTRA_CFLAGS" >&5
-$as_echo_n "checking whether compiler supports $EXTRA_CFLAGS... " >&6; }
+  if test "x$je_cv_cray" = "xyes" ; then
+        if test "x$je_cv_cray_84" = "xyes" ; then
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -hipa2" >&5
+$as_echo_n "checking whether compiler supports -hipa2... " >&6; }
 TCFLAGS="${CFLAGS}"
 if test "x${CFLAGS}" = "x" ; then
-  CFLAGS="$EXTRA_CFLAGS"
-else
-  CFLAGS="${CFLAGS} $EXTRA_CFLAGS"
+  CFLAGS="-hipa2"
+else
+  CFLAGS="${CFLAGS} -hipa2"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_cflags_appended=-hipa2
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  je_cv_cflags_appended=
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -hnognu" >&5
+$as_echo_n "checking whether compiler supports -hnognu... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-hnognu"
+else
+  CFLAGS="${CFLAGS} -hnognu"
 fi
 cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
 
 int
 main ()
 {
 
     return 0;
 
   ;
   return 0;
 }
 _ACEOF
 if ac_fn_c_try_compile "$LINENO"; then :
-  je_cv_cflags_appended=$EXTRA_CFLAGS
+  je_cv_cflags_appended=-hnognu
               { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
 $as_echo "yes" >&6; }
 else
   je_cv_cflags_appended=
               { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
 $as_echo "no" >&6; }
               CFLAGS="${TCFLAGS}"
 
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
-fi
+    fi
+    if test "x$enable_cc_silence" != "xno" ; then
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -hnomessage=128" >&5
+$as_echo_n "checking whether compiler supports -hnomessage=128... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-hnomessage=128"
+else
+  CFLAGS="${CFLAGS} -hnomessage=128"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_cflags_appended=-hnomessage=128
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  je_cv_cflags_appended=
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -hnomessage=1357" >&5
+$as_echo_n "checking whether compiler supports -hnomessage=1357... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-hnomessage=1357"
+else
+  CFLAGS="${CFLAGS} -hnomessage=1357"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_cflags_appended=-hnomessage=1357
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  je_cv_cflags_appended=
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+    fi
+  fi
+fi
+
 ac_ext=c
 ac_cpp='$CPP $CPPFLAGS'
 ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
 ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
 ac_compiler_gnu=$ac_cv_c_compiler_gnu
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5
 $as_echo_n "checking how to run the C preprocessor... " >&6; }
 # On Suns, sometimes $CPP names a directory.
@@ -4911,27 +5183,37 @@ cat >>confdefs.h <<_ACEOF
 
 LD_PRELOAD_VAR="LD_PRELOAD"
 so="so"
 importlib="${so}"
 o="$ac_objext"
 a="a"
 exe="$ac_exeext"
 libprefix="lib"
+link_whole_archive="0"
 DSO_LDFLAGS='-shared -Wl,-soname,$(@F)'
 RPATH='-Wl,-rpath,$(1)'
 SOREV="${so}.${rev}"
 PIC_CFLAGS='-fPIC -DPIC'
 CTARGET='-o $@'
 LDTARGET='-o $@'
+TEST_LD_MODE=
 EXTRA_LDFLAGS=
 ARFLAGS='crus'
 AROUT=' $@'
 CC_MM=1
 
+if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then
+  TEST_LD_MODE='-dynamic'
+fi
+
+if test "x${je_cv_cray}" = "xyes" ; then
+  CC_MM=
+fi
+
 
 
 
 if test -n "$ac_tool_prefix"; then
   # Extract the first word of "${ac_tool_prefix}ar", so it can be a program name with args.
 set dummy ${ac_tool_prefix}ar; ac_word=$2
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
 $as_echo_n "checking for $ac_word... " >&6; }
@@ -5018,65 +5300,64 @@ ac_tool_warned=yes ;;
 esac
     AR=$ac_ct_AR
   fi
 else
   AR="$ac_cv_prog_AR"
 fi
 
 
+CFLAGS="$CFLAGS"
 default_munmap="1"
 maps_coalesce="1"
 case "${host}" in
   *-*-darwin* | *-*-ios*)
-	CFLAGS="$CFLAGS"
 	abi="macho"
 	$as_echo "#define JEMALLOC_PURGE_MADVISE_FREE  " >>confdefs.h
 
 	RPATH=""
 	LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES"
 	so="dylib"
 	importlib="${so}"
 	force_tls="0"
 	DSO_LDFLAGS='-shared -Wl,-install_name,$(LIBDIR)/$(@F)'
 	SOREV="${rev}.${so}"
 	sbrk_deprecated="1"
 	;;
   *-*-freebsd*)
-	CFLAGS="$CFLAGS"
 	abi="elf"
+	$as_echo "#define JEMALLOC_SYSCTL_VM_OVERCOMMIT  " >>confdefs.h
+
 	$as_echo "#define JEMALLOC_PURGE_MADVISE_FREE  " >>confdefs.h
 
 	force_lazy_lock="1"
 	;;
   *-*-dragonfly*)
-	CFLAGS="$CFLAGS"
 	abi="elf"
 	$as_echo "#define JEMALLOC_PURGE_MADVISE_FREE  " >>confdefs.h
 
 	;;
   *-*-openbsd*)
-	CFLAGS="$CFLAGS"
 	abi="elf"
 	$as_echo "#define JEMALLOC_PURGE_MADVISE_FREE  " >>confdefs.h
 
 	force_tls="0"
 	;;
   *-*-bitrig*)
-	CFLAGS="$CFLAGS"
 	abi="elf"
 	$as_echo "#define JEMALLOC_PURGE_MADVISE_FREE  " >>confdefs.h
 
 	;;
   *-*-linux*)
-	CFLAGS="$CFLAGS"
-	CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE"
+		CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE"
 	abi="elf"
 	$as_echo "#define JEMALLOC_HAS_ALLOCA_H 1" >>confdefs.h
 
+	$as_echo "#define JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY  " >>confdefs.h
+
 	$as_echo "#define JEMALLOC_PURGE_MADVISE_DONTNEED  " >>confdefs.h
 
 	$as_echo "#define JEMALLOC_THREADED_INIT  " >>confdefs.h
 
 	$as_echo "#define JEMALLOC_USE_CXX_THROW  " >>confdefs.h
 
 	default_munmap="0"
 	;;
@@ -5095,28 +5376,27 @@ int
 main ()
 {
 
   ;
   return 0;
 }
 _ACEOF
 if ac_fn_c_try_compile "$LINENO"; then :
-  CFLAGS="$CFLAGS"; abi="elf"
+  abi="elf"
 else
   abi="aout"
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 	{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $abi" >&5
 $as_echo "$abi" >&6; }
 	$as_echo "#define JEMALLOC_PURGE_MADVISE_FREE  " >>confdefs.h
 
 	;;
   *-*-solaris2*)
-	CFLAGS="$CFLAGS"
 	abi="elf"
 	$as_echo "#define JEMALLOC_PURGE_MADVISE_FREE  " >>confdefs.h
 
 	RPATH='-Wl,-R,$(1)'
 		CPPFLAGS="$CPPFLAGS -D_POSIX_PTHREAD_SEMANTICS"
 	LIBS="$LIBS -lposix4 -lsocket -lnsl"
 	;;
   *-ibm-aix*)
@@ -5125,33 +5405,33 @@ rm -f core conftest.err conftest.$ac_obj
 	else
 	  	  LD_PRELOAD_VAR="LDR_PRELOAD"
 	fi
 	abi="xcoff"
 	;;
   *-*-mingw* | *-*-cygwin*)
 	abi="pecoff"
 	force_tls="0"
-	force_lazy_lock="1"
 	maps_coalesce="0"
 	RPATH=""
 	so="dll"
 	if test "x$je_cv_msvc" = "xyes" ; then
 	  importlib="lib"
 	  DSO_LDFLAGS="-LD"
 	  EXTRA_LDFLAGS="-link -DEBUG"
 	  CTARGET='-Fo$@'
 	  LDTARGET='-Fe$@'
 	  AR='lib'
 	  ARFLAGS='-nologo -out:'
 	  AROUT='$@'
 	  CC_MM=
         else
 	  importlib="${so}"
 	  DSO_LDFLAGS="-shared"
+	  link_whole_archive="1"
 	fi
 	a="lib"
 	libprefix=""
 	SOREV="${so}"
 	PIC_CFLAGS=""
 	;;
   *)
 	{ $as_echo "$as_me:${as_lineno-$LINENO}: result: Unsupported operating system: ${host}" >&5
@@ -5223,16 +5503,83 @@ cat >>confdefs.h <<_ACEOF
 
 
 
 
 
 
 
 
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing log" >&5
+$as_echo_n "checking for library containing log... " >&6; }
+if ${ac_cv_search_log+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char log ();
+int
+main ()
+{
+return log ();
+  ;
+  return 0;
+}
+_ACEOF
+for ac_lib in '' m; do
+  if test -z "$ac_lib"; then
+    ac_res="none required"
+  else
+    ac_res=-l$ac_lib
+    LIBS="-l$ac_lib  $ac_func_search_save_LIBS"
+  fi
+  if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_search_log=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext
+  if ${ac_cv_search_log+:} false; then :
+  break
+fi
+done
+if ${ac_cv_search_log+:} false; then :
+
+else
+  ac_cv_search_log=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_log" >&5
+$as_echo "$ac_cv_search_log" >&6; }
+ac_res=$ac_cv_search_log
+if test "$ac_res" != no; then :
+  test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+else
+  as_fn_error $? "Missing math functions" "$LINENO" 5
+fi
+
+if test "x$ac_cv_search_log" != "xnone required" ; then
+  LM="$ac_cv_search_log"
+else
+  LM=
+fi
+
+
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether __attribute__ syntax is compilable" >&5
 $as_echo_n "checking whether __attribute__ syntax is compilable... " >&6; }
 if ${je_cv_attribute+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 static __attribute__((unused)) void foo(void){}
@@ -5330,16 +5677,52 @@ else
               { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
 $as_echo "no" >&6; }
               CFLAGS="${TCFLAGS}"
 
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -herror_on_warning" >&5
+$as_echo_n "checking whether compiler supports -herror_on_warning... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-herror_on_warning"
+else
+  CFLAGS="${CFLAGS} -herror_on_warning"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_cflags_appended=-herror_on_warning
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  je_cv_cflags_appended=
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether tls_model attribute is compilable" >&5
 $as_echo_n "checking whether tls_model attribute is compilable... " >&6; }
 if ${je_cv_tls_model+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
@@ -5405,16 +5788,52 @@ else
               { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
 $as_echo "no" >&6; }
               CFLAGS="${TCFLAGS}"
 
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -herror_on_warning" >&5
+$as_echo_n "checking whether compiler supports -herror_on_warning... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-herror_on_warning"
+else
+  CFLAGS="${CFLAGS} -herror_on_warning"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_cflags_appended=-herror_on_warning
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  je_cv_cflags_appended=
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether alloc_size attribute is compilable" >&5
 $as_echo_n "checking whether alloc_size attribute is compilable... " >&6; }
 if ${je_cv_alloc_size+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 #include <stdlib.h>
@@ -5475,16 +5894,52 @@ else
               { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
 $as_echo "no" >&6; }
               CFLAGS="${TCFLAGS}"
 
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -herror_on_warning" >&5
+$as_echo_n "checking whether compiler supports -herror_on_warning... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-herror_on_warning"
+else
+  CFLAGS="${CFLAGS} -herror_on_warning"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_cflags_appended=-herror_on_warning
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  je_cv_cflags_appended=
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether format(gnu_printf, ...) attribute is compilable" >&5
 $as_echo_n "checking whether format(gnu_printf, ...) attribute is compilable... " >&6; }
 if ${je_cv_format_gnu_printf+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 #include <stdlib.h>
@@ -5545,16 +6000,52 @@ else
               { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
 $as_echo "no" >&6; }
               CFLAGS="${TCFLAGS}"
 
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -herror_on_warning" >&5
+$as_echo_n "checking whether compiler supports -herror_on_warning... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-herror_on_warning"
+else
+  CFLAGS="${CFLAGS} -herror_on_warning"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_cflags_appended=-herror_on_warning
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  je_cv_cflags_appended=
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether format(printf, ...) attribute is compilable" >&5
 $as_echo_n "checking whether format(printf, ...) attribute is compilable... " >&6; }
 if ${je_cv_format_printf+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 #include <stdlib.h>
@@ -6647,18 +7138,18 @@ if test "x$backtrace_method" = "x" ; the
   backtrace_method="none (disabling profiling)"
   enable_prof="0"
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking configured backtracing method" >&5
 $as_echo_n "checking configured backtracing method... " >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $backtrace_method" >&5
 $as_echo "$backtrace_method" >&6; }
 if test "x$enable_prof" = "x1" ; then
-  if test "x$abi" != "xpecoff"; then
-        LIBS="$LIBS -lm"
+    if test "x$LM" != "x" ; then
+    LIBS="$LIBS $LM"
   fi
 
   $as_echo "#define JEMALLOC_PROF  " >>confdefs.h
 
 fi
 
 
 # Check whether --enable-tcache was given.
@@ -6897,16 +7388,62 @@ fi
 
 if test "x$enable_cache_oblivious" = "x1" ; then
   $as_echo "#define JEMALLOC_CACHE_OBLIVIOUS  " >>confdefs.h
 
 fi
 
 
 
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program using __builtin_unreachable is compilable" >&5
+$as_echo_n "checking whether a program using __builtin_unreachable is compilable... " >&6; }
+if ${je_cv_gcc_builtin_unreachable+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+void foo (void) {
+  __builtin_unreachable();
+}
+
+int
+main ()
+{
+
+	{
+		foo();
+	}
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  je_cv_gcc_builtin_unreachable=yes
+else
+  je_cv_gcc_builtin_unreachable=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_gcc_builtin_unreachable" >&5
+$as_echo "$je_cv_gcc_builtin_unreachable" >&6; }
+
+if test "x${je_cv_gcc_builtin_unreachable}" = "xyes" ; then
+  $as_echo "#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable" >>confdefs.h
+
+else
+  $as_echo "#define JEMALLOC_INTERNAL_UNREACHABLE abort" >>confdefs.h
+
+fi
+
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program using __builtin_ffsl is compilable" >&5
 $as_echo_n "checking whether a program using __builtin_ffsl is compilable... " >&6; }
 if ${je_cv_gcc_builtin_ffsl+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
@@ -7122,33 +7659,46 @@ fi
 if test "${with_lg_size_class_group+set}" = set; then :
   withval=$with_lg_size_class_group; LG_SIZE_CLASS_GROUP="$with_lg_size_class_group"
 else
   LG_SIZE_CLASS_GROUP="2"
 fi
 
 
 
-if test "x`test ! \"${srcroot}\" && cd \"${srcroot}\"; git rev-parse --is-inside-work-tree 2>/dev/null`" = "xtrue" ; then
-        rm -f "${objroot}VERSION"
-  for pattern in '[0-9].[0-9].[0-9]' '[0-9].[0-9].[0-9][0-9]' \
-                 '[0-9].[0-9][0-9].[0-9]' '[0-9].[0-9][0-9].[0-9][0-9]' \
-                 '[0-9][0-9].[0-9].[0-9]' '[0-9][0-9].[0-9].[0-9][0-9]' \
-                 '[0-9][0-9].[0-9][0-9].[0-9]' \
-                 '[0-9][0-9].[0-9][0-9].[0-9][0-9]'; do
-    if test ! -e "${objroot}VERSION" ; then
-      (test ! "${srcroot}" && cd "${srcroot}"; git describe --long --abbrev=40 --match="${pattern}") > "${objroot}VERSION.tmp" 2>/dev/null
-      if test $? -eq 0 ; then
-        mv "${objroot}VERSION.tmp" "${objroot}VERSION"
-        break
-      fi
+
+# Check whether --with-version was given.
+if test "${with_version+set}" = set; then :
+  withval=$with_version;
+    echo "${with_version}" | grep '^[0-9]\+\.[0-9]\+\.[0-9]\+-[0-9]\+-g[0-9a-f]\+$' 2>&1 1>/dev/null
+    if test $? -ne 0 ; then
+      as_fn_error $? "${with_version} does not match <major>.<minor>.<bugfix>-<nrev>-g<gid>" "$LINENO" 5
     fi
-  done
-fi
-rm -f "${objroot}VERSION.tmp"
+    echo "$with_version" > "${objroot}VERSION"
+
+else
+
+        if test "x`test ! \"${srcroot}\" && cd \"${srcroot}\"; git rev-parse --is-inside-work-tree 2>/dev/null`" = "xtrue" ; then
+                        for pattern in '[0-9].[0-9].[0-9]' '[0-9].[0-9].[0-9][0-9]' \
+                     '[0-9].[0-9][0-9].[0-9]' '[0-9].[0-9][0-9].[0-9][0-9]' \
+                     '[0-9][0-9].[0-9].[0-9]' '[0-9][0-9].[0-9].[0-9][0-9]' \
+                     '[0-9][0-9].[0-9][0-9].[0-9]' \
+                     '[0-9][0-9].[0-9][0-9].[0-9][0-9]'; do
+        (test ! "${srcroot}" && cd "${srcroot}"; git describe --long --abbrev=40 --match="${pattern}") > "${objroot}VERSION.tmp" 2>/dev/null
+        if test $? -eq 0 ; then
+          mv "${objroot}VERSION.tmp" "${objroot}VERSION"
+          break
+        fi
+      done
+    fi
+    rm -f "${objroot}VERSION.tmp"
+
+fi
+
+
 if test ! -e "${objroot}VERSION" ; then
   if test ! -e "${srcroot}VERSION" ; then
     { $as_echo "$as_me:${as_lineno-$LINENO}: result: Missing VERSION file, and unable to generate it; creating bogus VERSION" >&5
 $as_echo "Missing VERSION file, and unable to generate it; creating bogus VERSION" >&6; }
     echo "0.0.0-0-g0000000000000000000000000000000000000000" > "${objroot}VERSION"
   else
     cp ${srcroot}VERSION ${objroot}VERSION
   fi
@@ -7280,18 +7830,16 @@ else
 fi
 
 fi
 
 fi
 
 CPPFLAGS="$CPPFLAGS -D_REENTRANT"
 
-SAVED_LIBS="${LIBS}"
-LIBS=
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing clock_gettime" >&5
 $as_echo_n "checking for library containing clock_gettime... " >&6; }
 if ${ac_cv_search_clock_gettime+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   ac_func_search_save_LIBS=$LIBS
 cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
@@ -7335,21 +7883,316 @@ fi
 rm conftest.$ac_ext
 LIBS=$ac_func_search_save_LIBS
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_clock_gettime" >&5
 $as_echo "$ac_cv_search_clock_gettime" >&6; }
 ac_res=$ac_cv_search_clock_gettime
 if test "$ac_res" != no; then :
   test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
-  TESTLIBS="${LIBS}"
-fi
-
-
-LIBS="${SAVED_LIBS}"
+
+fi
+
+
+if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then
+  if test "$ac_cv_search_clock_gettime" != "-lrt"; then
+    SAVED_CFLAGS="${CFLAGS}"
+
+    unset ac_cv_search_clock_gettime
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -dynamic" >&5
+$as_echo_n "checking whether compiler supports -dynamic... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-dynamic"
+else
+  CFLAGS="${CFLAGS} -dynamic"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_cflags_appended=-dynamic
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  je_cv_cflags_appended=
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing clock_gettime" >&5
+$as_echo_n "checking for library containing clock_gettime... " >&6; }
+if ${ac_cv_search_clock_gettime+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char clock_gettime ();
+int
+main ()
+{
+return clock_gettime ();
+  ;
+  return 0;
+}
+_ACEOF
+for ac_lib in '' rt; do
+  if test -z "$ac_lib"; then
+    ac_res="none required"
+  else
+    ac_res=-l$ac_lib
+    LIBS="-l$ac_lib  $ac_func_search_save_LIBS"
+  fi
+  if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_search_clock_gettime=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext
+  if ${ac_cv_search_clock_gettime+:} false; then :
+  break
+fi
+done
+if ${ac_cv_search_clock_gettime+:} false; then :
+
+else
+  ac_cv_search_clock_gettime=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_clock_gettime" >&5
+$as_echo "$ac_cv_search_clock_gettime" >&6; }
+ac_res=$ac_cv_search_clock_gettime
+if test "$ac_res" != no; then :
+  test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+fi
+
+
+    CFLAGS="${SAVED_CFLAGS}"
+  fi
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is compilable" >&5
+$as_echo_n "checking whether clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is compilable... " >&6; }
+if ${je_cv_clock_monotonic_coarse+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#include <time.h>
+
+int
+main ()
+{
+
+	struct timespec ts;
+
+	clock_gettime(CLOCK_MONOTONIC_COARSE, &ts);
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  je_cv_clock_monotonic_coarse=yes
+else
+  je_cv_clock_monotonic_coarse=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_clock_monotonic_coarse" >&5
+$as_echo "$je_cv_clock_monotonic_coarse" >&6; }
+
+if test "x${je_cv_clock_monotonic_coarse}" = "xyes" ; then
+  $as_echo "#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE 1" >>confdefs.h
+
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether clock_gettime(CLOCK_MONOTONIC, ...) is compilable" >&5
+$as_echo_n "checking whether clock_gettime(CLOCK_MONOTONIC, ...) is compilable... " >&6; }
+if ${je_cv_clock_monotonic+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#include <unistd.h>
+#include <time.h>
+
+int
+main ()
+{
+
+	struct timespec ts;
+
+	clock_gettime(CLOCK_MONOTONIC, &ts);
+#if !defined(_POSIX_MONOTONIC_CLOCK) || _POSIX_MONOTONIC_CLOCK < 0
+#  error _POSIX_MONOTONIC_CLOCK missing/invalid
+#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  je_cv_clock_monotonic=yes
+else
+  je_cv_clock_monotonic=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_clock_monotonic" >&5
+$as_echo "$je_cv_clock_monotonic" >&6; }
+
+if test "x${je_cv_clock_monotonic}" = "xyes" ; then
+  $as_echo "#define JEMALLOC_HAVE_CLOCK_MONOTONIC 1" >>confdefs.h
+
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether mach_absolute_time() is compilable" >&5
+$as_echo_n "checking whether mach_absolute_time() is compilable... " >&6; }
+if ${je_cv_mach_absolute_time+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#include <mach/mach_time.h>
+
+int
+main ()
+{
+
+	mach_absolute_time();
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  je_cv_mach_absolute_time=yes
+else
+  je_cv_mach_absolute_time=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_mach_absolute_time" >&5
+$as_echo "$je_cv_mach_absolute_time" >&6; }
+
+if test "x${je_cv_mach_absolute_time}" = "xyes" ; then
+  $as_echo "#define JEMALLOC_HAVE_MACH_ABSOLUTE_TIME 1" >>confdefs.h
+
+fi
+
+SAVED_CFLAGS="${CFLAGS}"
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Werror" >&5
+$as_echo_n "checking whether compiler supports -Werror... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-Werror"
+else
+  CFLAGS="${CFLAGS} -Werror"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_cflags_appended=-Werror
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  je_cv_cflags_appended=
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether syscall(2) is compilable" >&5
+$as_echo_n "checking whether syscall(2) is compilable... " >&6; }
+if ${je_cv_syscall+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#include <sys/syscall.h>
+#include <unistd.h>
+
+int
+main ()
+{
+
+	syscall(SYS_write, 2, "hello", 5);
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  je_cv_syscall=yes
+else
+  je_cv_syscall=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_syscall" >&5
+$as_echo "$je_cv_syscall" >&6; }
+
+CFLAGS="${SAVED_CFLAGS}"
+if test "x$je_cv_syscall" = "xyes" ; then
+  $as_echo "#define JEMALLOC_HAVE_SYSCALL  " >>confdefs.h
+
+fi
 
 ac_fn_c_check_func "$LINENO" "secure_getenv" "ac_cv_func_secure_getenv"
 if test "x$ac_cv_func_secure_getenv" = xyes; then :
   have_secure_getenv="1"
 else
   have_secure_getenv="0"
 
 fi
@@ -7407,20 +8250,29 @@ else
   enable_lazy_lock="1"
 fi
 
 else
   enable_lazy_lock=""
 
 fi
 
-if test "x$enable_lazy_lock" = "x" -a "x${force_lazy_lock}" = "x1" ; then
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing lazy-lock to avoid allocator/threading bootstrap issues" >&5
+if test "x${enable_lazy_lock}" = "x" ; then
+  if test "x${force_lazy_lock}" = "x1" ; then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing lazy-lock to avoid allocator/threading bootstrap issues" >&5
 $as_echo "Forcing lazy-lock to avoid allocator/threading bootstrap issues" >&6; }
-  enable_lazy_lock="1"
+    enable_lazy_lock="1"
+  else
+    enable_lazy_lock="0"
+  fi
+fi
+if test "x${enable_lazy_lock}" = "x1" -a "x${abi}" = "xpecoff" ; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing no lazy-lock because thread creation monitoring is unimplemented" >&5
+$as_echo "Forcing no lazy-lock because thread creation monitoring is unimplemented" >&6; }
+  enable_lazy_lock="0"
 fi
 if test "x$enable_lazy_lock" = "x1" ; then
   if test "x$abi" != "xpecoff" ; then
     for ac_header in dlfcn.h
 do :
   ac_fn_c_check_header_mongrel "$LINENO" "dlfcn.h" "ac_cv_header_dlfcn_h" "$ac_includes_default"
 if test "x$ac_cv_header_dlfcn_h" = xyes; then :
   cat >>confdefs.h <<_ACEOF
@@ -7480,18 +8332,16 @@ else
 fi
 
 
 fi
 
   fi
   $as_echo "#define JEMALLOC_LAZY_LOCK  " >>confdefs.h
 
-else
-  enable_lazy_lock="0"
 fi
 
 
 # Check whether --enable-tls was given.
 if test "${enable_tls+set}" = set; then :
   enableval=$enable_tls; if test "x$enable_tls" = "xno" ; then
   enable_tls="0"
 else
@@ -7882,16 +8732,56 @@ fi
 
 if test "x${je_cv_builtin_clz}" = "xyes" ; then
   $as_echo "#define JEMALLOC_HAVE_BUILTIN_CLZ  " >>confdefs.h
 
 fi
 
 
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether Darwin os_unfair_lock_*() is compilable" >&5
+$as_echo_n "checking whether Darwin os_unfair_lock_*() is compilable... " >&6; }
+if ${je_cv_os_unfair_lock+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#include <os/lock.h>
+
+int
+main ()
+{
+
+	os_unfair_lock lock = OS_UNFAIR_LOCK_INIT;
+	os_unfair_lock_lock(&lock);
+	os_unfair_lock_unlock(&lock);
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  je_cv_os_unfair_lock=yes
+else
+  je_cv_os_unfair_lock=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_os_unfair_lock" >&5
+$as_echo "$je_cv_os_unfair_lock" >&6; }
+
+if test "x${je_cv_os_unfair_lock}" = "xyes" ; then
+  $as_echo "#define JEMALLOC_OS_UNFAIR_LOCK  " >>confdefs.h
+
+fi
+
+
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether Darwin OSSpin*() is compilable" >&5
 $as_echo_n "checking whether Darwin OSSpin*() is compilable... " >&6; }
 if ${je_cv_osspin+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
@@ -9784,26 +10674,26 @@ fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: " >&5
 $as_echo "" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: CONFIG             : ${CONFIG}" >&5
 $as_echo "CONFIG             : ${CONFIG}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: CC                 : ${CC}" >&5
 $as_echo "CC                 : ${CC}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: CFLAGS             : ${CFLAGS}" >&5
 $as_echo "CFLAGS             : ${CFLAGS}" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: EXTRA_CFLAGS       : ${EXTRA_CFLAGS}" >&5
+$as_echo "EXTRA_CFLAGS       : ${EXTRA_CFLAGS}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: CPPFLAGS           : ${CPPFLAGS}" >&5
 $as_echo "CPPFLAGS           : ${CPPFLAGS}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: LDFLAGS            : ${LDFLAGS}" >&5
 $as_echo "LDFLAGS            : ${LDFLAGS}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: EXTRA_LDFLAGS      : ${EXTRA_LDFLAGS}" >&5
 $as_echo "EXTRA_LDFLAGS      : ${EXTRA_LDFLAGS}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: LIBS               : ${LIBS}" >&5
 $as_echo "LIBS               : ${LIBS}" >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: TESTLIBS           : ${TESTLIBS}" >&5
-$as_echo "TESTLIBS           : ${TESTLIBS}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: RPATH_EXTRA        : ${RPATH_EXTRA}" >&5
 $as_echo "RPATH_EXTRA        : ${RPATH_EXTRA}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: " >&5
 $as_echo "" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: XSLTPROC           : ${XSLTPROC}" >&5
 $as_echo "XSLTPROC           : ${XSLTPROC}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: XSLROOT            : ${XSLROOT}" >&5
 $as_echo "XSLROOT            : ${XSLROOT}" >&6; }
--- a/memory/jemalloc/src/configure.ac
+++ b/memory/jemalloc/src/configure.ac
@@ -113,54 +113,108 @@ fi
   XSLROOT="${DEFAULT_XSLROOT}"
 )
 AC_SUBST([XSLROOT])
 
 dnl If CFLAGS isn't defined, set CFLAGS to something reasonable.  Otherwise,
 dnl just prevent autoconf from molesting CFLAGS.
 CFLAGS=$CFLAGS
 AC_PROG_CC
+
 if test "x$GCC" != "xyes" ; then
   AC_CACHE_CHECK([whether compiler is MSVC],
                  [je_cv_msvc],
                  [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],
                                                      [
 #ifndef _MSC_VER
   int fail[-1];
 #endif
 ])],
                                [je_cv_msvc=yes],
                                [je_cv_msvc=no])])
 fi
 
+dnl check if a cray prgenv wrapper compiler is being used
+je_cv_cray_prgenv_wrapper=""
+if test "x${PE_ENV}" != "x" ; then
+  case "${CC}" in
+    CC|cc)
+	je_cv_cray_prgenv_wrapper="yes"
+	;;
+    *)
+       ;;
+  esac
+fi
+
+AC_CACHE_CHECK([whether compiler is cray],
+              [je_cv_cray],
+              [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],
+                                                  [
+#ifndef _CRAYC
+  int fail[-1];
+#endif
+])],
+                            [je_cv_cray=yes],
+                            [je_cv_cray=no])])
+
+if test "x${je_cv_cray}" = "xyes" ; then
+  AC_CACHE_CHECK([whether cray compiler version is 8.4],
+                [je_cv_cray_84],
+                [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],
+                                                      [
+#if !(_RELEASE_MAJOR == 8 && _RELEASE_MINOR == 4)
+  int fail[-1];
+#endif
+])],
+                              [je_cv_cray_84=yes],
+                              [je_cv_cray_84=no])])
+fi
+
 if test "x$CFLAGS" = "x" ; then
   no_CFLAGS="yes"
   if test "x$GCC" = "xyes" ; then
-    JE_CFLAGS_APPEND([-std=gnu99])
-    if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then
+dnl    JE_CFLAGS_APPEND([-std=gnu99])
+    JE_CFLAGS_APPEND([-std=gnu11])
+    if test "x$je_cv_cflags_appended" = "x-std=gnu11" ; then
       AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT])
+    else
+      JE_CFLAGS_APPEND([-std=gnu99])
+      if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then
+        AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT])
+      fi
     fi
     JE_CFLAGS_APPEND([-Wall])
     JE_CFLAGS_APPEND([-Werror=declaration-after-statement])
     JE_CFLAGS_APPEND([-Wshorten-64-to-32])
+    JE_CFLAGS_APPEND([-Wsign-compare])
     JE_CFLAGS_APPEND([-pipe])
     JE_CFLAGS_APPEND([-g3])
   elif test "x$je_cv_msvc" = "xyes" ; then
     CC="$CC -nologo"
     JE_CFLAGS_APPEND([-Zi])
     JE_CFLAGS_APPEND([-MT])
     JE_CFLAGS_APPEND([-W3])
     JE_CFLAGS_APPEND([-FS])
     CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat"
   fi
+  if test "x$je_cv_cray" = "xyes" ; then
+    dnl cray compiler 8.4 has an inlining bug
+    if test "x$je_cv_cray_84" = "xyes" ; then
+      JE_CFLAGS_APPEND([-hipa2])
+      JE_CFLAGS_APPEND([-hnognu])
+    fi
+    if test "x$enable_cc_silence" != "xno" ; then
+      dnl ignore unreachable code warning
+      JE_CFLAGS_APPEND([-hnomessage=128])
+      dnl ignore redefinition of "malloc", "free", etc warning
+      JE_CFLAGS_APPEND([-hnomessage=1357])
+    fi
+  fi
 fi
-dnl Append EXTRA_CFLAGS to CFLAGS, if defined.
-if test "x$EXTRA_CFLAGS" != "x" ; then
-  JE_CFLAGS_APPEND([$EXTRA_CFLAGS])
-fi
+AC_SUBST([EXTRA_CFLAGS])
 AC_PROG_CPP
 
 AC_C_BIGENDIAN([ac_cv_big_endian=1], [ac_cv_big_endian=0])
 if test "x${ac_cv_big_endian}" = "x1" ; then
   AC_DEFINE_UNQUOTED([JEMALLOC_BIG_ENDIAN], [ ])
 fi
 
 if test "x${je_cv_msvc}" = "xyes" -a "x${ac_cv_header_inttypes_h}" = "xno"; then
@@ -257,103 +311,110 @@ AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU
 
 LD_PRELOAD_VAR="LD_PRELOAD"
 so="so"
 importlib="${so}"
 o="$ac_objext"
 a="a"
 exe="$ac_exeext"
 libprefix="lib"
+link_whole_archive="0"
 DSO_LDFLAGS='-shared -Wl,-soname,$(@F)'
 RPATH='-Wl,-rpath,$(1)'
 SOREV="${so}.${rev}"
 PIC_CFLAGS='-fPIC -DPIC'
 CTARGET='-o $@'
 LDTARGET='-o $@'
+TEST_LD_MODE=
 EXTRA_LDFLAGS=
 ARFLAGS='crus'
 AROUT=' $@'
 CC_MM=1
 
+if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then
+  TEST_LD_MODE='-dynamic'
+fi
+
+if test "x${je_cv_cray}" = "xyes" ; then
+  CC_MM=
+fi
+
 AN_MAKEVAR([AR], [AC_PROG_AR])
 AN_PROGRAM([ar], [AC_PROG_AR])
 AC_DEFUN([AC_PROG_AR], [AC_CHECK_TOOL(AR, ar, :)])
 AC_PROG_AR
 
 dnl Platform-specific settings.  abi and RPATH can probably be determined
 dnl programmatically, but doing so is error-prone, which makes it generally
 dnl not worth the trouble.
 dnl 
 dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the
 dnl definitions need to be seen before any headers are included, which is a pain
 dnl to make happen otherwise.
+CFLAGS="$CFLAGS"
 default_munmap="1"
 maps_coalesce="1"
 case "${host}" in
   *-*-darwin* | *-*-ios*)
-	CFLAGS="$CFLAGS"
 	abi="macho"
 	AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
 	RPATH=""
 	LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES"
 	so="dylib"
 	importlib="${so}"
 	force_tls="0"
 	DSO_LDFLAGS='-shared -Wl,-install_name,$(LIBDIR)/$(@F)'
 	SOREV="${rev}.${so}"
 	sbrk_deprecated="1"
 	;;
   *-*-freebsd*)
-	CFLAGS="$CFLAGS"
 	abi="elf"
+	AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ])
 	AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
 	force_lazy_lock="1"
 	;;
   *-*-dragonfly*)
-	CFLAGS="$CFLAGS"
 	abi="elf"
 	AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
 	;;
   *-*-openbsd*)
-	CFLAGS="$CFLAGS"
 	abi="elf"
 	AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
 	force_tls="0"
 	;;
   *-*-bitrig*)
-	CFLAGS="$CFLAGS"
 	abi="elf"
 	AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
 	;;
   *-*-linux*)
-	CFLAGS="$CFLAGS"
+	dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE.
 	CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE"
 	abi="elf"
 	AC_DEFINE([JEMALLOC_HAS_ALLOCA_H])
+	AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ])
 	AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ])
 	AC_DEFINE([JEMALLOC_THREADED_INIT], [ ])
 	AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ])
 	default_munmap="0"
 	;;
   *-*-netbsd*)
 	AC_MSG_CHECKING([ABI])
         AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
 [[#ifdef __ELF__
 /* ELF */
 #else
 #error aout
 #endif
 ]])],
-                          [CFLAGS="$CFLAGS"; abi="elf"],
+                          [abi="elf"],
                           [abi="aout"])
 	AC_MSG_RESULT([$abi])
 	AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
 	;;
   *-*-solaris2*)
-	CFLAGS="$CFLAGS"
 	abi="elf"
 	AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
 	RPATH='-Wl,-R,$(1)'
 	dnl Solaris needs this for sigwait().
 	CPPFLAGS="$CPPFLAGS -D_POSIX_PTHREAD_SEMANTICS"
 	LIBS="$LIBS -lposix4 -lsocket -lnsl"
 	;;
   *-ibm-aix*)
@@ -364,33 +425,33 @@ case "${host}" in
 	  dnl 32bit AIX
 	  LD_PRELOAD_VAR="LDR_PRELOAD"
 	fi
 	abi="xcoff"
 	;;
   *-*-mingw* | *-*-cygwin*)
 	abi="pecoff"
 	force_tls="0"
-	force_lazy_lock="1"
 	maps_coalesce="0"
 	RPATH=""
 	so="dll"
 	if test "x$je_cv_msvc" = "xyes" ; then
 	  importlib="lib"
 	  DSO_LDFLAGS="-LD"
 	  EXTRA_LDFLAGS="-link -DEBUG"
 	  CTARGET='-Fo$@'
 	  LDTARGET='-Fe$@'
 	  AR='lib'
 	  ARFLAGS='-nologo -out:'
 	  AROUT='$@'
 	  CC_MM=
         else
 	  importlib="${so}"
 	  DSO_LDFLAGS="-shared"
+	  link_whole_archive="1"
 	fi
 	a="lib"
 	libprefix=""
 	SOREV="${so}"
 	PIC_CFLAGS=""
 	;;
   *)
 	AC_MSG_RESULT([Unsupported operating system: ${host}])
@@ -418,75 +479,90 @@ AC_SUBST([abi])
 AC_SUBST([RPATH])
 AC_SUBST([LD_PRELOAD_VAR])
 AC_SUBST([so])
 AC_SUBST([importlib])
 AC_SUBST([o])
 AC_SUBST([a])
 AC_SUBST([exe])
 AC_SUBST([libprefix])
+AC_SUBST([link_whole_archive])
 AC_SUBST([DSO_LDFLAGS])
 AC_SUBST([EXTRA_LDFLAGS])
 AC_SUBST([SOREV])
 AC_SUBST([PIC_CFLAGS])
 AC_SUBST([CTARGET])
 AC_SUBST([LDTARGET])
+AC_SUBST([TEST_LD_MODE])
 AC_SUBST([MKLIB])
 AC_SUBST([ARFLAGS])
 AC_SUBST([AROUT])
 AC_SUBST([CC_MM])
 
+dnl Determine whether libm must be linked to use e.g. log(3).
+AC_SEARCH_LIBS([log], [m], , [AC_MSG_ERROR([Missing math functions])])
+if test "x$ac_cv_search_log" != "xnone required" ; then
+  LM="$ac_cv_search_log"
+else
+  LM=
+fi
+AC_SUBST(LM)
+
 JE_COMPILABLE([__attribute__ syntax],
               [static __attribute__((unused)) void foo(void){}],
               [],
               [je_cv_attribute])
 if test "x${je_cv_attribute}" = "xyes" ; then
   AC_DEFINE([JEMALLOC_HAVE_ATTR], [ ])
   if test "x${GCC}" = "xyes" -a "x${abi}" = "xelf"; then
     JE_CFLAGS_APPEND([-fvisibility=hidden])
   fi
 fi
 dnl Check for tls_model attribute support (clang 3.0 still lacks support).
 SAVED_CFLAGS="${CFLAGS}"
 JE_CFLAGS_APPEND([-Werror])
+JE_CFLAGS_APPEND([-herror_on_warning])
 JE_COMPILABLE([tls_model attribute], [],
               [static __thread int
                __attribute__((tls_model("initial-exec"), unused)) foo;
                foo = 0;],
               [je_cv_tls_model])
 CFLAGS="${SAVED_CFLAGS}"
 if test "x${je_cv_tls_model}" = "xyes" ; then
   AC_DEFINE([JEMALLOC_TLS_MODEL],
             [__attribute__((tls_model("initial-exec")))])
 else
   AC_DEFINE([JEMALLOC_TLS_MODEL], [ ])
 fi
 dnl Check for alloc_size attribute support.
 SAVED_CFLAGS="${CFLAGS}"
 JE_CFLAGS_APPEND([-Werror])
+JE_CFLAGS_APPEND([-herror_on_warning])
 JE_COMPILABLE([alloc_size attribute], [#include <stdlib.h>],
               [void *foo(size_t size) __attribute__((alloc_size(1)));],
               [je_cv_alloc_size])
 CFLAGS="${SAVED_CFLAGS}"
 if test "x${je_cv_alloc_size}" = "xyes" ; then
   AC_DEFINE([JEMALLOC_HAVE_ATTR_ALLOC_SIZE], [ ])
 fi
 dnl Check for format(gnu_printf, ...) attribute support.
 SAVED_CFLAGS="${CFLAGS}"
 JE_CFLAGS_APPEND([-Werror])
+JE_CFLAGS_APPEND([-herror_on_warning])
 JE_COMPILABLE([format(gnu_printf, ...) attribute], [#include <stdlib.h>],
               [void *foo(const char *format, ...) __attribute__((format(gnu_printf, 1, 2)));],
               [je_cv_format_gnu_printf])
 CFLAGS="${SAVED_CFLAGS}"
 if test "x${je_cv_format_gnu_printf}" = "xyes" ; then
   AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF], [ ])
 fi
 dnl Check for format(printf, ...) attribute support.
 SAVED_CFLAGS="${CFLAGS}"
 JE_CFLAGS_APPEND([-Werror])
+JE_CFLAGS_APPEND([-herror_on_warning])
 JE_COMPILABLE([format(printf, ...) attribute], [#include <stdlib.h>],
               [void *foo(const char *format, ...) __attribute__((format(printf, 1, 2)));],
               [je_cv_format_printf])
 CFLAGS="${SAVED_CFLAGS}"
 if test "x${je_cv_format_printf}" = "xyes" ; then
   AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_PRINTF], [ ])
 fi
 
@@ -871,19 +947,19 @@ fi
 
 if test "x$backtrace_method" = "x" ; then
   backtrace_method="none (disabling profiling)"
   enable_prof="0"
 fi
 AC_MSG_CHECKING([configured backtracing method])
 AC_MSG_RESULT([$backtrace_method])
 if test "x$enable_prof" = "x1" ; then
-  if test "x$abi" != "xpecoff"; then
-    dnl Heap profiling uses the log(3) function.
-    LIBS="$LIBS -lm"
+  dnl Heap profiling uses the log(3) function.
+  if test "x$LM" != "x" ; then
+    LIBS="$LIBS $LM"
   fi
 
   AC_DEFINE([JEMALLOC_PROF], [ ])
 fi
 AC_SUBST([enable_prof])
 
 dnl Enable thread-specific caching by default.
 AC_ARG_ENABLE([tcache],
@@ -1042,16 +1118,33 @@ fi
 ],
 [enable_cache_oblivious="1"]
 )
 if test "x$enable_cache_oblivious" = "x1" ; then
   AC_DEFINE([JEMALLOC_CACHE_OBLIVIOUS], [ ])
 fi
 AC_SUBST([enable_cache_oblivious])
 
+
+
+JE_COMPILABLE([a program using __builtin_unreachable], [
+void foo (void) {
+  __builtin_unreachable();
+}
+], [
+	{
+		foo();
+	}
+], [je_cv_gcc_builtin_unreachable])
+if test "x${je_cv_gcc_builtin_unreachable}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [__builtin_unreachable])
+else
+  AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [abort])
+fi
+
 dnl ============================================================================
 dnl Check for  __builtin_ffsl(), then ffsl(3), and fail if neither are found.
 dnl One of those two functions should (theoretically) exist on all platforms
 dnl that jemalloc currently has a chance of functioning on without modification.
 dnl We additionally assume ffs[ll]() or __builtin_ffs[ll]() are defined if
 dnl ffsl() or __builtin_ffsl() are defined, respectively.
 JE_COMPILABLE([a program using __builtin_ffsl], [
 #include <stdio.h>
@@ -1167,37 +1260,46 @@ AC_ARG_WITH([lg_size_class_group],
    [Base 2 log of size classes per doubling])],
   [LG_SIZE_CLASS_GROUP="$with_lg_size_class_group"],
   [LG_SIZE_CLASS_GROUP="2"])
 
 dnl ============================================================================
 dnl jemalloc configuration.
 dnl 
 
-dnl Set VERSION if source directory is inside a git repository.
-if test "x`test ! \"${srcroot}\" && cd \"${srcroot}\"; git rev-parse --is-inside-work-tree 2>/dev/null`" = "xtrue" ; then
-  dnl Pattern globs aren't powerful enough to match both single- and
-  dnl double-digit version numbers, so iterate over patterns to support up to
-  dnl version 99.99.99 without any accidental matches.
-  rm -f "${objroot}VERSION"
-  for pattern in ['[0-9].[0-9].[0-9]' '[0-9].[0-9].[0-9][0-9]' \
-                 '[0-9].[0-9][0-9].[0-9]' '[0-9].[0-9][0-9].[0-9][0-9]' \
-                 '[0-9][0-9].[0-9].[0-9]' '[0-9][0-9].[0-9].[0-9][0-9]' \
-                 '[0-9][0-9].[0-9][0-9].[0-9]' \
-                 '[0-9][0-9].[0-9][0-9].[0-9][0-9]']; do
-    if test ! -e "${objroot}VERSION" ; then
-      (test ! "${srcroot}" && cd "${srcroot}"; git describe --long --abbrev=40 --match="${pattern}") > "${objroot}VERSION.tmp" 2>/dev/null
-      if test $? -eq 0 ; then
-        mv "${objroot}VERSION.tmp" "${objroot}VERSION"
-        break
-      fi
+AC_ARG_WITH([version],
+  [AS_HELP_STRING([--with-version=<major>.<minor>.<bugfix>-<nrev>-g<gid>],
+   [Version string])],
+  [
+    echo "${with_version}" | grep ['^[0-9]\+\.[0-9]\+\.[0-9]\+-[0-9]\+-g[0-9a-f]\+$'] 2>&1 1>/dev/null
+    if test $? -ne 0 ; then
+      AC_MSG_ERROR([${with_version} does not match <major>.<minor>.<bugfix>-<nrev>-g<gid>])
     fi
-  done
-fi
-rm -f "${objroot}VERSION.tmp"
+    echo "$with_version" > "${objroot}VERSION"
+  ], [
+    dnl Set VERSION if source directory is inside a git repository.
+    if test "x`test ! \"${srcroot}\" && cd \"${srcroot}\"; git rev-parse --is-inside-work-tree 2>/dev/null`" = "xtrue" ; then
+      dnl Pattern globs aren't powerful enough to match both single- and
+      dnl double-digit version numbers, so iterate over patterns to support up
+      dnl to version 99.99.99 without any accidental matches.
+      for pattern in ['[0-9].[0-9].[0-9]' '[0-9].[0-9].[0-9][0-9]' \
+                     '[0-9].[0-9][0-9].[0-9]' '[0-9].[0-9][0-9].[0-9][0-9]' \
+                     '[0-9][0-9].[0-9].[0-9]' '[0-9][0-9].[0-9].[0-9][0-9]' \
+                     '[0-9][0-9].[0-9][0-9].[0-9]' \
+                     '[0-9][0-9].[0-9][0-9].[0-9][0-9]']; do
+        (test ! "${srcroot}" && cd "${srcroot}"; git describe --long --abbrev=40 --match="${pattern}") > "${objroot}VERSION.tmp" 2>/dev/null
+        if test $? -eq 0 ; then
+          mv "${objroot}VERSION.tmp" "${objroot}VERSION"
+          break
+        fi
+      done
+    fi
+    rm -f "${objroot}VERSION.tmp"
+  ])
+
 if test ! -e "${objroot}VERSION" ; then
   if test ! -e "${srcroot}VERSION" ; then
     AC_MSG_RESULT(
       [Missing VERSION file, and unable to generate it; creating bogus VERSION])
     echo "0.0.0-0-g0000000000000000000000000000000000000000" > "${objroot}VERSION"
   else
     cp ${srcroot}VERSION ${objroot}VERSION
   fi
@@ -1224,23 +1326,86 @@ if test "x$abi" != "xpecoff" ; then
   dnl first, but try libc too before failing.
   AC_CHECK_LIB([pthread], [pthread_create], [LIBS="$LIBS -lpthread"],
                [AC_SEARCH_LIBS([pthread_create], , ,
                                AC_MSG_ERROR([libpthread is missing]))])
 fi
 
 CPPFLAGS="$CPPFLAGS -D_REENTRANT"
 
-dnl Check whether clock_gettime(2) is in libc or librt.  This function is only
-dnl used in test code, so save the result to TESTLIBS to avoid poluting LIBS.
-SAVED_LIBS="${LIBS}"
-LIBS=
-AC_SEARCH_LIBS([clock_gettime], [rt], [TESTLIBS="${LIBS}"])
-AC_SUBST([TESTLIBS])
-LIBS="${SAVED_LIBS}"
+dnl Check whether clock_gettime(2) is in libc or librt.
+AC_SEARCH_LIBS([clock_gettime], [rt])
+
+dnl Cray wrapper compiler often adds `-lrt` when using `-static`. Check with
+dnl `-dynamic` as well in case a user tries to dynamically link in jemalloc
+if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then
+  if test "$ac_cv_search_clock_gettime" != "-lrt"; then
+    SAVED_CFLAGS="${CFLAGS}"
+
+    unset ac_cv_search_clock_gettime
+    JE_CFLAGS_APPEND([-dynamic])
+    AC_SEARCH_LIBS([clock_gettime], [rt])
+
+    CFLAGS="${SAVED_CFLAGS}"
+  fi
+fi
+
+dnl check for CLOCK_MONOTONIC_COARSE (Linux-specific).
+JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC_COARSE, ...)], [
+#include <time.h>
+], [
+	struct timespec ts;
+
+	clock_gettime(CLOCK_MONOTONIC_COARSE, &ts);
+], [je_cv_clock_monotonic_coarse])
+if test "x${je_cv_clock_monotonic_coarse}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE])
+fi
+
+dnl check for CLOCK_MONOTONIC.
+JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC, ...)], [
+#include <unistd.h>
+#include <time.h>
+], [
+	struct timespec ts;
+
+	clock_gettime(CLOCK_MONOTONIC, &ts);
+#if !defined(_POSIX_MONOTONIC_CLOCK) || _POSIX_MONOTONIC_CLOCK < 0
+#  error _POSIX_MONOTONIC_CLOCK missing/invalid
+#endif
+], [je_cv_clock_monotonic])
+if test "x${je_cv_clock_monotonic}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC])
+fi
+
+dnl Check for mach_absolute_time().
+JE_COMPILABLE([mach_absolute_time()], [
+#include <mach/mach_time.h>
+], [
+	mach_absolute_time();
+], [je_cv_mach_absolute_time])
+if test "x${je_cv_mach_absolute_time}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_HAVE_MACH_ABSOLUTE_TIME])
+fi
+
+dnl Check if syscall(2) is usable.  Treat warnings as errors, so that e.g. OS X
+dnl 10.12's deprecation warning prevents use.
+SAVED_CFLAGS="${CFLAGS}"
+JE_CFLAGS_APPEND([-Werror])
+JE_COMPILABLE([syscall(2)], [
+#include <sys/syscall.h>
+#include <unistd.h>
+], [
+	syscall(SYS_write, 2, "hello", 5);
+],
+              [je_cv_syscall])
+CFLAGS="${SAVED_CFLAGS}"
+if test "x$je_cv_syscall" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_HAVE_SYSCALL], [ ])
+fi
 
 dnl Check if the GNU-specific secure_getenv function exists.
 AC_CHECK_FUNC([secure_getenv],
               [have_secure_getenv="1"],
               [have_secure_getenv="0"]
              )
 if test "x$have_secure_getenv" = "x1" ; then
   AC_DEFINE([JEMALLOC_HAVE_SECURE_GETENV], [ ])
@@ -1286,31 +1451,37 @@ AC_ARG_ENABLE([lazy_lock],
 [if test "x$enable_lazy_lock" = "xno" ; then
   enable_lazy_lock="0"
 else
   enable_lazy_lock="1"
 fi
 ],
 [enable_lazy_lock=""]
 )
-if test "x$enable_lazy_lock" = "x" -a "x${force_lazy_lock}" = "x1" ; then
-  AC_MSG_RESULT([Forcing lazy-lock to avoid allocator/threading bootstrap issues])
-  enable_lazy_lock="1"
+if test "x${enable_lazy_lock}" = "x" ; then
+  if test "x${force_lazy_lock}" = "x1" ; then
+    AC_MSG_RESULT([Forcing lazy-lock to avoid allocator/threading bootstrap issues])
+    enable_lazy_lock="1"
+  else
+    enable_lazy_lock="0"
+  fi
+fi
+if test "x${enable_lazy_lock}" = "x1" -a "x${abi}" = "xpecoff" ; then
+  AC_MSG_RESULT([Forcing no lazy-lock because thread creation monitoring is unimplemented])
+  enable_lazy_lock="0"
 fi
 if test "x$enable_lazy_lock" = "x1" ; then
   if test "x$abi" != "xpecoff" ; then
     AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])])
     AC_CHECK_FUNC([dlsym], [],
       [AC_CHECK_LIB([dl], [dlsym], [LIBS="$LIBS -ldl"],
                     [AC_MSG_ERROR([libdl is missing])])
       ])
   fi
   AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ])
-else
-  enable_lazy_lock="0"
 fi
 AC_SUBST([enable_lazy_lock])
 
 AC_ARG_ENABLE([tls],
   [AS_HELP_STRING([--disable-tls], [Disable thread-local storage (__thread keyword)])],
 if test "x$enable_tls" = "xno" ; then
   enable_tls="0"
 else
@@ -1489,16 +1660,30 @@ AC_CACHE_CHECK([for __builtin_clz],
                                [je_cv_builtin_clz=yes],
                                [je_cv_builtin_clz=no])])
 
 if test "x${je_cv_builtin_clz}" = "xyes" ; then
   AC_DEFINE([JEMALLOC_HAVE_BUILTIN_CLZ], [ ])
 fi
 
 dnl ============================================================================
+dnl Check for os_unfair_lock operations as provided on Darwin.
+
+JE_COMPILABLE([Darwin os_unfair_lock_*()], [
+#include <os/lock.h>
+], [
+	os_unfair_lock lock = OS_UNFAIR_LOCK_INIT;
+	os_unfair_lock_lock(&lock);
+	os_unfair_lock_unlock(&lock);
+], [je_cv_os_unfair_lock])
+if test "x${je_cv_os_unfair_lock}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_OS_UNFAIR_LOCK], [ ])
+fi
+
+dnl ============================================================================
 dnl Check for spinlock(3) operations as provided on Darwin.
 
 JE_COMPILABLE([Darwin OSSpin*()], [
 #include <libkern/OSAtomic.h>
 #include <inttypes.h>
 ], [
 	OSSpinLock lock = 0;
 	OSSpinLockLock(&lock);
@@ -1732,21 +1917,21 @@ dnl ====================================
 dnl Print out the results of configuration.
 AC_MSG_RESULT([===============================================================================])
 AC_MSG_RESULT([jemalloc version   : ${jemalloc_version}])
 AC_MSG_RESULT([library revision   : ${rev}])
 AC_MSG_RESULT([])
 AC_MSG_RESULT([CONFIG             : ${CONFIG}])
 AC_MSG_RESULT([CC                 : ${CC}])
 AC_MSG_RESULT([CFLAGS             : ${CFLAGS}])
+AC_MSG_RESULT([EXTRA_CFLAGS       : ${EXTRA_CFLAGS}])
 AC_MSG_RESULT([CPPFLAGS           : ${CPPFLAGS}])
 AC_MSG_RESULT([LDFLAGS            : ${LDFLAGS}])
 AC_MSG_RESULT([EXTRA_LDFLAGS      : ${EXTRA_LDFLAGS}])
 AC_MSG_RESULT([LIBS               : ${LIBS}])
-AC_MSG_RESULT([TESTLIBS           : ${TESTLIBS}])
 AC_MSG_RESULT([RPATH_EXTRA        : ${RPATH_EXTRA}])
 AC_MSG_RESULT([])
 AC_MSG_RESULT([XSLTPROC           : ${XSLTPROC}])
 AC_MSG_RESULT([XSLROOT            : ${XSLROOT}])
 AC_MSG_RESULT([])
 AC_MSG_RESULT([PREFIX             : ${PREFIX}])
 AC_MSG_RESULT([BINDIR             : ${BINDIR}])
 AC_MSG_RESULT([DATADIR            : ${DATADIR}])
--- a/memory/jemalloc/src/doc/html.xsl.in
+++ b/memory/jemalloc/src/doc/html.xsl.in
@@ -1,4 +1,5 @@
 <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
   <xsl:import href="@XSLROOT@/html/docbook.xsl"/>
   <xsl:import href="@abs_srcroot@doc/stylesheet.xsl"/>
+  <xsl:output method="xml" encoding="utf-8"/>
 </xsl:stylesheet>
--- a/memory/jemalloc/src/doc/jemalloc.xml.in
+++ b/memory/jemalloc/src/doc/jemalloc.xml.in
@@ -47,17 +47,17 @@
     <refname>malloc_usable_size</refname>
     -->
     <refpurpose>general purpose memory allocation functions</refpurpose>
   </refnamediv>
   <refsect1 id="library">
     <title>LIBRARY</title>
     <para>This manual describes jemalloc @jemalloc_version@.  More information
     can be found at the <ulink
-    url="http://www.canonware.com/jemalloc/">jemalloc website</ulink>.</para>
+    url="http://jemalloc.net/">jemalloc website</ulink>.</para>
   </refsect1>
   <refsynopsisdiv>
     <title>SYNOPSIS</title>
     <funcsynopsis>
       <funcsynopsisinfo>#include &lt;<filename class="headerfile">jemalloc/jemalloc.h</filename>&gt;</funcsynopsisinfo>
       <refsect2>
         <title>Standard API</title>
         <funcprototype>
@@ -175,73 +175,73 @@
       </refsect2>
     </funcsynopsis>
   </refsynopsisdiv>
   <refsect1 id="description">
     <title>DESCRIPTION</title>
     <refsect2>
       <title>Standard API</title>
 
-      <para>The <function>malloc<parameter/></function> function allocates
+      <para>The <function>malloc()</function> function allocates
       <parameter>size</parameter> bytes of uninitialized memory.  The allocated
       space is suitably aligned (after possible pointer coercion) for storage
       of any type of object.</para>
 
-      <para>The <function>calloc<parameter/></function> function allocates
+      <para>The <function>calloc()</function> function allocates
       space for <parameter>number</parameter> objects, each
       <parameter>size</parameter> bytes in length.  The result is identical to
-      calling <function>malloc<parameter/></function> with an argument of
+      calling <function>malloc()</function> with an argument of
       <parameter>number</parameter> * <parameter>size</parameter>, with the
       exception that the allocated memory is explicitly initialized to zero
       bytes.</para>
 
-      <para>The <function>posix_memalign<parameter/></function> function
+      <para>The <function>posix_memalign()</function> function
       allocates <parameter>size</parameter> bytes of memory such that the
       allocation's base address is a multiple of
       <parameter>alignment</parameter>, and returns the allocation in the value
       pointed to by <parameter>ptr</parameter>.  The requested
       <parameter>alignment</parameter> must be a power of 2 at least as large as
       <code language="C">sizeof(<type>void *</type>)</code>.</para>
 
-      <para>The <function>aligned_alloc<parameter/></function> function
+      <para>The <function>aligned_alloc()</function> function
       allocates <parameter>size</parameter> bytes of memory such that the
       allocation's base address is a multiple of
       <parameter>alignment</parameter>.  The requested
       <parameter>alignment</parameter> must be a power of 2.  Behavior is
       undefined if <parameter>size</parameter> is not an integral multiple of
       <parameter>alignment</parameter>.</para>
 
-      <para>The <function>realloc<parameter/></function> function changes the
+      <para>The <function>realloc()</function> function changes the
       size of the previously allocated memory referenced by
       <parameter>ptr</parameter> to <parameter>size</parameter> bytes.  The
       contents of the memory are unchanged up to the lesser of the new and old
       sizes.  If the new size is larger, the contents of the newly allocated
       portion of the memory are undefined.  Upon success, the memory referenced
       by <parameter>ptr</parameter> is freed and a pointer to the newly
       allocated memory is returned.  Note that
-      <function>realloc<parameter/></function> may move the memory allocation,
+      <function>realloc()</function> may move the memory allocation,
       resulting in a different return value than <parameter>ptr</parameter>.
       If <parameter>ptr</parameter> is <constant>NULL</constant>, the
-      <function>realloc<parameter/></function> function behaves identically to
-      <function>malloc<parameter/></function> for the specified size.</para>
+      <function>realloc()</function> function behaves identically to
+      <function>malloc()</function> for the specified size.</para>
 
-      <para>The <function>free<parameter/></function> function causes the
+      <para>The <function>free()</function> function causes the
       allocated memory referenced by <parameter>ptr</parameter> to be made
       available for future allocations.  If <parameter>ptr</parameter> is
       <constant>NULL</constant>, no action occurs.</para>
     </refsect2>
     <refsect2>
       <title>Non-standard API</title>
-      <para>The <function>mallocx<parameter/></function>,
-      <function>rallocx<parameter/></function>,
-      <function>xallocx<parameter/></function>,
-      <function>sallocx<parameter/></function>,
-      <function>dallocx<parameter/></function>,
-      <function>sdallocx<parameter/></function>, and
-      <function>nallocx<parameter/></function> functions all have a
+      <para>The <function>mallocx()</function>,
+      <function>rallocx()</function>,
+      <function>xallocx()</function>,
+      <function>sallocx()</function>,
+      <function>dallocx()</function>,
+      <function>sdallocx()</function>, and
+      <function>nallocx()</function> functions all have a
       <parameter>flags</parameter> argument that can be used to specify
       options.  The functions only check the options that are contextually
       relevant.  Use bitwise or (<code language="C">|</code>) operations to
       specify one or more of the following:
         <variablelist>
           <varlistentry id="MALLOCX_LG_ALIGN">
             <term><constant>MALLOCX_LG_ALIGN(<parameter>la</parameter>)
             </constant></term>
@@ -302,87 +302,87 @@
             <parameter>a</parameter>.  This macro has no effect for regions that
             were allocated via an arena other than the one specified.  This
             macro does not validate that <parameter>a</parameter> specifies an
             arena index in the valid range.</para></listitem>
           </varlistentry>
         </variablelist>
       </para>
 
-      <para>The <function>mallocx<parameter/></function> function allocates at
+      <para>The <function>mallocx()</function> function allocates at
       least <parameter>size</parameter> bytes of memory, and returns a pointer
       to the base address of the allocation.  Behavior is undefined if
       <parameter>size</parameter> is <constant>0</constant>.</para>
 
-      <para>The <function>rallocx<parameter/></function> function resizes the
+      <para>The <function>rallocx()</function> function resizes the
       allocation at <parameter>ptr</parameter> to be at least
       <parameter>size</parameter> bytes, and returns a pointer to the base
       address of the resulting allocation, which may or may not have moved from
       its original location.  Behavior is undefined if
       <parameter>size</parameter> is <constant>0</constant>.</para>
 
-      <para>The <function>xallocx<parameter/></function> function resizes the
+      <para>The <function>xallocx()</function> function resizes the
       allocation at <parameter>ptr</parameter> in place to be at least
       <parameter>size</parameter> bytes, and returns the real size of the
       allocation.  If <parameter>extra</parameter> is non-zero, an attempt is
       made to resize the allocation to be at least <code
       language="C">(<parameter>size</parameter> +
       <parameter>extra</parameter>)</code> bytes, though inability to allocate
       the extra byte(s) will not by itself result in failure to resize.
       Behavior is undefined if <parameter>size</parameter> is
       <constant>0</constant>, or if <code
       language="C">(<parameter>size</parameter> + <parameter>extra</parameter>
       &gt; <constant>SIZE_T_MAX</constant>)</code>.</para>
 
-      <para>The <function>sallocx<parameter/></function> function returns the
+      <para>The <function>sallocx()</function> function returns the
       real size of the allocation at <parameter>ptr</parameter>.</para>
 
-      <para>The <function>dallocx<parameter/></function> function causes the
+      <para>The <function>dallocx()</function> function causes the
       memory referenced by <parameter>ptr</parameter> to be made available for
       future allocations.</para>
 
-      <para>The <function>sdallocx<parameter/></function> function is an
-      extension of <function>dallocx<parameter/></function> with a
+      <para>The <function>sdallocx()</function> function is an
+      extension of <function>dallocx()</function> with a
       <parameter>size</parameter> parameter to allow the caller to pass in the
       allocation size as an optimization.  The minimum valid input size is the
       original requested size of the allocation, and the maximum valid input
       size is the corresponding value returned by
-      <function>nallocx<parameter/></function> or
-      <function>sallocx<parameter/></function>.</para>
+      <function>nallocx()</function> or
+      <function>sallocx()</function>.</para>
 
-      <para>The <function>nallocx<parameter/></function> function allocates no
+      <para>The <function>nallocx()</function> function allocates no
       memory, but it performs the same size computation as the
-      <function>mallocx<parameter/></function> function, and returns the real
+      <function>mallocx()</function> function, and returns the real
       size of the allocation that would result from the equivalent
-      <function>mallocx<parameter/></function> function call, or
+      <function>mallocx()</function> function call, or
       <constant>0</constant> if the inputs exceed the maximum supported size
       class and/or alignment.  Behavior is undefined if
       <parameter>size</parameter> is <constant>0</constant>.</para>
 
-      <para>The <function>mallctl<parameter/></function> function provides a
+      <para>The <function>mallctl()</function> function provides a
       general interface for introspecting the memory allocator, as well as
       setting modifiable parameters and triggering actions.  The
       period-separated <parameter>name</parameter> argument specifies a
       location in a tree-structured namespace; see the <xref
       linkend="mallctl_namespace" xrefstyle="template:%t"/> section for
       documentation on the tree contents.  To read a value, pass a pointer via
       <parameter>oldp</parameter> to adequate space to contain the value, and a
       pointer to its length via <parameter>oldlenp</parameter>; otherwise pass
       <constant>NULL</constant> and <constant>NULL</constant>.  Similarly, to
       write a value, pass a pointer to the value via
       <parameter>newp</parameter>, and its length via
       <parameter>newlen</parameter>; otherwise pass <constant>NULL</constant>
       and <constant>0</constant>.</para>
 
-      <para>The <function>mallctlnametomib<parameter/></function> function
+      <para>The <function>mallctlnametomib()</function> function
       provides a way to avoid repeated name lookups for applications that
       repeatedly query the same portion of the namespace, by translating a name
-      to a &ldquo;Management Information Base&rdquo; (MIB) that can be passed
-      repeatedly to <function>mallctlbymib<parameter/></function>.  Upon
-      successful return from <function>mallctlnametomib<parameter/></function>,
+      to a <quote>Management Information Base</quote> (MIB) that can be passed
+      repeatedly to <function>mallctlbymib()</function>.  Upon
+      successful return from <function>mallctlnametomib()</function>,
       <parameter>mibp</parameter> contains an array of
       <parameter>*miblenp</parameter> integers, where
       <parameter>*miblenp</parameter> is the lesser of the number of components
       in <parameter>name</parameter> and the input value of
       <parameter>*miblenp</parameter>.  Thus it is possible to pass a
       <parameter>*miblenp</parameter> that is smaller than the number of
       period-separated name components, which results in a partial MIB that can
       be used as the basis for constructing a complete MIB.  For name
@@ -405,67 +405,68 @@ for (i = 0; i < nbins; i++) {
 	size_t bin_size;
 
 	mib[2] = i;
 	len = sizeof(bin_size);
 	mallctlbymib(mib, miblen, &bin_size, &len, NULL, 0);
 	/* Do something with bin_size... */
 }]]></programlisting></para>
 
-      <para>The <function>malloc_stats_print<parameter/></function> function
-      writes human-readable summary statistics via the
-      <parameter>write_cb</parameter> callback function pointer and
-      <parameter>cbopaque</parameter> data passed to
-      <parameter>write_cb</parameter>, or
-      <function>malloc_message<parameter/></function> if
-      <parameter>write_cb</parameter> is <constant>NULL</constant>.  This
-      function can be called repeatedly.  General information that never
-      changes during execution can be omitted by specifying "g" as a character
+      <para>The <function>malloc_stats_print()</function> function writes
+      summary statistics via the <parameter>write_cb</parameter> callback
+      function pointer and <parameter>cbopaque</parameter> data passed to
+      <parameter>write_cb</parameter>, or <function>malloc_message()</function>
+      if <parameter>write_cb</parameter> is <constant>NULL</constant>.  The
+      statistics are presented in human-readable form unless <quote>J</quote> is
+      specified as a character within the <parameter>opts</parameter> string, in
+      which case the statistics are presented in <ulink
+      url="http://www.json.org/">JSON format</ulink>.  This function can be
+      called repeatedly.  General information that never changes during
+      execution can be omitted by specifying <quote>g</quote> as a character
       within the <parameter>opts</parameter> string.  Note that
-      <function>malloc_message<parameter/></function> uses the
-      <function>mallctl*<parameter/></function> functions internally, so
-      inconsistent statistics can be reported if multiple threads use these
-      functions simultaneously.  If <option>--enable-stats</option> is
-      specified during configuration, &ldquo;m&rdquo; and &ldquo;a&rdquo; can
-      be specified to omit merged arena and per arena statistics, respectively;
-      &ldquo;b&rdquo;, &ldquo;l&rdquo;, and &ldquo;h&rdquo; can be specified to
-      omit per size class statistics for bins, large objects, and huge objects,
-      respectively.  Unrecognized characters are silently ignored.  Note that
-      thread caching may prevent some statistics from being completely up to
-      date, since extra locking would be required to merge counters that track
-      thread cache operations.
-      </para>
+      <function>malloc_message()</function> uses the
+      <function>mallctl*()</function> functions internally, so inconsistent
+      statistics can be reported if multiple threads use these functions
+      simultaneously.  If <option>--enable-stats</option> is specified during
+      configuration, <quote>m</quote> and <quote>a</quote> can be specified to
+      omit merged arena and per arena statistics, respectively;
+      <quote>b</quote>, <quote>l</quote>, and <quote>h</quote> can be specified
+      to omit per size class statistics for bins, large objects, and huge
+      objects, respectively.  Unrecognized characters are silently ignored.
+      Note that thread caching may prevent some statistics from being completely
+      up to date, since extra locking would be required to merge counters that
+      track thread cache operations.</para>
 
-      <para>The <function>malloc_usable_size<parameter/></function> function
+      <para>The <function>malloc_usable_size()</function> function
       returns the usable size of the allocation pointed to by
       <parameter>ptr</parameter>.  The return value may be larger than the size
       that was requested during allocation.  The
-      <function>malloc_usable_size<parameter/></function> function is not a
-      mechanism for in-place <function>realloc<parameter/></function>; rather
+      <function>malloc_usable_size()</function> function is not a
+      mechanism for in-place <function>realloc()</function>; rather
       it is provided solely as a tool for introspection purposes.  Any
       discrepancy between the requested allocation size and the size reported
-      by <function>malloc_usable_size<parameter/></function> should not be
+      by <function>malloc_usable_size()</function> should not be
       depended on, since such behavior is entirely implementation-dependent.
       </para>
     </refsect2>
   </refsect1>
   <refsect1 id="tuning">
     <title>TUNING</title>
     <para>Once, when the first call is made to one of the memory allocation
     routines, the allocator initializes its internals based in part on various
     options that can be specified at compile- or run-time.</para>
 
     <para>The string specified via <option>--with-malloc-conf</option>, the
     string pointed to by the global variable <varname>malloc_conf</varname>, the
-    &ldquo;name&rdquo; of the file referenced by the symbolic link named
+    <quote>name</quote> of the file referenced by the symbolic link named
     <filename class="symlink">/etc/malloc.conf</filename>, and the value of the
     environment variable <envar>MALLOC_CONF</envar>, will be interpreted, in
     that order, from left to right as options.  Note that
     <varname>malloc_conf</varname> may be read before
-    <function>main<parameter/></function> is entered, so the declaration of
+    <function>main()</function> is entered, so the declaration of
     <varname>malloc_conf</varname> should specify an initializer that contains
     the final value to be read by jemalloc.  <option>--with-malloc-conf</option>
     and <varname>malloc_conf</varname> are compile-time mechanisms, whereas
     <filename class="symlink">/etc/malloc.conf</filename> and
     <envar>MALLOC_CONF</envar> can be safely set any time prior to program
     invocation.</para>
 
     <para>An options string is a comma-separated list of option:value pairs.
@@ -535,33 +536,33 @@ for (i = 0; i < nbins; i++) {
     up to the nearest power of two that is at least <code
     language="C">sizeof(<type>double</type>)</code>.  All other object size
     classes are multiples of the quantum, spaced such that there are four size
     classes for each doubling in size, which limits internal fragmentation to
     approximately 20% for all but the smallest size classes.  Small size classes
     are smaller than four times the page size, large size classes are smaller
     than the chunk size (see the <link
     linkend="opt.lg_chunk"><mallctl>opt.lg_chunk</mallctl></link> option), and
-    huge size classes extend from the chunk size up to one size class less than
-    the full address space size.</para>
+    huge size classes extend from the chunk size up to the largest size class
+    that does not exceed <constant>PTRDIFF_MAX</constant>.</para>
 
     <para>Allocations are packed tightly together, which can be an issue for
     multi-threaded applications.  If you need to assure that allocations do not
     suffer from cacheline sharing, round your allocation requests up to the
     nearest multiple of the cacheline size, or specify cacheline alignment when
     allocating.</para>
 
-    <para>The <function>realloc<parameter/></function>,
-    <function>rallocx<parameter/></function>, and
-    <function>xallocx<parameter/></function> functions may resize allocations
+    <para>The <function>realloc()</function>,
+    <function>rallocx()</function>, and
+    <function>xallocx()</function> functions may resize allocations
     without moving them under limited circumstances.  Unlike the
-    <function>*allocx<parameter/></function> API, the standard API does not
+    <function>*allocx()</function> API, the standard API does not
     officially round up the usable size of an allocation to the nearest size
     class, so technically it is necessary to call
-    <function>realloc<parameter/></function> to grow e.g. a 9-byte allocation to
+    <function>realloc()</function> to grow e.g. a 9-byte allocation to
     16 bytes, or shrink a 16-byte allocation to 9 bytes.  Growth and shrinkage
     trivially succeeds in place as long as the pre-size and post-size both round
     up to the same size class.  No other API guarantees are made regarding
     in-place resizing, but the current implementation also tries to resize large
     and huge allocations in place, as long as the pre-size and post-size are
     both large or both huge.  In such cases shrinkage always succeeds for large
     size classes, but for huge size classes the chunk allocator must support
     splitting (see <link
@@ -654,17 +655,17 @@ for (i = 0; i < nbins; i++) {
           <entry>128 KiB</entry>
           <entry>[640 KiB, 768 KiB, 896 KiB, 1 MiB]</entry>
         </row>
         <row>
           <entry>256 KiB</entry>
           <entry>[1280 KiB, 1536 KiB, 1792 KiB]</entry>
         </row>
         <row>
-          <entry morerows="6">Huge</entry>
+          <entry morerows="8">Huge</entry>
           <entry>256 KiB</entry>
           <entry>[2 MiB]</entry>
         </row>
         <row>
           <entry>512 KiB</entry>
           <entry>[2560 KiB, 3 MiB, 3584 KiB, 4 MiB]</entry>
         </row>
         <row>
@@ -682,24 +683,32 @@ for (i = 0; i < nbins; i++) {
         <row>
           <entry>8 MiB</entry>
           <entry>[40 MiB, 48 MiB, 56 MiB, 64 MiB]</entry>
         </row>
         <row>
           <entry>...</entry>
           <entry>...</entry>
         </row>
+        <row>
+          <entry>512 PiB</entry>
+          <entry>[2560 PiB, 3 EiB, 3584 PiB, 4 EiB]</entry>
+        </row>
+        <row>
+          <entry>1 EiB</entry>
+          <entry>[5 EiB, 6 EiB, 7 EiB]</entry>
+        </row>
       </tbody>
       </tgroup>
     </table>
   </refsect1>
   <refsect1 id="mallctl_namespace">
     <title>MALLCTL NAMESPACE</title>
     <para>The following names are defined in the namespace accessible via the
-    <function>mallctl*<parameter/></function> functions.  Value types are
+    <function>mallctl*()</function> functions.  Value types are
     specified in parentheses, their readable/writable statuses are encoded as
     <literal>rw</literal>, <literal>r-</literal>, <literal>-w</literal>, or
     <literal>--</literal>, and required build configuration flags follow, if
     any.  A name element encoded as <literal>&lt;i&gt;</literal> or
     <literal>&lt;j&gt;</literal> indicates an integer component, where the
     integer varies from 0 to some upper value that must be determined via
     introspection.  In the case of <mallctl>stats.arenas.&lt;i&gt;.*</mallctl>,
     <literal>&lt;i&gt;</literal> equal to <link
@@ -720,17 +729,17 @@ for (i = 0; i < nbins; i++) {
 
       <varlistentry id="epoch">
         <term>
           <mallctl>epoch</mallctl>
           (<type>uint64_t</type>)
           <literal>rw</literal>
         </term>
         <listitem><para>If a value is passed in, refresh the data from which
-        the <function>mallctl*<parameter/></function> functions report values,
+        the <function>mallctl*()</function> functions report values,
         and increment the epoch.  Return the current epoch.  This is useful for
         detecting whether another thread caused a refresh.</para></listitem>
       </varlistentry>
 
       <varlistentry id="config.cache_oblivious">
         <term>
           <mallctl>config.cache_oblivious</mallctl>
           (<type>bool</type>)
@@ -904,22 +913,22 @@ for (i = 0; i < nbins; i++) {
         </term>
         <listitem><para>dss (<citerefentry><refentrytitle>sbrk</refentrytitle>
         <manvolnum>2</manvolnum></citerefentry>) allocation precedence as
         related to <citerefentry><refentrytitle>mmap</refentrytitle>
         <manvolnum>2</manvolnum></citerefentry> allocation.  The following
         settings are supported if
         <citerefentry><refentrytitle>sbrk</refentrytitle>
         <manvolnum>2</manvolnum></citerefentry> is supported by the operating
-        system: &ldquo;disabled&rdquo;, &ldquo;primary&rdquo;, and
-        &ldquo;secondary&rdquo;; otherwise only &ldquo;disabled&rdquo; is
-        supported.  The default is &ldquo;secondary&rdquo; if
+        system: <quote>disabled</quote>, <quote>primary</quote>, and
+        <quote>secondary</quote>; otherwise only <quote>disabled</quote> is
+        supported.  The default is <quote>secondary</quote> if
         <citerefentry><refentrytitle>sbrk</refentrytitle>
         <manvolnum>2</manvolnum></citerefentry> is supported by the operating
-        system; &ldquo;disabled&rdquo; otherwise.
+        system; <quote>disabled</quote> otherwise.
         </para></listitem>
       </varlistentry>
 
       <varlistentry id="opt.lg_chunk">
         <term>
           <mallctl>opt.lg_chunk</mallctl>
           (<type>size_t</type>)
           <literal>r-</literal>
@@ -1000,50 +1009,51 @@ for (i = 0; i < nbins; i++) {
 
       <varlistentry id="opt.stats_print">
         <term>
           <mallctl>opt.stats_print</mallctl>
           (<type>bool</type>)
           <literal>r-</literal>
         </term>
         <listitem><para>Enable/disable statistics printing at exit.  If
-        enabled, the <function>malloc_stats_print<parameter/></function>
+        enabled, the <function>malloc_stats_print()</function>
         function is called at program exit via an
         <citerefentry><refentrytitle>atexit</refentrytitle>
         <manvolnum>3</manvolnum></citerefentry> function.  If
         <option>--enable-stats</option> is specified during configuration, this
         has the potential to cause deadlock for a multi-threaded process that
         exits while one or more threads are executing in the memory allocation
-        functions.  Furthermore, <function>atexit<parameter/></function> may
+        functions.  Furthermore, <function>atexit()</function> may
         allocate memory during application initialization and then deadlock
         internally when jemalloc in turn calls
-        <function>atexit<parameter/></function>, so this option is not
+        <function>atexit()</function>, so this option is not
         universally usable (though the application can register its own
-        <function>atexit<parameter/></function> function with equivalent
+        <function>atexit()</function> function with equivalent
         functionality).  Therefore, this option should only be used with care;
         it is primarily intended as a performance tuning aid during application
         development.  This option is disabled by default.</para></listitem>
       </varlistentry>
 
       <varlistentry id="opt.junk">
         <term>
           <mallctl>opt.junk</mallctl>
           (<type>const char *</type>)
           <literal>r-</literal>
           [<option>--enable-fill</option>]
         </term>
-        <listitem><para>Junk filling.  If set to "alloc", each byte of
-        uninitialized allocated memory will be initialized to
-        <literal>0xa5</literal>.  If set to "free", all deallocated memory will
-        be initialized to <literal>0x5a</literal>.  If set to "true", both
-        allocated and deallocated memory will be initialized, and if set to
-        "false", junk filling be disabled entirely.  This is intended for
-        debugging and will impact performance negatively.  This option is
-        "false" by default unless <option>--enable-debug</option> is specified
-        during configuration, in which case it is "true" by default unless
+        <listitem><para>Junk filling.  If set to <quote>alloc</quote>, each byte
+        of uninitialized allocated memory will be initialized to
+        <literal>0xa5</literal>.  If set to <quote>free</quote>, all deallocated
+        memory will be initialized to <literal>0x5a</literal>.  If set to
+        <quote>true</quote>, both allocated and deallocated memory will be
+        initialized, and if set to <quote>false</quote>, junk filling be
+        disabled entirely.  This is intended for debugging and will impact
+        performance negatively.  This option is <quote>false</quote> by default
+        unless <option>--enable-debug</option> is specified during
+        configuration, in which case it is <quote>true</quote> by default unless
         running inside <ulink
         url="http://valgrind.org/">Valgrind</ulink>.</para></listitem>
       </varlistentry>
 
       <varlistentry id="opt.quarantine">
         <term>
           <mallctl>opt.quarantine</mallctl>
           (<type>size_t</type>)
@@ -1088,18 +1098,18 @@ for (i = 0; i < nbins; i++) {
           <mallctl>opt.zero</mallctl>
           (<type>bool</type>)
           <literal>r-</literal>
           [<option>--enable-fill</option>]
         </term>
         <listitem><para>Zero filling enabled/disabled.  If enabled, each byte
         of uninitialized allocated memory will be initialized to 0.  Note that
         this initialization only happens once for each byte, so
-        <function>realloc<parameter/></function> and
-        <function>rallocx<parameter/></function> calls do not zero memory that
+        <function>realloc()</function> and
+        <function>rallocx()</function> calls do not zero memory that
         was previously allocated.  This is intended for debugging and will
         impact performance negatively.  This option is disabled by default.
         </para></listitem>
       </varlistentry>
 
       <varlistentry id="opt.utrace">
         <term>
           <mallctl>opt.utrace</mallctl>
@@ -1312,21 +1322,21 @@ malloc_conf = "xmalloc:true";]]></progra
         </term>
         <listitem><para>Use an
         <citerefentry><refentrytitle>atexit</refentrytitle>
         <manvolnum>3</manvolnum></citerefentry> function to dump final memory
         usage to a file named according to the pattern
         <filename>&lt;prefix&gt;.&lt;pid&gt;.&lt;seq&gt;.f.heap</filename>,
         where <literal>&lt;prefix&gt;</literal> is controlled by the <link
         linkend="opt.prof_prefix"><mallctl>opt.prof_prefix</mallctl></link>
-        option.  Note that <function>atexit<parameter/></function> may allocate
+        option.  Note that <function>atexit()</function> may allocate
         memory during application initialization and then deadlock internally
-        when jemalloc in turn calls <function>atexit<parameter/></function>, so
+        when jemalloc in turn calls <function>atexit()</function>, so
         this option is not universally usable (though the application can
-        register its own <function>atexit<parameter/></function> function with
+        register its own <function>atexit()</function> function with
         equivalent functionality).  This option is disabled by
         default.</para></listitem>
       </varlistentry>
 
       <varlistentry id="opt.prof_leak">
         <term>
           <mallctl>opt.prof_leak</mallctl>
           (<type>bool</type>)
@@ -1375,17 +1385,17 @@ malloc_conf = "xmalloc:true";]]></progra
           (<type>uint64_t *</type>)
           <literal>r-</literal>
           [<option>--enable-stats</option>]
         </term>
         <listitem><para>Get a pointer to the the value that is returned by the
         <link
         linkend="thread.allocated"><mallctl>thread.allocated</mallctl></link>
         mallctl.  This is useful for avoiding the overhead of repeated
-        <function>mallctl*<parameter/></function> calls.</para></listitem>
+        <function>mallctl*()</function> calls.</para></listitem>
       </varlistentry>
 
       <varlistentry id="thread.deallocated">
         <term>
           <mallctl>thread.deallocated</mallctl>
           (<type>uint64_t</type>)
           <literal>r-</literal>
           [<option>--enable-stats</option>]
@@ -1402,17 +1412,17 @@ malloc_conf = "xmalloc:true";]]></progra
           (<type>uint64_t *</type>)
           <literal>r-</literal>
           [<option>--enable-stats</option>]
         </term>
         <listitem><para>Get a pointer to the the value that is returned by the
         <link
         linkend="thread.deallocated"><mallctl>thread.deallocated</mallctl></link>
         mallctl.  This is useful for avoiding the overhead of repeated
-        <function>mallctl*<parameter/></function> calls.</para></listitem>
+        <function>mallctl*()</function> calls.</para></listitem>
       </varlistentry>
 
       <varlistentry id="thread.tcache.enabled">
         <term>
           <mallctl>thread.tcache.enabled</mallctl>
           (<type>bool</type>)
           <literal>rw</literal>
           [<option>--enable-tcache</option>]
@@ -1545,16 +1555,33 @@ malloc_conf = "xmalloc:true";]]></progra
         arena &lt;i&gt;, or for all arenas if &lt;i&gt; equals <link
         linkend="arenas.narenas"><mallctl>arenas.narenas</mallctl></link>.
         The proportion of unused dirty pages to be purged depends on the current
         time; see <link
         linkend="opt.decay_time"><mallctl>opt.decay_time</mallctl></link> for
         details.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="arena.i.reset">
+        <term>
+          <mallctl>arena.&lt;i&gt;.reset</mallctl>
+          (<type>void</type>)
+          <literal>--</literal>
+        </term>
+        <listitem><para>Discard all of the arena's extant allocations.  This
+        interface can only be used with arenas created via <link
+        linkend="arenas.extend"><mallctl>arenas.extend</mallctl></link>.  None
+        of the arena's discarded/cached allocations may accessed afterward.  As
+        part of this requirement, all thread caches which were used to
+        allocate/deallocate in conjunction with the arena must be flushed
+        beforehand.  This interface cannot be used if running inside Valgrind,
+        nor if the <link linkend="opt.quarantine">quarantine</link> size is
+        non-zero.</para></listitem>
+      </varlistentry>
+
       <varlistentry id="arena.i.dss">
         <term>
           <mallctl>arena.&lt;i&gt;.dss</mallctl>
           (<type>const char *</type>)
           <literal>rw</literal>
         </term>
         <listitem><para>Set the precedence of dss allocation as related to mmap
         allocation for arena &lt;i&gt;, or for all arenas if &lt;i&gt; equals
@@ -2156,16 +2183,35 @@ typedef struct {
         allocator.  This is a multiple of the chunk size, and is larger than
         <link linkend="stats.active"><mallctl>stats.active</mallctl></link>.
         This does not include inactive chunks, even those that contain unused
         dirty pages, which means that there is no strict ordering between this
         and <link
         linkend="stats.resident"><mallctl>stats.resident</mallctl></link>.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="stats.retained">
+        <term>
+          <mallctl>stats.retained</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Total number of bytes in virtual memory mappings that
+        were retained rather than being returned to the operating system via
+        e.g. <citerefentry><refentrytitle>munmap</refentrytitle>
+        <manvolnum>2</manvolnum></citerefentry>.  Retained virtual memory is
+        typically untouched, decommitted, or purged, so it has no strongly
+        associated physical memory (see <link
+        linkend="arena.i.chunk_hooks">chunk hooks</link> for details).  Retained
+        memory is excluded from mapped memory statistics, e.g. <link
+        linkend="stats.mapped"><mallctl>stats.mapped</mallctl></link>.
+        </para></listitem>
+      </varlistentry>
+
       <varlistentry id="stats.arenas.i.dss">
         <term>
           <mallctl>stats.arenas.&lt;i&gt;.dss</mallctl>
           (<type>const char *</type>)
           <literal>r-</literal>
         </term>
         <listitem><para>dss (<citerefentry><refentrytitle>sbrk</refentrytitle>
         <manvolnum>2</manvolnum></citerefentry>) allocation precedence as
@@ -2236,16 +2282,28 @@ typedef struct {
           <mallctl>stats.arenas.&lt;i&gt;.mapped</mallctl>
           (<type>size_t</type>)
           <literal>r-</literal>
           [<option>--enable-stats</option>]
         </term>
         <listitem><para>Number of mapped bytes.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="stats.arenas.i.retained">
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.retained</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Number of retained bytes.  See <link
+        linkend="stats.retained"><mallctl>stats.retained</mallctl></link> for
+        details.</para></listitem>
+      </varlistentry>
+
       <varlistentry id="stats.arenas.i.metadata.mapped">
         <term>
           <mallctl>stats.arenas.&lt;i&gt;.metadata.mapped</mallctl>
           (<type>size_t</type>)
           <literal>r-</literal>
           [<option>--enable-stats</option>]
         </term>
         <listitem><para>Number of mapped bytes in arena chunk headers, which
@@ -2673,17 +2731,17 @@ MAPPED_LIBRARIES:
     <title>DEBUGGING MALLOC PROBLEMS</title>
     <para>When debugging, it is a good idea to configure/build jemalloc with
     the <option>--enable-debug</option> and <option>--enable-fill</option>
     options, and recompile the program with suitable options and symbols for
     debugger support.  When so configured, jemalloc incorporates a wide variety
     of run-time assertions that catch application errors such as double-free,
     write-after-free, etc.</para>
 
-    <para>Programs often accidentally depend on &ldquo;uninitialized&rdquo;
+    <para>Programs often accidentally depend on <quote>uninitialized</quote>
     memory actually being filled with zero bytes.  Junk filling
     (see the <link linkend="opt.junk"><mallctl>opt.junk</mallctl></link>
     option) tends to expose such bugs in the form of obviously incorrect
     results and/or coredumps.  Conversely, zero
     filling (see the <link
     linkend="opt.zero"><mallctl>opt.zero</mallctl></link> option) eliminates
     the symptoms of such bugs.  Between these two options, it is usually
     possible to quickly detect, diagnose, and eliminate such bugs.</para>
@@ -2702,39 +2760,39 @@ MAPPED_LIBRARIES:
     dumping core.  If the <link
     linkend="opt.abort"><mallctl>opt.abort</mallctl></link> option is set, most
     warnings are treated as errors.</para>
 
     <para>The <varname>malloc_message</varname> variable allows the programmer
     to override the function which emits the text strings forming the errors
     and warnings if for some reason the <constant>STDERR_FILENO</constant> file
     descriptor is not suitable for this.
-    <function>malloc_message<parameter/></function> takes the
+    <function>malloc_message()</function> takes the
     <parameter>cbopaque</parameter> pointer argument that is
     <constant>NULL</constant> unless overridden by the arguments in a call to
-    <function>malloc_stats_print<parameter/></function>, followed by a string
+    <function>malloc_stats_print()</function>, followed by a string
     pointer.  Please note that doing anything which tries to allocate memory in
     this function is likely to result in a crash or deadlock.</para>
 
     <para>All messages are prefixed by
-    &ldquo;<computeroutput>&lt;jemalloc&gt;: </computeroutput>&rdquo;.</para>
+    <quote><computeroutput>&lt;jemalloc&gt;: </computeroutput></quote>.</para>
   </refsect1>
   <refsect1 id="return_values">
     <title>RETURN VALUES</title>
     <refsect2>
       <title>Standard API</title>
-      <para>The <function>malloc<parameter/></function> and
-      <function>calloc<parameter/></function> functions return a pointer to the
+      <para>The <function>malloc()</function> and
+      <function>calloc()</function> functions return a pointer to the
       allocated memory if successful; otherwise a <constant>NULL</constant>
       pointer is returned and <varname>errno</varname> is set to
       <errorname>ENOMEM</errorname>.</para>
 
-      <para>The <function>posix_memalign<parameter/></function> function
+      <para>The <function>posix_memalign()</function> function
       returns the value 0 if successful; otherwise it returns an error value.
-      The <function>posix_memalign<parameter/></function> function will fail
+      The <function>posix_memalign()</function> function will fail
       if:
         <variablelist>
           <varlistentry>
             <term><errorname>EINVAL</errorname></term>
 
             <listitem><para>The <parameter>alignment</parameter> parameter is
             not a power of 2 at least as large as
             <code language="C">sizeof(<type>void *</type>)</code>.
@@ -2743,75 +2801,75 @@ MAPPED_LIBRARIES:
           <varlistentry>
             <term><errorname>ENOMEM</errorname></term>
 
             <listitem><para>Memory allocation error.</para></listitem>
           </varlistentry>
         </variablelist>
       </para>
 
-      <para>The <function>aligned_alloc<parameter/></function> function returns
+      <para>The <function>aligned_alloc()</function> function returns
       a pointer to the allocated memory if successful; otherwise a
       <constant>NULL</constant> pointer is returned and
       <varname>errno</varname> is set.  The
-      <function>aligned_alloc<parameter/></function> function will fail if:
+      <function>aligned_alloc()</function> function will fail if:
         <variablelist>
           <varlistentry>
             <term><errorname>EINVAL</errorname></term>
 
             <listitem><para>The <parameter>alignment</parameter> parameter is
             not a power of 2.
             </para></listitem>
           </varlistentry>
           <varlistentry>
             <term><errorname>ENOMEM</errorname></term>
 
             <listitem><para>Memory allocation error.</para></listitem>
           </varlistentry>
         </variablelist>
       </para>
 
-      <para>The <function>realloc<parameter/></function> function returns a
+      <para>The <function>realloc()</function> function returns a
       pointer, possibly identical to <parameter>ptr</parameter>, to the
       allocated memory if successful; otherwise a <constant>NULL</constant>
       pointer is returned, and <varname>errno</varname> is set to
       <errorname>ENOMEM</errorname> if the error was the result of an
-      allocation failure.  The <function>realloc<parameter/></function>
+      allocation failure.  The <function>realloc()</function>
       function always leaves the original buffer intact when an error occurs.
       </para>
 
-      <para>The <function>free<parameter/></function> function returns no
+      <para>The <function>free()</function> function returns no
       value.</para>
     </refsect2>
     <refsect2>
       <title>Non-standard API</title>
-      <para>The <function>mallocx<parameter/></function> and
-      <function>rallocx<parameter/></function> functions return a pointer to
+      <para>The <function>mallocx()</function> and
+      <function>rallocx()</function> functions return a pointer to
       the allocated memory if successful; otherwise a <constant>NULL</constant>
       pointer is returned to indicate insufficient contiguous memory was
       available to service the allocation request.  </para>
 
-      <para>The <function>xallocx<parameter/></function> function returns the
+      <para>The <function>xallocx()</function> function returns the
       real size of the resulting resized allocation pointed to by
       <parameter>ptr</parameter>, which is a value less than
       <parameter>size</parameter> if the allocation could not be adequately
       grown in place.  </para>
 
-      <para>The <function>sallocx<parameter/></function> function returns the
+      <para>The <function>sallocx()</function> function returns the
       real size of the allocation pointed to by <parameter>ptr</parameter>.
       </para>
 
-      <para>The <function>nallocx<parameter/></function> returns the real size
+      <para>The <function>nallocx()</function> returns the real size
       that would result from a successful equivalent
-      <function>mallocx<parameter/></function> function call, or zero if
+      <function>mallocx()</function> function call, or zero if
       insufficient memory is available to perform the size computation.  </para>
 
-      <para>The <function>mallctl<parameter/></function>,
-      <function>mallctlnametomib<parameter/></function>, and
-      <function>mallctlbymib<parameter/></function> functions return 0 on
+      <para>The <function>mallctl()</function>,
+      <function>mallctlnametomib()</function>, and
+      <function>mallctlbymib()</function> functions return 0 on
       success; otherwise they return an error value.  The functions will fail
       if:
         <variablelist>
           <varlistentry>
             <term><errorname>EINVAL</errorname></term>
 
             <listitem><para><parameter>newp</parameter> is not
             <constant>NULL</constant>, and <parameter>newlen</parameter> is too
@@ -2837,23 +2895,23 @@ MAPPED_LIBRARIES:
 
             <listitem><para>A memory allocation failure
             occurred.</para></listitem>
           </varlistentry>
           <varlistentry>
             <term><errorname>EFAULT</errorname></term>
 
             <listitem><para>An interface with side effects failed in some way
-            not directly related to <function>mallctl*<parameter/></function>
+            not directly related to <function>mallctl*()</function>
             read/write processing.</para></listitem>
           </varlistentry>
         </variablelist>
       </para>
 
-      <para>The <function>malloc_usable_size<parameter/></function> function
+      <para>The <function>malloc_usable_size()</function> function
       returns the usable size of the allocation pointed to by
       <parameter>ptr</parameter>.  </para>
     </refsect2>
   </refsect1>
   <refsect1 id="environment">
     <title>ENVIRONMENT</title>
     <para>The following environment variable affects the execution of the
     allocation functions:
@@ -2891,18 +2949,18 @@ malloc_conf = "lg_chunk:24";]]></program
     <manvolnum>3</manvolnum></citerefentry>,
     <citerefentry><refentrytitle>atexit</refentrytitle>
     <manvolnum>3</manvolnum></citerefentry>,
     <citerefentry><refentrytitle>getpagesize</refentrytitle>
     <manvolnum>3</manvolnum></citerefentry></para>
   </refsect1>
   <refsect1 id="standards">
     <title>STANDARDS</title>
-    <para>The <function>malloc<parameter/></function>,
-    <function>calloc<parameter/></function>,
-    <function>realloc<parameter/></function>, and
-    <function>free<parameter/></function> functions conform to ISO/IEC
-    9899:1990 (&ldquo;ISO C90&rdquo;).</para>
+    <para>The <function>malloc()</function>,
+    <function>calloc()</function>,
+    <function>realloc()</function>, and
+    <function>free()</function> functions conform to ISO/IEC
+    9899:1990 (<quote>ISO C90</quote>).</para>
 
-    <para>The <function>posix_memalign<parameter/></function> function conforms
-    to IEEE Std 1003.1-2001 (&ldquo;POSIX.1&rdquo;).</para>
+    <para>The <function>posix_memalign()</function> function conforms
+    to IEEE Std 1003.1-2001 (<quote>POSIX.1</quote>).</para>
   </refsect1>
 </refentry>
--- a/memory/jemalloc/src/doc/stylesheet.xsl
+++ b/memory/jemalloc/src/doc/stylesheet.xsl
@@ -1,7 +1,10 @@
 <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
   <xsl:param name="funcsynopsis.style">ansi</xsl:param>
-  <xsl:param name="function.parens" select="1"/>
+  <xsl:param name="function.parens" select="0"/>
+  <xsl:template match="function">
+    <xsl:call-template name="inline.monoseq"/>
+  </xsl:template>
   <xsl:template match="mallctl">
-    "<xsl:call-template name="inline.monoseq"/>"
+    <quote><xsl:call-template name="inline.monoseq"/></quote>
   </xsl:template>
 </xsl:stylesheet>
--- a/memory/jemalloc/src/include/jemalloc/internal/arena.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/arena.h
@@ -31,21 +31,23 @@ typedef enum {
 } purge_mode_t;
 #define	PURGE_DEFAULT		purge_mode_ratio
 /* Default decay time in seconds. */
 #define	DECAY_TIME_DEFAULT	10
 /* Number of event ticks between time checks. */
 #define	DECAY_NTICKS_PER_UPDATE	1000
 
 typedef struct arena_runs_dirty_link_s arena_runs_dirty_link_t;
+typedef struct arena_avail_links_s arena_avail_links_t;
 typedef struct arena_run_s arena_run_t;
 typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t;
 typedef struct arena_chunk_map_misc_s arena_chunk_map_misc_t;
 typedef struct arena_chunk_s arena_chunk_t;
 typedef struct arena_bin_info_s arena_bin_info_t;
+typedef struct arena_decay_s arena_decay_t;
 typedef struct arena_bin_s arena_bin_t;
 typedef struct arena_s arena_t;
 typedef struct arena_tdata_s arena_tdata_t;
 
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
 
@@ -148,39 +150,39 @@ struct arena_runs_dirty_link_s {
 
 /*
  * Each arena_chunk_map_misc_t corresponds to one page within the chunk, just
  * like arena_chunk_map_bits_t.  Two separate arrays are stored within each
  * chunk header in order to improve cache locality.
  */
 struct arena_chunk_map_misc_s {
 	/*
-	 * Linkage for run trees.  There are two disjoint uses:
+	 * Linkage for run heaps.  There are two disjoint uses:
 	 *
-	 * 1) arena_t's runs_avail tree.
+	 * 1) arena_t's runs_avail heaps.
 	 * 2) arena_run_t conceptually uses this linkage for in-use non-full
 	 *    runs, rather than directly embedding linkage.
 	 */
-	rb_node(arena_chunk_map_misc_t)		rb_link;
+	phn(arena_chunk_map_misc_t)		ph_link;
 
 	union {
 		/* Linkage for list of dirty runs. */
 		arena_runs_dirty_link_t		rd;
 
 		/* Profile counters, used for large object runs. */
 		union {
 			void			*prof_tctx_pun;
 			prof_tctx_t		*prof_tctx;
 		};
 
 		/* Small region run metadata. */
 		arena_run_t			run;
 	};
 };
-typedef rb_tree(arena_chunk_map_misc_t) arena_run_tree_t;
+typedef ph(arena_chunk_map_misc_t) arena_run_heap_t;
 #endif /* JEMALLOC_ARENA_STRUCTS_A */
 
 #ifdef JEMALLOC_ARENA_STRUCTS_B
 /* Arena chunk header. */
 struct arena_chunk_s {
 	/*
 	 * A pointer to the arena that owns the chunk is stored within the node.
 	 * This field as a whole is used by chunks_rtree to support both
@@ -251,53 +253,104 @@ struct arena_bin_info_s {
 	 * bin.
 	 */
 	bitmap_info_t		bitmap_info;
 
 	/* Offset of first region in a run for this bin's size class. */
 	uint32_t		reg0_offset;
 };
 
+struct arena_decay_s {
+	/*
+	 * Approximate time in seconds from the creation of a set of unused
+	 * dirty pages until an equivalent set of unused dirty pages is purged
+	 * and/or reused.
+	 */
+	ssize_t			time;
+	/* time / SMOOTHSTEP_NSTEPS. */
+	nstime_t		interval;
+	/*
+	 * Time at which the current decay interval logically started.  We do
+	 * not actually advance to a new epoch until sometime after it starts
+	 * because of scheduling and computation delays, and it is even possible
+	 * to completely skip epochs.  In all cases, during epoch advancement we
+	 * merge all relevant activity into the most recently recorded epoch.
+	 */
+	nstime_t		epoch;
+	/* Deadline randomness generator. */
+	uint64_t		jitter_state;
+	/*
+	 * Deadline for current epoch.  This is the sum of interval and per
+	 * epoch jitter which is a uniform random variable in [0..interval).
+	 * Epochs always advance by precise multiples of interval, but we
+	 * randomize the deadline to reduce the likelihood of arenas purging in
+	 * lockstep.
+	 */
+	nstime_t		deadline;
+	/*
+	 * Number of dirty pages at beginning of current epoch.  During epoch
+	 * advancement we use the delta between arena->decay.ndirty and
+	 * arena->ndirty to determine how many dirty pages, if any, were
+	 * generated.
+	 */
+	size_t			ndirty;
+	/*
+	 * Trailing log of how many unused dirty pages were generated during
+	 * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last
+	 * element is the most recent epoch.  Corresponding epoch times are
+	 * relative to epoch.
+	 */
+	size_t			backlog[SMOOTHSTEP_NSTEPS];
+};
+
 struct arena_bin_s {
 	/*
 	 * All operations on runcur, runs, and stats require that lock be
 	 * locked.  Run allocation/deallocation are protected by the arena lock,
 	 * which may be acquired while holding one or more bin locks, but not
 	 * vise versa.
 	 */
 	malloc_mutex_t		lock;
 
 	/*
 	 * Current run being used to service allocations of this bin's size
 	 * class.
 	 */
 	arena_run_t		*runcur;
 
 	/*
-	 * Tree of non-full runs.  This tree is used when looking for an
+	 * Heap of non-full runs.  This heap is used when looking for an
 	 * existing run when runcur is no longer usable.  We choose the
 	 * non-full run that is lowest in memory; this policy tends to keep
 	 * objects packed well, and it can also help reduce the number of
 	 * almost-empty chunks.
 	 */
-	arena_run_tree_t	runs;
+	arena_run_heap_t	runs;
 
 	/* Bin statistics. */
 	malloc_bin_stats_t	stats;
 };
 
 struct arena_s {
 	/* This arena's index within the arenas array. */
 	unsigned		ind;
 
 	/*
-	 * Number of threads currently assigned to this arena.  This field is
-	 * synchronized via atomic operations.
+	 * Number of threads currently assigned to this arena, synchronized via
+	 * atomic operations.  Each thread has two distinct assignments, one for
+	 * application-serving allocation, and the other for internal metadata
+	 * allocation.  Internal metadata must not be allocated from arenas
+	 * created via the arenas.extend mallctl, because the arena.<i>.reset
+	 * mallctl indiscriminately discards all allocations for the affected
+	 * arena.
+	 *
+	 *   0: Application allocation.
+	 *   1: Internal metadata allocation.
 	 */
-	unsigned		nthreads;
+	unsigned		nthreads[2];
 
 	/*
 	 * There are three classes of arena operations from a locking
 	 * perspective:
 	 * 1) Thread assignment (modifies nthreads) is synchronized via atomics.
 	 * 2) Bin-related operations are protected by bin locks.
 	 * 3) Chunk- and run-related operations are protected by this mutex.
 	 */
@@ -312,20 +365,24 @@ struct arena_s {
 	ql_head(tcache_t)	tcache_ql;
 
 	uint64_t		prof_accumbytes;
 
 	/*
 	 * PRNG state for cache index randomization of large allocation base
 	 * pointers.
 	 */
-	uint64_t		offset_state;
+	size_t			offset_state;
 
 	dss_prec_t		dss_prec;
 
+
+	/* Extant arena chunks. */
+	ql_head(extent_node_t)	achunks;
+
 	/*
 	 * In order to avoid rapid chunk allocation/deallocation when an arena
 	 * oscillates right on the cusp of needing a new chunk, cache the most
 	 * recently freed chunk.  The spare is left in the arena's chunk trees
 	 * until it is deleted.
 	 *
 	 * There is one spare chunk per arena, rather than one spare total, in
 	 * order to avoid interactions between multiple threads that could make
@@ -376,62 +433,18 @@ struct arena_s {
 	 *        |            |    \-------/    \-------/   |         |
 	 *        |            |                             |         |
 	 *        |            |                             |         |
 	 *        \------------/                             \---------/
 	 */
 	arena_runs_dirty_link_t	runs_dirty;
 	extent_node_t		chunks_cache;
 
-	/*
-	 * Approximate time in seconds from the creation of a set of unused
-	 * dirty pages until an equivalent set of unused dirty pages is purged
-	 * and/or reused.
-	 */
-	ssize_t			decay_time;
-	/* decay_time / SMOOTHSTEP_NSTEPS. */
-	nstime_t		decay_interval;
-	/*
-	 * Time at which the current decay interval logically started.  We do
-	 * not actually advance to a new epoch until sometime after it starts
-	 * because of scheduling and computation delays, and it is even possible
-	 * to completely skip epochs.  In all cases, during epoch advancement we
-	 * merge all relevant activity into the most recently recorded epoch.
-	 */
-	nstime_t		decay_epoch;
-	/* decay_deadline randomness generator. */
-	uint64_t		decay_jitter_state;
-	/*
-	 * Deadline for current epoch.  This is the sum of decay_interval and
-	 * per epoch jitter which is a uniform random variable in
-	 * [0..decay_interval).  Epochs always advance by precise multiples of
-	 * decay_interval, but we randomize the deadline to reduce the
-	 * likelihood of arenas purging in lockstep.
-	 */
-	nstime_t		decay_deadline;
-	/*
-	 * Number of dirty pages at beginning of current epoch.  During epoch
-	 * advancement we use the delta between decay_ndirty and ndirty to
-	 * determine how many dirty pages, if any, were generated, and record
-	 * the result in decay_backlog.
-	 */
-	size_t			decay_ndirty;
-	/*
-	 * Memoized result of arena_decay_backlog_npages_limit() corresponding
-	 * to the current contents of decay_backlog, i.e. the limit on how many
-	 * pages are allowed to exist for the decay epochs.
-	 */
-	size_t			decay_backlog_npages_limit;
-	/*
-	 * Trailing log of how many unused dirty pages were generated during
-	 * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last
-	 * element is the most recent epoch.  Corresponding epoch times are
-	 * relative to decay_epoch.
-	 */
-	size_t			decay_backlog[SMOOTHSTEP_NSTEPS];
+	/* Decay-based purging state. */
+	arena_decay_t		decay;
 
 	/* Extant huge allocations. */
 	ql_head(extent_node_t)	huge;
 	/* Synchronizes all huge allocation/update/deallocation. */
 	malloc_mutex_t		huge_mtx;
 
 	/*
 	 * Trees of chunks that were previously allocated (trees differ only in
@@ -452,20 +465,22 @@ struct arena_s {
 
 	/* User-configurable chunk hook functions. */
 	chunk_hooks_t		chunk_hooks;
 
 	/* bins is used to store trees of free regions. */
 	arena_bin_t		bins[NBINS];
 
 	/*
-	 * Quantized address-ordered trees of this arena's available runs.  The
-	 * trees are used for first-best-fit run allocation.
+	 * Size-segregated address-ordered heaps of this arena's available runs,
+	 * used for first-best-fit run allocation.  Runs are quantized, i.e.
+	 * they reside in the last heap which corresponds to a size class less
+	 * than or equal to the run size.
 	 */
-	arena_run_tree_t	runs_avail[1]; /* Dynamically sized. */
+	arena_run_heap_t	runs_avail[NPSIZES];
 };
 
 /* Used in conjunction with tsd for fast arena-related context lookup. */
 struct arena_tdata_s {
 	ticker_t		decay_ticker;
 };
 #endif /* JEMALLOC_ARENA_STRUCTS_B */
 
@@ -487,142 +502,155 @@ extern ssize_t		opt_lg_dirty_mult;
 extern ssize_t		opt_decay_time;
 
 extern arena_bin_info_t	arena_bin_info[NBINS];
 
 extern size_t		map_bias; /* Number of arena chunk header pages. */
 extern size_t		map_misc_offset;
 extern size_t		arena_maxrun; /* Max run size for arenas. */
 extern size_t		large_maxclass; /* Max large size class. */
-extern size_t		run_quantize_max; /* Max run_quantize_*() input. */
 extern unsigned		nlclasses; /* Number of large size classes. */
 extern unsigned		nhclasses; /* Number of huge size classes. */
 
 #ifdef JEMALLOC_JET
 typedef size_t (run_quantize_t)(size_t);
 extern run_quantize_t *run_quantize_floor;
 extern run_quantize_t *run_quantize_ceil;
 #endif
 void	arena_chunk_cache_maybe_insert(arena_t *arena, extent_node_t *node,
     bool cache);
 void	arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node,
     bool cache);
-extent_node_t	*arena_node_alloc(arena_t *arena);
-void	arena_node_dalloc(arena_t *arena, extent_node_t *node);
-void	*arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment,
-    bool *zero);
-void	arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize);
-void	arena_chunk_ralloc_huge_similar(arena_t *arena, void *chunk,
-    size_t oldsize, size_t usize);
-void	arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk,
-    size_t oldsize, size_t usize);
-bool	arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk,
-    size_t oldsize, size_t usize, bool *zero);
-ssize_t	arena_lg_dirty_mult_get(arena_t *arena);
-bool	arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult);
-ssize_t	arena_decay_time_get(arena_t *arena);
-bool	arena_decay_time_set(arena_t *arena, ssize_t decay_time);
-void	arena_maybe_purge(arena_t *arena);
-void	arena_purge(arena_t *arena, bool all);
-void	arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin,
-    szind_t binind, uint64_t prof_accumbytes);
+extent_node_t	*arena_node_alloc(tsdn_t *tsdn, arena_t *arena);
+void	arena_node_dalloc(tsdn_t *tsdn, arena_t *arena, extent_node_t *node);
+void	*arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize,
+    size_t alignment, bool *zero);
+void	arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, void *chunk,
+    size_t usize);
+void	arena_chunk_ralloc_huge_similar(tsdn_t *tsdn, arena_t *arena,
+    void *chunk, size_t oldsize, size_t usize);
+void	arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena,
+    void *chunk, size_t oldsize, size_t usize);
+bool	arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena,
+    void *chunk, size_t oldsize, size_t usize, bool *zero);
+ssize_t	arena_lg_dirty_mult_get(tsdn_t *tsdn, arena_t *arena);
+bool	arena_lg_dirty_mult_set(tsdn_t *tsdn, arena_t *arena,
+    ssize_t lg_dirty_mult);
+ssize_t	arena_decay_time_get(tsdn_t *tsdn, arena_t *arena);
+bool	arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time);
+void	arena_purge(tsdn_t *tsdn, arena_t *arena, bool all);
+void	arena_maybe_purge(tsdn_t *tsdn, arena_t *arena);
+void	arena_reset(tsd_t *tsd, arena_t *arena);
+void	arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena,
+    tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes);
 void	arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info,
     bool zero);
 #ifdef JEMALLOC_JET
 typedef void (arena_redzone_corruption_t)(void *, size_t, bool, size_t,
     uint8_t);
 extern arena_redzone_corruption_t *arena_redzone_corruption;
 typedef void (arena_dalloc_junk_small_t)(void *, arena_bin_info_t *);
 extern arena_dalloc_junk_small_t *arena_dalloc_junk_small;
 #else
 void	arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info);
 #endif
 void	arena_quarantine_junk_small(void *ptr, size_t usize);
-void	*arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t ind, bool zero);
-void	*arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind,
-    bool zero, tcache_t *tcache);
-void	*arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize,
+void	*arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t ind,
+    bool zero);
+void	*arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size,
+    szind_t ind, bool zero);
+void	*arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize,
     size_t alignment, bool zero, tcache_t *tcache);
-void	arena_prof_promoted(const void *ptr, size_t size);
-void	arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk,
-    void *ptr, arena_chunk_map_bits_t *bitselm);
-void	arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
-    size_t pageind, arena_chunk_map_bits_t *bitselm);
-void	arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk,
+void	arena_prof_promoted(tsdn_t *tsdn, const void *ptr, size_t size);
+void	arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena,
+    arena_chunk_t *chunk, void *ptr, arena_chunk_map_bits_t *bitselm);
+void	arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
+    void *ptr, size_t pageind, arena_chunk_map_bits_t *bitselm);
+void	arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
     void *ptr, size_t pageind);
 #ifdef JEMALLOC_JET
 typedef void (arena_dalloc_junk_large_t)(void *, size_t);
 extern arena_dalloc_junk_large_t *arena_dalloc_junk_large;
 #else
 void	arena_dalloc_junk_large(void *ptr, size_t usize);
 #endif
-void	arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk,
-    void *ptr);
-void	arena_dalloc_large(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk,
+void	arena_dalloc_large_junked_locked(tsdn_t *tsdn, arena_t *arena,
+    arena_chunk_t *chunk, void *ptr);
+void	arena_dalloc_large(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
     void *ptr);
 #ifdef JEMALLOC_JET
 typedef void (arena_ralloc_junk_large_t)(void *, size_t, size_t);
 extern arena_ralloc_junk_large_t *arena_ralloc_junk_large;
 #endif
-bool	arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
-    size_t extra, bool zero);
+bool	arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize,
+    size_t size, size_t extra, bool zero);
 void	*arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize,
     size_t size, size_t alignment, bool zero, tcache_t *tcache);
-dss_prec_t	arena_dss_prec_get(arena_t *arena);
-bool	arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec);
+dss_prec_t	arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena);
+bool	arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec);
 ssize_t	arena_lg_dirty_mult_default_get(void);
 bool	arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult);
 ssize_t	arena_decay_time_default_get(void);
 bool	arena_decay_time_default_set(ssize_t decay_time);
-void	arena_basic_stats_merge(arena_t *arena, unsigned *nthreads,
+void	arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena,
+    unsigned *nthreads, const char **dss, ssize_t *lg_dirty_mult,
+    ssize_t *decay_time, size_t *nactive, size_t *ndirty);
+void	arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
     const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time,
-    size_t *nactive, size_t *ndirty);
-void	arena_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss,
-    ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive,
-    size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats,
-    malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats);
-unsigned	arena_nthreads_get(arena_t *arena);
-void	arena_nthreads_inc(arena_t *arena);
-void	arena_nthreads_dec(arena_t *arena);
-arena_t	*arena_new(unsigned ind);
-bool	arena_boot(void);
-void	arena_prefork0(arena_t *arena);
-void	arena_prefork1(arena_t *arena);
-void	arena_prefork2(arena_t *arena);
-void	arena_prefork3(arena_t *arena);
-void	arena_postfork_parent(arena_t *arena);
-void	arena_postfork_child(arena_t *arena);
+    size_t *nactive, size_t *ndirty, arena_stats_t *astats,
+    malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats,
+    malloc_huge_stats_t *hstats);
+unsigned	arena_nthreads_get(arena_t *arena, bool internal);
+void	arena_nthreads_inc(arena_t *arena, bool internal);
+void	arena_nthreads_dec(arena_t *arena, bool internal);
+arena_t	*arena_new(tsdn_t *tsdn, unsigned ind);
+void	arena_boot(void);
+void	arena_prefork0(tsdn_t *tsdn, arena_t *arena);
+void	arena_prefork1(tsdn_t *tsdn, arena_t *arena);
+void	arena_prefork2(tsdn_t *tsdn, arena_t *arena);
+void	arena_prefork3(tsdn_t *tsdn, arena_t *arena);
+void	arena_postfork_parent(tsdn_t *tsdn, arena_t *arena);
+void	arena_postfork_child(tsdn_t *tsdn, arena_t *arena);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
-arena_chunk_map_bits_t	*arena_bitselm_get(arena_chunk_t *chunk,
+arena_chunk_map_bits_t	*arena_bitselm_get_mutable(arena_chunk_t *chunk,
     size_t pageind);
-arena_chunk_map_misc_t	*arena_miscelm_get(arena_chunk_t *chunk,
+const arena_chunk_map_bits_t	*arena_bitselm_get_const(
+    const arena_chunk_t *chunk, size_t pageind);
+arena_chunk_map_misc_t	*arena_miscelm_get_mutable(arena_chunk_t *chunk,
     size_t pageind);
+const arena_chunk_map_misc_t	*arena_miscelm_get_const(
+    const arena_chunk_t *chunk, size_t pageind);
 size_t	arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm);
-void	*arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm);
+void	*arena_miscelm_to_rpages(const arena_chunk_map_misc_t *miscelm);
 arena_chunk_map_misc_t	*arena_rd_to_miscelm(arena_runs_dirty_link_t *rd);
 arena_chunk_map_misc_t	*arena_run_to_miscelm(arena_run_t *run);
-size_t	*arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind);
-size_t	arena_mapbitsp_read(size_t *mapbitsp);
-size_t	arena_mapbits_get(arena_chunk_t *chunk, size_t pageind);
+size_t	*arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind);
+const size_t	*arena_mapbitsp_get_const(const arena_chunk_t *chunk,
+    size_t pageind);
+size_t	arena_mapbitsp_read(const size_t *mapbitsp);
+size_t	arena_mapbits_get(const arena_chunk_t *chunk, size_t pageind);
 size_t	arena_mapbits_size_decode(size_t mapbits);
-size_t	arena_mapbits_unallocated_size_get(arena_chunk_t *chunk,
+size_t	arena_mapbits_unallocated_size_get(const arena_chunk_t *chunk,
+    size_t pageind);
+size_t	arena_mapbits_large_size_get(const arena_chunk_t *chunk,
     size_t pageind);
-size_t	arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind);
-size_t	arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind);
-szind_t	arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind);
-size_t	arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind);
-size_t	arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind);
-size_t	arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind);
-size_t	arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind);
-size_t	arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind);
+size_t	arena_mapbits_small_runind_get(const arena_chunk_t *chunk,
+    size_t pageind);
+szind_t	arena_mapbits_binind_get(const arena_chunk_t *chunk, size_t pageind);
+size_t	arena_mapbits_dirty_get(const arena_chunk_t *chunk, size_t pageind);
+size_t	arena_mapbits_unzeroed_get(const arena_chunk_t *chunk, size_t pageind);
+size_t	arena_mapbits_decommitted_get(const arena_chunk_t *chunk,
+    size_t pageind);
+size_t	arena_mapbits_large_get(const arena_chunk_t *chunk, size_t pageind);
+size_t	arena_mapbits_allocated_get(const arena_chunk_t *chunk, size_t pageind);
 void	arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits);
 size_t	arena_mapbits_size_encode(size_t size);
 void	arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind,
     size_t size, size_t flags);
 void	arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind,
     size_t size);
 void	arena_mapbits_internal_set(arena_chunk_t *chunk, size_t pageind,
     size_t flags);
@@ -632,73 +660,89 @@ void	arena_mapbits_large_binind_set(aren
     szind_t binind);
 void	arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind,
     size_t runind, szind_t binind, size_t flags);
 void	arena_metadata_allocated_add(arena_t *arena, size_t size);
 void	arena_metadata_allocated_sub(arena_t *arena, size_t size);
 size_t	arena_metadata_allocated_get(arena_t *arena);
 bool	arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes);
 bool	arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes);
-bool	arena_prof_accum(arena_t *arena, uint64_t accumbytes);
+bool	arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes);
 szind_t	arena_ptr_small_binind_get(const void *ptr, size_t mapbits);
 szind_t	arena_bin_index(arena_t *arena, arena_bin_t *bin);
 size_t	arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
     const void *ptr);
-prof_tctx_t	*arena_prof_tctx_get(const void *ptr);
-void	arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx);
-void	arena_prof_tctx_reset(const void *ptr, size_t usize,
+prof_tctx_t	*arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr);
+void	arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize,
+    prof_tctx_t *tctx);
+void	arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize,
     const void *old_ptr, prof_tctx_t *old_tctx);
-void	arena_decay_ticks(tsd_t *tsd, arena_t *arena, unsigned nticks);
-void	arena_decay_tick(tsd_t *tsd, arena_t *arena);
-void	*arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind,
+void	arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks);
+void	arena_decay_tick(tsdn_t *tsdn, arena_t *arena);
+void	*arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind,
     bool zero, tcache_t *tcache, bool slow_path);
 arena_t	*arena_aalloc(const void *ptr);
-size_t	arena_salloc(const void *ptr, bool demote);
-void	arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path);
-void	arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache);
+size_t	arena_salloc(tsdn_t *tsdn, const void *ptr, bool demote);
+void	arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path);
+void	arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
+    bool slow_path);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
 #  ifdef JEMALLOC_ARENA_INLINE_A
 JEMALLOC_ALWAYS_INLINE arena_chunk_map_bits_t *
-arena_bitselm_get(arena_chunk_t *chunk, size_t pageind)
+arena_bitselm_get_mutable(arena_chunk_t *chunk, size_t pageind)
 {
 
 	assert(pageind >= map_bias);
 	assert(pageind < chunk_npages);
 
 	return (&chunk->map_bits[pageind-map_bias]);
 }
 
+JEMALLOC_ALWAYS_INLINE const arena_chunk_map_bits_t *
+arena_bitselm_get_const(const arena_chunk_t *chunk, size_t pageind)
+{
+
+	return (arena_bitselm_get_mutable((arena_chunk_t *)chunk, pageind));
+}
+
 JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t *
-arena_miscelm_get(arena_chunk_t *chunk, size_t pageind)
+arena_miscelm_get_mutable(arena_chunk_t *chunk, size_t pageind)
 {
 
 	assert(pageind >= map_bias);
 	assert(pageind < chunk_npages);
 
 	return ((arena_chunk_map_misc_t *)((uintptr_t)chunk +
 	    (uintptr_t)map_misc_offset) + pageind-map_bias);
 }
 
+JEMALLOC_ALWAYS_INLINE const arena_chunk_map_misc_t *
+arena_miscelm_get_const(const arena_chunk_t *chunk, size_t pageind)
+{
+
+	return (arena_miscelm_get_mutable((arena_chunk_t *)chunk, pageind));
+}
+
 JEMALLOC_ALWAYS_INLINE size_t
 arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm)
 {
 	arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm);
 	size_t pageind = ((uintptr_t)miscelm - ((uintptr_t)chunk +
 	    map_misc_offset)) / sizeof(arena_chunk_map_misc_t) + map_bias;
 
 	assert(pageind >= map_bias);
 	assert(pageind < chunk_npages);
 
 	return (pageind);
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm)
+arena_miscelm_to_rpages(const arena_chunk_map_misc_t *miscelm)
 {
 	arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm);
 	size_t pageind = arena_miscelm_to_pageind(miscelm);
 
 	return ((void *)((uintptr_t)chunk + (pageind << LG_PAGE)));
 }
 
 JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t *
@@ -721,34 +765,41 @@ arena_run_to_miscelm(arena_run_t *run)
 
 	assert(arena_miscelm_to_pageind(miscelm) >= map_bias);
 	assert(arena_miscelm_to_pageind(miscelm) < chunk_npages);
 
 	return (miscelm);
 }
 
 JEMALLOC_ALWAYS_INLINE size_t *
-arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind)
 {
 
-	return (&arena_bitselm_get(chunk, pageind)->bits);
+	return (&arena_bitselm_get_mutable(chunk, pageind)->bits);
+}
+
+JEMALLOC_ALWAYS_INLINE const size_t *
+arena_mapbitsp_get_const(const arena_chunk_t *chunk, size_t pageind)
+{
+
+	return (arena_mapbitsp_get_mutable((arena_chunk_t *)chunk, pageind));
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbitsp_read(size_t *mapbitsp)
+arena_mapbitsp_read(const size_t *mapbitsp)
 {
 
 	return (*mapbitsp);
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_get(const arena_chunk_t *chunk, size_t pageind)
 {
 
-	return (arena_mapbitsp_read(arena_mapbitsp_get(chunk, pageind)));
+	return (arena_mapbitsp_read(arena_mapbitsp_get_const(chunk, pageind)));
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
 arena_mapbits_size_decode(size_t mapbits)
 {
 	size_t size;
 
 #if CHUNK_MAP_SIZE_SHIFT > 0
@@ -758,103 +809,103 @@ arena_mapbits_size_decode(size_t mapbits
 #else
 	size = (mapbits & CHUNK_MAP_SIZE_MASK) << -CHUNK_MAP_SIZE_SHIFT;
 #endif
 
 	return (size);
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_unallocated_size_get(const arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
 
 	mapbits = arena_mapbits_get(chunk, pageind);
 	assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0);
 	return (arena_mapbits_size_decode(mapbits));
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_large_size_get(const arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
 
 	mapbits = arena_mapbits_get(chunk, pageind);
 	assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) ==
 	    (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED));
 	return (arena_mapbits_size_decode(mapbits));
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_small_runind_get(const arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
 
 	mapbits = arena_mapbits_get(chunk, pageind);
 	assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) ==
 	    CHUNK_MAP_ALLOCATED);
 	return (mapbits >> CHUNK_MAP_RUNIND_SHIFT);
 }
 
 JEMALLOC_ALWAYS_INLINE szind_t
-arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_binind_get(const arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
 	szind_t binind;
 
 	mapbits = arena_mapbits_get(chunk, pageind);
 	binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT;
 	assert(binind < NBINS || binind == BININD_INVALID);
 	return (binind);
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_dirty_get(const arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
 
 	mapbits = arena_mapbits_get(chunk, pageind);
 	assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits &
 	    (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0);
 	return (mapbits & CHUNK_MAP_DIRTY);
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_unzeroed_get(const arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
 
 	mapbits = arena_mapbits_get(chunk, pageind);
 	assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits &
 	    (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0);
 	return (mapbits & CHUNK_MAP_UNZEROED);
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_decommitted_get(const arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
 
 	mapbits = arena_mapbits_get(chunk, pageind);
 	assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits &
 	    (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0);
 	return (mapbits & CHUNK_MAP_DECOMMITTED);
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_large_get(const arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
 
 	mapbits = arena_mapbits_get(chunk, pageind);
 	return (mapbits & CHUNK_MAP_LARGE);
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_allocated_get(const arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
 
 	mapbits = arena_mapbits_get(chunk, pageind);
 	return (mapbits & CHUNK_MAP_ALLOCATED);
 }
 
 JEMALLOC_ALWAYS_INLINE void
@@ -880,82 +931,82 @@ arena_mapbits_size_encode(size_t size)
 	assert((mapbits & ~CHUNK_MAP_SIZE_MASK) == 0);
 	return (mapbits);
 }
 
 JEMALLOC_ALWAYS_INLINE void
 arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size,
     size_t flags)
 {
-	size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
+	size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind);
 
 	assert((size & PAGE_MASK) == 0);
 	assert((flags & CHUNK_MAP_FLAGS_MASK) == flags);
 	assert((flags & CHUNK_MAP_DECOMMITTED) == 0 || (flags &
 	    (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0);
 	arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) |
 	    CHUNK_MAP_BININD_INVALID | flags);
 }
 
 JEMALLOC_ALWAYS_INLINE void
 arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind,
     size_t size)
 {
-	size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
+	size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind);
 	size_t mapbits = arena_mapbitsp_read(mapbitsp);
 
 	assert((size & PAGE_MASK) == 0);
 	assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0);
 	arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) |
 	    (mapbits & ~CHUNK_MAP_SIZE_MASK));
 }
 
 JEMALLOC_ALWAYS_INLINE void
 arena_mapbits_internal_set(arena_chunk_t *chunk, size_t pageind, size_t flags)
 {
-	size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
+	size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind);
 
 	assert((flags & CHUNK_MAP_UNZEROED) == flags);
 	arena_mapbitsp_write(mapbitsp, flags);
 }
 
 JEMALLOC_ALWAYS_INLINE void
 arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size,
     size_t flags)
 {
-	size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
+	size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind);
 
 	assert((size & PAGE_MASK) == 0);
 	assert((flags & CHUNK_MAP_FLAGS_MASK) == flags);
 	assert((flags & CHUNK_MAP_DECOMMITTED) == 0 || (flags &
 	    (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0);
 	arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) |
 	    CHUNK_MAP_BININD_INVALID | flags | CHUNK_MAP_LARGE |
 	    CHUNK_MAP_ALLOCATED);
 }
 
 JEMALLOC_ALWAYS_INLINE void
 arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind,
     szind_t binind)
 {
-	size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
+	size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind);
 	size_t mapbits = arena_mapbitsp_read(mapbitsp);
 
 	assert(binind <= BININD_INVALID);
 	assert(arena_mapbits_large_size_get(chunk, pageind) == LARGE_MINCLASS +
 	    large_pad);
 	arena_mapbitsp_write(mapbitsp, (mapbits & ~CHUNK_MAP_BININD_MASK) |
 	    (binind << CHUNK_MAP_BININD_SHIFT));
 }
 
 JEMALLOC_ALWAYS_INLINE void
 arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind,
     szind_t binind, size_t flags)
 {
-	size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
+	size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind);
 
 	assert(binind < BININD_INVALID);
 	assert(pageind - runind >= map_bias);
 	assert((flags & CHUNK_MAP_UNZEROED) == flags);
 	arena_mapbitsp_write(mapbitsp, (runind << CHUNK_MAP_RUNIND_SHIFT) |
 	    (binind << CHUNK_MAP_BININD_SHIFT) | flags | CHUNK_MAP_ALLOCATED);
 }
 
@@ -1002,30 +1053,30 @@ arena_prof_accum_locked(arena_t *arena, 
 	cassert(config_prof);
 
 	if (likely(prof_interval == 0))
 		return (false);
 	return (arena_prof_accum_impl(arena, accumbytes));
 }
 
 JEMALLOC_INLINE bool
-arena_prof_accum(arena_t *arena, uint64_t accumbytes)
+arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes)
 {
 
 	cassert(config_prof);
 
 	if (likely(prof_interval == 0))
 		return (false);
 
 	{
 		bool ret;
 
-		malloc_mutex_lock(&arena->lock);
+		malloc_mutex_lock(tsdn, &arena->lock);
 		ret = arena_prof_accum_impl(arena, accumbytes);
-		malloc_mutex_unlock(&arena->lock);
+		malloc_mutex_unlock(tsdn, &arena->lock);
 		return (ret);
 	}
 }
 
 JEMALLOC_ALWAYS_INLINE szind_t
 arena_ptr_small_binind_get(const void *ptr, size_t mapbits)
 {
 	szind_t binind;
@@ -1033,35 +1084,35 @@ arena_ptr_small_binind_get(const void *p
 	binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT;
 
 	if (config_debug) {
 		arena_chunk_t *chunk;
 		arena_t *arena;
 		size_t pageind;
 		size_t actual_mapbits;
 		size_t rpages_ind;
-		arena_run_t *run;
+		const arena_run_t *run;
 		arena_bin_t *bin;
 		szind_t run_binind, actual_binind;
 		arena_bin_info_t *bin_info;
-		arena_chunk_map_misc_t *miscelm;
-		void *rpages;
+		const arena_chunk_map_misc_t *miscelm;
+		const void *rpages;
 
 		assert(binind != BININD_INVALID);
 		assert(binind < NBINS);
 		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 		arena = extent_node_arena_get(&chunk->node);
 		pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
 		actual_mapbits = arena_mapbits_get(chunk, pageind);
 		assert(mapbits == actual_mapbits);
 		assert(arena_mapbits_large_get(chunk, pageind) == 0);
 		assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
 		rpages_ind = pageind - arena_mapbits_small_runind_get(chunk,
 		    pageind);
-		miscelm = arena_miscelm_get(chunk, rpages_ind);
+		miscelm = arena_miscelm_get_const(chunk, rpages_ind);
 		run = &miscelm->run;
 		run_binind = run->binind;
 		bin = &arena->bins[run_binind];
 		actual_binind = (szind_t)(bin - arena->bins);
 		assert(run_binind == actual_binind);
 		bin_info = &arena_bin_info[actual_binind];
 		rpages = arena_miscelm_to_rpages(miscelm);
 		assert(((uintptr_t)ptr - ((uintptr_t)rpages +
@@ -1151,44 +1202,45 @@ arena_run_regind(arena_run_t *run, arena
 	}
 	assert(diff == regind * interval);
 	assert(regind < bin_info->nregs);
 
 	return (regind);
 }
 
 JEMALLOC_INLINE prof_tctx_t *
-arena_prof_tctx_get(const void *ptr)
+arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr)
 {
 	prof_tctx_t *ret;
 	arena_chunk_t *chunk;
 
 	cassert(config_prof);
 	assert(ptr != NULL);
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 	if (likely(chunk != ptr)) {
 		size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
 		size_t mapbits = arena_mapbits_get(chunk, pageind);
 		assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
 		if (likely((mapbits & CHUNK_MAP_LARGE) == 0))
 			ret = (prof_tctx_t *)(uintptr_t)1U;
 		else {
-			arena_chunk_map_misc_t *elm = arena_miscelm_get(chunk,
-			    pageind);
+			arena_chunk_map_misc_t *elm =
+			    arena_miscelm_get_mutable(chunk, pageind);
 			ret = atomic_read_p(&elm->prof_tctx_pun);
 		}
 	} else
-		ret = huge_prof_tctx_get(ptr);
+		ret = huge_prof_tctx_get(tsdn, ptr);
 
 	return (ret);
 }
 
 JEMALLOC_INLINE void
-arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx)
+arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize,
+    prof_tctx_t *tctx)
 {
 	arena_chunk_t *chunk;
 
 	cassert(config_prof);
 	assert(ptr != NULL);
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 	if (likely(chunk != ptr)) {
@@ -1197,34 +1249,34 @@ arena_prof_tctx_set(const void *ptr, siz
 		assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
 
 		if (unlikely(usize > SMALL_MAXCLASS || (uintptr_t)tctx >
 		    (uintptr_t)1U)) {
 			arena_chunk_map_misc_t *elm;
 
 			assert(arena_mapbits_large_get(chunk, pageind) != 0);
 
-			elm = arena_miscelm_get(chunk, pageind);
+			elm = arena_miscelm_get_mutable(chunk, pageind);
 			atomic_write_p(&elm->prof_tctx_pun, tctx);
 		} else {
 			/*
 			 * tctx must always be initialized for large runs.
 			 * Assert that the surrounding conditional logic is
 			 * equivalent to checking whether ptr refers to a large
 			 * run.
 			 */
 			assert(arena_mapbits_large_get(chunk, pageind) == 0);
 		}
 	} else
-		huge_prof_tctx_set(ptr, tctx);
+		huge_prof_tctx_set(tsdn, ptr, tctx);
 }
 
 JEMALLOC_INLINE void
-arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr,
-    prof_tctx_t *old_tctx)
+arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize,
+    const void *old_ptr, prof_tctx_t *old_tctx)
 {
 
 	cassert(config_prof);
 	assert(ptr != NULL);
 
 	if (unlikely(usize > SMALL_MAXCLASS || (ptr == old_ptr &&
 	    (uintptr_t)old_tctx > (uintptr_t)1U))) {
 		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
@@ -1233,83 +1285,86 @@ arena_prof_tctx_reset(const void *ptr, s
 			arena_chunk_map_misc_t *elm;
 
 			pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >>
 			    LG_PAGE;
 			assert(arena_mapbits_allocated_get(chunk, pageind) !=
 			    0);
 			assert(arena_mapbits_large_get(chunk, pageind) != 0);
 
-			elm = arena_miscelm_get(chunk, pageind);
+			elm = arena_miscelm_get_mutable(chunk, pageind);
 			atomic_write_p(&elm->prof_tctx_pun,
 			    (prof_tctx_t *)(uintptr_t)1U);
 		} else
-			huge_prof_tctx_reset(ptr);
+			huge_prof_tctx_reset(tsdn, ptr);
 	}
 }
 
 JEMALLOC_ALWAYS_INLINE void
-arena_decay_ticks(tsd_t *tsd, arena_t *arena, unsigned nticks)
+arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks)
 {
+	tsd_t *tsd;
 	ticker_t *decay_ticker;
 
-	if (unlikely(tsd == NULL))
+	if (unlikely(tsdn_null(tsdn)))
 		return;
+	tsd = tsdn_tsd(tsdn);
 	decay_ticker = decay_ticker_get(tsd, arena->ind);
 	if (unlikely(decay_ticker == NULL))
 		return;
 	if (unlikely(ticker_ticks(decay_ticker, nticks)))
-		arena_purge(arena, false);
+		arena_purge(tsdn, arena, false);
 }
 
 JEMALLOC_ALWAYS_INLINE void
-arena_decay_tick(tsd_t *tsd, arena_t *arena)
+arena_decay_tick(tsdn_t *tsdn, arena_t *arena)
 {
 
-	arena_decay_ticks(tsd, arena, 1);
+	arena_decay_ticks(tsdn, arena, 1);
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero,
+arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero,
     tcache_t *tcache, bool slow_path)
 {
 
+	assert(!tsdn_null(tsdn) || tcache == NULL);
 	assert(size != 0);
 
 	if (likely(tcache != NULL)) {
 		if (likely(size <= SMALL_MAXCLASS)) {
-			return (tcache_alloc_small(tsd, arena, tcache, size,
-			    ind, zero, slow_path));
+			return (tcache_alloc_small(tsdn_tsd(tsdn), arena,
+			    tcache, size, ind, zero, slow_path));
 		}
 		if (likely(size <= tcache_maxclass)) {
-			return (tcache_alloc_large(tsd, arena, tcache, size,
-			    ind, zero, slow_path));
+			return (tcache_alloc_large(tsdn_tsd(tsdn), arena,
+			    tcache, size, ind, zero, slow_path));
 		}
 		/* (size > tcache_maxclass) case falls through. */
 		assert(size > tcache_maxclass);
 	}
 
-	return (arena_malloc_hard(tsd, arena, size, ind, zero, tcache));
+	return (arena_malloc_hard(tsdn, arena, size, ind, zero));
 }
 
 JEMALLOC_ALWAYS_INLINE arena_t *
 arena_aalloc(const void *ptr)
 {
 	arena_chunk_t *chunk;
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 	if (likely(chunk != ptr))
 		return (extent_node_arena_get(&chunk->node));
 	else
 		return (huge_aalloc(ptr));
 }
 
 /* Return the size of the allocation pointed to by ptr. */
 JEMALLOC_ALWAYS_INLINE size_t
-arena_salloc(const void *ptr, bool demote)
+arena_salloc(tsdn_t *tsdn, const void *ptr, bool demote)
 {
 	size_t ret;
 	arena_chunk_t *chunk;
 	size_t pageind;
 	szind_t binind;
 
 	assert(ptr != NULL);
 
@@ -1342,112 +1397,120 @@ arena_salloc(const void *ptr, bool demot
 			 * object).
 			 */
 			assert(arena_mapbits_large_get(chunk, pageind) != 0 ||
 			    arena_ptr_small_binind_get(ptr,
 			    arena_mapbits_get(chunk, pageind)) == binind);
 			ret = index2size(binind);
 		}
 	} else
-		ret = huge_salloc(ptr);
+		ret = huge_salloc(tsdn, ptr);
 
 	return (ret);
 }
 
 JEMALLOC_ALWAYS_INLINE void
-arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path)
+arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path)
 {
 	arena_chunk_t *chunk;
 	size_t pageind, mapbits;
 
+	assert(!tsdn_null(tsdn) || tcache == NULL);
 	assert(ptr != NULL);
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 	if (likely(chunk != ptr)) {
 		pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
 		mapbits = arena_mapbits_get(chunk, pageind);
 		assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
 		if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) {
 			/* Small allocation. */
 			if (likely(tcache != NULL)) {
 				szind_t binind = arena_ptr_small_binind_get(ptr,
 				    mapbits);
-				tcache_dalloc_small(tsd, tcache, ptr, binind,
-				    slow_path);
+				tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr,
+				    binind, slow_path);
 			} else {
-				arena_dalloc_small(tsd, extent_node_arena_get(
-				    &chunk->node), chunk, ptr, pageind);
+				arena_dalloc_small(tsdn,
+				    extent_node_arena_get(&chunk->node), chunk,
+				    ptr, pageind);
 			}
 		} else {
 			size_t size = arena_mapbits_large_size_get(chunk,
 			    pageind);
 
 			assert(config_cache_oblivious || ((uintptr_t)ptr &
 			    PAGE_MASK) == 0);
 
 			if (likely(tcache != NULL) && size - large_pad <=
 			    tcache_maxclass) {
-				tcache_dalloc_large(tsd, tcache, ptr, size -
-				    large_pad, slow_path);
+				tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr,
+				    size - large_pad, slow_path);
 			} else {
-				arena_dalloc_large(tsd, extent_node_arena_get(
-				    &chunk->node), chunk, ptr);
+				arena_dalloc_large(tsdn,
+				    extent_node_arena_get(&chunk->node), chunk,
+				    ptr);
 			}
 		}
 	} else
-		huge_dalloc(tsd, ptr, tcache);
+		huge_dalloc(tsdn, ptr);
 }
 
 JEMALLOC_ALWAYS_INLINE void
-arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache)
+arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
+    bool slow_path)
 {
 	arena_chunk_t *chunk;
 
+	assert(!tsdn_null(tsdn) || tcache == NULL);
+
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 	if (likely(chunk != ptr)) {
 		if (config_prof && opt_prof) {
 			size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >>
 			    LG_PAGE;
 			assert(arena_mapbits_allocated_get(chunk, pageind) !=
 			    0);
 			if (arena_mapbits_large_get(chunk, pageind) != 0) {
 				/*
 				 * Make sure to use promoted size, not request
 				 * size.
 				 */
 				size = arena_mapbits_large_size_get(chunk,
 				    pageind) - large_pad;
 			}
 		}
-		assert(s2u(size) == s2u(arena_salloc(ptr, false)));
+		assert(s2u(size) == s2u(arena_salloc(tsdn, ptr, false)));
 
 		if (likely(size <= SMALL_MAXCLASS)) {
 			/* Small allocation. */
 			if (likely(tcache != NULL)) {
 				szind_t binind = size2index(size);
-				tcache_dalloc_small(tsd, tcache, ptr, binind,
-				    true);
+				tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr,
+				    binind, slow_path);
 			} else {
 				size_t pageind = ((uintptr_t)ptr -
 				    (uintptr_t)chunk) >> LG_PAGE;
-				arena_dalloc_small(tsd, extent_node_arena_get(
-				    &chunk->node), chunk, ptr, pageind);
+				arena_dalloc_small(tsdn,
+				    extent_node_arena_get(&chunk->node), chunk,
+				    ptr, pageind);
 			}
 		} else {
 			assert(config_cache_oblivious || ((uintptr_t)ptr &
 			    PAGE_MASK) == 0);
 
 			if (likely(tcache != NULL) && size <= tcache_maxclass) {
-				tcache_dalloc_large(tsd, tcache, ptr, size,
-				    true);
+				tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr,
+				    size, slow_path);
 			} else {
-				arena_dalloc_large(tsd, extent_node_arena_get(
-				    &chunk->node), chunk, ptr);
+				arena_dalloc_large(tsdn,
+				    extent_node_arena_get(&chunk->node), chunk,
+				    ptr);
 			}
 		}
 	} else
-		huge_dalloc(tsd, ptr, tcache);
+		huge_dalloc(tsdn, ptr);
 }
 #  endif /* JEMALLOC_ARENA_INLINE_B */
 #endif
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/internal/base.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/base.h
@@ -4,21 +4,22 @@
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
 
 #endif /* JEMALLOC_H_STRUCTS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
-void	*base_alloc(size_t size);
-void	base_stats_get(size_t *allocated, size_t *resident, size_t *mapped);
+void	*base_alloc(tsdn_t *tsdn, size_t size);
+void	base_stats_get(tsdn_t *tsdn, size_t *allocated, size_t *resident,
+    size_t *mapped);
 bool	base_boot(void);
-void	base_prefork(void);
-void	base_postfork_parent(void);
-void	base_postfork_child(void);
+void	base_prefork(tsdn_t *tsdn);
+void	base_postfork_parent(tsdn_t *tsdn);
+void	base_postfork_child(tsdn_t *tsdn);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/internal/bitmap.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/bitmap.h
@@ -12,18 +12,18 @@ typedef unsigned long bitmap_t;
 
 /* Number of bits per group. */
 #define	LG_BITMAP_GROUP_NBITS		(LG_SIZEOF_BITMAP + 3)
 #define	BITMAP_GROUP_NBITS		(ZU(1) << LG_BITMAP_GROUP_NBITS)
 #define	BITMAP_GROUP_NBITS_MASK		(BITMAP_GROUP_NBITS-1)
 
 /*
  * Do some analysis on how big the bitmap is before we use a tree.  For a brute
- * force linear search, if we would have to call ffsl more than 2^3 times, use a
- * tree instead.
+ * force linear search, if we would have to call ffs_lu() more than 2^3 times,
+ * use a tree instead.
  */
 #if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3
 #  define USE_TREE
 #endif
 
 /* Number of groups required to store a given number of bits. */
 #define	BITMAP_BITS2GROUPS(nbits)					\
     ((nbits + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS)
@@ -218,17 +218,17 @@ bitmap_sfu(bitmap_t *bitmap, const bitma
 	}
 #else
 	i = 0;
 	g = bitmap[0];
 	while ((bit = ffs_lu(g)) == 0) {
 		i++;
 		g = bitmap[i];
 	}
-	bit = (bit - 1) + (i << LG_BITMAP_GROUP_NBITS);
+	bit = (i << LG_BITMAP_GROUP_NBITS) + (bit - 1);
 #endif
 	bitmap_set(bitmap, binfo, bit);
 	return (bit);
 }
 
 JEMALLOC_INLINE void
 bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
 {
--- a/memory/jemalloc/src/include/jemalloc/internal/chunk.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/chunk.h
@@ -43,38 +43,39 @@ extern const char	*opt_dss;
 extern rtree_t		chunks_rtree;
 
 extern size_t		chunksize;
 extern size_t		chunksize_mask; /* (chunksize - 1). */
 extern size_t		chunk_npages;
 
 extern const chunk_hooks_t	chunk_hooks_default;
 
-chunk_hooks_t	chunk_hooks_get(arena_t *arena);
-chunk_hooks_t	chunk_hooks_set(arena_t *arena,
+chunk_hooks_t	chunk_hooks_get(tsdn_t *tsdn, arena_t *arena);
+chunk_hooks_t	chunk_hooks_set(tsdn_t *tsdn, arena_t *arena,
     const chunk_hooks_t *chunk_hooks);
 
-bool	chunk_register(const void *chunk, const extent_node_t *node);
+bool	chunk_register(tsdn_t *tsdn, const void *chunk,
+    const extent_node_t *node);
 void	chunk_deregister(const void *chunk, const extent_node_t *node);
 void	*chunk_alloc_base(size_t size);
-void	*chunk_alloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks,
-    void *new_addr, size_t size, size_t alignment, bool *zero,
-    bool dalloc_node);
-void	*chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks,
-    void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit);
-void	chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks,
-    void *chunk, size_t size, bool committed);
-void	chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks,
-    void *chunk, size_t size, bool zeroed, bool committed);
-bool	chunk_purge_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks,
-    void *chunk, size_t size, size_t offset, size_t length);
+void	*chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena,
+    chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment,
+    bool *zero, bool *commit, bool dalloc_node);
+void	*chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena,
+    chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment,
+    bool *zero, bool *commit);
+void	chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena,
+    chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool committed);
+void	chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena,
+    chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool zeroed,
+    bool committed);
+bool	chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena,
+    chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset,
+    size_t length);
 bool	chunk_boot(void);
-void	chunk_prefork(void);
-void	chunk_postfork_parent(void);
-void	chunk_postfork_child(void);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
 extent_node_t	*chunk_lookup(const void *chunk, bool dependent);
 #endif
--- a/memory/jemalloc/src/include/jemalloc/internal/chunk_dss.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/chunk_dss.h
@@ -18,22 +18,20 @@ typedef enum {
 extern const char *dss_prec_names[];
 
 #endif /* JEMALLOC_H_STRUCTS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
 dss_prec_t	chunk_dss_prec_get(void);
 bool	chunk_dss_prec_set(dss_prec_t dss_prec);
-void	*chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size,
-    size_t alignment, bool *zero, bool *commit);
+void	*chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr,
+    size_t size, size_t alignment, bool *zero, bool *commit);
 bool	chunk_in_dss(void *chunk);
-bool	chunk_dss_boot(void);
-void	chunk_dss_prefork(void);
-void	chunk_dss_postfork_parent(void);
-void	chunk_dss_postfork_child(void);
+bool	chunk_dss_mergeable(void *chunk_a, void *chunk_b);
+void	chunk_dss_boot(void);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/internal/ctl.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/ctl.h
@@ -16,23 +16,24 @@ struct ctl_node_s {
 };
 
 struct ctl_named_node_s {
 	struct ctl_node_s	node;
 	const char		*name;
 	/* If (nchildren == 0), this is a terminal node. */
 	unsigned		nchildren;
 	const			ctl_node_t *children;
-	int			(*ctl)(const size_t *, size_t, void *, size_t *,
-	    void *, size_t);
+	int			(*ctl)(tsd_t *, const size_t *, size_t, void *,
+	    size_t *, void *, size_t);
 };
 
 struct ctl_indexed_node_s {
 	struct ctl_node_s	node;
-	const ctl_named_node_t	*(*index)(const size_t *, size_t, size_t);
+	const ctl_named_node_t	*(*index)(tsdn_t *, const size_t *, size_t,
+	    size_t);
 };
 
 struct ctl_arena_stats_s {
 	bool			initialized;
 	unsigned		nthreads;
 	const char		*dss;
 	ssize_t			lg_dirty_mult;
 	ssize_t			decay_time;
@@ -55,34 +56,36 @@ struct ctl_arena_stats_s {
 };
 
 struct ctl_stats_s {
 	size_t			allocated;
 	size_t			active;
 	size_t			metadata;
 	size_t			resident;
 	size_t			mapped;
+	size_t			retained;
 	unsigned		narenas;
 	ctl_arena_stats_t	*arenas;	/* (narenas + 1) elements. */
 };
 
 #endif /* JEMALLOC_H_STRUCTS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
-int	ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp,
-    size_t newlen);
-int	ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp);
+int	ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp,
+    void *newp, size_t newlen);
+int	ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp,
+    size_t *miblenp);
 
-int	ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen);
+int	ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen);
 bool	ctl_boot(void);
-void	ctl_prefork(void);
-void	ctl_postfork_parent(void);
-void	ctl_postfork_child(void);
+void	ctl_prefork(tsdn_t *tsdn);
+void	ctl_postfork_parent(tsdn_t *tsdn);
+void	ctl_postfork_child(tsdn_t *tsdn);
 
 #define	xmallctl(name, oldp, oldlenp, newp, newlen) do {		\
 	if (je_mallctl(name, oldp, oldlenp, newp, newlen)		\
 	    != 0) {							\
 		malloc_printf(						\
 		    "<jemalloc>: Failure in xmallctl(\"%s\", ...)\n",	\
 		    name);						\
 		abort();						\
--- a/memory/jemalloc/src/include/jemalloc/internal/extent.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/extent.h
@@ -43,17 +43,17 @@ struct extent_node_s {
 	/* Linkage for arena's runs_dirty and chunks_cache rings. */
 	arena_runs_dirty_link_t	rd;
 	qr(extent_node_t)	cc_link;
 
 	union {
 		/* Linkage for the size/address-ordered tree. */
 		rb_node(extent_node_t)	szad_link;
 
-		/* Linkage for arena's huge and node_cache lists. */
+		/* Linkage for arena's achunks, huge, and node_cache lists. */
 		ql_elm(extent_node_t)	ql_link;
 	};
 
 	/* Linkage for the address-ordered tree. */
 	rb_node(extent_node_t)	ad_link;
 };
 typedef rb_tree(extent_node_t) extent_tree_t;
 
--- a/memory/jemalloc/src/include/jemalloc/internal/huge.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/huge.h
@@ -4,33 +4,32 @@
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
 
 #endif /* JEMALLOC_H_STRUCTS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
-void	*huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero,
-    tcache_t *tcache);
-void	*huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment,
-    bool zero, tcache_t *tcache);
-bool	huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize,
+void	*huge_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero);
+void	*huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize,
+    size_t alignment, bool zero);
+bool	huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize,
     size_t usize_min, size_t usize_max, bool zero);
 void	*huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize,
     size_t usize, size_t alignment, bool zero, tcache_t *tcache);
 #ifdef JEMALLOC_JET
 typedef void (huge_dalloc_junk_t)(void *, size_t);
 extern huge_dalloc_junk_t *huge_dalloc_junk;
 #endif
-void	huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache);
+void	huge_dalloc(tsdn_t *tsdn, void *ptr);
 arena_t	*huge_aalloc(const void *ptr);
-size_t	huge_salloc(const void *ptr);
-prof_tctx_t	*huge_prof_tctx_get(const void *ptr);
-void	huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx);
-void	huge_prof_tctx_reset(const void *ptr);
+size_t	huge_salloc(tsdn_t *tsdn, const void *ptr);
+prof_tctx_t	*huge_prof_tctx_get(tsdn_t *tsdn, const void *ptr);
+void	huge_prof_tctx_set(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx);
+void	huge_prof_tctx_reset(tsdn_t *tsdn, const void *ptr);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal.h.in
@@ -156,17 +156,20 @@ static const bool config_cache_oblivious
 
 #ifdef JEMALLOC_ZONE
 #include <mach/mach_error.h>
 #include <mach/mach_init.h>
 #include <mach/vm_map.h>
 #include <malloc/malloc.h>
 #endif
 
+#include "jemalloc/internal/ph.h"
+#ifndef __PGI
 #define	RB_COMPACT
+#endif
 #include "jemalloc/internal/rb.h"
 #include "jemalloc/internal/qr.h"
 #include "jemalloc/internal/ql.h"
 
 /*
  * jemalloc can conceptually be broken into components (arena, tcache, etc.),
  * but there are circular dependencies that cannot be broken without
  * substantial performance degradation.  In order to reduce the effect on
@@ -179,16 +182,19 @@ static const bool config_cache_oblivious
  *   JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes.
  *   JEMALLOC_H_INLINES : Inline functions.
  */
 /******************************************************************************/
 #define	JEMALLOC_H_TYPES
 
 #include "jemalloc/internal/jemalloc_internal_macros.h"
 
+/* Page size index type. */
+typedef unsigned pszind_t;
+
 /* Size class index type. */
 typedef unsigned szind_t;
 
 /*
  * Flags bits:
  *
  * a: arena
  * t: tcache
@@ -228,17 +234,17 @@ typedef unsigned szind_t;
 #    define LG_QUANTUM		4
 #  endif
 #  ifdef __ia64__
 #    define LG_QUANTUM		4
 #  endif
 #  ifdef __alpha__
 #    define LG_QUANTUM		4
 #  endif
-#  if (defined(__sparc64__) || defined(__sparcv9))
+#  if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__))
 #    define LG_QUANTUM		4
 #  endif
 #  if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64))
 #    define LG_QUANTUM		4
 #  endif
 #  ifdef __arm__
 #    define LG_QUANTUM		3
 #  endif
@@ -252,16 +258,19 @@ typedef unsigned szind_t;
 #    define LG_QUANTUM		3
 #  endif
 #  ifdef __or1k__
 #    define LG_QUANTUM		3
 #  endif
 #  ifdef __powerpc__
 #    define LG_QUANTUM		4
 #  endif
+#  ifdef __riscv__
+#    define LG_QUANTUM		4
+#  endif
 #  ifdef __s390__
 #    define LG_QUANTUM		4
 #  endif
 #  ifdef __SH4__
 #    define LG_QUANTUM		4
 #  endif
 #  ifdef __tile__
 #    define LG_QUANTUM		4
@@ -355,23 +364,25 @@ typedef unsigned szind_t;
 #else
 #  define VARIABLE_ARRAY(type, name, count) type name[(count)]
 #endif
 
 #include "jemalloc/internal/nstime.h"
 #include "jemalloc/internal/valgrind.h"
 #include "jemalloc/internal/util.h"
 #include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/spin.h"
 #include "jemalloc/internal/prng.h"
 #include "jemalloc/internal/ticker.h"
 #include "jemalloc/internal/ckh.h"
 #include "jemalloc/internal/size_classes.h"
 #include "jemalloc/internal/smoothstep.h"
 #include "jemalloc/internal/stats.h"
 #include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/witness.h"
 #include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/tsd.h"
 #include "jemalloc/internal/mb.h"
 #include "jemalloc/internal/extent.h"
 #include "jemalloc/internal/arena.h"
 #include "jemalloc/internal/bitmap.h"
 #include "jemalloc/internal/base.h"
 #include "jemalloc/internal/rtree.h"
@@ -386,23 +397,25 @@ typedef unsigned szind_t;
 #undef JEMALLOC_H_TYPES
 /******************************************************************************/
 #define	JEMALLOC_H_STRUCTS
 
 #include "jemalloc/internal/nstime.h"
 #include "jemalloc/internal/valgrind.h"
 #include "jemalloc/internal/util.h"
 #include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/spin.h"
 #include "jemalloc/internal/prng.h"
 #include "jemalloc/internal/ticker.h"
 #include "jemalloc/internal/ckh.h"
 #include "jemalloc/internal/size_classes.h"
 #include "jemalloc/internal/smoothstep.h"
 #include "jemalloc/internal/stats.h"
 #include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/witness.h"
 #include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/mb.h"
 #include "jemalloc/internal/bitmap.h"
 #define	JEMALLOC_ARENA_STRUCTS_A
 #include "jemalloc/internal/arena.h"
 #undef JEMALLOC_ARENA_STRUCTS_A
 #include "jemalloc/internal/extent.h"
 #define	JEMALLOC_ARENA_STRUCTS_B
@@ -435,66 +448,77 @@ extern bool	opt_xmalloc;
 extern bool	opt_zero;
 extern unsigned	opt_narenas;
 
 extern bool	in_valgrind;
 
 /* Number of CPUs. */
 extern unsigned	ncpus;
 
+/* Number of arenas used for automatic multiplexing of threads and arenas. */
+extern unsigned	narenas_auto;
+
 /*
  * Arenas that are used to service external requests.  Not all elements of the
  * arenas array are necessarily used; arenas are created lazily as needed.
  */
 extern arena_t	**arenas;
 
 /*
+ * pind2sz_tab encodes the same information as could be computed by
+ * pind2sz_compute().
+ */
+extern size_t const	pind2sz_tab[NPSIZES];
+/*
  * index2size_tab encodes the same information as could be computed (at
  * unacceptable cost in some code paths) by index2size_compute().
  */
-extern size_t const	index2size_tab[NSIZES+1];
+extern size_t const	index2size_tab[NSIZES];
 /*
  * size2index_tab is a compact lookup table that rounds request sizes up to
  * size classes.  In order to reduce cache footprint, the table is compressed,
  * and all accesses are via size2index().
  */
 extern uint8_t const	size2index_tab[];
 
+arena_t	*a0get(void);
 void	*a0malloc(size_t size);
 void	a0dalloc(void *ptr);
 void	*bootstrap_malloc(size_t size);
 void	*bootstrap_calloc(size_t num, size_t size);
 void	bootstrap_free(void *ptr);
-arena_t	*arenas_extend(unsigned ind);
 unsigned	narenas_total_get(void);
-arena_t	*arena_init(unsigned ind);
+arena_t	*arena_init(tsdn_t *tsdn, unsigned ind);
 arena_tdata_t	*arena_tdata_get_hard(tsd_t *tsd, unsigned ind);
-arena_t	*arena_choose_hard(tsd_t *tsd);
+arena_t	*arena_choose_hard(tsd_t *tsd, bool internal);
 void	arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind);
 void	thread_allocated_cleanup(tsd_t *tsd);
 void	thread_deallocated_cleanup(tsd_t *tsd);
+void	iarena_cleanup(tsd_t *tsd);
 void	arena_cleanup(tsd_t *tsd);
 void	arenas_tdata_cleanup(tsd_t *tsd);
 void	narenas_tdata_cleanup(tsd_t *tsd);
 void	arenas_tdata_bypass_cleanup(tsd_t *tsd);
 void	jemalloc_prefork(void);
 void	jemalloc_postfork_parent(void);
 void	jemalloc_postfork_child(void);
 
 #include "jemalloc/internal/nstime.h"
 #include "jemalloc/internal/valgrind.h"
 #include "jemalloc/internal/util.h"
 #include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/spin.h"
 #include "jemalloc/internal/prng.h"
 #include "jemalloc/internal/ticker.h"
 #include "jemalloc/internal/ckh.h"
 #include "jemalloc/internal/size_classes.h"
 #include "jemalloc/internal/smoothstep.h"
 #include "jemalloc/internal/stats.h"
 #include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/witness.h"
 #include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/mb.h"
 #include "jemalloc/internal/bitmap.h"
 #include "jemalloc/internal/extent.h"
 #include "jemalloc/internal/arena.h"
 #include "jemalloc/internal/base.h"
 #include "jemalloc/internal/rtree.h"
 #include "jemalloc/internal/pages.h"
@@ -509,67 +533,154 @@ void	jemalloc_postfork_child(void);
 #undef JEMALLOC_H_EXTERNS
 /******************************************************************************/
 #define	JEMALLOC_H_INLINES
 
 #include "jemalloc/internal/nstime.h"
 #include "jemalloc/internal/valgrind.h"
 #include "jemalloc/internal/util.h"
 #include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/spin.h"
 #include "jemalloc/internal/prng.h"
 #include "jemalloc/internal/ticker.h"
 #include "jemalloc/internal/ckh.h"
 #include "jemalloc/internal/size_classes.h"
 #include "jemalloc/internal/smoothstep.h"
 #include "jemalloc/internal/stats.h"
 #include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/tsd.h"
+#include "jemalloc/internal/witness.h"
 #include "jemalloc/internal/mutex.h"
-#include "jemalloc/internal/tsd.h"
 #include "jemalloc/internal/mb.h"
 #include "jemalloc/internal/extent.h"
 #include "jemalloc/internal/base.h"
 #include "jemalloc/internal/rtree.h"
 #include "jemalloc/internal/pages.h"
 #include "jemalloc/internal/chunk.h"
 #include "jemalloc/internal/huge.h"
 
 #ifndef JEMALLOC_ENABLE_INLINE
+pszind_t	psz2ind(size_t psz);
+size_t	pind2sz_compute(pszind_t pind);
+size_t	pind2sz_lookup(pszind_t pind);
+size_t	pind2sz(pszind_t pind);
+size_t	psz2u(size_t psz);
 szind_t	size2index_compute(size_t size);
 szind_t	size2index_lookup(size_t size);
 szind_t	size2index(size_t size);
 size_t	index2size_compute(szind_t index);
 size_t	index2size_lookup(szind_t index);
 size_t	index2size(szind_t index);
 size_t	s2u_compute(size_t size);
 size_t	s2u_lookup(size_t size);
 size_t	s2u(size_t size);
 size_t	sa2u(size_t size, size_t alignment);
+arena_t	*arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal);
 arena_t	*arena_choose(tsd_t *tsd, arena_t *arena);
+arena_t	*arena_ichoose(tsd_t *tsd, arena_t *arena);
 arena_tdata_t	*arena_tdata_get(tsd_t *tsd, unsigned ind,
     bool refresh_if_missing);
-arena_t	*arena_get(unsigned ind, bool init_if_missing);
+arena_t	*arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing);
 ticker_t	*decay_ticker_get(tsd_t *tsd, unsigned ind);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
+JEMALLOC_INLINE pszind_t
+psz2ind(size_t psz)
+{
+
+	if (unlikely(psz > HUGE_MAXCLASS))
+		return (NPSIZES);
+	{
+		pszind_t x = lg_floor((psz<<1)-1);
+		pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 : x -
+		    (LG_SIZE_CLASS_GROUP + LG_PAGE);
+		pszind_t grp = shift << LG_SIZE_CLASS_GROUP;
+
+		pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ?
+		    LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1;
+
+		size_t delta_inverse_mask = ZI(-1) << lg_delta;
+		pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) &
+		    ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1);
+
+		pszind_t ind = grp + mod;
+		return (ind);
+	}
+}
+
+JEMALLOC_INLINE size_t
+pind2sz_compute(pszind_t pind)
+{
+
+	{
+		size_t grp = pind >> LG_SIZE_CLASS_GROUP;
+		size_t mod = pind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1);
+
+		size_t grp_size_mask = ~((!!grp)-1);
+		size_t grp_size = ((ZU(1) << (LG_PAGE +
+		    (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask;
+
+		size_t shift = (grp == 0) ? 1 : grp;
+		size_t lg_delta = shift + (LG_PAGE-1);
+		size_t mod_size = (mod+1) << lg_delta;
+
+		size_t sz = grp_size + mod_size;
+		return (sz);
+	}
+}
+
+JEMALLOC_INLINE size_t
+pind2sz_lookup(pszind_t pind)
+{
+	size_t ret = (size_t)pind2sz_tab[pind];
+	assert(ret == pind2sz_compute(pind));
+	return (ret);
+}
+
+JEMALLOC_INLINE size_t
+pind2sz(pszind_t pind)
+{
+
+	assert(pind < NPSIZES);
+	return (pind2sz_lookup(pind));
+}
+
+JEMALLOC_INLINE size_t
+psz2u(size_t psz)
+{
+
+	if (unlikely(psz > HUGE_MAXCLASS))
+		return (0);
+	{
+		size_t x = lg_floor((psz<<1)-1);
+		size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ?
+		    LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1;
+		size_t delta = ZU(1) << lg_delta;
+		size_t delta_mask = delta - 1;
+		size_t usize = (psz + delta_mask) & ~delta_mask;
+		return (usize);
+	}
+}
+
 JEMALLOC_INLINE szind_t
 size2index_compute(size_t size)
 {
 
+	if (unlikely(size > HUGE_MAXCLASS))
+		return (NSIZES);
 #if (NTBINS != 0)
 	if (size <= (ZU(1) << LG_TINY_MAXCLASS)) {
 		szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1;
 		szind_t lg_ceil = lg_floor(pow2_ceil_zu(size));
 		return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin);
 	}
 #endif
 	{
-		szind_t x = unlikely(ZI(size) < 0) ? ((size<<1) ?
-		    (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1))
-		    : lg_floor((size<<1)-1);
+		szind_t x = lg_floor((size<<1)-1);
 		szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 :
 		    x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM);
 		szind_t grp = shift << LG_SIZE_CLASS_GROUP;
 
 		szind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1)
 		    ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1;
 
 		size_t delta_inverse_mask = ZI(-1) << lg_delta;
@@ -645,28 +756,28 @@ index2size(szind_t index)
 	assert(index < NSIZES);
 	return (index2size_lookup(index));
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
 s2u_compute(size_t size)
 {
 
+	if (unlikely(size > HUGE_MAXCLASS))
+		return (0);
 #if (NTBINS > 0)
 	if (size <= (ZU(1) << LG_TINY_MAXCLASS)) {
 		size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1;
 		size_t lg_ceil = lg_floor(pow2_ceil_zu(size));
 		return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) :
 		    (ZU(1) << lg_ceil));
 	}
 #endif
 	{
-		size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ?
-		    (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1))
-		    : lg_floor((size<<1)-1);
+		size_t x = lg_floor((size<<1)-1);
 		size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1)
 		    ?  LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1;
 		size_t delta = ZU(1) << lg_delta;
 		size_t delta_mask = delta - 1;
 		size_t usize = (size + delta_mask) & ~delta_mask;
 		return (usize);
 	}
 }
@@ -775,29 +886,44 @@ sa2u(size_t size, size_t alignment)
 		/* size_t overflow. */
 		return (0);
 	}
 	return (usize);
 }
 
 /* Choose an arena based on a per-thread value. */
 JEMALLOC_INLINE arena_t *
-arena_choose(tsd_t *tsd, arena_t *arena)
+arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal)
 {
 	arena_t *ret;
 
 	if (arena != NULL)
 		return (arena);
 
-	if (unlikely((ret = tsd_arena_get(tsd)) == NULL))
-		ret = arena_choose_hard(tsd);
+	ret = internal ? tsd_iarena_get(tsd) : tsd_arena_get(tsd);
+	if (unlikely(ret == NULL))
+		ret = arena_choose_hard(tsd, internal);
 
 	return (ret);
 }
 
+JEMALLOC_INLINE arena_t *
+arena_choose(tsd_t *tsd, arena_t *arena)
+{
+
+	return (arena_choose_impl(tsd, arena, false));
+}
+
+JEMALLOC_INLINE arena_t *
+arena_ichoose(tsd_t *tsd, arena_t *arena)
+{
+
+	return (arena_choose_impl(tsd, arena, true));
+}
+
 JEMALLOC_INLINE arena_tdata_t *
 arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing)
 {
 	arena_tdata_t *tdata;
 	arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd);
 
 	if (unlikely(arenas_tdata == NULL)) {
 		/* arenas_tdata hasn't been initialized yet. */
@@ -814,27 +940,27 @@ arena_tdata_get(tsd_t *tsd, unsigned ind
 
 	tdata = &arenas_tdata[ind];
 	if (likely(tdata != NULL) || !refresh_if_missing)
 		return (tdata);
 	return (arena_tdata_get_hard(tsd, ind));
 }
 
 JEMALLOC_INLINE arena_t *
-arena_get(unsigned ind, bool init_if_missing)
+arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing)
 {
 	arena_t *ret;
 
 	assert(ind <= MALLOCX_ARENA_MAX);
 
 	ret = arenas[ind];
 	if (unlikely(ret == NULL)) {
 		ret = atomic_read_p((void *)&arenas[ind]);
 		if (init_if_missing && unlikely(ret == NULL))
-			ret = arena_init(ind);
+			ret = arena_init(tsdn, ind);
 	}
 	return (ret);
 }
 
 JEMALLOC_INLINE ticker_t *
 decay_ticker_get(tsd_t *tsd, unsigned ind)
 {
 	arena_tdata_t *tdata;
@@ -858,171 +984,151 @@ decay_ticker_get(tsd_t *tsd, unsigned in
 #define	JEMALLOC_ARENA_INLINE_B
 #include "jemalloc/internal/arena.h"
 #undef JEMALLOC_ARENA_INLINE_B
 #include "jemalloc/internal/hash.h"
 #include "jemalloc/internal/quarantine.h"
 
 #ifndef JEMALLOC_ENABLE_INLINE
 arena_t	*iaalloc(const void *ptr);
-size_t	isalloc(const void *ptr, bool demote);
-void	*iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero,
+size_t	isalloc(tsdn_t *tsdn, const void *ptr, bool demote);
+void	*iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero,
     tcache_t *tcache, bool is_metadata, arena_t *arena, bool slow_path);
-void	*imalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache,
-    arena_t *arena);
-void	*imalloc(tsd_t *tsd, size_t size, szind_t ind, bool slow_path);
-void	*icalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache,
-    arena_t *arena);
-void	*icalloc(tsd_t *tsd, size_t size, szind_t ind);
-void	*ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
+void	*ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero,
+    bool slow_path);
+void	*ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
     tcache_t *tcache, bool is_metadata, arena_t *arena);
-void	*ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
+void	*ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
     tcache_t *tcache, arena_t *arena);
 void	*ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero);
-size_t	ivsalloc(const void *ptr, bool demote);
+size_t	ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote);
 size_t	u2rz(size_t usize);
-size_t	p2rz(const void *ptr);
-void	idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata,
+size_t	p2rz(tsdn_t *tsdn, const void *ptr);
+void	idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_metadata,
     bool slow_path);
-void	idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache);
 void	idalloc(tsd_t *tsd, void *ptr);
 void	iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path);
-void	isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache);
-void	isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache);
+void	isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
+    bool slow_path);
+void	isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache,
+    bool slow_path);
 void	*iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
     size_t extra, size_t alignment, bool zero, tcache_t *tcache,
     arena_t *arena);
 void	*iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
     size_t alignment, bool zero, tcache_t *tcache, arena_t *arena);
 void	*iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
     size_t alignment, bool zero);
-bool	ixalloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
+bool	ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
     size_t extra, size_t alignment, bool zero);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
 JEMALLOC_ALWAYS_INLINE arena_t *
 iaalloc(const void *ptr)
 {
 
 	assert(ptr != NULL);
 
 	return (arena_aalloc(ptr));
 }
 
 /*
  * Typical usage:
+ *   tsdn_t *tsdn = [...]
  *   void *ptr = [...]
- *   size_t sz = isalloc(ptr, config_prof);
+ *   size_t sz = isalloc(tsdn, ptr, config_prof);
  */
 JEMALLOC_ALWAYS_INLINE size_t
-isalloc(const void *ptr, bool demote)
+isalloc(tsdn_t *tsdn, const void *ptr, bool demote)
 {
 
 	assert(ptr != NULL);
 	/* Demotion only makes sense if config_prof is true. */
 	assert(config_prof || !demote);
 
-	return (arena_salloc(ptr, demote));
+	return (arena_salloc(tsdn, ptr, demote));
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, tcache_t *tcache,
+iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache,
     bool is_metadata, arena_t *arena, bool slow_path)
 {
 	void *ret;
 
 	assert(size != 0);
+	assert(!is_metadata || tcache == NULL);
+	assert(!is_metadata || arena == NULL || arena->ind < narenas_auto);
 
-	ret = arena_malloc(tsd, arena, size, ind, zero, tcache, slow_path);
+	ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path);
 	if (config_stats && is_metadata && likely(ret != NULL)) {
-		arena_metadata_allocated_add(iaalloc(ret), isalloc(ret,
+		arena_metadata_allocated_add(iaalloc(ret),
+		    isalloc(tsdn, ret, config_prof));
+	}
+	return (ret);
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path)
+{
+
+	return (iallocztm(tsd_tsdn(tsd), size, ind, zero, tcache_get(tsd, true),
+	    false, NULL, slow_path));
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
+    tcache_t *tcache, bool is_metadata, arena_t *arena)
+{
+	void *ret;
+
+	assert(usize != 0);
+	assert(usize == sa2u(usize, alignment));
+	assert(!is_metadata || tcache == NULL);
+	assert(!is_metadata || arena == NULL || arena->ind < narenas_auto);
+
+	ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache);
+	assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret);
+	if (config_stats && is_metadata && likely(ret != NULL)) {
+		arena_metadata_allocated_add(iaalloc(ret), isalloc(tsdn, ret,
 		    config_prof));
 	}
 	return (ret);
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-imalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, arena_t *arena)
-{
-
-	return (iallocztm(tsd, size, ind, false, tcache, false, arena, true));
-}
-
-JEMALLOC_ALWAYS_INLINE void *
-imalloc(tsd_t *tsd, size_t size, szind_t ind, bool slow_path)
-{
-
-	return (iallocztm(tsd, size, ind, false, tcache_get(tsd, true), false,
-	    NULL, slow_path));
-}
-
-JEMALLOC_ALWAYS_INLINE void *
-icalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, arena_t *arena)
-{
-
-	return (iallocztm(tsd, size, ind, true, tcache, false, arena, true));
-}
-
-JEMALLOC_ALWAYS_INLINE void *
-icalloc(tsd_t *tsd, size_t size, szind_t ind)
-{
-
-	return (iallocztm(tsd, size, ind, true, tcache_get(tsd, true), false,
-	    NULL, true));
-}
-
-JEMALLOC_ALWAYS_INLINE void *
-ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
-    tcache_t *tcache, bool is_metadata, arena_t *arena)
-{
-	void *ret;
-
-	assert(usize != 0);
-	assert(usize == sa2u(usize, alignment));
-
-	ret = arena_palloc(tsd, arena, usize, alignment, zero, tcache);
-	assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret);
-	if (config_stats && is_metadata && likely(ret != NULL)) {
-		arena_metadata_allocated_add(iaalloc(ret), isalloc(ret,
-		    config_prof));
-	}
-	return (ret);
-}
-
-JEMALLOC_ALWAYS_INLINE void *
-ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
+ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
     tcache_t *tcache, arena_t *arena)
 {
 
-	return (ipallocztm(tsd, usize, alignment, zero, tcache, false, arena));
+	return (ipallocztm(tsdn, usize, alignment, zero, tcache, false, arena));
 }
 
 JEMALLOC_ALWAYS_INLINE void *
 ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero)
 {
 
-	return (ipallocztm(tsd, usize, alignment, zero, tcache_get(tsd, true),
-	    false, NULL));
+	return (ipallocztm(tsd_tsdn(tsd), usize, alignment, zero,
+	    tcache_get(tsd, true), false, NULL));
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-ivsalloc(const void *ptr, bool demote)
+ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote)
 {
 	extent_node_t *node;
 
 	/* Return 0 if ptr is not within a chunk managed by jemalloc. */
 	node = chunk_lookup(ptr, false);
 	if (node == NULL)
 		return (0);
 	/* Only arena chunks should be looked up via interior pointers. */
 	assert(extent_node_addr_get(node) == ptr ||
 	    extent_node_achunk_get(node));
 
-	return (isalloc(ptr, demote));
+	return (isalloc(tsdn, ptr, demote));
 }
 
 JEMALLOC_INLINE size_t
 u2rz(size_t usize)
 {
 	size_t ret;
 
 	if (usize <= SMALL_MAXCLASS) {
@@ -1030,107 +1136,104 @@ u2rz(size_t usize)
 		ret = arena_bin_info[binind].redzone_size;
 	} else
 		ret = 0;
 
 	return (ret);
 }
 
 JEMALLOC_INLINE size_t
-p2rz(const void *ptr)
+p2rz(tsdn_t *tsdn, const void *ptr)
 {
-	size_t usize = isalloc(ptr, false);
+	size_t usize = isalloc(tsdn, ptr, false);
 
 	return (u2rz(usize));
 }
 
 JEMALLOC_ALWAYS_INLINE void
-idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata,
+idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_metadata,
     bool slow_path)
 {
 
 	assert(ptr != NULL);
+	assert(!is_metadata || tcache == NULL);
+	assert(!is_metadata || iaalloc(ptr)->ind < narenas_auto);
 	if (config_stats && is_metadata) {
-		arena_metadata_allocated_sub(iaalloc(ptr), isalloc(ptr,
+		arena_metadata_allocated_sub(iaalloc(ptr), isalloc(tsdn, ptr,
 		    config_prof));
 	}
 
-	arena_dalloc(tsd, ptr, tcache, slow_path);
-}
-
-JEMALLOC_ALWAYS_INLINE void
-idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache)
-{
-
-	idalloctm(tsd, ptr, tcache, false, true);
+	arena_dalloc(tsdn, ptr, tcache, slow_path);
 }
 
 JEMALLOC_ALWAYS_INLINE void
 idalloc(tsd_t *tsd, void *ptr)
 {
 
-	idalloctm(tsd, ptr, tcache_get(tsd, false), false, true);
+	idalloctm(tsd_tsdn(tsd), ptr, tcache_get(tsd, false), false, true);
 }
 
 JEMALLOC_ALWAYS_INLINE void
 iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path)
 {
 
 	if (slow_path && config_fill && unlikely(opt_quarantine))
 		quarantine(tsd, ptr);
 	else
-		idalloctm(tsd, ptr, tcache, false, slow_path);
+		idalloctm(tsd_tsdn(tsd), ptr, tcache, false, slow_path);
 }
 
 JEMALLOC_ALWAYS_INLINE void
-isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache)
+isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
+    bool slow_path)
 {
 
-	arena_sdalloc(tsd, ptr, size, tcache);
+	arena_sdalloc(tsdn, ptr, size, tcache, slow_path);
 }
 
 JEMALLOC_ALWAYS_INLINE void
-isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache)
+isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, bool slow_path)
 {
 
-	if (config_fill && unlikely(opt_quarantine))
+	if (slow_path && config_fill && unlikely(opt_quarantine))
 		quarantine(tsd, ptr);
 	else
-		isdalloct(tsd, ptr, size, tcache);
+		isdalloct(tsd_tsdn(tsd), ptr, size, tcache, slow_path);
 }
 
 JEMALLOC_ALWAYS_INLINE void *
 iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
     size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena)
 {
 	void *p;
 	size_t usize, copysize;
 
 	usize = sa2u(size + extra, alignment);
 	if (unlikely(usize == 0 || usize > HUGE_MAXCLASS))
 		return (NULL);
-	p = ipalloct(tsd, usize, alignment, zero, tcache, arena);
+	p = ipalloct(tsd_tsdn(tsd), usize, alignment, zero, tcache, arena);
 	if (p == NULL) {
 		if (extra == 0)
 			return (NULL);
 		/* Try again, without extra this time. */
 		usize = sa2u(size, alignment);
 		if (unlikely(usize == 0 || usize > HUGE_MAXCLASS))
 			return (NULL);
-		p = ipalloct(tsd, usize, alignment, zero, tcache, arena);
+		p = ipalloct(tsd_tsdn(tsd), usize, alignment, zero, tcache,
+		    arena);
 		if (p == NULL)
 			return (NULL);
 	}
 	/*
 	 * Copy at most size bytes (not size+extra), since the caller has no
 	 * expectation that the extra bytes will be reliably preserved.
 	 */
 	copysize = (size < oldsize) ? size : oldsize;
 	memcpy(p, ptr, copysize);
-	isqalloc(tsd, ptr, oldsize, tcache);
+	isqalloc(tsd, ptr, oldsize, tcache, true);
 	return (p);
 }
 
 JEMALLOC_ALWAYS_INLINE void *
 iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment,
     bool zero, tcache_t *tcache, arena_t *arena)
 {
 
@@ -1156,30 +1259,30 @@ iralloc(tsd_t *tsd, void *ptr, size_t ol
     bool zero)
 {
 
 	return (iralloct(tsd, ptr, oldsize, size, alignment, zero,
 	    tcache_get(tsd, true), NULL));
 }
 
 JEMALLOC_ALWAYS_INLINE bool
-ixalloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t extra,
+ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra,
     size_t alignment, bool zero)
 {
 
 	assert(ptr != NULL);
 	assert(size != 0);
 
 	if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1))
 	    != 0) {
 		/* Existing object alignment is inadequate. */
 		return (true);
 	}
 
-	return (arena_ralloc_no_move(tsd, ptr, oldsize, size, extra, zero));
+	return (arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero));
 }
 #endif
 
 #include "jemalloc/internal/prof.h"
 
 #undef JEMALLOC_H_INLINES
 /******************************************************************************/
 #endif /* JEMALLOC_INTERNAL_H */
--- a/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal_decls.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal_decls.h
@@ -12,18 +12,28 @@
 #  if !defined(__pnacl__) && !defined(__native_client__)
 #    include <sys/syscall.h>
 #    if !defined(SYS_write) && defined(__NR_write)
 #      define SYS_write __NR_write
 #    endif
 #    include <sys/uio.h>
 #  endif
 #  include <pthread.h>
+#  ifdef JEMALLOC_OS_UNFAIR_LOCK
+#    include <os/lock.h>
+#  endif
+#  ifdef JEMALLOC_GLIBC_MALLOC_HOOK
+#    include <sched.h>
+#  endif
 #  include <errno.h>
 #  include <sys/time.h>
+#  include <time.h>
+#  ifdef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME
+#    include <mach/mach_time.h>
+#  endif
 #endif
 #include <sys/types.h>
 
 #include <limits.h>
 #ifndef SIZE_T_MAX
 #  define SIZE_T_MAX	SIZE_MAX
 #endif
 #include <stdarg.h>
--- a/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal_defs.h.in
@@ -56,32 +56,55 @@
 #undef JEMALLOC_HAVE_BUILTIN_CLZ
 
 /*
  * Defined if madvise(2) is available.
  */
 #undef JEMALLOC_HAVE_MADVISE
 
 /*
+ * Defined if os_unfair_lock_*() functions are available, as provided by Darwin.
+ */
+#undef JEMALLOC_OS_UNFAIR_LOCK
+
+/*
  * Defined if OSSpin*() functions are available, as provided by Darwin, and
  * documented in the spinlock(3) manual page.
  */
 #undef JEMALLOC_OSSPIN
 
+/* Defined if syscall(2) is available. */
+#undef JEMALLOC_HAVE_SYSCALL
+
 /*
  * Defined if secure_getenv(3) is available.
  */
 #undef JEMALLOC_HAVE_SECURE_GETENV
 
 /*
  * Defined if issetugid(2) is available.
  */
 #undef JEMALLOC_HAVE_ISSETUGID
 
 /*
+ * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available.
+ */
+#undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE
+
+/*
+ * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available.
+ */
+#undef JEMALLOC_HAVE_CLOCK_MONOTONIC
+
+/*
+ * Defined if mach_absolute_time() is available.
+ */
+#undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME
+
+/*
  * Defined if _malloc_thread_cleanup() exists.  At least in the case of
  * FreeBSD, pthread_key_create() allocates, which if used during malloc
  * bootstrapping will cause recursion into the pthreads library.  Therefore, if
  * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in
  * malloc_tsd.
  */
 #undef JEMALLOC_MALLOC_THREAD_CLEANUP
 
@@ -184,16 +207,22 @@
  * of mmap()/munmap() calls will cause virtual memory map holes.
  */
 #undef JEMALLOC_MUNMAP
 
 /* TLS is used to map arenas and magazine caches to threads. */
 #undef JEMALLOC_TLS
 
 /*
+ * Used to mark unreachable code to quiet "end of non-void" compiler warnings.
+ * Don't use this directly; instead use unreachable() from util.h
+ */
+#undef JEMALLOC_INTERNAL_UNREACHABLE
+
+/*
  * ffs*() functions to use for bitmapping.  Don't use these directly; instead,
  * use ffs_*() from util.h.
  */
 #undef JEMALLOC_INTERNAL_FFSLL
 #undef JEMALLOC_INTERNAL_FFSL
 #undef JEMALLOC_INTERNAL_FFS
 
 /*
@@ -210,16 +239,25 @@
 
 /*
  * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
  */
 #undef JEMALLOC_ZONE
 #undef JEMALLOC_ZONE_VERSION
 
 /*
+ * Methods for determining whether the OS overcommits.
+ * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's
+ *                                         /proc/sys/vm.overcommit_memory file.
+ * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl.
+ */
+#undef JEMALLOC_SYSCTL_VM_OVERCOMMIT
+#undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
+
+/*
  * Methods for purging unused pages differ between operating systems.
  *
  *   madvise(..., MADV_DONTNEED) : On Linux, this immediately discards pages,
  *                                 such that new pages will be demand-zeroed if
  *                                 the address region is later touched.
  *   madvise(..., MADV_FREE) : On FreeBSD and Darwin, this marks pages as being
  *                             unused, such that they will be discarded rather
  *                             than swapped out.
--- a/memory/jemalloc/src/include/jemalloc/internal/mb.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/mb.h
@@ -37,27 +37,27 @@ mb_write(void)
 	asm volatile ("pusha;"
 	    "xor  %%eax,%%eax;"
 	    "cpuid;"
 	    "popa;"
 	    : /* Outputs. */
 	    : /* Inputs. */
 	    : "memory" /* Clobbers. */
 	    );
-#else
+#  else
 	/*
 	 * This is hopefully enough to keep the compiler from reordering
 	 * instructions around this one.
 	 */
 	asm volatile ("nop;"
 	    : /* Outputs. */
 	    : /* Inputs. */
 	    : "memory" /* Clobbers. */
 	    );
-#endif
+#  endif
 }
 #elif (defined(__amd64__) || defined(__x86_64__))
 JEMALLOC_INLINE void
 mb_write(void)
 {
 
 	asm volatile ("sfence"
 	    : /* Outputs. */
@@ -99,17 +99,17 @@ mb_write(void)
  * This is much slower than a simple memory barrier, but the semantics of mutex
  * unlock make this work.
  */
 JEMALLOC_INLINE void
 mb_write(void)
 {
 	malloc_mutex_t mtx;
 
-	malloc_mutex_init(&mtx);
-	malloc_mutex_lock(&mtx);
-	malloc_mutex_unlock(&mtx);
+	malloc_mutex_init(&mtx, "mb", WITNESS_RANK_OMIT);
+	malloc_mutex_lock(TSDN_NULL, &mtx);
+	malloc_mutex_unlock(TSDN_NULL, &mtx);
 }
 #endif
 #endif
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/internal/mutex.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/mutex.h
@@ -1,111 +1,147 @@
 /******************************************************************************/
 #ifdef JEMALLOC_H_TYPES
 
 typedef struct malloc_mutex_s malloc_mutex_t;
 
 #ifdef _WIN32
 #  define MALLOC_MUTEX_INITIALIZER
+#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
+#  define MALLOC_MUTEX_INITIALIZER					\
+     {OS_UNFAIR_LOCK_INIT, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)}
 #elif (defined(JEMALLOC_OSSPIN))
-#  define MALLOC_MUTEX_INITIALIZER {0}
+#  define MALLOC_MUTEX_INITIALIZER {0, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)}
 #elif (defined(JEMALLOC_MUTEX_INIT_CB))
-#  define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL}
+#  define MALLOC_MUTEX_INITIALIZER					\
+    {PTHREAD_MUTEX_INITIALIZER, NULL, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)}
 #else
 #  if (defined(JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP) &&		\
        defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP))
 #    define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP
-#    define MALLOC_MUTEX_INITIALIZER {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP}
+#    define MALLOC_MUTEX_INITIALIZER					\
+       {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP,				\
+        WITNESS_INITIALIZER(WITNESS_RANK_OMIT)}
 #  else
 #    define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT
-#    define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER}
+#    define MALLOC_MUTEX_INITIALIZER					\
+       {PTHREAD_MUTEX_INITIALIZER, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)}
 #  endif
 #endif
 
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
 
 struct malloc_mutex_s {
 #ifdef _WIN32
 #  if _WIN32_WINNT >= 0x0600
 	SRWLOCK         	lock;
 #  else
 	CRITICAL_SECTION	lock;
 #  endif
+#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
+	os_unfair_lock		lock;
 #elif (defined(JEMALLOC_OSSPIN))
 	OSSpinLock		lock;
 #elif (defined(JEMALLOC_MUTEX_INIT_CB))
 	pthread_mutex_t		lock;
 	malloc_mutex_t		*postponed_next;
 #else
 	pthread_mutex_t		lock;
 #endif
+	witness_t		witness;
 };
 
 #endif /* JEMALLOC_H_STRUCTS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
 #ifdef JEMALLOC_LAZY_LOCK
 extern bool isthreaded;
 #else
 #  undef isthreaded /* Undo private_namespace.h definition. */
 #  define isthreaded true
 #endif
 
-bool	malloc_mutex_init(malloc_mutex_t *mutex);
-void	malloc_mutex_prefork(malloc_mutex_t *mutex);
-void	malloc_mutex_postfork_parent(malloc_mutex_t *mutex);
-void	malloc_mutex_postfork_child(malloc_mutex_t *mutex);
-bool	mutex_boot(void);
+bool	malloc_mutex_init(malloc_mutex_t *mutex, const char *name,
+    witness_rank_t rank);
+void	malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex);
+void	malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex);
+void	malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex);
+bool	malloc_mutex_boot(void);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
-void	malloc_mutex_lock(malloc_mutex_t *mutex);
-void	malloc_mutex_unlock(malloc_mutex_t *mutex);
+void	malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex);
+void	malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex);
+void	malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex);
+void	malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_))
 JEMALLOC_INLINE void
-malloc_mutex_lock(malloc_mutex_t *mutex)
+malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex)
 {
 
 	if (isthreaded) {
+		witness_assert_not_owner(tsdn, &mutex->witness);
 #ifdef _WIN32
 #  if _WIN32_WINNT >= 0x0600
 		AcquireSRWLockExclusive(&mutex->lock);
 #  else
 		EnterCriticalSection(&mutex->lock);
 #  endif
+#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
+		os_unfair_lock_lock(&mutex->lock);
 #elif (defined(JEMALLOC_OSSPIN))
 		OSSpinLockLock(&mutex->lock);
 #else
 		pthread_mutex_lock(&mutex->lock);
 #endif
+		witness_lock(tsdn, &mutex->witness);
 	}
 }
 
 JEMALLOC_INLINE void
-malloc_mutex_unlock(malloc_mutex_t *mutex)
+malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex)
 {
 
 	if (isthreaded) {
+		witness_unlock(tsdn, &mutex->witness);
 #ifdef _WIN32
 #  if _WIN32_WINNT >= 0x0600
 		ReleaseSRWLockExclusive(&mutex->lock);
 #  else
 		LeaveCriticalSection(&mutex->lock);
 #  endif
+#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
+		os_unfair_lock_unlock(&mutex->lock);
 #elif (defined(JEMALLOC_OSSPIN))
 		OSSpinLockUnlock(&mutex->lock);
 #else
 		pthread_mutex_unlock(&mutex->lock);
 #endif
 	}
 }
+
+JEMALLOC_INLINE void
+malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex)
+{
+
+	if (isthreaded)
+		witness_assert_owner(tsdn, &mutex->witness);
+}
+
+JEMALLOC_INLINE void
+malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex)
+{
+
+	if (isthreaded)
+		witness_assert_not_owner(tsdn, &mutex->witness);
+}
 #endif
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/internal/nstime.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/nstime.h
@@ -1,18 +1,15 @@
 /******************************************************************************/
 #ifdef JEMALLOC_H_TYPES
 
-#define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \
-    && _POSIX_MONOTONIC_CLOCK >= 0
-
 typedef struct nstime_s nstime_t;
 
 /* Maximum supported number of seconds (~584 years). */
-#define	NSTIME_SEC_MAX	18446744072
+#define	NSTIME_SEC_MAX	KQU(18446744072)
 
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
 
 struct nstime_s {
 	uint64_t	ns;
 };
@@ -29,19 +26,22 @@ uint64_t	nstime_nsec(const nstime_t *tim
 void	nstime_copy(nstime_t *time, const nstime_t *source);
 int	nstime_compare(const nstime_t *a, const nstime_t *b);
 void	nstime_add(nstime_t *time, const nstime_t *addend);
 void	nstime_subtract(nstime_t *time, const nstime_t *subtrahend);
 void	nstime_imultiply(nstime_t *time, uint64_t multiplier);
 void	nstime_idivide(nstime_t *time, uint64_t divisor);
 uint64_t	nstime_divide(const nstime_t *time, const nstime_t *divisor);
 #ifdef JEMALLOC_JET
+typedef bool (nstime_monotonic_t)(void);
+extern nstime_monotonic_t *nstime_monotonic;
 typedef bool (nstime_update_t)(nstime_t *);
 extern nstime_update_t *nstime_update;
 #else
+bool	nstime_monotonic(void);
 bool	nstime_update(nstime_t *time);
 #endif
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #endif /* JEMALLOC_H_INLINES */
--- a/memory/jemalloc/src/include/jemalloc/internal/pages.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/pages.h
@@ -4,23 +4,24 @@
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
 
 #endif /* JEMALLOC_H_STRUCTS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
-void	*pages_map(void *addr, size_t size);
+void	*pages_map(void *addr, size_t size, bool *commit);
 void	pages_unmap(void *addr, size_t size);
 void	*pages_trim(void *addr, size_t alloc_size, size_t leadsize,
-    size_t size);
+    size_t size, bool *commit);
 bool	pages_commit(void *addr, size_t size);
 bool	pages_decommit(void *addr, size_t size);
 bool	pages_purge(void *addr, size_t size);
+void	pages_boot(void);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
 
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/include/jemalloc/internal/ph.h
@@ -0,0 +1,345 @@
+/*
+ * A Pairing Heap implementation.
+ *
+ * "The Pairing Heap: A New Form of Self-Adjusting Heap"
+ * https://www.cs.cmu.edu/~sleator/papers/pairing-heaps.pdf
+ *
+ * With auxiliary twopass list, described in a follow on paper.
+ *
+ * "Pairing Heaps: Experiments and Analysis"
+ * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.106.2988&rep=rep1&type=pdf
+ *
+ *******************************************************************************
+ */
+
+#ifndef PH_H_
+#define	PH_H_
+
+/* Node structure. */
+#define	phn(a_type)							\
+struct {								\
+	a_type	*phn_prev;						\
+	a_type	*phn_next;						\
+	a_type	*phn_lchild;						\
+}
+
+/* Root structure. */
+#define	ph(a_type)							\
+struct {								\
+	a_type	*ph_root;						\
+}
+
+/* Internal utility macros. */
+#define	phn_lchild_get(a_type, a_field, a_phn)				\
+	(a_phn->a_field.phn_lchild)
+#define	phn_lchild_set(a_type, a_field, a_phn, a_lchild) do {		\
+	a_phn->a_field.phn_lchild = a_lchild;				\
+} while (0)
+
+#define	phn_next_get(a_type, a_field, a_phn)				\
+	(a_phn->a_field.phn_next)
+#define	phn_prev_set(a_type, a_field, a_phn, a_prev) do {		\
+	a_phn->a_field.phn_prev = a_prev;				\
+} while (0)
+
+#define	phn_prev_get(a_type, a_field, a_phn)				\
+	(a_phn->a_field.phn_prev)
+#define	phn_next_set(a_type, a_field, a_phn, a_next) do {		\
+	a_phn->a_field.phn_next = a_next;				\
+} while (0)
+
+#define	phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, a_cmp) do {	\
+	a_type *phn0child;						\
+									\
+	assert(a_phn0 != NULL);						\
+	assert(a_phn1 != NULL);						\
+	assert(a_cmp(a_phn0, a_phn1) <= 0);				\
+									\
+	phn_prev_set(a_type, a_field, a_phn1, a_phn0);			\
+	phn0child = phn_lchild_get(a_type, a_field, a_phn0);		\
+	phn_next_set(a_type, a_field, a_phn1, phn0child);		\
+	if (phn0child != NULL)						\
+		phn_prev_set(a_type, a_field, phn0child, a_phn1);	\
+	phn_lchild_set(a_type, a_field, a_phn0, a_phn1);		\
+} while (0)
+
+#define	phn_merge(a_type, a_field, a_phn0, a_phn1, a_cmp, r_phn) do {	\
+	if (a_phn0 == NULL)						\
+		r_phn = a_phn1;						\
+	else if (a_phn1 == NULL)					\
+		r_phn = a_phn0;						\
+	else if (a_cmp(a_phn0, a_phn1) < 0) {				\
+		phn_merge_ordered(a_type, a_field, a_phn0, a_phn1,	\
+		    a_cmp);						\
+		r_phn = a_phn0;						\
+	} else {							\
+		phn_merge_ordered(a_type, a_field, a_phn1, a_phn0,	\
+		    a_cmp);						\
+		r_phn = a_phn1;						\
+	}								\
+} while (0)
+
+#define	ph_merge_siblings(a_type, a_field, a_phn, a_cmp, r_phn) do {	\
+	a_type *head = NULL;						\
+	a_type *tail = NULL;						\
+	a_type *phn0 = a_phn;						\
+	a_type *phn1 = phn_next_get(a_type, a_field, phn0);		\
+									\
+	/*								\
+	 * Multipass merge, wherein the first two elements of a FIFO	\
+	 * are repeatedly merged, and each result is appended to the	\
+	 * singly linked FIFO, until the FIFO contains only a single	\
+	 * element.  We start with a sibling list but no reference to	\
+	 * its tail, so we do a single pass over the sibling list to	\
+	 * populate the FIFO.						\
+	 */								\
+	if (phn1 != NULL) {						\
+		a_type *phnrest = phn_next_get(a_type, a_field, phn1);	\
+		if (phnrest != NULL)					\
+			phn_prev_set(a_type, a_field, phnrest, NULL);	\
+		phn_prev_set(a_type, a_field, phn0, NULL);		\
+		phn_next_set(a_type, a_field, phn0, NULL);		\
+		phn_prev_set(a_type, a_field, phn1, NULL);		\
+		phn_next_set(a_type, a_field, phn1, NULL);		\
+		phn_merge(a_type, a_field, phn0, phn1, a_cmp, phn0);	\
+		head = tail = phn0;					\
+		phn0 = phnrest;						\
+		while (phn0 != NULL) {					\
+			phn1 = phn_next_get(a_type, a_field, phn0);	\
+			if (phn1 != NULL) {				\
+				phnrest = phn_next_get(a_type, a_field,	\
+				    phn1);				\
+				if (phnrest != NULL) {			\
+					phn_prev_set(a_type, a_field,	\
+					    phnrest, NULL);		\
+				}					\
+				phn_prev_set(a_type, a_field, phn0,	\
+				    NULL);				\
+				phn_next_set(a_type, a_field, phn0,	\
+				    NULL);				\
+				phn_prev_set(a_type, a_field, phn1,	\
+				    NULL);				\
+				phn_next_set(a_type, a_field, phn1,	\
+				    NULL);				\
+				phn_merge(a_type, a_field, phn0, phn1,	\
+				    a_cmp, phn0);			\
+				phn_next_set(a_type, a_field, tail,	\
+				    phn0);				\
+				tail = phn0;				\
+				phn0 = phnrest;				\
+			} else {					\
+				phn_next_set(a_type, a_field, tail,	\
+				    phn0);				\
+				tail = phn0;				\
+				phn0 = NULL;				\
+			}						\
+		}							\
+		phn0 = head;						\
+		phn1 = phn_next_get(a_type, a_field, phn0);		\
+		if (phn1 != NULL) {					\
+			while (true) {					\
+				head = phn_next_get(a_type, a_field,	\
+				    phn1);				\
+				assert(phn_prev_get(a_type, a_field,	\
+				    phn0) == NULL);			\
+				phn_next_set(a_type, a_field, phn0,	\
+				    NULL);				\
+				assert(phn_prev_get(a_type, a_field,	\
+				    phn1) == NULL);			\
+				phn_next_set(a_type, a_field, phn1,	\
+				    NULL);				\
+				phn_merge(a_type, a_field, phn0, phn1,	\
+				    a_cmp, phn0);			\
+				if (head == NULL)			\
+					break;				\
+				phn_next_set(a_type, a_field, tail,	\
+				    phn0);				\
+				tail = phn0;				\
+				phn0 = head;				\
+				phn1 = phn_next_get(a_type, a_field,	\
+				    phn0);				\
+			}						\
+		}							\
+	}								\
+	r_phn = phn0;							\
+} while (0)
+
+#define	ph_merge_aux(a_type, a_field, a_ph, a_cmp) do {			\
+	a_type *phn = phn_next_get(a_type, a_field, a_ph->ph_root);	\
+	if (phn != NULL) {						\
+		phn_prev_set(a_type, a_field, a_ph->ph_root, NULL);	\
+		phn_next_set(a_type, a_field, a_ph->ph_root, NULL);	\
+		phn_prev_set(a_type, a_field, phn, NULL);		\
+		ph_merge_siblings(a_type, a_field, phn, a_cmp, phn);	\
+		assert(phn_next_get(a_type, a_field, phn) == NULL);	\
+		phn_merge(a_type, a_field, a_ph->ph_root, phn, a_cmp,	\
+		    a_ph->ph_root);					\
+	}								\
+} while (0)
+
+#define	ph_merge_children(a_type, a_field, a_phn, a_cmp, r_phn) do {	\
+	a_type *lchild = phn_lchild_get(a_type, a_field, a_phn);	\
+	if (lchild == NULL)						\
+		r_phn = NULL;						\
+	else {								\
+		ph_merge_siblings(a_type, a_field, lchild, a_cmp,	\
+		    r_phn);						\
+	}								\
+} while (0)
+
+/*
+ * The ph_proto() macro generates function prototypes that correspond to the
+ * functions generated by an equivalently parameterized call to ph_gen().
+ */
+#define	ph_proto(a_attr, a_prefix, a_ph_type, a_type)			\
+a_attr void	a_prefix##new(a_ph_type *ph);				\
+a_attr bool	a_prefix##empty(a_ph_type *ph);				\
+a_attr a_type	*a_prefix##first(a_ph_type *ph);			\
+a_attr void	a_prefix##insert(a_ph_type *ph, a_type *phn);		\
+a_attr a_type	*a_prefix##remove_first(a_ph_type *ph);			\
+a_attr void	a_prefix##remove(a_ph_type *ph, a_type *phn);
+
+/*
+ * The ph_gen() macro generates a type-specific pairing heap implementation,
+ * based on the above cpp macros.
+ */
+#define	ph_gen(a_attr, a_prefix, a_ph_type, a_type, a_field, a_cmp)	\
+a_attr void								\
+a_prefix##new(a_ph_type *ph)						\
+{									\
+									\
+	memset(ph, 0, sizeof(ph(a_type)));				\
+}									\
+a_attr bool								\
+a_prefix##empty(a_ph_type *ph)						\
+{									\
+									\
+	return (ph->ph_root == NULL);					\
+}									\
+a_attr a_type *								\
+a_prefix##first(a_ph_type *ph)						\
+{									\
+									\
+	if (ph->ph_root == NULL)					\
+		return (NULL);						\
+	ph_merge_aux(a_type, a_field, ph, a_cmp);			\
+	return (ph->ph_root);						\
+}									\
+a_attr void								\
+a_prefix##insert(a_ph_type *ph, a_type *phn)				\
+{									\
+									\
+	memset(&phn->a_field, 0, sizeof(phn(a_type)));			\
+									\
+	/*								\
+	 * Treat the root as an aux list during insertion, and lazily	\
+	 * merge during a_prefix##remove_first().  For elements that	\
+	 * are inserted, then removed via a_prefix##remove() before the	\
+	 * aux list is ever processed, this makes insert/remove		\
+	 * constant-time, whereas eager merging would make insert	\
+	 * O(log n).							\
+	 */								\
+	if (ph->ph_root == NULL)					\
+		ph->ph_root = phn;					\
+	else {								\
+		phn_next_set(a_type, a_field, phn, phn_next_get(a_type,	\
+		    a_field, ph->ph_root));				\
+		if (phn_next_get(a_type, a_field, ph->ph_root) !=	\
+		    NULL) {						\
+			phn_prev_set(a_type, a_field,			\
+			    phn_next_get(a_type, a_field, ph->ph_root),	\
+			    phn);					\
+		}							\
+		phn_prev_set(a_type, a_field, phn, ph->ph_root);	\
+		phn_next_set(a_type, a_field, ph->ph_root, phn);	\
+	}								\
+}									\
+a_attr a_type *								\
+a_prefix##remove_first(a_ph_type *ph)					\
+{									\
+	a_type *ret;							\
+									\
+	if (ph->ph_root == NULL)					\
+		return (NULL);						\
+	ph_merge_aux(a_type, a_field, ph, a_cmp);			\
+									\
+	ret = ph->ph_root;						\
+									\
+	ph_merge_children(a_type, a_field, ph->ph_root, a_cmp,		\
+	    ph->ph_root);						\
+									\
+	return (ret);							\
+}									\
+a_attr void								\
+a_prefix##remove(a_ph_type *ph, a_type *phn)				\
+{									\
+	a_type *replace, *parent;					\
+									\
+	/*								\
+	 * We can delete from aux list without merging it, but we need	\
+	 * to merge if we are dealing with the root node.		\
+	 */								\
+	if (ph->ph_root == phn) {					\
+		ph_merge_aux(a_type, a_field, ph, a_cmp);		\
+		if (ph->ph_root == phn) {				\
+			ph_merge_children(a_type, a_field, ph->ph_root,	\
+			    a_cmp, ph->ph_root);			\
+			return;						\
+		}							\
+	}								\
+									\
+	/* Get parent (if phn is leftmost child) before mutating. */	\
+	if ((parent = phn_prev_get(a_type, a_field, phn)) != NULL) {	\
+		if (phn_lchild_get(a_type, a_field, parent) != phn)	\
+			parent = NULL;					\
+	}								\
+	/* Find a possible replacement node, and link to parent. */	\
+	ph_merge_children(a_type, a_field, phn, a_cmp, replace);	\
+	/* Set next/prev for sibling linked list. */			\
+	if (replace != NULL) {						\
+		if (parent != NULL) {					\
+			phn_prev_set(a_type, a_field, replace, parent);	\
+			phn_lchild_set(a_type, a_field, parent,		\
+			    replace);					\
+		} else {						\
+			phn_prev_set(a_type, a_field, replace,		\
+			    phn_prev_get(a_type, a_field, phn));	\
+			if (phn_prev_get(a_type, a_field, phn) !=	\
+			    NULL) {					\
+				phn_next_set(a_type, a_field,		\
+				    phn_prev_get(a_type, a_field, phn),	\
+				    replace);				\
+			}						\
+		}							\
+		phn_next_set(a_type, a_field, replace,			\
+		    phn_next_get(a_type, a_field, phn));		\
+		if (phn_next_get(a_type, a_field, phn) != NULL) {	\
+			phn_prev_set(a_type, a_field,			\
+			    phn_next_get(a_type, a_field, phn),		\
+			    replace);					\
+		}							\
+	} else {							\
+		if (parent != NULL) {					\
+			a_type *next = phn_next_get(a_type, a_field,	\
+			    phn);					\
+			phn_lchild_set(a_type, a_field, parent, next);	\
+			if (next != NULL) {				\
+				phn_prev_set(a_type, a_field, next,	\
+				    parent);				\
+			}						\
+		} else {						\
+			assert(phn_prev_get(a_type, a_field, phn) !=	\
+			    NULL);					\
+			phn_next_set(a_type, a_field,			\
+			    phn_prev_get(a_type, a_field, phn),		\
+			    phn_next_get(a_type, a_field, phn));	\
+		}							\
+		if (phn_next_get(a_type, a_field, phn) != NULL) {	\
+			phn_prev_set(a_type, a_field,			\
+			    phn_next_get(a_type, a_field, phn),		\
+			    phn_prev_get(a_type, a_field, phn));	\
+		}							\
+	}								\
+}
+
+#endif /* PH_H_ */
--- a/memory/jemalloc/src/include/jemalloc/internal/private_symbols.txt
+++ b/memory/jemalloc/src/include/jemalloc/internal/private_symbols.txt
@@ -1,46 +1,48 @@
 a0dalloc
+a0get
 a0malloc
 arena_aalloc
 arena_alloc_junk_small
 arena_basic_stats_merge
 arena_bin_index
 arena_bin_info
-arena_bitselm_get
+arena_bitselm_get_const
+arena_bitselm_get_mutable
 arena_boot
 arena_choose
 arena_choose_hard
+arena_choose_impl
 arena_chunk_alloc_huge
 arena_chunk_cache_maybe_insert
 arena_chunk_cache_maybe_remove
 arena_chunk_dalloc_huge
 arena_chunk_ralloc_huge_expand
 arena_chunk_ralloc_huge_shrink
 arena_chunk_ralloc_huge_similar
 arena_cleanup
 arena_dalloc
 arena_dalloc_bin
 arena_dalloc_bin_junked_locked
 arena_dalloc_junk_large
-arena_dalloc_junk_large_impl
 arena_dalloc_junk_small
-arena_dalloc_junk_small_impl
 arena_dalloc_large
 arena_dalloc_large_junked_locked
 arena_dalloc_small
 arena_decay_tick
 arena_decay_ticks
 arena_decay_time_default_get
 arena_decay_time_default_set
 arena_decay_time_get
 arena_decay_time_set
 arena_dss_prec_get
 arena_dss_prec_set
 arena_get
+arena_ichoose
 arena_init
 arena_lg_dirty_mult_default_get
 arena_lg_dirty_mult_default_set
 arena_lg_dirty_mult_get
 arena_lg_dirty_mult_set
 arena_malloc
 arena_malloc_hard
 arena_malloc_large
@@ -57,26 +59,28 @@ arena_mapbits_large_size_get
 arena_mapbits_size_decode
 arena_mapbits_size_encode
 arena_mapbits_small_runind_get
 arena_mapbits_small_set
 arena_mapbits_unallocated_set
 arena_mapbits_unallocated_size_get
 arena_mapbits_unallocated_size_set
 arena_mapbits_unzeroed_get
-arena_mapbitsp_get
+arena_mapbitsp_get_const
+arena_mapbitsp_get_mutable
 arena_mapbitsp_read
 arena_mapbitsp_write
 arena_maxrun
 arena_maybe_purge
 arena_metadata_allocated_add
 arena_metadata_allocated_get
 arena_metadata_allocated_sub
 arena_migrate
-arena_miscelm_get
+arena_miscelm_get_const
+arena_miscelm_get_mutable
 arena_miscelm_to_pageind
 arena_miscelm_to_rpages
 arena_new
 arena_node_alloc
 arena_node_dalloc
 arena_nthreads_dec
 arena_nthreads_get
 arena_nthreads_inc
@@ -97,16 +101,17 @@ arena_prof_tctx_set
 arena_ptr_small_binind_get
 arena_purge
 arena_quarantine_junk_small
 arena_ralloc
 arena_ralloc_junk_large
 arena_ralloc_no_move
 arena_rd_to_miscelm
 arena_redzone_corruption
+arena_reset
 arena_run_regind
 arena_run_to_miscelm
 arena_salloc
 arena_sdalloc
 arena_stats_merge
 arena_tcache_fill_small
 arena_tdata_get
 arena_tdata_get_hard
@@ -158,30 +163,25 @@ chunk_alloc_dss
 chunk_alloc_mmap
 chunk_alloc_wrapper
 chunk_boot
 chunk_dalloc_cache
 chunk_dalloc_mmap
 chunk_dalloc_wrapper
 chunk_deregister
 chunk_dss_boot
-chunk_dss_postfork_child
-chunk_dss_postfork_parent
+chunk_dss_mergeable
 chunk_dss_prec_get
 chunk_dss_prec_set
-chunk_dss_prefork
 chunk_hooks_default
 chunk_hooks_get
 chunk_hooks_set
 chunk_in_dss
 chunk_lookup
 chunk_npages
-chunk_postfork_child
-chunk_postfork_parent
-chunk_prefork
 chunk_purge_wrapper
 chunk_register
 chunks_rtree
 chunksize
 chunksize_mask
 ckh_count
 ckh_delete
 ckh_insert
@@ -282,24 +282,21 @@ huge_malloc
 huge_palloc
 huge_prof_tctx_get
 huge_prof_tctx_reset
 huge_prof_tctx_set
 huge_ralloc
 huge_ralloc_no_move
 huge_salloc
 iaalloc
+ialloc
 iallocztm
-icalloc
-icalloct
+iarena_cleanup
 idalloc
-idalloct
 idalloctm
-imalloc
-imalloct
 in_valgrind
 index2size
 index2size_compute
 index2size_lookup
 index2size_tab
 ipalloc
 ipalloct
 ipallocztm
@@ -315,16 +312,19 @@ ivsalloc
 ixalloc
 jemalloc_postfork_child
 jemalloc_postfork_parent
 jemalloc_prefork
 large_maxclass
 lg_floor
 lg_prof_sample
 malloc_cprintf
+malloc_mutex_assert_not_owner
+malloc_mutex_assert_owner
+malloc_mutex_boot
 malloc_mutex_init
 malloc_mutex_lock
 malloc_mutex_postfork_child
 malloc_mutex_postfork_parent
 malloc_mutex_prefork
 malloc_mutex_unlock
 malloc_printf
 malloc_snprintf
@@ -336,37 +336,37 @@ malloc_tsd_dalloc
 malloc_tsd_malloc
 malloc_tsd_no_cleanup
 malloc_vcprintf
 malloc_vsnprintf
 malloc_write
 map_bias
 map_misc_offset
 mb_write
-mutex_boot
+narenas_auto
 narenas_tdata_cleanup
 narenas_total_get
 ncpus
 nhbins
 nhclasses
 nlclasses
 nstime_add
 nstime_compare
 nstime_copy
 nstime_divide
 nstime_idivide
 nstime_imultiply
 nstime_init
 nstime_init2
+nstime_monotonic
 nstime_ns
 nstime_nsec
 nstime_sec
 nstime_subtract
 nstime_update
-nstime_update_impl
 opt_abort
 opt_decay_time
 opt_dss
 opt_junk
 opt_junk_alloc
 opt_junk_free
 opt_lg_chunk
 opt_lg_dirty_mult
@@ -386,27 +386,39 @@ opt_purge
 opt_quarantine
 opt_redzone
 opt_stats_print
 opt_tcache
 opt_utrace
 opt_xmalloc
 opt_zero
 p2rz
+pages_boot
 pages_commit
 pages_decommit
 pages_map
 pages_purge
 pages_trim
 pages_unmap
+pind2sz
+pind2sz_compute
+pind2sz_lookup
+pind2sz_tab
 pow2_ceil_u32
 pow2_ceil_u64
 pow2_ceil_zu
-prng_lg_range
-prng_range
+prng_lg_range_u32
+prng_lg_range_u64
+prng_lg_range_zu
+prng_range_u32
+prng_range_u64
+prng_range_zu
+prng_state_next_u32
+prng_state_next_u64
+prng_state_next_zu
 prof_active
 prof_active_get
 prof_active_get_unlocked
 prof_active_set
 prof_alloc_prep
 prof_alloc_rollback
 prof_backtrace
 prof_boot0
@@ -445,22 +457,23 @@ prof_tdata_get
 prof_tdata_init
 prof_tdata_reinit
 prof_thread_active_get
 prof_thread_active_init_get
 prof_thread_active_init_set
 prof_thread_active_set
 prof_thread_name_get
 prof_thread_name_set
+psz2ind
+psz2u
 purge_mode_names
 quarantine
 quarantine_alloc_hook
 quarantine_alloc_hook_work
 quarantine_cleanup
-register_zone
 rtree_child_read
 rtree_child_read_hard
 rtree_child_tryread
 rtree_delete
 rtree_get
 rtree_new
 rtree_node_valid
 rtree_set
@@ -468,37 +481,36 @@ rtree_start_level
 rtree_subkey
 rtree_subtree_read
 rtree_subtree_read_hard
 rtree_subtree_tryread
 rtree_val_read
 rtree_val_write
 run_quantize_ceil
 run_quantize_floor
-run_quantize_max
 s2u
 s2u_compute
 s2u_lookup
 sa2u
 set_errno
 size2index
 size2index_compute
 size2index_lookup
 size2index_tab
+spin_adaptive
+spin_init
 stats_cactive
 stats_cactive_add
 stats_cactive_get
 stats_cactive_sub
 stats_print
 tcache_alloc_easy
 tcache_alloc_large
 tcache_alloc_small
 tcache_alloc_small_hard
-tcache_arena_associate
-tcache_arena_dissociate
 tcache_arena_reassociate
 tcache_bin_flush_large
 tcache_bin_flush_small
 tcache_bin_info
 tcache_boot
 tcache_cleanup
 tcache_create
 tcache_dalloc_large
@@ -534,29 +546,35 @@ tsd_arenas_tdata_bypass_set
 tsd_arenas_tdata_bypassp_get
 tsd_arenas_tdata_get
 tsd_arenas_tdata_set
 tsd_arenas_tdatap_get
 tsd_boot
 tsd_boot0
 tsd_boot1
 tsd_booted
+tsd_booted_get
 tsd_cleanup
 tsd_cleanup_wrapper
 tsd_fetch
+tsd_fetch_impl
 tsd_get
+tsd_get_allocates
+tsd_iarena_get
+tsd_iarena_set
+tsd_iarenap_get
+tsd_initialized
+tsd_init_check_recursion
+tsd_init_finish
+tsd_init_head
 tsd_narenas_tdata_get
 tsd_narenas_tdata_set
 tsd_narenas_tdatap_get
 tsd_wrapper_get
 tsd_wrapper_set
-tsd_initialized
-tsd_init_check_recursion
-tsd_init_finish
-tsd_init_head
 tsd_nominal
 tsd_prof_tdata_get
 tsd_prof_tdata_set
 tsd_prof_tdatap_get
 tsd_quarantine_get
 tsd_quarantine_set
 tsd_quarantinep_get
 tsd_set
@@ -569,13 +587,40 @@ tsd_tcachep_get
 tsd_thread_allocated_get
 tsd_thread_allocated_set
 tsd_thread_allocatedp_get
 tsd_thread_deallocated_get
 tsd_thread_deallocated_set
 tsd_thread_deallocatedp_get
 tsd_tls
 tsd_tsd
+tsd_tsdn
+tsd_witness_fork_get
+tsd_witness_fork_set
+tsd_witness_forkp_get
+tsd_witnesses_get
+tsd_witnesses_set
+tsd_witnessesp_get
+tsdn_fetch
+tsdn_null
+tsdn_tsd
 u2rz
 valgrind_freelike_block
 valgrind_make_mem_defined
 valgrind_make_mem_noaccess
 valgrind_make_mem_undefined
+witness_assert_lockless
+witness_assert_not_owner
+witness_assert_owner
+witness_fork_cleanup
+witness_init
+witness_lock
+witness_lock_error
+witness_lockless_error
+witness_not_owner_error
+witness_owner
+witness_owner_error
+witness_postfork_child
+witness_postfork_parent
+witness_prefork
+witness_unlock
+witnesses_cleanup
+zone_register
--- a/memory/jemalloc/src/include/jemalloc/internal/prng.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/prng.h
@@ -14,66 +14,194 @@
  *
  * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints.
  *
  * This choice of m has the disadvantage that the quality of the bits is
  * proportional to bit position.  For example, the lowest bit has a cycle of 2,
  * the next has a cycle of 4, etc.  For this reason, we prefer to use the upper
  * bits.
  */
-#define	PRNG_A	UINT64_C(6364136223846793005)
-#define	PRNG_C	UINT64_C(1442695040888963407)
+
+#define	PRNG_A_32	UINT32_C(1103515241)
+#define	PRNG_C_32	UINT32_C(12347)
+
+#define	PRNG_A_64	UINT64_C(6364136223846793005)
+#define	PRNG_C_64	UINT64_C(1442695040888963407)
 
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
 
 #endif /* JEMALLOC_H_STRUCTS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
-uint64_t	prng_lg_range(uint64_t *state, unsigned lg_range);
-uint64_t	prng_range(uint64_t *state, uint64_t range);
+uint32_t	prng_state_next_u32(uint32_t state);
+uint64_t	prng_state_next_u64(uint64_t state);
+size_t	prng_state_next_zu(size_t state);
+
+uint32_t	prng_lg_range_u32(uint32_t *state, unsigned lg_range,
+    bool atomic);
+uint64_t	prng_lg_range_u64(uint64_t *state, unsigned lg_range);
+size_t	prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic);
+
+uint32_t	prng_range_u32(uint32_t *state, uint32_t range, bool atomic);
+uint64_t	prng_range_u64(uint64_t *state, uint64_t range);
+size_t	prng_range_zu(size_t *state, size_t range, bool atomic);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PRNG_C_))
+JEMALLOC_ALWAYS_INLINE uint32_t
+prng_state_next_u32(uint32_t state)
+{
+
+	return ((state * PRNG_A_32) + PRNG_C_32);
+}
+
 JEMALLOC_ALWAYS_INLINE uint64_t
-prng_lg_range(uint64_t *state, unsigned lg_range)
+prng_state_next_u64(uint64_t state)
+{
+
+	return ((state * PRNG_A_64) + PRNG_C_64);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+prng_state_next_zu(size_t state)
+{
+
+#if LG_SIZEOF_PTR == 2
+	return ((state * PRNG_A_32) + PRNG_C_32);
+#elif LG_SIZEOF_PTR == 3
+	return ((state * PRNG_A_64) + PRNG_C_64);
+#else
+#error Unsupported pointer size
+#endif
+}
+
+JEMALLOC_ALWAYS_INLINE uint32_t
+prng_lg_range_u32(uint32_t *state, unsigned lg_range, bool atomic)
 {
-	uint64_t ret;
+	uint32_t ret, state1;
+
+	assert(lg_range > 0);
+	assert(lg_range <= 32);
+
+	if (atomic) {
+		uint32_t state0;
+
+		do {
+			state0 = atomic_read_uint32(state);
+			state1 = prng_state_next_u32(state0);
+		} while (atomic_cas_uint32(state, state0, state1));
+	} else {
+		state1 = prng_state_next_u32(*state);
+		*state = state1;
+	}
+	ret = state1 >> (32 - lg_range);
+
+	return (ret);
+}
+
+/* 64-bit atomic operations cannot be supported on all relevant platforms. */
+JEMALLOC_ALWAYS_INLINE uint64_t
+prng_lg_range_u64(uint64_t *state, unsigned lg_range)
+{
+	uint64_t ret, state1;
 
 	assert(lg_range > 0);
 	assert(lg_range <= 64);
 
-	ret = (*state * PRNG_A) + PRNG_C;
-	*state = ret;
-	ret >>= (64 - lg_range);
+	state1 = prng_state_next_u64(*state);
+	*state = state1;
+	ret = state1 >> (64 - lg_range);
+
+	return (ret);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic)
+{
+	size_t ret, state1;
+
+	assert(lg_range > 0);
+	assert(lg_range <= ZU(1) << (3 + LG_SIZEOF_PTR));
+
+	if (atomic) {
+		size_t state0;
+
+		do {
+			state0 = atomic_read_z(state);
+			state1 = prng_state_next_zu(state0);
+		} while (atomic_cas_z(state, state0, state1));
+	} else {
+		state1 = prng_state_next_zu(*state);
+		*state = state1;
+	}
+	ret = state1 >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range);
+
+	return (ret);
+}
+
+JEMALLOC_ALWAYS_INLINE uint32_t
+prng_range_u32(uint32_t *state, uint32_t range, bool atomic)
+{
+	uint32_t ret;
+	unsigned lg_range;
+
+	assert(range > 1);
+
+	/* Compute the ceiling of lg(range). */
+	lg_range = ffs_u32(pow2_ceil_u32(range)) - 1;
+
+	/* Generate a result in [0..range) via repeated trial. */
+	do {
+		ret = prng_lg_range_u32(state, lg_range, atomic);
+	} while (ret >= range);
 
 	return (ret);
 }
 
 JEMALLOC_ALWAYS_INLINE uint64_t
-prng_range(uint64_t *state, uint64_t range)
+prng_range_u64(uint64_t *state, uint64_t range)
 {
 	uint64_t ret;
 	unsigned lg_range;
 
 	assert(range > 1);
 
 	/* Compute the ceiling of lg(range). */
 	lg_range = ffs_u64(pow2_ceil_u64(range)) - 1;
 
 	/* Generate a result in [0..range) via repeated trial. */
 	do {
-		ret = prng_lg_range(state, lg_range);
+		ret = prng_lg_range_u64(state, lg_range);
+	} while (ret >= range);
+
+	return (ret);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+prng_range_zu(size_t *state, size_t range, bool atomic)
+{
+	size_t ret;
+	unsigned lg_range;
+
+	assert(range > 1);
+
+	/* Compute the ceiling of lg(range). */
+	lg_range = ffs_u64(pow2_ceil_u64(range)) - 1;
+
+	/* Generate a result in [0..range) via repeated trial. */
+	do {
+		ret = prng_lg_range_zu(state, lg_range, atomic);
 	} while (ret >= range);
 
 	return (ret);
 }
 #endif
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/internal/prof.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/prof.h
@@ -276,76 +276,76 @@ extern uint64_t	prof_interval;
 
 /*
  * Initialized as opt_lg_prof_sample, and potentially modified during profiling
  * resets.
  */
 extern size_t	lg_prof_sample;
 
 void	prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated);
-void	prof_malloc_sample_object(const void *ptr, size_t usize,
+void	prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
     prof_tctx_t *tctx);
 void	prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx);
 void	bt_init(prof_bt_t *bt, void **vec);
 void	prof_backtrace(prof_bt_t *bt);
 prof_tctx_t	*prof_lookup(tsd_t *tsd, prof_bt_t *bt);
 #ifdef JEMALLOC_JET
 size_t	prof_tdata_count(void);
 size_t	prof_bt_count(void);
 const prof_cnt_t *prof_cnt_all(void);
 typedef int (prof_dump_open_t)(bool, const char *);
 extern prof_dump_open_t *prof_dump_open;
-typedef bool (prof_dump_header_t)(bool, const prof_cnt_t *);
+typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *);
 extern prof_dump_header_t *prof_dump_header;
 #endif
-void	prof_idump(void);
-bool	prof_mdump(const char *filename);
-void	prof_gdump(void);
+void	prof_idump(tsdn_t *tsdn);
+bool	prof_mdump(tsd_t *tsd, const char *filename);
+void	prof_gdump(tsdn_t *tsdn);
 prof_tdata_t	*prof_tdata_init(tsd_t *tsd);
 prof_tdata_t	*prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
 void	prof_reset(tsd_t *tsd, size_t lg_sample);
 void	prof_tdata_cleanup(tsd_t *tsd);
-const char	*prof_thread_name_get(void);
-bool	prof_active_get(void);
-bool	prof_active_set(bool active);
+bool	prof_active_get(tsdn_t *tsdn);
+bool	prof_active_set(tsdn_t *tsdn, bool active);
+const char	*prof_thread_name_get(tsd_t *tsd);
 int	prof_thread_name_set(tsd_t *tsd, const char *thread_name);
-bool	prof_thread_active_get(void);
-bool	prof_thread_active_set(bool active);
-bool	prof_thread_active_init_get(void);
-bool	prof_thread_active_init_set(bool active_init);
-bool	prof_gdump_get(void);
-bool	prof_gdump_set(bool active);
+bool	prof_thread_active_get(tsd_t *tsd);
+bool	prof_thread_active_set(tsd_t *tsd, bool active);
+bool	prof_thread_active_init_get(tsdn_t *tsdn);
+bool	prof_thread_active_init_set(tsdn_t *tsdn, bool active_init);
+bool	prof_gdump_get(tsdn_t *tsdn);
+bool	prof_gdump_set(tsdn_t *tsdn, bool active);
 void	prof_boot0(void);
 void	prof_boot1(void);
-bool	prof_boot2(void);
-void	prof_prefork0(void);
-void	prof_prefork1(void);
-void	prof_postfork_parent(void);
-void	prof_postfork_child(void);
+bool	prof_boot2(tsd_t *tsd);
+void	prof_prefork0(tsdn_t *tsdn);
+void	prof_prefork1(tsdn_t *tsdn);
+void	prof_postfork_parent(tsdn_t *tsdn);
+void	prof_postfork_child(tsdn_t *tsdn);
 void	prof_sample_threshold_update(prof_tdata_t *tdata);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
 bool	prof_active_get_unlocked(void);
 bool	prof_gdump_get_unlocked(void);
 prof_tdata_t	*prof_tdata_get(tsd_t *tsd, bool create);
+prof_tctx_t	*prof_tctx_get(tsdn_t *tsdn, const void *ptr);
+void	prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize,
+    prof_tctx_t *tctx);
+void	prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize,
+    const void *old_ptr, prof_tctx_t *tctx);
 bool	prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit,
     prof_tdata_t **tdata_out);
 prof_tctx_t	*prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active,
     bool update);
-prof_tctx_t	*prof_tctx_get(const void *ptr);
-void	prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx);
-void	prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr,
+void	prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize,
     prof_tctx_t *tctx);
-void	prof_malloc_sample_object(const void *ptr, size_t usize,
-    prof_tctx_t *tctx);
-void	prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx);
 void	prof_realloc(tsd_t *tsd, const void *ptr, size_t usize,
     prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr,
     size_t old_usize, prof_tctx_t *old_tctx);
 void	prof_free(tsd_t *tsd, const void *ptr, size_t usize);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
 JEMALLOC_ALWAYS_INLINE bool
@@ -393,44 +393,44 @@ prof_tdata_get(tsd_t *tsd, bool create)
 		}
 		assert(tdata == NULL || tdata->attached);
 	}
 
 	return (tdata);
 }
 
 JEMALLOC_ALWAYS_INLINE prof_tctx_t *
-prof_tctx_get(const void *ptr)
+prof_tctx_get(tsdn_t *tsdn, const void *ptr)
 {
 
 	cassert(config_prof);
 	assert(ptr != NULL);
 
-	return (arena_prof_tctx_get(ptr));
+	return (arena_prof_tctx_get(tsdn, ptr));
 }
 
 JEMALLOC_ALWAYS_INLINE void
-prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx)
+prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx)
 {
 
 	cassert(config_prof);
 	assert(ptr != NULL);
 
-	arena_prof_tctx_set(ptr, usize, tctx);
+	arena_prof_tctx_set(tsdn, ptr, usize, tctx);
 }
 
 JEMALLOC_ALWAYS_INLINE void
-prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr,
+prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, const void *old_ptr,
     prof_tctx_t *old_tctx)
 {
 
 	cassert(config_prof);
 	assert(ptr != NULL);
 
-	arena_prof_tctx_reset(ptr, usize, old_ptr, old_tctx);
+	arena_prof_tctx_reset(tsdn, ptr, usize, old_ptr, old_tctx);
 }
 
 JEMALLOC_ALWAYS_INLINE bool
 prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
     prof_tdata_t **tdata_out)
 {
 	prof_tdata_t *tdata;
 
@@ -475,72 +475,73 @@ prof_alloc_prep(tsd_t *tsd, size_t usize
 		prof_backtrace(&bt);
 		ret = prof_lookup(tsd, &bt);
 	}
 
 	return (ret);
 }
 
 JEMALLOC_ALWAYS_INLINE void
-prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx)
+prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx)
 {
 
 	cassert(config_prof);
 	assert(ptr != NULL);
-	assert(usize == isalloc(ptr, true));
+	assert(usize == isalloc(tsdn, ptr, true));
 
 	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
-		prof_malloc_sample_object(ptr, usize, tctx);
+		prof_malloc_sample_object(tsdn, ptr, usize, tctx);
 	else
-		prof_tctx_set(ptr, usize, (prof_tctx_t *)(uintptr_t)1U);
+		prof_tctx_set(tsdn, ptr, usize, (prof_tctx_t *)(uintptr_t)1U);
 }
 
 JEMALLOC_ALWAYS_INLINE void
 prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
     bool prof_active, bool updated, const void *old_ptr, size_t old_usize,
     prof_tctx_t *old_tctx)
 {
 	bool sampled, old_sampled;
 
 	cassert(config_prof);
 	assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);
 
 	if (prof_active && !updated && ptr != NULL) {
-		assert(usize == isalloc(ptr, true));
+		assert(usize == isalloc(tsd_tsdn(tsd), ptr, true));
 		if (prof_sample_accum_update(tsd, usize, true, NULL)) {
 			/*
 			 * Don't sample.  The usize passed to prof_alloc_prep()
 			 * was larger than what actually got allocated, so a
 			 * backtrace was captured for this allocation, even
 			 * though its actual usize was insufficient to cross the
 			 * sample threshold.
 			 */
+			prof_alloc_rollback(tsd, tctx, true);
 			tctx = (prof_tctx_t *)(uintptr_t)1U;
 		}
 	}
 
 	sampled = ((uintptr_t)tctx > (uintptr_t)1U);
 	old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U);
 
 	if (unlikely(sampled))
-		prof_malloc_sample_object(ptr, usize, tctx);
+		prof_malloc_sample_object(tsd_tsdn(tsd), ptr, usize, tctx);
 	else
-		prof_tctx_reset(ptr, usize, old_ptr, old_tctx);
+		prof_tctx_reset(tsd_tsdn(tsd), ptr, usize, old_ptr, old_tctx);
 
 	if (unlikely(old_sampled))
 		prof_free_sampled_object(tsd, old_usize, old_tctx);
 }
 
 JEMALLOC_ALWAYS_INLINE void
 prof_free(tsd_t *tsd, const void *ptr, size_t usize)
 {
-	prof_tctx_t *tctx = prof_tctx_get(ptr);
+	prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), ptr);
 
 	cassert(config_prof);
-	assert(usize == isalloc(ptr, true));
+	assert(usize == isalloc(tsd_tsdn(tsd), ptr, true));
 
 	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
 		prof_free_sampled_object(tsd, usize, tctx);
 }
 #endif
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/internal/rtree.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/rtree.h
@@ -10,19 +10,20 @@ typedef struct rtree_node_elm_s rtree_no
 typedef struct rtree_level_s rtree_level_t;
 typedef struct rtree_s rtree_t;
 
 /*
  * RTREE_BITS_PER_LEVEL must be a power of two that is no larger than the
  * machine address width.
  */
 #define	LG_RTREE_BITS_PER_LEVEL	4
-#define	RTREE_BITS_PER_LEVEL	(ZU(1) << LG_RTREE_BITS_PER_LEVEL)
+#define	RTREE_BITS_PER_LEVEL	(1U << LG_RTREE_BITS_PER_LEVEL)
+/* Maximum rtree height. */
 #define	RTREE_HEIGHT_MAX						\
-    ((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL)
+    ((1U << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL)
 
 /* Used for two-stage lock-free node initialization. */
 #define	RTREE_NODE_INITIALIZING	((rtree_node_elm_t *)0x1)
 
 /*
  * The node allocation callback function's argument is the number of contiguous
  * rtree_node_elm_t structures to allocate, and the resulting memory must be
  * zeroed.
@@ -106,85 +107,91 @@ rtree_node_elm_t	*rtree_child_read_hard(
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
 unsigned	rtree_start_level(rtree_t *rtree, uintptr_t key);
 uintptr_t	rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level);
 
 bool	rtree_node_valid(rtree_node_elm_t *node);
-rtree_node_elm_t	*rtree_child_tryread(rtree_node_elm_t *elm);
+rtree_node_elm_t	*rtree_child_tryread(rtree_node_elm_t *elm,
+    bool dependent);
 rtree_node_elm_t	*rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm,
-    unsigned level);
+    unsigned level, bool dependent);
 extent_node_t	*rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm,
     bool dependent);
 void	rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm,
     const extent_node_t *val);
-rtree_node_elm_t	*rtree_subtree_tryread(rtree_t *rtree, unsigned level);
-rtree_node_elm_t	*rtree_subtree_read(rtree_t *rtree, unsigned level);
+rtree_node_elm_t	*rtree_subtree_tryread(rtree_t *rtree, unsigned level,
+    bool dependent);
+rtree_node_elm_t	*rtree_subtree_read(rtree_t *rtree, unsigned level,
+    bool dependent);
 
 extent_node_t	*rtree_get(rtree_t *rtree, uintptr_t key, bool dependent);
 bool	rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_))
-JEMALLOC_INLINE unsigned
+JEMALLOC_ALWAYS_INLINE unsigned
 rtree_start_level(rtree_t *rtree, uintptr_t key)
 {
 	unsigned start_level;
 
 	if (unlikely(key == 0))
 		return (rtree->height - 1);
 
 	start_level = rtree->start_level[lg_floor(key) >>
 	    LG_RTREE_BITS_PER_LEVEL];
 	assert(start_level < rtree->height);
 	return (start_level);
 }
 
-JEMALLOC_INLINE uintptr_t
+JEMALLOC_ALWAYS_INLINE uintptr_t
 rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level)
 {
 
 	return ((key >> ((ZU(1) << (LG_SIZEOF_PTR+3)) -
 	    rtree->levels[level].cumbits)) & ((ZU(1) <<
 	    rtree->levels[level].bits) - 1));
 }
 
-JEMALLOC_INLINE bool
+JEMALLOC_ALWAYS_INLINE bool
 rtree_node_valid(rtree_node_elm_t *node)
 {
 
 	return ((uintptr_t)node > (uintptr_t)RTREE_NODE_INITIALIZING);
 }
 
-JEMALLOC_INLINE rtree_node_elm_t *
-rtree_child_tryread(rtree_node_elm_t *elm)
+JEMALLOC_ALWAYS_INLINE rtree_node_elm_t *
+rtree_child_tryread(rtree_node_elm_t *elm, bool dependent)
 {
 	rtree_node_elm_t *child;
 
 	/* Double-checked read (first read may be stale. */
 	child = elm->child;
-	if (!rtree_node_valid(child))
+	if (!dependent && !rtree_node_valid(child))
 		child = atomic_read_p(&elm->pun);
+	assert(!dependent || child != NULL);
 	return (child);
 }
 
-JEMALLOC_INLINE rtree_node_elm_t *
-rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level)
+JEMALLOC_ALWAYS_INLINE rtree_node_elm_t *
+rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level,
+    bool dependent)
 {
 	rtree_node_elm_t *child;
 
-	child = rtree_child_tryread(elm);
-	if (unlikely(!rtree_node_valid(child)))
+	child = rtree_child_tryread(elm, dependent);
+	if (!dependent && unlikely(!rtree_node_valid(child)))
 		child = rtree_child_read_hard(rtree, elm, level);
+	assert(!dependent || child != NULL);
 	return (child);
 }
 
-JEMALLOC_INLINE extent_node_t *
+JEMALLOC_ALWAYS_INLINE extent_node_t *
 rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, bool dependent)
 {
 
 	if (dependent) {
 		/*
 		 * Reading a val on behalf of a pointer to a valid allocation is
 		 * guaranteed to be a clean read even without synchronization,
 		 * because the rtree update became visible in memory before the
@@ -203,91 +210,156 @@ rtree_val_read(rtree_t *rtree, rtree_nod
 
 JEMALLOC_INLINE void
 rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val)
 {
 
 	atomic_write_p(&elm->pun, val);
 }
 
-JEMALLOC_INLINE rtree_node_elm_t *
-rtree_subtree_tryread(rtree_t *rtree, unsigned level)
+JEMALLOC_ALWAYS_INLINE rtree_node_elm_t *
+rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent)
 {
 	rtree_node_elm_t *subtree;
 
 	/* Double-checked read (first read may be stale. */
 	subtree = rtree->levels[level].subtree;
-	if (!rtree_node_valid(subtree))
+	if (!dependent && unlikely(!rtree_node_valid(subtree)))
 		subtree = atomic_read_p(&rtree->levels[level].subtree_pun);
+	assert(!dependent || subtree != NULL);
 	return (subtree);
 }
 
-JEMALLOC_INLINE rtree_node_elm_t *
-rtree_subtree_read(rtree_t *rtree, unsigned level)
+JEMALLOC_ALWAYS_INLINE rtree_node_elm_t *
+rtree_subtree_read(rtree_t *rtree, unsigned level, bool dependent)
 {
 	rtree_node_elm_t *subtree;
 
-	subtree = rtree_subtree_tryread(rtree, level);
-	if (unlikely(!rtree_node_valid(subtree)))
+	subtree = rtree_subtree_tryread(rtree, level, dependent);
+	if (!dependent && unlikely(!rtree_node_valid(subtree)))
 		subtree = rtree_subtree_read_hard(rtree, level);
+	assert(!dependent || subtree != NULL);
 	return (subtree);
 }
 
-JEMALLOC_INLINE extent_node_t *
+JEMALLOC_ALWAYS_INLINE extent_node_t *
 rtree_get(rtree_t *rtree, uintptr_t key, bool dependent)
 {
 	uintptr_t subkey;
-	unsigned i, start_level;
-	rtree_node_elm_t *node, *child;
+	unsigned start_level;
+	rtree_node_elm_t *node;
 
 	start_level = rtree_start_level(rtree, key);
 
-	for (i = start_level, node = rtree_subtree_tryread(rtree, start_level);
-	    /**/; i++, node = child) {
-		if (!dependent && unlikely(!rtree_node_valid(node)))
-			return (NULL);
-		subkey = rtree_subkey(rtree, key, i);
-		if (i == rtree->height - 1) {
-			/*
-			 * node is a leaf, so it contains values rather than
-			 * child pointers.
-			 */
-			return (rtree_val_read(rtree, &node[subkey],
-			    dependent));
-		}
-		assert(i < rtree->height - 1);
-		child = rtree_child_tryread(&node[subkey]);
+	node = rtree_subtree_tryread(rtree, start_level, dependent);
+#define	RTREE_GET_BIAS	(RTREE_HEIGHT_MAX - rtree->height)
+	switch (start_level + RTREE_GET_BIAS) {
+#define	RTREE_GET_SUBTREE(level)					\
+	case level:							\
+		assert(level < (RTREE_HEIGHT_MAX-1));			\
+		if (!dependent && unlikely(!rtree_node_valid(node)))	\
+			return (NULL);					\
+		subkey = rtree_subkey(rtree, key, level -		\
+		    RTREE_GET_BIAS);					\
+		node = rtree_child_tryread(&node[subkey], dependent);	\
+		/* Fall through. */
+#define	RTREE_GET_LEAF(level)						\
+	case level:							\
+		assert(level == (RTREE_HEIGHT_MAX-1));			\
+		if (!dependent && unlikely(!rtree_node_valid(node)))	\
+			return (NULL);					\
+		subkey = rtree_subkey(rtree, key, level -		\
+		    RTREE_GET_BIAS);					\
+		/*							\
+		 * node is a leaf, so it contains values rather than	\
+		 * child pointers.					\
+		 */							\
+		return (rtree_val_read(rtree, &node[subkey],		\
+		    dependent));
+#if RTREE_HEIGHT_MAX > 1
+	RTREE_GET_SUBTREE(0)
+#endif
+#if RTREE_HEIGHT_MAX > 2
+	RTREE_GET_SUBTREE(1)
+#endif
+#if RTREE_HEIGHT_MAX > 3
+	RTREE_GET_SUBTREE(2)
+#endif
+#if RTREE_HEIGHT_MAX > 4
+	RTREE_GET_SUBTREE(3)
+#endif
+#if RTREE_HEIGHT_MAX > 5
+	RTREE_GET_SUBTREE(4)
+#endif
+#if RTREE_HEIGHT_MAX > 6
+	RTREE_GET_SUBTREE(5)
+#endif
+#if RTREE_HEIGHT_MAX > 7
+	RTREE_GET_SUBTREE(6)
+#endif
+#if RTREE_HEIGHT_MAX > 8
+	RTREE_GET_SUBTREE(7)
+#endif
+#if RTREE_HEIGHT_MAX > 9
+	RTREE_GET_SUBTREE(8)
+#endif
+#if RTREE_HEIGHT_MAX > 10
+	RTREE_GET_SUBTREE(9)
+#endif
+#if RTREE_HEIGHT_MAX > 11
+	RTREE_GET_SUBTREE(10)
+#endif
+#if RTREE_HEIGHT_MAX > 12
+	RTREE_GET_SUBTREE(11)
+#endif
+#if RTREE_HEIGHT_MAX > 13
+	RTREE_GET_SUBTREE(12)
+#endif
+#if RTREE_HEIGHT_MAX > 14
+	RTREE_GET_SUBTREE(13)
+#endif
+#if RTREE_HEIGHT_MAX > 15
+	RTREE_GET_SUBTREE(14)
+#endif
+#if RTREE_HEIGHT_MAX > 16
+#  error Unsupported RTREE_HEIGHT_MAX
+#endif
+	RTREE_GET_LEAF(RTREE_HEIGHT_MAX-1)
+#undef RTREE_GET_SUBTREE
+#undef RTREE_GET_LEAF
+	default: not_reached();
 	}
+#undef RTREE_GET_BIAS
 	not_reached();
 }
 
 JEMALLOC_INLINE bool
 rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val)
 {
 	uintptr_t subkey;
 	unsigned i, start_level;
 	rtree_node_elm_t *node, *child;
 
 	start_level = rtree_start_level(rtree, key);
 
-	node = rtree_subtree_read(rtree, start_level);
+	node = rtree_subtree_read(rtree, start_level, false);
 	if (node == NULL)
 		return (true);
 	for (i = start_level; /**/; i++, node = child) {
 		subkey = rtree_subkey(rtree, key, i);
 		if (i == rtree->height - 1) {
 			/*
 			 * node is a leaf, so it contains values rather than
 			 * child pointers.
 			 */
 			rtree_val_write(rtree, &node[subkey], val);
 			return (false);
 		}
 		assert(i + 1 < rtree->height);
-		child = rtree_child_read(rtree, &node[subkey], i);
+		child = rtree_child_read(rtree, &node[subkey], i, false);
 		if (child == NULL)
 			return (true);
 	}
 	not_reached();
 }
 #endif
 
 #endif /* JEMALLOC_H_INLINES */
--- a/memory/jemalloc/src/include/jemalloc/internal/size_classes.sh
+++ b/memory/jemalloc/src/include/jemalloc/internal/size_classes.sh
@@ -43,16 +43,31 @@ lg() {
 size_class() {
   index=$1
   lg_grp=$2
   lg_delta=$3
   ndelta=$4
   lg_p=$5
   lg_kmax=$6
 
+  if [ ${lg_delta} -ge ${lg_p} ] ; then
+    psz="yes"
+  else
+    pow2 ${lg_p}; p=${pow2_result}
+    pow2 ${lg_grp}; grp=${pow2_result}
+    pow2 ${lg_delta}; delta=${pow2_result}
+    sz=$((${grp} + ${delta} * ${ndelta}))
+    npgs=$((${sz} / ${p}))
+    if [ ${sz} -eq $((${npgs} * ${p})) ] ; then
+      psz="yes"
+    else
+      psz="no"
+    fi
+  fi
+
   lg ${ndelta}; lg_ndelta=${lg_result}; pow2 ${lg_ndelta}
   if [ ${pow2_result} -lt ${ndelta} ] ; then
     rem="yes"
   else
     rem="no"
   fi
 
   lg_size=${lg_grp}
@@ -69,54 +84,59 @@ size_class() {
     bin="no"
   fi
   if [ ${lg_size} -lt ${lg_kmax} \
       -o ${lg_size} -eq ${lg_kmax} -a ${rem} = "no" ] ; then
     lg_delta_lookup=${lg_delta}
   else
     lg_delta_lookup="no"
   fi
-  printf '    SC(%3d, %6d, %8d, %6d, %3s, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${bin} ${lg_delta_lookup}
+  printf '    SC(%3d, %6d, %8d, %6d, %3s, %3s, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${psz} ${bin} ${lg_delta_lookup}
   # Defined upon return:
+  # - psz ("yes" or "no")
+  # - bin ("yes" or "no")
   # - lg_delta_lookup (${lg_delta} or "no")
-  # - bin ("yes" or "no")
 }
 
 sep_line() {
-  echo "                                               \\"
+  echo "                                                         \\"
 }
 
 size_classes() {
   lg_z=$1
   lg_q=$2
   lg_t=$3
   lg_p=$4
   lg_g=$5
 
   pow2 $((${lg_z} + 3)); ptr_bits=${pow2_result}
   pow2 ${lg_g}; g=${pow2_result}
 
   echo "#define	SIZE_CLASSES \\"
-  echo "  /* index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup */ \\"
+  echo "  /* index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup */ \\"
 
   ntbins=0
   nlbins=0
   lg_tiny_maxclass='"NA"'
   nbins=0
+  npsizes=0
 
   # Tiny size classes.
   ndelta=0
   index=0
   lg_grp=${lg_t}
   lg_delta=${lg_grp}
   while [ ${lg_grp} -lt ${lg_q} ] ; do
     size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
     if [ ${lg_delta_lookup} != "no" ] ; then
       nlbins=$((${index} + 1))
     fi
+    if [ ${psz} = "yes" ] ; then
+      npsizes=$((${npsizes} + 1))
+    fi
     if [ ${bin} != "no" ] ; then
       nbins=$((${index} + 1))
     fi
     ntbins=$((${ntbins} + 1))
     lg_tiny_maxclass=${lg_grp} # Final written value is correct.
     index=$((${index} + 1))
     lg_delta=${lg_grp}
     lg_grp=$((${lg_grp} + 1))
@@ -128,21 +148,27 @@ size_classes() {
     # The first size class has an unusual encoding, because the size has to be
     # split between grp and delta*ndelta.
     lg_grp=$((${lg_grp} - 1))
     ndelta=1
     size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
     index=$((${index} + 1))
     lg_grp=$((${lg_grp} + 1))
     lg_delta=$((${lg_delta} + 1))
+    if [ ${psz} = "yes" ] ; then
+      npsizes=$((${npsizes} + 1))
+    fi
   fi
   while [ ${ndelta} -lt ${g} ] ; do
     size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
     index=$((${index} + 1))
     ndelta=$((${ndelta} + 1))
+    if [ ${psz} = "yes" ] ; then
+      npsizes=$((${npsizes} + 1))
+    fi
   done
 
   # All remaining groups.
   lg_grp=$((${lg_grp} + ${lg_g}))
   while [ ${lg_grp} -lt $((${ptr_bits} - 1)) ] ; do
     sep_line
     ndelta=1
     if [ ${lg_grp} -eq $((${ptr_bits} - 2)) ] ; then
@@ -152,16 +178,19 @@ size_classes() {
     fi
     while [ ${ndelta} -le ${ndelta_limit} ] ; do
       size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
       if [ ${lg_delta_lookup} != "no" ] ; then
         nlbins=$((${index} + 1))
         # Final written value is correct:
         lookup_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))"
       fi
+      if [ ${psz} = "yes" ] ; then
+        npsizes=$((${npsizes} + 1))
+      fi
       if [ ${bin} != "no" ] ; then
         nbins=$((${index} + 1))
         # Final written value is correct:
         small_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))"
         if [ ${lg_g} -gt 0 ] ; then
           lg_large_minclass=$((${lg_grp} + 1))
         else
           lg_large_minclass=$((${lg_grp} + 2))
@@ -178,16 +207,17 @@ size_classes() {
   echo
   nsizes=${index}
 
   # Defined upon completion:
   # - ntbins
   # - nlbins
   # - nbins
   # - nsizes
+  # - npsizes
   # - lg_tiny_maxclass
   # - lookup_maxclass
   # - small_maxclass
   # - lg_large_minclass
   # - huge_maxclass
 }
 
 cat <<EOF
@@ -195,30 +225,31 @@ cat <<EOF
 /******************************************************************************/
 #ifdef JEMALLOC_H_TYPES
 
 /*
  * This header requires LG_SIZEOF_PTR, LG_TINY_MIN, LG_QUANTUM, and LG_PAGE to
  * be defined prior to inclusion, and it in turn defines:
  *
  *   LG_SIZE_CLASS_GROUP: Lg of size class count for each size doubling.
- *   SIZE_CLASSES: Complete table of
- *                 SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup)
- *                 tuples.
+ *   SIZE_CLASSES: Complete table of SC(index, lg_grp, lg_delta, ndelta, psz,
+ *                 bin, lg_delta_lookup) tuples.
  *     index: Size class index.
  *     lg_grp: Lg group base size (no deltas added).
  *     lg_delta: Lg delta to previous size class.
  *     ndelta: Delta multiplier.  size == 1<<lg_grp + ndelta<<lg_delta
+ *     psz: 'yes' if a multiple of the page size, 'no' otherwise.
  *     bin: 'yes' if a small bin size class, 'no' otherwise.
  *     lg_delta_lookup: Same as lg_delta if a lookup table size class, 'no'
  *                      otherwise.
  *   NTBINS: Number of tiny bins.
  *   NLBINS: Number of bins supported by the lookup table.
  *   NBINS: Number of small size class bins.
  *   NSIZES: Number of size classes.
+ *   NPSIZES: Number of size classes that are a multiple of (1U << LG_PAGE).
  *   LG_TINY_MAXCLASS: Lg of maximum tiny size class.
  *   LOOKUP_MAXCLASS: Maximum size class included in lookup table.
  *   SMALL_MAXCLASS: Maximum small size class.
  *   LG_LARGE_MINCLASS: Lg of minimum large size class.
  *   HUGE_MAXCLASS: Maximum (huge) size class.
  */
 
 #define	LG_SIZE_CLASS_GROUP	${lg_g}
@@ -233,16 +264,17 @@ for lg_z in ${lg_zarr} ; do
       for lg_p in ${lg_parr} ; do
         echo "#if (LG_SIZEOF_PTR == ${lg_z} && LG_TINY_MIN == ${lg_t} && LG_QUANTUM == ${lg_q} && LG_PAGE == ${lg_p})"
         size_classes ${lg_z} ${lg_q} ${lg_t} ${lg_p} ${lg_g}
         echo "#define	SIZE_CLASSES_DEFINED"
         echo "#define	NTBINS			${ntbins}"
         echo "#define	NLBINS			${nlbins}"
         echo "#define	NBINS			${nbins}"
         echo "#define	NSIZES			${nsizes}"
+        echo "#define	NPSIZES			${npsizes}"
         echo "#define	LG_TINY_MAXCLASS	${lg_tiny_maxclass}"
         echo "#define	LOOKUP_MAXCLASS		${lookup_maxclass}"
         echo "#define	SMALL_MAXCLASS		${small_maxclass}"
         echo "#define	LG_LARGE_MINCLASS	${lg_large_minclass}"
         echo "#define	HUGE_MAXCLASS		${huge_maxclass}"
         echo "#endif"
         echo
       done
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/include/jemalloc/internal/spin.h
@@ -0,0 +1,51 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+typedef struct spin_s spin_t;
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+struct spin_s {
+	unsigned iteration;
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+void	spin_init(spin_t *spin);
+void	spin_adaptive(spin_t *spin);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_SPIN_C_))
+JEMALLOC_INLINE void
+spin_init(spin_t *spin)
+{
+
+	spin->iteration = 0;
+}
+
+JEMALLOC_INLINE void
+spin_adaptive(spin_t *spin)
+{
+	volatile uint64_t i;
+
+	for (i = 0; i < (KQU(1) << spin->iteration); i++)
+		CPU_SPINWAIT;
+
+	if (spin->iteration < 63)
+		spin->iteration++;
+}
+
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
+
--- a/memory/jemalloc/src/include/jemalloc/internal/stats.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/stats.h
@@ -98,16 +98,24 @@ struct malloc_huge_stats_s {
 	size_t		curhchunks;
 };
 
 struct arena_stats_s {
 	/* Number of bytes currently mapped. */
 	size_t		mapped;
 
 	/*
+	 * Number of bytes currently retained as a side effect of munmap() being
+	 * disabled/bypassed.  Retained bytes are technically mapped (though
+	 * always decommitted or purged), but they are excluded from the mapped
+	 * statistic (above).
+	 */
+	size_t		retained;
+
+	/*
 	 * Total number of purge sweeps, total number of madvise calls made,
 	 * and total pages purged in order to keep dirty unused memory under
 	 * control.
 	 */
 	uint64_t	npurge;
 	uint64_t	nmadvise;
 	uint64_t	purged;
 
--- a/memory/jemalloc/src/include/jemalloc/internal/tcache.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/tcache.h
@@ -125,37 +125,35 @@ extern size_t	tcache_maxclass;
  * usable via the MALLOCX_TCACHE() flag.  The automatic per thread tcaches are
  * completely disjoint from this data structure.  tcaches starts off as a sparse
  * array, so it has no physical memory footprint until individual pages are
  * touched.  This allows the entire array to be allocated the first time an
  * explicit tcache is created without a disproportionate impact on memory usage.
  */
 extern tcaches_t	*tcaches;
 
-size_t	tcache_salloc(const void *ptr);
+size_t	tcache_salloc(tsdn_t *tsdn, const void *ptr);
 void	tcache_event_hard(tsd_t *tsd, tcache_t *tcache);
-void	*tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
+void	*tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
     tcache_bin_t *tbin, szind_t binind, bool *tcache_success);
 void	tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
     szind_t binind, unsigned rem);
 void	tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
     unsigned rem, tcache_t *tcache);
-void	tcache_arena_associate(tcache_t *tcache, arena_t *arena);
-void	tcache_arena_reassociate(tcache_t *tcache, arena_t *oldarena,
-    arena_t *newarena);
-void	tcache_arena_dissociate(tcache_t *tcache, arena_t *arena);
+void	tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache,
+    arena_t *oldarena, arena_t *newarena);
 tcache_t *tcache_get_hard(tsd_t *tsd);
-tcache_t *tcache_create(tsd_t *tsd, arena_t *arena);
+tcache_t *tcache_create(tsdn_t *tsdn, arena_t *arena);
 void	tcache_cleanup(tsd_t *tsd);
 void	tcache_enabled_cleanup(tsd_t *tsd);
-void	tcache_stats_merge(tcache_t *tcache, arena_t *arena);
+void	tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena);
 bool	tcaches_create(tsd_t *tsd, unsigned *r_ind);
 void	tcaches_flush(tsd_t *tsd, unsigned ind);
 void	tcaches_destroy(tsd_t *tsd, unsigned ind);
-bool	tcache_boot(void);
+bool	tcache_boot(tsdn_t *tsdn);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
 void	tcache_event(tsd_t *tsd, tcache_t *tcache);
 void	tcache_flush(void);
@@ -292,30 +290,30 @@ tcache_alloc_small(tsd_t *tsd, arena_t *
 	ret = tcache_alloc_easy(tbin, &tcache_success);
 	assert(tcache_success == (ret != NULL));
 	if (unlikely(!tcache_success)) {
 		bool tcache_hard_success;
 		arena = arena_choose(tsd, arena);
 		if (unlikely(arena == NULL))
 			return (NULL);
 
-		ret = tcache_alloc_small_hard(tsd, arena, tcache, tbin, binind,
-			&tcache_hard_success);
+		ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache,
+		    tbin, binind, &tcache_hard_success);
 		if (tcache_hard_success == false)
 			return (NULL);
 	}
 
 	assert(ret);
 	/*
 	 * Only compute usize if required.  The checks in the following if
 	 * statement are all static.
 	 */
 	if (config_prof || (slow_path && config_fill) || unlikely(zero)) {
 		usize = index2size(binind);
-		assert(tcache_salloc(ret) == usize);
+		assert(tcache_salloc(tsd_tsdn(tsd), ret) == usize);
 	}
 
 	if (likely(!zero)) {
 		if (slow_path && config_fill) {
 			if (unlikely(opt_junk_alloc)) {
 				arena_alloc_junk_small(ret,
 				    &arena_bin_info[binind], false);
 			} else if (unlikely(opt_zero))
@@ -353,17 +351,17 @@ tcache_alloc_large(tsd_t *tsd, arena_t *
 		/*
 		 * Only allocate one large object at a time, because it's quite
 		 * expensive to create one and not use it.
 		 */
 		arena = arena_choose(tsd, arena);
 		if (unlikely(arena == NULL))
 			return (NULL);
 
-		ret = arena_malloc_large(tsd, arena, binind, zero);
+		ret = arena_malloc_large(tsd_tsdn(tsd), arena, binind, zero);
 		if (ret == NULL)
 			return (NULL);
 	} else {
 		size_t usize JEMALLOC_CC_SILENCE_INIT(0);
 
 		/* Only compute usize on demand */
 		if (config_prof || (slow_path && config_fill) ||
 		    unlikely(zero)) {
@@ -376,19 +374,20 @@ tcache_alloc_large(tsd_t *tsd, arena_t *
 			    (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
 			size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >>
 			    LG_PAGE);
 			arena_mapbits_large_binind_set(chunk, pageind,
 			    BININD_INVALID);
 		}
 		if (likely(!zero)) {
 			if (slow_path && config_fill) {
-				if (unlikely(opt_junk_alloc))
-					memset(ret, 0xa5, usize);
-				else if (unlikely(opt_zero))
+				if (unlikely(opt_junk_alloc)) {
+					memset(ret, JEMALLOC_ALLOC_JUNK,
+					    usize);
+				} else if (unlikely(opt_zero))
 					memset(ret, 0, usize);
 			}
 		} else
 			memset(ret, 0, usize);
 
 		if (config_stats)
 			tbin->tstats.nrequests++;
 		if (config_prof)
@@ -401,17 +400,17 @@ tcache_alloc_large(tsd_t *tsd, arena_t *
 
 JEMALLOC_ALWAYS_INLINE void
 tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
     bool slow_path)
 {
 	tcache_bin_t *tbin;
 	tcache_bin_info_t *tbin_info;
 
-	assert(tcache_salloc(ptr) <= SMALL_MAXCLASS);
+	assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SMALL_MAXCLASS);
 
 	if (slow_path && config_fill && unlikely(opt_junk_free))
 		arena_dalloc_junk_small(ptr, &arena_bin_info[binind]);
 
 	tbin = &tcache->tbins[binind];
 	tbin_info = &tcache_bin_info[binind];
 	if (unlikely(tbin->ncached == tbin_info->ncached_max)) {
 		tcache_bin_flush_small(tsd, tcache, tbin, binind,
@@ -428,18 +427,18 @@ JEMALLOC_ALWAYS_INLINE void
 tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size,
     bool slow_path)
 {
 	szind_t binind;
 	tcache_bin_t *tbin;
 	tcache_bin_info_t *tbin_info;
 
 	assert((size & PAGE_MASK) == 0);
-	assert(tcache_salloc(ptr) > SMALL_MAXCLASS);
-	assert(tcache_salloc(ptr) <= tcache_maxclass);
+	assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SMALL_MAXCLASS);
+	assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass);
 
 	binind = size2index(size);
 
 	if (slow_path && config_fill && unlikely(opt_junk_free))
 		arena_dalloc_junk_large(ptr, size);
 
 	tbin = &tcache->tbins[binind];
 	tbin_info = &tcache_bin_info[binind];
@@ -453,16 +452,18 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t
 
 	tcache_event(tsd, tcache);
 }
 
 JEMALLOC_ALWAYS_INLINE tcache_t *
 tcaches_get(tsd_t *tsd, unsigned ind)
 {
 	tcaches_t *elm = &tcaches[ind];
-	if (unlikely(elm->tcache == NULL))
-		elm->tcache = tcache_create(tsd, arena_choose(tsd, NULL));
+	if (unlikely(elm->tcache == NULL)) {
+		elm->tcache = tcache_create(tsd_tsdn(tsd), arena_choose(tsd,
+		    NULL));
+	}
 	return (elm->tcache);
 }
 #endif
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/internal/tsd.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/tsd.h
@@ -8,16 +8,19 @@ typedef bool (*malloc_tsd_cleanup_t)(voi
 
 #if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
     !defined(_WIN32))
 typedef struct tsd_init_block_s tsd_init_block_t;
 typedef struct tsd_init_head_s tsd_init_head_t;
 #endif
 
 typedef struct tsd_s tsd_t;
+typedef struct tsdn_s tsdn_t;
+
+#define	TSDN_NULL	((tsdn_t *)0)
 
 typedef enum {
 	tsd_state_uninitialized,
 	tsd_state_nominal,
 	tsd_state_purgatory,
 	tsd_state_reincarnated
 } tsd_state_t;
 
@@ -39,17 +42,18 @@ typedef enum {
  * In example.c:
  *   malloc_tsd_data(, example_, example_t, EX_INITIALIZER)
  *   malloc_tsd_funcs(, example_, example_t, EX_INITIALIZER,
  *       example_tsd_cleanup)
  *
  * The result is a set of generated functions, e.g.:
  *
  *   bool example_tsd_boot(void) {...}
- *   example_t *example_tsd_get() {...}
+ *   bool example_tsd_booted_get(void) {...}
+ *   example_t *example_tsd_get(bool init) {...}
  *   void example_tsd_set(example_t *val) {...}
  *
  * Note that all of the functions deal in terms of (a_type *) rather than
  * (a_type) so that it is possible to support non-pointer types (unlike
  * pthreads TSD).  example_tsd_cleanup() is passed an (a_type *) pointer that is
  * cast to (void *).  This means that the cleanup function needs to cast the
  * function argument to (a_type *), then dereference the resulting pointer to
  * access fields, e.g.
@@ -93,18 +97,20 @@ typedef struct {							\
 /* malloc_tsd_protos(). */
 #define	malloc_tsd_protos(a_attr, a_name, a_type)			\
 a_attr bool								\
 a_name##tsd_boot0(void);						\
 a_attr void								\
 a_name##tsd_boot1(void);						\
 a_attr bool								\
 a_name##tsd_boot(void);							\
+a_attr bool								\
+a_name##tsd_booted_get(void);						\
 a_attr a_type *								\
-a_name##tsd_get(void);							\
+a_name##tsd_get(bool init);						\
 a_attr void								\
 a_name##tsd_set(a_type *val);
 
 /* malloc_tsd_externs(). */
 #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
 #define	malloc_tsd_externs(a_name, a_type)				\
 extern __thread a_type	a_name##tsd_tls;				\
 extern __thread bool	a_name##tsd_initialized;			\
@@ -196,19 +202,31 @@ a_name##tsd_boot1(void)							\
 	/* Do nothing. */						\
 }									\
 a_attr bool								\
 a_name##tsd_boot(void)							\
 {									\
 									\
 	return (a_name##tsd_boot0());					\
 }									\
+a_attr bool								\
+a_name##tsd_booted_get(void)						\
+{									\
+									\
+	return (a_name##tsd_booted);					\
+}									\
+a_attr bool								\
+a_name##tsd_get_allocates(void)						\
+{									\
+									\
+	return (false);							\
+}									\
 /* Get/set. */								\
 a_attr a_type *								\
-a_name##tsd_get(void)							\
+a_name##tsd_get(bool init)						\
 {									\
 									\
 	assert(a_name##tsd_booted);					\
 	return (&a_name##tsd_tls);					\
 }									\
 a_attr void								\
 a_name##tsd_set(a_type *val)						\
 {									\
@@ -241,19 +259,31 @@ a_name##tsd_boot1(void)							\
 	/* Do nothing. */						\
 }									\
 a_attr bool								\
 a_name##tsd_boot(void)							\
 {									\
 									\
 	return (a_name##tsd_boot0());					\
 }									\
+a_attr bool								\
+a_name##tsd_booted_get(void)						\
+{									\
+									\
+	return (a_name##tsd_booted);					\
+}									\
+a_attr bool								\
+a_name##tsd_get_allocates(void)						\
+{									\
+									\
+	return (false);							\
+}									\
 /* Get/set. */								\
 a_attr a_type *								\
-a_name##tsd_get(void)							\
+a_name##tsd_get(bool init)						\
 {									\
 									\
 	assert(a_name##tsd_booted);					\
 	return (&a_name##tsd_tls);					\
 }									\
 a_attr void								\
 a_name##tsd_set(a_type *val)						\
 {									\
@@ -302,24 +332,24 @@ a_name##tsd_wrapper_set(a_name##tsd_wrap
 									\
 	if (!TlsSetValue(a_name##tsd_tsd, (void *)wrapper)) {		\
 		malloc_write("<jemalloc>: Error setting"		\
 		    " TSD for "#a_name"\n");				\
 		abort();						\
 	}								\
 }									\
 a_attr a_name##tsd_wrapper_t *						\
-a_name##tsd_wrapper_get(void)						\
+a_name##tsd_wrapper_get(bool init)					\
 {									\
 	DWORD error = GetLastError();					\
 	a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *)	\
 	    TlsGetValue(a_name##tsd_tsd);				\
 	SetLastError(error);						\
 									\
-	if (unlikely(wrapper == NULL)) {				\
+	if (init && unlikely(wrapper == NULL)) {			\
 		wrapper = (a_name##tsd_wrapper_t *)			\
 		    malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t));	\
 		if (wrapper == NULL) {					\
 			malloc_write("<jemalloc>: Error allocating"	\
 			    " TSD for "#a_name"\n");			\
 			abort();					\
 		} else {						\
 			wrapper->initialized = false;			\
@@ -363,33 +393,47 @@ a_attr bool								\
 a_name##tsd_boot(void)							\
 {									\
 									\
 	if (a_name##tsd_boot0())					\
 		return (true);						\
 	a_name##tsd_boot1();						\
 	return (false);							\
 }									\
+a_attr bool								\
+a_name##tsd_booted_get(void)						\
+{									\
+									\
+	return (a_name##tsd_booted);					\
+}									\
+a_attr bool								\
+a_name##tsd_get_allocates(void)						\
+{									\
+									\
+	return (true);							\
+}									\
 /* Get/set. */								\
 a_attr a_type *								\
-a_name##tsd_get(void)							\
+a_name##tsd_get(bool init)						\
 {									\
 	a_name##tsd_wrapper_t *wrapper;					\
 									\
 	assert(a_name##tsd_booted);					\
-	wrapper = a_name##tsd_wrapper_get();				\
+	wrapper = a_name##tsd_wrapper_get(init);			\
+	if (a_name##tsd_get_allocates() && !init && wrapper == NULL)	\
+		return (NULL);						\
 	return (&wrapper->val);						\
 }									\
 a_attr void								\
 a_name##tsd_set(a_type *val)						\
 {									\
 	a_name##tsd_wrapper_t *wrapper;					\
 									\
 	assert(a_name##tsd_booted);					\
-	wrapper = a_name##tsd_wrapper_get();				\
+	wrapper = a_name##tsd_wrapper_get(true);			\
 	wrapper->val = *(val);						\
 	if (a_cleanup != malloc_tsd_no_cleanup)				\
 		wrapper->initialized = true;				\
 }
 #else
 #define	malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer,		\
     a_cleanup)								\
 /* Initialization/cleanup. */						\
@@ -423,22 +467,22 @@ a_name##tsd_wrapper_set(a_name##tsd_wrap
 	if (pthread_setspecific(a_name##tsd_tsd,			\
 	    (void *)wrapper)) {						\
 		malloc_write("<jemalloc>: Error setting"		\
 		    " TSD for "#a_name"\n");				\
 		abort();						\
 	}								\
 }									\
 a_attr a_name##tsd_wrapper_t *						\
-a_name##tsd_wrapper_get(void)						\
+a_name##tsd_wrapper_get(bool init)					\
 {									\
 	a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *)	\
 	    pthread_getspecific(a_name##tsd_tsd);			\
 									\
-	if (unlikely(wrapper == NULL)) {				\
+	if (init && unlikely(wrapper == NULL)) {			\
 		tsd_init_block_t block;					\
 		wrapper = tsd_init_check_recursion(			\
 		    &a_name##tsd_init_head, &block);			\
 		if (wrapper)						\
 		    return (wrapper);					\
 		wrapper = (a_name##tsd_wrapper_t *)			\
 		    malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t));	\
 		block.data = wrapper;					\
@@ -485,33 +529,47 @@ a_attr bool								\
 a_name##tsd_boot(void)							\
 {									\
 									\
 	if (a_name##tsd_boot0())					\
 		return (true);						\
 	a_name##tsd_boot1();						\
 	return (false);							\
 }									\
+a_attr bool								\
+a_name##tsd_booted_get(void)						\
+{									\
+									\
+	return (a_name##tsd_booted);					\
+}									\
+a_attr bool								\
+a_name##tsd_get_allocates(void)						\
+{									\
+									\
+	return (true);							\
+}									\
 /* Get/set. */								\
 a_attr a_type *								\
-a_name##tsd_get(void)							\
+a_name##tsd_get(bool init)						\
 {									\
 	a_name##tsd_wrapper_t *wrapper;					\
 									\
 	assert(a_name##tsd_booted);					\
-	wrapper = a_name##tsd_wrapper_get();				\
+	wrapper = a_name##tsd_wrapper_get(init);			\
+	if (a_name##tsd_get_allocates() && !init && wrapper == NULL)	\
+		return (NULL);						\
 	return (&wrapper->val);						\
 }									\
 a_attr void								\
 a_name##tsd_set(a_type *val)						\
 {									\
 	a_name##tsd_wrapper_t *wrapper;					\
 									\
 	assert(a_name##tsd_booted);					\
-	wrapper = a_name##tsd_wrapper_get();				\
+	wrapper = a_name##tsd_wrapper_get(true);			\
 	wrapper->val = *(val);						\
 	if (a_cleanup != malloc_tsd_no_cleanup)				\
 		wrapper->initialized = true;				\
 }
 #endif
 
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
@@ -531,108 +589,146 @@ struct tsd_init_head_s {
 #endif
 
 #define	MALLOC_TSD							\
 /*  O(name,			type) */				\
     O(tcache,			tcache_t *)				\
     O(thread_allocated,		uint64_t)				\
     O(thread_deallocated,	uint64_t)				\
     O(prof_tdata,		prof_tdata_t *)				\
+    O(iarena,			arena_t *)				\
     O(arena,			arena_t *)				\
     O(arenas_tdata,		arena_tdata_t *)			\
     O(narenas_tdata,		unsigned)				\
     O(arenas_tdata_bypass,	bool)					\
     O(tcache_enabled,		tcache_enabled_t)			\
     O(quarantine,		quarantine_t *)				\
+    O(witnesses,		witness_list_t)				\
+    O(witness_fork,		bool)					\
 
 #define	TSD_INITIALIZER {						\
     tsd_state_uninitialized,						\
     NULL,								\
     0,									\
     0,									\
     NULL,								\
     NULL,								\
     NULL,								\
+    NULL,								\
     0,									\
     false,								\
     tcache_enabled_default,						\
-    NULL								\
+    NULL,								\
+    ql_head_initializer(witnesses),					\
+    false								\
 }
 
 struct tsd_s {
 	tsd_state_t	state;
 #define	O(n, t)								\
 	t		n;
 MALLOC_TSD
 #undef O
 };
 
+/*
+ * Wrapper around tsd_t that makes it possible to avoid implicit conversion
+ * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be
+ * explicitly converted to tsd_t, which is non-nullable.
+ */
+struct tsdn_s {
+	tsd_t	tsd;
+};
+
 static const tsd_t tsd_initializer = TSD_INITIALIZER;
 
 malloc_tsd_types(, tsd_t)
 
 #endif /* JEMALLOC_H_STRUCTS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
 void	*malloc_tsd_malloc(size_t size);
 void	malloc_tsd_dalloc(void *wrapper);
 void	malloc_tsd_no_cleanup(void *arg);
 void	malloc_tsd_cleanup_register(bool (*f)(void));
-bool	malloc_tsd_boot0(void);
+tsd_t	*malloc_tsd_boot0(void);
 void	malloc_tsd_boot1(void);
 #if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
     !defined(_WIN32))
 void	*tsd_init_check_recursion(tsd_init_head_t *head,
     tsd_init_block_t *block);
 void	tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block);
 #endif
 void	tsd_cleanup(void *arg);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
 malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t)
 
+tsd_t	*tsd_fetch_impl(bool init);
 tsd_t	*tsd_fetch(void);
+tsdn_t	*tsd_tsdn(tsd_t *tsd);
 bool	tsd_nominal(tsd_t *tsd);
 #define	O(n, t)								\
 t	*tsd_##n##p_get(tsd_t *tsd);					\
 t	tsd_##n##_get(tsd_t *tsd);					\
 void	tsd_##n##_set(tsd_t *tsd, t n);
 MALLOC_TSD
 #undef O
+tsdn_t	*tsdn_fetch(void);
+bool	tsdn_null(const tsdn_t *tsdn);
+tsd_t	*tsdn_tsd(tsdn_t *tsdn);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_))
 malloc_tsd_externs(, tsd_t)
 malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup)
 
 JEMALLOC_ALWAYS_INLINE tsd_t *
-tsd_fetch(void)
+tsd_fetch_impl(bool init)
 {
-	tsd_t *tsd = tsd_get();
+	tsd_t *tsd = tsd_get(init);
+
+	if (!init && tsd_get_allocates() && tsd == NULL)
+		return (NULL);
+	assert(tsd != NULL);
 
 	if (unlikely(tsd->state != tsd_state_nominal)) {
 		if (tsd->state == tsd_state_uninitialized) {
 			tsd->state = tsd_state_nominal;
 			/* Trigger cleanup handler registration. */
 			tsd_set(tsd);
 		} else if (tsd->state == tsd_state_purgatory) {
 			tsd->state = tsd_state_reincarnated;
 			tsd_set(tsd);
 		} else
 			assert(tsd->state == tsd_state_reincarnated);
 	}
 
 	return (tsd);
 }
 
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsd_fetch(void)
+{
+
+	return (tsd_fetch_impl(true));
+}
+
+JEMALLOC_ALWAYS_INLINE tsdn_t *
+tsd_tsdn(tsd_t *tsd)
+{
+
+	return ((tsdn_t *)tsd);
+}
+
 JEMALLOC_INLINE bool
 tsd_nominal(tsd_t *tsd)
 {
 
 	return (tsd->state == tsd_state_nominal);
 }
 
 #define	O(n, t)								\
@@ -654,12 +750,38 @@ JEMALLOC_ALWAYS_INLINE void						\
 tsd_##n##_set(tsd_t *tsd, t n)						\
 {									\
 									\
 	assert(tsd->state == tsd_state_nominal);			\
 	tsd->n = n;							\
 }
 MALLOC_TSD
 #undef O
+
+JEMALLOC_ALWAYS_INLINE tsdn_t *
+tsdn_fetch(void)
+{
+
+	if (!tsd_booted_get())
+		return (NULL);
+
+	return (tsd_tsdn(tsd_fetch_impl(false)));
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsdn_null(const tsdn_t *tsdn)
+{
+
+	return (tsdn == NULL);
+}
+
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsdn_tsd(tsdn_t *tsdn)
+{
+
+	assert(!tsdn_null(tsdn));
+
+	return (&tsdn->tsd);
+}
 #endif
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/internal/util.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/util.h
@@ -35,16 +35,20 @@
 #define	BUFERROR_BUF		64
 
 /*
  * Size of stack-allocated buffer used by malloc_{,v,vc}printf().  This must be
  * large enough for all possible uses within jemalloc.
  */
 #define	MALLOC_PRINTF_BUFSIZE	4096
 
+/* Junk fill patterns. */
+#define	JEMALLOC_ALLOC_JUNK	((uint8_t)0xa5)
+#define	JEMALLOC_FREE_JUNK	((uint8_t)0x5a)
+
 /*
  * Wrap a cpp argument that contains commas such that it isn't broken up into
  * multiple arguments.
  */
 #define	JEMALLOC_ARG_CONCAT(...) __VA_ARGS__
 
 /*
  * Silence compiler warnings due to uninitialized values.  This is used
@@ -52,40 +56,30 @@
  * uninitialized.
  */
 #ifdef JEMALLOC_CC_SILENCE
 #	define JEMALLOC_CC_SILENCE_INIT(v) = v
 #else
 #	define JEMALLOC_CC_SILENCE_INIT(v)
 #endif
 
-#define	JEMALLOC_GNUC_PREREQ(major, minor)				\
-    (!defined(__clang__) &&						\
-    (__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor))))
-#ifndef __has_builtin
-#  define __has_builtin(builtin) (0)
-#endif
-#define	JEMALLOC_CLANG_HAS_BUILTIN(builtin)				\
-    (defined(__clang__) && __has_builtin(builtin))
-
 #ifdef __GNUC__
 #	define likely(x)   __builtin_expect(!!(x), 1)
 #	define unlikely(x) __builtin_expect(!!(x), 0)
-#  if JEMALLOC_GNUC_PREREQ(4, 6) ||					\
-      JEMALLOC_CLANG_HAS_BUILTIN(__builtin_unreachable)
-#	define unreachable() __builtin_unreachable()
-#  else
-#	define unreachable()
-#  endif
 #else
 #	define likely(x)   !!(x)
 #	define unlikely(x) !!(x)
-#	define unreachable()
 #endif
 
+#if !defined(JEMALLOC_INTERNAL_UNREACHABLE)
+#  error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure
+#endif
+
+#define unreachable() JEMALLOC_INTERNAL_UNREACHABLE()
+
 #include "jemalloc/internal/assert.h"
 
 /* Use to assert a particular configuration, e.g., cassert(config_debug). */
 #define	cassert(c) do {							\
 	if (unlikely(!(c)))						\
 		not_reached();						\
 } while (0)
 
@@ -101,19 +95,19 @@ int	buferror(int err, char *buf, size_t 
 uintmax_t	malloc_strtoumax(const char *restrict nptr,
     char **restrict endptr, int base);
 void	malloc_write(const char *s);
 
 /*
  * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating
  * point math.
  */
-int	malloc_vsnprintf(char *str, size_t size, const char *format,
+size_t	malloc_vsnprintf(char *str, size_t size, const char *format,
     va_list ap);
-int	malloc_snprintf(char *str, size_t size, const char *format, ...)
+size_t	malloc_snprintf(char *str, size_t size, const char *format, ...)
     JEMALLOC_FORMAT_PRINTF(3, 4);
 void	malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
     const char *format, va_list ap);
 void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque,
     const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4);
 void	malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2);
 
 #endif /* JEMALLOC_H_EXTERNS */
--- a/memory/jemalloc/src/include/jemalloc/internal/valgrind.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/valgrind.h
@@ -25,25 +25,27 @@
 	if (unlikely(in_valgrind))					\
 		valgrind_make_mem_defined(ptr, usize);			\
 } while (0)
 /*
  * The VALGRIND_MALLOCLIKE_BLOCK() and VALGRIND_RESIZEINPLACE_BLOCK() macro
  * calls must be embedded in macros rather than in functions so that when
  * Valgrind reports errors, there are no extra stack frames in the backtraces.
  */
-#define	JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {		\
-	if (unlikely(in_valgrind && cond))				\
-		VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero);	\
+#define	JEMALLOC_VALGRIND_MALLOC(cond, tsdn, ptr, usize, zero) do {	\
+	if (unlikely(in_valgrind && cond)) {				\
+		VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(tsdn, ptr),	\
+		    zero);						\
+	}								\
 } while (0)
-#define	JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize,		\
+#define	JEMALLOC_VALGRIND_REALLOC(maybe_moved, tsdn, ptr, usize,	\
     ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null,	\
     zero) do {								\
 	if (unlikely(in_valgrind)) {					\
-		size_t rzsize = p2rz(ptr);				\
+		size_t rzsize = p2rz(tsdn, ptr);			\
 									\
 		if (!maybe_moved || ptr == old_ptr) {			\
 			VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize,	\
 			    usize, rzsize);				\
 			if (zero && old_usize < usize) {		\
 				valgrind_make_mem_defined(		\
 				    (void *)((uintptr_t)ptr +		\
 				    old_usize), usize - old_usize);	\
@@ -76,18 +78,18 @@
 	if (unlikely(in_valgrind))					\
 		valgrind_freelike_block(ptr, rzsize);			\
 } while (0)
 #else
 #define	RUNNING_ON_VALGRIND	((unsigned)0)
 #define	JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do {} while (0)
 #define	JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do {} while (0)
 #define	JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do {} while (0)
-#define	JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {} while (0)
-#define	JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize,		\
+#define	JEMALLOC_VALGRIND_MALLOC(cond, tsdn, ptr, usize, zero) do {} while (0)
+#define	JEMALLOC_VALGRIND_REALLOC(maybe_moved, tsdn, ptr, usize,	\
     ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null,	\
     zero) do {} while (0)
 #define	JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0)
 #endif
 
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/include/jemalloc/internal/witness.h
@@ -0,0 +1,266 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+typedef struct witness_s witness_t;
+typedef unsigned witness_rank_t;
+typedef ql_head(witness_t) witness_list_t;
+typedef int witness_comp_t (const witness_t *, const witness_t *);
+
+/*
+ * Lock ranks.  Witnesses with rank WITNESS_RANK_OMIT are completely ignored by
+ * the witness machinery.
+ */
+#define	WITNESS_RANK_OMIT		0U
+
+#define	WITNESS_RANK_INIT		1U
+#define	WITNESS_RANK_CTL		1U
+#define	WITNESS_RANK_ARENAS		2U
+
+#define	WITNESS_RANK_PROF_DUMP		3U
+#define	WITNESS_RANK_PROF_BT2GCTX	4U
+#define	WITNESS_RANK_PROF_TDATAS	5U
+#define	WITNESS_RANK_PROF_TDATA		6U
+#define	WITNESS_RANK_PROF_GCTX		7U
+
+#define	WITNESS_RANK_ARENA		8U
+#define	WITNESS_RANK_ARENA_CHUNKS	9U
+#define	WITNESS_RANK_ARENA_NODE_CACHE	10
+
+#define	WITNESS_RANK_BASE		11U
+
+#define	WITNESS_RANK_LEAF		0xffffffffU
+#define	WITNESS_RANK_ARENA_BIN		WITNESS_RANK_LEAF
+#define	WITNESS_RANK_ARENA_HUGE		WITNESS_RANK_LEAF
+#define	WITNESS_RANK_DSS		WITNESS_RANK_LEAF
+#define	WITNESS_RANK_PROF_ACTIVE	WITNESS_RANK_LEAF
+#define	WITNESS_RANK_PROF_DUMP_SEQ	WITNESS_RANK_LEAF
+#define	WITNESS_RANK_PROF_GDUMP		WITNESS_RANK_LEAF
+#define	WITNESS_RANK_PROF_NEXT_THR_UID	WITNESS_RANK_LEAF
+#define	WITNESS_RANK_PROF_THREAD_ACTIVE_INIT	WITNESS_RANK_LEAF
+
+#define	WITNESS_INITIALIZER(rank) {"initializer", rank, NULL, {NULL, NULL}}
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+struct witness_s {
+	/* Name, used for printing lock order reversal messages. */
+	const char		*name;
+
+	/*
+	 * Witness rank, where 0 is lowest and UINT_MAX is highest.  Witnesses
+	 * must be acquired in order of increasing rank.
+	 */
+	witness_rank_t		rank;
+
+	/*
+	 * If two witnesses are of equal rank and they have the samp comp
+	 * function pointer, it is called as a last attempt to differentiate
+	 * between witnesses of equal rank.
+	 */
+	witness_comp_t		*comp;
+
+	/* Linkage for thread's currently owned locks. */
+	ql_elm(witness_t)	link;
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+void	witness_init(witness_t *witness, const char *name, witness_rank_t rank,
+    witness_comp_t *comp);
+#ifdef JEMALLOC_JET
+typedef void (witness_lock_error_t)(const witness_list_t *, const witness_t *);
+extern witness_lock_error_t *witness_lock_error;
+#else
+void	witness_lock_error(const witness_list_t *witnesses,
+    const witness_t *witness);
+#endif
+#ifdef JEMALLOC_JET
+typedef void (witness_owner_error_t)(const witness_t *);
+extern witness_owner_error_t *witness_owner_error;
+#else
+void	witness_owner_error(const witness_t *witness);
+#endif
+#ifdef JEMALLOC_JET
+typedef void (witness_not_owner_error_t)(const witness_t *);
+extern witness_not_owner_error_t *witness_not_owner_error;
+#else
+void	witness_not_owner_error(const witness_t *witness);
+#endif
+#ifdef JEMALLOC_JET
+typedef void (witness_lockless_error_t)(const witness_list_t *);
+extern witness_lockless_error_t *witness_lockless_error;
+#else
+void	witness_lockless_error(const witness_list_t *witnesses);
+#endif
+
+void	witnesses_cleanup(tsd_t *tsd);
+void	witness_fork_cleanup(tsd_t *tsd);
+void	witness_prefork(tsd_t *tsd);
+void	witness_postfork_parent(tsd_t *tsd);
+void	witness_postfork_child(tsd_t *tsd);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+bool	witness_owner(tsd_t *tsd, const witness_t *witness);
+void	witness_assert_owner(tsdn_t *tsdn, const witness_t *witness);
+void	witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness);
+void	witness_assert_lockless(tsdn_t *tsdn);
+void	witness_lock(tsdn_t *tsdn, witness_t *witness);
+void	witness_unlock(tsdn_t *tsdn, witness_t *witness);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_))
+JEMALLOC_INLINE bool
+witness_owner(tsd_t *tsd, const witness_t *witness)
+{
+	witness_list_t *witnesses;
+	witness_t *w;
+
+	witnesses = tsd_witnessesp_get(tsd);
+	ql_foreach(w, witnesses, link) {
+		if (w == witness)
+			return (true);
+	}
+
+	return (false);
+}
+
+JEMALLOC_INLINE void
+witness_assert_owner(tsdn_t *tsdn, const witness_t *witness)
+{
+	tsd_t *tsd;
+
+	if (!config_debug)
+		return;
+
+	if (tsdn_null(tsdn))
+		return;
+	tsd = tsdn_tsd(tsdn);
+	if (witness->rank == WITNESS_RANK_OMIT)
+		return;
+
+	if (witness_owner(tsd, witness))
+		return;
+	witness_owner_error(witness);
+}
+
+JEMALLOC_INLINE void
+witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness)
+{
+	tsd_t *tsd;
+	witness_list_t *witnesses;
+	witness_t *w;
+
+	if (!config_debug)
+		return;
+
+	if (tsdn_null(tsdn))
+		return;
+	tsd = tsdn_tsd(tsdn);
+	if (witness->rank == WITNESS_RANK_OMIT)
+		return;
+
+	witnesses = tsd_witnessesp_get(tsd);
+	ql_foreach(w, witnesses, link) {
+		if (w == witness)
+			witness_not_owner_error(witness);
+	}
+}
+
+JEMALLOC_INLINE void
+witness_assert_lockless(tsdn_t *tsdn)
+{
+	tsd_t *tsd;
+	witness_list_t *witnesses;
+	witness_t *w;
+
+	if (!config_debug)
+		return;
+
+	if (tsdn_null(tsdn))
+		return;
+	tsd = tsdn_tsd(tsdn);
+
+	witnesses = tsd_witnessesp_get(tsd);
+	w = ql_last(witnesses, link);
+	if (w != NULL)
+		witness_lockless_error(witnesses);
+}
+
+JEMALLOC_INLINE void
+witness_lock(tsdn_t *tsdn, witness_t *witness)
+{
+	tsd_t *tsd;
+	witness_list_t *witnesses;
+	witness_t *w;
+
+	if (!config_debug)
+		return;
+
+	if (tsdn_null(tsdn))
+		return;
+	tsd = tsdn_tsd(tsdn);
+	if (witness->rank == WITNESS_RANK_OMIT)
+		return;
+
+	witness_assert_not_owner(tsdn, witness);
+
+	witnesses = tsd_witnessesp_get(tsd);
+	w = ql_last(witnesses, link);
+	if (w == NULL) {
+		/* No other locks; do nothing. */
+	} else if (tsd_witness_fork_get(tsd) && w->rank <= witness->rank) {
+		/* Forking, and relaxed ranking satisfied. */
+	} else if (w->rank > witness->rank) {
+		/* Not forking, rank order reversal. */
+		witness_lock_error(witnesses, witness);
+	} else if (w->rank == witness->rank && (w->comp == NULL || w->comp !=
+	    witness->comp || w->comp(w, witness) > 0)) {
+		/*
+		 * Missing/incompatible comparison function, or comparison
+		 * function indicates rank order reversal.
+		 */
+		witness_lock_error(witnesses, witness);
+	}
+
+	ql_elm_new(witness, link);
+	ql_tail_insert(witnesses, witness, link);
+}
+
+JEMALLOC_INLINE void
+witness_unlock(tsdn_t *tsdn, witness_t *witness)
+{
+	tsd_t *tsd;
+	witness_list_t *witnesses;
+
+	if (!config_debug)
+		return;
+
+	if (tsdn_null(tsdn))
+		return;
+	tsd = tsdn_tsd(tsdn);
+	if (witness->rank == WITNESS_RANK_OMIT)
+		return;
+
+	/*
+	 * Check whether owner before removal, rather than relying on
+	 * witness_assert_owner() to abort, so that unit tests can test this
+	 * function's failure mode without causing undefined behavior.
+	 */
+	if (witness_owner(tsd, witness)) {
+		witnesses = tsd_witnessesp_get(tsd);
+		ql_remove(witnesses, witness, link);
+	} else
+		witness_assert_owner(tsdn, witness);
+}
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/jemalloc_macros.h.in
+++ b/memory/jemalloc/src/include/jemalloc/jemalloc_macros.h.in
@@ -8,33 +8,33 @@
 #define	JEMALLOC_VERSION_MAJOR @jemalloc_version_major@
 #define	JEMALLOC_VERSION_MINOR @jemalloc_version_minor@
 #define	JEMALLOC_VERSION_BUGFIX @jemalloc_version_bugfix@
 #define	JEMALLOC_VERSION_NREV @jemalloc_version_nrev@
 #define	JEMALLOC_VERSION_GID "@jemalloc_version_gid@"
 
 #  define MALLOCX_LG_ALIGN(la)	((int)(la))
 #  if LG_SIZEOF_PTR == 2
-#    define MALLOCX_ALIGN(a)	((int)(ffs(a)-1))
+#    define MALLOCX_ALIGN(a)	((int)(ffs((int)(a))-1))
 #  else
 #    define MALLOCX_ALIGN(a)						\
-       ((int)(((a) < (size_t)INT_MAX) ? ffs((int)(a))-1 :		\
-       ffs((int)((a)>>32))+31))
+       ((int)(((size_t)(a) < (size_t)INT_MAX) ? ffs((int)(a))-1 :	\
+       ffs((int)(((size_t)(a))>>32))+31))
 #  endif
 #  define MALLOCX_ZERO	((int)0x40)
 /*
  * Bias tcache index bits so that 0 encodes "automatic tcache management", and 1
  * encodes MALLOCX_TCACHE_NONE.
  */
 #  define MALLOCX_TCACHE(tc)	((int)(((tc)+2) << 8))
 #  define MALLOCX_TCACHE_NONE	MALLOCX_TCACHE(-1)
 /*
  * Bias arena index bits so that 0 encodes "use an automatically chosen arena".
  */
-#  define MALLOCX_ARENA(a)	((int)(((a)+1) << 20))
+#  define MALLOCX_ARENA(a)	((((int)(a))+1) << 20)
 
 #if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW)
 #  define JEMALLOC_CXX_THROW throw()
 #else
 #  define JEMALLOC_CXX_THROW
 #endif
 
 #if _MSC_VER
--- a/memory/jemalloc/src/jemalloc.pc.in
+++ b/memory/jemalloc/src/jemalloc.pc.in
@@ -1,12 +1,12 @@
 prefix=@prefix@
 exec_prefix=@exec_prefix@
 libdir=@libdir@
 includedir=@includedir@
 install_suffix=@install_suffix@
 
 Name: jemalloc
 Description: A general purpose malloc(3) implementation that emphasizes fragmentation avoidance and scalable concurrency support.
-URL: http://www.canonware.com/jemalloc
+URL: http://jemalloc.net/
 Version: @jemalloc_version@
 Cflags: -I${includedir}
 Libs: -L${libdir} -ljemalloc${install_suffix}
--- a/memory/jemalloc/src/msvc/ReadMe.txt
+++ b/memory/jemalloc/src/msvc/ReadMe.txt
@@ -12,13 +12,13 @@ 1. Install Cygwin with at least the foll
 2. Install Visual Studio 2015 with Visual C++
 
 3. Add Cygwin\bin to the PATH environment variable
 
 4. Open "VS2015 x86 Native Tools Command Prompt"
    (note: x86/x64 doesn't matter at this point)
 
 5. Generate header files:
-   sh -c "./autogen.sh CC=cl --enable-lazy-lock=no"
+   sh -c "CC=cl ./autogen.sh"
 
 6. Now the project can be opened and built in Visual Studio:
    msvc\jemalloc_vc2015.sln
 
--- a/memory/jemalloc/src/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
+++ b/memory/jemalloc/src/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
@@ -51,34 +51,37 @@
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\jemalloc_internal.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\jemalloc_internal_decls.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\jemalloc_internal_defs.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\jemalloc_internal_macros.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\mb.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\mutex.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\nstime.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\pages.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\ph.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\private_namespace.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\private_unnamespace.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\prng.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\prof.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\public_namespace.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\public_unnamespace.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\ql.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\qr.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\quarantine.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\rb.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\rtree.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\size_classes.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\smoothstep.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\spin.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\stats.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\tcache.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\ticker.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\tsd.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\util.h" />
-    <ClInclude Include="..\..\..\..\include\jemalloc\internal\valgrind.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\witness.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_defs.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_macros.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_mangle.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_protos.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_protos_jet.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_rename.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_typedefs.h" />
@@ -104,21 +107,23 @@
     <ClCompile Include="..\..\..\..\src\mb.c" />
     <ClCompile Include="..\..\..\..\src\mutex.c" />
     <ClCompile Include="..\..\..\..\src\nstime.c" />
     <ClCompile Include="..\..\..\..\src\pages.c" />
     <ClCompile Include="..\..\..\..\src\prng.c" />
     <ClCompile Include="..\..\..\..\src\prof.c" />
     <ClCompile Include="..\..\..\..\src\quarantine.c" />
     <ClCompile Include="..\..\..\..\src\rtree.c" />
+    <ClCompile Include="..\..\..\..\src\spin.c" />
     <ClCompile Include="..\..\..\..\src\stats.c" />
     <ClCompile Include="..\..\..\..\src\tcache.c" />
     <ClCompile Include="..\..\..\..\src\ticker.c" />
     <ClCompile Include="..\..\..\..\src\tsd.c" />
     <ClCompile Include="..\..\..\..\src\util.c" />
+    <ClCompile Include="..\..\..\..\src\witness.c" />
   </ItemGroup>
   <PropertyGroup Label="Globals">
     <ProjectGuid>{8D6BB292-9E1C-413D-9F98-4864BDC1514A}</ProjectGuid>
     <Keyword>Win32Proj</Keyword>
     <RootNamespace>jemalloc</RootNamespace>
     <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
@@ -245,85 +250,86 @@
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <ClCompile>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <WarningLevel>Level3</WarningLevel>
       <Optimization>Disabled</Optimization>
       <PreprocessorDefinitions>_REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
       <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
       <GenerateDebugInformation>true</GenerateDebugInformation>
     </Link>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'">
     <ClCompile>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <WarningLevel>Level3</WarningLevel>
       <Optimization>Disabled</Optimization>
       <PreprocessorDefinitions>JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
-      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
       <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
       <GenerateDebugInformation>true</GenerateDebugInformation>
     </Link>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <WarningLevel>Level3</WarningLevel>
       <Optimization>Disabled</Optimization>
       <PreprocessorDefinitions>_REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
       <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
       <GenerateDebugInformation>true</GenerateDebugInformation>
     </Link>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'">
     <ClCompile>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <WarningLevel>Level3</WarningLevel>
       <Optimization>Disabled</Optimization>
       <PreprocessorDefinitions>JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
-      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
-      <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+      <DebugInformationFormat>OldStyle</DebugInformationFormat>
+      <MinimalRebuild>false</MinimalRebuild>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
       <GenerateDebugInformation>true</GenerateDebugInformation>
     </Link>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <Optimization>MaxSpeed</Optimization>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <IntrinsicFunctions>true</IntrinsicFunctions>
       <PreprocessorDefinitions>_REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
       <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <EnableCOMDATFolding>true</EnableCOMDATFolding>
       <OptimizeReferences>true</OptimizeReferences>
     </Link>
@@ -334,17 +340,17 @@
       <PrecompiledHeader>
       </PrecompiledHeader>
       <Optimization>MaxSpeed</Optimization>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <IntrinsicFunctions>true</IntrinsicFunctions>
       <PreprocessorDefinitions>_REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
-      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
       <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <EnableCOMDATFolding>true</EnableCOMDATFolding>
       <OptimizeReferences>true</OptimizeReferences>
     </Link>
@@ -354,17 +360,17 @@
       <WarningLevel>Level3</WarningLevel>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <Optimization>MaxSpeed</Optimization>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <IntrinsicFunctions>true</IntrinsicFunctions>
       <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>_REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
       <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <EnableCOMDATFolding>true</EnableCOMDATFolding>
       <OptimizeReferences>true</OptimizeReferences>
     </Link>
@@ -375,18 +381,18 @@
       <PrecompiledHeader>
       </PrecompiledHeader>
       <Optimization>MaxSpeed</Optimization>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <IntrinsicFunctions>true</IntrinsicFunctions>
       <PreprocessorDefinitions>_REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
-      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
-      <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+      <DebugInformationFormat>OldStyle</DebugInformationFormat>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <EnableCOMDATFolding>true</EnableCOMDATFolding>
       <OptimizeReferences>true</OptimizeReferences>
     </Link>
   </ItemDefinitionGroup>
--- a/memory/jemalloc/src/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
+++ b/memory/jemalloc/src/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
@@ -102,16 +102,19 @@
       <Filter>Header Files\internal</Filter>
     </ClInclude>
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\nstime.h">
       <Filter>Header Files\internal</Filter>
     </ClInclude>
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\pages.h">
       <Filter>Header Files\internal</Filter>
     </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\ph.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\private_namespace.h">
       <Filter>Header Files\internal</Filter>
     </ClInclude>
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\private_unnamespace.h">
       <Filter>Header Files\internal</Filter>
     </ClInclude>
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\prng.h">
       <Filter>Header Files\internal</Filter>
@@ -138,32 +141,38 @@
       <Filter>Header Files\internal</Filter>
     </ClInclude>
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\rtree.h">
       <Filter>Header Files\internal</Filter>
     </ClInclude>
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\size_classes.h">
       <Filter>Header Files\internal</Filter>
     </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\smoothstep.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\spin.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\stats.h">
       <Filter>Header Files\internal</Filter>
     </ClInclude>
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\tcache.h">
       <Filter>Header Files\internal</Filter>
     </ClInclude>
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\ticker.h">
       <Filter>Header Files\internal</Filter>
     </ClInclude>
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\tsd.h">
       <Filter>Header Files\internal</Filter>
     </ClInclude>
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\util.h">
       <Filter>Header Files\internal</Filter>
     </ClInclude>
-    <ClInclude Include="..\..\..\..\include\jemalloc\internal\valgrind.h">
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\witness.h">
       <Filter>Header Files\internal</Filter>
     </ClInclude>
     <ClInclude Include="..\..\..\..\include\msvc_compat\strings.h">
       <Filter>Header Files\msvc_compat</Filter>
     </ClInclude>
     <ClInclude Include="..\..\..\..\include\msvc_compat\windows_extra.h">
       <Filter>Header Files\msvc_compat</Filter>
     </ClInclude>
@@ -233,25 +242,31 @@
       <Filter>Source Files</Filter>
     </ClCompile>
     <ClCompile Include="..\..\..\..\src\quarantine.c">
       <Filter>Source Files</Filter>
     </ClCompile>
     <ClCompile Include="..\..\..\..\src\rtree.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\..\src\spin.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="..\..\..\..\src\stats.c">
       <Filter>Source Files</Filter>
     </ClCompile>
     <ClCompile Include="..\..\..\..\src\tcache.c">
       <Filter>Source Files</Filter>
     </ClCompile>
     <ClCompile Include="..\..\..\..\src\ticker.c">
       <Filter>Source Files</Filter>
     </ClCompile>
     <ClCompile Include="..\..\..\..\src\tsd.c">
       <Filter>Source Files</Filter>
     </ClCompile>
     <ClCompile Include="..\..\..\..\src\util.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\..\src\witness.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
-</Project>
\ No newline at end of file
+</Project>
--- a/memory/jemalloc/src/src/arena.c
+++ b/memory/jemalloc/src/src/arena.c
@@ -16,37 +16,33 @@ ssize_t		opt_decay_time = DECAY_TIME_DEF
 static ssize_t	decay_time_default;
 
 arena_bin_info_t	arena_bin_info[NBINS];
 
 size_t		map_bias;
 size_t		map_misc_offset;
 size_t		arena_maxrun; /* Max run size for arenas. */
 size_t		large_maxclass; /* Max large size class. */
-size_t		run_quantize_max; /* Max run_quantize_*() input. */
-static size_t	small_maxrun; /* Max run size for small size classes. */
-static bool	*small_run_tab; /* Valid small run page multiples. */
-static size_t	*run_quantize_floor_tab; /* run_quantize_floor() memoization. */
-static size_t	*run_quantize_ceil_tab; /* run_quantize_ceil() memoization. */
 unsigned	nlclasses; /* Number of large size classes. */
 unsigned	nhclasses; /* Number of huge size classes. */
-static szind_t	runs_avail_bias; /* Size index for first runs_avail tree. */
-static szind_t	runs_avail_nclasses; /* Number of runs_avail trees. */
 
 /******************************************************************************/
 /*
  * Function prototypes for static functions that are referenced prior to
  * definition.
  */
 
-static void	arena_purge_to_limit(arena_t *arena, size_t ndirty_limit);
-static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty,
-    bool cleaned, bool decommitted);
-static void	arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk,
-    arena_run_t *run, arena_bin_t *bin);
+static void	arena_chunk_dalloc(tsdn_t *tsdn, arena_t *arena,
+    arena_chunk_t *chunk);
+static void	arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena,
+    size_t ndirty_limit);
+static void	arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, arena_run_t *run,
+    bool dirty, bool cleaned, bool decommitted);
+static void	arena_dalloc_bin_run(tsdn_t *tsdn, arena_t *arena,
+    arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin);
 static void	arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk,
     arena_run_t *run, arena_bin_t *bin);
 
 /******************************************************************************/
 
 JEMALLOC_INLINE_C size_t
 arena_miscelm_size_get(const arena_chunk_map_misc_t *miscelm)
 {
@@ -67,199 +63,142 @@ arena_run_addr_comp(const arena_chunk_ma
 	uintptr_t b_miscelm = (uintptr_t)b;
 
 	assert(a != NULL);
 	assert(b != NULL);
 
 	return ((a_miscelm > b_miscelm) - (a_miscelm < b_miscelm));
 }
 
-/* Generate red-black tree functions. */
-rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_misc_t,
-    rb_link, arena_run_addr_comp)
-
+/* Generate pairing heap functions. */
+ph_gen(static UNUSED, arena_run_heap_, arena_run_heap_t, arena_chunk_map_misc_t,
+    ph_link, arena_run_addr_comp)
+
+#ifdef JEMALLOC_JET
+#undef run_quantize_floor
+#define	run_quantize_floor JEMALLOC_N(n_run_quantize_floor)
+#endif
 static size_t
-run_quantize_floor_compute(size_t size)
+run_quantize_floor(size_t size)
 {
-	size_t qsize;
+	size_t ret;
+	pszind_t pind;
+
+	assert(size > 0);
+	assert(size <= HUGE_MAXCLASS);
+	assert((size & PAGE_MASK) == 0);
 
 	assert(size != 0);
 	assert(size == PAGE_CEILING(size));
 
-	/* Don't change sizes that are valid small run sizes. */
-	if (size <= small_maxrun && small_run_tab[size >> LG_PAGE])
+	pind = psz2ind(size - large_pad + 1);
+	if (pind == 0) {
+		/*
+		 * Avoid underflow.  This short-circuit would also do the right
+		 * thing for all sizes in the range for which there are
+		 * PAGE-spaced size classes, but it's simplest to just handle
+		 * the one case that would cause erroneous results.
+		 */
 		return (size);
-
-	/*
-	 * Round down to the nearest run size that can actually be requested
-	 * during normal large allocation.  Add large_pad so that cache index
-	 * randomization can offset the allocation from the page boundary.
-	 */
-	qsize = index2size(size2index(size - large_pad + 1) - 1) + large_pad;
-	if (qsize <= SMALL_MAXCLASS + large_pad)
-		return (run_quantize_floor_compute(size - large_pad));
-	assert(qsize <= size);
-	return (qsize);
+	}
+	ret = pind2sz(pind - 1) + large_pad;
+	assert(ret <= size);
+	return (ret);
 }
-
+#ifdef JEMALLOC_JET
+#undef run_quantize_floor
+#define	run_quantize_floor JEMALLOC_N(run_quantize_floor)
+run_quantize_t *run_quantize_floor = JEMALLOC_N(n_run_quantize_floor);
+#endif
+
+#ifdef JEMALLOC_JET
+#undef run_quantize_ceil
+#define	run_quantize_ceil JEMALLOC_N(n_run_quantize_ceil)
+#endif
 static size_t
-run_quantize_ceil_compute_hard(size_t size)
+run_quantize_ceil(size_t size)
 {
-	size_t large_run_size_next;
-
-	assert(size != 0);
-	assert(size == PAGE_CEILING(size));
-
-	/*
-	 * Return the next quantized size greater than the input size.
-	 * Quantized sizes comprise the union of run sizes that back small
-	 * region runs, and run sizes that back large regions with no explicit
-	 * alignment constraints.
-	 */
-
-	if (size > SMALL_MAXCLASS) {
-		large_run_size_next = PAGE_CEILING(index2size(size2index(size -
-		    large_pad) + 1) + large_pad);
-	} else
-		large_run_size_next = SIZE_T_MAX;
-	if (size >= small_maxrun)
-		return (large_run_size_next);
-
-	while (true) {
-		size += PAGE;
-		assert(size <= small_maxrun);
-		if (small_run_tab[size >> LG_PAGE]) {
-			if (large_run_size_next < size)
-				return (large_run_size_next);
-			return (size);
-		}
-	}
-}
-
-static size_t
-run_quantize_ceil_compute(size_t size)
-{
-	size_t qsize = run_quantize_floor_compute(size);
-
-	if (qsize < size) {
+	size_t ret;
+
+	assert(size > 0);
+	assert(size <= HUGE_MAXCLASS);
+	assert((size & PAGE_MASK) == 0);
+
+	ret = run_quantize_floor(size);
+	if (ret < size) {
 		/*
 		 * Skip a quantization that may have an adequately large run,
 		 * because under-sized runs may be mixed in.  This only happens
 		 * when an unusual size is requested, i.e. for aligned
 		 * allocation, and is just one of several places where linear
 		 * search would potentially find sufficiently aligned available
 		 * memory somewhere lower.
 		 */
-		qsize = run_quantize_ceil_compute_hard(qsize);
+		ret = pind2sz(psz2ind(ret - large_pad + 1)) + large_pad;
 	}
-	return (qsize);
-}
-
-#ifdef JEMALLOC_JET
-#undef run_quantize_floor
-#define	run_quantize_floor JEMALLOC_N(run_quantize_floor_impl)
-#endif
-static size_t
-run_quantize_floor(size_t size)
-{
-	size_t ret;
-
-	assert(size > 0);
-	assert(size <= run_quantize_max);
-	assert((size & PAGE_MASK) == 0);
-
-	ret = run_quantize_floor_tab[(size >> LG_PAGE) - 1];
-	assert(ret == run_quantize_floor_compute(size));
-	return (ret);
-}
-#ifdef JEMALLOC_JET
-#undef run_quantize_floor
-#define	run_quantize_floor JEMALLOC_N(run_quantize_floor)
-run_quantize_t *run_quantize_floor = JEMALLOC_N(run_quantize_floor_impl);
-#endif
-
-#ifdef JEMALLOC_JET
-#undef run_quantize_ceil
-#define	run_quantize_ceil JEMALLOC_N(run_quantize_ceil_impl)
-#endif
-static size_t
-run_quantize_ceil(size_t size)
-{
-	size_t ret;
-
-	assert(size > 0);
-	assert(size <= run_quantize_max);
-	assert((size & PAGE_MASK) == 0);
-
-	ret = run_quantize_ceil_tab[(size >> LG_PAGE) - 1];
-	assert(ret == run_quantize_ceil_compute(size));
 	return (ret);
 }
 #ifdef JEMALLOC_JET
 #undef run_quantize_ceil
 #define	run_quantize_ceil JEMALLOC_N(run_quantize_ceil)
-run_quantize_t *run_quantize_ceil = JEMALLOC_N(run_quantize_ceil_impl);
+run_quantize_t *run_quantize_ceil = JEMALLOC_N(n_run_quantize_ceil);
 #endif
 
-static arena_run_tree_t *
-arena_runs_avail_get(arena_t *arena, szind_t ind)
-{
-
-	assert(ind >= runs_avail_bias);
-	assert(ind - runs_avail_bias < runs_avail_nclasses);
-
-	return (&arena->runs_avail[ind - runs_avail_bias]);
-}
-
 static void
 arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
     size_t npages)
 {
-	szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get(
-	    arena_miscelm_get(chunk, pageind))));
+	pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get(
+	    arena_miscelm_get_const(chunk, pageind))));
 	assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
 	    LG_PAGE));
-	arena_run_tree_insert(arena_runs_avail_get(arena, ind),
-	    arena_miscelm_get(chunk, pageind));
+	assert((npages << LG_PAGE) < chunksize);
+	assert(pind2sz(pind) <= chunksize);
+	arena_run_heap_insert(&arena->runs_avail[pind],
+	    arena_miscelm_get_mutable(chunk, pageind));
 }
 
 static void
 arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
     size_t npages)
 {
-	szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get(
-	    arena_miscelm_get(chunk, pageind))));
+	pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get(
+	    arena_miscelm_get_const(chunk, pageind))));
 	assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
 	    LG_PAGE));
-	arena_run_tree_remove(arena_runs_avail_get(arena, ind),
-	    arena_miscelm_get(chunk, pageind));
+	assert((npages << LG_PAGE) < chunksize);
+	assert(pind2sz(pind) <= chunksize);
+	arena_run_heap_remove(&arena->runs_avail[pind],
+	    arena_miscelm_get_mutable(chunk, pageind));
 }
 
 static void
 arena_run_dirty_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
     size_t npages)
 {
-	arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind);
+	arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk,
+	    pageind);
 
 	assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
 	    LG_PAGE));
 	assert(arena_mapbits_dirty_get(chunk, pageind) == CHUNK_MAP_DIRTY);
 	assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) ==
 	    CHUNK_MAP_DIRTY);
 
 	qr_new(&miscelm->rd, rd_link);
 	qr_meld(&arena->runs_dirty, &miscelm->rd, rd_link);
 	arena->ndirty += npages;
 }
 
 static void
 arena_run_dirty_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
     size_t npages)
 {
-	arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind);
+	arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk,
+	    pageind);
 
 	assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
 	    LG_PAGE));
 	assert(arena_mapbits_dirty_get(chunk, pageind) == CHUNK_MAP_DIRTY);
 	assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) ==
 	    CHUNK_MAP_DIRTY);
 
 	qr_remove(&miscelm->rd, rd_link);
@@ -584,220 +523,237 @@ arena_chunk_init_spare(arena_t *arena)
 	    arena_maxrun);
 	assert(arena_mapbits_dirty_get(chunk, map_bias) ==
 	    arena_mapbits_dirty_get(chunk, chunk_npages-1));
 
 	return (chunk);
 }
 
 static bool
-arena_chunk_register(arena_t *arena, arena_chunk_t *chunk, bool zero)
+arena_chunk_register(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
+    bool zero)
 {
 
 	/*
 	 * The extent node notion of "committed" doesn't directly apply to
 	 * arena chunks.  Arbitrarily mark them as committed.  The commit state
 	 * of runs is tracked individually, and upon chunk deallocation the
 	 * entire chunk is in a consistent commit state.
 	 */
 	extent_node_init(&chunk->node, arena, chunk, chunksize, zero, true);
 	extent_node_achunk_set(&chunk->node, true);
-	return (chunk_register(chunk, &chunk->node));
+	return (chunk_register(tsdn, chunk, &chunk->node));
 }
 
 static arena_chunk_t *
-arena_chunk_alloc_internal_hard(arena_t *arena, chunk_hooks_t *chunk_hooks,
-    bool *zero, bool *commit)
+arena_chunk_alloc_internal_hard(tsdn_t *tsdn, arena_t *arena,
+    chunk_hooks_t *chunk_hooks, bool *zero, bool *commit)
 {
 	arena_chunk_t *chunk;
 
-	malloc_mutex_unlock(&arena->lock);
-
-	chunk = (arena_chunk_t *)chunk_alloc_wrapper(arena, chunk_hooks, NULL,
-	    chunksize, chunksize, zero, commit);
+	malloc_mutex_unlock(tsdn, &arena->lock);
+
+	chunk = (arena_chunk_t *)chunk_alloc_wrapper(tsdn, arena, chunk_hooks,
+	    NULL, chunksize, chunksize, zero, commit);
 	if (chunk != NULL && !*commit) {
 		/* Commit header. */
 		if (chunk_hooks->commit(chunk, chunksize, 0, map_bias <<
 		    LG_PAGE, arena->ind)) {
-			chunk_dalloc_wrapper(arena, chunk_hooks, (void *)chunk,
-			    chunksize, *zero, *commit);
+			chunk_dalloc_wrapper(tsdn, arena, chunk_hooks,
+			    (void *)chunk, chunksize, *zero, *commit);
 			chunk = NULL;
 		}
 	}
-	if (chunk != NULL && arena_chunk_register(arena, chunk, *zero)) {
+	if (chunk != NULL && arena_chunk_register(tsdn, arena, chunk, *zero)) {
 		if (!*commit) {
 			/* Undo commit of header. */
 			chunk_hooks->decommit(chunk, chunksize, 0, map_bias <<
 			    LG_PAGE, arena->ind);
 		}
-		chunk_dalloc_wrapper(arena, chunk_hooks, (void *)chunk,
+		chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, (void *)chunk,
 		    chunksize, *zero, *commit);
 		chunk = NULL;
 	}
 
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	return (chunk);
 }
 
 static arena_chunk_t *
-arena_chunk_alloc_internal(arena_t *arena, bool *zero, bool *commit)
+arena_chunk_alloc_internal(tsdn_t *tsdn, arena_t *arena, bool *zero,
+    bool *commit)
 {
 	arena_chunk_t *chunk;
 	chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
 
-	chunk = chunk_alloc_cache(arena, &chunk_hooks, NULL, chunksize,
-	    chunksize, zero, true);
+	chunk = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, chunksize,
+	    chunksize, zero, commit, true);
 	if (chunk != NULL) {
-		if (arena_chunk_register(arena, chunk, *zero)) {
-			chunk_dalloc_cache(arena, &chunk_hooks, chunk,
+		if (arena_chunk_register(tsdn, arena, chunk, *zero)) {
+			chunk_dalloc_cache(tsdn, arena, &chunk_hooks, chunk,
 			    chunksize, true);
 			return (NULL);
 		}
-		*commit = true;
 	}
 	if (chunk == NULL) {
-		chunk = arena_chunk_alloc_internal_hard(arena, &chunk_hooks,
-		    zero, commit);
+		chunk = arena_chunk_alloc_internal_hard(tsdn, arena,
+		    &chunk_hooks, zero, commit);
 	}
 
 	if (config_stats && chunk != NULL) {
 		arena->stats.mapped += chunksize;
 		arena->stats.metadata_mapped += (map_bias << LG_PAGE);
 	}
 
 	return (chunk);
 }
 
 static arena_chunk_t *
-arena_chunk_init_hard(arena_t *arena)
+arena_chunk_init_hard(tsdn_t *tsdn, arena_t *arena)
 {
 	arena_chunk_t *chunk;
 	bool zero, commit;
 	size_t flag_unzeroed, flag_decommitted, i;
 
 	assert(arena->spare == NULL);
 
 	zero = false;
 	commit = false;
-	chunk = arena_chunk_alloc_internal(arena, &zero, &commit);
+	chunk = arena_chunk_alloc_internal(tsdn, arena, &zero, &commit);
 	if (chunk == NULL)
 		return (NULL);
 
 	/*
 	 * Initialize the map to contain one maximal free untouched run.  Mark
-	 * the pages as zeroed if chunk_alloc() returned a zeroed or decommitted
-	 * chunk.
+	 * the pages as zeroed if arena_chunk_alloc_internal() returned a zeroed
+	 * or decommitted chunk.
 	 */
 	flag_unzeroed = (zero || !commit) ? 0 : CHUNK_MAP_UNZEROED;
 	flag_decommitted = commit ? 0 : CHUNK_MAP_DECOMMITTED;
 	arena_mapbits_unallocated_set(chunk, map_bias, arena_maxrun,
 	    flag_unzeroed | flag_decommitted);
 	/*
 	 * There is no need to initialize the internal page map entries unless
 	 * the chunk is not zeroed.
 	 */
 	if (!zero) {
 		JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(
-		    (void *)arena_bitselm_get(chunk, map_bias+1),
-		    (size_t)((uintptr_t) arena_bitselm_get(chunk,
-		    chunk_npages-1) - (uintptr_t)arena_bitselm_get(chunk,
-		    map_bias+1)));
+		    (void *)arena_bitselm_get_const(chunk, map_bias+1),
+		    (size_t)((uintptr_t)arena_bitselm_get_const(chunk,
+		    chunk_npages-1) -
+		    (uintptr_t)arena_bitselm_get_const(chunk, map_bias+1)));
 		for (i = map_bias+1; i < chunk_npages-1; i++)
 			arena_mapbits_internal_set(chunk, i, flag_unzeroed);
 	} else {
 		JEMALLOC_VALGRIND_MAKE_MEM_DEFINED((void
-		    *)arena_bitselm_get(chunk, map_bias+1), (size_t)((uintptr_t)
-		    arena_bitselm_get(chunk, chunk_npages-1) -
-		    (uintptr_t)arena_bitselm_get(chunk, map_bias+1)));
+		    *)arena_bitselm_get_const(chunk, map_bias+1),
+		    (size_t)((uintptr_t)arena_bitselm_get_const(chunk,
+		    chunk_npages-1) -
+		    (uintptr_t)arena_bitselm_get_const(chunk, map_bias+1)));
 		if (config_debug) {
 			for (i = map_bias+1; i < chunk_npages-1; i++) {
 				assert(arena_mapbits_unzeroed_get(chunk, i) ==
 				    flag_unzeroed);
 			}
 		}
 	}
 	arena_mapbits_unallocated_set(chunk, chunk_npages-1, arena_maxrun,
 	    flag_unzeroed);
 
 	return (chunk);
 }
 
 static arena_chunk_t *
-arena_chunk_alloc(arena_t *arena)
+arena_chunk_alloc(tsdn_t *tsdn, arena_t *arena)
 {
 	arena_chunk_t *chunk;
 
 	if (arena->spare != NULL)
 		chunk = arena_chunk_init_spare(arena);
 	else {
-		chunk = arena_chunk_init_hard(arena);
+		chunk = arena_chunk_init_hard(tsdn, arena);
 		if (chunk == NULL)
 			return (NULL);
 	}
 
+	ql_elm_new(&chunk->node, ql_link);
+	ql_tail_insert(&arena->achunks, &chunk->node, ql_link);
 	arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias);
 
 	return (chunk);
 }
 
 static void
-arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk)
+arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk)
 {
+	bool committed;
+	chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
+
+	chunk_deregister(chunk, &chunk->node);
+
+	committed = (arena_mapbits_decommitted_get(chunk, map_bias) == 0);
+	if (!committed) {
+		/*
+		 * Decommit the header.  Mark the chunk as decommitted even if
+		 * header decommit fails, since treating a partially committed
+		 * chunk as committed has a high potential for causing later
+		 * access of decommitted memory.
+		 */
+		chunk_hooks = chunk_hooks_get(tsdn, arena);
+		chunk_hooks.decommit(chunk, chunksize, 0, map_bias << LG_PAGE,
+		    arena->ind);
+	}
+
+	chunk_dalloc_cache(tsdn, arena, &chunk_hooks, (void *)chunk, chunksize,
+	    committed);
+
+	if (config_stats) {
+		arena->stats.mapped -= chunksize;
+		arena->stats.metadata_mapped -= (map_bias << LG_PAGE);
+	}
+}
+
+static void
+arena_spare_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *spare)
+{
+
+	assert(arena->spare != spare);
+
+	if (arena_mapbits_dirty_get(spare, map_bias) != 0) {
+		arena_run_dirty_remove(arena, spare, map_bias,
+		    chunk_npages-map_bias);
+	}
+
+	arena_chunk_discard(tsdn, arena, spare);
+}
+
+static void
+arena_chunk_dalloc(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk)
+{
+	arena_chunk_t *spare;
 
 	assert(arena_mapbits_allocated_get(chunk, map_bias) == 0);
 	assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0);
 	assert(arena_mapbits_unallocated_size_get(chunk, map_bias) ==
 	    arena_maxrun);
 	assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) ==
 	    arena_maxrun);
 	assert(arena_mapbits_dirty_get(chunk, map_bias) ==
 	    arena_mapbits_dirty_get(chunk, chunk_npages-1));
 	assert(arena_mapbits_decommitted_get(chunk, map_bias) ==
 	    arena_mapbits_decommitted_get(chunk, chunk_npages-1));
 
 	/* Remove run from runs_avail, so that the arena does not use it. */
 	arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias);
 
-	if (arena->spare != NULL) {
-		arena_chunk_t *spare = arena->spare;
-		chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
-		bool committed;
-
-		arena->spare = chunk;
-		if (arena_mapbits_dirty_get(spare, map_bias) != 0) {
-			arena_run_dirty_remove(arena, spare, map_bias,
-			    chunk_npages-map_bias);
-		}
-
-		chunk_deregister(spare, &spare->node);
-
-		committed = (arena_mapbits_decommitted_get(spare, map_bias) ==
-		    0);
-		if (!committed) {
-			/*
-			 * Decommit the header.  Mark the chunk as decommitted
-			 * even if header decommit fails, since treating a
-			 * partially committed chunk as committed has a high
-			 * potential for causing later access of decommitted
-			 * memory.
-			 */
-			chunk_hooks = chunk_hooks_get(arena);
-			chunk_hooks.decommit(spare, chunksize, 0, map_bias <<
-			    LG_PAGE, arena->ind);
-		}
-
-		chunk_dalloc_cache(arena, &chunk_hooks, (void *)spare,
-		    chunksize, committed);
-
-		if (config_stats) {
-			arena->stats.mapped -= chunksize;
-			arena->stats.metadata_mapped -= (map_bias << LG_PAGE);
-		}
-	} else
-		arena->spare = chunk;
+	ql_remove(&arena->achunks, &chunk->node, ql_link);
+	spare = arena->spare;
+	arena->spare = chunk;
+	if (spare != NULL)
+		arena_spare_discard(tsdn, arena, spare);
 }
 
 static void
 arena_huge_malloc_stats_update(arena_t *arena, size_t usize)
 {
 	szind_t index = size2index(usize) - nlclasses - NBINS;
 
 	cassert(config_stats);
@@ -830,16 +786,27 @@ arena_huge_dalloc_stats_update(arena_t *
 
 	arena->stats.ndalloc_huge++;
 	arena->stats.allocated_huge -= usize;
 	arena->stats.hstats[index].ndalloc++;
 	arena->stats.hstats[index].curhchunks--;
 }
 
 static void
+arena_huge_reset_stats_cancel(arena_t *arena, size_t usize)
+{
+	szind_t index = size2index(usize) - nlclasses - NBINS;
+
+	cassert(config_stats);
+
+	arena->stats.ndalloc_huge++;
+	arena->stats.hstats[index].ndalloc--;
+}
+
+static void
 arena_huge_dalloc_stats_update_undo(arena_t *arena, size_t usize)
 {
 	szind_t index = size2index(usize) - nlclasses - NBINS;
 
 	cassert(config_stats);
 
 	arena->stats.ndalloc_huge--;
 	arena->stats.allocated_huge += usize;
@@ -860,270 +827,275 @@ arena_huge_ralloc_stats_update_undo(aren
     size_t usize)
 {
 
 	arena_huge_dalloc_stats_update_undo(arena, oldsize);
 	arena_huge_malloc_stats_update_undo(arena, usize);
 }
 
 extent_node_t *
-arena_node_alloc(arena_t *arena)
+arena_node_alloc(tsdn_t *tsdn, arena_t *arena)
 {
 	extent_node_t *node;
 
-	malloc_mutex_lock(&arena->node_cache_mtx);
+	malloc_mutex_lock(tsdn, &arena->node_cache_mtx);
 	node = ql_last(&arena->node_cache, ql_link);
 	if (node == NULL) {
-		malloc_mutex_unlock(&arena->node_cache_mtx);
-		return (base_alloc(sizeof(extent_node_t)));
+		malloc_mutex_unlock(tsdn, &arena->node_cache_mtx);
+		return (base_alloc(tsdn, sizeof(extent_node_t)));
 	}
 	ql_tail_remove(&arena->node_cache, extent_node_t, ql_link);
-	malloc_mutex_unlock(&arena->node_cache_mtx);
+	malloc_mutex_unlock(tsdn, &arena->node_cache_mtx);
 	return (node);
 }
 
 void
-arena_node_dalloc(arena_t *arena, extent_node_t *node)
+arena_node_dalloc(tsdn_t *tsdn, arena_t *arena, extent_node_t *node)
 {
 
-	malloc_mutex_lock(&arena->node_cache_mtx);
+	malloc_mutex_lock(tsdn, &arena->node_cache_mtx);
 	ql_elm_new(node, ql_link);
 	ql_tail_insert(&arena->node_cache, node, ql_link);
-	malloc_mutex_unlock(&arena->node_cache_mtx);
+	malloc_mutex_unlock(tsdn, &arena->node_cache_mtx);
 }
 
 static void *
-arena_chunk_alloc_huge_hard(arena_t *arena, chunk_hooks_t *chunk_hooks,
-    size_t usize, size_t alignment, bool *zero, size_t csize)
+arena_chunk_alloc_huge_hard(tsdn_t *tsdn, arena_t *arena,
+    chunk_hooks_t *chunk_hooks, size_t usize, size_t alignment, bool *zero,
+    size_t csize)
 {
 	void *ret;
 	bool commit = true;
 
-	ret = chunk_alloc_wrapper(arena, chunk_hooks, NULL, csize, alignment,
-	    zero, &commit);
+	ret = chunk_alloc_wrapper(tsdn, arena, chunk_hooks, NULL, csize,
+	    alignment, zero, &commit);
 	if (ret == NULL) {
 		/* Revert optimistic stats updates. */
-		malloc_mutex_lock(&arena->lock);
+		malloc_mutex_lock(tsdn, &arena->lock);
 		if (config_stats) {
 			arena_huge_malloc_stats_update_undo(arena, usize);
 			arena->stats.mapped -= usize;
 		}
 		arena_nactive_sub(arena, usize >> LG_PAGE);
-		malloc_mutex_unlock(&arena->lock);
+		malloc_mutex_unlock(tsdn, &arena->lock);
 	}
 
 	return (ret);
 }
 
 void *
-arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment,
-    bool *zero)
+arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize,
+    size_t alignment, bool *zero)
 {
 	void *ret;
 	chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
 	size_t csize = CHUNK_CEILING(usize);
-
-	malloc_mutex_lock(&arena->lock);
+	bool commit = true;
+
+	malloc_mutex_lock(tsdn, &arena->lock);
 
 	/* Optimistically update stats. */
 	if (config_stats) {
 		arena_huge_malloc_stats_update(arena, usize);
 		arena->stats.mapped += usize;
 	}
 	arena_nactive_add(arena, usize >> LG_PAGE);
 
-	ret = chunk_alloc_cache(arena, &chunk_hooks, NULL, csize, alignment,
-	    zero, true);
-	malloc_mutex_unlock(&arena->lock);
+	ret = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, csize,
+	    alignment, zero, &commit, true);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 	if (ret == NULL) {
-		ret = arena_chunk_alloc_huge_hard(arena, &chunk_hooks, usize,
-		    alignment, zero, csize);
+		ret = arena_chunk_alloc_huge_hard(tsdn, arena, &chunk_hooks,
+		    usize, alignment, zero, csize);
 	}
 
 	return (ret);
 }
 
 void
-arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize)
+arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, void *chunk, size_t usize)
 {
 	chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
 	size_t csize;
 
 	csize = CHUNK_CEILING(usize);
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	if (config_stats) {
 		arena_huge_dalloc_stats_update(arena, usize);
 		arena->stats.mapped -= usize;
 	}
 	arena_nactive_sub(arena, usize >> LG_PAGE);
 
-	chunk_dalloc_cache(arena, &chunk_hooks, chunk, csize, true);
-	malloc_mutex_unlock(&arena->lock);
+	chunk_dalloc_cache(tsdn, arena, &chunk_hooks, chunk, csize, true);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 }
 
 void
-arena_chunk_ralloc_huge_similar(arena_t *arena, void *chunk, size_t oldsize,
-    size_t usize)
+arena_chunk_ralloc_huge_similar(tsdn_t *tsdn, arena_t *arena, void *chunk,
+    size_t oldsize, size_t usize)
 {
 
 	assert(CHUNK_CEILING(oldsize) == CHUNK_CEILING(usize));
 	assert(oldsize != usize);
 
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	if (config_stats)
 		arena_huge_ralloc_stats_update(arena, oldsize, usize);
 	if (oldsize < usize)
 		arena_nactive_add(arena, (usize - oldsize) >> LG_PAGE);
 	else
 		arena_nactive_sub(arena, (oldsize - usize) >> LG_PAGE);
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 }
 
 void
-arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, size_t oldsize,
-    size_t usize)
+arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, void *chunk,
+    size_t oldsize, size_t usize)
 {
 	size_t udiff = oldsize - usize;
 	size_t cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize);
 
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	if (config_stats) {
 		arena_huge_ralloc_stats_update(arena, oldsize, usize);
 		if (cdiff != 0)
 			arena->stats.mapped -= cdiff;
 	}
 	arena_nactive_sub(arena, udiff >> LG_PAGE);
 
 	if (cdiff != 0) {
 		chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
 		void *nchunk = (void *)((uintptr_t)chunk +
 		    CHUNK_CEILING(usize));
 
-		chunk_dalloc_cache(arena, &chunk_hooks, nchunk, cdiff, true);
+		chunk_dalloc_cache(tsdn, arena, &chunk_hooks, nchunk, cdiff,
+		    true);
 	}
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 }
 
 static bool
-arena_chunk_ralloc_huge_expand_hard(arena_t *arena, chunk_hooks_t *chunk_hooks,
-    void *chunk, size_t oldsize, size_t usize, bool *zero, void *nchunk,
-    size_t udiff, size_t cdiff)
+arena_chunk_ralloc_huge_expand_hard(tsdn_t *tsdn, arena_t *arena,
+    chunk_hooks_t *chunk_hooks, void *chunk, size_t oldsize, size_t usize,
+    bool *zero, void *nchunk, size_t udiff, size_t cdiff)
 {
 	bool err;
 	bool commit = true;
 
-	err = (chunk_alloc_wrapper(arena, chunk_hooks, nchunk, cdiff, chunksize,
-	    zero, &commit) == NULL);
+	err = (chunk_alloc_wrapper(tsdn, arena, chunk_hooks, nchunk, cdiff,
+	    chunksize, zero, &commit) == NULL);
 	if (err) {
 		/* Revert optimistic stats updates. */
-		malloc_mutex_lock(&arena->lock);
+		malloc_mutex_lock(tsdn, &arena->lock);
 		if (config_stats) {
 			arena_huge_ralloc_stats_update_undo(arena, oldsize,
 			    usize);
 			arena->stats.mapped -= cdiff;
 		}
 		arena_nactive_sub(arena, udiff >> LG_PAGE);
-		malloc_mutex_unlock(&arena->lock);
+		malloc_mutex_unlock(tsdn, &arena->lock);
 	} else if (chunk_hooks->merge(chunk, CHUNK_CEILING(oldsize), nchunk,
 	    cdiff, true, arena->ind)) {
-		chunk_dalloc_wrapper(arena, chunk_hooks, nchunk, cdiff, *zero,
-		    true);
+		chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, nchunk, cdiff,
+		    *zero, true);
 		err = true;
 	}
 	return (err);
 }
 
 bool
-arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize,
-    size_t usize, bool *zero)
+arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, void *chunk,
+    size_t oldsize, size_t usize, bool *zero)
 {
 	bool err;
-	chunk_hooks_t chunk_hooks = chunk_hooks_get(arena);
+	chunk_hooks_t chunk_hooks = chunk_hooks_get(tsdn, arena);
 	void *nchunk = (void *)((uintptr_t)chunk + CHUNK_CEILING(oldsize));
 	size_t udiff = usize - oldsize;
 	size_t cdiff = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize);
-
-	malloc_mutex_lock(&arena->lock);
+	bool commit = true;
+
+	malloc_mutex_lock(tsdn, &arena->lock);
 
 	/* Optimistically update stats. */
 	if (config_stats) {
 		arena_huge_ralloc_stats_update(arena, oldsize, usize);
 		arena->stats.mapped += cdiff;
 	}
 	arena_nactive_add(arena, udiff >> LG_PAGE);
 
-	err = (chunk_alloc_cache(arena, &chunk_hooks, nchunk, cdiff, chunksize,
-	    zero, true) == NULL);
-	malloc_mutex_unlock(&arena->lock);
+	err = (chunk_alloc_cache(tsdn, arena, &chunk_hooks, nchunk, cdiff,
+	    chunksize, zero, &commit, true) == NULL);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 	if (err) {
-		err = arena_chunk_ralloc_huge_expand_hard(arena, &chunk_hooks,
-		    chunk, oldsize, usize, zero, nchunk, udiff,
+		err = arena_chunk_ralloc_huge_expand_hard(tsdn, arena,
+		    &chunk_hooks, chunk, oldsize, usize, zero, nchunk, udiff,
 		    cdiff);
 	} else if (chunk_hooks.merge(chunk, CHUNK_CEILING(oldsize), nchunk,
 	    cdiff, true, arena->ind)) {
-		chunk_dalloc_wrapper(arena, &chunk_hooks, nchunk, cdiff, *zero,
-		    true);
+		chunk_dalloc_wrapper(tsdn, arena, &chunk_hooks, nchunk, cdiff,
+		    *zero, true);
 		err = true;
 	}
 
 	return (err);
 }
 
 /*
  * Do first-best-fit run selection, i.e. select the lowest run that best fits.
  * Run sizes are indexed, so not all candidate runs are necessarily exactly the
  * same size.
  */
 static arena_run_t *
 arena_run_first_best_fit(arena_t *arena, size_t size)
 {
-	szind_t ind, i;
-
-	ind = size2index(run_quantize_ceil(size));
-	for (i = ind; i < runs_avail_nclasses + runs_avail_bias; i++) {
-		arena_chunk_map_misc_t *miscelm = arena_run_tree_first(
-		    arena_runs_avail_get(arena, i));
+	pszind_t pind, i;
+
+	pind = psz2ind(run_quantize_ceil(size));
+
+	for (i = pind; pind2sz(i) <= chunksize; i++) {
+		arena_chunk_map_misc_t *miscelm = arena_run_heap_first(
+		    &arena->runs_avail[i]);
 		if (miscelm != NULL)
 			return (&miscelm->run);
 	}
 
 	return (NULL);
 }
 
 static arena_run_t *
 arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero)
 {
-	arena_run_t *run = arena_run_first_best_fit(arena, s2u(size));
+	arena_run_t *run = arena_run_first_best_fit(arena, size);
 	if (run != NULL) {
 		if (arena_run_split_large(arena, run, size, zero))
 			run = NULL;
 	}
 	return (run);
 }
 
 static arena_run_t *
-arena_run_alloc_large(arena_t *arena, size_t size, bool zero)
+arena_run_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t size, bool zero)
 {
 	arena_chunk_t *chunk;
 	arena_run_t *run;
 
 	assert(size <= arena_maxrun);
 	assert(size == PAGE_CEILING(size));
 
 	/* Search the arena's chunks for the lowest best fit. */
 	run = arena_run_alloc_large_helper(arena, size, zero);
 	if (run != NULL)
 		return (run);
 
 	/*
 	 * No usable runs.  Create a new chunk from which to allocate the run.
 	 */
-	chunk = arena_chunk_alloc(arena);
+	chunk = arena_chunk_alloc(tsdn, arena);
 	if (chunk != NULL) {
-		run = &arena_miscelm_get(chunk, map_bias)->run;
+		run = &arena_miscelm_get_mutable(chunk, map_bias)->run;
 		if (arena_run_split_large(arena, run, size, zero))
 			run = NULL;
 		return (run);
 	}
 
 	/*
 	 * arena_chunk_alloc() failed, but another thread may have made
 	 * sufficient memory available while this one dropped arena->lock in
@@ -1139,36 +1111,36 @@ arena_run_alloc_small_helper(arena_t *ar
 	if (run != NULL) {
 		if (arena_run_split_small(arena, run, size, binind))
 			run = NULL;
 	}
 	return (run);
 }
 
 static arena_run_t *
-arena_run_alloc_small(arena_t *arena, size_t size, szind_t binind)
+arena_run_alloc_small(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t binind)
 {
 	arena_chunk_t *chunk;
 	arena_run_t *run;
 
 	assert(size <= arena_maxrun);
 	assert(size == PAGE_CEILING(size));
 	assert(binind != BININD_INVALID);
 
 	/* Search the arena's chunks for the lowest best fit. */
 	run = arena_run_alloc_small_helper(arena, size, binind);
 	if (run != NULL)
 		return (run);
 
 	/*
 	 * No usable runs.  Create a new chunk from which to allocate the run.
 	 */
-	chunk = arena_chunk_alloc(arena);
+	chunk = arena_chunk_alloc(tsdn, arena);
 	if (chunk != NULL) {
-		run = &arena_miscelm_get(chunk, map_bias)->run;
+		run = &arena_miscelm_get_mutable(chunk, map_bias)->run;
 		if (arena_run_split_small(arena, run, size, binind))
 			run = NULL;
 		return (run);
 	}
 
 	/*
 	 * arena_chunk_alloc() failed, but another thread may have made
 	 * sufficient memory available while this one dropped arena->lock in
@@ -1181,70 +1153,70 @@ static bool
 arena_lg_dirty_mult_valid(ssize_t lg_dirty_mult)
 {
 
 	return (lg_dirty_mult >= -1 && lg_dirty_mult < (ssize_t)(sizeof(size_t)
 	    << 3));
 }
 
 ssize_t
-arena_lg_dirty_mult_get(arena_t *arena)
+arena_lg_dirty_mult_get(tsdn_t *tsdn, arena_t *arena)
 {
 	ssize_t lg_dirty_mult;
 
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	lg_dirty_mult = arena->lg_dirty_mult;
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 
 	return (lg_dirty_mult);
 }
 
 bool
-arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult)
+arena_lg_dirty_mult_set(tsdn_t *tsdn, arena_t *arena, ssize_t lg_dirty_mult)
 {
 
 	if (!arena_lg_dirty_mult_valid(lg_dirty_mult))
 		return (true);
 
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	arena->lg_dirty_mult = lg_dirty_mult;
-	arena_maybe_purge(arena);
-	malloc_mutex_unlock(&arena->lock);
+	arena_maybe_purge(tsdn, arena);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 
 	return (false);
 }
 
 static void
 arena_decay_deadline_init(arena_t *arena)
 {
 
 	assert(opt_purge == purge_mode_decay);
 
 	/*
 	 * Generate a new deadline that is uniformly random within the next
 	 * epoch after the current one.
 	 */
-	nstime_copy(&arena->decay_deadline, &arena->decay_epoch);
-	nstime_add(&arena->decay_deadline, &arena->decay_interval);
-	if (arena->decay_time > 0) {
+	nstime_copy(&arena->decay.deadline, &arena->decay.epoch);
+	nstime_add(&arena->decay.deadline, &arena->decay.interval);
+	if (arena->decay.time > 0) {
 		nstime_t jitter;
 
-		nstime_init(&jitter, prng_range(&arena->decay_jitter_state,
-		    nstime_ns(&arena->decay_interval)));
-		nstime_add(&arena->decay_deadline, &jitter);
+		nstime_init(&jitter, prng_range_u64(&arena->decay.jitter_state,
+		    nstime_ns(&arena->decay.interval)));
+		nstime_add(&arena->decay.deadline, &jitter);
 	}
 }
 
 static bool
 arena_decay_deadline_reached(const arena_t *arena, const nstime_t *time)
 {
 
 	assert(opt_purge == purge_mode_decay);
 
-	return (nstime_compare(&arena->decay_deadline, time) <= 0);
+	return (nstime_compare(&arena->decay.deadline, time) <= 0);
 }
 
 static size_t
 arena_decay_backlog_npages_limit(const arena_t *arena)
 {
 	static const uint64_t h_steps[] = {
 #define	STEP(step, h, x, y) \
 		h,
@@ -1259,144 +1231,163 @@ arena_decay_backlog_npages_limit(const a
 
 	/*
 	 * For each element of decay_backlog, multiply by the corresponding
 	 * fixed-point smoothstep decay factor.  Sum the products, then divide
 	 * to round down to the nearest whole number of pages.
 	 */
 	sum = 0;
 	for (i = 0; i < SMOOTHSTEP_NSTEPS; i++)
-		sum += arena->decay_backlog[i] * h_steps[i];
-	npages_limit_backlog = (sum >> SMOOTHSTEP_BFP);
+		sum += arena->decay.backlog[i] * h_steps[i];
+	npages_limit_backlog = (size_t)(sum >> SMOOTHSTEP_BFP);
 
 	return (npages_limit_backlog);
 }
 
 static void
-arena_decay_epoch_advance(arena_t *arena, const nstime_t *time)
+arena_decay_backlog_update_last(arena_t *arena)
+{
+	size_t ndirty_delta = (arena->ndirty > arena->decay.ndirty) ?
+	    arena->ndirty - arena->decay.ndirty : 0;
+	arena->decay.backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta;
+}
+
+static void
+arena_decay_backlog_update(arena_t *arena, uint64_t nadvance_u64)
 {
-	uint64_t nadvance;
+
+	if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) {
+		memset(arena->decay.backlog, 0, (SMOOTHSTEP_NSTEPS-1) *
+		    sizeof(size_t));
+	} else {
+		size_t nadvance_z = (size_t)nadvance_u64;
+
+		assert((uint64_t)nadvance_z == nadvance_u64);
+
+		memmove(arena->decay.backlog, &arena->decay.backlog[nadvance_z],
+		    (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t));
+		if (nadvance_z > 1) {
+			memset(&arena->decay.backlog[SMOOTHSTEP_NSTEPS -
+			    nadvance_z], 0, (nadvance_z-1) * sizeof(size_t));
+		}
+	}
+
+	arena_decay_backlog_update_last(arena);
+}
+
+static void
+arena_decay_epoch_advance_helper(arena_t *arena, const nstime_t *time)
+{
+	uint64_t nadvance_u64;
 	nstime_t delta;
-	size_t ndirty_delta;
 
 	assert(opt_purge == purge_mode_decay);
 	assert(arena_decay_deadline_reached(arena, time));
 
 	nstime_copy(&delta, time);
-	nstime_subtract(&delta, &arena->decay_epoch);
-	nadvance = nstime_divide(&delta, &arena->decay_interval);
-	assert(nadvance > 0);
-
-	/* Add nadvance decay intervals to epoch. */
-	nstime_copy(&delta, &arena->decay_interval);
-	nstime_imultiply(&delta, nadvance);
-	nstime_add(&arena->decay_epoch, &delta);
+	nstime_subtract(&delta, &arena->decay.epoch);
+	nadvance_u64 = nstime_divide(&delta, &arena->decay.interval);
+	assert(nadvance_u64 > 0);
+
+	/* Add nadvance_u64 decay intervals to epoch. */
+	nstime_copy(&delta, &arena->decay.interval);
+	nstime_imultiply(&delta, nadvance_u64);
+	nstime_add(&arena->decay.epoch, &delta);
 
 	/* Set a new deadline. */
 	arena_decay_deadline_init(arena);
 
 	/* Update the backlog. */
-	if (nadvance >= SMOOTHSTEP_NSTEPS) {
-		memset(arena->decay_backlog, 0, (SMOOTHSTEP_NSTEPS-1) *
-		    sizeof(size_t));
-	} else {
-		memmove(arena->decay_backlog, &arena->decay_backlog[nadvance],
-		    (SMOOTHSTEP_NSTEPS - nadvance) * sizeof(size_t));
-		if (nadvance > 1) {
-			memset(&arena->decay_backlog[SMOOTHSTEP_NSTEPS -
-			    nadvance], 0, (nadvance-1) * sizeof(size_t));
-		}
-	}
-	ndirty_delta = (arena->ndirty > arena->decay_ndirty) ? arena->ndirty -
-	    arena->decay_ndirty : 0;
-	arena->decay_ndirty = arena->ndirty;
-	arena->decay_backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta;
-	arena->decay_backlog_npages_limit =
-	    arena_decay_backlog_npages_limit(arena);
+	arena_decay_backlog_update(arena, nadvance_u64);
 }
 
-static size_t
-arena_decay_npages_limit(arena_t *arena)
+static void
+arena_decay_epoch_advance_purge(tsdn_t *tsdn, arena_t *arena)
 {
-	size_t npages_limit;
-
-	assert(opt_purge == purge_mode_decay);
-
-	npages_limit = arena->decay_backlog_npages_limit;
-
-	/* Add in any dirty pages created during the current epoch. */
-	if (arena->ndirty > arena->decay_ndirty)
-		npages_limit += arena->ndirty - arena->decay_ndirty;
-
-	return (npages_limit);
+	size_t ndirty_limit = arena_decay_backlog_npages_limit(arena);
+
+	if (arena->ndirty > ndirty_limit)
+		arena_purge_to_limit(tsdn, arena, ndirty_limit);
+	arena->decay.ndirty = arena->ndirty;
+}
+
+static void
+arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, const nstime_t *time)
+{
+
+	arena_decay_epoch_advance_helper(arena, time);
+	arena_decay_epoch_advance_purge(tsdn, arena);
 }
 
 static void
 arena_decay_init(arena_t *arena, ssize_t decay_time)
 {
 
-	arena->decay_time = decay_time;
+	arena->decay.time = decay_time;
 	if (decay_time > 0) {
-		nstime_init2(&arena->decay_interval, decay_time, 0);
-		nstime_idivide(&arena->decay_interval, SMOOTHSTEP_NSTEPS);
+		nstime_init2(&arena->decay.interval, decay_time, 0);
+		nstime_idivide(&arena->decay.interval, SMOOTHSTEP_NSTEPS);
 	}
 
-	nstime_init(&arena->decay_epoch, 0);
-	nstime_update(&arena->decay_epoch);
-	arena->decay_jitter_state = (uint64_t)(uintptr_t)arena;
+	nstime_init(&arena->decay.epoch, 0);
+	nstime_update(&arena->decay.epoch);
+	arena->decay.jitter_state = (uint64_t)(uintptr_t)arena;
 	arena_decay_deadline_init(arena);
-	arena->decay_ndirty = arena->ndirty;
-	arena->decay_backlog_npages_limit = 0;
-	memset(arena->decay_backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t));
+	arena->decay.ndirty = arena->ndirty;
+	memset(arena->decay.backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t));
 }
 
 static bool
 arena_decay_time_valid(ssize_t decay_time)
 {
 
-	return (decay_time >= -1 && decay_time <= NSTIME_SEC_MAX);
+	if (decay_time < -1)
+		return (false);
+	if (decay_time == -1 || (uint64_t)decay_time <= NSTIME_SEC_MAX)
+		return (true);
+	return (false);
 }
 
 ssize_t
-arena_decay_time_get(arena_t *arena)
+arena_decay_time_get(tsdn_t *tsdn, arena_t *arena)
 {
 	ssize_t decay_time;
 
-	malloc_mutex_lock(&arena->lock);
-	decay_time = arena->decay_time;
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
+	decay_time = arena->decay.time;
+	malloc_mutex_unlock(tsdn, &arena->lock);
 
 	return (decay_time);
 }
 
 bool
-arena_decay_time_set(arena_t *arena, ssize_t decay_time)
+arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time)
 {
 
 	if (!arena_decay_time_valid(decay_time))
 		return (true);
 
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	/*
 	 * Restart decay backlog from scratch, which may cause many dirty pages
 	 * to be immediately purged.  It would conceptually be possible to map
 	 * the old backlog onto the new backlog, but there is no justification
 	 * for such complexity since decay_time changes are intended to be
 	 * infrequent, either between the {-1, 0, >0} states, or a one-time
 	 * arbitrary change during initial arena configuration.
 	 */
 	arena_decay_init(arena, decay_time);
-	arena_maybe_purge(arena);
-	malloc_mutex_unlock(&arena->lock);
+	arena_maybe_purge(tsdn, arena);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 
 	return (false);
 }
 
 static void
-arena_maybe_purge_ratio(arena_t *arena)
+arena_maybe_purge_ratio(tsdn_t *tsdn, arena_t *arena)
 {
 
 	assert(opt_purge == purge_mode_ratio);
 
 	/* Don't purge if the option is disabled. */
 	if (arena->lg_dirty_mult < 0)
 		return;
 
@@ -1409,67 +1400,76 @@ arena_maybe_purge_ratio(arena_t *arena)
 		if (threshold < chunk_npages)
 			threshold = chunk_npages;
 		/*
 		 * Don't purge unless the number of purgeable pages exceeds the
 		 * threshold.
 		 */
 		if (arena->ndirty <= threshold)
 			return;
-		arena_purge_to_limit(arena, threshold);
+		arena_purge_to_limit(tsdn, arena, threshold);
 	}
 }
 
 static void
-arena_maybe_purge_decay(arena_t *arena)
+arena_maybe_purge_decay(tsdn_t *tsdn, arena_t *arena)
 {
 	nstime_t time;
-	size_t ndirty_limit;
 
 	assert(opt_purge == purge_mode_decay);
 
 	/* Purge all or nothing if the option is disabled. */
-	if (arena->decay_time <= 0) {
-		if (arena->decay_time == 0)
-			arena_purge_to_limit(arena, 0);
+	if (arena->decay.time <= 0) {
+		if (arena->decay.time == 0)
+			arena_purge_to_limit(tsdn, arena, 0);
 		return;
 	}
 
-	nstime_copy(&time, &arena->decay_epoch);
-	if (unlikely(nstime_update(&time))) {
-		/* Time went backwards.  Force an epoch advance. */
-		nstime_copy(&time, &arena->decay_deadline);
+	nstime_init(&time, 0);
+	nstime_update(&time);
+	if (unlikely(!nstime_monotonic() && nstime_compare(&arena->decay.epoch,
+	    &time) > 0)) {
+		/*
+		 * Time went backwards.  Move the epoch back in time and
+		 * generate a new deadline, with the expectation that time
+		 * typically flows forward for long enough periods of time that
+		 * epochs complete.  Unfortunately, this strategy is susceptible
+		 * to clock jitter triggering premature epoch advances, but
+		 * clock jitter estimation and compensation isn't feasible here
+		 * because calls into this code are event-driven.
+		 */
+		nstime_copy(&arena->decay.epoch, &time);
+		arena_decay_deadline_init(arena);
+	} else {
+		/* Verify that time does not go backwards. */
+		assert(nstime_compare(&arena->decay.epoch, &time) <= 0);
 	}
 
-	if (arena_decay_deadline_reached(arena, &time))
-		arena_decay_epoch_advance(arena, &time);
-
-	ndirty_limit = arena_decay_npages_limit(arena);
-
 	/*
-	 * Don't try to purge unless the number of purgeable pages exceeds the
-	 * current limit.
+	 * If the deadline has been reached, advance to the current epoch and
+	 * purge to the new limit if necessary.  Note that dirty pages created
+	 * during the current epoch are not subject to purge until a future
+	 * epoch, so as a result purging only happens during epoch advances.
 	 */
-	if (arena->ndirty <= ndirty_limit)
-		return;
-	arena_purge_to_limit(arena, ndirty_limit);
+	if (arena_decay_deadline_reached(arena, &time))
+		arena_decay_epoch_advance(tsdn, arena, &time);
 }
 
 void
-arena_maybe_purge(arena_t *arena)
+arena_maybe_purge(tsdn_t *tsdn, arena_t *arena)
 {
 
 	/* Don't recursively purge. */
 	if (arena->purging)
 		return;
 
 	if (opt_purge == purge_mode_ratio)
-		arena_maybe_purge_ratio(arena);
+		arena_maybe_purge_ratio(tsdn, arena);
 	else
-		arena_maybe_purge_decay(arena);
+		arena_maybe_purge_decay(tsdn, arena);
 }
 
 static size_t
 arena_dirty_count(arena_t *arena)
 {
 	size_t ndirty = 0;
 	arena_runs_dirty_link_t *rdelm;
 	extent_node_t *chunkselm;
@@ -1497,51 +1497,52 @@ arena_dirty_count(arena_t *arena)
 		}
 		ndirty += npages;
 	}
 
 	return (ndirty);
 }
 
 static size_t
-arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks,
+arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
     size_t ndirty_limit, arena_runs_dirty_link_t *purge_runs_sentinel,
     extent_node_t *purge_chunks_sentinel)
 {
 	arena_runs_dirty_link_t *rdelm, *rdelm_next;
 	extent_node_t *chunkselm;
 	size_t nstashed = 0;
 
 	/* Stash runs/chunks according to ndirty_limit. */
 	for (rdelm = qr_next(&arena->runs_dirty, rd_link),
 	    chunkselm = qr_next(&arena->chunks_cache, cc_link);
 	    rdelm != &arena->runs_dirty; rdelm = rdelm_next) {
 		size_t npages;
 		rdelm_next = qr_next(rdelm, rd_link);
 
 		if (rdelm == &chunkselm->rd) {
 			extent_node_t *chunkselm_next;
-			bool zero;
+			bool zero, commit;
 			UNUSED void *chunk;
 
 			npages = extent_node_size_get(chunkselm) >> LG_PAGE;
 			if (opt_purge == purge_mode_decay && arena->ndirty -
 			    (nstashed + npages) < ndirty_limit)
 				break;
 
 			chunkselm_next = qr_next(chunkselm, cc_link);
 			/*
 			 * Allocate.  chunkselm remains valid due to the
 			 * dalloc_node=false argument to chunk_alloc_cache().
 			 */
 			zero = false;
-			chunk = chunk_alloc_cache(arena, chunk_hooks,
+			commit = false;
+			chunk = chunk_alloc_cache(tsdn, arena, chunk_hooks,
 			    extent_node_addr_get(chunkselm),
 			    extent_node_size_get(chunkselm), chunksize, &zero,
-			    false);
+			    &commit, false);
 			assert(chunk == extent_node_addr_get(chunkselm));
 			assert(zero == extent_node_zeroed_get(chunkselm));
 			extent_node_dirty_insert(chunkselm, purge_runs_sentinel,
 			    purge_chunks_sentinel);
 			assert(npages == (extent_node_size_get(chunkselm) >>
 			    LG_PAGE));
 			chunkselm = chunkselm_next;
 		} else {
@@ -1563,17 +1564,17 @@ arena_stash_dirty(arena_t *arena, chunk_
 			assert(arena_mapbits_dirty_get(chunk, pageind) ==
 			    arena_mapbits_dirty_get(chunk, pageind+npages-1));
 
 			/*
 			 * If purging the spare chunk's run, make it available
 			 * prior to allocation.
 			 */
 			if (chunk == arena->spare)
-				arena_chunk_alloc(arena);
+				arena_chunk_alloc(tsdn, arena);
 
 			/* Temporarily allocate the free dirty run. */
 			arena_run_split_large(arena, run, run_size, false);
 			/* Stash. */
 			if (false)
 				qr_new(rdelm, rd_link); /* Redundant. */
 			else {
 				assert(qr_next(rdelm, rd_link) == rdelm);
@@ -1587,29 +1588,29 @@ arena_stash_dirty(arena_t *arena, chunk_
 		    ndirty_limit)
 			break;
 	}
 
 	return (nstashed);
 }
 
 static size_t
-arena_purge_stashed(arena_t *arena, chunk_hooks_t *chunk_hooks,
+arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
     arena_runs_dirty_link_t *purge_runs_sentinel,
     extent_node_t *purge_chunks_sentinel)
 {
 	size_t npurged, nmadvise;
 	arena_runs_dirty_link_t *rdelm;
 	extent_node_t *chunkselm;
 
 	if (config_stats)
 		nmadvise = 0;
 	npurged = 0;
 
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 	for (rdelm = qr_next(purge_runs_sentinel, rd_link),
 	    chunkselm = qr_next(purge_chunks_sentinel, cc_link);
 	    rdelm != purge_runs_sentinel; rdelm = qr_next(rdelm, rd_link)) {
 		size_t npages;
 
 		if (rdelm == &chunkselm->rd) {
 			/*
 			 * Don't actually purge the chunk here because 1)
@@ -1638,17 +1639,17 @@ arena_purge_stashed(arena_t *arena, chun
 			assert(!arena_mapbits_decommitted_get(chunk,
 			    pageind+npages-1));
 			decommitted = !chunk_hooks->decommit(chunk, chunksize,
 			    pageind << LG_PAGE, npages << LG_PAGE, arena->ind);
 			if (decommitted) {
 				flag_unzeroed = 0;
 				flags = CHUNK_MAP_DECOMMITTED;
 			} else {
-				flag_unzeroed = chunk_purge_wrapper(arena,
+				flag_unzeroed = chunk_purge_wrapper(tsdn, arena,
 				    chunk_hooks, chunk, chunksize, pageind <<
 				    LG_PAGE, run_size) ? CHUNK_MAP_UNZEROED : 0;
 				flags = flag_unzeroed;
 			}
 			arena_mapbits_large_set(chunk, pageind+npages-1, 0,
 			    flags);
 			arena_mapbits_large_set(chunk, pageind, run_size,
 			    flags);
@@ -1669,28 +1670,28 @@ arena_purge_stashed(arena_t *arena, chun
 				    flag_unzeroed);
 			}
 		}
 
 		npurged += npages;
 		if (config_stats)
 			nmadvise++;
 	}
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 
 	if (config_stats) {
 		arena->stats.nmadvise += nmadvise;
 		arena->stats.purged += npurged;
 	}
 
 	return (npurged);
 }
 
 static void
-arena_unstash_purged(arena_t *arena, chunk_hooks_t *chunk_hooks,
+arena_unstash_purged(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
     arena_runs_dirty_link_t *purge_runs_sentinel,
     extent_node_t *purge_chunks_sentinel)
 {
 	arena_runs_dirty_link_t *rdelm, *rdelm_next;
 	extent_node_t *chunkselm;
 
 	/* Deallocate chunks/runs. */
 	for (rdelm = qr_next(purge_runs_sentinel, rd_link),
@@ -1700,48 +1701,49 @@ arena_unstash_purged(arena_t *arena, chu
 		if (rdelm == &chunkselm->rd) {
 			extent_node_t *chunkselm_next = qr_next(chunkselm,
 			    cc_link);
 			void *addr = extent_node_addr_get(chunkselm);
 			size_t size = extent_node_size_get(chunkselm);
 			bool zeroed = extent_node_zeroed_get(chunkselm);
 			bool committed = extent_node_committed_get(chunkselm);
 			extent_node_dirty_remove(chunkselm);
-			arena_node_dalloc(arena, chunkselm);
+			arena_node_dalloc(tsdn, arena, chunkselm);
 			chunkselm = chunkselm_next;
-			chunk_dalloc_wrapper(arena, chunk_hooks, addr, size,
-			    zeroed, committed);
+			chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, addr,
+			    size, zeroed, committed);
 		} else {
 			arena_chunk_t *chunk =
 			    (arena_chunk_t *)CHUNK_ADDR2BASE(rdelm);
 			arena_chunk_map_misc_t *miscelm =
 			    arena_rd_to_miscelm(rdelm);
 			size_t pageind = arena_miscelm_to_pageind(miscelm);
 			bool decommitted = (arena_mapbits_decommitted_get(chunk,
 			    pageind) != 0);
 			arena_run_t *run = &miscelm->run;
 			qr_remove(rdelm, rd_link);
-			arena_run_dalloc(arena, run, false, true, decommitted);
+			arena_run_dalloc(tsdn, arena, run, false, true,
+			    decommitted);
 		}
 	}
 }
 
 /*
  * NB: ndirty_limit is interpreted differently depending on opt_purge:
  *   - purge_mode_ratio: Purge as few dirty run/chunks as possible to reach the
  *                       desired state:
  *                       (arena->ndirty <= ndirty_limit)
  *   - purge_mode_decay: Purge as many dirty runs/chunks as possible without
  *                       violating the invariant:
  *                       (arena->ndirty >= ndirty_limit)
  */
 static void
-arena_purge_to_limit(arena_t *arena, size_t ndirty_limit)
+arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit)
 {
-	chunk_hooks_t chunk_hooks = chunk_hooks_get(arena);
+	chunk_hooks_t chunk_hooks = chunk_hooks_get(tsdn, arena);
 	size_t npurge, npurged;
 	arena_runs_dirty_link_t purge_runs_sentinel;
 	extent_node_t purge_chunks_sentinel;
 
 	arena->purging = true;
 
 	/*
 	 * Calls to arena_dirty_count() are disabled even for debug builds
@@ -1752,43 +1754,187 @@ arena_purge_to_limit(arena_t *arena, siz
 		assert(ndirty == arena->ndirty);
 	}
 	assert(opt_purge != purge_mode_ratio || (arena->nactive >>
 	    arena->lg_dirty_mult) < arena->ndirty || ndirty_limit == 0);
 
 	qr_new(&purge_runs_sentinel, rd_link);
 	extent_node_dirty_linkage_init(&purge_chunks_sentinel);
 
-	npurge = arena_stash_dirty(arena, &chunk_hooks, ndirty_limit,
+	npurge = arena_stash_dirty(tsdn, arena, &chunk_hooks, ndirty_limit,
 	    &purge_runs_sentinel, &purge_chunks_sentinel);
 	if (npurge == 0)
 		goto label_return;
-	npurged = arena_purge_stashed(arena, &chunk_hooks, &purge_runs_sentinel,
-	    &purge_chunks_sentinel);
+	npurged = arena_purge_stashed(tsdn, arena, &chunk_hooks,
+	    &purge_runs_sentinel, &purge_chunks_sentinel);
 	assert(npurged == npurge);
-	arena_unstash_purged(arena, &chunk_hooks, &purge_runs_sentinel,
+	arena_unstash_purged(tsdn, arena, &chunk_hooks, &purge_runs_sentinel,
 	    &purge_chunks_sentinel);
 
 	if (config_stats)
 		arena->stats.npurge++;
 
 label_return:
 	arena->purging = false;
 }
 
 void
-arena_purge(arena_t *arena, bool all)
+arena_purge(tsdn_t *tsdn, arena_t *arena, bool all)
 {
 
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	if (all)
-		arena_purge_to_limit(arena, 0);
+		arena_purge_to_limit(tsdn, arena, 0);
 	else
-		arena_maybe_purge(arena);
-	malloc_mutex_unlock(&arena->lock);
+		arena_maybe_purge(tsdn, arena);
+	malloc_mutex_unlock(tsdn, &arena->lock);
+}
+
+static void
+arena_achunk_prof_reset(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk)
+{
+	size_t pageind, npages;
+
+	cassert(config_prof);
+	assert(opt_prof);
+
+	/*
+	 * Iterate over the allocated runs and remove profiled allocations from
+	 * the sample set.
+	 */
+	for (pageind = map_bias; pageind < chunk_npages; pageind += npages) {
+		if (arena_mapbits_allocated_get(chunk, pageind) != 0) {
+			if (arena_mapbits_large_get(chunk, pageind) != 0) {
+				void *ptr = (void *)((uintptr_t)chunk + (pageind
+				    << LG_PAGE));
+				size_t usize = isalloc(tsd_tsdn(tsd), ptr,
+				    config_prof);
+
+				prof_free(tsd, ptr, usize);
+				npages = arena_mapbits_large_size_get(chunk,
+				    pageind) >> LG_PAGE;
+			} else {
+				/* Skip small run. */
+				size_t binind = arena_mapbits_binind_get(chunk,
+				    pageind);
+				arena_bin_info_t *bin_info =
+				    &arena_bin_info[binind];
+				npages = bin_info->run_size >> LG_PAGE;
+			}
+		} else {
+			/* Skip unallocated run. */
+			npages = arena_mapbits_unallocated_size_get(chunk,
+			    pageind) >> LG_PAGE;
+		}
+		assert(pageind + npages <= chunk_npages);
+	}
+}
+
+void
+arena_reset(tsd_t *tsd, arena_t *arena)
+{
+	unsigned i;
+	extent_node_t *node;
+
+	/*
+	 * Locking in this function is unintuitive.  The caller guarantees that
+	 * no concurrent operations are happening in this arena, but there are
+	 * still reasons that some locking is necessary:
+	 *
+	 * - Some of the functions in the transitive closure of calls assume
+	 *   appropriate locks are held, and in some cases these locks are
+	 *   temporarily dropped to avoid lock order reversal or deadlock due to
+	 *   reentry.
+	 * - mallctl("epoch", ...) may concurrently refresh stats.  While
+	 *   strictly speaking this is a "concurrent operation", disallowing
+	 *   stats refreshes would impose an inconvenient burden.
+	 */
+
+	/* Remove large allocations from prof sample set. */
+	if (config_prof && opt_prof) {
+		ql_foreach(node, &arena->achunks, ql_link) {
+			arena_achunk_prof_reset(tsd, arena,
+			    extent_node_addr_get(node));
+		}
+	}
+
+	/* Reset curruns for large size classes. */
+	if (config_stats) {
+		for (i = 0; i < nlclasses; i++)
+			arena->stats.lstats[i].curruns = 0;
+	}
+
+	/* Huge allocations. */
+	malloc_mutex_lock(tsd_tsdn(tsd), &arena->huge_mtx);
+	for (node = ql_last(&arena->huge, ql_link); node != NULL; node =
+	    ql_last(&arena->huge, ql_link)) {
+		void *ptr = extent_node_addr_get(node);
+		size_t usize;
+
+		malloc_mutex_unlock(tsd_tsdn(tsd), &arena->huge_mtx);
+		if (config_stats || (config_prof && opt_prof))
+			usize = isalloc(tsd_tsdn(tsd), ptr, config_prof);
+		/* Remove huge allocation from prof sample set. */
+		if (config_prof && opt_prof)
+			prof_free(tsd, ptr, usize);
+		huge_dalloc(tsd_tsdn(tsd), ptr);
+		malloc_mutex_lock(tsd_tsdn(tsd), &arena->huge_mtx);
+		/* Cancel out unwanted effects on stats. */
+		if (config_stats)
+			arena_huge_reset_stats_cancel(arena, usize);
+	}
+	malloc_mutex_unlock(tsd_tsdn(tsd), &arena->huge_mtx);
+
+	malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock);
+
+	/* Bins. */
+	for (i = 0; i < NBINS; i++) {
+		arena_bin_t *bin = &arena->bins[i];
+		malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
+		bin->runcur = NULL;
+		arena_run_heap_new(&bin->runs);
+		if (config_stats) {
+			bin->stats.curregs = 0;
+			bin->stats.curruns = 0;
+		}
+		malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
+	}
+
+	/*
+	 * Re-initialize runs_dirty such that the chunks_cache and runs_dirty
+	 * chains directly correspond.
+	 */
+	qr_new(&arena->runs_dirty, rd_link);
+	for (node = qr_next(&arena->chunks_cache, cc_link);
+	    node != &arena->chunks_cache; node = qr_next(node, cc_link)) {
+		qr_new(&node->rd, rd_link);
+		qr_meld(&arena->runs_dirty, &node->rd, rd_link);
+	}
+
+	/* Arena chunks. */
+	for (node = ql_last(&arena->achunks, ql_link); node != NULL; node =
+	    ql_last(&arena->achunks, ql_link)) {
+		ql_remove(&arena->achunks, node, ql_link);
+		arena_chunk_discard(tsd_tsdn(tsd), arena,
+		    extent_node_addr_get(node));
+	}
+
+	/* Spare. */
+	if (arena->spare != NULL) {
+		arena_chunk_discard(tsd_tsdn(tsd), arena, arena->spare);
+		arena->spare = NULL;
+	}
+
+	assert(!arena->purging);
+	arena->nactive = 0;
+
+	for (i = 0; i < NPSIZES; i++)
+		arena_run_heap_new(&arena->runs_avail[i]);
+
+	malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock);
 }
 
 static void
 arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size,
     size_t *p_run_ind, size_t *p_run_pages, size_t flag_dirty,
     size_t flag_decommitted)
 {
 	size_t size = *p_size;
@@ -1895,18 +2041,18 @@ arena_run_size_get(arena_t *arena, arena
 		arena_bin_info_t *bin_info = &arena_bin_info[run->binind];
 		size = bin_info->run_size;
 	}
 
 	return (size);
 }
 
 static void
-arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned,
-    bool decommitted)
+arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, arena_run_t *run, bool dirty,
+    bool cleaned, bool decommitted)
 {
 	arena_chunk_t *chunk;
 	arena_chunk_map_misc_t *miscelm;
 	size_t size, run_ind, run_pages, flag_dirty, flag_decommitted;
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
 	miscelm = arena_run_to_miscelm(run);
 	run_ind = arena_miscelm_to_pageind(miscelm);
@@ -1956,33 +2102,33 @@ arena_run_dalloc(arena_t *arena, arena_r
 
 	if (dirty)
 		arena_run_dirty_insert(arena, chunk, run_ind, run_pages);
 
 	/* Deallocate chunk if it is now completely unused. */
 	if (size == arena_maxrun) {
 		assert(run_ind == map_bias);
 		assert(run_pages == (arena_maxrun >> LG_PAGE));
-		arena_chunk_dalloc(arena, chunk);
+		arena_chunk_dalloc(tsdn, arena, chunk);
 	}
 
 	/*
 	 * It is okay to do dirty page processing here even if the chunk was
 	 * deallocated above, since in that case it is the spare.  Waiting
 	 * until after possible chunk deallocation to do dirty processing
 	 * allows for an old spare to be fully deallocated, thus decreasing the
 	 * chances of spuriously crossing the dirty page purging threshold.
 	 */
 	if (dirty)
-		arena_maybe_purge(arena);
+		arena_maybe_purge(tsdn, arena);
 }
 
 static void
-arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
-    size_t oldsize, size_t newsize)
+arena_run_trim_head(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
+    arena_run_t *run, size_t oldsize, size_t newsize)
 {
 	arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run);
 	size_t pageind = arena_miscelm_to_pageind(miscelm);
 	size_t head_npages = (oldsize - newsize) >> LG_PAGE;
 	size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind);
 	size_t flag_decommitted = arena_mapbits_decommitted_get(chunk, pageind);
 	size_t flag_unzeroed_mask = (flag_dirty | flag_decommitted) == 0 ?
 	    CHUNK_MAP_UNZEROED : 0;
@@ -2007,22 +2153,23 @@ arena_run_trim_head(arena_t *arena, aren
 		    pageind+head_npages+tail_npages-1) == 0);
 		assert(arena_mapbits_dirty_get(chunk,
 		    pageind+head_npages+tail_npages-1) == flag_dirty);
 	}
 	arena_mapbits_large_set(chunk, pageind+head_npages, newsize,
 	    flag_dirty | (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk,
 	    pageind+head_npages)));
 
-	arena_run_dalloc(arena, run, false, false, (flag_decommitted != 0));
+	arena_run_dalloc(tsdn, arena, run, false, false, (flag_decommitted !=
+	    0));
 }
 
 static void
-arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
-    size_t oldsize, size_t newsize, bool dirty)
+arena_run_trim_tail(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
+    arena_run_t *run, size_t oldsize, size_t newsize, bool dirty)
 {
 	arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run);
 	size_t pageind = arena_miscelm_to_pageind(miscelm);
 	size_t head_npages = newsize >> LG_PAGE;
 	size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind);
 	size_t flag_decommitted = arena_mapbits_decommitted_get(chunk, pageind);
 	size_t flag_unzeroed_mask = (flag_dirty | flag_decommitted) == 0 ?
 	    CHUNK_MAP_UNZEROED : 0;
@@ -2049,94 +2196,74 @@ arena_run_trim_tail(arena_t *arena, aren
 		    pageind+head_npages+tail_npages-1) == 0);
 		assert(arena_mapbits_dirty_get(chunk,
 		    pageind+head_npages+tail_npages-1) == flag_dirty);
 	}
 	arena_mapbits_large_set(chunk, pageind+head_npages, oldsize-newsize,
 	    flag_dirty | (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk,
 	    pageind+head_npages)));
 
-	tail_miscelm = arena_miscelm_get(chunk, pageind + head_npages);
+	tail_miscelm = arena_miscelm_get_mutable(chunk, pageind + head_npages);
 	tail_run = &tail_miscelm->run;
-	arena_run_dalloc(arena, tail_run, dirty, false, (flag_decommitted !=
-	    0));
-}
-
-static arena_run_t *
-arena_bin_runs_first(arena_bin_t *bin)
-{
-	arena_chunk_map_misc_t *miscelm = arena_run_tree_first(&bin->runs);
-	if (miscelm != NULL)
-		return (&miscelm->run);
-
-	return (NULL);
+	arena_run_dalloc(tsdn, arena, tail_run, dirty, false, (flag_decommitted
+	    != 0));
 }
 
 static void
 arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run)
 {
 	arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run);
 
-	assert(arena_run_tree_search(&bin->runs, miscelm) == NULL);
-
-	arena_run_tree_insert(&bin->runs, miscelm);
-}
-
-static void
-arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run)
-{
-	arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run);
-
-	assert(arena_run_tree_search(&bin->runs, miscelm) != NULL);
-
-	arena_run_tree_remove(&bin->runs, miscelm);
+	arena_run_heap_insert(&bin->runs, miscelm);
 }
 
 static arena_run_t *
 arena_bin_nonfull_run_tryget(arena_bin_t *bin)
 {
-	arena_run_t *run = arena_bin_runs_first(bin);
-	if (run != NULL) {
-		arena_bin_runs_remove(bin, run);
-		if (config_stats)
-			bin->stats.reruns++;
-	}
-	return (run);
+	arena_chunk_map_misc_t *miscelm;
+
+	miscelm = arena_run_heap_remove_first(&bin->runs);
+	if (miscelm == NULL)
+		return (NULL);
+	if (config_stats)
+		bin->stats.reruns++;
+
+	return (&miscelm->run);
 }
 
 static arena_run_t *
-arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
+arena_bin_nonfull_run_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin)
 {
 	arena_run_t *run;
 	szind_t binind;
 	arena_bin_info_t *bin_info;
 
 	/* Look for a usable run. */
 	run = arena_bin_nonfull_run_tryget(bin);
 	if (run != NULL)
 		return (run);
 	/* No existing runs have any space available. */
 
 	binind = arena_bin_index(arena, bin);
 	bin_info = &arena_bin_info[binind];
 
 	/* Allocate a new run. */
-	malloc_mutex_unlock(&bin->lock);
+	malloc_mutex_unlock(tsdn, &bin->lock);
 	/******************************/
-	malloc_mutex_lock(&arena->lock);
-	run = arena_run_alloc_small(arena, bin_info->run_size, binind);
+	malloc_mutex_lock(tsdn, &arena->lock);
+	run = arena_run_alloc_small(tsdn, arena, bin_info->run_size, binind);
 	if (run != NULL) {
 		/* Initialize run internals. */
 		run->binind = binind;
 		run->nfree = bin_info->nregs;
 		bitmap_init(run->bitmap, &bin_info->bitmap_info);
 	}
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 	/********************************/
-	malloc_mutex_lock(&bin->lock);
+	malloc_mutex_lock(tsdn, &bin->lock);
 	if (run != NULL) {
 		if (config_stats) {
 			bin->stats.nruns++;
 			bin->stats.curruns++;
 		}
 		return (run);
 	}
 
@@ -2149,26 +2276,26 @@ arena_bin_nonfull_run_get(arena_t *arena
 	if (run != NULL)
 		return (run);
 
 	return (NULL);
 }
 
 /* Re-fill bin->runcur, then call arena_run_reg_alloc(). */
 static void *
-arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
+arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin)
 {
 	szind_t binind;
 	arena_bin_info_t *bin_info;
 	arena_run_t *run;
 
 	binind = arena_bin_index(arena, bin);
 	bin_info = &arena_bin_info[binind];
 	bin->runcur = NULL;
-	run = arena_bin_nonfull_run_get(arena, bin);
+	run = arena_bin_nonfull_run_get(tsdn, arena, bin);
 	if (bin->runcur != NULL && bin->runcur->nfree > 0) {
 		/*
 		 * Another thread updated runcur while this one ran without the
 		 * bin lock in arena_bin_nonfull_run_get().
 		 */
 		void *ret;
 		assert(bin->runcur->nfree > 0);
 		ret = arena_run_reg_alloc(bin->runcur, bin_info);
@@ -2179,55 +2306,56 @@ arena_bin_malloc_hard(arena_t *arena, ar
 			 * arena_run_alloc_small() may have allocated run, or
 			 * it may have pulled run from the bin's run tree.
 			 * Therefore it is unsafe to make any assumptions about
 			 * how run has previously been used, and
 			 * arena_bin_lower_run() must be called, as if a region
 			 * were just deallocated from the run.
 			 */
 			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
-			if (run->nfree == bin_info->nregs)
-				arena_dalloc_bin_run(arena, chunk, run, bin);
-			else
+			if (run->nfree == bin_info->nregs) {
+				arena_dalloc_bin_run(tsdn, arena, chunk, run,
+				    bin);
+			} else
 				arena_bin_lower_run(arena, chunk, run, bin);
 		}
 		return (ret);
 	}
 
 	if (run == NULL)
 		return (NULL);
 
 	bin->runcur = run;
 
 	assert(bin->runcur->nfree > 0);
 
 	return (arena_run_reg_alloc(bin->runcur, bin_info));
 }
 
 void
-arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin,
+arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_bin_t *tbin,
     szind_t binind, uint64_t prof_accumbytes)
 {
 	unsigned i, nfill;
 	arena_bin_t *bin;
 
 	assert(tbin->ncached == 0);
 
-	if (config_prof && arena_prof_accum(arena, prof_accumbytes))
-		prof_idump();
+	if (config_prof && arena_prof_accum(tsdn, arena, prof_accumbytes))
+		prof_idump(tsdn);
 	bin = &arena->bins[binind];
-	malloc_mutex_lock(&bin->lock);
+	malloc_mutex_lock(tsdn, &bin->lock);
 	for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >>
 	    tbin->lg_fill_div); i < nfill; i++) {
 		arena_run_t *run;
 		void *ptr;
 		if ((run = bin->runcur) != NULL && run->nfree > 0)
 			ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]);
 		else
-			ptr = arena_bin_malloc_hard(arena, bin);
+			ptr = arena_bin_malloc_hard(tsdn, arena, bin);
 		if (ptr == NULL) {
 			/*
 			 * OOM.  tbin->avail isn't yet filled down to its first
 			 * element, so the successful allocations (if any) must
 			 * be moved just before tbin->avail before bailing out.
 			 */
 			if (i > 0) {
 				memmove(tbin->avail - i, tbin->avail - nfill,
@@ -2244,111 +2372,112 @@ arena_tcache_fill_small(tsd_t *tsd, aren
 	}
 	if (config_stats) {
 		bin->stats.nmalloc += i;
 		bin->stats.nrequests += tbin->tstats.nrequests;
 		bin->stats.curregs += i;
 		bin->stats.nfills++;
 		tbin->tstats.nrequests = 0;
 	}
-	malloc_mutex_unlock(&bin->lock);
+	malloc_mutex_unlock(tsdn, &bin->lock);
 	tbin->ncached = i;
-	arena_decay_tick(tsd, arena);
+	arena_decay_tick(tsdn, arena);
 }
 
 void
 arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero)
 {
 
+	size_t redzone_size = bin_info->redzone_size;
+
 	if (zero) {
-		size_t redzone_size = bin_info->redzone_size;
-		memset((void *)((uintptr_t)ptr - redzone_size), 0xa5,
-		    redzone_size);
-		memset((void *)((uintptr_t)ptr + bin_info->reg_size), 0xa5,
-		    redzone_size);
+		memset((void *)((uintptr_t)ptr - redzone_size),
+		    JEMALLOC_ALLOC_JUNK, redzone_size);
+		memset((void *)((uintptr_t)ptr + bin_info->reg_size),
+		    JEMALLOC_ALLOC_JUNK, redzone_size);
 	} else {
-		memset((void *)((uintptr_t)ptr - bin_info->redzone_size), 0xa5,
-		    bin_info->reg_interval);
+		memset((void *)((uintptr_t)ptr - redzone_size),
+		    JEMALLOC_ALLOC_JUNK, bin_info->reg_interval);
 	}
 }
 
 #ifdef JEMALLOC_JET
 #undef arena_redzone_corruption
-#define	arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption_impl)
+#define	arena_redzone_corruption JEMALLOC_N(n_arena_redzone_corruption)
 #endif
 static void
 arena_redzone_corruption(void *ptr, size_t usize, bool after,
     size_t offset, uint8_t byte)
 {
 
 	malloc_printf("<jemalloc>: Corrupt redzone %zu byte%s %s %p "
 	    "(size %zu), byte=%#x\n", offset, (offset == 1) ? "" : "s",
 	    after ? "after" : "before", ptr, usize, byte);
 }
 #ifdef JEMALLOC_JET
 #undef arena_redzone_corruption
 #define	arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption)
 arena_redzone_corruption_t *arena_redzone_corruption =
-    JEMALLOC_N(arena_redzone_corruption_impl);
+    JEMALLOC_N(n_arena_redzone_corruption);
 #endif
 
 static void
 arena_redzones_validate(void *ptr, arena_bin_info_t *bin_info, bool reset)
 {
 	bool error = false;
 
 	if (opt_junk_alloc) {
 		size_t size = bin_info->reg_size;
 		size_t redzone_size = bin_info->redzone_size;
 		size_t i;
 
 		for (i = 1; i <= redzone_size; i++) {
 			uint8_t *byte = (uint8_t *)((uintptr_t)ptr - i);
-			if (*byte != 0xa5) {
+			if (*byte != JEMALLOC_ALLOC_JUNK) {
 				error = true;
 				arena_redzone_corruption(ptr, size, false, i,
 				    *byte);
 				if (reset)
-					*byte = 0xa5;
+					*byte = JEMALLOC_ALLOC_JUNK;
 			}
 		}
 		for (i = 0; i < redzone_size; i++) {
 			uint8_t *byte = (uint8_t *)((uintptr_t)ptr + size + i);
-			if (*byte != 0xa5) {
+			if (*byte != JEMALLOC_ALLOC_JUNK) {
 				error = true;
 				arena_redzone_corruption(ptr, size, true, i,
 				    *byte);
 				if (reset)
-					*byte = 0xa5;
+					*byte = JEMALLOC_ALLOC_JUNK;
 			}
 		}
 	}
 
 	if (opt_abort && error)
 		abort();
 }
 
 #ifdef JEMALLOC_JET
 #undef arena_dalloc_junk_small
-#define	arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small_impl)
+#define	arena_dalloc_junk_small JEMALLOC_N(n_arena_dalloc_junk_small)
 #endif
 void
 arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info)
 {
 	size_t redzone_size = bin_info->redzone_size;
 
 	arena_redzones_validate(ptr, bin_info, false);
-	memset((void *)((uintptr_t)ptr - redzone_size), 0x5a,
+	memset((void *)((uintptr_t)ptr - redzone_size), JEMALLOC_FREE_JUNK,
 	    bin_info->reg_interval);
 }
 #ifdef JEMALLOC_JET
 #undef arena_dalloc_junk_small
 #define	arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small)
 arena_dalloc_junk_small_t *arena_dalloc_junk_small =
-    JEMALLOC_N(arena_dalloc_junk_small_impl);
+    JEMALLOC_N(n_arena_dalloc_junk_small);
 #endif
 
 void
 arena_quarantine_junk_small(void *ptr, size_t usize)
 {
 	szind_t binind;
 	arena_bin_info_t *bin_info;
 	cassert(config_fill);
@@ -2357,46 +2486,46 @@ arena_quarantine_junk_small(void *ptr, s
 	assert(usize <= SMALL_MAXCLASS);
 
 	binind = size2index(usize);
 	bin_info = &arena_bin_info[binind];
 	arena_redzones_validate(ptr, bin_info, true);
 }
 
 static void *
-arena_malloc_small(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero)
+arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero)
 {
 	void *ret;
 	arena_bin_t *bin;
 	size_t usize;
 	arena_run_t *run;
 
 	assert(binind < NBINS);
 	bin = &arena->bins[binind];
 	usize = index2size(binind);
 
-	malloc_mutex_lock(&bin->lock);
+	malloc_mutex_lock(tsdn, &bin->lock);
 	if ((run = bin->runcur) != NULL && run->nfree > 0)
 		ret = arena_run_reg_alloc(run, &arena_bin_info[binind]);
 	else
-		ret = arena_bin_malloc_hard(arena, bin);
+		ret = arena_bin_malloc_hard(tsdn, arena, bin);
 
 	if (ret == NULL) {
-		malloc_mutex_unlock(&bin->lock);
+		malloc_mutex_unlock(tsdn, &bin->lock);
 		return (NULL);
 	}
 
 	if (config_stats) {
 		bin->stats.nmalloc++;
 		bin->stats.nrequests++;
 		bin->stats.curregs++;
 	}
-	malloc_mutex_unlock(&bin->lock);
-	if (config_prof && !isthreaded && arena_prof_accum(arena, usize))
-		prof_idump();
+	malloc_mutex_unlock(tsdn, &bin->lock);
+	if (config_prof && !isthreaded && arena_prof_accum(tsdn, arena, usize))
+		prof_idump(tsdn);
 
 	if (!zero) {
 		if (config_fill) {
 			if (unlikely(opt_junk_alloc)) {
 				arena_alloc_junk_small(ret,
 				    &arena_bin_info[binind], false);
 			} else if (unlikely(opt_zero))
 				memset(ret, 0, usize);
@@ -2406,48 +2535,49 @@ arena_malloc_small(tsd_t *tsd, arena_t *
 		if (config_fill && unlikely(opt_junk_alloc)) {
 			arena_alloc_junk_small(ret, &arena_bin_info[binind],
 			    true);
 		}
 		JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, usize);
 		memset(ret, 0, usize);
 	}
 
-	arena_decay_tick(tsd, arena);
+	arena_decay_tick(tsdn, arena);
 	return (ret);
 }
 
 void *
-arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero)
+arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero)
 {
 	void *ret;
 	size_t usize;
 	uintptr_t random_offset;
 	arena_run_t *run;
 	arena_chunk_map_misc_t *miscelm;
 	UNUSED bool idump JEMALLOC_CC_SILENCE_INIT(false);
 
 	/* Large allocation. */
 	usize = index2size(binind);
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	if (config_cache_oblivious) {
 		uint64_t r;
 
 		/*
 		 * Compute a uniformly distributed offset within the first page
 		 * that is a multiple of the cacheline size, e.g. [0 .. 63) * 64
 		 * for 4 KiB pages and 64-byte cachelines.
 		 */
-		r = prng_lg_range(&arena->offset_state, LG_PAGE - LG_CACHELINE);
+		r = prng_lg_range_zu(&arena->offset_state, LG_PAGE -
+		    LG_CACHELINE, false);
 		random_offset = ((uintptr_t)r) << LG_CACHELINE;
 	} else
 		random_offset = 0;
-	run = arena_run_alloc_large(arena, usize + large_pad, zero);
+	run = arena_run_alloc_large(tsdn, arena, usize + large_pad, zero);
 	if (run == NULL) {
-		malloc_mutex_unlock(&arena->lock);
+		malloc_mutex_unlock(tsdn, &arena->lock);
 		return (NULL);
 	}
 	miscelm = arena_run_to_miscelm(run);
 	ret = (void *)((uintptr_t)arena_miscelm_to_rpages(miscelm) +
 	    random_offset);
 	if (config_stats) {
 		szind_t index = binind - NBINS;
 
@@ -2455,236 +2585,241 @@ arena_malloc_large(tsd_t *tsd, arena_t *
 		arena->stats.nrequests_large++;
 		arena->stats.allocated_large += usize;
 		arena->stats.lstats[index].nmalloc++;
 		arena->stats.lstats[index].nrequests++;
 		arena->stats.lstats[index].curruns++;
 	}
 	if (config_prof)
 		idump = arena_prof_accum_locked(arena, usize);
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 	if (config_prof && idump)
-		prof_idump();
+		prof_idump(tsdn);
 
 	if (!zero) {
 		if (config_fill) {
 			if (unlikely(opt_junk_alloc))
-				memset(ret, 0xa5, usize);
+				memset(ret, JEMALLOC_ALLOC_JUNK, usize);
 			else if (unlikely(opt_zero))
 				memset(ret, 0, usize);
 		}
 	}
 
-	arena_decay_tick(tsd, arena);
+	arena_decay_tick(tsdn, arena);
 	return (ret);
 }
 
 void *
-arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind,
-    bool zero, tcache_t *tcache)
+arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind,
+    bool zero)
 {
 
-	arena = arena_choose(tsd, arena);
+	assert(!tsdn_null(tsdn) || arena != NULL);
+
+	if (likely(!tsdn_null(tsdn)))
+		arena = arena_choose(tsdn_tsd(tsdn), arena);
 	if (unlikely(arena == NULL))
 		return (NULL);
 
 	if (likely(size <= SMALL_MAXCLASS))
-		return (arena_malloc_small(tsd, arena, ind, zero));
+		return (arena_malloc_small(tsdn, arena, ind, zero));
 	if (likely(size <= large_maxclass))
-		return (arena_malloc_large(tsd, arena, ind, zero));
-	return (huge_malloc(tsd, arena, index2size(ind), zero, tcache));
+		return (arena_malloc_large(tsdn, arena, ind, zero));
+	return (huge_malloc(tsdn, arena, index2size(ind), zero));
 }
 
 /* Only handles large allocations that require more than page alignment. */
 static void *
-arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment,
+arena_palloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
     bool zero)
 {
 	void *ret;
 	size_t alloc_size, leadsize, trailsize;
 	arena_run_t *run;
 	arena_chunk_t *chunk;
 	arena_chunk_map_misc_t *miscelm;
 	void *rpages;
 
+	assert(!tsdn_null(tsdn) || arena != NULL);
 	assert(usize == PAGE_CEILING(usize));
 
-	arena = arena_choose(tsd, arena);
+	if (likely(!tsdn_null(tsdn)))
+		arena = arena_choose(tsdn_tsd(tsdn), arena);
 	if (unlikely(arena == NULL))
 		return (NULL);
 
 	alignment = PAGE_CEILING(alignment);
 	alloc_size = usize + large_pad + alignment - PAGE;
 
-	malloc_mutex_lock(&arena->lock);
-	run = arena_run_alloc_large(arena, alloc_size, false);
+	malloc_mutex_lock(tsdn, &arena->lock);
+	run = arena_run_alloc_large(tsdn, arena, alloc_size, false);
 	if (run == NULL) {
-		malloc_mutex_unlock(&arena->lock);
+		malloc_mutex_unlock(tsdn, &arena->lock);
 		return (NULL);
 	}
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
 	miscelm = arena_run_to_miscelm(run);
 	rpages = arena_miscelm_to_rpages(miscelm);
 
 	leadsize = ALIGNMENT_CEILING((uintptr_t)rpages, alignment) -
 	    (uintptr_t)rpages;
 	assert(alloc_size >= leadsize + usize);
 	trailsize = alloc_size - leadsize - usize - large_pad;
 	if (leadsize != 0) {
 		arena_chunk_map_misc_t *head_miscelm = miscelm;
 		arena_run_t *head_run = run;
 
-		miscelm = arena_miscelm_get(chunk,
+		miscelm = arena_miscelm_get_mutable(chunk,
 		    arena_miscelm_to_pageind(head_miscelm) + (leadsize >>
 		    LG_PAGE));
 		run = &miscelm->run;
 
-		arena_run_trim_head(arena, chunk, head_run, alloc_size,
+		arena_run_trim_head(tsdn, arena, chunk, head_run, alloc_size,
 		    alloc_size - leadsize);
 	}
 	if (trailsize != 0) {
-		arena_run_trim_tail(arena, chunk, run, usize + large_pad +
+		arena_run_trim_tail(tsdn, arena, chunk, run, usize + large_pad +
 		    trailsize, usize + large_pad, false);
 	}
 	if (arena_run_init_large(arena, run, usize + large_pad, zero)) {
 		size_t run_ind =
 		    arena_miscelm_to_pageind(arena_run_to_miscelm(run));
 		bool dirty = (arena_mapbits_dirty_get(chunk, run_ind) != 0);
 		bool decommitted = (arena_mapbits_decommitted_get(chunk,
 		    run_ind) != 0);
 
 		assert(decommitted); /* Cause of OOM. */
-		arena_run_dalloc(arena, run, dirty, false, decommitted);
-		malloc_mutex_unlock(&arena->lock);
+		arena_run_dalloc(tsdn, arena, run, dirty, false, decommitted);
+		malloc_mutex_unlock(tsdn, &arena->lock);
 		return (NULL);
 	}
 	ret = arena_miscelm_to_rpages(miscelm);
 
 	if (config_stats) {
 		szind_t index = size2index(usize) - NBINS;
 
 		arena->stats.nmalloc_large++;
 		arena->stats.nrequests_large++;
 		arena->stats.allocated_large += usize;
 		arena->stats.lstats[index].nmalloc++;
 		arena->stats.lstats[index].nrequests++;
 		arena->stats.lstats[index].curruns++;
 	}
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 
 	if (config_fill && !zero) {
 		if (unlikely(opt_junk_alloc))
-			memset(ret, 0xa5, usize);
+			memset(ret, JEMALLOC_ALLOC_JUNK, usize);
 		else if (unlikely(opt_zero))
 			memset(ret, 0, usize);
 	}
-	arena_decay_tick(tsd, arena);
+	arena_decay_tick(tsdn, arena);
 	return (ret);
 }
 
 void *
-arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment,
+arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
     bool zero, tcache_t *tcache)
 {
 	void *ret;
 
 	if (usize <= SMALL_MAXCLASS && (alignment < PAGE || (alignment == PAGE
 	    && (usize & PAGE_MASK) == 0))) {
 		/* Small; alignment doesn't require special run placement. */
-		ret = arena_malloc(tsd, arena, usize, size2index(usize), zero,
+		ret = arena_malloc(tsdn, arena, usize, size2index(usize), zero,
 		    tcache, true);
 	} else if (usize <= large_maxclass && alignment <= PAGE) {
 		/*
 		 * Large; alignment doesn't require special run placement.
 		 * However, the cached pointer may be at a random offset from
 		 * the base of the run, so do some bit manipulation to retrieve
 		 * the base.
 		 */
-		ret = arena_malloc(tsd, arena, usize, size2index(usize), zero,
+		ret = arena_malloc(tsdn, arena, usize, size2index(usize), zero,
 		    tcache, true);
 		if (config_cache_oblivious)
 			ret = (void *)((uintptr_t)ret & ~PAGE_MASK);
 	} else {
 		if (likely(usize <= large_maxclass)) {
-			ret = arena_palloc_large(tsd, arena, usize, alignment,
+			ret = arena_palloc_large(tsdn, arena, usize, alignment,
 			    zero);
 		} else if (likely(alignment <= chunksize))
-			ret = huge_malloc(tsd, arena, usize, zero, tcache);
+			ret = huge_malloc(tsdn, arena, usize, zero);
 		else {
-			ret = huge_palloc(tsd, arena, usize, alignment, zero,
-			    tcache);
+			ret = huge_palloc(tsdn, arena, usize, alignment, zero);
 		}
 	}
 	return (ret);
 }
 
 void
-arena_prof_promoted(const void *ptr, size_t size)
+arena_prof_promoted(tsdn_t *tsdn, const void *ptr, size_t size)
 {
 	arena_chunk_t *chunk;
 	size_t pageind;
 	szind_t binind;
 
 	cassert(config_prof);
 	assert(ptr != NULL);
 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
-	assert(isalloc(ptr, false) == LARGE_MINCLASS);
-	assert(isalloc(ptr, true) == LARGE_MINCLASS);
+	assert(isalloc(tsdn, ptr, false) == LARGE_MINCLASS);
+	assert(isalloc(tsdn, ptr, true) == LARGE_MINCLASS);
 	assert(size <= SMALL_MAXCLASS);
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
 	binind = size2index(size);
 	assert(binind < NBINS);
 	arena_mapbits_large_binind_set(chunk, pageind, binind);
 
-	assert(isalloc(ptr, false) == LARGE_MINCLASS);
-	assert(isalloc(ptr, true) == size);
+	assert(isalloc(tsdn, ptr, false) == LARGE_MINCLASS);
+	assert(isalloc(tsdn, ptr, true) == size);
 }
 
 static void
 arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run,
     arena_bin_t *bin)
 {
 
 	/* Dissociate run from bin. */
 	if (run == bin->runcur)
 		bin->runcur = NULL;
 	else {
 		szind_t binind = arena_bin_index(extent_node_arena_get(
 		    &chunk->node), bin);
 		arena_bin_info_t *bin_info = &arena_bin_info[binind];
 
+		/*
+		 * The following block's conditional is necessary because if the
+		 * run only contains one region, then it never gets inserted
+		 * into the non-full runs tree.
+		 */
 		if (bin_info->nregs != 1) {
-			/*
-			 * This block's conditional is necessary because if the
-			 * run only contains one region, then it never gets
-			 * inserted into the non-full runs tree.
-			 */
-			arena_bin_runs_remove(bin, run);
+			arena_chunk_map_misc_t *miscelm =
+			    arena_run_to_miscelm(run);
+
+			arena_run_heap_remove(&bin->runs, miscelm);
 		}
 	}
 }
 
 static void
-arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
-    arena_bin_t *bin)
+arena_dalloc_bin_run(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
+    arena_run_t *run, arena_bin_t *bin)
 {
 
 	assert(run != bin->runcur);
-	assert(arena_run_tree_search(&bin->runs, arena_run_to_miscelm(run)) ==
-	    NULL);
-
-	malloc_mutex_unlock(&bin->lock);
+
+	malloc_mutex_unlock(tsdn, &bin->lock);
 	/******************************/
-	malloc_mutex_lock(&arena->lock);
-	arena_run_dalloc(arena, run, true, false, false);
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
+	arena_run_dalloc(tsdn, arena, run, true, false, false);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 	/****************************/
-	malloc_mutex_lock(&bin->lock);
+	malloc_mutex_lock(tsdn, &bin->lock);
 	if (config_stats)
 		bin->stats.curruns--;
 }
 
 static void
 arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
     arena_bin_t *bin)
 {
@@ -2701,112 +2836,113 @@ arena_bin_lower_run(arena_t *arena, aren
 		bin->runcur = run;
 		if (config_stats)
 			bin->stats.reruns++;
 	} else
 		arena_bin_runs_insert(bin, run);
 }
 
 static void
-arena_dalloc_bin_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr,
-    arena_chunk_map_bits_t *bitselm, bool junked)
+arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
+    void *ptr, arena_chunk_map_bits_t *bitselm, bool junked)
 {
 	size_t pageind, rpages_ind;
 	arena_run_t *run;
 	arena_bin_t *bin;
 	arena_bin_info_t *bin_info;
 	szind_t binind;
 
 	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
 	rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind);
-	run = &arena_miscelm_get(chunk, rpages_ind)->run;
+	run = &arena_miscelm_get_mutable(chunk, rpages_ind)->run;
 	binind = run->binind;
 	bin = &arena->bins[binind];
 	bin_info = &arena_bin_info[binind];
 
 	if (!junked && config_fill && unlikely(opt_junk_free))
 		arena_dalloc_junk_small(ptr, bin_info);
 
 	arena_run_reg_dalloc(run, ptr);
 	if (run->nfree == bin_info->nregs) {
 		arena_dissociate_bin_run(chunk, run, bin);
-		arena_dalloc_bin_run(arena, chunk, run, bin);
+		arena_dalloc_bin_run(tsdn, arena, chunk, run, bin);
 	} else if (run->nfree == 1 && run != bin->runcur)
 		arena_bin_lower_run(arena, chunk, run, bin);
 
 	if (config_stats) {
 		bin->stats.ndalloc++;
 		bin->stats.curregs--;
 	}
 }
 
 void
-arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr,
-    arena_chunk_map_bits_t *bitselm)
+arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena,
+    arena_chunk_t *chunk, void *ptr, arena_chunk_map_bits_t *bitselm)
 {
 
-	arena_dalloc_bin_locked_impl(arena, chunk, ptr, bitselm, true);
+	arena_dalloc_bin_locked_impl(tsdn, arena, chunk, ptr, bitselm, true);
 }
 
 void
-arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
+arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr,
     size_t pageind, arena_chunk_map_bits_t *bitselm)
 {
 	arena_run_t *run;
 	arena_bin_t *bin;
 	size_t rpages_ind;
 
 	rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind);
-	run = &arena_miscelm_get(chunk, rpages_ind)->run;
+	run = &arena_miscelm_get_mutable(chunk, rpages_ind)->run;
 	bin = &arena->bins[run->binind];
-	malloc_mutex_lock(&bin->lock);
-	arena_dalloc_bin_locked_impl(arena, chunk, ptr, bitselm, false);
-	malloc_mutex_unlock(&bin->lock);
+	malloc_mutex_lock(tsdn, &bin->lock);
+	arena_dalloc_bin_locked_impl(tsdn, arena, chunk, ptr, bitselm, false);
+	malloc_mutex_unlock(tsdn, &bin->lock);
 }
 
 void
-arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr,
-    size_t pageind)
+arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
+    void *ptr, size_t pageind)
 {
 	arena_chunk_map_bits_t *bitselm;
 
 	if (config_debug) {
 		/* arena_ptr_small_binind_get() does extra sanity checking. */
 		assert(arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk,
 		    pageind)) != BININD_INVALID);
 	}
-	bitselm = arena_bitselm_get(chunk, pageind);
-	arena_dalloc_bin(arena, chunk, ptr, pageind, bitselm);
-	arena_decay_tick(tsd, arena);
+	bitselm = arena_bitselm_get_mutable(chunk, pageind);
+	arena_dalloc_bin(tsdn, arena, chunk, ptr, pageind, bitselm);
+	arena_decay_tick(tsdn, arena);
 }
 
 #ifdef JEMALLOC_JET
 #undef arena_dalloc_junk_large
-#define	arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large_impl)
+#define	arena_dalloc_junk_large JEMALLOC_N(n_arena_dalloc_junk_large)
 #endif
 void
 arena_dalloc_junk_large(void *ptr, size_t usize)
 {
 
 	if (config_fill && unlikely(opt_junk_free))
-		memset(ptr, 0x5a, usize);
+		memset(ptr, JEMALLOC_FREE_JUNK, usize);
 }
 #ifdef JEMALLOC_JET
 #undef arena_dalloc_junk_large
 #define	arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large)
 arena_dalloc_junk_large_t *arena_dalloc_junk_large =
-    JEMALLOC_N(arena_dalloc_junk_large_impl);
+    JEMALLOC_N(n_arena_dalloc_junk_large);
 #endif
 
 static void
-arena_dalloc_large_locked_impl(arena_t *arena, arena_chunk_t *chunk,
-    void *ptr, bool junked)
+arena_dalloc_large_locked_impl(tsdn_t *tsdn, arena_t *arena,
+    arena_chunk_t *chunk, void *ptr, bool junked)
 {
 	size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
-	arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind);
+	arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk,
+	    pageind);
 	arena_run_t *run = &miscelm->run;
 
 	if (config_fill || config_stats) {
 		size_t usize = arena_mapbits_large_size_get(chunk, pageind) -
 		    large_pad;
 
 		if (!junked)
 			arena_dalloc_junk_large(ptr, usize);
@@ -2815,53 +2951,55 @@ arena_dalloc_large_locked_impl(arena_t *
 
 			arena->stats.ndalloc_large++;
 			arena->stats.allocated_large -= usize;
 			arena->stats.lstats[index].ndalloc++;
 			arena->stats.lstats[index].curruns--;
 		}
 	}
 
-	arena_run_dalloc(arena, run, true, false, false);
+	arena_run_dalloc(tsdn, arena, run, true, false, false);
 }
 
 void
-arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk,
-    void *ptr)
+arena_dalloc_large_junked_locked(tsdn_t *tsdn, arena_t *arena,
+    arena_chunk_t *chunk, void *ptr)
 {
 
-	arena_dalloc_large_locked_impl(arena, chunk, ptr, true);
+	arena_dalloc_large_locked_impl(tsdn, arena, chunk, ptr, true);
 }
 
 void
-arena_dalloc_large(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr)
+arena_dalloc_large(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
+    void *ptr)
 {
 
-	malloc_mutex_lock(&arena->lock);
-	arena_dalloc_large_locked_impl(arena, chunk, ptr, false);
-	malloc_mutex_unlock(&arena->lock);
-	arena_decay_tick(tsd, arena);
+	malloc_mutex_lock(tsdn, &arena->lock);
+	arena_dalloc_large_locked_impl(tsdn, arena, chunk, ptr, false);
+	malloc_mutex_unlock(tsdn, &arena->lock);
+	arena_decay_tick(tsdn, arena);
 }
 
 static void
-arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr,
-    size_t oldsize, size_t size)
+arena_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
+    void *ptr, size_t oldsize, size_t size)
 {
 	size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
-	arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind);
+	arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk,
+	    pageind);
 	arena_run_t *run = &miscelm->run;
 
 	assert(size < oldsize);
 
 	/*
 	 * Shrink the run, and make trailing pages available for other
 	 * allocations.
 	 */
-	malloc_mutex_lock(&arena->lock);
-	arena_run_trim_tail(arena, chunk, run, oldsize + large_pad, size +
+	malloc_mutex_lock(tsdn, &arena->lock);
+	arena_run_trim_tail(tsdn, arena, chunk, run, oldsize + large_pad, size +
 	    large_pad, true);
 	if (config_stats) {
 		szind_t oldindex = size2index(oldsize) - NBINS;
 		szind_t index = size2index(size) - NBINS;
 
 		arena->stats.ndalloc_large++;
 		arena->stats.allocated_large -= oldsize;
 		arena->stats.lstats[oldindex].ndalloc++;
@@ -2869,32 +3007,32 @@ arena_ralloc_large_shrink(arena_t *arena
 
 		arena->stats.nmalloc_large++;
 		arena->stats.nrequests_large++;
 		arena->stats.allocated_large += size;
 		arena->stats.lstats[index].nmalloc++;
 		arena->stats.lstats[index].nrequests++;
 		arena->stats.lstats[index].curruns++;
 	}
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 }
 
 static bool
-arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr,
-    size_t oldsize, size_t usize_min, size_t usize_max, bool zero)
+arena_ralloc_large_grow(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
+    void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero)
 {
 	size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
 	size_t npages = (oldsize + large_pad) >> LG_PAGE;
 	size_t followsize;
 
 	assert(oldsize == arena_mapbits_large_size_get(chunk, pageind) -
 	    large_pad);
 
 	/* Try to extend the run. */
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	if (pageind+npages >= chunk_npages || arena_mapbits_allocated_get(chunk,
 	    pageind+npages) != 0)
 		goto label_fail;
 	followsize = arena_mapbits_unallocated_size_get(chunk, pageind+npages);
 	if (oldsize + followsize >= usize_min) {
 		/*
 		 * The next run is available and sufficiently large.  Split the
 		 * following run, then merge the first part with the existing
@@ -2907,17 +3045,17 @@ arena_ralloc_large_grow(arena_t *arena, 
 		while (oldsize + followsize < usize)
 			usize = index2size(size2index(usize)-1);
 		assert(usize >= usize_min);
 		assert(usize >= oldsize);
 		splitsize = usize - oldsize;
 		if (splitsize == 0)
 			goto label_fail;
 
-		run = &arena_miscelm_get(chunk, pageind+npages)->run;
+		run = &arena_miscelm_get_mutable(chunk, pageind+npages)->run;
 		if (arena_run_split_large(arena, run, splitsize, zero))
 			goto label_fail;
 
 		if (config_cache_oblivious && zero) {
 			/*
 			 * Zero the trailing bytes of the original allocation's
 			 * last page, since they are in an indeterminate state.
 			 * There will always be trailing bytes, because ptr's
@@ -2964,87 +3102,88 @@ arena_ralloc_large_grow(arena_t *arena, 
 
 			arena->stats.nmalloc_large++;
 			arena->stats.nrequests_large++;
 			arena->stats.allocated_large += size;
 			arena->stats.lstats[index].nmalloc++;
 			arena->stats.lstats[index].nrequests++;
 			arena->stats.lstats[index].curruns++;
 		}
-		malloc_mutex_unlock(&arena->lock);
+		malloc_mutex_unlock(tsdn, &arena->lock);
 		return (false);
 	}
 label_fail:
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 	return (true);
 }
 
 #ifdef JEMALLOC_JET
 #undef arena_ralloc_junk_large
-#define	arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large_impl)
+#define	arena_ralloc_junk_large JEMALLOC_N(n_arena_ralloc_junk_large)
 #endif
 static void
 arena_ralloc_junk_large(void *ptr, size_t old_usize, size_t usize)
 {
 
 	if (config_fill && unlikely(opt_junk_free)) {
-		memset((void *)((uintptr_t)ptr + usize), 0x5a,
+		memset((void *)((uintptr_t)ptr + usize), JEMALLOC_FREE_JUNK,
 		    old_usize - usize);
 	}
 }
 #ifdef JEMALLOC_JET
 #undef arena_ralloc_junk_large
 #define	arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large)
 arena_ralloc_junk_large_t *arena_ralloc_junk_large =
-    JEMALLOC_N(arena_ralloc_junk_large_impl);
+    JEMALLOC_N(n_arena_ralloc_junk_large);
 #endif
 
 /*
  * Try to resize a large allocation, in order to avoid copying.  This will
  * always fail if growing an object, and the following run is already in use.
  */
 static bool
-arena_ralloc_large(void *ptr, size_t oldsize, size_t usize_min,
+arena_ralloc_large(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min,
     size_t usize_max, bool zero)
 {
 	arena_chunk_t *chunk;
 	arena_t *arena;
 
 	if (oldsize == usize_max) {
 		/* Current size class is compatible and maximal. */
 		return (false);
 	}
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 	arena = extent_node_arena_get(&chunk->node);
 
 	if (oldsize < usize_max) {
-		bool ret = arena_ralloc_large_grow(arena, chunk, ptr, oldsize,
-		    usize_min, usize_max, zero);
+		bool ret = arena_ralloc_large_grow(tsdn, arena, chunk, ptr,
+		    oldsize, usize_min, usize_max, zero);
 		if (config_fill && !ret && !zero) {
 			if (unlikely(opt_junk_alloc)) {
-				memset((void *)((uintptr_t)ptr + oldsize), 0xa5,
-				    isalloc(ptr, config_prof) - oldsize);
+				memset((void *)((uintptr_t)ptr + oldsize),
+				    JEMALLOC_ALLOC_JUNK,
+				    isalloc(tsdn, ptr, config_prof) - oldsize);
 			} else if (unlikely(opt_zero)) {
 				memset((void *)((uintptr_t)ptr + oldsize), 0,
-				    isalloc(ptr, config_prof) - oldsize);
+				    isalloc(tsdn, ptr, config_prof) - oldsize);
 			}
 		}
 		return (ret);
 	}
 
 	assert(oldsize > usize_max);
 	/* Fill before shrinking in order avoid a race. */
 	arena_ralloc_junk_large(ptr, oldsize, usize_max);
-	arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, usize_max);
+	arena_ralloc_large_shrink(tsdn, arena, chunk, ptr, oldsize, usize_max);
 	return (false);
 }
 
 bool
-arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
+arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
     size_t extra, bool zero)
 {
 	size_t usize_min, usize_max;
 
 	/* Calls with non-zero extra had to clamp extra. */
 	assert(extra == 0 || size + extra <= HUGE_MAXCLASS);
 
 	if (unlikely(size > HUGE_MAXCLASS))
@@ -3064,42 +3203,42 @@ arena_ralloc_no_move(tsd_t *tsd, void *p
 			    oldsize);
 			if ((usize_max > SMALL_MAXCLASS ||
 			    size2index(usize_max) != size2index(oldsize)) &&
 			    (size > oldsize || usize_max < oldsize))
 				return (true);
 		} else {
 			if (usize_max <= SMALL_MAXCLASS)
 				return (true);
-			if (arena_ralloc_large(ptr, oldsize, usize_min,
+			if (arena_ralloc_large(tsdn, ptr, oldsize, usize_min,
 			    usize_max, zero))
 				return (true);
 		}
 
 		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-		arena_decay_tick(tsd, extent_node_arena_get(&chunk->node));
+		arena_decay_tick(tsdn, extent_node_arena_get(&chunk->node));
 		return (false);
 	} else {
-		return (huge_ralloc_no_move(tsd, ptr, oldsize, usize_min,
+		return (huge_ralloc_no_move(tsdn, ptr, oldsize, usize_min,
 		    usize_max, zero));
 	}
 }
 
 static void *
-arena_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize,
+arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize,
     size_t alignment, bool zero, tcache_t *tcache)
 {
 
 	if (alignment == 0)
-		return (arena_malloc(tsd, arena, usize, size2index(usize), zero,
-		    tcache, true));
+		return (arena_malloc(tsdn, arena, usize, size2index(usize),
+		    zero, tcache, true));
 	usize = sa2u(usize, alignment);
 	if (unlikely(usize == 0 || usize > HUGE_MAXCLASS))
 		return (NULL);
-	return (ipalloct(tsd, usize, alignment, zero, tcache, arena));
+	return (ipalloct(tsdn, usize, alignment, zero, tcache, arena));
 }
 
 void *
 arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size,
     size_t alignment, bool zero, tcache_t *tcache)
 {
 	void *ret;
 	size_t usize;
@@ -3107,65 +3246,66 @@ arena_ralloc(tsd_t *tsd, arena_t *arena,
 	usize = s2u(size);
 	if (unlikely(usize == 0 || size > HUGE_MAXCLASS))
 		return (NULL);
 
 	if (likely(usize <= large_maxclass)) {
 		size_t copysize;
 
 		/* Try to avoid moving the allocation. */
-		if (!arena_ralloc_no_move(tsd, ptr, oldsize, usize, 0, zero))
+		if (!arena_ralloc_no_move(tsd_tsdn(tsd), ptr, oldsize, usize, 0,
+		    zero))
 			return (ptr);
 
 		/*
 		 * size and oldsize are different enough that we need to move
 		 * the object.  In that case, fall back to allocating new space
 		 * and copying.
 		 */
-		ret = arena_ralloc_move_helper(tsd, arena, usize, alignment,
-		    zero, tcache);
+		ret = arena_ralloc_move_helper(tsd_tsdn(tsd), arena, usize,
+		    alignment, zero, tcache);
 		if (ret == NULL)
 			return (NULL);
 
 		/*
 		 * Junk/zero-filling were already done by
 		 * ipalloc()/arena_malloc().
 		 */
 
 		copysize = (usize < oldsize) ? usize : oldsize;
 		JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize);
 		memcpy(ret, ptr, copysize);
-		isqalloc(tsd, ptr, oldsize, tcache);
+		isqalloc(tsd, ptr, oldsize, tcache, true);
 	} else {
 		ret = huge_ralloc(tsd, arena, ptr, oldsize, usize, alignment,
 		    zero, tcache);
 	}
 	return (ret);
 }
 
 dss_prec_t
-arena_dss_prec_get(arena_t *arena)
+arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena)
 {
 	dss_prec_t ret;
 
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	ret = arena->dss_prec;
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 	return (ret);
 }
 
 bool
-arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec)
+arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec)
 {
 
 	if (!have_dss)
 		return (dss_prec != dss_prec_disabled);
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	arena->dss_prec = dss_prec;
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 	return (false);
 }
 
 ssize_t
 arena_lg_dirty_mult_default_get(void)
 {
 
 	return ((ssize_t)atomic_read_z((size_t *)&lg_dirty_mult_default));
@@ -3203,51 +3343,53 @@ arena_decay_time_default_set(ssize_t dec
 }
 
 static void
 arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads,
     const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time,
     size_t *nactive, size_t *ndirty)
 {
 
-	*nthreads += arena_nthreads_get(arena);
+	*nthreads += arena_nthreads_get(arena, false);
 	*dss = dss_prec_names[arena->dss_prec];
 	*lg_dirty_mult = arena->lg_dirty_mult;
-	*decay_time = arena->decay_time;
+	*decay_time = arena->decay.time;
 	*nactive += arena->nactive;
 	*ndirty += arena->ndirty;
 }
 
 void
-arena_basic_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss,
-    ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive,
-    size_t *ndirty)
+arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
+    const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time,
+    size_t *nactive, size_t *ndirty)
 {
 
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	arena_basic_stats_merge_locked(arena, nthreads, dss, lg_dirty_mult,
 	    decay_time, nactive, ndirty);
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 }
 
 void
-arena_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss,
-    ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive,
-    size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats,
-    malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats)
+arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
+    const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time,
+    size_t *nactive, size_t *ndirty, arena_stats_t *astats,
+    malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats,
+    malloc_huge_stats_t *hstats)
 {
 	unsigned i;
 
 	cassert(config_stats);
 
-	malloc_mutex_lock(&arena->lock);
+	malloc_mutex_lock(tsdn, &arena->lock);
 	arena_basic_stats_merge_locked(arena, nthreads, dss, lg_dirty_mult,
 	    decay_time, nactive, ndirty);
 
 	astats->mapped += arena->stats.mapped;
+	astats->retained += arena->stats.retained;
 	astats->npurge += arena->stats.npurge;
 	astats->nmadvise += arena->stats.nmadvise;
 	astats->purged += arena->stats.purged;
 	astats->metadata_mapped += arena->stats.metadata_mapped;
 	astats->metadata_allocated += arena_metadata_allocated_get(arena);
 	astats->allocated_large += arena->stats.allocated_large;
 	astats->nmalloc_large += arena->stats.nmalloc_large;
 	astats->ndalloc_large += arena->stats.ndalloc_large;
@@ -3263,95 +3405,91 @@ arena_stats_merge(arena_t *arena, unsign
 		lstats[i].curruns += arena->stats.lstats[i].curruns;
 	}
 
 	for (i = 0; i < nhclasses; i++) {
 		hstats[i].nmalloc += arena->stats.hstats[i].nmalloc;
 		hstats[i].ndalloc += arena->stats.hstats[i].ndalloc;
 		hstats[i].curhchunks += arena->stats.hstats[i].curhchunks;
 	}
-	malloc_mutex_unlock(&arena->lock);
+	malloc_mutex_unlock(tsdn, &arena->lock);
 
 	for (i = 0; i < NBINS; i++) {
 		arena_bin_t *bin = &arena->bins[i];
 
-		malloc_mutex_lock(&bin->lock);
+		malloc_mutex_lock(tsdn, &bin->lock);
 		bstats[i].nmalloc += bin->stats.nmalloc;
 		bstats[i].ndalloc += bin->stats.ndalloc;
 		bstats[i].nrequests += bin->stats.nrequests;
 		bstats[i].curregs += bin->stats.curregs;
 		if (config_tcache) {
 			bstats[i].nfills += bin->stats.nfills;
 			bstats[i].nflushes += bin->stats.nflushes;
 		}
 		bstats[i].nruns += bin->stats.nruns;
 		bstats[i].reruns += bin->stats.reruns;
 		bstats[i].curruns += bin->stats.curruns;
-		malloc_mutex_unlock(&bin->lock);
+		malloc_mutex_unlock(tsdn, &bin->lock);
 	}
 }
 
 unsigned
-arena_nthreads_get(arena_t *arena)
+arena_nthreads_get(arena_t *arena, bool internal)
 {
 
-	return (atomic_read_u(&arena->nthreads));
-}
-
-void
-arena_nthreads_inc(arena_t *arena)
-{
-
-	atomic_add_u(&arena->nthreads, 1);
+	return (atomic_read_u(&arena->nthreads[internal]));
 }
 
 void
-arena_nthreads_dec(arena_t *arena)
+arena_nthreads_inc(arena_t *arena, bool internal)
 {
 
-	atomic_sub_u(&arena->nthreads, 1);
+	atomic_add_u(&arena->nthreads[internal], 1);
+}
+
+void
+arena_nthreads_dec(arena_t *arena, bool internal)
+{
+
+	atomic_sub_u(&arena->nthreads[internal], 1);
 }
 
 arena_t *
-arena_new(unsigned ind)
+arena_new(tsdn_t *tsdn, unsigned ind)
 {
 	arena_t *arena;
-	size_t arena_size;
 	unsigned i;
-	arena_bin_t *bin;
-
-	/* Compute arena size to incorporate sufficient runs_avail elements. */
-	arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_run_tree_t) *
-	    runs_avail_nclasses);
+
 	/*
 	 * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly
 	 * because there is no way to clean up if base_alloc() OOMs.
 	 */
 	if (config_stats) {
-		arena = (arena_t *)base_alloc(CACHELINE_CEILING(arena_size) +
-		    QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t) +
-		    nhclasses) * sizeof(malloc_huge_stats_t));
+		arena = (arena_t *)base_alloc(tsdn,
+		    CACHELINE_CEILING(sizeof(arena_t)) +
+		    QUANTUM_CEILING((nlclasses * sizeof(malloc_large_stats_t)))
+		    + (nhclasses * sizeof(malloc_huge_stats_t)));
 	} else
-		arena = (arena_t *)base_alloc(arena_size);
+		arena = (arena_t *)base_alloc(tsdn, sizeof(arena_t));
 	if (arena == NULL)
 		return (NULL);
 
 	arena->ind = ind;
-	arena->nthreads = 0;
-	if (malloc_mutex_init(&arena->lock))
+	arena->nthreads[0] = arena->nthreads[1] = 0;
+	if (malloc_mutex_init(&arena->lock, "arena", WITNESS_RANK_ARENA))
 		return (NULL);
 
 	if (config_stats) {
 		memset(&arena->stats, 0, sizeof(arena_stats_t));
 		arena->stats.lstats = (malloc_large_stats_t *)((uintptr_t)arena
-		    + CACHELINE_CEILING(arena_size));
+		    + CACHELINE_CEILING(sizeof(arena_t)));
 		memset(arena->stats.lstats, 0, nlclasses *
 		    sizeof(malloc_large_stats_t));
 		arena->stats.hstats = (malloc_huge_stats_t *)((uintptr_t)arena
-		    + CACHELINE_CEILING(arena_size) +
+		    + CACHELINE_CEILING(sizeof(arena_t)) +
 		    QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t)));
 		memset(arena->stats.hstats, 0, nhclasses *
 		    sizeof(malloc_huge_stats_t));
 		if (config_tcache)
 			ql_new(&arena->tcache_ql);
 	}
 
 	if (config_prof)
@@ -3361,59 +3499,66 @@ arena_new(unsigned ind)
 		/*
 		 * A nondeterministic seed based on the address of arena reduces
 		 * the likelihood of lockstep non-uniform cache index
 		 * utilization among identical concurrent processes, but at the
 		 * cost of test repeatability.  For debug builds, instead use a
 		 * deterministic seed.
 		 */
 		arena->offset_state = config_debug ? ind :
-		    (uint64_t)(uintptr_t)arena;
+		    (size_t)(uintptr_t)arena;
 	}
 
 	arena->dss_prec = chunk_dss_prec_get();
 
+	ql_new(&arena->achunks);
+
 	arena->spare = NULL;
 
 	arena->lg_dirty_mult = arena_lg_dirty_mult_default_get();
 	arena->purging = false;
 	arena->nactive = 0;
 	arena->ndirty = 0;
 
-	for(i = 0; i < runs_avail_nclasses; i++)
-		arena_run_tree_new(&arena->runs_avail[i]);
+	for (i = 0; i < NPSIZES; i++)
+		arena_run_heap_new(&arena->runs_avail[i]);
+
 	qr_new(&arena->runs_dirty, rd_link);
 	qr_new(&arena->chunks_cache, cc_link);
 
 	if (opt_purge == purge_mode_decay)
 		arena_decay_init(arena, arena_decay_time_default_get());
 
 	ql_new(&arena->huge);
-	if (malloc_mutex_init(&arena->huge_mtx))
+	if (malloc_mutex_init(&arena->huge_mtx, "arena_huge",
+	    WITNESS_RANK_ARENA_HUGE))
 		return (NULL);
 
 	extent_tree_szad_new(&arena->chunks_szad_cached);
 	extent_tree_ad_new(&arena->chunks_ad_cached);
 	extent_tree_szad_new(&arena->chunks_szad_retained);
 	extent_tree_ad_new(&arena->chunks_ad_retained);
-	if (malloc_mutex_init(&arena->chunks_mtx))
+	if (malloc_mutex_init(&arena->chunks_mtx, "arena_chunks",
+	    WITNESS_RANK_ARENA_CHUNKS))
 		return (NULL);
 	ql_new(&arena->node_cache);
-	if (malloc_mutex_init(&arena->node_cache_mtx))
+	if (malloc_mutex_init(&arena->node_cache_mtx, "arena_node_cache",
+	    WITNESS_RANK_ARENA_NODE_CACHE))
 		return (NULL);
 
 	arena->chunk_hooks = chunk_hooks_default;
 
 	/* Initialize bins. */
 	for (i = 0; i < NBINS; i++) {
-		bin = &arena->bins[i];
-		if (malloc_mutex_init(&bin->lock))
+		arena_bin_t *bin = &arena->bins[i];
+		if (malloc_mutex_init(&bin->lock, "arena_bin",
+		    WITNESS_RANK_ARENA_BIN))
 			return (NULL);
 		bin->runcur = NULL;
-		arena_run_tree_new(&bin->runs);
+		arena_run_heap_new(&bin->runs);
 		if (config_stats)
 			memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
 	}
 
 	return (arena);
 }
 
 /*
@@ -3500,98 +3645,40 @@ bin_info_run_size_calc(arena_bin_info_t 
 	assert(actual_run_size == s2u(actual_run_size));
 
 	/* Copy final settings. */
 	bin_info->run_size = actual_run_size;
 	bin_info->nregs = actual_nregs;
 	bin_info->reg0_offset = (uint32_t)(actual_run_size - (actual_nregs *
 	    bin_info->reg_interval) - pad_size + bin_info->redzone_size);
 
-	if (actual_run_size > small_maxrun)
-		small_maxrun = actual_run_size;
-
 	assert(bin_info->reg0_offset - bin_info->redzone_size + (bin_info->nregs
 	    * bin_info->reg_interval) + pad_size == bin_info->run_size);
 }
 
 static void
 bin_info_init(void)
 {
 	arena_bin_info_t *bin_info;
 
 #define	BIN_INFO_INIT_bin_yes(index, size)				\
 	bin_info = &arena_bin_info[index];				\
 	bin_info->reg_size = size;					\
 	bin_info_run_size_calc(bin_info);				\
 	bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
 #define	BIN_INFO_INIT_bin_no(index, size)
-#define	SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup)	\
+#define	SC(index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup)	\
 	BIN_INFO_INIT_bin_##bin(index, (ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta))
 	SIZE_CLASSES
 #undef BIN_INFO_INIT_bin_yes
 #undef BIN_INFO_INIT_bin_no
 #undef SC
 }
 
-static bool
-small_run_size_init(void)
-{
-
-	assert(small_maxrun != 0);
-
-	small_run_tab = (bool *)base_alloc(sizeof(bool) * (small_maxrun >>
-	    LG_PAGE));
-	if (small_run_tab == NULL)
-		return (true);
-
-#define	TAB_INIT_bin_yes(index, size) {					\
-		arena_bin_info_t *bin_info = &arena_bin_info[index];	\
-		small_run_tab[bin_info->run_size >> LG_PAGE] = true;	\
-	}
-#define	TAB_INIT_bin_no(index, size)
-#define	SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup)	\
-	TAB_INIT_bin_##bin(index, (ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta))
-	SIZE_CLASSES
-#undef TAB_INIT_bin_yes
-#undef TAB_INIT_bin_no
-#undef SC
-
-	return (false);
-}
-
-static bool
-run_quantize_init(void)
-{
-	unsigned i;
-
-	run_quantize_max = chunksize + large_pad;
-
-	run_quantize_floor_tab = (size_t *)base_alloc(sizeof(size_t) *
-	    (run_quantize_max >> LG_PAGE));
-	if (run_quantize_floor_tab == NULL)
-		return (true);
-
-	run_quantize_ceil_tab = (size_t *)base_alloc(sizeof(size_t) *
-	    (run_quantize_max >> LG_PAGE));
-	if (run_quantize_ceil_tab == NULL)
-		return (true);
-
-	for (i = 1; i <= run_quantize_max >> LG_PAGE; i++) {
-		size_t run_size = i << LG_PAGE;
-
-		run_quantize_floor_tab[i-1] =
-		    run_quantize_floor_compute(run_size);
-		run_quantize_ceil_tab[i-1] =
-		    run_quantize_ceil_compute(run_size);
-	}
-
-	return (false);
-}
-
-bool
+void
 arena_boot(void)
 {
 	unsigned i;
 
 	arena_lg_dirty_mult_default_set(opt_lg_dirty_mult);
 	arena_decay_time_default_set(opt_decay_time);
 
 	/*
@@ -3629,75 +3716,66 @@ arena_boot(void)
 		 */
 		large_maxclass = arena_maxrun;
 	}
 	assert(large_maxclass > 0);
 	nlclasses = size2index(large_maxclass) - size2index(SMALL_MAXCLASS);
 	nhclasses = NSIZES - nlclasses - NBINS;
 
 	bin_info_init();
-	if (small_run_size_init())
-		return (true);
-	if (run_quantize_init())
-		return (true);
-
-	runs_avail_bias = size2index(PAGE);
-	runs_avail_nclasses = size2index(run_quantize_max)+1 - runs_avail_bias;
-
-	return (false);
+}
+
+void
+arena_prefork0(tsdn_t *tsdn, arena_t *arena)
+{
+
+	malloc_mutex_prefork(tsdn, &arena->lock);
 }
 
 void
-arena_prefork0(arena_t *arena)
+arena_prefork1(tsdn_t *tsdn, arena_t *arena)
 {
 
-	malloc_mutex_prefork(&arena->lock);
+	malloc_mutex_prefork(tsdn, &arena->chunks_mtx);
 }
 
 void
-arena_prefork1(arena_t *arena)
+arena_prefork2(tsdn_t *tsdn, arena_t *arena)
 {
 
-	malloc_mutex_prefork(&arena->chunks_mtx);
+	malloc_mutex_prefork(tsdn, &arena->node_cache_mtx);
 }
 
 void
-arena_prefork2(arena_t *arena)
-{
-
-	malloc_mutex_prefork(&arena->node_cache_mtx);
-}
-
-void
-arena_prefork3(arena_t *arena)
+arena_prefork3(tsdn_t *tsdn, arena_t *arena)
 {
 	unsigned i;
 
 	for (i = 0; i < NBINS; i++)
-		malloc_mutex_prefork(&arena->bins[i].lock);
-	malloc_mutex_prefork(&arena->huge_mtx);
+		malloc_mutex_prefork(tsdn, &arena->bins[i].lock);
+	malloc_mutex_prefork(tsdn, &arena->huge_mtx);
 }
 
 void
-arena_postfork_parent(arena_t *arena)
+arena_postfork_parent(tsdn_t *tsdn, arena_t *arena)
 {
 	unsigned i;
 
-	malloc_mutex_postfork_parent(&arena->huge_mtx);
+	malloc_mutex_postfork_parent(tsdn, &arena->huge_mtx);
 	for (i = 0; i < NBINS; i++)
-		malloc_mutex_postfork_parent(&arena->bins[i].lock);
-	malloc_mutex_postfork_parent(&arena->node_cache_mtx);
-	malloc_mutex_postfork_parent(&arena->chunks_mtx);
-	malloc_mutex_postfork_parent(&arena->lock);
+		malloc_mutex_postfork_parent(tsdn, &arena->bins[i].lock);
+	malloc_mutex_postfork_parent(tsdn, &arena->node_cache_mtx);
+	malloc_mutex_postfork_parent(tsdn, &arena->chunks_mtx);
+	malloc_mutex_postfork_parent(tsdn, &arena->lock);
 }
 
 void
-arena_postfork_child(arena_t *arena)
+arena_postfork_child(tsdn_t *tsdn, arena_t *arena)
 {
 	unsigned i;
 
-	malloc_mutex_postfork_child(&arena->huge_mtx);
+	malloc_mutex_postfork_child(tsdn, &arena->huge_mtx);
 	for (i = 0; i < NBINS; i++)
-		malloc_mutex_postfork_child(&arena->bins[i].lock);
-	malloc_mutex_postfork_child(&arena->node_cache_mtx);
-	malloc_mutex_postfork_child(&arena->chunks_mtx);
-	malloc_mutex_postfork_child(&arena->lock);
+		malloc_mutex_postfork_child(tsdn, &arena->bins[i].lock);
+	malloc_mutex_postfork_child(tsdn, &arena->node_cache_mtx);
+	malloc_mutex_postfork_child(tsdn, &arena->chunks_mtx);
+	malloc_mutex_postfork_child(tsdn, &arena->lock);
 }
--- a/memory/jemalloc/src/src/base.c
+++ b/memory/jemalloc/src/src/base.c
@@ -8,57 +8,59 @@ static malloc_mutex_t	base_mtx;
 static extent_tree_t	base_avail_szad;
 static extent_node_t	*base_nodes;
 static size_t		base_allocated;
 static size_t		base_resident;
 static size_t		base_mapped;
 
 /******************************************************************************/
 
-/* base_mtx must be held. */
 static extent_node_t *
-base_node_try_alloc(void)
+base_node_try_alloc(tsdn_t *tsdn)
 {
 	extent_node_t *node;
 
+	malloc_mutex_assert_owner(tsdn, &base_mtx);
+
 	if (base_nodes == NULL)
 		return (NULL);
 	node = base_nodes;
 	base_nodes = *(extent_node_t **)node;
 	JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t));
 	return (node);
 }
 
-/* base_mtx must be held. */
 static void
-base_node_dalloc(extent_node_t *node)
+base_node_dalloc(tsdn_t *tsdn, extent_node_t *node)
 {
 
+	malloc_mutex_assert_owner(tsdn, &base_mtx);
+
 	JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t));
 	*(extent_node_t **)node = base_nodes;
 	base_nodes = node;
 }
 
-/* base_mtx must be held. */
 static extent_node_t *
-base_chunk_alloc(size_t minsize)
+base_chunk_alloc(tsdn_t *tsdn, size_t minsize)
 {
 	extent_node_t *node;
 	size_t csize, nsize;
 	void *addr;
 
+	malloc_mutex_assert_owner(tsdn, &base_mtx);
 	assert(minsize != 0);
-	node = base_node_try_alloc();
+	node = base_node_try_alloc(tsdn);
 	/* Allocate enough space to also carve a node out if necessary. */
 	nsize = (node == NULL) ? CACHELINE_CEILING(sizeof(extent_node_t)) : 0;
 	csize = CHUNK_CEILING(minsize + nsize);
 	addr = chunk_alloc_base(csize);
 	if (addr == NULL) {
 		if (node != NULL)
-			base_node_dalloc(node);
+			base_node_dalloc(tsdn, node);
 		return (NULL);
 	}
 	base_mapped += csize;
 	if (node == NULL) {
 		node = (extent_node_t *)addr;
 		addr = (void *)((uintptr_t)addr + nsize);
 		csize -= nsize;
 		if (config_stats) {
@@ -71,104 +73,105 @@ base_chunk_alloc(size_t minsize)
 }
 
 /*
  * base_alloc() guarantees demand-zeroed memory, in order to make multi-page
  * sparse data structures such as radix tree nodes efficient with respect to
  * physical memory usage.
  */
 void *
-base_alloc(size_t size)
+base_alloc(tsdn_t *tsdn, size_t size)
 {
 	void *ret;
 	size_t csize, usize;
 	extent_node_t *node;
 	extent_node_t key;
 
 	/*
 	 * Round size up to nearest multiple of the cacheline size, so that
 	 * there is no chance of false cache line sharing.
 	 */
 	csize = CACHELINE_CEILING(size);
 
 	usize = s2u(csize);
 	extent_node_init(&key, NULL, NULL, usize, false, false);
-	malloc_mutex_lock(&base_mtx);
+	malloc_mutex_lock(tsdn, &base_mtx);
 	node = extent_tree_szad_nsearch(&base_avail_szad, &key);
 	if (node != NULL) {
 		/* Use existing space. */
 		extent_tree_szad_remove(&base_avail_szad, node);
 	} else {
 		/* Try to allocate more space. */
-		node = base_chunk_alloc(csize);
+		node = base_chunk_alloc(tsdn, csize);
 	}
 	if (node == NULL) {
 		ret = NULL;
 		goto label_return;
 	}
 
 	ret = extent_node_addr_get(node);
 	if (extent_node_size_get(node) > csize) {
 		extent_node_addr_set(node, (void *)((uintptr_t)ret + csize));
 		extent_node_size_set(node, extent_node_size_get(node) - csize);
 		extent_tree_szad_insert(&base_avail_szad, node);
 	} else
-		base_node_dalloc(node);
+		base_node_dalloc(tsdn, node);
 	if (config_stats) {
 		base_allocated += csize;
 		/*
 		 * Add one PAGE to base_resident for every page boundary that is
 		 * crossed by the new allocation.
 		 */
 		base_resident += PAGE_CEILING((uintptr_t)ret + csize) -
 		    PAGE_CEILING((uintptr_t)ret);
 	}
 	JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, csize);
 label_return:
-	malloc_mutex_unlock(&base_mtx);
+	malloc_mutex_unlock(tsdn, &base_mtx);
 	return (ret);
 }
 
 void
-base_stats_get(size_t *allocated, size_t *resident, size_t *mapped)
+base_stats_get(tsdn_t *tsdn, size_t *allocated, size_t *resident,
+    size_t *mapped)
 {
 
-	malloc_mutex_lock(&base_mtx);
+	malloc_mutex_lock(tsdn, &base_mtx);
 	assert(base_allocated <= base_resident);
 	assert(base_resident <= base_mapped);
 	*allocated = base_allocated;
 	*resident = base_resident;
 	*mapped = base_mapped;
-	malloc_mutex_unlock(&base_mtx);
+	malloc_mutex_unlock(tsdn, &base_mtx);
 }
 
 bool
 base_boot(void)
 {
 
-	if (malloc_mutex_init(&base_mtx))
+	if (malloc_mutex_init(&base_mtx, "base", WITNESS_RANK_BASE))
 		return (true);
 	extent_tree_szad_new(&base_avail_szad);
 	base_nodes = NULL;
 
 	return (false);
 }
 
 void
-base_prefork(void)
+base_prefork(tsdn_t *tsdn)
 {
 
-	malloc_mutex_prefork(&base_mtx);
+	malloc_mutex_prefork(tsdn, &base_mtx);
 }
 
 void
-base_postfork_parent(void)
+base_postfork_parent(tsdn_t *tsdn)
 {
 
-	malloc_mutex_postfork_parent(&base_mtx);
+	malloc_mutex_postfork_parent(tsdn, &base_mtx);
 }
 
 void
-base_postfork_child(void)
+base_postfork_child(tsdn_t *tsdn)
 {
 
-	malloc_mutex_postfork_child(&base_mtx);
+	malloc_mutex_postfork_child(tsdn, &base_mtx);
 }
--- a/memory/jemalloc/src/src/bitmap.c
+++ b/memory/jemalloc/src/src/bitmap.c
@@ -69,44 +69,41 @@ bitmap_init(bitmap_t *bitmap, const bitm
 	}
 }
 
 #else /* USE_TREE */
 
 void
 bitmap_info_init(bitmap_info_t *binfo, size_t nbits)
 {
-	size_t i;
 
 	assert(nbits > 0);
 	assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS));
 
-	i = nbits >> LG_BITMAP_GROUP_NBITS;
-	if (nbits % BITMAP_GROUP_NBITS != 0)
-		i++;
-	binfo->ngroups = i;
+	binfo->ngroups = BITMAP_BITS2GROUPS(nbits);
 	binfo->nbits = nbits;
 }
 
 static size_t
 bitmap_info_ngroups(const bitmap_info_t *binfo)
 {
 
 	return (binfo->ngroups);
 }
 
 void
 bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo)
 {
 	size_t extra;
 
 	memset(bitmap, 0xffU, bitmap_size(binfo));
-	extra = (binfo->nbits % (binfo->ngroups * BITMAP_GROUP_NBITS));
+	extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK))
+	    & BITMAP_GROUP_NBITS_MASK;
 	if (extra != 0)
-		bitmap[binfo->ngroups - 1] >>= (BITMAP_GROUP_NBITS - extra);
+		bitmap[binfo->ngroups - 1] >>= extra;
 }
 
 #endif /* USE_TREE */
 
 size_t
 bitmap_size(const bitmap_info_t *binfo)
 {
 
--- a/memory/jemalloc/src/src/chunk.c
+++ b/memory/jemalloc/src/src/chunk.c
@@ -44,47 +44,48 @@ const chunk_hooks_t	chunk_hooks_default 
 };
 
 /******************************************************************************/
 /*
  * Function prototypes for static functions that are referenced prior to
  * definition.
  */
 
-static void	chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks,
-    extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache,
-    void *chunk, size_t size, bool zeroed, bool committed);
+static void	chunk_record(tsdn_t *tsdn, arena_t *arena,
+    chunk_hooks_t *chunk_hooks, extent_tree_t *chunks_szad,
+    extent_tree_t *chunks_ad, bool cache, void *chunk, size_t size, bool zeroed,
+    bool committed);
 
 /******************************************************************************/
 
 static chunk_hooks_t
 chunk_hooks_get_locked(arena_t *arena)
 {
 
 	return (arena->chunk_hooks);
 }
 
 chunk_hooks_t
-chunk_hooks_get(arena_t *arena)
+chunk_hooks_get(tsdn_t *tsdn, arena_t *arena)
 {
 	chunk_hooks_t chunk_hooks;
 
-	malloc_mutex_lock(&arena->chunks_mtx);
+	malloc_mutex_lock(tsdn, &arena->chunks_mtx);
 	chunk_hooks = chunk_hooks_get_locked(arena);
-	malloc_mutex_unlock(&arena->chunks_mtx);
+	malloc_mutex_unlock(tsdn, &arena->chunks_mtx);
 
 	return (chunk_hooks);
 }
 
 chunk_hooks_t
-chunk_hooks_set(arena_t *arena, const chunk_hooks_t *chunk_hooks)
+chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, const chunk_hooks_t *chunk_hooks)
 {
 	chunk_hooks_t old_chunk_hooks;
 
-	malloc_mutex_lock(&arena->chunks_mtx);
+	malloc_mutex_lock(tsdn, &arena->chunks_mtx);
 	old_chunk_hooks = arena->chunk_hooks;
 	/*
 	 * Copy each field atomically so that it is impossible for readers to
 	 * see partially updated pointers.  There are places where readers only
 	 * need one hook function pointer (therefore no need to copy the
 	 * entirety of arena->chunk_hooks), and stale reads do not affect
 	 * correctness, so they perform unlocked reads.
 	 */
@@ -99,52 +100,53 @@ chunk_hooks_set(arena_t *arena, const ch
 	ATOMIC_COPY_HOOK(alloc);
 	ATOMIC_COPY_HOOK(dalloc);
 	ATOMIC_COPY_HOOK(commit);
 	ATOMIC_COPY_HOOK(decommit);
 	ATOMIC_COPY_HOOK(purge);
 	ATOMIC_COPY_HOOK(split);
 	ATOMIC_COPY_HOOK(merge);
 #undef ATOMIC_COPY_HOOK
-	malloc_mutex_unlock(&arena->chunks_mtx);
+	malloc_mutex_unlock(tsdn, &arena->chunks_mtx);
 
 	return (old_chunk_hooks);
 }
 
 static void
-chunk_hooks_assure_initialized_impl(arena_t *arena, chunk_hooks_t *chunk_hooks,
-    bool locked)
+chunk_hooks_assure_initialized_impl(tsdn_t *tsdn, arena_t *arena,
+    chunk_hooks_t *chunk_hooks, bool locked)
 {
 	static const chunk_hooks_t uninitialized_hooks =
 	    CHUNK_HOOKS_INITIALIZER;
 
 	if (memcmp(chunk_hooks, &uninitialized_hooks, sizeof(chunk_hooks_t)) ==
 	    0) {
 		*chunk_hooks = locked ? chunk_hooks_get_locked(arena) :
-		    chunk_hooks_get(arena);
+		    chunk_hooks_get(tsdn, arena);
 	}
 }
 
 static void
-chunk_hooks_assure_initialized_locked(arena_t *arena,
+chunk_hooks_assure_initialized_locked(tsdn_t *tsdn, arena_t *arena,
     chunk_hooks_t *chunk_hooks)
 {
 
-	chunk_hooks_assure_initialized_impl(arena, chunk_hooks, true);
+	chunk_hooks_assure_initialized_impl(tsdn, arena, chunk_hooks, true);
 }
 
 static void
-chunk_hooks_assure_initialized(arena_t *arena, chunk_hooks_t *chunk_hooks)
+chunk_hooks_assure_initialized(tsdn_t *tsdn, arena_t *arena,
+    chunk_hooks_t *chunk_hooks)
 {
 
-	chunk_hooks_assure_initialized_impl(arena, chunk_hooks, false);
+	chunk_hooks_assure_initialized_impl(tsdn, arena, chunk_hooks, false);
 }
 
 bool
-chunk_register(const void *chunk, const extent_node_t *node)
+chunk_register(tsdn_t *tsdn, const void *chunk, const extent_node_t *node)
 {
 
 	assert(extent_node_addr_get(node) == chunk);
 
 	if (rtree_set(&chunks_rtree, (uintptr_t)chunk, node))
 		return (true);
 	if (config_prof && opt_prof) {
 		size_t size = extent_node_size_get(node);
@@ -154,17 +156,17 @@ chunk_register(const void *chunk, const 
 		while (cur > high && atomic_cas_z(&highchunks, high, cur)) {
 			/*
 			 * Don't refresh cur, because it may have decreased
 			 * since this thread lost the highchunks update race.
 			 */
 			high = atomic_read_z(&highchunks);
 		}
 		if (cur > high && prof_gdump_get_unlocked())
-			prof_gdump();
+			prof_gdump(tsdn);
 	}
 
 	return (false);
 }
 
 void
 chunk_deregister(const void *chunk, const extent_node_t *node)
 {
@@ -192,17 +194,17 @@ chunk_first_best_fit(arena_t *arena, ext
 
 	assert(size == CHUNK_CEILING(size));
 
 	extent_node_init(&key, arena, NULL, size, false, false);
 	return (extent_tree_szad_nsearch(chunks_szad, &key));
 }
 
 static void *
-chunk_recycle(arena_t *arena, chunk_hooks_t *chunk_hooks,
+chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
     extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache,
     void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit,
     bool dalloc_node)
 {
 	void *ret;
 	extent_node_t *node;
 	size_t alloc_size, leadsize, trailsize;
 	bool zeroed, committed;
@@ -214,30 +216,30 @@ chunk_recycle(arena_t *arena, chunk_hook
 	 * we're operating on a specific chunk.
 	 */
 	assert(dalloc_node || new_addr != NULL);
 
 	alloc_size = CHUNK_CEILING(s2u(size + alignment - chunksize));
 	/* Beware size_t wrap-around. */
 	if (alloc_size < size)
 		return (NULL);
-	malloc_mutex_lock(&arena->chunks_mtx);
-	chunk_hooks_assure_initialized_locked(arena, chunk_hooks);
+	malloc_mutex_lock(tsdn, &arena->chunks_mtx);
+	chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks);
 	if (new_addr != NULL) {
 		extent_node_t key;
 		extent_node_init(&key, arena, new_addr, alloc_size, false,
 		    false);
 		node = extent_tree_ad_search(chunks_ad, &key);
 	} else {
 		node = chunk_first_best_fit(arena, chunks_szad, chunks_ad,
 		    alloc_size);
 	}
 	if (node == NULL || (new_addr != NULL && extent_node_size_get(node) <
 	    size)) {
-		malloc_mutex_unlock(&arena->chunks_mtx);
+		malloc_mutex_unlock(tsdn, &arena->chunks_mtx);
 		return (NULL);
 	}
 	leadsize = ALIGNMENT_CEILING((uintptr_t)extent_node_addr_get(node),
 	    alignment) - (uintptr_t)extent_node_addr_get(node);
 	assert(new_addr == NULL || leadsize == 0);
 	assert(extent_node_size_get(node) >= leadsize + size);
 	trailsize = extent_node_size_get(node) - leadsize - size;
 	ret = (void *)((uintptr_t)extent_node_addr_get(node) + leadsize);
@@ -246,17 +248,17 @@ chunk_recycle(arena_t *arena, chunk_hook
 		*zero = true;
 	committed = extent_node_committed_get(node);
 	if (committed)
 		*commit = true;
 	/* Split the lead. */
 	if (leadsize != 0 &&
 	    chunk_hooks->split(extent_node_addr_get(node),
 	    extent_node_size_get(node), leadsize, size, false, arena->ind)) {
-		malloc_mutex_unlock(&arena->chunks_mtx);
+		malloc_mutex_unlock(tsdn, &arena->chunks_mtx);
 		return (NULL);
 	}
 	/* Remove node from the tree. */
 	extent_tree_szad_remove(chunks_szad, node);
 	extent_tree_ad_remove(chunks_ad, node);
 	arena_chunk_cache_maybe_remove(arena, node, cache);
 	if (leadsize != 0) {
 		/* Insert the leading space as a smaller chunk. */
@@ -266,96 +268,98 @@ chunk_recycle(arena_t *arena, chunk_hook
 		arena_chunk_cache_maybe_insert(arena, node, cache);
 		node = NULL;
 	}
 	if (trailsize != 0) {
 		/* Split the trail. */
 		if (chunk_hooks->split(ret, size + trailsize, size,
 		    trailsize, false, arena->ind)) {
 			if (dalloc_node && node != NULL)
-				arena_node_dalloc(arena, node);
-			malloc_mutex_unlock(&arena->chunks_mtx);
-			chunk_record(arena, chunk_hooks, chunks_szad, chunks_ad,
-			    cache, ret, size + trailsize, zeroed, committed);
+				arena_node_dalloc(tsdn, arena, node);
+			malloc_mutex_unlock(tsdn, &arena->chunks_mtx);
+			chunk_record(tsdn, arena, chunk_hooks, chunks_szad,
+			    chunks_ad, cache, ret, size + trailsize, zeroed,
+			    committed);
 			return (NULL);
 		}
 		/* Insert the trailing space as a smaller chunk. */
 		if (node == NULL) {
-			node = arena_node_alloc(arena);
+			node = arena_node_alloc(tsdn, arena);
 			if (node == NULL) {
-				malloc_mutex_unlock(&arena->chunks_mtx);
-				chunk_record(arena, chunk_hooks, chunks_szad,
-				    chunks_ad, cache, ret, size + trailsize,
-				    zeroed, committed);
+				malloc_mutex_unlock(tsdn, &arena->chunks_mtx);
+				chunk_record(tsdn, arena, chunk_hooks,
+				    chunks_szad, chunks_ad, cache, ret, size +
+				    trailsize, zeroed, committed);
 				return (NULL);
 			}
 		}
 		extent_node_init(node, arena, (void *)((uintptr_t)(ret) + size),
 		    trailsize, zeroed, committed);
 		extent_tree_szad_insert(chunks_szad, node);
 		extent_tree_ad_insert(chunks_ad, node);
 		arena_chunk_cache_maybe_insert(arena, node, cache);
 		node = NULL;
 	}
 	if (!committed && chunk_hooks->commit(ret, size, 0, size, arena->ind)) {
-		malloc_mutex_unlock(&arena->chunks_mtx);
-		chunk_record(arena, chunk_hooks, chunks_szad, chunks_ad, cache,
-		    ret, size, zeroed, committed);
+		malloc_mutex_unlock(tsdn, &arena->chunks_mtx);
+		chunk_record(tsdn, arena, chunk_hooks, chunks_szad, chunks_ad,
+		    cache, ret, size, zeroed, committed);
 		return (NULL);
 	}
-	malloc_mutex_unlock(&arena->chunks_mtx);
+	malloc_mutex_unlock(tsdn, &arena->chunks_mtx);
 
 	assert(dalloc_node || node != NULL);
 	if (dalloc_node && node != NULL)
-		arena_node_dalloc(arena, node);
+		arena_node_dalloc(tsdn, arena, node);
 	if (*zero) {
 		if (!zeroed)
 			memset(ret, 0, size);
 		else if (config_debug) {
 			size_t i;
 			size_t *p = (size_t *)(uintptr_t)ret;
 
-			JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, size);
 			for (i = 0; i < size / sizeof(size_t); i++)
 				assert(p[i] == 0);
 		}
+		if (config_valgrind)
+			JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, size);
 	}
 	return (ret);
 }
 
 /*
  * If the caller specifies (!*zero), it is still possible to receive zeroed
  * memory, in which case *zero is toggled to true.  arena_chunk_alloc() takes
  * advantage of this to avoid demanding zeroed chunks, but taking advantage of
  * them if they are returned.
  */
 static void *
-chunk_alloc_core(arena_t *arena, void *new_addr, size_t size, size_t alignment,
-    bool *zero, bool *commit, dss_prec_t dss_prec)
+chunk_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
+    size_t alignment, bool *zero, bool *commit, dss_prec_t dss_prec)
 {
 	void *ret;
 
 	assert(size != 0);
 	assert((size & chunksize_mask) == 0);
 	assert(alignment != 0);
 	assert((alignment & chunksize_mask) == 0);
 
 	/* "primary" dss. */
 	if (have_dss && dss_prec == dss_prec_primary && (ret =
-	    chunk_alloc_dss(arena, new_addr, size, alignment, zero, commit)) !=
-	    NULL)
+	    chunk_alloc_dss(tsdn, arena, new_addr, size, alignment, zero,
+	    commit)) != NULL)
 		return (ret);
 	/* mmap. */
 	if ((ret = chunk_alloc_mmap(new_addr, size, alignment, zero, commit)) !=
 	    NULL)
 		return (ret);
 	/* "secondary" dss. */
 	if (have_dss && dss_prec == dss_prec_secondary && (ret =
-	    chunk_alloc_dss(arena, new_addr, size, alignment, zero, commit)) !=
-	    NULL)
+	    chunk_alloc_dss(tsdn, arena, new_addr, size, alignment, zero,
+	    commit)) != NULL)
 		return (ret);
 
 	/* All strategies for allocation failed. */
 	return (NULL);
 }
 
 void *
 chunk_alloc_base(size_t size)
@@ -375,123 +379,148 @@ chunk_alloc_base(size_t size)
 		return (NULL);
 	if (config_valgrind)
 		JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
 
 	return (ret);
 }
 
 void *
-chunk_alloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr,
-    size_t size, size_t alignment, bool *zero, bool dalloc_node)
+chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
+    void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit,
+    bool dalloc_node)
 {
 	void *ret;
-	bool commit;
 
 	assert(size != 0);
 	assert((size & chunksize_mask) == 0);
 	assert(alignment != 0);
 	assert((alignment & chunksize_mask) == 0);
 
-	commit = true;
-	ret = chunk_recycle(arena, chunk_hooks, &arena->chunks_szad_cached,
-	    &arena->chunks_ad_cached, true, new_addr, size, alignment, zero,
-	    &commit, dalloc_node);
+	ret = chunk_recycle(tsdn, arena, chunk_hooks,
+	    &arena->chunks_szad_cached, &arena->chunks_ad_cached, true,
+	    new_addr, size, alignment, zero, commit, dalloc_node);
 	if (ret == NULL)
 		return (NULL);
-	assert(commit);
 	if (config_valgrind)
 		JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
 	return (ret);
 }
 
 static arena_t *
-chunk_arena_get(unsigned arena_ind)
+chunk_arena_get(tsdn_t *tsdn, unsigned arena_ind)
 {
 	arena_t *arena;
 
-	arena = arena_get(arena_ind, false);
+	arena = arena_get(tsdn, arena_ind, false);
 	/*
 	 * The arena we're allocating on behalf of must have been initialized
 	 * already.
 	 */
 	assert(arena != NULL);
 	return (arena);
 }
 
 static void *
-chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero,
-    bool *commit, unsigned arena_ind)
+chunk_alloc_default_impl(tsdn_t *tsdn, arena_t *arena, void *new_addr,
+    size_t size, size_t alignment, bool *zero, bool *commit)
 {
 	void *ret;
-	arena_t *arena;
 
-	arena = chunk_arena_get(arena_ind);
-	ret = chunk_alloc_core(arena, new_addr, size, alignment, zero, commit,
-	    arena->dss_prec);
+	ret = chunk_alloc_core(tsdn, arena, new_addr, size, alignment, zero,
+	    commit, arena->dss_prec);
 	if (ret == NULL)
 		return (NULL);
 	if (config_valgrind)
 		JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
 
 	return (ret);
 }
 
 static void *
-chunk_alloc_retained(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr,
-    size_t size, size_t alignment, bool *zero, bool *commit)
+chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero,
+    bool *commit, unsigned arena_ind)
 {
+	tsdn_t *tsdn;
+	arena_t *arena;
+
+	tsdn = tsdn_fetch();
+	arena = chunk_arena_get(tsdn, arena_ind);
+
+	return (chunk_alloc_default_impl(tsdn, arena, new_addr, size, alignment,
+	    zero, commit));
+}
+
+static void *
+chunk_alloc_retained(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
+    void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit)
+{
+	void *ret;
 
 	assert(size != 0);
 	assert((size & chunksize_mask) == 0);
 	assert(alignment != 0);
 	assert((alignment & chunksize_mask) == 0);
 
-	return (chunk_recycle(arena, chunk_hooks, &arena->chunks_szad_retained,
-	    &arena->chunks_ad_retained, false, new_addr, size, alignment, zero,
-	    commit, true));
+	ret = chunk_recycle(tsdn, arena, chunk_hooks,
+	    &arena->chunks_szad_retained, &arena->chunks_ad_retained, false,
+	    new_addr, size, alignment, zero, commit, true);
+
+	if (config_stats && ret != NULL)
+		arena->stats.retained -= size;
+
+	return (ret);
 }
 
 void *
-chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr,
-    size_t size, size_t alignment, bool *zero, bool *commit)
+chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
+    void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit)
 {
 	void *ret;
 
-	chunk_hooks_assure_initialized(arena, chunk_hooks);
+	chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks);
 
-	ret = chunk_alloc_retained(arena, chunk_hooks, new_addr, size,
+	ret = chunk_alloc_retained(tsdn, arena, chunk_hooks, new_addr, size,
 	    alignment, zero, commit);
 	if (ret == NULL) {
-		ret = chunk_hooks->alloc(new_addr, size, alignment, zero,
-		    commit, arena->ind);
+		if (chunk_hooks->alloc == chunk_alloc_default) {
+			/* Call directly to propagate tsdn. */
+			ret = chunk_alloc_default_impl(tsdn, arena, new_addr,
+			    size, alignment, zero, commit);
+		} else {
+			ret = chunk_hooks->alloc(new_addr, size, alignment,
+			    zero, commit, arena->ind);
+		}
+
 		if (ret == NULL)
 			return (NULL);
+
+		if (config_valgrind && chunk_hooks->alloc !=
+		    chunk_alloc_default)
+			JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, chunksize);
 	}
 
-	if (config_valgrind && chunk_hooks->alloc != chunk_alloc_default)
-		JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, chunksize);
 	return (ret);
 }
 
 static void
-chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks,
+chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
     extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache,
     void *chunk, size_t size, bool zeroed, bool committed)
 {
 	bool unzeroed;
 	extent_node_t *node, *prev;
 	extent_node_t key;
 
 	assert(!cache || !zeroed);
 	unzeroed = cache || !zeroed;
 	JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(chunk, size);
 
-	malloc_mutex_lock(&arena->chunks_mtx);
-	chunk_hooks_assure_initialized_locked(arena, chunk_hooks);
+	malloc_mutex_lock(tsdn, &arena->chunks_mtx);
+	chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks);
 	extent_node_init(&key, arena, (void *)((uintptr_t)chunk + size), 0,
 	    false, false);
 	node = extent_tree_ad_nsearch(chunks_ad, &key);
 	/* Try to coalesce forward. */
 	if (node != NULL && extent_node_addr_get(node) ==
 	    extent_node_addr_get(&key) && extent_node_committed_get(node) ==
 	    committed && !chunk_hooks->merge(chunk, size,
 	    extent_node_addr_get(node), extent_node_size_get(node), false,
@@ -506,27 +535,27 @@ chunk_record(arena_t *arena, chunk_hooks
 		extent_node_addr_set(node, chunk);
 		extent_node_size_set(node, size + extent_node_size_get(node));
 		extent_node_zeroed_set(node, extent_node_zeroed_get(node) &&
 		    !unzeroed);
 		extent_tree_szad_insert(chunks_szad, node);
 		arena_chunk_cache_maybe_insert(arena, node, cache);
 	} else {
 		/* Coalescing forward failed, so insert a new node. */
-		node = arena_node_alloc(arena);
+		node = arena_node_alloc(tsdn, arena);
 		if (node == NULL) {
 			/*
 			 * Node allocation failed, which is an exceedingly
 			 * unlikely failure.  Leak chunk after making sure its
 			 * pages have already been purged, so that this is only
 			 * a virtual memory leak.
 			 */
 			if (cache) {
-				chunk_purge_wrapper(arena, chunk_hooks, chunk,
-				    size, 0, size);
+				chunk_purge_wrapper(tsdn, arena, chunk_hooks,
+				    chunk, size, 0, size);
 			}
 			goto label_return;
 		}
 		extent_node_init(node, arena, chunk, size, !unzeroed,
 		    committed);
 		extent_tree_ad_insert(chunks_ad, node);
 		extent_tree_szad_insert(chunks_szad, node);
 		arena_chunk_cache_maybe_insert(arena, node, cache);
@@ -552,71 +581,88 @@ chunk_record(arena_t *arena, chunk_hooks
 		extent_node_addr_set(node, extent_node_addr_get(prev));
 		extent_node_size_set(node, extent_node_size_get(prev) +
 		    extent_node_size_get(node));
 		extent_node_zeroed_set(node, extent_node_zeroed_get(prev) &&
 		    extent_node_zeroed_get(node));
 		extent_tree_szad_insert(chunks_szad, node);
 		arena_chunk_cache_maybe_insert(arena, node, cache);
 
-		arena_node_dalloc(arena, prev);
+		arena_node_dalloc(tsdn, arena, prev);
 	}
 
 label_return:
-	malloc_mutex_unlock(&arena->chunks_mtx);
+	malloc_mutex_unlock(tsdn, &arena->chunks_mtx);
 }
 
 void
-chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk,
-    size_t size, bool committed)
+chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
+    void *chunk, size_t size, bool committed)
 {
 
 	assert(chunk != NULL);
 	assert(CHUNK_ADDR2BASE(chunk) == chunk);
 	assert(size != 0);
 	assert((size & chunksize_mask) == 0);
 
-	chunk_record(arena, chunk_hooks, &arena->chunks_szad_cached,
+	chunk_record(tsdn, arena, chunk_hooks, &arena->chunks_szad_cached,
 	    &arena->chunks_ad_cached, true, chunk, size, false, committed);
-	arena_maybe_purge(arena);
+	arena_maybe_purge(tsdn, arena);
+}
+
+static bool
+chunk_dalloc_default_impl(void *chunk, size_t size)
+{
+
+	if (!have_dss || !chunk_in_dss(chunk))
+		return (chunk_dalloc_mmap(chunk, size));
+	return (true);
 }
 
 static bool
 chunk_dalloc_default(void *chunk, size_t size, bool committed,
     unsigned arena_ind)
 {
 
-	if (!have_dss || !chunk_in_dss(chunk))
-		return (chunk_dalloc_mmap(chunk, size));
-	return (true);
+	return (chunk_dalloc_default_impl(chunk, size));
 }
 
 void
-chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk,
-    size_t size, bool zeroed, bool committed)
+chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
+    void *chunk, size_t size, bool zeroed, bool committed)
 {
+	bool err;
 
 	assert(chunk != NULL);
 	assert(CHUNK_ADDR2BASE(chunk) == chunk);
 	assert(size != 0);
 	assert((size & chunksize_mask) == 0);
 
-	chunk_hooks_assure_initialized(arena, chunk_hooks);
+	chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks);
 	/* Try to deallocate. */
-	if (!chunk_hooks->dalloc(chunk, size, committed, arena->ind))
+	if (chunk_hooks->dalloc == chunk_dalloc_default) {
+		/* Call directly to propagate tsdn. */
+		err = chunk_dalloc_default_impl(chunk, size);
+	} else
+		err = chunk_hooks->dalloc(chunk, size, committed, arena->ind);
+
+	if (!err)
 		return;
 	/* Try to decommit; purge if that fails. */
 	if (committed) {
 		committed = chunk_hooks->decommit(chunk, size, 0, size,
 		    arena->ind);
 	}
 	zeroed = !committed || !chunk_hooks->purge(chunk, size, 0, size,
 	    arena->ind);
-	chunk_record(arena, chunk_hooks, &arena->chunks_szad_retained,
+	chunk_record(tsdn, arena, chunk_hooks, &arena->chunks_szad_retained,
 	    &arena->chunks_ad_retained, false, chunk, size, zeroed, committed);
+
+	if (config_stats)
+		arena->stats.retained += size;
 }
 
 static bool
 chunk_commit_default(void *chunk, size_t size, size_t offset, size_t length,
     unsigned arena_ind)
 {
 
 	return (pages_commit((void *)((uintptr_t)chunk + (uintptr_t)offset),
@@ -643,52 +689,59 @@ chunk_purge_default(void *chunk, size_t 
 	assert(length != 0);
 	assert((length & PAGE_MASK) == 0);
 
 	return (pages_purge((void *)((uintptr_t)chunk + (uintptr_t)offset),
 	    length));
 }
 
 bool
-chunk_purge_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk,
-    size_t size, size_t offset, size_t length)
+chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
+    void *chunk, size_t size, size_t offset, size_t length)
 {
 
-	chunk_hooks_assure_initialized(arena, chunk_hooks);
+	chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks);
 	return (chunk_hooks->purge(chunk, size, offset, length, arena->ind));
 }
 
 static bool
 chunk_split_default(void *chunk, size_t size, size_t size_a, size_t size_b,
     bool committed, unsigned arena_ind)
 {
 
 	if (!maps_coalesce)
 		return (true);
 	return (false);
 }
 
 static bool
+chunk_merge_default_impl(void *chunk_a, void *chunk_b)
+{
+
+	if (!maps_coalesce)
+		return (true);
+	if (have_dss && !chunk_dss_mergeable(chunk_a, chunk_b))
+		return (true);
+
+	return (false);
+}
+
+static bool
 chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b,
     bool committed, unsigned arena_ind)
 {
 
-	if (!maps_coalesce)
-		return (true);
-	if (have_dss && chunk_in_dss(chunk_a) != chunk_in_dss(chunk_b))
-		return (true);
-
-	return (false);
+	return (chunk_merge_default_impl(chunk_a, chunk_b));
 }
 
 static rtree_node_elm_t *
 chunks_rtree_node_alloc(size_t nelms)
 {
 
-	return ((rtree_node_elm_t *)base_alloc(nelms *
+	return ((rtree_node_elm_t *)base_alloc(TSDN_NULL, nelms *
 	    sizeof(rtree_node_elm_t)));
 }
 
 bool
 chunk_boot(void)
 {
 #ifdef _WIN32
 	SYSTEM_INFO info;
@@ -715,37 +768,16 @@ chunk_boot(void)
 #endif
 
 	/* Set variables according to the value of opt_lg_chunk. */
 	chunksize = (ZU(1) << opt_lg_chunk);
 	assert(chunksize >= PAGE);
 	chunksize_mask = chunksize - 1;
 	chunk_npages = (chunksize >> LG_PAGE);
 
-	if (have_dss && chunk_dss_boot())
-		return (true);
+	if (have_dss)
+		chunk_dss_boot();
 	if (rtree_new(&chunks_rtree, (unsigned)((ZU(1) << (LG_SIZEOF_PTR+3)) -
 	    opt_lg_chunk), chunks_rtree_node_alloc, NULL))
 		return (true);
 
 	return (false);
 }
-
-void
-chunk_prefork(void)
-{
-
-	chunk_dss_prefork();
-}
-
-void
-chunk_postfork_parent(void)
-{
-
-	chunk_dss_postfork_parent();
-}
-
-void
-chunk_postfork_child(void)
-{
-
-	chunk_dss_postfork_child();
-}
--- a/memory/jemalloc/src/src/chunk_dss.c
+++ b/memory/jemalloc/src/src/chunk_dss.c
@@ -5,30 +5,29 @@
 
 const char	*dss_prec_names[] = {
 	"disabled",
 	"primary",
 	"secondary",
 	"N/A"
 };
 
-/* Current dss precedence default, used when creating new arenas. */
-static dss_prec_t	dss_prec_default = DSS_PREC_DEFAULT;
-
 /*
- * Protects sbrk() calls.  This avoids malloc races among threads, though it
- * does not protect against races with threads that call sbrk() directly.
+ * Current dss precedence default, used when creating new arenas.  NB: This is
+ * stored as unsigned rather than dss_prec_t because in principle there's no
+ * guarantee that sizeof(dss_prec_t) is the same as sizeof(unsigned), and we use
+ * atomic operations to synchronize the setting.
  */
-static malloc_mutex_t	dss_mtx;
+static unsigned		dss_prec_default = (unsigned)DSS_PREC_DEFAULT;
 
 /* Base address of the DSS. */
 static void		*dss_base;
-/* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */
-static void		*dss_prev;
-/* Current upper limit on DSS addresses. */
+/* Atomic boolean indicating whether the DSS is exhausted. */
+static unsigned		dss_exhausted;
+/* Atomic current upper limit on DSS addresses. */
 static void		*dss_max;
 
 /******************************************************************************/
 
 static void *
 chunk_dss_sbrk(intptr_t increment)
 {
 
@@ -42,71 +41,92 @@ chunk_dss_sbrk(intptr_t increment)
 
 dss_prec_t
 chunk_dss_prec_get(void)
 {
 	dss_prec_t ret;
 
 	if (!have_dss)
 		return (dss_prec_disabled);
-	malloc_mutex_lock(&dss_mtx);
-	ret = dss_prec_default;
-	malloc_mutex_unlock(&dss_mtx);
+	ret = (dss_prec_t)atomic_read_u(&dss_prec_default);
 	return (ret);
 }
 
 bool
 chunk_dss_prec_set(dss_prec_t dss_prec)
 {
 
 	if (!have_dss)
 		return (dss_prec != dss_prec_disabled);
-	malloc_mutex_lock(&dss_mtx);
-	dss_prec_default = dss_prec;
-	malloc_mutex_unlock(&dss_mtx);
+	atomic_write_u(&dss_prec_default, (unsigned)dss_prec);
 	return (false);
 }
 
+static void *
+chunk_dss_max_update(void *new_addr)
+{
+	void *max_cur;
+	spin_t spinner;
+
+	/*
+	 * Get the current end of the DSS as max_cur and assure that dss_max is
+	 * up to date.
+	 */
+	spin_init(&spinner);
+	while (true) {
+		void *max_prev = atomic_read_p(&dss_max);
+
+		max_cur = chunk_dss_sbrk(0);
+		if ((uintptr_t)max_prev > (uintptr_t)max_cur) {
+			/*
+			 * Another thread optimistically updated dss_max.  Wait
+			 * for it to finish.
+			 */
+			spin_adaptive(&spinner);
+			continue;
+		}
+		if (!atomic_cas_p(&dss_max, max_prev, max_cur))
+			break;
+	}
+	/* Fixed new_addr can only be supported if it is at the edge of DSS. */
+	if (new_addr != NULL && max_cur != new_addr)
+		return (NULL);
+
+	return (max_cur);
+}
+
 void *
-chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment,
-    bool *zero, bool *commit)
+chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
+    size_t alignment, bool *zero, bool *commit)
 {
 	cassert(have_dss);
 	assert(size > 0 && (size & chunksize_mask) == 0);
 	assert(alignment > 0 && (alignment & chunksize_mask) == 0);
 
 	/*
 	 * sbrk() uses a signed increment argument, so take care not to
 	 * interpret a huge allocation request as a negative increment.
 	 */
 	if ((intptr_t)size < 0)
 		return (NULL);
 
-	malloc_mutex_lock(&dss_mtx);
-	if (dss_prev != (void *)-1) {
-
+	if (!atomic_read_u(&dss_exhausted)) {
 		/*
 		 * The loop is necessary to recover from races with other
 		 * threads that are using the DSS for something other than
 		 * malloc.
 		 */
-		do {
-			void *ret, *cpad, *dss_next;
+		while (true) {
+			void *ret, *cpad, *max_cur, *dss_next, *dss_prev;
 			size_t gap_size, cpad_size;
 			intptr_t incr;
-			/* Avoid an unnecessary system call. */
-			if (new_addr != NULL && dss_max != new_addr)
-				break;
 
-			/* Get the current end of the DSS. */
-			dss_max = chunk_dss_sbrk(0);
-
-			/* Make sure the earlier condition still holds. */
-			if (new_addr != NULL && dss_max != new_addr)
-				break;
+			max_cur = chunk_dss_max_update(new_addr);
+			if (max_cur == NULL)
+				goto label_oom;
 
 			/*
 			 * Calculate how much padding is necessary to
 			 * chunk-align the end of the DSS.
 			 */
 			gap_size = (chunksize - CHUNK_ADDR2OFFSET(dss_max)) &
 			    chunksize_mask;
 			/*
@@ -115,100 +135,103 @@ chunk_alloc_dss(arena_t *arena, void *ne
 			 * recycled for later use.
 			 */
 			cpad = (void *)((uintptr_t)dss_max + gap_size);
 			ret = (void *)ALIGNMENT_CEILING((uintptr_t)dss_max,
 			    alignment);
 			cpad_size = (uintptr_t)ret - (uintptr_t)cpad;
 			dss_next = (void *)((uintptr_t)ret + size);
 			if ((uintptr_t)ret < (uintptr_t)dss_max ||
-			    (uintptr_t)dss_next < (uintptr_t)dss_max) {
-				/* Wrap-around. */
-				malloc_mutex_unlock(&dss_mtx);
-				return (NULL);
-			}
+			    (uintptr_t)dss_next < (uintptr_t)dss_max)
+				goto label_oom; /* Wrap-around. */
 			incr = gap_size + cpad_size + size;
+
+			/*
+			 * Optimistically update dss_max, and roll back below if
+			 * sbrk() fails.  No other thread will try to extend the
+			 * DSS while dss_max is greater than the current DSS
+			 * max reported by sbrk(0).
+			 */
+			if (atomic_cas_p(&dss_max, max_cur, dss_next))
+				continue;
+
+			/* Try to allocate. */
 			dss_prev = chunk_dss_sbrk(incr);
-			if (dss_prev == dss_max) {
+			if (dss_prev == max_cur) {
 				/* Success. */
-				dss_max = dss_next;
-				malloc_mutex_unlock(&dss_mtx);
 				if (cpad_size != 0) {
 					chunk_hooks_t chunk_hooks =
 					    CHUNK_HOOKS_INITIALIZER;
-					chunk_dalloc_wrapper(arena,
+					chunk_dalloc_wrapper(tsdn, arena,
 					    &chunk_hooks, cpad, cpad_size,
 					    false, true);
 				}
 				if (*zero) {
 					JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(
 					    ret, size);
 					memset(ret, 0, size);
 				}
 				if (!*commit)
 					*commit = pages_decommit(ret, size);
 				return (ret);
 			}
-		} while (dss_prev != (void *)-1);
+
+			/*
+			 * Failure, whether due to OOM or a race with a raw
+			 * sbrk() call from outside the allocator.  Try to roll
+			 * back optimistic dss_max update; if rollback fails,
+			 * it's due to another caller of this function having
+			 * succeeded since this invocation started, in which
+			 * case rollback is not necessary.
+			 */
+			atomic_cas_p(&dss_max, dss_next, max_cur);
+			if (dss_prev == (void *)-1) {
+				/* OOM. */
+				atomic_write_u(&dss_exhausted, (unsigned)true);
+				goto label_oom;
+			}
+		}
 	}
-	malloc_mutex_unlock(&dss_mtx);
+label_oom:
+	return (NULL);
+}
 
-	return (NULL);
+static bool
+chunk_in_dss_helper(void *chunk, void *max)
+{
+
+	return ((uintptr_t)chunk >= (uintptr_t)dss_base && (uintptr_t)chunk <
+	    (uintptr_t)max);
 }
 
 bool
 chunk_in_dss(void *chunk)
 {
-	bool ret;
 
 	cassert(have_dss);
 
-	malloc_mutex_lock(&dss_mtx);
-	if ((uintptr_t)chunk >= (uintptr_t)dss_base
-	    && (uintptr_t)chunk < (uintptr_t)dss_max)
-		ret = true;
-	else
-		ret = false;
-	malloc_mutex_unlock(&dss_mtx);
-
-	return (ret);
+	return (chunk_in_dss_helper(chunk, atomic_read_p(&dss_max)));
 }
 
 bool
+chunk_dss_mergeable(void *chunk_a, void *chunk_b)
+{
+	void *max;
+
+	cassert(have_dss);
+
+	max = atomic_read_p(&dss_max);
+	return (chunk_in_dss_helper(chunk_a, max) ==
+	    chunk_in_dss_helper(chunk_b, max));
+}
+
+void
 chunk_dss_boot(void)
 {
 
 	cassert(have_dss);
 
-	if (malloc_mutex_init(&dss_mtx))
-		return (true);
 	dss_base = chunk_dss_sbrk(0);
-	dss_prev = dss_base;
+	dss_exhausted = (unsigned)(dss_base == (void *)-1);
 	dss_max = dss_base;
-
-	return (false);
-}
-
-void
-chunk_dss_prefork(void)
-{
-
-	if (have_dss)
-		malloc_mutex_prefork(&dss_mtx);
-}
-
-void
-chunk_dss_postfork_parent(void)
-{
-
-	if (have_dss)
-		malloc_mutex_postfork_parent(&dss_mtx);
-}
-
-void
-chunk_dss_postfork_child(void)
-{
-
-	if (have_dss)
-		malloc_mutex_postfork_child(&dss_mtx);
 }
 
 /******************************************************************************/
--- a/memory/jemalloc/src/src/chunk_mmap.c
+++ b/memory/jemalloc/src/src/chunk_mmap.c
@@ -11,28 +11,26 @@ chunk_alloc_mmap_slow(size_t size, size_
 
 	alloc_size = size + alignment - PAGE;
 	/* Beware size_t wrap-around. */
 	if (alloc_size < size)
 		return (NULL);
 	do {
 		void *pages;
 		size_t leadsize;
-		pages = pages_map(NULL, alloc_size);
+		pages = pages_map(NULL, alloc_size, commit);
 		if (pages == NULL)
 			return (NULL);
 		leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) -
 		    (uintptr_t)pages;
-		ret = pages_trim(pages, alloc_size, leadsize, size);
+		ret = pages_trim(pages, alloc_size, leadsize, size, commit);
 	} while (ret == NULL);
 
 	assert(ret != NULL);
 	*zero = true;
-	if (!*commit)
-		*commit = pages_decommit(ret, size);
 	return (ret);
 }
 
 void *
 chunk_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero,
     bool *commit)
 {
 	void *ret;
@@ -49,30 +47,28 @@ chunk_alloc_mmap(void *new_addr, size_t 
 	 * Optimistically try mapping precisely the right amount before falling
 	 * back to the slow method, with the expectation that the optimistic
 	 * approach works most of the time.
 	 */
 
 	assert(alignment != 0);
 	assert((alignment & chunksize_mask) == 0);
 
-	ret = pages_map(new_addr, size);
+	ret = pages_map(new_addr, size, commit);
 	if (ret == NULL || ret == new_addr)
 		return (ret);
 	assert(new_addr == NULL);
 	offset = ALIGNMENT_ADDR2OFFSET(ret, alignment);
 	if (offset != 0) {
 		pages_unmap(ret, size);
 		return (chunk_alloc_mmap_slow(size, alignment, zero, commit));
 	}
 
 	assert(ret != NULL);
 	*zero = true;
-	if (!*commit)
-		*commit = pages_decommit(ret, size);
 	return (ret);
 }
 
 bool
 chunk_dalloc_mmap(void *chunk, size_t size)
 {
 
 	if (config_munmap)
--- a/memory/jemalloc/src/src/ckh.c
+++ b/memory/jemalloc/src/src/ckh.c
@@ -94,17 +94,18 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t
 {
 	ckhc_t *cell;
 	unsigned offset, i;
 
 	/*
 	 * Cycle through the cells in the bucket, starting at a random position.
 	 * The randomness avoids worst-case search overhead as buckets fill up.
 	 */
-	offset = (unsigned)prng_lg_range(&ckh->prng_state, LG_CKH_BUCKET_CELLS);
+	offset = (unsigned)prng_lg_range_u64(&ckh->prng_state,
+	    LG_CKH_BUCKET_CELLS);
 	for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
 		cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) +
 		    ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))];
 		if (cell->key == NULL) {
 			cell->key = key;
 			cell->data = data;
 			ckh->count++;
 			return (false);
@@ -136,17 +137,17 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_
 		/*
 		 * Choose a random item within the bucket to evict.  This is
 		 * critical to correct function, because without (eventually)
 		 * evicting all items within a bucket during iteration, it
 		 * would be possible to get stuck in an infinite loop if there
 		 * were an item for which both hashes indicated the same
 		 * bucket.
 		 */
-		i = (unsigned)prng_lg_range(&ckh->prng_state,
+		i = (unsigned)prng_lg_range_u64(&ckh->prng_state,
 		    LG_CKH_BUCKET_CELLS);
 		cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
 		assert(cell->key != NULL);
 
 		/* Swap cell->{key,data} and {key,data} (evict). */
 		tkey = cell->key; tdata = cell->data;
 		cell->key = key; cell->data = data;
 		key = tkey; data = tdata;
@@ -265,35 +266,35 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh)
 		size_t usize;
 
 		lg_curcells++;
 		usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE);
 		if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) {
 			ret = true;
 			goto label_return;
 		}
-		tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL,
-		    true, NULL);
+		tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE,
+		    true, NULL, true, arena_ichoose(tsd, NULL));
 		if (tab == NULL) {
 			ret = true;
 			goto label_return;
 		}
 		/* Swap in new table. */
 		ttab = ckh->tab;
 		ckh->tab = tab;
 		tab = ttab;
 		ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;
 
 		if (!ckh_rebuild(ckh, tab)) {
-			idalloctm(tsd, tab, tcache_get(tsd, false), true, true);
+			idalloctm(tsd_tsdn(tsd), tab, NULL, true, true);
 			break;
 		}
 
 		/* Rebuilding failed, so back out partially rebuilt table. */
-		idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true);
+		idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true);
 		ckh->tab = tab;
 		ckh->lg_curbuckets = lg_prevbuckets;
 	}
 
 	ret = false;
 label_return:
 	return (ret);
 }
@@ -309,41 +310,41 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh)
 	 * It is possible (though unlikely, given well behaved hashes) that the
 	 * table rebuild will fail.
 	 */
 	lg_prevbuckets = ckh->lg_curbuckets;
 	lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1;
 	usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE);
 	if (unlikely(usize == 0 || usize > HUGE_MAXCLASS))
 		return;
-	tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true,
-	    NULL);
+	tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true, NULL,
+	    true, arena_ichoose(tsd, NULL));
 	if (tab == NULL) {
 		/*
 		 * An OOM error isn't worth propagating, since it doesn't
 		 * prevent this or future operations from proceeding.
 		 */
 		return;
 	}
 	/* Swap in new table. */
 	ttab = ckh->tab;
 	ckh->tab = tab;
 	tab = ttab;
 	ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;
 
 	if (!ckh_rebuild(ckh, tab)) {
-		idalloctm(tsd, tab, tcache_get(tsd, false), true, true);
+		idalloctm(tsd_tsdn(tsd), tab, NULL, true, true);
 #ifdef CKH_COUNT
 		ckh->nshrinks++;
 #endif
 		return;
 	}
 
 	/* Rebuilding failed, so back out partially rebuilt table. */
-	idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true);
+	idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true);
 	ckh->tab = tab;
 	ckh->lg_curbuckets = lg_prevbuckets;
 #ifdef CKH_COUNT
 	ckh->nshrinkfails++;
 #endif
 }
 
 bool
@@ -386,18 +387,18 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t m
 	ckh->hash = hash;
 	ckh->keycomp = keycomp;
 
 	usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE);
 	if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) {
 		ret = true;
 		goto label_return;
 	}
-	ckh->tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true,
-	    NULL);
+	ckh->tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true,
+	    NULL, true, arena_ichoose(tsd, NULL));
 	if (ckh->tab == NULL) {
 		ret = true;
 		goto label_return;
 	}
 
 	ret = false;
 label_return:
 	return (ret);
@@ -416,19 +417,19 @@ ckh_delete(tsd_t *tsd, ckh_t *ckh)
 	    " nrelocs: %"FMTu64"\n", __func__, ckh,
 	    (unsigned long long)ckh->ngrows,
 	    (unsigned long long)ckh->nshrinks,
 	    (unsigned long long)ckh->nshrinkfails,
 	    (unsigned long long)ckh->ninserts,
 	    (unsigned long long)ckh->nrelocs);
 #endif
 
-	idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true);
+	idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true);
 	if (config_debug)
-		memset(ckh, 0x5a, sizeof(ckh_t));
+		memset(ckh, JEMALLOC_FREE_JUNK, sizeof(ckh_t));
 }
 
 size_t
 ckh_count(ckh_t *ckh)
 {
 
 	assert(ckh != NULL);
 
--- a/memory/jemalloc/src/src/ctl.c
+++ b/memory/jemalloc/src/src/ctl.c
@@ -37,35 +37,35 @@ ctl_indexed_node(const ctl_node_t *node)
 
 	return (!node->named ? (const ctl_indexed_node_t *)node : NULL);
 }
 
 /******************************************************************************/
 /* Function prototypes for non-inline static functions. */
 
 #define	CTL_PROTO(n)							\
-static int	n##_ctl(const size_t *mib, size_t miblen, void *oldp,	\
-    size_t *oldlenp, void *newp, size_t newlen);
+static int	n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,	\
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen);
 
 #define	INDEX_PROTO(n)							\
-static const ctl_named_node_t	*n##_index(const size_t *mib,		\
-    size_t miblen, size_t i);
+static const ctl_named_node_t	*n##_index(tsdn_t *tsdn,		\
+    const size_t *mib, size_t miblen, size_t i);
 
 static bool	ctl_arena_init(ctl_arena_stats_t *astats);
 static void	ctl_arena_clear(ctl_arena_stats_t *astats);
-static void	ctl_arena_stats_amerge(ctl_arena_stats_t *cstats,
+static void	ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_stats_t *cstats,
     arena_t *arena);
 static void	ctl_arena_stats_smerge(ctl_arena_stats_t *sstats,
     ctl_arena_stats_t *astats);
-static void	ctl_arena_refresh(arena_t *arena, unsigned i);
-static bool	ctl_grow(void);
-static void	ctl_refresh(void);
-static bool	ctl_init(void);
-static int	ctl_lookup(const char *name, ctl_node_t const **nodesp,
-    size_t *mibp, size_t *depthp);
+static void	ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, unsigned i);
+static bool	ctl_grow(tsdn_t *tsdn);
+static void	ctl_refresh(tsdn_t *tsdn);
+static bool	ctl_init(tsdn_t *tsdn);
+static int	ctl_lookup(tsdn_t *tsdn, const char *name,
+    ctl_node_t const **nodesp, size_t *mibp, size_t *depthp);
 
 CTL_PROTO(version)
 CTL_PROTO(epoch)
 CTL_PROTO(thread_tcache_enabled)
 CTL_PROTO(thread_tcache_flush)
 CTL_PROTO(thread_prof_name)
 CTL_PROTO(thread_prof_active)
 CTL_PROTO(thread_arena)
@@ -112,19 +112,20 @@ CTL_PROTO(opt_lg_prof_sample)
 CTL_PROTO(opt_lg_prof_interval)
 CTL_PROTO(opt_prof_gdump)
 CTL_PROTO(opt_prof_final)
 CTL_PROTO(opt_prof_leak)
 CTL_PROTO(opt_prof_accum)
 CTL_PROTO(tcache_create)
 CTL_PROTO(tcache_flush)
 CTL_PROTO(tcache_destroy)
-static void	arena_i_purge(unsigned arena_ind, bool all);
+static void	arena_i_purge(tsdn_t *tsdn, unsigned arena_ind, bool all);
 CTL_PROTO(arena_i_purge)
 CTL_PROTO(arena_i_decay)
+CTL_PROTO(arena_i_reset)
 CTL_PROTO(arena_i_dss)
 CTL_PROTO(arena_i_lg_dirty_mult)
 CTL_PROTO(arena_i_decay_time)
 CTL_PROTO(arena_i_chunk_hooks)
 INDEX_PROTO(arena_i)
 CTL_PROTO(arenas_bin_i_size)
 CTL_PROTO(arenas_bin_i_nregs)
 CTL_PROTO(arenas_bin_i_run_size)
@@ -186,28 +187,30 @@ CTL_PROTO(stats_arenas_i_hchunks_j_curhc
 INDEX_PROTO(stats_arenas_i_hchunks_j)
 CTL_PROTO(stats_arenas_i_nthreads)
 CTL_PROTO(stats_arenas_i_dss)
 CTL_PROTO(stats_arenas_i_lg_dirty_mult)
 CTL_PROTO(stats_arenas_i_decay_time)
 CTL_PROTO(stats_arenas_i_pactive)
 CTL_PROTO(stats_arenas_i_pdirty)
 CTL_PROTO(stats_arenas_i_mapped)
+CTL_PROTO(stats_arenas_i_retained)
 CTL_PROTO(stats_arenas_i_npurge)
 CTL_PROTO(stats_arenas_i_nmadvise)
 CTL_PROTO(stats_arenas_i_purged)
 CTL_PROTO(stats_arenas_i_metadata_mapped)
 CTL_PROTO(stats_arenas_i_metadata_allocated)
 INDEX_PROTO(stats_arenas_i)
 CTL_PROTO(stats_cactive)
 CTL_PROTO(stats_allocated)
 CTL_PROTO(stats_active)
 CTL_PROTO(stats_metadata)
 CTL_PROTO(stats_resident)
 CTL_PROTO(stats_mapped)
+CTL_PROTO(stats_retained)
 
 /******************************************************************************/
 /* mallctl tree. */
 
 /* Maximum tree depth. */
 #define	CTL_MAX_DEPTH	6
 
 #define	NAME(n)	{true},	n
@@ -294,16 +297,17 @@ static const ctl_named_node_t	tcache_nod
 	{NAME("create"),	CTL(tcache_create)},
 	{NAME("flush"),		CTL(tcache_flush)},
 	{NAME("destroy"),	CTL(tcache_destroy)}
 };
 
 static const ctl_named_node_t arena_i_node[] = {
 	{NAME("purge"),		CTL(arena_i_purge)},
 	{NAME("decay"),		CTL(arena_i_decay)},
+	{NAME("reset"),		CTL(arena_i_reset)},
 	{NAME("dss"),		CTL(arena_i_dss)},
 	{NAME("lg_dirty_mult"),	CTL(arena_i_lg_dirty_mult)},
 	{NAME("decay_time"),	CTL(arena_i_decay_time)},
 	{NAME("chunk_hooks"),	CTL(arena_i_chunk_hooks)}
 };
 static const ctl_named_node_t super_arena_i_node[] = {
 	{NAME(""),		CHILD(named, arena_i)}
 };
@@ -451,16 +455,17 @@ static const ctl_indexed_node_t stats_ar
 static const ctl_named_node_t stats_arenas_i_node[] = {
 	{NAME("nthreads"),	CTL(stats_arenas_i_nthreads)},
 	{NAME("dss"),		CTL(stats_arenas_i_dss)},
 	{NAME("lg_dirty_mult"),	CTL(stats_arenas_i_lg_dirty_mult)},
 	{NAME("decay_time"),	CTL(stats_arenas_i_decay_time)},
 	{NAME("pactive"),	CTL(stats_arenas_i_pactive)},
 	{NAME("pdirty"),	CTL(stats_arenas_i_pdirty)},
 	{NAME("mapped"),	CTL(stats_arenas_i_mapped)},
+	{NAME("retained"),	CTL(stats_arenas_i_retained)},
 	{NAME("npurge"),	CTL(stats_arenas_i_npurge)},
 	{NAME("nmadvise"),	CTL(stats_arenas_i_nmadvise)},
 	{NAME("purged"),	CTL(stats_arenas_i_purged)},
 	{NAME("metadata"),	CHILD(named, stats_arenas_i_metadata)},
 	{NAME("small"),		CHILD(named, stats_arenas_i_small)},
 	{NAME("large"),		CHILD(named, stats_arenas_i_large)},
 	{NAME("huge"),		CHILD(named, stats_arenas_i_huge)},
 	{NAME("bins"),		CHILD(indexed, stats_arenas_i_bins)},
@@ -477,16 +482,17 @@ static const ctl_indexed_node_t stats_ar
 
 static const ctl_named_node_t stats_node[] = {
 	{NAME("cactive"),	CTL(stats_cactive)},
 	{NAME("allocated"),	CTL(stats_allocated)},
 	{NAME("active"),	CTL(stats_active)},
 	{NAME("metadata"),	CTL(stats_metadata)},
 	{NAME("resident"),	CTL(stats_resident)},
 	{NAME("mapped"),	CTL(stats_mapped)},
+	{NAME("retained"),	CTL(stats_retained)},
 	{NAME("arenas"),	CHILD(indexed, stats_arenas)}
 };
 
 static const ctl_named_node_t	root_node[] = {
 	{NAME("version"),	CTL(version)},
 	{NAME("epoch"),		CTL(epoch)},
 	{NAME("thread"),	CHILD(named, thread)},
 	{NAME("config"),	CHILD(named, config)},
@@ -549,51 +555,52 @@ ctl_arena_clear(ctl_arena_stats_t *astat
 		memset(astats->lstats, 0, nlclasses *
 		    sizeof(malloc_large_stats_t));
 		memset(astats->hstats, 0, nhclasses *
 		    sizeof(malloc_huge_stats_t));
 	}
 }
 
 static void
-ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena)
+ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_stats_t *cstats, arena_t *arena)
 {
 	unsigned i;
 
 	if (config_stats) {
-		arena_stats_merge(arena, &cstats->nthreads, &cstats->dss,
+		arena_stats_merge(tsdn, arena, &cstats->nthreads, &cstats->dss,
 		    &cstats->lg_dirty_mult, &cstats->decay_time,
 		    &cstats->pactive, &cstats->pdirty, &cstats->astats,
 		    cstats->bstats, cstats->lstats, cstats->hstats);
 
 		for (i = 0; i < NBINS; i++) {
 			cstats->allocated_small += cstats->bstats[i].curregs *
 			    index2size(i);
 			cstats->nmalloc_small += cstats->bstats[i].nmalloc;
 			cstats->ndalloc_small += cstats->bstats[i].ndalloc;
 			cstats->nrequests_small += cstats->bstats[i].nrequests;
 		}
 	} else {
-		arena_basic_stats_merge(arena, &cstats->nthreads, &cstats->dss,
-		    &cstats->lg_dirty_mult, &cstats->decay_time,
+		arena_basic_stats_merge(tsdn, arena, &cstats->nthreads,
+		    &cstats->dss, &cstats->lg_dirty_mult, &cstats->decay_time,
 		    &cstats->pactive, &cstats->pdirty);
 	}
 }
 
 static void
 ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats)
 {
 	unsigned i;
 
 	sstats->nthreads += astats->nthreads;
 	sstats->pactive += astats->pactive;
 	sstats->pdirty += astats->pdirty;
 
 	if (config_stats) {
 		sstats->astats.mapped += astats->astats.mapped;
+		sstats->astats.retained += astats->astats.retained;
 		sstats->astats.npurge += astats->astats.npurge;
 		sstats->astats.nmadvise += astats->astats.nmadvise;
 		sstats->astats.purged += astats->astats.purged;
 
 		sstats->astats.metadata_mapped +=
 		    astats->astats.metadata_mapped;
 		sstats->astats.metadata_allocated +=
 		    astats->astats.metadata_allocated;
@@ -644,34 +651,34 @@ ctl_arena_stats_smerge(ctl_arena_stats_t
 			sstats->hstats[i].ndalloc += astats->hstats[i].ndalloc;
 			sstats->hstats[i].curhchunks +=
 			    astats->hstats[i].curhchunks;
 		}
 	}
 }
 
 static void
-ctl_arena_refresh(arena_t *arena, unsigned i)
+ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, unsigned i)
 {
 	ctl_arena_stats_t *astats = &ctl_stats.arenas[i];
 	ctl_arena_stats_t *sstats = &ctl_stats.arenas[ctl_stats.narenas];
 
 	ctl_arena_clear(astats);
-	ctl_arena_stats_amerge(astats, arena);
+	ctl_arena_stats_amerge(tsdn, astats, arena);
 	/* Merge into sum stats as well. */
 	ctl_arena_stats_smerge(sstats, astats);
 }
 
 static bool
-ctl_grow(void)
+ctl_grow(tsdn_t *tsdn)
 {
 	ctl_arena_stats_t *astats;
 
 	/* Initialize new arena. */
-	if (arena_init(ctl_stats.narenas) == NULL)
+	if (arena_init(tsdn, ctl_stats.narenas) == NULL)
 		return (true);
 
 	/* Allocate extended arena stats. */
 	astats = (ctl_arena_stats_t *)a0malloc((ctl_stats.narenas + 2) *
 	    sizeof(ctl_arena_stats_t));
 	if (astats == NULL)
 		return (true);
 
@@ -696,68 +703,71 @@ ctl_grow(void)
 	a0dalloc(ctl_stats.arenas);
 	ctl_stats.arenas = astats;
 	ctl_stats.narenas++;
 
 	return (false);
 }
 
 static void
-ctl_refresh(void)
+ctl_refresh(tsdn_t *tsdn)
 {
 	unsigned i;
 	VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas);
 
 	/*
 	 * Clear sum stats, since they will be merged into by
 	 * ctl_arena_refresh().
 	 */
 	ctl_arena_clear(&ctl_stats.arenas[ctl_stats.narenas]);
 
 	for (i = 0; i < ctl_stats.narenas; i++)
-		tarenas[i] = arena_get(i, false);
+		tarenas[i] = arena_get(tsdn, i, false);
 
 	for (i = 0; i < ctl_stats.narenas; i++) {
 		bool initialized = (tarenas[i] != NULL);
 
 		ctl_stats.arenas[i].initialized = initialized;
 		if (initialized)
-			ctl_arena_refresh(tarenas[i], i);
+			ctl_arena_refresh(tsdn, tarenas[i], i);
 	}
 
 	if (config_stats) {
 		size_t base_allocated, base_resident, base_mapped;
-		base_stats_get(&base_allocated, &base_resident, &base_mapped);
+		base_stats_get(tsdn, &base_allocated, &base_resident,
+		    &base_mapped);
 		ctl_stats.allocated =
 		    ctl_stats.arenas[ctl_stats.narenas].allocated_small +
 		    ctl_stats.arenas[ctl_stats.narenas].astats.allocated_large +
 		    ctl_stats.arenas[ctl_stats.narenas].astats.allocated_huge;
 		ctl_stats.active =
 		    (ctl_stats.arenas[ctl_stats.narenas].pactive << LG_PAGE);
 		ctl_stats.metadata = base_allocated +
 		    ctl_stats.arenas[ctl_stats.narenas].astats.metadata_mapped +
 		    ctl_stats.arenas[ctl_stats.narenas].astats
 		    .metadata_allocated;
 		ctl_stats.resident = base_resident +
 		    ctl_stats.arenas[ctl_stats.narenas].astats.metadata_mapped +
 		    ((ctl_stats.arenas[ctl_stats.narenas].pactive +
 		    ctl_stats.arenas[ctl_stats.narenas].pdirty) << LG_PAGE);
 		ctl_stats.mapped = base_mapped +
 		    ctl_stats.arenas[ctl_stats.narenas].astats.mapped;
+		ctl_stats.retained =
+		    ctl_stats.arenas[ctl_stats.narenas].astats.retained;
 	}
 
 	ctl_epoch++;
 }
 
 static bool
-ctl_init(void)
+ctl_init(tsdn_t *tsdn)
 {
 	bool ret;
 
-	malloc_mutex_lock(&ctl_mtx);
+	malloc_mutex_lock(tsdn, &ctl_mtx);
 	if (!ctl_initialized) {
 		/*
 		 * Allocate space for one extra arena stats element, which
 		 * contains summed stats across all arenas.
 		 */
 		ctl_stats.narenas = narenas_total_get();
 		ctl_stats.arenas = (ctl_arena_stats_t *)a0malloc(
 		    (ctl_stats.narenas + 1) * sizeof(ctl_arena_stats_t));
@@ -789,29 +799,29 @@ ctl_init(void)
 					ret = true;
 					goto label_return;
 				}
 			}
 		}
 		ctl_stats.arenas[ctl_stats.narenas].initialized = true;
 
 		ctl_epoch = 0;
-		ctl_refresh();
+		ctl_refresh(tsdn);
 		ctl_initialized = true;
 	}
 
 	ret = false;
 label_return:
-	malloc_mutex_unlock(&ctl_mtx);
+	malloc_mutex_unlock(tsdn, &ctl_mtx);
 	return (ret);
 }
 
 static int
-ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp,
-    size_t *depthp)
+ctl_lookup(tsdn_t *tsdn, const char *name, ctl_node_t const **nodesp,
+    size_t *mibp, size_t *depthp)
 {
 	int ret;
 	const char *elm, *tdot, *dot;
 	size_t elen, i, j;
 	const ctl_named_node_t *node;
 
 	elm = name;
 	/* Equivalent to strchrnul(). */
@@ -853,17 +863,17 @@ ctl_lookup(const char *name, ctl_node_t 
 			/* Children are indexed. */
 			index = malloc_strtoumax(elm, NULL, 10);
 			if (index == UINTMAX_MAX || index > SIZE_T_MAX) {
 				ret = ENOENT;
 				goto label_return;
 			}
 
 			inode = ctl_indexed_node(node->children);
-			node = inode->index(mibp, *depthp, (size_t)index);
+			node = inode->index(tsdn, mibp, *depthp, (size_t)index);
 			if (node == NULL) {
 				ret = ENOENT;
 				goto label_return;
 			}
 
 			if (nodesp != NULL)
 				nodesp[i] = (const ctl_node_t *)node;
 			mibp[i] = (size_t)index;
@@ -897,71 +907,71 @@ ctl_lookup(const char *name, ctl_node_t 
 	}
 
 	ret = 0;
 label_return:
 	return (ret);
 }
 
 int
-ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp,
-    size_t newlen)
+ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp,
+    void *newp, size_t newlen)
 {
 	int ret;
 	size_t depth;
 	ctl_node_t const *nodes[CTL_MAX_DEPTH];
 	size_t mib[CTL_MAX_DEPTH];
 	const ctl_named_node_t *node;
 
-	if (!ctl_initialized && ctl_init()) {
+	if (!ctl_initialized && ctl_init(tsd_tsdn(tsd))) {
 		ret = EAGAIN;
 		goto label_return;
 	}
 
 	depth = CTL_MAX_DEPTH;
-	ret = ctl_lookup(name, nodes, mib, &depth);
+	ret = ctl_lookup(tsd_tsdn(tsd), name, nodes, mib, &depth);
 	if (ret != 0)
 		goto label_return;
 
 	node = ctl_named_node(nodes[depth-1]);
 	if (node != NULL && node->ctl)
-		ret = node->ctl(mib, depth, oldp, oldlenp, newp, newlen);
+		ret = node->ctl(tsd, mib, depth, oldp, oldlenp, newp, newlen);
 	else {
 		/* The name refers to a partial path through the ctl tree. */
 		ret = ENOENT;
 	}
 
 label_return:
 	return(ret);
 }
 
 int
-ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp)
+ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp, size_t *miblenp)
 {
 	int ret;
 
-	if (!ctl_initialized && ctl_init()) {
+	if (!ctl_initialized && ctl_init(tsdn)) {
 		ret = EAGAIN;
 		goto label_return;
 	}
 
-	ret = ctl_lookup(name, NULL, mibp, miblenp);
+	ret = ctl_lookup(tsdn, name, NULL, mibp, miblenp);
 label_return:
 	return(ret);
 }
 
 int
-ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
+ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 	const ctl_named_node_t *node;
 	size_t i;
 
-	if (!ctl_initialized && ctl_init()) {
+	if (!ctl_initialized && ctl_init(tsd_tsdn(tsd))) {
 		ret = EAGAIN;
 		goto label_return;
 	}
 
 	/* Iterate down the tree. */
 	node = super_root_node;
 	for (i = 0; i < miblen; i++) {
 		assert(node);
@@ -973,67 +983,67 @@ ctl_bymib(const size_t *mib, size_t mibl
 				goto label_return;
 			}
 			node = ctl_named_children(node, mib[i]);
 		} else {
 			const ctl_indexed_node_t *inode;
 
 			/* Indexed element. */
 			inode = ctl_indexed_node(node->children);
-			node = inode->index(mib, miblen, mib[i]);
+			node = inode->index(tsd_tsdn(tsd), mib, miblen, mib[i]);
 			if (node == NULL) {
 				ret = ENOENT;
 				goto label_return;
 			}
 		}
 	}
 
 	/* Call the ctl function. */
 	if (node && node->ctl)
-		ret = node->ctl(mib, miblen, oldp, oldlenp, newp, newlen);
+		ret = node->ctl(tsd, mib, miblen, oldp, oldlenp, newp, newlen);
 	else {
 		/* Partial MIB. */
 		ret = ENOENT;
 	}
 
 label_return:
 	return(ret);
 }
 
 bool
 ctl_boot(void)
 {
 
-	if (malloc_mutex_init(&ctl_mtx))
+	if (malloc_mutex_init(&ctl_mtx, "ctl", WITNESS_RANK_CTL))
 		return (true);
 
 	ctl_initialized = false;
 
 	return (false);
 }
 
 void
-ctl_prefork(void)
+ctl_prefork(tsdn_t *tsdn)
 {
 
-	malloc_mutex_prefork(&ctl_mtx);
+	malloc_mutex_prefork(tsdn, &ctl_mtx);
 }
 
 void
-ctl_postfork_parent(void)
+ctl_postfork_parent(tsdn_t *tsdn)
 {
 
-	malloc_mutex_postfork_parent(&ctl_mtx);
+	malloc_mutex_postfork_parent(tsdn, &ctl_mtx);
 }
 
 void
-ctl_postfork_child(void)
+ctl_postfork_child(tsdn_t *tsdn)
 {
 
-	malloc_mutex_postfork_child(&ctl_mtx);
+	malloc_mutex_postfork_child(tsdn, &ctl_mtx);
 }
 
 /******************************************************************************/
 /* *_ctl() functions. */
 
 #define	READONLY()	do {						\
 	if (newp != NULL || newlen != 0) {				\
 		ret = EPERM;						\
@@ -1080,85 +1090,85 @@ ctl_postfork_child(void)
 } while (0)
 
 /*
  * There's a lot of code duplication in the following macros due to limitations
  * in how nested cpp macros are expanded.
  */
 #define	CTL_RO_CLGEN(c, l, n, v, t)					\
 static int								\
-n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
-    void *newp, size_t newlen)						\
+n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,	\
+    size_t *oldlenp, void *newp, size_t newlen)				\
 {									\
 	int ret;							\
 	t oldval;							\
 									\
 	if (!(c))							\
 		return (ENOENT);					\
 	if (l)								\
-		malloc_mutex_lock(&ctl_mtx);				\
+		malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);		\
 	READONLY();							\
 	oldval = (v);							\
 	READ(oldval, t);						\
 									\
 	ret = 0;							\
 label_return:								\
 	if (l)								\
-		malloc_mutex_unlock(&ctl_mtx);				\
+		malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);		\
 	return (ret);							\
 }
 
 #define	CTL_RO_CGEN(c, n, v, t)						\
 static int								\
-n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
-    void *newp, size_t newlen)						\
+n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,	\
+    size_t *oldlenp, void *newp, size_t newlen)				\
 {									\
 	int ret;							\
 	t oldval;							\
 									\
 	if (!(c))							\
 		return (ENOENT);					\
-	malloc_mutex_lock(&ctl_mtx);					\
+	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);			\
 	READONLY();							\
 	oldval = (v);							\
 	READ(oldval, t);						\
 									\
 	ret = 0;							\
 label_return:								\
-	malloc_mutex_unlock(&ctl_mtx);					\
+	malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);			\
 	return (ret);							\
 }
 
 #define	CTL_RO_GEN(n, v, t)						\
 static int								\
-n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
-    void *newp, size_t newlen)						\
+n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,	\
+    size_t *oldlenp, void *newp, size_t newlen)				\
 {									\
 	int ret;							\
 	t oldval;							\
 									\
-	malloc_mutex_lock(&ctl_mtx);					\
+	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);			\
 	READONLY();							\
 	oldval = (v);							\
 	READ(oldval, t);						\
 									\
 	ret = 0;							\
 label_return:								\
-	malloc_mutex_unlock(&ctl_mtx);					\
+	malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);			\
 	return (ret);							\
 }
 
 /*
  * ctl_mtx is not acquired, under the assumption that no pertinent data will
  * mutate during the call.
  */
 #define	CTL_RO_NL_CGEN(c, n, v, t)					\
 static int								\
-n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
-    void *newp, size_t newlen)						\
+n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,	\
+    size_t *oldlenp, void *newp, size_t newlen)				\
 {									\
 	int ret;							\
 	t oldval;							\
 									\
 	if (!(c))							\
 		return (ENOENT);					\
 	READONLY();							\
 	oldval = (v);							\
@@ -1166,56 +1176,54 @@ n##_ctl(const size_t *mib, size_t miblen
 									\
 	ret = 0;							\
 label_return:								\
 	return (ret);							\
 }
 
 #define	CTL_RO_NL_GEN(n, v, t)						\
 static int								\
-n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
-    void *newp, size_t newlen)						\
+n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,	\
+    size_t *oldlenp, void *newp, size_t newlen)				\
 {									\
 	int ret;							\
 	t oldval;							\
 									\
 	READONLY();							\
 	oldval = (v);							\
 	READ(oldval, t);						\
 									\
 	ret = 0;							\
 label_return:								\
 	return (ret);							\
 }
 
 #define	CTL_TSD_RO_NL_CGEN(c, n, m, t)					\
 static int								\
-n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
-    void *newp, size_t newlen)						\
+n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,	\
+    size_t *oldlenp, void *newp, size_t newlen)				\
 {									\
 	int ret;							\
 	t oldval;							\
-	tsd_t *tsd;							\
 									\
 	if (!(c))							\
 		return (ENOENT);					\
 	READONLY();							\
-	tsd = tsd_fetch();						\
 	oldval = (m(tsd));						\
 	READ(oldval, t);						\
 									\
 	ret = 0;							\
 label_return:								\
 	return (ret);							\
 }
 
 #define	CTL_RO_CONFIG_GEN(n, t)						\
 static int								\
-n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
-    void *newp, size_t newlen)						\
+n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,	\
+    size_t *oldlenp, void *newp, size_t newlen)				\
 {									\
 	int ret;							\
 	t oldval;							\
 									\
 	READONLY();							\
 	oldval = n;							\
 	READ(oldval, t);						\
 									\
@@ -1224,31 +1232,31 @@ label_return:								\
 	return (ret);							\
 }
 
 /******************************************************************************/
 
 CTL_RO_NL_GEN(version, JEMALLOC_VERSION, const char *)
 
 static int
-epoch_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
+epoch_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 	UNUSED uint64_t newval;
 
-	malloc_mutex_lock(&ctl_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
 	WRITE(newval, uint64_t);
 	if (newp != NULL)
-		ctl_refresh();
+		ctl_refresh(tsd_tsdn(tsd));
 	READ(ctl_epoch, uint64_t);
 
 	ret = 0;
 label_return:
-	malloc_mutex_unlock(&ctl_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
 	return (ret);
 }
 
 /******************************************************************************/
 
 CTL_RO_CONFIG_GEN(config_cache_oblivious, bool)
 CTL_RO_CONFIG_GEN(config_debug, bool)
 CTL_RO_CONFIG_GEN(config_fill, bool)
@@ -1293,77 +1301,75 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_acc
 CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t)
 CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool)
 CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool)
 CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool)
 
 /******************************************************************************/
 
 static int
-thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
+thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
-	tsd_t *tsd;
 	arena_t *oldarena;
 	unsigned newind, oldind;
 
-	tsd = tsd_fetch();
 	oldarena = arena_choose(tsd, NULL);
 	if (oldarena == NULL)
 		return (EAGAIN);
 
-	malloc_mutex_lock(&ctl_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
 	newind = oldind = oldarena->ind;
 	WRITE(newind, unsigned);
 	READ(oldind, unsigned);
 	if (newind != oldind) {
 		arena_t *newarena;
 
 		if (newind >= ctl_stats.narenas) {
 			/* New arena index is out of range. */
 			ret = EFAULT;
 			goto label_return;
 		}
 
 		/* Initialize arena if necessary. */
-		newarena = arena_get(newind, true);
+		newarena = arena_get(tsd_tsdn(tsd), newind, true);
 		if (newarena == NULL) {
 			ret = EAGAIN;
 			goto label_return;
 		}
 		/* Set new arena/tcache associations. */
 		arena_migrate(tsd, oldind, newind);
 		if (config_tcache) {
 			tcache_t *tcache = tsd_tcache_get(tsd);
 			if (tcache != NULL) {
-				tcache_arena_reassociate(tcache, oldarena,
-				    newarena);
+				tcache_arena_reassociate(tsd_tsdn(tsd), tcache,
+				    oldarena, newarena);
 			}
 		}
 	}
 
 	ret = 0;
 label_return:
-	malloc_mutex_unlock(&ctl_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
 	return (ret);
 }
 
 CTL_TSD_RO_NL_CGEN(config_stats, thread_allocated, tsd_thread_allocated_get,
     uint64_t)
 CTL_TSD_RO_NL_CGEN(config_stats, thread_allocatedp, tsd_thread_allocatedp_get,
     uint64_t *)
 CTL_TSD_RO_NL_CGEN(config_stats, thread_deallocated, tsd_thread_deallocated_get,
     uint64_t)
 CTL_TSD_RO_NL_CGEN(config_stats, thread_deallocatedp,
     tsd_thread_deallocatedp_get, uint64_t *)
 
 static int
-thread_tcache_enabled_ctl(const size_t *mib, size_t miblen, void *oldp,
-    size_t *oldlenp, void *newp, size_t newlen)
+thread_tcache_enabled_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 	bool oldval;
 
 	if (!config_tcache)
 		return (ENOENT);
 
 	oldval = tcache_enabled_get();
@@ -1377,18 +1383,18 @@ thread_tcache_enabled_ctl(const size_t *
 	READ(oldval, bool);
 
 	ret = 0;
 label_return:
 	return (ret);
 }
 
 static int
-thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp,
-    size_t *oldlenp, void *newp, size_t newlen)
+thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 
 	if (!config_tcache)
 		return (ENOENT);
 
 	READONLY();
 	WRITEONLY();
@@ -1396,146 +1402,133 @@ thread_tcache_flush_ctl(const size_t *mi
 	tcache_flush();
 
 	ret = 0;
 label_return:
 	return (ret);
 }
 
 static int
-thread_prof_name_ctl(const size_t *mib, size_t miblen, void *oldp,
+thread_prof_name_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
     size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 
 	if (!config_prof)
 		return (ENOENT);
 
 	READ_XOR_WRITE();
 
 	if (newp != NULL) {
-		tsd_t *tsd;
-
 		if (newlen != sizeof(const char *)) {
 			ret = EINVAL;
 			goto label_return;
 		}
 
-		tsd = tsd_fetch();
-
 		if ((ret = prof_thread_name_set(tsd, *(const char **)newp)) !=
 		    0)
 			goto label_return;
 	} else {
-		const char *oldname = prof_thread_name_get();
+		const char *oldname = prof_thread_name_get(tsd);
 		READ(oldname, const char *);
 	}
 
 	ret = 0;
 label_return:
 	return (ret);
 }
 
 static int
-thread_prof_active_ctl(const size_t *mib, size_t miblen, void *oldp,
+thread_prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
     size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 	bool oldval;
 
 	if (!config_prof)
 		return (ENOENT);
 
-	oldval = prof_thread_active_get();
+	oldval = prof_thread_active_get(tsd);
 	if (newp != NULL) {
 		if (newlen != sizeof(bool)) {
 			ret = EINVAL;
 			goto label_return;
 		}
-		if (prof_thread_active_set(*(bool *)newp)) {
+		if (prof_thread_active_set(tsd, *(bool *)newp)) {
 			ret = EAGAIN;
 			goto label_return;
 		}
 	}
 	READ(oldval, bool);
 
 	ret = 0;
 label_return:
 	return (ret);
 }
 
 /******************************************************************************/
 
 static int
-tcache_create_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
+tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
-	tsd_t *tsd;
 	unsigned tcache_ind;
 
 	if (!config_tcache)
 		return (ENOENT);
 
-	tsd = tsd_fetch();
-
-	malloc_mutex_lock(&ctl_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
 	READONLY();
 	if (tcaches_create(tsd, &tcache_ind)) {
 		ret = EFAULT;
 		goto label_return;
 	}
 	READ(tcache_ind, unsigned);
 
 	ret = 0;
 label_return:
-	malloc_mutex_unlock(&ctl_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
 	return (ret);
 }
 
 static int
-tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
+tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
-	tsd_t *tsd;
 	unsigned tcache_ind;
 
 	if (!config_tcache)
 		return (ENOENT);
 
-	tsd = tsd_fetch();
-
 	WRITEONLY();
 	tcache_ind = UINT_MAX;
 	WRITE(tcache_ind, unsigned);
 	if (tcache_ind == UINT_MAX) {
 		ret = EFAULT;
 		goto label_return;
 	}
 	tcaches_flush(tsd, tcache_ind);
 
 	ret = 0;
 label_return:
 	return (ret);
 }
 
 static int
-tcache_destroy_ctl(const size_t *mib, size_t miblen, void *oldp,
+tcache_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
     size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
-	tsd_t *tsd;
 	unsigned tcache_ind;
 
 	if (!config_tcache)
 		return (ENOENT);
 
-	tsd = tsd_fetch();
-
 	WRITEONLY();
 	tcache_ind = UINT_MAX;
 	WRITE(tcache_ind, unsigned);
 	if (tcache_ind == UINT_MAX) {
 		ret = EFAULT;
 		goto label_return;
 	}
 	tcaches_destroy(tsd, tcache_ind);
@@ -1543,97 +1536,131 @@ tcache_destroy_ctl(const size_t *mib, si
 	ret = 0;
 label_return:
 	return (ret);
 }
 
 /******************************************************************************/
 
 static void
-arena_i_purge(unsigned arena_ind, bool all)
+arena_i_purge(tsdn_t *tsdn, unsigned arena_ind, bool all)
 {
 
-	malloc_mutex_lock(&ctl_mtx);
+	malloc_mutex_lock(tsdn, &ctl_mtx);
 	{
 		unsigned narenas = ctl_stats.narenas;
 
 		if (arena_ind == narenas) {
 			unsigned i;
 			VARIABLE_ARRAY(arena_t *, tarenas, narenas);
 
 			for (i = 0; i < narenas; i++)
-				tarenas[i] = arena_get(i, false);
+				tarenas[i] = arena_get(tsdn, i, false);
 
 			/*
 			 * No further need to hold ctl_mtx, since narenas and
 			 * tarenas contain everything needed below.
 			 */
-			malloc_mutex_unlock(&ctl_mtx);
+			malloc_mutex_unlock(tsdn, &ctl_mtx);
 
 			for (i = 0; i < narenas; i++) {
 				if (tarenas[i] != NULL)
-					arena_purge(tarenas[i], all);
+					arena_purge(tsdn, tarenas[i], all);
 			}
 		} else {
 			arena_t *tarena;
 
 			assert(arena_ind < narenas);
 
-			tarena = arena_get(arena_ind, false);
+			tarena = arena_get(tsdn, arena_ind, false);
 
 			/* No further need to hold ctl_mtx. */
-			malloc_mutex_unlock(&ctl_mtx);
+			malloc_mutex_unlock(tsdn, &ctl_mtx);
 
 			if (tarena != NULL)
-				arena_purge(tarena, all);
+				arena_purge(tsdn, tarena, all);
 		}
 	}
 }
 
 static int
-arena_i_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
+arena_i_purge_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 
 	READONLY();
 	WRITEONLY();
-	arena_i_purge((unsigned)mib[1], true);
+	arena_i_purge(tsd_tsdn(tsd), (unsigned)mib[1], true);
 
 	ret = 0;
 label_return:
 	return (ret);
 }
 
 static int
-arena_i_decay_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
+arena_i_decay_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 
 	READONLY();
 	WRITEONLY();
-	arena_i_purge((unsigned)mib[1], false);
+	arena_i_purge(tsd_tsdn(tsd), (unsigned)mib[1], false);
 
 	ret = 0;
 label_return:
 	return (ret);
 }
 
 static int
-arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
+arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
+{
+	int ret;
+	unsigned arena_ind;
+	arena_t *arena;
+
+	READONLY();
+	WRITEONLY();
+
+	if ((config_valgrind && unlikely(in_valgrind)) || (config_fill &&
+	    unlikely(opt_quarantine))) {
+		ret = EFAULT;
+		goto label_return;
+	}
+
+	arena_ind = (unsigned)mib[1];
+	if (config_debug) {
+		malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
+		assert(arena_ind < ctl_stats.narenas);
+		malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
+	}
+	assert(arena_ind >= opt_narenas);
+
+	arena = arena_get(tsd_tsdn(tsd), arena_ind, false);
+
+	arena_reset(tsd, arena);
+
+	ret = 0;
+label_return:
+	return (ret);
+}
+
+static int
+arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 	const char *dss = NULL;
 	unsigned arena_ind = (unsigned)mib[1];
 	dss_prec_t dss_prec_old = dss_prec_limit;
 	dss_prec_t dss_prec = dss_prec_limit;
 
-	malloc_mutex_lock(&ctl_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
 	WRITE(dss, const char *);
 	if (dss != NULL) {
 		int i;
 		bool match = false;
 
 		for (i = 0; i < dss_prec_limit; i++) {
 			if (strcmp(dss_prec_names[i], dss) == 0) {
 				dss_prec = i;
@@ -1644,210 +1671,213 @@ arena_i_dss_ctl(const size_t *mib, size_
 
 		if (!match) {
 			ret = EINVAL;
 			goto label_return;
 		}
 	}
 
 	if (arena_ind < ctl_stats.narenas) {
-		arena_t *arena = arena_get(arena_ind, false);
+		arena_t *arena = arena_get(tsd_tsdn(tsd), arena_ind, false);
 		if (arena == NULL || (dss_prec != dss_prec_limit &&
-		    arena_dss_prec_set(arena, dss_prec))) {
+		    arena_dss_prec_set(tsd_tsdn(tsd), arena, dss_prec))) {
 			ret = EFAULT;
 			goto label_return;
 		}
-		dss_prec_old = arena_dss_prec_get(arena);
+		dss_prec_old = arena_dss_prec_get(tsd_tsdn(tsd), arena);
 	} else {
 		if (dss_prec != dss_prec_limit &&
 		    chunk_dss_prec_set(dss_prec)) {
 			ret = EFAULT;
 			goto label_return;
 		}
 		dss_prec_old = chunk_dss_prec_get();
 	}
 
 	dss = dss_prec_names[dss_prec_old];
 	READ(dss, const char *);
 
 	ret = 0;
 label_return:
-	malloc_mutex_unlock(&ctl_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
 	return (ret);
 }
 
 static int
-arena_i_lg_dirty_mult_ctl(const size_t *mib, size_t miblen, void *oldp,
-    size_t *oldlenp, void *newp, size_t newlen)
+arena_i_lg_dirty_mult_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 	unsigned arena_ind = (unsigned)mib[1];
 	arena_t *arena;
 
-	arena = arena_get(arena_ind, false);
+	arena = arena_get(tsd_tsdn(tsd), arena_ind, false);
 	if (arena == NULL) {
 		ret = EFAULT;
 		goto label_return;
 	}
 
 	if (oldp != NULL && oldlenp != NULL) {
-		size_t oldval = arena_lg_dirty_mult_get(arena);
+		size_t oldval = arena_lg_dirty_mult_get(tsd_tsdn(tsd), arena);
 		READ(oldval, ssize_t);
 	}
 	if (newp != NULL) {
 		if (newlen != sizeof(ssize_t)) {
 			ret = EINVAL;
 			goto label_return;
 		}
-		if (arena_lg_dirty_mult_set(arena, *(ssize_t *)newp)) {
+		if (arena_lg_dirty_mult_set(tsd_tsdn(tsd), arena,
+		    *(ssize_t *)newp)) {
 			ret = EFAULT;
 			goto label_return;
 		}
 	}
 
 	ret = 0;
 label_return:
 	return (ret);
 }
 
 static int
-arena_i_decay_time_ctl(const size_t *mib, size_t miblen, void *oldp,
+arena_i_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
     size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 	unsigned arena_ind = (unsigned)mib[1];
 	arena_t *arena;
 
-	arena = arena_get(arena_ind, false);
+	arena = arena_get(tsd_tsdn(tsd), arena_ind, false);
 	if (arena == NULL) {
 		ret = EFAULT;
 		goto label_return;
 	}
 
 	if (oldp != NULL && oldlenp != NULL) {
-		size_t oldval = arena_decay_time_get(arena);
+		size_t oldval = arena_decay_time_get(tsd_tsdn(tsd), arena);
 		READ(oldval, ssize_t);
 	}
 	if (newp != NULL) {
 		if (newlen != sizeof(ssize_t)) {
 			ret = EINVAL;
 			goto label_return;
 		}
-		if (arena_decay_time_set(arena, *(ssize_t *)newp)) {
+		if (arena_decay_time_set(tsd_tsdn(tsd), arena,
+		    *(ssize_t *)newp)) {
 			ret = EFAULT;
 			goto label_return;
 		}
 	}
 
 	ret = 0;
 label_return:
 	return (ret);
 }
 
 static int
-arena_i_chunk_hooks_ctl(const size_t *mib, size_t miblen, void *oldp,
-    size_t *oldlenp, void *newp, size_t newlen)
+arena_i_chunk_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 	unsigned arena_ind = (unsigned)mib[1];
 	arena_t *arena;
 
-	malloc_mutex_lock(&ctl_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
 	if (arena_ind < narenas_total_get() && (arena =
-	    arena_get(arena_ind, false)) != NULL) {
+	    arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) {
 		if (newp != NULL) {
 			chunk_hooks_t old_chunk_hooks, new_chunk_hooks;
 			WRITE(new_chunk_hooks, chunk_hooks_t);
-			old_chunk_hooks = chunk_hooks_set(arena,
+			old_chunk_hooks = chunk_hooks_set(tsd_tsdn(tsd), arena,
 			    &new_chunk_hooks);
 			READ(old_chunk_hooks, chunk_hooks_t);
 		} else {
-			chunk_hooks_t old_chunk_hooks = chunk_hooks_get(arena);
+			chunk_hooks_t old_chunk_hooks =
+			    chunk_hooks_get(tsd_tsdn(tsd), arena);
 			READ(old_chunk_hooks, chunk_hooks_t);
 		}
 	} else {
 		ret = EFAULT;
 		goto label_return;
 	}
 	ret = 0;
 label_return:
-	malloc_mutex_unlock(&ctl_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
 	return (ret);
 }
 
 static const ctl_named_node_t *
-arena_i_index(const size_t *mib, size_t miblen, size_t i)
+arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i)
 {
-	const ctl_named_node_t * ret;
+	const ctl_named_node_t *ret;
 
-	malloc_mutex_lock(&ctl_mtx);
+	malloc_mutex_lock(tsdn, &ctl_mtx);
 	if (i > ctl_stats.narenas) {
 		ret = NULL;
 		goto label_return;
 	}
 
 	ret = super_arena_i_node;
 label_return:
-	malloc_mutex_unlock(&ctl_mtx);
+	malloc_mutex_unlock(tsdn, &ctl_mtx);
 	return (ret);
 }
 
 /******************************************************************************/
 
 static int
-arenas_narenas_ctl(const size_t *mib, size_t miblen, void *oldp,
+arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
     size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 	unsigned narenas;
 
-	malloc_mutex_lock(&ctl_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
 	READONLY();
 	if (*oldlenp != sizeof(unsigned)) {
 		ret = EINVAL;
 		goto label_return;
 	}
 	narenas = ctl_stats.narenas;
 	READ(narenas, unsigned);
 
 	ret = 0;
 label_return:
-	malloc_mutex_unlock(&ctl_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
 	return (ret);
 }
 
 static int
-arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp,
+arenas_initialized_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
     size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 	unsigned nread, i;
 
-	malloc_mutex_lock(&ctl_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
 	READONLY();
 	if (*oldlenp != ctl_stats.narenas * sizeof(bool)) {
 		ret = EINVAL;
 		nread = (*oldlenp < ctl_stats.narenas * sizeof(bool))
 		    ? (unsigned)(*oldlenp / sizeof(bool)) : ctl_stats.narenas;
 	} else {
 		ret = 0;
 		nread = ctl_stats.narenas;
 	}
 
 	for (i = 0; i < nread; i++)
 		((bool *)oldp)[i] = ctl_stats.arenas[i].initialized;
 
 label_return:
-	malloc_mutex_unlock(&ctl_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
 	return (ret);
 }
 
 static int
-arenas_lg_dirty_mult_ctl(const size_t *mib, size_t miblen, void *oldp,
-    size_t *oldlenp, void *newp, size_t newlen)
+arenas_lg_dirty_mult_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 
 	if (oldp != NULL && oldlenp != NULL) {
 		size_t oldval = arena_lg_dirty_mult_default_get();
 		READ(oldval, ssize_t);
 	}
 	if (newp != NULL) {
@@ -1862,17 +1892,17 @@ arenas_lg_dirty_mult_ctl(const size_t *m
 	}
 
 	ret = 0;
 label_return:
 	return (ret);
 }
 
 static int
-arenas_decay_time_ctl(const size_t *mib, size_t miblen, void *oldp,
+arenas_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
     size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 
 	if (oldp != NULL && oldlenp != NULL) {
 		size_t oldval = arena_decay_time_default_get();
 		READ(oldval, ssize_t);
 	}
@@ -1896,187 +1926,185 @@ CTL_RO_NL_GEN(arenas_quantum, QUANTUM, s
 CTL_RO_NL_GEN(arenas_page, PAGE, size_t)
 CTL_RO_NL_CGEN(config_tcache, arenas_tcache_max, tcache_maxclass, size_t)
 CTL_RO_NL_GEN(arenas_nbins, NBINS, unsigned)
 CTL_RO_NL_CGEN(config_tcache, arenas_nhbins, nhbins, unsigned)
 CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t)
 CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t)
 CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t)
 static const ctl_named_node_t *
-arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i)
+arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i)
 {
 
 	if (i > NBINS)
 		return (NULL);
 	return (super_arenas_bin_i_node);
 }
 
 CTL_RO_NL_GEN(arenas_nlruns, nlclasses, unsigned)
 CTL_RO_NL_GEN(arenas_lrun_i_size, index2size(NBINS+(szind_t)mib[2]), size_t)
 static const ctl_named_node_t *
-arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i)
+arenas_lrun_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i)
 {
 
 	if (i > nlclasses)
 		return (NULL);
 	return (super_arenas_lrun_i_node);
 }
 
 CTL_RO_NL_GEN(arenas_nhchunks, nhclasses, unsigned)
 CTL_RO_NL_GEN(arenas_hchunk_i_size, index2size(NBINS+nlclasses+(szind_t)mib[2]),
     size_t)
 static const ctl_named_node_t *
-arenas_hchunk_i_index(const size_t *mib, size_t miblen, size_t i)
+arenas_hchunk_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i)
 {
 
 	if (i > nhclasses)
 		return (NULL);
 	return (super_arenas_hchunk_i_node);
 }
 
 static int
-arenas_extend_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
+arenas_extend_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 	unsigned narenas;
 
-	malloc_mutex_lock(&ctl_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
 	READONLY();
-	if (ctl_grow()) {
+	if (ctl_grow(tsd_tsdn(tsd))) {
 		ret = EAGAIN;
 		goto label_return;
 	}
 	narenas = ctl_stats.narenas - 1;
 	READ(narenas, unsigned);
 
 	ret = 0;
 label_return:
-	malloc_mutex_unlock(&ctl_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
 	return (ret);
 }
 
 /******************************************************************************/
 
 static int
-prof_thread_active_init_ctl(const size_t *mib, size_t miblen, void *oldp,
+prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen)
+{
+	int ret;
+	bool oldval;
+
+	if (!config_prof)
+		return (ENOENT);
+
+	if (newp != NULL) {
+		if (newlen != sizeof(bool)) {
+			ret = EINVAL;
+			goto label_return;
+		}
+		oldval = prof_thread_active_init_set(tsd_tsdn(tsd),
+		    *(bool *)newp);
+	} else
+		oldval = prof_thread_active_init_get(tsd_tsdn(tsd));
+	READ(oldval, bool);
+
+	ret = 0;
+label_return:
+	return (ret);
+}
+
+static int
+prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
     size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 	bool oldval;
 
 	if (!config_prof)
 		return (ENOENT);
 
 	if (newp != NULL) {
 		if (newlen != sizeof(bool)) {
 			ret = EINVAL;
 			goto label_return;
 		}
-		oldval = prof_thread_active_init_set(*(bool *)newp);
+		oldval = prof_active_set(tsd_tsdn(tsd), *(bool *)newp);
 	} else
-		oldval = prof_thread_active_init_get();
+		oldval = prof_active_get(tsd_tsdn(tsd));
 	READ(oldval, bool);
 
 	ret = 0;
 label_return:
 	return (ret);
 }
 
 static int
-prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
+prof_dump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
+{
+	int ret;
+	const char *filename = NULL;
+
+	if (!config_prof)
+		return (ENOENT);
+
+	WRITEONLY();
+	WRITE(filename, const char *);
+
+	if (prof_mdump(tsd, filename)) {
+		ret = EFAULT;
+		goto label_return;
+	}
+
+	ret = 0;
+label_return:
+	return (ret);
+}
+
+static int
+prof_gdump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 	bool oldval;
 
 	if (!config_prof)
 		return (ENOENT);
 
 	if (newp != NULL) {
 		if (newlen != sizeof(bool)) {
 			ret = EINVAL;
 			goto label_return;
 		}
-		oldval = prof_active_set(*(bool *)newp);
+		oldval = prof_gdump_set(tsd_tsdn(tsd), *(bool *)newp);
 	} else
-		oldval = prof_active_get();
+		oldval = prof_gdump_get(tsd_tsdn(tsd));
 	READ(oldval, bool);
 
 	ret = 0;
 label_return:
 	return (ret);
 }
 
 static int
-prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
-{
-	int ret;
-	const char *filename = NULL;
-
-	if (!config_prof)
-		return (ENOENT);
-
-	WRITEONLY();
-	WRITE(filename, const char *);
-
-	if (prof_mdump(filename)) {
-		ret = EFAULT;
-		goto label_return;
-	}
-
-	ret = 0;
-label_return:
-	return (ret);
-}
-
-static int
-prof_gdump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
-{
-	int ret;
-	bool oldval;
-
-	if (!config_prof)
-		return (ENOENT);
-
-	if (newp != NULL) {
-		if (newlen != sizeof(bool)) {
-			ret = EINVAL;
-			goto label_return;
-		}
-		oldval = prof_gdump_set(*(bool *)newp);
-	} else
-		oldval = prof_gdump_get();
-	READ(oldval, bool);
-
-	ret = 0;
-label_return:
-	return (ret);
-}
-
-static int
-prof_reset_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
+prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
 	size_t lg_sample = lg_prof_sample;
-	tsd_t *tsd;
 
 	if (!config_prof)
 		return (ENOENT);
 
 	WRITEONLY();
 	WRITE(lg_sample, size_t);
 	if (lg_sample >= (sizeof(uint64_t) << 3))
 		lg_sample = (sizeof(uint64_t) << 3) - 1;
 
-	tsd = tsd_fetch();
-
 	prof_reset(tsd, lg_sample);
 
 	ret = 0;
 label_return:
 	return (ret);
 }
 
 CTL_RO_NL_CGEN(config_prof, prof_interval, prof_interval, uint64_t)
@@ -2085,27 +2113,30 @@ CTL_RO_NL_CGEN(config_prof, lg_prof_samp
 /******************************************************************************/
 
 CTL_RO_CGEN(config_stats, stats_cactive, &stats_cactive, size_t *)
 CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats.allocated, size_t)
 CTL_RO_CGEN(config_stats, stats_active, ctl_stats.active, size_t)
 CTL_RO_CGEN(config_stats, stats_metadata, ctl_stats.metadata, size_t)
 CTL_RO_CGEN(config_stats, stats_resident, ctl_stats.resident, size_t)
 CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t)
+CTL_RO_CGEN(config_stats, stats_retained, ctl_stats.retained, size_t)
 
 CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *)
 CTL_RO_GEN(stats_arenas_i_lg_dirty_mult, ctl_stats.arenas[mib[2]].lg_dirty_mult,
     ssize_t)
 CTL_RO_GEN(stats_arenas_i_decay_time, ctl_stats.arenas[mib[2]].decay_time,
     ssize_t)
 CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned)
 CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t)
 CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_mapped,
     ctl_stats.arenas[mib[2]].astats.mapped, size_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_retained,
+    ctl_stats.arenas[mib[2]].astats.retained, size_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_npurge,
     ctl_stats.arenas[mib[2]].astats.npurge, uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise,
     ctl_stats.arenas[mib[2]].astats.nmadvise, uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_purged,
     ctl_stats.arenas[mib[2]].astats.purged, uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_mapped,
     ctl_stats.arenas[mib[2]].astats.metadata_mapped, size_t)
@@ -2152,17 +2183,18 @@ CTL_RO_CGEN(config_stats && config_tcach
 CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nruns,
     ctl_stats.arenas[mib[2]].bstats[mib[4]].nruns, uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreruns,
     ctl_stats.arenas[mib[2]].bstats[mib[4]].reruns, uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curruns,
     ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t)
 
 static const ctl_named_node_t *
-stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j)
+stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen,
+    size_t j)
 {
 
 	if (j > NBINS)
 		return (NULL);
 	return (super_stats_arenas_i_bins_j_node);
 }
 
 CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_nmalloc,
@@ -2170,17 +2202,18 @@ CTL_RO_CGEN(config_stats, stats_arenas_i
 CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_ndalloc,
     ctl_stats.arenas[mib[2]].lstats[mib[4]].ndalloc, uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_nrequests,
     ctl_stats.arenas[mib[2]].lstats[mib[4]].nrequests, uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_curruns,
     ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t)
 
 static const ctl_named_node_t *
-stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j)
+stats_arenas_i_lruns_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen,
+    size_t j)
 {
 
 	if (j > nlclasses)
 		return (NULL);
 	return (super_stats_arenas_i_lruns_j_node);
 }
 
 CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_nmalloc,
@@ -2189,32 +2222,33 @@ CTL_RO_CGEN(config_stats, stats_arenas_i
     ctl_stats.arenas[mib[2]].hstats[mib[4]].ndalloc, uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_nrequests,
     ctl_stats.arenas[mib[2]].hstats[mib[4]].nmalloc, /* Intentional. */
     uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_curhchunks,
     ctl_stats.arenas[mib[2]].hstats[mib[4]].curhchunks, size_t)
 
 static const ctl_named_node_t *
-stats_arenas_i_hchunks_j_index(const size_t *mib, size_t miblen, size_t j)
+stats_arenas_i_hchunks_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen,
+    size_t j)
 {
 
 	if (j > nhclasses)
 		return (NULL);
 	return (super_stats_arenas_i_hchunks_j_node);
 }
 
 static const ctl_named_node_t *
-stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i)
+stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i)
 {
 	const ctl_named_node_t * ret;
 
-	malloc_mutex_lock(&ctl_mtx);
+	malloc_mutex_lock(tsdn, &ctl_mtx);
 	if (i > ctl_stats.narenas || !ctl_stats.arenas[i].initialized) {
 		ret = NULL;
 		goto label_return;
 	}
 
 	ret = super_stats_arenas_i_node;
 label_return:
-	malloc_mutex_unlock(&ctl_mtx);
+	malloc_mutex_unlock(tsdn, &ctl_mtx);
 	return (ret);
 }
--- a/memory/jemalloc/src/src/huge.c
+++ b/memory/jemalloc/src/src/huge.c
@@ -10,96 +10,109 @@ huge_node_get(const void *ptr)
 
 	node = chunk_lookup(ptr, true);
 	assert(!extent_node_achunk_get(node));
 
 	return (node);
 }
 
 static bool
-huge_node_set(const void *ptr, extent_node_t *node)
+huge_node_set(tsdn_t *tsdn, const void *ptr, extent_node_t *node)
 {
 
 	assert(extent_node_addr_get(node) == ptr);
 	assert(!extent_node_achunk_get(node));
-	return (chunk_register(ptr, node));
+	return (chunk_register(tsdn, ptr, node));
+}
+
+static void
+huge_node_reset(tsdn_t *tsdn, const void *ptr, extent_node_t *node)
+{
+	bool err;
+
+	err = huge_node_set(tsdn, ptr, node);
+	assert(!err);
 }
 
 static void
 huge_node_unset(const void *ptr, const extent_node_t *node)
 {
 
 	chunk_deregister(ptr, node);
 }
 
 void *
-huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero,
-    tcache_t *tcache)
+huge_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero)
 {
 
 	assert(usize == s2u(usize));
 
-	return (huge_palloc(tsd, arena, usize, chunksize, zero, tcache));
+	return (huge_palloc(tsdn, arena, usize, chunksize, zero));
 }
 
 void *
-huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment,
-    bool zero, tcache_t *tcache)
+huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
+    bool zero)
 {
 	void *ret;
 	size_t ausize;
+	arena_t *iarena;
 	extent_node_t *node;
 	bool is_zeroed;
 
 	/* Allocate one or more contiguous chunks for this request. */
 
+	assert(!tsdn_null(tsdn) || arena != NULL);
+
 	ausize = sa2u(usize, alignment);
 	if (unlikely(ausize == 0 || ausize > HUGE_MAXCLASS))
 		return (NULL);
 	assert(ausize >= chunksize);
 
 	/* Allocate an extent node with which to track the chunk. */
-	node = ipallocztm(tsd, CACHELINE_CEILING(sizeof(extent_node_t)),
-	    CACHELINE, false, tcache, true, arena);
+	iarena = (!tsdn_null(tsdn)) ? arena_ichoose(tsdn_tsd(tsdn), NULL) : a0get();
+	node = ipallocztm(tsdn, CACHELINE_CEILING(sizeof(extent_node_t)),
+	    CACHELINE, false, NULL, true, iarena);
 	if (node == NULL)
 		return (NULL);
 
 	/*
 	 * Copy zero into is_zeroed and pass the copy to chunk_alloc(), so that
 	 * it is possible to make correct junk/zero fill decisions below.
 	 */
 	is_zeroed = zero;
-	arena = arena_choose(tsd, arena);
-	if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(arena,
-	    usize, alignment, &is_zeroed)) == NULL) {
-		idalloctm(tsd, node, tcache, true, true);
+	if (likely(!tsdn_null(tsdn)))
+		arena = arena_choose(tsdn_tsd(tsdn), arena);
+	if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(tsdn,
+	    arena, usize, alignment, &is_zeroed)) == NULL) {
+		idalloctm(tsdn, node, NULL, true, true);
 		return (NULL);
 	}
 
 	extent_node_init(node, arena, ret, usize, is_zeroed, true);
 
-	if (huge_node_set(ret, node)) {
-		arena_chunk_dalloc_huge(arena, ret, usize);
-		idalloctm(tsd, node, tcache, true, true);
+	if (huge_node_set(tsdn, ret, node)) {
+		arena_chunk_dalloc_huge(tsdn, arena, ret, usize);
+		idalloctm(tsdn, node, NULL, true, true);
 		return (NULL);
 	}
 
 	/* Insert node into huge. */
-	malloc_mutex_lock(&arena->huge_mtx);
+	malloc_mutex_lock(tsdn, &arena->huge_mtx);
 	ql_elm_new(node, ql_link);
 	ql_tail_insert(&arena->huge, node, ql_link);
-	malloc_mutex_unlock(&arena->huge_mtx);
+	malloc_mutex_unlock(tsdn, &arena->huge_mtx);
 
 	if (zero || (config_fill && unlikely(opt_zero))) {
 		if (!is_zeroed)
 			memset(ret, 0, usize);
 	} else if (config_fill && unlikely(opt_junk_alloc))
-		memset(ret, 0xa5, usize);
+		memset(ret, JEMALLOC_ALLOC_JUNK, usize);
 
-	arena_decay_tick(tsd, arena);
+	arena_decay_tick(tsdn, arena);
 	return (ret);
 }
 
 #ifdef JEMALLOC_JET
 #undef huge_dalloc_junk
 #define	huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk_impl)
 #endif
 static void
@@ -107,28 +120,28 @@ huge_dalloc_junk(void *ptr, size_t usize
 {
 
 	if (config_fill && have_dss && unlikely(opt_junk_free)) {
 		/*
 		 * Only bother junk filling if the chunk isn't about to be
 		 * unmapped.
 		 */
 		if (!config_munmap || (have_dss && chunk_in_dss(ptr)))
-			memset(ptr, 0x5a, usize);
+			memset(ptr, JEMALLOC_FREE_JUNK, usize);
 	}
 }
 #ifdef JEMALLOC_JET
 #undef huge_dalloc_junk
 #define	huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk)
 huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl);
 #endif
 
 static void
-huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize_min,
-    size_t usize_max, bool zero)
+huge_ralloc_no_move_similar(tsdn_t *tsdn, void *ptr, size_t oldsize,
+    size_t usize_min, size_t usize_max, bool zero)
 {
 	size_t usize, usize_next;
 	extent_node_t *node;
 	arena_t *arena;
 	chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
 	bool pre_zeroed, post_zeroed;
 
 	/* Increase usize to incorporate extra. */
@@ -142,307 +155,319 @@ huge_ralloc_no_move_similar(void *ptr, s
 	node = huge_node_get(ptr);
 	arena = extent_node_arena_get(node);
 	pre_zeroed = extent_node_zeroed_get(node);
 
 	/* Fill if necessary (shrinking). */
 	if (oldsize > usize) {
 		size_t sdiff = oldsize - usize;
 		if (config_fill && unlikely(opt_junk_free)) {
-			memset((void *)((uintptr_t)ptr + usize), 0x5a, sdiff);
+			memset((void *)((uintptr_t)ptr + usize),
+			    JEMALLOC_FREE_JUNK, sdiff);
 			post_zeroed = false;
 		} else {
-			post_zeroed = !chunk_purge_wrapper(arena, &chunk_hooks,
-			    ptr, CHUNK_CEILING(oldsize), usize, sdiff);
+			post_zeroed = !chunk_purge_wrapper(tsdn, arena,
+			    &chunk_hooks, ptr, CHUNK_CEILING(oldsize), usize,
+			    sdiff);
 		}
 	} else
 		post_zeroed = pre_zeroed;
 
-	malloc_mutex_lock(&arena->huge_mtx);
+	malloc_mutex_lock(tsdn, &arena->huge_mtx);
 	/* Update the size of the huge allocation. */
+	huge_node_unset(ptr, node);
 	assert(extent_node_size_get(node) != usize);
 	extent_node_size_set(node, usize);
+	huge_node_reset(tsdn, ptr, node);
 	/* Update zeroed. */
 	extent_node_zeroed_set(node, post_zeroed);
-	malloc_mutex_unlock(&arena->huge_mtx);
+	malloc_mutex_unlock(tsdn, &arena->huge_mtx);
 
-	arena_chunk_ralloc_huge_similar(arena, ptr, oldsize, usize);
+	arena_chunk_ralloc_huge_similar(tsdn, arena, ptr, oldsize, usize);
 
 	/* Fill if necessary (growing). */
 	if (oldsize < usize) {
 		if (zero || (config_fill && unlikely(opt_zero))) {
 			if (!pre_zeroed) {
 				memset((void *)((uintptr_t)ptr + oldsize), 0,
 				    usize - oldsize);
 			}
 		} else if (config_fill && unlikely(opt_junk_alloc)) {
-			memset((void *)((uintptr_t)ptr + oldsize), 0xa5, usize -
-			    oldsize);
+			memset((void *)((uintptr_t)ptr + oldsize),
+			    JEMALLOC_ALLOC_JUNK, usize - oldsize);
 		}
 	}
 }
 
 static bool
-huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize)
+huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize,
+    size_t usize)
 {
 	extent_node_t *node;
 	arena_t *arena;
 	chunk_hooks_t chunk_hooks;
 	size_t cdiff;
 	bool pre_zeroed, post_zeroed;
 
 	node = huge_node_get(ptr);
 	arena = extent_node_arena_get(node);
 	pre_zeroed = extent_node_zeroed_get(node);
-	chunk_hooks = chunk_hooks_get(arena);
+	chunk_hooks = chunk_hooks_get(tsdn, arena);
 
 	assert(oldsize > usize);
 
 	/* Split excess chunks. */
 	cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize);
 	if (cdiff != 0 && chunk_hooks.split(ptr, CHUNK_CEILING(oldsize),
 	    CHUNK_CEILING(usize), cdiff, true, arena->ind))
 		return (true);
 
 	if (oldsize > usize) {
 		size_t sdiff = oldsize - usize;
 		if (config_fill && unlikely(opt_junk_free)) {
 			huge_dalloc_junk((void *)((uintptr_t)ptr + usize),
 			    sdiff);
 			post_zeroed = false;
 		} else {
-			post_zeroed = !chunk_purge_wrapper(arena, &chunk_hooks,
-			    CHUNK_ADDR2BASE((uintptr_t)ptr + usize),
-			    CHUNK_CEILING(oldsize),
+			post_zeroed = !chunk_purge_wrapper(tsdn, arena,
+			    &chunk_hooks, CHUNK_ADDR2BASE((uintptr_t)ptr +
+			    usize), CHUNK_CEILING(oldsize),
 			    CHUNK_ADDR2OFFSET((uintptr_t)ptr + usize), sdiff);
 		}
 	} else
 		post_zeroed = pre_zeroed;
 
-	malloc_mutex_lock(&arena->huge_mtx);
+	malloc_mutex_lock(tsdn, &arena->huge_mtx);
 	/* Update the size of the huge allocation. */
+	huge_node_unset(ptr, node);
 	extent_node_size_set(node, usize);
+	huge_node_reset(tsdn, ptr, node);
 	/* Update zeroed. */
 	extent_node_zeroed_set(node, post_zeroed);
-	malloc_mutex_unlock(&arena->huge_mtx);
+	malloc_mutex_unlock(tsdn, &arena->huge_mtx);
 
 	/* Zap the excess chunks. */
-	arena_chunk_ralloc_huge_shrink(arena, ptr, oldsize, usize);
+	arena_chunk_ralloc_huge_shrink(tsdn, arena, ptr, oldsize, usize);
 
 	return (false);
 }
 
 static bool
-huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t usize, bool zero) {
+huge_ralloc_no_move_expand(tsdn_t *tsdn, void *ptr, size_t oldsize,
+    size_t usize, bool zero) {
 	extent_node_t *node;
 	arena_t *arena;
 	bool is_zeroed_subchunk, is_zeroed_chunk;
 
 	node = huge_node_get(ptr);
 	arena = extent_node_arena_get(node);
-	malloc_mutex_lock(&arena->huge_mtx);
+	malloc_mutex_lock(tsdn, &arena->huge_mtx);
 	is_zeroed_subchunk = extent_node_zeroed_get(node);
-	malloc_mutex_unlock(&arena->huge_mtx);
+	malloc_mutex_unlock(tsdn, &arena->huge_mtx);
 
 	/*
-	 * Copy zero into is_zeroed_chunk and pass the copy to chunk_alloc(), so
-	 * that it is possible to make correct junk/zero fill decisions below.
+	 * Use is_zeroed_chunk to detect whether the trailing memory is zeroed,
+	 * update extent's zeroed field, and zero as necessary.
 	 */
-	is_zeroed_chunk = zero;
-
-	if (arena_chunk_ralloc_huge_expand(arena, ptr, oldsize, usize,
+	is_zeroed_chunk = false;
+	if (arena_chunk_ralloc_huge_expand(tsdn, arena, ptr, oldsize, usize,
 	     &is_zeroed_chunk))
 		return (true);
 
-	malloc_mutex_lock(&arena->huge_mtx);
-	/* Update the size of the huge allocation. */
+	malloc_mutex_lock(tsdn, &arena->huge_mtx);
+	huge_node_unset(ptr, node);
 	extent_node_size_set(node, usize);
-	malloc_mutex_unlock(&arena->huge_mtx);
+	extent_node_zeroed_set(node, extent_node_zeroed_get(node) &&
+	    is_zeroed_chunk);
+	huge_node_reset(tsdn, ptr, node);
+	malloc_mutex_unlock(tsdn, &arena->huge_mtx);
 
 	if (zero || (config_fill && unlikely(opt_zero))) {
 		if (!is_zeroed_subchunk) {
 			memset((void *)((uintptr_t)ptr + oldsize), 0,
 			    CHUNK_CEILING(oldsize) - oldsize);
 		}
 		if (!is_zeroed_chunk) {
 			memset((void *)((uintptr_t)ptr +
 			    CHUNK_CEILING(oldsize)), 0, usize -
 			    CHUNK_CEILING(oldsize));
 		}
 	} else if (config_fill && unlikely(opt_junk_alloc)) {
-		memset((void *)((uintptr_t)ptr + oldsize), 0xa5, usize -
-		    oldsize);
+		memset((void *)((uintptr_t)ptr + oldsize), JEMALLOC_ALLOC_JUNK,
+		    usize - oldsize);
 	}
 
 	return (false);
 }
 
 bool
-huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min,
+huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min,
     size_t usize_max, bool zero)
 {
 
 	assert(s2u(oldsize) == oldsize);
 	/* The following should have been caught by callers. */
 	assert(usize_min > 0 && usize_max <= HUGE_MAXCLASS);
 
 	/* Both allocations must be huge to avoid a move. */
 	if (oldsize < chunksize || usize_max < chunksize)
 		return (true);
 
 	if (CHUNK_CEILING(usize_max) > CHUNK_CEILING(oldsize)) {
 		/* Attempt to expand the allocation in-place. */
-		if (!huge_ralloc_no_move_expand(ptr, oldsize, usize_max,
+		if (!huge_ralloc_no_move_expand(tsdn, ptr, oldsize, usize_max,
 		    zero)) {
-			arena_decay_tick(tsd, huge_aalloc(ptr));
+			arena_decay_tick(tsdn, huge_aalloc(ptr));
 			return (false);
 		}
 		/* Try again, this time with usize_min. */
 		if (usize_min < usize_max && CHUNK_CEILING(usize_min) >
-		    CHUNK_CEILING(oldsize) && huge_ralloc_no_move_expand(ptr,
-		    oldsize, usize_min, zero)) {
-			arena_decay_tick(tsd, huge_aalloc(ptr));
+		    CHUNK_CEILING(oldsize) && huge_ralloc_no_move_expand(tsdn,
+		    ptr, oldsize, usize_min, zero)) {
+			arena_decay_tick(tsdn, huge_aalloc(ptr));
 			return (false);
 		}
 	}
 
 	/*
 	 * Avoid moving the allocation if the existing chunk size accommodates
 	 * the new size.
 	 */
 	if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(usize_min)
 	    && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(usize_max)) {
-		huge_ralloc_no_move_similar(ptr, oldsize, usize_min, usize_max,
-		    zero);
-		arena_decay_tick(tsd, huge_aalloc(ptr));
+		huge_ralloc_no_move_similar(tsdn, ptr, oldsize, usize_min,
+		    usize_max, zero);
+		arena_decay_tick(tsdn, huge_aalloc(ptr));
 		return (false);
 	}
 
 	/* Attempt to shrink the allocation in-place. */
 	if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize_max)) {
-		if (!huge_ralloc_no_move_shrink(ptr, oldsize, usize_max)) {
-			arena_decay_tick(tsd, huge_aalloc(ptr));
+		if (!huge_ralloc_no_move_shrink(tsdn, ptr, oldsize,
+		    usize_max)) {
+			arena_decay_tick(tsdn, huge_aalloc(ptr));
 			return (false);
 		}
 	}
 	return (true);
 }
 
 static void *
-huge_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize,
-    size_t alignment, bool zero, tcache_t *tcache)
+huge_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize,
+    size_t alignment, bool zero)
 {
 
 	if (alignment <= chunksize)
-		return (huge_malloc(tsd, arena, usize, zero, tcache));
-	return (huge_palloc(tsd, arena, usize, alignment, zero, tcache));
+		return (huge_malloc(tsdn, arena, usize, zero));
+	return (huge_palloc(tsdn, arena, usize, alignment, zero));
 }
 
 void *
-huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize,
-    size_t alignment, bool zero, tcache_t *tcache)
+huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize,
+    size_t usize, size_t alignment, bool zero, tcache_t *tcache)
 {
 	void *ret;
 	size_t copysize;
 
 	/* The following should have been caught by callers. */
 	assert(usize > 0 && usize <= HUGE_MAXCLASS);
 
 	/* Try to avoid moving the allocation. */
-	if (!huge_ralloc_no_move(tsd, ptr, oldsize, usize, usize, zero))
+	if (!huge_ralloc_no_move(tsd_tsdn(tsd), ptr, oldsize, usize, usize,
+	    zero))
 		return (ptr);
 
 	/*
 	 * usize and oldsize are different enough that we need to use a
 	 * different size class.  In that case, fall back to allocating new
 	 * space and copying.
 	 */
-	ret = huge_ralloc_move_helper(tsd, arena, usize, alignment, zero,
-	    tcache);
+	ret = huge_ralloc_move_helper(tsd_tsdn(tsd), arena, usize, alignment,
+	    zero);
 	if (ret == NULL)
 		return (NULL);
 
 	copysize = (usize < oldsize) ? usize : oldsize;
 	memcpy(ret, ptr, copysize);
-	isqalloc(tsd, ptr, oldsize, tcache);
+	isqalloc(tsd, ptr, oldsize, tcache, true);
 	return (ret);
 }
 
 void
-huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache)
+huge_dalloc(tsdn_t *tsdn, void *ptr)
 {
 	extent_node_t *node;
 	arena_t *arena;
 
 	node = huge_node_get(ptr);
 	arena = extent_node_arena_get(node);
 	huge_node_unset(ptr, node);
-	malloc_mutex_lock(&arena->huge_mtx);
+	malloc_mutex_lock(tsdn, &arena->huge_mtx);
 	ql_remove(&arena->huge, node, ql_link);
-	malloc_mutex_unlock(&arena->huge_mtx);
+	malloc_mutex_unlock(tsdn, &arena->huge_mtx);
 
 	huge_dalloc_junk(extent_node_addr_get(node),
 	    extent_node_size_get(node));
-	arena_chunk_dalloc_huge(extent_node_arena_get(node),
+	arena_chunk_dalloc_huge(tsdn, extent_node_arena_get(node),
 	    extent_node_addr_get(node), extent_node_size_get(node));
-	idalloctm(tsd, node, tcache, true, true);
+	idalloctm(tsdn, node, NULL, true, true);
 
-	arena_decay_tick(tsd, arena);
+	arena_decay_tick(tsdn, arena);
 }
 
 arena_t *
 huge_aalloc(const void *ptr)
 {
 
 	return (extent_node_arena_get(huge_node_get(ptr)));
 }
 
 size_t
-huge_salloc(const void *ptr)
+huge_salloc(tsdn_t *tsdn, const void *ptr)
 {
 	size_t size;
 	extent_node_t *node;
 	arena_t *arena;
 
 	node = huge_node_get(ptr);
 	arena = extent_node_arena_get(node);
-	malloc_mutex_lock(&arena->huge_mtx);
+	malloc_mutex_lock(tsdn, &arena->huge_mtx);
 	size = extent_node_size_get(node);
-	malloc_mutex_unlock(&arena->huge_mtx);
+	malloc_mutex_unlock(tsdn, &arena->huge_mtx);
 
 	return (size);
 }
 
 prof_tctx_t *
-huge_prof_tctx_get(const void *ptr)
+huge_prof_tctx_get(tsdn_t *tsdn, const void *ptr)
 {
 	prof_tctx_t *tctx;
 	extent_node_t *node;
 	arena_t *arena;
 
 	node = huge_node_get(ptr);
 	arena = extent_node_arena_get(node);
-	malloc_mutex_lock(&arena->huge_mtx);
+	malloc_mutex_lock(tsdn, &arena->huge_mtx);
 	tctx = extent_node_prof_tctx_get(node);
-	malloc_mutex_unlock(&arena->huge_mtx);
+	malloc_mutex_unlock(tsdn, &arena->huge_mtx);
 
 	return (tctx);
 }
 
 void
-huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx)
+huge_prof_tctx_set(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx)
 {
 	extent_node_t *node;
 	arena_t *arena;
 
 	node = huge_node_get(ptr);
 	arena = extent_node_arena_get(node);
-	malloc_mutex_lock(&arena->huge_mtx);
+	malloc_mutex_lock(tsdn, &arena->huge_mtx);
 	extent_node_prof_tctx_set(node, tctx);
-	malloc_mutex_unlock(&arena->huge_mtx);
+	malloc_mutex_unlock(tsdn, &arena->huge_mtx);
 }
 
 void
-huge_prof_tctx_reset(const void *ptr)
+huge_prof_tctx_reset(tsdn_t *tsdn, const void *ptr)
 {
 
-	huge_prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U);
+	huge_prof_tctx_set(tsdn, ptr, (prof_tctx_t *)(uintptr_t)1U);
 }
--- a/memory/jemalloc/src/src/jemalloc.c
+++ b/memory/jemalloc/src/src/jemalloc.c
@@ -1,16 +1,20 @@
 #define	JEMALLOC_C_
 #include "jemalloc/internal/jemalloc_internal.h"
 
 /******************************************************************************/
 /* Data. */
 
 /* Runtime configuration options. */
-const char	*je_malloc_conf JEMALLOC_ATTR(weak);
+const char	*je_malloc_conf
+#ifndef _WIN32
+    JEMALLOC_ATTR(weak)
+#endif
+    ;
 bool	opt_abort =
 #ifdef JEMALLOC_DEBUG
     true
 #else
     false
 #endif
     ;
 const char	*opt_junk =
@@ -55,49 +59,60 @@ static malloc_mutex_t	arenas_lock;
  *
  * arenas[0..narenas_auto) are used for automatic multiplexing of threads and
  * arenas.  arenas[narenas_auto..narenas_total) are only used if the application
  * takes some action to create them and allocate from them.
  */
 arena_t			**arenas;
 static unsigned		narenas_total; /* Use narenas_total_*(). */
 static arena_t		*a0; /* arenas[0]; read-only after initialization. */
-static unsigned		narenas_auto; /* Read-only after initialization. */
+unsigned		narenas_auto; /* Read-only after initialization. */
 
 typedef enum {
 	malloc_init_uninitialized	= 3,
 	malloc_init_a0_initialized	= 2,
 	malloc_init_recursible		= 1,
 	malloc_init_initialized		= 0 /* Common case --> jnz. */
 } malloc_init_t;
 static malloc_init_t	malloc_init_state = malloc_init_uninitialized;
 
-/* 0 should be the common case.  Set to true to trigger initialization. */
+/* False should be the common case.  Set to true to trigger initialization. */
 static bool	malloc_slow = true;
 
-/* When malloc_slow != 0, set the corresponding bits for sanity check. */
+/* When malloc_slow is true, set the corresponding bits for sanity check. */
 enum {
 	flag_opt_junk_alloc	= (1U),
 	flag_opt_junk_free	= (1U << 1),
 	flag_opt_quarantine	= (1U << 2),
 	flag_opt_zero		= (1U << 3),
 	flag_opt_utrace		= (1U << 4),
 	flag_in_valgrind	= (1U << 5),
 	flag_opt_xmalloc	= (1U << 6)
 };
 static uint8_t	malloc_slow_flags;
 
-/* Last entry for overflow detection only.  */
 JEMALLOC_ALIGNED(CACHELINE)
-const size_t	index2size_tab[NSIZES+1] = {
-#define	SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \
+const size_t	pind2sz_tab[NPSIZES] = {
+#define	PSZ_yes(lg_grp, ndelta, lg_delta)				\
+	(((ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta))),
+#define	PSZ_no(lg_grp, ndelta, lg_delta)
+#define	SC(index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup)	\
+	PSZ_##psz(lg_grp, ndelta, lg_delta)
+	SIZE_CLASSES
+#undef PSZ_yes
+#undef PSZ_no
+#undef SC
+};
+
+JEMALLOC_ALIGNED(CACHELINE)
+const size_t	index2size_tab[NSIZES] = {
+#define	SC(index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup)	\
 	((ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta)),
 	SIZE_CLASSES
 #undef SC
-	ZU(0)
 };
 
 JEMALLOC_ALIGNED(CACHELINE)
 const uint8_t	size2index_tab[] = {
 #if LG_TINY_MIN == 0
 #warning "Dangerous LG_TINY_MIN"
 #define	S2B_0(i)	i,
 #elif LG_TINY_MIN == 1
@@ -156,17 +171,17 @@ const uint8_t	size2index_tab[] = {
 #endif
 #if LG_TINY_MIN < 10
 #define	S2B_10(i)	S2B_9(i) S2B_9(i)
 #endif
 #if LG_TINY_MIN < 11
 #define	S2B_11(i)	S2B_10(i) S2B_10(i)
 #endif
 #define	S2B_no(i)
-#define	SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \
+#define	SC(index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup)	\
 	S2B_##lg_delta_lookup(index)
 	SIZE_CLASSES
 #undef S2B_3
 #undef S2B_4
 #undef S2B_5
 #undef S2B_6
 #undef S2B_7
 #undef S2B_8
@@ -207,17 +222,17 @@ static void WINAPI
 	 * e.g. setup chunk hooks, it may end up running before this one,
 	 * and malloc_init_hard will crash trying to lock the uninitialized
 	 * lock. So we force an initialization of the lock in
 	 * malloc_init_hard as well. We don't try to care about atomicity
 	 * of the accessed to the init_lock_initialized boolean, since it
 	 * really only matters early in the process creation, before any
 	 * separate thread normally starts doing anything. */
 	if (!init_lock_initialized)
-		malloc_mutex_init(&init_lock);
+		malloc_mutex_init(&init_lock, "init", WITNESS_RANK_INIT);
 	init_lock_initialized = true;
 }
 
 #ifdef _MSC_VER
 #  pragma section(".CRT$XCU", read)
 JEMALLOC_SECTION(".CRT$XCU") JEMALLOC_ATTR(used)
 static const void (WINAPI *init_init_lock)(void) = _init_init_lock;
 #endif
@@ -302,36 +317,43 @@ malloc_init(void)
 	if (unlikely(!malloc_initialized()) && malloc_init_hard())
 		return (true);
 	malloc_thread_init();
 
 	return (false);
 }
 
 /*
- * The a0*() functions are used instead of i[mcd]alloc() in situations that
+ * The a0*() functions are used instead of i{d,}alloc() in situations that
  * cannot tolerate TLS variable access.
  */
 
 static void *
 a0ialloc(size_t size, bool zero, bool is_metadata)
 {
 
 	if (unlikely(malloc_init_a0()))
 		return (NULL);
 
-	return (iallocztm(NULL, size, size2index(size), zero, false,
-	    is_metadata, arena_get(0, false), true));
+	return (iallocztm(TSDN_NULL, size, size2index(size), zero, NULL,
+	    is_metadata, arena_get(TSDN_NULL, 0, true), true));
 }
 
 static void
 a0idalloc(void *ptr, bool is_metadata)
 {
 
-	idalloctm(NULL, ptr, false, is_metadata, true);
+	idalloctm(TSDN_NULL, ptr, false, is_metadata, true);
+}
+
+arena_t *
+a0get(void)
+{
+
+	return (a0);
 }
 
 void *
 a0malloc(size_t size)
 {
 
 	return (a0ialloc(size, false, true));
 }
@@ -408,85 +430,93 @@ unsigned
 narenas_total_get(void)
 {
 
 	return (atomic_read_u(&narenas_total));
 }
 
 /* Create a new arena and insert it into the arenas array at index ind. */
 static arena_t *
-arena_init_locked(unsigned ind)
+arena_init_locked(tsdn_t *tsdn, unsigned ind)
 {
 	arena_t *arena;
 
 	assert(ind <= narenas_total_get());
 	if (ind > MALLOCX_ARENA_MAX)
 		return (NULL);
 	if (ind == narenas_total_get())
 		narenas_total_inc();
 
 	/*
 	 * Another thread may have already initialized arenas[ind] if it's an
 	 * auto arena.
 	 */
-	arena = arena_get(ind, false);
+	arena = arena_get(tsdn, ind, false);
 	if (arena != NULL) {
 		assert(ind < narenas_auto);
 		return (arena);
 	}
 
 	/* Actually initialize the arena. */
-	arena = arena_new(ind);
+	arena = arena_new(tsdn, ind);
 	arena_set(ind, arena);
 	return (arena);
 }
 
 arena_t *
-arena_init(unsigned ind)
+arena_init(tsdn_t *tsdn, unsigned ind)
 {
 	arena_t *arena;
 
-	malloc_mutex_lock(&arenas_lock);
-	arena = arena_init_locked(ind);
-	malloc_mutex_unlock(&arenas_lock);
+	malloc_mutex_lock(tsdn, &arenas_lock);
+	arena = arena_init_locked(tsdn, ind);
+	malloc_mutex_unlock(tsdn, &arenas_lock);
 	return (arena);
 }
 
 static void
-arena_bind(tsd_t *tsd, unsigned ind)
+arena_bind(tsd_t *tsd, unsigned ind, bool internal)
 {
 	arena_t *arena;
 
-	arena = arena_get(ind, false);
-	arena_nthreads_inc(arena);
-
-	if (tsd_nominal(tsd))
+	if (!tsd_nominal(tsd))
+		return;
+
+	arena = arena_get(tsd_tsdn(tsd), ind, false);
+	arena_nthreads_inc(arena, internal);
+
+	if (internal)
+		tsd_iarena_set(tsd, arena);
+	else
 		tsd_arena_set(tsd, arena);
 }
 
 void
 arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind)
 {
 	arena_t *oldarena, *newarena;
 
-	oldarena = arena_get(oldind, false);
-	newarena = arena_get(newind, false);
-	arena_nthreads_dec(oldarena);
-	arena_nthreads_inc(newarena);
+	oldarena = arena_get(tsd_tsdn(tsd), oldind, false);
+	newarena = arena_get(tsd_tsdn(tsd), newind, false);
+	arena_nthreads_dec(oldarena, false);
+	arena_nthreads_inc(newarena, false);
 	tsd_arena_set(tsd, newarena);
 }
 
 static void
-arena_unbind(tsd_t *tsd, unsigned ind)
+arena_unbind(tsd_t *tsd, unsigned ind, bool internal)
 {
 	arena_t *arena;
 
-	arena = arena_get(ind, false);
-	arena_nthreads_dec(arena);
-	tsd_arena_set(tsd, NULL);
+	arena = arena_get(tsd_tsdn(tsd), ind, false);
+	arena_nthreads_dec(arena, internal);
+	if (internal)
+		tsd_iarena_set(tsd, NULL);
+	else
+		tsd_arena_set(tsd, NULL);
 }
 
 arena_tdata_t *
 arena_tdata_get_hard(tsd_t *tsd, unsigned ind)
 {
 	arena_tdata_t *tdata, *arenas_tdata_old;
 	arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd);
 	unsigned narenas_tdata_old, i;
@@ -557,72 +587,99 @@ arena_tdata_get_hard(tsd_t *tsd, unsigne
 label_return:
 	if (arenas_tdata_old != NULL)
 		a0dalloc(arenas_tdata_old);
 	return (tdata);
 }
 
 /* Slow path, called only by arena_choose(). */
 arena_t *
-arena_choose_hard(tsd_t *tsd)
+arena_choose_hard(tsd_t *tsd, bool internal)
 {
-	arena_t *ret;
+	arena_t *ret JEMALLOC_CC_SILENCE_INIT(NULL);
 
 	if (narenas_auto > 1) {
-		unsigned i, choose, first_null;
-
-		choose = 0;
+		unsigned i, j, choose[2], first_null;
+
+		/*
+		 * Determine binding for both non-internal and internal
+		 * allocation.
+		 *
+		 *   choose[0]: For application allocation.
+		 *   choose[1]: For internal metadata allocation.
+		 */
+
+		for (j = 0; j < 2; j++)
+			choose[j] = 0;
+
 		first_null = narenas_auto;
-		malloc_mutex_lock(&arenas_lock);
-		assert(arena_get(0, false) != NULL);
+		malloc_mutex_lock(tsd_tsdn(tsd), &arenas_lock);
+		assert(arena_get(tsd_tsdn(tsd), 0, false) != NULL);
 		for (i = 1; i < narenas_auto; i++) {
-			if (arena_get(i, false) != NULL) {
+			if (arena_get(tsd_tsdn(tsd), i, false) != NULL) {
 				/*
 				 * Choose the first arena that has the lowest
 				 * number of threads assigned to it.
 				 */
-				if (arena_nthreads_get(arena_get(i, false)) <
-				    arena_nthreads_get(arena_get(choose,
-				    false)))
-					choose = i;
+				for (j = 0; j < 2; j++) {
+					if (arena_nthreads_get(arena_get(
+					    tsd_tsdn(tsd), i, false), !!j) <
+					    arena_nthreads_get(arena_get(
+					    tsd_tsdn(tsd), choose[j], false),
+					    !!j))
+						choose[j] = i;
+				}
 			} else if (first_null == narenas_auto) {
 				/*
 				 * Record the index of the first uninitialized
 				 * arena, in case all extant arenas are in use.
 				 *
 				 * NB: It is possible for there to be
 				 * discontinuities in terms of initialized
 				 * versus uninitialized arenas, due to the
 				 * "thread.arena" mallctl.
 				 */
 				first_null = i;
 			}
 		}
 
-		if (arena_nthreads_get(arena_get(choose, false)) == 0
-		    || first_null == narenas_auto) {
-			/*
-			 * Use an unloaded arena, or the least loaded arena if
-			 * all arenas are already initialized.
-			 */
-			ret = arena_get(choose, false);
-		} else {
-			/* Initialize a new arena. */
-			choose = first_null;
-			ret = arena_init_locked(choose);
-			if (ret == NULL) {
-				malloc_mutex_unlock(&arenas_lock);
-				return (NULL);
+		for (j = 0; j < 2; j++) {
+			if (arena_nthreads_get(arena_get(tsd_tsdn(tsd),
+			    choose[j], false), !!j) == 0 || first_null ==
+			    narenas_auto) {
+				/*
+				 * Use an unloaded arena, or the least loaded
+				 * arena if all arenas are already initialized.
+				 */
+				if (!!j == internal) {
+					ret = arena_get(tsd_tsdn(tsd),
+					    choose[j], false);
+				}
+			} else {
+				arena_t *arena;
+
+				/* Initialize a new arena. */
+				choose[j] = first_null;
+				arena = arena_init_locked(tsd_tsdn(tsd),
+				    choose[j]);
+				if (arena == NULL) {
+					malloc_mutex_unlock(tsd_tsdn(tsd),
+					    &arenas_lock);
+					return (NULL);
+				}
+				if (!!j == internal)
+					ret = arena;
 			}
+			arena_bind(tsd, choose[j], !!j);
 		}
-		arena_bind(tsd, choose);
-		malloc_mutex_unlock(&arenas_lock);
+		malloc_mutex_unlock(tsd_tsdn(tsd), &arenas_lock);
 	} else {
-		ret = arena_get(0, false);
-		arena_bind(tsd, 0);
+		ret = arena_get(tsd_tsdn(tsd), 0, false);
+		arena_bind(tsd, 0, false);
+		arena_bind(tsd, 0, true);
 	}
 
 	return (ret);
 }
 
 void
 thread_allocated_cleanup(tsd_t *tsd)
 {
@@ -633,23 +690,33 @@ thread_allocated_cleanup(tsd_t *tsd)
 void
 thread_deallocated_cleanup(tsd_t *tsd)
 {
 
 	/* Do nothing. */
 }
 
 void
+iarena_cleanup(tsd_t *tsd)
+{
+	arena_t *iarena;
+
+	iarena = tsd_iarena_get(tsd);
+	if (iarena != NULL)
+		arena_unbind(tsd, iarena->ind, true);
+}
+
+void
 arena_cleanup(tsd_t *tsd)
 {
 	arena_t *arena;
 
 	arena = tsd_arena_get(tsd);
 	if (arena != NULL)
-		arena_unbind(tsd, arena->ind);
+		arena_unbind(tsd, arena->ind, false);
 }
 
 void
 arenas_tdata_cleanup(tsd_t *tsd)
 {
 	arena_tdata_t *arenas_tdata;
 
 	/* Prevent tsd->arenas_tdata from being (re)created. */
@@ -676,41 +743,44 @@ arenas_tdata_bypass_cleanup(tsd_t *tsd)
 	/* Do nothing. */
 }
 
 static void
 stats_print_atexit(void)
 {
 
 	if (config_tcache && config_stats) {
+		tsdn_t *tsdn;
 		unsigned narenas, i;
 
+		tsdn = tsdn_fetch();
+
 		/*
 		 * Merge stats from extant threads.  This is racy, since
 		 * individual threads do not lock when recording tcache stats
 		 * events.  As a consequence, the final stats may be slightly
 		 * out of date by the time they are reported, if other threads
 		 * continue to allocate.
 		 */
 		for (i = 0, narenas = narenas_total_get(); i < narenas; i++) {
-			arena_t *arena = arena_get(i, false);
+			arena_t *arena = arena_get(tsdn, i, false);
 			if (arena != NULL) {
 				tcache_t *tcache;
 
 				/*
 				 * tcache_stats_merge() locks bins, so if any
 				 * code is introduced that acquires both arena
 				 * and bin locks in the opposite order,
 				 * deadlocks may result.
 				 */
-				malloc_mutex_lock(&arena->lock);
+				malloc_mutex_lock(tsdn, &arena->lock);
 				ql_foreach(tcache, &arena->tcache_ql, link) {
-					tcache_stats_merge(tcache, arena);
+					tcache_stats_merge(tsdn, tcache, arena);
 				}
-				malloc_mutex_unlock(&arena->lock);
+				malloc_mutex_unlock(tsdn, &arena->lock);
 			}
 		}
 	}
 	je_malloc_stats_print(NULL, NULL, NULL);
 }
 
 /*
  * End miscellaneous support functions.
@@ -737,16 +807,30 @@ static unsigned
 malloc_ncpus(void)
 {
 	long result;
 
 #ifdef _WIN32
 	SYSTEM_INFO si;
 	GetSystemInfo(&si);
 	result = si.dwNumberOfProcessors;
+#elif defined(JEMALLOC_GLIBC_MALLOC_HOOK) && defined(CPU_COUNT)
+	/*
+	 * glibc >= 2.6 has the CPU_COUNT macro.
+	 *
+	 * glibc's sysconf() uses isspace().  glibc allocates for the first time
+	 * *before* setting up the isspace tables.  Therefore we need a
+	 * different method to get the number of CPUs.
+	 */
+	{
+		cpu_set_t set;
+
+		pthread_getaffinity_np(pthread_self(), sizeof(set), &set);
+		result = CPU_COUNT(&set);
+	}
 #else
 	result = sysconf(_SC_NPROCESSORS_ONLN);
 #endif
 	return ((result == -1) ? 1 : (unsigned)result);
 }
 
 static bool
 malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p,
@@ -1096,31 +1180,53 @@ malloc_conf_init(void)
 			CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, "lg_dirty_mult",
 			    -1, (sizeof(size_t) << 3) - 1)
 			CONF_HANDLE_SSIZE_T(opt_decay_time, "decay_time", -1,
 			    NSTIME_SEC_MAX);
 			CONF_HANDLE_BOOL(opt_stats_print, "stats_print", true)
 			if (config_fill) {
 				if (CONF_MATCH("junk")) {
 					if (CONF_MATCH_VALUE("true")) {
-						opt_junk = "true";
-						opt_junk_alloc = opt_junk_free =
-						    true;
+						if (config_valgrind &&
+						    unlikely(in_valgrind)) {
+							malloc_conf_error(
+							"Deallocation-time "
+							"junk filling cannot "
+							"be enabled while "
+							"running inside "
+							"Valgrind", k, klen, v,
+							vlen);
+						} else {
+							opt_junk = "true";
+							opt_junk_alloc = true;
+							opt_junk_free = true;
+						}
 					} else if (CONF_MATCH_VALUE("false")) {
 						opt_junk = "false";
 						opt_junk_alloc = opt_junk_free =
 						    false;
 					} else if (CONF_MATCH_VALUE("alloc")) {
 						opt_junk = "alloc";
 						opt_junk_alloc = true;
 						opt_junk_free = false;
 					} else if (CONF_MATCH_VALUE("free")) {
-						opt_junk = "free";
-						opt_junk_alloc = false;
-						opt_junk_free = true;
+						if (config_valgrind &&
+						    unlikely(in_valgrind)) {
+							malloc_conf_error(
+							"Deallocation-time "
+							"junk filling cannot "
+							"be enabled while "
+							"running inside "
+							"Valgrind", k, klen, v,
+							vlen);
+						} else {
+							opt_junk = "free";
+							opt_junk_alloc = false;
+							opt_junk_free = true;
+						}
 					} else {
 						malloc_conf_error(
 						    "Invalid conf value", k,
 						    klen, v, vlen);
 					}
 					continue;
 				}
 				CONF_HANDLE_SIZE_T(opt_quarantine, "quarantine",
@@ -1181,149 +1287,137 @@ malloc_conf_init(void)
 #undef CONF_HANDLE_BOOL
 #undef CONF_HANDLE_SIZE_T
 #undef CONF_HANDLE_SSIZE_T
 #undef CONF_HANDLE_CHAR_P
 		}
 	}
 }
 
-/* init_lock must be held. */
 static bool
 malloc_init_hard_needed(void)
 {
 
 	if (malloc_initialized() || (IS_INITIALIZER && malloc_init_state ==
 	    malloc_init_recursible)) {
 		/*
 		 * Another thread initialized the allocator before this one
 		 * acquired init_lock, or this thread is the initializing
 		 * thread, and it is recursively allocating.
 		 */
 		return (false);
 	}
 #ifdef JEMALLOC_THREADED_INIT
 	if (malloc_initializer != NO_INITIALIZER && !IS_INITIALIZER) {
+		spin_t spinner;
+
 		/* Busy-wait until the initializing thread completes. */
+		spin_init(&spinner);
 		do {
-			malloc_mutex_unlock(&init_lock);
-			CPU_SPINWAIT;
-			malloc_mutex_lock(&init_lock);
+			malloc_mutex_unlock(TSDN_NULL, &init_lock);
+			spin_adaptive(&spinner);
+			malloc_mutex_lock(TSDN_NULL, &init_lock);
 		} while (!malloc_initialized());
 		return (false);
 	}
 #endif
 	return (true);
 }
 
-/* init_lock must be held. */
 static bool
-malloc_init_hard_a0_locked(void)
+malloc_init_hard_a0_locked()
 {
 
 	malloc_initializer = INITIALIZER;
 
 	if (config_prof)
 		prof_boot0();
 	malloc_conf_init();
 	if (opt_stats_print) {
 		/* Print statistics at exit. */
 		if (atexit(stats_print_atexit) != 0) {
 			malloc_write("<jemalloc>: Error in atexit()\n");
 			if (opt_abort)
 				abort();
 		}
 	}
+	pages_boot();
 	if (base_boot())
 		return (true);
 	if (chunk_boot())
 		return (true);
 	if (ctl_boot())
 		return (true);
 	if (config_prof)
 		prof_boot1();
-	if (arena_boot())
+	arena_boot();
+	if (config_tcache && tcache_boot(TSDN_NULL))
 		return (true);
-	if (config_tcache && tcache_boot())
-		return (true);
-	if (malloc_mutex_init(&arenas_lock))
+	if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS))
 		return (true);
 	/*
 	 * Create enough scaffolding to allow recursive allocation in
 	 * malloc_ncpus().
 	 */
 	narenas_auto = 1;
 	narenas_total_set(narenas_auto);
 	arenas = &a0;
 	memset(arenas, 0, sizeof(arena_t *) * narenas_auto);
 	/*
 	 * Initialize one arena here.  The rest are lazily created in
 	 * arena_choose_hard().
 	 */
-	if (arena_init(0) == NULL)
+	if (arena_init(TSDN_NULL, 0) == NULL)
 		return (true);
+
 	malloc_init_state = malloc_init_a0_initialized;
+
 	return (false);
 }
 
 static bool
 malloc_init_hard_a0(void)
 {
 	bool ret;
 
-	malloc_mutex_lock(&init_lock);
+	malloc_mutex_lock(TSDN_NULL, &init_lock);
 	ret = malloc_init_hard_a0_locked();
-	malloc_mutex_unlock(&init_lock);
+	malloc_mutex_unlock(TSDN_NULL, &init_lock);
 	return (ret);
 }
 
-/*
- * Initialize data structures which may trigger recursive allocation.
- *
- * init_lock must be held.
- */
+/* Initialize data structures which may trigger recursive allocation. */
 static bool
 malloc_init_hard_recursible(void)
 {
-	bool ret = false;
 
 	malloc_init_state = malloc_init_recursible;
-	malloc_mutex_unlock(&init_lock);
-
-	/* LinuxThreads' pthread_setspecific() allocates. */
-	if (malloc_tsd_boot0()) {
-		ret = true;
-		goto label_return;
-	}
 
 	ncpus = malloc_ncpus();
 
 #if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE) \
     && !defined(_WIN32) && !defined(__native_client__))
 	/* LinuxThreads' pthread_atfork() allocates. */
 	if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent,
 	    jemalloc_postfork_child) != 0) {
-		ret = true;
 		malloc_write("<jemalloc>: Error in pthread_atfork()\n");
 		if (opt_abort)
 			abort();
+		return (true);
 	}
 #endif
 
-label_return:
-	malloc_mutex_lock(&init_lock);
-	return (ret);
+	return (false);
 }
 
-/* init_lock must be held. */
 static bool
-malloc_init_hard_finish(void)
+malloc_init_hard_finish(tsdn_t *tsdn)
 {
 
-	if (mutex_boot())
+	if (malloc_mutex_boot())
 		return (true);
 
 	if (opt_narenas == 0) {
 		/*
 		 * For SMP systems, create more than one arena per CPU by
 		 * default.
 		 */
 		if (ncpus > 1)
@@ -1338,181 +1432,202 @@ malloc_init_hard_finish(void)
 	if (narenas_auto > MALLOCX_ARENA_MAX) {
 		narenas_auto = MALLOCX_ARENA_MAX;
 		malloc_printf("<jemalloc>: Reducing narenas to limit (%d)\n",
 		    narenas_auto);
 	}
 	narenas_total_set(narenas_auto);
 
 	/* Allocate and initialize arenas. */
-	arenas = (arena_t **)base_alloc(sizeof(arena_t *) *
+	arenas = (arena_t **)base_alloc(tsdn, sizeof(arena_t *) *
 	    (MALLOCX_ARENA_MAX+1));
 	if (arenas == NULL)
 		return (true);
 	/* Copy the pointer to the one arena that was already initialized. */
 	arena_set(0, a0);
 
 	malloc_init_state = malloc_init_initialized;
 	malloc_slow_flag_init();
 
 	return (false);
 }
 
 static bool
 malloc_init_hard(void)
 {
+	tsd_t *tsd;
 
 #if defined(_WIN32) && _WIN32_WINNT < 0x0600
 	_init_init_lock();
 #endif
-	malloc_mutex_lock(&init_lock);
+	malloc_mutex_lock(TSDN_NULL, &init_lock);
 	if (!malloc_init_hard_needed()) {
-		malloc_mutex_unlock(&init_lock);
+		malloc_mutex_unlock(TSDN_NULL, &init_lock);
 		return (false);
 	}
 
 	if (malloc_init_state != malloc_init_a0_initialized &&
 	    malloc_init_hard_a0_locked()) {
-		malloc_mutex_unlock(&init_lock);
-		return (true);
-	}
-
-	if (malloc_init_hard_recursible()) {
-		malloc_mutex_unlock(&init_lock);
+		malloc_mutex_unlock(TSDN_NULL, &init_lock);
 		return (true);
 	}
 
-	if (config_prof && prof_boot2()) {
-		malloc_mutex_unlock(&init_lock);
+	malloc_mutex_unlock(TSDN_NULL, &init_lock);
+	/* Recursive allocation relies on functional tsd. */
+	tsd = malloc_tsd_boot0();
+	if (tsd == NULL)
+		return (true);
+	if (malloc_init_hard_recursible())
+		return (true);
+	malloc_mutex_lock(tsd_tsdn(tsd), &init_lock);
+
+	if (config_prof && prof_boot2(tsd)) {
+		malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
 		return (true);
 	}
 
-	if (malloc_init_hard_finish()) {
-		malloc_mutex_unlock(&init_lock);
+	if (malloc_init_hard_finish(tsd_tsdn(tsd))) {
+		malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
 		return (true);
 	}
 
-	malloc_mutex_unlock(&init_lock);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
 	malloc_tsd_boot1();
 	return (false);
 }
 
 /*
  * End initialization functions.
  */
 /******************************************************************************/
 /*
  * Begin malloc(3)-compatible functions.
  */
 
 static void *
-imalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind,
+ialloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, bool zero,
     prof_tctx_t *tctx, bool slow_path)
 {
 	void *p;
 
 	if (tctx == NULL)
 		return (NULL);
 	if (usize <= SMALL_MAXCLASS) {
 		szind_t ind_large = size2index(LARGE_MINCLASS);
-		p = imalloc(tsd, LARGE_MINCLASS, ind_large, slow_path);
+		p = ialloc(tsd, LARGE_MINCLASS, ind_large, zero, slow_path);
 		if (p == NULL)
 			return (NULL);
-		arena_prof_promoted(p, usize);
+		arena_prof_promoted(tsd_tsdn(tsd), p, usize);
 	} else
-		p = imalloc(tsd, usize, ind, slow_path);
+		p = ialloc(tsd, usize, ind, zero, slow_path);
 
 	return (p);
 }
 
 JEMALLOC_ALWAYS_INLINE_C void *
-imalloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool slow_path)
+ialloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool zero, bool slow_path)
 {
 	void *p;
 	prof_tctx_t *tctx;
 
 	tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true);
 	if (unlikely((uintptr_t)tctx != (uintptr_t)1U))
-		p = imalloc_prof_sample(tsd, usize, ind, tctx, slow_path);
+		p = ialloc_prof_sample(tsd, usize, ind, zero, tctx, slow_path);
 	else
-		p = imalloc(tsd, usize, ind, slow_path);
+		p = ialloc(tsd, usize, ind, zero, slow_path);
 	if (unlikely(p == NULL)) {
 		prof_alloc_rollback(tsd, tctx, true);
 		return (NULL);
 	}
-	prof_malloc(p, usize, tctx);
+	prof_malloc(tsd_tsdn(tsd), p, usize, tctx);
 
 	return (p);
 }
 
+/*
+ * ialloc_body() is inlined so that fast and slow paths are generated separately
+ * with statically known slow_path.
+ *
+ * This function guarantees that *tsdn is non-NULL on success.
+ */
 JEMALLOC_ALWAYS_INLINE_C void *
-imalloc_body(size_t size, tsd_t **tsd, size_t *usize, bool slow_path)
+ialloc_body(size_t size, bool zero, tsdn_t **tsdn, size_t *usize,
+    bool slow_path)
 {
+	tsd_t *tsd;
 	szind_t ind;
 
-	if (slow_path && unlikely(malloc_init()))
+	if (slow_path && unlikely(malloc_init())) {
+		*tsdn = NULL;
 		return (NULL);
-	*tsd = tsd_fetch();
+	}
+
+	tsd = tsd_fetch();
+	*tsdn = tsd_tsdn(tsd);
+	witness_assert_lockless(tsd_tsdn(tsd));
+
 	ind = size2index(size);
 	if (unlikely(ind >= NSIZES))
 		return (NULL);
 
 	if (config_stats || (config_prof && opt_prof) || (slow_path &&
 	    config_valgrind && unlikely(in_valgrind))) {
 		*usize = index2size(ind);
 		assert(*usize > 0 && *usize <= HUGE_MAXCLASS);
 	}
 
 	if (config_prof && opt_prof)
-		return (imalloc_prof(*tsd, *usize, ind, slow_path));
-
-	return (imalloc(*tsd, size, ind, slow_path));
+		return (ialloc_prof(tsd, *usize, ind, zero, slow_path));
+
+	return (ialloc(tsd, size, ind, zero, slow_path));
 }
 
 JEMALLOC_ALWAYS_INLINE_C void
-imalloc_post_check(void *ret, tsd_t *tsd, size_t usize, bool slow_path)
+ialloc_post_check(void *ret, tsdn_t *tsdn, size_t usize, const char *func,
+    bool update_errno, bool slow_path)
 {
+
+	assert(!tsdn_null(tsdn) || ret == NULL);
+
 	if (unlikely(ret == NULL)) {
 		if (slow_path && config_xmalloc && unlikely(opt_xmalloc)) {
-			malloc_write("<jemalloc>: Error in malloc(): "
-			    "out of memory\n");
+			malloc_printf("<jemalloc>: Error in %s(): out of "
+			    "memory\n", func);
 			abort();
 		}
-		set_errno(ENOMEM);
+		if (update_errno)
+			set_errno(ENOMEM);
 	}
 	if (config_stats && likely(ret != NULL)) {
-		assert(usize == isalloc(ret, config_prof));
-		*tsd_thread_allocatedp_get(tsd) += usize;
+		assert(usize == isalloc(tsdn, ret, config_prof));
+		*tsd_thread_allocatedp_get(tsdn_tsd(tsdn)) += usize;
 	}
+	witness_assert_lockless(tsdn);
 }
 
 JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
 void JEMALLOC_NOTHROW *
 JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1)
 je_malloc(size_t size)
 {
 	void *ret;
-	tsd_t *tsd;
+	tsdn_t *tsdn;
 	size_t usize JEMALLOC_CC_SILENCE_INIT(0);
 
 	if (size == 0)
 		size = 1;
 
 	if (likely(!malloc_slow)) {
-		/*
-		 * imalloc_body() is inlined so that fast and slow paths are
-		 * generated separately with statically known slow_path.
-		 */
-		ret = imalloc_body(size, &tsd, &usize, false);
-		imalloc_post_check(ret, tsd, usize, false);
+		ret = ialloc_body(size, false, &tsdn, &usize, false);
+		ialloc_post_check(ret, tsdn, usize, "malloc", true, false);
 	} else {
-		ret = imalloc_body(size, &tsd, &usize, true);
-		imalloc_post_check(ret, tsd, usize, true);
+		ret = ialloc_body(size, false, &tsdn, &usize, true);
+		ialloc_post_check(ret, tsdn, usize, "malloc", true, true);
 		UTRACE(0, size, ret);
-		JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, false);
+		JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsdn, ret, usize, false);
 	}
 
 	return (ret);
 }
 
 static void *
 imemalign_prof_sample(tsd_t *tsd, size_t alignment, size_t usize,
     prof_tctx_t *tctx)
@@ -1521,17 +1636,17 @@ imemalign_prof_sample(tsd_t *tsd, size_t
 
 	if (tctx == NULL)
 		return (NULL);
 	if (usize <= SMALL_MAXCLASS) {
 		assert(sa2u(LARGE_MINCLASS, alignment) == LARGE_MINCLASS);
 		p = ipalloc(tsd, LARGE_MINCLASS, alignment, false);
 		if (p == NULL)
 			return (NULL);
-		arena_prof_promoted(p, usize);
+		arena_prof_promoted(tsd_tsdn(tsd), p, usize);
 	} else
 		p = ipalloc(tsd, usize, alignment, false);
 
 	return (p);
 }
 
 JEMALLOC_ALWAYS_INLINE_C void *
 imemalign_prof(tsd_t *tsd, size_t alignment, size_t usize)
@@ -1543,37 +1658,39 @@ imemalign_prof(tsd_t *tsd, size_t alignm
 	if (unlikely((uintptr_t)tctx != (uintptr_t)1U))
 		p = imemalign_prof_sample(tsd, alignment, usize, tctx);
 	else
 		p = ipalloc(tsd, usize, alignment, false);
 	if (unlikely(p == NULL)) {
 		prof_alloc_rollback(tsd, tctx, true);
 		return (NULL);
 	}
-	prof_malloc(p, usize, tctx);
+	prof_malloc(tsd_tsdn(tsd), p, usize, tctx);
 
 	return (p);
 }
 
 JEMALLOC_ATTR(nonnull(1))
 static int
 imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment)
 {
 	int ret;
 	tsd_t *tsd;
 	size_t usize;
 	void *result;
 
 	assert(min_alignment != 0);
 
 	if (unlikely(malloc_init())) {
+		tsd = NULL;
 		result = NULL;
 		goto label_oom;
 	}
 	tsd = tsd_fetch();
+	witness_assert_lockless(tsd_tsdn(tsd));
 	if (size == 0)
 		size = 1;
 
 	/* Make sure that alignment is a large enough power of 2. */
 	if (unlikely(((alignment - 1) & alignment) != 0
 	    || (alignment < min_alignment))) {
 		if (config_xmalloc && unlikely(opt_xmalloc)) {
 			malloc_write("<jemalloc>: Error allocating "
@@ -1598,196 +1715,129 @@ imemalign(void **memptr, size_t alignmen
 	if (unlikely(result == NULL))
 		goto label_oom;
 	assert(((uintptr_t)result & (alignment - 1)) == ZU(0));
 
 	*memptr = result;
 	ret = 0;
 label_return:
 	if (config_stats && likely(result != NULL)) {
-		assert(usize == isalloc(result, config_prof));
+		assert(usize == isalloc(tsd_tsdn(tsd), result, config_prof));
 		*tsd_thread_allocatedp_get(tsd) += usize;
 	}
 	UTRACE(0, size, result);
+	JEMALLOC_VALGRIND_MALLOC(result != NULL, tsd_tsdn(tsd), result, usize,
+	    false);
+	witness_assert_lockless(tsd_tsdn(tsd));
 	return (ret);
 label_oom:
 	assert(result == NULL);
 	if (config_xmalloc && unlikely(opt_xmalloc)) {
 		malloc_write("<jemalloc>: Error allocating aligned memory: "
 		    "out of memory\n");
 		abort();
 	}
 	ret = ENOMEM;
+	witness_assert_lockless(tsd_tsdn(tsd));
 	goto label_return;
 }
 
 JEMALLOC_EXPORT int JEMALLOC_NOTHROW
 JEMALLOC_ATTR(nonnull(1))
 je_posix_memalign(void **memptr, size_t alignment, size_t size)
 {
-	int ret = imemalign(memptr, alignment, size, sizeof(void *));
-	JEMALLOC_VALGRIND_MALLOC(ret == 0, *memptr, isalloc(*memptr,
-	    config_prof), false);
+	int ret;
+
+	ret = imemalign(memptr, alignment, size, sizeof(void *));
+
 	return (ret);
 }
 
 JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
 void JEMALLOC_NOTHROW *
 JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(2)
 je_aligned_alloc(size_t alignment, size_t size)
 {
 	void *ret;
 	int err;
 
 	if (unlikely((err = imemalign(&ret, alignment, size, 1)) != 0)) {
 		ret = NULL;
 		set_errno(err);
 	}
-	JEMALLOC_VALGRIND_MALLOC(err == 0, ret, isalloc(ret, config_prof),
-	    false);
+
 	return (ret);
 }
 
-static void *
-icalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, prof_tctx_t *tctx)
-{
-	void *p;
-
-	if (tctx == NULL)
-		return (NULL);
-	if (usize <= SMALL_MAXCLASS) {
-		szind_t ind_large = size2index(LARGE_MINCLASS);
-		p = icalloc(tsd, LARGE_MINCLASS, ind_large);
-		if (p == NULL)
-			return (NULL);
-		arena_prof_promoted(p, usize);
-	} else
-		p = icalloc(tsd, usize, ind);
-
-	return (p);
-}
-
-JEMALLOC_ALWAYS_INLINE_C void *
-icalloc_prof(tsd_t *tsd, size_t usize, szind_t ind)
-{
-	void *p;
-	prof_tctx_t *tctx;
-
-	tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true);
-	if (unlikely((uintptr_t)tctx != (uintptr_t)1U))
-		p = icalloc_prof_sample(tsd, usize, ind, tctx);
-	else
-		p = icalloc(tsd, usize, ind);
-	if (unlikely(p == NULL)) {
-		prof_alloc_rollback(tsd, tctx, true);
-		return (NULL);
-	}
-	prof_malloc(p, usize, tctx);
-
-	return (p);
-}
-
 JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
 void JEMALLOC_NOTHROW *
 JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2)
 je_calloc(size_t num, size_t size)
 {
 	void *ret;
-	tsd_t *tsd;
+	tsdn_t *tsdn;
 	size_t num_size;
-	szind_t ind;
 	size_t usize JEMALLOC_CC_SILENCE_INIT(0);
 
-	if (unlikely(malloc_init())) {
-		num_size = 0;
-		ret = NULL;
-		goto label_return;
-	}
-	tsd = tsd_fetch();
-
 	num_size = num * size;
 	if (unlikely(num_size == 0)) {
 		if (num == 0 || size == 0)
 			num_size = 1;
-		else {
-			ret = NULL;
-			goto label_return;
-		}
+		else
+			num_size = HUGE_MAXCLASS + 1; /* Trigger OOM. */
 	/*
 	 * Try to avoid division here.  We know that it isn't possible to
 	 * overflow during multiplication if neither operand uses any of the
 	 * most significant half of the bits in a size_t.
 	 */
 	} else if (unlikely(((num | size) & (SIZE_T_MAX << (sizeof(size_t) <<
-	    2))) && (num_size / size != num))) {
-		/* size_t overflow. */
-		ret = NULL;
-		goto label_return;
-	}
-
-	ind = size2index(num_size);
-	if (unlikely(ind >= NSIZES)) {
-		ret = NULL;
-		goto label_return;
-	}
-	if (config_prof && opt_prof) {
-		usize = index2size(ind);
-		ret = icalloc_prof(tsd, usize, ind);
+	    2))) && (num_size / size != num)))
+		num_size = HUGE_MAXCLASS + 1; /* size_t overflow. */
+
+	if (likely(!malloc_slow)) {
+		ret = ialloc_body(num_size, true, &tsdn, &usize, false);
+		ialloc_post_check(ret, tsdn, usize, "calloc", true, false);
 	} else {
-		if (config_stats || (config_valgrind && unlikely(in_valgrind)))
-			usize = index2size(ind);
-		ret = icalloc(tsd, num_size, ind);
+		ret = ialloc_body(num_size, true, &tsdn, &usize, true);
+		ialloc_post_check(ret, tsdn, usize, "calloc", true, true);
+		UTRACE(0, num_size, ret);
+		JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsdn, ret, usize, true);
 	}
 
-label_return:
-	if (unlikely(ret == NULL)) {
-		if (config_xmalloc && unlikely(opt_xmalloc)) {
-			malloc_write("<jemalloc>: Error in calloc(): out of "
-			    "memory\n");
-			abort();
-		}
-		set_errno(ENOMEM);
-	}
-	if (config_stats && likely(ret != NULL)) {
-		assert(usize == isalloc(ret, config_prof));
-		*tsd_thread_allocatedp_get(tsd) += usize;
-	}
-	UTRACE(0, num_size, ret);
-	JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, true);
 	return (ret);
 }
 
 static void *
 irealloc_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize,
     prof_tctx_t *tctx)
 {
 	void *p;
 
 	if (tctx == NULL)
 		return (NULL);
 	if (usize <= SMALL_MAXCLASS) {
 		p = iralloc(tsd, old_ptr, old_usize, LARGE_MINCLASS, 0, false);
 		if (p == NULL)
 			return (NULL);
-		arena_prof_promoted(p, usize);
+		arena_prof_promoted(tsd_tsdn(tsd), p, usize);
 	} else
 		p = iralloc(tsd, old_ptr, old_usize, usize, 0, false);
 
 	return (p);
 }
 
 JEMALLOC_ALWAYS_INLINE_C void *
 irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize)
 {
 	void *p;
 	bool prof_active;
 	prof_tctx_t *old_tctx, *tctx;
 
 	prof_active = prof_active_get_unlocked();
-	old_tctx = prof_tctx_get(old_ptr);
+	old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr);
 	tctx = prof_alloc_prep(tsd, usize, prof_active, true);
 	if (unlikely((uintptr_t)tctx != (uintptr_t)1U))
 		p = irealloc_prof_sample(tsd, old_ptr, old_usize, usize, tctx);
 	else
 		p = iralloc(tsd, old_ptr, old_usize, usize, 0, false);
 	if (unlikely(p == NULL)) {
 		prof_alloc_rollback(tsd, tctx, true);
 		return (NULL);
@@ -1799,134 +1849,154 @@ irealloc_prof(tsd_t *tsd, void *old_ptr,
 }
 
 JEMALLOC_INLINE_C void
 ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path)
 {
 	size_t usize;
 	UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0);
 
+	witness_assert_lockless(tsd_tsdn(tsd));
+
 	assert(ptr != NULL);
 	assert(malloc_initialized() || IS_INITIALIZER);
 
 	if (config_prof && opt_prof) {
-		usize = isalloc(ptr, config_prof);
+		usize = isalloc(tsd_tsdn(tsd), ptr, config_prof);
 		prof_free(tsd, ptr, usize);
 	} else if (config_stats || config_valgrind)
-		usize = isalloc(ptr, config_prof);
+		usize = isalloc(tsd_tsdn(tsd), ptr, config_prof);
 	if (config_stats)
 		*tsd_thread_deallocatedp_get(tsd) += usize;
 
 	if (likely(!slow_path))
 		iqalloc(tsd, ptr, tcache, false);
 	else {
 		if (config_valgrind && unlikely(in_valgrind))
-			rzsize = p2rz(ptr);
+			rzsize = p2rz(tsd_tsdn(tsd), ptr);
 		iqalloc(tsd, ptr, tcache, true);
 		JEMALLOC_VALGRIND_FREE(ptr, rzsize);
 	}
 }
 
 JEMALLOC_INLINE_C void
-isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache)
+isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path)
 {
 	UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0);
 
+	witness_assert_lockless(tsd_tsdn(tsd));
+
 	assert(ptr != NULL);
 	assert(malloc_initialized() || IS_INITIALIZER);
 
 	if (config_prof && opt_prof)
 		prof_free(tsd, ptr, usize);
 	if (config_stats)
 		*tsd_thread_deallocatedp_get(tsd) += usize;
 	if (config_valgrind && unlikely(in_valgrind))
-		rzsize = p2rz(ptr);
-	isqalloc(tsd, ptr, usize, tcache);
+		rzsize = p2rz(tsd_tsdn(tsd), ptr);
+	isqalloc(tsd, ptr, usize, tcache, slow_path);
 	JEMALLOC_VALGRIND_FREE(ptr, rzsize);
 }
 
 JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
 void JEMALLOC_NOTHROW *
 JEMALLOC_ALLOC_SIZE(2)
 je_realloc(void *ptr, size_t size)
 {
 	void *ret;
-	tsd_t *tsd JEMALLOC_CC_SILENCE_INIT(NULL);
+	tsdn_t *tsdn JEMALLOC_CC_SILENCE_INIT(NULL);
 	size_t usize JEMALLOC_CC_SILENCE_INIT(0);
 	size_t old_usize = 0;
 	UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0);
 
 	if (unlikely(size == 0)) {
 		if (ptr != NULL) {
+			tsd_t *tsd;
+
 			/* realloc(ptr, 0) is equivalent to free(ptr). */
 			UTRACE(ptr, 0, 0);
 			tsd = tsd_fetch();
 			ifree(tsd, ptr, tcache_get(tsd, false), true);
 			return (NULL);
 		}
 		size = 1;
 	}
 
 	if (likely(ptr != NULL)) {
+		tsd_t *tsd;
+
 		assert(malloc_initialized() || IS_INITIALIZER);
 		malloc_thread_init();
 		tsd = tsd_fetch();
 
-		old_usize = isalloc(ptr, config_prof);
-		if (config_valgrind && unlikely(in_valgrind))
-			old_rzsize = config_prof ? p2rz(ptr) : u2rz(old_usize);
+		witness_assert_lockless(tsd_tsdn(tsd));
+
+		old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof);
+		if (config_valgrind && unlikely(in_valgrind)) {
+			old_rzsize = config_prof ? p2rz(tsd_tsdn(tsd), ptr) :
+			    u2rz(old_usize);
+		}
 
 		if (config_prof && opt_prof) {
 			usize = s2u(size);
 			ret = unlikely(usize == 0 || usize > HUGE_MAXCLASS) ?
 			    NULL : irealloc_prof(tsd, ptr, old_usize, usize);
 		} else {
 			if (config_stats || (config_valgrind &&
 			    unlikely(in_valgrind)))
 				usize = s2u(size);
 			ret = iralloc(tsd, ptr, old_usize, size, 0, false);
 		}
+		tsdn = tsd_tsdn(tsd);
 	} else {
 		/* realloc(NULL, size) is equivalent to malloc(size). */
 		if (likely(!malloc_slow))
-			ret = imalloc_body(size, &tsd, &usize, false);
+			ret = ialloc_body(size, false, &tsdn, &usize, false);
 		else
-			ret = imalloc_body(size, &tsd, &usize, true);
+			ret = ialloc_body(size, false, &tsdn, &usize, true);
+		assert(!tsdn_null(tsdn) || ret == NULL);
 	}
 
 	if (unlikely(ret == NULL)) {
 		if (config_xmalloc && unlikely(opt_xmalloc)) {
 			malloc_write("<jemalloc>: Error in realloc(): "
 			    "out of memory\n");
 			abort();
 		}
 		set_errno(ENOMEM);
 	}
 	if (config_stats && likely(ret != NULL)) {
-		assert(usize == isalloc(ret, config_prof));
+		tsd_t *tsd;
+
+		assert(usize == isalloc(tsdn, ret, config_prof));
+		tsd = tsdn_tsd(tsdn);
 		*tsd_thread_allocatedp_get(tsd) += usize;
 		*tsd_thread_deallocatedp_get(tsd) += old_usize;
 	}
 	UTRACE(ptr, size, ret);
-	JEMALLOC_VALGRIND_REALLOC(true, ret, usize, true, ptr, old_usize,
+	JEMALLOC_VALGRIND_REALLOC(true, tsdn, ret, usize, true, ptr, old_usize,
 	    old_rzsize, true, false);
+	witness_assert_lockless(tsdn);
 	return (ret);
 }
 
 JEMALLOC_EXPORT void JEMALLOC_NOTHROW
 je_free(void *ptr)
 {
 
 	UTRACE(ptr, 0, 0);
 	if (likely(ptr != NULL)) {
 		tsd_t *tsd = tsd_fetch();
+		witness_assert_lockless(tsd_tsdn(tsd));
 		if (likely(!malloc_slow))
 			ifree(tsd, ptr, tcache_get(tsd, false), false);
 		else
 			ifree(tsd, ptr, tcache_get(tsd, false), true);
+		witness_assert_lockless(tsd_tsdn(tsd));
 	}
 }
 
 /*
  * End malloc(3)-compatible functions.
  */
 /******************************************************************************/
 /*
@@ -1937,31 +2007,29 @@ je_free(void *ptr)
 JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
 void JEMALLOC_NOTHROW *
 JEMALLOC_ATTR(malloc)
 je_memalign(size_t alignment, size_t size)
 {
 	void *ret JEMALLOC_CC_SILENCE_INIT(NULL);
 	if (unlikely(imemalign(&ret, alignment, size, 1) != 0))
 		ret = NULL;
-	JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false);
 	return (ret);
 }
 #endif
 
 #ifdef JEMALLOC_OVERRIDE_VALLOC
 JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
 void JEMALLOC_NOTHROW *
 JEMALLOC_ATTR(malloc)
 je_valloc(size_t size)
 {
 	void *ret JEMALLOC_CC_SILENCE_INIT(NULL);
 	if (unlikely(imemalign(&ret, PAGE, size, 1) != 0))
 		ret = NULL;
-	JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false);
 	return (ret);
 }
 #endif
 
 /*
  * is_malloc(je_malloc) is some macro magic to detect if jemalloc_defs.h has
  * #define je_malloc malloc
  */
@@ -1981,28 +2049,51 @@ je_valloc(size_t size)
  */
 JEMALLOC_EXPORT void (*__free_hook)(void *ptr) = je_free;
 JEMALLOC_EXPORT void *(*__malloc_hook)(size_t size) = je_malloc;
 JEMALLOC_EXPORT void *(*__realloc_hook)(void *ptr, size_t size) = je_realloc;
 # ifdef JEMALLOC_GLIBC_MEMALIGN_HOOK
 JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) =
     je_memalign;
 # endif
+
+#ifdef CPU_COUNT
+/*
+ * To enable static linking with glibc, the libc specific malloc interface must
+ * be implemented also, so none of glibc's malloc.o functions are added to the
+ * link.
+ */
+#define	ALIAS(je_fn)	__attribute__((alias (#je_fn), used))
+/* To force macro expansion of je_ prefix before stringification. */
+#define	PREALIAS(je_fn)  ALIAS(je_fn)
+void	*__libc_malloc(size_t size) PREALIAS(je_malloc);
+void	__libc_free(void* ptr) PREALIAS(je_free);
+void	*__libc_realloc(void* ptr, size_t size) PREALIAS(je_realloc);
+void	*__libc_calloc(size_t n, size_t size) PREALIAS(je_calloc);
+void	*__libc_memalign(size_t align, size_t s) PREALIAS(je_memalign);
+void	*__libc_valloc(size_t size) PREALIAS(je_valloc);
+int	__posix_memalign(void** r, size_t a, size_t s)
+    PREALIAS(je_posix_memalign);
+#undef PREALIAS
+#undef ALIAS
+
+#endif
+
 #endif
 
 /*
  * End non-standard override functions.
  */
 /******************************************************************************/
 /*
  * Begin non-standard functions.
  */
 
 JEMALLOC_ALWAYS_INLINE_C bool
-imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize,
+imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize,
     size_t *alignment, bool *zero, tcache_t **tcache, arena_t **arena)
 {
 
 	if ((flags & MALLOCX_LG_ALIGN_MASK) == 0) {
 		*alignment = 0;
 		*usize = s2u(size);
 	} else {
 		*alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags);
@@ -2015,192 +2106,191 @@ imallocx_flags_decode_hard(tsd_t *tsd, s
 		if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE)
 			*tcache = NULL;
 		else
 			*tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags));
 	} else
 		*tcache = tcache_get(tsd, true);
 	if ((flags & MALLOCX_ARENA_MASK) != 0) {
 		unsigned arena_ind = MALLOCX_ARENA_GET(flags);
-		*arena = arena_get(arena_ind, true);
+		*arena = arena_get(tsd_tsdn(tsd), arena_ind, true);
 		if (unlikely(*arena == NULL))
 			return (true);
 	} else
 		*arena = NULL;
 	return (false);
 }
 
-JEMALLOC_ALWAYS_INLINE_C bool
-imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize,
-    size_t *alignment, bool *zero, tcache_t **tcache, arena_t **arena)
-{
-
-	if (likely(flags == 0)) {
-		*usize = s2u(size);
-		if (unlikely(*usize == 0 || *usize > HUGE_MAXCLASS))
-			return (true);
-		*alignment = 0;
-		*zero = false;
-		*tcache = tcache_get(tsd, true);
-		*arena = NULL;
-		return (false);
-	} else {
-		return (imallocx_flags_decode_hard(tsd, size, flags, usize,
-		    alignment, zero, tcache, arena));
-	}
-}
-
 JEMALLOC_ALWAYS_INLINE_C void *
-imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
-    tcache_t *tcache, arena_t *arena)
+imallocx_flags(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
+    tcache_t *tcache, arena_t *arena, bool slow_path)
 {
 	szind_t ind;
 
 	if (unlikely(alignment != 0))
-		return (ipalloct(tsd, usize, alignment, zero, tcache, arena));
+		return (ipalloct(tsdn, usize, alignment, zero, tcache, arena));
 	ind = size2index(usize);
 	assert(ind < NSIZES);
-	if (unlikely(zero))
-		return (icalloct(tsd, usize, ind, tcache, arena));
-	return (imalloct(tsd, usize, ind, tcache, arena));
+	return (iallocztm(tsdn, usize, ind, zero, tcache, false, arena,
+	    slow_path));
 }
 
 static void *
-imallocx_prof_sample(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
-    tcache_t *tcache, arena_t *arena)
+imallocx_prof_sample(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
+    tcache_t *tcache, arena_t *arena, bool slow_path)
 {
 	void *p;
 
 	if (usize <= SMALL_MAXCLASS) {
 		assert(((alignment == 0) ? s2u(LARGE_MINCLASS) :
 		    sa2u(LARGE_MINCLASS, alignment)) == LARGE_MINCLASS);
-		p = imallocx_flags(tsd, LARGE_MINCLASS, alignment, zero, tcache,
-		    arena);
+		p = imallocx_flags(tsdn, LARGE_MINCLASS, alignment, zero,
+		    tcache, arena, slow_path);
 		if (p == NULL)
 			return (NULL);
-		arena_prof_promoted(p, usize);
-	} else
-		p = imallocx_flags(tsd, usize, alignment, zero, tcache, arena);
+		arena_prof_promoted(tsdn, p, usize);
+	} else {
+		p = imallocx_flags(tsdn, usize, alignment, zero, tcache, arena,
+		    slow_path);
+	}
 
 	return (p);
 }
 
 JEMALLOC_ALWAYS_INLINE_C void *
-imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize)
+imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize, bool slow_path)
 {
 	void *p;
 	size_t alignment;
 	bool zero;
 	tcache_t *tcache;
 	arena_t *arena;
 	prof_tctx_t *tctx;
 
 	if (unlikely(imallocx_flags_decode(tsd, size, flags, usize, &alignment,
 	    &zero, &tcache, &arena)))
 		return (NULL);
 	tctx = prof_alloc_prep(tsd, *usize, prof_active_get_unlocked(), true);
-	if (likely((uintptr_t)tctx == (uintptr_t)1U))
-		p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena);
-	else if ((uintptr_t)tctx > (uintptr_t)1U) {
-		p = imallocx_prof_sample(tsd, *usize, alignment, zero, tcache,
-		    arena);
+	if (likely((uintptr_t)tctx == (uintptr_t)1U)) {
+		p = imallocx_flags(tsd_tsdn(tsd), *usize, alignment, zero,
+		    tcache, arena, slow_path);
+	} else if ((uintptr_t)tctx > (uintptr_t)1U) {
+		p = imallocx_prof_sample(tsd_tsdn(tsd), *usize, alignment, zero,
+		    tcache, arena, slow_path);
 	} else
 		p = NULL;
 	if (unlikely(p == NULL)) {
 		prof_alloc_rollback(tsd, tctx, true);
 		return (NULL);
 	}
-	prof_malloc(p, *usize, tctx);
+	prof_malloc(tsd_tsdn(tsd), p, *usize, tctx);
 
 	assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0));
 	return (p);
 }
 
 JEMALLOC_ALWAYS_INLINE_C void *
-imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize)
+imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize,
+    bool slow_path)
 {
 	void *p;
 	size_t alignment;
 	bool zero;
 	tcache_t *tcache;
 	arena_t *arena;
 
+	if (unlikely(imallocx_flags_decode(tsd, size, flags, usize, &alignment,
+	    &zero, &tcache, &arena)))
+		return (NULL);
+	p = imallocx_flags(tsd_tsdn(tsd), *usize, alignment, zero, tcache,
+	    arena, slow_path);
+	assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0));
+	return (p);
+}
+
+/* This function guarantees that *tsdn is non-NULL on success. */
+JEMALLOC_ALWAYS_INLINE_C void *
+imallocx_body(size_t size, int flags, tsdn_t **tsdn, size_t *usize,
+    bool slow_path)
+{
+	tsd_t *tsd;
+
+	if (slow_path && unlikely(malloc_init())) {
+		*tsdn = NULL;
+		return (NULL);
+	}
+
+	tsd = tsd_fetch();
+	*tsdn = tsd_tsdn(tsd);
+	witness_assert_lockless(tsd_tsdn(tsd));
+
 	if (likely(flags == 0)) {
 		szind_t ind = size2index(size);
 		if (unlikely(ind >= NSIZES))
 			return (NULL);
-		if (config_stats || (config_valgrind &&
-		    unlikely(in_valgrind))) {
+		if (config_stats || (config_prof && opt_prof) || (slow_path &&
+		    config_valgrind && unlikely(in_valgrind))) {
 			*usize = index2size(ind);
 			assert(*usize > 0 && *usize <= HUGE_MAXCLASS);
 		}
-		return (imalloc(tsd, size, ind, true));
+
+		if (config_prof && opt_prof) {
+			return (ialloc_prof(tsd, *usize, ind, false,
+			    slow_path));
+		}
+
+		return (ialloc(tsd, size, ind, false, slow_path));
 	}
 
-	if (unlikely(imallocx_flags_decode_hard(tsd, size, flags, usize,
-	    &alignment, &zero, &tcache, &arena)))
-		return (NULL);
-	p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena);
-	assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0));
-	return (p);
+	if (config_prof && opt_prof)
+		return (imallocx_prof(tsd, size, flags, usize, slow_path));
+
+	return (imallocx_no_prof(tsd, size, flags, usize, slow_path));
 }
 
 JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
 void JEMALLOC_NOTHROW *
 JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1)
 je_mallocx(size_t size, int flags)
 {
-	tsd_t *tsd;
+	tsdn_t *tsdn;
 	void *p;
 	size_t usize;
 
 	assert(size != 0);
 
-	if (unlikely(malloc_init()))
-		goto label_oom;
-	tsd = tsd_fetch();
-
-	if (config_prof && opt_prof)
-		p = imallocx_prof(tsd, size, flags, &usize);
-	else
-		p = imallocx_no_prof(tsd, size, flags, &usize);
-	if (unlikely(p == NULL))
-		goto label_oom;
-
-	if (config_stats) {
-		assert(usize == isalloc(p, config_prof));
-		*tsd_thread_allocatedp_get(tsd) += usize;
+	if (likely(!malloc_slow)) {
+		p = imallocx_body(size, flags, &tsdn, &usize, false);
+		ialloc_post_check(p, tsdn, usize, "mallocx", false, false);
+	} else {
+		p = imallocx_body(size, flags, &tsdn, &usize, true);
+		ialloc_post_check(p, tsdn, usize, "mallocx", false, true);
+		UTRACE(0, size, p);
+		JEMALLOC_VALGRIND_MALLOC(p != NULL, tsdn, p, usize,
+		    MALLOCX_ZERO_GET(flags));
 	}
-	UTRACE(0, size, p);
-	JEMALLOC_VALGRIND_MALLOC(true, p, usize, MALLOCX_ZERO_GET(flags));
+
 	return (p);
-label_oom:
-	if (config_xmalloc && unlikely(opt_xmalloc)) {
-		malloc_write("<jemalloc>: Error in mallocx(): out of memory\n");
-		abort();
-	}
-	UTRACE(0, size, 0);
-	return (NULL);
 }
 
 static void *
 irallocx_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize,
     size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena,
     prof_tctx_t *tctx)
 {
 	void *p;
 
 	if (tctx == NULL)
 		return (NULL);
 	if (usize <= SMALL_MAXCLASS) {
 		p = iralloct(tsd, old_ptr, old_usize, LARGE_MINCLASS, alignment,
 		    zero, tcache, arena);
 		if (p == NULL)
 			return (NULL);
-		arena_prof_promoted(p, usize);
+		arena_prof_promoted(tsd_tsdn(tsd), p, usize);
 	} else {
 		p = iralloct(tsd, old_ptr, old_usize, usize, alignment, zero,
 		    tcache, arena);
 	}
 
 	return (p);
 }
 
@@ -2209,42 +2299,42 @@ irallocx_prof(tsd_t *tsd, void *old_ptr,
     size_t alignment, size_t *usize, bool zero, tcache_t *tcache,
     arena_t *arena)
 {
 	void *p;
 	bool prof_active;
 	prof_tctx_t *old_tctx, *tctx;
 
 	prof_active = prof_active_get_unlocked();
-	old_tctx = prof_tctx_get(old_ptr);
-	tctx = prof_alloc_prep(tsd, *usize, prof_active, true);
+	old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr);
+	tctx = prof_alloc_prep(tsd, *usize, prof_active, false);
 	if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) {
 		p = irallocx_prof_sample(tsd, old_ptr, old_usize, *usize,
 		    alignment, zero, tcache, arena, tctx);
 	} else {
 		p = iralloct(tsd, old_ptr, old_usize, size, alignment, zero,
 		    tcache, arena);
 	}
 	if (unlikely(p == NULL)) {
-		prof_alloc_rollback(tsd, tctx, true);
+		prof_alloc_rollback(tsd, tctx, false);
 		return (NULL);
 	}
 
 	if (p == old_ptr && alignment != 0) {
 		/*
 		 * The allocation did not move, so it is possible that the size
 		 * class is smaller than would guarantee the requested
 		 * alignment, and that the alignment constraint was
 		 * serendipitously satisfied.  Additionally, old_usize may not
 		 * be the same as the current usize because of in-place large
 		 * reallocation.  Therefore, query the actual value of usize.
 		 */
-		*usize = isalloc(p, config_prof);
+		*usize = isalloc(tsd_tsdn(tsd), p, config_prof);
 	}
-	prof_realloc(tsd, p, *usize, tctx, prof_active, true, old_ptr,
+	prof_realloc(tsd, p, *usize, tctx, prof_active, false, old_ptr,
 	    old_usize, old_tctx);
 
 	return (p);
 }
 
 JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
 void JEMALLOC_NOTHROW *
 JEMALLOC_ALLOC_SIZE(2)
@@ -2260,34 +2350,35 @@ je_rallocx(void *ptr, size_t size, int f
 	arena_t *arena;
 	tcache_t *tcache;
 
 	assert(ptr != NULL);
 	assert(size != 0);
 	assert(malloc_initialized() || IS_INITIALIZER);
 	malloc_thread_init();
 	tsd = tsd_fetch();
+	witness_assert_lockless(tsd_tsdn(tsd));
 
 	if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) {
 		unsigned arena_ind = MALLOCX_ARENA_GET(flags);
-		arena = arena_get(arena_ind, true);
+		arena = arena_get(tsd_tsdn(tsd), arena_ind, true);
 		if (unlikely(arena == NULL))
 			goto label_oom;
 	} else
 		arena = NULL;
 
 	if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) {
 		if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE)
 			tcache = NULL;
 		else
 			tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags));
 	} else
 		tcache = tcache_get(tsd, true);
 
-	old_usize = isalloc(ptr, config_prof);
+	old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof);
 	if (config_valgrind && unlikely(in_valgrind))
 		old_rzsize = u2rz(old_usize);
 
 	if (config_prof && opt_prof) {
 		usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment);
 		if (unlikely(usize == 0 || usize > HUGE_MAXCLASS))
 			goto label_oom;
 		p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize,
@@ -2295,74 +2386,76 @@ je_rallocx(void *ptr, size_t size, int f
 		if (unlikely(p == NULL))
 			goto label_oom;
 	} else {
 		p = iralloct(tsd, ptr, old_usize, size, alignment, zero,
 		     tcache, arena);
 		if (unlikely(p == NULL))
 			goto label_oom;
 		if (config_stats || (config_valgrind && unlikely(in_valgrind)))
-			usize = isalloc(p, config_prof);
+			usize = isalloc(tsd_tsdn(tsd), p, config_prof);
 	}
 	assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0));
 
 	if (config_stats) {
 		*tsd_thread_allocatedp_get(tsd) += usize;
 		*tsd_thread_deallocatedp_get(tsd) += old_usize;
 	}
 	UTRACE(ptr, size, p);
-	JEMALLOC_VALGRIND_REALLOC(true, p, usize, false, ptr, old_usize,
-	    old_rzsize, false, zero);
+	JEMALLOC_VALGRIND_REALLOC(true, tsd_tsdn(tsd), p, usize, false, ptr,
+	    old_usize, old_rzsize, false, zero);
+	witness_assert_lockless(tsd_tsdn(tsd));
 	return (p);
 label_oom:
 	if (config_xmalloc && unlikely(opt_xmalloc)) {
 		malloc_write("<jemalloc>: Error in rallocx(): out of memory\n");
 		abort();
 	}
 	UTRACE(ptr, size, 0);
+	witness_assert_lockless(tsd_tsdn(tsd));
 	return (NULL);
 }
 
 JEMALLOC_ALWAYS_INLINE_C size_t
-ixallocx_helper(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
+ixallocx_helper(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size,
     size_t extra, size_t alignment, bool zero)
 {
 	size_t usize;
 
-	if (ixalloc(tsd, ptr, old_usize, size, extra, alignment, zero))
+	if (ixalloc(tsdn, ptr, old_usize, size, extra, alignment, zero))
 		return (old_usize);
-	usize = isalloc(ptr, config_prof);
+	usize = isalloc(tsdn, ptr, config_prof);
 
 	return (usize);
 }
 
 static size_t
-ixallocx_prof_sample(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
+ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size,
     size_t extra, size_t alignment, bool zero, prof_tctx_t *tctx)
 {
 	size_t usize;
 
 	if (tctx == NULL)
 		return (old_usize);
-	usize = ixallocx_helper(tsd, ptr, old_usize, size, extra, alignment,
+	usize = ixallocx_helper(tsdn, ptr, old_usize, size, extra, alignment,
 	    zero);
 
 	return (usize);
 }
 
 JEMALLOC_ALWAYS_INLINE_C size_t
 ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
     size_t extra, size_t alignment, bool zero)
 {
 	size_t usize_max, usize;
 	bool prof_active;
 	prof_tctx_t *old_tctx, *tctx;
 
 	prof_active = prof_active_get_unlocked();
-	old_tctx = prof_tctx_get(ptr);
+	old_tctx = prof_tctx_get(tsd_tsdn(tsd), ptr);
 	/*
 	 * usize isn't knowable before ixalloc() returns when extra is non-zero.
 	 * Therefore, compute its maximum possible value and use that in
 	 * prof_alloc_prep() to decide whether to capture a backtrace.
 	 * prof_realloc() will use the actual usize to decide whether to sample.
 	 */
 	if (alignment == 0) {
 		usize_max = s2u(size+extra);
@@ -2377,21 +2470,21 @@ ixallocx_prof(tsd_t *tsd, void *ptr, siz
 			 * case allocation succeeds.
 			 */
 			usize_max = HUGE_MAXCLASS;
 		}
 	}
 	tctx = prof_alloc_prep(tsd, usize_max, prof_active, false);
 
 	if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) {
-		usize = ixallocx_prof_sample(tsd, ptr, old_usize, size, extra,
-		    alignment, zero, tctx);
+		usize = ixallocx_prof_sample(tsd_tsdn(tsd), ptr, old_usize,
+		    size, extra, alignment, zero, tctx);
 	} else {
-		usize = ixallocx_helper(tsd, ptr, old_usize, size, extra,
-		    alignment, zero);
+		usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size,
+		    extra, alignment, zero);
 	}
 	if (usize == old_usize) {
 		prof_alloc_rollback(tsd, tctx, false);
 		return (usize);
 	}
 	prof_realloc(tsd, ptr, usize, tctx, prof_active, false, ptr, old_usize,
 	    old_tctx);
 
@@ -2408,18 +2501,19 @@ je_xallocx(void *ptr, size_t size, size_
 	bool zero = flags & MALLOCX_ZERO;
 
 	assert(ptr != NULL);
 	assert(size != 0);
 	assert(SIZE_T_MAX - size >= extra);
 	assert(malloc_initialized() || IS_INITIALIZER);
 	malloc_thread_init();
 	tsd = tsd_fetch();
-
-	old_usize = isalloc(ptr, config_prof);
+	witness_assert_lockless(tsd_tsdn(tsd));
+
+	old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof);
 
 	/*
 	 * The API explicitly absolves itself of protecting against (size +
 	 * extra) numerical overflow, but we may need to clamp extra to avoid
 	 * exceeding HUGE_MAXCLASS.
 	 *
 	 * Ordinarily, size limit checking is handled deeper down, but here we
 	 * have to check as part of (size + extra) clamping, since we need the
@@ -2434,180 +2528,231 @@ je_xallocx(void *ptr, size_t size, size_
 
 	if (config_valgrind && unlikely(in_valgrind))
 		old_rzsize = u2rz(old_usize);
 
 	if (config_prof && opt_prof) {
 		usize = ixallocx_prof(tsd, ptr, old_usize, size, extra,
 		    alignment, zero);
 	} else {
-		usize = ixallocx_helper(tsd, ptr, old_usize, size, extra,
-		    alignment, zero);
+		usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size,
+		    extra, alignment, zero);
 	}
 	if (unlikely(usize == old_usize))
 		goto label_not_resized;
 
 	if (config_stats) {
 		*tsd_thread_allocatedp_get(tsd) += usize;
 		*tsd_thread_deallocatedp_get(tsd) += old_usize;
 	}
-	JEMALLOC_VALGRIND_REALLOC(false, ptr, usize, false, ptr, old_usize,
-	    old_rzsize, false, zero);
+	JEMALLOC_VALGRIND_REALLOC(false, tsd_tsdn(tsd), ptr, usize, false, ptr,
+	    old_usize, old_rzsize, false, zero);
 label_not_resized:
 	UTRACE(ptr, size, ptr);
+	witness_assert_lockless(tsd_tsdn(tsd));
 	return (usize);
 }
 
 JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW
 JEMALLOC_ATTR(pure)
 je_sallocx(const void *ptr, int flags)
 {
 	size_t usize;
+	tsdn_t *tsdn;
 
 	assert(malloc_initialized() || IS_INITIALIZER);
 	malloc_thread_init();
 
+	tsdn = tsdn_fetch();
+	witness_assert_lockless(tsdn);
+
 	if (config_ivsalloc)
-		usize = ivsalloc(ptr, config_prof);
+		usize = ivsalloc(tsdn, ptr, config_prof);
 	else
-		usize = isalloc(ptr, config_prof);
-
+		usize = isalloc(tsdn, ptr, config_prof);
+
+	witness_assert_lockless(tsdn);
 	return (usize);
 }
 
 JEMALLOC_EXPORT void JEMALLOC_NOTHROW
 je_dallocx(void *ptr, int flags)
 {
 	tsd_t *tsd;
 	tcache_t *tcache;
 
 	assert(ptr != NULL);
 	assert(malloc_initialized() || IS_INITIALIZER);
 
 	tsd = tsd_fetch();
+	witness_assert_lockless(tsd_tsdn(tsd));
 	if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) {
 		if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE)
 			tcache = NULL;
 		else
 			tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags));
 	} else
 		tcache = tcache_get(tsd, false);
 
 	UTRACE(ptr, 0, 0);
-	ifree(tsd_fetch(), ptr, tcache, true);
+	if (likely(!malloc_slow))
+		ifree(tsd, ptr, tcache, false);
+	else
+		ifree(tsd, ptr, tcache, true);
+	witness_assert_lockless(tsd_tsdn(tsd));
 }
 
 JEMALLOC_ALWAYS_INLINE_C size_t
-inallocx(size_t size, int flags)
+inallocx(tsdn_t *tsdn, size_t size, int flags)
 {
 	size_t usize;
 
+	witness_assert_lockless(tsdn);
+
 	if (likely((flags & MALLOCX_LG_ALIGN_MASK) == 0))
 		usize = s2u(size);
 	else
 		usize = sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags));
+	witness_assert_lockless(tsdn);
 	return (usize);
 }
 
 JEMALLOC_EXPORT void JEMALLOC_NOTHROW
 je_sdallocx(void *ptr, size_t size, int flags)
 {
 	tsd_t *tsd;
 	tcache_t *tcache;
 	size_t usize;
 
 	assert(ptr != NULL);
 	assert(malloc_initialized() || IS_INITIALIZER);
-	usize = inallocx(size, flags);
-	assert(usize == isalloc(ptr, config_prof));
-
 	tsd = tsd_fetch();
+	usize = inallocx(tsd_tsdn(tsd), size, flags);
+	assert(usize == isalloc(tsd_tsdn(tsd), ptr, config_prof));
+
+	witness_assert_lockless(tsd_tsdn(tsd));
 	if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) {
 		if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE)
 			tcache = NULL;
 		else
 			tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags));
 	} else
 		tcache = tcache_get(tsd, false);
 
 	UTRACE(ptr, 0, 0);
-	isfree(tsd, ptr, usize, tcache);
+	if (likely(!malloc_slow))
+		isfree(tsd, ptr, usize, tcache, false);
+	else
+		isfree(tsd, ptr, usize, tcache, true);
+	witness_assert_lockless(tsd_tsdn(tsd));
 }
 
 JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW
 JEMALLOC_ATTR(pure)
 je_nallocx(size_t size, int flags)
 {
 	size_t usize;
+	tsdn_t *tsdn;
 
 	assert(size != 0);
 
 	if (unlikely(malloc_init()))
 		return (0);
 
-	usize = inallocx(size, flags);
+	tsdn = tsdn_fetch();
+	witness_assert_lockless(tsdn);
+
+	usize = inallocx(tsdn, size, flags);
 	if (unlikely(usize > HUGE_MAXCLASS))
 		return (0);
 
+	witness_assert_lockless(tsdn);
 	return (usize);
 }
 
 JEMALLOC_EXPORT int JEMALLOC_NOTHROW
 je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp,
     size_t newlen)
 {
+	int ret;
+	tsd_t *tsd;
 
 	if (unlikely(malloc_init()))
 		return (EAGAIN);
 
-	return (ctl_byname(name, oldp, oldlenp, newp, newlen));
+	tsd = tsd_fetch();
+	witness_assert_lockless(tsd_tsdn(tsd));
+	ret = ctl_byname(tsd, name, oldp, oldlenp, newp, newlen);
+	witness_assert_lockless(tsd_tsdn(tsd));
+	return (ret);
 }
 
 JEMALLOC_EXPORT int JEMALLOC_NOTHROW
 je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp)
 {
+	int ret;
+	tsdn_t *tsdn;
 
 	if (unlikely(malloc_init()))
 		return (EAGAIN);
 
-	return (ctl_nametomib(name, mibp, miblenp));
+	tsdn = tsdn_fetch();
+	witness_assert_lockless(tsdn);
+	ret = ctl_nametomib(tsdn, name, mibp, miblenp);
+	witness_assert_lockless(tsdn);
+	return (ret);
 }
 
 JEMALLOC_EXPORT int JEMALLOC_NOTHROW
 je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
   void *newp, size_t newlen)
 {
+	int ret;
+	tsd_t *tsd;
 
 	if (unlikely(malloc_init()))
 		return (EAGAIN);
 
-	return (ctl_bymib(mib, miblen, oldp, oldlenp, newp, newlen));
+	tsd = tsd_fetch();
+	witness_assert_lockless(tsd_tsdn(tsd));
+	ret = ctl_bymib(tsd, mib, miblen, oldp, oldlenp, newp, newlen);
+	witness_assert_lockless(tsd_tsdn(tsd));
+	return (ret);
 }
 
 JEMALLOC_EXPORT void JEMALLOC_NOTHROW
 je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
     const char *opts)
 {
-
+	tsdn_t *tsdn;
+
+	tsdn = tsdn_fetch();
+	witness_assert_lockless(tsdn);
 	stats_print(write_cb, cbopaque, opts);
+	witness_assert_lockless(tsdn);
 }
 
 JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW
 je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr)
 {
 	size_t ret;
+	tsdn_t *tsdn;
 
 	assert(malloc_initialized() || IS_INITIALIZER);
 	malloc_thread_init();
 
+	tsdn = tsdn_fetch();
+	witness_assert_lockless(tsdn);
+
 	if (config_ivsalloc)
-		ret = ivsalloc(ptr, config_prof);
+		ret = ivsalloc(tsdn, ptr, config_prof);
 	else
-		ret = (ptr == NULL) ? 0 : isalloc(ptr, config_prof);
-
+		ret = (ptr == NULL) ? 0 : isalloc(tsdn, ptr, config_prof);
+
+	witness_assert_lockless(tsdn);
 	return (ret);
 }
 
 /*
  * End non-standard functions.
  */
 /******************************************************************************/
 /*
@@ -2623,112 +2768,130 @@ je_malloc_usable_size(JEMALLOC_USABLE_SI
  * partially initialized the allocator.  Ordinarily jemalloc prevents
  * fork/malloc races via the following functions it registers during
  * initialization using pthread_atfork(), but of course that does no good if
  * the allocator isn't fully initialized at fork time.  The following library
  * constructor is a partial solution to this problem.  It may still be possible
  * to trigger the deadlock described above, but doing so would involve forking
  * via a library constructor that runs before jemalloc's runs.
  */
+#ifndef JEMALLOC_JET
 JEMALLOC_ATTR(constructor)
 static void
 jemalloc_constructor(void)
 {
 
 	malloc_init();
 }
+#endif
 
 #ifndef JEMALLOC_MUTEX_INIT_CB
 void
 jemalloc_prefork(void)
 #else
 JEMALLOC_EXPORT void
 _malloc_prefork(void)
 #endif
 {
+	tsd_t *tsd;
 	unsigned i, j, narenas;
 	arena_t *arena;
 
 #ifdef JEMALLOC_MUTEX_INIT_CB
 	if (!malloc_initialized())
 		return;
 #endif
 	assert(malloc_initialized());
 
+	tsd = tsd_fetch();
+
 	narenas = narenas_total_get();
 
+	witness_prefork(tsd);
 	/* Acquire all mutexes in a safe order. */
-	ctl_prefork();
-	malloc_mutex_prefork(&arenas_lock);
-	prof_prefork0();
+	ctl_prefork(tsd_tsdn(tsd));
+	malloc_mutex_prefork(tsd_tsdn(tsd), &arenas_lock);
+	prof_prefork0(tsd_tsdn(tsd));
 	for (i = 0; i < 3; i++) {
 		for (j = 0; j < narenas; j++) {
-			if ((arena = arena_get(j, false)) != NULL) {
+			if ((arena = arena_get(tsd_tsdn(tsd), j, false)) !=
+			    NULL) {
 				switch (i) {
-				case 0: arena_prefork0(arena); break;
-				case 1: arena_prefork1(arena); break;
-				case 2: arena_prefork2(arena); break;
+				case 0:
+					arena_prefork0(tsd_tsdn(tsd), arena);
+					break;
+				case 1:
+					arena_prefork1(tsd_tsdn(tsd), arena);
+					break;
+				case 2:
+					arena_prefork2(tsd_tsdn(tsd), arena);
+					break;
 				default: not_reached();
 				}
 			}
 		}
 	}
-	base_prefork();
-	chunk_prefork();
+	base_prefork(tsd_tsdn(tsd));
 	for (i = 0; i < narenas; i++) {
-		if ((arena = arena_get(i, false)) != NULL)
-			arena_prefork3(arena);
+		if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL)
+			arena_prefork3(tsd_tsdn(tsd), arena);
 	}
-	prof_prefork1();
+	prof_prefork1(tsd_tsdn(tsd));
 }
 
 #ifndef JEMALLOC_MUTEX_INIT_CB
 void
 jemalloc_postfork_parent(void)
 #else
 JEMALLOC_EXPORT void
 _malloc_postfork(void)
 #endif
 {
+	tsd_t *tsd;
 	unsigned i, narenas;
 
 #ifdef JEMALLOC_MUTEX_INIT_CB
 	if (!malloc_initialized())
 		return;
 #endif
 	assert(malloc_initialized());
 
+	tsd = tsd_fetch();
+
+	witness_postfork_parent(tsd);
 	/* Release all mutexes, now that fork() has completed. */
-	chunk_postfork_parent();
-	base_postfork_parent();
+	base_postfork_parent(tsd_tsdn(tsd));
 	for (i = 0, narenas = narenas_total_get(); i < narenas; i++) {
 		arena_t *arena;
 
-		if ((arena = arena_get(i, false)) != NULL)
-			arena_postfork_parent(arena);
+		if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL)
+			arena_postfork_parent(tsd_tsdn(tsd), arena);
 	}
-	prof_postfork_parent();
-	malloc_mutex_postfork_parent(&arenas_lock);
-	ctl_postfork_parent();
+	prof_postfork_parent(tsd_tsdn(tsd));
+	malloc_mutex_postfork_parent(tsd_tsdn(tsd), &arenas_lock);
+	ctl_postfork_parent(tsd_tsdn(tsd));
 }
 
 void
 jemalloc_postfork_child(void)
 {
+	tsd_t *tsd;
 	unsigned i, narenas;
 
 	assert(malloc_initialized());
 
+	tsd = tsd_fetch();
+
+	witness_postfork_child(tsd);
 	/* Release all mutexes, now that fork() has completed. */
-	chunk_postfork_child();
-	base_postfork_child();
+	base_postfork_child(tsd_tsdn(tsd));
 	for (i = 0, narenas = narenas_total_get(); i < narenas; i++) {
 		arena_t *arena;
 
-		if ((arena = arena_get(i, false)) != NULL)
-			arena_postfork_child(arena);
+		if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL)
+			arena_postfork_child(tsd_tsdn(tsd), arena);
 	}
-	prof_postfork_child();
-	malloc_mutex_postfork_child(&arenas_lock);
-	ctl_postfork_child();
+	prof_postfork_child(tsd_tsdn(tsd));
+	malloc_mutex_postfork_child(tsd_tsdn(tsd), &arenas_lock);
+	ctl_postfork_child(tsd_tsdn(tsd));
 }
 
 /******************************************************************************/
--- a/memory/jemalloc/src/src/mutex.c
+++ b/memory/jemalloc/src/src/mutex.c
@@ -64,27 +64,29 @@ pthread_create(pthread_t *__restrict thr
 /******************************************************************************/
 
 #ifdef JEMALLOC_MUTEX_INIT_CB
 JEMALLOC_EXPORT int	_pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
     void *(calloc_cb)(size_t, size_t));
 #endif
 
 bool
-malloc_mutex_init(malloc_mutex_t *mutex)
+malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank)
 {
 
 #ifdef _WIN32
 #  if _WIN32_WINNT >= 0x0600
 	InitializeSRWLock(&mutex->lock);
 #  else
 	if (!InitializeCriticalSectionAndSpinCount(&mutex->lock,
 	    _CRT_SPINCOUNT))
 		return (true);
 #  endif
+#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
+	mutex->lock = OS_UNFAIR_LOCK_INIT;
 #elif (defined(JEMALLOC_OSSPIN))
 	mutex->lock = 0;
 #elif (defined(JEMALLOC_MUTEX_INIT_CB))
 	if (postpone_init) {
 		mutex->postponed_next = postponed_mutexes;
 		postponed_mutexes = mutex;
 	} else {
 		if (_pthread_mutex_init_calloc_cb(&mutex->lock,
@@ -98,51 +100,54 @@ malloc_mutex_init(malloc_mutex_t *mutex)
 		return (true);
 	pthread_mutexattr_settype(&attr, MALLOC_MUTEX_TYPE);
 	if (pthread_mutex_init(&mutex->lock, &attr) != 0) {
 		pthread_mutexattr_destroy(&attr);
 		return (true);
 	}
 	pthread_mutexattr_destroy(&attr);
 #endif
+	if (config_debug)
+		witness_init(&mutex->witness, name, rank, NULL);
 	return (false);
 }
 
 void
-malloc_mutex_prefork(malloc_mutex_t *mutex)
+malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex)
 {
 
-	malloc_mutex_lock(mutex);
+	malloc_mutex_lock(tsdn, mutex);
 }
 
 void
-malloc_mutex_postfork_parent(malloc_mutex_t *mutex)
+malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex)
 {
 
-	malloc_mutex_unlock(mutex);
+	malloc_mutex_unlock(tsdn, mutex);
 }
 
 void
-malloc_mutex_postfork_child(malloc_mutex_t *mutex)
+malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex)
 {
 
 #ifdef JEMALLOC_MUTEX_INIT_CB
-	malloc_mutex_unlock(mutex);
+	malloc_mutex_unlock(tsdn, mutex);
 #else
-	if (malloc_mutex_init(mutex)) {
+	if (malloc_mutex_init(mutex, mutex->witness.name,
+	    mutex->witness.rank)) {
 		malloc_printf("<jemalloc>: Error re-initializing mutex in "
 		    "child\n");
 		if (opt_abort)
 			abort();
 	}
 #endif
 }
 
 bool
-mutex_boot(void)
+malloc_mutex_boot(void)
 {
 
 #ifdef JEMALLOC_MUTEX_INIT_CB
 	postpone_init = false;
 	while (postponed_mutexes != NULL) {
 		if (_pthread_mutex_init_calloc_cb(&postponed_mutexes->lock,
 		    bootstrap_calloc) != 0)
 			return (true);
--- a/memory/jemalloc/src/src/nstime.c
+++ b/memory/jemalloc/src/src/nstime.c
@@ -92,57 +92,103 @@ uint64_t
 nstime_divide(const nstime_t *time, const nstime_t *divisor)
 {
 
 	assert(divisor->ns != 0);
 
 	return (time->ns / divisor->ns);
 }
 
+#ifdef _WIN32
+#  define NSTIME_MONOTONIC true
+static void
+nstime_get(nstime_t *time)
+{
+	FILETIME ft;
+	uint64_t ticks_100ns;
+
+	GetSystemTimeAsFileTime(&ft);
+	ticks_100ns = (((uint64_t)ft.dwHighDateTime) << 32) | ft.dwLowDateTime;
+
+	nstime_init(time, ticks_100ns * 100);
+}
+#elif JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE
+#  define NSTIME_MONOTONIC true
+static void
+nstime_get(nstime_t *time)
+{
+	struct timespec ts;
+
+	clock_gettime(CLOCK_MONOTONIC_COARSE, &ts);
+	nstime_init2(time, ts.tv_sec, ts.tv_nsec);
+}
+#elif JEMALLOC_HAVE_CLOCK_MONOTONIC
+#  define NSTIME_MONOTONIC true
+static void
+nstime_get(nstime_t *time)
+{
+	struct timespec ts;
+
+	clock_gettime(CLOCK_MONOTONIC, &ts);
+	nstime_init2(time, ts.tv_sec, ts.tv_nsec);
+}
+#elif JEMALLOC_HAVE_MACH_ABSOLUTE_TIME
+#  define NSTIME_MONOTONIC true
+static void
+nstime_get(nstime_t *time)
+{
+
+	nstime_init(time, mach_absolute_time());
+}
+#else
+#  define NSTIME_MONOTONIC false
+static void
+nstime_get(nstime_t *time)
+{
+	struct timeval tv;
+
+	gettimeofday(&tv, NULL);
+	nstime_init2(time, tv.tv_sec, tv.tv_usec * 1000);
+}
+#endif
+
+#ifdef JEMALLOC_JET
+#undef nstime_monotonic
+#define	nstime_monotonic JEMALLOC_N(n_nstime_monotonic)
+#endif
+bool
+nstime_monotonic(void)
+{
+
+	return (NSTIME_MONOTONIC);
+#undef NSTIME_MONOTONIC
+}
+#ifdef JEMALLOC_JET
+#undef nstime_monotonic
+#define	nstime_monotonic JEMALLOC_N(nstime_monotonic)
+nstime_monotonic_t *nstime_monotonic = JEMALLOC_N(n_nstime_monotonic);
+#endif
+
 #ifdef JEMALLOC_JET
 #undef nstime_update
-#define	nstime_update JEMALLOC_N(nstime_update_impl)
+#define	nstime_update JEMALLOC_N(n_nstime_update)
 #endif
 bool
 nstime_update(nstime_t *time)
 {
 	nstime_t old_time;
 
 	nstime_copy(&old_time, time);
-
-#ifdef _WIN32
-	{
-		FILETIME ft;
-		uint64_t ticks;
-		GetSystemTimeAsFileTime(&ft);
-		ticks = (((uint64_t)ft.dwHighDateTime) << 32) |
-		    ft.dwLowDateTime;
-		time->ns = ticks * 100;
-	}
-#elif JEMALLOC_CLOCK_GETTIME
-	{
-		struct timespec ts;
-
-		if (sysconf(_SC_MONOTONIC_CLOCK) > 0)
-			clock_gettime(CLOCK_MONOTONIC, &ts);
-		else
-			clock_gettime(CLOCK_REALTIME, &ts);
-		time->ns = ts.tv_sec * BILLION + ts.tv_nsec;
-	}
-#else
-	struct timeval tv;
-	gettimeofday(&tv, NULL);
-	time->ns = tv.tv_sec * BILLION + tv.tv_usec * 1000;
-#endif
+	nstime_get(time);
 
 	/* Handle non-monotonic clocks. */
 	if (unlikely(nstime_compare(&old_time, time) > 0)) {
 		nstime_copy(time, &old_time);
 		return (true);
 	}
 
 	return (false);
 }
 #ifdef JEMALLOC_JET
 #undef nstime_update
 #define	nstime_update JEMALLOC_N(nstime_update)
-nstime_update_t *nstime_update = JEMALLOC_N(nstime_update_impl);
+nstime_update_t *nstime_update = JEMALLOC_N(n_nstime_update);
 #endif
--- a/memory/jemalloc/src/src/pages.c
+++ b/memory/jemalloc/src/src/pages.c
@@ -1,34 +1,54 @@
 #define	JEMALLOC_PAGES_C_
 #include "jemalloc/internal/jemalloc_internal.h"
 
+#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
+#include <sys/sysctl.h>
+#endif
+
+/******************************************************************************/
+/* Data. */
+
+#ifndef _WIN32
+#  define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE)
+#  define PAGES_PROT_DECOMMIT (PROT_NONE)
+static int	mmap_flags;
+#endif
+static bool	os_overcommits;
+
 /******************************************************************************/
 
 void *
-pages_map(void *addr, size_t size)
+pages_map(void *addr, size_t size, bool *commit)
 {
 	void *ret;
 
 	assert(size != 0);
 
+	if (os_overcommits)
+		*commit = true;
+
 #ifdef _WIN32
 	/*
 	 * If VirtualAlloc can't allocate at the given address when one is
 	 * given, it fails and returns NULL.
 	 */
-	ret = VirtualAlloc(addr, size, MEM_COMMIT | MEM_RESERVE,
+	ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0),
 	    PAGE_READWRITE);
 #else
 	/*
 	 * We don't use MAP_FIXED here, because it can cause the *replacement*
 	 * of existing mappings, and we only want to create new mappings.
 	 */
-	ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
-	    -1, 0);
+	{
+		int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
+
+		ret = mmap(addr, size, prot, mmap_flags, -1, 0);
+	}
 	assert(ret != NULL);
 
 	if (ret == MAP_FAILED)
 		ret = NULL;
 	else if (addr != NULL && ret != addr) {
 		/*
 		 * We succeeded in mapping memory, but not in the right place.
 		 */
@@ -62,27 +82,28 @@ pages_unmap(void *addr, size_t size)
 #endif
 		              "(): %s\n", buf);
 		if (opt_abort)
 			abort();
 	}
 }
 
 void *
-pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size)
+pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size,
+    bool *commit)
 {
 	void *ret = (void *)((uintptr_t)addr + leadsize);
 
 	assert(alloc_size >= leadsize + size);
 #ifdef _WIN32
 	{
 		void *new_addr;
 
 		pages_unmap(addr, alloc_size);
-		new_addr = pages_map(ret, size);
+		new_addr = pages_map(ret, size, commit);
 		if (new_addr == ret)
 			return (ret);
 		if (new_addr)
 			pages_unmap(new_addr, size);
 		return (NULL);
 	}
 #else
 	{
@@ -96,41 +117,40 @@ pages_trim(void *addr, size_t alloc_size
 	}
 #endif
 }
 
 static bool
 pages_commit_impl(void *addr, size_t size, bool commit)
 {
 
-#ifndef _WIN32
-	/*
-	 * The following decommit/commit implementation is functional, but
-	 * always disabled because it doesn't add value beyong improved
-	 * debugging (at the cost of extra system calls) on systems that
-	 * overcommit.
-	 */
-	if (false) {
-		int prot = commit ? (PROT_READ | PROT_WRITE) : PROT_NONE;
-		void *result = mmap(addr, size, prot, MAP_PRIVATE | MAP_ANON |
-		    MAP_FIXED, -1, 0);
+	if (os_overcommits)
+		return (true);
+
+#ifdef _WIN32
+	return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT,
+	    PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT)));
+#else
+	{
+		int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
+		void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED,
+		    -1, 0);
 		if (result == MAP_FAILED)
 			return (true);
 		if (result != addr) {
 			/*
 			 * We succeeded in mapping memory, but not in the right
 			 * place.
 			 */
 			pages_unmap(result, size);
 			return (true);
 		}
 		return (false);
 	}
 #endif
-	return (true);
 }
 
 bool
 pages_commit(void *addr, size_t size)
 {
 
 	return (pages_commit_impl(addr, size, true));
 }
@@ -166,8 +186,88 @@ pages_purge(void *addr, size_t size)
 #  undef JEMALLOC_MADV_ZEROS
 #else
 	/* Last resort no-op. */
 	unzeroed = true;
 #endif
 	return (unzeroed);
 }
 
+#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
+static bool
+os_overcommits_sysctl(void)
+{
+	int vm_overcommit;
+	size_t sz;
+
+	sz = sizeof(vm_overcommit);
+	if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0)
+		return (false); /* Error. */
+
+	return ((vm_overcommit & 0x3) == 0);
+}
+#endif
+
+#ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
+/*
+ * Use syscall(2) rather than {open,read,close}(2) when possible to avoid
+ * reentry during bootstrapping if another library has interposed system call
+ * wrappers.
+ */
+static bool
+os_overcommits_proc(void)
+{
+	int fd;
+	char buf[1];
+	ssize_t nread;
+
+#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_open)
+	fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY);
+#else
+	fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY);
+#endif
+	if (fd == -1)
+		return (false); /* Error. */
+
+#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_read)
+	nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf));
+#else
+	nread = read(fd, &buf, sizeof(buf));
+#endif
+
+#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_close)
+	syscall(SYS_close, fd);
+#else
+	close(fd);
+#endif
+
+	if (nread < 1)
+		return (false); /* Error. */
+	/*
+	 * /proc/sys/vm/overcommit_memory meanings:
+	 * 0: Heuristic overcommit.
+	 * 1: Always overcommit.
+	 * 2: Never overcommit.
+	 */
+	return (buf[0] == '0' || buf[0] == '1');
+}
+#endif
+
+void
+pages_boot(void)
+{
+
+#ifndef _WIN32
+	mmap_flags = MAP_PRIVATE | MAP_ANON;
+#endif
+
+#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
+	os_overcommits = os_overcommits_sysctl();
+#elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY)
+	os_overcommits = os_overcommits_proc();
+#  ifdef MAP_NORESERVE
+	if (os_overcommits)
+		mmap_flags |= MAP_NORESERVE;
+#  endif
+#else
+	os_overcommits = false;
+#endif
+}
--- a/memory/jemalloc/src/src/prof.c
+++ b/memory/jemalloc/src/src/prof.c
@@ -116,23 +116,23 @@ static int		prof_dump_fd;
 static bool		prof_booted = false;
 
 /******************************************************************************/
 /*
  * Function prototypes for static functions that are referenced prior to
  * definition.
  */
 
-static bool	prof_tctx_should_destroy(prof_tctx_t *tctx);
+static bool	prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx);
 static void	prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx);
-static bool	prof_tdata_should_destroy(prof_tdata_t *tdata,
+static bool	prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
     bool even_if_attached);
 static void	prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata,
     bool even_if_attached);
-static char	*prof_thread_name_alloc(tsd_t *tsd, const char *thread_name);
+static char	*prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name);
 
 /******************************************************************************/
 /* Red-black trees. */
 
 JEMALLOC_INLINE_C int
 prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b)
 {
 	uint64_t a_thr_uid = a->thr_uid;
@@ -208,56 +208,57 @@ prof_alloc_rollback(tsd_t *tsd, prof_tct
 		 * programs.
 		 */
 		tdata = prof_tdata_get(tsd, true);
 		if (tdata != NULL)
 			prof_sample_threshold_update(tdata);
 	}
 
 	if ((uintptr_t)tctx > (uintptr_t)1U) {
-		malloc_mutex_lock(tctx->tdata->lock);
+		malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
 		tctx->prepared = false;
-		if (prof_tctx_should_destroy(tctx))
+		if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx))
 			prof_tctx_destroy(tsd, tctx);
 		else
-			malloc_mutex_unlock(tctx->tdata->lock);
+			malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
 	}
 }
 
 void
-prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx)
+prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
+    prof_tctx_t *tctx)
 {
 
-	prof_tctx_set(ptr, usize, tctx);
+	prof_tctx_set(tsdn, ptr, usize, tctx);
 
-	malloc_mutex_lock(tctx->tdata->lock);
+	malloc_mutex_lock(tsdn, tctx->tdata->lock);
 	tctx->cnts.curobjs++;
 	tctx->cnts.curbytes += usize;
 	if (opt_prof_accum) {
 		tctx->cnts.accumobjs++;
 		tctx->cnts.accumbytes += usize;
 	}
 	tctx->prepared = false;
-	malloc_mutex_unlock(tctx->tdata->lock);
+	malloc_mutex_unlock(tsdn, tctx->tdata->lock);
 }
 
 void
 prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx)
 {
 
-	malloc_mutex_lock(tctx->tdata->lock);
+	malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
 	assert(tctx->cnts.curobjs > 0);
 	assert(tctx->cnts.curbytes >= usize);
 	tctx->cnts.curobjs--;
 	tctx->cnts.curbytes -= usize;
 
-	if (prof_tctx_should_destroy(tctx))
+	if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx))
 		prof_tctx_destroy(tsd, tctx);
 	else
-		malloc_mutex_unlock(tctx->tdata->lock);
+		malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
 }
 
 void
 bt_init(prof_bt_t *bt, void **vec)
 {
 
 	cassert(config_prof);
 
@@ -272,42 +273,42 @@ prof_enter(tsd_t *tsd, prof_tdata_t *tda
 	cassert(config_prof);
 	assert(tdata == prof_tdata_get(tsd, false));
 
 	if (tdata != NULL) {
 		assert(!tdata->enq);
 		tdata->enq = true;
 	}
 
-	malloc_mutex_lock(&bt2gctx_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
 }
 
 JEMALLOC_INLINE_C void
 prof_leave(tsd_t *tsd, prof_tdata_t *tdata)
 {
 
 	cassert(config_prof);
 	assert(tdata == prof_tdata_get(tsd, false));
 
-	malloc_mutex_unlock(&bt2gctx_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);
 
 	if (tdata != NULL) {
 		bool idump, gdump;
 
 		assert(tdata->enq);
 		tdata->enq = false;
 		idump = tdata->enq_idump;
 		tdata->enq_idump = false;
 		gdump = tdata->enq_gdump;
 		tdata->enq_gdump = false;
 
 		if (idump)
-			prof_idump();
+			prof_idump(tsd_tsdn(tsd));
 		if (gdump)
-			prof_gdump();
+			prof_gdump(tsd_tsdn(tsd));
 	}
 }
 
 #ifdef JEMALLOC_PROF_LIBUNWIND
 void
 prof_backtrace(prof_bt_t *bt)
 {
 	int nframes;
@@ -541,24 +542,25 @@ prof_gctx_mutex_choose(void)
 static malloc_mutex_t *
 prof_tdata_mutex_choose(uint64_t thr_uid)
 {
 
 	return (&tdata_locks[thr_uid % PROF_NTDATA_LOCKS]);
 }
 
 static prof_gctx_t *
-prof_gctx_create(tsd_t *tsd, prof_bt_t *bt)
+prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt)
 {
 	/*
 	 * Create a single allocation that has space for vec of length bt->len.
 	 */
 	size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *));
-	prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsd, size,
-	    size2index(size), false, tcache_get(tsd, true), true, NULL, true);
+	prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size,
+	    size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true),
+	    true);
 	if (gctx == NULL)
 		return (NULL);
 	gctx->lock = prof_gctx_mutex_choose();
 	/*
 	 * Set nlimbo to 1, in order to avoid a race condition with
 	 * prof_tctx_destroy()/prof_gctx_try_destroy().
 	 */
 	gctx->nlimbo = 1;
@@ -580,42 +582,43 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_t
 	/*
 	 * Check that gctx is still unused by any thread cache before destroying
 	 * it.  prof_lookup() increments gctx->nlimbo in order to avoid a race
 	 * condition with this function, as does prof_tctx_destroy() in order to
 	 * avoid a race between the main body of prof_tctx_destroy() and entry
 	 * into this function.
 	 */
 	prof_enter(tsd, tdata_self);
-	malloc_mutex_lock(gctx->lock);
+	malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
 	assert(gctx->nlimbo != 0);
 	if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) {
 		/* Remove gctx from bt2gctx. */
 		if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL))
 			not_reached();
 		prof_leave(tsd, tdata_self);
 		/* Destroy gctx. */
-		malloc_mutex_unlock(gctx->lock);
-		idalloctm(tsd, gctx, tcache_get(tsd, false), true, true);
+		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
+		idalloctm(tsd_tsdn(tsd), gctx, NULL, true, true);
 	} else {
 		/*
 		 * Compensate for increment in prof_tctx_destroy() or
 		 * prof_lookup().
 		 */
 		gctx->nlimbo--;
-		malloc_mutex_unlock(gctx->lock);
+		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
 		prof_leave(tsd, tdata_self);
 	}
 }
 
-/* tctx->tdata->lock must be held. */
 static bool
-prof_tctx_should_destroy(prof_tctx_t *tctx)
+prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx)
 {
 
+	malloc_mutex_assert_owner(tsdn, tctx->tdata->lock);
+
 	if (opt_prof_accum)
 		return (false);
 	if (tctx->cnts.curobjs != 0)
 		return (false);
 	if (tctx->prepared)
 		return (false);
 	return (true);
 }
@@ -628,35 +631,36 @@ prof_gctx_should_destroy(prof_gctx_t *gc
 		return (false);
 	if (!tctx_tree_empty(&gctx->tctxs))
 		return (false);
 	if (gctx->nlimbo != 0)
 		return (false);
 	return (true);
 }
 
-/* tctx->tdata->lock is held upon entry, and released before return. */
 static void
 prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx)
 {
 	prof_tdata_t *tdata = tctx->tdata;
 	prof_gctx_t *gctx = tctx->gctx;
 	bool destroy_tdata, destroy_tctx, destroy_gctx;
 
+	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
+
 	assert(tctx->cnts.curobjs == 0);
 	assert(tctx->cnts.curbytes == 0);
 	assert(!opt_prof_accum);
 	assert(tctx->cnts.accumobjs == 0);
 	assert(tctx->cnts.accumbytes == 0);
 
 	ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL);
-	destroy_tdata = prof_tdata_should_destroy(tdata, false);
-	malloc_mutex_unlock(tdata->lock);
+	destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false);
+	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
 
-	malloc_mutex_lock(gctx->lock);
+	malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
 	switch (tctx->state) {
 	case prof_tctx_state_nominal:
 		tctx_tree_remove(&gctx->tctxs, tctx);
 		destroy_tctx = true;
 		if (prof_gctx_should_destroy(gctx)) {
 			/*
 			 * Increment gctx->nlimbo in order to keep another
 			 * thread from winning the race to destroy gctx while
@@ -686,27 +690,29 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_
 		destroy_tctx = false;
 		destroy_gctx = false;
 		break;
 	default:
 		not_reached();
 		destroy_tctx = false;
 		destroy_gctx = false;
 	}
-	malloc_mutex_unlock(gctx->lock);
+	malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
 	if (destroy_gctx) {
 		prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx,
 		    tdata);
 	}
 
+	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock);
+
 	if (destroy_tdata)
 		prof_tdata_destroy(tsd, tdata, false);
 
 	if (destroy_tctx)
-		idalloctm(tsd, tctx, tcache_get(tsd, false), true, true);
+		idalloctm(tsd_tsdn(tsd), tctx, NULL, true, true);
 }
 
 static bool
 prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata,
     void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx)
 {
 	union {
 		prof_gctx_t	*p;
@@ -716,38 +722,37 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t
 		prof_bt_t	*p;
 		void		*v;
 	} btkey;
 	bool new_gctx;
 
 	prof_enter(tsd, tdata);
 	if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) {
 		/* bt has never been seen before.  Insert it. */
-		gctx.p = prof_gctx_create(tsd, bt);
+		gctx.p = prof_gctx_create(tsd_tsdn(tsd), bt);
 		if (gctx.v == NULL) {
 			prof_leave(tsd, tdata);
 			return (true);
 		}
 		btkey.p = &gctx.p->bt;
 		if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) {
 			/* OOM. */
 			prof_leave(tsd, tdata);
-			idalloctm(tsd, gctx.v, tcache_get(tsd, false), true,
-			    true);
+			idalloctm(tsd_tsdn(tsd), gctx.v, NULL, true, true);
 			return (true);
 		}
 		new_gctx = true;
 	} else {
 		/*
 		 * Increment nlimbo, in order to avoid a race condition with
 		 * prof_tctx_destroy()/prof_gctx_try_destroy().
 		 */
-		malloc_mutex_lock(gctx.p->lock);
+		malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock);
 		gctx.p->nlimbo++;
-		malloc_mutex_unlock(gctx.p->lock);
+		malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock);
 		new_gctx = false;
 	}
 	prof_leave(tsd, tdata);
 
 	*p_btkey = btkey.v;
 	*p_gctx = gctx.p;
 	*p_new_gctx = new_gctx;
 	return (false);
@@ -764,88 +769,86 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt)
 	bool not_found;
 
 	cassert(config_prof);
 
 	tdata = prof_tdata_get(tsd, false);
 	if (tdata == NULL)
 		return (NULL);
 
-	malloc_mutex_lock(tdata->lock);
+	malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
 	not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v);
 	if (!not_found) /* Note double negative! */
 		ret.p->prepared = true;
-	malloc_mutex_unlock(tdata->lock);
+	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
 	if (not_found) {
-		tcache_t *tcache;
 		void *btkey;
 		prof_gctx_t *gctx;
 		bool new_gctx, error;
 
 		/*
 		 * This thread's cache lacks bt.  Look for it in the global
 		 * cache.
 		 */
 		if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx,
 		    &new_gctx))
 			return (NULL);
 
 		/* Link a prof_tctx_t into gctx for this thread. */
-		tcache = tcache_get(tsd, true);
-		ret.v = iallocztm(tsd, sizeof(prof_tctx_t),
-		    size2index(sizeof(prof_tctx_t)), false, tcache, true, NULL,
-		    true);
+		ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t),
+		    size2index(sizeof(prof_tctx_t)), false, NULL, true,
+		    arena_ichoose(tsd, NULL), true);
 		if (ret.p == NULL) {
 			if (new_gctx)
 				prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
 			return (NULL);
 		}
 		ret.p->tdata = tdata;
 		ret.p->thr_uid = tdata->thr_uid;
 		ret.p->thr_discrim = tdata->thr_discrim;
 		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
 		ret.p->gctx = gctx;
 		ret.p->tctx_uid = tdata->tctx_uid_next++;
 		ret.p->prepared = true;
 		ret.p->state = prof_tctx_state_initializing;
-		malloc_mutex_lock(tdata->lock);
+		malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
 		error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v);
-		malloc_mutex_unlock(tdata->lock);
+		malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
 		if (error) {
 			if (new_gctx)
 				prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
-			idalloctm(tsd, ret.v, tcache, true, true);
+			idalloctm(tsd_tsdn(tsd), ret.v, NULL, true, true);
 			return (NULL);
 		}
-		malloc_mutex_lock(gctx->lock);
+		malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
 		ret.p->state = prof_tctx_state_nominal;
 		tctx_tree_insert(&gctx->tctxs, ret.p);
 		gctx->nlimbo--;
-		malloc_mutex_unlock(gctx->lock);
+		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
 	}
 
 	return (ret.p);
 }
 
+/*
+ * The bodies of this function and prof_leakcheck() are compiled out unless heap
+ * profiling is enabled, so that it is possible to compile jemalloc with
+ * floating point support completely disabled.  Avoiding floating point code is
+ * important on memory-constrained systems, but it also enables a workaround for
+ * versions of glibc that don't properly save/restore floating point registers
+ * during dynamic lazy symbol loading (which internally calls into whatever
+ * malloc implementation happens to be integrated into the application).  Note
+ * that some compilers (e.g.  gcc 4.8) may use floating point registers for fast
+ * memory moves, so jemalloc must be compiled with such optimizations disabled
+ * (e.g.
+ * -mno-sse) in order for the workaround to be complete.
+ */
 void
 prof_sample_threshold_update(prof_tdata_t *tdata)
 {
-	/*
-	 * The body of this function is compiled out unless heap profiling is
-	 * enabled, so that it is possible to compile jemalloc with floating
-	 * point support completely disabled.  Avoiding floating point code is
-	 * important on memory-constrained systems, but it also enables a
-	 * workaround for versions of glibc that don't properly save/restore
-	 * floating point registers during dynamic lazy symbol loading (which
-	 * internally calls into whatever malloc implementation happens to be
-	 * integrated into the application).  Note that some compilers (e.g.
-	 * gcc 4.8) may use floating point registers for fast memory moves, so
-	 * jemalloc must be compiled with such optimizations disabled (e.g.
-	 * -mno-sse) in order for the workaround to be complete.
-	 */
 #ifdef JEMALLOC_PROF
 	uint64_t r;
 	double u;
 
 	if (!config_prof)
 		return;
 
 	if (lg_prof_sample == 0) {
@@ -866,17 +869,17 @@ prof_sample_threshold_update(prof_tdata_
 	 * For more information on the math, see:
 	 *
 	 *   Non-Uniform Random Variate Generation
 	 *   Luc Devroye
 	 *   Springer-Verlag, New York, 1986
 	 *   pp 500
 	 *   (http://luc.devroye.org/rnbookindex.html)
 	 */
-	r = prng_lg_range(&tdata->prng_state, 53);
+	r = prng_lg_range_u64(&tdata->prng_state, 53);
 	u = (double)r * (1.0/9007199254740992.0L);
 	tdata->bytes_until_sample = (uint64_t)(log(u) /
 	    log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample))))
 	    + (uint64_t)1U;
 #endif
 }
 
 #ifdef JEMALLOC_JET
@@ -889,21 +892,23 @@ prof_tdata_count_iter(prof_tdata_tree_t 
 
 	return (NULL);
 }
 
 size_t
 prof_tdata_count(void)
 {
 	size_t tdata_count = 0;
+	tsdn_t *tsdn;
 
-	malloc_mutex_lock(&tdatas_mtx);
+	tsdn = tsdn_fetch();
+	malloc_mutex_lock(tsdn, &tdatas_mtx);
 	tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter,
 	    (void *)&tdata_count);
-	malloc_mutex_unlock(&tdatas_mtx);
+	malloc_mutex_unlock(tsdn, &tdatas_mtx);
 
 	return (tdata_count);
 }
 #endif
 
 #ifdef JEMALLOC_JET
 size_t
 prof_bt_count(void)
@@ -912,19 +917,19 @@ prof_bt_count(void)
 	tsd_t *tsd;
 	prof_tdata_t *tdata;
 
 	tsd = tsd_fetch();
 	tdata = prof_tdata_get(tsd, false);
 	if (tdata == NULL)
 		return (0);
 
-	malloc_mutex_lock(&bt2gctx_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
 	bt_count = ckh_count(&bt2gctx);
-	malloc_mutex_unlock(&bt2gctx_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);
 
 	return (bt_count);
 }
 #endif
 
 #ifdef JEMALLOC_JET
 #undef prof_dump_open
 #define	prof_dump_open JEMALLOC_N(prof_dump_open_impl)
@@ -1027,30 +1032,31 @@ prof_dump_printf(bool propagate_err, con
 	va_start(ap, format);
 	malloc_vsnprintf(buf, sizeof(buf), format, ap);
 	va_end(ap);
 	ret = prof_dump_write(propagate_err, buf);
 
 	return (ret);
 }
 
-/* tctx->tdata->lock is held. */
 static void
-prof_tctx_merge_tdata(prof_tctx_t *tctx, prof_tdata_t *tdata)
+prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata)
 {
 
-	malloc_mutex_lock(tctx->gctx->lock);
+	malloc_mutex_assert_owner(tsdn, tctx->tdata->lock);
+
+	malloc_mutex_lock(tsdn, tctx->gctx->lock);
 
 	switch (tctx->state) {
 	case prof_tctx_state_initializing:
-		malloc_mutex_unlock(tctx->gctx->lock);
+		malloc_mutex_unlock(tsdn, tctx->gctx->lock);
 		return;
 	case prof_tctx_state_nominal:
 		tctx->state = prof_tctx_state_dumping;
-		malloc_mutex_unlock(tctx->gctx->lock);
+		malloc_mutex_unlock(tsdn, tctx->gctx->lock);
 
 		memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t));
 
 		tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
 		tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
 		if (opt_prof_accum) {
 			tdata->cnt_summed.accumobjs +=
 			    tctx->dump_cnts.accumobjs;
@@ -1059,81 +1065,93 @@ prof_tctx_merge_tdata(prof_tctx_t *tctx,
 		}
 		break;
 	case prof_tctx_state_dumping:
 	case prof_tctx_state_purgatory:
 		not_reached();
 	}
 }
 
-/* gctx->lock is held. */
 static void
-prof_tctx_merge_gctx(prof_tctx_t *tctx, prof_gctx_t *gctx)
+prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx)
 {
 
+	malloc_mutex_assert_owner(tsdn, gctx->lock);
+
 	gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
 	gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
 	if (opt_prof_accum) {
 		gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs;
 		gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes;
 	}
 }
 
-/* tctx->gctx is held. */
 static prof_tctx_t *
 prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg)
 {
+	tsdn_t *tsdn = (tsdn_t *)arg;
+
+	malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);
 
 	switch (tctx->state) {
 	case prof_tctx_state_nominal:
 		/* New since dumping started; ignore. */
 		break;
 	case prof_tctx_state_dumping:
 	case prof_tctx_state_purgatory:
-		prof_tctx_merge_gctx(tctx, tctx->gctx);
+		prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx);
 		break;
 	default:
 		not_reached();
 	}
 
 	return (NULL);
 }
 
-/* gctx->lock is held. */
+struct prof_tctx_dump_iter_arg_s {
+	tsdn_t	*tsdn;
+	bool	propagate_err;
+};
+
 static prof_tctx_t *
-prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg)
+prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque)
 {
-	bool propagate_err = *(bool *)arg;
+	struct prof_tctx_dump_iter_arg_s *arg =
+	    (struct prof_tctx_dump_iter_arg_s *)opaque;
+
+	malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock);
 
 	switch (tctx->state) {
 	case prof_tctx_state_initializing:
 	case prof_tctx_state_nominal:
 		/* Not captured by this dump. */
 		break;
 	case prof_tctx_state_dumping:
 	case prof_tctx_state_purgatory:
-		if (prof_dump_printf(propagate_err,
+		if (prof_dump_printf(arg->propagate_err,
 		    "  t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": "
 		    "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs,
 		    tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs,
 		    tctx->dump_cnts.accumbytes))
 			return (tctx);
 		break;
 	default:
 		not_reached();
 	}
 	return (NULL);
 }
 
-/* tctx->gctx is held. */
 static prof_tctx_t *
 prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg)
 {
+	tsdn_t *tsdn = (tsdn_t *)arg;
 	prof_tctx_t *ret;
 
+	malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);
+
 	switch (tctx->state) {
 	case prof_tctx_state_nominal:
 		/* New since dumping started; ignore. */
 		break;
 	case prof_tctx_state_dumping:
 		tctx->state = prof_tctx_state_nominal;
 		break;
 	case prof_tctx_state_purgatory:
@@ -1144,46 +1162,53 @@ prof_tctx_finish_iter(prof_tctx_tree_t *
 	}
 
 	ret = NULL;
 label_return:
 	return (ret);
 }
 
 static void
-prof_dump_gctx_prep(prof_gctx_t *gctx, prof_gctx_tree_t *gctxs)
+prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs)
 {
 
 	cassert(config_prof);
 
-	malloc_mutex_lock(gctx->lock);
+	malloc_mutex_lock(tsdn, gctx->lock);
 
 	/*
 	 * Increment nlimbo so that gctx won't go away before dump.
 	 * Additionally, link gctx into the dump list so that it is included in
 	 * prof_dump()'s second pass.
 	 */
 	gctx->nlimbo++;
 	gctx_tree_insert(gctxs, gctx);
 
 	memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t));
 
-	malloc_mutex_unlock(gctx->lock);
+	malloc_mutex_unlock(tsdn, gctx->lock);
 }
 
+struct prof_gctx_merge_iter_arg_s {
+	tsdn_t	*tsdn;
+	size_t	leak_ngctx;
+};
+
 static prof_gctx_t *
-prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *arg)
+prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque)
 {
-	size_t *leak_ngctx = (size_t *)arg;
+	struct prof_gctx_merge_iter_arg_s *arg =
+	    (struct prof_gctx_merge_iter_arg_s *)opaque;
 
-	malloc_mutex_lock(gctx->lock);
-	tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, NULL);
+	malloc_mutex_lock(arg->tsdn, gctx->lock);
+	tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter,
+	    (void *)arg->tsdn);
 	if (gctx->cnt_summed.curobjs != 0)
-		(*leak_ngctx)++;
-	malloc_mutex_unlock(gctx->lock);
+		arg->leak_ngctx++;
+	malloc_mutex_unlock(arg->tsdn, gctx->lock);
 
 	return (NULL);
 }
 
 static void
 prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs)
 {
 	prof_tdata_t *tdata = prof_tdata_get(tsd, false);
@@ -1192,74 +1217,82 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_t
 	/*
 	 * Standard tree iteration won't work here, because as soon as we
 	 * decrement gctx->nlimbo and unlock gctx, another thread can
 	 * concurrently destroy it, which will corrupt the tree.  Therefore,
 	 * tear down the tree one node at a time during iteration.
 	 */
 	while ((gctx = gctx_tree_first(gctxs)) != NULL) {
 		gctx_tree_remove(gctxs, gctx);
-		malloc_mutex_lock(gctx->lock);
+		malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
 		{
 			prof_tctx_t *next;
 
 			next = NULL;
 			do {
 				prof_tctx_t *to_destroy =
 				    tctx_tree_iter(&gctx->tctxs, next,
-				    prof_tctx_finish_iter, NULL);
+				    prof_tctx_finish_iter,
+				    (void *)tsd_tsdn(tsd));
 				if (to_destroy != NULL) {
 					next = tctx_tree_next(&gctx->tctxs,
 					    to_destroy);
 					tctx_tree_remove(&gctx->tctxs,
 					    to_destroy);
-					idalloctm(tsd, to_destroy,
-					    tcache_get(tsd, false), true, true);
+					idalloctm(tsd_tsdn(tsd), to_destroy,
+					    NULL, true, true);
 				} else
 					next = NULL;
 			} while (next != NULL);
 		}
 		gctx->nlimbo--;
 		if (prof_gctx_should_destroy(gctx)) {
 			gctx->nlimbo++;
-			malloc_mutex_unlock(gctx->lock);
+			malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
 			prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
 		} else
-			malloc_mutex_unlock(gctx->lock);
+			malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
 	}
 }
 
+struct prof_tdata_merge_iter_arg_s {
+	tsdn_t		*tsdn;
+	prof_cnt_t	cnt_all;
+};
+
 static prof_tdata_t *
-prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg)
+prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
+    void *opaque)
 {
-	prof_cnt_t *cnt_all = (prof_cnt_t *)arg;
+	struct prof_tdata_merge_iter_arg_s *arg =
+	    (struct prof_tdata_merge_iter_arg_s *)opaque;
 
-	malloc_mutex_lock(tdata->lock);
+	malloc_mutex_lock(arg->tsdn, tdata->lock);
 	if (!tdata->expired) {
 		size_t tabind;
 		union {
 			prof_tctx_t	*p;
 			void		*v;
 		} tctx;
 
 		tdata->dumping = true;
 		memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t));
 		for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL,
 		    &tctx.v);)
-			prof_tctx_merge_tdata(tctx.p, tdata);
+			prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata);
 
-		cnt_all->curobjs += tdata->cnt_summed.curobjs;
-		cnt_all->curbytes += tdata->cnt_summed.curbytes;
+		arg->cnt_all.curobjs += tdata->cnt_summed.curobjs;
+		arg->cnt_all.curbytes += tdata->cnt_summed.curbytes;
 		if (opt_prof_accum) {
-			cnt_all->accumobjs += tdata->cnt_summed.accumobjs;
-			cnt_all->accumbytes += tdata->cnt_summed.accumbytes;
+			arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs;
+			arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes;
 		}
 	} else
 		tdata->dumping = false;
-	malloc_mutex_unlock(tdata->lock);
+	malloc_mutex_unlock(arg->tsdn, tdata->lock);
 
 	return (NULL);
 }
 
 static prof_tdata_t *
 prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg)
 {
 	bool propagate_err = *(bool *)arg;
@@ -1278,48 +1311,49 @@ prof_tdata_dump_iter(prof_tdata_tree_t *
 	return (NULL);
 }
 
 #ifdef JEMALLOC_JET
 #undef prof_dump_header
 #define	prof_dump_header JEMALLOC_N(prof_dump_header_impl)
 #endif
 static bool
-prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all)
+prof_dump_header(tsdn_t *tsdn, bool propagate_err, const prof_cnt_t *cnt_all)
 {
 	bool ret;
 
 	if (prof_dump_printf(propagate_err,
 	    "heap_v2/%"FMTu64"\n"
 	    "  t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n",
 	    ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs,
 	    cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes))
 		return (true);
 
-	malloc_mutex_lock(&tdatas_mtx);
+	malloc_mutex_lock(tsdn, &tdatas_mtx);
 	ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter,
 	    (void *)&propagate_err) != NULL);
-	malloc_mutex_unlock(&tdatas_mtx);
+	malloc_mutex_unlock(tsdn, &tdatas_mtx);
 	return (ret);
 }
 #ifdef JEMALLOC_JET
 #undef prof_dump_header
 #define	prof_dump_header JEMALLOC_N(prof_dump_header)
 prof_dump_header_t *prof_dump_header = JEMALLOC_N(prof_dump_header_impl);
 #endif
 
-/* gctx->lock is held. */
 static bool
-prof_dump_gctx(bool propagate_err, prof_gctx_t *gctx, const prof_bt_t *bt,
-    prof_gctx_tree_t *gctxs)
+prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx,
+    const prof_bt_t *bt, prof_gctx_tree_t *gctxs)
 {
 	bool ret;
 	unsigned i;
+	struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg;
 
 	cassert(config_prof);
+	malloc_mutex_assert_owner(tsdn, gctx->lock);
 
 	/* Avoid dumping such gctx's that have no useful data. */
 	if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) ||
 	    (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) {
 		assert(gctx->cnt_summed.curobjs == 0);
 		assert(gctx->cnt_summed.curbytes == 0);
 		assert(gctx->cnt_summed.accumobjs == 0);
 		assert(gctx->cnt_summed.accumbytes == 0);
@@ -1343,18 +1377,20 @@ prof_dump_gctx(bool propagate_err, prof_
 	    "\n"
 	    "  t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n",
 	    gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes,
 	    gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes)) {
 		ret = true;
 		goto label_return;
 	}
 
+	prof_tctx_dump_iter_arg.tsdn = tsdn;
+	prof_tctx_dump_iter_arg.propagate_err = propagate_err;
 	if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter,
-	    (void *)&propagate_err) != NULL) {
+	    (void *)&prof_tctx_dump_iter_arg) != NULL) {
 		ret = true;
 		goto label_return;
 	}
 
 	ret = false;
 label_return:
 	return (ret);
 }
@@ -1437,129 +1473,165 @@ prof_dump_maps(bool propagate_err)
 
 	ret = false;
 label_return:
 	if (mfd != -1)
 		close(mfd);
 	return (ret);
 }
 
+/*
+ * See prof_sample_threshold_update() comment for why the body of this function
+ * is conditionally compiled.
+ */
 static void
 prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx,
     const char *filename)
 {
 
+#ifdef JEMALLOC_PROF
+	/*
+	 * Scaling is equivalent AdjustSamples() in jeprof, but the result may
+	 * differ slightly from what jeprof reports, because here we scale the
+	 * summary values, whereas jeprof scales each context individually and
+	 * reports the sums of the scaled values.
+	 */
 	if (cnt_all->curbytes != 0) {
-		malloc_printf("<jemalloc>: Leak summary: %"FMTu64" byte%s, %"
-		    FMTu64" object%s, %zu context%s\n",
-		    cnt_all->curbytes, (cnt_all->curbytes != 1) ? "s" : "",
-		    cnt_all->curobjs, (cnt_all->curobjs != 1) ? "s" : "",
-		    leak_ngctx, (leak_ngctx != 1) ? "s" : "");
+		double sample_period = (double)((uint64_t)1 << lg_prof_sample);
+		double ratio = (((double)cnt_all->curbytes) /
+		    (double)cnt_all->curobjs) / sample_period;
+		double scale_factor = 1.0 / (1.0 - exp(-ratio));
+		uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes)
+		    * scale_factor);
+		uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) *
+		    scale_factor);
+
+		malloc_printf("<jemalloc>: Leak approximation summary: ~%"FMTu64
+		    " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n",
+		    curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs !=
+		    1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : "");
 		malloc_printf(
 		    "<jemalloc>: Run jeprof on \"%s\" for leak detail\n",
 		    filename);
 	}
+#endif
 }
 
+struct prof_gctx_dump_iter_arg_s {
+	tsdn_t	*tsdn;
+	bool	propagate_err;
+};
+
 static prof_gctx_t *
-prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *arg)
+prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque)
 {
 	prof_gctx_t *ret;
-	bool propagate_err = *(bool *)arg;
+	struct prof_gctx_dump_iter_arg_s *arg =
+	    (struct prof_gctx_dump_iter_arg_s *)opaque;
 
-	malloc_mutex_lock(gctx->lock);
+	malloc_mutex_lock(arg->tsdn, gctx->lock);
 
-	if (prof_dump_gctx(propagate_err, gctx, &gctx->bt, gctxs)) {
+	if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt,
+	    gctxs)) {
 		ret = gctx;
 		goto label_return;
 	}
 
 	ret = NULL;
 label_return:
-	malloc_mutex_unlock(gctx->lock);
+	malloc_mutex_unlock(arg->tsdn, gctx->lock);
 	return (ret);
 }
 
 static bool
 prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck)
 {
 	prof_tdata_t *tdata;
-	prof_cnt_t cnt_all;
+	struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg;
 	size_t tabind;
 	union {
 		prof_gctx_t	*p;
 		void		*v;
 	} gctx;
-	size_t leak_ngctx;
+	struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg;
+	struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg;
 	prof_gctx_tree_t gctxs;
 
 	cassert(config_prof);
 
 	tdata = prof_tdata_get(tsd, true);
 	if (tdata == NULL)
 		return (true);
 
-	malloc_mutex_lock(&prof_dump_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx);
 	prof_enter(tsd, tdata);
 
 	/*
 	 * Put gctx's in limbo and clear their counters in preparation for
 	 * summing.
 	 */
 	gctx_tree_new(&gctxs);
 	for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);)
-		prof_dump_gctx_prep(gctx.p, &gctxs);
+		prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, &gctxs);
 
 	/*
 	 * Iterate over tdatas, and for the non-expired ones snapshot their tctx
 	 * stats and merge them into the associated gctx's.
 	 */
-	memset(&cnt_all, 0, sizeof(prof_cnt_t));
-	malloc_mutex_lock(&tdatas_mtx);
-	tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, (void *)&cnt_all);
-	malloc_mutex_unlock(&tdatas_mtx);
+	prof_tdata_merge_iter_arg.tsdn = tsd_tsdn(tsd);
+	memset(&prof_tdata_merge_iter_arg.cnt_all, 0, sizeof(prof_cnt_t));
+	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
+	tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter,
+	    (void *)&prof_tdata_merge_iter_arg);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
 
 	/* Merge tctx stats into gctx's. */
-	leak_ngctx = 0;
-	gctx_tree_iter(&gctxs, NULL, prof_gctx_merge_iter, (void *)&leak_ngctx);
+	prof_gctx_merge_iter_arg.tsdn = tsd_tsdn(tsd);
+	prof_gctx_merge_iter_arg.leak_ngctx = 0;
+	gctx_tree_iter(&gctxs, NULL, prof_gctx_merge_iter,
+	    (void *)&prof_gctx_merge_iter_arg);
 
 	prof_leave(tsd, tdata);
 
 	/* Create dump file. */
 	if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1)
 		goto label_open_close_error;
 
 	/* Dump profile header. */
-	if (prof_dump_header(propagate_err, &cnt_all))
+	if (prof_dump_header(tsd_tsdn(tsd), propagate_err,
+	    &prof_tdata_merge_iter_arg.cnt_all))
 		goto label_write_error;
 
 	/* Dump per gctx profile stats. */
+	prof_gctx_dump_iter_arg.tsdn = tsd_tsdn(tsd);
+	prof_gctx_dump_iter_arg.propagate_err = propagate_err;
 	if (gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter,
-	    (void *)&propagate_err) != NULL)
+	    (void *)&prof_gctx_dump_iter_arg) != NULL)
 		goto label_write_error;
 
 	/* Dump /proc/<pid>/maps if possible. */
 	if (prof_dump_maps(propagate_err))
 		goto label_write_error;
 
 	if (prof_dump_close(propagate_err))
 		goto label_open_close_error;
 
 	prof_gctx_finish(tsd, &gctxs);
-	malloc_mutex_unlock(&prof_dump_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
 
-	if (leakcheck)
-		prof_leakcheck(&cnt_all, leak_ngctx, filename);
-
+	if (leakcheck) {
+		prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all,
+		    prof_gctx_merge_iter_arg.leak_ngctx, filename);
+	}
 	return (false);
 label_write_error:
 	prof_dump_close(propagate_err);
 label_open_close_error:
 	prof_gctx_finish(tsd, &gctxs);
-	malloc_mutex_unlock(&prof_dump_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
 	return (true);
 }
 
 #define	DUMP_FILENAME_BUFSIZE	(PATH_MAX + 1)
 #define	VSEQ_INVALID		UINT64_C(0xffffffffffffffff)
 static void
 prof_dump_filename(char *filename, char v, uint64_t vseq)
 {
@@ -1589,101 +1661,99 @@ prof_fdump(void)
 	cassert(config_prof);
 	assert(opt_prof_final);
 	assert(opt_prof_prefix[0] != '\0');
 
 	if (!prof_booted)
 		return;
 	tsd = tsd_fetch();
 
-	malloc_mutex_lock(&prof_dump_seq_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
 	prof_dump_filename(filename, 'f', VSEQ_INVALID);
-	malloc_mutex_unlock(&prof_dump_seq_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
 	prof_dump(tsd, false, filename, opt_prof_leak);
 }
 
 void
-prof_idump(void)
+prof_idump(tsdn_t *tsdn)
 {
 	tsd_t *tsd;
 	prof_tdata_t *tdata;
 
 	cassert(config_prof);
 
-	if (!prof_booted)
+	if (!prof_booted || tsdn_null(tsdn))
 		return;
-	tsd = tsd_fetch();
+	tsd = tsdn_tsd(tsdn);
 	tdata = prof_tdata_get(tsd, false);
 	if (tdata == NULL)
 		return;
 	if (tdata->enq) {
 		tdata->enq_idump = true;
 		return;
 	}
 
 	if (opt_prof_prefix[0] != '\0') {
 		char filename[PATH_MAX + 1];
-		malloc_mutex_lock(&prof_dump_seq_mtx);
+		malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
 		prof_dump_filename(filename, 'i', prof_dump_iseq);
 		prof_dump_iseq++;
-		malloc_mutex_unlock(&prof_dump_seq_mtx);
+		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
 		prof_dump(tsd, false, filename, false);
 	}
 }
 
 bool
-prof_mdump(const char *filename)
+prof_mdump(tsd_t *tsd, const char *filename)
 {
-	tsd_t *tsd;
 	char filename_buf[DUMP_FILENAME_BUFSIZE];
 
 	cassert(config_prof);
 
 	if (!opt_prof || !prof_booted)
 		return (true);
-	tsd = tsd_fetch();
 
 	if (filename == NULL) {
 		/* No filename specified, so automatically generate one. */
 		if (opt_prof_prefix[0] == '\0')
 			return (true);
-		malloc_mutex_lock(&prof_dump_seq_mtx);
+		malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
 		prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
 		prof_dump_mseq++;
-		malloc_mutex_unlock(&prof_dump_seq_mtx);
+		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
 		filename = filename_buf;
 	}
 	return (prof_dump(tsd, true, filename, false));
 }
 
 void
-prof_gdump(void)
+prof_gdump(tsdn_t *tsdn)
 {
 	tsd_t *tsd;
 	prof_tdata_t *tdata;
 
 	cassert(config_prof);
 
-	if (!prof_booted)
+	if (!prof_booted || tsdn_null(tsdn))
 		return;
-	tsd = tsd_fetch();
+	tsd = tsdn_tsd(tsdn);
 	tdata = prof_tdata_get(tsd, false);
 	if (tdata == NULL)
 		return;
 	if (tdata->enq) {
 		tdata->enq_gdump = true;
 		return;
 	}
 
 	if (opt_prof_prefix[0] != '\0') {
 		char filename[DUMP_FILENAME_BUFSIZE];
-		malloc_mutex_lock(&prof_dump_seq_mtx);
+		malloc_mutex_lock(tsdn, &prof_dump_seq_mtx);
 		prof_dump_filename(filename, 'u', prof_dump_useq);
 		prof_dump_useq++;
-		malloc_mutex_unlock(&prof_dump_seq_mtx);
+		malloc_mutex_unlock(tsdn, &prof_dump_seq_mtx);
 		prof_dump(tsd, false, filename, false);
 	}
 }
 
 static void
 prof_bt_hash(const void *key, size_t r_hash[2])
 {
 	prof_bt_t *bt = (prof_bt_t *)key;
@@ -1702,274 +1772,280 @@ prof_bt_keycomp(const void *k1, const vo
 	cassert(config_prof);
 
 	if (bt1->len != bt2->len)
 		return (false);
 	return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
 }
 
 JEMALLOC_INLINE_C uint64_t
-prof_thr_uid_alloc(void)
+prof_thr_uid_alloc(tsdn_t *tsdn)
 {
 	uint64_t thr_uid;
 
-	malloc_mutex_lock(&next_thr_uid_mtx);
+	malloc_mutex_lock(tsdn, &next_thr_uid_mtx);
 	thr_uid = next_thr_uid;
 	next_thr_uid++;
-	malloc_mutex_unlock(&next_thr_uid_mtx);
+	malloc_mutex_unlock(tsdn, &next_thr_uid_mtx);
 
 	return (thr_uid);
 }
 
 static prof_tdata_t *
 prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
     char *thread_name, bool active)
 {
 	prof_tdata_t *tdata;
-	tcache_t *tcache;
 
 	cassert(config_prof);
 
 	/* Initialize an empty cache for this thread. */
-	tcache = tcache_get(tsd, true);
-	tdata = (prof_tdata_t *)iallocztm(tsd, sizeof(prof_tdata_t),
-	    size2index(sizeof(prof_tdata_t)), false, tcache, true, NULL, true);
+	tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t),
+	    size2index(sizeof(prof_tdata_t)), false, NULL, true,
+	    arena_get(TSDN_NULL, 0, true), true);
 	if (tdata == NULL)
 		return (NULL);
 
 	tdata->lock = prof_tdata_mutex_choose(thr_uid);
 	tdata->thr_uid = thr_uid;
 	tdata->thr_discrim = thr_discrim;
 	tdata->thread_name = thread_name;
 	tdata->attached = true;
 	tdata->expired = false;
 	tdata->tctx_uid_next = 0;
 
-	if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS,
-	    prof_bt_hash, prof_bt_keycomp)) {
-		idalloctm(tsd, tdata, tcache, true, true);
+	if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash,
+	    prof_bt_keycomp)) {
+		idalloctm(tsd_tsdn(tsd), tdata, NULL, true, true);
 		return (NULL);
 	}
 
 	tdata->prng_state = (uint64_t)(uintptr_t)tdata;
 	prof_sample_threshold_update(tdata);
 
 	tdata->enq = false;
 	tdata->enq_idump = false;
 	tdata->enq_gdump = false;
 
 	tdata->dumping = false;
 	tdata->active = active;
 
-	malloc_mutex_lock(&tdatas_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
 	tdata_tree_insert(&tdatas, tdata);
-	malloc_mutex_unlock(&tdatas_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
 
 	return (tdata);
 }
 
 prof_tdata_t *
 prof_tdata_init(tsd_t *tsd)
 {
 
-	return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc(), 0, NULL,
-	    prof_thread_active_init_get()));
+	return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0,
+	    NULL, prof_thread_active_init_get(tsd_tsdn(tsd))));
 }
 
-/* tdata->lock must be held. */
 static bool
-prof_tdata_should_destroy(prof_tdata_t *tdata, bool even_if_attached)
+prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached)
 {
 
 	if (tdata->attached && !even_if_attached)
 		return (false);
 	if (ckh_count(&tdata->bt2tctx) != 0)
 		return (false);
 	return (true);
 }
 
-/* tdatas_mtx must be held. */
+static bool
+prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
+    bool even_if_attached)
+{
+
+	malloc_mutex_assert_owner(tsdn, tdata->lock);
+
+	return (prof_tdata_should_destroy_unlocked(tdata, even_if_attached));
+}
+
 static void
 prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata,
     bool even_if_attached)
 {
-	tcache_t *tcache;
 
-	assert(prof_tdata_should_destroy(tdata, even_if_attached));
-	assert(tsd_prof_tdata_get(tsd) != tdata);
+	malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx);
 
 	tdata_tree_remove(&tdatas, tdata);
 
-	tcache = tcache_get(tsd, false);
+	assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached));
+
 	if (tdata->thread_name != NULL)
-		idalloctm(tsd, tdata->thread_name, tcache, true, true);
+		idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, true, true);
 	ckh_delete(tsd, &tdata->bt2tctx);
-	idalloctm(tsd, tdata, tcache, true, true);
+	idalloctm(tsd_tsdn(tsd), tdata, NULL, true, true);
 }
 
 static void
 prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached)
 {
 
-	malloc_mutex_lock(&tdatas_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
 	prof_tdata_destroy_locked(tsd, tdata, even_if_attached);
-	malloc_mutex_unlock(&tdatas_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
 }
 
 static void
 prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata)
 {
 	bool destroy_tdata;
 
-	malloc_mutex_lock(tdata->lock);
+	malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
 	if (tdata->attached) {
-		destroy_tdata = prof_tdata_should_destroy(tdata, true);
+		destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata,
+		    true);
 		/*
 		 * Only detach if !destroy_tdata, because detaching would allow
 		 * another thread to win the race to destroy tdata.
 		 */
 		if (!destroy_tdata)
 			tdata->attached = false;
 		tsd_prof_tdata_set(tsd, NULL);
 	} else
 		destroy_tdata = false;
-	malloc_mutex_unlock(tdata->lock);
+	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
 	if (destroy_tdata)
 		prof_tdata_destroy(tsd, tdata, true);
 }
 
 prof_tdata_t *
 prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata)
 {
 	uint64_t thr_uid = tdata->thr_uid;
 	uint64_t thr_discrim = tdata->thr_discrim + 1;
 	char *thread_name = (tdata->thread_name != NULL) ?
-	    prof_thread_name_alloc(tsd, tdata->thread_name) : NULL;
+	    prof_thread_name_alloc(tsd_tsdn(tsd), tdata->thread_name) : NULL;
 	bool active = tdata->active;
 
 	prof_tdata_detach(tsd, tdata);
 	return (prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name,
 	    active));
 }
 
 static bool
-prof_tdata_expire(prof_tdata_t *tdata)
+prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata)
 {
 	bool destroy_tdata;
 
-	malloc_mutex_lock(tdata->lock);
+	malloc_mutex_lock(tsdn, tdata->lock);
 	if (!tdata->expired) {
 		tdata->expired = true;
 		destroy_tdata = tdata->attached ? false :
-		    prof_tdata_should_destroy(tdata, false);
+		    prof_tdata_should_destroy(tsdn, tdata, false);
 	} else
 		destroy_tdata = false;
-	malloc_mutex_unlock(tdata->lock);
+	malloc_mutex_unlock(tsdn, tdata->lock);
 
 	return (destroy_tdata);
 }
 
 static prof_tdata_t *
 prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg)
 {
+	tsdn_t *tsdn = (tsdn_t *)arg;
 
-	return (prof_tdata_expire(tdata) ? tdata : NULL);
+	return (prof_tdata_expire(tsdn, tdata) ? tdata : NULL);
 }
 
 void
 prof_reset(tsd_t *tsd, size_t lg_sample)
 {
 	prof_tdata_t *next;
 
 	assert(lg_sample < (sizeof(uint64_t) << 3));
 
-	malloc_mutex_lock(&prof_dump_mtx);
-	malloc_mutex_lock(&tdatas_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
 
 	lg_prof_sample = lg_sample;
 
 	next = NULL;
 	do {
 		prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next,
-		    prof_tdata_reset_iter, NULL);
+		    prof_tdata_reset_iter, (void *)tsd);
 		if (to_destroy != NULL) {
 			next = tdata_tree_next(&tdatas, to_destroy);
 			prof_tdata_destroy_locked(tsd, to_destroy, false);
 		} else
 			next = NULL;
 	} while (next != NULL);
 
-	malloc_mutex_unlock(&tdatas_mtx);
-	malloc_mutex_unlock(&prof_dump_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
 }
 
 void
 prof_tdata_cleanup(tsd_t *tsd)
 {
 	prof_tdata_t *tdata;
 
 	if (!config_prof)
 		return;
 
 	tdata = tsd_prof_tdata_get(tsd);
 	if (tdata != NULL)
 		prof_tdata_detach(tsd, tdata);
 }
 
 bool
-prof_active_get(void)
+prof_active_get(tsdn_t *tsdn)
 {
 	bool prof_active_current;
 
-	malloc_mutex_lock(&prof_active_mtx);
+	malloc_mutex_lock(tsdn, &prof_active_mtx);
 	prof_active_current = prof_active;
-	malloc_mutex_unlock(&prof_active_mtx);
+	malloc_mutex_unlock(tsdn, &prof_active_mtx);
 	return (prof_active_current);
 }
 
 bool
-prof_active_set(bool active)
+prof_active_set(tsdn_t *tsdn, bool active)
 {
 	bool prof_active_old;
 
-	malloc_mutex_lock(&prof_active_mtx);
+	malloc_mutex_lock(tsdn, &prof_active_mtx);
 	prof_active_old = prof_active;
 	prof_active = active;
-	malloc_mutex_unlock(&prof_active_mtx);
+	malloc_mutex_unlock(tsdn, &prof_active_mtx);
 	return (prof_active_old);
 }
 
 const char *
-prof_thread_name_get(void)
+prof_thread_name_get(tsd_t *tsd)
 {
-	tsd_t *tsd;
 	prof_tdata_t *tdata;
 
-	tsd = tsd_fetch();
 	tdata = prof_tdata_get(tsd, true);
 	if (tdata == NULL)
 		return ("");
 	return (tdata->thread_name != NULL ? tdata->thread_name : "");
 }
 
 static char *
-prof_thread_name_alloc(tsd_t *tsd, const char *thread_name)
+prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name)
 {
 	char *ret;
 	size_t size;
 
 	if (thread_name == NULL)
 		return (NULL);
 
 	size = strlen(thread_name) + 1;
 	if (size == 1)
 		return ("");
 
-	ret = iallocztm(tsd, size, size2index(size), false, tcache_get(tsd,
-	    true), true, NULL, true);
+	ret = iallocztm(tsdn, size, size2index(size), false, NULL, true,
+	    arena_get(TSDN_NULL, 0, true), true);
 	if (ret == NULL)
 		return (NULL);
 	memcpy(ret, thread_name, size);
 	return (ret);
 }
 
 int
 prof_thread_name_set(tsd_t *tsd, const char *thread_name)
@@ -1986,100 +2062,95 @@ prof_thread_name_set(tsd_t *tsd, const c
 	if (thread_name == NULL)
 		return (EFAULT);
 	for (i = 0; thread_name[i] != '\0'; i++) {
 		char c = thread_name[i];
 		if (!isgraph(c) && !isblank(c))
 			return (EFAULT);
 	}
 
-	s = prof_thread_name_alloc(tsd, thread_name);
+	s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name);
 	if (s == NULL)
 		return (EAGAIN);
 
 	if (tdata->thread_name != NULL) {
-		idalloctm(tsd, tdata->thread_name, tcache_get(tsd, false),
-		    true, true);
+		idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, true, true);
 		tdata->thread_name = NULL;
 	}
 	if (strlen(s) > 0)
 		tdata->thread_name = s;
 	return (0);
 }
 
 bool
-prof_thread_active_get(void)
+prof_thread_active_get(tsd_t *tsd)
 {
-	tsd_t *tsd;
 	prof_tdata_t *tdata;
 
-	tsd = tsd_fetch();
 	tdata = prof_tdata_get(tsd, true);
 	if (tdata == NULL)
 		return (false);
 	return (tdata->active);
 }
 
 bool
-prof_thread_active_set(bool active)
+prof_thread_active_set(tsd_t *tsd, bool active)
 {
-	tsd_t *tsd;
 	prof_tdata_t *tdata;
 
-	tsd = tsd_fetch();
 	tdata = prof_tdata_get(tsd, true);
 	if (tdata == NULL)
 		return (true);
 	tdata->active = active;
 	return (false);
 }
 
 bool
-prof_thread_active_init_get(void)
+prof_thread_active_init_get(tsdn_t *tsdn)
 {
 	bool active_init;
 
-	malloc_mutex_lock(&prof_thread_active_init_mtx);
+	malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
 	active_init = prof_thread_active_init;
-	malloc_mutex_unlock(&prof_thread_active_init_mtx);
+	malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
 	return (active_init);
 }
 
 bool
-prof_thread_active_init_set(bool active_init)
+prof_thread_active_init_set(tsdn_t *tsdn, bool active_init)
 {
 	bool active_init_old;
 
-	malloc_mutex_lock(&prof_thread_active_init_mtx);
+	malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
 	active_init_old = prof_thread_active_init;
 	prof_thread_active_init = active_init;
-	malloc_mutex_unlock(&prof_thread_active_init_mtx);
+	malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
 	return (active_init_old);
 }
 
 bool
-prof_gdump_get(void)
+prof_gdump_get(tsdn_t *tsdn)
 {
 	bool prof_gdump_current;
 
-	malloc_mutex_lock(&prof_gdump_mtx);
+	malloc_mutex_lock(tsdn, &prof_gdump_mtx);
 	prof_gdump_current = prof_gdump_val;
-	malloc_mutex_unlock(&prof_gdump_mtx);
+	malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
 	return (prof_gdump_current);
 }
 
 bool
-prof_gdump_set(bool gdump)
+prof_gdump_set(tsdn_t *tsdn, bool gdump)
 {
 	bool prof_gdump_old;
 
-	malloc_mutex_lock(&prof_gdump_mtx);
+	malloc_mutex_lock(tsdn, &prof_gdump_mtx);
 	prof_gdump_old = prof_gdump_val;
 	prof_gdump_val = gdump;
-	malloc_mutex_unlock(&prof_gdump_mtx);
+	malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
 	return (prof_gdump_old);
 }
 
 void
 prof_boot0(void)
 {
 
 	cassert(config_prof);
@@ -2110,81 +2181,90 @@ prof_boot1(void)
 		if (opt_lg_prof_interval >= 0) {
 			prof_interval = (((uint64_t)1U) <<
 			    opt_lg_prof_interval);
 		}
 	}
 }
 
 bool
-prof_boot2(void)
+prof_boot2(tsd_t *tsd)
 {
 
 	cassert(config_prof);
 
 	if (opt_prof) {
-		tsd_t *tsd;
 		unsigned i;
 
 		lg_prof_sample = opt_lg_prof_sample;
 
 		prof_active = opt_prof_active;
-		if (malloc_mutex_init(&prof_active_mtx))
+		if (malloc_mutex_init(&prof_active_mtx, "prof_active",
+		    WITNESS_RANK_PROF_ACTIVE))
 			return (true);
 
 		prof_gdump_val = opt_prof_gdump;
-		if (malloc_mutex_init(&prof_gdump_mtx))
+		if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump",
+		    WITNESS_RANK_PROF_GDUMP))
 			return (true);
 
 		prof_thread_active_init = opt_prof_thread_active_init;
-		if (malloc_mutex_init(&prof_thread_active_init_mtx))
+		if (malloc_mutex_init(&prof_thread_active_init_mtx,
+		    "prof_thread_active_init",
+		    WITNESS_RANK_PROF_THREAD_ACTIVE_INIT))
 			return (true);
 
-		tsd = tsd_fetch();
 		if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash,
 		    prof_bt_keycomp))
 			return (true);
-		if (malloc_mutex_init(&bt2gctx_mtx))
+		if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx",
+		    WITNESS_RANK_PROF_BT2GCTX))
 			return (true);
 
 		tdata_tree_new(&tdatas);
-		if (malloc_mutex_init(&tdatas_mtx))
+		if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas",
+		    WITNESS_RANK_PROF_TDATAS))
 			return (true);
 
 		next_thr_uid = 0;
-		if (malloc_mutex_init(&next_thr_uid_mtx))
+		if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid",
+		    WITNESS_RANK_PROF_NEXT_THR_UID))
 			return (true);
 
-		if (malloc_mutex_init(&prof_dump_seq_mtx))
+		if (malloc_mutex_init(&prof_dump_seq_mtx, "prof_dump_seq",
+		    WITNESS_RANK_PROF_DUMP_SEQ))
 			return (true);
-		if (malloc_mutex_init(&prof_dump_mtx))
+		if (malloc_mutex_init(&prof_dump_mtx, "prof_dump",
+		    WITNESS_RANK_PROF_DUMP))
 			return (true);
 
 		if (opt_prof_final && opt_prof_prefix[0] != '\0' &&
 		    atexit(prof_fdump) != 0) {
 			malloc_write("<jemalloc>: Error in atexit()\n");
 			if (opt_abort)
 				abort();
 		}
 
-		gctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS *
-		    sizeof(malloc_mutex_t));
+		gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd),
+		    PROF_NCTX_LOCKS * sizeof(malloc_mutex_t));
 		if (gctx_locks == NULL)
 			return (true);
 		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
-			if (malloc_mutex_init(&gctx_locks[i]))
+			if (malloc_mutex_init(&gctx_locks[i], "prof_gctx",
+			    WITNESS_RANK_PROF_GCTX))
 				return (true);
 		}
 
-		tdata_locks = (malloc_mutex_t *)base_alloc(PROF_NTDATA_LOCKS *
-		    sizeof(malloc_mutex_t));
+		tdata_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd),
+		    PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t));
 		if (tdata_locks == NULL)
 			return (true);
 		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
-			if (malloc_mutex_init(&tdata_locks[i]))
+			if (malloc_mutex_init(&tdata_locks[i], "prof_tdata",
+			    WITNESS_RANK_PROF_TDATA))
 				return (true);
 		}
 	}
 
 #ifdef JEMALLOC_PROF_LIBGCC
 	/*
 	 * Cause the backtracing machinery to allocate its internal state
 	 * before enabling profiling.
@@ -2193,82 +2273,83 @@ prof_boot2(void)
 #endif
 
 	prof_booted = true;
 
 	return (false);
 }
 
 void
-prof_prefork0(void)
+prof_prefork0(tsdn_t *tsdn)
 {
 
 	if (opt_prof) {
 		unsigned i;
 
-		malloc_mutex_prefork(&prof_dump_mtx);
-		malloc_mutex_prefork(&bt2gctx_mtx);
-		malloc_mutex_prefork(&tdatas_mtx);
+		malloc_mutex_prefork(tsdn, &prof_dump_mtx);
+		malloc_mutex_prefork(tsdn, &bt2gctx_mtx);
+		malloc_mutex_prefork(tsdn, &tdatas_mtx);
 		for (i = 0; i < PROF_NTDATA_LOCKS; i++)
-			malloc_mutex_prefork(&tdata_locks[i]);
+			malloc_mutex_prefork(tsdn, &tdata_locks[i]);
 		for (i = 0; i < PROF_NCTX_LOCKS; i++)
-			malloc_mutex_prefork(&gctx_locks[i]);
+			malloc_mutex_prefork(tsdn, &gctx_locks[i]);
 	}
 }
 
 void
-prof_prefork1(void)
+prof_prefork1(tsdn_t *tsdn)
 {
 
 	if (opt_prof) {
-		malloc_mutex_prefork(&prof_active_mtx);
-		malloc_mutex_prefork(&prof_dump_seq_mtx);
-		malloc_mutex_prefork(&prof_gdump_mtx);
-		malloc_mutex_prefork(&next_thr_uid_mtx);
-		malloc_mutex_prefork(&prof_thread_active_init_mtx);
+		malloc_mutex_prefork(tsdn, &prof_active_mtx);
+		malloc_mutex_prefork(tsdn, &prof_dump_seq_mtx);
+		malloc_mutex_prefork(tsdn, &prof_gdump_mtx);
+		malloc_mutex_prefork(tsdn, &next_thr_uid_mtx);
+		malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx);
 	}
 }
 
 void
-prof_postfork_parent(void)
+prof_postfork_parent(tsdn_t *tsdn)
 {
 
 	if (opt_prof) {
 		unsigned i;
 
-		malloc_mutex_postfork_parent(&prof_thread_active_init_mtx);
-		malloc_mutex_postfork_parent(&next_thr_uid_mtx);
-		malloc_mutex_postfork_parent(&prof_gdump_mtx);
-		malloc_mutex_postfork_parent(&prof_dump_seq_mtx);
-		malloc_mutex_postfork_parent(&prof_active_mtx);
+		malloc_mutex_postfork_parent(tsdn,
+		    &prof_thread_active_init_mtx);
+		malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx);
+		malloc_mutex_postfork_parent(tsdn, &prof_gdump_mtx);
+		malloc_mutex_postfork_parent(tsdn, &prof_dump_seq_mtx);
+		malloc_mutex_postfork_parent(tsdn, &prof_active_mtx);
 		for (i = 0; i < PROF_NCTX_LOCKS; i++)
-			malloc_mutex_postfork_parent(&gctx_locks[i]);
+			malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]);
 		for (i = 0; i < PROF_NTDATA_LOCKS; i++)
-			malloc_mutex_postfork_parent(&tdata_locks[i]);
-		malloc_mutex_postfork_parent(&tdatas_mtx);
-		malloc_mutex_postfork_parent(&bt2gctx_mtx);
-		malloc_mutex_postfork_parent(&prof_dump_mtx);
+			malloc_mutex_postfork_parent(tsdn, &tdata_locks[i]);
+		malloc_mutex_postfork_parent(tsdn, &tdatas_mtx);
+		malloc_mutex_postfork_parent(tsdn, &bt2gctx_mtx);
+		malloc_mutex_postfork_parent(tsdn, &prof_dump_mtx);
 	}
 }
 
 void
-prof_postfork_child(void)
+prof_postfork_child(tsdn_t *tsdn)
 {
 
 	if (opt_prof) {
 		unsigned i;
 
-		malloc_mutex_postfork_child(&prof_thread_active_init_mtx);
-		malloc_mutex_postfork_child(&next_thr_uid_mtx);
-		malloc_mutex_postfork_child(&prof_gdump_mtx);
-		malloc_mutex_postfork_child(&prof_dump_seq_mtx);
-		malloc_mutex_postfork_child(&prof_active_mtx);
+		malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx);
+		malloc_mutex_postfork_child(tsdn, &next_thr_uid_mtx);
+		malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx);
+		malloc_mutex_postfork_child(tsdn, &prof_dump_seq_mtx);
+		malloc_mutex_postfork_child(tsdn, &prof_active_mtx);
 		for (i = 0; i < PROF_NCTX_LOCKS; i++)
-			malloc_mutex_postfork_child(&gctx_locks[i]);
+			malloc_mutex_postfork_child(tsdn, &gctx_locks[i]);
 		for (i = 0; i < PROF_NTDATA_LOCKS; i++)
-			malloc_mutex_postfork_child(&tdata_locks[i]);
-		malloc_mutex_postfork_child(&tdatas_mtx);
-		malloc_mutex_postfork_child(&bt2gctx_mtx);
-		malloc_mutex_postfork_child(&prof_dump_mtx);
+			malloc_mutex_postfork_child(tsdn, &tdata_locks[i]);
+		malloc_mutex_postfork_child(tsdn, &tdatas_mtx);
+		malloc_mutex_postfork_child(tsdn, &bt2gctx_mtx);
+		malloc_mutex_postfork_child(tsdn, &prof_dump_mtx);
 	}
 }
 
 /******************************************************************************/
--- a/memory/jemalloc/src/src/quarantine.c
+++ b/memory/jemalloc/src/src/quarantine.c
@@ -8,34 +8,32 @@
 #define	QUARANTINE_STATE_REINCARNATED	((quarantine_t *)(uintptr_t)1)
 #define	QUARANTINE_STATE_PURGATORY	((quarantine_t *)(uintptr_t)2)
 #define	QUARANTINE_STATE_MAX		QUARANTINE_STATE_PURGATORY
 
 /******************************************************************************/
 /* Function prototypes for non-inline static functions. */
 
 static quarantine_t	*quarantine_grow(tsd_t *tsd, quarantine_t *quarantine);
-static void	quarantine_drain_one(tsd_t *tsd, quarantine_t *quarantine);
-static void	quarantine_drain(tsd_t *tsd, quarantine_t *quarantine,
+static void	quarantine_drain_one(tsdn_t *tsdn, quarantine_t *quarantine);
+static void	quarantine_drain(tsdn_t *tsdn, quarantine_t *quarantine,
     size_t upper_bound);
 
 /******************************************************************************/
 
 static quarantine_t *
-quarantine_init(tsd_t *tsd, size_t lg_maxobjs)
+quarantine_init(tsdn_t *tsdn, size_t lg_maxobjs)
 {
 	quarantine_t *quarantine;
 	size_t size;
 
-	assert(tsd_nominal(tsd));
-
 	size = offsetof(quarantine_t, objs) + ((ZU(1) << lg_maxobjs) *
 	    sizeof(quarantine_obj_t));
-	quarantine = (quarantine_t *)iallocztm(tsd, size, size2index(size),
-	    false, tcache_get(tsd, true), true, NULL, true);
+	quarantine = (quarantine_t *)iallocztm(tsdn, size, size2index(size),
+	    false, NULL, true, arena_get(TSDN_NULL, 0, true), true);
 	if (quarantine == NULL)
 		return (NULL);
 	quarantine->curbytes = 0;
 	quarantine->curobjs = 0;
 	quarantine->first = 0;
 	quarantine->lg_maxobjs = lg_maxobjs;
 
 	return (quarantine);
@@ -44,35 +42,35 @@ quarantine_init(tsd_t *tsd, size_t lg_ma
 void
 quarantine_alloc_hook_work(tsd_t *tsd)
 {
 	quarantine_t *quarantine;
 
 	if (!tsd_nominal(tsd))
 		return;
 
-	quarantine = quarantine_init(tsd, LG_MAXOBJS_INIT);
+	quarantine = quarantine_init(tsd_tsdn(tsd), LG_MAXOBJS_INIT);
 	/*
 	 * Check again whether quarantine has been initialized, because
 	 * quarantine_init() may have triggered recursive initialization.
 	 */
 	if (tsd_quarantine_get(tsd) == NULL)
 		tsd_quarantine_set(tsd, quarantine);
 	else
-		idalloctm(tsd, quarantine, tcache_get(tsd, false), true, true);
+		idalloctm(tsd_tsdn(tsd), quarantine, NULL, true, true);
 }
 
 static quarantine_t *
 quarantine_grow(tsd_t *tsd, quarantine_t *quarantine)
 {
 	quarantine_t *ret;
 
-	ret = quarantine_init(tsd, quarantine->lg_maxobjs + 1);
+	ret = quarantine_init(tsd_tsdn(tsd), quarantine->lg_maxobjs + 1);
 	if (ret == NULL) {
-		quarantine_drain_one(tsd, quarantine);
+		quarantine_drain_one(tsd_tsdn(tsd), quarantine);
 		return (quarantine);
 	}
 
 	ret->curbytes = quarantine->curbytes;
 	ret->curobjs = quarantine->curobjs;
 	if (quarantine->first + quarantine->curobjs <= (ZU(1) <<
 	    quarantine->lg_maxobjs)) {
 		/* objs ring buffer data are contiguous. */
@@ -84,63 +82,63 @@ quarantine_grow(tsd_t *tsd, quarantine_t
 		    quarantine->first;
 		size_t ncopy_b = quarantine->curobjs - ncopy_a;
 
 		memcpy(ret->objs, &quarantine->objs[quarantine->first], ncopy_a
 		    * sizeof(quarantine_obj_t));
 		memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b *
 		    sizeof(quarantine_obj_t));
 	}
-	idalloctm(tsd, quarantine, tcache_get(tsd, false), true, true);
+	idalloctm(tsd_tsdn(tsd), quarantine, NULL, true, true);
 
 	tsd_quarantine_set(tsd, ret);
 	return (ret);
 }
 
 static void
-quarantine_drain_one(tsd_t *tsd, quarantine_t *quarantine)
+quarantine_drain_one(tsdn_t *tsdn, quarantine_t *quarantine)
 {
 	quarantine_obj_t *obj = &quarantine->objs[quarantine->first];
-	assert(obj->usize == isalloc(obj->ptr, config_prof));
-	idalloctm(tsd, obj->ptr, NULL, false, true);
+	assert(obj->usize == isalloc(tsdn, obj->ptr, config_prof));
+	idalloctm(tsdn, obj->ptr, NULL, false, true);
 	quarantine->curbytes -= obj->usize;
 	quarantine->curobjs--;
 	quarantine->first = (quarantine->first + 1) & ((ZU(1) <<
 	    quarantine->lg_maxobjs) - 1);
 }
 
 static void
-quarantine_drain(tsd_t *tsd, quarantine_t *quarantine, size_t upper_bound)
+quarantine_drain(tsdn_t *tsdn, quarantine_t *quarantine, size_t upper_bound)
 {
 
 	while (quarantine->curbytes > upper_bound && quarantine->curobjs > 0)
-		quarantine_drain_one(tsd, quarantine);
+		quarantine_drain_one(tsdn, quarantine);
 }
 
 void
 quarantine(tsd_t *tsd, void *ptr)
 {
 	quarantine_t *quarantine;
-	size_t usize = isalloc(ptr, config_prof);
+	size_t usize = isalloc(tsd_tsdn(tsd), ptr, config_prof);
 
 	cassert(config_fill);
 	assert(opt_quarantine);
 
 	if ((quarantine = tsd_quarantine_get(tsd)) == NULL) {
-		idalloctm(tsd, ptr, NULL, false, true);
+		idalloctm(tsd_tsdn(tsd), ptr, NULL, false, true);
 		return;
 	}
 	/*
 	 * Drain one or more objects if the quarantine size limit would be
 	 * exceeded by appending ptr.
 	 */
 	if (quarantine->curbytes + usize > opt_quarantine) {
 		size_t upper_bound = (opt_quarantine >= usize) ? opt_quarantine
 		    - usize : 0;
-		quarantine_drain(tsd, quarantine, upper_bound);
+		quarantine_drain(tsd_tsdn(tsd), quarantine, upper_bound);
 	}
 	/* Grow the quarantine ring buffer if it's full. */
 	if (quarantine->curobjs == (ZU(1) << quarantine->lg_maxobjs))
 		quarantine = quarantine_grow(tsd, quarantine);
 	/* quarantine_grow() must free a slot if it fails to grow. */
 	assert(quarantine->curobjs < (ZU(1) << quarantine->lg_maxobjs));
 	/* Append ptr if its size doesn't exceed the quarantine size. */
 	if (quarantine->curbytes + usize <= opt_quarantine) {
@@ -155,31 +153,31 @@ quarantine(tsd_t *tsd, void *ptr)
 			/*
 			 * Only do redzone validation if Valgrind isn't in
 			 * operation.
 			 */
 			if ((!config_valgrind || likely(!in_valgrind))
 			    && usize <= SMALL_MAXCLASS)
 				arena_quarantine_junk_small(ptr, usize);
 			else
-				memset(ptr, 0x5a, usize);
+				memset(ptr, JEMALLOC_FREE_JUNK, usize);
 		}
 	} else {
 		assert(quarantine->curbytes == 0);
-		idalloctm(tsd, ptr, NULL, false, true);
+		idalloctm(tsd_tsdn(tsd), ptr, NULL, false, true);
 	}
 }
 
 void
 quarantine_cleanup(tsd_t *tsd)
 {
 	quarantine_t *quarantine;
 
 	if (!config_fill)
 		return;
 
 	quarantine = tsd_quarantine_get(tsd);
 	if (quarantine != NULL) {
-		quarantine_drain(tsd, quarantine, 0);
-		idalloctm(tsd, quarantine, tcache_get(tsd, false), true, true);
+		quarantine_drain(tsd_tsdn(tsd), quarantine, 0);
+		idalloctm(tsd_tsdn(tsd), quarantine, NULL, true, true);
 		tsd_quarantine_set(tsd, NULL);
 	}
 }
--- a/memory/jemalloc/src/src/rtree.c
+++ b/memory/jemalloc/src/src/rtree.c
@@ -10,16 +10,18 @@ hmin(unsigned ha, unsigned hb)
 
 /* Only the most significant bits of keys passed to rtree_[gs]et() are used. */
 bool
 rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc,
     rtree_node_dalloc_t *dalloc)
 {
 	unsigned bits_in_leaf, height, i;
 
+	assert(RTREE_HEIGHT_MAX == ((ZU(1) << (LG_SIZEOF_PTR+3)) /
+	    RTREE_BITS_PER_LEVEL));
 	assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3));
 
 	bits_in_leaf = (bits % RTREE_BITS_PER_LEVEL) == 0 ? RTREE_BITS_PER_LEVEL
 	    : (bits % RTREE_BITS_PER_LEVEL);
 	if (bits > bits_in_leaf) {
 		height = 1 + (bits - bits_in_leaf) / RTREE_BITS_PER_LEVEL;
 		if ((height-1) * RTREE_BITS_PER_LEVEL + bits_in_leaf != bits)
 			height++;
@@ -89,22 +91,25 @@ rtree_delete(rtree_t *rtree)
 }
 
 static rtree_node_elm_t *
 rtree_node_init(rtree_t *rtree, unsigned level, rtree_node_elm_t **elmp)
 {
 	rtree_node_elm_t *node;
 
 	if (atomic_cas_p((void **)elmp, NULL, RTREE_NODE_INITIALIZING)) {
+		spin_t spinner;
+
 		/*
 		 * Another thread is already in the process of initializing.
 		 * Spin-wait until initialization is complete.
 		 */
+		spin_init(&spinner);
 		do {
-			CPU_SPINWAIT;
+			spin_adaptive(&spinner);
 			node = atomic_read_p((void **)elmp);
 		} while (node == RTREE_NODE_INITIALIZING);
 	} else {
 		node = rtree->alloc(ZU(1) << rtree->levels[level].bits);
 		if (node == NULL)
 			return (NULL);
 		atomic_write_p((void **)elmp, node);
 	}
@@ -118,10 +123,10 @@ rtree_subtree_read_hard(rtree_t *rtree, 
 
 	return (rtree_node_init(rtree, level, &rtree->levels[level].subtree));
 }
 
 rtree_node_elm_t *
 rtree_child_read_hard(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level)
 {
 
-	return (rtree_node_init(rtree, level, &elm->child));
+	return (rtree_node_init(rtree, level+1, &elm->child));
 }
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/src/spin.c
@@ -0,0 +1,2 @@
+#define	JEMALLOC_SPIN_C_
+#include "jemalloc/internal/jemalloc_internal.h"
--- a/memory/jemalloc/src/src/stats.c
+++ b/memory/jemalloc/src/src/stats.c
@@ -28,95 +28,116 @@
 /******************************************************************************/
 /* Data. */
 
 bool	opt_stats_print = false;
 
 size_t	stats_cactive = 0;
 
 /******************************************************************************/
-/* Function prototypes for non-inline static functions. */
-
-static void	stats_arena_bins_print(void (*write_cb)(void *, const char *),
-    void *cbopaque, unsigned i);
-static void	stats_arena_lruns_print(void (*write_cb)(void *, const char *),
-    void *cbopaque, unsigned i);
-static void	stats_arena_hchunks_print(
-    void (*write_cb)(void *, const char *), void *cbopaque, unsigned i);
-static void	stats_arena_print(void (*write_cb)(void *, const char *),
-    void *cbopaque, unsigned i, bool bins, bool large, bool huge);
-
-/******************************************************************************/
 
 static void
 stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque,
-    unsigned i)
+    bool json, bool large, bool huge, unsigned i)
 {
 	size_t page;
-	bool config_tcache, in_gap;
+	bool config_tcache, in_gap, in_gap_prev;
 	unsigned nbins, j;
 
 	CTL_GET("arenas.page", &page, size_t);
 
-	CTL_GET("config.tcache", &config_tcache, bool);
-	if (config_tcache) {
+	CTL_GET("arenas.nbins", &nbins, unsigned);
+	if (json) {
 		malloc_cprintf(write_cb, cbopaque,
-		    "bins:           size ind    allocated      nmalloc"
-		    "      ndalloc    nrequests      curregs      curruns regs"
-		    " pgs  util       nfills     nflushes      newruns"
-		    "       reruns\n");
+		    "\t\t\t\t\"bins\": [\n");
 	} else {
-		malloc_cprintf(write_cb, cbopaque,
-		    "bins:           size ind    allocated      nmalloc"
-		    "      ndalloc    nrequests      curregs      curruns regs"
-		    " pgs  util      newruns       reruns\n");
+		CTL_GET("config.tcache", &config_tcache, bool);
+		if (config_tcache) {
+			malloc_cprintf(write_cb, cbopaque,
+			    "bins:           size ind    allocated      nmalloc"
+			    "      ndalloc    nrequests      curregs"
+			    "      curruns regs pgs  util       nfills"
+			    "     nflushes      newruns       reruns\n");
+		} else {
+			malloc_cprintf(write_cb, cbopaque,
+			    "bins:           size ind    allocated      nmalloc"
+			    "      ndalloc    nrequests      curregs"
+			    "      curruns regs pgs  util      newruns"
+			    "       reruns\n");
+		}
 	}
-	CTL_GET("arenas.nbins", &nbins, unsigned);
 	for (j = 0, in_gap = false; j < nbins; j++) {
 		uint64_t nruns;
+		size_t reg_size, run_size, curregs;
+		size_t curruns;
+		uint32_t nregs;
+		uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes;
+		uint64_t nreruns;
 
 		CTL_M2_M4_GET("stats.arenas.0.bins.0.nruns", i, j, &nruns,
 		    uint64_t);
-		if (nruns == 0)
-			in_gap = true;
-		else {
-			size_t reg_size, run_size, curregs, availregs, milli;
-			size_t curruns;
-			uint32_t nregs;
-			uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes;
-			uint64_t reruns;
-			char util[6]; /* "x.yyy". */
+		in_gap_prev = in_gap;
+		in_gap = (nruns == 0);
+
+		if (!json && in_gap_prev && !in_gap) {
+			malloc_cprintf(write_cb, cbopaque,
+			    "                     ---\n");
+		}
+
+		CTL_M2_GET("arenas.bin.0.size", j, &reg_size, size_t);
+		CTL_M2_GET("arenas.bin.0.nregs", j, &nregs, uint32_t);
+		CTL_M2_GET("arenas.bin.0.run_size", j, &run_size, size_t);
 
-			if (in_gap) {
+		CTL_M2_M4_GET("stats.arenas.0.bins.0.nmalloc", i, j, &nmalloc,
+		    uint64_t);
+		CTL_M2_M4_GET("stats.arenas.0.bins.0.ndalloc", i, j, &ndalloc,
+		    uint64_t);
+		CTL_M2_M4_GET("stats.arenas.0.bins.0.curregs", i, j, &curregs,
+		    size_t);
+		CTL_M2_M4_GET("stats.arenas.0.bins.0.nrequests", i, j,
+		    &nrequests, uint64_t);
+		if (config_tcache) {
+			CTL_M2_M4_GET("stats.arenas.0.bins.0.nfills", i, j,
+			    &nfills, uint64_t);
+			CTL_M2_M4_GET("stats.arenas.0.bins.0.nflushes", i, j,
+			    &nflushes, uint64_t);
+		}
+		CTL_M2_M4_GET("stats.arenas.0.bins.0.nreruns", i, j, &nreruns,
+		    uint64_t);
+		CTL_M2_M4_GET("stats.arenas.0.bins.0.curruns", i, j, &curruns,
+		    size_t);
+
+		if (json) {
+			malloc_cprintf(write_cb, cbopaque,
+			    "\t\t\t\t\t{\n"
+			    "\t\t\t\t\t\t\"nmalloc\": %"FMTu64",\n"
+			    "\t\t\t\t\t\t\"ndalloc\": %"FMTu64",\n"
+			    "\t\t\t\t\t\t\"curregs\": %zu,\n"
+			    "\t\t\t\t\t\t\"nrequests\": %"FMTu64",\n",
+			    nmalloc,
+			    ndalloc,
+			    curregs,
+			    nrequests);
+			if (config_tcache) {
 				malloc_cprintf(write_cb, cbopaque,
-				    "                     ---\n");
-				in_gap = false;
+				    "\t\t\t\t\t\t\"nfills\": %"FMTu64",\n"
+				    "\t\t\t\t\t\t\"nflushes\": %"FMTu64",\n",
+				    nfills,
+				    nflushes);
 			}
-			CTL_M2_GET("arenas.bin.0.size", j, &reg_size, size_t);
-			CTL_M2_GET("arenas.bin.0.nregs", j, &nregs, uint32_t);
-			CTL_M2_GET("arenas.bin.0.run_size", j, &run_size,
-			    size_t);
-			CTL_M2_M4_GET("stats.arenas.0.bins.0.nmalloc", i, j,
-			    &nmalloc, uint64_t);
-			CTL_M2_M4_GET("stats.arenas.0.bins.0.ndalloc", i, j,
-			    &ndalloc, uint64_t);
-			CTL_M2_M4_GET("stats.arenas.0.bins.0.curregs", i, j,
-			    &curregs, size_t);
-			CTL_M2_M4_GET("stats.arenas.0.bins.0.nrequests", i, j,
-			    &nrequests, uint64_t);
-			if (config_tcache) {
-				CTL_M2_M4_GET("stats.arenas.0.bins.0.nfills", i,
-				    j, &nfills, uint64_t);
-				CTL_M2_M4_GET("stats.arenas.0.bins.0.nflushes",
-				    i, j, &nflushes, uint64_t);
-			}
-			CTL_M2_M4_GET("stats.arenas.0.bins.0.nreruns", i, j,
-			    &reruns, uint64_t);
-			CTL_M2_M4_GET("stats.arenas.0.bins.0.curruns", i, j,
-			    &curruns, size_t);
+			malloc_cprintf(write_cb, cbopaque,
+			    "\t\t\t\t\t\t\"nreruns\": %"FMTu64",\n"
+			    "\t\t\t\t\t\t\"curruns\": %zu\n"
+			    "\t\t\t\t\t}%s\n",
+			    nreruns,
+			    curruns,
+			    (j + 1 < nbins) ? "," : "");
+		} else if (!in_gap) {
+			size_t availregs, milli;
+			char util[6]; /* "x.yyy". */
 
 			availregs = nregs * curruns;
 			milli = (availregs != 0) ? (1000 * curregs) / availregs
 			    : 1000;
 			assert(milli <= 1000);
 			if (milli < 10) {
 				malloc_snprintf(util, sizeof(util),
 				    "0.00%zu", milli);
@@ -133,250 +154,925 @@ stats_arena_bins_print(void (*write_cb)(
 				malloc_cprintf(write_cb, cbopaque,
 				    "%20zu %3u %12zu %12"FMTu64
 				    " %12"FMTu64" %12"FMTu64" %12zu"
 				    " %12zu %4u %3zu %-5s %12"FMTu64
 				    " %12"FMTu64" %12"FMTu64" %12"FMTu64"\n",
 				    reg_size, j, curregs * reg_size, nmalloc,
 				    ndalloc, nrequests, curregs, curruns, nregs,
 				    run_size / page, util, nfills, nflushes,
-				    nruns, reruns);
+				    nruns, nreruns);
 			} else {
 				malloc_cprintf(write_cb, cbopaque,
 				    "%20zu %3u %12zu %12"FMTu64
 				    " %12"FMTu64" %12"FMTu64" %12zu"
 				    " %12zu %4u %3zu %-5s %12"FMTu64
 				    " %12"FMTu64"\n",
 				    reg_size, j, curregs * reg_size, nmalloc,
 				    ndalloc, nrequests, curregs, curruns, nregs,
-				    run_size / page, util, nruns, reruns);
+				    run_size / page, util, nruns, nreruns);
 			}
 		}
 	}
-	if (in_gap) {
+	if (json) {
 		malloc_cprintf(write_cb, cbopaque,
-		    "                     ---\n");
+		    "\t\t\t\t]%s\n", (large || huge) ? "," : "");
+	} else {
+		if (in_gap) {
+			malloc_cprintf(write_cb, cbopaque,
+			    "                     ---\n");
+		}
 	}
 }
 
 static void
 stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque,
-    unsigned i)
+    bool json, bool huge, unsigned i)
 {
 	unsigned nbins, nlruns, j;
-	bool in_gap;
+	bool in_gap, in_gap_prev;
 
-	malloc_cprintf(write_cb, cbopaque,
-	    "large:          size ind    allocated      nmalloc      ndalloc"
-	    "    nrequests      curruns\n");
 	CTL_GET("arenas.nbins", &nbins, unsigned);
 	CTL_GET("arenas.nlruns", &nlruns, unsigned);
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\"lruns\": [\n");
+	} else {
+		malloc_cprintf(write_cb, cbopaque,
+		    "large:          size ind    allocated      nmalloc"
+		    "      ndalloc    nrequests      curruns\n");
+	}
 	for (j = 0, in_gap = false; j < nlruns; j++) {
 		uint64_t nmalloc, ndalloc, nrequests;
 		size_t run_size, curruns;
 
 		CTL_M2_M4_GET("stats.arenas.0.lruns.0.nmalloc", i, j, &nmalloc,
 		    uint64_t);
 		CTL_M2_M4_GET("stats.arenas.0.lruns.0.ndalloc", i, j, &ndalloc,
 		    uint64_t);
 		CTL_M2_M4_GET("stats.arenas.0.lruns.0.nrequests", i, j,
 		    &nrequests, uint64_t);
-		if (nrequests == 0)
-			in_gap = true;
-		else {
-			CTL_M2_GET("arenas.lrun.0.size", j, &run_size, size_t);
-			CTL_M2_M4_GET("stats.arenas.0.lruns.0.curruns", i, j,
-			    &curruns, size_t);
-			if (in_gap) {
-				malloc_cprintf(write_cb, cbopaque,
-				    "                     ---\n");
-				in_gap = false;
-			}
+		in_gap_prev = in_gap;
+		in_gap = (nrequests == 0);
+
+		if (!json && in_gap_prev && !in_gap) {
+			malloc_cprintf(write_cb, cbopaque,
+			    "                     ---\n");
+		}
+
+		CTL_M2_GET("arenas.lrun.0.size", j, &run_size, size_t);
+		CTL_M2_M4_GET("stats.arenas.0.lruns.0.curruns", i, j, &curruns,
+		    size_t);
+		if (json) {
+			malloc_cprintf(write_cb, cbopaque,
+			    "\t\t\t\t\t{\n"
+			    "\t\t\t\t\t\t\"curruns\": %zu\n"
+			    "\t\t\t\t\t}%s\n",
+			    curruns,
+			    (j + 1 < nlruns) ? "," : "");
+		} else if (!in_gap) {
 			malloc_cprintf(write_cb, cbopaque,
 			    "%20zu %3u %12zu %12"FMTu64" %12"FMTu64
 			    " %12"FMTu64" %12zu\n",
 			    run_size, nbins + j, curruns * run_size, nmalloc,
 			    ndalloc, nrequests, curruns);
 		}
 	}
-	if (in_gap) {
+	if (json) {
 		malloc_cprintf(write_cb, cbopaque,
-		    "                     ---\n");
+		    "\t\t\t\t]%s\n", huge ? "," : "");
+	} else {
+		if (in_gap) {
+			malloc_cprintf(write_cb, cbopaque,
+			    "                     ---\n");
+		}
 	}
 }
 
 static void
 stats_arena_hchunks_print(void (*write_cb)(void *, const char *),
-    void *cbopaque, unsigned i)
+    void *cbopaque, bool json, unsigned i)
 {
 	unsigned nbins, nlruns, nhchunks, j;
-	bool in_gap;
+	bool in_gap, in_gap_prev;
 
-	malloc_cprintf(write_cb, cbopaque,
-	    "huge:           size ind    allocated      nmalloc      ndalloc"
-	    "    nrequests   curhchunks\n");
 	CTL_GET("arenas.nbins", &nbins, unsigned);
 	CTL_GET("arenas.nlruns", &nlruns, unsigned);
 	CTL_GET("arenas.nhchunks", &nhchunks, unsigned);
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\"hchunks\": [\n");
+	} else {
+		malloc_cprintf(write_cb, cbopaque,
+		    "huge:           size ind    allocated      nmalloc"
+		    "      ndalloc    nrequests   curhchunks\n");
+	}
 	for (j = 0, in_gap = false; j < nhchunks; j++) {
 		uint64_t nmalloc, ndalloc, nrequests;
 		size_t hchunk_size, curhchunks;
 
 		CTL_M2_M4_GET("stats.arenas.0.hchunks.0.nmalloc", i, j,
 		    &nmalloc, uint64_t);
 		CTL_M2_M4_GET("stats.arenas.0.hchunks.0.ndalloc", i, j,
 		    &ndalloc, uint64_t);
 		CTL_M2_M4_GET("stats.arenas.0.hchunks.0.nrequests", i, j,
 		    &nrequests, uint64_t);
-		if (nrequests == 0)
-			in_gap = true;
-		else {
-			CTL_M2_GET("arenas.hchunk.0.size", j, &hchunk_size,
-			    size_t);
-			CTL_M2_M4_GET("stats.arenas.0.hchunks.0.curhchunks", i,
-			    j, &curhchunks, size_t);
-			if (in_gap) {
-				malloc_cprintf(write_cb, cbopaque,
-				    "                     ---\n");
-				in_gap = false;
-			}
+		in_gap_prev = in_gap;
+		in_gap = (nrequests == 0);
+
+		if (!json && in_gap_prev && !in_gap) {
+			malloc_cprintf(write_cb, cbopaque,
+			    "                     ---\n");
+		}
+
+		CTL_M2_GET("arenas.hchunk.0.size", j, &hchunk_size, size_t);
+		CTL_M2_M4_GET("stats.arenas.0.hchunks.0.curhchunks", i, j,
+		    &curhchunks, size_t);
+		if (json) {
+			malloc_cprintf(write_cb, cbopaque,
+			    "\t\t\t\t\t{\n"
+			    "\t\t\t\t\t\t\"curhchunks\": %zu\n"
+			    "\t\t\t\t\t}%s\n",
+			    curhchunks,
+			    (j + 1 < nhchunks) ? "," : "");
+		} else if (!in_gap) {
 			malloc_cprintf(write_cb, cbopaque,
 			    "%20zu %3u %12zu %12"FMTu64" %12"FMTu64
 			    " %12"FMTu64" %12zu\n",
 			    hchunk_size, nbins + nlruns + j,
 			    curhchunks * hchunk_size, nmalloc, ndalloc,
 			    nrequests, curhchunks);
 		}
 	}
-	if (in_gap) {
+	if (json) {
 		malloc_cprintf(write_cb, cbopaque,
-		    "                     ---\n");
+		    "\t\t\t\t]\n");
+	} else {
+		if (in_gap) {
+			malloc_cprintf(write_cb, cbopaque,
+			    "                     ---\n");
+		}
 	}
 }
 
 static void
 stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
-    unsigned i, bool bins, bool large, bool huge)
+    bool json, unsigned i, bool bins, bool large, bool huge)
 {
 	unsigned nthreads;
 	const char *dss;
 	ssize_t lg_dirty_mult, decay_time;
-	size_t page, pactive, pdirty, mapped;
+	size_t page, pactive, pdirty, mapped, retained;
 	size_t metadata_mapped, metadata_allocated;
 	uint64_t npurge, nmadvise, purged;
 	size_t small_allocated;
 	uint64_t small_nmalloc, small_ndalloc, small_nrequests;
 	size_t large_allocated;
 	uint64_t large_nmalloc, large_ndalloc, large_nrequests;
 	size_t huge_allocated;
 	uint64_t huge_nmalloc, huge_ndalloc, huge_nrequests;
 
 	CTL_GET("arenas.page", &page, size_t);
 
 	CTL_M2_GET("stats.arenas.0.nthreads", i, &nthreads, unsigned);
-	malloc_cprintf(write_cb, cbopaque,
-	    "assigned threads: %u\n", nthreads);
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\"nthreads\": %u,\n", nthreads);
+	} else {
+		malloc_cprintf(write_cb, cbopaque,
+		    "assigned threads: %u\n", nthreads);
+	}
+
 	CTL_M2_GET("stats.arenas.0.dss", i, &dss, const char *);
-	malloc_cprintf(write_cb, cbopaque, "dss allocation precedence: %s\n",
-	    dss);
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\"dss\": \"%s\",\n", dss);
+	} else {
+		malloc_cprintf(write_cb, cbopaque,
+		    "dss allocation precedence: %s\n", dss);
+	}
+
 	CTL_M2_GET("stats.arenas.0.lg_dirty_mult", i, &lg_dirty_mult, ssize_t);
-	if (opt_purge == purge_mode_ratio) {
-		if (lg_dirty_mult >= 0) {
-			malloc_cprintf(write_cb, cbopaque,
-			    "min active:dirty page ratio: %u:1\n",
-			    (1U << lg_dirty_mult));
-		} else {
-			malloc_cprintf(write_cb, cbopaque,
-			    "min active:dirty page ratio: N/A\n");
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\"lg_dirty_mult\": %zd,\n", lg_dirty_mult);
+	} else {
+		if (opt_purge == purge_mode_ratio) {
+			if (lg_dirty_mult >= 0) {
+				malloc_cprintf(write_cb, cbopaque,
+				    "min active:dirty page ratio: %u:1\n",
+				    (1U << lg_dirty_mult));
+			} else {
+				malloc_cprintf(write_cb, cbopaque,
+				    "min active:dirty page ratio: N/A\n");
+			}
 		}
 	}
+
 	CTL_M2_GET("stats.arenas.0.decay_time", i, &decay_time, ssize_t);
-	if (opt_purge == purge_mode_decay) {
-		if (decay_time >= 0) {
-			malloc_cprintf(write_cb, cbopaque, "decay time: %zd\n",
-			    decay_time);
-		} else
-			malloc_cprintf(write_cb, cbopaque, "decay time: N/A\n");
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\"decay_time\": %zd,\n", decay_time);
+	} else {
+		if (opt_purge == purge_mode_decay) {
+			if (decay_time >= 0) {
+				malloc_cprintf(write_cb, cbopaque,
+				    "decay time: %zd\n", decay_time);
+			} else {
+				malloc_cprintf(write_cb, cbopaque,
+				    "decay time: N/A\n");
+			}
+		}
 	}
+
 	CTL_M2_GET("stats.arenas.0.pactive", i, &pactive, size_t);
 	CTL_M2_GET("stats.arenas.0.pdirty", i, &pdirty, size_t);
 	CTL_M2_GET("stats.arenas.0.npurge", i, &npurge, uint64_t);
 	CTL_M2_GET("stats.arenas.0.nmadvise", i, &nmadvise, uint64_t);
 	CTL_M2_GET("stats.arenas.0.purged", i, &purged, uint64_t);
-	malloc_cprintf(write_cb, cbopaque,
-	    "purging: dirty: %zu, sweeps: %"FMTu64", madvises: %"FMTu64", "
-	    "purged: %"FMTu64"\n", pdirty, npurge, nmadvise, purged);
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\"pactive\": %zu,\n", pactive);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\"pdirty\": %zu,\n", pdirty);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\"npurge\": %"FMTu64",\n", npurge);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\"nmadvise\": %"FMTu64",\n", nmadvise);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\"purged\": %"FMTu64",\n", purged);
+	} else {
+		malloc_cprintf(write_cb, cbopaque,
+		    "purging: dirty: %zu, sweeps: %"FMTu64", madvises: %"FMTu64
+		    ", purged: %"FMTu64"\n", pdirty, npurge, nmadvise, purged);
+	}
 
-	malloc_cprintf(write_cb, cbopaque,
-	    "                            allocated      nmalloc      ndalloc"
-	    "    nrequests\n");
 	CTL_M2_GET("stats.arenas.0.small.allocated", i, &small_allocated,
 	    size_t);
 	CTL_M2_GET("stats.arenas.0.small.nmalloc", i, &small_nmalloc, uint64_t);
 	CTL_M2_GET("stats.arenas.0.small.ndalloc", i, &small_ndalloc, uint64_t);
 	CTL_M2_GET("stats.arenas.0.small.nrequests", i, &small_nrequests,
 	    uint64_t);
-	malloc_cprintf(write_cb, cbopaque,
-	    "small:                   %12zu %12"FMTu64" %12"FMTu64
-	    " %12"FMTu64"\n",
-	    small_allocated, small_nmalloc, small_ndalloc, small_nrequests);
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\"small\": {\n");
+
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\t\"allocated\": %zu,\n", small_allocated);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", small_nmalloc);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", small_ndalloc);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", small_nrequests);
+
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t},\n");
+	} else {
+		malloc_cprintf(write_cb, cbopaque,
+		    "                            allocated      nmalloc"
+		    "      ndalloc    nrequests\n");
+		malloc_cprintf(write_cb, cbopaque,
+		    "small:                   %12zu %12"FMTu64" %12"FMTu64
+		    " %12"FMTu64"\n",
+		    small_allocated, small_nmalloc, small_ndalloc,
+		    small_nrequests);
+	}
+
 	CTL_M2_GET("stats.arenas.0.large.allocated", i, &large_allocated,
 	    size_t);
 	CTL_M2_GET("stats.arenas.0.large.nmalloc", i, &large_nmalloc, uint64_t);
 	CTL_M2_GET("stats.arenas.0.large.ndalloc", i, &large_ndalloc, uint64_t);
 	CTL_M2_GET("stats.arenas.0.large.nrequests", i, &large_nrequests,
 	    uint64_t);
-	malloc_cprintf(write_cb, cbopaque,
-	    "large:                   %12zu %12"FMTu64" %12"FMTu64
-	    " %12"FMTu64"\n",
-	    large_allocated, large_nmalloc, large_ndalloc, large_nrequests);
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\"large\": {\n");
+
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\t\"allocated\": %zu,\n", large_allocated);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", large_nmalloc);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", large_ndalloc);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", large_nrequests);
+
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t},\n");
+	} else {
+		malloc_cprintf(write_cb, cbopaque,
+		    "large:                   %12zu %12"FMTu64" %12"FMTu64
+		    " %12"FMTu64"\n",
+		    large_allocated, large_nmalloc, large_ndalloc,
+		    large_nrequests);
+	}
+
 	CTL_M2_GET("stats.arenas.0.huge.allocated", i, &huge_allocated, size_t);
 	CTL_M2_GET("stats.arenas.0.huge.nmalloc", i, &huge_nmalloc, uint64_t);
 	CTL_M2_GET("stats.arenas.0.huge.ndalloc", i, &huge_ndalloc, uint64_t);
 	CTL_M2_GET("stats.arenas.0.huge.nrequests", i, &huge_nrequests,
 	    uint64_t);
-	malloc_cprintf(write_cb, cbopaque,
-	    "huge:                    %12zu %12"FMTu64" %12"FMTu64
-	    " %12"FMTu64"\n",
-	    huge_allocated, huge_nmalloc, huge_ndalloc, huge_nrequests);
-	malloc_cprintf(write_cb, cbopaque,
-	    "total:                   %12zu %12"FMTu64" %12"FMTu64
-	    " %12"FMTu64"\n",
-	    small_allocated + large_allocated + huge_allocated,
-	    small_nmalloc + large_nmalloc + huge_nmalloc,
-	    small_ndalloc + large_ndalloc + huge_ndalloc,
-	    small_nrequests + large_nrequests + huge_nrequests);
-	malloc_cprintf(write_cb, cbopaque,
-	    "active:                  %12zu\n", pactive * page);
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\"huge\": {\n");
+
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\t\"allocated\": %zu,\n", huge_allocated);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", huge_nmalloc);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", huge_ndalloc);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", huge_nrequests);
+
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t},\n");
+	} else {
+		malloc_cprintf(write_cb, cbopaque,
+		    "huge:                    %12zu %12"FMTu64" %12"FMTu64
+		    " %12"FMTu64"\n",
+		    huge_allocated, huge_nmalloc, huge_ndalloc, huge_nrequests);
+		malloc_cprintf(write_cb, cbopaque,
+		    "total:                   %12zu %12"FMTu64" %12"FMTu64
+		    " %12"FMTu64"\n",
+		    small_allocated + large_allocated + huge_allocated,
+		    small_nmalloc + large_nmalloc + huge_nmalloc,
+		    small_ndalloc + large_ndalloc + huge_ndalloc,
+		    small_nrequests + large_nrequests + huge_nrequests);
+	}
+	if (!json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "active:                  %12zu\n", pactive * page);
+	}
+
 	CTL_M2_GET("stats.arenas.0.mapped", i, &mapped, size_t);
-	malloc_cprintf(write_cb, cbopaque,
-	    "mapped:                  %12zu\n", mapped);
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\"mapped\": %zu,\n", mapped);
+	} else {
+		malloc_cprintf(write_cb, cbopaque,
+		    "mapped:                  %12zu\n", mapped);
+	}
+
+	CTL_M2_GET("stats.arenas.0.retained", i, &retained, size_t);
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\"retained\": %zu,\n", retained);
+	} else {
+		malloc_cprintf(write_cb, cbopaque,
+		    "retained:                %12zu\n", retained);
+	}
+
 	CTL_M2_GET("stats.arenas.0.metadata.mapped", i, &metadata_mapped,
 	    size_t);
 	CTL_M2_GET("stats.arenas.0.metadata.allocated", i, &metadata_allocated,
 	    size_t);
-	malloc_cprintf(write_cb, cbopaque,
-	    "metadata: mapped: %zu, allocated: %zu\n",
-	    metadata_mapped, metadata_allocated);
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\"metadata\": {\n");
+
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\t\"mapped\": %zu,\n", metadata_mapped);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\t\"allocated\": %zu\n", metadata_allocated);
+
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t},\n");
+	} else {
+		malloc_cprintf(write_cb, cbopaque,
+		    "metadata: mapped: %zu, allocated: %zu\n",
+		    metadata_mapped, metadata_allocated);
+	}
+
+	if (bins) {
+		stats_arena_bins_print(write_cb, cbopaque, json, large, huge,
+		    i);
+	}
+	if (large)
+		stats_arena_lruns_print(write_cb, cbopaque, json, huge, i);
+	if (huge)
+		stats_arena_hchunks_print(write_cb, cbopaque, json, i);
+}
+
+static void
+stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque,
+    bool json, bool merged, bool unmerged)
+{
+	const char *cpv;
+	bool bv;
+	unsigned uv;
+	uint32_t u32v;
+	uint64_t u64v;
+	ssize_t ssv;
+	size_t sv, bsz, usz, ssz, sssz, cpsz;
+
+	bsz = sizeof(bool);
+	usz = sizeof(unsigned);
+	ssz = sizeof(size_t);
+	sssz = sizeof(ssize_t);
+	cpsz = sizeof(const char *);
+
+	CTL_GET("version", &cpv, const char *);
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		"\t\t\"version\": \"%s\",\n", cpv);
+	} else
+		malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv);
+
+	/* config. */
+#define	CONFIG_WRITE_BOOL_JSON(n, c)					\
+	if (json) {							\
+		CTL_GET("config."#n, &bv, bool);			\
+		malloc_cprintf(write_cb, cbopaque,			\
+		    "\t\t\t\""#n"\": %s%s\n", bv ? "true" : "false",	\
+		    (c));						\
+	}
+
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\"config\": {\n");
+	}
+
+	CONFIG_WRITE_BOOL_JSON(cache_oblivious, ",")
+
+	CTL_GET("config.debug", &bv, bool);
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\"debug\": %s,\n", bv ? "true" : "false");
+	} else {
+		malloc_cprintf(write_cb, cbopaque, "Assertions %s\n",
+		    bv ? "enabled" : "disabled");
+	}
+
+	CONFIG_WRITE_BOOL_JSON(fill, ",")
+	CONFIG_WRITE_BOOL_JSON(lazy_lock, ",")
+
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\"malloc_conf\": \"%s\",\n",
+		    config_malloc_conf);
+	} else {
+		malloc_cprintf(write_cb, cbopaque,
+		    "config.malloc_conf: \"%s\"\n", config_malloc_conf);
+	}
+
+	CONFIG_WRITE_BOOL_JSON(munmap, ",")
+	CONFIG_WRITE_BOOL_JSON(prof, ",")
+	CONFIG_WRITE_BOOL_JSON(prof_libgcc, ",")
+	CONFIG_WRITE_BOOL_JSON(prof_libunwind, ",")
+	CONFIG_WRITE_BOOL_JSON(stats, ",")
+	CONFIG_WRITE_BOOL_JSON(tcache, ",")
+	CONFIG_WRITE_BOOL_JSON(tls, ",")
+	CONFIG_WRITE_BOOL_JSON(utrace, ",")
+	CONFIG_WRITE_BOOL_JSON(valgrind, ",")
+	CONFIG_WRITE_BOOL_JSON(xmalloc, "")
+
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t},\n");
+	}
+#undef CONFIG_WRITE_BOOL_JSON
+
+	/* opt. */
+#define	OPT_WRITE_BOOL(n, c)						\
+	if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0) {	\
+		if (json) {						\
+			malloc_cprintf(write_cb, cbopaque,		\
+			    "\t\t\t\""#n"\": %s%s\n", bv ? "true" :	\
+			    "false", (c));				\
+		} else {						\
+			malloc_cprintf(write_cb, cbopaque,		\
+			    "  opt."#n": %s\n", bv ? "true" : "false");	\
+		}							\
+	}
+#define	OPT_WRITE_BOOL_MUTABLE(n, m, c) {				\
+	bool bv2;							\
+	if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0 &&	\
+	    je_mallctl(#m, &bv2, &bsz, NULL, 0) == 0) {			\
+		if (json) {						\
+			malloc_cprintf(write_cb, cbopaque,		\
+			    "\t\t\t\""#n"\": %s%s\n", bv ? "true" :	\
+			    "false", (c));				\
+		} else {						\
+			malloc_cprintf(write_cb, cbopaque,		\
+			    "  opt."#n": %s ("#m": %s)\n", bv ? "true"	\
+			    : "false", bv2 ? "true" : "false");		\
+		}							\
+	}								\
+}
+#define	OPT_WRITE_UNSIGNED(n, c)					\
+	if (je_mallctl("opt."#n, (void *)&uv, &usz, NULL, 0) == 0) {	\
+		if (json) {						\
+			malloc_cprintf(write_cb, cbopaque,		\
+			    "\t\t\t\""#n"\": %u%s\n", uv, (c));		\
+		} else {						\
+			malloc_cprintf(write_cb, cbopaque,		\
+			"  opt."#n": %u\n", uv);			\
+		}							\
+	}
+#define	OPT_WRITE_SIZE_T(n, c)						\
+	if (je_mallctl("opt."#n, (void *)&sv, &ssz, NULL, 0) == 0) {	\
+		if (json) {						\
+			malloc_cprintf(write_cb, cbopaque,		\
+			    "\t\t\t\""#n"\": %zu%s\n", sv, (c));	\
+		} else {						\
+			malloc_cprintf(write_cb, cbopaque,		\
+			"  opt."#n": %zu\n", sv);			\
+		}							\
+	}
+#define	OPT_WRITE_SSIZE_T(n, c)						\
+	if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0) {	\
+		if (json) {						\
+			malloc_cprintf(write_cb, cbopaque,		\
+			    "\t\t\t\""#n"\": %zd%s\n", ssv, (c));	\
+		} else {						\
+			malloc_cprintf(write_cb, cbopaque,		\
+			    "  opt."#n": %zd\n", ssv);			\
+		}							\
+	}
+#define	OPT_WRITE_SSIZE_T_MUTABLE(n, m, c) {				\
+	ssize_t ssv2;							\
+	if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0 &&	\
+	    je_mallctl(#m, &ssv2, &sssz, NULL, 0) == 0) {		\
+		if (json) {						\
+			malloc_cprintf(write_cb, cbopaque,		\
+			    "\t\t\t\""#n"\": %zd%s\n", ssv, (c));	\
+		} else {						\
+			malloc_cprintf(write_cb, cbopaque,		\
+			    "  opt."#n": %zd ("#m": %zd)\n",		\
+			    ssv, ssv2);					\
+		}							\
+	}								\
+}
+#define	OPT_WRITE_CHAR_P(n, c)						\
+	if (je_mallctl("opt."#n, (void *)&cpv, &cpsz, NULL, 0) == 0) {	\
+		if (json) {						\
+			malloc_cprintf(write_cb, cbopaque,		\
+			    "\t\t\t\""#n"\": \"%s\"%s\n", cpv, (c));	\
+		} else {						\
+			malloc_cprintf(write_cb, cbopaque,		\
+			    "  opt."#n": \"%s\"\n", cpv);		\
+		}							\
+	}
+
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\"opt\": {\n");
+	} else {
+		malloc_cprintf(write_cb, cbopaque,
+		    "Run-time option settings:\n");
+	}
+	OPT_WRITE_BOOL(abort, ",")
+	OPT_WRITE_SIZE_T(lg_chunk, ",")
+	OPT_WRITE_CHAR_P(dss, ",")
+	OPT_WRITE_UNSIGNED(narenas, ",")
+	OPT_WRITE_CHAR_P(purge, ",")
+	if (json || opt_purge == purge_mode_ratio) {
+		OPT_WRITE_SSIZE_T_MUTABLE(lg_dirty_mult,
+		    arenas.lg_dirty_mult, ",")
+	}
+	if (json || opt_purge == purge_mode_decay) {
+		OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time, ",")
+	}
+	OPT_WRITE_CHAR_P(junk, ",")
+	OPT_WRITE_SIZE_T(quarantine, ",")
+	OPT_WRITE_BOOL(redzone, ",")
+	OPT_WRITE_BOOL(zero, ",")
+	OPT_WRITE_BOOL(utrace, ",")
+	OPT_WRITE_BOOL(xmalloc, ",")
+	OPT_WRITE_BOOL(tcache, ",")
+	OPT_WRITE_SSIZE_T(lg_tcache_max, ",")
+	OPT_WRITE_BOOL(prof, ",")
+	OPT_WRITE_CHAR_P(prof_prefix, ",")
+	OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active, ",")
+	OPT_WRITE_BOOL_MUTABLE(prof_thread_active_init, prof.thread_active_init,
+	    ",")
+	OPT_WRITE_SSIZE_T_MUTABLE(lg_prof_sample, prof.lg_sample, ",")
+	OPT_WRITE_BOOL(prof_accum, ",")
+	OPT_WRITE_SSIZE_T(lg_prof_interval, ",")
+	OPT_WRITE_BOOL(prof_gdump, ",")
+	OPT_WRITE_BOOL(prof_final, ",")
+	OPT_WRITE_BOOL(prof_leak, ",")
+	/*
+	 * stats_print is always emitted, so as long as stats_print comes last
+	 * it's safe to unconditionally omit the comma here (rather than having
+	 * to conditionally omit it elsewhere depending on configuration).
+	 */
+	OPT_WRITE_BOOL(stats_print, "")
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t},\n");
+	}
+
+#undef OPT_WRITE_BOOL
+#undef OPT_WRITE_BOOL_MUTABLE
+#undef OPT_WRITE_SIZE_T
+#undef OPT_WRITE_SSIZE_T
+#undef OPT_WRITE_CHAR_P
+
+	/* arenas. */
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\"arenas\": {\n");
+	}
+
+	CTL_GET("arenas.narenas", &uv, unsigned);
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\"narenas\": %u,\n", uv);
+	} else
+		malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv);
 
-	if (bins)
-		stats_arena_bins_print(write_cb, cbopaque, i);
-	if (large)
-		stats_arena_lruns_print(write_cb, cbopaque, i);
-	if (huge)
-		stats_arena_hchunks_print(write_cb, cbopaque, i);
+	CTL_GET("arenas.lg_dirty_mult", &ssv, ssize_t);
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\"lg_dirty_mult\": %zd,\n", ssv);
+	} else if (opt_purge == purge_mode_ratio) {
+		if (ssv >= 0) {
+			malloc_cprintf(write_cb, cbopaque,
+			    "Min active:dirty page ratio per arena: "
+			    "%u:1\n", (1U << ssv));
+		} else {
+			malloc_cprintf(write_cb, cbopaque,
+			    "Min active:dirty page ratio per arena: "
+			    "N/A\n");
+		}
+	}
+	CTL_GET("arenas.decay_time", &ssv, ssize_t);
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\"decay_time\": %zd,\n", ssv);
+	} else if (opt_purge == purge_mode_decay) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "Unused dirty page decay time: %zd%s\n",
+		    ssv, (ssv < 0) ? " (no decay)" : "");
+	}
+
+	CTL_GET("arenas.quantum", &sv, size_t);
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\"quantum\": %zu,\n", sv);
+	} else
+		malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", sv);
+
+	CTL_GET("arenas.page", &sv, size_t);
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\"page\": %zu,\n", sv);
+	} else
+		malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv);
+
+	if (je_mallctl("arenas.tcache_max", (void *)&sv, &ssz, NULL, 0) == 0) {
+		if (json) {
+			malloc_cprintf(write_cb, cbopaque,
+			    "\t\t\t\"tcache_max\": %zu,\n", sv);
+		} else {
+			malloc_cprintf(write_cb, cbopaque,
+			    "Maximum thread-cached size class: %zu\n", sv);
+		}
+	}
+
+	if (json) {
+		unsigned nbins, nlruns, nhchunks, i;
+
+		CTL_GET("arenas.nbins", &nbins, unsigned);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\"nbins\": %u,\n", nbins);
+
+		CTL_GET("arenas.nhbins", &uv, unsigned);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\"nhbins\": %u,\n", uv);
+
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\"bin\": [\n");
+		for (i = 0; i < nbins; i++) {
+			malloc_cprintf(write_cb, cbopaque,
+			    "\t\t\t\t{\n");
+
+			CTL_M2_GET("arenas.bin.0.size", i, &sv, size_t);
+			malloc_cprintf(write_cb, cbopaque,
+			    "\t\t\t\t\t\"size\": %zu,\n", sv);
+
+			CTL_M2_GET("arenas.bin.0.nregs", i, &u32v, uint32_t);
+			malloc_cprintf(write_cb, cbopaque,
+			    "\t\t\t\t\t\"nregs\": %"FMTu32",\n", u32v);
+
+			CTL_M2_GET("arenas.bin.0.run_size", i, &sv, size_t);
+			malloc_cprintf(write_cb, cbopaque,
+			    "\t\t\t\t\t\"run_size\": %zu\n", sv);
+
+			malloc_cprintf(write_cb, cbopaque,
+			    "\t\t\t\t}%s\n", (i + 1 < nbins) ? "," : "");
+		}
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t],\n");
+
+		CTL_GET("arenas.nlruns", &nlruns, unsigned);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\"nlruns\": %u,\n", nlruns);
+
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\"lrun\": [\n");
+		for (i = 0; i < nlruns; i++) {
+			malloc_cprintf(write_cb, cbopaque,
+			    "\t\t\t\t{\n");
+
+			CTL_M2_GET("arenas.lrun.0.size", i, &sv, size_t);
+			malloc_cprintf(write_cb, cbopaque,
+			    "\t\t\t\t\t\"size\": %zu\n", sv);
+
+			malloc_cprintf(write_cb, cbopaque,
+			    "\t\t\t\t}%s\n", (i + 1 < nlruns) ? "," : "");
+		}
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t],\n");
+
+		CTL_GET("arenas.nhchunks", &nhchunks, unsigned);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\"nhchunks\": %u,\n", nhchunks);
+
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\"hchunk\": [\n");
+		for (i = 0; i < nhchunks; i++) {
+			malloc_cprintf(write_cb, cbopaque,
+			    "\t\t\t\t{\n");
+
+			CTL_M2_GET("arenas.hchunk.0.size", i, &sv, size_t);
+			malloc_cprintf(write_cb, cbopaque,
+			    "\t\t\t\t\t\"size\": %zu\n", sv);
+
+			malloc_cprintf(write_cb, cbopaque,
+			    "\t\t\t\t}%s\n", (i + 1 < nhchunks) ? "," : "");
+		}
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t]\n");
+
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t},\n");
+	}
+
+	/* prof. */
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\"prof\": {\n");
+
+		CTL_GET("prof.thread_active_init", &bv, bool);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\"thread_active_init\": %s,\n", bv ? "true" :
+		    "false");
+
+		CTL_GET("prof.active", &bv, bool);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\"active\": %s,\n", bv ? "true" : "false");
+
+		CTL_GET("prof.gdump", &bv, bool);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\"gdump\": %s,\n", bv ? "true" : "false");
+
+		CTL_GET("prof.interval", &u64v, uint64_t);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\"interval\": %"FMTu64",\n", u64v);
+
+		CTL_GET("prof.lg_sample", &ssv, ssize_t);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\"lg_sample\": %zd\n", ssv);
+
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t}%s\n", (config_stats || merged || unmerged) ? "," :
+		    "");
+	}
+}
+
+static void
+stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque,
+    bool json, bool merged, bool unmerged, bool bins, bool large, bool huge)
+{
+	size_t *cactive;
+	size_t allocated, active, metadata, resident, mapped, retained;
+
+	CTL_GET("stats.cactive", &cactive, size_t *);
+	CTL_GET("stats.allocated", &allocated, size_t);
+	CTL_GET("stats.active", &active, size_t);
+	CTL_GET("stats.metadata", &metadata, size_t);
+	CTL_GET("stats.resident", &resident, size_t);
+	CTL_GET("stats.mapped", &mapped, size_t);
+	CTL_GET("stats.retained", &retained, size_t);
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\"stats\": {\n");
+
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\"cactive\": %zu,\n", atomic_read_z(cactive));
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\"allocated\": %zu,\n", allocated);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\"active\": %zu,\n", active);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\"metadata\": %zu,\n", metadata);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\"resident\": %zu,\n", resident);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\"mapped\": %zu,\n", mapped);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\"retained\": %zu\n", retained);
+
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t}%s\n", (merged || unmerged) ? "," : "");
+	} else {
+		malloc_cprintf(write_cb, cbopaque,
+		    "Allocated: %zu, active: %zu, metadata: %zu,"
+		    " resident: %zu, mapped: %zu, retained: %zu\n",
+		    allocated, active, metadata, resident, mapped, retained);
+		malloc_cprintf(write_cb, cbopaque,
+		    "Current active ceiling: %zu\n",
+		    atomic_read_z(cactive));
+	}
+
+	if (merged || unmerged) {
+		unsigned narenas;
+
+		if (json) {
+			malloc_cprintf(write_cb, cbopaque,
+			    "\t\t\"stats.arenas\": {\n");
+		}
+
+		CTL_GET("arenas.narenas", &narenas, unsigned);
+		{
+			VARIABLE_ARRAY(bool, initialized, narenas);
+			size_t isz;
+			unsigned i, j, ninitialized;
+
+			isz = sizeof(bool) * narenas;
+			xmallctl("arenas.initialized", (void *)initialized,
+			    &isz, NULL, 0);
+			for (i = ninitialized = 0; i < narenas; i++) {
+				if (initialized[i])
+					ninitialized++;
+			}
+
+			/* Merged stats. */
+			if (merged && (ninitialized > 1 || !unmerged)) {
+				/* Print merged arena stats. */
+				if (json) {
+					malloc_cprintf(write_cb, cbopaque,
+					    "\t\t\t\"merged\": {\n");
+				} else {
+					malloc_cprintf(write_cb, cbopaque,
+					    "\nMerged arenas stats:\n");
+				}
+				stats_arena_print(write_cb, cbopaque, json,
+				    narenas, bins, large, huge);
+				if (json) {
+					malloc_cprintf(write_cb, cbopaque,
+					    "\t\t\t}%s\n", (ninitialized > 1) ?
+					    "," : "");
+				}
+			}
+
+			/* Unmerged stats. */
+			for (i = j = 0; i < narenas; i++) {
+				if (initialized[i]) {
+					if (json) {
+						j++;
+						malloc_cprintf(write_cb,
+						    cbopaque,
+						    "\t\t\t\"%u\": {\n", i);
+					} else {
+						malloc_cprintf(write_cb,
+						    cbopaque, "\narenas[%u]:\n",
+						    i);
+					}
+					stats_arena_print(write_cb, cbopaque,
+					    json, i, bins, large, huge);
+					if (json) {
+						malloc_cprintf(write_cb,
+						    cbopaque,
+						    "\t\t\t}%s\n", (j <
+						    ninitialized) ? "," : "");
+					}
+				}
+			}
+		}
+
+		if (json) {
+			malloc_cprintf(write_cb, cbopaque,
+			    "\t\t}\n");
+		}
+	}
 }
 
 void
 stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
     const char *opts)
 {
 	int err;
 	uint64_t epoch;
 	size_t u64sz;
+	bool json = false;
 	bool general = true;
 	bool merged = true;
 	bool unmerged = true;
 	bool bins = true;
 	bool large = true;
 	bool huge = true;
 
 	/*
@@ -400,16 +1096,19 @@ stats_print(void (*write_cb)(void *, con
 		abort();
 	}
 
 	if (opts != NULL) {
 		unsigned i;
 
 		for (i = 0; opts[i] != '\0'; i++) {
 			switch (opts[i]) {
+			case 'J':
+				json = true;
+				break;
 			case 'g':
 				general = false;
 				break;
 			case 'm':
 				merged = false;
 				break;
 			case 'a':
 				unmerged = false;
@@ -423,250 +1122,32 @@ stats_print(void (*write_cb)(void *, con
 			case 'h':
 				huge = false;
 				break;
 			default:;
 			}
 		}
 	}
 
-	malloc_cprintf(write_cb, cbopaque,
-	    "___ Begin jemalloc statistics ___\n");
-	if (general) {
-		const char *cpv;
-		bool bv;
-		unsigned uv;
-		ssize_t ssv;
-		size_t sv, bsz, usz, ssz, sssz, cpsz;
-
-		bsz = sizeof(bool);
-		usz = sizeof(unsigned);
-		ssz = sizeof(size_t);
-		sssz = sizeof(ssize_t);
-		cpsz = sizeof(const char *);
-
-		CTL_GET("version", &cpv, const char *);
-		malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv);
-		CTL_GET("config.debug", &bv, bool);
-		malloc_cprintf(write_cb, cbopaque, "Assertions %s\n",
-		    bv ? "enabled" : "disabled");
-		malloc_cprintf(write_cb, cbopaque,
-		    "config.malloc_conf: \"%s\"\n", config_malloc_conf);
-
-#define	OPT_WRITE_BOOL(n)						\
-		if (je_mallctl("opt."#n, &bv, &bsz, NULL, 0) == 0) {	\
-			malloc_cprintf(write_cb, cbopaque,		\
-			    "  opt."#n": %s\n", bv ? "true" : "false");	\
-		}
-#define	OPT_WRITE_BOOL_MUTABLE(n, m) {					\
-		bool bv2;						\
-		if (je_mallctl("opt."#n, &bv, &bsz, NULL, 0) == 0 &&	\
-		    je_mallctl(#m, &bv2, &bsz, NULL, 0) == 0) {		\
-			malloc_cprintf(write_cb, cbopaque,		\
-			    "  opt."#n": %s ("#m": %s)\n", bv ? "true"	\
-			    : "false", bv2 ? "true" : "false");		\
-		}							\
-}
-#define	OPT_WRITE_UNSIGNED(n)						\
-		if (je_mallctl("opt."#n, &uv, &usz, NULL, 0) == 0) {	\
-			malloc_cprintf(write_cb, cbopaque,		\
-			"  opt."#n": %u\n", uv);			\
-		}
-#define	OPT_WRITE_SIZE_T(n)						\
-		if (je_mallctl("opt."#n, &sv, &ssz, NULL, 0) == 0) {	\
-			malloc_cprintf(write_cb, cbopaque,		\
-			"  opt."#n": %zu\n", sv);			\
-		}
-#define	OPT_WRITE_SSIZE_T(n)						\
-		if (je_mallctl("opt."#n, &ssv, &sssz, NULL, 0) == 0) {	\
-			malloc_cprintf(write_cb, cbopaque,		\
-			    "  opt."#n": %zd\n", ssv);			\
-		}
-#define	OPT_WRITE_SSIZE_T_MUTABLE(n, m) {				\
-		ssize_t ssv2;						\
-		if (je_mallctl("opt."#n, &ssv, &sssz, NULL, 0) == 0 &&	\
-		    je_mallctl(#m, &ssv2, &sssz, NULL, 0) == 0) {	\
-			malloc_cprintf(write_cb, cbopaque,		\
-			    "  opt."#n": %zd ("#m": %zd)\n",		\
-			    ssv, ssv2);					\
-		}							\
-}
-#define	OPT_WRITE_CHAR_P(n)						\
-		if (je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0) == 0) {	\
-			malloc_cprintf(write_cb, cbopaque,		\
-			    "  opt."#n": \"%s\"\n", cpv);		\
-		}
-
+	if (json) {
 		malloc_cprintf(write_cb, cbopaque,
-		    "Run-time option settings:\n");
-		OPT_WRITE_BOOL(abort)
-		OPT_WRITE_SIZE_T(lg_chunk)
-		OPT_WRITE_CHAR_P(dss)
-		OPT_WRITE_UNSIGNED(narenas)
-		OPT_WRITE_CHAR_P(purge)
-		if (opt_purge == purge_mode_ratio) {
-			OPT_WRITE_SSIZE_T_MUTABLE(lg_dirty_mult,
-			    arenas.lg_dirty_mult)
-		}
-		if (opt_purge == purge_mode_decay)
-			OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time)
-		OPT_WRITE_BOOL(stats_print)
-		OPT_WRITE_CHAR_P(junk)
-		OPT_WRITE_SIZE_T(quarantine)
-		OPT_WRITE_BOOL(redzone)
-		OPT_WRITE_BOOL(zero)
-		OPT_WRITE_BOOL(utrace)
-		OPT_WRITE_BOOL(valgrind)
-		OPT_WRITE_BOOL(xmalloc)
-		OPT_WRITE_BOOL(tcache)
-		OPT_WRITE_SSIZE_T(lg_tcache_max)
-		OPT_WRITE_BOOL(prof)
-		OPT_WRITE_CHAR_P(prof_prefix)
-		OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active)
-		OPT_WRITE_BOOL_MUTABLE(prof_thread_active_init,
-		    prof.thread_active_init)
-		OPT_WRITE_SSIZE_T(lg_prof_sample)
-		OPT_WRITE_BOOL(prof_accum)
-		OPT_WRITE_SSIZE_T(lg_prof_interval)
-		OPT_WRITE_BOOL(prof_gdump)
-		OPT_WRITE_BOOL(prof_final)
-		OPT_WRITE_BOOL(prof_leak)
-
-#undef OPT_WRITE_BOOL
-#undef OPT_WRITE_BOOL_MUTABLE
-#undef OPT_WRITE_SIZE_T
-#undef OPT_WRITE_SSIZE_T
-#undef OPT_WRITE_CHAR_P
-
-		malloc_cprintf(write_cb, cbopaque, "CPUs: %u\n", ncpus);
-
-		CTL_GET("arenas.narenas", &uv, unsigned);
-		malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv);
-
-		malloc_cprintf(write_cb, cbopaque, "Pointer size: %zu\n",
-		    sizeof(void *));
-
-		CTL_GET("arenas.quantum", &sv, size_t);
-		malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n",
-		    sv);
-
-		CTL_GET("arenas.page", &sv, size_t);
-		malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv);
-
-		CTL_GET("arenas.lg_dirty_mult", &ssv, ssize_t);
-		if (opt_purge == purge_mode_ratio) {
-			if (ssv >= 0) {
-				malloc_cprintf(write_cb, cbopaque,
-				    "Min active:dirty page ratio per arena: "
-				    "%u:1\n", (1U << ssv));
-			} else {
-				malloc_cprintf(write_cb, cbopaque,
-				    "Min active:dirty page ratio per arena: "
-				    "N/A\n");
-			}
-		}
-		CTL_GET("arenas.decay_time", &ssv, ssize_t);
-		if (opt_purge == purge_mode_decay) {
-			malloc_cprintf(write_cb, cbopaque,
-			    "Unused dirty page decay time: %zd%s\n",
-			    ssv, (ssv < 0) ? " (no decay)" : "");
-		}
-		if (je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0) == 0) {
-			malloc_cprintf(write_cb, cbopaque,
-			    "Maximum thread-cached size class: %zu\n", sv);
-		}
-		if (je_mallctl("opt.prof", &bv, &bsz, NULL, 0) == 0 && bv) {
-			CTL_GET("prof.lg_sample", &sv, size_t);
-			malloc_cprintf(write_cb, cbopaque,
-			    "Average profile sample interval: %"FMTu64
-			    " (2^%zu)\n", (((uint64_t)1U) << sv), sv);
-
-			CTL_GET("opt.lg_prof_interval", &ssv, ssize_t);
-			if (ssv >= 0) {
-				malloc_cprintf(write_cb, cbopaque,
-				    "Average profile dump interval: %"FMTu64
-				    " (2^%zd)\n",
-				    (((uint64_t)1U) << ssv), ssv);
-			} else {
-				malloc_cprintf(write_cb, cbopaque,
-				    "Average profile dump interval: N/A\n");
-			}
-		}
-		CTL_GET("opt.lg_chunk", &sv, size_t);
+		    "{\n"
+		    "\t\"jemalloc\": {\n");
+	} else {
 		malloc_cprintf(write_cb, cbopaque,
-		    "Chunk size: %zu (2^%zu)\n", (ZU(1) << sv), sv);
+		    "___ Begin jemalloc statistics ___\n");
 	}
 
+	if (general)
+		stats_general_print(write_cb, cbopaque, json, merged, unmerged);
 	if (config_stats) {
-		size_t *cactive;
-		size_t allocated, active, metadata, resident, mapped;
-
-		CTL_GET("stats.cactive", &cactive, size_t *);
-		CTL_GET("stats.allocated", &allocated, size_t);
-		CTL_GET("stats.active", &active, size_t);
-		CTL_GET("stats.metadata", &metadata, size_t);
-		CTL_GET("stats.resident", &resident, size_t);
-		CTL_GET("stats.mapped", &mapped, size_t);
-		malloc_cprintf(write_cb, cbopaque,
-		    "Allocated: %zu, active: %zu, metadata: %zu,"
-		    " resident: %zu, mapped: %zu\n",
-		    allocated, active, metadata, resident, mapped);
+		stats_print_helper(write_cb, cbopaque, json, merged, unmerged,
+		    bins, large, huge);
+	}
+	if (json) {
 		malloc_cprintf(write_cb, cbopaque,
-		    "Current active ceiling: %zu\n",
-		    atomic_read_z(cactive));
-
-		if (merged) {
-			unsigned narenas;
-
-			CTL_GET("arenas.narenas", &narenas, unsigned);
-			{
-				VARIABLE_ARRAY(bool, initialized, narenas);
-				size_t isz;
-				unsigned i, ninitialized;
-
-				isz = sizeof(bool) * narenas;
-				xmallctl("arenas.initialized", initialized,
-				    &isz, NULL, 0);
-				for (i = ninitialized = 0; i < narenas; i++) {
-					if (initialized[i])
-						ninitialized++;
-				}
-
-				if (ninitialized > 1 || !unmerged) {
-					/* Print merged arena stats. */
-					malloc_cprintf(write_cb, cbopaque,
-					    "\nMerged arenas stats:\n");
-					stats_arena_print(write_cb, cbopaque,
-					    narenas, bins, large, huge);
-				}
-			}
-		}
-
-		if (unmerged) {
-			unsigned narenas;
-
-			/* Print stats for each arena. */
-
-			CTL_GET("arenas.narenas", &narenas, unsigned);
-			{
-				VARIABLE_ARRAY(bool, initialized, narenas);
-				size_t isz;
-				unsigned i;
-
-				isz = sizeof(bool) * narenas;
-				xmallctl("arenas.initialized", initialized,
-				    &isz, NULL, 0);
-
-				for (i = 0; i < narenas; i++) {
-					if (initialized[i]) {
-						malloc_cprintf(write_cb,
-						    cbopaque,
-						    "\narenas[%u]:\n", i);
-						stats_arena_print(write_cb,
-						    cbopaque, i, bins, large,
-						    huge);
-					}
-				}
-			}
-		}
+		    "\t}\n"
+		    "}\n");
+	} else {
+		malloc_cprintf(write_cb, cbopaque,
+		    "--- End jemalloc statistics ---\n");
 	}
-	malloc_cprintf(write_cb, cbopaque, "--- End jemalloc statistics ---\n");
 }
--- a/memory/jemalloc/src/src/tcache.c
+++ b/memory/jemalloc/src/src/tcache.c
@@ -18,20 +18,21 @@ tcaches_t		*tcaches;
 /* Index of first element within tcaches that has never been used. */
 static unsigned		tcaches_past;
 
 /* Head of singly linked list tracking available tcaches elements. */
 static tcaches_t	*tcaches_avail;
 
 /******************************************************************************/
 
-size_t	tcache_salloc(const void *ptr)
+size_t
+tcache_salloc(tsdn_t *tsdn, const void *ptr)
 {
 
-	return (arena_salloc(ptr, false));
+	return (arena_salloc(tsdn, ptr, false));
 }
 
 void
 tcache_event_hard(tsd_t *tsd, tcache_t *tcache)
 {
 	szind_t binind = tcache->next_gc_bin;
 	tcache_bin_t *tbin = &tcache->tbins[binind];
 	tcache_bin_info_t *tbin_info = &tcache_bin_info[binind];
@@ -65,22 +66,22 @@ tcache_event_hard(tsd_t *tsd, tcache_t *
 	tbin->low_water = tbin->ncached;
 
 	tcache->next_gc_bin++;
 	if (tcache->next_gc_bin == nhbins)
 		tcache->next_gc_bin = 0;
 }
 
 void *
-tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
+tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
     tcache_bin_t *tbin, szind_t binind, bool *tcache_success)
 {
 	void *ret;
 
-	arena_tcache_fill_small(tsd, arena, tbin, binind, config_prof ?
+	arena_tcache_fill_small(tsdn, arena, tbin, binind, config_prof ?
 	    tcache->prof_accumbytes : 0);
 	if (config_prof)
 		tcache->prof_accumbytes = 0;
 	ret = tcache_alloc_easy(tbin, tcache_success);
 
 	return (ret);
 }
 
@@ -101,66 +102,67 @@ tcache_bin_flush_small(tsd_t *tsd, tcach
 	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
 		/* Lock the arena bin associated with the first object. */
 		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
 		    *(tbin->avail - 1));
 		arena_t *bin_arena = extent_node_arena_get(&chunk->node);
 		arena_bin_t *bin = &bin_arena->bins[binind];
 
 		if (config_prof && bin_arena == arena) {
-			if (arena_prof_accum(arena, tcache->prof_accumbytes))
-				prof_idump();
+			if (arena_prof_accum(tsd_tsdn(tsd), arena,
+			    tcache->prof_accumbytes))
+				prof_idump(tsd_tsdn(tsd));
 			tcache->prof_accumbytes = 0;
 		}
 
-		malloc_mutex_lock(&bin->lock);
+		malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
 		if (config_stats && bin_arena == arena) {
 			assert(!merged_stats);
 			merged_stats = true;
 			bin->stats.nflushes++;
 			bin->stats.nrequests += tbin->tstats.nrequests;
 			tbin->tstats.nrequests = 0;
 		}
 		ndeferred = 0;
 		for (i = 0; i < nflush; i++) {
 			ptr = *(tbin->avail - 1 - i);
 			assert(ptr != NULL);
 			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 			if (extent_node_arena_get(&chunk->node) == bin_arena) {
 				size_t pageind = ((uintptr_t)ptr -
 				    (uintptr_t)chunk) >> LG_PAGE;
 				arena_chunk_map_bits_t *bitselm =
-				    arena_bitselm_get(chunk, pageind);
-				arena_dalloc_bin_junked_locked(bin_arena, chunk,
-				    ptr, bitselm);
+				    arena_bitselm_get_mutable(chunk, pageind);
+				arena_dalloc_bin_junked_locked(tsd_tsdn(tsd),
+				    bin_arena, chunk, ptr, bitselm);
 			} else {
 				/*
 				 * This object was allocated via a different
 				 * arena bin than the one that is currently
 				 * locked.  Stash the object, so that it can be
 				 * handled in a future pass.
 				 */
 				*(tbin->avail - 1 - ndeferred) = ptr;
 				ndeferred++;
 			}
 		}
-		malloc_mutex_unlock(&bin->lock);
-		arena_decay_ticks(tsd, bin_arena, nflush - ndeferred);
+		malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
+		arena_decay_ticks(tsd_tsdn(tsd), bin_arena, nflush - ndeferred);
 	}
 	if (config_stats && !merged_stats) {
 		/*
 		 * The flush loop didn't happen to flush to this thread's
 		 * arena, so the stats didn't get merged.  Manually do so now.
 		 */
 		arena_bin_t *bin = &arena->bins[binind];
-		malloc_mutex_lock(&bin->lock);
+		malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
 		bin->stats.nflushes++;
 		bin->stats.nrequests += tbin->tstats.nrequests;
 		tbin->tstats.nrequests = 0;
-		malloc_mutex_unlock(&bin->lock);
+		malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
 	}
 
 	memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
 	    sizeof(void *));
 	tbin->ncached = rem;
 	if ((int)tbin->ncached < tbin->low_water)
 		tbin->low_water = tbin->ncached;
 }
@@ -183,17 +185,17 @@ tcache_bin_flush_large(tsd_t *tsd, tcach
 		/* Lock the arena associated with the first object. */
 		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
 		    *(tbin->avail - 1));
 		arena_t *locked_arena = extent_node_arena_get(&chunk->node);
 		UNUSED bool idump;
 
 		if (config_prof)
 			idump = false;
-		malloc_mutex_lock(&locked_arena->lock);
+		malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->lock);
 		if ((config_prof || config_stats) && locked_arena == arena) {
 			if (config_prof) {
 				idump = arena_prof_accum_locked(arena,
 				    tcache->prof_accumbytes);
 				tcache->prof_accumbytes = 0;
 			}
 			if (config_stats) {
 				merged_stats = true;
@@ -206,136 +208,138 @@ tcache_bin_flush_large(tsd_t *tsd, tcach
 		}
 		ndeferred = 0;
 		for (i = 0; i < nflush; i++) {
 			ptr = *(tbin->avail - 1 - i);
 			assert(ptr != NULL);
 			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 			if (extent_node_arena_get(&chunk->node) ==
 			    locked_arena) {
-				arena_dalloc_large_junked_locked(locked_arena,
-				    chunk, ptr);
+				arena_dalloc_large_junked_locked(tsd_tsdn(tsd),
+				    locked_arena, chunk, ptr);
 			} else {
 				/*
 				 * This object was allocated via a different
 				 * arena than the one that is currently locked.
 				 * Stash the object, so that it can be handled
 				 * in a future pass.
 				 */
 				*(tbin->avail - 1 - ndeferred) = ptr;
 				ndeferred++;
 			}
 		}
-		malloc_mutex_unlock(&locked_arena->lock);
+		malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->lock);
 		if (config_prof && idump)
-			prof_idump();
-		arena_decay_ticks(tsd, locked_arena, nflush - ndeferred);
+			prof_idump(tsd_tsdn(tsd));
+		arena_decay_ticks(tsd_tsdn(tsd), locked_arena, nflush -
+		    ndeferred);
 	}
 	if (config_stats && !merged_stats) {
 		/*
 		 * The flush loop didn't happen to flush to this thread's
 		 * arena, so the stats didn't get merged.  Manually do so now.
 		 */
-		malloc_mutex_lock(&arena->lock);
+		malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock);
 		arena->stats.nrequests_large += tbin->tstats.nrequests;
 		arena->stats.lstats[binind - NBINS].nrequests +=
 		    tbin->tstats.nrequests;
 		tbin->tstats.nrequests = 0;
-		malloc_mutex_unlock(&arena->lock);
+		malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock);
 	}
 
 	memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
 	    sizeof(void *));
 	tbin->ncached = rem;
 	if ((int)tbin->ncached < tbin->low_water)
 		tbin->low_water = tbin->ncached;
 }
 
-void
-tcache_arena_associate(tcache_t *tcache, arena_t *arena)
+static void
+tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena)
 {
 
 	if (config_stats) {
 		/* Link into list of extant tcaches. */
-		malloc_mutex_lock(&arena->lock);
+		malloc_mutex_lock(tsdn, &arena->lock);
 		ql_elm_new(tcache, link);
 		ql_tail_insert(&arena->tcache_ql, tcache, link);
-		malloc_mutex_unlock(&arena->lock);
+		malloc_mutex_unlock(tsdn, &arena->lock);
 	}
 }
 
-void
-tcache_arena_reassociate(tcache_t *tcache, arena_t *oldarena, arena_t *newarena)
-{
-
-	tcache_arena_dissociate(tcache, oldarena);
-	tcache_arena_associate(tcache, newarena);
-}
-
-void
-tcache_arena_dissociate(tcache_t *tcache, arena_t *arena)
+static void
+tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena)
 {
 
 	if (config_stats) {
 		/* Unlink from list of extant tcaches. */
-		malloc_mutex_lock(&arena->lock);
+		malloc_mutex_lock(tsdn, &arena->lock);
 		if (config_debug) {
 			bool in_ql = false;
 			tcache_t *iter;
 			ql_foreach(iter, &arena->tcache_ql, link) {
 				if (iter == tcache) {
 					in_ql = true;
 					break;
 				}
 			}
 			assert(in_ql);
 		}
 		ql_remove(&arena->tcache_ql, tcache, link);
-		tcache_stats_merge(tcache, arena);
-		malloc_mutex_unlock(&arena->lock);
+		tcache_stats_merge(tsdn, tcache, arena);
+		malloc_mutex_unlock(tsdn, &arena->lock);
 	}
 }
 
+void
+tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *oldarena,
+    arena_t *newarena)
+{
+
+	tcache_arena_dissociate(tsdn, tcache, oldarena);
+	tcache_arena_associate(tsdn, tcache, newarena);
+}
+
 tcache_t *
 tcache_get_hard(tsd_t *tsd)
 {
 	arena_t *arena;
 
 	if (!tcache_enabled_get()) {
 		if (tsd_nominal(tsd))
 			tcache_enabled_set(false); /* Memoize. */
 		return (NULL);
 	}
 	arena = arena_choose(tsd, NULL);
 	if (unlikely(arena == NULL))
 		return (NULL);
-	return (tcache_create(tsd, arena));
+	return (tcache_create(tsd_tsdn(tsd), arena));
 }
 
 tcache_t *
-tcache_create(tsd_t *tsd, arena_t *arena)
+tcache_create(tsdn_t *tsdn, arena_t *arena)
 {
 	tcache_t *tcache;
 	size_t size, stack_offset;
 	unsigned i;
 
 	size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins);
 	/* Naturally align the pointer stacks. */
 	size = PTR_CEILING(size);
 	stack_offset = size;
 	size += stack_nelms * sizeof(void *);
 	/* Avoid false cacheline sharing. */
 	size = sa2u(size, CACHELINE);
 
-	tcache = ipallocztm(tsd, size, CACHELINE, true, false, true,
-	    arena_get(0, false));
+	tcache = ipallocztm(tsdn, size, CACHELINE, true, NULL, true,
+	    arena_get(TSDN_NULL, 0, true));
 	if (tcache == NULL)
 		return (NULL);
 
-	tcache_arena_associate(tcache, arena);
+	tcache_arena_associate(tsdn, tcache, arena);
 
 	ticker_init(&tcache->gc_ticker, TCACHE_GC_INCR);
 
 	assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
 	for (i = 0; i < nhbins; i++) {
 		tcache->tbins[i].lg_fill_div = 1;
 		stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
 		/*
@@ -352,48 +356,48 @@ tcache_create(tsd_t *tsd, arena_t *arena
 
 static void
 tcache_destroy(tsd_t *tsd, tcache_t *tcache)
 {
 	arena_t *arena;
 	unsigned i;
 
 	arena = arena_choose(tsd, NULL);
-	tcache_arena_dissociate(tcache, arena);
+	tcache_arena_dissociate(tsd_tsdn(tsd), tcache, arena);
 
 	for (i = 0; i < NBINS; i++) {
 		tcache_bin_t *tbin = &tcache->tbins[i];
 		tcache_bin_flush_small(tsd, tcache, tbin, i, 0);
 
 		if (config_stats && tbin->tstats.nrequests != 0) {
 			arena_bin_t *bin = &arena->bins[i];
-			malloc_mutex_lock(&bin->lock);
+			malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
 			bin->stats.nrequests += tbin->tstats.nrequests;
-			malloc_mutex_unlock(&bin->lock);
+			malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
 		}
 	}
 
 	for (; i < nhbins; i++) {
 		tcache_bin_t *tbin = &tcache->tbins[i];
 		tcache_bin_flush_large(tsd, tbin, i, 0, tcache);
 
 		if (config_stats && tbin->tstats.nrequests != 0) {
-			malloc_mutex_lock(&arena->lock);
+			malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock);
 			arena->stats.nrequests_large += tbin->tstats.nrequests;
 			arena->stats.lstats[i - NBINS].nrequests +=
 			    tbin->tstats.nrequests;
-			malloc_mutex_unlock(&arena->lock);
+			malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock);
 		}
 	}
 
 	if (config_prof && tcache->prof_accumbytes > 0 &&
-	    arena_prof_accum(arena, tcache->prof_accumbytes))
-		prof_idump();
+	    arena_prof_accum(tsd_tsdn(tsd), arena, tcache->prof_accumbytes))
+		prof_idump(tsd_tsdn(tsd));
 
-	idalloctm(tsd, tcache, false, true, true);
+	idalloctm(tsd_tsdn(tsd), tcache, NULL, true, true);
 }
 
 void
 tcache_cleanup(tsd_t *tsd)
 {
 	tcache_t *tcache;
 
 	if (!config_tcache)
@@ -407,59 +411,64 @@ tcache_cleanup(tsd_t *tsd)
 
 void
 tcache_enabled_cleanup(tsd_t *tsd)
 {
 
 	/* Do nothing. */
 }
 
-/* Caller must own arena->lock. */
 void
-tcache_stats_merge(tcache_t *tcache, arena_t *arena)
+tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena)
 {
 	unsigned i;
 
 	cassert(config_stats);
 
+	malloc_mutex_assert_owner(tsdn, &arena->lock);
+
 	/* Merge and reset tcache stats. */
 	for (i = 0; i < NBINS; i++) {
 		arena_bin_t *bin = &arena->bins[i];
 		tcache_bin_t *tbin = &tcache->tbins[i];
-		malloc_mutex_lock(&bin->lock);
+		malloc_mutex_lock(tsdn, &bin->lock);
 		bin->stats.nrequests += tbin->tstats.nrequests;
-		malloc_mutex_unlock(&bin->lock);
+		malloc_mutex_unlock(tsdn, &bin->lock);
 		tbin->tstats.nrequests = 0;
 	}
 
 	for (; i < nhbins; i++) {
 		malloc_large_stats_t *lstats = &arena->stats.lstats[i - NBINS];
 		tcache_bin_t *tbin = &tcache->tbins[i];
 		arena->stats.nrequests_large += tbin->tstats.nrequests;
 		lstats->nrequests += tbin->tstats.nrequests;
 		tbin->tstats.nrequests = 0;
 	}
 }
 
 bool
 tcaches_create(tsd_t *tsd, unsigned *r_ind)
 {
+	arena_t *arena;
 	tcache_t *tcache;
 	tcaches_t *elm;
 
 	if (tcaches == NULL) {
-		tcaches = base_alloc(sizeof(tcache_t *) *
+		tcaches = base_alloc(tsd_tsdn(tsd), sizeof(tcache_t *) *
 		    (MALLOCX_TCACHE_MAX+1));
 		if (tcaches == NULL)
 			return (true);
 	}
 
 	if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX)
 		return (true);
-	tcache = tcache_create(tsd, arena_get(0, false));
+	arena = arena_ichoose(tsd, NULL);
+	if (unlikely(arena == NULL))
+		return (true);
+	tcache = tcache_create(tsd_tsdn(tsd), arena);
 	if (tcache == NULL)
 		return (true);
 
 	if (tcaches_avail != NULL) {
 		elm = tcaches_avail;
 		tcaches_avail = tcaches_avail->next;
 		elm->tcache = tcache;
 		*r_ind = (unsigned)(elm - tcaches);
@@ -495,17 +504,17 @@ tcaches_destroy(tsd_t *tsd, unsigned ind
 {
 	tcaches_t *elm = &tcaches[ind];
 	tcaches_elm_flush(tsd, elm);
 	elm->next = tcaches_avail;
 	tcaches_avail = elm;
 }
 
 bool
-tcache_boot(void)
+tcache_boot(tsdn_t *tsdn)
 {
 	unsigned i;
 
 	/*
 	 * If necessary, clamp opt_lg_tcache_max, now that large_maxclass is
 	 * known.
 	 */
 	if (opt_lg_tcache_max < 0 || (1U << opt_lg_tcache_max) < SMALL_MAXCLASS)
@@ -513,17 +522,17 @@ tcache_boot(void)
 	else if ((1U << opt_lg_tcache_max) > large_maxclass)
 		tcache_maxclass = large_maxclass;
 	else
 		tcache_maxclass = (1U << opt_lg_tcache_max);
 
 	nhbins = size2index(tcache_maxclass) + 1;
 
 	/* Initialize tcache_bin_info. */
-	tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins *
+	tcache_bin_info = (tcache_bin_info_t *)base_alloc(tsdn, nhbins *
 	    sizeof(tcache_bin_info_t));
 	if (tcache_bin_info == NULL)
 		return (true);
 	stack_nelms = 0;
 	for (i = 0; i < NBINS; i++) {
 		if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) {
 			tcache_bin_info[i].ncached_max =
 			    TCACHE_NSLOTS_SMALL_MIN;
--- a/memory/jemalloc/src/src/tsd.c
+++ b/memory/jemalloc/src/src/tsd.c
@@ -72,17 +72,17 @@ tsd_cleanup(void *arg)
 {
 	tsd_t *tsd = (tsd_t *)arg;
 
 	switch (tsd->state) {
 	case tsd_state_uninitialized:
 		/* Do nothing. */
 		break;
 	case tsd_state_nominal:
-#define O(n, t)								\
+#define	O(n, t)								\
 		n##_cleanup(tsd);
 MALLOC_TSD
 #undef O
 		tsd->state = tsd_state_purgatory;
 		tsd_set(tsd);
 		break;
 	case tsd_state_purgatory:
 		/*
@@ -101,25 +101,27 @@ MALLOC_TSD
 		tsd->state = tsd_state_purgatory;
 		tsd_set(tsd);
 		break;
 	default:
 		not_reached();
 	}
 }
 
-bool
+tsd_t *
 malloc_tsd_boot0(void)
 {
+	tsd_t *tsd;
 
 	ncleanups = 0;
 	if (tsd_boot0())
-		return (true);
-	*tsd_arenas_tdata_bypassp_get(tsd_fetch()) = true;
-	return (false);
+		return (NULL);
+	tsd = tsd_fetch();
+	*tsd_arenas_tdata_bypassp_get(tsd) = true;
+	return (tsd);
 }
 
 void
 malloc_tsd_boot1(void)
 {
 
 	tsd_boot1();
 	*tsd_arenas_tdata_bypassp_get(tsd_fetch()) = false;
@@ -164,32 +166,32 @@ BOOL	(WINAPI *const tls_callback)(HINSTA
     !defined(_WIN32))
 void *
 tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block)
 {
 	pthread_t self = pthread_self();
 	tsd_init_block_t *iter;
 
 	/* Check whether this thread has already inserted into the list. */
-	malloc_mutex_lock(&head->lock);
+	malloc_mutex_lock(TSDN_NULL, &head->lock);
 	ql_foreach(iter, &head->blocks, link) {
 		if (iter->thread == self) {
-			malloc_mutex_unlock(&head->lock);
+			malloc_mutex_unlock(TSDN_NULL, &head->lock);
 			return (iter->data);
 		}
 	}
 	/* Insert block into list. */
 	ql_elm_new(block, link);
 	block->thread = self;
 	ql_tail_insert(&head->blocks, block, link);
-	malloc_mutex_unlock(&head->lock);
+	malloc_mutex_unlock(TSDN_NULL, &head->lock);
 	return (NULL);
 }
 
 void
 tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block)
 {
 
-	malloc_mutex_lock(&head->lock);
+	malloc_mutex_lock(TSDN_NULL, &head->lock);
 	ql_remove(&head->blocks, block, link);
-	malloc_mutex_unlock(&head->lock);
+	malloc_mutex_unlock(TSDN_NULL, &head->lock);
 }
 #endif
--- a/memory/jemalloc/src/src/util.c
+++ b/memory/jemalloc/src/src/util.c
@@ -9,16 +9,17 @@
 	}								\
 } while (0)
 
 #define	not_reached() do {						\
 	if (config_debug) {						\
 		malloc_write("<jemalloc>: Unreachable code reached\n");	\
 		abort();						\
 	}								\
+	unreachable();							\
 } while (0)
 
 #define	not_implemented() do {						\
 	if (config_debug) {						\
 		malloc_write("<jemalloc>: Not implemented\n");		\
 		abort();						\
 	}								\
 } while (0)
@@ -43,17 +44,17 @@ static char	*x2s(uintmax_t x, bool alt_f
 
 /******************************************************************************/
 
 /* malloc_message() setup. */
 static void
 wrtmessage(void *cbopaque, const char *s)
 {
 
-#ifdef SYS_write
+#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_write)
 	/*
 	 * Use syscall(2) rather than write(2) when possible in order to avoid
 	 * the possibility of memory allocation within libc.  This is necessary
 	 * on FreeBSD; most operating systems do not have this problem though.
 	 *
 	 * syscall() returns long or int, depending on platform, so capture the
 	 * unused result in the widest plausible type to avoid compiler
 	 * warnings.
@@ -309,20 +310,19 @@ x2s(uintmax_t x, bool alt_form, bool upp
 	if (alt_form) {
 		s -= 2;
 		(*slen_p) += 2;
 		memcpy(s, uppercase ? "0X" : "0x", 2);
 	}
 	return (s);
 }
 
-int
+size_t
 malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap)
 {
-	int ret;
 	size_t i;
 	const char *f;
 
 #define	APPEND_C(c) do {						\
 	if (i < size)							\
 		str[i] = (c);						\
 	i++;								\
 } while (0)
@@ -403,16 +403,18 @@ malloc_vsnprintf(char *str, size_t size,
 		case '%': {
 			bool alt_form = false;
 			bool left_justify = false;
 			bool plus_space = false;
 			bool plus_plus = false;
 			int prec = -1;
 			int width = -1;
 			unsigned char len = '?';
+			char *s;
+			size_t slen;
 
 			f++;
 			/* Flags. */
 			while (true) {
 				switch (*f) {
 				case '#':
 					assert(!alt_form);
 					alt_form = true;
@@ -493,18 +495,16 @@ malloc_vsnprintf(char *str, size_t size,
 			case 'q': case 'j': case 't': case 'z':
 				len = *f;
 				f++;
 				break;
 			default: break;
 			}
 			/* Conversion specifier. */
 			switch (*f) {
-				char *s;
-				size_t slen;
 			case '%':
 				/* %% */
 				APPEND_C(*f);
 				f++;
 				break;
 			case 'd': case 'i': {
 				intmax_t val JEMALLOC_CC_SILENCE_INIT(0);
 				char buf[D2S_BUFSIZE];
@@ -580,31 +580,29 @@ malloc_vsnprintf(char *str, size_t size,
 			break;
 		}}
 	}
 	label_out:
 	if (i < size)
 		str[i] = '\0';
 	else
 		str[size - 1] = '\0';
-	assert(i < INT_MAX);
-	ret = (int)i;
 
 #undef APPEND_C
 #undef APPEND_S
 #undef APPEND_PADDED_S
 #undef GET_ARG_NUMERIC
-	return (ret);
+	return (i);
 }
 
 JEMALLOC_FORMAT_PRINTF(3, 4)
-int
+size_t
 malloc_snprintf(char *str, size_t size, const char *format, ...)
 {
-	int ret;
+	size_t ret;
 	va_list ap;
 
 	va_start(ap, format);
 	ret = malloc_vsnprintf(str, size, format, ap);
 	va_end(ap);
 
 	return (ret);
 }
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/src/witness.c
@@ -0,0 +1,136 @@
+#define	JEMALLOC_WITNESS_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+void
+witness_init(witness_t *witness, const char *name, witness_rank_t rank,
+    witness_comp_t *comp)
+{
+
+	witness->name = name;
+	witness->rank = rank;
+	witness->comp = comp;
+}
+
+#ifdef JEMALLOC_JET
+#undef witness_lock_error
+#define	witness_lock_error JEMALLOC_N(n_witness_lock_error)
+#endif
+void
+witness_lock_error(const witness_list_t *witnesses, const witness_t *witness)
+{
+	witness_t *w;
+
+	malloc_printf("<jemalloc>: Lock rank order reversal:");
+	ql_foreach(w, witnesses, link) {
+		malloc_printf(" %s(%u)", w->name, w->rank);
+	}
+	malloc_printf(" %s(%u)\n", witness->name, witness->rank);
+	abort();
+}
+#ifdef JEMALLOC_JET
+#undef witness_lock_error
+#define	witness_lock_error JEMALLOC_N(witness_lock_error)
+witness_lock_error_t *witness_lock_error = JEMALLOC_N(n_witness_lock_error);
+#endif
+
+#ifdef JEMALLOC_JET
+#undef witness_owner_error
+#define	witness_owner_error JEMALLOC_N(n_witness_owner_error)
+#endif
+void
+witness_owner_error(const witness_t *witness)
+{
+
+	malloc_printf("<jemalloc>: Should own %s(%u)\n", witness->name,
+	    witness->rank);
+	abort();
+}
+#ifdef JEMALLOC_JET
+#undef witness_owner_error
+#define	witness_owner_error JEMALLOC_N(witness_owner_error)
+witness_owner_error_t *witness_owner_error = JEMALLOC_N(n_witness_owner_error);
+#endif
+
+#ifdef JEMALLOC_JET
+#undef witness_not_owner_error
+#define	witness_not_owner_error JEMALLOC_N(n_witness_not_owner_error)
+#endif
+void
+witness_not_owner_error(const witness_t *witness)
+{
+
+	malloc_printf("<jemalloc>: Should not own %s(%u)\n", witness->name,
+	    witness->rank);
+	abort();
+}
+#ifdef JEMALLOC_JET
+#undef witness_not_owner_error
+#define	witness_not_owner_error JEMALLOC_N(witness_not_owner_error)
+witness_not_owner_error_t *witness_not_owner_error =
+    JEMALLOC_N(n_witness_not_owner_error);
+#endif
+
+#ifdef JEMALLOC_JET
+#undef witness_lockless_error
+#define	witness_lockless_error JEMALLOC_N(n_witness_lockless_error)
+#endif
+void
+witness_lockless_error(const witness_list_t *witnesses)
+{
+	witness_t *w;
+
+	malloc_printf("<jemalloc>: Should not own any locks:");
+	ql_foreach(w, witnesses, link) {
+		malloc_printf(" %s(%u)", w->name, w->rank);
+	}
+	malloc_printf("\n");
+	abort();
+}
+#ifdef JEMALLOC_JET
+#undef witness_lockless_error
+#define	witness_lockless_error JEMALLOC_N(witness_lockless_error)
+witness_lockless_error_t *witness_lockless_error =
+    JEMALLOC_N(n_witness_lockless_error);
+#endif
+
+void
+witnesses_cleanup(tsd_t *tsd)
+{
+
+	witness_assert_lockless(tsd_tsdn(tsd));
+
+	/* Do nothing. */
+}
+
+void
+witness_fork_cleanup(tsd_t *tsd)
+{
+
+	/* Do nothing. */
+}
+
+void
+witness_prefork(tsd_t *tsd)
+{
+
+	tsd_witness_fork_set(tsd, true);
+}
+
+void
+witness_postfork_parent(tsd_t *tsd)
+{
+
+	tsd_witness_fork_set(tsd, false);
+}
+
+void
+witness_postfork_child(tsd_t *tsd)
+{
+#ifndef JEMALLOC_MUTEX_INIT_CB
+	witness_list_t *witnesses;
+
+	witnesses = tsd_witnessesp_get(tsd);
+	ql_new(witnesses);
+#endif
+	tsd_witness_fork_set(tsd, false);
+}
--- a/memory/jemalloc/src/src/zone.c
+++ b/memory/jemalloc/src/src/zone.c
@@ -1,25 +1,26 @@
 #include "jemalloc/internal/jemalloc_internal.h"
 #ifndef JEMALLOC_ZONE
 #  error "This source file is for zones on Darwin (OS X)."
 #endif
 
 /*
- * The malloc_default_purgeable_zone function is only available on >= 10.6.
+ * The malloc_default_purgeable_zone() function is only available on >= 10.6.
  * We need to check whether it is present at runtime, thus the weak_import.
  */
 extern malloc_zone_t *malloc_default_purgeable_zone(void)
 JEMALLOC_ATTR(weak_import);
 
 /******************************************************************************/
 /* Data. */
 
-static malloc_zone_t zone;
-static struct malloc_introspection_t zone_introspect;
+static malloc_zone_t *default_zone, *purgeable_zone;
+static malloc_zone_t jemalloc_zone;
+static struct malloc_introspection_t jemalloc_zone_introspect;
 
 /******************************************************************************/
 /* Function prototypes for non-inline static functions. */
 
 static size_t	zone_size(malloc_zone_t *zone, void *ptr);
 static void	*zone_malloc(malloc_zone_t *zone, size_t size);
 static void	*zone_calloc(malloc_zone_t *zone, size_t num, size_t size);
 static void	*zone_valloc(malloc_zone_t *zone, size_t size);
@@ -51,17 +52,17 @@ zone_size(malloc_zone_t *zone, void *ptr
 	 * There appear to be places within Darwin (such as setenv(3)) that
 	 * cause calls to this function with pointers that *no* zone owns.  If
 	 * we knew that all pointers were owned by *some* zone, we could split
 	 * our zone into two parts, and use one as the default allocator and
 	 * the other as the default deallocator/reallocator.  Since that will
 	 * not work in practice, we must check all pointers to assure that they
 	 * reside within a mapped chunk before determining size.
 	 */
-	return (ivsalloc(ptr, config_prof));
+	return (ivsalloc(tsdn_fetch(), ptr, config_prof));
 }
 
 static void *
 zone_malloc(malloc_zone_t *zone, size_t size)
 {
 
 	return (je_malloc(size));
 }
@@ -82,29 +83,29 @@ zone_valloc(malloc_zone_t *zone, size_t 
 
 	return (ret);
 }
 
 static void
 zone_free(malloc_zone_t *zone, void *ptr)
 {
 
-	if (ivsalloc(ptr, config_prof) != 0) {
+	if (ivsalloc(tsdn_fetch(), ptr, config_prof) != 0) {
 		je_free(ptr);
 		return;
 	}
 
 	free(ptr);
 }
 
 static void *
 zone_realloc(malloc_zone_t *zone, void *ptr, size_t size)
 {
 
-	if (ivsalloc(ptr, config_prof) != 0)
+	if (ivsalloc(tsdn_fetch(), ptr, config_prof) != 0)
 		return (je_realloc(ptr, size));
 
 	return (realloc(ptr, size));
 }
 
 #if (JEMALLOC_ZONE_VERSION >= 5)
 static void *
 zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size)
@@ -118,17 +119,17 @@ zone_memalign(malloc_zone_t *zone, size_
 #endif
 
 #if (JEMALLOC_ZONE_VERSION >= 6)
 static void
 zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size)
 {
 	size_t alloc_size;
 
-	alloc_size = ivsalloc(ptr, config_prof);
+	alloc_size = ivsalloc(tsdn_fetch(), ptr, config_prof);
 	if (alloc_size != 0) {
 		assert(alloc_size == size);
 		je_free(ptr);
 		return;
 	}
 
 	free(ptr);
 }
@@ -159,118 +160,171 @@ zone_force_lock(malloc_zone_t *zone)
 	if (isthreaded)
 		jemalloc_prefork();
 }
 
 static void
 zone_force_unlock(malloc_zone_t *zone)
 {
 
+	/*
+	 * Call jemalloc_postfork_child() rather than
+	 * jemalloc_postfork_parent(), because this function is executed by both
+	 * parent and child.  The parent can tolerate having state
+	 * reinitialized, but the child cannot unlock mutexes that were locked
+	 * by the parent.
+	 */
 	if (isthreaded)
-		jemalloc_postfork_parent();
+		jemalloc_postfork_child();
 }
 
-JEMALLOC_ATTR(constructor)
-void
-register_zone(void)
+static void
+zone_init(void)
 {
 
-	/*
-	 * If something else replaced the system default zone allocator, don't
-	 * register jemalloc's.
-	 */
-	malloc_zone_t *default_zone = malloc_default_zone();
-	malloc_zone_t *purgeable_zone = NULL;
-	if (!default_zone->zone_name ||
-	    strcmp(default_zone->zone_name, "DefaultMallocZone") != 0) {
-		return;
-	}
-
-	zone.size = (void *)zone_size;
-	zone.malloc = (void *)zone_malloc;
-	zone.calloc = (void *)zone_calloc;
-	zone.valloc = (void *)zone_valloc;
-	zone.free = (void *)zone_free;
-	zone.realloc = (void *)zone_realloc;
-	zone.destroy = (void *)zone_destroy;
-	zone.zone_name = "jemalloc_zone";
-	zone.batch_malloc = NULL;
-	zone.batch_free = NULL;
-	zone.introspect = &zone_introspect;
-	zone.version = JEMALLOC_ZONE_VERSION;
+	jemalloc_zone.size = (void *)zone_size;
+	jemalloc_zone.malloc = (void *)zone_malloc;
+	jemalloc_zone.calloc = (void *)zone_calloc;
+	jemalloc_zone.valloc = (void *)zone_valloc;
+	jemalloc_zone.free = (void *)zone_free;
+	jemalloc_zone.realloc = (void *)zone_realloc;
+	jemalloc_zone.destroy = (void *)zone_destroy;
+	jemalloc_zone.zone_name = "jemalloc_zone";
+	jemalloc_zone.batch_malloc = NULL;
+	jemalloc_zone.batch_free = NULL;
+	jemalloc_zone.introspect = &jemalloc_zone_introspect;
+	jemalloc_zone.version = JEMALLOC_ZONE_VERSION;
 #if (JEMALLOC_ZONE_VERSION >= 5)
-	zone.memalign = zone_memalign;
+	jemalloc_zone.memalign = zone_memalign;
 #endif
 #if (JEMALLOC_ZONE_VERSION >= 6)
-	zone.free_definite_size = zone_free_definite_size;
+	jemalloc_zone.free_definite_size = zone_free_definite_size;
 #endif
 #if (JEMALLOC_ZONE_VERSION >= 8)
-	zone.pressure_relief = NULL;
+	jemalloc_zone.pressure_relief = NULL;
 #endif
 
-	zone_introspect.enumerator = NULL;
-	zone_introspect.good_size = (void *)zone_good_size;
-	zone_introspect.check = NULL;
-	zone_introspect.print = NULL;
-	zone_introspect.log = NULL;
-	zone_introspect.force_lock = (void *)zone_force_lock;
-	zone_introspect.force_unlock = (void *)zone_force_unlock;
-	zone_introspect.statistics = NULL;
+	jemalloc_zone_introspect.enumerator = NULL;
+	jemalloc_zone_introspect.good_size = (void *)zone_good_size;
+	jemalloc_zone_introspect.check = NULL;
+	jemalloc_zone_introspect.print = NULL;
+	jemalloc_zone_introspect.log = NULL;
+	jemalloc_zone_introspect.force_lock = (void *)zone_force_lock;
+	jemalloc_zone_introspect.force_unlock = (void *)zone_force_unlock;
+	jemalloc_zone_introspect.statistics = NULL;
 #if (JEMALLOC_ZONE_VERSION >= 6)
-	zone_introspect.zone_locked = NULL;
+	jemalloc_zone_introspect.zone_locked = NULL;
 #endif
 #if (JEMALLOC_ZONE_VERSION >= 7)
-	zone_introspect.enable_discharge_checking = NULL;
-	zone_introspect.disable_discharge_checking = NULL;
-	zone_introspect.discharge = NULL;
-#ifdef __BLOCKS__
-	zone_introspect.enumerate_discharged_pointers = NULL;
-#else
-	zone_introspect.enumerate_unavailable_without_blocks = NULL;
+	jemalloc_zone_introspect.enable_discharge_checking = NULL;
+	jemalloc_zone_introspect.disable_discharge_checking = NULL;
+	jemalloc_zone_introspect.discharge = NULL;
+#  ifdef __BLOCKS__
+	jemalloc_zone_introspect.enumerate_discharged_pointers = NULL;
+#  else
+	jemalloc_zone_introspect.enumerate_unavailable_without_blocks = NULL;
+#  endif
 #endif
-#endif
+}
+
+static malloc_zone_t *
+zone_default_get(void)
+{
+	malloc_zone_t **zones = NULL;
+	unsigned int num_zones = 0;
 
 	/*
-	 * The default purgeable zone is created lazily by OSX's libc.  It uses
-	 * the default zone when it is created for "small" allocations
-	 * (< 15 KiB), but assumes the default zone is a scalable_zone.  This
-	 * obviously fails when the default zone is the jemalloc zone, so
-	 * malloc_default_purgeable_zone is called beforehand so that the
-	 * default purgeable zone is created when the default zone is still
-	 * a scalable_zone.  As purgeable zones only exist on >= 10.6, we need
-	 * to check for the existence of malloc_default_purgeable_zone() at
-	 * run time.
+	 * On OSX 10.12, malloc_default_zone returns a special zone that is not
+	 * present in the list of registered zones. That zone uses a "lite zone"
+	 * if one is present (apparently enabled when malloc stack logging is
+	 * enabled), or the first registered zone otherwise. In practice this
+	 * means unless malloc stack logging is enabled, the first registered
+	 * zone is the default.  So get the list of zones to get the first one,
+	 * instead of relying on malloc_default_zone.
 	 */
-	if (malloc_default_purgeable_zone != NULL)
-		purgeable_zone = malloc_default_purgeable_zone();
+	if (KERN_SUCCESS != malloc_get_all_zones(0, NULL,
+	    (vm_address_t**)&zones, &num_zones)) {
+		/*
+		 * Reset the value in case the failure happened after it was
+		 * set.
+		 */
+		num_zones = 0;
+	}
 
-	/* Register the custom zone.  At this point it won't be the default. */
-	malloc_zone_register(&zone);
+	if (num_zones)
+		return (zones[0]);
+
+	return (malloc_default_zone());
+}
+
+/* As written, this function can only promote jemalloc_zone. */
+static void
+zone_promote(void)
+{
+	malloc_zone_t *zone;
 
 	do {
-		default_zone = malloc_default_zone();
 		/*
 		 * Unregister and reregister the default zone.  On OSX >= 10.6,
 		 * unregistering takes the last registered zone and places it
 		 * at the location of the specified zone.  Unregistering the
 		 * default zone thus makes the last registered one the default.
 		 * On OSX < 10.6, unregistering shifts all registered zones.
 		 * The first registered zone then becomes the default.
 		 */
 		malloc_zone_unregister(default_zone);
 		malloc_zone_register(default_zone);
+
 		/*
 		 * On OSX 10.6, having the default purgeable zone appear before
 		 * the default zone makes some things crash because it thinks it
 		 * owns the default zone allocated pointers.  We thus
 		 * unregister/re-register it in order to ensure it's always
 		 * after the default zone.  On OSX < 10.6, there is no purgeable
 		 * zone, so this does nothing.  On OSX >= 10.6, unregistering
 		 * replaces the purgeable zone with the last registered zone
 		 * above, i.e. the default zone.  Registering it again then puts
 		 * it at the end, obviously after the default zone.
 		 */
-		if (purgeable_zone) {
+		if (purgeable_zone != NULL) {
 			malloc_zone_unregister(purgeable_zone);
 			malloc_zone_register(purgeable_zone);
 		}
-	} while (malloc_default_zone() != &zone);
+
+		zone = zone_default_get();
+	} while (zone != &jemalloc_zone);
 }
+
+JEMALLOC_ATTR(constructor)
+void
+zone_register(void)
+{
+
+	/*
+	 * If something else replaced the system default zone allocator, don't
+	 * register jemalloc's.
+	 */
+	default_zone = zone_default_get();
+	if (!default_zone->zone_name || strcmp(default_zone->zone_name,
+	    "DefaultMallocZone") != 0)
+		return;
+
+	/*
+	 * The default purgeable zone is created lazily by OSX's libc.  It uses
+	 * the default zone when it is created for "small" allocations
+	 * (< 15 KiB), but assumes the default zone is a scalable_zone.  This
+	 * obviously fails when the default zone is the jemalloc zone, so
+	 * malloc_default_purgeable_zone() is called beforehand so that the
+	 * default purgeable zone is created when the default zone is still
+	 * a scalable_zone.  As purgeable zones only exist on >= 10.6, we need
+	 * to check for the existence of malloc_default_purgeable_zone() at
+	 * run time.
+	 */
+	purgeable_zone = (malloc_default_purgeable_zone == NULL) ? NULL :
+	    malloc_default_purgeable_zone();
+
+	/* Register the custom zone.  At this point it won't be the default. */
+	zone_init();
+	malloc_zone_register(&jemalloc_zone);
+
+	/* Promote the custom zone to be default. */
+	zone_promote();
+}
--- a/memory/jemalloc/src/test/include/test/jemalloc_test.h.in
+++ b/memory/jemalloc/src/test/include/test/jemalloc_test.h.in
@@ -14,49 +14,16 @@
 
 #ifdef _WIN32
 #  include <windows.h>
 #  include "msvc_compat/windows_extra.h"
 #else
 #  include <pthread.h>
 #endif
 
-/******************************************************************************/
-/*
- * Define always-enabled assertion macros, so that test assertions execute even
- * if assertions are disabled in the library code.  These definitions must
- * exist prior to including "jemalloc/internal/util.h".
- */
-#define	assert(e) do {							\
-	if (!(e)) {							\
-		malloc_printf(						\
-		    "<jemalloc>: %s:%d: Failed assertion: \"%s\"\n",	\
-		    __FILE__, __LINE__, #e);				\
-		abort();						\
-	}								\
-} while (0)
-
-#define	not_reached() do {						\
-	malloc_printf(							\
-	    "<jemalloc>: %s:%d: Unreachable code reached\n",		\
-	    __FILE__, __LINE__);					\
-	abort();							\
-} while (0)
-
-#define	not_implemented() do {						\
-	malloc_printf("<jemalloc>: %s:%d: Not implemented\n",		\
-	    __FILE__, __LINE__);					\
-	abort();							\
-} while (0)
-
-#define	assert_not_implemented(e) do {					\
-	if (!(e))							\
-		not_implemented();					\
-} while (0)
-
 #include "test/jemalloc_test_defs.h"
 
 #ifdef JEMALLOC_OSSPIN
 #  include <libkern/OSAtomic.h>
 #endif
 
 #if defined(HAVE_ALTIVEC) && !defined(__APPLE__)
 #  include <altivec.h>
@@ -81,16 +48,24 @@
  * essentially identical code within the test infrastructure).
  */
 #elif defined(JEMALLOC_INTEGRATION_TEST)
 #  define JEMALLOC_MANGLE
 #  include "jemalloc/jemalloc@install_suffix@.h"
 #  include "jemalloc/internal/jemalloc_internal_defs.h"
 #  include "jemalloc/internal/jemalloc_internal_macros.h"
 
+static const bool config_debug =
+#ifdef JEMALLOC_DEBUG
+    true
+#else
+    false
+#endif
+    ;
+
 #  define JEMALLOC_N(n) @private_namespace@##n
 #  include "jemalloc/internal/private_namespace.h"
 
 #  define JEMALLOC_H_TYPES
 #  define JEMALLOC_H_STRUCTS
 #  define JEMALLOC_H_EXTERNS
 #  define JEMALLOC_H_INLINES
 #  include "jemalloc/internal/nstime.h"
@@ -144,8 +119,45 @@
 #include "test/math.h"
 #include "test/mtx.h"
 #include "test/mq.h"
 #include "test/test.h"
 #include "test/timer.h"
 #include "test/thd.h"
 #define	MEXP 19937
 #include "test/SFMT.h"
+
+/******************************************************************************/
+/*
+ * Define always-enabled assertion macros, so that test assertions execute even
+ * if assertions are disabled in the library code.
+ */
+#undef assert
+#undef not_reached
+#undef not_implemented
+#undef assert_not_implemented
+
+#define	assert(e) do {							\
+	if (!(e)) {							\
+		malloc_printf(						\
+		    "<jemalloc>: %s:%d: Failed assertion: \"%s\"\n",	\
+		    __FILE__, __LINE__, #e);				\
+		abort();						\
+	}								\
+} while (0)
+
+#define	not_reached() do {						\
+	malloc_printf(							\
+	    "<jemalloc>: %s:%d: Unreachable code reached\n",		\
+	    __FILE__, __LINE__);					\
+	abort();							\
+} while (0)
+
+#define	not_implemented() do {						\
+	malloc_printf("<jemalloc>: %s:%d: Not implemented\n",		\
+	    __FILE__, __LINE__);					\
+	abort();							\
+} while (0)
+
+#define	assert_not_implemented(e) do {					\
+	if (!(e))							\
+		not_implemented();					\
+} while (0)
--- a/memory/jemalloc/src/test/include/test/mtx.h
+++ b/memory/jemalloc/src/test/include/test/mtx.h
@@ -3,16 +3,18 @@
  * is unfortunate, but there are allocator bootstrapping considerations that
  * would leak into the test infrastructure if malloc_mutex were used directly
  * in tests.
  */
 
 typedef struct {
 #ifdef _WIN32
 	CRITICAL_SECTION	lock;
+#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
+	os_unfair_lock		lock;
 #elif (defined(JEMALLOC_OSSPIN))
 	OSSpinLock		lock;
 #else
 	pthread_mutex_t		lock;
 #endif
 } mtx_t;
 
 bool	mtx_init(mtx_t *mtx);
--- a/memory/jemalloc/src/test/include/test/test.h
+++ b/memory/jemalloc/src/test/include/test/test.h
@@ -306,24 +306,28 @@ f(void)									\
 	goto label_test_end;						\
 label_test_end:								\
 	p_test_fini();							\
 }
 
 #define	test(...)							\
 	p_test(__VA_ARGS__, NULL)
 
+#define	test_no_malloc_init(...)					\
+	p_test_no_malloc_init(__VA_ARGS__, NULL)
+
 #define	test_skip_if(e) do {						\
 	if (e) {							\
 		test_skip("%s:%s:%d: Test skipped: (%s)",		\
 		    __func__, __FILE__, __LINE__, #e);			\
 		goto label_test_end;					\
 	}								\
 } while (0)
 
 void	test_skip(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2);
 void	test_fail(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2);
 
 /* For private use by macros. */
 test_status_t	p_test(test_t *t, ...);
+test_status_t	p_test_no_malloc_init(test_t *t, ...);
 void	p_test_init(const char *name);
 void	p_test_fini(void);
 void	p_test_fail(const char *prefix, const char *message);
--- a/memory/jemalloc/src/test/integration/aligned_alloc.c
+++ b/memory/jemalloc/src/test/integration/aligned_alloc.c
@@ -1,14 +1,25 @@
 #include "test/jemalloc_test.h"
 
 #define	CHUNK 0x400000
-/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */
-#define	MAXALIGN ((size_t)0x2000000LU)
-#define	NITER 4
+#define	MAXALIGN (((size_t)1) << 23)
+
+/*
+ * On systems which can't merge extents, tests that call this function generate
+ * a lot of dirty memory very quickly.  Purging between cycles mitigates
+ * potential OOM on e.g. 32-bit Windows.
+ */
+static void
+purge(void)
+{
+
+	assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0,
+	    "Unexpected mallctl error");
+}
 
 TEST_BEGIN(test_alignment_errors)
 {
 	size_t alignment;
 	void *p;
 
 	alignment = 0;
 	set_errno(0);
@@ -69,16 +80,17 @@ TEST_BEGIN(test_oom_errors)
 	assert_false(p != NULL || get_errno() != ENOMEM,
 	    "Expected error for aligned_alloc(&p, %zu, %zu)",
 	    alignment, size);
 }
 TEST_END
 
 TEST_BEGIN(test_alignment_and_size)
 {
+#define	NITER 4
 	size_t alignment, size, total;
 	unsigned i;
 	void *ps[NITER];
 
 	for (i = 0; i < NITER; i++)
 		ps[i] = NULL;
 
 	for (alignment = 8;
@@ -105,17 +117,19 @@ TEST_BEGIN(test_alignment_and_size)
 			}
 			for (i = 0; i < NITER; i++) {
 				if (ps[i] != NULL) {
 					free(ps[i]);
 					ps[i] = NULL;
 				}
 			}
 		}
+		purge();
 	}
+#undef NITER
 }
 TEST_END
 
 int
 main(void)
 {
 
 	return (test(
--- a/memory/jemalloc/src/test/integration/mallocx.c
+++ b/memory/jemalloc/src/test/integration/mallocx.c
@@ -1,10 +1,14 @@
 #include "test/jemalloc_test.h"
 
+#ifdef JEMALLOC_FILL
+const char *malloc_conf = "junk:false";
+#endif
+
 static unsigned
 get_nsizes_impl(const char *cmd)
 {
 	unsigned ret;
 	size_t z;
 
 	z = sizeof(unsigned);
 	assert_d_eq(mallctl(cmd, &ret, &z, NULL, 0), 0,
@@ -41,16 +45,29 @@ get_size_impl(const char *cmd, size_t in
 
 static size_t
 get_huge_size(size_t ind)
 {
 
 	return (get_size_impl("arenas.hchunk.0.size", ind));
 }
 
+/*
+ * On systems which can't merge extents, tests that call this function generate
+ * a lot of dirty memory very quickly.  Purging between cycles mitigates
+ * potential OOM on e.g. 32-bit Windows.
+ */
+static void
+purge(void)
+{
+
+	assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0,
+	    "Unexpected mallctl error");
+}
+
 TEST_BEGIN(test_overflow)
 {
 	size_t hugemax;
 
 	hugemax = get_huge_size(get_nhuge()-1);
 
 	assert_ptr_null(mallocx(hugemax+1, 0),
 	    "Expected OOM for mallocx(size=%#zx, 0)", hugemax+1);
@@ -64,17 +81,17 @@ TEST_BEGIN(test_overflow)
 	assert_ptr_null(mallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)),
 	    "Expected OOM for mallocx(size=1, MALLOCX_ALIGN(%#zx))",
 	    ZU(PTRDIFF_MAX)+1);
 }
 TEST_END
 
 TEST_BEGIN(test_oom)
 {
-	size_t hugemax, size, alignment;
+	size_t hugemax;
 	bool oom;
 	void *ptrs[3];
 	unsigned i;
 
 	/*
 	 * It should be impossible to allocate three objects that each consume
 	 * nearly half the virtual address space.
 	 */
@@ -87,66 +104,73 @@ TEST_BEGIN(test_oom)
 	}
 	assert_true(oom,
 	    "Expected OOM during series of calls to mallocx(size=%zu, 0)",
 	    hugemax);
 	for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) {
 		if (ptrs[i] != NULL)
 			dallocx(ptrs[i], 0);
 	}
+	purge();
 
 #if LG_SIZEOF_PTR == 3
-	size      = ZU(0x8000000000000000);
-	alignment = ZU(0x8000000000000000);
+	assert_ptr_null(mallocx(0x8000000000000000ULL,
+	    MALLOCX_ALIGN(0x8000000000000000ULL)),
+	    "Expected OOM for mallocx()");
+	assert_ptr_null(mallocx(0x8000000000000000ULL,
+	    MALLOCX_ALIGN(0x80000000)),
+	    "Expected OOM for mallocx()");
 #else
-	size      = ZU(0x80000000);
-	alignment = ZU(0x80000000);
+	assert_ptr_null(mallocx(0x80000000UL, MALLOCX_ALIGN(0x80000000UL)),
+	    "Expected OOM for mallocx()");
 #endif
-	assert_ptr_null(mallocx(size, MALLOCX_ALIGN(alignment)),
-	    "Expected OOM for mallocx(size=%#zx, MALLOCX_ALIGN(%#zx)", size,
-	    alignment);
 }
 TEST_END
 
 TEST_BEGIN(test_basic)
 {
-#define	MAXSZ (((size_t)1) << 26)
+#define	MAXSZ (((size_t)1) << 23)
 	size_t sz;
 
 	for (sz = 1; sz < MAXSZ; sz = nallocx(sz, 0) + 1) {
 		size_t nsz, rsz;
 		void *p;
 		nsz = nallocx(sz, 0);
 		assert_zu_ne(nsz, 0, "Unexpected nallocx() error");
 		p = mallocx(sz, 0);
-		assert_ptr_not_null(p, "Unexpected mallocx() error");
+		assert_ptr_not_null(p,
+		    "Unexpected mallocx(size=%zx, flags=0) error", sz);
 		rsz = sallocx(p, 0);
 		assert_zu_ge(rsz, sz, "Real size smaller than expected");
 		assert_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch");
 		dallocx(p, 0);
 
 		p = mallocx(sz, 0);
-		assert_ptr_not_null(p, "Unexpected mallocx() error");
+		assert_ptr_not_null(p,
+		    "Unexpected mallocx(size=%zx, flags=0) error", sz);
 		dallocx(p, 0);
 
 		nsz = nallocx(sz, MALLOCX_ZERO);
 		assert_zu_ne(nsz, 0, "Unexpected nallocx() error");
 		p = mallocx(sz, MALLOCX_ZERO);
-		assert_ptr_not_null(p, "Unexpected mallocx() error");
+		assert_ptr_not_null(p,
+		    "Unexpected mallocx(size=%zx, flags=MALLOCX_ZERO) error",
+		    nsz);
 		rsz = sallocx(p, 0);
 		assert_zu_eq(nsz, rsz, "nallocx()/sallocx() rsize mismatch");
 		dallocx(p, 0);
+		purge();
 	}
 #undef MAXSZ
 }
 TEST_END
 
 TEST_BEGIN(test_alignment_and_size)
 {
-#define	MAXALIGN (((size_t)1) << 25)
+#define	MAXALIGN (((size_t)1) << 23)
 #define	NITER 4
 	size_t nsz, rsz, sz, alignment, total;
 	unsigned i;
 	void *ps[NITER];
 
 	for (i = 0; i < NITER; i++)
 		ps[i] = NULL;
 
@@ -186,16 +210,17 @@ TEST_BEGIN(test_alignment_and_size)
 			}
 			for (i = 0; i < NITER; i++) {
 				if (ps[i] != NULL) {
 					dallocx(ps[i], 0);
 					ps[i] = NULL;
 				}
 			}
 		}
+		purge();
 	}
 #undef MAXALIGN
 #undef NITER
 }
 TEST_END
 
 int
 main(void)
--- a/memory/jemalloc/src/test/integration/posix_memalign.c
+++ b/memory/jemalloc/src/test/integration/posix_memalign.c
@@ -1,14 +1,25 @@
 #include "test/jemalloc_test.h"
 
 #define	CHUNK 0x400000
-/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */
-#define	MAXALIGN ((size_t)0x2000000LU)
-#define	NITER 4
+#define	MAXALIGN (((size_t)1) << 23)
+
+/*
+ * On systems which can't merge extents, tests that call this function generate
+ * a lot of dirty memory very quickly.  Purging between cycles mitigates
+ * potential OOM on e.g. 32-bit Windows.
+ */
+static void
+purge(void)
+{
+
+	assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0,
+	    "Unexpected mallctl error");
+}
 
 TEST_BEGIN(test_alignment_errors)
 {
 	size_t alignment;
 	void *p;
 
 	for (alignment = 0; alignment < sizeof(void *); alignment++) {
 		assert_d_eq(posix_memalign(&p, alignment, 1), EINVAL,
@@ -61,16 +72,17 @@ TEST_BEGIN(test_oom_errors)
 	assert_d_ne(posix_memalign(&p, alignment, size), 0,
 	    "Expected error for posix_memalign(&p, %zu, %zu)",
 	    alignment, size);
 }
 TEST_END
 
 TEST_BEGIN(test_alignment_and_size)
 {
+#define	NITER 4
 	size_t alignment, size, total;
 	unsigned i;
 	int err;
 	void *ps[NITER];
 
 	for (i = 0; i < NITER; i++)
 		ps[i] = NULL;
 
@@ -99,17 +111,19 @@ TEST_BEGIN(test_alignment_and_size)
 			}
 			for (i = 0; i < NITER; i++) {
 				if (ps[i] != NULL) {
 					free(ps[i]);
 					ps[i] = NULL;
 				}
 			}
 		}
+		purge();
 	}
+#undef NITER
 }
 TEST_END
 
 int
 main(void)
 {
 
 	return (test(
--- a/memory/jemalloc/src/test/integration/xallocx.c
+++ b/memory/jemalloc/src/test/integration/xallocx.c
@@ -1,10 +1,14 @@
 #include "test/jemalloc_test.h"
 
+#ifdef JEMALLOC_FILL
+const char *malloc_conf = "junk:false";
+#endif
+
 /*
  * Use a separate arena for xallocx() extension/contraction tests so that
  * internal allocation e.g. by heap profiling can't interpose allocations where
  * xallocx() would ordinarily be able to extend.
  */
 static unsigned
 arena_ind(void)
 {
--- a/memory/jemalloc/src/test/src/mtx.c
+++ b/memory/jemalloc/src/test/src/mtx.c
@@ -6,16 +6,18 @@
 
 bool
 mtx_init(mtx_t *mtx)
 {
 
 #ifdef _WIN32
 	if (!InitializeCriticalSectionAndSpinCount(&mtx->lock, _CRT_SPINCOUNT))
 		return (true);
+#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
+	mtx->lock = OS_UNFAIR_LOCK_INIT;
 #elif (defined(JEMALLOC_OSSPIN))
 	mtx->lock = 0;
 #else
 	pthread_mutexattr_t attr;
 
 	if (pthread_mutexattr_init(&attr) != 0)
 		return (true);
 	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_DEFAULT);
@@ -28,39 +30,44 @@ mtx_init(mtx_t *mtx)
 	return (false);
 }
 
 void
 mtx_fini(mtx_t *mtx)
 {
 
 #ifdef _WIN32
+#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
 #elif (defined(JEMALLOC_OSSPIN))
 #else
 	pthread_mutex_destroy(&mtx->lock);
 #endif
 }
 
 void
 mtx_lock(mtx_t *mtx)
 {
 
 #ifdef _WIN32
 	EnterCriticalSection(&mtx->lock);
+#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
+	os_unfair_lock_lock(&mtx->lock);
 #elif (defined(JEMALLOC_OSSPIN))
 	OSSpinLockLock(&mtx->lock);
 #else
 	pthread_mutex_lock(&mtx->lock);
 #endif
 }
 
 void
 mtx_unlock(mtx_t *mtx)
 {
 
 #ifdef _WIN32
 	LeaveCriticalSection(&mtx->lock);
+#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
+	os_unfair_lock_unlock(&mtx->lock);
 #elif (defined(JEMALLOC_OSSPIN))
 	OSSpinLockUnlock(&mtx->lock);
 #else
 	pthread_mutex_unlock(&mtx->lock);
 #endif
 }
--- a/memory/jemalloc/src/test/src/test.c
+++ b/memory/jemalloc/src/test/src/test.c
@@ -55,53 +55,79 @@ p_test_init(const char *name)
 void
 p_test_fini(void)
 {
 
 	test_counts[test_status]++;
 	malloc_printf("%s: %s\n", test_name, test_status_string(test_status));
 }
 
-test_status_t
-p_test(test_t *t, ...)
+static test_status_t
+p_test_impl(bool do_malloc_init, test_t *t, va_list ap)
 {
 	test_status_t ret;
-	va_list ap;
 
-	/*
-	 * Make sure initialization occurs prior to running tests.  Tests are
-	 * special because they may use internal facilities prior to triggering
-	 * initialization as a side effect of calling into the public API.  This
-	 * is a final safety that works even if jemalloc_constructor() doesn't
-	 * run, as for MSVC builds.
-	 */
-	if (nallocx(1, 0) == 0) {
-		malloc_printf("Initialization error");
-		return (test_status_fail);
+	if (do_malloc_init) {
+		/*
+		 * Make sure initialization occurs prior to running tests.
+		 * Tests are special because they may use internal facilities
+		 * prior to triggering initialization as a side effect of
+		 * calling into the public API.
+		 */
+		if (nallocx(1, 0) == 0) {
+			malloc_printf("Initialization error");
+			return (test_status_fail);
+		}
 	}
 
 	ret = test_status_pass;
-	va_start(ap, t);
 	for (; t != NULL; t = va_arg(ap, test_t *)) {
 		t();
 		if (test_status > ret)
 			ret = test_status;
 	}
-	va_end(ap);
 
 	malloc_printf("--- %s: %u/%u, %s: %u/%u, %s: %u/%u ---\n",
 	    test_status_string(test_status_pass),
 	    test_counts[test_status_pass], test_count,
 	    test_status_string(test_status_skip),
 	    test_counts[test_status_skip], test_count,
 	    test_status_string(test_status_fail),
 	    test_counts[test_status_fail], test_count);
 
 	return (ret);
 }
 
+test_status_t
+p_test(test_t *t, ...)
+{
+	test_status_t ret;
+	va_list ap;
+
+	ret = test_status_pass;
+	va_start(ap, t);
+	ret = p_test_impl(true, t, ap);
+	va_end(ap);
+
+	return (ret);
+}
+
+test_status_t
+p_test_no_malloc_init(test_t *t, ...)
+{
+	test_status_t ret;
+	va_list ap;
+
+	ret = test_status_pass;
+	va_start(ap, t);
+	ret = p_test_impl(false, t, ap);
+	va_end(ap);
+
+	return (ret);
+}
+
 void
 p_test_fail(const char *prefix, const char *message)
 {
 
 	malloc_cprintf(NULL, NULL, "%s%s\n", prefix, message);
 	test_status = test_status_fail;
 }
--- a/memory/jemalloc/src/test/src/timer.c
+++ b/memory/jemalloc/src/test/src/timer.c
@@ -27,19 +27,18 @@ timer_usec(const timedelta_t *timer)
 }
 
 void
 timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen)
 {
 	uint64_t t0 = timer_usec(a);
 	uint64_t t1 = timer_usec(b);
 	uint64_t mult;
-	unsigned i = 0;
-	unsigned j;
-	int n;
+	size_t i = 0;
+	size_t j, n;
 
 	/* Whole. */
 	n = malloc_snprintf(&buf[i], buflen-i, "%"FMTu64, t0 / t1);
 	i += n;
 	if (i >= buflen)
 		return;
 	mult = 1;
 	for (j = 0; j < n; j++)
--- a/memory/jemalloc/src/test/stress/microbench.c
+++ b/memory/jemalloc/src/test/stress/microbench.c
@@ -1,12 +1,13 @@
 #include "test/jemalloc_test.h"
 
 JEMALLOC_INLINE_C void
-time_func(timedelta_t *timer, uint64_t nwarmup, uint64_t niter, void (*func)(void))
+time_func(timedelta_t *timer, uint64_t nwarmup, uint64_t niter,
+    void (*func)(void))
 {
 	uint64_t i;
 
 	for (i = 0; i < nwarmup; i++)
 		func();
 	timer_start(timer);
 	for (i = 0; i < niter; i++)
 		func();
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/test/unit/a0.c
@@ -0,0 +1,19 @@
+#include "test/jemalloc_test.h"
+
+TEST_BEGIN(test_a0)
+{
+	void *p;
+
+	p = a0malloc(1);
+	assert_ptr_not_null(p, "Unexpected a0malloc() error");
+	a0dalloc(p);
+}
+TEST_END
+
+int
+main(void)
+{
+
+	return (test_no_malloc_init(
+	    test_a0));
+}
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/test/unit/arena_reset.c
@@ -0,0 +1,159 @@
+#include "test/jemalloc_test.h"
+
+#ifdef JEMALLOC_PROF
+const char *malloc_conf = "prof:true,lg_prof_sample:0";
+#endif
+
+static unsigned
+get_nsizes_impl(const char *cmd)
+{
+	unsigned ret;
+	size_t z;
+
+	z = sizeof(unsigned);
+	assert_d_eq(mallctl(cmd, &ret, &z, NULL, 0), 0,
+	    "Unexpected mallctl(\"%s\", ...) failure", cmd);
+
+	return (ret);
+}
+
+static unsigned
+get_nsmall(void)
+{
+
+	return (get_nsizes_impl("arenas.nbins"));
+}
+
+static unsigned
+get_nlarge(void)
+{
+
+	return (get_nsizes_impl("arenas.nlruns"));
+}
+
+static unsigned
+get_nhuge(void)
+{
+
+	return (get_nsizes_impl("arenas.nhchunks"));
+}
+
+static size_t
+get_size_impl(const char *cmd, size_t ind)
+{
+	size_t ret;
+	size_t z;
+	size_t mib[4];
+	size_t miblen = 4;
+
+	z = sizeof(size_t);
+	assert_d_eq(mallctlnametomib(cmd, mib, &miblen),
+	    0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd);
+	mib[2] = ind;
+	z = sizeof(size_t);
+	assert_d_eq(mallctlbymib(mib, miblen, &ret, &z, NULL, 0),
+	    0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind);
+
+	return (ret);
+}
+
+static size_t
+get_small_size(size_t ind)
+{
+
+	return (get_size_impl("arenas.bin.0.size", ind));
+}
+
+static size_t
+get_large_size(size_t ind)
+{
+
+	return (get_size_impl("arenas.lrun.0.size", ind));
+}
+
+static size_t
+get_huge_size(size_t ind)
+{
+
+	return (get_size_impl("arenas.hchunk.0.size", ind));
+}
+
+TEST_BEGIN(test_arena_reset)
+{
+#define	NHUGE	4
+	unsigned arena_ind, nsmall, nlarge, nhuge, nptrs, i;
+	size_t sz, miblen;
+	void **ptrs;
+	int flags;
+	size_t mib[3];
+	tsdn_t *tsdn;
+
+	test_skip_if((config_valgrind && unlikely(in_valgrind)) || (config_fill
+	    && unlikely(opt_quarantine)));
+
+	sz = sizeof(unsigned);
+	assert_d_eq(mallctl("arenas.extend", &arena_ind, &sz, NULL, 0), 0,
+	    "Unexpected mallctl() failure");
+
+	flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE;
+
+	nsmall = get_nsmall();
+	nlarge = get_nlarge();
+	nhuge = get_nhuge() > NHUGE ? NHUGE : get_nhuge();
+	nptrs = nsmall + nlarge + nhuge;
+	ptrs = (void **)malloc(nptrs * sizeof(void *));
+	assert_ptr_not_null(ptrs, "Unexpected malloc() failure");
+
+	/* Allocate objects with a wide range of sizes. */
+	for (i = 0; i < nsmall; i++) {
+		sz = get_small_size(i);
+		ptrs[i] = mallocx(sz, flags);
+		assert_ptr_not_null(ptrs[i],
+		    "Unexpected mallocx(%zu, %#x) failure", sz, flags);
+	}
+	for (i = 0; i < nlarge; i++) {
+		sz = get_large_size(i);
+		ptrs[nsmall + i] = mallocx(sz, flags);
+		assert_ptr_not_null(ptrs[i],
+		    "Unexpected mallocx(%zu, %#x) failure", sz, flags);
+	}
+	for (i = 0; i < nhuge; i++) {
+		sz = get_huge_size(i);
+		ptrs[nsmall + nlarge + i] = mallocx(sz, flags);
+		assert_ptr_not_null(ptrs[i],
+		    "Unexpected mallocx(%zu, %#x) failure", sz, flags);
+	}
+
+	tsdn = tsdn_fetch();
+
+	/* Verify allocations. */
+	for (i = 0; i < nptrs; i++) {
+		assert_zu_gt(ivsalloc(tsdn, ptrs[i], false), 0,
+		    "Allocation should have queryable size");
+	}
+
+	/* Reset. */
+	miblen = sizeof(mib)/sizeof(size_t);
+	assert_d_eq(mallctlnametomib("arena.0.reset", mib, &miblen), 0,
+	    "Unexpected mallctlnametomib() failure");
+	mib[1] = (size_t)arena_ind;
+	assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0,
+	    "Unexpected mallctlbymib() failure");
+
+	/* Verify allocations no longer exist. */
+	for (i = 0; i < nptrs; i++) {
+		assert_zu_eq(ivsalloc(tsdn, ptrs[i], false), 0,
+		    "Allocation should no longer exist");
+	}
+
+	free(ptrs);
+}
+TEST_END
+
+int
+main(void)
+{
+
+	return (test(
+	    test_arena_reset));
+}
--- a/memory/jemalloc/src/test/unit/bitmap.c
+++ b/memory/jemalloc/src/test/unit/bitmap.c
@@ -96,17 +96,17 @@ TEST_END
 TEST_BEGIN(test_bitmap_sfu)
 {
 	size_t i;
 
 	for (i = 1; i <= BITMAP_MAXBITS; i++) {
 		bitmap_info_t binfo;
 		bitmap_info_init(&binfo, i);
 		{
-			ssize_t j;
+			size_t j;
 			bitmap_t *bitmap = (bitmap_t *)malloc(
 			    bitmap_size(&binfo));
 			bitmap_init(bitmap, &binfo);
 
 			/* Iteratively set bits starting at the beginning. */
 			for (j = 0; j < i; j++) {
 				assert_zd_eq(bitmap_sfu(bitmap, &binfo), j,
 				    "First unset bit should be just after "
@@ -114,17 +114,17 @@ TEST_BEGIN(test_bitmap_sfu)
 			}
 			assert_true(bitmap_full(bitmap, &binfo),
 			    "All bits should be set");
 
 			/*
 			 * Iteratively unset bits starting at the end, and
 			 * verify that bitmap_sfu() reaches the unset bits.
 			 */
-			for (j = i - 1; j >= 0; j--) {
+			for (j = i - 1; j < i; j--) { /* (i..0] */
 				bitmap_unset(bitmap, &binfo, j);
 				assert_zd_eq(bitmap_sfu(bitmap, &binfo), j,
 				    "First unset bit should the bit previously "
 				    "unset");
 				bitmap_unset(bitmap, &binfo, j);
 			}
 			assert_false(bitmap_get(bitmap, &binfo, 0),
 			    "Bit should be unset");
--- a/memory/jemalloc/src/test/unit/ckh.c
+++ b/memory/jemalloc/src/test/unit/ckh.c
@@ -2,18 +2,18 @@
 
 TEST_BEGIN(test_new_delete)
 {
 	tsd_t *tsd;
 	ckh_t ckh;
 
 	tsd = tsd_fetch();
 
-	assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp),
-	    "Unexpected ckh_new() error");
+	assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash,
+	    ckh_string_keycomp), "Unexpected ckh_new() error");
 	ckh_delete(tsd, &ckh);
 
 	assert_false(ckh_new(tsd, &ckh, 3, ckh_pointer_hash,
 	    ckh_pointer_keycomp), "Unexpected ckh_new() error");
 	ckh_delete(tsd, &ckh);
 }
 TEST_END
 
@@ -27,18 +27,18 @@ TEST_BEGIN(test_count_insert_search_remo
 	    "a string.",
 	    "A string."
 	};
 	const char *missing = "A string not in the hash table.";
 	size_t i;
 
 	tsd = tsd_fetch();
 
-	assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp),
-	    "Unexpected ckh_new() error");
+	assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash,
+	    ckh_string_keycomp), "Unexpected ckh_new() error");
 	assert_zu_eq(ckh_count(&ckh), 0,
 	    "ckh_count() should return %zu, but it returned %zu", ZU(0),
 	    ckh_count(&ckh));
 
 	/* Insert. */
 	for (i = 0; i < sizeof(strs)/sizeof(const char *); i++) {
 		ckh_insert(tsd, &ckh, strs[i], strs[i]);
 		assert_zu_eq(ckh_count(&ckh), i+1,
--- a/memory/jemalloc/src/test/unit/decay.c
+++ b/memory/jemalloc/src/test/unit/decay.c
@@ -1,26 +1,34 @@
 #include "test/jemalloc_test.h"
 
 const char *malloc_conf = "purge:decay,decay_time:1";
 
+static nstime_monotonic_t *nstime_monotonic_orig;
 static nstime_update_t *nstime_update_orig;
 
 static unsigned nupdates_mock;
 static nstime_t time_mock;
-static bool nonmonotonic_mock;
+static bool monotonic_mock;
+
+static bool
+nstime_monotonic_mock(void)
+{
+
+	return (monotonic_mock);
+}
 
 static bool
 nstime_update_mock(nstime_t *time)
 {
 
 	nupdates_mock++;
-	if (!nonmonotonic_mock)
+	if (monotonic_mock)
 		nstime_copy(time, &time_mock);
-	return (nonmonotonic_mock);
+	return (!monotonic_mock);
 }
 
 TEST_BEGIN(test_decay_ticks)
 {
 	ticker_t *decay_ticker;
 	unsigned tick0, tick1;
 	size_t sz, huge0, large0;
 	void *p;
@@ -240,30 +248,33 @@ TEST_BEGIN(test_decay_ticker)
 	for (i = 0; i < NPS; i++) {
 		ps[i] = mallocx(large, flags);
 		assert_ptr_not_null(ps[i], "Unexpected mallocx() failure");
 	}
 
 	nupdates_mock = 0;
 	nstime_init(&time_mock, 0);
 	nstime_update(&time_mock);
-	nonmonotonic_mock = false;
+	monotonic_mock = true;
 
+	nstime_monotonic_orig = nstime_monotonic;
 	nstime_update_orig = nstime_update;
+	nstime_monotonic = nstime_monotonic_mock;
 	nstime_update = nstime_update_mock;
 
 	for (i = 0; i < NPS; i++) {
 		dallocx(ps[i], flags);
 		nupdates0 = nupdates_mock;
 		assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0,
 		    "Unexpected arena.0.decay failure");
 		assert_u_gt(nupdates_mock, nupdates0,
 		    "Expected nstime_update() to be called");
 	}
 
+	nstime_monotonic = nstime_monotonic_orig;
 	nstime_update = nstime_update_orig;
 
 	nstime_init(&time, 0);
 	nstime_update(&time);
 	nstime_init2(&decay_time, opt_decay_time, 0);
 	nstime_copy(&deadline, &time);
 	nstime_add(&deadline, &decay_time);
 	do {
@@ -311,19 +322,21 @@ TEST_BEGIN(test_decay_nonmonotonic)
 	    "Unexpected mallctl failure");
 	sz = sizeof(uint64_t);
 	assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge0, &sz, NULL, 0),
 	    config_stats ? 0 : ENOENT, "Unexpected mallctl result");
 
 	nupdates_mock = 0;
 	nstime_init(&time_mock, 0);
 	nstime_update(&time_mock);
-	nonmonotonic_mock = true;
+	monotonic_mock = false;
 
+	nstime_monotonic_orig = nstime_monotonic;
 	nstime_update_orig = nstime_update;
+	nstime_monotonic = nstime_monotonic_mock;
 	nstime_update = nstime_update_mock;
 
 	for (i = 0; i < NPS; i++) {
 		ps[i] = mallocx(large0, flags);
 		assert_ptr_not_null(ps[i], "Unexpected mallocx() failure");
 	}
 
 	for (i = 0; i < NPS; i++) {
@@ -337,18 +350,19 @@ TEST_BEGIN(test_decay_nonmonotonic)
 
 	assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(uint64_t)), 0,
 	    "Unexpected mallctl failure");
 	sz = sizeof(uint64_t);
 	assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge1, &sz, NULL, 0),
 	    config_stats ? 0 : ENOENT, "Unexpected mallctl result");
 
 	if (config_stats)
-		assert_u64_gt(npurge1, npurge0, "Expected purging to occur");
+		assert_u64_eq(npurge0, npurge1, "Unexpected purging occurred");
 
+	nstime_monotonic = nstime_monotonic_orig;
 	nstime_update = nstime_update_orig;
 #undef NPS
 }
 TEST_END
 
 int
 main(void)
 {
--- a/memory/jemalloc/src/test/unit/fork.c
+++ b/memory/jemalloc/src/test/unit/fork.c
@@ -9,31 +9,50 @@ TEST_BEGIN(test_fork)
 #ifndef _WIN32
 	void *p;
 	pid_t pid;
 
 	p = malloc(1);
 	assert_ptr_not_null(p, "Unexpected malloc() failure");
 
 	pid = fork();
+
+	free(p);
+
+	p = malloc(64);
+	assert_ptr_not_null(p, "Unexpected malloc() failure");
+	free(p);
+
 	if (pid == -1) {
 		/* Error. */
 		test_fail("Unexpected fork() failure");
 	} else if (pid == 0) {
 		/* Child. */
-		exit(0);
+		_exit(0);
 	} else {
 		int status;
 
 		/* Parent. */
-		free(p);
-		do {
+		while (true) {
 			if (waitpid(pid, &status, 0) == -1)
 				test_fail("Unexpected waitpid() failure");
-		} while (!WIFEXITED(status) && !WIFSIGNALED(status));
+			if (WIFSIGNALED(status)) {
+				test_fail("Unexpected child termination due to "
+				    "signal %d", WTERMSIG(status));
+				break;
+			}
+			if (WIFEXITED(status)) {
+				if (WEXITSTATUS(status) != 0) {
+					test_fail(
+					    "Unexpected child exit value %d",
+					    WEXITSTATUS(status));
+				}
+				break;
+			}
+		}
 	}
 #else
 	test_skip("fork(2) is irrelevant to Windows");
 #endif
 }
 TEST_END
 
 int
--- a/memory/jemalloc/src/test/unit/junk.c
+++ b/memory/jemalloc/src/test/unit/junk.c
@@ -24,32 +24,32 @@ watch_junking(void *p)
 
 static void
 arena_dalloc_junk_small_intercept(void *ptr, arena_bin_info_t *bin_info)
 {
 	size_t i;
 
 	arena_dalloc_junk_small_orig(ptr, bin_info);
 	for (i = 0; i < bin_info->reg_size; i++) {
-		assert_c_eq(((char *)ptr)[i], 0x5a,
+		assert_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK,
 		    "Missing junk fill for byte %zu/%zu of deallocated region",
 		    i, bin_info->reg_size);
 	}
 	if (ptr == watch_for_junking)
 		saw_junking = true;
 }
 
 static void
 arena_dalloc_junk_large_intercept(void *ptr, size_t usize)
 {
 	size_t i;
 
 	arena_dalloc_junk_large_orig(ptr, usize);
 	for (i = 0; i < usize; i++) {
-		assert_c_eq(((char *)ptr)[i], 0x5a,
+		assert_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK,
 		    "Missing junk fill for byte %zu/%zu of deallocated region",
 		    i, usize);
 	}
 	if (ptr == watch_for_junking)
 		saw_junking = true;
 }
 
 static void
@@ -64,55 +64,55 @@ huge_dalloc_junk_intercept(void *ptr, si
 	 */
 	if (ptr == watch_for_junking)
 		saw_junking = true;
 }
 
 static void
 test_junk(size_t sz_min, size_t sz_max)
 {
-	char *s;
+	uint8_t *s;
 	size_t sz_prev, sz, i;
 
 	if (opt_junk_free) {
 		arena_dalloc_junk_small_orig = arena_dalloc_junk_small;
 		arena_dalloc_junk_small = arena_dalloc_junk_small_intercept;
 		arena_dalloc_junk_large_orig = arena_dalloc_junk_large;
 		arena_dalloc_junk_large = arena_dalloc_junk_large_intercept;
 		huge_dalloc_junk_orig = huge_dalloc_junk;
 		huge_dalloc_junk = huge_dalloc_junk_intercept;
 	}
 
 	sz_prev = 0;
-	s = (char *)mallocx(sz_min, 0);
+	s = (uint8_t *)mallocx(sz_min, 0);
 	assert_ptr_not_null((void *)s, "Unexpected mallocx() failure");
 
 	for (sz = sallocx(s, 0); sz <= sz_max;
 	    sz_prev = sz, sz = sallocx(s, 0)) {
 		if (sz_prev > 0) {
-			assert_c_eq(s[0], 'a',
+			assert_u_eq(s[0], 'a',
 			    "Previously allocated byte %zu/%zu is corrupted",
 			    ZU(0), sz_prev);
-			assert_c_eq(s[sz_prev-1], 'a',
+			assert_u_eq(s[sz_prev-1], 'a',
 			    "Previously allocated byte %zu/%zu is corrupted",
 			    sz_prev-1, sz_prev);
 		}
 
 		for (i = sz_prev; i < sz; i++) {
 			if (opt_junk_alloc) {
-				assert_c_eq(s[i], 0xa5,
+				assert_u_eq(s[i], JEMALLOC_ALLOC_JUNK,
 				    "Newly allocated byte %zu/%zu isn't "
 				    "junk-filled", i, sz);
 			}
 			s[i] = 'a';
 		}
 
 		if (xallocx(s, sz+1, 0, 0) == sz) {
 			watch_junking(s);
-			s = (char *)rallocx(s, sz+1, 0);
+			s = (uint8_t *)rallocx(s, sz+1, 0);
 			assert_ptr_not_null((void *)s,
 			    "Unexpected rallocx() failure");
 			assert_true(!opt_junk_free || saw_junking,
 			    "Expected region of size %zu to be junk-filled",
 			    sz);
 		}
 	}
 
@@ -239,16 +239,15 @@ TEST_BEGIN(test_junk_redzone)
 	arena_redzone_corruption = arena_redzone_corruption_orig;
 }
 TEST_END
 
 int
 main(void)
 {
 
-	assert(!config_fill || opt_junk_alloc || opt_junk_free);
 	return (test(
 	    test_junk_small,
 	    test_junk_large,
 	    test_junk_huge,
 	    test_junk_large_ralloc_shrink,
 	    test_junk_redzone));
 }
--- a/memory/jemalloc/src/test/unit/junk_alloc.c
+++ b/memory/jemalloc/src/test/unit/junk_alloc.c
@@ -1,3 +1,3 @@
-#define JEMALLOC_TEST_JUNK_OPT "junk:alloc"
+#define	JEMALLOC_TEST_JUNK_OPT "junk:alloc"
 #include "junk.c"
 #undef JEMALLOC_TEST_JUNK_OPT
--- a/memory/jemalloc/src/test/unit/junk_free.c
+++ b/memory/jemalloc/src/test/unit/junk_free.c
@@ -1,3 +1,3 @@
-#define JEMALLOC_TEST_JUNK_OPT "junk:free"
+#define	JEMALLOC_TEST_JUNK_OPT "junk:free"
 #include "junk.c"
 #undef JEMALLOC_TEST_JUNK_OPT
--- a/memory/jemalloc/src/test/unit/math.c
+++ b/memory/jemalloc/src/test/unit/math.c
@@ -1,15 +1,19 @@
 #include "test/jemalloc_test.h"
 
 #define	MAX_REL_ERR 1.0e-9
 #define	MAX_ABS_ERR 1.0e-9
 
 #include <float.h>
 
+#ifdef __PGI
+#undef INFINITY
+#endif
+
 #ifndef INFINITY
 #define	INFINITY (DBL_MAX + DBL_MAX)
 #endif
 
 static bool
 double_eq_rel(double a, double b, double max_rel_err, double max_abs_err)
 {
 	double rel_err;
--- a/memory/jemalloc/src/test/unit/nstime.c
+++ b/memory/jemalloc/src/test/unit/nstime.c
@@ -171,16 +171,23 @@ TEST_BEGIN(test_nstime_divide)
 	nstime_imultiply(&nsta, 10);
 	nstime_init(&nstc, 1);
 	nstime_subtract(&nsta, &nstc);
 	assert_u64_eq(nstime_divide(&nsta, &nstb), 9,
 	    "Incorrect division result");
 }
 TEST_END
 
+TEST_BEGIN(test_nstime_monotonic)
+{
+
+	nstime_monotonic();
+}
+TEST_END
+
 TEST_BEGIN(test_nstime_update)
 {
 	nstime_t nst;
 
 	nstime_init(&nst, 0);
 
 	assert_false(nstime_update(&nst), "Basic time update failed.");
 
@@ -193,17 +200,16 @@ TEST_BEGIN(test_nstime_update)
 	{
 		nstime_t nst0;
 		nstime_copy(&nst0, &nst);
 		assert_true(nstime_update(&nst),
 		    "Update should detect time roll-back.");
 		assert_d_eq(nstime_compare(&nst, &nst0), 0,
 		    "Time should not have been modified");
 	}
-
 }
 TEST_END
 
 int
 main(void)
 {
 
 	return (test(
@@ -211,10 +217,11 @@ main(void)
 	    test_nstime_init2,
 	    test_nstime_copy,
 	    test_nstime_compare,
 	    test_nstime_add,
 	    test_nstime_subtract,
 	    test_nstime_imultiply,
 	    test_nstime_idivide,
 	    test_nstime_divide,
+	    test_nstime_monotonic,
 	    test_nstime_update));
 }
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/test/unit/ph.c
@@ -0,0 +1,290 @@
+#include "test/jemalloc_test.h"
+
+typedef struct node_s node_t;
+
+struct node_s {
+#define	NODE_MAGIC 0x9823af7e
+	uint32_t magic;
+	phn(node_t) link;
+	uint64_t key;
+};
+
+static int
+node_cmp(const node_t *a, const node_t *b)
+{
+	int ret;
+
+	ret = (a->key > b->key) - (a->key < b->key);
+	if (ret == 0) {
+		/*
+		 * Duplicates are not allowed in the heap, so force an
+		 * arbitrary ordering for non-identical items with equal keys.
+		 */
+		ret = (((uintptr_t)a) > ((uintptr_t)b))
+		    - (((uintptr_t)a) < ((uintptr_t)b));
+	}
+	return (ret);
+}
+
+static int
+node_cmp_magic(const node_t *a, const node_t *b) {
+
+	assert_u32_eq(a->magic, NODE_MAGIC, "Bad magic");
+	assert_u32_eq(b->magic, NODE_MAGIC, "Bad magic");
+
+	return (node_cmp(a, b));
+}
+
+typedef ph(node_t) heap_t;
+ph_gen(static, heap_, heap_t, node_t, link, node_cmp_magic);
+
+static void
+node_print(const node_t *node, unsigned depth)
+{
+	unsigned i;
+	node_t *leftmost_child, *sibling;
+
+	for (i = 0; i < depth; i++)
+		malloc_printf("\t");
+	malloc_printf("%2"FMTu64"\n", node->key);
+
+	leftmost_child = phn_lchild_get(node_t, link, node);
+	if (leftmost_child == NULL)
+		return;
+	node_print(leftmost_child, depth + 1);
+
+	for (sibling = phn_next_get(node_t, link, leftmost_child); sibling !=
+	    NULL; sibling = phn_next_get(node_t, link, sibling)) {
+		node_print(sibling, depth + 1);
+	}
+}
+
+static void
+heap_print(const heap_t *heap)
+{
+	node_t *auxelm;
+
+	malloc_printf("vvv heap %p vvv\n", heap);
+	if (heap->ph_root == NULL)
+		goto label_return;
+
+	node_print(heap->ph_root, 0);
+
+	for (auxelm = phn_next_get(node_t, link, heap->ph_root); auxelm != NULL;
+	    auxelm = phn_next_get(node_t, link, auxelm)) {
+		assert_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t,
+		    link, auxelm)), auxelm,
+		    "auxelm's prev doesn't link to auxelm");
+		node_print(auxelm, 0);
+	}
+
+label_return:
+	malloc_printf("^^^ heap %p ^^^\n", heap);
+}
+
+static unsigned
+node_validate(const node_t *node, const node_t *parent)
+{
+	unsigned nnodes = 1;
+	node_t *leftmost_child, *sibling;
+
+	if (parent != NULL) {
+		assert_d_ge(node_cmp_magic(node, parent), 0,
+		    "Child is less than parent");
+	}
+
+	leftmost_child = phn_lchild_get(node_t, link, node);
+	if (leftmost_child == NULL)
+		return (nnodes);
+	assert_ptr_eq((void *)phn_prev_get(node_t, link, leftmost_child),
+	    (void *)node, "Leftmost child does not link to node");
+	nnodes += node_validate(leftmost_child, node);
+
+	for (sibling = phn_next_get(node_t, link, leftmost_child); sibling !=
+	    NULL; sibling = phn_next_get(node_t, link, sibling)) {
+		assert_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t,
+		    link, sibling)), sibling,
+		    "sibling's prev doesn't link to sibling");
+		nnodes += node_validate(sibling, node);
+	}
+	return (nnodes);
+}
+
+static unsigned
+heap_validate(const heap_t *heap)
+{
+	unsigned nnodes = 0;
+	node_t *auxelm;
+
+	if (heap->ph_root == NULL)
+		goto label_return;
+
+	nnodes += node_validate(heap->ph_root, NULL);
+
+	for (auxelm = phn_next_get(node_t, link, heap->ph_root); auxelm != NULL;
+	    auxelm = phn_next_get(node_t, link, auxelm)) {
+		assert_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t,
+		    link, auxelm)), auxelm,
+		    "auxelm's prev doesn't link to auxelm");
+		nnodes += node_validate(auxelm, NULL);
+	}
+
+label_return:
+	if (false)
+		heap_print(heap);
+	return (nnodes);
+}
+
+TEST_BEGIN(test_ph_empty)
+{
+	heap_t heap;
+
+	heap_new(&heap);
+	assert_true(heap_empty(&heap), "Heap should be empty");
+	assert_ptr_null(heap_first(&heap), "Unexpected node");
+}
+TEST_END
+
+static void
+node_remove(heap_t *heap, node_t *node)
+{
+
+	heap_remove(heap, node);
+
+	node->magic = 0;
+}
+
+static node_t *
+node_remove_first(heap_t *heap)
+{
+	node_t *node = heap_remove_first(heap);
+	node->magic = 0;
+	return (node);
+}
+
+TEST_BEGIN(test_ph_random)
+{
+#define	NNODES 25
+#define	NBAGS 250
+#define	SEED 42
+	sfmt_t *sfmt;
+	uint64_t bag[NNODES];
+	heap_t heap;
+	node_t nodes[NNODES];
+	unsigned i, j, k;
+
+	sfmt = init_gen_rand(SEED);
+	for (i = 0; i < NBAGS; i++) {
+		switch (i) {
+		case 0:
+			/* Insert in order. */
+			for (j = 0; j < NNODES; j++)
+				bag[j] = j;
+			break;
+		case 1:
+			/* Insert in reverse order. */
+			for (j = 0; j < NNODES; j++)
+				bag[j] = NNODES - j - 1;
+			break;
+		default:
+			for (j = 0; j < NNODES; j++)
+				bag[j] = gen_rand64_range(sfmt, NNODES);
+		}
+
+		for (j = 1; j <= NNODES; j++) {
+			/* Initialize heap and nodes. */
+			heap_new(&heap);
+			assert_u_eq(heap_validate(&heap), 0,
+			    "Incorrect node count");
+			for (k = 0; k < j; k++) {
+				nodes[k].magic = NODE_MAGIC;
+				nodes[k].key = bag[k];
+			}
+
+			/* Insert nodes. */
+			for (k = 0; k < j; k++) {
+				heap_insert(&heap, &nodes[k]);
+				if (i % 13 == 12) {
+					/* Trigger merging. */
+					assert_ptr_not_null(heap_first(&heap),
+					    "Heap should not be empty");
+				}
+				assert_u_eq(heap_validate(&heap), k + 1,
+				    "Incorrect node count");
+			}
+
+			assert_false(heap_empty(&heap),
+			    "Heap should not be empty");
+
+			/* Remove nodes. */
+			switch (i % 4) {
+			case 0:
+				for (k = 0; k < j; k++) {
+					assert_u_eq(heap_validate(&heap), j - k,
+					    "Incorrect node count");
+					node_remove(&heap, &nodes[k]);
+					assert_u_eq(heap_validate(&heap), j - k
+					    - 1, "Incorrect node count");
+				}
+				break;
+			case 1:
+				for (k = j; k > 0; k--) {
+					node_remove(&heap, &nodes[k-1]);
+					assert_u_eq(heap_validate(&heap), k - 1,
+					    "Incorrect node count");
+				}
+				break;
+			case 2: {
+				node_t *prev = NULL;
+				for (k = 0; k < j; k++) {
+					node_t *node = node_remove_first(&heap);
+					assert_u_eq(heap_validate(&heap), j - k
+					    - 1, "Incorrect node count");
+					if (prev != NULL) {
+						assert_d_ge(node_cmp(node,
+						    prev), 0,
+						    "Bad removal order");
+					}
+					prev = node;
+				}
+				break;
+			} case 3: {
+				node_t *prev = NULL;
+				for (k = 0; k < j; k++) {
+					node_t *node = heap_first(&heap);
+					assert_u_eq(heap_validate(&heap), j - k,
+					    "Incorrect node count");
+					if (prev != NULL) {
+						assert_d_ge(node_cmp(node,
+						    prev), 0,
+						    "Bad removal order");
+					}
+					node_remove(&heap, node);
+					assert_u_eq(heap_validate(&heap), j - k
+					    - 1, "Incorrect node count");
+					prev = node;
+				}
+				break;
+			} default:
+				not_reached();
+			}
+
+			assert_ptr_null(heap_first(&heap),
+			    "Heap should be empty");
+			assert_true(heap_empty(&heap), "Heap should be empty");
+		}
+	}
+	fini_gen_rand(sfmt);
+#undef NNODES
+#undef SEED
+}
+TEST_END
+
+int
+main(void)
+{
+
+	return (test(
+	    test_ph_empty,
+	    test_ph_random));
+}
--- a/memory/jemalloc/src/test/unit/prng.c
+++ b/memory/jemalloc/src/test/unit/prng.c
@@ -1,68 +1,263 @@
 #include "test/jemalloc_test.h"
 
-TEST_BEGIN(test_prng_lg_range)
+static void
+test_prng_lg_range_u32(bool atomic)
+{
+	uint32_t sa, sb, ra, rb;
+	unsigned lg_range;
+
+	sa = 42;
+	ra = prng_lg_range_u32(&sa, 32, atomic);
+	sa = 42;
+	rb = prng_lg_range_u32(&sa, 32, atomic);
+	assert_u32_eq(ra, rb,
+	    "Repeated generation should produce repeated results");
+
+	sb = 42;
+	rb = prng_lg_range_u32(&sb, 32, atomic);
+	assert_u32_eq(ra, rb,
+	    "Equivalent generation should produce equivalent results");
+
+	sa = 42;
+	ra = prng_lg_range_u32(&sa, 32, atomic);
+	rb = prng_lg_range_u32(&sa, 32, atomic);
+	assert_u32_ne(ra, rb,
+	    "Full-width results must not immediately repeat");
+
+	sa = 42;
+	ra = prng_lg_range_u32(&sa, 32, atomic);
+	for (lg_range = 31; lg_range > 0; lg_range--) {
+		sb = 42;
+		rb = prng_lg_range_u32(&sb, lg_range, atomic);
+		assert_u32_eq((rb & (UINT32_C(0xffffffff) << lg_range)),
+		    0, "High order bits should be 0, lg_range=%u", lg_range);
+		assert_u32_eq(rb, (ra >> (32 - lg_range)),
+		    "Expected high order bits of full-width result, "
+		    "lg_range=%u", lg_range);
+	}
+}
+
+static void
+test_prng_lg_range_u64(void)
 {
 	uint64_t sa, sb, ra, rb;
 	unsigned lg_range;
 
 	sa = 42;
-	ra = prng_lg_range(&sa, 64);
+	ra = prng_lg_range_u64(&sa, 64);
 	sa = 42;
-	rb = prng_lg_range(&sa, 64);
+	rb = prng_lg_range_u64(&sa, 64);
 	assert_u64_eq(ra, rb,
 	    "Repeated generation should produce repeated results");
 
 	sb = 42;
-	rb = prng_lg_range(&sb, 64);
+	rb = prng_lg_range_u64(&sb, 64);
 	assert_u64_eq(ra, rb,
 	    "Equivalent generation should produce equivalent results");
 
 	sa = 42;
-	ra = prng_lg_range(&sa, 64);
-	rb = prng_lg_range(&sa, 64);
+	ra = prng_lg_range_u64(&sa, 64);
+	rb = prng_lg_range_u64(&sa, 64);
 	assert_u64_ne(ra, rb,
 	    "Full-width results must not immediately repeat");
 
 	sa = 42;
-	ra = prng_lg_range(&sa, 64);
+	ra = prng_lg_range_u64(&sa, 64);
 	for (lg_range = 63; lg_range > 0; lg_range--) {
 		sb = 42;
-		rb = prng_lg_range(&sb, lg_range);
+		rb = prng_lg_range_u64(&sb, lg_range);
 		assert_u64_eq((rb & (UINT64_C(0xffffffffffffffff) << lg_range)),
 		    0, "High order bits should be 0, lg_range=%u", lg_range);
 		assert_u64_eq(rb, (ra >> (64 - lg_range)),
 		    "Expected high order bits of full-width result, "
 		    "lg_range=%u", lg_range);
 	}
 }
+
+static void
+test_prng_lg_range_zu(bool atomic)
+{
+	size_t sa, sb, ra, rb;
+	unsigned lg_range;
+
+	sa = 42;
+	ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic);
+	sa = 42;
+	rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic);
+	assert_zu_eq(ra, rb,
+	    "Repeated generation should produce repeated results");
+
+	sb = 42;
+	rb = prng_lg_range_zu(&sb, ZU(1) << (3 + LG_SIZEOF_PTR), atomic);
+	assert_zu_eq(ra, rb,
+	    "Equivalent generation should produce equivalent results");
+
+	sa = 42;
+	ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic);
+	rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic);
+	assert_zu_ne(ra, rb,
+	    "Full-width results must not immediately repeat");
+
+	sa = 42;
+	ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic);
+	for (lg_range = (ZU(1) << (3 + LG_SIZEOF_PTR)) - 1; lg_range > 0;
+	    lg_range--) {
+		sb = 42;
+		rb = prng_lg_range_zu(&sb, lg_range, atomic);
+		assert_zu_eq((rb & (SIZE_T_MAX << lg_range)),
+		    0, "High order bits should be 0, lg_range=%u", lg_range);
+		assert_zu_eq(rb, (ra >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) -
+		    lg_range)), "Expected high order bits of full-width "
+		    "result, lg_range=%u", lg_range);
+	}
+}
+
+TEST_BEGIN(test_prng_lg_range_u32_nonatomic)
+{
+
+	test_prng_lg_range_u32(false);
+}
 TEST_END
 
-TEST_BEGIN(test_prng_range)
+TEST_BEGIN(test_prng_lg_range_u32_atomic)
+{
+
+	test_prng_lg_range_u32(true);
+}
+TEST_END
+
+TEST_BEGIN(test_prng_lg_range_u64_nonatomic)
+{
+
+	test_prng_lg_range_u64();
+}
+TEST_END
+
+TEST_BEGIN(test_prng_lg_range_zu_nonatomic)
+{
+
+	test_prng_lg_range_zu(false);
+}
+TEST_END
+
+TEST_BEGIN(test_prng_lg_range_zu_atomic)
+{
+
+	test_prng_lg_range_zu(true);
+}
+TEST_END
+
+static void
+test_prng_range_u32(bool atomic)
+{
+	uint32_t range;
+#define	MAX_RANGE	10000000
+#define	RANGE_STEP	97
+#define	NREPS		10
+
+	for (range = 2; range < MAX_RANGE; range += RANGE_STEP) {
+		uint32_t s;
+		unsigned rep;
+
+		s = range;
+		for (rep = 0; rep < NREPS; rep++) {
+			uint32_t r = prng_range_u32(&s, range, atomic);
+
+			assert_u32_lt(r, range, "Out of range");
+		}
+	}
+}
+
+static void
+test_prng_range_u64(void)
 {
 	uint64_t range;
 #define	MAX_RANGE	10000000
 #define	RANGE_STEP	97
 #define	NREPS		10
 
 	for (range = 2; range < MAX_RANGE; range += RANGE_STEP) {
 		uint64_t s;
 		unsigned rep;
 
 		s = range;
 		for (rep = 0; rep < NREPS; rep++) {
-			uint64_t r = prng_range(&s, range);
+			uint64_t r = prng_range_u64(&s, range);
 
 			assert_u64_lt(r, range, "Out of range");
 		}
 	}
 }
+
+static void
+test_prng_range_zu(bool atomic)
+{
+	size_t range;
+#define	MAX_RANGE	10000000
+#define	RANGE_STEP	97
+#define	NREPS		10
+
+	for (range = 2; range < MAX_RANGE; range += RANGE_STEP) {
+		size_t s;
+		unsigned rep;
+
+		s = range;
+		for (rep = 0; rep < NREPS; rep++) {
+			size_t r = prng_range_zu(&s, range, atomic);
+
+			assert_zu_lt(r, range, "Out of range");
+		}
+	}
+}
+
+TEST_BEGIN(test_prng_range_u32_nonatomic)
+{
+
+	test_prng_range_u32(false);
+}
+TEST_END
+
+TEST_BEGIN(test_prng_range_u32_atomic)
+{
+
+	test_prng_range_u32(true);
+}
+TEST_END
+
+TEST_BEGIN(test_prng_range_u64_nonatomic)
+{
+
+	test_prng_range_u64();
+}
+TEST_END
+
+TEST_BEGIN(test_prng_range_zu_nonatomic)
+{
+
+	test_prng_range_zu(false);
+}
+TEST_END
+
+TEST_BEGIN(test_prng_range_zu_atomic)
+{
+
+	test_prng_range_zu(true);
+}
 TEST_END
 
 int
 main(void)
 {
 
 	return (test(
-	    test_prng_lg_range,
-	    test_prng_range));
+	    test_prng_lg_range_u32_nonatomic,
+	    test_prng_lg_range_u32_atomic,
+	    test_prng_lg_range_u64_nonatomic,
+	    test_prng_lg_range_zu_nonatomic,
+	    test_prng_lg_range_zu_atomic,
+	    test_prng_range_u32_nonatomic,
+	    test_prng_range_u32_atomic,
+	    test_prng_range_u64_nonatomic,
+	    test_prng_range_zu_nonatomic,
+	    test_prng_range_zu_atomic));
 }
--- a/memory/jemalloc/src/test/unit/prof_reset.c
+++ b/memory/jemalloc/src/test/unit/prof_reset.c
@@ -89,17 +89,18 @@ TEST_BEGIN(test_prof_reset_basic)
 	    "Unexpected disagreement between \"opt.lg_prof_sample\" and "
 	    "\"prof.lg_sample\"");
 }
 TEST_END
 
 bool prof_dump_header_intercepted = false;
 prof_cnt_t cnt_all_copy = {0, 0, 0, 0};
 static bool
-prof_dump_header_intercept(bool propagate_err, const prof_cnt_t *cnt_all)
+prof_dump_header_intercept(tsdn_t *tsdn, bool propagate_err,
+    const prof_cnt_t *cnt_all)
 {
 
 	prof_dump_header_intercepted = true;
 	memcpy(&cnt_all_copy, cnt_all, sizeof(prof_cnt_t));
 
 	return (false);
 }
 
--- a/memory/jemalloc/src/test/unit/run_quantize.c
+++ b/memory/jemalloc/src/test/unit/run_quantize.c
@@ -106,17 +106,17 @@ TEST_BEGIN(test_monotonic)
 	    "Unexpected mallctl failure");
 
 	sz = sizeof(unsigned);
 	assert_d_eq(mallctl("arenas.nlruns", &nlruns, &sz, NULL, 0), 0,
 	    "Unexpected mallctl failure");
 
 	floor_prev = 0;
 	ceil_prev = 0;
-	for (i = 1; i < run_quantize_max >> LG_PAGE; i++) {
+	for (i = 1; i <= chunksize >> LG_PAGE; i++) {
 		size_t run_size, floor, ceil;
 
 		run_size = i << LG_PAGE;
 		floor = run_quantize_floor(run_size);
 		ceil = run_quantize_ceil(run_size);
 
 		assert_zu_le(floor, run_size,
 		    "Floor should be <= (floor=%zu, run_size=%zu, ceil=%zu)",
--- a/memory/jemalloc/src/test/unit/size_classes.c
+++ b/memory/jemalloc/src/test/unit/size_classes.c
@@ -75,38 +75,110 @@ TEST_BEGIN(test_size_classes)
 	    "s2u() does not round up to size class");
 	assert_zu_eq(size_class, s2u(size_class-1),
 	    "s2u() does not round up to size class");
 	assert_zu_eq(size_class, s2u(size_class),
 	    "s2u() does not compute same size class");
 }
 TEST_END
 
+TEST_BEGIN(test_psize_classes)
+{
+	size_t size_class, max_size_class;
+	pszind_t pind, max_pind;
+
+	max_size_class = get_max_size_class();
+	max_pind = psz2ind(max_size_class);
+
+	for (pind = 0, size_class = pind2sz(pind); pind < max_pind ||
+	    size_class < max_size_class; pind++, size_class =
+	    pind2sz(pind)) {
+		assert_true(pind < max_pind,
+		    "Loop conditionals should be equivalent; pind=%u, "
+		    "size_class=%zu (%#zx)", pind, size_class, size_class);
+		assert_true(size_class < max_size_class,
+		    "Loop conditionals should be equivalent; pind=%u, "
+		    "size_class=%zu (%#zx)", pind, size_class, size_class);
+
+		assert_u_eq(pind, psz2ind(size_class),
+		    "psz2ind() does not reverse pind2sz(): pind=%u -->"
+		    " size_class=%zu --> pind=%u --> size_class=%zu", pind,
+		    size_class, psz2ind(size_class),
+		    pind2sz(psz2ind(size_class)));
+		assert_zu_eq(size_class, pind2sz(psz2ind(size_class)),
+		    "pind2sz() does not reverse psz2ind(): pind=%u -->"
+		    " size_class=%zu --> pind=%u --> size_class=%zu", pind,
+		    size_class, psz2ind(size_class),
+		    pind2sz(psz2ind(size_class)));
+
+		assert_u_eq(pind+1, psz2ind(size_class+1),
+		    "Next size_class does not round up properly");
+
+		assert_zu_eq(size_class, (pind > 0) ?
+		    psz2u(pind2sz(pind-1)+1) : psz2u(1),
+		    "psz2u() does not round up to size class");
+		assert_zu_eq(size_class, psz2u(size_class-1),
+		    "psz2u() does not round up to size class");
+		assert_zu_eq(size_class, psz2u(size_class),
+		    "psz2u() does not compute same size class");
+		assert_zu_eq(psz2u(size_class+1), pind2sz(pind+1),
+		    "psz2u() does not round up to next size class");
+	}
+
+	assert_u_eq(pind, psz2ind(pind2sz(pind)),
+	    "psz2ind() does not reverse pind2sz()");
+	assert_zu_eq(max_size_class, pind2sz(psz2ind(max_size_class)),
+	    "pind2sz() does not reverse psz2ind()");
+
+	assert_zu_eq(size_class, psz2u(pind2sz(pind-1)+1),
+	    "psz2u() does not round up to size class");
+	assert_zu_eq(size_class, psz2u(size_class-1),
+	    "psz2u() does not round up to size class");
+	assert_zu_eq(size_class, psz2u(size_class),
+	    "psz2u() does not compute same size class");
+}
+TEST_END
+
 TEST_BEGIN(test_overflow)
 {
 	size_t max_size_class;
 
 	max_size_class = get_max_size_class();
 
-	assert_u_ge(size2index(max_size_class+1), NSIZES,
-	    "size2index() should return >= NSIZES on overflow");
-	assert_u_ge(size2index(ZU(PTRDIFF_MAX)+1), NSIZES,
-	    "size2index() should return >= NSIZES on overflow");
-	assert_u_ge(size2index(SIZE_T_MAX), NSIZES,
-	    "size2index() should return >= NSIZES on overflow");
+	assert_u_eq(size2index(max_size_class+1), NSIZES,
+	    "size2index() should return NSIZES on overflow");
+	assert_u_eq(size2index(ZU(PTRDIFF_MAX)+1), NSIZES,
+	    "size2index() should return NSIZES on overflow");
+	assert_u_eq(size2index(SIZE_T_MAX), NSIZES,
+	    "size2index() should return NSIZES on overflow");
 
-	assert_zu_gt(s2u(max_size_class+1), HUGE_MAXCLASS,
-	    "s2u() should return > HUGE_MAXCLASS for unsupported size");
-	assert_zu_gt(s2u(ZU(PTRDIFF_MAX)+1), HUGE_MAXCLASS,
-	    "s2u() should return > HUGE_MAXCLASS for unsupported size");
+	assert_zu_eq(s2u(max_size_class+1), 0,
+	    "s2u() should return 0 for unsupported size");
+	assert_zu_eq(s2u(ZU(PTRDIFF_MAX)+1), 0,
+	    "s2u() should return 0 for unsupported size");
 	assert_zu_eq(s2u(SIZE_T_MAX), 0,
 	    "s2u() should return 0 on overflow");
+
+	assert_u_eq(psz2ind(max_size_class+1), NPSIZES,
+	    "psz2ind() should return NPSIZES on overflow");
+	assert_u_eq(psz2ind(ZU(PTRDIFF_MAX)+1), NPSIZES,
+	    "psz2ind() should return NPSIZES on overflow");
+	assert_u_eq(psz2ind(SIZE_T_MAX), NPSIZES,
+	    "psz2ind() should return NPSIZES on overflow");
+
+	assert_zu_eq(psz2u(max_size_class+1), 0,
+	    "psz2u() should return 0 for unsupported size");
+	assert_zu_eq(psz2u(ZU(PTRDIFF_MAX)+1), 0,
+	    "psz2u() should return 0 for unsupported size");
+	assert_zu_eq(psz2u(SIZE_T_MAX), 0,
+	    "psz2u() should return 0 on overflow");
 }
 TEST_END
 
 int
 main(void)
 {
 
 	return (test(
 	    test_size_classes,
+	    test_psize_classes,
 	    test_overflow));
 }
--- a/memory/jemalloc/src/test/unit/stats.c
+++ b/memory/jemalloc/src/test/unit/stats.c
@@ -215,21 +215,21 @@ TEST_BEGIN(test_stats_arenas_large)
 	assert_d_eq(mallctl("stats.arenas.0.large.ndalloc", &ndalloc, &sz,
 	    NULL, 0), expected, "Unexpected mallctl() result");
 	assert_d_eq(mallctl("stats.arenas.0.large.nrequests", &nrequests, &sz,
 	    NULL, 0), expected, "Unexpected mallctl() result");
 
 	if (config_stats) {
 		assert_zu_gt(allocated, 0,
 		    "allocated should be greater than zero");
-		assert_zu_gt(nmalloc, 0,
+		assert_u64_gt(nmalloc, 0,
 		    "nmalloc should be greater than zero");
-		assert_zu_ge(nmalloc, ndalloc,
+		assert_u64_ge(nmalloc, ndalloc,
 		    "nmalloc should be at least as large as ndalloc");
-		assert_zu_gt(nrequests, 0,
+		assert_u64_gt(nrequests, 0,
 		    "nrequests should be greater than zero");
 	}
 
 	dallocx(p, 0);
 }
 TEST_END
 
 TEST_BEGIN(test_stats_arenas_huge)
@@ -257,19 +257,19 @@ TEST_BEGIN(test_stats_arenas_huge)
 	assert_d_eq(mallctl("stats.arenas.0.huge.nmalloc", &nmalloc, &sz,
 	    NULL, 0), expected, "Unexpected mallctl() result");
 	assert_d_eq(mallctl("stats.arenas.0.huge.ndalloc", &ndalloc, &sz,
 	    NULL, 0), expected, "Unexpected mallctl() result");
 
 	if (config_stats) {
 		assert_zu_gt(allocated, 0,
 		    "allocated should be greater than zero");
-		assert_zu_gt(nmalloc, 0,
+		assert_u64_gt(nmalloc, 0,
 		    "nmalloc should be greater than zero");
-		assert_zu_ge(nmalloc, ndalloc,
+		assert_u64_ge(nmalloc, ndalloc,
 		    "nmalloc should be at least as large as ndalloc");
 	}
 
 	dallocx(p, 0);
 }
 TEST_END
 
 TEST_BEGIN(test_stats_arenas_bins)
--- a/memory/jemalloc/src/test/unit/tsd.c
+++ b/memory/jemalloc/src/test/unit/tsd.c
@@ -53,28 +53,28 @@ malloc_tsd_data(, data_, data_t, DATA_IN
 malloc_tsd_funcs(, data_, data_t, DATA_INIT, data_cleanup)
 
 static void *
 thd_start(void *arg)
 {
 	data_t d = (data_t)(uintptr_t)arg;
 	void *p;
 
-	assert_x_eq(*data_tsd_get(), DATA_INIT,
+	assert_x_eq(*data_tsd_get(true), DATA_INIT,
 	    "Initial tsd get should return initialization value");
 
 	p = malloc(1);
 	assert_ptr_not_null(p, "Unexpected malloc() failure");
 
 	data_tsd_set(&d);
-	assert_x_eq(*data_tsd_get(), d,
+	assert_x_eq(*data_tsd_get(true), d,
 	    "After tsd set, tsd get should return value that was set");
 
 	d = 0;
-	assert_x_eq(*data_tsd_get(), (data_t)(uintptr_t)arg,
+	assert_x_eq(*data_tsd_get(true), (data_t)(uintptr_t)arg,
 	    "Resetting local data should have no effect on tsd");
 
 	free(p);
 	return (NULL);
 }
 
 TEST_BEGIN(test_tsd_main_thread)
 {
@@ -94,14 +94,19 @@ TEST_BEGIN(test_tsd_sub_thread)
 	    "Cleanup function should have executed");
 }
 TEST_END
 
 int
 main(void)
 {
 
+	/* Core tsd bootstrapping must happen prior to data_tsd_boot(). */
+	if (nallocx(1, 0) == 0) {
+		malloc_printf("Initialization error");
+		return (test_status_fail);
+	}
 	data_tsd_boot();
 
 	return (test(
 	    test_tsd_main_thread,
 	    test_tsd_sub_thread));
 }
--- a/memory/jemalloc/src/test/unit/util.c
+++ b/memory/jemalloc/src/test/unit/util.c
@@ -1,35 +1,35 @@
 #include "test/jemalloc_test.h"
 
 #define	TEST_POW2_CEIL(t, suf, pri) do {				\
 	unsigned i, pow2;						\
 	t x;								\
 									\
-	assert_zu_eq(pow2_ceil_##suf(0), 0, "Unexpected result");	\
+	assert_##suf##_eq(pow2_ceil_##suf(0), 0, "Unexpected result");	\
 									\
 	for (i = 0; i < sizeof(t) * 8; i++) {				\
-		assert_zu_eq(pow2_ceil_##suf(((t)1) << i), ((t)1) << i,	\
-		    "Unexpected result");				\
+		assert_##suf##_eq(pow2_ceil_##suf(((t)1) << i), ((t)1)	\
+		    << i, "Unexpected result");				\
 	}								\
 									\
 	for (i = 2; i < sizeof(t) * 8; i++) {				\
-		assert_zu_eq(pow2_ceil_##suf((((t)1) << i) - 1),	\
+		assert_##suf##_eq(pow2_ceil_##suf((((t)1) << i) - 1),	\
 		    ((t)1) << i, "Unexpected result");			\
 	}								\
 									\
 	for (i = 0; i < sizeof(t) * 8 - 1; i++) {			\
-		assert_zu_eq(pow2_ceil_##suf((((t)1) << i) + 1),	\
+		assert_##suf##_eq(pow2_ceil_##suf((((t)1) << i) + 1),	\
 		    ((t)1) << (i+1), "Unexpected result");		\
 	}								\
 									\
 	for (pow2 = 1; pow2 < 25; pow2++) {				\
 		for (x = (((t)1) << (pow2-1)) + 1; x <= ((t)1) << pow2;	\
 		    x++) {						\
-			assert_zu_eq(pow2_ceil_##suf(x),		\
+			assert_##suf##_eq(pow2_ceil_##suf(x),		\
 			    ((t)1) << pow2,				\
 			    "Unexpected result, x=%"pri, x);		\
 		}							\
 	}								\
 } while (0)
 
 TEST_BEGIN(test_pow2_ceil_u64)
 {
@@ -155,24 +155,24 @@ TEST_BEGIN(test_malloc_strtoumax)
 	}
 }
 TEST_END
 
 TEST_BEGIN(test_malloc_snprintf_truncated)
 {
 #define	BUFLEN	15
 	char buf[BUFLEN];
-	int result;
+	size_t result;
 	size_t len;
-#define TEST(expected_str_untruncated, ...) do {			\
+#define	TEST(expected_str_untruncated, ...) do {			\
 	result = malloc_snprintf(buf, len, __VA_ARGS__);		\
 	assert_d_eq(strncmp(buf, expected_str_untruncated, len-1), 0,	\
 	    "Unexpected string inequality (\"%s\" vs \"%s\")",		\
-	    buf, expected_str_untruncated);		\
-	assert_d_eq(result, strlen(expected_str_untruncated),		\
+	    buf, expected_str_untruncated);				\
+	assert_zu_eq(result, strlen(expected_str_untruncated),		\
 	    "Unexpected result");					\
 } while (0)
 
 	for (len = 1; len < BUFLEN; len++) {
 		TEST("012346789",	"012346789");
 		TEST("a0123b",		"a%sb", "0123");
 		TEST("a01234567",	"a%s%s", "0123", "4567");
 		TEST("a0123  ",		"a%-6s", "0123");
@@ -188,21 +188,21 @@ TEST_BEGIN(test_malloc_snprintf_truncate
 #undef TEST
 }
 TEST_END
 
 TEST_BEGIN(test_malloc_snprintf)
 {
 #define	BUFLEN	128
 	char buf[BUFLEN];
-	int result;
+	size_t result;
 #define	TEST(expected_str, ...) do {					\
 	result = malloc_snprintf(buf, sizeof(buf), __VA_ARGS__);	\
 	assert_str_eq(buf, expected_str, "Unexpected output");		\
-	assert_d_eq(result, strlen(expected_str), "Unexpected result");	\
+	assert_zu_eq(result, strlen(expected_str), "Unexpected result");\
 } while (0)
 
 	TEST("hello", "hello");
 
 	TEST("50%, 100%", "50%%, %d%%", 100);
 
 	TEST("a0123b", "a%sb", "0123");
 
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/test/unit/witness.c
@@ -0,0 +1,278 @@
+#include "test/jemalloc_test.h"
+
+static witness_lock_error_t *witness_lock_error_orig;
+static witness_owner_error_t *witness_owner_error_orig;
+static witness_not_owner_error_t *witness_not_owner_error_orig;
+static witness_lockless_error_t *witness_lockless_error_orig;
+
+static bool saw_lock_error;
+static bool saw_owner_error;
+static bool saw_not_owner_error;
+static bool saw_lockless_error;
+
+static void
+witness_lock_error_intercept(const witness_list_t *witnesses,
+    const witness_t *witness)
+{
+
+	saw_lock_error = true;
+}
+
+static void
+witness_owner_error_intercept(const witness_t *witness)
+{
+
+	saw_owner_error = true;
+}
+
+static void
+witness_not_owner_error_intercept(const witness_t *witness)
+{
+
+	saw_not_owner_error = true;
+}
+
+static void
+witness_lockless_error_intercept(const witness_list_t *witnesses)
+{
+
+	saw_lockless_error = true;
+}
+
+static int
+witness_comp(const witness_t *a, const witness_t *b)
+{
+
+	assert_u_eq(a->rank, b->rank, "Witnesses should have equal rank");
+
+	return (strcmp(a->name, b->name));
+}
+
+static int
+witness_comp_reverse(const witness_t *a, const witness_t *b)
+{
+
+	assert_u_eq(a->rank, b->rank, "Witnesses should have equal rank");
+
+	return (-strcmp(a->name, b->name));
+}
+
+TEST_BEGIN(test_witness)
+{
+	witness_t a, b;
+	tsdn_t *tsdn;
+
+	test_skip_if(!config_debug);
+
+	tsdn = tsdn_fetch();
+
+	witness_assert_lockless(tsdn);
+
+	witness_init(&a, "a", 1, NULL);
+	witness_assert_not_owner(tsdn, &a);
+	witness_lock(tsdn, &a);
+	witness_assert_owner(tsdn, &a);
+
+	witness_init(&b, "b", 2, NULL);
+	witness_assert_not_owner(tsdn, &b);
+	witness_lock(tsdn, &b);
+	witness_assert_owner(tsdn, &b);
+
+	witness_unlock(tsdn, &a);
+	witness_unlock(tsdn, &b);
+
+	witness_assert_lockless(tsdn);
+}
+TEST_END
+
+TEST_BEGIN(test_witness_comp)
+{
+	witness_t a, b, c, d;
+	tsdn_t *tsdn;
+
+	test_skip_if(!config_debug);
+
+	tsdn = tsdn_fetch();
+
+	witness_assert_lockless(tsdn);
+
+	witness_init(&a, "a", 1, witness_comp);
+	witness_assert_not_owner(tsdn, &a);
+	witness_lock(tsdn, &a);
+	witness_assert_owner(tsdn, &a);
+
+	witness_init(&b, "b", 1, witness_comp);
+	witness_assert_not_owner(tsdn, &b);
+	witness_lock(tsdn, &b);
+	witness_assert_owner(tsdn, &b);
+	witness_unlock(tsdn, &b);
+
+	witness_lock_error_orig = witness_lock_error;
+	witness_lock_error = witness_lock_error_intercept;
+	saw_lock_error = false;
+
+	witness_init(&c, "c", 1, witness_comp_reverse);
+	witness_assert_not_owner(tsdn, &c);
+	assert_false(saw_lock_error, "Unexpected witness lock error");
+	witness_lock(tsdn, &c);
+	assert_true(saw_lock_error, "Expected witness lock error");
+	witness_unlock(tsdn, &c);
+
+	saw_lock_error = false;
+
+	witness_init(&d, "d", 1, NULL);
+	witness_assert_not_owner(tsdn, &d);
+	assert_false(saw_lock_error, "Unexpected witness lock error");
+	witness_lock(tsdn, &d);
+	assert_true(saw_lock_error, "Expected witness lock error");
+	witness_unlock(tsdn, &d);
+
+	witness_unlock(tsdn, &a);
+
+	witness_assert_lockless(tsdn);
+
+	witness_lock_error = witness_lock_error_orig;
+}
+TEST_END
+
+TEST_BEGIN(test_witness_reversal)
+{
+	witness_t a, b;
+	tsdn_t *tsdn;
+
+	test_skip_if(!config_debug);
+
+	witness_lock_error_orig = witness_lock_error;
+	witness_lock_error = witness_lock_error_intercept;
+	saw_lock_error = false;
+
+	tsdn = tsdn_fetch();
+
+	witness_assert_lockless(tsdn);
+
+	witness_init(&a, "a", 1, NULL);
+	witness_init(&b, "b", 2, NULL);
+
+	witness_lock(tsdn, &b);
+	assert_false(saw_lock_error, "Unexpected witness lock error");
+	witness_lock(tsdn, &a);
+	assert_true(saw_lock_error, "Expected witness lock error");
+
+	witness_unlock(tsdn, &a);
+	witness_unlock(tsdn, &b);
+
+	witness_assert_lockless(tsdn);
+
+	witness_lock_error = witness_lock_error_orig;
+}
+TEST_END
+
+TEST_BEGIN(test_witness_recursive)
+{
+	witness_t a;
+	tsdn_t *tsdn;
+
+	test_skip_if(!config_debug);
+
+	witness_not_owner_error_orig = witness_not_owner_error;
+	witness_not_owner_error = witness_not_owner_error_intercept;
+	saw_not_owner_error = false;
+
+	witness_lock_error_orig = witness_lock_error;
+	witness_lock_error = witness_lock_error_intercept;
+	saw_lock_error = false;
+
+	tsdn = tsdn_fetch();
+
+	witness_assert_lockless(tsdn);
+
+	witness_init(&a, "a", 1, NULL);
+
+	witness_lock(tsdn, &a);
+	assert_false(saw_lock_error, "Unexpected witness lock error");
+	assert_false(saw_not_owner_error, "Unexpected witness not owner error");
+	witness_lock(tsdn, &a);
+	assert_true(saw_lock_error, "Expected witness lock error");
+	assert_true(saw_not_owner_error, "Expected witness not owner error");
+
+	witness_unlock(tsdn, &a);
+
+	witness_assert_lockless(tsdn);
+
+	witness_owner_error = witness_owner_error_orig;
+	witness_lock_error = witness_lock_error_orig;
+
+}
+TEST_END
+
+TEST_BEGIN(test_witness_unlock_not_owned)
+{
+	witness_t a;
+	tsdn_t *tsdn;
+
+	test_skip_if(!config_debug);
+
+	witness_owner_error_orig = witness_owner_error;
+	witness_owner_error = witness_owner_error_intercept;
+	saw_owner_error = false;
+
+	tsdn = tsdn_fetch();
+
+	witness_assert_lockless(tsdn);
+
+	witness_init(&a, "a", 1, NULL);
+
+	assert_false(saw_owner_error, "Unexpected owner error");
+	witness_unlock(tsdn, &a);
+	assert_true(saw_owner_error, "Expected owner error");
+
+	witness_assert_lockless(tsdn);
+
+	witness_owner_error = witness_owner_error_orig;
+}
+TEST_END
+
+TEST_BEGIN(test_witness_lockful)
+{
+	witness_t a;
+	tsdn_t *tsdn;
+
+	test_skip_if(!config_debug);
+
+	witness_lockless_error_orig = witness_lockless_error;
+	witness_lockless_error = witness_lockless_error_intercept;
+	saw_lockless_error = false;
+
+	tsdn = tsdn_fetch();
+
+	witness_assert_lockless(tsdn);
+
+	witness_init(&a, "a", 1, NULL);
+
+	assert_false(saw_lockless_error, "Unexpected lockless error");
+	witness_assert_lockless(tsdn);
+
+	witness_lock(tsdn, &a);
+	witness_assert_lockless(tsdn);
+	assert_true(saw_lockless_error, "Expected lockless error");
+
+	witness_unlock(tsdn, &a);
+
+	witness_assert_lockless(tsdn);
+
+	witness_lockless_error = witness_lockless_error_orig;
+}
+TEST_END
+
+int
+main(void)
+{
+
+	return (test(
+	    test_witness,
+	    test_witness_comp,
+	    test_witness_reversal,
+	    test_witness_recursive,
+	    test_witness_unlock_not_owned,
+	    test_witness_lockful));
+}
--- a/memory/jemalloc/src/test/unit/zero.c
+++ b/memory/jemalloc/src/test/unit/zero.c
@@ -3,49 +3,51 @@
 #ifdef JEMALLOC_FILL
 const char *malloc_conf =
     "abort:false,junk:false,zero:true,redzone:false,quarantine:0";
 #endif
 
 static void
 test_zero(size_t sz_min, size_t sz_max)
 {
-	char *s;
+	uint8_t *s;
 	size_t sz_prev, sz, i;
+#define	MAGIC	((uint8_t)0x61)
 
 	sz_prev = 0;
-	s = (char *)mallocx(sz_min, 0);
+	s = (uint8_t *)mallocx(sz_min, 0);
 	assert_ptr_not_null((void *)s, "Unexpected mallocx() failure");
 
 	for (sz = sallocx(s, 0); sz <= sz_max;
 	    sz_prev = sz, sz = sallocx(s, 0)) {
 		if (sz_prev > 0) {
-			assert_c_eq(s[0], 'a',
+			assert_u_eq(s[0], MAGIC,
 			    "Previously allocated byte %zu/%zu is corrupted",
 			    ZU(0), sz_prev);
-			assert_c_eq(s[sz_prev-1], 'a',
+			assert_u_eq(s[sz_prev-1], MAGIC,
 			    "Previously allocated byte %zu/%zu is corrupted",
 			    sz_prev-1, sz_prev);
 		}
 
 		for (i = sz_prev; i < sz; i++) {
-			assert_c_eq(s[i], 0x0,
+			assert_u_eq(s[i], 0x0,
 			    "Newly allocated byte %zu/%zu isn't zero-filled",
 			    i, sz);
-			s[i] = 'a';
+			s[i] = MAGIC;
 		}
 
 		if (xallocx(s, sz+1, 0, 0) == sz) {
-			s = (char *)rallocx(s, sz+1, 0);
+			s = (uint8_t *)rallocx(s, sz+1, 0);
 			assert_ptr_not_null((void *)s,
 			    "Unexpected rallocx() failure");
 		}
 	}
 
 	dallocx(s, 0);
+#undef MAGIC
 }
 
 TEST_BEGIN(test_zero_small)
 {
 
 	test_skip_if(!config_fill);
 	test_zero(1, SMALL_MAXCLASS-1);
 }
--- a/memory/jemalloc/upstream.info
+++ b/memory/jemalloc/upstream.info
@@ -1,2 +1,2 @@
 UPSTREAM_REPO=https://github.com/jemalloc/jemalloc
-UPSTREAM_COMMIT=4.1.1
+UPSTREAM_COMMIT=4.3.1