Bug 1277704 - Update jemalloc 4 to version 4.3.1. r=glandium
authorRyan VanderMeulen <ryanvm@gmail.com>
Tue, 12 Jul 2016 10:37:04 -0400
changeset 321826 c6d5c06685a248fd8eaa01d695e4509392db9209
parent 321825 151ddb5b85a1661a59ed528ccab7e6fe2bbad9de
child 321827 95ab9f05b980662a420d6d664c1996c0dfb8e4c8
push id21
push usermaklebus@msu.edu
push dateThu, 01 Dec 2016 06:22:08 +0000
reviewersglandium
bugs1277704
milestone52.0a1
Bug 1277704 - Update jemalloc 4 to version 4.3.1. r=glandium
memory/jemalloc/moz.build
memory/jemalloc/src/.appveyor.yml
memory/jemalloc/src/.travis.yml
memory/jemalloc/src/COPYING
memory/jemalloc/src/ChangeLog
memory/jemalloc/src/INSTALL
memory/jemalloc/src/Makefile.in
memory/jemalloc/src/README
memory/jemalloc/src/VERSION
memory/jemalloc/src/configure
memory/jemalloc/src/configure.ac
memory/jemalloc/src/doc/html.xsl.in
memory/jemalloc/src/doc/jemalloc.xml.in
memory/jemalloc/src/doc/stylesheet.xsl
memory/jemalloc/src/include/jemalloc/internal/arena.h
memory/jemalloc/src/include/jemalloc/internal/base.h
memory/jemalloc/src/include/jemalloc/internal/bitmap.h
memory/jemalloc/src/include/jemalloc/internal/chunk.h
memory/jemalloc/src/include/jemalloc/internal/chunk_dss.h
memory/jemalloc/src/include/jemalloc/internal/ctl.h
memory/jemalloc/src/include/jemalloc/internal/extent.h
memory/jemalloc/src/include/jemalloc/internal/huge.h
memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal.h.in
memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal_decls.h
memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal_defs.h.in
memory/jemalloc/src/include/jemalloc/internal/mb.h
memory/jemalloc/src/include/jemalloc/internal/mutex.h
memory/jemalloc/src/include/jemalloc/internal/nstime.h
memory/jemalloc/src/include/jemalloc/internal/pages.h
memory/jemalloc/src/include/jemalloc/internal/ph.h
memory/jemalloc/src/include/jemalloc/internal/private_symbols.txt
memory/jemalloc/src/include/jemalloc/internal/prng.h
memory/jemalloc/src/include/jemalloc/internal/prof.h
memory/jemalloc/src/include/jemalloc/internal/rtree.h
memory/jemalloc/src/include/jemalloc/internal/size_classes.sh
memory/jemalloc/src/include/jemalloc/internal/spin.h
memory/jemalloc/src/include/jemalloc/internal/stats.h
memory/jemalloc/src/include/jemalloc/internal/tcache.h
memory/jemalloc/src/include/jemalloc/internal/tsd.h
memory/jemalloc/src/include/jemalloc/internal/util.h
memory/jemalloc/src/include/jemalloc/internal/valgrind.h
memory/jemalloc/src/include/jemalloc/internal/witness.h
memory/jemalloc/src/include/jemalloc/jemalloc_macros.h.in
memory/jemalloc/src/jemalloc.pc.in
memory/jemalloc/src/msvc/ReadMe.txt
memory/jemalloc/src/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
memory/jemalloc/src/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
memory/jemalloc/src/src/arena.c
memory/jemalloc/src/src/base.c
memory/jemalloc/src/src/bitmap.c
memory/jemalloc/src/src/chunk.c
memory/jemalloc/src/src/chunk_dss.c
memory/jemalloc/src/src/chunk_mmap.c
memory/jemalloc/src/src/ckh.c
memory/jemalloc/src/src/ctl.c
memory/jemalloc/src/src/huge.c
memory/jemalloc/src/src/jemalloc.c
memory/jemalloc/src/src/mutex.c
memory/jemalloc/src/src/nstime.c
memory/jemalloc/src/src/pages.c
memory/jemalloc/src/src/prof.c
memory/jemalloc/src/src/quarantine.c
memory/jemalloc/src/src/rtree.c
memory/jemalloc/src/src/spin.c
memory/jemalloc/src/src/stats.c
memory/jemalloc/src/src/tcache.c
memory/jemalloc/src/src/tsd.c
memory/jemalloc/src/src/util.c
memory/jemalloc/src/src/witness.c
memory/jemalloc/src/src/zone.c
memory/jemalloc/src/test/include/test/jemalloc_test.h.in
memory/jemalloc/src/test/include/test/mtx.h
memory/jemalloc/src/test/include/test/test.h
memory/jemalloc/src/test/integration/aligned_alloc.c
memory/jemalloc/src/test/integration/mallocx.c
memory/jemalloc/src/test/integration/posix_memalign.c
memory/jemalloc/src/test/integration/xallocx.c
memory/jemalloc/src/test/src/mtx.c
memory/jemalloc/src/test/src/test.c
memory/jemalloc/src/test/src/timer.c
memory/jemalloc/src/test/stress/microbench.c
memory/jemalloc/src/test/unit/a0.c
memory/jemalloc/src/test/unit/arena_reset.c
memory/jemalloc/src/test/unit/bitmap.c
memory/jemalloc/src/test/unit/ckh.c
memory/jemalloc/src/test/unit/decay.c
memory/jemalloc/src/test/unit/fork.c
memory/jemalloc/src/test/unit/junk.c
memory/jemalloc/src/test/unit/junk_alloc.c
memory/jemalloc/src/test/unit/junk_free.c
memory/jemalloc/src/test/unit/math.c
memory/jemalloc/src/test/unit/nstime.c
memory/jemalloc/src/test/unit/ph.c
memory/jemalloc/src/test/unit/prng.c
memory/jemalloc/src/test/unit/prof_reset.c
memory/jemalloc/src/test/unit/run_quantize.c
memory/jemalloc/src/test/unit/size_classes.c
memory/jemalloc/src/test/unit/stats.c
memory/jemalloc/src/test/unit/tsd.c
memory/jemalloc/src/test/unit/util.c
memory/jemalloc/src/test/unit/witness.c
memory/jemalloc/src/test/unit/zero.c
memory/jemalloc/upstream.info
--- a/memory/jemalloc/moz.build
+++ b/memory/jemalloc/moz.build
@@ -20,23 +20,23 @@ UNIFIED_SOURCES += [
     'src/src/mb.c',
     'src/src/mutex.c',
     'src/src/nstime.c',
     'src/src/pages.c',
     'src/src/prng.c',
     'src/src/prof.c',
     'src/src/quarantine.c',
     'src/src/rtree.c',
+    'src/src/spin.c',
     'src/src/stats.c',
     'src/src/tcache.c',
     'src/src/ticker.c',
     'src/src/tsd.c',
     'src/src/util.c',
-    # FIXME do we ever want valgrind.c?
-    # 'src/src/valgrind.c',
+    'src/src/witness.c',
 ]
 
 SOURCES += [
     # This file cannot be built in unified mode because of symbol clash on arena_purge.
     'src/src/ctl.c',
 ]
 
 # Only OSX needs the zone allocation implementation,
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/.appveyor.yml
@@ -0,0 +1,28 @@
+version: '{build}'
+
+environment:
+  matrix:
+  - MSYSTEM: MINGW64
+    CPU: x86_64
+    MSVC: amd64
+  - MSYSTEM: MINGW32
+    CPU: i686
+    MSVC: x86
+  - MSYSTEM: MINGW64
+    CPU: x86_64
+  - MSYSTEM: MINGW32
+    CPU: i686
+
+install:
+  - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH%
+  - if defined MSVC call "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %MSVC%
+  - if defined MSVC pacman --noconfirm -Rsc mingw-w64-%CPU%-gcc gcc
+  - pacman --noconfirm -Suy mingw-w64-%CPU%-make
+
+build_script:
+  - bash -c "autoconf"
+  - bash -c "./configure"
+  - mingw32-make -j3
+  - file lib/jemalloc.dll
+  - mingw32-make -j3 tests
+  - mingw32-make -k check
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/.travis.yml
@@ -0,0 +1,29 @@
+language: c
+
+matrix:
+  include:
+    - os: linux
+      compiler: gcc
+    - os: linux
+      compiler: gcc
+      env:
+        - EXTRA_FLAGS=-m32
+      addons:
+        apt:
+          packages:
+          - gcc-multilib
+    - os: osx
+      compiler: clang
+    - os: osx
+      compiler: clang
+      env:
+        - EXTRA_FLAGS=-m32
+
+before_script:
+  - autoconf
+  - ./configure${EXTRA_FLAGS:+ CC="$CC $EXTRA_FLAGS"}
+  - make -j3
+  - make -j3 tests
+
+script:
+  - make check
--- a/memory/jemalloc/src/COPYING
+++ b/memory/jemalloc/src/COPYING
@@ -1,15 +1,15 @@
 Unless otherwise specified, files in the jemalloc source distribution are
 subject to the following license:
 --------------------------------------------------------------------------------
-Copyright (C) 2002-2015 Jason Evans <jasone@canonware.com>.
+Copyright (C) 2002-2016 Jason Evans <jasone@canonware.com>.
 All rights reserved.
 Copyright (C) 2007-2012 Mozilla Foundation.  All rights reserved.
-Copyright (C) 2009-2015 Facebook, Inc.  All rights reserved.
+Copyright (C) 2009-2016 Facebook, Inc.  All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 1. Redistributions of source code must retain the above copyright notice(s),
    this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright notice(s),
    this list of conditions and the following disclaimer in the documentation
    and/or other materials provided with the distribution.
--- a/memory/jemalloc/src/ChangeLog
+++ b/memory/jemalloc/src/ChangeLog
@@ -1,14 +1,101 @@
 Following are change highlights associated with official releases.  Important
 bug fixes are all mentioned, but some internal enhancements are omitted here for
 brevity.  Much more detail can be found in the git revision history:
 
     https://github.com/jemalloc/jemalloc
 
+* 4.3.1 (November 7, 2016)
+
+  Bug fixes:
+  - Fix a severe virtual memory leak.  This regression was first released in
+    4.3.0.  (@interwq, @jasone)
+  - Refactor atomic and prng APIs to restore support for 32-bit platforms that
+    use pre-C11 toolchains, e.g. FreeBSD's mips.  (@jasone)
+
+* 4.3.0 (November 4, 2016)
+
+  This is the first release that passes the test suite for multiple Windows
+  configurations, thanks in large part to @glandium setting up continuous
+  integration via AppVeyor (and Travis CI for Linux and OS X).
+
+  New features:
+  - Add "J" (JSON) support to malloc_stats_print().  (@jasone)
+  - Add Cray compiler support.  (@ronawho)
+
+  Optimizations:
+  - Add/use adaptive spinning for bootstrapping and radix tree node
+    initialization.  (@jasone)
+
+  Bug fixes:
+  - Fix large allocation to search starting in the optimal size class heap,
+    which can substantially reduce virtual memory churn and fragmentation.  This
+    regression was first released in 4.0.0.  (@mjp41, @jasone)
+  - Fix stats.arenas.<i>.nthreads accounting.  (@interwq)
+  - Fix and simplify decay-based purging.  (@jasone)
+  - Make DSS (sbrk(2)-related) operations lockless, which resolves potential
+    deadlocks during thread exit.  (@jasone)
+  - Fix over-sized allocation of radix tree leaf nodes.  (@mjp41, @ogaun,
+    @jasone)
+  - Fix over-sized allocation of arena_t (plus associated stats) data
+    structures.  (@jasone, @interwq)
+  - Fix EXTRA_CFLAGS to not affect configuration.  (@jasone)
+  - Fix a Valgrind integration bug.  (@ronawho)
+  - Disallow 0x5a junk filling when running in Valgrind.  (@jasone)
+  - Fix a file descriptor leak on Linux.  This regression was first released in
+    4.2.0.  (@vsarunas, @jasone)
+  - Fix static linking of jemalloc with glibc.  (@djwatson)
+  - Use syscall(2) rather than {open,read,close}(2) during boot on Linux.  This
+    works around other libraries' system call wrappers performing reentrant
+    allocation.  (@kspinka, @Whissi, @jasone)
+  - Fix OS X default zone replacement to work with OS X 10.12.  (@glandium,
+    @jasone)
+  - Fix cached memory management to avoid needless commit/decommit operations
+    during purging, which resolves permanent virtual memory map fragmentation
+    issues on Windows.  (@mjp41, @jasone)
+  - Fix TSD fetches to avoid (recursive) allocation.  This is relevant to
+    non-TLS and Windows configurations.  (@jasone)
+  - Fix malloc_conf overriding to work on Windows.  (@jasone)
+  - Forcibly disable lazy-lock on Windows (was forcibly *enabled*).  (@jasone)
+
+* 4.2.1 (June 8, 2016)
+
+  Bug fixes:
+  - Fix bootstrapping issues for configurations that require allocation during
+    tsd initialization (e.g. --disable-tls).  (@cferris1000, @jasone)
+  - Fix gettimeofday() version of nstime_update().  (@ronawho)
+  - Fix Valgrind regressions in calloc() and chunk_alloc_wrapper().  (@ronawho)
+  - Fix potential VM map fragmentation regression.  (@jasone)
+  - Fix opt_zero-triggered in-place huge reallocation zeroing.  (@jasone)
+  - Fix heap profiling context leaks in reallocation edge cases.  (@jasone)
+
+* 4.2.0 (May 12, 2016)
+
+  New features:
+  - Add the arena.<i>.reset mallctl, which makes it possible to discard all of
+    an arena's allocations in a single operation.  (@jasone)
+  - Add the stats.retained and stats.arenas.<i>.retained statistics.  (@jasone)
+  - Add the --with-version configure option.  (@jasone)
+  - Support --with-lg-page values larger than actual page size.  (@jasone)
+
+  Optimizations:
+  - Use pairing heaps rather than red-black trees for various hot data
+    structures.  (@djwatson, @jasone)
+  - Streamline fast paths of rtree operations.  (@jasone)
+  - Optimize the fast paths of calloc() and [m,d,sd]allocx().  (@jasone)
+  - Decommit unused virtual memory if the OS does not overcommit.  (@jasone)
+  - Specify MAP_NORESERVE on Linux if [heuristic] overcommit is active, in order
+    to avoid unfortunate interactions during fork(2).  (@jasone)
+
+  Bug fixes:
+  - Fix chunk accounting related to triggering gdump profiles.  (@jasone)
+  - Link against librt for clock_gettime(2) if glibc < 2.17.  (@jasone)
+  - Scale leak report summary according to sampling probability.  (@jasone)
+
 * 4.1.1 (May 3, 2016)
 
   This bugfix release resolves a variety of mostly minor issues, though the
   bitmap fix is critical for 64-bit Windows.
 
   Bug fixes:
   - Fix the linear scan version of bitmap_sfu() to shift by the proper amount
     even when sizeof(long) is not the same as sizeof(void *), as on 64-bit
@@ -16,17 +103,17 @@ brevity.  Much more detail can be found 
   - Fix hashing functions to avoid unaligned memory accesses (and resulting
     crashes).  This is relevant at least to some ARM-based platforms.
     (@rkmisra)
   - Fix fork()-related lock rank ordering reversals.  These reversals were
     unlikely to cause deadlocks in practice except when heap profiling was
     enabled and active.  (@jasone)
   - Fix various chunk leaks in OOM code paths.  (@jasone)
   - Fix malloc_stats_print() to print opt.narenas correctly.  (@jasone)
-  - Fix MSVC-specific build/test issues.  (@rustyx, yuslepukhin)
+  - Fix MSVC-specific build/test issues.  (@rustyx, @yuslepukhin)
   - Fix a variety of test failures that were due to test fragility rather than
     core bugs.  (@jasone)
 
 * 4.1.0 (February 28, 2016)
 
   This release is primarily about optimizations, but it also incorporates a lot
   of portability-motivated refactoring and enhancements.  Many people worked on
   this release, to an extent that even with the omission here of minor changes
@@ -75,24 +162,24 @@ brevity.  Much more detail can be found 
     up incremental huge reallocation.  (@jasone)
 
   Incompatible changes:
   - Make opt.narenas unsigned rather than size_t.  (@jasone)
 
   Bug fixes:
   - Fix stats.cactive accounting regression.  (@rustyx, @jasone)
   - Handle unaligned keys in hash().  This caused problems for some ARM systems.
-    (@jasone, Christopher Ferris)
+    (@jasone, @cferris1000)
   - Refactor arenas array.  In addition to fixing a fork-related deadlock, this
     makes arena lookups faster and simpler.  (@jasone)
   - Move retained memory allocation out of the default chunk allocation
     function, to a location that gets executed even if the application installs
     a custom chunk allocation function.  This resolves a virtual memory leak.
     (@buchgr)
-  - Fix a potential tsd cleanup leak.  (Christopher Ferris, @jasone)
+  - Fix a potential tsd cleanup leak.  (@cferris1000, @jasone)
   - Fix run quantization.  In practice this bug had no impact unless
     applications requested memory with alignment exceeding one page.
     (@jasone, @djwatson)
   - Fix LinuxThreads-specific bootstrapping deadlock.  (Cosmin Paraschiv)
   - jeprof:
     + Don't discard curl options if timeout is not defined.  (@djwatson)
     + Detect failed profile fetches.  (@djwatson)
   - Fix stats.arenas.<i>.{dss,lg_dirty_mult,decay_time,pactive,pdirty} for
--- a/memory/jemalloc/src/INSTALL
+++ b/memory/jemalloc/src/INSTALL
@@ -30,16 +30,20 @@ any of the following arguments (not a de
 --prefix=<install-root-dir>
     Set the base directory in which to install.  For example:
 
         ./configure --prefix=/usr/local
 
     will cause files to be installed into /usr/local/include, /usr/local/lib,
     and /usr/local/man.
 
+--with-version=<major>.<minor>.<bugfix>-<nrev>-g<gid>
+    Use the specified version string rather than trying to generate one (if in
+    a git repository) or use existing the VERSION file (if present).
+
 --with-rpath=<colon-separated-rpath>
     Embed one or more library paths, so that libjemalloc can find the libraries
     it is linked to.  This works only on ELF-based systems.
 
 --with-mangling=<map>
     Mangle public symbols specified in <map> which is a comma-separated list of
     name:mangled pairs.
 
--- a/memory/jemalloc/src/Makefile.in
+++ b/memory/jemalloc/src/Makefile.in
@@ -19,21 +19,21 @@ DATADIR := $(DESTDIR)@DATADIR@
 MANDIR := $(DESTDIR)@MANDIR@
 srcroot := @srcroot@
 objroot := @objroot@
 abs_srcroot := @abs_srcroot@
 abs_objroot := @abs_objroot@
 
 # Build parameters.
 CPPFLAGS := @CPPFLAGS@ -I$(srcroot)include -I$(objroot)include
-CFLAGS := @CFLAGS@
+EXTRA_CFLAGS := @EXTRA_CFLAGS@
+CFLAGS := @CFLAGS@ $(EXTRA_CFLAGS)
 LDFLAGS := @LDFLAGS@
 EXTRA_LDFLAGS := @EXTRA_LDFLAGS@
 LIBS := @LIBS@
-TESTLIBS := @TESTLIBS@
 RPATH_EXTRA := @RPATH_EXTRA@
 SO := @so@
 IMPORTLIB := @importlib@
 O := @o@
 A := @a@
 EXE := @exe@
 LIBPREFIX := @libprefix@
 REV := @rev@
@@ -48,25 +48,29 @@ cfghdrs_out := @cfghdrs_out@
 cfgoutputs_in := $(addprefix $(srcroot),@cfgoutputs_in@)
 cfgoutputs_out := @cfgoutputs_out@
 enable_autogen := @enable_autogen@
 enable_code_coverage := @enable_code_coverage@
 enable_prof := @enable_prof@
 enable_valgrind := @enable_valgrind@
 enable_zone_allocator := @enable_zone_allocator@
 MALLOC_CONF := @JEMALLOC_CPREFIX@MALLOC_CONF
+link_whole_archive := @link_whole_archive@
 DSO_LDFLAGS = @DSO_LDFLAGS@
 SOREV = @SOREV@
 PIC_CFLAGS = @PIC_CFLAGS@
 CTARGET = @CTARGET@
 LDTARGET = @LDTARGET@
+TEST_LD_MODE = @TEST_LD_MODE@
 MKLIB = @MKLIB@
 AR = @AR@
 ARFLAGS = @ARFLAGS@
 CC_MM = @CC_MM@
+LM := @LM@
+INSTALL = @INSTALL@
 
 ifeq (macho, $(ABI))
 TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH="$(objroot)lib"
 else
 ifeq (pecoff, $(ABI))
 TEST_LIBRARY_PATH := PATH="$(PATH):$(objroot)lib"
 else
 TEST_LIBRARY_PATH :=
@@ -95,20 +99,22 @@ C_SRCS := $(srcroot)src/jemalloc.c \
 	$(srcroot)src/mutex.c \
 	$(srcroot)src/nstime.c \
 	$(srcroot)src/pages.c \
 	$(srcroot)src/prng.c \
 	$(srcroot)src/prof.c \
 	$(srcroot)src/quarantine.c \
 	$(srcroot)src/rtree.c \
 	$(srcroot)src/stats.c \
+	$(srcroot)src/spin.c \
 	$(srcroot)src/tcache.c \
 	$(srcroot)src/ticker.c \
 	$(srcroot)src/tsd.c \
-	$(srcroot)src/util.c
+	$(srcroot)src/util.c \
+	$(srcroot)src/witness.c
 ifeq ($(enable_valgrind), 1)
 C_SRCS += $(srcroot)src/valgrind.c
 endif
 ifeq ($(enable_zone_allocator), 1)
 C_SRCS += $(srcroot)src/zone.c
 endif
 ifeq ($(IMPORTLIB),$(SO))
 STATIC_LIBS := $(objroot)lib/$(LIBJEMALLOC).$(A)
@@ -117,42 +123,55 @@ ifdef PIC_CFLAGS
 STATIC_LIBS += $(objroot)lib/$(LIBJEMALLOC)_pic.$(A)
 else
 STATIC_LIBS += $(objroot)lib/$(LIBJEMALLOC)_s.$(A)
 endif
 DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV)
 ifneq ($(SOREV),$(SO))
 DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO)
 endif
+ifeq (1, $(link_whole_archive))
+LJEMALLOC := -Wl,--whole-archive -L$(objroot)lib -l$(LIBJEMALLOC) -Wl,--no-whole-archive
+else
+LJEMALLOC := $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB)
+endif
 PC := $(objroot)jemalloc.pc
 MAN3 := $(objroot)doc/jemalloc$(install_suffix).3
 DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml
 DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.html)
 DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.3)
 DOCS := $(DOCS_HTML) $(DOCS_MAN3)
 C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \
 	$(srcroot)test/src/btalloc_1.c $(srcroot)test/src/math.c \
 	$(srcroot)test/src/mtx.c $(srcroot)test/src/mq.c \
 	$(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \
 	$(srcroot)test/src/thd.c $(srcroot)test/src/timer.c
+ifeq (1, $(link_whole_archive))
+C_UTIL_INTEGRATION_SRCS :=
+else
 C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/util.c
-TESTS_UNIT := $(srcroot)test/unit/atomic.c \
+endif
+TESTS_UNIT := \
+	$(srcroot)test/unit/a0.c \
+	$(srcroot)test/unit/arena_reset.c \
+	$(srcroot)test/unit/atomic.c \
 	$(srcroot)test/unit/bitmap.c \
 	$(srcroot)test/unit/ckh.c \
 	$(srcroot)test/unit/decay.c \
 	$(srcroot)test/unit/fork.c \
 	$(srcroot)test/unit/hash.c \
 	$(srcroot)test/unit/junk.c \
 	$(srcroot)test/unit/junk_alloc.c \
 	$(srcroot)test/unit/junk_free.c \
 	$(srcroot)test/unit/lg_chunk.c \
 	$(srcroot)test/unit/mallctl.c \
 	$(srcroot)test/unit/math.c \
 	$(srcroot)test/unit/mq.c \
 	$(srcroot)test/unit/mtx.c \
+	$(srcroot)test/unit/ph.c \
 	$(srcroot)test/unit/prng.c \
 	$(srcroot)test/unit/prof_accum.c \
 	$(srcroot)test/unit/prof_active.c \
 	$(srcroot)test/unit/prof_gdump.c \
 	$(srcroot)test/unit/prof_idump.c \
 	$(srcroot)test/unit/prof_reset.c \
 	$(srcroot)test/unit/prof_thread_name.c \
 	$(srcroot)test/unit/ql.c \
@@ -164,16 +183,17 @@ TESTS_UNIT := $(srcroot)test/unit/atomic
 	$(srcroot)test/unit/SFMT.c \
 	$(srcroot)test/unit/size_classes.c \
 	$(srcroot)test/unit/smoothstep.c \
 	$(srcroot)test/unit/stats.c \
 	$(srcroot)test/unit/ticker.c \
 	$(srcroot)test/unit/nstime.c \
 	$(srcroot)test/unit/tsd.c \
 	$(srcroot)test/unit/util.c \
+	$(srcroot)test/unit/witness.c \
 	$(srcroot)test/unit/zero.c
 TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \
 	$(srcroot)test/integration/allocated.c \
 	$(srcroot)test/integration/sdallocx.c \
 	$(srcroot)test/integration/mallocx.c \
 	$(srcroot)test/integration/MALLOCX_ARENA.c \
 	$(srcroot)test/integration/overflow.c \
 	$(srcroot)test/integration/posix_memalign.c \
@@ -285,79 +305,79 @@ endif
 $(objroot)lib/$(LIBJEMALLOC)_s.$(A) : $(C_OBJS)
 
 $(STATIC_LIBS):
 	@mkdir -p $(@D)
 	$(AR) $(ARFLAGS)@AROUT@ $+
 
 $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS)
 	@mkdir -p $(@D)
-	$(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(TESTLIBS) $(EXTRA_LDFLAGS)
+	$(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS)
 
 $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB)
 	@mkdir -p $(@D)
-	$(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(TESTLIBS) $(EXTRA_LDFLAGS)
+	$(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LJEMALLOC) $(LDFLAGS) $(filter-out -lm,$(filter -lrt -lpthread,$(LIBS))) $(LM) $(EXTRA_LDFLAGS)
 
 $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB)
 	@mkdir -p $(@D)
-	$(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(TESTLIBS) $(EXTRA_LDFLAGS)
+	$(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS)
 
 build_lib_shared: $(DSOS)
 build_lib_static: $(STATIC_LIBS)
 build_lib: build_lib_shared build_lib_static
 
 install_bin:
-	install -d $(BINDIR)
+	$(INSTALL) -d $(BINDIR)
 	@for b in $(BINS); do \
-	echo "install -m 755 $$b $(BINDIR)"; \
-	install -m 755 $$b $(BINDIR); \
+	echo "$(INSTALL) -m 755 $$b $(BINDIR)"; \
+	$(INSTALL) -m 755 $$b $(BINDIR); \
 done
 
 install_include:
-	install -d $(INCLUDEDIR)/jemalloc
+	$(INSTALL) -d $(INCLUDEDIR)/jemalloc
 	@for h in $(C_HDRS); do \
-	echo "install -m 644 $$h $(INCLUDEDIR)/jemalloc"; \
-	install -m 644 $$h $(INCLUDEDIR)/jemalloc; \
+	echo "$(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc"; \
+	$(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc; \
 done
 
 install_lib_shared: $(DSOS)
-	install -d $(LIBDIR)
-	install -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR)
+	$(INSTALL) -d $(LIBDIR)
+	$(INSTALL) -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR)
 ifneq ($(SOREV),$(SO))
 	ln -sf $(LIBJEMALLOC).$(SOREV) $(LIBDIR)/$(LIBJEMALLOC).$(SO)
 endif
 
 install_lib_static: $(STATIC_LIBS)
-	install -d $(LIBDIR)
+	$(INSTALL) -d $(LIBDIR)
 	@for l in $(STATIC_LIBS); do \
-	echo "install -m 755 $$l $(LIBDIR)"; \
-	install -m 755 $$l $(LIBDIR); \
+	echo "$(INSTALL) -m 755 $$l $(LIBDIR)"; \
+	$(INSTALL) -m 755 $$l $(LIBDIR); \
 done
 
 install_lib_pc: $(PC)
-	install -d $(LIBDIR)/pkgconfig
+	$(INSTALL) -d $(LIBDIR)/pkgconfig
 	@for l in $(PC); do \
-	echo "install -m 644 $$l $(LIBDIR)/pkgconfig"; \
-	install -m 644 $$l $(LIBDIR)/pkgconfig; \
+	echo "$(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig"; \
+	$(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig; \
 done
 
 install_lib: install_lib_shared install_lib_static install_lib_pc
 
 install_doc_html:
-	install -d $(DATADIR)/doc/jemalloc$(install_suffix)
+	$(INSTALL) -d $(DATADIR)/doc/jemalloc$(install_suffix)
 	@for d in $(DOCS_HTML); do \
-	echo "install -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \
-	install -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \
+	echo "$(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \
+	$(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \
 done
 
 install_doc_man:
-	install -d $(MANDIR)/man3
+	$(INSTALL) -d $(MANDIR)/man3
 	@for d in $(DOCS_MAN3); do \
-	echo "install -m 644 $$d $(MANDIR)/man3"; \
-	install -m 644 $$d $(MANDIR)/man3; \
+	echo "$(INSTALL) -m 644 $$d $(MANDIR)/man3"; \
+	$(INSTALL) -m 644 $$d $(MANDIR)/man3; \
 done
 
 install_doc: install_doc_html install_doc_man
 
 install: install_bin install_include install_lib install_doc
 
 tests_unit: $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%$(EXE))
 tests_integration: $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%$(EXE))
--- a/memory/jemalloc/src/README
+++ b/memory/jemalloc/src/README
@@ -12,9 +12,9 @@ repercussions for real world application
 
 The COPYING file contains copyright and licensing information.
 
 The INSTALL file contains information on how to configure, build, and install
 jemalloc.
 
 The ChangeLog file contains a brief summary of changes for each release.
 
-URL: http://www.canonware.com/jemalloc/
+URL: http://jemalloc.net/
--- a/memory/jemalloc/src/VERSION
+++ b/memory/jemalloc/src/VERSION
@@ -1,1 +1,1 @@
-4.1.1-0-ge02b83cc5e3c4d30f93dba945162e3aa58d962d6
+4.3.1-0-g0110fa8451af905affd77c3bea0d545fee2251b2
--- a/memory/jemalloc/src/configure
+++ b/memory/jemalloc/src/configure
@@ -623,17 +623,16 @@ ac_subst_vars='LTLIBOBJS
 LIBOBJS
 cfgoutputs_out
 cfgoutputs_in
 cfghdrs_out
 cfghdrs_in
 enable_zone_allocator
 enable_tls
 enable_lazy_lock
-TESTLIBS
 jemalloc_version_gid
 jemalloc_version_nrev
 jemalloc_version_bugfix
 jemalloc_version_minor
 jemalloc_version_major
 jemalloc_version
 enable_cache_oblivious
 enable_xmalloc
@@ -653,26 +652,29 @@ enable_code_coverage
 AUTOCONF
 LD
 RANLIB
 INSTALL_DATA
 INSTALL_SCRIPT
 INSTALL_PROGRAM
 enable_autogen
 RPATH_EXTRA
+LM
 CC_MM
 AROUT
 ARFLAGS
 MKLIB
+TEST_LD_MODE
 LDTARGET
 CTARGET
 PIC_CFLAGS
 SOREV
 EXTRA_LDFLAGS
 DSO_LDFLAGS
+link_whole_archive
 libprefix
 exe
 a
 o
 importlib
 so
 LD_PRELOAD_VAR
 RPATH
@@ -684,16 +686,17 @@ host_cpu
 host
 build_os
 build_vendor
 build_cpu
 build
 EGREP
 GREP
 CPP
+EXTRA_CFLAGS
 OBJEXT
 EXEEXT
 ac_ct_CC
 CPPFLAGS
 LDFLAGS
 CFLAGS
 CC
 XSLROOT
@@ -778,16 +781,17 @@ enable_utrace
 enable_valgrind
 enable_xmalloc
 enable_cache_oblivious
 with_lg_tiny_min
 with_lg_quantum
 with_lg_page
 with_lg_page_sizes
 with_lg_size_class_group
+with_version
 enable_lazy_lock
 enable_tls
 enable_zone_allocator
 '
       ac_precious_vars='build_alias
 host_alias
 target_alias
 CC
@@ -1466,16 +1470,18 @@ Optional Packages:
   --with-lg-quantum=<lg-quantum>
                           Base 2 log of minimum allocation alignment
   --with-lg-page=<lg-page>
                           Base 2 log of system page size
   --with-lg-page-sizes=<lg-page-sizes>
                           Base 2 logs of system page sizes to support
   --with-lg-size-class-group=<lg-size-class-group>
                           Base 2 log of size classes per doubling
+  --with-version=<major>.<minor>.<bugfix>-<nrev>-g<gid>
+                          Version string
 
 Some influential environment variables:
   CC          C compiler command
   CFLAGS      C compiler flags
   LDFLAGS     linker flags, e.g. -L<lib dir> if you have libraries in a
               nonstandard directory <lib dir>
   LIBS        libraries to pass to the linker, e.g. -l<library>
   CPPFLAGS    (Objective) C/C++ preprocessor flags, e.g. -I<include dir> if
@@ -3430,16 +3436,17 @@ if test "x$ac_cv_prog_cc_c89" != xno; th
 fi
 
 ac_ext=c
 ac_cpp='$CPP $CPPFLAGS'
 ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
 ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
 ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
+
 if test "x$GCC" != "xyes" ; then
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler is MSVC" >&5
 $as_echo_n "checking whether compiler is MSVC... " >&6; }
 if ${je_cv_msvc+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -3463,20 +3470,135 @@ else
   je_cv_msvc=no
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_msvc" >&5
 $as_echo "$je_cv_msvc" >&6; }
 fi
 
+je_cv_cray_prgenv_wrapper=""
+if test "x${PE_ENV}" != "x" ; then
+  case "${CC}" in
+    CC|cc)
+	je_cv_cray_prgenv_wrapper="yes"
+	;;
+    *)
+       ;;
+  esac
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler is cray" >&5
+$as_echo_n "checking whether compiler is cray... " >&6; }
+if ${je_cv_cray+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+#ifndef _CRAYC
+  int fail-1;
+#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_cray=yes
+else
+  je_cv_cray=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_cray" >&5
+$as_echo "$je_cv_cray" >&6; }
+
+if test "x${je_cv_cray}" = "xyes" ; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether cray compiler version is 8.4" >&5
+$as_echo_n "checking whether cray compiler version is 8.4... " >&6; }
+if ${je_cv_cray_84+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+#if !(_RELEASE_MAJOR == 8 && _RELEASE_MINOR == 4)
+  int fail-1;
+#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_cray_84=yes
+else
+  je_cv_cray_84=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_cray_84" >&5
+$as_echo "$je_cv_cray_84" >&6; }
+fi
+
 if test "x$CFLAGS" = "x" ; then
   no_CFLAGS="yes"
   if test "x$GCC" = "xyes" ; then
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -std=gnu11" >&5
+$as_echo_n "checking whether compiler supports -std=gnu11... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-std=gnu11"
+else
+  CFLAGS="${CFLAGS} -std=gnu11"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_cflags_appended=-std=gnu11
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  je_cv_cflags_appended=
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+    if test "x$je_cv_cflags_appended" = "x-std=gnu11" ; then
+      cat >>confdefs.h <<_ACEOF
+#define JEMALLOC_HAS_RESTRICT 1
+_ACEOF
+
+    else
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -std=gnu99" >&5
 $as_echo_n "checking whether compiler supports -std=gnu99... " >&6; }
 TCFLAGS="${CFLAGS}"
 if test "x${CFLAGS}" = "x" ; then
   CFLAGS="-std=gnu99"
 else
   CFLAGS="${CFLAGS} -std=gnu99"
 fi
@@ -3502,21 +3624,22 @@ else
   je_cv_cflags_appended=
               { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
 $as_echo "no" >&6; }
               CFLAGS="${TCFLAGS}"
 
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
-    if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then
-      cat >>confdefs.h <<_ACEOF
+      if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then
+        cat >>confdefs.h <<_ACEOF
 #define JEMALLOC_HAS_RESTRICT 1
 _ACEOF
 
+      fi
     fi
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Wall" >&5
 $as_echo_n "checking whether compiler supports -Wall... " >&6; }
 TCFLAGS="${CFLAGS}"
 if test "x${CFLAGS}" = "x" ; then
   CFLAGS="-Wall"
 else
@@ -3617,16 +3740,52 @@ else
               { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
 $as_echo "no" >&6; }
               CFLAGS="${TCFLAGS}"
 
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Wsign-compare" >&5
+$as_echo_n "checking whether compiler supports -Wsign-compare... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-Wsign-compare"
+else
+  CFLAGS="${CFLAGS} -Wsign-compare"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_cflags_appended=-Wsign-compare
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  je_cv_cflags_appended=
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -pipe" >&5
 $as_echo_n "checking whether compiler supports -pipe... " >&6; }
 TCFLAGS="${CFLAGS}"
 if test "x${CFLAGS}" = "x" ; then
   CFLAGS="-pipe"
 else
   CFLAGS="${CFLAGS} -pipe"
 fi
@@ -3836,55 +3995,168 @@ else
 $as_echo "no" >&6; }
               CFLAGS="${TCFLAGS}"
 
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
     CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat"
   fi
-fi
-if test "x$EXTRA_CFLAGS" != "x" ; then
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports $EXTRA_CFLAGS" >&5
-$as_echo_n "checking whether compiler supports $EXTRA_CFLAGS... " >&6; }
+  if test "x$je_cv_cray" = "xyes" ; then
+        if test "x$je_cv_cray_84" = "xyes" ; then
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -hipa2" >&5
+$as_echo_n "checking whether compiler supports -hipa2... " >&6; }
 TCFLAGS="${CFLAGS}"
 if test "x${CFLAGS}" = "x" ; then
-  CFLAGS="$EXTRA_CFLAGS"
-else
-  CFLAGS="${CFLAGS} $EXTRA_CFLAGS"
+  CFLAGS="-hipa2"
+else
+  CFLAGS="${CFLAGS} -hipa2"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_cflags_appended=-hipa2
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  je_cv_cflags_appended=
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -hnognu" >&5
+$as_echo_n "checking whether compiler supports -hnognu... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-hnognu"
+else
+  CFLAGS="${CFLAGS} -hnognu"
 fi
 cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
 
 int
 main ()
 {
 
     return 0;
 
   ;
   return 0;
 }
 _ACEOF
 if ac_fn_c_try_compile "$LINENO"; then :
-  je_cv_cflags_appended=$EXTRA_CFLAGS
+  je_cv_cflags_appended=-hnognu
               { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
 $as_echo "yes" >&6; }
 else
   je_cv_cflags_appended=
               { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
 $as_echo "no" >&6; }
               CFLAGS="${TCFLAGS}"
 
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
-fi
+    fi
+    if test "x$enable_cc_silence" != "xno" ; then
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -hnomessage=128" >&5
+$as_echo_n "checking whether compiler supports -hnomessage=128... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-hnomessage=128"
+else
+  CFLAGS="${CFLAGS} -hnomessage=128"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_cflags_appended=-hnomessage=128
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  je_cv_cflags_appended=
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -hnomessage=1357" >&5
+$as_echo_n "checking whether compiler supports -hnomessage=1357... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-hnomessage=1357"
+else
+  CFLAGS="${CFLAGS} -hnomessage=1357"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_cflags_appended=-hnomessage=1357
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  je_cv_cflags_appended=
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+    fi
+  fi
+fi
+
 ac_ext=c
 ac_cpp='$CPP $CPPFLAGS'
 ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
 ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
 ac_compiler_gnu=$ac_cv_c_compiler_gnu
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5
 $as_echo_n "checking how to run the C preprocessor... " >&6; }
 # On Suns, sometimes $CPP names a directory.
@@ -4911,27 +5183,37 @@ cat >>confdefs.h <<_ACEOF
 
 LD_PRELOAD_VAR="LD_PRELOAD"
 so="so"
 importlib="${so}"
 o="$ac_objext"
 a="a"
 exe="$ac_exeext"
 libprefix="lib"
+link_whole_archive="0"
 DSO_LDFLAGS='-shared -Wl,-soname,$(@F)'
 RPATH='-Wl,-rpath,$(1)'
 SOREV="${so}.${rev}"
 PIC_CFLAGS='-fPIC -DPIC'
 CTARGET='-o $@'
 LDTARGET='-o $@'
+TEST_LD_MODE=
 EXTRA_LDFLAGS=
 ARFLAGS='crus'
 AROUT=' $@'
 CC_MM=1
 
+if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then
+  TEST_LD_MODE='-dynamic'
+fi
+
+if test "x${je_cv_cray}" = "xyes" ; then
+  CC_MM=
+fi
+
 
 
 
 if test -n "$ac_tool_prefix"; then
   # Extract the first word of "${ac_tool_prefix}ar", so it can be a program name with args.
 set dummy ${ac_tool_prefix}ar; ac_word=$2
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
 $as_echo_n "checking for $ac_word... " >&6; }
@@ -5018,65 +5300,64 @@ ac_tool_warned=yes ;;
 esac
     AR=$ac_ct_AR
   fi
 else
   AR="$ac_cv_prog_AR"
 fi
 
 
+CFLAGS="$CFLAGS"
 default_munmap="1"
 maps_coalesce="1"
 case "${host}" in
   *-*-darwin* | *-*-ios*)
-	CFLAGS="$CFLAGS"
 	abi="macho"
 	$as_echo "#define JEMALLOC_PURGE_MADVISE_FREE  " >>confdefs.h
 
 	RPATH=""
 	LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES"
 	so="dylib"
 	importlib="${so}"
 	force_tls="0"
 	DSO_LDFLAGS='-shared -Wl,-install_name,$(LIBDIR)/$(@F)'
 	SOREV="${rev}.${so}"
 	sbrk_deprecated="1"
 	;;
   *-*-freebsd*)
-	CFLAGS="$CFLAGS"
 	abi="elf"
+	$as_echo "#define JEMALLOC_SYSCTL_VM_OVERCOMMIT  " >>confdefs.h
+
 	$as_echo "#define JEMALLOC_PURGE_MADVISE_FREE  " >>confdefs.h
 
 	force_lazy_lock="1"
 	;;
   *-*-dragonfly*)
-	CFLAGS="$CFLAGS"
 	abi="elf"
 	$as_echo "#define JEMALLOC_PURGE_MADVISE_FREE  " >>confdefs.h
 
 	;;
   *-*-openbsd*)
-	CFLAGS="$CFLAGS"
 	abi="elf"
 	$as_echo "#define JEMALLOC_PURGE_MADVISE_FREE  " >>confdefs.h
 
 	force_tls="0"
 	;;
   *-*-bitrig*)
-	CFLAGS="$CFLAGS"
 	abi="elf"
 	$as_echo "#define JEMALLOC_PURGE_MADVISE_FREE  " >>confdefs.h
 
 	;;
   *-*-linux*)
-	CFLAGS="$CFLAGS"
-	CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE"
+		CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE"
 	abi="elf"
 	$as_echo "#define JEMALLOC_HAS_ALLOCA_H 1" >>confdefs.h
 
+	$as_echo "#define JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY  " >>confdefs.h
+
 	$as_echo "#define JEMALLOC_PURGE_MADVISE_DONTNEED  " >>confdefs.h
 
 	$as_echo "#define JEMALLOC_THREADED_INIT  " >>confdefs.h
 
 	$as_echo "#define JEMALLOC_USE_CXX_THROW  " >>confdefs.h
 
 	default_munmap="0"
 	;;
@@ -5095,28 +5376,27 @@ int
 main ()
 {
 
   ;
   return 0;
 }
 _ACEOF
 if ac_fn_c_try_compile "$LINENO"; then :
-  CFLAGS="$CFLAGS"; abi="elf"
+  abi="elf"
 else
   abi="aout"
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 	{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $abi" >&5
 $as_echo "$abi" >&6; }
 	$as_echo "#define JEMALLOC_PURGE_MADVISE_FREE  " >>confdefs.h
 
 	;;
   *-*-solaris2*)
-	CFLAGS="$CFLAGS"
 	abi="elf"
 	$as_echo "#define JEMALLOC_PURGE_MADVISE_FREE  " >>confdefs.h
 
 	RPATH='-Wl,-R,$(1)'
 		CPPFLAGS="$CPPFLAGS -D_POSIX_PTHREAD_SEMANTICS"
 	LIBS="$LIBS -lposix4 -lsocket -lnsl"
 	;;
   *-ibm-aix*)
@@ -5125,33 +5405,33 @@ rm -f core conftest.err conftest.$ac_obj
 	else
 	  	  LD_PRELOAD_VAR="LDR_PRELOAD"
 	fi
 	abi="xcoff"
 	;;
   *-*-mingw* | *-*-cygwin*)
 	abi="pecoff"
 	force_tls="0"
-	force_lazy_lock="1"
 	maps_coalesce="0"
 	RPATH=""
 	so="dll"
 	if test "x$je_cv_msvc" = "xyes" ; then
 	  importlib="lib"
 	  DSO_LDFLAGS="-LD"
 	  EXTRA_LDFLAGS="-link -DEBUG"
 	  CTARGET='-Fo$@'
 	  LDTARGET='-Fe$@'
 	  AR='lib'
 	  ARFLAGS='-nologo -out:'
 	  AROUT='$@'
 	  CC_MM=
         else
 	  importlib="${so}"
 	  DSO_LDFLAGS="-shared"
+	  link_whole_archive="1"
 	fi
 	a="lib"
 	libprefix=""
 	SOREV="${so}"
 	PIC_CFLAGS=""
 	;;
   *)
 	{ $as_echo "$as_me:${as_lineno-$LINENO}: result: Unsupported operating system: ${host}" >&5
@@ -5223,16 +5503,83 @@ cat >>confdefs.h <<_ACEOF
 
 
 
 
 
 
 
 
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing log" >&5
+$as_echo_n "checking for library containing log... " >&6; }
+if ${ac_cv_search_log+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char log ();
+int
+main ()
+{
+return log ();
+  ;
+  return 0;
+}
+_ACEOF
+for ac_lib in '' m; do
+  if test -z "$ac_lib"; then
+    ac_res="none required"
+  else
+    ac_res=-l$ac_lib
+    LIBS="-l$ac_lib  $ac_func_search_save_LIBS"
+  fi
+  if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_search_log=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext
+  if ${ac_cv_search_log+:} false; then :
+  break
+fi
+done
+if ${ac_cv_search_log+:} false; then :
+
+else
+  ac_cv_search_log=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_log" >&5
+$as_echo "$ac_cv_search_log" >&6; }
+ac_res=$ac_cv_search_log
+if test "$ac_res" != no; then :
+  test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+else
+  as_fn_error $? "Missing math functions" "$LINENO" 5
+fi
+
+if test "x$ac_cv_search_log" != "xnone required" ; then
+  LM="$ac_cv_search_log"
+else
+  LM=
+fi
+
+
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether __attribute__ syntax is compilable" >&5
 $as_echo_n "checking whether __attribute__ syntax is compilable... " >&6; }
 if ${je_cv_attribute+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 static __attribute__((unused)) void foo(void){}
@@ -5330,16 +5677,52 @@ else
               { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
 $as_echo "no" >&6; }
               CFLAGS="${TCFLAGS}"
 
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -herror_on_warning" >&5
+$as_echo_n "checking whether compiler supports -herror_on_warning... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-herror_on_warning"
+else
+  CFLAGS="${CFLAGS} -herror_on_warning"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_cflags_appended=-herror_on_warning
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  je_cv_cflags_appended=
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether tls_model attribute is compilable" >&5
 $as_echo_n "checking whether tls_model attribute is compilable... " >&6; }
 if ${je_cv_tls_model+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
@@ -5405,16 +5788,52 @@ else
               { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
 $as_echo "no" >&6; }
               CFLAGS="${TCFLAGS}"
 
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -herror_on_warning" >&5
+$as_echo_n "checking whether compiler supports -herror_on_warning... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-herror_on_warning"
+else
+  CFLAGS="${CFLAGS} -herror_on_warning"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_cflags_appended=-herror_on_warning
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  je_cv_cflags_appended=
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether alloc_size attribute is compilable" >&5
 $as_echo_n "checking whether alloc_size attribute is compilable... " >&6; }
 if ${je_cv_alloc_size+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 #include <stdlib.h>
@@ -5475,16 +5894,52 @@ else
               { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
 $as_echo "no" >&6; }
               CFLAGS="${TCFLAGS}"
 
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -herror_on_warning" >&5
+$as_echo_n "checking whether compiler supports -herror_on_warning... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-herror_on_warning"
+else
+  CFLAGS="${CFLAGS} -herror_on_warning"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_cflags_appended=-herror_on_warning
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  je_cv_cflags_appended=
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether format(gnu_printf, ...) attribute is compilable" >&5
 $as_echo_n "checking whether format(gnu_printf, ...) attribute is compilable... " >&6; }
 if ${je_cv_format_gnu_printf+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 #include <stdlib.h>
@@ -5545,16 +6000,52 @@ else
               { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
 $as_echo "no" >&6; }
               CFLAGS="${TCFLAGS}"
 
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -herror_on_warning" >&5
+$as_echo_n "checking whether compiler supports -herror_on_warning... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-herror_on_warning"
+else
+  CFLAGS="${CFLAGS} -herror_on_warning"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_cflags_appended=-herror_on_warning
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  je_cv_cflags_appended=
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether format(printf, ...) attribute is compilable" >&5
 $as_echo_n "checking whether format(printf, ...) attribute is compilable... " >&6; }
 if ${je_cv_format_printf+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 #include <stdlib.h>
@@ -6647,18 +7138,18 @@ if test "x$backtrace_method" = "x" ; the
   backtrace_method="none (disabling profiling)"
   enable_prof="0"
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking configured backtracing method" >&5
 $as_echo_n "checking configured backtracing method... " >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $backtrace_method" >&5
 $as_echo "$backtrace_method" >&6; }
 if test "x$enable_prof" = "x1" ; then
-  if test "x$abi" != "xpecoff"; then
-        LIBS="$LIBS -lm"
+    if test "x$LM" != "x" ; then
+    LIBS="$LIBS $LM"
   fi
 
   $as_echo "#define JEMALLOC_PROF  " >>confdefs.h
 
 fi
 
 
 # Check whether --enable-tcache was given.
@@ -6897,16 +7388,62 @@ fi
 
 if test "x$enable_cache_oblivious" = "x1" ; then
   $as_echo "#define JEMALLOC_CACHE_OBLIVIOUS  " >>confdefs.h
 
 fi
 
 
 
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program using __builtin_unreachable is compilable" >&5
+$as_echo_n "checking whether a program using __builtin_unreachable is compilable... " >&6; }
+if ${je_cv_gcc_builtin_unreachable+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+void foo (void) {
+  __builtin_unreachable();
+}
+
+int
+main ()
+{
+
+	{
+		foo();
+	}
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  je_cv_gcc_builtin_unreachable=yes
+else
+  je_cv_gcc_builtin_unreachable=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_gcc_builtin_unreachable" >&5
+$as_echo "$je_cv_gcc_builtin_unreachable" >&6; }
+
+if test "x${je_cv_gcc_builtin_unreachable}" = "xyes" ; then
+  $as_echo "#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable" >>confdefs.h
+
+else
+  $as_echo "#define JEMALLOC_INTERNAL_UNREACHABLE abort" >>confdefs.h
+
+fi
+
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program using __builtin_ffsl is compilable" >&5
 $as_echo_n "checking whether a program using __builtin_ffsl is compilable... " >&6; }
 if ${je_cv_gcc_builtin_ffsl+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
@@ -7122,33 +7659,46 @@ fi
 if test "${with_lg_size_class_group+set}" = set; then :
   withval=$with_lg_size_class_group; LG_SIZE_CLASS_GROUP="$with_lg_size_class_group"
 else
   LG_SIZE_CLASS_GROUP="2"
 fi
 
 
 
-if test "x`test ! \"${srcroot}\" && cd \"${srcroot}\"; git rev-parse --is-inside-work-tree 2>/dev/null`" = "xtrue" ; then
-        rm -f "${objroot}VERSION"
-  for pattern in '[0-9].[0-9].[0-9]' '[0-9].[0-9].[0-9][0-9]' \
-                 '[0-9].[0-9][0-9].[0-9]' '[0-9].[0-9][0-9].[0-9][0-9]' \
-                 '[0-9][0-9].[0-9].[0-9]' '[0-9][0-9].[0-9].[0-9][0-9]' \
-                 '[0-9][0-9].[0-9][0-9].[0-9]' \
-                 '[0-9][0-9].[0-9][0-9].[0-9][0-9]'; do
-    if test ! -e "${objroot}VERSION" ; then
-      (test ! "${srcroot}" && cd "${srcroot}"; git describe --long --abbrev=40 --match="${pattern}") > "${objroot}VERSION.tmp" 2>/dev/null
-      if test $? -eq 0 ; then
-        mv "${objroot}VERSION.tmp" "${objroot}VERSION"
-        break
-      fi
+
+# Check whether --with-version was given.
+if test "${with_version+set}" = set; then :
+  withval=$with_version;
+    echo "${with_version}" | grep '^[0-9]\+\.[0-9]\+\.[0-9]\+-[0-9]\+-g[0-9a-f]\+$' 2>&1 1>/dev/null
+    if test $? -ne 0 ; then
+      as_fn_error $? "${with_version} does not match <major>.<minor>.<bugfix>-<nrev>-g<gid>" "$LINENO" 5
     fi
-  done
-fi
-rm -f "${objroot}VERSION.tmp"
+    echo "$with_version" > "${objroot}VERSION"
+
+else
+
+        if test "x`test ! \"${srcroot}\" && cd \"${srcroot}\"; git rev-parse --is-inside-work-tree 2>/dev/null`" = "xtrue" ; then
+                        for pattern in '[0-9].[0-9].[0-9]' '[0-9].[0-9].[0-9][0-9]' \
+                     '[0-9].[0-9][0-9].[0-9]' '[0-9].[0-9][0-9].[0-9][0-9]' \
+                     '[0-9][0-9].[0-9].[0-9]' '[0-9][0-9].[0-9].[0-9][0-9]' \
+                     '[0-9][0-9].[0-9][0-9].[0-9]' \
+                     '[0-9][0-9].[0-9][0-9].[0-9][0-9]'; do
+        (test ! "${srcroot}" && cd "${srcroot}"; git describe --long --abbrev=40 --match="${pattern}") > "${objroot}VERSION.tmp" 2>/dev/null
+        if test $? -eq 0 ; then
+          mv "${objroot}VERSION.tmp" "${objroot}VERSION"
+          break
+        fi
+      done
+    fi
+    rm -f "${objroot}VERSION.tmp"
+
+fi
+
+
 if test ! -e "${objroot}VERSION" ; then
   if test ! -e "${srcroot}VERSION" ; then
     { $as_echo "$as_me:${as_lineno-$LINENO}: result: Missing VERSION file, and unable to generate it; creating bogus VERSION" >&5
 $as_echo "Missing VERSION file, and unable to generate it; creating bogus VERSION" >&6; }
     echo "0.0.0-0-g0000000000000000000000000000000000000000" > "${objroot}VERSION"
   else
     cp ${srcroot}VERSION ${objroot}VERSION
   fi
@@ -7280,18 +7830,16 @@ else
 fi
 
 fi
 
 fi
 
 CPPFLAGS="$CPPFLAGS -D_REENTRANT"
 
-SAVED_LIBS="${LIBS}"
-LIBS=
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing clock_gettime" >&5
 $as_echo_n "checking for library containing clock_gettime... " >&6; }
 if ${ac_cv_search_clock_gettime+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   ac_func_search_save_LIBS=$LIBS
 cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
@@ -7335,21 +7883,316 @@ fi
 rm conftest.$ac_ext
 LIBS=$ac_func_search_save_LIBS
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_clock_gettime" >&5
 $as_echo "$ac_cv_search_clock_gettime" >&6; }
 ac_res=$ac_cv_search_clock_gettime
 if test "$ac_res" != no; then :
   test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
-  TESTLIBS="${LIBS}"
-fi
-
-
-LIBS="${SAVED_LIBS}"
+
+fi
+
+
+if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then
+  if test "$ac_cv_search_clock_gettime" != "-lrt"; then
+    SAVED_CFLAGS="${CFLAGS}"
+
+    unset ac_cv_search_clock_gettime
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -dynamic" >&5
+$as_echo_n "checking whether compiler supports -dynamic... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-dynamic"
+else
+  CFLAGS="${CFLAGS} -dynamic"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_cflags_appended=-dynamic
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  je_cv_cflags_appended=
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing clock_gettime" >&5
+$as_echo_n "checking for library containing clock_gettime... " >&6; }
+if ${ac_cv_search_clock_gettime+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char clock_gettime ();
+int
+main ()
+{
+return clock_gettime ();
+  ;
+  return 0;
+}
+_ACEOF
+for ac_lib in '' rt; do
+  if test -z "$ac_lib"; then
+    ac_res="none required"
+  else
+    ac_res=-l$ac_lib
+    LIBS="-l$ac_lib  $ac_func_search_save_LIBS"
+  fi
+  if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_search_clock_gettime=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext
+  if ${ac_cv_search_clock_gettime+:} false; then :
+  break
+fi
+done
+if ${ac_cv_search_clock_gettime+:} false; then :
+
+else
+  ac_cv_search_clock_gettime=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_clock_gettime" >&5
+$as_echo "$ac_cv_search_clock_gettime" >&6; }
+ac_res=$ac_cv_search_clock_gettime
+if test "$ac_res" != no; then :
+  test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+fi
+
+
+    CFLAGS="${SAVED_CFLAGS}"
+  fi
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is compilable" >&5
+$as_echo_n "checking whether clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is compilable... " >&6; }
+if ${je_cv_clock_monotonic_coarse+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#include <time.h>
+
+int
+main ()
+{
+
+	struct timespec ts;
+
+	clock_gettime(CLOCK_MONOTONIC_COARSE, &ts);
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  je_cv_clock_monotonic_coarse=yes
+else
+  je_cv_clock_monotonic_coarse=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_clock_monotonic_coarse" >&5
+$as_echo "$je_cv_clock_monotonic_coarse" >&6; }
+
+if test "x${je_cv_clock_monotonic_coarse}" = "xyes" ; then
+  $as_echo "#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE 1" >>confdefs.h
+
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether clock_gettime(CLOCK_MONOTONIC, ...) is compilable" >&5
+$as_echo_n "checking whether clock_gettime(CLOCK_MONOTONIC, ...) is compilable... " >&6; }
+if ${je_cv_clock_monotonic+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#include <unistd.h>
+#include <time.h>
+
+int
+main ()
+{
+
+	struct timespec ts;
+
+	clock_gettime(CLOCK_MONOTONIC, &ts);
+#if !defined(_POSIX_MONOTONIC_CLOCK) || _POSIX_MONOTONIC_CLOCK < 0
+#  error _POSIX_MONOTONIC_CLOCK missing/invalid
+#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  je_cv_clock_monotonic=yes
+else
+  je_cv_clock_monotonic=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_clock_monotonic" >&5
+$as_echo "$je_cv_clock_monotonic" >&6; }
+
+if test "x${je_cv_clock_monotonic}" = "xyes" ; then
+  $as_echo "#define JEMALLOC_HAVE_CLOCK_MONOTONIC 1" >>confdefs.h
+
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether mach_absolute_time() is compilable" >&5
+$as_echo_n "checking whether mach_absolute_time() is compilable... " >&6; }
+if ${je_cv_mach_absolute_time+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#include <mach/mach_time.h>
+
+int
+main ()
+{
+
+	mach_absolute_time();
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  je_cv_mach_absolute_time=yes
+else
+  je_cv_mach_absolute_time=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_mach_absolute_time" >&5
+$as_echo "$je_cv_mach_absolute_time" >&6; }
+
+if test "x${je_cv_mach_absolute_time}" = "xyes" ; then
+  $as_echo "#define JEMALLOC_HAVE_MACH_ABSOLUTE_TIME 1" >>confdefs.h
+
+fi
+
+SAVED_CFLAGS="${CFLAGS}"
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Werror" >&5
+$as_echo_n "checking whether compiler supports -Werror... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-Werror"
+else
+  CFLAGS="${CFLAGS} -Werror"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_cflags_appended=-Werror
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  je_cv_cflags_appended=
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether syscall(2) is compilable" >&5
+$as_echo_n "checking whether syscall(2) is compilable... " >&6; }
+if ${je_cv_syscall+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#include <sys/syscall.h>
+#include <unistd.h>
+
+int
+main ()
+{
+
+	syscall(SYS_write, 2, "hello", 5);
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  je_cv_syscall=yes
+else
+  je_cv_syscall=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_syscall" >&5
+$as_echo "$je_cv_syscall" >&6; }
+
+CFLAGS="${SAVED_CFLAGS}"
+if test "x$je_cv_syscall" = "xyes" ; then
+  $as_echo "#define JEMALLOC_HAVE_SYSCALL  " >>confdefs.h
+
+fi
 
 ac_fn_c_check_func "$LINENO" "secure_getenv" "ac_cv_func_secure_getenv"
 if test "x$ac_cv_func_secure_getenv" = xyes; then :
   have_secure_getenv="1"
 else
   have_secure_getenv="0"
 
 fi
@@ -7407,20 +8250,29 @@ else
   enable_lazy_lock="1"
 fi
 
 else
   enable_lazy_lock=""
 
 fi
 
-if test "x$enable_lazy_lock" = "x" -a "x${force_lazy_lock}" = "x1" ; then
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing lazy-lock to avoid allocator/threading bootstrap issues" >&5
+if test "x${enable_lazy_lock}" = "x" ; then
+  if test "x${force_lazy_lock}" = "x1" ; then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing lazy-lock to avoid allocator/threading bootstrap issues" >&5
 $as_echo "Forcing lazy-lock to avoid allocator/threading bootstrap issues" >&6; }
-  enable_lazy_lock="1"
+    enable_lazy_lock="1"
+  else
+    enable_lazy_lock="0"
+  fi
+fi
+if test "x${enable_lazy_lock}" = "x1" -a "x${abi}" = "xpecoff" ; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing no lazy-lock because thread creation monitoring is unimplemented" >&5
+$as_echo "Forcing no lazy-lock because thread creation monitoring is unimplemented" >&6; }
+  enable_lazy_lock="0"
 fi
 if test "x$enable_lazy_lock" = "x1" ; then
   if test "x$abi" != "xpecoff" ; then
     for ac_header in dlfcn.h
 do :
   ac_fn_c_check_header_mongrel "$LINENO" "dlfcn.h" "ac_cv_header_dlfcn_h" "$ac_includes_default"
 if test "x$ac_cv_header_dlfcn_h" = xyes; then :
   cat >>confdefs.h <<_ACEOF
@@ -7480,18 +8332,16 @@ else
 fi
 
 
 fi
 
   fi
   $as_echo "#define JEMALLOC_LAZY_LOCK  " >>confdefs.h
 
-else
-  enable_lazy_lock="0"
 fi
 
 
 # Check whether --enable-tls was given.
 if test "${enable_tls+set}" = set; then :
   enableval=$enable_tls; if test "x$enable_tls" = "xno" ; then
   enable_tls="0"
 else
@@ -7882,16 +8732,56 @@ fi
 
 if test "x${je_cv_builtin_clz}" = "xyes" ; then
   $as_echo "#define JEMALLOC_HAVE_BUILTIN_CLZ  " >>confdefs.h
 
 fi
 
 
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether Darwin os_unfair_lock_*() is compilable" >&5
+$as_echo_n "checking whether Darwin os_unfair_lock_*() is compilable... " >&6; }
+if ${je_cv_os_unfair_lock+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#include <os/lock.h>
+
+int
+main ()
+{
+
+	os_unfair_lock lock = OS_UNFAIR_LOCK_INIT;
+	os_unfair_lock_lock(&lock);
+	os_unfair_lock_unlock(&lock);
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  je_cv_os_unfair_lock=yes
+else
+  je_cv_os_unfair_lock=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_os_unfair_lock" >&5
+$as_echo "$je_cv_os_unfair_lock" >&6; }
+
+if test "x${je_cv_os_unfair_lock}" = "xyes" ; then
+  $as_echo "#define JEMALLOC_OS_UNFAIR_LOCK  " >>confdefs.h
+
+fi
+
+
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether Darwin OSSpin*() is compilable" >&5
 $as_echo_n "checking whether Darwin OSSpin*() is compilable... " >&6; }
 if ${je_cv_osspin+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
@@ -9784,26 +10674,26 @@ fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: " >&5
 $as_echo "" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: CONFIG             : ${CONFIG}" >&5
 $as_echo "CONFIG             : ${CONFIG}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: CC                 : ${CC}" >&5
 $as_echo "CC                 : ${CC}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: CFLAGS             : ${CFLAGS}" >&5
 $as_echo "CFLAGS             : ${CFLAGS}" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: EXTRA_CFLAGS       : ${EXTRA_CFLAGS}" >&5
+$as_echo "EXTRA_CFLAGS       : ${EXTRA_CFLAGS}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: CPPFLAGS           : ${CPPFLAGS}" >&5
 $as_echo "CPPFLAGS           : ${CPPFLAGS}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: LDFLAGS            : ${LDFLAGS}" >&5
 $as_echo "LDFLAGS            : ${LDFLAGS}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: EXTRA_LDFLAGS      : ${EXTRA_LDFLAGS}" >&5
 $as_echo "EXTRA_LDFLAGS      : ${EXTRA_LDFLAGS}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: LIBS               : ${LIBS}" >&5
 $as_echo "LIBS               : ${LIBS}" >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: TESTLIBS           : ${TESTLIBS}" >&5
-$as_echo "TESTLIBS           : ${TESTLIBS}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: RPATH_EXTRA        : ${RPATH_EXTRA}" >&5
 $as_echo "RPATH_EXTRA        : ${RPATH_EXTRA}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: " >&5
 $as_echo "" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: XSLTPROC           : ${XSLTPROC}" >&5
 $as_echo "XSLTPROC           : ${XSLTPROC}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: XSLROOT            : ${XSLROOT}" >&5
 $as_echo "XSLROOT            : ${XSLROOT}" >&6; }
--- a/memory/jemalloc/src/configure.ac
+++ b/memory/jemalloc/src/configure.ac
@@ -113,54 +113,108 @@ fi
   XSLROOT="${DEFAULT_XSLROOT}"
 )
 AC_SUBST([XSLROOT])
 
 dnl If CFLAGS isn't defined, set CFLAGS to something reasonable.  Otherwise,
 dnl just prevent autoconf from molesting CFLAGS.
 CFLAGS=$CFLAGS
 AC_PROG_CC
+
 if test "x$GCC" != "xyes" ; then
   AC_CACHE_CHECK([whether compiler is MSVC],
                  [je_cv_msvc],
                  [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],
                                                      [
 #ifndef _MSC_VER
   int fail[-1];
 #endif
 ])],
                                [je_cv_msvc=yes],
                                [je_cv_msvc=no])])
 fi
 
+dnl check if a cray prgenv wrapper compiler is being used
+je_cv_cray_prgenv_wrapper=""
+if test "x${PE_ENV}" != "x" ; then
+  case "${CC}" in
+    CC|cc)
+	je_cv_cray_prgenv_wrapper="yes"
+	;;
+    *)
+       ;;
+  esac
+fi
+
+AC_CACHE_CHECK([whether compiler is cray],
+              [je_cv_cray],
+              [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],
+                                                  [
+#ifndef _CRAYC
+  int fail[-1];
+#endif
+])],
+                            [je_cv_cray=yes],
+                            [je_cv_cray=no])])
+
+if test "x${je_cv_cray}" = "xyes" ; then
+  AC_CACHE_CHECK([whether cray compiler version is 8.4],
+                [je_cv_cray_84],
+                [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],
+                                                      [
+#if !(_RELEASE_MAJOR == 8 && _RELEASE_MINOR == 4)
+  int fail[-1];
+#endif
+])],
+                              [je_cv_cray_84=yes],
+                              [je_cv_cray_84=no])])
+fi
+
 if test "x$CFLAGS" = "x" ; then
   no_CFLAGS="yes"
   if test "x$GCC" = "xyes" ; then
-    JE_CFLAGS_APPEND([-std=gnu99])
-    if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then
+dnl    JE_CFLAGS_APPEND([-std=gnu99])
+    JE_CFLAGS_APPEND([-std=gnu11])
+    if test "x$je_cv_cflags_appended" = "x-std=gnu11" ; then
       AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT])
+    else
+      JE_CFLAGS_APPEND([-std=gnu99])
+      if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then
+        AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT])
+      fi
     fi
     JE_CFLAGS_APPEND([-Wall])
     JE_CFLAGS_APPEND([-Werror=declaration-after-statement])
     JE_CFLAGS_APPEND([-Wshorten-64-to-32])
+    JE_CFLAGS_APPEND([-Wsign-compare])
     JE_CFLAGS_APPEND([-pipe])
     JE_CFLAGS_APPEND([-g3])
   elif test "x$je_cv_msvc" = "xyes" ; then
     CC="$CC -nologo"
     JE_CFLAGS_APPEND([-Zi])
     JE_CFLAGS_APPEND([-MT])
     JE_CFLAGS_APPEND([-W3])
     JE_CFLAGS_APPEND([-FS])
     CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat"
   fi
+  if test "x$je_cv_cray" = "xyes" ; then
+    dnl cray compiler 8.4 has an inlining bug
+    if test "x$je_cv_cray_84" = "xyes" ; then
+      JE_CFLAGS_APPEND([-hipa2])
+      JE_CFLAGS_APPEND([-hnognu])
+    fi
+    if test "x$enable_cc_silence" != "xno" ; then
+      dnl ignore unreachable code warning
+      JE_CFLAGS_APPEND([-hnomessage=128])
+      dnl ignore redefinition of "malloc", "free", etc warning
+      JE_CFLAGS_APPEND([-hnomessage=1357])
+    fi
+  fi
 fi
-dnl Append EXTRA_CFLAGS to CFLAGS, if defined.
-if test "x$EXTRA_CFLAGS" != "x" ; then
-  JE_CFLAGS_APPEND([$EXTRA_CFLAGS])
-fi
+AC_SUBST([EXTRA_CFLAGS])
 AC_PROG_CPP
 
 AC_C_BIGENDIAN([ac_cv_big_endian=1], [ac_cv_big_endian=0])
 if test "x${ac_cv_big_endian}" = "x1" ; then
   AC_DEFINE_UNQUOTED([JEMALLOC_BIG_ENDIAN], [ ])
 fi
 
 if test "x${je_cv_msvc}" = "xyes" -a "x${ac_cv_header_inttypes_h}" = "xno"; then
@@ -257,103 +311,110 @@ AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU
 
 LD_PRELOAD_VAR="LD_PRELOAD"
 so="so"
 importlib="${so}"
 o="$ac_objext"
 a="a"
 exe="$ac_exeext"
 libprefix="lib"
+link_whole_archive="0"
 DSO_LDFLAGS='-shared -Wl,-soname,$(@F)'
 RPATH='-Wl,-rpath,$(1)'
 SOREV="${so}.${rev}"
 PIC_CFLAGS='-fPIC -DPIC'
 CTARGET='-o $@'
 LDTARGET='-o $@'
+TEST_LD_MODE=
 EXTRA_LDFLAGS=
 ARFLAGS='crus'
 AROUT=' $@'
 CC_MM=1
 
+if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then
+  TEST_LD_MODE='-dynamic'
+fi
+
+if test "x${je_cv_cray}" = "xyes" ; then
+  CC_MM=
+fi
+
 AN_MAKEVAR([AR], [AC_PROG_AR])
 AN_PROGRAM([ar], [AC_PROG_AR])
 AC_DEFUN([AC_PROG_AR], [AC_CHECK_TOOL(AR, ar, :)])
 AC_PROG_AR
 
 dnl Platform-specific settings.  abi and RPATH can probably be determined
 dnl programmatically, but doing so is error-prone, which makes it generally
 dnl not worth the trouble.
 dnl 
 dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the
 dnl definitions need to be seen before any headers are included, which is a pain
 dnl to make happen otherwise.
+CFLAGS="$CFLAGS"
 default_munmap="1"
 maps_coalesce="1"
 case "${host}" in
   *-*-darwin* | *-*-ios*)
-	CFLAGS="$CFLAGS"
 	abi="macho"
 	AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
 	RPATH=""
 	LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES"
 	so="dylib"
 	importlib="${so}"
 	force_tls="0"
 	DSO_LDFLAGS='-shared -Wl,-install_name,$(LIBDIR)/$(@F)'
 	SOREV="${rev}.${so}"
 	sbrk_deprecated="1"
 	;;
   *-*-freebsd*)
-	CFLAGS="$CFLAGS"
 	abi="elf"
+	AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ])
 	AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
 	force_lazy_lock="1"
 	;;
   *-*-dragonfly*)
-	CFLAGS="$CFLAGS"
 	abi="elf"
 	AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
 	;;
   *-*-openbsd*)
-	CFLAGS="$CFLAGS"
 	abi="elf"
 	AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
 	force_tls="0"
 	;;
   *-*-bitrig*)
-	CFLAGS="$CFLAGS"
 	abi="elf"
 	AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
 	;;
   *-*-linux*)
-	CFLAGS="$CFLAGS"
+	dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE.
 	CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE"
 	abi="elf"
 	AC_DEFINE([JEMALLOC_HAS_ALLOCA_H])
+	AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ])
 	AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ])
 	AC_DEFINE([JEMALLOC_THREADED_INIT], [ ])
 	AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ])
 	default_munmap="0"
 	;;
   *-*-netbsd*)
 	AC_MSG_CHECKING([ABI])
         AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
 [[#ifdef __ELF__
 /* ELF */
 #else
 #error aout
 #endif
 ]])],
-                          [CFLAGS="$CFLAGS"; abi="elf"],
+                          [abi="elf"],
                           [abi="aout"])
 	AC_MSG_RESULT([$abi])
 	AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
 	;;
   *-*-solaris2*)
-	CFLAGS="$CFLAGS"
 	abi="elf"
 	AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
 	RPATH='-Wl,-R,$(1)'
 	dnl Solaris needs this for sigwait().
 	CPPFLAGS="$CPPFLAGS -D_POSIX_PTHREAD_SEMANTICS"
 	LIBS="$LIBS -lposix4 -lsocket -lnsl"
 	;;
   *-ibm-aix*)
@@ -364,33 +425,33 @@ case "${host}" in
 	  dnl 32bit AIX
 	  LD_PRELOAD_VAR="LDR_PRELOAD"
 	fi
 	abi="xcoff"
 	;;
   *-*-mingw* | *-*-cygwin*)
 	abi="pecoff"
 	force_tls="0"
-	force_lazy_lock="1"
 	maps_coalesce="0"
 	RPATH=""
 	so="dll"
 	if test "x$je_cv_msvc" = "xyes" ; then
 	  importlib="lib"
 	  DSO_LDFLAGS="-LD"
 	  EXTRA_LDFLAGS="-link -DEBUG"
 	  CTARGET='-Fo$@'
 	  LDTARGET='-Fe$@'
 	  AR='lib'
 	  ARFLAGS='-nologo -out:'
 	  AROUT='$@'
 	  CC_MM=
         else
 	  importlib="${so}"
 	  DSO_LDFLAGS="-shared"
+	  link_whole_archive="1"
 	fi
 	a="lib"
 	libprefix=""
 	SOREV="${so}"
 	PIC_CFLAGS=""
 	;;
   *)
 	AC_MSG_RESULT([Unsupported operating system: ${host}])
@@ -418,75 +479,90 @@ AC_SUBST([abi])
 AC_SUBST([RPATH])
 AC_SUBST([LD_PRELOAD_VAR])
 AC_SUBST([so])
 AC_SUBST([importlib])
 AC_SUBST([o])
 AC_SUBST([a])
 AC_SUBST([exe])
 AC_SUBST([libprefix])
+AC_SUBST([link_whole_archive])
 AC_SUBST([DSO_LDFLAGS])
 AC_SUBST([EXTRA_LDFLAGS])
 AC_SUBST([SOREV])
 AC_SUBST([PIC_CFLAGS])
 AC_SUBST([CTARGET])
 AC_SUBST([LDTARGET])
+AC_SUBST([TEST_LD_MODE])
 AC_SUBST([MKLIB])
 AC_SUBST([ARFLAGS])
 AC_SUBST([AROUT])
 AC_SUBST([CC_MM])
 
+dnl Determine whether libm must be linked to use e.g. log(3).
+AC_SEARCH_LIBS([log], [m], , [AC_MSG_ERROR([Missing math functions])])
+if test "x$ac_cv_search_log" != "xnone required" ; then
+  LM="$ac_cv_search_log"
+else
+  LM=
+fi
+AC_SUBST(LM)
+
 JE_COMPILABLE([__attribute__ syntax],
               [static __attribute__((unused)) void foo(void){}],
               [],
               [je_cv_attribute])
 if test "x${je_cv_attribute}" = "xyes" ; then
   AC_DEFINE([JEMALLOC_HAVE_ATTR], [ ])
   if test "x${GCC}" = "xyes" -a "x${abi}" = "xelf"; then
     JE_CFLAGS_APPEND([-fvisibility=hidden])
   fi
 fi
 dnl Check for tls_model attribute support (clang 3.0 still lacks support).
 SAVED_CFLAGS="${CFLAGS}"
 JE_CFLAGS_APPEND([-Werror])
+JE_CFLAGS_APPEND([-herror_on_warning])
 JE_COMPILABLE([tls_model attribute], [],
               [static __thread int
                __attribute__((tls_model("initial-exec"), unused)) foo;
                foo = 0;],
               [je_cv_tls_model])
 CFLAGS="${SAVED_CFLAGS}"
 if test "x${je_cv_tls_model}" = "xyes" ; then
   AC_DEFINE([JEMALLOC_TLS_MODEL],
             [__attribute__((tls_model("initial-exec")))])
 else
   AC_DEFINE([JEMALLOC_TLS_MODEL], [ ])
 fi
 dnl Check for alloc_size attribute support.
 SAVED_CFLAGS="${CFLAGS}"
 JE_CFLAGS_APPEND([-Werror])
+JE_CFLAGS_APPEND([-herror_on_warning])
 JE_COMPILABLE([alloc_size attribute], [#include <stdlib.h>],
               [void *foo(size_t size) __attribute__((alloc_size(1)));],
               [je_cv_alloc_size])
 CFLAGS="${SAVED_CFLAGS}"
 if test "x${je_cv_alloc_size}" = "xyes" ; then
   AC_DEFINE([JEMALLOC_HAVE_ATTR_ALLOC_SIZE], [ ])
 fi
 dnl Check for format(gnu_printf, ...) attribute support.
 SAVED_CFLAGS="${CFLAGS}"
 JE_CFLAGS_APPEND([-Werror])
+JE_CFLAGS_APPEND([-herror_on_warning])
 JE_COMPILABLE([format(gnu_printf, ...) attribute], [#include <stdlib.h>],
               [void *foo(const char *format, ...) __attribute__((format(gnu_printf, 1, 2)));],
               [je_cv_format_gnu_printf])
 CFLAGS="${SAVED_CFLAGS}"
 if test "x${je_cv_format_gnu_printf}" = "xyes" ; then
   AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF], [ ])
 fi
 dnl Check for format(printf, ...) attribute support.
 SAVED_CFLAGS="${CFLAGS}"
 JE_CFLAGS_APPEND([-Werror])
+JE_CFLAGS_APPEND([-herror_on_warning])
 JE_COMPILABLE([format(printf, ...) attribute], [#include <stdlib.h>],
               [void *foo(const char *format, ...) __attribute__((format(printf, 1, 2)));],
               [je_cv_format_printf])
 CFLAGS="${SAVED_CFLAGS}"
 if test "x${je_cv_format_printf}" = "xyes" ; then
   AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_PRINTF], [ ])
 fi
 
@@ -871,19 +947,19 @@ fi
 
 if test "x$backtrace_method" = "x" ; then
   backtrace_method="none (disabling profiling)"
   enable_prof="0"
 fi
 AC_MSG_CHECKING([configured backtracing method])
 AC_MSG_RESULT([$backtrace_method])
 if test "x$enable_prof" = "x1" ; then
-  if test "x$abi" != "xpecoff"; then
-    dnl Heap profiling uses the log(3) function.
-    LIBS="$LIBS -lm"
+  dnl Heap profiling uses the log(3) function.
+  if test "x$LM" != "x" ; then
+    LIBS="$LIBS $LM"
   fi
 
   AC_DEFINE([JEMALLOC_PROF], [ ])
 fi
 AC_SUBST([enable_prof])
 
 dnl Enable thread-specific caching by default.
 AC_ARG_ENABLE([tcache],
@@ -1042,16 +1118,33 @@ fi
 ],
 [enable_cache_oblivious="1"]
 )
 if test "x$enable_cache_oblivious" = "x1" ; then
   AC_DEFINE([JEMALLOC_CACHE_OBLIVIOUS], [ ])
 fi
 AC_SUBST([enable_cache_oblivious])
 
+
+
+JE_COMPILABLE([a program using __builtin_unreachable], [
+void foo (void) {
+  __builtin_unreachable();
+}
+], [
+	{
+		foo();
+	}
+], [je_cv_gcc_builtin_unreachable])
+if test "x${je_cv_gcc_builtin_unreachable}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [__builtin_unreachable])
+else
+  AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [abort])
+fi
+
 dnl ============================================================================
 dnl Check for  __builtin_ffsl(), then ffsl(3), and fail if neither are found.
 dnl One of those two functions should (theoretically) exist on all platforms
 dnl that jemalloc currently has a chance of functioning on without modification.
 dnl We additionally assume ffs[ll]() or __builtin_ffs[ll]() are defined if
 dnl ffsl() or __builtin_ffsl() are defined, respectively.
 JE_COMPILABLE([a program using __builtin_ffsl], [
 #include <stdio.h>
@@ -1167,37 +1260,46 @@ AC_ARG_WITH([lg_size_class_group],
    [Base 2 log of size classes per doubling])],
   [LG_SIZE_CLASS_GROUP="$with_lg_size_class_group"],
   [LG_SIZE_CLASS_GROUP="2"])
 
 dnl ============================================================================
 dnl jemalloc configuration.
 dnl 
 
-dnl Set VERSION if source directory is inside a git repository.
-if test "x`test ! \"${srcroot}\" && cd \"${srcroot}\"; git rev-parse --is-inside-work-tree 2>/dev/null`" = "xtrue" ; then
-  dnl Pattern globs aren't powerful enough to match both single- and
-  dnl double-digit version numbers, so iterate over patterns to support up to
-  dnl version 99.99.99 without any accidental matches.
-  rm -f "${objroot}VERSION"
-  for pattern in ['[0-9].[0-9].[0-9]' '[0-9].[0-9].[0-9][0-9]' \
-                 '[0-9].[0-9][0-9].[0-9]' '[0-9].[0-9][0-9].[0-9][0-9]' \
-                 '[0-9][0-9].[0-9].[0-9]' '[0-9][0-9].[0-9].[0-9][0-9]' \
-                 '[0-9][0-9].[0-9][0-9].[0-9]' \
-                 '[0-9][0-9].[0-9][0-9].[0-9][0-9]']; do
-    if test ! -e "${objroot}VERSION" ; then
-      (test ! "${srcroot}" && cd "${srcroot}"; git describe --long --abbrev=40 --match="${pattern}") > "${objroot}VERSION.tmp" 2>/dev/null
-      if test $? -eq 0 ; then
-        mv "${objroot}VERSION.tmp" "${objroot}VERSION"
-        break
-      fi
+AC_ARG_WITH([version],
+  [AS_HELP_STRING([--with-version=<major>.<minor>.<bugfix>-<nrev>-g<gid>],
+   [Version string])],
+  [
+    echo "${with_version}" | grep ['^[0-9]\+\.[0-9]\+\.[0-9]\+-[0-9]\+-g[0-9a-f]\+$'] 2>&1 1>/dev/null
+    if test $? -ne 0 ; then
+      AC_MSG_ERROR([${with_version} does not match <major>.<minor>.<bugfix>-<nrev>-g<gid>])
     fi
-  done
-fi
-rm -f "${objroot}VERSION.tmp"
+    echo "$with_version" > "${objroot}VERSION"
+  ], [
+    dnl Set VERSION if source directory is inside a git repository.
+    if test "x`test ! \"${srcroot}\" && cd \"${srcroot}\"; git rev-parse --is-inside-work-tree 2>/dev/null`" = "xtrue" ; then
+      dnl Pattern globs aren't powerful enough to match both single- and
+      dnl double-digit version numbers, so iterate over patterns to support up
+      dnl to version 99.99.99 without any accidental matches.
+      for pattern in ['[0-9].[0-9].[0-9]' '[0-9].[0-9].[0-9][0-9]' \
+                     '[0-9].[0-9][0-9].[0-9]' '[0-9].[0-9][0-9].[0-9][0-9]' \
+                     '[0-9][0-9].[0-9].[0-9]' '[0-9][0-9].[0-9].[0-9][0-9]' \
+                     '[0-9][0-9].[0-9][0-9].[0-9]' \
+                     '[0-9][0-9].[0-9][0-9].[0-9][0-9]']; do
+        (test ! "${srcroot}" && cd "${srcroot}"; git describe --long --abbrev=40 --match="${pattern}") > "${objroot}VERSION.tmp" 2>/dev/null
+        if test $? -eq 0 ; then
+          mv "${objroot}VERSION.tmp" "${objroot}VERSION"
+          break
+        fi
+      done
+    fi
+    rm -f "${objroot}VERSION.tmp"
+  ])
+
 if test ! -e "${objroot}VERSION" ; then
   if test ! -e "${srcroot}VERSION" ; then
     AC_MSG_RESULT(
       [Missing VERSION file, and unable to generate it; creating bogus VERSION])
     echo "0.0.0-0-g0000000000000000000000000000000000000000" > "${objroot}VERSION"
   else
     cp ${srcroot}VERSION ${objroot}VERSION
   fi
@@ -1224,23 +1326,86 @@ if test "x$abi" != "xpecoff" ; then
   dnl first, but try libc too before failing.
   AC_CHECK_LIB([pthread], [pthread_create], [LIBS="$LIBS -lpthread"],
                [AC_SEARCH_LIBS([pthread_create], , ,
                                AC_MSG_ERROR([libpthread is missing]))])
 fi
 
 CPPFLAGS="$CPPFLAGS -D_REENTRANT"
 
-dnl Check whether clock_gettime(2) is in libc or librt.  This function is only
-dnl used in test code, so save the result to TESTLIBS to avoid poluting LIBS.
-SAVED_LIBS="${LIBS}"
-LIBS=
-AC_SEARCH_LIBS([clock_gettime], [rt], [TESTLIBS="${LIBS}"])
-AC_SUBST([TESTLIBS])
-LIBS="${SAVED_LIBS}"
+dnl Check whether clock_gettime(2) is in libc or librt.
+AC_SEARCH_LIBS([clock_gettime], [rt])
+
+dnl Cray wrapper compiler often adds `-lrt` when using `-static`. Check with
+dnl `-dynamic` as well in case a user tries to dynamically link in jemalloc
+if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then
+  if test "$ac_cv_search_clock_gettime" != "-lrt"; then
+    SAVED_CFLAGS="${CFLAGS}"
+
+    unset ac_cv_search_clock_gettime
+    JE_CFLAGS_APPEND([-dynamic])
+    AC_SEARCH_LIBS([clock_gettime], [rt])
+
+    CFLAGS="${SAVED_CFLAGS}"
+  fi
+fi
+
+dnl check for CLOCK_MONOTONIC_COARSE (Linux-specific).
+JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC_COARSE, ...)], [
+#include <time.h>
+], [
+	struct timespec ts;
+
+	clock_gettime(CLOCK_MONOTONIC_COARSE, &ts);
+], [je_cv_clock_monotonic_coarse])
+if test "x${je_cv_clock_monotonic_coarse}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE])
+fi
+
+dnl check for CLOCK_MONOTONIC.
+JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC, ...)], [
+#include <unistd.h>
+#include <time.h>
+], [
+	struct timespec ts;
+
+	clock_gettime(CLOCK_MONOTONIC, &ts);
+#if !defined(_POSIX_MONOTONIC_CLOCK) || _POSIX_MONOTONIC_CLOCK < 0
+#  error _POSIX_MONOTONIC_CLOCK missing/invalid
+#endif
+], [je_cv_clock_monotonic])
+if test "x${je_cv_clock_monotonic}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC])
+fi
+
+dnl Check for mach_absolute_time().
+JE_COMPILABLE([mach_absolute_time()], [
+#include <mach/mach_time.h>
+], [
+	mach_absolute_time();
+], [je_cv_mach_absolute_time])
+if test "x${je_cv_mach_absolute_time}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_HAVE_MACH_ABSOLUTE_TIME])
+fi
+
+dnl Check if syscall(2) is usable.  Treat warnings as errors, so that e.g. OS X
+dnl 10.12's deprecation warning prevents use.
+SAVED_CFLAGS="${CFLAGS}"
+JE_CFLAGS_APPEND([-Werror])
+JE_COMPILABLE([syscall(2)], [
+#include <sys/syscall.h>
+#include <unistd.h>
+], [
+	syscall(SYS_write, 2, "hello", 5);
+],
+              [je_cv_syscall])
+CFLAGS="${SAVED_CFLAGS}"
+if test "x$je_cv_syscall" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_HAVE_SYSCALL], [ ])
+fi
 
 dnl Check if the GNU-specific secure_getenv function exists.
 AC_CHECK_FUNC([secure_getenv],
               [have_secure_getenv="1"],
               [have_secure_getenv="0"]
              )
 if test "x$have_secure_getenv" = "x1" ; then
   AC_DEFINE([JEMALLOC_HAVE_SECURE_GETENV], [ ])
@@ -1286,31 +1451,37 @@ AC_ARG_ENABLE([lazy_lock],
 [if test "x$enable_lazy_lock" = "xno" ; then
   enable_lazy_lock="0"
 else
   enable_lazy_lock="1"
 fi
 ],
 [enable_lazy_lock=""]
 )
-if test "x$enable_lazy_lock" = "x" -a "x${force_lazy_lock}" = "x1" ; then
-  AC_MSG_RESULT([Forcing lazy-lock to avoid allocator/threading bootstrap issues])
-  enable_lazy_lock="1"
+if test "x${enable_lazy_lock}" = "x" ; then
+  if test "x${force_lazy_lock}" = "x1" ; then
+    AC_MSG_RESULT([Forcing lazy-lock to avoid allocator/threading bootstrap issues])
+    enable_lazy_lock="1"
+  else
+    enable_lazy_lock="0"
+  fi
+fi
+if test "x${enable_lazy_lock}" = "x1" -a "x${abi}" = "xpecoff" ; then
+  AC_MSG_RESULT([Forcing no lazy-lock because thread creation monitoring is unimplemented])
+  enable_lazy_lock="0"
 fi
 if test "x$enable_lazy_lock" = "x1" ; then
   if test "x$abi" != "xpecoff" ; then
     AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])])
     AC_CHECK_FUNC([dlsym], [],
       [AC_CHECK_LIB([dl], [dlsym], [LIBS="$LIBS -ldl"],
                     [AC_MSG_ERROR([libdl is missing])])
       ])
   fi
   AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ])
-else
-  enable_lazy_lock="0"
 fi
 AC_SUBST([enable_lazy_lock])
 
 AC_ARG_ENABLE([tls],
   [AS_HELP_STRING([--disable-tls], [Disable thread-local storage (__thread keyword)])],
 if test "x$enable_tls" = "xno" ; then
   enable_tls="0"
 else
@@ -1489,16 +1660,30 @@ AC_CACHE_CHECK([for __builtin_clz],
                                [je_cv_builtin_clz=yes],
                                [je_cv_builtin_clz=no])])
 
 if test "x${je_cv_builtin_clz}" = "xyes" ; then
   AC_DEFINE([JEMALLOC_HAVE_BUILTIN_CLZ], [ ])
 fi
 
 dnl ============================================================================
+dnl Check for os_unfair_lock operations as provided on Darwin.
+
+JE_COMPILABLE([Darwin os_unfair_lock_*()], [
+#include <os/lock.h>
+], [
+	os_unfair_lock lock = OS_UNFAIR_LOCK_INIT;
+	os_unfair_lock_lock(&lock);
+	os_unfair_lock_unlock(&lock);
+], [je_cv_os_unfair_lock])
+if test "x${je_cv_os_unfair_lock}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_OS_UNFAIR_LOCK], [ ])
+fi
+
+dnl ============================================================================
 dnl Check for spinlock(3) operations as provided on Darwin.
 
 JE_COMPILABLE([Darwin OSSpin*()], [
 #include <libkern/OSAtomic.h>
 #include <inttypes.h>
 ], [
 	OSSpinLock lock = 0;
 	OSSpinLockLock(&lock);
@@ -1732,21 +1917,21 @@ dnl ====================================
 dnl Print out the results of configuration.
 AC_MSG_RESULT([===============================================================================])
 AC_MSG_RESULT([jemalloc version   : ${jemalloc_version}])
 AC_MSG_RESULT([library revision   : ${rev}])
 AC_MSG_RESULT([])
 AC_MSG_RESULT([CONFIG             : ${CONFIG}])
 AC_MSG_RESULT([CC                 : ${CC}])
 AC_MSG_RESULT([CFLAGS             : ${CFLAGS}])
+AC_MSG_RESULT([EXTRA_CFLAGS       : ${EXTRA_CFLAGS}])
 AC_MSG_RESULT([CPPFLAGS           : ${CPPFLAGS}])
 AC_MSG_RESULT([LDFLAGS            : ${LDFLAGS}])
 AC_MSG_RESULT([EXTRA_LDFLAGS      : ${EXTRA_LDFLAGS}])
 AC_MSG_RESULT([LIBS               : ${LIBS}])
-AC_MSG_RESULT([TESTLIBS           : ${TESTLIBS}])
 AC_MSG_RESULT([RPATH_EXTRA        : ${RPATH_EXTRA}])
 AC_MSG_RESULT([])
 AC_MSG_RESULT([XSLTPROC           : ${XSLTPROC}])
 AC_MSG_RESULT([XSLROOT            : ${XSLROOT}])
 AC_MSG_RESULT([])
 AC_MSG_RESULT([PREFIX             : ${PREFIX}])
 AC_MSG_RESULT([BINDIR             : ${BINDIR}])
 AC_MSG_RESULT([DATADIR            : ${DATADIR}])
--- a/memory/jemalloc/src/doc/html.xsl.in
+++ b/memory/jemalloc/src/doc/html.xsl.in
@@ -1,4 +1,5 @@
 <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
   <xsl:import href="@XSLROOT@/html/docbook.xsl"/>
   <xsl:import href="@abs_srcroot@doc/stylesheet.xsl"/>
+  <xsl:output method="xml" encoding="utf-8"/>
 </xsl:stylesheet>
--- a/memory/jemalloc/src/doc/jemalloc.xml.in
+++ b/memory/jemalloc/src/doc/jemalloc.xml.in
@@ -47,17 +47,17 @@
     <refname>malloc_usable_size</refname>
     -->
     <refpurpose>general purpose memory allocation functions</refpurpose>
   </refnamediv>
   <refsect1 id="library">
     <title>LIBRARY</title>
     <para>This manual describes jemalloc @jemalloc_version@.  More information
     can be found at the <ulink
-    url="http://www.canonware.com/jemalloc/">jemalloc website</ulink>.</para>
+    url="http://jemalloc.net/">jemalloc website</ulink>.</para>
   </refsect1>
   <refsynopsisdiv>
     <title>SYNOPSIS</title>
     <funcsynopsis>
       <funcsynopsisinfo>#include &lt;<filename class="headerfile">jemalloc/jemalloc.h</filename>&gt;</funcsynopsisinfo>
       <refsect2>
         <title>Standard API</title>
         <funcprototype>
@@ -175,73 +175,73 @@
       </refsect2>
     </funcsynopsis>
   </refsynopsisdiv>
   <refsect1 id="description">
     <title>DESCRIPTION</title>
     <refsect2>
       <title>Standard API</title>
 
-      <para>The <function>malloc<parameter/></function> function allocates
+      <para>The <function>malloc()</function> function allocates
       <parameter>size</parameter> bytes of uninitialized memory.  The allocated
       space is suitably aligned (after possible pointer coercion) for storage
       of any type of object.</para>
 
-      <para>The <function>calloc<parameter/></function> function allocates
+      <para>The <function>calloc()</function> function allocates
       space for <parameter>number</parameter> objects, each
       <parameter>size</parameter> bytes in length.  The result is identical to
-      calling <function>malloc<parameter/></function> with an argument of
+      calling <function>malloc()</function> with an argument of
       <parameter>number</parameter> * <parameter>size</parameter>, with the
       exception that the allocated memory is explicitly initialized to zero
       bytes.</para>
 
-      <para>The <function>posix_memalign<parameter/></function> function
+      <para>The <function>posix_memalign()</function> function
       allocates <parameter>size</parameter> bytes of memory such that the
       allocation's base address is a multiple of
       <parameter>alignment</parameter>, and returns the allocation in the value
       pointed to by <parameter>ptr</parameter>.  The requested
       <parameter>alignment</parameter> must be a power of 2 at least as large as
       <code language="C">sizeof(<type>void *</type>)</code>.</para>
 
-      <para>The <function>aligned_alloc<parameter/></function> function
+      <para>The <function>aligned_alloc()</function> function
       allocates <parameter>size</parameter> bytes of memory such that the
       allocation's base address is a multiple of
       <parameter>alignment</parameter>.  The requested
       <parameter>alignment</parameter> must be a power of 2.  Behavior is
       undefined if <parameter>size</parameter> is not an integral multiple of
       <parameter>alignment</parameter>.</para>
 
-      <para>The <function>realloc<parameter/></function> function changes the
+      <para>The <function>realloc()</function> function changes the
       size of the previously allocated memory referenced by
       <parameter>ptr</parameter> to <parameter>size</parameter> bytes.  The
       contents of the memory are unchanged up to the lesser of the new and old
       sizes.  If the new size is larger, the contents of the newly allocated
       portion of the memory are undefined.  Upon success, the memory referenced
       by <parameter>ptr</parameter> is freed and a pointer to the newly
       allocated memory is returned.  Note that
-      <function>realloc<parameter/></function> may move the memory allocation,
+      <function>realloc()</function> may move the memory allocation,
       resulting in a different return value than <parameter>ptr</parameter>.
       If <parameter>ptr</parameter> is <constant>NULL</constant>, the
-      <function>realloc<parameter/></function> function behaves identically to
-      <function>malloc<parameter/></function> for the specified size.</para>
+      <function>realloc()</function> function behaves identically to
+      <function>malloc()</function> for the specified size.</para>
 
-      <para>The <function>free<parameter/></function> function causes the
+      <para>The <function>free()</function> function causes the
       allocated memory referenced by <parameter>ptr</parameter> to be made
       available for future allocations.  If <parameter>ptr</parameter> is
       <constant>NULL</constant>, no action occurs.</para>
     </refsect2>
     <refsect2>
       <title>Non-standard API</title>
-      <para>The <function>mallocx<parameter/></function>,
-      <function>rallocx<parameter/></function>,
-      <function>xallocx<parameter/></function>,
-      <function>sallocx<parameter/></function>,
-      <function>dallocx<parameter/></function>,
-      <function>sdallocx<parameter/></function>, and
-      <function>nallocx<parameter/></function> functions all have a
+      <para>The <function>mallocx()</function>,
+      <function>rallocx()</function>,
+      <function>xallocx()</function>,
+      <function>sallocx()</function>,
+      <function>dallocx()</function>,
+      <function>sdallocx()</function>, and
+      <function>nallocx()</function> functions all have a
       <parameter>flags</parameter> argument that can be used to specify
       options.  The functions only check the options that are contextually
       relevant.  Use bitwise or (<code language="C">|</code>) operations to
       specify one or more of the following:
         <variablelist>
           <varlistentry id="MALLOCX_LG_ALIGN">
             <term><constant>MALLOCX_LG_ALIGN(<parameter>la</parameter>)
             </constant></term>
@@ -302,87 +302,87 @@
             <parameter>a</parameter>.  This macro has no effect for regions that
             were allocated via an arena other than the one specified.  This
             macro does not validate that <parameter>a</parameter> specifies an
             arena index in the valid range.</para></listitem>
           </varlistentry>
         </variablelist>
       </para>
 
-      <para>The <function>mallocx<parameter/></function> function allocates at
+      <para>The <function>mallocx()</function> function allocates at
       least <parameter>size</parameter> bytes of memory, and returns a pointer
       to the base address of the allocation.  Behavior is undefined if
       <parameter>size</parameter> is <constant>0</constant>.</para>
 
-      <para>The <function>rallocx<parameter/></function> function resizes the
+      <para>The <function>rallocx()</function> function resizes the
       allocation at <parameter>ptr</parameter> to be at least
       <parameter>size</parameter> bytes, and returns a pointer to the base
       address of the resulting allocation, which may or may not have moved from
       its original location.  Behavior is undefined if
       <parameter>size</parameter> is <constant>0</constant>.</para>
 
-      <para>The <function>xallocx<parameter/></function> function resizes the
+      <para>The <function>xallocx()</function> function resizes the
       allocation at <parameter>ptr</parameter> in place to be at least
       <parameter>size</parameter> bytes, and returns the real size of the
       allocation.  If <parameter>extra</parameter> is non-zero, an attempt is
       made to resize the allocation to be at least <code
       language="C">(<parameter>size</parameter> +
       <parameter>extra</parameter>)</code> bytes, though inability to allocate
       the extra byte(s) will not by itself result in failure to resize.
       Behavior is undefined if <parameter>size</parameter> is
       <constant>0</constant>, or if <code
       language="C">(<parameter>size</parameter> + <parameter>extra</parameter>
       &gt; <constant>SIZE_T_MAX</constant>)</code>.</para>
 
-      <para>The <function>sallocx<parameter/></function> function returns the
+      <para>The <function>sallocx()</function> function returns the
       real size of the allocation at <parameter>ptr</parameter>.</para>
 
-      <para>The <function>dallocx<parameter/></function> function causes the
+      <para>The <function>dallocx()</function> function causes the
       memory referenced by <parameter>ptr</parameter> to be made available for
       future allocations.</para>
 
-      <para>The <function>sdallocx<parameter/></function> function is an
-      extension of <function>dallocx<parameter/></function> with a
+      <para>The <function>sdallocx()</function> function is an
+      extension of <function>dallocx()</function> with a
       <parameter>size</parameter> parameter to allow the caller to pass in the
       allocation size as an optimization.  The minimum valid input size is the
       original requested size of the allocation, and the maximum valid input
       size is the corresponding value returned by
-      <function>nallocx<parameter/></function> or
-      <function>sallocx<parameter/></function>.</para>
+      <function>nallocx()</function> or
+      <function>sallocx()</function>.</para>
 
-      <para>The <function>nallocx<parameter/></function> function allocates no
+      <para>The <function>nallocx()</function> function allocates no
       memory, but it performs the same size computation as the
-      <function>mallocx<parameter/></function> function, and returns the real
+      <function>mallocx()</function> function, and returns the real
       size of the allocation that would result from the equivalent
-      <function>mallocx<parameter/></function> function call, or
+      <function>mallocx()</function> function call, or
       <constant>0</constant> if the inputs exceed the maximum supported size
       class and/or alignment.  Behavior is undefined if
       <parameter>size</parameter> is <constant>0</constant>.</para>
 
-      <para>The <function>mallctl<parameter/></function> function provides a
+      <para>The <function>mallctl()</function> function provides a
       general interface for introspecting the memory allocator, as well as
       setting modifiable parameters and triggering actions.  The
       period-separated <parameter>name</parameter> argument specifies a
       location in a tree-structured namespace; see the <xref
       linkend="mallctl_namespace" xrefstyle="template:%t"/> section for
       documentation on the tree contents.  To read a value, pass a pointer via
       <parameter>oldp</parameter> to adequate space to contain the value, and a
       pointer to its length via <parameter>oldlenp</parameter>; otherwise pass
       <constant>NULL</constant> and <constant>NULL</constant>.  Similarly, to
       write a value, pass a pointer to the value via
       <parameter>newp</parameter>, and its length via
       <parameter>newlen</parameter>; otherwise pass <constant>NULL</constant>
       and <constant>0</constant>.</para>
 
-      <para>The <function>mallctlnametomib<parameter/></function> function
+      <para>The <function>mallctlnametomib()</function> function
       provides a way to avoid repeated name lookups for applications that
       repeatedly query the same portion of the namespace, by translating a name
-      to a &ldquo;Management Information Base&rdquo; (MIB) that can be passed
-      repeatedly to <function>mallctlbymib<parameter/></function>.  Upon
-      successful return from <function>mallctlnametomib<parameter/></function>,
+      to a <quote>Management Information Base</quote> (MIB) that can be passed
+      repeatedly to <function>mallctlbymib()</function>.  Upon
+      successful return from <function>mallctlnametomib()</function>,
       <parameter>mibp</parameter> contains an array of
       <parameter>*miblenp</parameter> integers, where
       <parameter>*miblenp</parameter> is the lesser of the number of components
       in <parameter>name</parameter> and the input value of
       <parameter>*miblenp</parameter>.  Thus it is possible to pass a
       <parameter>*miblenp</parameter> that is smaller than the number of
       period-separated name components, which results in a partial MIB that can
       be used as the basis for constructing a complete MIB.  For name
@@ -405,67 +405,68 @@ for (i = 0; i < nbins; i++) {
 	size_t bin_size;
 
 	mib[2] = i;
 	len = sizeof(bin_size);
 	mallctlbymib(mib, miblen, &bin_size, &len, NULL, 0);
 	/* Do something with bin_size... */
 }]]></programlisting></para>
 
-      <para>The <function>malloc_stats_print<parameter/></function> function
-      writes human-readable summary statistics via the
-      <parameter>write_cb</parameter> callback function pointer and
-      <parameter>cbopaque</parameter> data passed to
-      <parameter>write_cb</parameter>, or
-      <function>malloc_message<parameter/></function> if
-      <parameter>write_cb</parameter> is <constant>NULL</constant>.  This
-      function can be called repeatedly.  General information that never
-      changes during execution can be omitted by specifying "g" as a character
+      <para>The <function>malloc_stats_print()</function> function writes
+      summary statistics via the <parameter>write_cb</parameter> callback
+      function pointer and <parameter>cbopaque</parameter> data passed to
+      <parameter>write_cb</parameter>, or <function>malloc_message()</function>
+      if <parameter>write_cb</parameter> is <constant>NULL</constant>.  The
+      statistics are presented in human-readable form unless <quote>J</quote> is
+      specified as a character within the <parameter>opts</parameter> string, in
+      which case the statistics are presented in <ulink
+      url="http://www.json.org/">JSON format</ulink>.  This function can be
+      called repeatedly.  General information that never changes during
+      execution can be omitted by specifying <quote>g</quote> as a character
       within the <parameter>opts</parameter> string.  Note that
-      <function>malloc_message<parameter/></function> uses the
-      <function>mallctl*<parameter/></function> functions internally, so
-      inconsistent statistics can be reported if multiple threads use these
-      functions simultaneously.  If <option>--enable-stats</option> is
-      specified during configuration, &ldquo;m&rdquo; and &ldquo;a&rdquo; can
-      be specified to omit merged arena and per arena statistics, respectively;
-      &ldquo;b&rdquo;, &ldquo;l&rdquo;, and &ldquo;h&rdquo; can be specified to
-      omit per size class statistics for bins, large objects, and huge objects,
-      respectively.  Unrecognized characters are silently ignored.  Note that
-      thread caching may prevent some statistics from being completely up to
-      date, since extra locking would be required to merge counters that track
-      thread cache operations.
-      </para>
+      <function>malloc_message()</function> uses the
+      <function>mallctl*()</function> functions internally, so inconsistent
+      statistics can be reported if multiple threads use these functions
+      simultaneously.  If <option>--enable-stats</option> is specified during
+      configuration, <quote>m</quote> and <quote>a</quote> can be specified to
+      omit merged arena and per arena statistics, respectively;
+      <quote>b</quote>, <quote>l</quote>, and <quote>h</quote> can be specified
+      to omit per size class statistics for bins, large objects, and huge
+      objects, respectively.  Unrecognized characters are silently ignored.
+      Note that thread caching may prevent some statistics from being completely
+      up to date, since extra locking would be required to merge counters that
+      track thread cache operations.</para>
 
-      <para>The <function>malloc_usable_size<parameter/></function> function
+      <para>The <function>malloc_usable_size()</function> function
       returns the usable size of the allocation pointed to by
       <parameter>ptr</parameter>.  The return value may be larger than the size
       that was requested during allocation.  The
-      <function>malloc_usable_size<parameter/></function> function is not a
-      mechanism for in-place <function>realloc<parameter/></function>; rather
+      <function>malloc_usable_size()</function> function is not a
+      mechanism for in-place <function>realloc()</function>; rather
       it is provided solely as a tool for introspection purposes.  Any
       discrepancy between the requested allocation size and the size reported
-      by <function>malloc_usable_size<parameter/></function> should not be
+      by <function>malloc_usable_size()</function> should not be
       depended on, since such behavior is entirely implementation-dependent.
       </para>
     </refsect2>
   </refsect1>
   <refsect1 id="tuning">
     <title>TUNING</title>
     <para>Once, when the first call is made to one of the memory allocation
     routines, the allocator initializes its internals based in part on various
     options that can be specified at compile- or run-time.</para>
 
     <para>The string specified via <option>--with-malloc-conf</option>, the
     string pointed to by the global variable <varname>malloc_conf</varname>, the
-    &ldquo;name&rdquo; of the file referenced by the symbolic link named
+    <quote>name</quote> of the file referenced by the symbolic link named
     <filename class="symlink">/etc/malloc.conf</filename>, and the value of the
     environment variable <envar>MALLOC_CONF</envar>, will be interpreted, in
     that order, from left to right as options.  Note that
     <varname>malloc_conf</varname> may be read before
-    <function>main<parameter/></function> is entered, so the declaration of
+    <function>main()</function> is entered, so the declaration of
     <varname>malloc_conf</varname> should specify an initializer that contains
     the final value to be read by jemalloc.  <option>--with-malloc-conf</option>
     and <varname>malloc_conf</varname> are compile-time mechanisms, whereas
     <filename class="symlink">/etc/malloc.conf</filename> and
     <envar>MALLOC_CONF</envar> can be safely set any time prior to program
     invocation.</para>
 
     <para>An options string is a comma-separated list of option:value pairs.
@@ -535,33 +536,33 @@ for (i = 0; i < nbins; i++) {
     up to the nearest power of two that is at least <code
     language="C">sizeof(<type>double</type>)</code>.  All other object size
     classes are multiples of the quantum, spaced such that there are four size
     classes for each doubling in size, which limits internal fragmentation to
     approximately 20% for all but the smallest size classes.  Small size classes
     are smaller than four times the page size, large size classes are smaller
     than the chunk size (see the <link
     linkend="opt.lg_chunk"><mallctl>opt.lg_chunk</mallctl></link> option), and
-    huge size classes extend from the chunk size up to one size class less than
-    the full address space size.</para>
+    huge size classes extend from the chunk size up to the largest size class
+    that does not exceed <constant>PTRDIFF_MAX</constant>.</para>
 
     <para>Allocations are packed tightly together, which can be an issue for
     multi-threaded applications.  If you need to assure that allocations do not
     suffer from cacheline sharing, round your allocation requests up to the
     nearest multiple of the cacheline size, or specify cacheline alignment when
     allocating.</para>
 
-    <para>The <function>realloc<parameter/></function>,
-    <function>rallocx<parameter/></function>, and
-    <function>xallocx<parameter/></function> functions may resize allocations
+    <para>The <function>realloc()</function>,
+    <function>rallocx()</function>, and
+    <function>xallocx()</function> functions may resize allocations
     without moving them under limited circumstances.  Unlike the
-    <function>*allocx<parameter/></function> API, the standard API does not
+    <function>*allocx()</function> API, the standard API does not
     officially round up the usable size of an allocation to the nearest size
     class, so technically it is necessary to call
-    <function>realloc<parameter/></function> to grow e.g. a 9-byte allocation to
+    <function>realloc()</function> to grow e.g. a 9-byte allocation to
     16 bytes, or shrink a 16-byte allocation to 9 bytes.  Growth and shrinkage
     trivially succeeds in place as long as the pre-size and post-size both round
     up to the same size class.  No other API guarantees are made regarding
     in-place resizing, but the current implementation also tries to resize large
     and huge allocations in place, as long as the pre-size and post-size are
     both large or both huge.  In such cases shrinkage always succeeds for large
     size classes, but for huge size classes the chunk allocator must support
     splitting (see <link
@@ -654,17 +655,17 @@ for (i = 0; i < nbins; i++) {
           <entry>128 KiB</entry>
           <entry>[640 KiB, 768 KiB, 896 KiB, 1 MiB]</entry>
         </row>
         <row>
           <entry>256 KiB</entry>
           <entry>[1280 KiB, 1536 KiB, 1792 KiB]</entry>
         </row>
         <row>
-          <entry morerows="6">Huge</entry>
+          <entry morerows="8">Huge</entry>
           <entry>256 KiB</entry>
           <entry>[2 MiB]</entry>
         </row>
         <row>
           <entry>512 KiB</entry>
           <entry>[2560 KiB, 3 MiB, 3584 KiB, 4 MiB]</entry>
         </row>
         <row>
@@ -682,24 +683,32 @@ for (i = 0; i < nbins; i++) {
         <row>
           <entry>8 MiB</entry>
           <entry>[40 MiB, 48 MiB, 56 MiB, 64 MiB]</entry>
         </row>
         <row>
           <entry>...</entry>
           <entry>...</entry>
         </row>
+        <row>
+          <entry>512 PiB</entry>
+          <entry>[2560 PiB, 3 EiB, 3584 PiB, 4 EiB]</entry>
+        </row>
+        <row>
+          <entry>1 EiB</entry>
+          <entry>[5 EiB, 6 EiB, 7 EiB]</entry>
+        </row>
       </tbody>
       </tgroup>
     </table>
   </refsect1>
   <refsect1 id="mallctl_namespace">
     <title>MALLCTL NAMESPACE</title>
     <para>The following names are defined in the namespace accessible via the
-    <function>mallctl*<parameter/></function> functions.  Value types are
+    <function>mallctl*()</function> functions.  Value types are
     specified in parentheses, their readable/writable statuses are encoded as
     <literal>rw</literal>, <literal>r-</literal>, <literal>-w</literal>, or
     <literal>--</literal>, and required build configuration flags follow, if
     any.  A name element encoded as <literal>&lt;i&gt;</literal> or
     <literal>&lt;j&gt;</literal> indicates an integer component, where the
     integer varies from 0 to some upper value that must be determined via
     introspection.  In the case of <mallctl>stats.arenas.&lt;i&gt;.*</mallctl>,
     <literal>&lt;i&gt;</literal> equal to <link
@@ -720,17 +729,17 @@ for (i = 0; i < nbins; i++) {
 
       <varlistentry id="epoch">
         <term>
           <mallctl>epoch</mallctl>
           (<type>uint64_t</type>)
           <literal>rw</literal>
         </term>
         <listitem><para>If a value is passed in, refresh the data from which
-        the <function>mallctl*<parameter/></function> functions report values,
+        the <function>mallctl*()</function> functions report values,
         and increment the epoch.  Return the current epoch.  This is useful for
         detecting whether another thread caused a refresh.</para></listitem>
       </varlistentry>
 
       <varlistentry id="config.cache_oblivious">
         <term>
           <mallctl>config.cache_oblivious</mallctl>
           (<type>bool</type>)
@@ -904,22 +913,22 @@ for (i = 0; i < nbins; i++) {
         </term>
         <listitem><para>dss (<citerefentry><refentrytitle>sbrk</refentrytitle>
         <manvolnum>2</manvolnum></citerefentry>) allocation precedence as
         related to <citerefentry><refentrytitle>mmap</refentrytitle>
         <manvolnum>2</manvolnum></citerefentry> allocation.  The following
         settings are supported if
         <citerefentry><refentrytitle>sbrk</refentrytitle>
         <manvolnum>2</manvolnum></citerefentry> is supported by the operating
-        system: &ldquo;disabled&rdquo;, &ldquo;primary&rdquo;, and
-        &ldquo;secondary&rdquo;; otherwise only &ldquo;disabled&rdquo; is
-        supported.  The default is &ldquo;secondary&rdquo; if
+        system: <quote>disabled</quote>, <quote>primary</quote>, and
+        <quote>secondary</quote>; otherwise only <quote>disabled</quote> is
+        supported.  The default is <quote>secondary</quote> if
         <citerefentry><refentrytitle>sbrk</refentrytitle>
         <manvolnum>2</manvolnum></citerefentry> is supported by the operating
-        system; &ldquo;disabled&rdquo; otherwise.
+        system; <quote>disabled</quote> otherwise.
         </para></listitem>
       </varlistentry>
 
       <varlistentry id="opt.lg_chunk">
         <term>
           <mallctl>opt.lg_chunk</mallctl>
           (<type>size_t</type>)
           <literal>r-</literal>
@@ -1000,50 +1009,51 @@ for (i = 0; i < nbins; i++) {
 
       <varlistentry id="opt.stats_print">
         <term>
           <mallctl>opt.stats_print</mallctl>
           (<type>bool</type>)
           <literal>r-</literal>
         </term>
         <listitem><para>Enable/disable statistics printing at exit.  If
-        enabled, the <function>malloc_stats_print<parameter/></function>
+        enabled, the <function>malloc_stats_print()</function>
         function is called at program exit via an
         <citerefentry><refentrytitle>atexit</refentrytitle>
         <manvolnum>3</manvolnum></citerefentry> function.  If
         <option>--enable-stats</option> is specified during configuration, this
         has the potential to cause deadlock for a multi-threaded process that
         exits while one or more threads are executing in the memory allocation
-        functions.  Furthermore, <function>atexit<parameter/></function> may
+        functions.  Furthermore, <function>atexit()</function> may
         allocate memory during application initialization and then deadlock
         internally when jemalloc in turn calls
-        <function>atexit<parameter/></function>, so this option is not
+        <function>atexit()</function>, so this option is not
         universally usable (though the application can register its own
-        <function>atexit<parameter/></function> function with equivalent
+        <function>atexit()</function> function with equivalent
         functionality).  Therefore, this option should only be used with care;
         it is primarily intended as a performance tuning aid during application
         development.  This option is disabled by default.</para></listitem>
       </varlistentry>
 
       <varlistentry id="opt.junk">
         <term>
           <mallctl>opt.junk</mallctl>
           (<type>const char *</type>)
           <literal>r-</literal>
           [<option>--enable-fill</option>]
         </term>
-        <listitem><para>Junk filling.  If set to "alloc", each byte of
-        uninitialized allocated memory will be initialized to
-        <literal>0xa5</literal>.  If set to "free", all deallocated memory will
-        be initialized to <literal>0x5a</literal>.  If set to "true", both
-        allocated and deallocated memory will be initialized, and if set to
-        "false", junk filling be disabled entirely.  This is intended for
-        debugging and will impact performance negatively.  This option is
-        "false" by default unless <option>--enable-debug</option> is specified
-        during configuration, in which case it is "true" by default unless
+        <listitem><para>Junk filling.  If set to <quote>alloc</quote>, each byte
+        of uninitialized allocated memory will be initialized to
+        <literal>0xa5</literal>.  If set to <quote>free</quote>, all deallocated
+        memory will be initialized to <literal>0x5a</literal>.  If set to
+        <quote>true</quote>, both allocated and deallocated memory will be
+        initialized, and if set to <quote>false</quote>, junk filling will be
+        disabled entirely.  This is intended for debugging and will impact
+        performance negatively.  This option is <quote>false</quote> by default
+        unless <option>--enable-debug</option> is specified during
+        configuration, in which case it is <quote>true</quote> by default unless
         running inside <ulink
         url="http://valgrind.org/">Valgrind</ulink>.</para></listitem>
       </varlistentry>
 
       <varlistentry id="opt.quarantine">
         <term>
           <mallctl>opt.quarantine</mallctl>
           (<type>size_t</type>)
@@ -1088,18 +1098,18 @@ for (i = 0; i < nbins; i++) {
           <mallctl>opt.zero</mallctl>
           (<type>bool</type>)
           <literal>r-</literal>
           [<option>--enable-fill</option>]
         </term>
         <listitem><para>Zero filling enabled/disabled.  If enabled, each byte
         of uninitialized allocated memory will be initialized to 0.  Note that
         this initialization only happens once for each byte, so
-        <function>realloc<parameter/></function> and
-        <function>rallocx<parameter/></function> calls do not zero memory that
+        <function>realloc()</function> and
+        <function>rallocx()</function> calls do not zero memory that
         was previously allocated.  This is intended for debugging and will
         impact performance negatively.  This option is disabled by default.
         </para></listitem>
       </varlistentry>
 
       <varlistentry id="opt.utrace">
         <term>
           <mallctl>opt.utrace</mallctl>
@@ -1312,21 +1322,21 @@ malloc_conf = "xmalloc:true";]]></progra
         </term>
         <listitem><para>Use an
         <citerefentry><refentrytitle>atexit</refentrytitle>
         <manvolnum>3</manvolnum></citerefentry> function to dump final memory
         usage to a file named according to the pattern
         <filename>&lt;prefix&gt;.&lt;pid&gt;.&lt;seq&gt;.f.heap</filename>,
         where <literal>&lt;prefix&gt;</literal> is controlled by the <link
         linkend="opt.prof_prefix"><mallctl>opt.prof_prefix</mallctl></link>
-        option.  Note that <function>atexit<parameter/></function> may allocate
+        option.  Note that <function>atexit()</function> may allocate
         memory during application initialization and then deadlock internally
-        when jemalloc in turn calls <function>atexit<parameter/></function>, so
+        when jemalloc in turn calls <function>atexit()</function>, so
         this option is not universally usable (though the application can
-        register its own <function>atexit<parameter/></function> function with
+        register its own <function>atexit()</function> function with
         equivalent functionality).  This option is disabled by
         default.</para></listitem>
       </varlistentry>
 
       <varlistentry id="opt.prof_leak">
         <term>
           <mallctl>opt.prof_leak</mallctl>
           (<type>bool</type>)
@@ -1375,17 +1385,17 @@ malloc_conf = "xmalloc:true";]]></progra
           (<type>uint64_t *</type>)
           <literal>r-</literal>
           [<option>--enable-stats</option>]
         </term>
         <listitem><para>Get a pointer to the the value that is returned by the
         <link
         linkend="thread.allocated"><mallctl>thread.allocated</mallctl></link>
         mallctl.  This is useful for avoiding the overhead of repeated
-        <function>mallctl*<parameter/></function> calls.</para></listitem>
+        <function>mallctl*()</function> calls.</para></listitem>
       </varlistentry>
 
       <varlistentry id="thread.deallocated">
         <term>
           <mallctl>thread.deallocated</mallctl>
           (<type>uint64_t</type>)
           <literal>r-</literal>
           [<option>--enable-stats</option>]
@@ -1402,17 +1412,17 @@ malloc_conf = "xmalloc:true";]]></progra
           (<type>uint64_t *</type>)
           <literal>r-</literal>
           [<option>--enable-stats</option>]
         </term>
         <listitem><para>Get a pointer to the the value that is returned by the
         <link
         linkend="thread.deallocated"><mallctl>thread.deallocated</mallctl></link>
         mallctl.  This is useful for avoiding the overhead of repeated
-        <function>mallctl*<parameter/></function> calls.</para></listitem>
+        <function>mallctl*()</function> calls.</para></listitem>
       </varlistentry>
 
       <varlistentry id="thread.tcache.enabled">
         <term>
           <mallctl>thread.tcache.enabled</mallctl>
           (<type>bool</type>)
           <literal>rw</literal>
           [<option>--enable-tcache</option>]
@@ -1545,16 +1555,33 @@ malloc_conf = "xmalloc:true";]]></progra
         arena &lt;i&gt;, or for all arenas if &lt;i&gt; equals <link
         linkend="arenas.narenas"><mallctl>arenas.narenas</mallctl></link>.
         The proportion of unused dirty pages to be purged depends on the current
         time; see <link
         linkend="opt.decay_time"><mallctl>opt.decay_time</mallctl></link> for
         details.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="arena.i.reset">
+        <term>
+          <mallctl>arena.&lt;i&gt;.reset</mallctl>
+          (<type>void</type>)
+          <literal>--</literal>
+        </term>
+        <listitem><para>Discard all of the arena's extant allocations.  This
+        interface can only be used with arenas created via <link
+        linkend="arenas.extend"><mallctl>arenas.extend</mallctl></link>.  None
+        of the arena's discarded/cached allocations may be accessed afterward.  As
+        part of this requirement, all thread caches which were used to
+        allocate/deallocate in conjunction with the arena must be flushed
+        beforehand.  This interface cannot be used if running inside Valgrind,
+        nor if the <link linkend="opt.quarantine">quarantine</link> size is
+        non-zero.</para></listitem>
+      </varlistentry>
+
       <varlistentry id="arena.i.dss">
         <term>
           <mallctl>arena.&lt;i&gt;.dss</mallctl>
           (<type>const char *</type>)
           <literal>rw</literal>
         </term>
         <listitem><para>Set the precedence of dss allocation as related to mmap
         allocation for arena &lt;i&gt;, or for all arenas if &lt;i&gt; equals
@@ -2156,16 +2183,35 @@ typedef struct {
         allocator.  This is a multiple of the chunk size, and is larger than
         <link linkend="stats.active"><mallctl>stats.active</mallctl></link>.
         This does not include inactive chunks, even those that contain unused
         dirty pages, which means that there is no strict ordering between this
         and <link
         linkend="stats.resident"><mallctl>stats.resident</mallctl></link>.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="stats.retained">
+        <term>
+          <mallctl>stats.retained</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Total number of bytes in virtual memory mappings that
+        were retained rather than being returned to the operating system via
+        e.g. <citerefentry><refentrytitle>munmap</refentrytitle>
+        <manvolnum>2</manvolnum></citerefentry>.  Retained virtual memory is
+        typically untouched, decommitted, or purged, so it has no strongly
+        associated physical memory (see <link
+        linkend="arena.i.chunk_hooks">chunk hooks</link> for details).  Retained
+        memory is excluded from mapped memory statistics, e.g. <link
+        linkend="stats.mapped"><mallctl>stats.mapped</mallctl></link>.
+        </para></listitem>
+      </varlistentry>
+
       <varlistentry id="stats.arenas.i.dss">
         <term>
           <mallctl>stats.arenas.&lt;i&gt;.dss</mallctl>
           (<type>const char *</type>)
           <literal>r-</literal>
         </term>
         <listitem><para>dss (<citerefentry><refentrytitle>sbrk</refentrytitle>
         <manvolnum>2</manvolnum></citerefentry>) allocation precedence as
@@ -2236,16 +2282,28 @@ typedef struct {
           <mallctl>stats.arenas.&lt;i&gt;.mapped</mallctl>
           (<type>size_t</type>)
           <literal>r-</literal>
           [<option>--enable-stats</option>]
         </term>
         <listitem><para>Number of mapped bytes.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="stats.arenas.i.retained">
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.retained</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Number of retained bytes.  See <link
+        linkend="stats.retained"><mallctl>stats.retained</mallctl></link> for
+        details.</para></listitem>
+      </varlistentry>
+
       <varlistentry id="stats.arenas.i.metadata.mapped">
         <term>
           <mallctl>stats.arenas.&lt;i&gt;.metadata.mapped</mallctl>
           (<type>size_t</type>)
           <literal>r-</literal>
           [<option>--enable-stats</option>]
         </term>
         <listitem><para>Number of mapped bytes in arena chunk headers, which
@@ -2673,17 +2731,17 @@ MAPPED_LIBRARIES:
     <title>DEBUGGING MALLOC PROBLEMS</title>
     <para>When debugging, it is a good idea to configure/build jemalloc with
     the <option>--enable-debug</option> and <option>--enable-fill</option>
     options, and recompile the program with suitable options and symbols for
     debugger support.  When so configured, jemalloc incorporates a wide variety
     of run-time assertions that catch application errors such as double-free,
     write-after-free, etc.</para>
 
-    <para>Programs often accidentally depend on &ldquo;uninitialized&rdquo;
+    <para>Programs often accidentally depend on <quote>uninitialized</quote>
     memory actually being filled with zero bytes.  Junk filling
     (see the <link linkend="opt.junk"><mallctl>opt.junk</mallctl></link>
     option) tends to expose such bugs in the form of obviously incorrect
     results and/or coredumps.  Conversely, zero
     filling (see the <link
     linkend="opt.zero"><mallctl>opt.zero</mallctl></link> option) eliminates
     the symptoms of such bugs.  Between these two options, it is usually
     possible to quickly detect, diagnose, and eliminate such bugs.</para>
@@ -2702,39 +2760,39 @@ MAPPED_LIBRARIES:
     dumping core.  If the <link
     linkend="opt.abort"><mallctl>opt.abort</mallctl></link> option is set, most
     warnings are treated as errors.</para>
 
     <para>The <varname>malloc_message</varname> variable allows the programmer
     to override the function which emits the text strings forming the errors
     and warnings if for some reason the <constant>STDERR_FILENO</constant> file
     descriptor is not suitable for this.
-    <function>malloc_message<parameter/></function> takes the
+    <function>malloc_message()</function> takes the
     <parameter>cbopaque</parameter> pointer argument that is
     <constant>NULL</constant> unless overridden by the arguments in a call to
-    <function>malloc_stats_print<parameter/></function>, followed by a string
+    <function>malloc_stats_print()</function>, followed by a string
     pointer.  Please note that doing anything which tries to allocate memory in
     this function is likely to result in a crash or deadlock.</para>
 
     <para>All messages are prefixed by
-    &ldquo;<computeroutput>&lt;jemalloc&gt;: </computeroutput>&rdquo;.</para>
+    <quote><computeroutput>&lt;jemalloc&gt;: </computeroutput></quote>.</para>
   </refsect1>
   <refsect1 id="return_values">
     <title>RETURN VALUES</title>
     <refsect2>
       <title>Standard API</title>
-      <para>The <function>malloc<parameter/></function> and
-      <function>calloc<parameter/></function> functions return a pointer to the
+      <para>The <function>malloc()</function> and
+      <function>calloc()</function> functions return a pointer to the
       allocated memory if successful; otherwise a <constant>NULL</constant>
       pointer is returned and <varname>errno</varname> is set to
       <errorname>ENOMEM</errorname>.</para>
 
-      <para>The <function>posix_memalign<parameter/></function> function
+      <para>The <function>posix_memalign()</function> function
       returns the value 0 if successful; otherwise it returns an error value.
-      The <function>posix_memalign<parameter/></function> function will fail
+      The <function>posix_memalign()</function> function will fail
       if:
         <variablelist>
           <varlistentry>
             <term><errorname>EINVAL</errorname></term>
 
             <listitem><para>The <parameter>alignment</parameter> parameter is
             not a power of 2 at least as large as
             <code language="C">sizeof(<type>void *</type>)</code>.
@@ -2743,75 +2801,75 @@ MAPPED_LIBRARIES:
           <varlistentry>
             <term><errorname>ENOMEM</errorname></term>
 
             <listitem><para>Memory allocation error.</para></listitem>
           </varlistentry>
         </variablelist>
       </para>
 
-      <para>The <function>aligned_alloc<parameter/></function> function returns
+      <para>The <function>aligned_alloc()</function> function returns
       a pointer to the allocated memory if successful; otherwise a
       <constant>NULL</constant> pointer is returned and
       <varname>errno</varname> is set.  The
-      <function>aligned_alloc<parameter/></function> function will fail if:
+      <function>aligned_alloc()</function> function will fail if:
         <variablelist>
           <varlistentry>
             <term><errorname>EINVAL</errorname></term>
 
             <listitem><para>The <parameter>alignment</parameter> parameter is
             not a power of 2.
             </para></listitem>
           </varlistentry>
           <varlistentry>
             <term><errorname>ENOMEM</errorname></term>
 
             <listitem><para>Memory allocation error.</para></listitem>
           </varlistentry>
         </variablelist>
       </para>
 
-      <para>The <function>realloc<parameter/></function> function returns a
+      <para>The <function>realloc()</function> function returns a
       pointer, possibly identical to <parameter>ptr</parameter>, to the
       allocated memory if successful; otherwise a <constant>NULL</constant>
       pointer is returned, and <varname>errno</varname> is set to
       <errorname>ENOMEM</errorname> if the error was the result of an
-      allocation failure.  The <function>realloc<parameter/></function>
+      allocation failure.  The <function>realloc()</function>
       function always leaves the original buffer intact when an error occurs.
       </para>
 
-      <para>The <function>free<parameter/></function> function returns no
+      <para>The <function>free()</function> function returns no
       value.</para>
     </refsect2>
     <refsect2>
       <title>Non-standard API</title>
-      <para>The <function>mallocx<parameter/></function> and
-      <function>rallocx<parameter/></function> functions return a pointer to
+      <para>The <function>mallocx()</function> and
+      <function>rallocx()</function> functions return a pointer to
       the allocated memory if successful; otherwise a <constant>NULL</constant>
       pointer is returned to indicate insufficient contiguous memory was
       available to service the allocation request.  </para>
 
-      <para>The <function>xallocx<parameter/></function> function returns the
+      <para>The <function>xallocx()</function> function returns the
       real size of the resulting resized allocation pointed to by
       <parameter>ptr</parameter>, which is a value less than
       <parameter>size</parameter> if the allocation could not be adequately
       grown in place.  </para>
 
-      <para>The <function>sallocx<parameter/></function> function returns the
+      <para>The <function>sallocx()</function> function returns the
       real size of the allocation pointed to by <parameter>ptr</parameter>.
       </para>
 
-      <para>The <function>nallocx<parameter/></function> returns the real size
+      <para>The <function>nallocx()</function> function returns the real size
       that would result from a successful equivalent
-      <function>mallocx<parameter/></function> function call, or zero if
+      <function>mallocx()</function> function call, or zero if
       insufficient memory is available to perform the size computation.  </para>
 
-      <para>The <function>mallctl<parameter/></function>,
-      <function>mallctlnametomib<parameter/></function>, and
-      <function>mallctlbymib<parameter/></function> functions return 0 on
+      <para>The <function>mallctl()</function>,
+      <function>mallctlnametomib()</function>, and
+      <function>mallctlbymib()</function> functions return 0 on
       success; otherwise they return an error value.  The functions will fail
       if:
         <variablelist>
           <varlistentry>
             <term><errorname>EINVAL</errorname></term>
 
             <listitem><para><parameter>newp</parameter> is not
             <constant>NULL</constant>, and <parameter>newlen</parameter> is too
@@ -2837,23 +2895,23 @@ MAPPED_LIBRARIES:
 
             <listitem><para>A memory allocation failure
             occurred.</para></listitem>
           </varlistentry>
           <varlistentry>
             <term><errorname>EFAULT</errorname></term>
 
             <listitem><para>An interface with side effects failed in some way
-            not directly related to <function>mallctl*<parameter/></function>
+            not directly related to <function>mallctl*()</function>
             read/write processing.</para></listitem>
           </varlistentry>
         </variablelist>
       </para>
 
-      <para>The <function>malloc_usable_size<parameter/></function> function
+      <para>The <function>malloc_usable_size()</function> function
       returns the usable size of the allocation pointed to by
       <parameter>ptr</parameter>.  </para>
     </refsect2>
   </refsect1>
   <refsect1 id="environment">
     <title>ENVIRONMENT</title>
     <para>The following environment variable affects the execution of the
     allocation functions:
@@ -2891,18 +2949,18 @@ malloc_conf = "lg_chunk:24";]]></program
     <manvolnum>3</manvolnum></citerefentry>,
     <citerefentry><refentrytitle>atexit</refentrytitle>
     <manvolnum>3</manvolnum></citerefentry>,
     <citerefentry><refentrytitle>getpagesize</refentrytitle>
     <manvolnum>3</manvolnum></citerefentry></para>
   </refsect1>
   <refsect1 id="standards">
     <title>STANDARDS</title>
-    <para>The <function>malloc<parameter/></function>,
-    <function>calloc<parameter/></function>,
-    <function>realloc<parameter/></function>, and
-    <function>free<parameter/></function> functions conform to ISO/IEC
-    9899:1990 (&ldquo;ISO C90&rdquo;).</para>
+    <para>The <function>malloc()</function>,
+    <function>calloc()</function>,
+    <function>realloc()</function>, and
+    <function>free()</function> functions conform to ISO/IEC
+    9899:1990 (<quote>ISO C90</quote>).</para>
 
-    <para>The <function>posix_memalign<parameter/></function> function conforms
-    to IEEE Std 1003.1-2001 (&ldquo;POSIX.1&rdquo;).</para>
+    <para>The <function>posix_memalign()</function> function conforms
+    to IEEE Std 1003.1-2001 (<quote>POSIX.1</quote>).</para>
   </refsect1>
 </refentry>
--- a/memory/jemalloc/src/doc/stylesheet.xsl
+++ b/memory/jemalloc/src/doc/stylesheet.xsl
@@ -1,7 +1,10 @@
 <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
   <xsl:param name="funcsynopsis.style">ansi</xsl:param>
-  <xsl:param name="function.parens" select="1"/>
+  <xsl:param name="function.parens" select="0"/>
+  <xsl:template match="function">
+    <xsl:call-template name="inline.monoseq"/>
+  </xsl:template>
   <xsl:template match="mallctl">
-    "<xsl:call-template name="inline.monoseq"/>"
+    <quote><xsl:call-template name="inline.monoseq"/></quote>
   </xsl:template>
 </xsl:stylesheet>
--- a/memory/jemalloc/src/include/jemalloc/internal/arena.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/arena.h
@@ -31,21 +31,23 @@ typedef enum {
 } purge_mode_t;
 #define	PURGE_DEFAULT		purge_mode_ratio
 /* Default decay time in seconds. */
 #define	DECAY_TIME_DEFAULT	10
 /* Number of event ticks between time checks. */
 #define	DECAY_NTICKS_PER_UPDATE	1000
 
 typedef struct arena_runs_dirty_link_s arena_runs_dirty_link_t;
+typedef struct arena_avail_links_s arena_avail_links_t;
 typedef struct arena_run_s arena_run_t;
 typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t;
 typedef struct arena_chunk_map_misc_s arena_chunk_map_misc_t;
 typedef struct arena_chunk_s arena_chunk_t;
 typedef struct arena_bin_info_s arena_bin_info_t;
+typedef struct arena_decay_s arena_decay_t;
 typedef struct arena_bin_s arena_bin_t;
 typedef struct arena_s arena_t;
 typedef struct arena_tdata_s arena_tdata_t;
 
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
 
@@ -148,39 +150,39 @@ struct arena_runs_dirty_link_s {
 
 /*
  * Each arena_chunk_map_misc_t corresponds to one page within the chunk, just
  * like arena_chunk_map_bits_t.  Two separate arrays are stored within each
  * chunk header in order to improve cache locality.
  */
 struct arena_chunk_map_misc_s {
 	/*
-	 * Linkage for run trees.  There are two disjoint uses:
+	 * Linkage for run heaps.  There are two disjoint uses:
 	 *
-	 * 1) arena_t's runs_avail tree.
+	 * 1) arena_t's runs_avail heaps.
 	 * 2) arena_run_t conceptually uses this linkage for in-use non-full
 	 *    runs, rather than directly embedding linkage.
 	 */
-	rb_node(arena_chunk_map_misc_t)		rb_link;
+	phn(arena_chunk_map_misc_t)		ph_link;
 
 	union {
 		/* Linkage for list of dirty runs. */
 		arena_runs_dirty_link_t		rd;
 
 		/* Profile counters, used for large object runs. */
 		union {
 			void			*prof_tctx_pun;
 			prof_tctx_t		*prof_tctx;
 		};
 
 		/* Small region run metadata. */
 		arena_run_t			run;
 	};
 };
-typedef rb_tree(arena_chunk_map_misc_t) arena_run_tree_t;
+typedef ph(arena_chunk_map_misc_t) arena_run_heap_t;
 #endif /* JEMALLOC_ARENA_STRUCTS_A */
 
 #ifdef JEMALLOC_ARENA_STRUCTS_B
 /* Arena chunk header. */
 struct arena_chunk_s {
 	/*
 	 * A pointer to the arena that owns the chunk is stored within the node.
 	 * This field as a whole is used by chunks_rtree to support both
@@ -251,53 +253,104 @@ struct arena_bin_info_s {
 	 * bin.
 	 */
 	bitmap_info_t		bitmap_info;
 
 	/* Offset of first region in a run for this bin's size class. */
 	uint32_t		reg0_offset;
 };
 
+struct arena_decay_s {
+	/*
+	 * Approximate time in seconds from the creation of a set of unused
+	 * dirty pages until an equivalent set of unused dirty pages is purged
+	 * and/or reused.
+	 */
+	ssize_t			time;
+	/* time / SMOOTHSTEP_NSTEPS. */
+	nstime_t		interval;
+	/*
+	 * Time at which the current decay interval logically started.  We do
+	 * not actually advance to a new epoch until sometime after it starts
+	 * because of scheduling and computation delays, and it is even possible
+	 * to completely skip epochs.  In all cases, during epoch advancement we
+	 * merge all relevant activity into the most recently recorded epoch.
+	 */
+	nstime_t		epoch;
+	/* Deadline randomness generator. */
+	uint64_t		jitter_state;
+	/*
+	 * Deadline for current epoch.  This is the sum of interval and per
+	 * epoch jitter which is a uniform random variable in [0..interval).
+	 * Epochs always advance by precise multiples of interval, but we
+	 * randomize the deadline to reduce the likelihood of arenas purging in
+	 * lockstep.
+	 */
+	nstime_t		deadline;
+	/*
+	 * Number of dirty pages at beginning of current epoch.  During epoch
+	 * advancement we use the delta between arena->decay.ndirty and
+	 * arena->ndirty to determine how many dirty pages, if any, were
+	 * generated.
+	 */
+	size_t			ndirty;
+	/*
+	 * Trailing log of how many unused dirty pages were generated during
+	 * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last
+	 * element is the most recent epoch.  Corresponding epoch times are
+	 * relative to epoch.
+	 */
+	size_t			backlog[SMOOTHSTEP_NSTEPS];
+};
+
 struct arena_bin_s {
 	/*
 	 * All operations on runcur, runs, and stats require that lock be
 	 * locked.  Run allocation/deallocation are protected by the arena lock,
 	 * which may be acquired while holding one or more bin locks, but not
 	 * vise versa.
 	 */
 	malloc_mutex_t		lock;
 
 	/*
 	 * Current run being used to service allocations of this bin's size
 	 * class.
 	 */
 	arena_run_t		*runcur;
 
 	/*
-	 * Tree of non-full runs.  This tree is used when looking for an
+	 * Heap of non-full runs.  This heap is used when looking for an
 	 * existing run when runcur is no longer usable.  We choose the
 	 * non-full run that is lowest in memory; this policy tends to keep
 	 * objects packed well, and it can also help reduce the number of
 	 * almost-empty chunks.
 	 */
-	arena_run_tree_t	runs;
+	arena_run_heap_t	runs;
 
 	/* Bin statistics. */
 	malloc_bin_stats_t	stats;
 };
 
 struct arena_s {
 	/* This arena's index within the arenas array. */
 	unsigned		ind;
 
 	/*
-	 * Number of threads currently assigned to this arena.  This field is
-	 * synchronized via atomic operations.
+	 * Number of threads currently assigned to this arena, synchronized via
+	 * atomic operations.  Each thread has two distinct assignments, one for
+	 * application-serving allocation, and the other for internal metadata
+	 * allocation.  Internal metadata must not be allocated from arenas
+	 * created via the arenas.extend mallctl, because the arena.<i>.reset
+	 * mallctl indiscriminately discards all allocations for the affected
+	 * arena.
+	 *
+	 *   0: Application allocation.
+	 *   1: Internal metadata allocation.
 	 */
-	unsigned		nthreads;
+	unsigned		nthreads[2];
 
 	/*
 	 * There are three classes of arena operations from a locking
 	 * perspective:
 	 * 1) Thread assignment (modifies nthreads) is synchronized via atomics.
 	 * 2) Bin-related operations are protected by bin locks.
 	 * 3) Chunk- and run-related operations are protected by this mutex.
 	 */
@@ -312,20 +365,24 @@ struct arena_s {
 	ql_head(tcache_t)	tcache_ql;
 
 	uint64_t		prof_accumbytes;
 
 	/*
 	 * PRNG state for cache index randomization of large allocation base
 	 * pointers.
 	 */
-	uint64_t		offset_state;
+	size_t			offset_state;
 
 	dss_prec_t		dss_prec;
 
+
+	/* Extant arena chunks. */
+	ql_head(extent_node_t)	achunks;
+
 	/*
 	 * In order to avoid rapid chunk allocation/deallocation when an arena
 	 * oscillates right on the cusp of needing a new chunk, cache the most
 	 * recently freed chunk.  The spare is left in the arena's chunk trees
 	 * until it is deleted.
 	 *
 	 * There is one spare chunk per arena, rather than one spare total, in
 	 * order to avoid interactions between multiple threads that could make
@@ -376,62 +433,18 @@ struct arena_s {
 	 *        |            |    \-------/    \-------/   |         |
 	 *        |            |                             |         |
 	 *        |            |                             |         |
 	 *        \------------/                             \---------/
 	 */
 	arena_runs_dirty_link_t	runs_dirty;
 	extent_node_t		chunks_cache;
 
-	/*
-	 * Approximate time in seconds from the creation of a set of unused
-	 * dirty pages until an equivalent set of unused dirty pages is purged
-	 * and/or reused.
-	 */
-	ssize_t			decay_time;
-	/* decay_time / SMOOTHSTEP_NSTEPS. */
-	nstime_t		decay_interval;
-	/*
-	 * Time at which the current decay interval logically started.  We do
-	 * not actually advance to a new epoch until sometime after it starts
-	 * because of scheduling and computation delays, and it is even possible
-	 * to completely skip epochs.  In all cases, during epoch advancement we
-	 * merge all relevant activity into the most recently recorded epoch.
-	 */
-	nstime_t		decay_epoch;
-	/* decay_deadline randomness generator. */
-	uint64_t		decay_jitter_state;
-	/*
-	 * Deadline for current epoch.  This is the sum of decay_interval and
-	 * per epoch jitter which is a uniform random variable in
-	 * [0..decay_interval).  Epochs always advance by precise multiples of
-	 * decay_interval, but we randomize the deadline to reduce the
-	 * likelihood of arenas purging in lockstep.
-	 */
-	nstime_t		decay_deadline;
-	/*
-	 * Number of dirty pages at beginning of current epoch.  During epoch
-	 * advancement we use the delta between decay_ndirty and ndirty to
-	 * determine how many dirty pages, if any, were generated, and record
-	 * the result in decay_backlog.
-	 */
-	size_t			decay_ndirty;
-	/*
-	 * Memoized result of arena_decay_backlog_npages_limit() corresponding
-	 * to the current contents of decay_backlog, i.e. the limit on how many
-	 * pages are allowed to exist for the decay epochs.
-	 */
-	size_t			decay_backlog_npages_limit;
-	/*
-	 * Trailing log of how many unused dirty pages were generated during
-	 * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last
-	 * element is the most recent epoch.  Corresponding epoch times are
-	 * relative to decay_epoch.
-	 */
-	size_t			decay_backlog[SMOOTHSTEP_NSTEPS];
+	/* Decay-based purging state. */
+	arena_decay_t		decay;
 
 	/* Extant huge allocations. */
 	ql_head(extent_node_t)	huge;
 	/* Synchronizes all huge allocation/update/deallocation. */
 	malloc_mutex_t		huge_mtx;
 
 	/*
 	 * Trees of chunks that were previously allocated (trees differ only in
@@ -452,20 +465,22 @@ struct arena_s {
 
 	/* User-configurable chunk hook functions. */
 	chunk_hooks_t		chunk_hooks;
 
 	/* bins is used to store trees of free regions. */
 	arena_bin_t		bins[NBINS];
 
 	/*
-	 * Quantized address-ordered trees of this arena's available runs.  The
-	 * trees are used for first-best-fit run allocation.
+	 * Size-segregated address-ordered heaps of this arena's available runs,
+	 * used for first-best-fit run allocation.  Runs are quantized, i.e.
+	 * they reside in the last heap which corresponds to a size class less
+	 * than or equal to the run size.
 	 */
-	arena_run_tree_t	runs_avail[1]; /* Dynamically sized. */
+	arena_run_heap_t	runs_avail[NPSIZES];
 };
 
 /* Used in conjunction with tsd for fast arena-related context lookup. */
 struct arena_tdata_s {
 	ticker_t		decay_ticker;
 };
 #endif /* JEMALLOC_ARENA_STRUCTS_B */
 
@@ -487,142 +502,155 @@ extern ssize_t		opt_lg_dirty_mult;
 extern ssize_t		opt_decay_time;
 
 extern arena_bin_info_t	arena_bin_info[NBINS];
 
 extern size_t		map_bias; /* Number of arena chunk header pages. */
 extern size_t		map_misc_offset;
 extern size_t		arena_maxrun; /* Max run size for arenas. */
 extern size_t		large_maxclass; /* Max large size class. */
-extern size_t		run_quantize_max; /* Max run_quantize_*() input. */
 extern unsigned		nlclasses; /* Number of large size classes. */
 extern unsigned		nhclasses; /* Number of huge size classes. */
 
 #ifdef JEMALLOC_JET
 typedef size_t (run_quantize_t)(size_t);
 extern run_quantize_t *run_quantize_floor;
 extern run_quantize_t *run_quantize_ceil;
 #endif
 void	arena_chunk_cache_maybe_insert(arena_t *arena, extent_node_t *node,
     bool cache);
 void	arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node,
     bool cache);
-extent_node_t	*arena_node_alloc(arena_t *arena);
-void	arena_node_dalloc(arena_t *arena, extent_node_t *node);
-void	*arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment,
-    bool *zero);
-void	arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize);
-void	arena_chunk_ralloc_huge_similar(arena_t *arena, void *chunk,
-    size_t oldsize, size_t usize);
-void	arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk,
-    size_t oldsize, size_t usize);
-bool	arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk,
-    size_t oldsize, size_t usize, bool *zero);
-ssize_t	arena_lg_dirty_mult_get(arena_t *arena);
-bool	arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult);
-ssize_t	arena_decay_time_get(arena_t *arena);
-bool	arena_decay_time_set(arena_t *arena, ssize_t decay_time);
-void	arena_maybe_purge(arena_t *arena);
-void	arena_purge(arena_t *arena, bool all);
-void	arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin,
-    szind_t binind, uint64_t prof_accumbytes);
+extent_node_t	*arena_node_alloc(tsdn_t *tsdn, arena_t *arena);
+void	arena_node_dalloc(tsdn_t *tsdn, arena_t *arena, extent_node_t *node);
+void	*arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize,
+    size_t alignment, bool *zero);
+void	arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, void *chunk,
+    size_t usize);
+void	arena_chunk_ralloc_huge_similar(tsdn_t *tsdn, arena_t *arena,
+    void *chunk, size_t oldsize, size_t usize);
+void	arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena,
+    void *chunk, size_t oldsize, size_t usize);
+bool	arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena,
+    void *chunk, size_t oldsize, size_t usize, bool *zero);
+ssize_t	arena_lg_dirty_mult_get(tsdn_t *tsdn, arena_t *arena);
+bool	arena_lg_dirty_mult_set(tsdn_t *tsdn, arena_t *arena,
+    ssize_t lg_dirty_mult);
+ssize_t	arena_decay_time_get(tsdn_t *tsdn, arena_t *arena);
+bool	arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time);
+void	arena_purge(tsdn_t *tsdn, arena_t *arena, bool all);
+void	arena_maybe_purge(tsdn_t *tsdn, arena_t *arena);
+void	arena_reset(tsd_t *tsd, arena_t *arena);
+void	arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena,
+    tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes);
 void	arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info,
     bool zero);
 #ifdef JEMALLOC_JET
 typedef void (arena_redzone_corruption_t)(void *, size_t, bool, size_t,
     uint8_t);
 extern arena_redzone_corruption_t *arena_redzone_corruption;
 typedef void (arena_dalloc_junk_small_t)(void *, arena_bin_info_t *);
 extern arena_dalloc_junk_small_t *arena_dalloc_junk_small;
 #else
 void	arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info);
 #endif
 void	arena_quarantine_junk_small(void *ptr, size_t usize);
-void	*arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t ind, bool zero);
-void	*arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind,
-    bool zero, tcache_t *tcache);
-void	*arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize,
+void	*arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t ind,
+    bool zero);
+void	*arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size,
+    szind_t ind, bool zero);
+void	*arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize,
     size_t alignment, bool zero, tcache_t *tcache);
-void	arena_prof_promoted(const void *ptr, size_t size);
-void	arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk,
-    void *ptr, arena_chunk_map_bits_t *bitselm);
-void	arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
-    size_t pageind, arena_chunk_map_bits_t *bitselm);
-void	arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk,
+void	arena_prof_promoted(tsdn_t *tsdn, const void *ptr, size_t size);
+void	arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena,
+    arena_chunk_t *chunk, void *ptr, arena_chunk_map_bits_t *bitselm);
+void	arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
+    void *ptr, size_t pageind, arena_chunk_map_bits_t *bitselm);
+void	arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
     void *ptr, size_t pageind);
 #ifdef JEMALLOC_JET
 typedef void (arena_dalloc_junk_large_t)(void *, size_t);
 extern arena_dalloc_junk_large_t *arena_dalloc_junk_large;
 #else
 void	arena_dalloc_junk_large(void *ptr, size_t usize);
 #endif
-void	arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk,
-    void *ptr);
-void	arena_dalloc_large(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk,
+void	arena_dalloc_large_junked_locked(tsdn_t *tsdn, arena_t *arena,
+    arena_chunk_t *chunk, void *ptr);
+void	arena_dalloc_large(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk,
     void *ptr);
 #ifdef JEMALLOC_JET
 typedef void (arena_ralloc_junk_large_t)(void *, size_t, size_t);
 extern arena_ralloc_junk_large_t *arena_ralloc_junk_large;
 #endif
-bool	arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
-    size_t extra, bool zero);
+bool	arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize,
+    size_t size, size_t extra, bool zero);
 void	*arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize,
     size_t size, size_t alignment, bool zero, tcache_t *tcache);
-dss_prec_t	arena_dss_prec_get(arena_t *arena);
-bool	arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec);
+dss_prec_t	arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena);
+bool	arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec);
 ssize_t	arena_lg_dirty_mult_default_get(void);
 bool	arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult);
 ssize_t	arena_decay_time_default_get(void);
 bool	arena_decay_time_default_set(ssize_t decay_time);
-void	arena_basic_stats_merge(arena_t *arena, unsigned *nthreads,
+void	arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena,
+    unsigned *nthreads, const char **dss, ssize_t *lg_dirty_mult,
+    ssize_t *decay_time, size_t *nactive, size_t *ndirty);
+void	arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
     const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time,
-    size_t *nactive, size_t *ndirty);
-void	arena_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss,
-    ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive,
-    size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats,
-    malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats);
-unsigned	arena_nthreads_get(arena_t *arena);
-void	arena_nthreads_inc(arena_t *arena);
-void	arena_nthreads_dec(arena_t *arena);
-arena_t	*arena_new(unsigned ind);
-bool	arena_boot(void);
-void	arena_prefork0(arena_t *arena);
-void	arena_prefork1(arena_t *arena);
-void	arena_prefork2(arena_t *arena);
-void	arena_prefork3(arena_t *arena);
-void	arena_postfork_parent(arena_t *arena);
-void	arena_postfork_child(arena_t *arena);
+    size_t *nactive, size_t *ndirty, arena_stats_t *astats,
+    malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats,
+    malloc_huge_stats_t *hstats);
+unsigned	arena_nthreads_get(arena_t *arena, bool internal);
+void	arena_nthreads_inc(arena_t *arena, bool internal);
+void	arena_nthreads_dec(arena_t *arena, bool internal);
+arena_t	*arena_new(tsdn_t *tsdn, unsigned ind);
+void	arena_boot(void);
+void	arena_prefork0(tsdn_t *tsdn, arena_t *arena);
+void	arena_prefork1(tsdn_t *tsdn, arena_t *arena);
+void	arena_prefork2(tsdn_t *tsdn, arena_t *arena);
+void	arena_prefork3(tsdn_t *tsdn, arena_t *arena);
+void	arena_postfork_parent(tsdn_t *tsdn, arena_t *arena);
+void	arena_postfork_child(tsdn_t *tsdn, arena_t *arena);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
-arena_chunk_map_bits_t	*arena_bitselm_get(arena_chunk_t *chunk,
+arena_chunk_map_bits_t	*arena_bitselm_get_mutable(arena_chunk_t *chunk,
     size_t pageind);
-arena_chunk_map_misc_t	*arena_miscelm_get(arena_chunk_t *chunk,
+const arena_chunk_map_bits_t	*arena_bitselm_get_const(
+    const arena_chunk_t *chunk, size_t pageind);
+arena_chunk_map_misc_t	*arena_miscelm_get_mutable(arena_chunk_t *chunk,
     size_t pageind);
+const arena_chunk_map_misc_t	*arena_miscelm_get_const(
+    const arena_chunk_t *chunk, size_t pageind);
 size_t	arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm);
-void	*arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm);
+void	*arena_miscelm_to_rpages(const arena_chunk_map_misc_t *miscelm);
 arena_chunk_map_misc_t	*arena_rd_to_miscelm(arena_runs_dirty_link_t *rd);
 arena_chunk_map_misc_t	*arena_run_to_miscelm(arena_run_t *run);
-size_t	*arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind);
-size_t	arena_mapbitsp_read(size_t *mapbitsp);
-size_t	arena_mapbits_get(arena_chunk_t *chunk, size_t pageind);
+size_t	*arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind);
+const size_t	*arena_mapbitsp_get_const(const arena_chunk_t *chunk,
+    size_t pageind);
+size_t	arena_mapbitsp_read(const size_t *mapbitsp);
+size_t	arena_mapbits_get(const arena_chunk_t *chunk, size_t pageind);
 size_t	arena_mapbits_size_decode(size_t mapbits);
-size_t	arena_mapbits_unallocated_size_get(arena_chunk_t *chunk,
+size_t	arena_mapbits_unallocated_size_get(const arena_chunk_t *chunk,
+    size_t pageind);
+size_t	arena_mapbits_large_size_get(const arena_chunk_t *chunk,
     size_t pageind);
-size_t	arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind);
-size_t	arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind);
-szind_t	arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind);
-size_t	arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind);
-size_t	arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind);
-size_t	arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind);
-size_t	arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind);
-size_t	arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind);
+size_t	arena_mapbits_small_runind_get(const arena_chunk_t *chunk,
+    size_t pageind);
+szind_t	arena_mapbits_binind_get(const arena_chunk_t *chunk, size_t pageind);
+size_t	arena_mapbits_dirty_get(const arena_chunk_t *chunk, size_t pageind);
+size_t	arena_mapbits_unzeroed_get(const arena_chunk_t *chunk, size_t pageind);
+size_t	arena_mapbits_decommitted_get(const arena_chunk_t *chunk,
+    size_t pageind);
+size_t	arena_mapbits_large_get(const arena_chunk_t *chunk, size_t pageind);
+size_t	arena_mapbits_allocated_get(const arena_chunk_t *chunk, size_t pageind);
 void	arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits);
 size_t	arena_mapbits_size_encode(size_t size);
 void	arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind,
     size_t size, size_t flags);
 void	arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind,
     size_t size);
 void	arena_mapbits_internal_set(arena_chunk_t *chunk, size_t pageind,
     size_t flags);
@@ -632,73 +660,89 @@ void	arena_mapbits_large_binind_set(aren
     szind_t binind);
 void	arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind,
     size_t runind, szind_t binind, size_t flags);
 void	arena_metadata_allocated_add(arena_t *arena, size_t size);
 void	arena_metadata_allocated_sub(arena_t *arena, size_t size);
 size_t	arena_metadata_allocated_get(arena_t *arena);
 bool	arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes);
 bool	arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes);
-bool	arena_prof_accum(arena_t *arena, uint64_t accumbytes);
+bool	arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes);
 szind_t	arena_ptr_small_binind_get(const void *ptr, size_t mapbits);
 szind_t	arena_bin_index(arena_t *arena, arena_bin_t *bin);
 size_t	arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
     const void *ptr);
-prof_tctx_t	*arena_prof_tctx_get(const void *ptr);
-void	arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx);
-void	arena_prof_tctx_reset(const void *ptr, size_t usize,
+prof_tctx_t	*arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr);
+void	arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize,
+    prof_tctx_t *tctx);
+void	arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize,
     const void *old_ptr, prof_tctx_t *old_tctx);
-void	arena_decay_ticks(tsd_t *tsd, arena_t *arena, unsigned nticks);
-void	arena_decay_tick(tsd_t *tsd, arena_t *arena);
-void	*arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind,
+void	arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks);
+void	arena_decay_tick(tsdn_t *tsdn, arena_t *arena);
+void	*arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind,
     bool zero, tcache_t *tcache, bool slow_path);
 arena_t	*arena_aalloc(const void *ptr);
-size_t	arena_salloc(const void *ptr, bool demote);
-void	arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path);
-void	arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache);
+size_t	arena_salloc(tsdn_t *tsdn, const void *ptr, bool demote);
+void	arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path);
+void	arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
+    bool slow_path);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
 #  ifdef JEMALLOC_ARENA_INLINE_A
 JEMALLOC_ALWAYS_INLINE arena_chunk_map_bits_t *
-arena_bitselm_get(arena_chunk_t *chunk, size_t pageind)
+arena_bitselm_get_mutable(arena_chunk_t *chunk, size_t pageind)
 {
 
 	assert(pageind >= map_bias);
 	assert(pageind < chunk_npages);
 
 	return (&chunk->map_bits[pageind-map_bias]);
 }
 
+JEMALLOC_ALWAYS_INLINE const arena_chunk_map_bits_t *
+arena_bitselm_get_const(const arena_chunk_t *chunk, size_t pageind)
+{
+
+	return (arena_bitselm_get_mutable((arena_chunk_t *)chunk, pageind));
+}
+
 JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t *
-arena_miscelm_get(arena_chunk_t *chunk, size_t pageind)
+arena_miscelm_get_mutable(arena_chunk_t *chunk, size_t pageind)
 {
 
 	assert(pageind >= map_bias);
 	assert(pageind < chunk_npages);
 
 	return ((arena_chunk_map_misc_t *)((uintptr_t)chunk +
 	    (uintptr_t)map_misc_offset) + pageind-map_bias);
 }
 
+JEMALLOC_ALWAYS_INLINE const arena_chunk_map_misc_t *
+arena_miscelm_get_const(const arena_chunk_t *chunk, size_t pageind)
+{
+
+	return (arena_miscelm_get_mutable((arena_chunk_t *)chunk, pageind));
+}
+
 JEMALLOC_ALWAYS_INLINE size_t
 arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm)
 {
 	arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm);
 	size_t pageind = ((uintptr_t)miscelm - ((uintptr_t)chunk +
 	    map_misc_offset)) / sizeof(arena_chunk_map_misc_t) + map_bias;
 
 	assert(pageind >= map_bias);
 	assert(pageind < chunk_npages);
 
 	return (pageind);
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm)
+arena_miscelm_to_rpages(const arena_chunk_map_misc_t *miscelm)
 {
 	arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm);
 	size_t pageind = arena_miscelm_to_pageind(miscelm);
 
 	return ((void *)((uintptr_t)chunk + (pageind << LG_PAGE)));
 }
 
 JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t *
@@ -721,34 +765,41 @@ arena_run_to_miscelm(arena_run_t *run)
 
 	assert(arena_miscelm_to_pageind(miscelm) >= map_bias);
 	assert(arena_miscelm_to_pageind(miscelm) < chunk_npages);
 
 	return (miscelm);
 }
 
 JEMALLOC_ALWAYS_INLINE size_t *
-arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind)
 {
 
-	return (&arena_bitselm_get(chunk, pageind)->bits);
+	return (&arena_bitselm_get_mutable(chunk, pageind)->bits);
+}
+
+JEMALLOC_ALWAYS_INLINE const size_t *
+arena_mapbitsp_get_const(const arena_chunk_t *chunk, size_t pageind)
+{
+
+	return (arena_mapbitsp_get_mutable((arena_chunk_t *)chunk, pageind));
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbitsp_read(size_t *mapbitsp)
+arena_mapbitsp_read(const size_t *mapbitsp)
 {
 
 	return (*mapbitsp);
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_get(const arena_chunk_t *chunk, size_t pageind)
 {
 
-	return (arena_mapbitsp_read(arena_mapbitsp_get(chunk, pageind)));
+	return (arena_mapbitsp_read(arena_mapbitsp_get_const(chunk, pageind)));
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
 arena_mapbits_size_decode(size_t mapbits)
 {
 	size_t size;
 
 #if CHUNK_MAP_SIZE_SHIFT > 0
@@ -758,103 +809,103 @@ arena_mapbits_size_decode(size_t mapbits
 #else
 	size = (mapbits & CHUNK_MAP_SIZE_MASK) << -CHUNK_MAP_SIZE_SHIFT;
 #endif
 
 	return (size);
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_unallocated_size_get(const arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
 
 	mapbits = arena_mapbits_get(chunk, pageind);
 	assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0);
 	return (arena_mapbits_size_decode(mapbits));
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_large_size_get(const arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
 
 	mapbits = arena_mapbits_get(chunk, pageind);
 	assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) ==
 	    (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED));
 	return (arena_mapbits_size_decode(mapbits));
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_small_runind_get(const arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
 
 	mapbits = arena_mapbits_get(chunk, pageind);
 	assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) ==
 	    CHUNK_MAP_ALLOCATED);
 	return (mapbits >> CHUNK_MAP_RUNIND_SHIFT);
 }
 
 JEMALLOC_ALWAYS_INLINE szind_t
-arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_binind_get(const arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
 	szind_t binind;
 
 	mapbits = arena_mapbits_get(chunk, pageind);
 	binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT;
 	assert(binind < NBINS || binind == BININD_INVALID);
 	return (binind);
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_dirty_get(const arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
 
 	mapbits = arena_mapbits_get(chunk, pageind);
 	assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits &
 	    (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0);
 	return (mapbits & CHUNK_MAP_DIRTY);
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_unzeroed_get(const arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
 
 	mapbits = arena_mapbits_get(chunk, pageind);
 	assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits &
 	    (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0);
 	return (mapbits & CHUNK_MAP_UNZEROED);
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_decommitted_get(const arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
 
 	mapbits = arena_mapbits_get(chunk, pageind);
 	assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits &
 	    (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0);
 	return (mapbits & CHUNK_MAP_DECOMMITTED);
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_large_get(const arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
 
 	mapbits = arena_mapbits_get(chunk, pageind);
 	return (mapbits & CHUNK_MAP_LARGE);
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind)
+arena_mapbits_allocated_get(const arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
 
 	mapbits = arena_mapbits_get(chunk, pageind);
 	return (mapbits & CHUNK_MAP_ALLOCATED);
 }
 
 JEMALLOC_ALWAYS_INLINE void
@@ -880,82 +931,82 @@ arena_mapbits_size_encode(size_t size)
 	assert((mapbits & ~CHUNK_MAP_SIZE_MASK) == 0);
 	return (mapbits);
 }
 
 JEMALLOC_ALWAYS_INLINE void
 arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size,
     size_t flags)
 {
-	size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
+	size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind);
 
 	assert((size & PAGE_MASK) == 0);
 	assert((flags & CHUNK_MAP_FLAGS_MASK) == flags);
 	assert((flags & CHUNK_MAP_DECOMMITTED) == 0 || (flags &
 	    (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0);
 	arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) |
 	    CHUNK_MAP_BININD_INVALID | flags);
 }
 
 JEMALLOC_ALWAYS_INLINE void
 arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind,
     size_t size)
 {
-	size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
+	size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind);
 	size_t mapbits = arena_mapbitsp_read(mapbitsp);
 
 	assert((size & PAGE_MASK) == 0);
 	assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0);
 	arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) |
 	    (mapbits & ~CHUNK_MAP_SIZE_MASK));
 }
 
 JEMALLOC_ALWAYS_INLINE void
 arena_mapbits_internal_set(arena_chunk_t *chunk, size_t pageind, size_t flags)
 {
-	size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
+	size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind);
 
 	assert((flags & CHUNK_MAP_UNZEROED) == flags);
 	arena_mapbitsp_write(mapbitsp, flags);
 }
 
 JEMALLOC_ALWAYS_INLINE void
 arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size,
     size_t flags)
 {
-	size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
+	size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind);
 
 	assert((size & PAGE_MASK) == 0);
 	assert((flags & CHUNK_MAP_FLAGS_MASK) == flags);
 	assert((flags & CHUNK_MAP_DECOMMITTED) == 0 || (flags &
 	    (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0);
 	arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) |
 	    CHUNK_MAP_BININD_INVALID | flags | CHUNK_MAP_LARGE |
 	    CHUNK_MAP_ALLOCATED);
 }
 
 JEMALLOC_ALWAYS_INLINE void
 arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind,
     szind_t binind)
 {
-	size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
+	size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind);
 	size_t mapbits = arena_mapbitsp_read(mapbitsp);
 
 	assert(binind <= BININD_INVALID);
 	assert(arena_mapbits_large_size_get(chunk, pageind) == LARGE_MINCLASS +
 	    large_pad);
 	arena_mapbitsp_write(mapbitsp, (mapbits & ~CHUNK_MAP_BININD_MASK) |
 	    (binind << CHUNK_MAP_BININD_SHIFT));
 }
 
 JEMALLOC_ALWAYS_INLINE void
 arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind,
     szind_t binind, size_t flags)
 {
-	size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
+	size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind);
 
 	assert(binind < BININD_INVALID);
 	assert(pageind - runind >= map_bias);
 	assert((flags & CHUNK_MAP_UNZEROED) == flags);
 	arena_mapbitsp_write(mapbitsp, (runind << CHUNK_MAP_RUNIND_SHIFT) |
 	    (binind << CHUNK_MAP_BININD_SHIFT) | flags | CHUNK_MAP_ALLOCATED);
 }
 
@@ -1002,30 +1053,30 @@ arena_prof_accum_locked(arena_t *arena, 
 	cassert(config_prof);
 
 	if (likely(prof_interval == 0))
 		return (false);
 	return (arena_prof_accum_impl(arena, accumbytes));
 }
 
 JEMALLOC_INLINE bool
-arena_prof_accum(arena_t *arena, uint64_t accumbytes)
+arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes)
 {
 
 	cassert(config_prof);
 
 	if (likely(prof_interval == 0))
 		return (false);
 
 	{
 		bool ret;
 
-		malloc_mutex_lock(&arena->lock);
+		malloc_mutex_lock(tsdn, &arena->lock);
 		ret = arena_prof_accum_impl(arena, accumbytes);
-		malloc_mutex_unlock(&arena->lock);
+		malloc_mutex_unlock(tsdn, &arena->lock);
 		return (ret);
 	}
 }
 
 JEMALLOC_ALWAYS_INLINE szind_t
 arena_ptr_small_binind_get(const void *ptr, size_t mapbits)
 {
 	szind_t binind;
@@ -1033,35 +1084,35 @@ arena_ptr_small_binind_get(const void *p
 	binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT;
 
 	if (config_debug) {
 		arena_chunk_t *chunk;
 		arena_t *arena;
 		size_t pageind;
 		size_t actual_mapbits;
 		size_t rpages_ind;
-		arena_run_t *run;
+		const arena_run_t *run;
 		arena_bin_t *bin;
 		szind_t run_binind, actual_binind;
 		arena_bin_info_t *bin_info;
-		arena_chunk_map_misc_t *miscelm;
-		void *rpages;
+		const arena_chunk_map_misc_t *miscelm;
+		const void *rpages;
 
 		assert(binind != BININD_INVALID);
 		assert(binind < NBINS);
 		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 		arena = extent_node_arena_get(&chunk->node);
 		pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
 		actual_mapbits = arena_mapbits_get(chunk, pageind);
 		assert(mapbits == actual_mapbits);
 		assert(arena_mapbits_large_get(chunk, pageind) == 0);
 		assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
 		rpages_ind = pageind - arena_mapbits_small_runind_get(chunk,
 		    pageind);
-		miscelm = arena_miscelm_get(chunk, rpages_ind);
+		miscelm = arena_miscelm_get_const(chunk, rpages_ind);
 		run = &miscelm->run;
 		run_binind = run->binind;
 		bin = &arena->bins[run_binind];
 		actual_binind = (szind_t)(bin - arena->bins);
 		assert(run_binind == actual_binind);
 		bin_info = &arena_bin_info[actual_binind];
 		rpages = arena_miscelm_to_rpages(miscelm);
 		assert(((uintptr_t)ptr - ((uintptr_t)rpages +
@@ -1151,44 +1202,45 @@ arena_run_regind(arena_run_t *run, arena
 	}
 	assert(diff == regind * interval);
 	assert(regind < bin_info->nregs);
 
 	return (regind);
 }
 
 JEMALLOC_INLINE prof_tctx_t *
-arena_prof_tctx_get(const void *ptr)
+arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr)
 {
 	prof_tctx_t *ret;
 	arena_chunk_t *chunk;
 
 	cassert(config_prof);
 	assert(ptr != NULL);
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 	if (likely(chunk != ptr)) {
 		size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
 		size_t mapbits = arena_mapbits_get(chunk, pageind);
 		assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
 		if (likely((mapbits & CHUNK_MAP_LARGE) == 0))
 			ret = (prof_tctx_t *)(uintptr_t)1U;
 		else {
-			arena_chunk_map_misc_t *elm = arena_miscelm_get(chunk,
-			    pageind);
+			arena_chunk_map_misc_t *elm =
+			    arena_miscelm_get_mutable(chunk, pageind);
 			ret = atomic_read_p(&elm->prof_tctx_pun);
 		}
 	} else
-		ret = huge_prof_tctx_get(ptr);
+		ret = huge_prof_tctx_get(tsdn, ptr);
 
 	return (ret);
 }
 
 JEMALLOC_INLINE void
-arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx)
+arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize,
+    prof_tctx_t *tctx)
 {
 	arena_chunk_t *chunk;
 
 	cassert(config_prof);
 	assert(ptr != NULL);
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 	if (likely(chunk != ptr)) {
@@ -1197,34 +1249,34 @@ arena_prof_tctx_set(const void *ptr, siz
 		assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
 
 		if (unlikely(usize > SMALL_MAXCLASS || (uintptr_t)tctx >
 		    (uintptr_t)1U)) {
 			arena_chunk_map_misc_t *elm;
 
 			assert(arena_mapbits_large_get(chunk, pageind) != 0);
 
-			elm = arena_miscelm_get(chunk, pageind);
+			elm = arena_miscelm_get_mutable(chunk, pageind);
 			atomic_write_p(&elm->prof_tctx_pun, tctx);
 		} else {
 			/*
 			 * tctx must always be initialized for large runs.
 			 * Assert that the surrounding conditional logic is
 			 * equivalent to checking whether ptr refers to a large
 			 * run.
 			 */
 			assert(arena_mapbits_large_get(chunk, pageind) == 0);
 		}
 	} else
-		huge_prof_tctx_set(ptr, tctx);
+		huge_prof_tctx_set(tsdn, ptr, tctx);
 }
 
 JEMALLOC_INLINE void
-arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr,
-    prof_tctx_t *old_tctx)
+arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize,
+    const void *old_ptr, prof_tctx_t *old_tctx)
 {
 
 	cassert(config_prof);
 	assert(ptr != NULL);
 
 	if (unlikely(usize > SMALL_MAXCLASS || (ptr == old_ptr &&
 	    (uintptr_t)old_tctx > (uintptr_t)1U))) {
 		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
@@ -1233,83 +1285,86 @@ arena_prof_tctx_reset(const void *ptr, s
 			arena_chunk_map_misc_t *elm;
 
 			pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >>
 			    LG_PAGE;
 			assert(arena_mapbits_allocated_get(chunk, pageind) !=
 			    0);
 			assert(arena_mapbits_large_get(chunk, pageind) != 0);
 
-			elm = arena_miscelm_get(chunk, pageind);
+			elm = arena_miscelm_get_mutable(chunk, pageind);
 			atomic_write_p(&elm->prof_tctx_pun,
 			    (prof_tctx_t *)(uintptr_t)1U);
 		} else
-			huge_prof_tctx_reset(ptr);
+			huge_prof_tctx_reset(tsdn, ptr);
 	}
 }
 
 JEMALLOC_ALWAYS_INLINE void
-arena_decay_ticks(tsd_t *tsd, arena_t *arena, unsigned nticks)
+arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks)
 {
+	tsd_t *tsd;
 	ticker_t *decay_ticker;
 
-	if (unlikely(tsd == NULL))
+	if (unlikely(tsdn_null(tsdn)))
 		return;
+	tsd = tsdn_tsd(tsdn);
 	decay_ticker = decay_ticker_get(tsd, arena->ind);
 	if (unlikely(decay_ticker == NULL))
 		return;
 	if (unlikely(ticker_ticks(decay_ticker, nticks)))
-		arena_purge(arena, false);
+		arena_purge(tsdn, arena, false);
 }
 
 JEMALLOC_ALWAYS_INLINE void
-arena_decay_tick(tsd_t *tsd, arena_t *arena)
+arena_decay_tick(tsdn_t *tsdn, arena_t *arena)
 {
 
-	arena_decay_ticks(tsd, arena, 1);
+	arena_decay_ticks(tsdn, arena, 1);
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero,
+arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero,
     tcache_t *tcache, bool slow_path)
 {
 
+	assert(!tsdn_null(tsdn) || tcache == NULL);
 	assert(size != 0);
 
 	if (likely(tcache != NULL)) {
 		if (likely(size <= SMALL_MAXCLASS)) {
-			return (tcache_alloc_small(tsd, arena, tcache, size,
-			    ind, zero, slow_path));
+			return (tcache_alloc_small(tsdn_tsd(tsdn), arena,
+			    tcache, size, ind, zero, slow_path));
 		}
 		if (likely(size <= tcache_maxclass)) {
-			return (tcache_alloc_large(tsd, arena, tcache, size,
-			    ind, zero, slow_path));
+			return (tcache_alloc_large(tsdn_tsd(tsdn), arena,
+			    tcache, size, ind, zero, slow_path));
 		}
 		/* (size > tcache_maxclass) case falls through. */
 		assert(size > tcache_maxclass);
 	}
 
-	return (arena_malloc_hard(tsd, arena, size, ind, zero, tcache));
+	return (arena_malloc_hard(tsdn, arena, size, ind, zero));
 }
 
 JEMALLOC_ALWAYS_INLINE arena_t *
 arena_aalloc(const void *ptr)
 {
 	arena_chunk_t *chunk;
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 	if (likely(chunk != ptr))
 		return (extent_node_arena_get(&chunk->node));
 	else
 		return (huge_aalloc(ptr));
 }
 
 /* Return the size of the allocation pointed to by ptr. */
 JEMALLOC_ALWAYS_INLINE size_t
-arena_salloc(const void *ptr, bool demote)
+arena_salloc(tsdn_t *tsdn, const void *ptr, bool demote)
 {
 	size_t ret;
 	arena_chunk_t *chunk;
 	size_t pageind;
 	szind_t binind;
 
 	assert(ptr != NULL);
 
@@ -1342,112 +1397,120 @@ arena_salloc(const void *ptr, bool demot
 			 * object).
 			 */
 			assert(arena_mapbits_large_get(chunk, pageind) != 0 ||
 			    arena_ptr_small_binind_get(ptr,
 			    arena_mapbits_get(chunk, pageind)) == binind);
 			ret = index2size(binind);
 		}
 	} else
-		ret = huge_salloc(ptr);
+		ret = huge_salloc(tsdn, ptr);
 
 	return (ret);
 }
 
 JEMALLOC_ALWAYS_INLINE void
-arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path)
+arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path)
 {
 	arena_chunk_t *chunk;
 	size_t pageind, mapbits;
 
+	assert(!tsdn_null(tsdn) || tcache == NULL);
 	assert(ptr != NULL);
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 	if (likely(chunk != ptr)) {
 		pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
 		mapbits = arena_mapbits_get(chunk, pageind);
 		assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
 		if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) {
 			/* Small allocation. */
 			if (likely(tcache != NULL)) {
 				szind_t binind = arena_ptr_small_binind_get(ptr,
 				    mapbits);
-				tcache_dalloc_small(tsd, tcache, ptr, binind,
-				    slow_path);
+				tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr,
+				    binind, slow_path);
 			} else {
-				arena_dalloc_small(tsd, extent_node_arena_get(
-				    &chunk->node), chunk, ptr, pageind);
+				arena_dalloc_small(tsdn,
+				    extent_node_arena_get(&chunk->node), chunk,
+				    ptr, pageind);
 			}
 		} else {
 			size_t size = arena_mapbits_large_size_get(chunk,
 			    pageind);
 
 			assert(config_cache_oblivious || ((uintptr_t)ptr &
 			    PAGE_MASK) == 0);
 
 			if (likely(tcache != NULL) && size - large_pad <=
 			    tcache_maxclass) {
-				tcache_dalloc_large(tsd, tcache, ptr, size -
-				    large_pad, slow_path);
+				tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr,
+				    size - large_pad, slow_path);
 			} else {
-				arena_dalloc_large(tsd, extent_node_arena_get(
-				    &chunk->node), chunk, ptr);
+				arena_dalloc_large(tsdn,
+				    extent_node_arena_get(&chunk->node), chunk,
+				    ptr);
 			}
 		}
 	} else
-		huge_dalloc(tsd, ptr, tcache);
+		huge_dalloc(tsdn, ptr);
 }
 
 JEMALLOC_ALWAYS_INLINE void
-arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache)
+arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
+    bool slow_path)
 {
 	arena_chunk_t *chunk;
 
+	assert(!tsdn_null(tsdn) || tcache == NULL);
+
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 	if (likely(chunk != ptr)) {
 		if (config_prof && opt_prof) {
 			size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >>
 			    LG_PAGE;
 			assert(arena_mapbits_allocated_get(chunk, pageind) !=
 			    0);
 			if (arena_mapbits_large_get(chunk, pageind) != 0) {
 				/*
 				 * Make sure to use promoted size, not request
 				 * size.
 				 */
 				size = arena_mapbits_large_size_get(chunk,
 				    pageind) - large_pad;
 			}
 		}
-		assert(s2u(size) == s2u(arena_salloc(ptr, false)));
+		assert(s2u(size) == s2u(arena_salloc(tsdn, ptr, false)));
 
 		if (likely(size <= SMALL_MAXCLASS)) {
 			/* Small allocation. */
 			if (likely(tcache != NULL)) {
 				szind_t binind = size2index(size);
-				tcache_dalloc_small(tsd, tcache, ptr, binind,
-				    true);
+				tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr,
+				    binind, slow_path);
 			} else {
 				size_t pageind = ((uintptr_t)ptr -
 				    (uintptr_t)chunk) >> LG_PAGE;
-				arena_dalloc_small(tsd, extent_node_arena_get(
-				    &chunk->node), chunk, ptr, pageind);
+				arena_dalloc_small(tsdn,
+				    extent_node_arena_get(&chunk->node), chunk,
+				    ptr, pageind);
 			}
 		} else {
 			assert(config_cache_oblivious || ((uintptr_t)ptr &
 			    PAGE_MASK) == 0);
 
 			if (likely(tcache != NULL) && size <= tcache_maxclass) {
-				tcache_dalloc_large(tsd, tcache, ptr, size,
-				    true);
+				tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr,
+				    size, slow_path);
 			} else {
-				arena_dalloc_large(tsd, extent_node_arena_get(
-				    &chunk->node), chunk, ptr);
+				arena_dalloc_large(tsdn,
+				    extent_node_arena_get(&chunk->node), chunk,
+				    ptr);
 			}
 		}
 	} else
-		huge_dalloc(tsd, ptr, tcache);
+		huge_dalloc(tsdn, ptr);
 }
 #  endif /* JEMALLOC_ARENA_INLINE_B */
 #endif
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/internal/base.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/base.h
@@ -4,21 +4,22 @@
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
 
 #endif /* JEMALLOC_H_STRUCTS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
-void	*base_alloc(size_t size);
-void	base_stats_get(size_t *allocated, size_t *resident, size_t *mapped);
+void	*base_alloc(tsdn_t *tsdn, size_t size);
+void	base_stats_get(tsdn_t *tsdn, size_t *allocated, size_t *resident,
+    size_t *mapped);
 bool	base_boot(void);
-void	base_prefork(void);
-void	base_postfork_parent(void);
-void	base_postfork_child(void);
+void	base_prefork(tsdn_t *tsdn);
+void	base_postfork_parent(tsdn_t *tsdn);
+void	base_postfork_child(tsdn_t *tsdn);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/internal/bitmap.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/bitmap.h
@@ -12,18 +12,18 @@ typedef unsigned long bitmap_t;
 
 /* Number of bits per group. */
 #define	LG_BITMAP_GROUP_NBITS		(LG_SIZEOF_BITMAP + 3)
 #define	BITMAP_GROUP_NBITS		(ZU(1) << LG_BITMAP_GROUP_NBITS)
 #define	BITMAP_GROUP_NBITS_MASK		(BITMAP_GROUP_NBITS-1)
 
 /*
  * Do some analysis on how big the bitmap is before we use a tree.  For a brute
- * force linear search, if we would have to call ffsl more than 2^3 times, use a
- * tree instead.
+ * force linear search, if we would have to call ffs_lu() more than 2^3 times,
+ * use a tree instead.
  */
 #if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3
 #  define USE_TREE
 #endif
 
 /* Number of groups required to store a given number of bits. */
 #define	BITMAP_BITS2GROUPS(nbits)					\
     ((nbits + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS)
@@ -218,17 +218,17 @@ bitmap_sfu(bitmap_t *bitmap, const bitma
 	}
 #else
 	i = 0;
 	g = bitmap[0];
 	while ((bit = ffs_lu(g)) == 0) {
 		i++;
 		g = bitmap[i];
 	}
-	bit = (bit - 1) + (i << LG_BITMAP_GROUP_NBITS);
+	bit = (i << LG_BITMAP_GROUP_NBITS) + (bit - 1);
 #endif
 	bitmap_set(bitmap, binfo, bit);
 	return (bit);
 }
 
 JEMALLOC_INLINE void
 bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
 {
--- a/memory/jemalloc/src/include/jemalloc/internal/chunk.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/chunk.h
@@ -43,38 +43,39 @@ extern const char	*opt_dss;
 extern rtree_t		chunks_rtree;
 
 extern size_t		chunksize;
 extern size_t		chunksize_mask; /* (chunksize - 1). */
 extern size_t		chunk_npages;
 
 extern const chunk_hooks_t	chunk_hooks_default;
 
-chunk_hooks_t	chunk_hooks_get(arena_t *arena);
-chunk_hooks_t	chunk_hooks_set(arena_t *arena,
+chunk_hooks_t	chunk_hooks_get(tsdn_t *tsdn, arena_t *arena);
+chunk_hooks_t	chunk_hooks_set(tsdn_t *tsdn, arena_t *arena,
     const chunk_hooks_t *chunk_hooks);
 
-bool	chunk_register(const void *chunk, const extent_node_t *node);
+bool	chunk_register(tsdn_t *tsdn, const void *chunk,
+    const extent_node_t *node);
 void	chunk_deregister(const void *chunk, const extent_node_t *node);
 void	*chunk_alloc_base(size_t size);
-void	*chunk_alloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks,
-    void *new_addr, size_t size, size_t alignment, bool *zero,
-    bool dalloc_node);
-void	*chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks,
-    void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit);
-void	chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks,
-    void *chunk, size_t size, bool committed);
-void	chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks,
-    void *chunk, size_t size, bool zeroed, bool committed);
-bool	chunk_purge_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks,
-    void *chunk, size_t size, size_t offset, size_t length);
+void	*chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena,
+    chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment,
+    bool *zero, bool *commit, bool dalloc_node);
+void	*chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena,
+    chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment,
+    bool *zero, bool *commit);
+void	chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena,
+    chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool committed);
+void	chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena,
+    chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool zeroed,
+    bool committed);
+bool	chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena,
+    chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset,
+    size_t length);
 bool	chunk_boot(void);
-void	chunk_prefork(void);
-void	chunk_postfork_parent(void);
-void	chunk_postfork_child(void);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
 extent_node_t	*chunk_lookup(const void *chunk, bool dependent);
 #endif
--- a/memory/jemalloc/src/include/jemalloc/internal/chunk_dss.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/chunk_dss.h
@@ -18,22 +18,20 @@ typedef enum {
 extern const char *dss_prec_names[];
 
 #endif /* JEMALLOC_H_STRUCTS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
 dss_prec_t	chunk_dss_prec_get(void);
 bool	chunk_dss_prec_set(dss_prec_t dss_prec);
-void	*chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size,
-    size_t alignment, bool *zero, bool *commit);
+void	*chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr,
+    size_t size, size_t alignment, bool *zero, bool *commit);
 bool	chunk_in_dss(void *chunk);
-bool	chunk_dss_boot(void);
-void	chunk_dss_prefork(void);
-void	chunk_dss_postfork_parent(void);
-void	chunk_dss_postfork_child(void);
+bool	chunk_dss_mergeable(void *chunk_a, void *chunk_b);
+void	chunk_dss_boot(void);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/internal/ctl.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/ctl.h
@@ -16,23 +16,24 @@ struct ctl_node_s {
 };
 
 struct ctl_named_node_s {
 	struct ctl_node_s	node;
 	const char		*name;
 	/* If (nchildren == 0), this is a terminal node. */
 	unsigned		nchildren;
 	const			ctl_node_t *children;
-	int			(*ctl)(const size_t *, size_t, void *, size_t *,
-	    void *, size_t);
+	int			(*ctl)(tsd_t *, const size_t *, size_t, void *,
+	    size_t *, void *, size_t);
 };
 
 struct ctl_indexed_node_s {
 	struct ctl_node_s	node;
-	const ctl_named_node_t	*(*index)(const size_t *, size_t, size_t);
+	const ctl_named_node_t	*(*index)(tsdn_t *, const size_t *, size_t,
+	    size_t);
 };
 
 struct ctl_arena_stats_s {
 	bool			initialized;
 	unsigned		nthreads;
 	const char		*dss;
 	ssize_t			lg_dirty_mult;
 	ssize_t			decay_time;
@@ -55,34 +56,36 @@ struct ctl_arena_stats_s {
 };
 
 struct ctl_stats_s {
 	size_t			allocated;
 	size_t			active;
 	size_t			metadata;
 	size_t			resident;
 	size_t			mapped;
+	size_t			retained;
 	unsigned		narenas;
 	ctl_arena_stats_t	*arenas;	/* (narenas + 1) elements. */
 };
 
 #endif /* JEMALLOC_H_STRUCTS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
-int	ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp,
-    size_t newlen);
-int	ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp);
+int	ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp,
+    void *newp, size_t newlen);
+int	ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp,
+    size_t *miblenp);
 
-int	ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen);
+int	ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen);
 bool	ctl_boot(void);
-void	ctl_prefork(void);
-void	ctl_postfork_parent(void);
-void	ctl_postfork_child(void);
+void	ctl_prefork(tsdn_t *tsdn);
+void	ctl_postfork_parent(tsdn_t *tsdn);
+void	ctl_postfork_child(tsdn_t *tsdn);
 
 #define	xmallctl(name, oldp, oldlenp, newp, newlen) do {		\
 	if (je_mallctl(name, oldp, oldlenp, newp, newlen)		\
 	    != 0) {							\
 		malloc_printf(						\
 		    "<jemalloc>: Failure in xmallctl(\"%s\", ...)\n",	\
 		    name);						\
 		abort();						\
--- a/memory/jemalloc/src/include/jemalloc/internal/extent.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/extent.h
@@ -43,17 +43,17 @@ struct extent_node_s {
 	/* Linkage for arena's runs_dirty and chunks_cache rings. */
 	arena_runs_dirty_link_t	rd;
 	qr(extent_node_t)	cc_link;
 
 	union {
 		/* Linkage for the size/address-ordered tree. */
 		rb_node(extent_node_t)	szad_link;
 
-		/* Linkage for arena's huge and node_cache lists. */
+		/* Linkage for arena's achunks, huge, and node_cache lists. */
 		ql_elm(extent_node_t)	ql_link;
 	};
 
 	/* Linkage for the address-ordered tree. */
 	rb_node(extent_node_t)	ad_link;
 };
 typedef rb_tree(extent_node_t) extent_tree_t;
 
--- a/memory/jemalloc/src/include/jemalloc/internal/huge.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/huge.h
@@ -4,33 +4,32 @@
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
 
 #endif /* JEMALLOC_H_STRUCTS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
-void	*huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero,
-    tcache_t *tcache);
-void	*huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment,
-    bool zero, tcache_t *tcache);
-bool	huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize,
+void	*huge_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero);
+void	*huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize,
+    size_t alignment, bool zero);
+bool	huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize,
     size_t usize_min, size_t usize_max, bool zero);
 void	*huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize,
     size_t usize, size_t alignment, bool zero, tcache_t *tcache);
 #ifdef JEMALLOC_JET
 typedef void (huge_dalloc_junk_t)(void *, size_t);
 extern huge_dalloc_junk_t *huge_dalloc_junk;
 #endif
-void	huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache);
+void	huge_dalloc(tsdn_t *tsdn, void *ptr);
 arena_t	*huge_aalloc(const void *ptr);
-size_t	huge_salloc(const void *ptr);
-prof_tctx_t	*huge_prof_tctx_get(const void *ptr);
-void	huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx);
-void	huge_prof_tctx_reset(const void *ptr);
+size_t	huge_salloc(tsdn_t *tsdn, const void *ptr);
+prof_tctx_t	*huge_prof_tctx_get(tsdn_t *tsdn, const void *ptr);
+void	huge_prof_tctx_set(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx);
+void	huge_prof_tctx_reset(tsdn_t *tsdn, const void *ptr);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal.h.in
@@ -156,17 +156,20 @@ static const bool config_cache_oblivious
 
 #ifdef JEMALLOC_ZONE
 #include <mach/mach_error.h>
 #include <mach/mach_init.h>
 #include <mach/vm_map.h>
 #include <malloc/malloc.h>
 #endif
 
+#include "jemalloc/internal/ph.h"
+#ifndef __PGI
 #define	RB_COMPACT
+#endif
 #include "jemalloc/internal/rb.h"
 #include "jemalloc/internal/qr.h"
 #include "jemalloc/internal/ql.h"
 
 /*
  * jemalloc can conceptually be broken into components (arena, tcache, etc.),
  * but there are circular dependencies that cannot be broken without
  * substantial performance degradation.  In order to reduce the effect on
@@ -179,16 +182,19 @@ static const bool config_cache_oblivious
  *   JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes.
  *   JEMALLOC_H_INLINES : Inline functions.
  */
 /******************************************************************************/
 #define	JEMALLOC_H_TYPES
 
 #include "jemalloc/internal/jemalloc_internal_macros.h"
 
+/* Page size index type. */
+typedef unsigned pszind_t;
+
 /* Size class index type. */
 typedef unsigned szind_t;
 
 /*
  * Flags bits:
  *
  * a: arena
  * t: tcache
@@ -228,17 +234,17 @@ typedef unsigned szind_t;
 #    define LG_QUANTUM		4
 #  endif
 #  ifdef __ia64__
 #    define LG_QUANTUM		4
 #  endif
 #  ifdef __alpha__
 #    define LG_QUANTUM		4
 #  endif
-#  if (defined(__sparc64__) || defined(__sparcv9))
+#  if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__))
 #    define LG_QUANTUM		4
 #  endif
 #  if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64))
 #    define LG_QUANTUM		4
 #  endif
 #  ifdef __arm__
 #    define LG_QUANTUM		3
 #  endif
@@ -252,16 +258,19 @@ typedef unsigned szind_t;
 #    define LG_QUANTUM		3
 #  endif
 #  ifdef __or1k__
 #    define LG_QUANTUM		3
 #  endif
 #  ifdef __powerpc__
 #    define LG_QUANTUM		4
 #  endif
+#  ifdef __riscv__
+#    define LG_QUANTUM		4
+#  endif
 #  ifdef __s390__
 #    define LG_QUANTUM		4
 #  endif
 #  ifdef __SH4__
 #    define LG_QUANTUM		4
 #  endif
 #  ifdef __tile__
 #    define LG_QUANTUM		4
@@ -355,23 +364,25 @@ typedef unsigned szind_t;
 #else
 #  define VARIABLE_ARRAY(type, name, count) type name[(count)]
 #endif
 
 #include "jemalloc/internal/nstime.h"
 #include "jemalloc/internal/valgrind.h"
 #include "jemalloc/internal/util.h"
 #include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/spin.h"
 #include "jemalloc/internal/prng.h"
 #include "jemalloc/internal/ticker.h"
 #include "jemalloc/internal/ckh.h"
 #include "jemalloc/internal/size_classes.h"
 #include "jemalloc/internal/smoothstep.h"
 #include "jemalloc/internal/stats.h"
 #include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/witness.h"
 #include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/tsd.h"
 #include "jemalloc/internal/mb.h"
 #include "jemalloc/internal/extent.h"
 #include "jemalloc/internal/arena.h"
 #include "jemalloc/internal/bitmap.h"
 #include "jemalloc/internal/base.h"
 #include "jemalloc/internal/rtree.h"
@@ -386,23 +397,25 @@ typedef unsigned szind_t;
 #undef JEMALLOC_H_TYPES
 /******************************************************************************/
 #define	JEMALLOC_H_STRUCTS
 
 #include "jemalloc/internal/nstime.h"
 #include "jemalloc/internal/valgrind.h"
 #include "jemalloc/internal/util.h"
 #include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/spin.h"
 #include "jemalloc/internal/prng.h"
 #include "jemalloc/internal/ticker.h"
 #include "jemalloc/internal/ckh.h"
 #include "jemalloc/internal/size_classes.h"
 #include "jemalloc/internal/smoothstep.h"
 #include "jemalloc/internal/stats.h"
 #include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/witness.h"
 #include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/mb.h"
 #include "jemalloc/internal/bitmap.h"
 #define	JEMALLOC_ARENA_STRUCTS_A
 #include "jemalloc/internal/arena.h"
 #undef JEMALLOC_ARENA_STRUCTS_A
 #include "jemalloc/internal/extent.h"
 #define	JEMALLOC_ARENA_STRUCTS_B
@@ -435,66 +448,77 @@ extern bool	opt_xmalloc;
 extern bool	opt_zero;
 extern unsigned	opt_narenas;
 
 extern bool	in_valgrind;
 
 /* Number of CPUs. */
 extern unsigned	ncpus;
 
+/* Number of arenas used for automatic multiplexing of threads and arenas. */
+extern unsigned	narenas_auto;
+
 /*
  * Arenas that are used to service external requests.  Not all elements of the
  * arenas array are necessarily used; arenas are created lazily as needed.
  */
 extern arena_t	**arenas;
 
 /*
+ * pind2sz_tab encodes the same information as could be computed by
+ * pind2sz_compute().
+ */
+extern size_t const	pind2sz_tab[NPSIZES];
+/*
  * index2size_tab encodes the same information as could be computed (at
  * unacceptable cost in some code paths) by index2size_compute().
  */
-extern size_t const	index2size_tab[NSIZES+1];
+extern size_t const	index2size_tab[NSIZES];
 /*
  * size2index_tab is a compact lookup table that rounds request sizes up to
  * size classes.  In order to reduce cache footprint, the table is compressed,
  * and all accesses are via size2index().
  */
 extern uint8_t const	size2index_tab[];
 
+arena_t	*a0get(void);
 void	*a0malloc(size_t size);
 void	a0dalloc(void *ptr);
 void	*bootstrap_malloc(size_t size);
 void	*bootstrap_calloc(size_t num, size_t size);
 void	bootstrap_free(void *ptr);
-arena_t	*arenas_extend(unsigned ind);
 unsigned	narenas_total_get(void);
-arena_t	*arena_init(unsigned ind);
+arena_t	*arena_init(tsdn_t *tsdn, unsigned ind);
 arena_tdata_t	*arena_tdata_get_hard(tsd_t *tsd, unsigned ind);
-arena_t	*arena_choose_hard(tsd_t *tsd);
+arena_t	*arena_choose_hard(tsd_t *tsd, bool internal);
 void	arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind);
 void	thread_allocated_cleanup(tsd_t *tsd);
 void	thread_deallocated_cleanup(tsd_t *tsd);
+void	iarena_cleanup(tsd_t *tsd);
 void	arena_cleanup(tsd_t *tsd);
 void	arenas_tdata_cleanup(tsd_t *tsd);
 void	narenas_tdata_cleanup(tsd_t *tsd);
 void	arenas_tdata_bypass_cleanup(tsd_t *tsd);
 void	jemalloc_prefork(void);
 void	jemalloc_postfork_parent(void);
 void	jemalloc_postfork_child(void);
 
 #include "jemalloc/internal/nstime.h"
 #include "jemalloc/internal/valgrind.h"
 #include "jemalloc/internal/util.h"
 #include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/spin.h"
 #include "jemalloc/internal/prng.h"
 #include "jemalloc/internal/ticker.h"
 #include "jemalloc/internal/ckh.h"
 #include "jemalloc/internal/size_classes.h"
 #include "jemalloc/internal/smoothstep.h"
 #include "jemalloc/internal/stats.h"
 #include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/witness.h"
 #include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/mb.h"
 #include "jemalloc/internal/bitmap.h"
 #include "jemalloc/internal/extent.h"
 #include "jemalloc/internal/arena.h"
 #include "jemalloc/internal/base.h"
 #include "jemalloc/internal/rtree.h"
 #include "jemalloc/internal/pages.h"
@@ -509,67 +533,154 @@ void	jemalloc_postfork_child(void);
 #undef JEMALLOC_H_EXTERNS
 /******************************************************************************/
 #define	JEMALLOC_H_INLINES
 
 #include "jemalloc/internal/nstime.h"
 #include "jemalloc/internal/valgrind.h"
 #include "jemalloc/internal/util.h"
 #include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/spin.h"
 #include "jemalloc/internal/prng.h"
 #include "jemalloc/internal/ticker.h"
 #include "jemalloc/internal/ckh.h"
 #include "jemalloc/internal/size_classes.h"
 #include "jemalloc/internal/smoothstep.h"
 #include "jemalloc/internal/stats.h"
 #include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/tsd.h"
+#include "jemalloc/internal/witness.h"
 #include "jemalloc/internal/mutex.h"
-#include "jemalloc/internal/tsd.h"
 #include "jemalloc/internal/mb.h"
 #include "jemalloc/internal/extent.h"
 #include "jemalloc/internal/base.h"
 #include "jemalloc/internal/rtree.h"
 #include "jemalloc/internal/pages.h"
 #include "jemalloc/internal/chunk.h"
 #include "jemalloc/internal/huge.h"
 
 #ifndef JEMALLOC_ENABLE_INLINE
+pszind_t	psz2ind(size_t psz);
+size_t	pind2sz_compute(pszind_t pind);
+size_t	pind2sz_lookup(pszind_t pind);
+size_t	pind2sz(pszind_t pind);
+size_t	psz2u(size_t psz);
 szind_t	size2index_compute(size_t size);
 szind_t	size2index_lookup(size_t size);
 szind_t	size2index(size_t size);
 size_t	index2size_compute(szind_t index);
 size_t	index2size_lookup(szind_t index);
 size_t	index2size(szind_t index);
 size_t	s2u_compute(size_t size);
 size_t	s2u_lookup(size_t size);
 size_t	s2u(size_t size);
 size_t	sa2u(size_t size, size_t alignment);
+arena_t	*arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal);
 arena_t	*arena_choose(tsd_t *tsd, arena_t *arena);
+arena_t	*arena_ichoose(tsd_t *tsd, arena_t *arena);
 arena_tdata_t	*arena_tdata_get(tsd_t *tsd, unsigned ind,
     bool refresh_if_missing);
-arena_t	*arena_get(unsigned ind, bool init_if_missing);
+arena_t	*arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing);
 ticker_t	*decay_ticker_get(tsd_t *tsd, unsigned ind);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
+JEMALLOC_INLINE pszind_t
+psz2ind(size_t psz)
+{
+
+	if (unlikely(psz > HUGE_MAXCLASS))
+		return (NPSIZES);
+	{
+		pszind_t x = lg_floor((psz<<1)-1);
+		pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 : x -
+		    (LG_SIZE_CLASS_GROUP + LG_PAGE);
+		pszind_t grp = shift << LG_SIZE_CLASS_GROUP;
+
+		pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ?
+		    LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1;
+
+		size_t delta_inverse_mask = ZI(-1) << lg_delta;
+		pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) &
+		    ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1);
+
+		pszind_t ind = grp + mod;
+		return (ind);
+	}
+}
+
+JEMALLOC_INLINE size_t
+pind2sz_compute(pszind_t pind)
+{
+
+	{
+		size_t grp = pind >> LG_SIZE_CLASS_GROUP;
+		size_t mod = pind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1);
+
+		size_t grp_size_mask = ~((!!grp)-1);
+		size_t grp_size = ((ZU(1) << (LG_PAGE +
+		    (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask;
+
+		size_t shift = (grp == 0) ? 1 : grp;
+		size_t lg_delta = shift + (LG_PAGE-1);
+		size_t mod_size = (mod+1) << lg_delta;
+
+		size_t sz = grp_size + mod_size;
+		return (sz);
+	}
+}
+
+JEMALLOC_INLINE size_t
+pind2sz_lookup(pszind_t pind)
+{
+	size_t ret = (size_t)pind2sz_tab[pind];
+	assert(ret == pind2sz_compute(pind));
+	return (ret);
+}
+
+JEMALLOC_INLINE size_t
+pind2sz(pszind_t pind)
+{
+
+	assert(pind < NPSIZES);
+	return (pind2sz_lookup(pind));
+}
+
+JEMALLOC_INLINE size_t
+psz2u(size_t psz)
+{
+
+	if (unlikely(psz > HUGE_MAXCLASS))
+		return (0);
+	{
+		size_t x = lg_floor((psz<<1)-1);
+		size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ?
+		    LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1;
+		size_t delta = ZU(1) << lg_delta;
+		size_t delta_mask = delta - 1;
+		size_t usize = (psz + delta_mask) & ~delta_mask;
+		return (usize);
+	}
+}
+
 JEMALLOC_INLINE szind_t
 size2index_compute(size_t size)
 {
 
+	if (unlikely(size > HUGE_MAXCLASS))
+		return (NSIZES);
 #if (NTBINS != 0)
 	if (size <= (ZU(1) << LG_TINY_MAXCLASS)) {
 		szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1;
 		szind_t lg_ceil = lg_floor(pow2_ceil_zu(size));
 		return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin);
 	}
 #endif
 	{
-		szind_t x = unlikely(ZI(size) < 0) ? ((size<<1) ?
-		    (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1))
-		    : lg_floor((size<<1)-1);
+		szind_t x = lg_floor((size<<1)-1);
 		szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 :
 		    x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM);
 		szind_t grp = shift << LG_SIZE_CLASS_GROUP;
 
 		szind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1)
 		    ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1;
 
 		size_t delta_inverse_mask = ZI(-1) << lg_delta;
@@ -645,28 +756,28 @@ index2size(szind_t index)
 	assert(index < NSIZES);
 	return (index2size_lookup(index));
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
 s2u_compute(size_t size)
 {
 
+	if (unlikely(size > HUGE_MAXCLASS))
+		return (0);
 #if (NTBINS > 0)
 	if (size <= (ZU(1) << LG_TINY_MAXCLASS)) {
 		size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1;
 		size_t lg_ceil = lg_floor(pow2_ceil_zu(size));
 		return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) :
 		    (ZU(1) << lg_ceil));
 	}
 #endif
 	{
-		size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ?
-		    (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1))
-		    : lg_floor((size<<1)-1);
+		size_t x = lg_floor((size<<1)-1);
 		size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1)
 		    ?  LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1;
 		size_t delta = ZU(1) << lg_delta;
 		size_t delta_mask = delta - 1;
 		size_t usize = (size + delta_mask) & ~delta_mask;
 		return (usize);
 	}
 }
@@ -775,29 +886,44 @@ sa2u(size_t size, size_t alignment)
 		/* size_t overflow. */
 		return (0);
 	}
 	return (usize);
 }
 
 /* Choose an arena based on a per-thread value. */
 JEMALLOC_INLINE arena_t *
-arena_choose(tsd_t *tsd, arena_t *arena)
+arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal)
 {
 	arena_t *ret;
 
 	if (arena != NULL)
 		return (arena);
 
-	if (unlikely((ret = tsd_arena_get(tsd)) == NULL))
-		ret = arena_choose_hard(tsd);
+	ret = internal ? tsd_iarena_get(tsd) : tsd_arena_get(tsd);
+	if (unlikely(ret == NULL))
+		ret = arena_choose_hard(tsd, internal);
 
 	return (ret);
 }
 
+JEMALLOC_INLINE arena_t *
+arena_choose(tsd_t *tsd, arena_t *arena)
+{
+
+	return (arena_choose_impl(tsd, arena, false));
+}
+
+JEMALLOC_INLINE arena_t *
+arena_ichoose(tsd_t *tsd, arena_t *arena)
+{
+
+	return (arena_choose_impl(tsd, arena, true));
+}
+
 JEMALLOC_INLINE arena_tdata_t *
 arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing)
 {
 	arena_tdata_t *tdata;
 	arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd);
 
 	if (unlikely(arenas_tdata == NULL)) {
 		/* arenas_tdata hasn't been initialized yet. */
@@ -814,27 +940,27 @@ arena_tdata_get(tsd_t *tsd, unsigned ind
 
 	tdata = &arenas_tdata[ind];
 	if (likely(tdata != NULL) || !refresh_if_missing)
 		return (tdata);
 	return (arena_tdata_get_hard(tsd, ind));
 }
 
 JEMALLOC_INLINE arena_t *
-arena_get(unsigned ind, bool init_if_missing)
+arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing)
 {
 	arena_t *ret;
 
 	assert(ind <= MALLOCX_ARENA_MAX);
 
 	ret = arenas[ind];
 	if (unlikely(ret == NULL)) {
 		ret = atomic_read_p((void *)&arenas[ind]);
 		if (init_if_missing && unlikely(ret == NULL))
-			ret = arena_init(ind);
+			ret = arena_init(tsdn, ind);
 	}
 	return (ret);
 }
 
 JEMALLOC_INLINE ticker_t *
 decay_ticker_get(tsd_t *tsd, unsigned ind)
 {
 	arena_tdata_t *tdata;
@@ -858,171 +984,151 @@ decay_ticker_get(tsd_t *tsd, unsigned in
 #define	JEMALLOC_ARENA_INLINE_B
 #include "jemalloc/internal/arena.h"
 #undef JEMALLOC_ARENA_INLINE_B
 #include "jemalloc/internal/hash.h"
 #include "jemalloc/internal/quarantine.h"
 
 #ifndef JEMALLOC_ENABLE_INLINE
 arena_t	*iaalloc(const void *ptr);
-size_t	isalloc(const void *ptr, bool demote);
-void	*iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero,
+size_t	isalloc(tsdn_t *tsdn, const void *ptr, bool demote);
+void	*iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero,
     tcache_t *tcache, bool is_metadata, arena_t *arena, bool slow_path);
-void	*imalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache,
-    arena_t *arena);
-void	*imalloc(tsd_t *tsd, size_t size, szind_t ind, bool slow_path);
-void	*icalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache,
-    arena_t *arena);
-void	*icalloc(tsd_t *tsd, size_t size, szind_t ind);
-void	*ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
+void	*ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero,
+    bool slow_path);
+void	*ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
     tcache_t *tcache, bool is_metadata, arena_t *arena);
-void	*ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
+void	*ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
     tcache_t *tcache, arena_t *arena);
 void	*ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero);
-size_t	ivsalloc(const void *ptr, bool demote);
+size_t	ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote);
 size_t	u2rz(size_t usize);
-size_t	p2rz(const void *ptr);
-void	idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata,
+size_t	p2rz(tsdn_t *tsdn, const void *ptr);
+void	idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_metadata,
     bool slow_path);
-void	idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache);
 void	idalloc(tsd_t *tsd, void *ptr);
 void	iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path);
-void	isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache);
-void	isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache);
+void	isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
+    bool slow_path);
+void	isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache,
+    bool slow_path);
 void	*iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
     size_t extra, size_t alignment, bool zero, tcache_t *tcache,
     arena_t *arena);
 void	*iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
     size_t alignment, bool zero, tcache_t *tcache, arena_t *arena);
 void	*iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
     size_t alignment, bool zero);
-bool	ixalloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
+bool	ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
     size_t extra, size_t alignment, bool zero);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
 JEMALLOC_ALWAYS_INLINE arena_t *
 iaalloc(const void *ptr)
 {
 
 	assert(ptr != NULL);
 
 	return (arena_aalloc(ptr));
 }
 
 /*
  * Typical usage:
+ *   tsdn_t *tsdn = [...]
  *   void *ptr = [...]
- *   size_t sz = isalloc(ptr, config_prof);
+ *   size_t sz = isalloc(tsdn, ptr, config_prof);
  */
 JEMALLOC_ALWAYS_INLINE size_t
-isalloc(const void *ptr, bool demote)
+isalloc(tsdn_t *tsdn, const void *ptr, bool demote)
 {
 
 	assert(ptr != NULL);
 	/* Demotion only makes sense if config_prof is true. */
 	assert(config_prof || !demote);
 
-	return (arena_salloc(ptr, demote));
+	return (arena_salloc(tsdn, ptr, demote));
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, tcache_t *tcache,
+iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache,
     bool is_metadata, arena_t *arena, bool slow_path)
 {
 	void *ret;
 
 	assert(size != 0);
+	assert(!is_metadata || tcache == NULL);
+	assert(!is_metadata || arena == NULL || arena->ind < narenas_auto);
 
-	ret = arena_malloc(tsd, arena, size, ind, zero, tcache, slow_path);
+	ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path);
 	if (config_stats && is_metadata && likely(ret != NULL)) {
-		arena_metadata_allocated_add(iaalloc(ret), isalloc(ret,
+		arena_metadata_allocated_add(iaalloc(ret),
+		    isalloc(tsdn, ret, config_prof));
+	}
+	return (ret);
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path)
+{
+
+	return (iallocztm(tsd_tsdn(tsd), size, ind, zero, tcache_get(tsd, true),
+	    false, NULL, slow_path));
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
+    tcache_t *tcache, bool is_metadata, arena_t *arena)
+{
+	void *ret;
+
+	assert(usize != 0);
+	assert(usize == sa2u(usize, alignment));
+	assert(!is_metadata || tcache == NULL);
+	assert(!is_metadata || arena == NULL || arena->ind < narenas_auto);
+
+	ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache);
+	assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret);
+	if (config_stats && is_metadata && likely(ret != NULL)) {
+		arena_metadata_allocated_add(iaalloc(ret), isalloc(tsdn, ret,
 		    config_prof));
 	}
 	return (ret);
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-imalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, arena_t *arena)
-{
-
-	return (iallocztm(tsd, size, ind, false, tcache, false, arena, true));
-}
-
-JEMALLOC_ALWAYS_INLINE void *
-imalloc(tsd_t *tsd, size_t size, szind_t ind, bool slow_path)
-{
-
-	return (iallocztm(tsd, size, ind, false, tcache_get(tsd, true), false,
-	    NULL, slow_path));
-}
-
-JEMALLOC_ALWAYS_INLINE void *
-icalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, arena_t *arena)
-{
-
-	return (iallocztm(tsd, size, ind, true, tcache, false, arena, true));
-}
-
-JEMALLOC_ALWAYS_INLINE void *
-icalloc(tsd_t *tsd, size_t size, szind_t ind)
-{
-
-	return (iallocztm(tsd, size, ind, true, tcache_get(tsd, true), false,
-	    NULL, true));
-}
-
-JEMALLOC_ALWAYS_INLINE void *
-ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
-    tcache_t *tcache, bool is_metadata, arena_t *arena)
-{
-	void *ret;
-
-	assert(usize != 0);
-	assert(usize == sa2u(usize, alignment));
-
-	ret = arena_palloc(tsd, arena, usize, alignment, zero, tcache);
-	assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret);
-	if (config_stats && is_metadata && likely(ret != NULL)) {
-		arena_metadata_allocated_add(iaalloc(ret), isalloc(ret,
-		    config_prof));
-	}
-	return (ret);
-}
-
-JEMALLOC_ALWAYS_INLINE void *
-ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
+ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
     tcache_t *tcache, arena_t *arena)
 {
 
-	return (ipallocztm(tsd, usize, alignment, zero, tcache, false, arena));
+	return (ipallocztm(tsdn, usize, alignment, zero, tcache, false, arena));
 }
 
 JEMALLOC_ALWAYS_INLINE void *
 ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero)
 {
 
-	return (ipallocztm(tsd, usize, alignment, zero, tcache_get(tsd, true),
-	    false, NULL));
+	return (ipallocztm(tsd_tsdn(tsd), usize, alignment, zero,
+	    tcache_get(tsd, true), false, NULL));
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-ivsalloc(const void *ptr, bool demote)
+ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote)
 {
 	extent_node_t *node;
 
 	/* Return 0 if ptr is not within a chunk managed by jemalloc. */
 	node = chunk_lookup(ptr, false);
 	if (node == NULL)
 		return (0);
 	/* Only arena chunks should be looked up via interior pointers. */
 	assert(extent_node_addr_get(node) == ptr ||
 	    extent_node_achunk_get(node));
 
-	return (isalloc(ptr, demote));
+	return (isalloc(tsdn, ptr, demote));
 }
 
 JEMALLOC_INLINE size_t
 u2rz(size_t usize)
 {
 	size_t ret;
 
 	if (usize <= SMALL_MAXCLASS) {
@@ -1030,107 +1136,104 @@ u2rz(size_t usize)
 		ret = arena_bin_info[binind].redzone_size;
 	} else
 		ret = 0;
 
 	return (ret);
 }
 
 JEMALLOC_INLINE size_t
-p2rz(const void *ptr)
+p2rz(tsdn_t *tsdn, const void *ptr)
 {
-	size_t usize = isalloc(ptr, false);
+	size_t usize = isalloc(tsdn, ptr, false);
 
 	return (u2rz(usize));
 }
 
 JEMALLOC_ALWAYS_INLINE void
-idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata,
+idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_metadata,
     bool slow_path)
 {
 
 	assert(ptr != NULL);
+	assert(!is_metadata || tcache == NULL);
+	assert(!is_metadata || iaalloc(ptr)->ind < narenas_auto);
 	if (config_stats && is_metadata) {
-		arena_metadata_allocated_sub(iaalloc(ptr), isalloc(ptr,
+		arena_metadata_allocated_sub(iaalloc(ptr), isalloc(tsdn, ptr,
 		    config_prof));
 	}
 
-	arena_dalloc(tsd, ptr, tcache, slow_path);
-}
-
-JEMALLOC_ALWAYS_INLINE void
-idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache)
-{
-
-	idalloctm(tsd, ptr, tcache, false, true);
+	arena_dalloc(tsdn, ptr, tcache, slow_path);
 }
 
 JEMALLOC_ALWAYS_INLINE void
 idalloc(tsd_t *tsd, void *ptr)
 {
 
-	idalloctm(tsd, ptr, tcache_get(tsd, false), false, true);
+	idalloctm(tsd_tsdn(tsd), ptr, tcache_get(tsd, false), false, true);
 }
 
 JEMALLOC_ALWAYS_INLINE void
 iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path)
 {
 
 	if (slow_path && config_fill && unlikely(opt_quarantine))
 		quarantine(tsd, ptr);
 	else
-		idalloctm(tsd, ptr, tcache, false, slow_path);
+		idalloctm(tsd_tsdn(tsd), ptr, tcache, false, slow_path);
 }
 
 JEMALLOC_ALWAYS_INLINE void
-isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache)
+isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
+    bool slow_path)
 {
 
-	arena_sdalloc(tsd, ptr, size, tcache);
+	arena_sdalloc(tsdn, ptr, size, tcache, slow_path);
 }
 
 JEMALLOC_ALWAYS_INLINE void
-isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache)
+isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, bool slow_path)
 {
 
-	if (config_fill && unlikely(opt_quarantine))
+	if (slow_path && config_fill && unlikely(opt_quarantine))
 		quarantine(tsd, ptr);
 	else
-		isdalloct(tsd, ptr, size, tcache);
+		isdalloct(tsd_tsdn(tsd), ptr, size, tcache, slow_path);
 }
 
 JEMALLOC_ALWAYS_INLINE void *
 iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
     size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena)
 {
 	void *p;
 	size_t usize, copysize;
 
 	usize = sa2u(size + extra, alignment);
 	if (unlikely(usize == 0 || usize > HUGE_MAXCLASS))
 		return (NULL);
-	p = ipalloct(tsd, usize, alignment, zero, tcache, arena);
+	p = ipalloct(tsd_tsdn(tsd), usize, alignment, zero, tcache, arena);
 	if (p == NULL) {
 		if (extra == 0)
 			return (NULL);
 		/* Try again, without extra this time. */
 		usize = sa2u(size, alignment);
 		if (unlikely(usize == 0 || usize > HUGE_MAXCLASS))
 			return (NULL);
-		p = ipalloct(tsd, usize, alignment, zero, tcache, arena);
+		p = ipalloct(tsd_tsdn(tsd), usize, alignment, zero, tcache,
+		    arena);
 		if (p == NULL)
 			return (NULL);
 	}
 	/*
 	 * Copy at most size bytes (not size+extra), since the caller has no
 	 * expectation that the extra bytes will be reliably preserved.
 	 */
 	copysize = (size < oldsize) ? size : oldsize;
 	memcpy(p, ptr, copysize);
-	isqalloc(tsd, ptr, oldsize, tcache);
+	isqalloc(tsd, ptr, oldsize, tcache, true);
 	return (p);
 }
 
 JEMALLOC_ALWAYS_INLINE void *
 iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment,
     bool zero, tcache_t *tcache, arena_t *arena)
 {
 
@@ -1156,30 +1259,30 @@ iralloc(tsd_t *tsd, void *ptr, size_t ol
     bool zero)
 {
 
 	return (iralloct(tsd, ptr, oldsize, size, alignment, zero,
 	    tcache_get(tsd, true), NULL));
 }
 
 JEMALLOC_ALWAYS_INLINE bool
-ixalloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t extra,
+ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra,
     size_t alignment, bool zero)
 {
 
 	assert(ptr != NULL);
 	assert(size != 0);
 
 	if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1))
 	    != 0) {
 		/* Existing object alignment is inadequate. */
 		return (true);
 	}
 
-	return (arena_ralloc_no_move(tsd, ptr, oldsize, size, extra, zero));
+	return (arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero));
 }
 #endif
 
 #include "jemalloc/internal/prof.h"
 
 #undef JEMALLOC_H_INLINES
 /******************************************************************************/
 #endif /* JEMALLOC_INTERNAL_H */
--- a/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal_decls.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal_decls.h
@@ -12,18 +12,28 @@
 #  if !defined(__pnacl__) && !defined(__native_client__)
 #    include <sys/syscall.h>
 #    if !defined(SYS_write) && defined(__NR_write)
 #      define SYS_write __NR_write
 #    endif
 #    include <sys/uio.h>
 #  endif
 #  include <pthread.h>
+#  ifdef JEMALLOC_OS_UNFAIR_LOCK
+#    include <os/lock.h>
+#  endif
+#  ifdef JEMALLOC_GLIBC_MALLOC_HOOK
+#    include <sched.h>
+#  endif
 #  include <errno.h>
 #  include <sys/time.h>
+#  include <time.h>
+#  ifdef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME
+#    include <mach/mach_time.h>
+#  endif
 #endif
 #include <sys/types.h>
 
 #include <limits.h>
 #ifndef SIZE_T_MAX
 #  define SIZE_T_MAX	SIZE_MAX
 #endif
 #include <stdarg.h>
--- a/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal_defs.h.in
@@ -56,32 +56,55 @@
 #undef JEMALLOC_HAVE_BUILTIN_CLZ
 
 /*
  * Defined if madvise(2) is available.
  */
 #undef JEMALLOC_HAVE_MADVISE
 
 /*
+ * Defined if os_unfair_lock_*() functions are available, as provided by Darwin.
+ */
+#undef JEMALLOC_OS_UNFAIR_LOCK
+
+/*
  * Defined if OSSpin*() functions are available, as provided by Darwin, and
  * documented in the spinlock(3) manual page.
  */
 #undef JEMALLOC_OSSPIN
 
+/* Defined if syscall(2) is available. */
+#undef JEMALLOC_HAVE_SYSCALL
+
 /*
  * Defined if secure_getenv(3) is available.
  */
 #undef JEMALLOC_HAVE_SECURE_GETENV
 
 /*
  * Defined if issetugid(2) is available.
  */
 #undef JEMALLOC_HAVE_ISSETUGID
 
 /*
+ * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available.
+ */
+#undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE
+
+/*
+ * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available.
+ */
+#undef JEMALLOC_HAVE_CLOCK_MONOTONIC
+
+/*
+ * Defined if mach_absolute_time() is available.
+ */
+#undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME
+
+/*
  * Defined if _malloc_thread_cleanup() exists.  At least in the case of
  * FreeBSD, pthread_key_create() allocates, which if used during malloc
  * bootstrapping will cause recursion into the pthreads library.  Therefore, if
  * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in
  * malloc_tsd.
  */
 #undef JEMALLOC_MALLOC_THREAD_CLEANUP
 
@@ -184,16 +207,22 @@
  * of mmap()/munmap() calls will cause virtual memory map holes.
  */
 #undef JEMALLOC_MUNMAP
 
 /* TLS is used to map arenas and magazine caches to threads. */
 #undef JEMALLOC_TLS
 
 /*
+ * Used to mark unreachable code to quiet "end of non-void" compiler warnings.
+ * Don't use this directly; instead use unreachable() from util.h
+ */
+#undef JEMALLOC_INTERNAL_UNREACHABLE
+
+/*
  * ffs*() functions to use for bitmapping.  Don't use these directly; instead,
  * use ffs_*() from util.h.
  */
 #undef JEMALLOC_INTERNAL_FFSLL
 #undef JEMALLOC_INTERNAL_FFSL
 #undef JEMALLOC_INTERNAL_FFS
 
 /*
@@ -210,16 +239,25 @@
 
 /*
  * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
  */
 #undef JEMALLOC_ZONE
 #undef JEMALLOC_ZONE_VERSION
 
 /*
+ * Methods for determining whether the OS overcommits.
+ * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's
+ *                                         /proc/sys/vm.overcommit_memory file.
+ * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl.
+ */
+#undef JEMALLOC_SYSCTL_VM_OVERCOMMIT
+#undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
+
+/*
  * Methods for purging unused pages differ between operating systems.
  *
  *   madvise(..., MADV_DONTNEED) : On Linux, this immediately discards pages,
  *                                 such that new pages will be demand-zeroed if
  *                                 the address region is later touched.
  *   madvise(..., MADV_FREE) : On FreeBSD and Darwin, this marks pages as being
  *                             unused, such that they will be discarded rather
  *                             than swapped out.
--- a/memory/jemalloc/src/include/jemalloc/internal/mb.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/mb.h
@@ -37,27 +37,27 @@ mb_write(void)
 	asm volatile ("pusha;"
 	    "xor  %%eax,%%eax;"
 	    "cpuid;"
 	    "popa;"
 	    : /* Outputs. */
 	    : /* Inputs. */
 	    : "memory" /* Clobbers. */
 	    );
-#else
+#  else
 	/*
 	 * This is hopefully enough to keep the compiler from reordering
 	 * instructions around this one.
 	 */
 	asm volatile ("nop;"
 	    : /* Outputs. */
 	    : /* Inputs. */
 	    : "memory" /* Clobbers. */
 	    );
-#endif
+#  endif
 }
 #elif (defined(__amd64__) || defined(__x86_64__))
 JEMALLOC_INLINE void
 mb_write(void)
 {
 
 	asm volatile ("sfence"
 	    : /* Outputs. */
@@ -99,17 +99,17 @@ mb_write(void)
  * This is much slower than a simple memory barrier, but the semantics of mutex
  * unlock make this work.
  */
 JEMALLOC_INLINE void
 mb_write(void)
 {
 	malloc_mutex_t mtx;
 
-	malloc_mutex_init(&mtx);
-	malloc_mutex_lock(&mtx);
-	malloc_mutex_unlock(&mtx);
+	malloc_mutex_init(&mtx, "mb", WITNESS_RANK_OMIT);
+	malloc_mutex_lock(TSDN_NULL, &mtx);
+	malloc_mutex_unlock(TSDN_NULL, &mtx);
 }
 #endif
 #endif
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/internal/mutex.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/mutex.h
@@ -1,111 +1,147 @@
 /******************************************************************************/
 #ifdef JEMALLOC_H_TYPES
 
 typedef struct malloc_mutex_s malloc_mutex_t;
 
 #ifdef _WIN32
 #  define MALLOC_MUTEX_INITIALIZER
+#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
+#  define MALLOC_MUTEX_INITIALIZER					\
+     {OS_UNFAIR_LOCK_INIT, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)}
 #elif (defined(JEMALLOC_OSSPIN))
-#  define MALLOC_MUTEX_INITIALIZER {0}
+#  define MALLOC_MUTEX_INITIALIZER {0, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)}
 #elif (defined(JEMALLOC_MUTEX_INIT_CB))
-#  define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL}
+#  define MALLOC_MUTEX_INITIALIZER					\
+    {PTHREAD_MUTEX_INITIALIZER, NULL, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)}
 #else
 #  if (defined(JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP) &&		\
        defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP))
 #    define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP
-#    define MALLOC_MUTEX_INITIALIZER {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP}
+#    define MALLOC_MUTEX_INITIALIZER					\
+       {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP,				\
+        WITNESS_INITIALIZER(WITNESS_RANK_OMIT)}
 #  else
 #    define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT
-#    define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER}
+#    define MALLOC_MUTEX_INITIALIZER					\
+       {PTHREAD_MUTEX_INITIALIZER, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)}
 #  endif
 #endif
 
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
 
 struct malloc_mutex_s {
 #ifdef _WIN32
 #  if _WIN32_WINNT >= 0x0600
 	SRWLOCK         	lock;
 #  else
 	CRITICAL_SECTION	lock;
 #  endif
+#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
+	os_unfair_lock		lock;
 #elif (defined(JEMALLOC_OSSPIN))
 	OSSpinLock		lock;
 #elif (defined(JEMALLOC_MUTEX_INIT_CB))
 	pthread_mutex_t		lock;
 	malloc_mutex_t		*postponed_next;
 #else
 	pthread_mutex_t		lock;
 #endif
+	witness_t		witness;
 };
 
 #endif /* JEMALLOC_H_STRUCTS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
 #ifdef JEMALLOC_LAZY_LOCK
 extern bool isthreaded;
 #else
 #  undef isthreaded /* Undo private_namespace.h definition. */
 #  define isthreaded true
 #endif
 
-bool	malloc_mutex_init(malloc_mutex_t *mutex);
-void	malloc_mutex_prefork(malloc_mutex_t *mutex);
-void	malloc_mutex_postfork_parent(malloc_mutex_t *mutex);
-void	malloc_mutex_postfork_child(malloc_mutex_t *mutex);
-bool	mutex_boot(void);
+bool	malloc_mutex_init(malloc_mutex_t *mutex, const char *name,
+    witness_rank_t rank);
+void	malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex);
+void	malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex);
+void	malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex);
+bool	malloc_mutex_boot(void);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
-void	malloc_mutex_lock(malloc_mutex_t *mutex);
-void	malloc_mutex_unlock(malloc_mutex_t *mutex);
+void	malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex);
+void	malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex);
+void	malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex);
+void	malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_))
 JEMALLOC_INLINE void
-malloc_mutex_lock(malloc_mutex_t *mutex)
+malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex)
 {
 
 	if (isthreaded) {
+		witness_assert_not_owner(tsdn, &mutex->witness);
 #ifdef _WIN32
 #  if _WIN32_WINNT >= 0x0600
 		AcquireSRWLockExclusive(&mutex->lock);
 #  else
 		EnterCriticalSection(&mutex->lock);
 #  endif
+#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
+		os_unfair_lock_lock(&mutex->lock);
 #elif (defined(JEMALLOC_OSSPIN))
 		OSSpinLockLock(&mutex->lock);
 #else
 		pthread_mutex_lock(&mutex->lock);
 #endif
+		witness_lock(tsdn, &mutex->witness);
 	}
 }
 
 JEMALLOC_INLINE void
-malloc_mutex_unlock(malloc_mutex_t *mutex)
+malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex)
 {
 
 	if (isthreaded) {
+		witness_unlock(tsdn, &mutex->witness);
 #ifdef _WIN32
 #  if _WIN32_WINNT >= 0x0600
 		ReleaseSRWLockExclusive(&mutex->lock);
 #  else
 		LeaveCriticalSection(&mutex->lock);
 #  endif
+#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
+		os_unfair_lock_unlock(&mutex->lock);
 #elif (defined(JEMALLOC_OSSPIN))
 		OSSpinLockUnlock(&mutex->lock);
 #else
 		pthread_mutex_unlock(&mutex->lock);
 #endif
 	}
 }
+
+JEMALLOC_INLINE void
+malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex)
+{
+
+	if (isthreaded)
+		witness_assert_owner(tsdn, &mutex->witness);
+}
+
+JEMALLOC_INLINE void
+malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex)
+{
+
+	if (isthreaded)
+		witness_assert_not_owner(tsdn, &mutex->witness);
+}
 #endif
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/internal/nstime.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/nstime.h
@@ -1,18 +1,15 @@
 /******************************************************************************/
 #ifdef JEMALLOC_H_TYPES
 
-#define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \
-    && _POSIX_MONOTONIC_CLOCK >= 0
-
 typedef struct nstime_s nstime_t;
 
 /* Maximum supported number of seconds (~584 years). */
-#define	NSTIME_SEC_MAX	18446744072
+#define	NSTIME_SEC_MAX	KQU(18446744072)
 
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
 
 struct nstime_s {
 	uint64_t	ns;
 };
@@ -29,19 +26,22 @@ uint64_t	nstime_nsec(const nstime_t *tim
 void	nstime_copy(nstime_t *time, const nstime_t *source);
 int	nstime_compare(const nstime_t *a, const nstime_t *b);
 void	nstime_add(nstime_t *time, const nstime_t *addend);
 void	nstime_subtract(nstime_t *time, const nstime_t *subtrahend);
 void	nstime_imultiply(nstime_t *time, uint64_t multiplier);
 void	nstime_idivide(nstime_t *time, uint64_t divisor);
 uint64_t	nstime_divide(const nstime_t *time, const nstime_t *divisor);
 #ifdef JEMALLOC_JET
+typedef bool (nstime_monotonic_t)(void);
+extern nstime_monotonic_t *nstime_monotonic;
 typedef bool (nstime_update_t)(nstime_t *);
 extern nstime_update_t *nstime_update;
 #else
+bool	nstime_monotonic(void);
 bool	nstime_update(nstime_t *time);
 #endif
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #endif /* JEMALLOC_H_INLINES */
--- a/memory/jemalloc/src/include/jemalloc/internal/pages.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/pages.h
@@ -4,23 +4,24 @@
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
 
 #endif /* JEMALLOC_H_STRUCTS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
-void	*pages_map(void *addr, size_t size);
+void	*pages_map(void *addr, size_t size, bool *commit);
 void	pages_unmap(void *addr, size_t size);
 void	*pages_trim(void *addr, size_t alloc_size, size_t leadsize,
-    size_t size);
+    size_t size, bool *commit);
 bool	pages_commit(void *addr, size_t size);
 bool	pages_decommit(void *addr, size_t size);
 bool	pages_purge(void *addr, size_t size);
+void	pages_boot(void);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
 
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/include/jemalloc/internal/ph.h
@@ -0,0 +1,345 @@
+/*
+ * A Pairing Heap implementation.
+ *
+ * "The Pairing Heap: A New Form of Self-Adjusting Heap"
+ * https://www.cs.cmu.edu/~sleator/papers/pairing-heaps.pdf
+ *
+ * With auxiliary twopass list, described in a follow on paper.
+ *
+ * "Pairing Heaps: Experiments and Analysis"
+ * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.106.2988&rep=rep1&type=pdf
+ *
+ *******************************************************************************
+ */
+
+#ifndef PH_H_
+#define	PH_H_
+
+/* Node structure. */
+#define	phn(a_type)							\
+struct {								\
+	a_type	*phn_prev;						\
+	a_type	*phn_next;						\
+	a_type	*phn_lchild;						\
+}
+
+/* Root structure. */
+#define	ph(a_type)							\
+struct {								\
+	a_type	*ph_root;						\
+}
+
+/* Internal utility macros. */
+#define	phn_lchild_get(a_type, a_field, a_phn)				\
+	(a_phn->a_field.phn_lchild)
+#define	phn_lchild_set(a_type, a_field, a_phn, a_lchild) do {		\
+	a_phn->a_field.phn_lchild = a_lchild;				\
+} while (0)
+
+#define	phn_next_get(a_type, a_field, a_phn)				\
+	(a_phn->a_field.phn_next)
+#define	phn_prev_set(a_type, a_field, a_phn, a_prev) do {		\
+	a_phn->a_field.phn_prev = a_prev;				\
+} while (0)
+
+#define	phn_prev_get(a_type, a_field, a_phn)				\
+	(a_phn->a_field.phn_prev)
+#define	phn_next_set(a_type, a_field, a_phn, a_next) do {		\
+	a_phn->a_field.phn_next = a_next;				\
+} while (0)
+
+#define	phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, a_cmp) do {	\
+	a_type *phn0child;						\
+									\
+	assert(a_phn0 != NULL);						\
+	assert(a_phn1 != NULL);						\
+	assert(a_cmp(a_phn0, a_phn1) <= 0);				\
+									\
+	phn_prev_set(a_type, a_field, a_phn1, a_phn0);			\
+	phn0child = phn_lchild_get(a_type, a_field, a_phn0);		\
+	phn_next_set(a_type, a_field, a_phn1, phn0child);		\
+	if (phn0child != NULL)						\
+		phn_prev_set(a_type, a_field, phn0child, a_phn1);	\
+	phn_lchild_set(a_type, a_field, a_phn0, a_phn1);		\
+} while (0)
+
+#define	phn_merge(a_type, a_field, a_phn0, a_phn1, a_cmp, r_phn) do {	\
+	if (a_phn0 == NULL)						\
+		r_phn = a_phn1;						\
+	else if (a_phn1 == NULL)					\
+		r_phn = a_phn0;						\
+	else if (a_cmp(a_phn0, a_phn1) < 0) {				\
+		phn_merge_ordered(a_type, a_field, a_phn0, a_phn1,	\
+		    a_cmp);						\
+		r_phn = a_phn0;						\
+	} else {							\
+		phn_merge_ordered(a_type, a_field, a_phn1, a_phn0,	\
+		    a_cmp);						\
+		r_phn = a_phn1;						\
+	}								\
+} while (0)
+
+#define	ph_merge_siblings(a_type, a_field, a_phn, a_cmp, r_phn) do {	\
+	a_type *head = NULL;						\
+	a_type *tail = NULL;						\
+	a_type *phn0 = a_phn;						\
+	a_type *phn1 = phn_next_get(a_type, a_field, phn0);		\
+									\
+	/*								\
+	 * Multipass merge, wherein the first two elements of a FIFO	\
+	 * are repeatedly merged, and each result is appended to the	\
+	 * singly linked FIFO, until the FIFO contains only a single	\
+	 * element.  We start with a sibling list but no reference to	\
+	 * its tail, so we do a single pass over the sibling list to	\
+	 * populate the FIFO.						\
+	 */								\
+	if (phn1 != NULL) {						\
+		a_type *phnrest = phn_next_get(a_type, a_field, phn1);	\
+		if (phnrest != NULL)					\
+			phn_prev_set(a_type, a_field, phnrest, NULL);	\
+		phn_prev_set(a_type, a_field, phn0, NULL);		\
+		phn_next_set(a_type, a_field, phn0, NULL);		\
+		phn_prev_set(a_type, a_field, phn1, NULL);		\
+		phn_next_set(a_type, a_field, phn1, NULL);		\
+		phn_merge(a_type, a_field, phn0, phn1, a_cmp, phn0);	\
+		head = tail = phn0;					\
+		phn0 = phnrest;						\
+		while (phn0 != NULL) {					\
+			phn1 = phn_next_get(a_type, a_field, phn0);	\
+			if (phn1 != NULL) {				\
+				phnrest = phn_next_get(a_type, a_field,	\
+				    phn1);				\
+				if (phnrest != NULL) {			\
+					phn_prev_set(a_type, a_field,	\
+					    phnrest, NULL);		\
+				}					\
+				phn_prev_set(a_type, a_field, phn0,	\
+				    NULL);				\
+				phn_next_set(a_type, a_field, phn0,	\
+				    NULL);				\
+				phn_prev_set(a_type, a_field, phn1,	\
+				    NULL);				\
+				phn_next_set(a_type, a_field, phn1,	\
+				    NULL);				\
+				phn_merge(a_type, a_field, phn0, phn1,	\
+				    a_cmp, phn0);			\
+				phn_next_set(a_type, a_field, tail,	\
+				    phn0);				\
+				tail = phn0;				\
+				phn0 = phnrest;				\
+			} else {					\
+				phn_next_set(a_type, a_field, tail,	\
+				    phn0);				\
+				tail = phn0;				\
+				phn0 = NULL;				\
+			}						\
+		}							\
+		phn0 = head;						\
+		phn1 = phn_next_get(a_type, a_field, phn0);		\
+		if (phn1 != NULL) {					\
+			while (true) {					\
+				head = phn_next_get(a_type, a_field,	\
+				    phn1);				\
+				assert(phn_prev_get(a_type, a_field,	\
+				    phn0) == NULL);			\
+				phn_next_set(a_type, a_field, phn0,	\
+				    NULL);				\
+				assert(phn_prev_get(a_type, a_field,	\
+				    phn1) == NULL);			\
+				phn_next_set(a_type, a_field, phn1,	\
+				    NULL);				\
+				phn_merge(a_type, a_field, phn0, phn1,	\
+				    a_cmp, phn0);			\
+				if (head == NULL)			\
+					break;				\
+				phn_next_set(a_type, a_field, tail,	\
+				    phn0);				\
+				tail = phn0;				\
+				phn0 = head;				\
+				phn1 = phn_next_get(a_type, a_field,	\
+				    phn0);				\
+			}						\
+		}							\
+	}								\
+	r_phn = phn0;							\
+} while (0)
+
+#define	ph_merge_aux(a_type, a_field, a_ph, a_cmp) do {			\
+	a_type *phn = phn_next_get(a_type, a_field, a_ph->ph_root);	\
+	if (phn != NULL) {						\
+		phn_prev_set(a_type, a_field, a_ph->ph_root, NULL);	\
+		phn_next_set(a_type, a_field, a_ph->ph_root, NULL);	\
+		phn_prev_set(a_type, a_field, phn, NULL);		\
+		ph_merge_siblings(a_type, a_field, phn, a_cmp, phn);	\
+		assert(phn_next_get(a_type, a_field, phn) == NULL);	\
+		phn_merge(a_type, a_field, a_ph->ph_root, phn, a_cmp,	\
+		    a_ph->ph_root);					\
+	}								\
+} while (0)
+
+#define	ph_merge_children(a_type, a_field, a_phn, a_cmp, r_phn) do {	\
+	a_type *lchild = phn_lchild_get(a_type, a_field, a_phn);	\
+	if (lchild == NULL)						\
+		r_phn = NULL;						\
+	else {								\
+		ph_merge_siblings(a_type, a_field, lchild, a_cmp,	\
+		    r_phn);						\
+	}								\
+} while (0)
+
+/*
+ * The ph_proto() macro generates function prototypes that correspond to the
+ * functions generated by an equivalently parameterized call to ph_gen().
+ */
+#define	ph_proto(a_attr, a_prefix, a_ph_type, a_type)			\
+a_attr void	a_prefix##new(a_ph_type *ph);				\
+a_attr bool	a_prefix##empty(a_ph_type *ph);				\
+a_attr a_type	*a_prefix##first(a_ph_type *ph);			\
+a_attr void	a_prefix##insert(a_ph_type *ph, a_type *phn);		\
+a_attr a_type	*a_prefix##remove_first(a_ph_type *ph);			\
+a_attr void	a_prefix##remove(a_ph_type *ph, a_type *phn);
+
+/*
+ * The ph_gen() macro generates a type-specific pairing heap implementation,
+ * based on the above cpp macros.
+ */
+#define	ph_gen(a_attr, a_prefix, a_ph_type, a_type, a_field, a_cmp)	\
+a_attr void								\
+a_prefix##new(a_ph_type *ph)						\
+{									\
+									\
+	memset(ph, 0, sizeof(ph(a_type)));				\
+}									\
+a_attr bool								\
+a_prefix##empty(a_ph_type *ph)						\
+{									\
+									\
+	return (ph->ph_root == NULL);					\
+}									\
+a_attr a_type *								\
+a_prefix##first(a_ph_type *ph)						\
+{									\
+									\
+	if (ph->ph_root == NULL)					\
+		return (NULL);						\
+	ph_merge_aux(a_type, a_field, ph, a_cmp);			\
+	return (ph->ph_root);						\
+}									\
+a_attr void								\
+a_prefix##insert(a_ph_type *ph, a_type *phn)				\
+{									\
+									\
+	memset(&phn->a_field, 0, sizeof(phn(a_type)));			\
+									\
+	/*								\
+	 * Treat the root as an aux list during insertion, and lazily	\
+	 * merge during a_prefix##remove_first().  For elements that	\
+	 * are inserted, then removed via a_prefix##remove() before the	\
+	 * aux list is ever processed, this makes insert/remove		\
+	 * constant-time, whereas eager merging would make insert	\
+	 * O(log n).							\
+	 */								\
+	if (ph->ph_root == NULL)					\
+		ph->ph_root = phn;					\
+	else {								\
+		phn_next_set(a_type, a_field, phn, phn_next_get(a_type,	\
+		    a_field, ph->ph_root));				\
+		if (phn_next_get(a_type, a_field, ph->ph_root) !=	\
+		    NULL) {						\
+			phn_prev_set(a_type, a_field,			\
+			    phn_next_get(a_type, a_field, ph->ph_root),	\
+			    phn);					\
+		}							\
+		phn_prev_set(a_type, a_field, phn, ph->ph_root);	\
+		phn_next_set(a_type, a_field, ph->ph_root, phn);	\
+	}								\
+}									\
+a_attr a_type *								\
+a_prefix##remove_first(a_ph_type *ph)					\
+{									\
+	a_type *ret;							\
+									\
+	if (ph->ph_root == NULL)					\
+		return (NULL);						\
+	ph_merge_aux(a_type, a_field, ph, a_cmp);			\
+									\
+	ret = ph->ph_root;						\
+									\
+	ph_merge_children(a_type, a_field, ph->ph_root, a_cmp,		\
+	    ph->ph_root);						\
+									\
+	return (ret);							\
+}									\
+a_attr void								\
+a_prefix##remove(a_ph_type *ph, a_type *phn)				\
+{									\
+	a_type *replace, *parent;					\
+									\
+	/*								\
+	 * We can delete from aux list without merging it, but we need	\
+	 * to merge if we are dealing with the root node.		\
+	 */								\
+	if (ph->ph_root == phn) {					\
+		ph_merge_aux(a_type, a_field, ph, a_cmp);		\
+		if (ph->ph_root == phn) {				\
+			ph_merge_children(a_type, a_field, ph->ph_root,	\
+			    a_cmp, ph->ph_root);			\
+			return;						\
+		}							\
+	}								\
+									\
+	/* Get parent (if phn is leftmost child) before mutating. */	\
+	if ((parent = phn_prev_get(a_type, a_field, phn)) != NULL) {	\
+		if (phn_lchild_get(a_type, a_field, parent) != phn)	\
+			parent = NULL;					\
+	}								\
+	/* Find a possible replacement node, and link to parent. */	\
+	ph_merge_children(a_type, a_field, phn, a_cmp, replace);	\
+	/* Set next/prev for sibling linked list. */			\
+	if (replace != NULL) {						\
+		if (parent != NULL) {					\
+			phn_prev_set(a_type, a_field, replace, parent);	\
+			phn_lchild_set(a_type, a_field, parent,		\
+			    replace);					\
+		} else {						\
+			phn_prev_set(a_type, a_field, replace,		\
+			    phn_prev_get(a_type, a_field, phn));	\
+			if (phn_prev_get(a_type, a_field, phn) !=	\
+			    NULL) {					\
+				phn_next_set(a_type, a_field,		\
+				    phn_prev_get(a_type, a_field, phn),	\
+				    replace);				\
+			}						\
+		}							\
+		phn_next_set(a_type, a_field, replace,			\
+		    phn_next_get(a_type, a_field, phn));		\
+		if (phn_next_get(a_type, a_field, phn) != NULL) {	\
+			phn_prev_set(a_type, a_field,			\
+			    phn_next_get(a_type, a_field, phn),		\
+			    replace);					\
+		}							\
+	} else {							\
+		if (parent != NULL) {					\
+			a_type *next = phn_next_get(a_type, a_field,	\
+			    phn);					\
+			phn_lchild_set(a_type, a_field, parent, next);	\
+			if (next != NULL) {				\
+				phn_prev_set(a_type, a_field, next,	\
+				    parent);				\
+			}						\
+		} else {						\
+			assert(phn_prev_get(a_type, a_field, phn) !=	\
+			    NULL);					\
+			phn_next_set(a_type, a_field,			\
+			    phn_prev_get(a_type, a_field, phn),		\
+			    phn_next_get(a_type, a_field, phn));	\
+		}							\
+		if (phn_next_get(a_type, a_field, phn) != NULL) {	\
+			phn_prev_set(a_type, a_field,			\
+			    phn_next_get(a_type, a_field, phn),		\
+			    phn_prev_get(a_type, a_field, phn));	\
+		}							\
+	}								\
+}
+
+#endif /* PH_H_ */
--- a/memory/jemalloc/src/include/jemalloc/internal/private_symbols.txt
+++ b/memory/jemalloc/src/include/jemalloc/internal/private_symbols.txt
@@ -1,46 +1,48 @@
 a0dalloc
+a0get
 a0malloc
 arena_aalloc
 arena_alloc_junk_small
 arena_basic_stats_merge
 arena_bin_index
 arena_bin_info
-arena_bitselm_get
+arena_bitselm_get_const
+arena_bitselm_get_mutable
 arena_boot
 arena_choose
 arena_choose_hard
+arena_choose_impl
 arena_chunk_alloc_huge
 arena_chunk_cache_maybe_insert
 arena_chunk_cache_maybe_remove
 arena_chunk_dalloc_huge
 arena_chunk_ralloc_huge_expand
 arena_chunk_ralloc_huge_shrink
 arena_chunk_ralloc_huge_similar
 arena_cleanup
 arena_dalloc
 arena_dalloc_bin
 arena_dalloc_bin_junked_locked
 arena_dalloc_junk_large
-arena_dalloc_junk_large_impl
 arena_dalloc_junk_small
-arena_dalloc_junk_small_impl
 arena_dalloc_large
 arena_dalloc_large_junked_locked
 arena_dalloc_small
 arena_decay_tick
 arena_decay_ticks
 arena_decay_time_default_get
 arena_decay_time_default_set
 arena_decay_time_get
 arena_decay_time_set
 arena_dss_prec_get
 arena_dss_prec_set
 arena_get
+arena_ichoose
 arena_init
 arena_lg_dirty_mult_default_get
 arena_lg_dirty_mult_default_set
 arena_lg_dirty_mult_get
 arena_lg_dirty_mult_set
 arena_malloc
 arena_malloc_hard
 arena_malloc_large
@@ -57,26 +59,28 @@ arena_mapbits_large_size_get
 arena_mapbits_size_decode
 arena_mapbits_size_encode
 arena_mapbits_small_runind_get
 arena_mapbits_small_set
 arena_mapbits_unallocated_set
 arena_mapbits_unallocated_size_get
 arena_mapbits_unallocated_size_set
 arena_mapbits_unzeroed_get
-arena_mapbitsp_get
+arena_mapbitsp_get_const
+arena_mapbitsp_get_mutable
 arena_mapbitsp_read
 arena_mapbitsp_write
 arena_maxrun
 arena_maybe_purge
 arena_metadata_allocated_add
 arena_metadata_allocated_get
 arena_metadata_allocated_sub
 arena_migrate
-arena_miscelm_get
+arena_miscelm_get_const
+arena_miscelm_get_mutable
 arena_miscelm_to_pageind
 arena_miscelm_to_rpages
 arena_new
 arena_node_alloc
 arena_node_dalloc
 arena_nthreads_dec
 arena_nthreads_get
 arena_nthreads_inc
@@ -97,16 +101,17 @@ arena_prof_tctx_set
 arena_ptr_small_binind_get
 arena_purge
 arena_quarantine_junk_small
 arena_ralloc
 arena_ralloc_junk_large
 arena_ralloc_no_move
 arena_rd_to_miscelm
 arena_redzone_corruption
+arena_reset
 arena_run_regind
 arena_run_to_miscelm
 arena_salloc
 arena_sdalloc
 arena_stats_merge
 arena_tcache_fill_small
 arena_tdata_get
 arena_tdata_get_hard
@@ -158,30 +163,25 @@ chunk_alloc_dss
 chunk_alloc_mmap
 chunk_alloc_wrapper
 chunk_boot
 chunk_dalloc_cache
 chunk_dalloc_mmap
 chunk_dalloc_wrapper
 chunk_deregister
 chunk_dss_boot
-chunk_dss_postfork_child
-chunk_dss_postfork_parent
+chunk_dss_mergeable
 chunk_dss_prec_get
 chunk_dss_prec_set
-chunk_dss_prefork
 chunk_hooks_default
 chunk_hooks_get
 chunk_hooks_set
 chunk_in_dss
 chunk_lookup
 chunk_npages
-chunk_postfork_child
-chunk_postfork_parent
-chunk_prefork
 chunk_purge_wrapper
 chunk_register
 chunks_rtree
 chunksize
 chunksize_mask
 ckh_count
 ckh_delete
 ckh_insert
@@ -282,24 +282,21 @@ huge_malloc
 huge_palloc
 huge_prof_tctx_get
 huge_prof_tctx_reset
 huge_prof_tctx_set
 huge_ralloc
 huge_ralloc_no_move
 huge_salloc
 iaalloc
+ialloc
 iallocztm
-icalloc
-icalloct
+iarena_cleanup
 idalloc
-idalloct
 idalloctm
-imalloc
-imalloct
 in_valgrind
 index2size
 index2size_compute
 index2size_lookup
 index2size_tab
 ipalloc
 ipalloct
 ipallocztm
@@ -315,16 +312,19 @@ ivsalloc
 ixalloc
 jemalloc_postfork_child
 jemalloc_postfork_parent
 jemalloc_prefork
 large_maxclass
 lg_floor
 lg_prof_sample
 malloc_cprintf
+malloc_mutex_assert_not_owner
+malloc_mutex_assert_owner
+malloc_mutex_boot
 malloc_mutex_init
 malloc_mutex_lock
 malloc_mutex_postfork_child
 malloc_mutex_postfork_parent
 malloc_mutex_prefork
 malloc_mutex_unlock
 malloc_printf
 malloc_snprintf
@@ -336,37 +336,37 @@ malloc_tsd_dalloc
 malloc_tsd_malloc
 malloc_tsd_no_cleanup
 malloc_vcprintf
 malloc_vsnprintf
 malloc_write
 map_bias
 map_misc_offset
 mb_write
-mutex_boot
+narenas_auto
 narenas_tdata_cleanup
 narenas_total_get
 ncpus
 nhbins
 nhclasses
 nlclasses
 nstime_add
 nstime_compare
 nstime_copy
 nstime_divide
 nstime_idivide
 nstime_imultiply
 nstime_init
 nstime_init2
+nstime_monotonic
 nstime_ns
 nstime_nsec
 nstime_sec
 nstime_subtract
 nstime_update
-nstime_update_impl
 opt_abort
 opt_decay_time
 opt_dss
 opt_junk
 opt_junk_alloc
 opt_junk_free
 opt_lg_chunk
 opt_lg_dirty_mult
@@ -386,27 +386,39 @@ opt_purge
 opt_quarantine
 opt_redzone
 opt_stats_print
 opt_tcache
 opt_utrace
 opt_xmalloc
 opt_zero
 p2rz
+pages_boot
 pages_commit
 pages_decommit
 pages_map
 pages_purge
 pages_trim
 pages_unmap
+pind2sz
+pind2sz_compute
+pind2sz_lookup
+pind2sz_tab
 pow2_ceil_u32
 pow2_ceil_u64
 pow2_ceil_zu
-prng_lg_range
-prng_range
+prng_lg_range_u32
+prng_lg_range_u64
+prng_lg_range_zu
+prng_range_u32
+prng_range_u64
+prng_range_zu
+prng_state_next_u32
+prng_state_next_u64
+prng_state_next_zu
 prof_active
 prof_active_get
 prof_active_get_unlocked
 prof_active_set
 prof_alloc_prep
 prof_alloc_rollback
 prof_backtrace
 prof_boot0
@@ -445,22 +457,23 @@ prof_tdata_get
 prof_tdata_init
 prof_tdata_reinit
 prof_thread_active_get
 prof_thread_active_init_get
 prof_thread_active_init_set
 prof_thread_active_set
 prof_thread_name_get
 prof_thread_name_set
+psz2ind
+psz2u
 purge_mode_names
 quarantine
 quarantine_alloc_hook
 quarantine_alloc_hook_work
 quarantine_cleanup
-register_zone
 rtree_child_read
 rtree_child_read_hard
 rtree_child_tryread
 rtree_delete
 rtree_get
 rtree_new
 rtree_node_valid
 rtree_set
@@ -468,37 +481,36 @@ rtree_start_level
 rtree_subkey
 rtree_subtree_read
 rtree_subtree_read_hard
 rtree_subtree_tryread
 rtree_val_read
 rtree_val_write
 run_quantize_ceil
 run_quantize_floor
-run_quantize_max
 s2u
 s2u_compute
 s2u_lookup
 sa2u
 set_errno
 size2index
 size2index_compute
 size2index_lookup
 size2index_tab
+spin_adaptive
+spin_init
 stats_cactive
 stats_cactive_add
 stats_cactive_get
 stats_cactive_sub
 stats_print
 tcache_alloc_easy
 tcache_alloc_large
 tcache_alloc_small
 tcache_alloc_small_hard
-tcache_arena_associate
-tcache_arena_dissociate
 tcache_arena_reassociate
 tcache_bin_flush_large
 tcache_bin_flush_small
 tcache_bin_info
 tcache_boot
 tcache_cleanup
 tcache_create
 tcache_dalloc_large
@@ -534,29 +546,35 @@ tsd_arenas_tdata_bypass_set
 tsd_arenas_tdata_bypassp_get
 tsd_arenas_tdata_get
 tsd_arenas_tdata_set
 tsd_arenas_tdatap_get
 tsd_boot
 tsd_boot0
 tsd_boot1
 tsd_booted
+tsd_booted_get
 tsd_cleanup
 tsd_cleanup_wrapper
 tsd_fetch
+tsd_fetch_impl
 tsd_get
+tsd_get_allocates
+tsd_iarena_get
+tsd_iarena_set
+tsd_iarenap_get
+tsd_initialized
+tsd_init_check_recursion
+tsd_init_finish
+tsd_init_head
 tsd_narenas_tdata_get
 tsd_narenas_tdata_set
 tsd_narenas_tdatap_get
 tsd_wrapper_get
 tsd_wrapper_set
-tsd_initialized
-tsd_init_check_recursion
-tsd_init_finish
-tsd_init_head
 tsd_nominal
 tsd_prof_tdata_get
 tsd_prof_tdata_set
 tsd_prof_tdatap_get
 tsd_quarantine_get
 tsd_quarantine_set
 tsd_quarantinep_get
 tsd_set
@@ -569,13 +587,40 @@ tsd_tcachep_get
 tsd_thread_allocated_get
 tsd_thread_allocated_set
 tsd_thread_allocatedp_get
 tsd_thread_deallocated_get
 tsd_thread_deallocated_set
 tsd_thread_deallocatedp_get
 tsd_tls
 tsd_tsd
+tsd_tsdn
+tsd_witness_fork_get
+tsd_witness_fork_set
+tsd_witness_forkp_get
+tsd_witnesses_get
+tsd_witnesses_set
+tsd_witnessesp_get
+tsdn_fetch
+tsdn_null
+tsdn_tsd
 u2rz
 valgrind_freelike_block
 valgrind_make_mem_defined
 valgrind_make_mem_noaccess
 valgrind_make_mem_undefined
+witness_assert_lockless
+witness_assert_not_owner
+witness_assert_owner
+witness_fork_cleanup
+witness_init
+witness_lock
+witness_lock_error
+witness_lockless_error
+witness_not_owner_error
+witness_owner
+witness_owner_error
+witness_postfork_child
+witness_postfork_parent
+witness_prefork
+witness_unlock
+witnesses_cleanup
+zone_register
--- a/memory/jemalloc/src/include/jemalloc/internal/prng.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/prng.h
@@ -14,66 +14,194 @@
  *
  * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints.
  *
  * This choice of m has the disadvantage that the quality of the bits is
  * proportional to bit position.  For example, the lowest bit has a cycle of 2,
  * the next has a cycle of 4, etc.  For this reason, we prefer to use the upper
  * bits.
  */
-#define	PRNG_A	UINT64_C(6364136223846793005)
-#define	PRNG_C	UINT64_C(1442695040888963407)
+
+#define	PRNG_A_32	UINT32_C(1103515241)
+#define	PRNG_C_32	UINT32_C(12347)
+
+#define	PRNG_A_64	UINT64_C(6364136223846793005)
+#define	PRNG_C_64	UINT64_C(1442695040888963407)
 
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
 
 #endif /* JEMALLOC_H_STRUCTS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
-uint64_t	prng_lg_range(uint64_t *state, unsigned lg_range);
-uint64_t	prng_range(uint64_t *state, uint64_t range);
+uint32_t	prng_state_next_u32(uint32_t state);
+uint64_t	prng_state_next_u64(uint64_t state);
+size_t	prng_state_next_zu(size_t state);
+
+uint32_t	prng_lg_range_u32(uint32_t *state, unsigned lg_range,
+    bool atomic);
+uint64_t	prng_lg_range_u64(uint64_t *state, unsigned lg_range);
+size_t	prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic);
+
+uint32_t	prng_range_u32(uint32_t *state, uint32_t range, bool atomic);
+uint64_t	prng_range_u64(uint64_t *state, uint64_t range);
+size_t	prng_range_zu(size_t *state, size_t range, bool atomic);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PRNG_C_))
+JEMALLOC_ALWAYS_INLINE uint32_t
+prng_state_next_u32(uint32_t state)
+{
+
+	return ((state * PRNG_A_32) + PRNG_C_32);
+}
+
 JEMALLOC_ALWAYS_INLINE uint64_t
-prng_lg_range(uint64_t *state, unsigned lg_range)
+prng_state_next_u64(uint64_t state)
+{
+
+	return ((state * PRNG_A_64) + PRNG_C_64);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+prng_state_next_zu(size_t state)
+{
+
+#if LG_SIZEOF_PTR == 2
+	return ((state * PRNG_A_32) + PRNG_C_32);
+#elif LG_SIZEOF_PTR == 3
+	return ((state * PRNG_A_64) + PRNG_C_64);
+#else
+#error Unsupported pointer size
+#endif
+}
+
+JEMALLOC_ALWAYS_INLINE uint32_t
+prng_lg_range_u32(uint32_t *state, unsigned lg_range, bool atomic)
 {
-	uint64_t ret;
+	uint32_t ret, state1;
+
+	assert(lg_range > 0);
+	assert(lg_range <= 32);
+
+	if (atomic) {
+		uint32_t state0;
+
+		do {
+			state0 = atomic_read_uint32(state);
+			state1 = prng_state_next_u32(state0);
+		} while (atomic_cas_uint32(state, state0, state1));
+	} else {
+		state1 = prng_state_next_u32(*state);
+		*state = state1;
+	}
+	ret = state1 >> (32 - lg_range);
+
+	return (ret);
+}
+
+/* 64-bit atomic operations cannot be supported on all relevant platforms. */
+JEMALLOC_ALWAYS_INLINE uint64_t
+prng_lg_range_u64(uint64_t *state, unsigned lg_range)
+{
+	uint64_t ret, state1;
 
 	assert(lg_range > 0);
 	assert(lg_range <= 64);
 
-	ret = (*state * PRNG_A) + PRNG_C;
-	*state = ret;
-	ret >>= (64 - lg_range);
+	state1 = prng_state_next_u64(*state);
+	*state = state1;
+	ret = state1 >> (64 - lg_range);
+
+	return (ret);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic)
+{
+	size_t ret, state1;
+
+	assert(lg_range > 0);
+	assert(lg_range <= ZU(1) << (3 + LG_SIZEOF_PTR));
+
+	if (atomic) {
+		size_t state0;
+
+		do {
+			state0 = atomic_read_z(state);
+			state1 = prng_state_next_zu(state0);
+		} while (atomic_cas_z(state, state0, state1));
+	} else {
+		state1 = prng_state_next_zu(*state);
+		*state = state1;
+	}
+	ret = state1 >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range);
+
+	return (ret);
+}
+
+JEMALLOC_ALWAYS_INLINE uint32_t
+prng_range_u32(uint32_t *state, uint32_t range, bool atomic)
+{
+	uint32_t ret;
+	unsigned lg_range;
+
+	assert(range > 1);
+
+	/* Compute the ceiling of lg(range). */
+	lg_range = ffs_u32(pow2_ceil_u32(range)) - 1;
+
+	/* Generate a result in [0..range) via repeated trial. */
+	do {
+		ret = prng_lg_range_u32(state, lg_range, atomic);
+	} while (ret >= range);
 
 	return (ret);
 }
 
 JEMALLOC_ALWAYS_INLINE uint64_t
-prng_range(uint64_t *state, uint64_t range)
+prng_range_u64(uint64_t *state, uint64_t range)
 {
 	uint64_t ret;
 	unsigned lg_range;
 
 	assert(range > 1);
 
 	/* Compute the ceiling of lg(range). */
 	lg_range = ffs_u64(pow2_ceil_u64(range)) - 1;
 
 	/* Generate a result in [0..range) via repeated trial. */
 	do {
-		ret = prng_lg_range(state, lg_range);
+		ret = prng_lg_range_u64(state, lg_range);
+	} while (ret >= range);
+
+	return (ret);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+prng_range_zu(size_t *state, size_t range, bool atomic)
+{
+	size_t ret;
+	unsigned lg_range;
+
+	assert(range > 1);
+
+	/* Compute the ceiling of lg(range). */
+	lg_range = ffs_u64(pow2_ceil_u64(range)) - 1;
+
+	/* Generate a result in [0..range) via repeated trial. */
+	do {
+		ret = prng_lg_range_zu(state, lg_range, atomic);
 	} while (ret >= range);
 
 	return (ret);
 }
 #endif
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/internal/prof.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/prof.h
@@ -276,76 +276,76 @@ extern uint64_t	prof_interval;
 
 /*
  * Initialized as opt_lg_prof_sample, and potentially modified during profiling
  * resets.
  */
 extern size_t	lg_prof_sample;
 
 void	prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated);
-void	prof_malloc_sample_object(const void *ptr, size_t usize,
+void	prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
     prof_tctx_t *tctx);
 void	prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx);
 void	bt_init(prof_bt_t *bt, void **vec);
 void	prof_backtrace(prof_bt_t *bt);
 prof_tctx_t	*prof_lookup(tsd_t *tsd, prof_bt_t *bt);
 #ifdef JEMALLOC_JET
 size_t	prof_tdata_count(void);
 size_t	prof_bt_count(void);
 const prof_cnt_t *prof_cnt_all(void);
 typedef int (prof_dump_open_t)(bool, const char *);
 extern prof_dump_open_t *prof_dump_open;
-typedef bool (prof_dump_header_t)(bool, const prof_cnt_t *);
+typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *);
 extern prof_dump_header_t *prof_dump_header;
 #endif
-void	prof_idump(void);
-bool	prof_mdump(const char *filename);
-void	prof_gdump(void);
+void	prof_idump(tsdn_t *tsdn);
+bool	prof_mdump(tsd_t *tsd, const char *filename);
+void	prof_gdump(tsdn_t *tsdn);
 prof_tdata_t	*prof_tdata_init(tsd_t *tsd);
 prof_tdata_t	*prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
 void	prof_reset(tsd_t *tsd, size_t lg_sample);
 void	prof_tdata_cleanup(tsd_t *tsd);
-const char	*prof_thread_name_get(void);
-bool	prof_active_get(void);
-bool	prof_active_set(bool active);
+bool	prof_active_get(tsdn_t *tsdn);
+bool	prof_active_set(tsdn_t *tsdn, bool active);
+const char	*prof_thread_name_get(tsd_t *tsd);
 int	prof_thread_name_set(tsd_t *tsd, const char *thread_name);
-bool	prof_thread_active_get(void);
-bool	prof_thread_active_set(bool active);
-bool	prof_thread_active_init_get(void);
-bool	prof_thread_active_init_set(bool active_init);
-bool	prof_gdump_get(void);
-bool	prof_gdump_set(bool active);
+bool	prof_thread_active_get(tsd_t *tsd);
+bool	prof_thread_active_set(tsd_t *tsd, bool active);
+bool	prof_thread_active_init_get(tsdn_t *tsdn);
+bool	prof_thread_active_init_set(tsdn_t *tsdn, bool active_init);
+bool	prof_gdump_get(tsdn_t *tsdn);
+bool	prof_gdump_set(tsdn_t *tsdn, bool active);
 void	prof_boot0(void);
 void	prof_boot1(void);
-bool	prof_boot2(void);
-void	prof_prefork0(void);
-void	prof_prefork1(void);
-void	prof_postfork_parent(void);
-void	prof_postfork_child(void);
+bool	prof_boot2(tsd_t *tsd);
+void	prof_prefork0(tsdn_t *tsdn);
+void	prof_prefork1(tsdn_t *tsdn);
+void	prof_postfork_parent(tsdn_t *tsdn);
+void	prof_postfork_child(tsdn_t *tsdn);
 void	prof_sample_threshold_update(prof_tdata_t *tdata);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
 bool	prof_active_get_unlocked(void);
 bool	prof_gdump_get_unlocked(void);
 prof_tdata_t	*prof_tdata_get(tsd_t *tsd, bool create);
+prof_tctx_t	*prof_tctx_get(tsdn_t *tsdn, const void *ptr);
+void	prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize,
+    prof_tctx_t *tctx);
+void	prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize,
+    const void *old_ptr, prof_tctx_t *tctx);
 bool	prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit,
     prof_tdata_t **tdata_out);
 prof_tctx_t	*prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active,
     bool update);
-prof_tctx_t	*prof_tctx_get(const void *ptr);
-void	prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx);
-void	prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr,
+void	prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize,
     prof_tctx_t *tctx);
-void	prof_malloc_sample_object(const void *ptr, size_t usize,
-    prof_tctx_t *tctx);
-void	prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx);
 void	prof_realloc(tsd_t *tsd, const void *ptr, size_t usize,
     prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr,
     size_t old_usize, prof_tctx_t *old_tctx);
 void	prof_free(tsd_t *tsd, const void *ptr, size_t usize);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
 JEMALLOC_ALWAYS_INLINE bool
@@ -393,44 +393,44 @@ prof_tdata_get(tsd_t *tsd, bool create)
 		}
 		assert(tdata == NULL || tdata->attached);
 	}
 
 	return (tdata);
 }
 
 JEMALLOC_ALWAYS_INLINE prof_tctx_t *
-prof_tctx_get(const void *ptr)
+prof_tctx_get(tsdn_t *tsdn, const void *ptr)
 {
 
 	cassert(config_prof);
 	assert(ptr != NULL);
 
-	return (arena_prof_tctx_get(ptr));
+	return (arena_prof_tctx_get(tsdn, ptr));
 }
 
 JEMALLOC_ALWAYS_INLINE void
-prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx)
+prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx)
 {
 
 	cassert(config_prof);
 	assert(ptr != NULL);
 
-	arena_prof_tctx_set(ptr, usize, tctx);
+	arena_prof_tctx_set(tsdn, ptr, usize, tctx);
 }
 
 JEMALLOC_ALWAYS_INLINE void
-prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr,
+prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, const void *old_ptr,
     prof_tctx_t *old_tctx)
 {
 
 	cassert(config_prof);
 	assert(ptr != NULL);
 
-	arena_prof_tctx_reset(ptr, usize, old_ptr, old_tctx);
+	arena_prof_tctx_reset(tsdn, ptr, usize, old_ptr, old_tctx);
 }
 
 JEMALLOC_ALWAYS_INLINE bool
 prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
     prof_tdata_t **tdata_out)
 {
 	prof_tdata_t *tdata;
 
@@ -475,72 +475,73 @@ prof_alloc_prep(tsd_t *tsd, size_t usize
 		prof_backtrace(&bt);
 		ret = prof_lookup(tsd, &bt);
 	}
 
 	return (ret);
 }
 
 JEMALLOC_ALWAYS_INLINE void
-prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx)
+prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx)
 {
 
 	cassert(config_prof);
 	assert(ptr != NULL);
-	assert(usize == isalloc(ptr, true));
+	assert(usize == isalloc(tsdn, ptr, true));
 
 	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
-		prof_malloc_sample_object(ptr, usize, tctx);
+		prof_malloc_sample_object(tsdn, ptr, usize, tctx);
 	else
-		prof_tctx_set(ptr, usize, (prof_tctx_t *)(uintptr_t)1U);
+		prof_tctx_set(tsdn, ptr, usize, (prof_tctx_t *)(uintptr_t)1U);
 }
 
 JEMALLOC_ALWAYS_INLINE void
 prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
     bool prof_active, bool updated, const void *old_ptr, size_t old_usize,
     prof_tctx_t *old_tctx)
 {
 	bool sampled, old_sampled;
 
 	cassert(config_prof);
 	assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);
 
 	if (prof_active && !updated && ptr != NULL) {
-		assert(usize == isalloc(ptr, true));
+		assert(usize == isalloc(tsd_tsdn(tsd), ptr, true));
 		if (prof_sample_accum_update(tsd, usize, true, NULL)) {
 			/*
 			 * Don't sample.  The usize passed to prof_alloc_prep()
 			 * was larger than what actually got allocated, so a
 			 * backtrace was captured for this allocation, even
 			 * though its actual usize was insufficient to cross the
 			 * sample threshold.
 			 */
+			prof_alloc_rollback(tsd, tctx, true);
 			tctx = (prof_tctx_t *)(uintptr_t)1U;
 		}
 	}
 
 	sampled = ((uintptr_t)tctx > (uintptr_t)1U);
 	old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U);
 
 	if (unlikely(sampled))
-		prof_malloc_sample_object(ptr, usize, tctx);
+		prof_malloc_sample_object(tsd_tsdn(tsd), ptr, usize, tctx);
 	else
-		prof_tctx_reset(ptr, usize, old_ptr, old_tctx);
+		prof_tctx_reset(tsd_tsdn(tsd), ptr, usize, old_ptr, old_tctx);
 
 	if (unlikely(old_sampled))
 		prof_free_sampled_object(tsd, old_usize, old_tctx);
 }
 
 JEMALLOC_ALWAYS_INLINE void
 prof_free(tsd_t *tsd, const void *ptr, size_t usize)
 {
-	prof_tctx_t *tctx = prof_tctx_get(ptr);
+	prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), ptr);
 
 	cassert(config_prof);
-	assert(usize == isalloc(ptr, true));
+	assert(usize == isalloc(tsd_tsdn(tsd), ptr, true));
 
 	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
 		prof_free_sampled_object(tsd, usize, tctx);
 }
 #endif
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/internal/rtree.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/rtree.h
@@ -10,19 +10,20 @@ typedef struct rtree_node_elm_s rtree_no
 typedef struct rtree_level_s rtree_level_t;
 typedef struct rtree_s rtree_t;
 
 /*
  * RTREE_BITS_PER_LEVEL must be a power of two that is no larger than the
  * machine address width.
  */
 #define	LG_RTREE_BITS_PER_LEVEL	4
-#define	RTREE_BITS_PER_LEVEL	(ZU(1) << LG_RTREE_BITS_PER_LEVEL)
+#define	RTREE_BITS_PER_LEVEL	(1U << LG_RTREE_BITS_PER_LEVEL)
+/* Maximum rtree height. */
 #define	RTREE_HEIGHT_MAX						\
-    ((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL)
+    ((1U << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL)
 
 /* Used for two-stage lock-free node initialization. */
 #define	RTREE_NODE_INITIALIZING	((rtree_node_elm_t *)0x1)
 
 /*
  * The node allocation callback function's argument is the number of contiguous
  * rtree_node_elm_t structures to allocate, and the resulting memory must be
  * zeroed.
@@ -106,85 +107,91 @@ rtree_node_elm_t	*rtree_child_read_hard(
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
 unsigned	rtree_start_level(rtree_t *rtree, uintptr_t key);
 uintptr_t	rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level);
 
 bool	rtree_node_valid(rtree_node_elm_t *node);
-rtree_node_elm_t	*rtree_child_tryread(rtree_node_elm_t *elm);
+rtree_node_elm_t	*rtree_child_tryread(rtree_node_elm_t *elm,
+    bool dependent);
 rtree_node_elm_t	*rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm,
-    unsigned level);
+    unsigned level, bool dependent);
 extent_node_t	*rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm,
     bool dependent);
 void	rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm,
     const extent_node_t *val);
-rtree_node_elm_t	*rtree_subtree_tryread(rtree_t *rtree, unsigned level);
-rtree_node_elm_t	*rtree_subtree_read(rtree_t *rtree, unsigned level);
+rtree_node_elm_t	*rtree_subtree_tryread(rtree_t *rtree, unsigned level,
+    bool dependent);
+rtree_node_elm_t	*rtree_subtree_read(rtree_t *rtree, unsigned level,
+    bool dependent);
 
 extent_node_t	*rtree_get(rtree_t *rtree, uintptr_t key, bool dependent);
 bool	rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_))
-JEMALLOC_INLINE unsigned
+JEMALLOC_ALWAYS_INLINE unsigned
 rtree_start_level(rtree_t *rtree, uintptr_t key)
 {
 	unsigned start_level;
 
 	if (unlikely(key == 0))
 		return (rtree->height - 1);
 
 	start_level = rtree->start_level[lg_floor(key) >>
 	    LG_RTREE_BITS_PER_LEVEL];
 	assert(start_level < rtree->height);
 	return (start_level);
 }
 
-JEMALLOC_INLINE uintptr_t
+JEMALLOC_ALWAYS_INLINE uintptr_t
 rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level)
 {
 
 	return ((key >> ((ZU(1) << (LG_SIZEOF_PTR+3)) -
 	    rtree->levels[level].cumbits)) & ((ZU(1) <<
 	    rtree->levels[level].bits) - 1));
 }
 
-JEMALLOC_INLINE bool
+JEMALLOC_ALWAYS_INLINE bool
 rtree_node_valid(rtree_node_elm_t *node)
 {
 
 	return ((uintptr_t)node > (uintptr_t)RTREE_NODE_INITIALIZING);
 }
 
-JEMALLOC_INLINE rtree_node_elm_t *
-rtree_child_tryread(rtree_node_elm_t *elm)
+JEMALLOC_ALWAYS_INLINE rtree_node_elm_t *
+rtree_child_tryread(rtree_node_elm_t *elm, bool dependent)
 {
 	rtree_node_elm_t *child;
 
 	/* Double-checked read (first read may be stale. */
 	child = elm->child;
-	if (!rtree_node_valid(child))
+	if (!dependent && !rtree_node_valid(child))
 		child = atomic_read_p(&elm->pun);
+	assert(!dependent || child != NULL);
 	return (child);
 }
 
-JEMALLOC_INLINE rtree_node_elm_t *
-rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level)
+JEMALLOC_ALWAYS_INLINE rtree_node_elm_t *
+rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level,
+    bool dependent)
 {
 	rtree_node_elm_t *child;
 
-	child = rtree_child_tryread(elm);
-	if (unlikely(!rtree_node_valid(child)))
+	child = rtree_child_tryread(elm, dependent);
+	if (!dependent && unlikely(!rtree_node_valid(child)))
 		child = rtree_child_read_hard(rtree, elm, level);
+	assert(!dependent || child != NULL);
 	return (child);
 }
 
-JEMALLOC_INLINE extent_node_t *
+JEMALLOC_ALWAYS_INLINE extent_node_t *
 rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, bool dependent)
 {
 
 	if (dependent) {
 		/*
 		 * Reading a val on behalf of a pointer to a valid allocation is
 		 * guaranteed to be a clean read even without synchronization,
 		 * because the rtree update became visible in memory before the
@@ -203,91 +210,156 @@ rtree_val_read(rtree_t *rtree, rtree_nod
 
 JEMALLOC_INLINE void
 rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val)
 {
 
 	atomic_write_p(&elm->pun, val);
 }
 
-JEMALLOC_INLINE rtree_node_elm_t *
-rtree_subtree_tryread(rtree_t *rtree, unsigned level)
+JEMALLOC_ALWAYS_INLINE rtree_node_elm_t *
+rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent)
 {
 	rtree_node_elm_t *subtree;
 
 	/* Double-checked read (first read may be stale. */
 	subtree = rtree->levels[level].subtree;
-	if (!rtree_node_valid(subtree))
+	if (!dependent && unlikely(!rtree_node_valid(subtree)))
 		subtree = atomic_read_p(&rtree->levels[level].subtree_pun);
+	assert(!dependent || subtree != NULL);
 	return (subtree);
 }
 
-JEMALLOC_INLINE rtree_node_elm_t *
-rtree_subtree_read(rtree_t *rtree, unsigned level)
+JEMALLOC_ALWAYS_INLINE rtree_node_elm_t *
+rtree_subtree_read(rtree_t *rtree, unsigned level, bool dependent)
 {
 	rtree_node_elm_t *subtree;
 
-	subtree = rtree_subtree_tryread(rtree, level);
-	if (unlikely(!rtree_node_valid(subtree)))
+	subtree = rtree_subtree_tryread(rtree, level, dependent);
+	if (!dependent && unlikely(!rtree_node_valid(subtree)))
 		subtree = rtree_subtree_read_hard(rtree, level);
+	assert(!dependent || subtree != NULL);
 	return (subtree);
 }
 
-JEMALLOC_INLINE extent_node_t *
+JEMALLOC_ALWAYS_INLINE extent_node_t *
 rtree_get(rtree_t *rtree, uintptr_t key, bool dependent)
 {
 	uintptr_t subkey;
-	unsigned i, start_level;
-	rtree_node_elm_t *node, *child;
+	unsigned start_level;
+	rtree_node_elm_t *node;
 
 	start_level = rtree_start_level(rtree, key);
 
-	for (i = start_level, node = rtree_subtree_tryread(rtree, start_level);
-	    /**/; i++, node = child) {
-		if (!dependent && unlikely(!rtree_node_valid(node)))
-			return (NULL);
-		subkey = rtree_subkey(rtree, key, i);
-		if (i == rtree->height - 1) {
-			/*
-			 * node is a leaf, so it contains values rather than
-			 * child pointers.
-			 */
-			return (rtree_val_read(rtree, &node[subkey],
-			    dependent));
-		}
-		assert(i < rtree->height - 1);
-		child = rtree_child_tryread(&node[subkey]);
+	node = rtree_subtree_tryread(rtree, start_level, dependent);
+#define	RTREE_GET_BIAS	(RTREE_HEIGHT_MAX - rtree->height)
+	switch (start_level + RTREE_GET_BIAS) {
+#define	RTREE_GET_SUBTREE(level)					\
+	case level:							\
+		assert(level < (RTREE_HEIGHT_MAX-1));			\
+		if (!dependent && unlikely(!rtree_node_valid(node)))	\
+			return (NULL);					\
+		subkey = rtree_subkey(rtree, key, level -		\
+		    RTREE_GET_BIAS);					\
+		node = rtree_child_tryread(&node[subkey], dependent);	\
+		/* Fall through. */
+#define	RTREE_GET_LEAF(level)						\
+	case level:							\
+		assert(level == (RTREE_HEIGHT_MAX-1));			\
+		if (!dependent && unlikely(!rtree_node_valid(node)))	\
+			return (NULL);					\
+		subkey = rtree_subkey(rtree, key, level -		\
+		    RTREE_GET_BIAS);					\
+		/*							\
+		 * node is a leaf, so it contains values rather than	\
+		 * child pointers.					\
+		 */							\
+		return (rtree_val_read(rtree, &node[subkey],		\
+		    dependent));
+#if RTREE_HEIGHT_MAX > 1
+	RTREE_GET_SUBTREE(0)
+#endif
+#if RTREE_HEIGHT_MAX > 2
+	RTREE_GET_SUBTREE(1)
+#endif
+#if RTREE_HEIGHT_MAX > 3
+	RTREE_GET_SUBTREE(2)
+#endif
+#if RTREE_HEIGHT_MAX > 4
+	RTREE_GET_SUBTREE(3)
+#endif
+#if RTREE_HEIGHT_MAX > 5
+	RTREE_GET_SUBTREE(4)
+#endif
+#if RTREE_HEIGHT_MAX > 6
+	RTREE_GET_SUBTREE(5)
+#endif
+#if RTREE_HEIGHT_MAX > 7
+	RTREE_GET_SUBTREE(6)
+#endif
+#if RTREE_HEIGHT_MAX > 8
+	RTREE_GET_SUBTREE(7)
+#endif
+#if RTREE_HEIGHT_MAX > 9
+	RTREE_GET_SUBTREE(8)
+#endif
+#if RTREE_HEIGHT_MAX > 10
+	RTREE_GET_SUBTREE(9)
+#endif
+#if RTREE_HEIGHT_MAX > 11
+	RTREE_GET_SUBTREE(10)
+#endif
+#if RTREE_HEIGHT_MAX > 12
+	RTREE_GET_SUBTREE(11)
+#endif
+#if RTREE_HEIGHT_MAX > 13
+	RTREE_GET_SUBTREE(12)
+#endif
+#if RTREE_HEIGHT_MAX > 14
+	RTREE_GET_SUBTREE(13)
+#endif
+#if RTREE_HEIGHT_MAX > 15
+	RTREE_GET_SUBTREE(14)
+#endif
+#if RTREE_HEIGHT_MAX > 16
+#  error Unsupported RTREE_HEIGHT_MAX
+#endif
+	RTREE_GET_LEAF(RTREE_HEIGHT_MAX-1)
+#undef RTREE_GET_SUBTREE
+#undef RTREE_GET_LEAF
+	default: not_reached();
 	}
+#undef RTREE_GET_BIAS
 	not_reached();
 }
 
 JEMALLOC_INLINE bool
 rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val)
 {
 	uintptr_t subkey;
 	unsigned i, start_level;
 	rtree_node_elm_t *node, *child;
 
 	start_level = rtree_start_level(rtree, key);
 
-	node = rtree_subtree_read(rtree, start_level);
+	node = rtree_subtree_read(rtree, start_level, false);
 	if (node == NULL)
 		return (true);
 	for (i = start_level; /**/; i++, node = child) {
 		subkey = rtree_subkey(rtree, key, i);
 		if (i == rtree->height - 1) {
 			/*
 			 * node is a leaf, so it contains values rather than
 			 * child pointers.
 			 */
 			rtree_val_write(rtree, &node[subkey], val);
 			return (false);
 		}
 		assert(i + 1 < rtree->height);
-		child = rtree_child_read(rtree, &node[subkey], i);
+		child = rtree_child_read(rtree, &node[subkey], i, false);
 		if (child == NULL)
 			return (true);
 	}
 	not_reached();
 }
 #endif
 
 #endif /* JEMALLOC_H_INLINES */
--- a/memory/jemalloc/src/include/jemalloc/internal/size_classes.sh
+++ b/memory/jemalloc/src/include/jemalloc/internal/size_classes.sh
@@ -43,16 +43,31 @@ lg() {
 size_class() {
   index=$1
   lg_grp=$2
   lg_delta=$3
   ndelta=$4
   lg_p=$5
   lg_kmax=$6
 
+  if [ ${lg_delta} -ge ${lg_p} ] ; then
+    psz="yes"
+  else
+    pow2 ${lg_p}; p=${pow2_result}
+    pow2 ${lg_grp}; grp=${pow2_result}
+    pow2 ${lg_delta}; delta=${pow2_result}
+    sz=$((${grp} + ${delta} * ${ndelta}))
+    npgs=$((${sz} / ${p}))
+    if [ ${sz} -eq $((${npgs} * ${p})) ] ; then
+      psz="yes"
+    else
+      psz="no"
+    fi
+  fi
+
   lg ${ndelta}; lg_ndelta=${lg_result}; pow2 ${lg_ndelta}
   if [ ${pow2_result} -lt ${ndelta} ] ; then
     rem="yes"
   else
     rem="no"
   fi
 
   lg_size=${lg_grp}
@@ -69,54 +84,59 @@ size_class() {
     bin="no"
   fi
   if [ ${lg_size} -lt ${lg_kmax} \
       -o ${lg_size} -eq ${lg_kmax} -a ${rem} = "no" ] ; then
     lg_delta_lookup=${lg_delta}
   else
     lg_delta_lookup="no"
   fi
-  printf '    SC(%3d, %6d, %8d, %6d, %3s, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${bin} ${lg_delta_lookup}
+  printf '    SC(%3d, %6d, %8d, %6d, %3s, %3s, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${psz} ${bin} ${lg_delta_lookup}
   # Defined upon return:
+  # - psz ("yes" or "no")
+  # - bin ("yes" or "no")
   # - lg_delta_lookup (${lg_delta} or "no")
-  # - bin ("yes" or "no")
 }
 
 sep_line() {
-  echo "                                               \\"
+  echo "                                                         \\"
 }
 
 size_classes() {
   lg_z=$1
   lg_q=$2
   lg_t=$3
   lg_p=$4
   lg_g=$5
 
   pow2 $((${lg_z} + 3)); ptr_bits=${pow2_result}
   pow2 ${lg_g}; g=${pow2_result}
 
   echo "#define	SIZE_CLASSES \\"
-  echo "  /* index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup */ \\"
+  echo "  /* index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup */ \\"
 
   ntbins=0
   nlbins=0
   lg_tiny_maxclass='"NA"'
   nbins=0
+  npsizes=0
 
   # Tiny size classes.
   ndelta=0
   index=0
   lg_grp=${lg_t}
   lg_delta=${lg_grp}
   while [ ${lg_grp} -lt ${lg_q} ] ; do
     size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
     if [ ${lg_delta_lookup} != "no" ] ; then
       nlbins=$((${index} + 1))
     fi
+    if [ ${psz} = "yes" ] ; then
+      npsizes=$((${npsizes} + 1))
+    fi
     if [ ${bin} != "no" ] ; then
       nbins=$((${index} + 1))
     fi
     ntbins=$((${ntbins} + 1))
     lg_tiny_maxclass=${lg_grp} # Final written value is correct.
     index=$((${index} + 1))
     lg_delta=${lg_grp}
     lg_grp=$((${lg_grp} + 1))
@@ -128,21 +148,27 @@ size_classes() {
     # The first size class has an unusual encoding, because the size has to be
     # split between grp and delta*ndelta.
     lg_grp=$((${lg_grp} - 1))
     ndelta=1
     size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
     index=$((${index} + 1))
     lg_grp=$((${lg_grp} + 1))
     lg_delta=$((${lg_delta} + 1))
+    if [ ${psz} = "yes" ] ; then
+      npsizes=$((${npsizes} + 1))
+    fi
   fi
   while [ ${ndelta} -lt ${g} ] ; do
     size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
     index=$((${index} + 1))
     ndelta=$((${ndelta} + 1))
+    if [ ${psz} = "yes" ] ; then
+      npsizes=$((${npsizes} + 1))
+    fi
   done
 
   # All remaining groups.
   lg_grp=$((${lg_grp} + ${lg_g}))
   while [ ${lg_grp} -lt $((${ptr_bits} - 1)) ] ; do
     sep_line
     ndelta=1
     if [ ${lg_grp} -eq $((${ptr_bits} - 2)) ] ; then
@@ -152,16 +178,19 @@ size_classes() {
     fi
     while [ ${ndelta} -le ${ndelta_limit} ] ; do
       size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
       if [ ${lg_delta_lookup} != "no" ] ; then
         nlbins=$((${index} + 1))
         # Final written value is correct:
         lookup_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))"
       fi
+      if [ ${psz} = "yes" ] ; then
+        npsizes=$((${npsizes} + 1))
+      fi
       if [ ${bin} != "no" ] ; then
         nbins=$((${index} + 1))
         # Final written value is correct:
         small_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))"
         if [ ${lg_g} -gt 0 ] ; then
           lg_large_minclass=$((${lg_grp} + 1))
         else
           lg_large_minclass=$((${lg_grp} + 2))
@@ -178,16 +207,17 @@ size_classes() {
   echo
   nsizes=${index}
 
   # Defined upon completion:
   # - ntbins
   # - nlbins
   # - nbins
   # - nsizes
+  # - npsizes
   # - lg_tiny_maxclass
   # - lookup_maxclass
   # - small_maxclass
   # - lg_large_minclass
   # - huge_maxclass
 }
 
 cat <<EOF
@@ -195,30 +225,31 @@ cat <<EOF
 /******************************************************************************/
 #ifdef JEMALLOC_H_TYPES
 
 /*
  * This header requires LG_SIZEOF_PTR, LG_TINY_MIN, LG_QUANTUM, and LG_PAGE to
  * be defined prior to inclusion, and it in turn defines:
  *
  *   LG_SIZE_CLASS_GROUP: Lg of size class count for each size doubling.
- *   SIZE_CLASSES: Complete table of
- *                 SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup)
- *                 tuples.
+ *   SIZE_CLASSES: Complete table of SC(index, lg_grp, lg_delta, ndelta, psz,
+ *                 bin, lg_delta_lookup) tuples.
  *     index: Size class index.
  *     lg_grp: Lg group base size (no deltas added).
  *     lg_delta: Lg delta to previous size class.
  *     ndelta: Delta multiplier.  size == 1<<lg_grp + ndelta<<lg_delta
+ *     psz: 'yes' if a multiple of the page size, 'no' otherwise.
  *     bin: 'yes' if a small bin size class, 'no' otherwise.
  *     lg_delta_lookup: Same as lg_delta if a lookup table size class, 'no'
  *                      otherwise.
  *   NTBINS: Number of tiny bins.
  *   NLBINS: Number of bins supported by the lookup table.
  *   NBINS: Number of small size class bins.
  *   NSIZES: Number of size classes.
+ *   NPSIZES: Number of size classes that are a multiple of (1U << LG_PAGE).
  *   LG_TINY_MAXCLASS: Lg of maximum tiny size class.
  *   LOOKUP_MAXCLASS: Maximum size class included in lookup table.
  *   SMALL_MAXCLASS: Maximum small size class.
  *   LG_LARGE_MINCLASS: Lg of minimum large size class.
  *   HUGE_MAXCLASS: Maximum (huge) size class.
  */
 
 #define	LG_SIZE_CLASS_GROUP	${lg_g}
@@ -233,16 +264,17 @@ for lg_z in ${lg_zarr} ; do
       for lg_p in ${lg_parr} ; do
         echo "#if (LG_SIZEOF_PTR == ${lg_z} && LG_TINY_MIN == ${lg_t} && LG_QUANTUM == ${lg_q} && LG_PAGE == ${lg_p})"
         size_classes ${lg_z} ${lg_q} ${lg_t} ${lg_p} ${lg_g}
         echo "#define	SIZE_CLASSES_DEFINED"
         echo "#define	NTBINS			${ntbins}"
         echo "#define	NLBINS			${nlbins}"
         echo "#define	NBINS			${nbins}"
         echo "#define	NSIZES			${nsizes}"
+        echo "#define	NPSIZES			${npsizes}"
         echo "#define	LG_TINY_MAXCLASS	${lg_tiny_maxclass}"
         echo "#define	LOOKUP_MAXCLASS		${lookup_maxclass}"
         echo "#define	SMALL_MAXCLASS		${small_maxclass}"
         echo "#define	LG_LARGE_MINCLASS	${lg_large_minclass}"
         echo "#define	HUGE_MAXCLASS		${huge_maxclass}"
         echo "#endif"
         echo
       done
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/include/jemalloc/internal/spin.h
@@ -0,0 +1,51 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+typedef struct spin_s spin_t;
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+struct spin_s {
+	unsigned iteration;
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+void	spin_init(spin_t *spin);
+void	spin_adaptive(spin_t *spin);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_SPIN_C_))
+JEMALLOC_INLINE void
+spin_init(spin_t *spin)
+{
+
+	spin->iteration = 0;
+}
+
+JEMALLOC_INLINE void
+spin_adaptive(spin_t *spin)
+{
+	volatile uint64_t i;
+
+	for (i = 0; i < (KQU(1) << spin->iteration); i++)
+		CPU_SPINWAIT;
+
+	if (spin->iteration < 63)
+		spin->iteration++;
+}
+
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
+
--- a/memory/jemalloc/src/include/jemalloc/internal/stats.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/stats.h
@@ -98,16 +98,24 @@ struct malloc_huge_stats_s {
 	size_t		curhchunks;
 };
 
 struct arena_stats_s {
 	/* Number of bytes currently mapped. */
 	size_t		mapped;
 
 	/*
+	 * Number of bytes currently retained as a side effect of munmap() being
+	 * disabled/bypassed.  Retained bytes are technically mapped (though
+	 * always decommitted or purged), but they are excluded from the mapped
+	 * statistic (above).
+	 */
+	size_t		retained;
+
+	/*
 	 * Total number of purge sweeps, total number of madvise calls made,
 	 * and total pages purged in order to keep dirty unused memory under
 	 * control.
 	 */
 	uint64_t	npurge;
 	uint64_t	nmadvise;
 	uint64_t	purged;
 
--- a/memory/jemalloc/src/include/jemalloc/internal/tcache.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/tcache.h
@@ -125,37 +125,35 @@ extern size_t	tcache_maxclass;
  * usable via the MALLOCX_TCACHE() flag.  The automatic per thread tcaches are
  * completely disjoint from this data structure.  tcaches starts off as a sparse
  * array, so it has no physical memory footprint until individual pages are
  * touched.  This allows the entire array to be allocated the first time an
  * explicit tcache is created without a disproportionate impact on memory usage.
  */
 extern tcaches_t	*tcaches;
 
-size_t	tcache_salloc(const void *ptr);
+size_t	tcache_salloc(tsdn_t *tsdn, const void *ptr);
 void	tcache_event_hard(tsd_t *tsd, tcache_t *tcache);
-void	*tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
+void	*tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
     tcache_bin_t *tbin, szind_t binind, bool *tcache_success);
 void	tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
     szind_t binind, unsigned rem);
 void	tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
     unsigned rem, tcache_t *tcache);
-void	tcache_arena_associate(tcache_t *tcache, arena_t *arena);
-void	tcache_arena_reassociate(tcache_t *tcache, arena_t *oldarena,
-    arena_t *newarena);
-void	tcache_arena_dissociate(tcache_t *tcache, arena_t *arena);
+void	tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache,
+    arena_t *oldarena, arena_t *newarena);
 tcache_t *tcache_get_hard(tsd_t *tsd);
-tcache_t *tcache_create(tsd_t *tsd, arena_t *arena);
+tcache_t *tcache_create(tsdn_t *tsdn, arena_t *arena);
 void	tcache_cleanup(tsd_t *tsd);
 void	tcache_enabled_cleanup(tsd_t *tsd);
-void	tcache_stats_merge(tcache_t *tcache, arena_t *arena);
+void	tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena);
 bool	tcaches_create(tsd_t *tsd, unsigned *r_ind);
 void	tcaches_flush(tsd_t *tsd, unsigned ind);
 void	tcaches_destroy(tsd_t *tsd, unsigned ind);
-bool	tcache_boot(void);
+bool	tcache_boot(tsdn_t *tsdn);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
 void	tcache_event(tsd_t *tsd, tcache_t *tcache);
 void	tcache_flush(void);
@@ -292,30 +290,30 @@ tcache_alloc_small(tsd_t *tsd, arena_t *
 	ret = tcache_alloc_easy(tbin, &tcache_success);
 	assert(tcache_success == (ret != NULL));
 	if (unlikely(!tcache_success)) {
 		bool tcache_hard_success;
 		arena = arena_choose(tsd, arena);
 		if (unlikely(arena == NULL))
 			return (NULL);
 
-		ret = tcache_alloc_small_hard(tsd, arena, tcache, tbin, binind,
-			&tcache_hard_success);
+		ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache,
+		    tbin, binind, &tcache_hard_success);
 		if (tcache_hard_success == false)
 			return (NULL);
 	}
 
 	assert(ret);
 	/*
 	 * Only compute usize if required.  The checks in the following if
 	 * statement are all static.
 	 */
 	if (config_prof || (slow_path && config_fill) || unlikely(zero)) {
 		usize = index2size(binind);
-		assert(tcache_salloc(ret) == usize);
+		assert(tcache_salloc(tsd_tsdn(tsd), ret) == usize);
 	}
 
 	if (likely(!zero)) {
 		if (slow_path && config_fill) {
 			if (unlikely(opt_junk_alloc)) {
 				arena_alloc_junk_small(ret,
 				    &arena_bin_info[binind], false);
 			} else if (unlikely(opt_zero))
@@ -353,17 +351,17 @@ tcache_alloc_large(tsd_t *tsd, arena_t *
 		/*
 		 * Only allocate one large object at a time, because it's quite
 		 * expensive to create one and not use it.
 		 */
 		arena = arena_choose(tsd, arena);
 		if (unlikely(arena == NULL))
 			return (NULL);
 
-		ret = arena_malloc_large(tsd, arena, binind, zero);
+		ret = arena_malloc_large(tsd_tsdn(tsd), arena, binind, zero);
 		if (ret == NULL)
 			return (NULL);
 	} else {
 		size_t usize JEMALLOC_CC_SILENCE_INIT(0);
 
 		/* Only compute usize on demand */
 		if (config_prof || (slow_path && config_fill) ||
 		    unlikely(zero)) {
@@ -376,19 +374,20 @@ tcache_alloc_large(tsd_t *tsd, arena_t *
 			    (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
 			size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >>
 			    LG_PAGE);
 			arena_mapbits_large_binind_set(chunk, pageind,
 			    BININD_INVALID);
 		}
 		if (likely(!zero)) {
 			if (slow_path && config_fill) {
-				if (unlikely(opt_junk_alloc))
-					memset(ret, 0xa5, usize);
-				else if (unlikely(opt_zero))
+				if (unlikely(opt_junk_alloc)) {
+					memset(ret, JEMALLOC_ALLOC_JUNK,
+					    usize);
+				} else if (unlikely(opt_zero))
 					memset(ret, 0, usize);
 			}
 		} else
 			memset(ret, 0, usize);
 
 		if (config_stats)
 			tbin->tstats.nrequests++;
 		if (config_prof)
@@ -401,17 +400,17 @@ tcache_alloc_large(tsd_t *tsd, arena_t *
 
 JEMALLOC_ALWAYS_INLINE void
 tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
     bool slow_path)
 {
 	tcache_bin_t *tbin;
 	tcache_bin_info_t *tbin_info;
 
-	assert(tcache_salloc(ptr) <= SMALL_MAXCLASS);
+	assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SMALL_MAXCLASS);
 
 	if (slow_path && config_fill && unlikely(opt_junk_free))
 		arena_dalloc_junk_small(ptr, &arena_bin_info[binind]);
 
 	tbin = &tcache->tbins[binind];
 	tbin_info = &tcache_bin_info[binind];
 	if (unlikely(tbin->ncached == tbin_info->ncached_max)) {
 		tcache_bin_flush_small(tsd, tcache, tbin, binind,
@@ -428,18 +427,18 @@ JEMALLOC_ALWAYS_INLINE void
 tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size,
     bool slow_path)
 {
 	szind_t binind;
 	tcache_bin_t *tbin;
 	tcache_bin_info_t *tbin_info;
 
 	assert((size & PAGE_MASK) == 0);
-	assert(tcache_salloc(ptr) > SMALL_MAXCLASS);
-	assert(tcache_salloc(ptr) <= tcache_maxclass);
+	assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SMALL_MAXCLASS);
+	assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass);
 
 	binind = size2index(size);
 
 	if (slow_path && config_fill && unlikely(opt_junk_free))
 		arena_dalloc_junk_large(ptr, size);
 
 	tbin = &tcache->tbins[binind];
 	tbin_info = &tcache_bin_info[binind];
@@ -453,16 +452,18 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t
 
 	tcache_event(tsd, tcache);
 }
 
 JEMALLOC_ALWAYS_INLINE tcache_t *
 tcaches_get(tsd_t *tsd, unsigned ind)
 {
 	tcaches_t *elm = &tcaches[ind];
-	if (unlikely(elm->tcache == NULL))
-		elm->tcache = tcache_create(tsd, arena_choose(tsd, NULL));
+	if (unlikely(elm->tcache == NULL)) {
+		elm->tcache = tcache_create(tsd_tsdn(tsd), arena_choose(tsd,
+		    NULL));
+	}
 	return (elm->tcache);
 }
 #endif
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/internal/tsd.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/tsd.h
@@ -8,16 +8,19 @@ typedef bool (*malloc_tsd_cleanup_t)(voi
 
 #if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
     !defined(_WIN32))
 typedef struct tsd_init_block_s tsd_init_block_t;
 typedef struct tsd_init_head_s tsd_init_head_t;
 #endif
 
 typedef struct tsd_s tsd_t;
+typedef struct tsdn_s tsdn_t;
+
+#define	TSDN_NULL	((tsdn_t *)0)
 
 typedef enum {
 	tsd_state_uninitialized,
 	tsd_state_nominal,
 	tsd_state_purgatory,
 	tsd_state_reincarnated
 } tsd_state_t;
 
@@ -39,17 +42,18 @@ typedef enum {
  * In example.c:
  *   malloc_tsd_data(, example_, example_t, EX_INITIALIZER)
  *   malloc_tsd_funcs(, example_, example_t, EX_INITIALIZER,
  *       example_tsd_cleanup)
  *
  * The result is a set of generated functions, e.g.:
  *
  *   bool example_tsd_boot(void) {...}
- *   example_t *example_tsd_get() {...}
+ *   bool example_tsd_booted_get(void) {...}
+ *   example_t *example_tsd_get(bool init) {...}
  *   void example_tsd_set(example_t *val) {...}
  *
  * Note that all of the functions deal in terms of (a_type *) rather than
  * (a_type) so that it is possible to support non-pointer types (unlike
  * pthreads TSD).  example_tsd_cleanup() is passed an (a_type *) pointer that is
  * cast to (void *).  This means that the cleanup function needs to cast the
  * function argument to (a_type *), then dereference the resulting pointer to
  * access fields, e.g.
@@ -93,18 +97,20 @@ typedef struct {							\
 /* malloc_tsd_protos(). */
 #define	malloc_tsd_protos(a_attr, a_name, a_type)			\
 a_attr bool								\
 a_name##tsd_boot0(void);						\
 a_attr void								\
 a_name##tsd_boot1(void);						\
 a_attr bool								\
 a_name##tsd_boot(void);							\
+a_attr bool								\
+a_name##tsd_booted_get(void);						\
 a_attr a_type *								\
-a_name##tsd_get(void);							\
+a_name##tsd_get(bool init);						\
 a_attr void								\
 a_name##tsd_set(a_type *val);
 
 /* malloc_tsd_externs(). */
 #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
 #define	malloc_tsd_externs(a_name, a_type)				\
 extern __thread a_type	a_name##tsd_tls;				\
 extern __thread bool	a_name##tsd_initialized;			\
@@ -196,19 +202,31 @@ a_name##tsd_boot1(void)							\
 	/* Do nothing. */						\
 }									\
 a_attr bool								\
 a_name##tsd_boot(void)							\
 {									\
 									\
 	return (a_name##tsd_boot0());					\
 }									\
+a_attr bool								\
+a_name##tsd_booted_get(void)						\
+{									\
+									\
+	return (a_name##tsd_booted);					\
+}									\
+a_attr bool								\
+a_name##tsd_get_allocates(void)						\
+{									\
+									\
+	return (false);							\
+}									\
 /* Get/set. */								\
 a_attr a_type *								\
-a_name##tsd_get(void)							\
+a_name##tsd_get(bool init)						\
 {									\
 									\
 	assert(a_name##tsd_booted);					\
 	return (&a_name##tsd_tls);					\
 }									\
 a_attr void								\
 a_name##tsd_set(a_type *val)						\
 {									\
@@ -241,19 +259,31 @@ a_name##tsd_boot1(void)							\
 	/* Do nothing. */						\
 }									\
 a_attr bool								\
 a_name##tsd_boot(void)							\
 {									\
 									\
 	return (a_name##tsd_boot0());					\
 }									\
+a_attr bool								\
+a_name##tsd_booted_get(void)						\
+{									\
+									\
+	return (a_name##tsd_booted);					\
+}									\
+a_attr bool								\
+a_name##tsd_get_allocates(void)						\
+{									\
+									\
+	return (false);							\
+}									\
 /* Get/set. */								\
 a_attr a_type *								\
-a_name##tsd_get(void)							\
+a_name##tsd_get(bool init)						\
 {									\
 									\
 	assert(a_name##tsd_booted);					\
 	return (&a_name##tsd_tls);					\
 }									\
 a_attr void								\
 a_name##tsd_set(a_type *val)						\
 {									\
@@ -302,24 +332,24 @@ a_name##tsd_wrapper_set(a_name##tsd_wrap
 									\
 	if (!TlsSetValue(a_name##tsd_tsd, (void *)wrapper)) {		\
 		malloc_write("<jemalloc>: Error setting"		\
 		    " TSD for "#a_name"\n");				\
 		abort();						\
 	}								\
 }									\
 a_attr a_name##tsd_wrapper_t *						\
-a_name##tsd_wrapper_get(void)						\
+a_name##tsd_wrapper_get(bool init)					\
 {									\
 	DWORD error = GetLastError();					\
 	a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *)	\
 	    TlsGetValue(a_name##tsd_tsd);				\
 	SetLastError(error);						\
 									\
-	if (unlikely(wrapper == NULL)) {				\
+	if (init && unlikely(wrapper == NULL)) {			\
 		wrapper = (a_name##tsd_wrapper_t *)			\
 		    malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t));	\
 		if (wrapper == NULL) {					\
 			malloc_write("<jemalloc>: Error allocating"	\
 			    " TSD for "#a_name"\n");			\
 			abort();					\
 		} else {						\
 			wrapper->initialized = false;			\
@@ -363,33 +393,47 @@ a_attr bool								\
 a_name##tsd_boot(void)							\
 {									\
 									\
 	if (a_name##tsd_boot0())					\
 		return (true);						\
 	a_name##tsd_boot1();						\
 	return (false);							\
 }									\
+a_attr bool								\
+a_name##tsd_booted_get(void)						\
+{									\
+									\
+	return (a_name##tsd_booted);					\
+}									\
+a_attr bool								\
+a_name##tsd_get_allocates(void)						\
+{									\
+									\
+	return (true);							\
+}									\
 /* Get/set. */								\
 a_attr a_type *								\
-a_name##tsd_get(void)							\
+a_name##tsd_get(bool init)						\
 {									\
 	a_name##tsd_wrapper_t *wrapper;					\
 									\
 	assert(a_name##tsd_booted);					\
-	wrapper = a_name##tsd_wrapper_get();				\
+	wrapper = a_name##tsd_wrapper_get(init);			\
+	if (a_name##tsd_get_allocates() && !init && wrapper == NULL)	\
+		return (NULL);						\
 	return (&wrapper->val);						\
 }									\
 a_attr void								\
 a_name##tsd_set(a_type *val)						\
 {									\
 	a_name##tsd_wrapper_t *wrapper;					\
 									\
 	assert(a_name##tsd_booted);					\
-	wrapper = a_name##tsd_wrapper_get();				\
+	wrapper = a_name##tsd_wrapper_get(true);			\
 	wrapper->val = *(val);						\
 	if (a_cleanup != malloc_tsd_no_cleanup)				\
 		wrapper->initialized = true;				\
 }
 #else
 #define	malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer,		\
     a_cleanup)								\
 /* Initialization/cleanup. */						\
@@ -423,22 +467,22 @@ a_name##tsd_wrapper_set(a_name##tsd_wrap
 	if (pthread_setspecific(a_name##tsd_tsd,			\
 	    (void *)wrapper)) {						\
 		malloc_write("<jemalloc>: Error setting"		\
 		    " TSD for "#a_name"\n");				\
 		abort();						\
 	}								\
 }									\
 a_attr a_name##tsd_wrapper_t *						\
-a_name##tsd_wrapper_get(void)						\
+a_name##tsd_wrapper_get(bool init)					\
 {									\
 	a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *)	\
 	    pthread_getspecific(a_name##tsd_tsd);			\
 									\
-	if (unlikely(wrapper == NULL)) {				\
+	if (init && unlikely(wrapper == NULL)) {			\
 		tsd_init_block_t block;					\
 		wrapper = tsd_init_check_recursion(			\
 		    &a_name##tsd_init_head, &block);			\
 		if (wrapper)						\
 		    return (wrapper);					\
 		wrapper = (a_name##tsd_wrapper_t *)			\
 		    malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t));	\
 		block.data = wrapper;					\
@@ -485,33 +529,47 @@ a_attr bool								\
 a_name##tsd_boot(void)							\
 {									\
 									\
 	if (a_name##tsd_boot0())					\
 		return (true);						\
 	a_name##tsd_boot1();						\
 	return (false);							\
 }									\
+a_attr bool								\
+a_name##tsd_booted_get(void)						\
+{									\
+									\
+	return (a_name##tsd_booted);					\
+}									\
+a_attr bool								\
+a_name##tsd_get_allocates(void)						\
+{									\
+									\
+	return (true);							\
+}									\
 /* Get/set. */								\
 a_attr a_type *								\
-a_name##tsd_get(void)							\
+a_name##tsd_get(bool init)						\
 {									\
 	a_name##tsd_wrapper_t *wrapper;					\
 									\
 	assert(a_name##tsd_booted);					\
-	wrapper = a_name##tsd_wrapper_get();				\
+	wrapper = a_name##tsd_wrapper_get(init);			\
+	if (a_name##tsd_get_allocates() && !init && wrapper == NULL)	\
+		return (NULL);						\
 	return (&wrapper->val);						\
 }									\
 a_attr void								\
 a_name##tsd_set(a_type *val)						\
 {									\
 	a_name##tsd_wrapper_t *wrapper;					\
 									\
 	assert(a_name##tsd_booted);					\
-	wrapper = a_name##tsd_wrapper_get();				\
+	wrapper = a_name##tsd_wrapper_get(true);			\
 	wrapper->val = *(val);						\
 	if (a_cleanup != malloc_tsd_no_cleanup)				\
 		wrapper->initialized = true;				\
 }
 #endif
 
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
@@ -531,108 +589,146 @@ struct tsd_init_head_s {
 #endif
 
 #define	MALLOC_TSD							\
 /*  O(name,			type) */				\
     O(tcache,			tcache_t *)				\
     O(thread_allocated,		uint64_t)				\
     O(thread_deallocated,	uint64_t)				\
     O(prof_tdata,		prof_tdata_t *)				\
+    O(iarena,			arena_t *)				\
     O(arena,			arena_t *)				\
     O(arenas_tdata,		arena_tdata_t *)			\
     O(narenas_tdata,		unsigned)				\
     O(arenas_tdata_bypass,	bool)					\
     O(tcache_enabled,		tcache_enabled_t)			\
     O(quarantine,		quarantine_t *)				\
+    O(witnesses,		witness_list_t)				\
+    O(witness_fork,		bool)					\
 
 #define	TSD_INITIALIZER {						\
     tsd_state_uninitialized,						\
     NULL,								\
     0,									\
     0,									\
     NULL,								\
     NULL,								\
     NULL,								\
+    NULL,								\
     0,									\
     false,								\
     tcache_enabled_default,						\
-    NULL								\
+    NULL,								\
+    ql_head_initializer(witnesses),					\
+    false								\
 }
 
 struct tsd_s {
 	tsd_state_t	state;
 #define	O(n, t)								\
 	t		n;
 MALLOC_TSD
 #undef O
 };
 
+/*
+ * Wrapper around tsd_t that makes it possible to avoid implicit conversion
+ * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be
+ * explicitly converted to tsd_t, which is non-nullable.
+ */
+struct tsdn_s {
+	tsd_t	tsd;
+};
+
 static const tsd_t tsd_initializer = TSD_INITIALIZER;
 
 malloc_tsd_types(, tsd_t)
 
 #endif /* JEMALLOC_H_STRUCTS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
 void	*malloc_tsd_malloc(size_t size);
 void	malloc_tsd_dalloc(void *wrapper);
 void	malloc_tsd_no_cleanup(void *arg);
 void	malloc_tsd_cleanup_register(bool (*f)(void));
-bool	malloc_tsd_boot0(void);
+tsd_t	*malloc_tsd_boot0(void);
 void	malloc_tsd_boot1(void);
 #if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
     !defined(_WIN32))
 void	*tsd_init_check_recursion(tsd_init_head_t *head,
     tsd_init_block_t *block);
 void	tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block);
 #endif
 void	tsd_cleanup(void *arg);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
 malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t)
 
+tsd_t	*tsd_fetch_impl(bool init);
 tsd_t	*tsd_fetch(void);
+tsdn_t	*tsd_tsdn(tsd_t *tsd);
 bool	tsd_nominal(tsd_t *tsd);
 #define	O(n, t)								\
 t	*tsd_##n##p_get(tsd_t *tsd);					\
 t	tsd_##n##_get(tsd_t *tsd);					\
 void	tsd_##n##_set(tsd_t *tsd, t n);
 MALLOC_TSD
 #undef O
+tsdn_t	*tsdn_fetch(void);
+bool	tsdn_null(const tsdn_t *tsdn);
+tsd_t	*tsdn_tsd(tsdn_t *tsdn);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_))
 malloc_tsd_externs(, tsd_t)
 malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup)
 
 JEMALLOC_ALWAYS_INLINE tsd_t *
-tsd_fetch(void)
+tsd_fetch_impl(bool init)
 {
-	tsd_t *tsd = tsd_get();
+	tsd_t *tsd = tsd_get(init);
+
+	if (!init && tsd_get_allocates() && tsd == NULL)
+		return (NULL);
+	assert(tsd != NULL);
 
 	if (unlikely(tsd->state != tsd_state_nominal)) {
 		if (tsd->state == tsd_state_uninitialized) {
 			tsd->state = tsd_state_nominal;
 			/* Trigger cleanup handler registration. */
 			tsd_set(tsd);
 		} else if (tsd->state == tsd_state_purgatory) {
 			tsd->state = tsd_state_reincarnated;
 			tsd_set(tsd);
 		} else
 			assert(tsd->state == tsd_state_reincarnated);
 	}
 
 	return (tsd);
 }
 
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsd_fetch(void)
+{
+
+	return (tsd_fetch_impl(true));
+}
+
+JEMALLOC_ALWAYS_INLINE tsdn_t *
+tsd_tsdn(tsd_t *tsd)
+{
+
+	return ((tsdn_t *)tsd);
+}
+
 JEMALLOC_INLINE bool
 tsd_nominal(tsd_t *tsd)
 {
 
 	return (tsd->state == tsd_state_nominal);
 }
 
 #define	O(n, t)								\
@@ -654,12 +750,38 @@ JEMALLOC_ALWAYS_INLINE void						\
 tsd_##n##_set(tsd_t *tsd, t n)						\
 {									\
 									\
 	assert(tsd->state == tsd_state_nominal);			\
 	tsd->n = n;							\
 }
 MALLOC_TSD
 #undef O
+
+JEMALLOC_ALWAYS_INLINE tsdn_t *
+tsdn_fetch(void)
+{
+
+	if (!tsd_booted_get())
+		return (NULL);
+
+	return (tsd_tsdn(tsd_fetch_impl(false)));
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsdn_null(const tsdn_t *tsdn)
+{
+
+	return (tsdn == NULL);
+}
+
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsdn_tsd(tsdn_t *tsdn)
+{
+
+	assert(!tsdn_null(tsdn));
+
+	return (&tsdn->tsd);
+}
 #endif
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/internal/util.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/util.h
@@ -35,16 +35,20 @@
 #define	BUFERROR_BUF		64
 
 /*
  * Size of stack-allocated buffer used by malloc_{,v,vc}printf().  This must be
  * large enough for all possible uses within jemalloc.
  */
 #define	MALLOC_PRINTF_BUFSIZE	4096
 
+/* Junk fill patterns. */
+#define	JEMALLOC_ALLOC_JUNK	((uint8_t)0xa5)
+#define	JEMALLOC_FREE_JUNK	((uint8_t)0x5a)
+
 /*
  * Wrap a cpp argument that contains commas such that it isn't broken up into
  * multiple arguments.
  */
 #define	JEMALLOC_ARG_CONCAT(...) __VA_ARGS__
 
 /*
  * Silence compiler warnings due to uninitialized values.  This is used
@@ -52,40 +56,30 @@
  * uninitialized.
  */
 #ifdef JEMALLOC_CC_SILENCE
 #	define JEMALLOC_CC_SILENCE_INIT(v) = v
 #else
 #	define JEMALLOC_CC_SILENCE_INIT(v)
 #endif
 
-#define	JEMALLOC_GNUC_PREREQ(major, minor)				\
-    (!defined(__clang__) &&						\
-    (__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor))))
-#ifndef __has_builtin
-#  define __has_builtin(builtin) (0)
-#endif
-#define	JEMALLOC_CLANG_HAS_BUILTIN(builtin)				\
-    (defined(__clang__) && __has_builtin(builtin))
-
 #ifdef __GNUC__
 #	define likely(x)   __builtin_expect(!!(x), 1)
 #	define unlikely(x) __builtin_expect(!!(x), 0)
-#  if JEMALLOC_GNUC_PREREQ(4, 6) ||					\
-      JEMALLOC_CLANG_HAS_BUILTIN(__builtin_unreachable)
-#	define unreachable() __builtin_unreachable()
-#  else
-#	define unreachable()
-#  endif
 #else
 #	define likely(x)   !!(x)
 #	define unlikely(x) !!(x)
-#	define unreachable()
 #endif
 
+#if !defined(JEMALLOC_INTERNAL_UNREACHABLE)
+#  error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure
+#endif
+
+#define unreachable() JEMALLOC_INTERNAL_UNREACHABLE()
+
 #include "jemalloc/internal/assert.h"
 
 /* Use to assert a particular configuration, e.g., cassert(config_debug). */
 #define	cassert(c) do {							\
 	if (unlikely(!(c)))						\
 		not_reached();						\
 } while (0)
 
@@ -101,19 +95,19 @@ int	buferror(int err, char *buf, size_t 
 uintmax_t	malloc_strtoumax(const char *restrict nptr,
     char **restrict endptr, int base);
 void	malloc_write(const char *s);
 
 /*
  * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating
  * point math.
  */
-int	malloc_vsnprintf(char *str, size_t size, const char *format,
+size_t	malloc_vsnprintf(char *str, size_t size, const char *format,
     va_list ap);
-int	malloc_snprintf(char *str, size_t size, const char *format, ...)
+size_t	malloc_snprintf(char *str, size_t size, const char *format, ...)
     JEMALLOC_FORMAT_PRINTF(3, 4);
 void	malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
     const char *format, va_list ap);
 void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque,
     const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4);
 void	malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2);
 
 #endif /* JEMALLOC_H_EXTERNS */
--- a/memory/jemalloc/src/include/jemalloc/internal/valgrind.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/valgrind.h
@@ -25,25 +25,27 @@
 	if (unlikely(in_valgrind))					\
 		valgrind_make_mem_defined(ptr, usize);			\
 } while (0)
 /*
  * The VALGRIND_MALLOCLIKE_BLOCK() and VALGRIND_RESIZEINPLACE_BLOCK() macro
  * calls must be embedded in macros rather than in functions so that when
  * Valgrind reports errors, there are no extra stack frames in the backtraces.
  */
-#define	JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {		\
-	if (unlikely(in_valgrind && cond))				\
-		VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero);	\
+#define	JEMALLOC_VALGRIND_MALLOC(cond, tsdn, ptr, usize, zero) do {	\
+	if (unlikely(in_valgrind && cond)) {				\
+		VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(tsdn, ptr),	\
+		    zero);						\
+	}								\
 } while (0)
-#define	JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize,		\
+#define	JEMALLOC_VALGRIND_REALLOC(maybe_moved, tsdn, ptr, usize,	\
     ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null,	\
     zero) do {								\
 	if (unlikely(in_valgrind)) {					\
-		size_t rzsize = p2rz(ptr);				\
+		size_t rzsize = p2rz(tsdn, ptr);			\
 									\
 		if (!maybe_moved || ptr == old_ptr) {			\
 			VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize,	\
 			    usize, rzsize);				\
 			if (zero && old_usize < usize) {		\
 				valgrind_make_mem_defined(		\
 				    (void *)((uintptr_t)ptr +		\
 				    old_usize), usize - old_usize);	\
@@ -76,18 +78,18 @@
 	if (unlikely(in_valgrind))					\
 		valgrind_freelike_block(ptr, rzsize);			\
 } while (0)
 #else
 #define	RUNNING_ON_VALGRIND	((unsigned)0)
 #define	JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do {} while (0)
 #define	JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do {} while (0)
 #define	JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do {} while (0)
-#define	JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {} while (0)
-#define	JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize,		\
+#define	JEMALLOC_VALGRIND_MALLOC(cond, tsdn, ptr, usize, zero) do {} while (0)
+#define	JEMALLOC_VALGRIND_REALLOC(maybe_moved, tsdn, ptr, usize,	\
     ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null,	\
     zero) do {} while (0)
 #define	JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0)
 #endif
 
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/include/jemalloc/internal/witness.h
@@ -0,0 +1,266 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+typedef struct witness_s witness_t;
+typedef unsigned witness_rank_t;
+typedef ql_head(witness_t) witness_list_t;
+typedef int witness_comp_t (const witness_t *, const witness_t *);
+
+/*
+ * Lock ranks.  Witnesses with rank WITNESS_RANK_OMIT are completely ignored by
+ * the witness machinery.
+ */
+#define	WITNESS_RANK_OMIT		0U
+
+#define	WITNESS_RANK_INIT		1U
+#define	WITNESS_RANK_CTL		1U
+#define	WITNESS_RANK_ARENAS		2U
+
+#define	WITNESS_RANK_PROF_DUMP		3U
+#define	WITNESS_RANK_PROF_BT2GCTX	4U
+#define	WITNESS_RANK_PROF_TDATAS	5U
+#define	WITNESS_RANK_PROF_TDATA		6U
+#define	WITNESS_RANK_PROF_GCTX		7U
+
+#define	WITNESS_RANK_ARENA		8U
+#define	WITNESS_RANK_ARENA_CHUNKS	9U
+#define	WITNESS_RANK_ARENA_NODE_CACHE	10
+
+#define	WITNESS_RANK_BASE		11U
+
+#define	WITNESS_RANK_LEAF		0xffffffffU
+#define	WITNESS_RANK_ARENA_BIN		WITNESS_RANK_LEAF
+#define	WITNESS_RANK_ARENA_HUGE		WITNESS_RANK_LEAF
+#define	WITNESS_RANK_DSS		WITNESS_RANK_LEAF
+#define	WITNESS_RANK_PROF_ACTIVE	WITNESS_RANK_LEAF
+#define	WITNESS_RANK_PROF_DUMP_SEQ	WITNESS_RANK_LEAF
+#define	WITNESS_RANK_PROF_GDUMP		WITNESS_RANK_LEAF
+#define	WITNESS_RANK_PROF_NEXT_THR_UID	WITNESS_RANK_LEAF
+#define	WITNESS_RANK_PROF_THREAD_ACTIVE_INIT	WITNESS_RANK_LEAF
+
+#define	WITNESS_INITIALIZER(rank) {"initializer", rank, NULL, {NULL, NULL}}
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+struct witness_s {
+	/* Name, used for printing lock order reversal messages. */
+	const char		*name;
+
+	/*
+	 * Witness rank, where 0 is lowest and UINT_MAX is highest.  Witnesses
+	 * must be acquired in order of increasing rank.
+	 */
+	witness_rank_t		rank;
+
+	/*
+	 * If two witnesses are of equal rank and they have the same comp
+	 * function pointer, it is called as a last attempt to differentiate
+	 * between witnesses of equal rank.
+	 */
+	witness_comp_t		*comp;
+
+	/* Linkage for thread's currently owned locks. */
+	ql_elm(witness_t)	link;
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+void	witness_init(witness_t *witness, const char *name, witness_rank_t rank,
+    witness_comp_t *comp);
+#ifdef JEMALLOC_JET
+typedef void (witness_lock_error_t)(const witness_list_t *, const witness_t *);
+extern witness_lock_error_t *witness_lock_error;
+#else
+void	witness_lock_error(const witness_list_t *witnesses,
+    const witness_t *witness);
+#endif
+#ifdef JEMALLOC_JET
+typedef void (witness_owner_error_t)(const witness_t *);
+extern witness_owner_error_t *witness_owner_error;
+#else
+void	witness_owner_error(const witness_t *witness);
+#endif
+#ifdef JEMALLOC_JET
+typedef void (witness_not_owner_error_t)(const witness_t *);
+extern witness_not_owner_error_t *witness_not_owner_error;
+#else
+void	witness_not_owner_error(const witness_t *witness);
+#endif
+#ifdef JEMALLOC_JET
+typedef void (witness_lockless_error_t)(const witness_list_t *);
+extern witness_lockless_error_t *witness_lockless_error;
+#else
+void	witness_lockless_error(const witness_list_t *witnesses);
+#endif
+
+void	witnesses_cleanup(tsd_t *tsd);
+void	witness_fork_cleanup(tsd_t *tsd);
+void	witness_prefork(tsd_t *tsd);
+void	witness_postfork_parent(tsd_t *tsd);
+void	witness_postfork_child(tsd_t *tsd);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+bool	witness_owner(tsd_t *tsd, const witness_t *witness);
+void	witness_assert_owner(tsdn_t *tsdn, const witness_t *witness);
+void	witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness);
+void	witness_assert_lockless(tsdn_t *tsdn);
+void	witness_lock(tsdn_t *tsdn, witness_t *witness);
+void	witness_unlock(tsdn_t *tsdn, witness_t *witness);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_))
+JEMALLOC_INLINE bool
+witness_owner(tsd_t *tsd, const witness_t *witness)
+{
+	witness_list_t *witnesses;
+	witness_t *w;
+
+	witnesses = tsd_witnessesp_get(tsd);
+	ql_foreach(w, witnesses, link) {
+		if (w == witness)
+			return (true);
+	}
+
+	return (false);
+}
+
+JEMALLOC_INLINE void
+witness_assert_owner(tsdn_t *tsdn, const witness_t *witness)
+{
+	tsd_t *tsd;
+
+	if (!config_debug)
+		return;
+
+	if (tsdn_null(tsdn))
+		return;
+	tsd = tsdn_tsd(tsdn);
+	if (witness->rank == WITNESS_RANK_OMIT)
+		return;
+
+	if (witness_owner(tsd, witness))
+		return;
+	witness_owner_error(witness);
+}
+
+JEMALLOC_INLINE void
+witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness)
+{
+	tsd_t *tsd;
+	witness_list_t *witnesses;
+	witness_t *w;
+
+	if (!config_debug)
+		return;
+
+	if (tsdn_null(tsdn))
+		return;
+	tsd = tsdn_tsd(tsdn);
+	if (witness->rank == WITNESS_RANK_OMIT)
+		return;
+
+	witnesses = tsd_witnessesp_get(tsd);
+	ql_foreach(w, witnesses, link) {
+		if (w == witness)
+			witness_not_owner_error(witness);
+	}
+}
+
+JEMALLOC_INLINE void
+witness_assert_lockless(tsdn_t *tsdn)
+{
+	tsd_t *tsd;
+	witness_list_t *witnesses;
+	witness_t *w;
+
+	if (!config_debug)
+		return;
+
+	if (tsdn_null(tsdn))
+		return;
+	tsd = tsdn_tsd(tsdn);
+
+	witnesses = tsd_witnessesp_get(tsd);
+	w = ql_last(witnesses, link);
+	if (w != NULL)
+		witness_lockless_error(witnesses);
+}
+
+JEMALLOC_INLINE void
+witness_lock(tsdn_t *tsdn, witness_t *witness)
+{
+	tsd_t *tsd;
+	witness_list_t *witnesses;
+	witness_t *w;
+
+	if (!config_debug)
+		return;
+
+	if (tsdn_null(tsdn))
+		return;
+	tsd = tsdn_tsd(tsdn);
+	if (witness->rank == WITNESS_RANK_OMIT)
+		return;
+
+	witness_assert_not_owner(tsdn, witness);
+
+	witnesses = tsd_witnessesp_get(tsd);
+	w = ql_last(witnesses, link);
+	if (w == NULL) {
+		/* No other locks; do nothing. */
+	} else if (tsd_witness_fork_get(tsd) && w->rank <= witness->rank) {
+		/* Forking, and relaxed ranking satisfied. */
+	} else if (w->rank > witness->rank) {
+		/* Not forking, rank order reversal. */
+		witness_lock_error(witnesses, witness);
+	} else if (w->rank == witness->rank && (w->comp == NULL || w->comp !=
+	    witness->comp || w->comp(w, witness) > 0)) {
+		/*
+		 * Missing/incompatible comparison function, or comparison
+		 * function indicates rank order reversal.
+		 */
+		witness_lock_error(witnesses, witness);
+	}
+
+	ql_elm_new(witness, link);
+	ql_tail_insert(witnesses, witness, link);
+}
+
+JEMALLOC_INLINE void
+witness_unlock(tsdn_t *tsdn, witness_t *witness)
+{
+	tsd_t *tsd;
+	witness_list_t *witnesses;
+
+	if (!config_debug)
+		return;
+
+	if (tsdn_null(tsdn))
+		return;
+	tsd = tsdn_tsd(tsdn);
+	if (witness->rank == WITNESS_RANK_OMIT)
+		return;
+
+	/*
+	 * Check whether owner before removal, rather than relying on
+	 * witness_assert_owner() to abort, so that unit tests can test this
+	 * function's failure mode without causing undefined behavior.
+	 */
+	if (witness_owner(tsd, witness)) {
+		witnesses = tsd_witnessesp_get(tsd);
+		ql_remove(witnesses, witness, link);
+	} else
+		witness_assert_owner(tsdn, witness);
+}
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/jemalloc_macros.h.in
+++ b/memory/jemalloc/src/include/jemalloc/jemalloc_macros.h.in
@@ -8,33 +8,33 @@
 #define	JEMALLOC_VERSION_MAJOR @jemalloc_version_major@
 #define	JEMALLOC_VERSION_MINOR @jemalloc_version_minor@
 #define	JEMALLOC_VERSION_BUGFIX @jemalloc_version_bugfix@
 #define	JEMALLOC_VERSION_NREV @jemalloc_version_nrev@
 #define	JEMALLOC_VERSION_GID "@jemalloc_version_gid@"
 
 #  define MALLOCX_LG_ALIGN(la)	((int)(la))
 #  if LG_SIZEOF_PTR == 2
-#    define MALLOCX_ALIGN(a)	((int)(ffs(a)-1))
+#    define MALLOCX_ALIGN(a)	((int)(ffs((int)(a))-1))
 #  else
 #    define MALLOCX_ALIGN(a)						\
-       ((int)(((a) < (size_t)INT_MAX) ? ffs((int)(a))-1 :		\
-       ffs((int)((a)>>32))+31))
+       ((int)(((size_t)(a) < (size_t)INT_MAX) ? ffs((int)(a))-1 :	\
+       ffs((int)(((size_t)(a))>>32))+31))
 #  endif
 #  define MALLOCX_ZERO	((int)0x40)
 /*
  * Bias tcache index bits so that 0 encodes "automatic tcache management", and 1
  * encodes MALLOCX_TCACHE_NONE.
  */
 #  define MALLOCX_TCACHE(tc)	((int)(((tc)+2) << 8))
 #  define MALLOCX_TCACHE_NONE	MALLOCX_TCACHE(-1)
 /*
  * Bias arena index bits so that 0 encodes "use an automatically chosen arena".
  */
-#  define MALLOCX_ARENA(a)	((int)(((a)+1) << 20))
+#  define MALLOCX_ARENA(a)	((((int)(a))+1) << 20)
 
 #if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW)
 #  define JEMALLOC_CXX_THROW throw()
 #else
 #  define JEMALLOC_CXX_THROW
 #endif
 
 #if _MSC_VER
--- a/memory/jemalloc/src/jemalloc.pc.in
+++ b/memory/jemalloc/src/jemalloc.pc.in
@@ -1,12 +1,12 @@
 prefix=@prefix@
 exec_prefix=@exec_prefix@
 libdir=@libdir@
 includedir=@includedir@
 install_suffix=@install_suffix@
 
 Name: jemalloc
 Description: A general purpose malloc(3) implementation that emphasizes fragmentation avoidance and scalable concurrency support.
-URL: http://www.canonware.com/jemalloc
+URL: http://jemalloc.net/
 Version: @jemalloc_version@
 Cflags: -I${includedir}
 Libs: -L${libdir} -ljemalloc${install_suffix}
--- a/memory/jemalloc/src/msvc/ReadMe.txt
+++ b/memory/jemalloc/src/msvc/ReadMe.txt
@@ -12,13 +12,13 @@ 1. Install Cygwin with at least the foll
 2. Install Visual Studio 2015 with Visual C++
 
 3. Add Cygwin\bin to the PATH environment variable
 
 4. Open "VS2015 x86 Native Tools Command Prompt"
    (note: x86/x64 doesn't matter at this point)
 
 5. Generate header files:
-   sh -c "./autogen.sh CC=cl --enable-lazy-lock=no"
+   sh -c "CC=cl ./autogen.sh"
 
 6. Now the project can be opened and built in Visual Studio:
    msvc\jemalloc_vc2015.sln
 
--- a/memory/jemalloc/src/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
+++ b/memory/jemalloc/src/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
@@ -51,34 +51,37 @@
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\jemalloc_internal.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\jemalloc_internal_decls.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\jemalloc_internal_defs.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\jemalloc_internal_macros.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\mb.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\mutex.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\nstime.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\pages.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\ph.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\private_namespace.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\private_unnamespace.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\prng.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\prof.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\public_namespace.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\public_unnamespace.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\ql.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\qr.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\quarantine.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\rb.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\rtree.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\size_classes.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\smoothstep.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\spin.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\stats.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\tcache.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\ticker.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\tsd.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\internal\util.h" />
-    <ClInclude Include="..\..\..\..\include\jemalloc\internal\valgrind.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\witness.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_defs.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_macros.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_mangle.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_protos.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_protos_jet.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_rename.h" />
     <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_typedefs.h" />
@@ -104,21 +107,23 @@
     <ClCompile Include="..\..\..\..\src\mb.c" />
     <ClCompile Include="..\..\..\..\src\mutex.c" />
     <ClCompile Include="..\..\..\..\src\nstime.c" />
     <ClCompile Include="..\..\..\..\src\pages.c" />
     <ClCompile Include="..\..\..\..\src\prng.c" />
     <ClCompile Include="..\..\..\..\src\prof.c" />
     <ClCompile Include="..\..\..\..\src\quarantine.c" />
     <ClCompile Include="..\..\..\..\src\rtree.c" />
+    <ClCompile Include="..\..\..\..\src\spin.c" />
     <ClCompile Include="..\..\..\..\src\stats.c" />
     <ClCompile Include="..\..\..\..\src\tcache.c" />
     <ClCompile Include="..\..\..\..\src\ticker.c" />
     <ClCompile Include="..\..\..\..\src\tsd.c" />
     <ClCompile Include="..\..\..\..\src\util.c" />
+    <ClCompile Include="..\..\..\..\src\witness.c" />
   </ItemGroup>
   <PropertyGroup Label="Globals">
     <ProjectGuid>{8D6BB292-9E1C-413D-9F98-4864BDC1514A}</ProjectGuid>
     <Keyword>Win32Proj</Keyword>
     <RootNamespace>jemalloc</RootNamespace>
     <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
@@ -245,85 +250,86 @@
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <ClCompile>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <WarningLevel>Level3</WarningLevel>
       <Optimization>Disabled</Optimization>
       <PreprocessorDefinitions>_REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
       <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
       <GenerateDebugInformation>true</GenerateDebugInformation>
     </Link>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'">
     <ClCompile>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <WarningLevel>Level3</WarningLevel>
       <Optimization>Disabled</Optimization>
       <PreprocessorDefinitions>JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
-      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
       <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
       <GenerateDebugInformation>true</GenerateDebugInformation>
     </Link>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <WarningLevel>Level3</WarningLevel>
       <Optimization>Disabled</Optimization>
       <PreprocessorDefinitions>_REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
       <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
       <GenerateDebugInformation>true</GenerateDebugInformation>
     </Link>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'">
     <ClCompile>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <WarningLevel>Level3</WarningLevel>
       <Optimization>Disabled</Optimization>
       <PreprocessorDefinitions>JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
-      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
-      <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+      <DebugInformationFormat>OldStyle</DebugInformationFormat>
+      <MinimalRebuild>false</MinimalRebuild>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
       <GenerateDebugInformation>true</GenerateDebugInformation>
     </Link>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <Optimization>MaxSpeed</Optimization>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <IntrinsicFunctions>true</IntrinsicFunctions>
       <PreprocessorDefinitions>_REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
       <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <EnableCOMDATFolding>true</EnableCOMDATFolding>
       <OptimizeReferences>true</OptimizeReferences>
     </Link>
@@ -334,17 +340,17 @@
       <PrecompiledHeader>
       </PrecompiledHeader>
       <Optimization>MaxSpeed</Optimization>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <IntrinsicFunctions>true</IntrinsicFunctions>
       <PreprocessorDefinitions>_REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
-      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
+      <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
       <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
     </ClCompile>
     <Link>
       <S