Bug 1254850 - Update jemalloc 4 to version 4.1.0 draft
authorMike Hommey <mh+mozilla@glandium.org>
Mon, 22 Feb 2016 23:20:15 +0900
changeset 338468 bcaa5c4d92aa9dfccbddad5ef69263221c24ddc0
parent 338467 74c384246dccf7ff973d695191af1851fc13aa01
child 515800 19dea18fd1c6b94f7328038b3a09421dcd5cb211
push id12513
push userbmo:mh+mozilla@glandium.org
push dateWed, 09 Mar 2016 05:09:40 +0000
bugs1254850
milestone48.0a1
Bug 1254850 - Update jemalloc 4 to version 4.1.0
memory/jemalloc/moz.build
memory/jemalloc/src/ChangeLog
memory/jemalloc/src/INSTALL
memory/jemalloc/src/Makefile.in
memory/jemalloc/src/VERSION
memory/jemalloc/src/bin/jeprof.in
memory/jemalloc/src/configure
memory/jemalloc/src/configure.ac
memory/jemalloc/src/doc/jemalloc.xml.in
memory/jemalloc/src/include/jemalloc/internal/arena.h
memory/jemalloc/src/include/jemalloc/internal/atomic.h
memory/jemalloc/src/include/jemalloc/internal/bitmap.h
memory/jemalloc/src/include/jemalloc/internal/chunk_mmap.h
memory/jemalloc/src/include/jemalloc/internal/ckh.h
memory/jemalloc/src/include/jemalloc/internal/ctl.h
memory/jemalloc/src/include/jemalloc/internal/hash.h
memory/jemalloc/src/include/jemalloc/internal/huge.h
memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal.h.in
memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal_decls.h
memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal_defs.h.in
memory/jemalloc/src/include/jemalloc/internal/nstime.h
memory/jemalloc/src/include/jemalloc/internal/private_symbols.txt
memory/jemalloc/src/include/jemalloc/internal/prng.h
memory/jemalloc/src/include/jemalloc/internal/rb.h
memory/jemalloc/src/include/jemalloc/internal/size_classes.sh
memory/jemalloc/src/include/jemalloc/internal/smoothstep.h
memory/jemalloc/src/include/jemalloc/internal/smoothstep.sh
memory/jemalloc/src/include/jemalloc/internal/stats.h
memory/jemalloc/src/include/jemalloc/internal/tcache.h
memory/jemalloc/src/include/jemalloc/internal/ticker.h
memory/jemalloc/src/include/jemalloc/internal/tsd.h
memory/jemalloc/src/include/jemalloc/internal/util.h
memory/jemalloc/src/include/jemalloc/jemalloc_defs.h.in
memory/jemalloc/src/include/jemalloc/jemalloc_macros.h.in
memory/jemalloc/src/include/msvc_compat/strings.h
memory/jemalloc/src/msvc/ReadMe.txt
memory/jemalloc/src/msvc/jemalloc_vc2015.sln
memory/jemalloc/src/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
memory/jemalloc/src/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
memory/jemalloc/src/msvc/projects/vc2015/test_threads/test_threads.cpp
memory/jemalloc/src/msvc/projects/vc2015/test_threads/test_threads.h
memory/jemalloc/src/msvc/projects/vc2015/test_threads/test_threads.vcxproj
memory/jemalloc/src/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters
memory/jemalloc/src/msvc/projects/vc2015/test_threads/test_threads_main.cpp
memory/jemalloc/src/src/arena.c
memory/jemalloc/src/src/bitmap.c
memory/jemalloc/src/src/chunk.c
memory/jemalloc/src/src/chunk_mmap.c
memory/jemalloc/src/src/ckh.c
memory/jemalloc/src/src/ctl.c
memory/jemalloc/src/src/huge.c
memory/jemalloc/src/src/jemalloc.c
memory/jemalloc/src/src/nstime.c
memory/jemalloc/src/src/prng.c
memory/jemalloc/src/src/prof.c
memory/jemalloc/src/src/stats.c
memory/jemalloc/src/src/tcache.c
memory/jemalloc/src/src/ticker.c
memory/jemalloc/src/src/tsd.c
memory/jemalloc/src/src/util.c
memory/jemalloc/src/test/include/test/jemalloc_test.h.in
memory/jemalloc/src/test/include/test/timer.h
memory/jemalloc/src/test/integration/mallocx.c
memory/jemalloc/src/test/integration/rallocx.c
memory/jemalloc/src/test/integration/xallocx.c
memory/jemalloc/src/test/src/timer.c
memory/jemalloc/src/test/unit/bitmap.c
memory/jemalloc/src/test/unit/decay.c
memory/jemalloc/src/test/unit/hash.c
memory/jemalloc/src/test/unit/mallctl.c
memory/jemalloc/src/test/unit/nstime.c
memory/jemalloc/src/test/unit/prng.c
memory/jemalloc/src/test/unit/rb.c
memory/jemalloc/src/test/unit/run_quantize.c
memory/jemalloc/src/test/unit/size_classes.c
memory/jemalloc/src/test/unit/smoothstep.c
memory/jemalloc/src/test/unit/stats.c
memory/jemalloc/src/test/unit/ticker.c
memory/jemalloc/src/test/unit/util.c
memory/jemalloc/upstream.info
--- a/memory/jemalloc/moz.build
+++ b/memory/jemalloc/moz.build
@@ -14,22 +14,25 @@ UNIFIED_SOURCES += [
     'src/src/chunk_mmap.c',
     'src/src/ckh.c',
     'src/src/extent.c',
     'src/src/hash.c',
     'src/src/huge.c',
     'src/src/jemalloc.c',
     'src/src/mb.c',
     'src/src/mutex.c',
+    'src/src/nstime.c',
     'src/src/pages.c',
+    'src/src/prng.c',
     'src/src/prof.c',
     'src/src/quarantine.c',
     'src/src/rtree.c',
     'src/src/stats.c',
     'src/src/tcache.c',
+    'src/src/ticker.c',
     'src/src/tsd.c',
     'src/src/util.c',
     # FIXME do we ever want valgrind.c?
     # 'src/src/valgrind.c',
 ]
 
 SOURCES += [
     # This file cannot be built in unified mode because of symbol clash on arena_purge.
@@ -71,8 +74,10 @@ DEFINES['abort'] = 'moz_abort'
 
 LOCAL_INCLUDES += [
     '!src/include',
     'src/include',
 ]
 
 # We allow warnings for third-party code that can be updated from upstream.
 ALLOW_COMPILER_WARNINGS = True
+
+OS_LIBS += CONFIG['REALTIME_LIBS']
--- a/memory/jemalloc/src/ChangeLog
+++ b/memory/jemalloc/src/ChangeLog
@@ -1,14 +1,87 @@
 Following are change highlights associated with official releases.  Important
 bug fixes are all mentioned, but some internal enhancements are omitted here for
 brevity.  Much more detail can be found in the git revision history:
 
     https://github.com/jemalloc/jemalloc
 
+* 4.1.0 (February 28, 2016)
+
+  This release is primarily about optimizations, but it also incorporates a lot
+  of portability-motivated refactoring and enhancements.  Many people worked on
+  this release, to an extent that even with the omission here of minor changes
+  (see git revision history), and of the people who reported and diagnosed
+  issues, so much of the work was contributed that starting with this release,
+  changes are annotated with author credits to help reflect the collaborative
+  effort involved.
+
+  New features:
+  - Implement decay-based unused dirty page purging, a major optimization with
+    mallctl API impact.  This is an alternative to the existing ratio-based
+    unused dirty page purging, and is intended to eventually become the sole
+    purging mechanism.  New mallctls:
+    + opt.purge
+    + opt.decay_time
+    + arena.<i>.decay
+    + arena.<i>.decay_time
+    + arenas.decay_time
+    + stats.arenas.<i>.decay_time
+    (@jasone, @cevans87)
+  - Add --with-malloc-conf, which makes it possible to embed a default
+    options string during configuration.  This was motivated by the desire to
+    specify --with-malloc-conf=purge:decay , since the default must remain
+    purge:ratio until the 5.0.0 release.  (@jasone)
+  - Add MS Visual Studio 2015 support.  (@rustyx, @yuslepukhin)
+  - Make *allocx() size class overflow behavior defined.  The maximum
+    size class is now less than PTRDIFF_MAX to protect applications against
+    numerical overflow, and all allocation functions are guaranteed to indicate
+    errors rather than potentially crashing if the request size exceeds the
+    maximum size class.  (@jasone)
+  - jeprof:
+    + Add raw heap profile support.  (@jasone)
+    + Add --retain and --exclude for backtrace symbol filtering.  (@jasone)
+
+  Optimizations:
+  - Optimize the fast path to combine various bootstrapping and configuration
+    checks and execute more streamlined code in the common case.  (@interwq)
+  - Use linear scan for small bitmaps (used for small object tracking).  In
+    addition to speeding up bitmap operations on 64-bit systems, this reduces
+    allocator metadata overhead by approximately 0.2%.  (@djwatson)
+  - Separate arena_avail trees, which substantially speeds up run tree
+    operations.  (@djwatson)
+  - Use memoization (boot-time-computed table) for run quantization.  Separate
+    arena_avail trees reduced the importance of this optimization.  (@jasone)
+  - Attempt mmap-based in-place huge reallocation.  This can dramatically speed
+    up incremental huge reallocation.  (@jasone)
+
+  Incompatible changes:
+  - Make opt.narenas unsigned rather than size_t.  (@jasone)
+
+  Bug fixes:
+  - Fix stats.cactive accounting regression.  (@rustyx, @jasone)
+  - Handle unaligned keys in hash().  This caused problems for some ARM systems.
+    (@jasone, Christopher Ferris)
+  - Refactor arenas array.  In addition to fixing a fork-related deadlock, this
+    makes arena lookups faster and simpler.  (@jasone)
+  - Move retained memory allocation out of the default chunk allocation
+    function, to a location that gets executed even if the application installs
+    a custom chunk allocation function.  This resolves a virtual memory leak.
+    (@buchgr)
+  - Fix a potential tsd cleanup leak.  (Christopher Ferris, @jasone)
+  - Fix run quantization.  In practice this bug had no impact unless
+    applications requested memory with alignment exceeding one page.
+    (@jasone, @djwatson)
+  - Fix LinuxThreads-specific bootstrapping deadlock.  (Cosmin Paraschiv)
+  - jeprof:
+    + Don't discard curl options if timeout is not defined.  (@djwatson)
+    + Detect failed profile fetches.  (@djwatson)
+  - Fix stats.arenas.<i>.{dss,lg_dirty_mult,decay_time,pactive,pdirty} for
+    --disable-stats case.  (@jasone)
+
 * 4.0.4 (October 24, 2015)
 
   This bugfix release fixes another xallocx() regression.  No other regressions
   have come to light in over a month, so this is likely a good starting point
   for people who prefer to wait for "dot one" releases with all the major issues
   shaken out.
 
   Bug fixes:
--- a/memory/jemalloc/src/INSTALL
+++ b/memory/jemalloc/src/INSTALL
@@ -79,16 +79,24 @@ any of the following arguments (not a de
     for static libraries, naming collisions are a real possibility.  By
     default, <prefix> is empty, which results in a symbol prefix of je_ .
 
 --with-install-suffix=<suffix>
     Append <suffix> to the base name of all installed files, such that multiple
     versions of jemalloc can coexist in the same installation directory.  For
     example, libjemalloc.so.0 becomes libjemalloc<suffix>.so.0.
 
+--with-malloc-conf=<malloc_conf>
+    Embed <malloc_conf> as a run-time options string that is processed prior to
+    the malloc_conf global variable, the /etc/malloc.conf symlink, and the
+    MALLOC_CONF environment variable.  For example, to change the default chunk
+    size to 256 KiB:
+
+      --with-malloc-conf=lg_chunk:18
+
 --disable-cc-silence
     Disable code that silences non-useful compiler warnings.  This is mainly
     useful during development when auditing the set of warnings that are being
     silenced.
 
 --enable-debug
     Enable assertions and validation code.  This incurs a substantial
     performance hit, but is very useful during application development.
--- a/memory/jemalloc/src/Makefile.in
+++ b/memory/jemalloc/src/Makefile.in
@@ -73,25 +73,42 @@ TEST_LIBRARY_PATH :=
 endif
 endif
 
 LIBJEMALLOC := $(LIBPREFIX)jemalloc$(install_suffix)
 
 # Lists of files.
 BINS := $(objroot)bin/jemalloc-config $(objroot)bin/jemalloc.sh $(objroot)bin/jeprof
 C_HDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h
-C_SRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c \
-	$(srcroot)src/atomic.c $(srcroot)src/base.c $(srcroot)src/bitmap.c \
-	$(srcroot)src/chunk.c $(srcroot)src/chunk_dss.c \
-	$(srcroot)src/chunk_mmap.c $(srcroot)src/ckh.c $(srcroot)src/ctl.c \
-	$(srcroot)src/extent.c $(srcroot)src/hash.c $(srcroot)src/huge.c \
-	$(srcroot)src/mb.c $(srcroot)src/mutex.c $(srcroot)src/pages.c \
-	$(srcroot)src/prof.c $(srcroot)src/quarantine.c $(srcroot)src/rtree.c \
-	$(srcroot)src/stats.c $(srcroot)src/tcache.c $(srcroot)src/util.c \
-	$(srcroot)src/tsd.c
+C_SRCS := $(srcroot)src/jemalloc.c \
+	$(srcroot)src/arena.c \
+	$(srcroot)src/atomic.c \
+	$(srcroot)src/base.c \
+	$(srcroot)src/bitmap.c \
+	$(srcroot)src/chunk.c \
+	$(srcroot)src/chunk_dss.c \
+	$(srcroot)src/chunk_mmap.c \
+	$(srcroot)src/ckh.c \
+	$(srcroot)src/ctl.c \
+	$(srcroot)src/extent.c \
+	$(srcroot)src/hash.c \
+	$(srcroot)src/huge.c \
+	$(srcroot)src/mb.c \
+	$(srcroot)src/mutex.c \
+	$(srcroot)src/nstime.c \
+	$(srcroot)src/pages.c \
+	$(srcroot)src/prng.c \
+	$(srcroot)src/prof.c \
+	$(srcroot)src/quarantine.c \
+	$(srcroot)src/rtree.c \
+	$(srcroot)src/stats.c \
+	$(srcroot)src/tcache.c \
+	$(srcroot)src/ticker.c \
+	$(srcroot)src/tsd.c \
+	$(srcroot)src/util.c
 ifeq ($(enable_valgrind), 1)
 C_SRCS += $(srcroot)src/valgrind.c
 endif
 ifeq ($(enable_zone_allocator), 1)
 C_SRCS += $(srcroot)src/zone.c
 endif
 ifeq ($(IMPORTLIB),$(SO))
 STATIC_LIBS := $(objroot)lib/$(LIBJEMALLOC).$(A)
@@ -111,43 +128,49 @@ DOCS_XML := $(objroot)doc/jemalloc$(inst
 DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.html)
 DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.3)
 DOCS := $(DOCS_HTML) $(DOCS_MAN3)
 C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \
 	$(srcroot)test/src/btalloc_1.c $(srcroot)test/src/math.c \
 	$(srcroot)test/src/mtx.c $(srcroot)test/src/mq.c \
 	$(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \
 	$(srcroot)test/src/thd.c $(srcroot)test/src/timer.c
-C_UTIL_INTEGRATION_SRCS := $(srcroot)src/util.c
+C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/util.c
 TESTS_UNIT := $(srcroot)test/unit/atomic.c \
 	$(srcroot)test/unit/bitmap.c \
 	$(srcroot)test/unit/ckh.c \
+	$(srcroot)test/unit/decay.c \
 	$(srcroot)test/unit/hash.c \
 	$(srcroot)test/unit/junk.c \
 	$(srcroot)test/unit/junk_alloc.c \
 	$(srcroot)test/unit/junk_free.c \
 	$(srcroot)test/unit/lg_chunk.c \
 	$(srcroot)test/unit/mallctl.c \
 	$(srcroot)test/unit/math.c \
 	$(srcroot)test/unit/mq.c \
 	$(srcroot)test/unit/mtx.c \
+	$(srcroot)test/unit/prng.c \
 	$(srcroot)test/unit/prof_accum.c \
 	$(srcroot)test/unit/prof_active.c \
 	$(srcroot)test/unit/prof_gdump.c \
 	$(srcroot)test/unit/prof_idump.c \
 	$(srcroot)test/unit/prof_reset.c \
 	$(srcroot)test/unit/prof_thread_name.c \
 	$(srcroot)test/unit/ql.c \
 	$(srcroot)test/unit/qr.c \
 	$(srcroot)test/unit/quarantine.c \
 	$(srcroot)test/unit/rb.c \
 	$(srcroot)test/unit/rtree.c \
+	$(srcroot)test/unit/run_quantize.c \
 	$(srcroot)test/unit/SFMT.c \
 	$(srcroot)test/unit/size_classes.c \
+	$(srcroot)test/unit/smoothstep.c \
 	$(srcroot)test/unit/stats.c \
+	$(srcroot)test/unit/ticker.c \
+	$(srcroot)test/unit/nstime.c \
 	$(srcroot)test/unit/tsd.c \
 	$(srcroot)test/unit/util.c \
 	$(srcroot)test/unit/zero.c
 TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \
 	$(srcroot)test/integration/allocated.c \
 	$(srcroot)test/integration/sdallocx.c \
 	$(srcroot)test/integration/mallocx.c \
 	$(srcroot)test/integration/MALLOCX_ARENA.c \
@@ -344,28 +367,32 @@ check_unit_dir:
 	@mkdir -p $(objroot)test/unit
 check_integration_dir:
 	@mkdir -p $(objroot)test/integration
 stress_dir:
 	@mkdir -p $(objroot)test/stress
 check_dir: check_unit_dir check_integration_dir
 
 check_unit: tests_unit check_unit_dir
-	$(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%)
+	$(MALLOC_CONF)="purge:ratio" $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%)
+	$(MALLOC_CONF)="purge:decay" $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%)
 check_integration_prof: tests_integration check_integration_dir
 ifeq ($(enable_prof), 1)
 	$(MALLOC_CONF)="prof:true" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%)
 	$(MALLOC_CONF)="prof:true,prof_active:false" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%)
 endif
+check_integration_decay: tests_integration check_integration_dir
+	$(MALLOC_CONF)="purge:decay,decay_time:-1" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%)
+	$(MALLOC_CONF)="purge:decay,decay_time:0" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%)
+	$(MALLOC_CONF)="purge:decay" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%)
 check_integration: tests_integration check_integration_dir
 	$(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%)
 stress: tests_stress stress_dir
 	$(SHELL) $(objroot)test/test.sh $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%)
-check: tests check_dir check_integration_prof
-	$(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%)
+check: check_unit check_integration check_integration_decay check_integration_prof
 
 ifeq ($(enable_code_coverage), 1)
 coverage_unit: check_unit
 	$(SHELL) $(srcroot)coverage.sh $(srcroot)src jet $(C_JET_OBJS)
 	$(SHELL) $(srcroot)coverage.sh $(srcroot)test/src unit $(C_TESTLIB_UNIT_OBJS)
 	$(SHELL) $(srcroot)coverage.sh $(srcroot)test/unit unit $(TESTS_UNIT_OBJS)
 
 coverage_integration: check_integration
--- a/memory/jemalloc/src/VERSION
+++ b/memory/jemalloc/src/VERSION
@@ -1,1 +1,1 @@
-4.0.4-12-g3a92319ddc5610b755f755cbbbd12791ca9d0c3d
+4.1.0-0-gdf900dbfaf4835d3efc06d771535f3e781544913
--- a/memory/jemalloc/src/bin/jeprof.in
+++ b/memory/jemalloc/src/bin/jeprof.in
@@ -90,17 +90,17 @@ my %obj_tool_map = (
 );
 # NOTE: these are lists, so you can put in commandline flags if you want.
 my @DOT = ("dot");          # leave non-absolute, since it may be in /usr/local
 my @GV = ("gv");
 my @EVINCE = ("evince");    # could also be xpdf or perhaps acroread
 my @KCACHEGRIND = ("kcachegrind");
 my @PS2PDF = ("ps2pdf");
 # These are used for dynamic profiles
-my @URL_FETCHER = ("curl", "-s");
+my @URL_FETCHER = ("curl", "-s", "--fail");
 
 # These are the web pages that servers need to support for dynamic profiles
 my $HEAP_PAGE = "/pprof/heap";
 my $PROFILE_PAGE = "/pprof/profile";   # must support cgi-param "?seconds=#"
 my $PMUPROFILE_PAGE = "/pprof/pmuprofile(?:\\?.*)?"; # must support cgi-param
                                                 # ?seconds=#&event=x&period=n
 my $GROWTH_PAGE = "/pprof/growth";
 my $CONTENTION_PAGE = "/pprof/contention";
@@ -218,22 +218,24 @@ Contention-profile options:
    --contentions       Display number of delays at each region
    --mean_delay        Display mean delay at each region
 
 Call-graph Options:
    --nodecount=<n>     Show at most so many nodes [default=80]
    --nodefraction=<f>  Hide nodes below <f>*total [default=.005]
    --edgefraction=<f>  Hide edges below <f>*total [default=.001]
    --maxdegree=<n>     Max incoming/outgoing edges per node [default=8]
-   --focus=<regexp>    Focus on nodes matching <regexp>
+   --focus=<regexp>    Focus on backtraces with nodes matching <regexp>
    --thread=<n>        Show profile for thread <n>
-   --ignore=<regexp>   Ignore nodes matching <regexp>
+   --ignore=<regexp>   Ignore backtraces with nodes matching <regexp>
    --scale=<n>         Set GV scaling [default=0]
    --heapcheck         Make nodes with non-0 object counts
                        (i.e. direct leak generators) more visible
+   --retain=<regexp>   Retain only nodes that match <regexp>
+   --exclude=<regexp>  Exclude all nodes that match <regexp>
 
 Miscellaneous:
    --tools=<prefix or binary:fullpath>[,...]   \$PATH for object tool pathnames
    --test              Run unit tests
    --help              This message
    --version           Version information
 
 Environment Variables:
@@ -334,16 +336,18 @@ sub Init() {
   $main::opt_nodefraction = 0.005;
   $main::opt_edgefraction = 0.001;
   $main::opt_maxdegree = 8;
   $main::opt_focus = '';
   $main::opt_thread = undef;
   $main::opt_ignore = '';
   $main::opt_scale = 0;
   $main::opt_heapcheck = 0;
+  $main::opt_retain = '';
+  $main::opt_exclude = '';
   $main::opt_seconds = 30;
   $main::opt_lib = "";
 
   $main::opt_inuse_space   = 0;
   $main::opt_inuse_objects = 0;
   $main::opt_alloc_space   = 0;
   $main::opt_alloc_objects = 0;
   $main::opt_show_bytes    = 0;
@@ -405,16 +409,18 @@ sub Init() {
              "nodefraction=f" => \$main::opt_nodefraction,
              "edgefraction=f" => \$main::opt_edgefraction,
              "maxdegree=i"    => \$main::opt_maxdegree,
              "focus=s"        => \$main::opt_focus,
              "thread=s"       => \$main::opt_thread,
              "ignore=s"       => \$main::opt_ignore,
              "scale=i"        => \$main::opt_scale,
              "heapcheck"      => \$main::opt_heapcheck,
+             "retain=s"       => \$main::opt_retain,
+             "exclude=s"      => \$main::opt_exclude,
              "inuse_space!"   => \$main::opt_inuse_space,
              "inuse_objects!" => \$main::opt_inuse_objects,
              "alloc_space!"   => \$main::opt_alloc_space,
              "alloc_objects!" => \$main::opt_alloc_objects,
              "show_bytes!"    => \$main::opt_show_bytes,
              "drop_negative!" => \$main::opt_drop_negative,
              "total_delay!"   => \$main::opt_total_delay,
              "contentions!"   => \$main::opt_contentions,
@@ -2835,16 +2841,53 @@ sub ExtractCalls {
       AddEntry($calls, $call, $count);
       $destination = $source;
     }
   }
 
   return $calls;
 }
 
+sub FilterFrames {
+  my $symbols = shift;
+  my $profile = shift;
+
+  if ($main::opt_retain eq '' && $main::opt_exclude eq '') {
+    return $profile;
+  }
+
+  my $result = {};
+  foreach my $k (keys(%{$profile})) {
+    my $count = $profile->{$k};
+    my @addrs = split(/\n/, $k);
+    my @path = ();
+    foreach my $a (@addrs) {
+      my $sym;
+      if (exists($symbols->{$a})) {
+        $sym = $symbols->{$a}->[0];
+      } else {
+        $sym = $a;
+      }
+      if ($main::opt_retain ne '' && $sym !~ m/$main::opt_retain/) {
+        next;
+      }
+      if ($main::opt_exclude ne '' && $sym =~ m/$main::opt_exclude/) {
+        next;
+      }
+      push(@path, $a);
+    }
+    if (scalar(@path) > 0) {
+      my $reduced_path = join("\n", @path);
+      AddEntry($result, $reduced_path, $count);
+    }
+  }
+
+  return $result;
+}
+
 sub RemoveUninterestingFrames {
   my $symbols = shift;
   my $profile = shift;
 
   # List of function names to skip
   my %skip = ();
   my $skip_regexp = 'NOMATCH';
   if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') {
@@ -2979,16 +3022,19 @@ sub RemoveUninterestingFrames {
           next;
         }
       }
       push(@path, $a);
     }
     my $reduced_path = join("\n", @path);
     AddEntry($result, $reduced_path, $count);
   }
+
+  $result = FilterFrames($symbols, $result);
+
   return $result;
 }
 
 # Reduce profile to granularity given by user
 sub ReduceProfile {
   my $symbols = shift;
   my $profile = shift;
   my $result = {};
@@ -3288,17 +3334,17 @@ sub ResolveRedirectionForCurl {
   }
   close(CMDLINE);
   return $url;
 }
 
 # Add a timeout flat to URL_FETCHER.  Returns a new list.
 sub AddFetchTimeout {
   my $timeout = shift;
-  my @fetcher = shift;
+  my @fetcher = @_;
   if (defined($timeout)) {
     if (join(" ", @fetcher) =~ m/\bcurl -s/) {
       push(@fetcher, "--max-time", sprintf("%d", $timeout));
     } elsif (join(" ", @fetcher) =~ m/\brpcget\b/) {
       push(@fetcher, sprintf("--deadline=%d", $timeout));
     }
   }
   return @fetcher;
@@ -3334,16 +3380,37 @@ sub ReadSymbols {
         printf STDERR ("Ignoring unknown variable in symbols list: " .
             "'%s' = '%s'\n", $variable, $value);
       }
     }
   }
   return $map;
 }
 
+sub URLEncode {
+  my $str = shift;
+  $str =~ s/([^A-Za-z0-9\-_.!~*'()])/ sprintf "%%%02x", ord $1 /eg;
+  return $str;
+}
+
+sub AppendSymbolFilterParams {
+  my $url = shift;
+  my @params = ();
+  if ($main::opt_retain ne '') {
+    push(@params, sprintf("retain=%s", URLEncode($main::opt_retain)));
+  }
+  if ($main::opt_exclude ne '') {
+    push(@params, sprintf("exclude=%s", URLEncode($main::opt_exclude)));
+  }
+  if (scalar @params > 0) {
+    $url = sprintf("%s?%s", $url, join("&", @params));
+  }
+  return $url;
+}
+
 # Fetches and processes symbols to prepare them for use in the profile output
 # code.  If the optional 'symbol_map' arg is not given, fetches symbols from
 # $SYMBOL_PAGE for all PC values found in profile.  Otherwise, the raw symbols
 # are assumed to have already been fetched into 'symbol_map' and are simply
 # extracted and processed.
 sub FetchSymbols {
   my $pcset = shift;
   my $symbol_map = shift;
@@ -3358,19 +3425,21 @@ sub FetchSymbols {
     print POSTFILE $post_data;
     close(POSTFILE);
 
     my $url = SymbolPageURL();
 
     my $command_line;
     if (join(" ", @URL_FETCHER) =~ m/\bcurl -s/) {
       $url = ResolveRedirectionForCurl($url);
+      $url = AppendSymbolFilterParams($url);
       $command_line = ShellEscape(@URL_FETCHER, "-d", "\@$main::tmpfile_sym",
                                   $url);
     } else {
+      $url = AppendSymbolFilterParams($url);
       $command_line = (ShellEscape(@URL_FETCHER, "--post", $url)
                        . " < " . ShellEscape($main::tmpfile_sym));
     }
     # We use c++filt in case $SYMBOL_PAGE gives us mangled symbols.
     my $escaped_cppfilt = ShellEscape($obj_tool_map{"c++filt"});
     open(SYMBOL, "$command_line | $escaped_cppfilt |") or error($command_line);
     $symbol_map = ReadSymbols(*SYMBOL{IO});
     close(SYMBOL);
--- a/memory/jemalloc/src/configure
+++ b/memory/jemalloc/src/configure
@@ -724,16 +724,17 @@ libdir
 psdir
 pdfdir
 dvidir
 htmldir
 infodir
 docdir
 oldincludedir
 includedir
+runstatedir
 localstatedir
 sharedstatedir
 sysconfdir
 datadir
 datarootdir
 libexecdir
 sbindir
 bindir
@@ -755,16 +756,17 @@ with_xslroot
 with_rpath
 enable_autogen
 enable_code_coverage
 with_mangling
 with_jemalloc_prefix
 with_export
 with_private_namespace
 with_install_suffix
+with_malloc_conf
 enable_cc_silence
 enable_debug
 enable_ivsalloc
 enable_stats
 enable_prof
 enable_prof_libunwind
 with_static_libunwind
 enable_prof_libgcc
@@ -827,16 +829,17 @@ x_libraries=NONE
 bindir='${exec_prefix}/bin'
 sbindir='${exec_prefix}/sbin'
 libexecdir='${exec_prefix}/libexec'
 datarootdir='${prefix}/share'
 datadir='${datarootdir}'
 sysconfdir='${prefix}/etc'
 sharedstatedir='${prefix}/com'
 localstatedir='${prefix}/var'
+runstatedir='${localstatedir}/run'
 includedir='${prefix}/include'
 oldincludedir='/usr/include'
 docdir='${datarootdir}/doc/${PACKAGE}'
 infodir='${datarootdir}/info'
 htmldir='${docdir}'
 dvidir='${docdir}'
 pdfdir='${docdir}'
 psdir='${docdir}'
@@ -1079,16 +1082,25 @@ do
     ac_prev=psdir ;;
   -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*)
     psdir=$ac_optarg ;;
 
   -q | -quiet | --quiet | --quie | --qui | --qu | --q \
   | -silent | --silent | --silen | --sile | --sil)
     silent=yes ;;
 
+  -runstatedir | --runstatedir | --runstatedi | --runstated \
+  | --runstate | --runstat | --runsta | --runst | --runs \
+  | --run | --ru | --r)
+    ac_prev=runstatedir ;;
+  -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \
+  | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \
+  | --run=* | --ru=* | --r=*)
+    runstatedir=$ac_optarg ;;
+
   -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
     ac_prev=sbindir ;;
   -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
   | --sbi=* | --sb=*)
     sbindir=$ac_optarg ;;
 
   -sharedstatedir | --sharedstatedir | --sharedstatedi \
   | --sharedstated | --sharedstate | --sharedstat | --sharedsta \
@@ -1216,17 +1228,17 @@ if test -n "$ac_unrecognized_opts"; then
     *)     $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;;
   esac
 fi
 
 # Check all directory arguments for consistency.
 for ac_var in	exec_prefix prefix bindir sbindir libexecdir datarootdir \
 		datadir sysconfdir sharedstatedir localstatedir includedir \
 		oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
-		libdir localedir mandir
+		libdir localedir mandir runstatedir
 do
   eval ac_val=\$$ac_var
   # Remove trailing slashes.
   case $ac_val in
     */ )
       ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'`
       eval $ac_var=\$ac_val;;
   esac
@@ -1369,16 +1381,17 @@ For better control, use the options belo
 
 Fine tuning of the installation directories:
   --bindir=DIR            user executables [EPREFIX/bin]
   --sbindir=DIR           system admin executables [EPREFIX/sbin]
   --libexecdir=DIR        program executables [EPREFIX/libexec]
   --sysconfdir=DIR        read-only single-machine data [PREFIX/etc]
   --sharedstatedir=DIR    modifiable architecture-independent data [PREFIX/com]
   --localstatedir=DIR     modifiable single-machine data [PREFIX/var]
+  --runstatedir=DIR       modifiable per-process data [LOCALSTATEDIR/run]
   --libdir=DIR            object code libraries [EPREFIX/lib]
   --includedir=DIR        C header files [PREFIX/include]
   --oldincludedir=DIR     C header files for non-gcc [/usr/include]
   --datarootdir=DIR       read-only arch.-independent data root [PREFIX/share]
   --datadir=DIR           read-only architecture-independent data [DATAROOTDIR]
   --infodir=DIR           info documentation [DATAROOTDIR/info]
   --localedir=DIR         locale-dependent data [DATAROOTDIR/locale]
   --mandir=DIR            man documentation [DATAROOTDIR/man]
@@ -1438,16 +1451,18 @@ Optional Packages:
   --with-mangling=<map>   Mangle symbols in <map>
   --with-jemalloc-prefix=<prefix>
                           Prefix to prepend to all public APIs
   --without-export        disable exporting jemalloc public APIs
   --with-private-namespace=<prefix>
                           Prefix to prepend to all library-private APIs
   --with-install-suffix=<suffix>
                           Suffix to append to all installed files
+  --with-malloc-conf=<malloc_conf>
+                          config.malloc_conf options string
   --with-static-libunwind=<libunwind.a>
                           Path to static libunwind library; use rather than
                           dynamically linking
   --with-lg-tiny-min=<lg-tiny-min>
                           Base 2 log of minimum tiny size class to support
   --with-lg-quantum=<lg-quantum>
                           Base 2 log of minimum allocation alignment
   --with-lg-page=<lg-page>
@@ -3566,16 +3581,52 @@ else
               { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
 $as_echo "no" >&6; }
               CFLAGS="${TCFLAGS}"
 
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Wshorten-64-to-32" >&5
+$as_echo_n "checking whether compiler supports -Wshorten-64-to-32... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-Wshorten-64-to-32"
+else
+  CFLAGS="${CFLAGS} -Wshorten-64-to-32"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_cflags_appended=-Wshorten-64-to-32
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  je_cv_cflags_appended=
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -pipe" >&5
 $as_echo_n "checking whether compiler supports -pipe... " >&6; }
 TCFLAGS="${CFLAGS}"
 if test "x${CFLAGS}" = "x" ; then
   CFLAGS="-pipe"
 else
   CFLAGS="${CFLAGS} -pipe"
 fi
@@ -4456,17 +4507,22 @@ if test "x${ac_cv_big_endian}" = "x1" ; 
 _ACEOF
 
 fi
 
 if test "x${je_cv_msvc}" = "xyes" -a "x${ac_cv_header_inttypes_h}" = "xno"; then
   CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat/C99"
 fi
 
-# The cast to long int works around a bug in the HP C Compiler
+if test "x${je_cv_msvc}" = "xyes" ; then
+  LG_SIZEOF_PTR=LG_SIZEOF_PTR_WIN
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: Using a predefined value for sizeof(void *): 4 for 32-bit, 8 for 64-bit" >&5
+$as_echo "Using a predefined value for sizeof(void *): 4 for 32-bit, 8 for 64-bit" >&6; }
+else
+  # The cast to long int works around a bug in the HP C Compiler
 # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
 # declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
 # This bug is HP SR number 8606223364.
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking size of void *" >&5
 $as_echo_n "checking size of void *... " >&6; }
 if ${ac_cv_sizeof_void_p+:} false; then :
   $as_echo_n "(cached) " >&6
 else
@@ -4489,22 +4545,23 @@ fi
 
 
 
 cat >>confdefs.h <<_ACEOF
 #define SIZEOF_VOID_P $ac_cv_sizeof_void_p
 _ACEOF
 
 
-if test "x${ac_cv_sizeof_void_p}" = "x8" ; then
-  LG_SIZEOF_PTR=3
-elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then
-  LG_SIZEOF_PTR=2
-else
-  as_fn_error $? "Unsupported pointer size: ${ac_cv_sizeof_void_p}" "$LINENO" 5
+  if test "x${ac_cv_sizeof_void_p}" = "x8" ; then
+    LG_SIZEOF_PTR=3
+  elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then
+    LG_SIZEOF_PTR=2
+  else
+    as_fn_error $? "Unsupported pointer size: ${ac_cv_sizeof_void_p}" "$LINENO" 5
+  fi
 fi
 cat >>confdefs.h <<_ACEOF
 #define LG_SIZEOF_PTR $LG_SIZEOF_PTR
 _ACEOF
 
 
 # The cast to long int works around a bug in the HP C Compiler
 # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
@@ -4595,16 +4652,61 @@ cat >>confdefs.h <<_ACEOF
 #define LG_SIZEOF_LONG $LG_SIZEOF_LONG
 _ACEOF
 
 
 # The cast to long int works around a bug in the HP C Compiler
 # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
 # declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
 # This bug is HP SR number 8606223364.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of long long" >&5
+$as_echo_n "checking size of long long... " >&6; }
+if ${ac_cv_sizeof_long_long+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (long long))" "ac_cv_sizeof_long_long"        "$ac_includes_default"; then :
+
+else
+  if test "$ac_cv_type_long_long" = yes; then
+     { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error 77 "cannot compute sizeof (long long)
+See \`config.log' for more details" "$LINENO" 5; }
+   else
+     ac_cv_sizeof_long_long=0
+   fi
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_long_long" >&5
+$as_echo "$ac_cv_sizeof_long_long" >&6; }
+
+
+
+cat >>confdefs.h <<_ACEOF
+#define SIZEOF_LONG_LONG $ac_cv_sizeof_long_long
+_ACEOF
+
+
+if test "x${ac_cv_sizeof_long_long}" = "x8" ; then
+  LG_SIZEOF_LONG_LONG=3
+elif test "x${ac_cv_sizeof_long_long}" = "x4" ; then
+  LG_SIZEOF_LONG_LONG=2
+else
+  as_fn_error $? "Unsupported long long size: ${ac_cv_sizeof_long_long}" "$LINENO" 5
+fi
+cat >>confdefs.h <<_ACEOF
+#define LG_SIZEOF_LONG_LONG $LG_SIZEOF_LONG_LONG
+_ACEOF
+
+
+# The cast to long int works around a bug in the HP C Compiler
+# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
+# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
+# This bug is HP SR number 8606223364.
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking size of intmax_t" >&5
 $as_echo_n "checking size of intmax_t... " >&6; }
 if ${ac_cv_sizeof_intmax_t+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (intmax_t))" "ac_cv_sizeof_intmax_t"        "$ac_includes_default"; then :
 
 else
@@ -4712,17 +4814,55 @@ shift; shift
 host_os=$*
 IFS=$ac_save_IFS
 case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac
 
 
 CPU_SPINWAIT=""
 case "${host_cpu}" in
   i686|x86_64)
-	if ${je_cv_pause+:} false; then :
+	if test "x${je_cv_msvc}" = "xyes" ; then
+	    if ${je_cv_pause_msvc+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether pause instruction MSVC is compilable" >&5
+$as_echo_n "checking whether pause instruction MSVC is compilable... " >&6; }
+if ${je_cv_pause_msvc+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+_mm_pause(); return 0;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  je_cv_pause_msvc=yes
+else
+  je_cv_pause_msvc=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_pause_msvc" >&5
+$as_echo "$je_cv_pause_msvc" >&6; }
+
+fi
+
+	    if test "x${je_cv_pause_msvc}" = "xyes" ; then
+		CPU_SPINWAIT='_mm_pause()'
+	    fi
+	else
+	    if ${je_cv_pause+:} false; then :
   $as_echo_n "(cached) " >&6
 else
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether pause instruction is compilable" >&5
 $as_echo_n "checking whether pause instruction is compilable... " >&6; }
 if ${je_cv_pause+:} false; then :
   $as_echo_n "(cached) " >&6
 else
@@ -4745,18 +4885,19 @@ fi
 rm -f core conftest.err conftest.$ac_objext \
     conftest$ac_exeext conftest.$ac_ext
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_pause" >&5
 $as_echo "$je_cv_pause" >&6; }
 
 fi
 
-	if test "x${je_cv_pause}" = "xyes" ; then
-	    CPU_SPINWAIT='__asm__ volatile("pause")'
+	    if test "x${je_cv_pause}" = "xyes" ; then
+		CPU_SPINWAIT='__asm__ volatile("pause")'
+	    fi
 	fi
 	;;
   powerpc)
 	cat >>confdefs.h <<_ACEOF
 #define HAVE_ALTIVEC
 _ACEOF
 
 	;;
@@ -5923,16 +6064,31 @@ if test "${with_install_suffix+set}" = s
 else
   INSTALL_SUFFIX=
 
 fi
 
 install_suffix="$INSTALL_SUFFIX"
 
 
+
+# Check whether --with-malloc_conf was given.
+if test "${with_malloc_conf+set}" = set; then :
+  withval=$with_malloc_conf; JEMALLOC_CONFIG_MALLOC_CONF="$with_malloc_conf"
+else
+  JEMALLOC_CONFIG_MALLOC_CONF=""
+
+fi
+
+config_malloc_conf="$JEMALLOC_CONFIG_MALLOC_CONF"
+cat >>confdefs.h <<_ACEOF
+#define JEMALLOC_CONFIG_MALLOC_CONF "$config_malloc_conf"
+_ACEOF
+
+
 je_="je_"
 
 
 cfgoutputs_in="Makefile.in"
 cfgoutputs_in="${cfgoutputs_in} jemalloc.pc.in"
 cfgoutputs_in="${cfgoutputs_in} doc/html.xsl.in"
 cfgoutputs_in="${cfgoutputs_in} doc/manpages.xsl.in"
 cfgoutputs_in="${cfgoutputs_in} doc/jemalloc.xml.in"
@@ -6778,16 +6934,18 @@ else
 fi
 rm -f core conftest.err conftest.$ac_objext \
     conftest$ac_exeext conftest.$ac_ext
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_gcc_builtin_ffsl" >&5
 $as_echo "$je_cv_gcc_builtin_ffsl" >&6; }
 
 if test "x${je_cv_gcc_builtin_ffsl}" = "xyes" ; then
+  $as_echo "#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll" >>confdefs.h
+
   $as_echo "#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl" >>confdefs.h
 
   $as_echo "#define JEMALLOC_INTERNAL_FFS __builtin_ffs" >>confdefs.h
 
 else
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program using ffsl is compilable" >&5
 $as_echo_n "checking whether a program using ffsl is compilable... " >&6; }
@@ -6821,16 +6979,18 @@ else
 fi
 rm -f core conftest.err conftest.$ac_objext \
     conftest$ac_exeext conftest.$ac_ext
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_function_ffsl" >&5
 $as_echo "$je_cv_function_ffsl" >&6; }
 
   if test "x${je_cv_function_ffsl}" = "xyes" ; then
+    $as_echo "#define JEMALLOC_INTERNAL_FFSLL ffsll" >>confdefs.h
+
     $as_echo "#define JEMALLOC_INTERNAL_FFSL ffsl" >>confdefs.h
 
     $as_echo "#define JEMALLOC_INTERNAL_FFS ffs" >>confdefs.h
 
   else
     as_fn_error $? "Cannot build without ffsl(3) or __builtin_ffsl()" "$LINENO" 5
   fi
 fi
@@ -6909,17 +7069,17 @@ main ()
 	return 1;
     }
     result = JEMALLOC_INTERNAL_FFSL(result) - 1;
 
     f = fopen("conftest.out", "w");
     if (f == NULL) {
 	return 1;
     }
-    fprintf(f, "%d\n", result);
+    fprintf(f, "%d", result);
     fclose(f);
 
     return 0;
 
   ;
   return 0;
 }
 _ACEOF
@@ -9676,16 +9836,18 @@ fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: JEMALLOC_PREFIX    : ${JEMALLOC_PREFIX}" >&5
 $as_echo "JEMALLOC_PREFIX    : ${JEMALLOC_PREFIX}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: JEMALLOC_PRIVATE_NAMESPACE" >&5
 $as_echo "JEMALLOC_PRIVATE_NAMESPACE" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result:                    : ${JEMALLOC_PRIVATE_NAMESPACE}" >&5
 $as_echo "                   : ${JEMALLOC_PRIVATE_NAMESPACE}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: install_suffix     : ${install_suffix}" >&5
 $as_echo "install_suffix     : ${install_suffix}" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: malloc_conf        : ${config_malloc_conf}" >&5
+$as_echo "malloc_conf        : ${config_malloc_conf}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: autogen            : ${enable_autogen}" >&5
 $as_echo "autogen            : ${enable_autogen}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: cc-silence         : ${enable_cc_silence}" >&5
 $as_echo "cc-silence         : ${enable_cc_silence}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: debug              : ${enable_debug}" >&5
 $as_echo "debug              : ${enable_debug}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: code-coverage      : ${enable_code_coverage}" >&5
 $as_echo "code-coverage      : ${enable_code_coverage}" >&6; }
--- a/memory/jemalloc/src/configure.ac
+++ b/memory/jemalloc/src/configure.ac
@@ -135,16 +135,17 @@ if test "x$CFLAGS" = "x" ; then
   no_CFLAGS="yes"
   if test "x$GCC" = "xyes" ; then
     JE_CFLAGS_APPEND([-std=gnu99])
     if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then
       AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT])
     fi
     JE_CFLAGS_APPEND([-Wall])
     JE_CFLAGS_APPEND([-Werror=declaration-after-statement])
+    JE_CFLAGS_APPEND([-Wshorten-64-to-32])
     JE_CFLAGS_APPEND([-pipe])
     JE_CFLAGS_APPEND([-g3])
   elif test "x$je_cv_msvc" = "xyes" ; then
     CC="$CC -nologo"
     JE_CFLAGS_APPEND([-Zi])
     JE_CFLAGS_APPEND([-MT])
     JE_CFLAGS_APPEND([-W3])
     JE_CFLAGS_APPEND([-FS])
@@ -161,23 +162,28 @@ AC_C_BIGENDIAN([ac_cv_big_endian=1], [ac
 if test "x${ac_cv_big_endian}" = "x1" ; then
   AC_DEFINE_UNQUOTED([JEMALLOC_BIG_ENDIAN], [ ])
 fi
 
 if test "x${je_cv_msvc}" = "xyes" -a "x${ac_cv_header_inttypes_h}" = "xno"; then
   CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat/C99"
 fi
 
-AC_CHECK_SIZEOF([void *])
-if test "x${ac_cv_sizeof_void_p}" = "x8" ; then
-  LG_SIZEOF_PTR=3
-elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then
-  LG_SIZEOF_PTR=2
+if test "x${je_cv_msvc}" = "xyes" ; then
+  LG_SIZEOF_PTR=LG_SIZEOF_PTR_WIN
+  AC_MSG_RESULT([Using a predefined value for sizeof(void *): 4 for 32-bit, 8 for 64-bit])
 else
-  AC_MSG_ERROR([Unsupported pointer size: ${ac_cv_sizeof_void_p}])
+  AC_CHECK_SIZEOF([void *])
+  if test "x${ac_cv_sizeof_void_p}" = "x8" ; then
+    LG_SIZEOF_PTR=3
+  elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then
+    LG_SIZEOF_PTR=2
+  else
+    AC_MSG_ERROR([Unsupported pointer size: ${ac_cv_sizeof_void_p}])
+  fi
 fi
 AC_DEFINE_UNQUOTED([LG_SIZEOF_PTR], [$LG_SIZEOF_PTR])
 
 AC_CHECK_SIZEOF([int])
 if test "x${ac_cv_sizeof_int}" = "x8" ; then
   LG_SIZEOF_INT=3
 elif test "x${ac_cv_sizeof_int}" = "x4" ; then
   LG_SIZEOF_INT=2
@@ -191,16 +197,26 @@ if test "x${ac_cv_sizeof_long}" = "x8" ;
   LG_SIZEOF_LONG=3
 elif test "x${ac_cv_sizeof_long}" = "x4" ; then
   LG_SIZEOF_LONG=2
 else
   AC_MSG_ERROR([Unsupported long size: ${ac_cv_sizeof_long}])
 fi
 AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG], [$LG_SIZEOF_LONG])
 
+AC_CHECK_SIZEOF([long long])
+if test "x${ac_cv_sizeof_long_long}" = "x8" ; then
+  LG_SIZEOF_LONG_LONG=3
+elif test "x${ac_cv_sizeof_long_long}" = "x4" ; then
+  LG_SIZEOF_LONG_LONG=2
+else
+  AC_MSG_ERROR([Unsupported long long size: ${ac_cv_sizeof_long_long}])
+fi
+AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG_LONG], [$LG_SIZEOF_LONG_LONG])
+
 AC_CHECK_SIZEOF([intmax_t])
 if test "x${ac_cv_sizeof_intmax_t}" = "x16" ; then
   LG_SIZEOF_INTMAX_T=4
 elif test "x${ac_cv_sizeof_intmax_t}" = "x8" ; then
   LG_SIZEOF_INTMAX_T=3
 elif test "x${ac_cv_sizeof_intmax_t}" = "x4" ; then
   LG_SIZEOF_INTMAX_T=2
 else
@@ -208,22 +224,32 @@ else
 fi
 AC_DEFINE_UNQUOTED([LG_SIZEOF_INTMAX_T], [$LG_SIZEOF_INTMAX_T])
 
 AC_CANONICAL_HOST
 dnl CPU-specific settings.
 CPU_SPINWAIT=""
 case "${host_cpu}" in
   i686|x86_64)
-	AC_CACHE_VAL([je_cv_pause],
-	  [JE_COMPILABLE([pause instruction], [],
-	                [[__asm__ volatile("pause"); return 0;]],
-	                [je_cv_pause])])
-	if test "x${je_cv_pause}" = "xyes" ; then
-	    CPU_SPINWAIT='__asm__ volatile("pause")'
+	if test "x${je_cv_msvc}" = "xyes" ; then
+	    AC_CACHE_VAL([je_cv_pause_msvc],
+	      [JE_COMPILABLE([pause instruction MSVC], [],
+					[[_mm_pause(); return 0;]],
+					[je_cv_pause_msvc])])
+	    if test "x${je_cv_pause_msvc}" = "xyes" ; then
+		CPU_SPINWAIT='_mm_pause()'
+	    fi
+	else
+	    AC_CACHE_VAL([je_cv_pause],
+	      [JE_COMPILABLE([pause instruction], [],
+					[[__asm__ volatile("pause"); return 0;]],
+					[je_cv_pause])])
+	    if test "x${je_cv_pause}" = "xyes" ; then
+		CPU_SPINWAIT='__asm__ volatile("pause")'
+	    fi
 	fi
 	;;
   powerpc)
 	AC_DEFINE_UNQUOTED([HAVE_ALTIVEC], [ ])
 	;;
   *)
 	;;
 esac
@@ -572,16 +598,25 @@ dnl Do not add suffix to installed files
 AC_ARG_WITH([install_suffix],
   [AS_HELP_STRING([--with-install-suffix=<suffix>], [Suffix to append to all installed files])],
   [INSTALL_SUFFIX="$with_install_suffix"],
   [INSTALL_SUFFIX=]
 )
 install_suffix="$INSTALL_SUFFIX"
 AC_SUBST([install_suffix])
 
+dnl Specify default malloc_conf.
+AC_ARG_WITH([malloc_conf],
+  [AS_HELP_STRING([--with-malloc-conf=<malloc_conf>], [config.malloc_conf options string])],
+  [JEMALLOC_CONFIG_MALLOC_CONF="$with_malloc_conf"],
+  [JEMALLOC_CONFIG_MALLOC_CONF=""]
+)
+config_malloc_conf="$JEMALLOC_CONFIG_MALLOC_CONF"
+AC_DEFINE_UNQUOTED([JEMALLOC_CONFIG_MALLOC_CONF], ["$config_malloc_conf"])
+
 dnl Substitute @je_@ in jemalloc_protos.h.in, primarily to make generation of
 dnl jemalloc_protos_jet.h easy.
 je_="je_"
 AC_SUBST([je_])
 
 cfgoutputs_in="Makefile.in"
 cfgoutputs_in="${cfgoutputs_in} jemalloc.pc.in"
 cfgoutputs_in="${cfgoutputs_in} doc/html.xsl.in"
@@ -1011,43 +1046,45 @@ if test "x$enable_cache_oblivious" = "x1
   AC_DEFINE([JEMALLOC_CACHE_OBLIVIOUS], [ ])
 fi
 AC_SUBST([enable_cache_oblivious])
 
 dnl ============================================================================
 dnl Check for  __builtin_ffsl(), then ffsl(3), and fail if neither are found.
 dnl One of those two functions should (theoretically) exist on all platforms
 dnl that jemalloc currently has a chance of functioning on without modification.
-dnl We additionally assume ffs() or __builtin_ffs() are defined if
+dnl We additionally assume ffs[ll]() or __builtin_ffs[ll]() are defined if
 dnl ffsl() or __builtin_ffsl() are defined, respectively.
 JE_COMPILABLE([a program using __builtin_ffsl], [
 #include <stdio.h>
 #include <strings.h>
 #include <string.h>
 ], [
 	{
 		int rv = __builtin_ffsl(0x08);
 		printf("%d\n", rv);
 	}
 ], [je_cv_gcc_builtin_ffsl])
 if test "x${je_cv_gcc_builtin_ffsl}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_INTERNAL_FFSLL], [__builtin_ffsll])
   AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [__builtin_ffsl])
   AC_DEFINE([JEMALLOC_INTERNAL_FFS], [__builtin_ffs])
 else
   JE_COMPILABLE([a program using ffsl], [
   #include <stdio.h>
   #include <strings.h>
   #include <string.h>
   ], [
 	{
 		int rv = ffsl(0x08);
 		printf("%d\n", rv);
 	}
   ], [je_cv_function_ffsl])
   if test "x${je_cv_function_ffsl}" = "xyes" ; then
+    AC_DEFINE([JEMALLOC_INTERNAL_FFSLL], [ffsll])
     AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [ffsl])
     AC_DEFINE([JEMALLOC_INTERNAL_FFS], [ffs])
   else
     AC_MSG_ERROR([Cannot build without ffsl(3) or __builtin_ffsl()])
   fi
 fi
 
 AC_ARG_WITH([lg_tiny_min],
@@ -1097,17 +1134,17 @@ if test "x$LG_PAGE" = "xdetect"; then
 	return 1;
     }
     result = JEMALLOC_INTERNAL_FFSL(result) - 1;
 
     f = fopen("conftest.out", "w");
     if (f == NULL) {
 	return 1;
     }
-    fprintf(f, "%d\n", result);
+    fprintf(f, "%d", result);
     fclose(f);
 
     return 0;
 ]])],
                              [je_cv_lg_page=`cat conftest.out`],
                              [je_cv_lg_page=undefined],
                              [je_cv_lg_page=12]))
 fi
@@ -1721,16 +1758,17 @@ AC_MSG_RESULT([srcroot            : ${sr
 AC_MSG_RESULT([abs_srcroot        : ${abs_srcroot}])
 AC_MSG_RESULT([objroot            : ${objroot}])
 AC_MSG_RESULT([abs_objroot        : ${abs_objroot}])
 AC_MSG_RESULT([])
 AC_MSG_RESULT([JEMALLOC_PREFIX    : ${JEMALLOC_PREFIX}])
 AC_MSG_RESULT([JEMALLOC_PRIVATE_NAMESPACE])
 AC_MSG_RESULT([                   : ${JEMALLOC_PRIVATE_NAMESPACE}])
 AC_MSG_RESULT([install_suffix     : ${install_suffix}])
+AC_MSG_RESULT([malloc_conf        : ${config_malloc_conf}])
 AC_MSG_RESULT([autogen            : ${enable_autogen}])
 AC_MSG_RESULT([cc-silence         : ${enable_cc_silence}])
 AC_MSG_RESULT([debug              : ${enable_debug}])
 AC_MSG_RESULT([code-coverage      : ${enable_code_coverage}])
 AC_MSG_RESULT([stats              : ${enable_stats}])
 AC_MSG_RESULT([prof               : ${enable_prof}])
 AC_MSG_RESULT([prof-libunwind     : ${enable_prof_libunwind}])
 AC_MSG_RESULT([prof-libgcc        : ${enable_prof_libgcc}])
--- a/memory/jemalloc/src/doc/jemalloc.xml.in
+++ b/memory/jemalloc/src/doc/jemalloc.xml.in
@@ -305,26 +305,24 @@
             arena index in the valid range.</para></listitem>
           </varlistentry>
         </variablelist>
       </para>
 
       <para>The <function>mallocx<parameter/></function> function allocates at
       least <parameter>size</parameter> bytes of memory, and returns a pointer
       to the base address of the allocation.  Behavior is undefined if
-      <parameter>size</parameter> is <constant>0</constant>, or if request size
-      overflows due to size class and/or alignment constraints.</para>
+      <parameter>size</parameter> is <constant>0</constant>.</para>
 
       <para>The <function>rallocx<parameter/></function> function resizes the
       allocation at <parameter>ptr</parameter> to be at least
       <parameter>size</parameter> bytes, and returns a pointer to the base
       address of the resulting allocation, which may or may not have moved from
       its original location.  Behavior is undefined if
-      <parameter>size</parameter> is <constant>0</constant>, or if request size
-      overflows due to size class and/or alignment constraints.</para>
+      <parameter>size</parameter> is <constant>0</constant>.</para>
 
       <para>The <function>xallocx<parameter/></function> function resizes the
       allocation at <parameter>ptr</parameter> in place to be at least
       <parameter>size</parameter> bytes, and returns the real size of the
       allocation.  If <parameter>extra</parameter> is non-zero, an attempt is
       made to resize the allocation to be at least <code
       language="C">(<parameter>size</parameter> +
       <parameter>extra</parameter>)</code> bytes, though inability to allocate
@@ -349,20 +347,20 @@
       size is the corresponding value returned by
       <function>nallocx<parameter/></function> or
       <function>sallocx<parameter/></function>.</para>
 
       <para>The <function>nallocx<parameter/></function> function allocates no
       memory, but it performs the same size computation as the
       <function>mallocx<parameter/></function> function, and returns the real
       size of the allocation that would result from the equivalent
-      <function>mallocx<parameter/></function> function call.  Behavior is
-      undefined if <parameter>size</parameter> is <constant>0</constant>, or if
-      request size overflows due to size class and/or alignment
-      constraints.</para>
+      <function>mallocx<parameter/></function> function call, or
+      <constant>0</constant> if the inputs exceed the maximum supported size
+      class and/or alignment.  Behavior is undefined if
+      <parameter>size</parameter> is <constant>0</constant>.</para>
 
       <para>The <function>mallctl<parameter/></function> function provides a
       general interface for introspecting the memory allocator, as well as
       setting modifiable parameters and triggering actions.  The
       period-separated <parameter>name</parameter> argument specifies a
       location in a tree-structured namespace; see the <xref
       linkend="mallctl_namespace" xrefstyle="template:%t"/> section for
       documentation on the tree contents.  To read a value, pass a pointer via
@@ -450,29 +448,30 @@ for (i = 0; i < nbins; i++) {
     </refsect2>
   </refsect1>
   <refsect1 id="tuning">
     <title>TUNING</title>
     <para>Once, when the first call is made to one of the memory allocation
     routines, the allocator initializes its internals based in part on various
     options that can be specified at compile- or run-time.</para>
 
-    <para>The string pointed to by the global variable
-    <varname>malloc_conf</varname>, the &ldquo;name&rdquo; of the file
-    referenced by the symbolic link named <filename
-    class="symlink">/etc/malloc.conf</filename>, and the value of the
+    <para>The string specified via <option>--with-malloc-conf</option>, the
+    string pointed to by the global variable <varname>malloc_conf</varname>, the
+    &ldquo;name&rdquo; of the file referenced by the symbolic link named
+    <filename class="symlink">/etc/malloc.conf</filename>, and the value of the
     environment variable <envar>MALLOC_CONF</envar>, will be interpreted, in
     that order, from left to right as options.  Note that
     <varname>malloc_conf</varname> may be read before
     <function>main<parameter/></function> is entered, so the declaration of
     <varname>malloc_conf</varname> should specify an initializer that contains
-    the final value to be read by jemalloc.  <varname>malloc_conf</varname> is
-    a compile-time setting, whereas <filename
-    class="symlink">/etc/malloc.conf</filename> and <envar>MALLOC_CONF</envar>
-    can be safely set any time prior to program invocation.</para>
+    the final value to be read by jemalloc.  <option>--with-malloc-conf</option>
+    and <varname>malloc_conf</varname> are compile-time mechanisms, whereas
+    <filename class="symlink">/etc/malloc.conf</filename> and
+    <envar>MALLOC_CONF</envar> can be safely set any time prior to program
+    invocation.</para>
 
     <para>An options string is a comma-separated list of option:value pairs.
     There is one key corresponding to each <link
     linkend="opt.abort"><mallctl>opt.*</mallctl></link> mallctl (see the <xref
     linkend="mallctl_namespace" xrefstyle="template:%t"/> section for options
     documentation).  For example, <literal>abort:true,narenas:1</literal> sets
     the <link linkend="opt.abort"><mallctl>opt.abort</mallctl></link> and <link
     linkend="opt.narenas"><mallctl>opt.narenas</mallctl></link> options.  Some
@@ -512,33 +511,28 @@ for (i = 0; i < nbins; i++) {
     <para>In addition to multiple arenas, unless
     <option>--disable-tcache</option> is specified during configuration, this
     allocator supports thread-specific caching for small and large objects, in
     order to make it possible to completely avoid synchronization for most
     allocation requests.  Such caching allows very fast allocation in the
     common case, but it increases memory usage and fragmentation, since a
     bounded number of objects can remain allocated in each thread cache.</para>
 
-    <para>Memory is conceptually broken into equal-sized chunks, where the
-    chunk size is a power of two that is greater than the page size.  Chunks
-    are always aligned to multiples of the chunk size.  This alignment makes it
-    possible to find metadata for user objects very quickly.</para>
-
-    <para>User objects are broken into three categories according to size:
-    small, large, and huge.  Small and large objects are managed entirely by
-    arenas; huge objects are additionally aggregated in a single data structure
-    that is shared by all threads.  Huge objects are typically used by
-    applications infrequently enough that this single data structure is not a
-    scalability issue.</para>
-
-    <para>Each chunk that is managed by an arena tracks its contents as runs of
+    <para>Memory is conceptually broken into equal-sized chunks, where the chunk
+    size is a power of two that is greater than the page size.  Chunks are
+    always aligned to multiples of the chunk size.  This alignment makes it
+    possible to find metadata for user objects very quickly.  User objects are
+    broken into three categories according to size: small, large, and huge.
+    Multiple small and large objects can reside within a single chunk, whereas
+    huge objects each have one or more chunks backing them.  Each chunk that
+    contains small and/or large objects tracks its contents as runs of
     contiguous pages (unused, backing a set of small objects, or backing one
-    large object).  The combination of chunk alignment and chunk page maps
-    makes it possible to determine all metadata regarding small and large
-    allocations in constant time.</para>
+    large object).  The combination of chunk alignment and chunk page maps makes
+    it possible to determine all metadata regarding small and large allocations
+    in constant time.</para>
 
     <para>Small objects are managed in groups by page runs.  Each run maintains
     a bitmap to track which regions are in use.  Allocation requests that are no
     more than half the quantum (8 or 16, depending on architecture) are rounded
     up to the nearest power of two that is at least <code
     language="C">sizeof(<type>double</type>)</code>.  All other object size
     classes are multiples of the quantum, spaced such that there are four size
     classes for each doubling in size, which limits internal fragmentation to
@@ -771,16 +765,27 @@ for (i = 0; i < nbins; i++) {
           <mallctl>config.lazy_lock</mallctl>
           (<type>bool</type>)
           <literal>r-</literal>
         </term>
         <listitem><para><option>--enable-lazy-lock</option> was specified
         during build configuration.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="config.malloc_conf">
+        <term>
+          <mallctl>config.malloc_conf</mallctl>
+          (<type>const char *</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Embedded configure-time-specified run-time options
+        string, empty unless <option>--with-malloc-conf</option> was specified
+        during build configuration.</para></listitem>
+      </varlistentry>
+
       <varlistentry id="config.munmap">
         <term>
           <mallctl>config.munmap</mallctl>
           (<type>bool</type>)
           <literal>r-</literal>
         </term>
         <listitem><para><option>--enable-munmap</option> was specified during
         build configuration.</para></listitem>
@@ -924,24 +929,38 @@ for (i = 0; i < nbins; i++) {
         silently clipped to the minimum/maximum supported size.  The default
         chunk size is 2 MiB (2^21).
         </para></listitem>
       </varlistentry>
 
       <varlistentry id="opt.narenas">
         <term>
           <mallctl>opt.narenas</mallctl>
-          (<type>size_t</type>)
+          (<type>unsigned</type>)
           <literal>r-</literal>
         </term>
         <listitem><para>Maximum number of arenas to use for automatic
         multiplexing of threads and arenas.  The default is four times the
         number of CPUs, or one if there is a single CPU.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="opt.purge">
+        <term>
+          <mallctl>opt.purge</mallctl>
+          (<type>const char *</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Purge mode is &ldquo;ratio&rdquo; (default) or
+        &ldquo;decay&rdquo;.  See <link
+        linkend="opt.lg_dirty_mult"><mallctl>opt.lg_dirty_mult</mallctl></link>
+        for details of the ratio mode.  See <link
+        linkend="opt.decay_time"><mallctl>opt.decay_time</mallctl></link> for
+        details of the decay mode.</para></listitem>
+      </varlistentry>
+
       <varlistentry id="opt.lg_dirty_mult">
         <term>
           <mallctl>opt.lg_dirty_mult</mallctl>
           (<type>ssize_t</type>)
           <literal>r-</literal>
         </term>
         <listitem><para>Per-arena minimum ratio (log base 2) of active to dirty
         pages.  Some dirty unused pages may be allowed to accumulate, within
@@ -954,16 +973,36 @@ for (i = 0; i < nbins; i++) {
         default minimum ratio is 8:1 (2^3:1); an option value of -1 will
         disable dirty page purging.  See <link
         linkend="arenas.lg_dirty_mult"><mallctl>arenas.lg_dirty_mult</mallctl></link>
         and <link
         linkend="arena.i.lg_dirty_mult"><mallctl>arena.&lt;i&gt;.lg_dirty_mult</mallctl></link>
         for related dynamic control options.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="opt.decay_time">
+        <term>
+          <mallctl>opt.decay_time</mallctl>
+          (<type>ssize_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Approximate time in seconds from the creation of a set
+        of unused dirty pages until an equivalent set of unused dirty pages is
+        purged and/or reused.  The pages are incrementally purged according to a
+        sigmoidal decay curve that starts and ends with zero purge rate.  A
+        decay time of 0 causes all unused dirty pages to be purged immediately
+        upon creation.  A decay time of -1 disables purging.  The default decay
+        time is 10 seconds.  See <link
+        linkend="arenas.decay_time"><mallctl>arenas.decay_time</mallctl></link>
+        and <link
+        linkend="arena.i.decay_time"><mallctl>arena.&lt;i&gt;.decay_time</mallctl></link>
+        for related dynamic control options.
+        </para></listitem>
+      </varlistentry>
+
       <varlistentry id="opt.stats_print">
         <term>
           <mallctl>opt.stats_print</mallctl>
           (<type>bool</type>)
           <literal>r-</literal>
         </term>
         <listitem><para>Enable/disable statistics printing at exit.  If
         enabled, the <function>malloc_stats_print<parameter/></function>
@@ -1145,17 +1184,18 @@ malloc_conf = "xmalloc:true";]]></progra
         option for information on interval-triggered profile dumping, the <link
         linkend="opt.prof_gdump"><mallctl>opt.prof_gdump</mallctl></link>
         option for information on high-water-triggered profile dumping, and the
         <link linkend="opt.prof_final"><mallctl>opt.prof_final</mallctl></link>
         option for final profile dumping.  Profile output is compatible with
         the <command>jeprof</command> command, which is based on the
         <command>pprof</command> that is developed as part of the <ulink
         url="http://code.google.com/p/gperftools/">gperftools
-        package</ulink>.</para></listitem>
+        package</ulink>.  See <link linkend="heap_profile_format">HEAP PROFILE
+        FORMAT</link> for heap profile format documentation.</para></listitem>
       </varlistentry>
 
       <varlistentry id="opt.prof_prefix">
         <term>
           <mallctl>opt.prof_prefix</mallctl>
           (<type>const char *</type>)
           <literal>r-</literal>
           [<option>--enable-prof</option>]
@@ -1462,17 +1502,17 @@ malloc_conf = "xmalloc:true";]]></progra
           <mallctl>tcache.flush</mallctl>
           (<type>unsigned</type>)
           <literal>-w</literal>
           [<option>--enable-tcache</option>]
         </term>
         <listitem><para>Flush the specified thread-specific cache (tcache).  The
         same considerations apply to this interface as to <link
         linkend="thread.tcache.flush"><mallctl>thread.tcache.flush</mallctl></link>,
-        except that the tcache will never be automatically be discarded.
+        except that the tcache will never be automatically discarded.
         </para></listitem>
       </varlistentry>
 
       <varlistentry id="tcache.destroy">
         <term>
           <mallctl>tcache.destroy</mallctl>
           (<type>unsigned</type>)
           <literal>-w</literal>
@@ -1484,22 +1524,37 @@ malloc_conf = "xmalloc:true";]]></progra
       </varlistentry>
 
       <varlistentry id="arena.i.purge">
         <term>
           <mallctl>arena.&lt;i&gt;.purge</mallctl>
           (<type>void</type>)
           <literal>--</literal>
         </term>
-        <listitem><para>Purge unused dirty pages for arena &lt;i&gt;, or for
+        <listitem><para>Purge all unused dirty pages for arena &lt;i&gt;, or for
         all arenas if &lt;i&gt; equals <link
         linkend="arenas.narenas"><mallctl>arenas.narenas</mallctl></link>.
         </para></listitem>
       </varlistentry>
 
+      <varlistentry id="arena.i.decay">
+        <term>
+          <mallctl>arena.&lt;i&gt;.decay</mallctl>
+          (<type>void</type>)
+          <literal>--</literal>
+        </term>
+        <listitem><para>Trigger decay-based purging of unused dirty pages for
+        arena &lt;i&gt;, or for all arenas if &lt;i&gt; equals <link
+        linkend="arenas.narenas"><mallctl>arenas.narenas</mallctl></link>.
+        The proportion of unused dirty pages to be purged depends on the current
+        time; see <link
+        linkend="opt.decay_time"><mallctl>opt.decay_time</mallctl></link> for
+        details.</para></listitem>
+      </varlistentry>
+
       <varlistentry id="arena.i.dss">
         <term>
           <mallctl>arena.&lt;i&gt;.dss</mallctl>
           (<type>const char *</type>)
           <literal>rw</literal>
         </term>
         <listitem><para>Set the precedence of dss allocation as related to mmap
         allocation for arena &lt;i&gt;, or for all arenas if &lt;i&gt; equals
@@ -1518,16 +1573,32 @@ malloc_conf = "xmalloc:true";]]></progra
         <listitem><para>Current per-arena minimum ratio (log base 2) of active
         to dirty pages for arena &lt;i&gt;.  Each time this interface is set and
         the ratio is increased, pages are synchronously purged as necessary to
         impose the new ratio.  See <link
         linkend="opt.lg_dirty_mult"><mallctl>opt.lg_dirty_mult</mallctl></link>
         for additional information.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="arena.i.decay_time">
+        <term>
+          <mallctl>arena.&lt;i&gt;.decay_time</mallctl>
+          (<type>ssize_t</type>)
+          <literal>rw</literal>
+        </term>
+        <listitem><para>Current per-arena approximate time in seconds from the
+        creation of a set of unused dirty pages until an equivalent set of
+        unused dirty pages is purged and/or reused.  Each time this interface is
+        set, all currently unused dirty pages are considered to have fully
+        decayed, which causes immediate purging of all unused dirty pages unless
+        the decay time is set to -1 (i.e. purging disabled).  See <link
+        linkend="opt.decay_time"><mallctl>opt.decay_time</mallctl></link> for
+        additional information.</para></listitem>
+      </varlistentry>
+
       <varlistentry id="arena.i.chunk_hooks">
         <term>
           <mallctl>arena.&lt;i&gt;.chunk_hooks</mallctl>
           (<type>chunk_hooks_t</type>)
           <literal>rw</literal>
         </term>
         <listitem><para>Get or set the chunk management hook functions for arena
         &lt;i&gt;.  The functions must be capable of operating on all extant
@@ -1752,16 +1823,31 @@ typedef struct {
         <listitem><para>Current default per-arena minimum ratio (log base 2) of
         active to dirty pages, used to initialize <link
         linkend="arena.i.lg_dirty_mult"><mallctl>arena.&lt;i&gt;.lg_dirty_mult</mallctl></link>
         during arena creation.  See <link
         linkend="opt.lg_dirty_mult"><mallctl>opt.lg_dirty_mult</mallctl></link>
         for additional information.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="arenas.decay_time">
+        <term>
+          <mallctl>arenas.decay_time</mallctl>
+          (<type>ssize_t</type>)
+          <literal>rw</literal>
+        </term>
+        <listitem><para>Current default per-arena approximate time in seconds
+        from the creation of a set of unused dirty pages until an equivalent set
+        of unused dirty pages is purged and/or reused, used to initialize <link
+        linkend="arena.i.decay_time"><mallctl>arena.&lt;i&gt;.decay_time</mallctl></link>
+        during arena creation.  See <link
+        linkend="opt.decay_time"><mallctl>opt.decay_time</mallctl></link> for
+        additional information.</para></listitem>
+      </varlistentry>
+
       <varlistentry id="arenas.quantum">
         <term>
           <mallctl>arenas.quantum</mallctl>
           (<type>size_t</type>)
           <literal>r-</literal>
         </term>
         <listitem><para>Quantum size.</para></listitem>
       </varlistentry>
@@ -2096,16 +2182,29 @@ typedef struct {
           <literal>r-</literal>
         </term>
         <listitem><para>Minimum ratio (log base 2) of active to dirty pages.
         See <link
         linkend="opt.lg_dirty_mult"><mallctl>opt.lg_dirty_mult</mallctl></link>
         for details.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="stats.arenas.i.decay_time">
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.decay_time</mallctl>
+          (<type>ssize_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Approximate time in seconds from the creation of a set
+        of unused dirty pages until an equivalent set of unused dirty pages is
+        purged and/or reused.  See <link
+        linkend="opt.decay_time"><mallctl>opt.decay_time</mallctl></link>
+        for details.</para></listitem>
+      </varlistentry>
+
       <varlistentry id="stats.arenas.i.nthreads">
         <term>
           <mallctl>stats.arenas.&lt;i&gt;.nthreads</mallctl>
           (<type>unsigned</type>)
           <literal>r-</literal>
         </term>
         <listitem><para>Number of threads currently assigned to
         arena.</para></listitem>
@@ -2518,16 +2617,63 @@ typedef struct {
           <literal>r-</literal>
           [<option>--enable-stats</option>]
         </term>
         <listitem><para>Current number of huge allocations for this size class.
         </para></listitem>
       </varlistentry>
     </variablelist>
   </refsect1>
+  <refsect1 id="heap_profile_format">
+    <title>HEAP PROFILE FORMAT</title>
+    <para>Although the heap profiling functionality was originally designed to
+    be compatible with the
+    <command>pprof</command> command that is developed as part of the <ulink
+    url="http://code.google.com/p/gperftools/">gperftools
+    package</ulink>, the addition of per thread heap profiling functionality
+    required a different heap profile format.  The <command>jeprof</command>
+    command is derived from <command>pprof</command>, with enhancements to
+    support the heap profile format described here.</para>
+
+    <para>In the following hypothetical heap profile, <constant>[...]</constant>
+    indicates elision for the sake of compactness.  <programlisting><![CDATA[
+heap_v2/524288
+  t*: 28106: 56637512 [0: 0]
+  [...]
+  t3: 352: 16777344 [0: 0]
+  [...]
+  t99: 17754: 29341640 [0: 0]
+  [...]
+@ 0x5f86da8 0x5f5a1dc [...] 0x29e4d4e 0xa200316 0xabb2988 [...]
+  t*: 13: 6688 [0: 0]
+  t3: 12: 6496 [0: ]
+  t99: 1: 192 [0: 0]
+[...]
+
+MAPPED_LIBRARIES:
+[...]]]></programlisting> The following matches the above heap profile, but most
+tokens are replaced with <constant>&lt;description&gt;</constant> to indicate
+descriptions of the corresponding fields.  <programlisting><![CDATA[
+<heap_profile_format_version>/<mean_sample_interval>
+  <aggregate>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
+  [...]
+  <thread_3_aggregate>: <curobjs>: <curbytes>[<cumobjs>: <cumbytes>]
+  [...]
+  <thread_99_aggregate>: <curobjs>: <curbytes>[<cumobjs>: <cumbytes>]
+  [...]
+@ <top_frame> <frame> [...] <frame> <frame> <frame> [...]
+  <backtrace_aggregate>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
+  <backtrace_thread_3>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
+  <backtrace_thread_99>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
+[...]
+
+MAPPED_LIBRARIES:
+</proc/<pid>/maps>]]></programlisting></para>
+  </refsect1>
+
   <refsect1 id="debugging_malloc_problems">
     <title>DEBUGGING MALLOC PROBLEMS</title>
     <para>When debugging, it is a good idea to configure/build jemalloc with
     the <option>--enable-debug</option> and <option>--enable-fill</option>
     options, and recompile the program with suitable options and symbols for
     debugger support.  When so configured, jemalloc incorporates a wide variety
     of run-time assertions that catch application errors such as double-free,
     write-after-free, etc.</para>
--- a/memory/jemalloc/src/include/jemalloc/internal/arena.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/arena.h
@@ -18,24 +18,37 @@
  *
  *   (nactive >> lg_dirty_mult) >= ndirty
  *
  * So, supposing that lg_dirty_mult is 3, there can be no less than 8 times as
  * many active pages as dirty pages.
  */
 #define	LG_DIRTY_MULT_DEFAULT	3
 
+typedef enum {
+	purge_mode_ratio = 0,
+	purge_mode_decay = 1,
+
+	purge_mode_limit = 2
+} purge_mode_t;
+#define	PURGE_DEFAULT		purge_mode_ratio
+/* Default decay time in seconds. */
+#define	DECAY_TIME_DEFAULT	10
+/* Number of event ticks between time checks. */
+#define	DECAY_NTICKS_PER_UPDATE	1000
+
 typedef struct arena_runs_dirty_link_s arena_runs_dirty_link_t;
 typedef struct arena_run_s arena_run_t;
 typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t;
 typedef struct arena_chunk_map_misc_s arena_chunk_map_misc_t;
 typedef struct arena_chunk_s arena_chunk_t;
 typedef struct arena_bin_info_s arena_bin_info_t;
 typedef struct arena_bin_s arena_bin_t;
 typedef struct arena_s arena_t;
+typedef struct arena_tdata_s arena_tdata_t;
 
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
 
 #ifdef JEMALLOC_ARENA_STRUCTS_A
 struct arena_run_s {
 	/* Index of bin this run is associated with. */
@@ -149,25 +162,24 @@ struct arena_chunk_map_misc_s {
 	rb_node(arena_chunk_map_misc_t)		rb_link;
 
 	union {
 		/* Linkage for list of dirty runs. */
 		arena_runs_dirty_link_t		rd;
 
 		/* Profile counters, used for large object runs. */
 		union {
-			void				*prof_tctx_pun;
-			prof_tctx_t			*prof_tctx;
+			void			*prof_tctx_pun;
+			prof_tctx_t		*prof_tctx;
 		};
 
 		/* Small region run metadata. */
 		arena_run_t			run;
 	};
 };
-typedef rb_tree(arena_chunk_map_misc_t) arena_avail_tree_t;
 typedef rb_tree(arena_chunk_map_misc_t) arena_run_tree_t;
 #endif /* JEMALLOC_ARENA_STRUCTS_A */
 
 #ifdef JEMALLOC_ARENA_STRUCTS_B
 /* Arena chunk header. */
 struct arena_chunk_s {
 	/*
 	 * A pointer to the arena that owns the chunk is stored within the node.
@@ -215,82 +227,82 @@ struct arena_chunk_s {
  *               \--------------------/
  *
  * reg_interval has at least the same minimum alignment as reg_size; this
  * preserves the alignment constraint that sa2u() depends on.  Alignment pad is
  * either 0 or redzone_size; it is present only if needed to align reg0_offset.
  */
 struct arena_bin_info_s {
 	/* Size of regions in a run for this bin's size class. */
-	size_t		reg_size;
+	size_t			reg_size;
 
 	/* Redzone size. */
-	size_t		redzone_size;
+	size_t			redzone_size;
 
 	/* Interval between regions (reg_size + (redzone_size << 1)). */
-	size_t		reg_interval;
+	size_t			reg_interval;
 
 	/* Total size of a run for this bin's size class. */
-	size_t		run_size;
+	size_t			run_size;
 
 	/* Total number of regions in a run for this bin's size class. */
-	uint32_t	nregs;
+	uint32_t		nregs;
 
 	/*
 	 * Metadata used to manipulate bitmaps for runs associated with this
 	 * bin.
 	 */
-	bitmap_info_t	bitmap_info;
+	bitmap_info_t		bitmap_info;
 
 	/* Offset of first region in a run for this bin's size class. */
-	uint32_t	reg0_offset;
+	uint32_t		reg0_offset;
 };
 
 struct arena_bin_s {
 	/*
 	 * All operations on runcur, runs, and stats require that lock be
 	 * locked.  Run allocation/deallocation are protected by the arena lock,
 	 * which may be acquired while holding one or more bin locks, but not
 	 * vise versa.
 	 */
-	malloc_mutex_t	lock;
+	malloc_mutex_t		lock;
 
 	/*
 	 * Current run being used to service allocations of this bin's size
 	 * class.
 	 */
-	arena_run_t	*runcur;
+	arena_run_t		*runcur;
 
 	/*
 	 * Tree of non-full runs.  This tree is used when looking for an
 	 * existing run when runcur is no longer usable.  We choose the
 	 * non-full run that is lowest in memory; this policy tends to keep
 	 * objects packed well, and it can also help reduce the number of
 	 * almost-empty chunks.
 	 */
-	arena_run_tree_t runs;
+	arena_run_tree_t	runs;
 
 	/* Bin statistics. */
-	malloc_bin_stats_t stats;
+	malloc_bin_stats_t	stats;
 };
 
 struct arena_s {
 	/* This arena's index within the arenas array. */
 	unsigned		ind;
 
 	/*
 	 * Number of threads currently assigned to this arena.  This field is
-	 * protected by arenas_lock.
+	 * synchronized via atomic operations.
 	 */
 	unsigned		nthreads;
 
 	/*
 	 * There are three classes of arena operations from a locking
 	 * perspective:
-	 * 1) Thread assignment (modifies nthreads) is protected by arenas_lock.
+	 * 1) Thread assignment (modifies nthreads) is synchronized via atomics.
 	 * 2) Bin-related operations are protected by bin locks.
 	 * 3) Chunk- and run-related operations are protected by this mutex.
 	 */
 	malloc_mutex_t		lock;
 
 	arena_stats_t		stats;
 	/*
 	 * List of tcaches for extant threads associated with this arena.
@@ -319,37 +331,31 @@ struct arena_s {
 	 * order to avoid interactions between multiple threads that could make
 	 * a single spare inadequate.
 	 */
 	arena_chunk_t		*spare;
 
 	/* Minimum ratio (log base 2) of nactive:ndirty. */
 	ssize_t			lg_dirty_mult;
 
-	/* True if a thread is currently executing arena_purge(). */
+	/* True if a thread is currently executing arena_purge_to_limit(). */
 	bool			purging;
 
 	/* Number of pages in active runs and huge regions. */
 	size_t			nactive;
 
 	/*
 	 * Current count of pages within unused runs that are potentially
 	 * dirty, and for which madvise(... MADV_DONTNEED) has not been called.
 	 * By tracking this, we can institute a limit on how much dirty unused
 	 * memory is mapped for each arena.
 	 */
 	size_t			ndirty;
 
 	/*
-	 * Size/address-ordered tree of this arena's available runs.  The tree
-	 * is used for first-best-fit run allocation.
-	 */
-	arena_avail_tree_t	runs_avail;
-
-	/*
 	 * Unused dirty memory this arena manages.  Dirty memory is conceptually
 	 * tracked as an arbitrarily interleaved LRU of dirty runs and cached
 	 * chunks, but the list linkage is actually semi-duplicated in order to
 	 * avoid extra arena_chunk_map_misc_t space overhead.
 	 *
 	 *   LRU-----------------------------------------------------------MRU
 	 *
 	 *        /-- arena ---\
@@ -370,16 +376,63 @@ struct arena_s {
 	 *        |            |    \-------/    \-------/   |         |
 	 *        |            |                             |         |
 	 *        |            |                             |         |
 	 *        \------------/                             \---------/
 	 */
 	arena_runs_dirty_link_t	runs_dirty;
 	extent_node_t		chunks_cache;
 
+	/*
+	 * Approximate time in seconds from the creation of a set of unused
+	 * dirty pages until an equivalent set of unused dirty pages is purged
+	 * and/or reused.
+	 */
+	ssize_t			decay_time;
+	/* decay_time / SMOOTHSTEP_NSTEPS. */
+	nstime_t		decay_interval;
+	/*
+	 * Time at which the current decay interval logically started.  We do
+	 * not actually advance to a new epoch until sometime after it starts
+	 * because of scheduling and computation delays, and it is even possible
+	 * to completely skip epochs.  In all cases, during epoch advancement we
+	 * merge all relevant activity into the most recently recorded epoch.
+	 */
+	nstime_t		decay_epoch;
+	/* decay_deadline randomness generator. */
+	uint64_t		decay_jitter_state;
+	/*
+	 * Deadline for current epoch.  This is the sum of decay_interval and
+	 * per epoch jitter which is a uniform random variable in
+	 * [0..decay_interval).  Epochs always advance by precise multiples of
+	 * decay_interval, but we randomize the deadline to reduce the
+	 * likelihood of arenas purging in lockstep.
+	 */
+	nstime_t		decay_deadline;
+	/*
+	 * Number of dirty pages at beginning of current epoch.  During epoch
+	 * advancement we use the delta between decay_ndirty and ndirty to
+	 * determine how many dirty pages, if any, were generated, and record
+	 * the result in decay_backlog.
+	 */
+	size_t			decay_ndirty;
+	/*
+	 * Memoized result of arena_decay_backlog_npages_limit() corresponding
+	 * to the current contents of decay_backlog, i.e. the limit on how many
+	 * pages are allowed to exist for the decay epochs.
+	 */
+	size_t			decay_backlog_npages_limit;
+	/*
+	 * Trailing log of how many unused dirty pages were generated during
+	 * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last
+	 * element is the most recent epoch.  Corresponding epoch times are
+	 * relative to decay_epoch.
+	 */
+	size_t			decay_backlog[SMOOTHSTEP_NSTEPS];
+
 	/* Extant huge allocations. */
 	ql_head(extent_node_t)	huge;
 	/* Synchronizes all huge allocation/update/deallocation. */
 	malloc_mutex_t		huge_mtx;
 
 	/*
 	 * Trees of chunks that were previously allocated (trees differ only in
 	 * node ordering).  These are used when allocating chunks, in an attempt
@@ -397,42 +450,62 @@ struct arena_s {
 	ql_head(extent_node_t)	node_cache;
 	malloc_mutex_t		node_cache_mtx;
 
 	/* User-configurable chunk hook functions. */
 	chunk_hooks_t		chunk_hooks;
 
 	/* bins is used to store trees of free regions. */
 	arena_bin_t		bins[NBINS];
+
+	/*
+	 * Quantized address-ordered trees of this arena's available runs.  The
+	 * trees are used for first-best-fit run allocation.
+	 */
+	arena_run_tree_t	runs_avail[1]; /* Dynamically sized. */
+};
+
+/* Used in conjunction with tsd for fast arena-related context lookup. */
+struct arena_tdata_s {
+	ticker_t		decay_ticker;
 };
 #endif /* JEMALLOC_ARENA_STRUCTS_B */
 
 #endif /* JEMALLOC_H_STRUCTS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
 static const size_t	large_pad =
 #ifdef JEMALLOC_CACHE_OBLIVIOUS
     PAGE
 #else
     0
 #endif
     ;
 
+extern purge_mode_t	opt_purge;
+extern const char	*purge_mode_names[];
 extern ssize_t		opt_lg_dirty_mult;
+extern ssize_t		opt_decay_time;
 
 extern arena_bin_info_t	arena_bin_info[NBINS];
 
 extern size_t		map_bias; /* Number of arena chunk header pages. */
 extern size_t		map_misc_offset;
 extern size_t		arena_maxrun; /* Max run size for arenas. */
 extern size_t		large_maxclass; /* Max large size class. */
+extern size_t		run_quantize_max; /* Max run_quantize_*() input. */
 extern unsigned		nlclasses; /* Number of large size classes. */
 extern unsigned		nhclasses; /* Number of huge size classes. */
 
+#ifdef JEMALLOC_JET
+typedef size_t (run_quantize_t)(size_t);
+extern run_quantize_t *run_quantize_floor;
+extern run_quantize_t *run_quantize_ceil;
+#endif
 void	arena_chunk_cache_maybe_insert(arena_t *arena, extent_node_t *node,
     bool cache);
 void	arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node,
     bool cache);
 extent_node_t	*arena_node_alloc(arena_t *arena);
 void	arena_node_dalloc(arena_t *arena, extent_node_t *node);
 void	*arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment,
     bool *zero);
@@ -440,70 +513,80 @@ void	arena_chunk_dalloc_huge(arena_t *ar
 void	arena_chunk_ralloc_huge_similar(arena_t *arena, void *chunk,
     size_t oldsize, size_t usize);
 void	arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk,
     size_t oldsize, size_t usize);
 bool	arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk,
     size_t oldsize, size_t usize, bool *zero);
 ssize_t	arena_lg_dirty_mult_get(arena_t *arena);
 bool	arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult);
+ssize_t	arena_decay_time_get(arena_t *arena);
+bool	arena_decay_time_set(arena_t *arena, ssize_t decay_time);
 void	arena_maybe_purge(arena_t *arena);
-void	arena_purge_all(arena_t *arena);
-void	arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin,
+void	arena_purge(arena_t *arena, bool all);
+void	arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin,
     szind_t binind, uint64_t prof_accumbytes);
 void	arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info,
     bool zero);
 #ifdef JEMALLOC_JET
 typedef void (arena_redzone_corruption_t)(void *, size_t, bool, size_t,
     uint8_t);
 extern arena_redzone_corruption_t *arena_redzone_corruption;
 typedef void (arena_dalloc_junk_small_t)(void *, arena_bin_info_t *);
 extern arena_dalloc_junk_small_t *arena_dalloc_junk_small;
 #else
 void	arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info);
 #endif
 void	arena_quarantine_junk_small(void *ptr, size_t usize);
-void	*arena_malloc_small(arena_t *arena, size_t size, szind_t ind,
-    bool zero);
-void	*arena_malloc_large(arena_t *arena, size_t size, szind_t ind,
-    bool zero);
+void	*arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t ind, bool zero);
+void	*arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind,
+    bool zero, tcache_t *tcache);
 void	*arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize,
     size_t alignment, bool zero, tcache_t *tcache);
 void	arena_prof_promoted(const void *ptr, size_t size);
 void	arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk,
     void *ptr, arena_chunk_map_bits_t *bitselm);
 void	arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
     size_t pageind, arena_chunk_map_bits_t *bitselm);
-void	arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr,
-    size_t pageind);
+void	arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk,
+    void *ptr, size_t pageind);
 #ifdef JEMALLOC_JET
 typedef void (arena_dalloc_junk_large_t)(void *, size_t);
 extern arena_dalloc_junk_large_t *arena_dalloc_junk_large;
 #else
 void	arena_dalloc_junk_large(void *ptr, size_t usize);
 #endif
 void	arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk,
     void *ptr);
-void	arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr);
+void	arena_dalloc_large(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk,
+    void *ptr);
 #ifdef JEMALLOC_JET
 typedef void (arena_ralloc_junk_large_t)(void *, size_t, size_t);
 extern arena_ralloc_junk_large_t *arena_ralloc_junk_large;
 #endif
-bool	arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size,
+bool	arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
     size_t extra, bool zero);
 void	*arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize,
     size_t size, size_t alignment, bool zero, tcache_t *tcache);
 dss_prec_t	arena_dss_prec_get(arena_t *arena);
 bool	arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec);
 ssize_t	arena_lg_dirty_mult_default_get(void);
 bool	arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult);
-void	arena_stats_merge(arena_t *arena, const char **dss,
-    ssize_t *lg_dirty_mult, size_t *nactive, size_t *ndirty,
-    arena_stats_t *astats, malloc_bin_stats_t *bstats,
+ssize_t	arena_decay_time_default_get(void);
+bool	arena_decay_time_default_set(ssize_t decay_time);
+void	arena_basic_stats_merge(arena_t *arena, unsigned *nthreads,
+    const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time,
+    size_t *nactive, size_t *ndirty);
+void	arena_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss,
+    ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive,
+    size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats,
     malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats);
+unsigned	arena_nthreads_get(arena_t *arena);
+void	arena_nthreads_inc(arena_t *arena);
+void	arena_nthreads_dec(arena_t *arena);
 arena_t	*arena_new(unsigned ind);
 bool	arena_boot(void);
 void	arena_prefork(arena_t *arena);
 void	arena_postfork_parent(arena_t *arena);
 void	arena_postfork_child(arena_t *arena);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
@@ -549,22 +632,24 @@ void	arena_mapbits_small_set(arena_chunk
 void	arena_metadata_allocated_add(arena_t *arena, size_t size);
 void	arena_metadata_allocated_sub(arena_t *arena, size_t size);
 size_t	arena_metadata_allocated_get(arena_t *arena);
 bool	arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes);
 bool	arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes);
 bool	arena_prof_accum(arena_t *arena, uint64_t accumbytes);
 szind_t	arena_ptr_small_binind_get(const void *ptr, size_t mapbits);
 szind_t	arena_bin_index(arena_t *arena, arena_bin_t *bin);
-unsigned	arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
+size_t	arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
     const void *ptr);
 prof_tctx_t	*arena_prof_tctx_get(const void *ptr);
 void	arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx);
 void	arena_prof_tctx_reset(const void *ptr, size_t usize,
     const void *old_ptr, prof_tctx_t *old_tctx);
+void	arena_decay_ticks(tsd_t *tsd, arena_t *arena, unsigned nticks);
+void	arena_decay_tick(tsd_t *tsd, arena_t *arena);
 void	*arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind,
     bool zero, tcache_t *tcache, bool slow_path);
 arena_t	*arena_aalloc(const void *ptr);
 size_t	arena_salloc(const void *ptr, bool demote);
 void	arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path);
 void	arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache);
 #endif
 
@@ -967,63 +1052,62 @@ arena_ptr_small_binind_get(const void *p
 		assert(arena_mapbits_large_get(chunk, pageind) == 0);
 		assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
 		rpages_ind = pageind - arena_mapbits_small_runind_get(chunk,
 		    pageind);
 		miscelm = arena_miscelm_get(chunk, rpages_ind);
 		run = &miscelm->run;
 		run_binind = run->binind;
 		bin = &arena->bins[run_binind];
-		actual_binind = bin - arena->bins;
+		actual_binind = (szind_t)(bin - arena->bins);
 		assert(run_binind == actual_binind);
 		bin_info = &arena_bin_info[actual_binind];
 		rpages = arena_miscelm_to_rpages(miscelm);
 		assert(((uintptr_t)ptr - ((uintptr_t)rpages +
 		    (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval
 		    == 0);
 	}
 
 	return (binind);
 }
 #  endif /* JEMALLOC_ARENA_INLINE_A */
 
 #  ifdef JEMALLOC_ARENA_INLINE_B
 JEMALLOC_INLINE szind_t
 arena_bin_index(arena_t *arena, arena_bin_t *bin)
 {
-	szind_t binind = bin - arena->bins;
+	szind_t binind = (szind_t)(bin - arena->bins);
 	assert(binind < NBINS);
 	return (binind);
 }
 
-JEMALLOC_INLINE unsigned
+JEMALLOC_INLINE size_t
 arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr)
 {
-	unsigned shift, diff, regind;
-	size_t interval;
+	size_t diff, interval, shift, regind;
 	arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run);
 	void *rpages = arena_miscelm_to_rpages(miscelm);
 
 	/*
 	 * Freeing a pointer lower than region zero can cause assertion
 	 * failure.
 	 */
 	assert((uintptr_t)ptr >= (uintptr_t)rpages +
 	    (uintptr_t)bin_info->reg0_offset);
 
 	/*
 	 * Avoid doing division with a variable divisor if possible.  Using
 	 * actual division here can reduce allocator throughput by over 20%!
 	 */
-	diff = (unsigned)((uintptr_t)ptr - (uintptr_t)rpages -
+	diff = (size_t)((uintptr_t)ptr - (uintptr_t)rpages -
 	    bin_info->reg0_offset);
 
 	/* Rescale (factor powers of 2 out of the numerator and denominator). */
 	interval = bin_info->reg_interval;
-	shift = jemalloc_ffs(interval) - 1;
+	shift = ffs_zu(interval) - 1;
 	diff >>= shift;
 	interval >>= shift;
 
 	if (interval == 1) {
 		/* The divisor was a power of 2. */
 		regind = diff;
 	} else {
 		/*
@@ -1035,31 +1119,31 @@ arena_run_regind(arena_run_t *run, arena
 		 * becomes
 		 *
 		 *   (X * interval_invs[D - 3]) >> SIZE_INV_SHIFT
 		 *
 		 * We can omit the first three elements, because we never
 		 * divide by 0, and 1 and 2 are both powers of two, which are
 		 * handled above.
 		 */
-#define	SIZE_INV_SHIFT	((sizeof(unsigned) << 3) - LG_RUN_MAXREGS)
-#define	SIZE_INV(s)	(((1U << SIZE_INV_SHIFT) / (s)) + 1)
-		static const unsigned interval_invs[] = {
+#define	SIZE_INV_SHIFT	((sizeof(size_t) << 3) - LG_RUN_MAXREGS)
+#define	SIZE_INV(s)	(((ZU(1) << SIZE_INV_SHIFT) / (s)) + 1)
+		static const size_t interval_invs[] = {
 		    SIZE_INV(3),
 		    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
 		    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
 		    SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
 		    SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
 		    SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
 		    SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
 		    SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
 		};
 
-		if (likely(interval <= ((sizeof(interval_invs) /
-		    sizeof(unsigned)) + 2))) {
+		if (likely(interval <= ((sizeof(interval_invs) / sizeof(size_t))
+		    + 2))) {
 			regind = (diff * interval_invs[interval - 3]) >>
 			    SIZE_INV_SHIFT;
 		} else
 			regind = diff / interval;
 #undef SIZE_INV
 #undef SIZE_INV_SHIFT
 	}
 	assert(diff == regind * interval);
@@ -1154,19 +1238,40 @@ arena_prof_tctx_reset(const void *ptr, s
 			elm = arena_miscelm_get(chunk, pageind);
 			atomic_write_p(&elm->prof_tctx_pun,
 			    (prof_tctx_t *)(uintptr_t)1U);
 		} else
 			huge_prof_tctx_reset(ptr);
 	}
 }
 
+JEMALLOC_ALWAYS_INLINE void
+arena_decay_ticks(tsd_t *tsd, arena_t *arena, unsigned nticks)
+{
+	ticker_t *decay_ticker;
+
+	if (unlikely(tsd == NULL))
+		return;
+	decay_ticker = decay_ticker_get(tsd, arena->ind);
+	if (unlikely(decay_ticker == NULL))
+		return;
+	if (unlikely(ticker_ticks(decay_ticker, nticks)))
+		arena_purge(arena, false);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+arena_decay_tick(tsd_t *tsd, arena_t *arena)
+{
+
+	arena_decay_ticks(tsd, arena, 1);
+}
+
 JEMALLOC_ALWAYS_INLINE void *
-arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind,
-    bool zero, tcache_t *tcache, bool slow_path)
+arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero,
+    tcache_t *tcache, bool slow_path)
 {
 
 	assert(size != 0);
 
 	if (likely(tcache != NULL)) {
 		if (likely(size <= SMALL_MAXCLASS)) {
 			return (tcache_alloc_small(tsd, arena, tcache, size,
 			    ind, zero, slow_path));
@@ -1174,25 +1279,17 @@ arena_malloc(tsd_t *tsd, arena_t *arena,
 		if (likely(size <= tcache_maxclass)) {
 			return (tcache_alloc_large(tsd, arena, tcache, size,
 			    ind, zero, slow_path));
 		}
 		/* (size > tcache_maxclass) case falls through. */
 		assert(size > tcache_maxclass);
 	}
 
-	arena = arena_choose(tsd, arena);
-	if (unlikely(arena == NULL))
-		return (NULL);
-
-	if (likely(size <= SMALL_MAXCLASS))
-		return (arena_malloc_small(arena, size, ind, zero));
-	if (likely(size <= large_maxclass))
-		return (arena_malloc_large(arena, size, ind, zero));
-	return (huge_malloc(tsd, arena, size, zero, tcache));
+	return (arena_malloc_hard(tsd, arena, size, ind, zero, tcache));
 }
 
 JEMALLOC_ALWAYS_INLINE arena_t *
 arena_aalloc(const void *ptr)
 {
 	arena_chunk_t *chunk;
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
@@ -1268,32 +1365,32 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcac
 		if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) {
 			/* Small allocation. */
 			if (likely(tcache != NULL)) {
 				szind_t binind = arena_ptr_small_binind_get(ptr,
 				    mapbits);
 				tcache_dalloc_small(tsd, tcache, ptr, binind,
 				    slow_path);
 			} else {
-				arena_dalloc_small(extent_node_arena_get(
+				arena_dalloc_small(tsd, extent_node_arena_get(
 				    &chunk->node), chunk, ptr, pageind);
 			}
 		} else {
 			size_t size = arena_mapbits_large_size_get(chunk,
 			    pageind);
 
 			assert(config_cache_oblivious || ((uintptr_t)ptr &
 			    PAGE_MASK) == 0);
 
 			if (likely(tcache != NULL) && size - large_pad <=
 			    tcache_maxclass) {
 				tcache_dalloc_large(tsd, tcache, ptr, size -
 				    large_pad, slow_path);
 			} else {
-				arena_dalloc_large(extent_node_arena_get(
+				arena_dalloc_large(tsd, extent_node_arena_get(
 				    &chunk->node), chunk, ptr);
 			}
 		}
 	} else
 		huge_dalloc(tsd, ptr, tcache);
 }
 
 JEMALLOC_ALWAYS_INLINE void
@@ -1301,47 +1398,50 @@ arena_sdalloc(tsd_t *tsd, void *ptr, siz
 {
 	arena_chunk_t *chunk;
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 	if (likely(chunk != ptr)) {
 		if (config_prof && opt_prof) {
 			size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >>
 			    LG_PAGE;
-			assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
+			assert(arena_mapbits_allocated_get(chunk, pageind) !=
+			    0);
 			if (arena_mapbits_large_get(chunk, pageind) != 0) {
 				/*
 				 * Make sure to use promoted size, not request
 				 * size.
 				 */
 				size = arena_mapbits_large_size_get(chunk,
 				    pageind) - large_pad;
 			}
 		}
 		assert(s2u(size) == s2u(arena_salloc(ptr, false)));
 
 		if (likely(size <= SMALL_MAXCLASS)) {
 			/* Small allocation. */
 			if (likely(tcache != NULL)) {
 				szind_t binind = size2index(size);
-				tcache_dalloc_small(tsd, tcache, ptr, binind, true);
+				tcache_dalloc_small(tsd, tcache, ptr, binind,
+				    true);
 			} else {
 				size_t pageind = ((uintptr_t)ptr -
 				    (uintptr_t)chunk) >> LG_PAGE;
-				arena_dalloc_small(extent_node_arena_get(
+				arena_dalloc_small(tsd, extent_node_arena_get(
 				    &chunk->node), chunk, ptr, pageind);
 			}
 		} else {
 			assert(config_cache_oblivious || ((uintptr_t)ptr &
 			    PAGE_MASK) == 0);
 
-			if (likely(tcache != NULL) && size <= tcache_maxclass)
-				tcache_dalloc_large(tsd, tcache, ptr, size, true);
-			else {
-				arena_dalloc_large(extent_node_arena_get(
+			if (likely(tcache != NULL) && size <= tcache_maxclass) {
+				tcache_dalloc_large(tsd, tcache, ptr, size,
+				    true);
+			} else {
+				arena_dalloc_large(tsd, extent_node_arena_get(
 				    &chunk->node), chunk, ptr);
 			}
 		}
 	} else
 		huge_dalloc(tsd, ptr, tcache);
 }
 #  endif /* JEMALLOC_ARENA_INLINE_B */
 #endif
--- a/memory/jemalloc/src/include/jemalloc/internal/atomic.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/atomic.h
@@ -23,18 +23,18 @@
  * All arithmetic functions return the arithmetic result of the atomic
  * operation.  Some atomic operation APIs return the value prior to mutation, in
  * which case the following functions must redundantly compute the result so
  * that it can be returned.  These functions are normally inlined, so the extra
  * operations can be optimized away if the return values aren't used by the
  * callers.
  *
  *   <t> atomic_read_<t>(<t> *p) { return (*p); }
- *   <t> atomic_add_<t>(<t> *p, <t> x) { return (*p + x); }
- *   <t> atomic_sub_<t>(<t> *p, <t> x) { return (*p - x); }
+ *   <t> atomic_add_<t>(<t> *p, <t> x) { return (*p += x); }
+ *   <t> atomic_sub_<t>(<t> *p, <t> x) { return (*p -= x); }
  *   bool atomic_cas_<t>(<t> *p, <t> c, <t> s)
  *   {
  *     if (*p != c)
  *       return (true);
  *     *p = s;
  *     return (false);
  *   }
  *   void atomic_write_<t>(<t> *p, <t> x) { *p = x; }
--- a/memory/jemalloc/src/include/jemalloc/internal/bitmap.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/bitmap.h
@@ -10,16 +10,25 @@ typedef struct bitmap_info_s bitmap_info
 typedef unsigned long bitmap_t;
 #define	LG_SIZEOF_BITMAP	LG_SIZEOF_LONG
 
 /* Number of bits per group. */
 #define	LG_BITMAP_GROUP_NBITS		(LG_SIZEOF_BITMAP + 3)
 #define	BITMAP_GROUP_NBITS		(ZU(1) << LG_BITMAP_GROUP_NBITS)
 #define	BITMAP_GROUP_NBITS_MASK		(BITMAP_GROUP_NBITS-1)
 
+/*
+ * Do some analysis on how big the bitmap is before we use a tree.  For a brute
+ * force linear search, if we would have to call ffsl more than 2^3 times, use a
+ * tree instead.
+ */
+#if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3
+#  define USE_TREE
+#endif
+
 /* Number of groups required to store a given number of bits. */
 #define	BITMAP_BITS2GROUPS(nbits)					\
     ((nbits + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS)
 
 /*
  * Number of groups required at a particular level for a given number of bits.
  */
 #define	BITMAP_GROUPS_L0(nbits)						\
@@ -43,16 +52,18 @@ typedef unsigned long bitmap_t;
 #define	BITMAP_GROUPS_3_LEVEL(nbits)					\
     (BITMAP_GROUPS_2_LEVEL(nbits) + BITMAP_GROUPS_L2(nbits))
 #define	BITMAP_GROUPS_4_LEVEL(nbits)					\
     (BITMAP_GROUPS_3_LEVEL(nbits) + BITMAP_GROUPS_L3(nbits))
 
 /*
  * Maximum number of groups required to support LG_BITMAP_MAXBITS.
  */
+#ifdef USE_TREE
+
 #if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS
 #  define BITMAP_GROUPS_MAX	BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS)
 #elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2
 #  define BITMAP_GROUPS_MAX	BITMAP_GROUPS_2_LEVEL(BITMAP_MAXBITS)
 #elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 3
 #  define BITMAP_GROUPS_MAX	BITMAP_GROUPS_3_LEVEL(BITMAP_MAXBITS)
 #elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 4
 #  define BITMAP_GROUPS_MAX	BITMAP_GROUPS_4_LEVEL(BITMAP_MAXBITS)
@@ -60,47 +71,57 @@ typedef unsigned long bitmap_t;
 #  error "Unsupported bitmap size"
 #endif
 
 /* Maximum number of levels possible. */
 #define	BITMAP_MAX_LEVELS						\
     (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP)				\
     + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP)
 
+#else /* USE_TREE */
+
+#define	BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS)
+
+#endif /* USE_TREE */
+
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
 
 struct bitmap_level_s {
 	/* Offset of this level's groups within the array of groups. */
 	size_t group_offset;
 };
 
 struct bitmap_info_s {
 	/* Logical number of bits in bitmap (stored at bottom level). */
 	size_t nbits;
 
+#ifdef USE_TREE
 	/* Number of levels necessary for nbits. */
 	unsigned nlevels;
 
 	/*
 	 * Only the first (nlevels+1) elements are used, and levels are ordered
 	 * bottom to top (e.g. the bottom level is stored in levels[0]).
 	 */
 	bitmap_level_t levels[BITMAP_MAX_LEVELS+1];
+#else /* USE_TREE */
+	/* Number of groups necessary for nbits. */
+	size_t ngroups;
+#endif /* USE_TREE */
 };
 
 #endif /* JEMALLOC_H_STRUCTS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
 void	bitmap_info_init(bitmap_info_t *binfo, size_t nbits);
-size_t	bitmap_info_ngroups(const bitmap_info_t *binfo);
-size_t	bitmap_size(size_t nbits);
 void	bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo);
+size_t	bitmap_size(const bitmap_info_t *binfo);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
 bool	bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo);
 bool	bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit);
@@ -108,123 +129,146 @@ void	bitmap_set(bitmap_t *bitmap, const 
 size_t	bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo);
 void	bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_))
 JEMALLOC_INLINE bool
 bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo)
 {
-	unsigned rgoff = binfo->levels[binfo->nlevels].group_offset - 1;
+#ifdef USE_TREE
+	size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1;
 	bitmap_t rg = bitmap[rgoff];
 	/* The bitmap is full iff the root group is 0. */
 	return (rg == 0);
+#else
+	size_t i;
+
+	for (i = 0; i < binfo->ngroups; i++) {
+		if (bitmap[i] != 0)
+			return (false);
+	}
+	return (true);
+#endif
 }
 
 JEMALLOC_INLINE bool
 bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
 {
 	size_t goff;
 	bitmap_t g;
 
 	assert(bit < binfo->nbits);
 	goff = bit >> LG_BITMAP_GROUP_NBITS;
 	g = bitmap[goff];
-	return (!(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))));
+	return (!(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))));
 }
 
 JEMALLOC_INLINE void
 bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
 {
 	size_t goff;
 	bitmap_t *gp;
 	bitmap_t g;
 
 	assert(bit < binfo->nbits);
 	assert(!bitmap_get(bitmap, binfo, bit));
 	goff = bit >> LG_BITMAP_GROUP_NBITS;
 	gp = &bitmap[goff];
 	g = *gp;
-	assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)));
-	g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
+	assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK)));
+	g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK);
 	*gp = g;
 	assert(bitmap_get(bitmap, binfo, bit));
+#ifdef USE_TREE
 	/* Propagate group state transitions up the tree. */
 	if (g == 0) {
 		unsigned i;
 		for (i = 1; i < binfo->nlevels; i++) {
 			bit = goff;
 			goff = bit >> LG_BITMAP_GROUP_NBITS;
 			gp = &bitmap[binfo->levels[i].group_offset + goff];
 			g = *gp;
-			assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)));
-			g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
+			assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK)));
+			g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK);
 			*gp = g;
 			if (g != 0)
 				break;
 		}
 	}
+#endif
 }
 
 /* sfu: set first unset. */
 JEMALLOC_INLINE size_t
 bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo)
 {
 	size_t bit;
 	bitmap_t g;
 	unsigned i;
 
 	assert(!bitmap_full(bitmap, binfo));
 
+#ifdef USE_TREE
 	i = binfo->nlevels - 1;
 	g = bitmap[binfo->levels[i].group_offset];
-	bit = jemalloc_ffsl(g) - 1;
+	bit = ffs_lu(g) - 1;
 	while (i > 0) {
 		i--;
 		g = bitmap[binfo->levels[i].group_offset + bit];
-		bit = (bit << LG_BITMAP_GROUP_NBITS) + (jemalloc_ffsl(g) - 1);
+		bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffs_lu(g) - 1);
 	}
-
+#else
+	i = 0;
+	g = bitmap[0];
+	while ((bit = ffs_lu(g)) == 0) {
+		i++;
+		g = bitmap[i];
+	}
+	bit = (bit - 1) + (i << 6);
+#endif
 	bitmap_set(bitmap, binfo, bit);
 	return (bit);
 }
 
 JEMALLOC_INLINE void
 bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
 {
 	size_t goff;
 	bitmap_t *gp;
 	bitmap_t g;
-	bool propagate;
+	UNUSED bool propagate;
 
 	assert(bit < binfo->nbits);
 	assert(bitmap_get(bitmap, binfo, bit));
 	goff = bit >> LG_BITMAP_GROUP_NBITS;
 	gp = &bitmap[goff];
 	g = *gp;
 	propagate = (g == 0);
-	assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0);
-	g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
+	assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) == 0);
+	g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK);
 	*gp = g;
 	assert(!bitmap_get(bitmap, binfo, bit));
+#ifdef USE_TREE
 	/* Propagate group state transitions up the tree. */
 	if (propagate) {
 		unsigned i;
 		for (i = 1; i < binfo->nlevels; i++) {
 			bit = goff;
 			goff = bit >> LG_BITMAP_GROUP_NBITS;
 			gp = &bitmap[binfo->levels[i].group_offset + goff];
 			g = *gp;
 			propagate = (g == 0);
-			assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)))
+			assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK)))
 			    == 0);
-			g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
+			g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK);
 			*gp = g;
 			if (!propagate)
 				break;
 		}
 	}
+#endif /* USE_TREE */
 }
 
 #endif
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/internal/chunk_mmap.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/chunk_mmap.h
@@ -4,18 +4,18 @@
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
 
 #endif /* JEMALLOC_H_STRUCTS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
-void	*chunk_alloc_mmap(size_t size, size_t alignment, bool *zero,
-    bool *commit);
+void	*chunk_alloc_mmap(void *new_addr, size_t size, size_t alignment,
+    bool *zero, bool *commit);
 bool	chunk_dalloc_mmap(void *chunk, size_t size);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/internal/ckh.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/ckh.h
@@ -35,19 +35,17 @@ struct ckh_s {
 	uint64_t	ngrows;
 	uint64_t	nshrinks;
 	uint64_t	nshrinkfails;
 	uint64_t	ninserts;
 	uint64_t	nrelocs;
 #endif
 
 	/* Used for pseudo-random number generation. */
-#define	CKH_A		1103515241
-#define	CKH_C		12347
-	uint32_t	prng_state;
+	uint64_t	prng_state;
 
 	/* Total number of items. */
 	size_t		count;
 
 	/*
 	 * Minimum and current number of hash table buckets.  There are
 	 * 2^LG_CKH_BUCKET_CELLS cells per bucket.
 	 */
@@ -69,17 +67,17 @@ struct ckh_s {
 bool	ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
     ckh_keycomp_t *keycomp);
 void	ckh_delete(tsd_t *tsd, ckh_t *ckh);
 size_t	ckh_count(ckh_t *ckh);
 bool	ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data);
 bool	ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data);
 bool	ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key,
     void **data);
-bool	ckh_search(ckh_t *ckh, const void *seachkey, void **key, void **data);
+bool	ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data);
 void	ckh_string_hash(const void *key, size_t r_hash[2]);
 bool	ckh_string_keycomp(const void *k1, const void *k2);
 void	ckh_pointer_hash(const void *key, size_t r_hash[2]);
 bool	ckh_pointer_keycomp(const void *k1, const void *k2);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
--- a/memory/jemalloc/src/include/jemalloc/internal/ctl.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/ctl.h
@@ -30,18 +30,22 @@ struct ctl_indexed_node_s {
 	const ctl_named_node_t	*(*index)(const size_t *, size_t, size_t);
 };
 
 struct ctl_arena_stats_s {
 	bool			initialized;
 	unsigned		nthreads;
 	const char		*dss;
 	ssize_t			lg_dirty_mult;
+	ssize_t			decay_time;
 	size_t			pactive;
 	size_t			pdirty;
+
+	/* The remainder are only populated if config_stats is true. */
+
 	arena_stats_t		astats;
 
 	/* Aggregate stats for small size classes, based on bin stats. */
 	size_t			allocated_small;
 	uint64_t		nmalloc_small;
 	uint64_t		ndalloc_small;
 	uint64_t		nrequests_small;
 
--- a/memory/jemalloc/src/include/jemalloc/internal/hash.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/hash.h
@@ -1,11 +1,11 @@
 /*
  * The following hash function is based on MurmurHash3, placed into the public
- * domain by Austin Appleby.  See http://code.google.com/p/smhasher/ for
+ * domain by Austin Appleby.  See https://github.com/aappleby/smhasher for
  * details.
  */
 /******************************************************************************/
 #ifdef JEMALLOC_H_TYPES
 
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
@@ -44,23 +44,39 @@ hash_rotl_64(uint64_t x, int8_t r)
 
 	return ((x << r) | (x >> (64 - r)));
 }
 
 JEMALLOC_INLINE uint32_t
 hash_get_block_32(const uint32_t *p, int i)
 {
 
+	/* Handle unaligned read. */
+	if (unlikely((uintptr_t)p & (sizeof(uint32_t)-1)) != 0) {
+		uint32_t ret;
+
+		memcpy(&ret, &p[i], sizeof(uint32_t));
+		return (ret);
+	}
+
 	return (p[i]);
 }
 
 JEMALLOC_INLINE uint64_t
 hash_get_block_64(const uint64_t *p, int i)
 {
 
+	/* Handle unaligned read. */
+	if (unlikely((uintptr_t)p & (sizeof(uint64_t)-1)) != 0) {
+		uint64_t ret;
+
+		memcpy(&ret, &p[i], sizeof(uint64_t));
+		return (ret);
+	}
+
 	return (p[i]);
 }
 
 JEMALLOC_INLINE uint32_t
 hash_fmix_32(uint32_t h)
 {
 
 	h ^= h >> 16;
@@ -316,21 +332,26 @@ hash_x64_128(const void *key, const int 
 	r_out[1] = h2;
 }
 
 /******************************************************************************/
 /* API. */
 JEMALLOC_INLINE void
 hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2])
 {
+
+	assert(len <= INT_MAX); /* Unfortunate implementation limitation. */
+
 #if (LG_SIZEOF_PTR == 3 && !defined(JEMALLOC_BIG_ENDIAN))
-	hash_x64_128(key, len, seed, (uint64_t *)r_hash);
+	hash_x64_128(key, (int)len, seed, (uint64_t *)r_hash);
 #else
-	uint64_t hashes[2];
-	hash_x86_128(key, len, seed, hashes);
-	r_hash[0] = (size_t)hashes[0];
-	r_hash[1] = (size_t)hashes[1];
+	{
+		uint64_t hashes[2];
+		hash_x86_128(key, (int)len, seed, hashes);
+		r_hash[0] = (size_t)hashes[0];
+		r_hash[1] = (size_t)hashes[1];
+	}
 #endif
 }
 #endif
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/internal/huge.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/huge.h
@@ -4,22 +4,22 @@
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
 
 #endif /* JEMALLOC_H_STRUCTS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
-void	*huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero,
+void	*huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero,
     tcache_t *tcache);
-void	*huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment,
+void	*huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment,
     bool zero, tcache_t *tcache);
-bool	huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min,
-    size_t usize_max, bool zero);
+bool	huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize,
+    size_t usize_min, size_t usize_max, bool zero);
 void	*huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize,
     size_t usize, size_t alignment, bool zero, tcache_t *tcache);
 #ifdef JEMALLOC_JET
 typedef void (huge_dalloc_junk_t)(void *, size_t);
 extern huge_dalloc_junk_t *huge_dalloc_junk;
 #endif
 void	huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache);
 arena_t	*huge_aalloc(const void *ptr);
--- a/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal.h.in
@@ -44,16 +44,17 @@ static const bool config_fill =
     ;
 static const bool config_lazy_lock =
 #ifdef JEMALLOC_LAZY_LOCK
     true
 #else
     false
 #endif
     ;
+static const char * const config_malloc_conf = JEMALLOC_CONFIG_MALLOC_CONF;
 static const bool config_prof =
 #ifdef JEMALLOC_PROF
     true
 #else
     false
 #endif
     ;
 static const bool config_prof_libgcc =
@@ -350,22 +351,25 @@ typedef unsigned szind_t;
 #    endif
 #  endif
 #  define VARIABLE_ARRAY(type, name, count) \
 	type *name = alloca(sizeof(type) * (count))
 #else
 #  define VARIABLE_ARRAY(type, name, count) type name[(count)]
 #endif
 
+#include "jemalloc/internal/nstime.h"
 #include "jemalloc/internal/valgrind.h"
 #include "jemalloc/internal/util.h"
 #include "jemalloc/internal/atomic.h"
 #include "jemalloc/internal/prng.h"
+#include "jemalloc/internal/ticker.h"
 #include "jemalloc/internal/ckh.h"
 #include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/smoothstep.h"
 #include "jemalloc/internal/stats.h"
 #include "jemalloc/internal/ctl.h"
 #include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/tsd.h"
 #include "jemalloc/internal/mb.h"
 #include "jemalloc/internal/extent.h"
 #include "jemalloc/internal/arena.h"
 #include "jemalloc/internal/bitmap.h"
@@ -378,22 +382,25 @@ typedef unsigned szind_t;
 #include "jemalloc/internal/hash.h"
 #include "jemalloc/internal/quarantine.h"
 #include "jemalloc/internal/prof.h"
 
 #undef JEMALLOC_H_TYPES
 /******************************************************************************/
 #define	JEMALLOC_H_STRUCTS
 
+#include "jemalloc/internal/nstime.h"
 #include "jemalloc/internal/valgrind.h"
 #include "jemalloc/internal/util.h"
 #include "jemalloc/internal/atomic.h"
 #include "jemalloc/internal/prng.h"
+#include "jemalloc/internal/ticker.h"
 #include "jemalloc/internal/ckh.h"
 #include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/smoothstep.h"
 #include "jemalloc/internal/stats.h"
 #include "jemalloc/internal/ctl.h"
 #include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/mb.h"
 #include "jemalloc/internal/bitmap.h"
 #define	JEMALLOC_ARENA_STRUCTS_A
 #include "jemalloc/internal/arena.h"
 #undef JEMALLOC_ARENA_STRUCTS_A
@@ -421,64 +428,71 @@ extern bool	opt_abort;
 extern const char	*opt_junk;
 extern bool	opt_junk_alloc;
 extern bool	opt_junk_free;
 extern size_t	opt_quarantine;
 extern bool	opt_redzone;
 extern bool	opt_utrace;
 extern bool	opt_xmalloc;
 extern bool	opt_zero;
-extern size_t	opt_narenas;
+extern unsigned	opt_narenas;
 
 extern bool	in_valgrind;
 
 /* Number of CPUs. */
-extern unsigned		ncpus;
+extern unsigned	ncpus;
+
+/*
+ * Arenas that are used to service external requests.  Not all elements of the
+ * arenas array are necessarily used; arenas are created lazily as needed.
+ */
+extern arena_t	**arenas;
 
 /*
  * index2size_tab encodes the same information as could be computed (at
  * unacceptable cost in some code paths) by index2size_compute().
  */
 extern size_t const	index2size_tab[NSIZES+1];
 /*
  * size2index_tab is a compact lookup table that rounds request sizes up to
  * size classes.  In order to reduce cache footprint, the table is compressed,
  * and all accesses are via size2index().
  */
 extern uint8_t const	size2index_tab[];
 
-arena_t	*a0get(void);
 void	*a0malloc(size_t size);
 void	a0dalloc(void *ptr);
 void	*bootstrap_malloc(size_t size);
 void	*bootstrap_calloc(size_t num, size_t size);
 void	bootstrap_free(void *ptr);
 arena_t	*arenas_extend(unsigned ind);
+unsigned	narenas_total_get(void);
 arena_t	*arena_init(unsigned ind);
-unsigned	narenas_total_get(void);
-arena_t	*arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing);
+arena_tdata_t	*arena_tdata_get_hard(tsd_t *tsd, unsigned ind);
 arena_t	*arena_choose_hard(tsd_t *tsd);
 void	arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind);
-unsigned	arena_nbound(unsigned ind);
 void	thread_allocated_cleanup(tsd_t *tsd);
 void	thread_deallocated_cleanup(tsd_t *tsd);
 void	arena_cleanup(tsd_t *tsd);
-void	arenas_cache_cleanup(tsd_t *tsd);
-void	narenas_cache_cleanup(tsd_t *tsd);
-void	arenas_cache_bypass_cleanup(tsd_t *tsd);
+void	arenas_tdata_cleanup(tsd_t *tsd);
+void	narenas_tdata_cleanup(tsd_t *tsd);
+void	arenas_tdata_bypass_cleanup(tsd_t *tsd);
 void	jemalloc_prefork(void);
 void	jemalloc_postfork_parent(void);
 void	jemalloc_postfork_child(void);
 
+#include "jemalloc/internal/nstime.h"
 #include "jemalloc/internal/valgrind.h"
 #include "jemalloc/internal/util.h"
 #include "jemalloc/internal/atomic.h"
 #include "jemalloc/internal/prng.h"
+#include "jemalloc/internal/ticker.h"
 #include "jemalloc/internal/ckh.h"
 #include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/smoothstep.h"
 #include "jemalloc/internal/stats.h"
 #include "jemalloc/internal/ctl.h"
 #include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/mb.h"
 #include "jemalloc/internal/bitmap.h"
 #include "jemalloc/internal/extent.h"
 #include "jemalloc/internal/arena.h"
 #include "jemalloc/internal/base.h"
@@ -491,22 +505,25 @@ void	jemalloc_postfork_child(void);
 #include "jemalloc/internal/quarantine.h"
 #include "jemalloc/internal/prof.h"
 #include "jemalloc/internal/tsd.h"
 
 #undef JEMALLOC_H_EXTERNS
 /******************************************************************************/
 #define	JEMALLOC_H_INLINES
 
+#include "jemalloc/internal/nstime.h"
 #include "jemalloc/internal/valgrind.h"
 #include "jemalloc/internal/util.h"
 #include "jemalloc/internal/atomic.h"
 #include "jemalloc/internal/prng.h"
+#include "jemalloc/internal/ticker.h"
 #include "jemalloc/internal/ckh.h"
 #include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/smoothstep.h"
 #include "jemalloc/internal/stats.h"
 #include "jemalloc/internal/ctl.h"
 #include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/tsd.h"
 #include "jemalloc/internal/mb.h"
 #include "jemalloc/internal/extent.h"
 #include "jemalloc/internal/base.h"
 #include "jemalloc/internal/rtree.h"
@@ -521,60 +538,61 @@ szind_t	size2index(size_t size);
 size_t	index2size_compute(szind_t index);
 size_t	index2size_lookup(szind_t index);
 size_t	index2size(szind_t index);
 size_t	s2u_compute(size_t size);
 size_t	s2u_lookup(size_t size);
 size_t	s2u(size_t size);
 size_t	sa2u(size_t size, size_t alignment);
 arena_t	*arena_choose(tsd_t *tsd, arena_t *arena);
-arena_t	*arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing,
+arena_tdata_t	*arena_tdata_get(tsd_t *tsd, unsigned ind,
     bool refresh_if_missing);
+arena_t	*arena_get(unsigned ind, bool init_if_missing);
+ticker_t	*decay_ticker_get(tsd_t *tsd, unsigned ind);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
 JEMALLOC_INLINE szind_t
 size2index_compute(size_t size)
 {
 
 #if (NTBINS != 0)
 	if (size <= (ZU(1) << LG_TINY_MAXCLASS)) {
-		size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1;
-		size_t lg_ceil = lg_floor(pow2_ceil(size));
+		szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1;
+		szind_t lg_ceil = lg_floor(pow2_ceil_zu(size));
 		return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin);
 	}
 #endif
 	{
-		size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ?
+		szind_t x = unlikely(ZI(size) < 0) ? ((size<<1) ?
 		    (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1))
 		    : lg_floor((size<<1)-1);
-		size_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 :
+		szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 :
 		    x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM);
-		size_t grp = shift << LG_SIZE_CLASS_GROUP;
+		szind_t grp = shift << LG_SIZE_CLASS_GROUP;
 
-		size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1)
+		szind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1)
 		    ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1;
 
 		size_t delta_inverse_mask = ZI(-1) << lg_delta;
-		size_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) &
+		szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) &
 		    ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1);
 
-		size_t index = NTBINS + grp + mod;
+		szind_t index = NTBINS + grp + mod;
 		return (index);
 	}
 }
 
 JEMALLOC_ALWAYS_INLINE szind_t
 size2index_lookup(size_t size)
 {
 
 	assert(size <= LOOKUP_MAXCLASS);
 	{
-		size_t ret = ((size_t)(size2index_tab[(size-1) >>
-		    LG_TINY_MIN]));
+		szind_t ret = (size2index_tab[(size-1) >> LG_TINY_MIN]);
 		assert(ret == size2index_compute(size));
 		return (ret);
 	}
 }
 
 JEMALLOC_ALWAYS_INLINE szind_t
 size2index(size_t size)
 {
@@ -619,28 +637,28 @@ index2size_lookup(szind_t index)
 	assert(ret == index2size_compute(index));
 	return (ret);
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
 index2size(szind_t index)
 {
 
-	assert(index <= NSIZES);
+	assert(index < NSIZES);
 	return (index2size_lookup(index));
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
 s2u_compute(size_t size)
 {
 
 #if (NTBINS > 0)
 	if (size <= (ZU(1) << LG_TINY_MAXCLASS)) {
 		size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1;
-		size_t lg_ceil = lg_floor(pow2_ceil(size));
+		size_t lg_ceil = lg_floor(pow2_ceil_zu(size));
 		return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) :
 		    (ZU(1) << lg_ceil));
 	}
 #endif
 	{
 		size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ?
 		    (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1))
 		    : lg_floor((size<<1)-1);
@@ -722,27 +740,26 @@ sa2u(size_t size, size_t alignment)
 		/*
 		 * Calculate the size of the over-size run that arena_palloc()
 		 * would need to allocate in order to guarantee the alignment.
 		 */
 		if (usize + large_pad + alignment - PAGE <= arena_maxrun)
 			return (usize);
 	}
 
-	/* Huge size class.  Beware of size_t overflow. */
+	/* Huge size class.  Beware of overflow. */
+
+	if (unlikely(alignment > HUGE_MAXCLASS))
+		return (0);
 
 	/*
 	 * We can't achieve subchunk alignment, so round up alignment to the
 	 * minimum that can actually be supported.
 	 */
 	alignment = CHUNK_CEILING(alignment);
-	if (alignment == 0) {
-		/* size_t overflow. */
-		return (0);
-	}
 
 	/* Make sure result is a huge size class. */
 	if (size <= chunksize)
 		usize = chunksize;
 	else {
 		usize = s2u(size);
 		if (usize < size) {
 			/* size_t overflow. */
@@ -771,42 +788,66 @@ arena_choose(tsd_t *tsd, arena_t *arena)
 		return (arena);
 
 	if (unlikely((ret = tsd_arena_get(tsd)) == NULL))
 		ret = arena_choose_hard(tsd);
 
 	return (ret);
 }
 
-JEMALLOC_INLINE arena_t *
-arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing,
-    bool refresh_if_missing)
+JEMALLOC_INLINE arena_tdata_t *
+arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing)
 {
-	arena_t *arena;
-	arena_t **arenas_cache = tsd_arenas_cache_get(tsd);
+	arena_tdata_t *tdata;
+	arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd);
 
-	/* init_if_missing requires refresh_if_missing. */
-	assert(!init_if_missing || refresh_if_missing);
-
-	if (unlikely(arenas_cache == NULL)) {
-		/* arenas_cache hasn't been initialized yet. */
-		return (arena_get_hard(tsd, ind, init_if_missing));
+	if (unlikely(arenas_tdata == NULL)) {
+		/* arenas_tdata hasn't been initialized yet. */
+		return (arena_tdata_get_hard(tsd, ind));
 	}
-	if (unlikely(ind >= tsd_narenas_cache_get(tsd))) {
+	if (unlikely(ind >= tsd_narenas_tdata_get(tsd))) {
 		/*
-		 * ind is invalid, cache is old (too small), or arena to be
+		 * ind is invalid, cache is old (too small), or tdata to be
 		 * initialized.
 		 */
-		return (refresh_if_missing ? arena_get_hard(tsd, ind,
-		    init_if_missing) : NULL);
+		return (refresh_if_missing ? arena_tdata_get_hard(tsd, ind) :
+		    NULL);
 	}
-	arena = arenas_cache[ind];
-	if (likely(arena != NULL) || !refresh_if_missing)
-		return (arena);
-	return (arena_get_hard(tsd, ind, init_if_missing));
+
+	tdata = &arenas_tdata[ind];
+	if (likely(tdata != NULL) || !refresh_if_missing)
+		return (tdata);
+	return (arena_tdata_get_hard(tsd, ind));
+}
+
+JEMALLOC_INLINE arena_t *
+arena_get(unsigned ind, bool init_if_missing)
+{
+	arena_t *ret;
+
+	assert(ind <= MALLOCX_ARENA_MAX);
+
+	ret = arenas[ind];
+	if (unlikely(ret == NULL)) {
+		ret = atomic_read_p((void *)&arenas[ind]);
+		if (init_if_missing && unlikely(ret == NULL))
+			ret = arena_init(ind);
+	}
+	return (ret);
+}
+
+JEMALLOC_INLINE ticker_t *
+decay_ticker_get(tsd_t *tsd, unsigned ind)
+{
+	arena_tdata_t *tdata;
+
+	tdata = arena_tdata_get(tsd, ind, true);
+	if (unlikely(tdata == NULL))
+		return (NULL);
+	return (&tdata->decay_ticker);
 }
 #endif
 
 #include "jemalloc/internal/bitmap.h"
 /*
  * Include portions of arena.h interleaved with tcache.h in order to resolve
  * circular dependencies.
  */
@@ -848,18 +889,18 @@ void	isdalloct(tsd_t *tsd, void *ptr, si
 void	isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache);
 void	*iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
     size_t extra, size_t alignment, bool zero, tcache_t *tcache,
     arena_t *arena);
 void	*iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
     size_t alignment, bool zero, tcache_t *tcache, arena_t *arena);
 void	*iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
     size_t alignment, bool zero);
-bool	ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra,
-    size_t alignment, bool zero);
+bool	ixalloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
+    size_t extra, size_t alignment, bool zero);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
 JEMALLOC_ALWAYS_INLINE arena_t *
 iaalloc(const void *ptr)
 {
 
 	assert(ptr != NULL);
@@ -954,18 +995,18 @@ ipalloct(tsd_t *tsd, size_t usize, size_
 
 	return (ipallocztm(tsd, usize, alignment, zero, tcache, false, arena));
 }
 
 JEMALLOC_ALWAYS_INLINE void *
 ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero)
 {
 
-	return (ipallocztm(tsd, usize, alignment, zero, tcache_get(tsd,
-	    NULL), false, NULL));
+	return (ipallocztm(tsd, usize, alignment, zero, tcache_get(tsd, true),
+	    false, NULL));
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
 ivsalloc(const void *ptr, bool demote)
 {
 	extent_node_t *node;
 
 	/* Return 0 if ptr is not within a chunk managed by jemalloc. */
@@ -1059,25 +1100,25 @@ isqalloc(tsd_t *tsd, void *ptr, size_t s
 JEMALLOC_ALWAYS_INLINE void *
 iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
     size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena)
 {
 	void *p;
 	size_t usize, copysize;
 
 	usize = sa2u(size + extra, alignment);
-	if (usize == 0)
+	if (unlikely(usize == 0 || usize > HUGE_MAXCLASS))
 		return (NULL);
 	p = ipalloct(tsd, usize, alignment, zero, tcache, arena);
 	if (p == NULL) {
 		if (extra == 0)
 			return (NULL);
 		/* Try again, without extra this time. */
 		usize = sa2u(size, alignment);
-		if (usize == 0)
+		if (unlikely(usize == 0 || usize > HUGE_MAXCLASS))
 			return (NULL);
 		p = ipalloct(tsd, usize, alignment, zero, tcache, arena);
 		if (p == NULL)
 			return (NULL);
 	}
 	/*
 	 * Copy at most size bytes (not size+extra), since the caller has no
 	 * expectation that the extra bytes will be reliably preserved.
@@ -1115,30 +1156,30 @@ iralloc(tsd_t *tsd, void *ptr, size_t ol
     bool zero)
 {
 
 	return (iralloct(tsd, ptr, oldsize, size, alignment, zero,
 	    tcache_get(tsd, true), NULL));
 }
 
 JEMALLOC_ALWAYS_INLINE bool
-ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment,
-    bool zero)
+ixalloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t extra,
+    size_t alignment, bool zero)
 {
 
 	assert(ptr != NULL);
 	assert(size != 0);
 
 	if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1))
 	    != 0) {
 		/* Existing object alignment is inadequate. */
 		return (true);
 	}
 
-	return (arena_ralloc_no_move(ptr, oldsize, size, extra, zero));
+	return (arena_ralloc_no_move(tsd, ptr, oldsize, size, extra, zero));
 }
 #endif
 
 #include "jemalloc/internal/prof.h"
 
 #undef JEMALLOC_H_INLINES
 /******************************************************************************/
 #endif /* JEMALLOC_INTERNAL_H */
--- a/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal_decls.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal_decls.h
@@ -13,16 +13,17 @@
 #    include <sys/syscall.h>
 #    if !defined(SYS_write) && defined(__NR_write)
 #      define SYS_write __NR_write
 #    endif
 #    include <sys/uio.h>
 #  endif
 #  include <pthread.h>
 #  include <errno.h>
+#  include <sys/time.h>
 #endif
 #include <sys/types.h>
 
 #include <limits.h>
 #ifndef SIZE_T_MAX
 #  define SIZE_T_MAX	SIZE_MAX
 #endif
 #include <stdarg.h>
--- a/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal_defs.h.in
@@ -184,19 +184,20 @@
  * of mmap()/munmap() calls will cause virtual memory map holes.
  */
 #undef JEMALLOC_MUNMAP
 
 /* TLS is used to map arenas and magazine caches to threads. */
 #undef JEMALLOC_TLS
 
 /*
- * ffs()/ffsl() functions to use for bitmapping.  Don't use these directly;
- * instead, use jemalloc_ffs() or jemalloc_ffsl() from util.h.
+ * ffs*() functions to use for bitmapping.  Don't use these directly; instead,
+ * use ffs_*() from util.h.
  */
+#undef JEMALLOC_INTERNAL_FFSLL
 #undef JEMALLOC_INTERNAL_FFSL
 #undef JEMALLOC_INTERNAL_FFS
 
 /*
  * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside
  * within jemalloc-owned chunks before dereferencing them.
  */
 #undef JEMALLOC_IVSALLOC
@@ -236,16 +237,19 @@
 #undef JEMALLOC_BIG_ENDIAN
 
 /* sizeof(int) == 2^LG_SIZEOF_INT. */
 #undef LG_SIZEOF_INT
 
 /* sizeof(long) == 2^LG_SIZEOF_LONG. */
 #undef LG_SIZEOF_LONG
 
+/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */
+#undef LG_SIZEOF_LONG_LONG
+
 /* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */
 #undef LG_SIZEOF_INTMAX_T
 
 /* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */
 #undef JEMALLOC_GLIBC_MALLOC_HOOK
 
 /* glibc memalign hook. */
 #undef JEMALLOC_GLIBC_MEMALIGN_HOOK
@@ -254,9 +258,12 @@
 #undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP
 
 /*
  * If defined, jemalloc symbols are not exported (doesn't work when
  * JEMALLOC_PREFIX is not defined).
  */
 #undef JEMALLOC_EXPORT
 
+/* config.malloc_conf options string. */
+#undef JEMALLOC_CONFIG_MALLOC_CONF
+
 #endif /* JEMALLOC_INTERNAL_DEFS_H_ */
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/include/jemalloc/internal/nstime.h
@@ -0,0 +1,48 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \
+    && _POSIX_MONOTONIC_CLOCK >= 0
+
+typedef struct nstime_s nstime_t;
+
+/* Maximum supported number of seconds (~584 years). */
+#define	NSTIME_SEC_MAX	18446744072
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+struct nstime_s {
+	uint64_t	ns;
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+void	nstime_init(nstime_t *time, uint64_t ns);
+void	nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec);
+uint64_t	nstime_ns(const nstime_t *time);
+uint64_t	nstime_sec(const nstime_t *time);
+uint64_t	nstime_nsec(const nstime_t *time);
+void	nstime_copy(nstime_t *time, const nstime_t *source);
+int	nstime_compare(const nstime_t *a, const nstime_t *b);
+void	nstime_add(nstime_t *time, const nstime_t *addend);
+void	nstime_subtract(nstime_t *time, const nstime_t *subtrahend);
+void	nstime_imultiply(nstime_t *time, uint64_t multiplier);
+void	nstime_idivide(nstime_t *time, uint64_t divisor);
+uint64_t	nstime_divide(const nstime_t *time, const nstime_t *divisor);
+#ifdef JEMALLOC_JET
+typedef bool (nstime_update_t)(nstime_t *);
+extern nstime_update_t *nstime_update;
+#else
+bool	nstime_update(nstime_t *time);
+#endif
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/internal/private_symbols.txt
+++ b/memory/jemalloc/src/include/jemalloc/internal/private_symbols.txt
@@ -1,13 +1,13 @@
 a0dalloc
-a0get
 a0malloc
 arena_aalloc
 arena_alloc_junk_small
+arena_basic_stats_merge
 arena_bin_index
 arena_bin_info
 arena_bitselm_get
 arena_boot
 arena_choose
 arena_choose_hard
 arena_chunk_alloc_huge
 arena_chunk_cache_maybe_insert
@@ -20,89 +20,99 @@ arena_cleanup
 arena_dalloc
 arena_dalloc_bin
 arena_dalloc_bin_junked_locked
 arena_dalloc_junk_large
 arena_dalloc_junk_small
 arena_dalloc_large
 arena_dalloc_large_junked_locked
 arena_dalloc_small
+arena_decay_tick
+arena_decay_ticks
+arena_decay_time_default_get
+arena_decay_time_default_set
+arena_decay_time_get
+arena_decay_time_set
 arena_dss_prec_get
 arena_dss_prec_set
 arena_get
-arena_get_hard
 arena_init
 arena_lg_dirty_mult_default_get
 arena_lg_dirty_mult_default_set
 arena_lg_dirty_mult_get
 arena_lg_dirty_mult_set
 arena_malloc
+arena_malloc_hard
 arena_malloc_large
-arena_malloc_small
 arena_mapbits_allocated_get
 arena_mapbits_binind_get
 arena_mapbits_decommitted_get
 arena_mapbits_dirty_get
 arena_mapbits_get
 arena_mapbits_internal_set
 arena_mapbits_large_binind_set
 arena_mapbits_large_get
 arena_mapbits_large_set
 arena_mapbits_large_size_get
-arena_mapbitsp_get
-arena_mapbitsp_read
-arena_mapbitsp_write
 arena_mapbits_size_decode
 arena_mapbits_size_encode
 arena_mapbits_small_runind_get
 arena_mapbits_small_set
 arena_mapbits_unallocated_set
 arena_mapbits_unallocated_size_get
 arena_mapbits_unallocated_size_set
 arena_mapbits_unzeroed_get
+arena_mapbitsp_get
+arena_mapbitsp_read
+arena_mapbitsp_write
 arena_maxrun
 arena_maybe_purge
 arena_metadata_allocated_add
 arena_metadata_allocated_get
 arena_metadata_allocated_sub
 arena_migrate
 arena_miscelm_get
 arena_miscelm_to_pageind
 arena_miscelm_to_rpages
-arena_nbound
 arena_new
 arena_node_alloc
 arena_node_dalloc
+arena_nthreads_dec
+arena_nthreads_get
+arena_nthreads_inc
 arena_palloc
 arena_postfork_child
 arena_postfork_parent
 arena_prefork
 arena_prof_accum
 arena_prof_accum_impl
 arena_prof_accum_locked
 arena_prof_promoted
 arena_prof_tctx_get
 arena_prof_tctx_reset
 arena_prof_tctx_set
 arena_ptr_small_binind_get
-arena_purge_all
+arena_purge
 arena_quarantine_junk_small
 arena_ralloc
 arena_ralloc_junk_large
 arena_ralloc_no_move
 arena_rd_to_miscelm
 arena_redzone_corruption
 arena_run_regind
 arena_run_to_miscelm
 arena_salloc
-arenas_cache_bypass_cleanup
-arenas_cache_cleanup
 arena_sdalloc
 arena_stats_merge
 arena_tcache_fill_small
+arena_tdata_get
+arena_tdata_get_hard
+arenas
+arenas_tdata_bypass_cleanup
+arenas_tdata_cleanup
 atomic_add_p
 atomic_add_u
 atomic_add_uint32
 atomic_add_uint64
 atomic_add_z
 atomic_cas_p
 atomic_cas_u
 atomic_cas_uint32
@@ -117,17 +127,16 @@ base_alloc
 base_boot
 base_postfork_child
 base_postfork_parent
 base_prefork
 base_stats_get
 bitmap_full
 bitmap_get
 bitmap_info_init
-bitmap_info_ngroups
 bitmap_init
 bitmap_set
 bitmap_sfu
 bitmap_size
 bitmap_unset
 bootstrap_calloc
 bootstrap_free
 bootstrap_malloc
@@ -157,19 +166,19 @@ chunk_in_dss
 chunk_lookup
 chunk_npages
 chunk_postfork_child
 chunk_postfork_parent
 chunk_prefork
 chunk_purge_arena
 chunk_purge_wrapper
 chunk_register
+chunks_rtree
 chunksize
 chunksize_mask
-chunks_rtree
 ckh_count
 ckh_delete
 ckh_insert
 ckh_iter
 ckh_new
 ckh_pointer_hash
 ckh_pointer_keycomp
 ckh_remove
@@ -178,16 +187,17 @@ ckh_string_hash
 ckh_string_keycomp
 ctl_boot
 ctl_bymib
 ctl_byname
 ctl_nametomib
 ctl_postfork_child
 ctl_postfork_parent
 ctl_prefork
+decay_ticker_get
 dss_prec_names
 extent_node_achunk_get
 extent_node_achunk_set
 extent_node_addr_get
 extent_node_addr_set
 extent_node_arena_get
 extent_node_arena_set
 extent_node_dirty_insert
@@ -229,16 +239,22 @@ extent_tree_szad_next
 extent_tree_szad_nsearch
 extent_tree_szad_prev
 extent_tree_szad_psearch
 extent_tree_szad_remove
 extent_tree_szad_reverse_iter
 extent_tree_szad_reverse_iter_recurse
 extent_tree_szad_reverse_iter_start
 extent_tree_szad_search
+ffs_llu
+ffs_lu
+ffs_u
+ffs_u32
+ffs_u64
+ffs_zu
 get_errno
 hash
 hash_fmix_32
 hash_fmix_64
 hash_get_block_32
 hash_get_block_64
 hash_rotl_32
 hash_rotl_64
@@ -260,21 +276,21 @@ iaalloc
 iallocztm
 icalloc
 icalloct
 idalloc
 idalloct
 idalloctm
 imalloc
 imalloct
+in_valgrind
 index2size
 index2size_compute
 index2size_lookup
 index2size_tab
-in_valgrind
 ipalloc
 ipalloct
 ipallocztm
 iqalloc
 iralloc
 iralloct
 iralloct_realign
 isalloc
@@ -306,21 +322,35 @@ malloc_tsd_malloc
 malloc_tsd_no_cleanup
 malloc_vcprintf
 malloc_vsnprintf
 malloc_write
 map_bias
 map_misc_offset
 mb_write
 mutex_boot
-narenas_cache_cleanup
+narenas_tdata_cleanup
 narenas_total_get
 ncpus
 nhbins
+nstime_add
+nstime_compare
+nstime_copy
+nstime_divide
+nstime_idivide
+nstime_imultiply
+nstime_init
+nstime_init2
+nstime_ns
+nstime_nsec
+nstime_sec
+nstime_subtract
+nstime_update
 opt_abort
+opt_decay_time
 opt_dss
 opt_junk
 opt_junk_alloc
 opt_junk_free
 opt_lg_chunk
 opt_lg_dirty_mult
 opt_lg_prof_interval
 opt_lg_prof_sample
@@ -329,31 +359,36 @@ opt_narenas
 opt_prof
 opt_prof_accum
 opt_prof_active
 opt_prof_final
 opt_prof_gdump
 opt_prof_leak
 opt_prof_prefix
 opt_prof_thread_active_init
+opt_purge
 opt_quarantine
 opt_redzone
 opt_stats_print
 opt_tcache
 opt_utrace
 opt_xmalloc
 opt_zero
 p2rz
 pages_commit
 pages_decommit
 pages_map
 pages_purge
 pages_trim
 pages_unmap
-pow2_ceil
+pow2_ceil_u32
+pow2_ceil_u64
+pow2_ceil_zu
+prng_lg_range
+prng_range
 prof_active_get
 prof_active_get_unlocked
 prof_active_set
 prof_alloc_prep
 prof_alloc_rollback
 prof_backtrace
 prof_boot0
 prof_boot1
@@ -388,16 +423,17 @@ prof_tdata_get
 prof_tdata_init
 prof_tdata_reinit
 prof_thread_active_get
 prof_thread_active_init_get
 prof_thread_active_init_set
 prof_thread_active_set
 prof_thread_name_get
 prof_thread_name_set
+purge_mode_names
 quarantine
 quarantine_alloc_hook
 quarantine_alloc_hook_work
 quarantine_cleanup
 register_zone
 rtree_child_read
 rtree_child_read_hard
 rtree_child_tryread
@@ -408,16 +444,19 @@ rtree_node_valid
 rtree_set
 rtree_start_level
 rtree_subkey
 rtree_subtree_read
 rtree_subtree_read_hard
 rtree_subtree_tryread
 rtree_val_read
 rtree_val_write
+run_quantize_ceil
+run_quantize_floor
+run_quantize_max
 s2u
 s2u_compute
 s2u_lookup
 sa2u
 set_errno
 size2index
 size2index_compute
 size2index_lookup
@@ -446,25 +485,30 @@ tcache_enabled_cleanup
 tcache_enabled_get
 tcache_enabled_set
 tcache_event
 tcache_event_hard
 tcache_flush
 tcache_get
 tcache_get_hard
 tcache_maxclass
+tcache_salloc
+tcache_stats_merge
 tcaches
-tcache_salloc
 tcaches_create
 tcaches_destroy
 tcaches_flush
 tcaches_get
-tcache_stats_merge
 thread_allocated_cleanup
 thread_deallocated_cleanup
+ticker_copy
+ticker_init
+ticker_read
+ticker_tick
+ticker_ticks
 tsd_arena_get
 tsd_arena_set
 tsd_boot
 tsd_boot0
 tsd_boot1
 tsd_booted
 tsd_cleanup
 tsd_cleanup_wrapper
@@ -472,28 +516,28 @@ tsd_fetch
 tsd_get
 tsd_wrapper_get
 tsd_wrapper_set
 tsd_initialized
 tsd_init_check_recursion
 tsd_init_finish
 tsd_init_head
 tsd_nominal
+tsd_prof_tdata_get
+tsd_prof_tdata_set
 tsd_quarantine_get
 tsd_quarantine_set
 tsd_set
 tsd_tcache_enabled_get
 tsd_tcache_enabled_set
 tsd_tcache_get
 tsd_tcache_set
-tsd_tls
-tsd_tsd
-tsd_prof_tdata_get
-tsd_prof_tdata_set
 tsd_thread_allocated_get
 tsd_thread_allocated_set
 tsd_thread_deallocated_get
 tsd_thread_deallocated_set
+tsd_tls
+tsd_tsd
 u2rz
 valgrind_freelike_block
 valgrind_make_mem_defined
 valgrind_make_mem_noaccess
 valgrind_make_mem_undefined
--- a/memory/jemalloc/src/include/jemalloc/internal/prng.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/prng.h
@@ -13,48 +13,67 @@
  *   m == 2^32
  *
  * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints.
  *
  * This choice of m has the disadvantage that the quality of the bits is
  * proportional to bit position.  For example, the lowest bit has a cycle of 2,
  * the next has a cycle of 4, etc.  For this reason, we prefer to use the upper
  * bits.
- *
- * Macro parameters:
- *   uint32_t r          : Result.
- *   unsigned lg_range   : (0..32], number of least significant bits to return.
- *   uint32_t state      : Seed value.
- *   const uint32_t a, c : See above discussion.
  */
-#define	prng32(r, lg_range, state, a, c) do {				\
-	assert((lg_range) > 0);						\
-	assert((lg_range) <= 32);					\
-									\
-	r = (state * (a)) + (c);					\
-	state = r;							\
-	r >>= (32 - (lg_range));					\
-} while (false)
-
-/* Same as prng32(), but 64 bits of pseudo-randomness, using uint64_t. */
-#define	prng64(r, lg_range, state, a, c) do {				\
-	assert((lg_range) > 0);						\
-	assert((lg_range) <= 64);					\
-									\
-	r = (state * (a)) + (c);					\
-	state = r;							\
-	r >>= (64 - (lg_range));					\
-} while (false)
+#define	PRNG_A	UINT64_C(6364136223846793005)
+#define	PRNG_C	UINT64_C(1442695040888963407)
 
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
 
 #endif /* JEMALLOC_H_STRUCTS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
+#ifndef JEMALLOC_ENABLE_INLINE
+uint64_t	prng_lg_range(uint64_t *state, unsigned lg_range);
+uint64_t	prng_range(uint64_t *state, uint64_t range);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PRNG_C_))
+JEMALLOC_ALWAYS_INLINE uint64_t
+prng_lg_range(uint64_t *state, unsigned lg_range)
+{
+	uint64_t ret;
+
+	assert(lg_range > 0);
+	assert(lg_range <= 64);
+
+	ret = (*state * PRNG_A) + PRNG_C;
+	*state = ret;
+	ret >>= (64 - lg_range);
+
+	return (ret);
+}
+
+JEMALLOC_ALWAYS_INLINE uint64_t
+prng_range(uint64_t *state, uint64_t range)
+{
+	uint64_t ret;
+	unsigned lg_range;
+
+	assert(range > 1);
+
+	/* Compute the ceiling of lg(range). */
+	lg_range = ffs_u64(pow2_ceil_u64(range)) - 1;
+
+	/* Generate a result in [0..range) via repeated trial. */
+	do {
+		ret = prng_lg_range(state, lg_range);
+	} while (ret >= range);
+
+	return (ret);
+}
+#endif
+
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/internal/rb.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/rb.h
@@ -37,17 +37,16 @@ struct {								\
     bool rbn_red;							\
 }
 #endif
 
 /* Root structure. */
 #define	rb_tree(a_type)							\
 struct {								\
     a_type *rbt_root;							\
-    a_type rbt_nil;							\
 }
 
 /* Left accessors. */
 #define	rbtn_left_get(a_type, a_field, a_node)				\
     ((a_node)->a_field.rbn_left)
 #define	rbtn_left_set(a_type, a_field, a_node, a_left) do {		\
     (a_node)->a_field.rbn_left = a_left;				\
 } while (0)
@@ -79,18 +78,18 @@ struct {								\
     (a_node)->a_field.rbn_right_red = (a_type *) (((intptr_t)		\
       (a_node)->a_field.rbn_right_red) & ((ssize_t)-2));		\
 } while (0)
 
 /* Node initializer. */
 #define	rbt_node_new(a_type, a_field, a_rbt, a_node) do {		\
     /* Bookkeeping bit cannot be used by node pointer. */		\
     assert(((uintptr_t)(a_node) & 0x1) == 0);				\
-    rbtn_left_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil);	\
-    rbtn_right_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil);	\
+    rbtn_left_set(a_type, a_field, (a_node), NULL);	\
+    rbtn_right_set(a_type, a_field, (a_node), NULL);	\
     rbtn_red_set(a_type, a_field, (a_node));				\
 } while (0)
 #else
 /* Right accessors. */
 #define	rbtn_right_get(a_type, a_field, a_node)				\
     ((a_node)->a_field.rbn_right)
 #define	rbtn_right_set(a_type, a_field, a_node, a_right) do {		\
     (a_node)->a_field.rbn_right = a_right;				\
@@ -106,46 +105,43 @@ struct {								\
     (a_node)->a_field.rbn_red = true;					\
 } while (0)
 #define	rbtn_black_set(a_type, a_field, a_node) do {			\
     (a_node)->a_field.rbn_red = false;					\
 } while (0)
 
 /* Node initializer. */
 #define	rbt_node_new(a_type, a_field, a_rbt, a_node) do {		\
-    rbtn_left_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil);	\
-    rbtn_right_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil);	\
+    rbtn_left_set(a_type, a_field, (a_node), NULL);	\
+    rbtn_right_set(a_type, a_field, (a_node), NULL);	\
     rbtn_red_set(a_type, a_field, (a_node));				\
 } while (0)
 #endif
 
 /* Tree initializer. */
 #define	rb_new(a_type, a_field, a_rbt) do {				\
-    (a_rbt)->rbt_root = &(a_rbt)->rbt_nil;				\
-    rbt_node_new(a_type, a_field, a_rbt, &(a_rbt)->rbt_nil);		\
-    rbtn_black_set(a_type, a_field, &(a_rbt)->rbt_nil);			\
+    (a_rbt)->rbt_root = NULL;						\
 } while (0)
 
 /* Internal utility macros. */
 #define	rbtn_first(a_type, a_field, a_rbt, a_root, r_node) do {		\
     (r_node) = (a_root);						\
-    if ((r_node) != &(a_rbt)->rbt_nil) {				\
+    if ((r_node) != NULL) {						\
 	for (;								\
-	  rbtn_left_get(a_type, a_field, (r_node)) != &(a_rbt)->rbt_nil;\
+	  rbtn_left_get(a_type, a_field, (r_node)) != NULL;		\
 	  (r_node) = rbtn_left_get(a_type, a_field, (r_node))) {	\
 	}								\
     }									\
 } while (0)
 
 #define	rbtn_last(a_type, a_field, a_rbt, a_root, r_node) do {		\
     (r_node) = (a_root);						\
-    if ((r_node) != &(a_rbt)->rbt_nil) {				\
-	for (; rbtn_right_get(a_type, a_field, (r_node)) !=		\
-	  &(a_rbt)->rbt_nil; (r_node) = rbtn_right_get(a_type, a_field,	\
-	  (r_node))) {							\
+    if ((r_node) != NULL) {						\
+	for (; rbtn_right_get(a_type, a_field, (r_node)) != NULL;	\
+	  (r_node) = rbtn_right_get(a_type, a_field, (r_node))) {	\
 	}								\
     }									\
 } while (0)
 
 #define	rbtn_rotate_left(a_type, a_field, a_node, r_node) do {		\
     (r_node) = rbtn_right_get(a_type, a_field, (a_node));		\
     rbtn_right_set(a_type, a_field, (a_node),				\
       rbtn_left_get(a_type, a_field, (r_node)));			\
@@ -330,175 +326,154 @@ a_prefix##destroy(a_rbt_type *rbtree, vo
  *       Description: Iterate over the tree with post-order traversal, remove
  *                    each node, and run the callback if non-null.  This is
  *                    used for destroying a tree without paying the cost to
  *                    rebalance it.  The tree must not be otherwise altered
  *                    during traversal.
  *       Args:
  *         tree: Pointer to an initialized red-black tree object.
  *         cb  : Callback function, which, if non-null, is called for each node
- *               during iteration.  There is no way to stop iteration once it has
- *               begun.
+ *               during iteration.  There is no way to stop iteration once it
+ *               has begun.
  *         arg : Opaque pointer passed to cb().
  */
 #define	rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp)	\
 a_attr void								\
 a_prefix##new(a_rbt_type *rbtree) {					\
     rb_new(a_type, a_field, rbtree);					\
 }									\
 a_attr bool								\
 a_prefix##empty(a_rbt_type *rbtree) {					\
-    return (rbtree->rbt_root == &rbtree->rbt_nil);			\
+    return (rbtree->rbt_root == NULL);					\
 }									\
 a_attr a_type *								\
 a_prefix##first(a_rbt_type *rbtree) {					\
     a_type *ret;							\
     rbtn_first(a_type, a_field, rbtree, rbtree->rbt_root, ret);		\
-    if (ret == &rbtree->rbt_nil) {					\
-	ret = NULL;							\
-    }									\
     return (ret);							\
 }									\
 a_attr a_type *								\
 a_prefix##last(a_rbt_type *rbtree) {					\
     a_type *ret;							\
     rbtn_last(a_type, a_field, rbtree, rbtree->rbt_root, ret);		\
-    if (ret == &rbtree->rbt_nil) {					\
-	ret = NULL;							\
-    }									\
     return (ret);							\
 }									\
 a_attr a_type *								\
 a_prefix##next(a_rbt_type *rbtree, a_type *node) {			\
     a_type *ret;							\
-    if (rbtn_right_get(a_type, a_field, node) != &rbtree->rbt_nil) {	\
+    if (rbtn_right_get(a_type, a_field, node) != NULL) {		\
 	rbtn_first(a_type, a_field, rbtree, rbtn_right_get(a_type,	\
 	  a_field, node), ret);						\
     } else {								\
 	a_type *tnode = rbtree->rbt_root;				\
-	assert(tnode != &rbtree->rbt_nil);				\
-	ret = &rbtree->rbt_nil;						\
+	assert(tnode != NULL);						\
+	ret = NULL;							\
 	while (true) {							\
 	    int cmp = (a_cmp)(node, tnode);				\
 	    if (cmp < 0) {						\
 		ret = tnode;						\
 		tnode = rbtn_left_get(a_type, a_field, tnode);		\
 	    } else if (cmp > 0) {					\
 		tnode = rbtn_right_get(a_type, a_field, tnode);		\
 	    } else {							\
 		break;							\
 	    }								\
-	    assert(tnode != &rbtree->rbt_nil);				\
+	    assert(tnode != NULL);					\
 	}								\
     }									\
-    if (ret == &rbtree->rbt_nil) {					\
-	ret = (NULL);							\
-    }									\
     return (ret);							\
 }									\
 a_attr a_type *								\
 a_prefix##prev(a_rbt_type *rbtree, a_type *node) {			\
     a_type *ret;							\
-    if (rbtn_left_get(a_type, a_field, node) != &rbtree->rbt_nil) {	\
+    if (rbtn_left_get(a_type, a_field, node) != NULL) {			\
 	rbtn_last(a_type, a_field, rbtree, rbtn_left_get(a_type,	\
 	  a_field, node), ret);						\
     } else {								\
 	a_type *tnode = rbtree->rbt_root;				\
-	assert(tnode != &rbtree->rbt_nil);				\
-	ret = &rbtree->rbt_nil;						\
+	assert(tnode != NULL);						\
+	ret = NULL;							\
 	while (true) {							\
 	    int cmp = (a_cmp)(node, tnode);				\
 	    if (cmp < 0) {						\
 		tnode = rbtn_left_get(a_type, a_field, tnode);		\
 	    } else if (cmp > 0) {					\
 		ret = tnode;						\
 		tnode = rbtn_right_get(a_type, a_field, tnode);		\
 	    } else {							\
 		break;							\
 	    }								\
-	    assert(tnode != &rbtree->rbt_nil);				\
+	    assert(tnode != NULL);					\
 	}								\
     }									\
-    if (ret == &rbtree->rbt_nil) {					\
-	ret = (NULL);							\
-    }									\
     return (ret);							\
 }									\
 a_attr a_type *								\
 a_prefix##search(a_rbt_type *rbtree, const a_type *key) {		\
     a_type *ret;							\
     int cmp;								\
     ret = rbtree->rbt_root;						\
-    while (ret != &rbtree->rbt_nil					\
+    while (ret != NULL							\
       && (cmp = (a_cmp)(key, ret)) != 0) {				\
 	if (cmp < 0) {							\
 	    ret = rbtn_left_get(a_type, a_field, ret);			\
 	} else {							\
 	    ret = rbtn_right_get(a_type, a_field, ret);			\
 	}								\
     }									\
-    if (ret == &rbtree->rbt_nil) {					\
-	ret = (NULL);							\
-    }									\
     return (ret);							\
 }									\
 a_attr a_type *								\
 a_prefix##nsearch(a_rbt_type *rbtree, const a_type *key) {		\
     a_type *ret;							\
     a_type *tnode = rbtree->rbt_root;					\
-    ret = &rbtree->rbt_nil;						\
-    while (tnode != &rbtree->rbt_nil) {					\
+    ret = NULL;								\
+    while (tnode != NULL) {						\
 	int cmp = (a_cmp)(key, tnode);					\
 	if (cmp < 0) {							\
 	    ret = tnode;						\
 	    tnode = rbtn_left_get(a_type, a_field, tnode);		\
 	} else if (cmp > 0) {						\
 	    tnode = rbtn_right_get(a_type, a_field, tnode);		\
 	} else {							\
 	    ret = tnode;						\
 	    break;							\
 	}								\
     }									\
-    if (ret == &rbtree->rbt_nil) {					\
-	ret = (NULL);							\
-    }									\
     return (ret);							\
 }									\
 a_attr a_type *								\
 a_prefix##psearch(a_rbt_type *rbtree, const a_type *key) {		\
     a_type *ret;							\
     a_type *tnode = rbtree->rbt_root;					\
-    ret = &rbtree->rbt_nil;						\
-    while (tnode != &rbtree->rbt_nil) {					\
+    ret = NULL;								\
+    while (tnode != NULL) {						\
 	int cmp = (a_cmp)(key, tnode);					\
 	if (cmp < 0) {							\
 	    tnode = rbtn_left_get(a_type, a_field, tnode);		\
 	} else if (cmp > 0) {						\
 	    ret = tnode;						\
 	    tnode = rbtn_right_get(a_type, a_field, tnode);		\
 	} else {							\
 	    ret = tnode;						\
 	    break;							\
 	}								\
     }									\
-    if (ret == &rbtree->rbt_nil) {					\
-	ret = (NULL);							\
-    }									\
     return (ret);							\
 }									\
 a_attr void								\
 a_prefix##insert(a_rbt_type *rbtree, a_type *node) {			\
     struct {								\
 	a_type *node;							\
 	int cmp;							\
     } path[sizeof(void *) << 4], *pathp;				\
     rbt_node_new(a_type, a_field, rbtree, node);			\
     /* Wind. */								\
     path->node = rbtree->rbt_root;					\
-    for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) {	\
+    for (pathp = path; pathp->node != NULL; pathp++) {			\
 	int cmp = pathp->cmp = a_cmp(node, pathp->node);		\
 	assert(cmp != 0);						\
 	if (cmp < 0) {							\
 	    pathp[1].node = rbtn_left_get(a_type, a_field,		\
 	      pathp->node);						\
 	} else {							\
 	    pathp[1].node = rbtn_right_get(a_type, a_field,		\
 	      pathp->node);						\
@@ -508,32 +483,34 @@ a_prefix##insert(a_rbt_type *rbtree, a_t
     /* Unwind. */							\
     for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) {	\
 	a_type *cnode = pathp->node;					\
 	if (pathp->cmp < 0) {						\
 	    a_type *left = pathp[1].node;				\
 	    rbtn_left_set(a_type, a_field, cnode, left);		\
 	    if (rbtn_red_get(a_type, a_field, left)) {			\
 		a_type *leftleft = rbtn_left_get(a_type, a_field, left);\
-		if (rbtn_red_get(a_type, a_field, leftleft)) {		\
+		if (leftleft != NULL && rbtn_red_get(a_type, a_field,	\
+		  leftleft)) {						\
 		    /* Fix up 4-node. */				\
 		    a_type *tnode;					\
 		    rbtn_black_set(a_type, a_field, leftleft);		\
 		    rbtn_rotate_right(a_type, a_field, cnode, tnode);	\
 		    cnode = tnode;					\
 		}							\
 	    } else {							\
 		return;							\
 	    }								\
 	} else {							\
 	    a_type *right = pathp[1].node;				\
 	    rbtn_right_set(a_type, a_field, cnode, right);		\
 	    if (rbtn_red_get(a_type, a_field, right)) {			\
 		a_type *left = rbtn_left_get(a_type, a_field, cnode);	\
-		if (rbtn_red_get(a_type, a_field, left)) {		\
+		if (left != NULL && rbtn_red_get(a_type, a_field,	\
+		  left)) {						\
 		    /* Split 4-node. */					\
 		    rbtn_black_set(a_type, a_field, left);		\
 		    rbtn_black_set(a_type, a_field, right);		\
 		    rbtn_red_set(a_type, a_field, cnode);		\
 		} else {						\
 		    /* Lean left. */					\
 		    a_type *tnode;					\
 		    bool tred = rbtn_red_get(a_type, a_field, cnode);	\
@@ -556,29 +533,29 @@ a_attr void								\
 a_prefix##remove(a_rbt_type *rbtree, a_type *node) {			\
     struct {								\
 	a_type *node;							\
 	int cmp;							\
     } *pathp, *nodep, path[sizeof(void *) << 4];			\
     /* Wind. */								\
     nodep = NULL; /* Silence compiler warning. */			\
     path->node = rbtree->rbt_root;					\
-    for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) {	\
+    for (pathp = path; pathp->node != NULL; pathp++) {			\
 	int cmp = pathp->cmp = a_cmp(node, pathp->node);		\
 	if (cmp < 0) {							\
 	    pathp[1].node = rbtn_left_get(a_type, a_field,		\
 	      pathp->node);						\
 	} else {							\
 	    pathp[1].node = rbtn_right_get(a_type, a_field,		\
 	      pathp->node);						\
 	    if (cmp == 0) {						\
 	        /* Find node's successor, in preparation for swap. */	\
 		pathp->cmp = 1;						\
 		nodep = pathp;						\
-		for (pathp++; pathp->node != &rbtree->rbt_nil;		\
+		for (pathp++; pathp->node != NULL;			\
 		  pathp++) {						\
 		    pathp->cmp = -1;					\
 		    pathp[1].node = rbtn_left_get(a_type, a_field,	\
 		      pathp->node);					\
 		}							\
 		break;							\
 	    }								\
 	}								\
@@ -611,17 +588,17 @@ a_prefix##remove(a_rbt_type *rbtree, a_t
 		  nodep->node);						\
 	    } else {							\
 		rbtn_right_set(a_type, a_field, nodep[-1].node,		\
 		  nodep->node);						\
 	    }								\
 	}								\
     } else {								\
 	a_type *left = rbtn_left_get(a_type, a_field, node);		\
-	if (left != &rbtree->rbt_nil) {					\
+	if (left != NULL) {						\
 	    /* node has no successor, but it has a left child.        */\
 	    /* Splice node out, without losing the left child.        */\
 	    assert(!rbtn_red_get(a_type, a_field, node));		\
 	    assert(rbtn_red_get(a_type, a_field, left));		\
 	    rbtn_black_set(a_type, a_field, left);			\
 	    if (pathp == path) {					\
 		rbtree->rbt_root = left;				\
 	    } else {							\
@@ -631,43 +608,42 @@ a_prefix##remove(a_rbt_type *rbtree, a_t
 		} else {						\
 		    rbtn_right_set(a_type, a_field, pathp[-1].node,	\
 		      left);						\
 		}							\
 	    }								\
 	    return;							\
 	} else if (pathp == path) {					\
 	    /* The tree only contained one node. */			\
-	    rbtree->rbt_root = &rbtree->rbt_nil;			\
+	    rbtree->rbt_root = NULL;					\
 	    return;							\
 	}								\
     }									\
     if (rbtn_red_get(a_type, a_field, pathp->node)) {			\
 	/* Prune red node, which requires no fixup. */			\
 	assert(pathp[-1].cmp < 0);					\
-	rbtn_left_set(a_type, a_field, pathp[-1].node,			\
-	  &rbtree->rbt_nil);						\
+	rbtn_left_set(a_type, a_field, pathp[-1].node, NULL);		\
 	return;								\
     }									\
     /* The node to be pruned is black, so unwind until balance is     */\
     /* restored.                                                      */\
-    pathp->node = &rbtree->rbt_nil;					\
+    pathp->node = NULL;							\
     for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) {	\
 	assert(pathp->cmp != 0);					\
 	if (pathp->cmp < 0) {						\
 	    rbtn_left_set(a_type, a_field, pathp->node,			\
 	      pathp[1].node);						\
-	    assert(!rbtn_red_get(a_type, a_field, pathp[1].node));	\
 	    if (rbtn_red_get(a_type, a_field, pathp->node)) {		\
 		a_type *right = rbtn_right_get(a_type, a_field,		\
 		  pathp->node);						\
 		a_type *rightleft = rbtn_left_get(a_type, a_field,	\
 		  right);						\
 		a_type *tnode;						\
-		if (rbtn_red_get(a_type, a_field, rightleft)) {		\
+		if (rightleft != NULL && rbtn_red_get(a_type, a_field,	\
+		  rightleft)) {						\
 		    /* In the following diagrams, ||, //, and \\      */\
 		    /* indicate the path to the removed node.         */\
 		    /*                                                */\
 		    /*      ||                                        */\
 		    /*    pathp(r)                                    */\
 		    /*  //        \                                   */\
 		    /* (b)        (b)                                 */\
 		    /*           /                                    */\
@@ -700,17 +676,18 @@ a_prefix##remove(a_rbt_type *rbtree, a_t
 		      tnode);						\
 		}							\
 		return;							\
 	    } else {							\
 		a_type *right = rbtn_right_get(a_type, a_field,		\
 		  pathp->node);						\
 		a_type *rightleft = rbtn_left_get(a_type, a_field,	\
 		  right);						\
-		if (rbtn_red_get(a_type, a_field, rightleft)) {		\
+		if (rightleft != NULL && rbtn_red_get(a_type, a_field,	\
+		  rightleft)) {						\
 		    /*      ||                                        */\
 		    /*    pathp(b)                                    */\
 		    /*  //        \                                   */\
 		    /* (b)        (b)                                 */\
 		    /*           /                                    */\
 		    /*          (r)                                   */\
 		    a_type *tnode;					\
 		    rbtn_black_set(a_type, a_field, rightleft);		\
@@ -754,17 +731,18 @@ a_prefix##remove(a_rbt_type *rbtree, a_t
 	      pathp[1].node);						\
 	    left = rbtn_left_get(a_type, a_field, pathp->node);		\
 	    if (rbtn_red_get(a_type, a_field, left)) {			\
 		a_type *tnode;						\
 		a_type *leftright = rbtn_right_get(a_type, a_field,	\
 		  left);						\
 		a_type *leftrightleft = rbtn_left_get(a_type, a_field,	\
 		  leftright);						\
-		if (rbtn_red_get(a_type, a_field, leftrightleft)) {	\
+		if (leftrightleft != NULL && rbtn_red_get(a_type,	\
+		  a_field, leftrightleft)) {				\
 		    /*      ||                                        */\
 		    /*    pathp(b)                                    */\
 		    /*   /        \\                                  */\
 		    /* (r)        (b)                                 */\
 		    /*   \                                            */\
 		    /*   (b)                                          */\
 		    /*   /                                            */\
 		    /* (r)                                            */\
@@ -780,17 +758,17 @@ a_prefix##remove(a_rbt_type *rbtree, a_t
 		    /*      ||                                        */\
 		    /*    pathp(b)                                    */\
 		    /*   /        \\                                  */\
 		    /* (r)        (b)                                 */\
 		    /*   \                                            */\
 		    /*   (b)                                          */\
 		    /*   /                                            */\
 		    /* (b)                                            */\
-		    assert(leftright != &rbtree->rbt_nil);		\
+		    assert(leftright != NULL);				\
 		    rbtn_red_set(a_type, a_field, leftright);		\
 		    rbtn_rotate_right(a_type, a_field, pathp->node,	\
 		      tnode);						\
 		    rbtn_black_set(a_type, a_field, tnode);		\
 		}							\
 		/* Balance restored, but rotation modified subtree    */\
 		/* root, which may actually be the tree root.         */\
 		if (pathp == path) {					\
@@ -803,17 +781,18 @@ a_prefix##remove(a_rbt_type *rbtree, a_t
 		    } else {						\
 			rbtn_right_set(a_type, a_field, pathp[-1].node,	\
 			  tnode);					\
 		    }							\
 		}							\
 		return;							\
 	    } else if (rbtn_red_get(a_type, a_field, pathp->node)) {	\
 		a_type *leftleft = rbtn_left_get(a_type, a_field, left);\
-		if (rbtn_red_get(a_type, a_field, leftleft)) {		\
+		if (leftleft != NULL && rbtn_red_get(a_type, a_field,	\
+		  leftleft)) {						\
 		    /*        ||                                      */\
 		    /*      pathp(r)                                  */\
 		    /*     /        \\                                */\
 		    /*   (b)        (b)                               */\
 		    /*   /                                            */\
 		    /* (r)                                            */\
 		    a_type *tnode;					\
 		    rbtn_black_set(a_type, a_field, pathp->node);	\
@@ -841,17 +820,18 @@ a_prefix##remove(a_rbt_type *rbtree, a_t
 		    /* (b)                                            */\
 		    rbtn_red_set(a_type, a_field, left);		\
 		    rbtn_black_set(a_type, a_field, pathp->node);	\
 		    /* Balance restored. */				\
 		    return;						\
 		}							\
 	    } else {							\
 		a_type *leftleft = rbtn_left_get(a_type, a_field, left);\
-		if (rbtn_red_get(a_type, a_field, leftleft)) {		\
+		if (leftleft != NULL && rbtn_red_get(a_type, a_field,	\
+		  leftleft)) {						\
 		    /*               ||                               */\
 		    /*             pathp(b)                           */\
 		    /*            /        \\                         */\
 		    /*          (b)        (b)                        */\
 		    /*          /                                     */\
 		    /*        (r)                                     */\
 		    a_type *tnode;					\
 		    rbtn_black_set(a_type, a_field, leftleft);		\
@@ -887,38 +867,38 @@ a_prefix##remove(a_rbt_type *rbtree, a_t
     }									\
     /* Set root. */							\
     rbtree->rbt_root = path->node;					\
     assert(!rbtn_red_get(a_type, a_field, rbtree->rbt_root));		\
 }									\
 a_attr a_type *								\
 a_prefix##iter_recurse(a_rbt_type *rbtree, a_type *node,		\
   a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) {		\
-    if (node == &rbtree->rbt_nil) {					\
-	return (&rbtree->rbt_nil);					\
+    if (node == NULL) {							\
+	return (NULL);							\
     } else {								\
 	a_type *ret;							\
 	if ((ret = a_prefix##iter_recurse(rbtree, rbtn_left_get(a_type,	\
-	  a_field, node), cb, arg)) != &rbtree->rbt_nil			\
-	  || (ret = cb(rbtree, node, arg)) != NULL) {			\
+	  a_field, node), cb, arg)) != NULL || (ret = cb(rbtree, node,	\
+	  arg)) != NULL) {						\
 	    return (ret);						\
 	}								\
 	return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type,	\
 	  a_field, node), cb, arg));					\
     }									\
 }									\
 a_attr a_type *								\
 a_prefix##iter_start(a_rbt_type *rbtree, a_type *start, a_type *node,	\
   a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) {		\
     int cmp = a_cmp(start, node);					\
     if (cmp < 0) {							\
 	a_type *ret;							\
 	if ((ret = a_prefix##iter_start(rbtree, start,			\
-	  rbtn_left_get(a_type, a_field, node), cb, arg)) !=		\
-	  &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) {	\
+	  rbtn_left_get(a_type, a_field, node), cb, arg)) != NULL ||	\
+	  (ret = cb(rbtree, node, arg)) != NULL) {			\
 	    return (ret);						\
 	}								\
 	return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type,	\
 	  a_field, node), cb, arg));					\
     } else if (cmp > 0) {						\
 	return (a_prefix##iter_start(rbtree, start,			\
 	  rbtn_right_get(a_type, a_field, node), cb, arg));		\
     } else {								\
@@ -935,47 +915,44 @@ a_prefix##iter(a_rbt_type *rbtree, a_typ
   a_rbt_type *, a_type *, void *), void *arg) {				\
     a_type *ret;							\
     if (start != NULL) {						\
 	ret = a_prefix##iter_start(rbtree, start, rbtree->rbt_root,	\
 	  cb, arg);							\
     } else {								\
 	ret = a_prefix##iter_recurse(rbtree, rbtree->rbt_root, cb, arg);\
     }									\
-    if (ret == &rbtree->rbt_nil) {					\
-	ret = NULL;							\
-    }									\
     return (ret);							\
 }									\
 a_attr a_type *								\
 a_prefix##reverse_iter_recurse(a_rbt_type *rbtree, a_type *node,	\
   a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) {		\
-    if (node == &rbtree->rbt_nil) {					\
-	return (&rbtree->rbt_nil);					\
+    if (node == NULL) {							\
+	return (NULL);							\
     } else {								\
 	a_type *ret;							\
 	if ((ret = a_prefix##reverse_iter_recurse(rbtree,		\
-	  rbtn_right_get(a_type, a_field, node), cb, arg)) !=		\
-	  &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) {	\
+	  rbtn_right_get(a_type, a_field, node), cb, arg)) != NULL ||	\
+	  (ret = cb(rbtree, node, arg)) != NULL) {			\
 	    return (ret);						\
 	}								\
 	return (a_prefix##reverse_iter_recurse(rbtree,			\
 	  rbtn_left_get(a_type, a_field, node), cb, arg));		\
     }									\
 }									\
 a_attr a_type *								\
 a_prefix##reverse_iter_start(a_rbt_type *rbtree, a_type *start,		\
   a_type *node, a_type *(*cb)(a_rbt_type *, a_type *, void *),		\
   void *arg) {								\
     int cmp = a_cmp(start, node);					\
     if (cmp > 0) {							\
 	a_type *ret;							\
 	if ((ret = a_prefix##reverse_iter_start(rbtree, start,		\
-	  rbtn_right_get(a_type, a_field, node), cb, arg)) !=		\
-	  &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) {	\
+	  rbtn_right_get(a_type, a_field, node), cb, arg)) != NULL ||	\
+	  (ret = cb(rbtree, node, arg)) != NULL) {			\
 	    return (ret);						\
 	}								\
 	return (a_prefix##reverse_iter_recurse(rbtree,			\
 	  rbtn_left_get(a_type, a_field, node), cb, arg));		\
     } else if (cmp < 0) {						\
 	return (a_prefix##reverse_iter_start(rbtree, start,		\
 	  rbtn_left_get(a_type, a_field, node), cb, arg));		\
     } else {								\
@@ -993,37 +970,34 @@ a_prefix##reverse_iter(a_rbt_type *rbtre
     a_type *ret;							\
     if (start != NULL) {						\
 	ret = a_prefix##reverse_iter_start(rbtree, start,		\
 	  rbtree->rbt_root, cb, arg);					\
     } else {								\
 	ret = a_prefix##reverse_iter_recurse(rbtree, rbtree->rbt_root,	\
 	  cb, arg);							\
     }									\
-    if (ret == &rbtree->rbt_nil) {					\
-	ret = NULL;							\
-    }									\
     return (ret);							\
 }									\
 a_attr void								\
 a_prefix##destroy_recurse(a_rbt_type *rbtree, a_type *node, void (*cb)(	\
   a_type *, void *), void *arg) {					\
-    if (node == &rbtree->rbt_nil) {					\
+    if (node == NULL) {							\
 	return;								\
     }									\
     a_prefix##destroy_recurse(rbtree, rbtn_left_get(a_type, a_field,	\
       node), cb, arg);							\
-    rbtn_left_set(a_type, a_field, (node), &rbtree->rbt_nil);		\
+    rbtn_left_set(a_type, a_field, (node), NULL);			\
     a_prefix##destroy_recurse(rbtree, rbtn_right_get(a_type, a_field,	\
       node), cb, arg);							\
-    rbtn_right_set(a_type, a_field, (node), &rbtree->rbt_nil);		\
+    rbtn_right_set(a_type, a_field, (node), NULL);			\
     if (cb) {								\
 	cb(node, arg);							\
     }									\
 }									\
 a_attr void								\
 a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *),	\
   void *arg) {								\
     a_prefix##destroy_recurse(rbtree, rbtree->rbt_root, cb, arg);	\
-    rbtree->rbt_root = &rbtree->rbt_nil;				\
+    rbtree->rbt_root = NULL;						\
 }
 
 #endif /* RB_H_ */
--- a/memory/jemalloc/src/include/jemalloc/internal/size_classes.sh
+++ b/memory/jemalloc/src/include/jemalloc/internal/size_classes.sh
@@ -137,20 +137,20 @@ size_classes() {
   while [ ${ndelta} -lt ${g} ] ; do
     size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
     index=$((${index} + 1))
     ndelta=$((${ndelta} + 1))
   done
 
   # All remaining groups.
   lg_grp=$((${lg_grp} + ${lg_g}))
-  while [ ${lg_grp} -lt ${ptr_bits} ] ; do
+  while [ ${lg_grp} -lt $((${ptr_bits} - 1)) ] ; do
     sep_line
     ndelta=1
-    if [ ${lg_grp} -eq $((${ptr_bits} - 1)) ] ; then
+    if [ ${lg_grp} -eq $((${ptr_bits} - 2)) ] ; then
       ndelta_limit=$((${g} - 1))
     else
       ndelta_limit=${g}
     fi
     while [ ${ndelta} -le ${ndelta_limit} ] ; do
       size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
       if [ ${lg_delta_lookup} != "no" ] ; then
         nlbins=$((${index} + 1))
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/include/jemalloc/internal/smoothstep.h
@@ -0,0 +1,246 @@
+/*
+ * This file was generated by the following command:
+ *   sh smoothstep.sh smoother 200 24 3 15
+ */
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+/*
+ * This header defines a precomputed table based on the smoothstep family of
+ * sigmoidal curves (https://en.wikipedia.org/wiki/Smoothstep) that grow from 0
+ * to 1 in 0 <= x <= 1.  The table is stored as integer fixed point values so
+ * that floating point math can be avoided.
+ *
+ *                      3     2
+ *   smoothstep(x) = -2x  + 3x
+ *
+ *                       5      4      3
+ *   smootherstep(x) = 6x  - 15x  + 10x
+ *
+ *                          7      6      5      4
+ *   smootheststep(x) = -20x  + 70x  - 84x  + 35x
+ */
+
+#define	SMOOTHSTEP_VARIANT	"smoother"
+#define	SMOOTHSTEP_NSTEPS	200
+#define	SMOOTHSTEP_BFP		24
+#define	SMOOTHSTEP \
+ /* STEP(step, h,                            x,     y) */ \
+    STEP(   1, UINT64_C(0x0000000000000014), 0.005, 0.000001240643750) \
+    STEP(   2, UINT64_C(0x00000000000000a5), 0.010, 0.000009850600000) \
+    STEP(   3, UINT64_C(0x0000000000000229), 0.015, 0.000032995181250) \
+    STEP(   4, UINT64_C(0x0000000000000516), 0.020, 0.000077619200000) \
+    STEP(   5, UINT64_C(0x00000000000009dc), 0.025, 0.000150449218750) \
+    STEP(   6, UINT64_C(0x00000000000010e8), 0.030, 0.000257995800000) \
+    STEP(   7, UINT64_C(0x0000000000001aa4), 0.035, 0.000406555756250) \
+    STEP(   8, UINT64_C(0x0000000000002777), 0.040, 0.000602214400000) \
+    STEP(   9, UINT64_C(0x00000000000037c2), 0.045, 0.000850847793750) \
+    STEP(  10, UINT64_C(0x0000000000004be6), 0.050, 0.001158125000000) \
+    STEP(  11, UINT64_C(0x000000000000643c), 0.055, 0.001529510331250) \
+    STEP(  12, UINT64_C(0x000000000000811f), 0.060, 0.001970265600000) \
+    STEP(  13, UINT64_C(0x000000000000a2e2), 0.065, 0.002485452368750) \
+    STEP(  14, UINT64_C(0x000000000000c9d8), 0.070, 0.003079934200000) \
+    STEP(  15, UINT64_C(0x000000000000f64f), 0.075, 0.003758378906250) \
+    STEP(  16, UINT64_C(0x0000000000012891), 0.080, 0.004525260800000) \
+    STEP(  17, UINT64_C(0x00000000000160e7), 0.085, 0.005384862943750) \
+    STEP(  18, UINT64_C(0x0000000000019f95), 0.090, 0.006341279400000) \
+    STEP(  19, UINT64_C(0x000000000001e4dc), 0.095, 0.007398417481250) \
+    STEP(  20, UINT64_C(0x00000000000230fc), 0.100, 0.008560000000000) \
+    STEP(  21, UINT64_C(0x0000000000028430), 0.105, 0.009829567518750) \
+    STEP(  22, UINT64_C(0x000000000002deb0), 0.110, 0.011210480600000) \
+    STEP(  23, UINT64_C(0x00000000000340b1), 0.115, 0.012705922056250) \
+    STEP(  24, UINT64_C(0x000000000003aa67), 0.120, 0.014318899200000) \
+    STEP(  25, UINT64_C(0x0000000000041c00), 0.125, 0.016052246093750) \
+    STEP(  26, UINT64_C(0x00000000000495a8), 0.130, 0.017908625800000) \
+    STEP(  27, UINT64_C(0x000000000005178b), 0.135, 0.019890532631250) \
+    STEP(  28, UINT64_C(0x000000000005a1cf), 0.140, 0.022000294400000) \
+    STEP(  29, UINT64_C(0x0000000000063498), 0.145, 0.024240074668750) \
+    STEP(  30, UINT64_C(0x000000000006d009), 0.150, 0.026611875000000) \
+    STEP(  31, UINT64_C(0x000000000007743f), 0.155, 0.029117537206250) \
+    STEP(  32, UINT64_C(0x0000000000082157), 0.160, 0.031758745600000) \
+    STEP(  33, UINT64_C(0x000000000008d76b), 0.165, 0.034537029243750) \
+    STEP(  34, UINT64_C(0x0000000000099691), 0.170, 0.037453764200000) \
+    STEP(  35, UINT64_C(0x00000000000a5edf), 0.175, 0.040510175781250) \
+    STEP(  36, UINT64_C(0x00000000000b3067), 0.180, 0.043707340800000) \
+    STEP(  37, UINT64_C(0x00000000000c0b38), 0.185, 0.047046189818750) \
+    STEP(  38, UINT64_C(0x00000000000cef5e), 0.190, 0.050527509400000) \
+    STEP(  39, UINT64_C(0x00000000000ddce6), 0.195, 0.054151944356250) \
+    STEP(  40, UINT64_C(0x00000000000ed3d8), 0.200, 0.057920000000000) \
+    STEP(  41, UINT64_C(0x00000000000fd439), 0.205, 0.061832044393750) \
+    STEP(  42, UINT64_C(0x000000000010de0e), 0.210, 0.065888310600000) \
+    STEP(  43, UINT64_C(0x000000000011f158), 0.215, 0.070088898931250) \
+    STEP(  44, UINT64_C(0x0000000000130e17), 0.220, 0.074433779200000) \
+    STEP(  45, UINT64_C(0x0000000000143448), 0.225, 0.078922792968750) \
+    STEP(  46, UINT64_C(0x00000000001563e7), 0.230, 0.083555655800000) \
+    STEP(  47, UINT64_C(0x0000000000169cec), 0.235, 0.088331959506250) \
+    STEP(  48, UINT64_C(0x000000000017df4f), 0.240, 0.093251174400000) \
+    STEP(  49, UINT64_C(0x0000000000192b04), 0.245, 0.098312651543750) \
+    STEP(  50, UINT64_C(0x00000000001a8000), 0.250, 0.103515625000000) \
+    STEP(  51, UINT64_C(0x00000000001bde32), 0.255, 0.108859214081250) \
+    STEP(  52, UINT64_C(0x00000000001d458b), 0.260, 0.114342425600000) \
+    STEP(  53, UINT64_C(0x00000000001eb5f8), 0.265, 0.119964156118750) \
+    STEP(  54, UINT64_C(0x0000000000202f65), 0.270, 0.125723194200000) \
+    STEP(  55, UINT64_C(0x000000000021b1bb), 0.275, 0.131618222656250) \
+    STEP(  56, UINT64_C(0x0000000000233ce3), 0.280, 0.137647820800000) \
+    STEP(  57, UINT64_C(0x000000000024d0c3), 0.285, 0.143810466693750) \
+    STEP(  58, UINT64_C(0x0000000000266d40), 0.290, 0.150104539400000) \
+    STEP(  59, UINT64_C(0x000000000028123d), 0.295, 0.156528321231250) \
+    STEP(  60, UINT64_C(0x000000000029bf9c), 0.300, 0.163080000000000) \
+    STEP(  61, UINT64_C(0x00000000002b753d), 0.305, 0.169757671268750) \
+    STEP(  62, UINT64_C(0x00000000002d32fe), 0.310, 0.176559340600000) \
+    STEP(  63, UINT64_C(0x00000000002ef8bc), 0.315, 0.183482925806250) \
+    STEP(  64, UINT64_C(0x000000000030c654), 0.320, 0.190526259200000) \
+    STEP(  65, UINT64_C(0x0000000000329b9f), 0.325, 0.197687089843750) \
+    STEP(  66, UINT64_C(0x0000000000347875), 0.330, 0.204963085800000) \
+    STEP(  67, UINT64_C(0x0000000000365cb0), 0.335, 0.212351836381250) \
+    STEP(  68, UINT64_C(0x0000000000384825), 0.340, 0.219850854400000) \
+    STEP(  69, UINT64_C(0x00000000003a3aa8), 0.345, 0.227457578418750) \
+    STEP(  70, UINT64_C(0x00000000003c340f), 0.350, 0.235169375000000) \
+    STEP(  71, UINT64_C(0x00000000003e342b), 0.355, 0.242983540956250) \
+    STEP(  72, UINT64_C(0x0000000000403ace), 0.360, 0.250897305600000) \
+    STEP(  73, UINT64_C(0x00000000004247c8), 0.365, 0.258907832993750) \
+    STEP(  74, UINT64_C(0x0000000000445ae9), 0.370, 0.267012224200000) \
+    STEP(  75, UINT64_C(0x0000000000467400), 0.375, 0.275207519531250) \
+    STEP(  76, UINT64_C(0x00000000004892d8), 0.380, 0.283490700800000) \
+    STEP(  77, UINT64_C(0x00000000004ab740), 0.385, 0.291858693568750) \
+    STEP(  78, UINT64_C(0x00000000004ce102), 0.390, 0.300308369400000) \
+    STEP(  79, UINT64_C(0x00000000004f0fe9), 0.395, 0.308836548106250) \
+    STEP(  80, UINT64_C(0x00000000005143bf), 0.400, 0.317440000000000) \
+    STEP(  81, UINT64_C(0x0000000000537c4d), 0.405, 0.326115448143750) \
+    STEP(  82, UINT64_C(0x000000000055b95b), 0.410, 0.334859570600000) \
+    STEP(  83, UINT64_C(0x000000000057fab1), 0.415, 0.343669002681250) \
+    STEP(  84, UINT64_C(0x00000000005a4015), 0.420, 0.352540339200000) \
+    STEP(  85, UINT64_C(0x00000000005c894e), 0.425, 0.361470136718750) \
+    STEP(  86, UINT64_C(0x00000000005ed622), 0.430, 0.370454915800000) \
+    STEP(  87, UINT64_C(0x0000000000612655), 0.435, 0.379491163256250) \
+    STEP(  88, UINT64_C(0x00000000006379ac), 0.440, 0.388575334400000) \
+    STEP(  89, UINT64_C(0x000000000065cfeb), 0.445, 0.397703855293750) \
+    STEP(  90, UINT64_C(0x00000000006828d6), 0.450, 0.406873125000000) \
+    STEP(  91, UINT64_C(0x00000000006a842f), 0.455, 0.416079517831250) \
+    STEP(  92, UINT64_C(0x00000000006ce1bb), 0.460, 0.425319385600000) \
+    STEP(  93, UINT64_C(0x00000000006f413a), 0.465, 0.434589059868750) \
+    STEP(  94, UINT64_C(0x000000000071a270), 0.470, 0.443884854200000) \
+    STEP(  95, UINT64_C(0x000000000074051d), 0.475, 0.453203066406250) \
+    STEP(  96, UINT64_C(0x0000000000766905), 0.480, 0.462539980800000) \
+    STEP(  97, UINT64_C(0x000000000078cde7), 0.485, 0.471891870443750) \
+    STEP(  98, UINT64_C(0x00000000007b3387), 0.490, 0.481254999400000) \
+    STEP(  99, UINT64_C(0x00000000007d99a4), 0.495, 0.490625624981250) \
+    STEP( 100, UINT64_C(0x0000000000800000), 0.500, 0.500000000000000) \
+    STEP( 101, UINT64_C(0x000000000082665b), 0.505, 0.509374375018750) \
+    STEP( 102, UINT64_C(0x000000000084cc78), 0.510, 0.518745000600000) \
+    STEP( 103, UINT64_C(0x0000000000873218), 0.515, 0.528108129556250) \
+    STEP( 104, UINT64_C(0x00000000008996fa), 0.520, 0.537460019200000) \
+    STEP( 105, UINT64_C(0x00000000008bfae2), 0.525, 0.546796933593750) \
+    STEP( 106, UINT64_C(0x00000000008e5d8f), 0.530, 0.556115145800000) \
+    STEP( 107, UINT64_C(0x000000000090bec5), 0.535, 0.565410940131250) \
+    STEP( 108, UINT64_C(0x0000000000931e44), 0.540, 0.574680614400000) \
+    STEP( 109, UINT64_C(0x0000000000957bd0), 0.545, 0.583920482168750) \
+    STEP( 110, UINT64_C(0x000000000097d729), 0.550, 0.593126875000000) \
+    STEP( 111, UINT64_C(0x00000000009a3014), 0.555, 0.602296144706250) \
+    STEP( 112, UINT64_C(0x00000000009c8653), 0.560, 0.611424665600000) \
+    STEP( 113, UINT64_C(0x00000000009ed9aa), 0.565, 0.620508836743750) \
+    STEP( 114, UINT64_C(0x0000000000a129dd), 0.570, 0.629545084200000) \
+    STEP( 115, UINT64_C(0x0000000000a376b1), 0.575, 0.638529863281250) \
+    STEP( 116, UINT64_C(0x0000000000a5bfea), 0.580, 0.647459660800000) \
+    STEP( 117, UINT64_C(0x0000000000a8054e), 0.585, 0.656330997318750) \
+    STEP( 118, UINT64_C(0x0000000000aa46a4), 0.590, 0.665140429400000) \
+    STEP( 119, UINT64_C(0x0000000000ac83b2), 0.595, 0.673884551856250) \
+    STEP( 120, UINT64_C(0x0000000000aebc40), 0.600, 0.682560000000000) \
+    STEP( 121, UINT64_C(0x0000000000b0f016), 0.605, 0.691163451893750) \
+    STEP( 122, UINT64_C(0x0000000000b31efd), 0.610, 0.699691630600000) \
+    STEP( 123, UINT64_C(0x0000000000b548bf), 0.615, 0.708141306431250) \
+    STEP( 124, UINT64_C(0x0000000000b76d27), 0.620, 0.716509299200000) \
+    STEP( 125, UINT64_C(0x0000000000b98c00), 0.625, 0.724792480468750) \
+    STEP( 126, UINT64_C(0x0000000000bba516), 0.630, 0.732987775800000) \
+    STEP( 127, UINT64_C(0x0000000000bdb837), 0.635, 0.741092167006250) \
+    STEP( 128, UINT64_C(0x0000000000bfc531), 0.640, 0.749102694400000) \
+    STEP( 129, UINT64_C(0x0000000000c1cbd4), 0.645, 0.757016459043750) \
+    STEP( 130, UINT64_C(0x0000000000c3cbf0), 0.650, 0.764830625000000) \
+    STEP( 131, UINT64_C(0x0000000000c5c557), 0.655, 0.772542421581250) \
+    STEP( 132, UINT64_C(0x0000000000c7b7da), 0.660, 0.780149145600000) \
+    STEP( 133, UINT64_C(0x0000000000c9a34f), 0.665, 0.787648163618750) \
+    STEP( 134, UINT64_C(0x0000000000cb878a), 0.670, 0.795036914200000) \
+    STEP( 135, UINT64_C(0x0000000000cd6460), 0.675, 0.802312910156250) \
+    STEP( 136, UINT64_C(0x0000000000cf39ab), 0.680, 0.809473740800000) \
+    STEP( 137, UINT64_C(0x0000000000d10743), 0.685, 0.816517074193750) \
+    STEP( 138, UINT64_C(0x0000000000d2cd01), 0.690, 0.823440659400000) \
+    STEP( 139, UINT64_C(0x0000000000d48ac2), 0.695, 0.830242328731250) \
+    STEP( 140, UINT64_C(0x0000000000d64063), 0.700, 0.836920000000000) \
+    STEP( 141, UINT64_C(0x0000000000d7edc2), 0.705, 0.843471678768750) \
+    STEP( 142, UINT64_C(0x0000000000d992bf), 0.710, 0.849895460600000) \
+    STEP( 143, UINT64_C(0x0000000000db2f3c), 0.715, 0.856189533306250) \
+    STEP( 144, UINT64_C(0x0000000000dcc31c), 0.720, 0.862352179200000) \
+    STEP( 145, UINT64_C(0x0000000000de4e44), 0.725, 0.868381777343750) \
+    STEP( 146, UINT64_C(0x0000000000dfd09a), 0.730, 0.874276805800000) \
+    STEP( 147, UINT64_C(0x0000000000e14a07), 0.735, 0.880035843881250) \
+    STEP( 148, UINT64_C(0x0000000000e2ba74), 0.740, 0.885657574400000) \
+    STEP( 149, UINT64_C(0x0000000000e421cd), 0.745, 0.891140785918750) \
+    STEP( 150, UINT64_C(0x0000000000e58000), 0.750, 0.896484375000000) \
+    STEP( 151, UINT64_C(0x0000000000e6d4fb), 0.755, 0.901687348456250) \
+    STEP( 152, UINT64_C(0x0000000000e820b0), 0.760, 0.906748825600000) \
+    STEP( 153, UINT64_C(0x0000000000e96313), 0.765, 0.911668040493750) \
+    STEP( 154, UINT64_C(0x0000000000ea9c18), 0.770, 0.916444344200000) \
+    STEP( 155, UINT64_C(0x0000000000ebcbb7), 0.775, 0.921077207031250) \
+    STEP( 156, UINT64_C(0x0000000000ecf1e8), 0.780, 0.925566220800000) \
+    STEP( 157, UINT64_C(0x0000000000ee0ea7), 0.785, 0.929911101068750) \
+    STEP( 158, UINT64_C(0x0000000000ef21f1), 0.790, 0.934111689400000) \
+    STEP( 159, UINT64_C(0x0000000000f02bc6), 0.795, 0.938167955606250) \
+    STEP( 160, UINT64_C(0x0000000000f12c27), 0.800, 0.942080000000000) \
+    STEP( 161, UINT64_C(0x0000000000f22319), 0.805, 0.945848055643750) \
+    STEP( 162, UINT64_C(0x0000000000f310a1), 0.810, 0.949472490600000) \
+    STEP( 163, UINT64_C(0x0000000000f3f4c7), 0.815, 0.952953810181250) \
+    STEP( 164, UINT64_C(0x0000000000f4cf98), 0.820, 0.956292659200000) \
+    STEP( 165, UINT64_C(0x0000000000f5a120), 0.825, 0.959489824218750) \
+    STEP( 166, UINT64_C(0x0000000000f6696e), 0.830, 0.962546235800000) \
+    STEP( 167, UINT64_C(0x0000000000f72894), 0.835, 0.965462970756250) \
+    STEP( 168, UINT64_C(0x0000000000f7dea8), 0.840, 0.968241254400000) \
+    STEP( 169, UINT64_C(0x0000000000f88bc0), 0.845, 0.970882462793750) \
+    STEP( 170, UINT64_C(0x0000000000f92ff6), 0.850, 0.973388125000000) \
+    STEP( 171, UINT64_C(0x0000000000f9cb67), 0.855, 0.975759925331250) \
+    STEP( 172, UINT64_C(0x0000000000fa5e30), 0.860, 0.977999705600000) \
+    STEP( 173, UINT64_C(0x0000000000fae874), 0.865, 0.980109467368750) \
+    STEP( 174, UINT64_C(0x0000000000fb6a57), 0.870, 0.982091374200000) \
+    STEP( 175, UINT64_C(0x0000000000fbe400), 0.875, 0.983947753906250) \
+    STEP( 176, UINT64_C(0x0000000000fc5598), 0.880, 0.985681100800000) \
+    STEP( 177, UINT64_C(0x0000000000fcbf4e), 0.885, 0.987294077943750) \
+    STEP( 178, UINT64_C(0x0000000000fd214f), 0.890, 0.988789519400000) \
+    STEP( 179, UINT64_C(0x0000000000fd7bcf), 0.895, 0.990170432481250) \
+    STEP( 180, UINT64_C(0x0000000000fdcf03), 0.900, 0.991440000000000) \
+    STEP( 181, UINT64_C(0x0000000000fe1b23), 0.905, 0.992601582518750) \
+    STEP( 182, UINT64_C(0x0000000000fe606a), 0.910, 0.993658720600000) \
+    STEP( 183, UINT64_C(0x0000000000fe9f18), 0.915, 0.994615137056250) \
+    STEP( 184, UINT64_C(0x0000000000fed76e), 0.920, 0.995474739200000) \
+    STEP( 185, UINT64_C(0x0000000000ff09b0), 0.925, 0.996241621093750) \
+    STEP( 186, UINT64_C(0x0000000000ff3627), 0.930, 0.996920065800000) \
+    STEP( 187, UINT64_C(0x0000000000ff5d1d), 0.935, 0.997514547631250) \
+    STEP( 188, UINT64_C(0x0000000000ff7ee0), 0.940, 0.998029734400000) \
+    STEP( 189, UINT64_C(0x0000000000ff9bc3), 0.945, 0.998470489668750) \
+    STEP( 190, UINT64_C(0x0000000000ffb419), 0.950, 0.998841875000000) \
+    STEP( 191, UINT64_C(0x0000000000ffc83d), 0.955, 0.999149152206250) \
+    STEP( 192, UINT64_C(0x0000000000ffd888), 0.960, 0.999397785600000) \
+    STEP( 193, UINT64_C(0x0000000000ffe55b), 0.965, 0.999593444243750) \
+    STEP( 194, UINT64_C(0x0000000000ffef17), 0.970, 0.999742004200000) \
+    STEP( 195, UINT64_C(0x0000000000fff623), 0.975, 0.999849550781250) \
+    STEP( 196, UINT64_C(0x0000000000fffae9), 0.980, 0.999922380800000) \
+    STEP( 197, UINT64_C(0x0000000000fffdd6), 0.985, 0.999967004818750) \
+    STEP( 198, UINT64_C(0x0000000000ffff5a), 0.990, 0.999990149400000) \
+    STEP( 199, UINT64_C(0x0000000000ffffeb), 0.995, 0.999998759356250) \
+    STEP( 200, UINT64_C(0x0000000001000000), 1.000, 1.000000000000000) \
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
new file mode 100755
--- /dev/null
+++ b/memory/jemalloc/src/include/jemalloc/internal/smoothstep.sh
@@ -0,0 +1,115 @@
+#!/bin/sh
+#
+# Generate a discrete lookup table for a sigmoid function in the smoothstep
+# family (https://en.wikipedia.org/wiki/Smoothstep), where the lookup table
+# entries correspond to x in [1/nsteps, 2/nsteps, ..., nsteps/nsteps].  Encode
+# the entries using a binary fixed point representation.
+#
+# Usage: smoothstep.sh <variant> <nsteps> <bfp> <xprec> <yprec>
+#
+#        <variant> is in {smooth, smoother, smoothest}.
+#        <nsteps> must be greater than zero.
+#        <bfp> must be in [0..62]; reasonable values are roughly [10..30].
+#        <xprec> is x decimal precision.
+#        <yprec> is y decimal precision.
+
+#set -x
+
+cmd="sh smoothstep.sh $*"
+variant=$1
+nsteps=$2
+bfp=$3
+xprec=$4
+yprec=$5
+
+case "${variant}" in
+  smooth)
+    ;;
+  smoother)
+    ;;
+  smoothest)
+    ;;
+  *)
+    echo "Unsupported variant"
+    exit 1
+    ;;
+esac
+
+smooth() {
+  step=$1
+  y=`echo ${yprec} k ${step} ${nsteps} / sx _2 lx 3 ^ '*' 3 lx 2 ^ '*' + p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'`
+  h=`echo ${yprec} k 2 ${bfp} ^ ${y} '*' p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g' | tr '.' ' ' | awk '{print $1}' `
+}
+
+smoother() {
+  step=$1
+  y=`echo ${yprec} k ${step} ${nsteps} / sx 6 lx 5 ^ '*' _15 lx 4 ^ '*' + 10 lx 3 ^ '*' + p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'`
+  h=`echo ${yprec} k 2 ${bfp} ^ ${y} '*' p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g' | tr '.' ' ' | awk '{print $1}' `
+}
+
+smoothest() {
+  step=$1
+  y=`echo ${yprec} k ${step} ${nsteps} / sx _20 lx 7 ^ '*' 70 lx 6 ^ '*' + _84 lx 5 ^ '*' + 35 lx 4 ^ '*' + p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'`
+  h=`echo ${yprec} k 2 ${bfp} ^ ${y} '*' p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g' | tr '.' ' ' | awk '{print $1}' `
+}
+
+cat <<EOF
+/*
+ * This file was generated by the following command:
+ *   $cmd
+ */
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+/*
+ * This header defines a precomputed table based on the smoothstep family of
+ * sigmoidal curves (https://en.wikipedia.org/wiki/Smoothstep) that grow from 0
+ * to 1 in 0 <= x <= 1.  The table is stored as integer fixed point values so
+ * that floating point math can be avoided.
+ *
+ *                      3     2
+ *   smoothstep(x) = -2x  + 3x
+ *
+ *                       5      4      3
+ *   smootherstep(x) = 6x  - 15x  + 10x
+ *
+ *                          7      6      5      4
+ *   smootheststep(x) = -20x  + 70x  - 84x  + 35x
+ */
+
+#define	SMOOTHSTEP_VARIANT	"${variant}"
+#define	SMOOTHSTEP_NSTEPS	${nsteps}
+#define	SMOOTHSTEP_BFP		${bfp}
+#define	SMOOTHSTEP \\
+ /* STEP(step, h,                            x,     y) */ \\
+EOF
+
+s=1
+while [ $s -le $nsteps ] ; do
+  $variant ${s}
+  x=`echo ${xprec} k ${s} ${nsteps} / p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'`
+  printf '    STEP(%4d, UINT64_C(0x%016x), %s, %s) \\\n' ${s} ${h} ${x} ${y}
+
+  s=$((s+1))
+done
+echo
+
+cat <<EOF
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
+EOF
--- a/memory/jemalloc/src/include/jemalloc/internal/stats.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/stats.h
@@ -162,22 +162,32 @@ stats_cactive_get(void)
 {
 
 	return (atomic_read_z(&stats_cactive));
 }
 
 JEMALLOC_INLINE void
 stats_cactive_add(size_t size)
 {
+	UNUSED size_t cactive;
 
-	atomic_add_z(&stats_cactive, size);
+	assert(size > 0);
+	assert((size & chunksize_mask) == 0);
+
+	cactive = atomic_add_z(&stats_cactive, size);
+	assert(cactive - size < cactive);
 }
 
 JEMALLOC_INLINE void
 stats_cactive_sub(size_t size)
 {
+	UNUSED size_t cactive;
 
-	atomic_sub_z(&stats_cactive, size);
+	assert(size > 0);
+	assert((size & chunksize_mask) == 0);
+
+	cactive = atomic_sub_z(&stats_cactive, size);
+	assert(cactive + size > cactive);
 }
 #endif
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/internal/tcache.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/tcache.h
@@ -78,17 +78,17 @@ struct tcache_bin_s {
 	 * be allocated first.
 	 */
 	void		**avail;	/* Stack of available objects. */
 };
 
 struct tcache_s {
 	ql_elm(tcache_t) link;		/* Used for aggregating stats. */
 	uint64_t	prof_accumbytes;/* Cleared after arena_prof_accum(). */
-	unsigned	ev_cnt;		/* Event count since incremental GC. */
+	ticker_t	gc_ticker;	/* Drives incremental GC. */
 	szind_t		next_gc_bin;	/* Next bin to GC. */
 	tcache_bin_t	tbins[1];	/* Dynamically sized. */
 	/*
 	 * The pointer stacks associated with tbins follow as a contiguous
 	 * array.  During tcache initialization, the avail pointer in each
 	 * element of tbins is initialized to point to the proper offset within
 	 * this array.
 	 */
@@ -110,17 +110,17 @@ extern bool	opt_tcache;
 extern ssize_t	opt_lg_tcache_max;
 
 extern tcache_bin_info_t	*tcache_bin_info;
 
 /*
  * Number of tcache bins.  There are NBINS small-object bins, plus 0 or more
  * large-object bins.
  */
-extern size_t	nhbins;
+extern unsigned	nhbins;
 
 /* Maximum cached size class. */
 extern size_t	tcache_maxclass;
 
 /*
  * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and
  * usable via the MALLOCX_TCACHE() flag.  The automatic per thread tcaches are
  * completely disjoint from this data structure.  tcaches starts off as a sparse
@@ -242,19 +242,17 @@ tcache_get(tsd_t *tsd, bool create)
 
 JEMALLOC_ALWAYS_INLINE void
 tcache_event(tsd_t *tsd, tcache_t *tcache)
 {
 
 	if (TCACHE_GC_INCR == 0)
 		return;
 
-	tcache->ev_cnt++;
-	assert(tcache->ev_cnt <= TCACHE_GC_INCR);
-	if (unlikely(tcache->ev_cnt == TCACHE_GC_INCR))
+	if (unlikely(ticker_tick(&tcache->gc_ticker)))
 		tcache_event_hard(tsd, tcache);
 }
 
 JEMALLOC_ALWAYS_INLINE void *
 tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success)
 {
 	void *ret;
 
@@ -341,39 +339,39 @@ tcache_alloc_small(tsd_t *tsd, arena_t *
 
 JEMALLOC_ALWAYS_INLINE void *
 tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
     szind_t binind, bool zero, bool slow_path)
 {
 	void *ret;
 	tcache_bin_t *tbin;
 	bool tcache_success;
-	size_t usize JEMALLOC_CC_SILENCE_INIT(0);
 
 	assert(binind < nhbins);
 	tbin = &tcache->tbins[binind];
 	ret = tcache_alloc_easy(tbin, &tcache_success);
 	assert(tcache_success == (ret != NULL));
 	if (unlikely(!tcache_success)) {
 		/*
 		 * Only allocate one large object at a time, because it's quite
 		 * expensive to create one and not use it.
 		 */
 		arena = arena_choose(tsd, arena);
 		if (unlikely(arena == NULL))
 			return (NULL);
 
-		usize = index2size(binind);
-		assert(usize <= tcache_maxclass);
-		ret = arena_malloc_large(arena, usize, binind, zero);
+		ret = arena_malloc_large(tsd, arena, binind, zero);
 		if (ret == NULL)
 			return (NULL);
 	} else {
+		size_t usize JEMALLOC_CC_SILENCE_INIT(0);
+
 		/* Only compute usize on demand */
-		if (config_prof || (slow_path && config_fill) || unlikely(zero)) {
+		if (config_prof || (slow_path && config_fill) ||
+		    unlikely(zero)) {
 			usize = index2size(binind);
 			assert(usize <= tcache_maxclass);
 		}
 
 		if (config_prof && usize == LARGE_MINCLASS) {
 			arena_chunk_t *chunk =
 			    (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
 			size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >>
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/include/jemalloc/internal/ticker.h
@@ -0,0 +1,75 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+typedef struct ticker_s ticker_t;
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+struct ticker_s {
+	int32_t	tick;
+	int32_t	nticks;
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+void	ticker_init(ticker_t *ticker, int32_t nticks);
+void	ticker_copy(ticker_t *ticker, const ticker_t *other);
+int32_t	ticker_read(const ticker_t *ticker);
+bool	ticker_ticks(ticker_t *ticker, int32_t nticks);
+bool	ticker_tick(ticker_t *ticker);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TICKER_C_))
+JEMALLOC_INLINE void
+ticker_init(ticker_t *ticker, int32_t nticks)
+{
+
+	ticker->tick = nticks;
+	ticker->nticks = nticks;
+}
+
+JEMALLOC_INLINE void
+ticker_copy(ticker_t *ticker, const ticker_t *other)
+{
+
+	*ticker = *other;
+}
+
+JEMALLOC_INLINE int32_t
+ticker_read(const ticker_t *ticker)
+{
+
+	return (ticker->tick);
+}
+
+JEMALLOC_INLINE bool
+ticker_ticks(ticker_t *ticker, int32_t nticks)
+{
+
+	if (unlikely(ticker->tick < nticks)) {
+		ticker->tick = ticker->nticks;
+		return (true);
+	}
+	ticker->tick -= nticks;
+	return(false);
+}
+
+JEMALLOC_INLINE bool
+ticker_tick(ticker_t *ticker)
+{
+
+	return (ticker_ticks(ticker, 1));
+}
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
--- a/memory/jemalloc/src/include/jemalloc/internal/tsd.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/tsd.h
@@ -532,19 +532,19 @@ struct tsd_init_head_s {
 
 #define	MALLOC_TSD							\
 /*  O(name,			type) */				\
     O(tcache,			tcache_t *)				\
     O(thread_allocated,		uint64_t)				\
     O(thread_deallocated,	uint64_t)				\
     O(prof_tdata,		prof_tdata_t *)				\
     O(arena,			arena_t *)				\
-    O(arenas_cache,		arena_t **)				\
-    O(narenas_cache,		unsigned)				\
-    O(arenas_cache_bypass,	bool)					\
+    O(arenas_tdata,		arena_tdata_t *)			\
+    O(narenas_tdata,		unsigned)				\
+    O(arenas_tdata_bypass,	bool)					\
     O(tcache_enabled,		tcache_enabled_t)			\
     O(quarantine,		quarantine_t *)				\
 
 #define	TSD_INITIALIZER {						\
     tsd_state_uninitialized,						\
     NULL,								\
     0,									\
     0,									\
--- a/memory/jemalloc/src/include/jemalloc/internal/util.h
+++ b/memory/jemalloc/src/include/jemalloc/internal/util.h
@@ -116,135 +116,207 @@ void malloc_cprintf(void (*write)(void *
     const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4);
 void	malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
-int	jemalloc_ffsl(long bitmap);
-int	jemalloc_ffs(int bitmap);
-size_t	pow2_ceil(size_t x);
-size_t	lg_floor(size_t x);
+unsigned	ffs_llu(unsigned long long bitmap);
+unsigned	ffs_lu(unsigned long bitmap);
+unsigned	ffs_u(unsigned bitmap);
+unsigned	ffs_zu(size_t bitmap);
+unsigned	ffs_u64(uint64_t bitmap);
+unsigned	ffs_u32(uint32_t bitmap);
+uint64_t	pow2_ceil_u64(uint64_t x);
+uint32_t	pow2_ceil_u32(uint32_t x);
+size_t	pow2_ceil_zu(size_t x);
+unsigned	lg_floor(size_t x);
 void	set_errno(int errnum);
 int	get_errno(void);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_))
 
 /* Sanity check. */
-#if !defined(JEMALLOC_INTERNAL_FFSL) || !defined(JEMALLOC_INTERNAL_FFS)
-#  error Both JEMALLOC_INTERNAL_FFSL && JEMALLOC_INTERNAL_FFS should have been defined by configure
+#if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \
+    || !defined(JEMALLOC_INTERNAL_FFS)
+#  error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure
 #endif
 
-JEMALLOC_ALWAYS_INLINE int
-jemalloc_ffsl(long bitmap)
+JEMALLOC_ALWAYS_INLINE unsigned
+ffs_llu(unsigned long long bitmap)
+{
+
+	return (JEMALLOC_INTERNAL_FFSLL(bitmap));
+}
+
+JEMALLOC_ALWAYS_INLINE unsigned
+ffs_lu(unsigned long bitmap)
 {
 
 	return (JEMALLOC_INTERNAL_FFSL(bitmap));
 }
 
-JEMALLOC_ALWAYS_INLINE int
-jemalloc_ffs(int bitmap)
+JEMALLOC_ALWAYS_INLINE unsigned
+ffs_u(unsigned bitmap)
 {
 
 	return (JEMALLOC_INTERNAL_FFS(bitmap));
 }
 
-/* Compute the smallest power of 2 that is >= x. */
-JEMALLOC_INLINE size_t
-pow2_ceil(size_t x)
+JEMALLOC_ALWAYS_INLINE unsigned
+ffs_zu(size_t bitmap)
+{
+
+#if LG_SIZEOF_PTR == LG_SIZEOF_INT
+	return (ffs_u(bitmap));
+#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG
+	return (ffs_lu(bitmap));
+#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG
+	return (ffs_llu(bitmap));
+#else
+#error No implementation for size_t ffs()
+#endif
+}
+
+JEMALLOC_ALWAYS_INLINE unsigned
+ffs_u64(uint64_t bitmap)
+{
+
+#if LG_SIZEOF_LONG == 3
+	return (ffs_lu(bitmap));
+#elif LG_SIZEOF_LONG_LONG == 3
+	return (ffs_llu(bitmap));
+#else
+#error No implementation for 64-bit ffs()
+#endif
+}
+
+JEMALLOC_ALWAYS_INLINE unsigned
+ffs_u32(uint32_t bitmap)
+{
+
+#if LG_SIZEOF_INT == 2
+	return (ffs_u(bitmap));
+#else
+#error No implementation for 32-bit ffs()
+#endif
+	return (ffs_u(bitmap));
+}
+
+JEMALLOC_INLINE uint64_t
+pow2_ceil_u64(uint64_t x)
 {
 
 	x--;
 	x |= x >> 1;
 	x |= x >> 2;
 	x |= x >> 4;
 	x |= x >> 8;
 	x |= x >> 16;
-#if (LG_SIZEOF_PTR == 3)
 	x |= x >> 32;
-#endif
 	x++;
 	return (x);
 }
 
+JEMALLOC_INLINE uint32_t
+pow2_ceil_u32(uint32_t x)
+{
+
+	x--;
+	x |= x >> 1;
+	x |= x >> 2;
+	x |= x >> 4;
+	x |= x >> 8;
+	x |= x >> 16;
+	x++;
+	return (x);
+}
+
+/* Compute the smallest power of 2 that is >= x. */
+JEMALLOC_INLINE size_t
+pow2_ceil_zu(size_t x)
+{
+
+#if (LG_SIZEOF_PTR == 3)
+	return (pow2_ceil_u64(x));
+#else
+	return (pow2_ceil_u32(x));
+#endif
+}
+
 #if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
-JEMALLOC_INLINE size_t
+JEMALLOC_INLINE unsigned
 lg_floor(size_t x)
 {
 	size_t ret;
 
 	assert(x != 0);
 
 	asm ("bsr %1, %0"
 	    : "=r"(ret) // Outputs.
 	    : "r"(x)    // Inputs.
 	    );
-	return (ret);
+	assert(ret < UINT_MAX);
+	return ((unsigned)ret);
 }
 #elif (defined(_MSC_VER))
-JEMALLOC_INLINE size_t
+JEMALLOC_INLINE unsigned
 lg_floor(size_t x)
 {
 	unsigned long ret;
 
 	assert(x != 0);
 
 #if (LG_SIZEOF_PTR == 3)
 	_BitScanReverse64(&ret, x);
 #elif (LG_SIZEOF_PTR == 2)
 	_BitScanReverse(&ret, x);
 #else
-#  error "Unsupported type sizes for lg_floor()"
+#  error "Unsupported type size for lg_floor()"
 #endif
-	return (ret);
+	assert(ret < UINT_MAX);
+	return ((unsigned)ret);
 }
 #elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ))
-JEMALLOC_INLINE size_t
+JEMALLOC_INLINE unsigned
 lg_floor(size_t x)
 {
 
 	assert(x != 0);
 
 #if (LG_SIZEOF_PTR == LG_SIZEOF_INT)
 	return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clz(x));
 #elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG)
 	return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x));
 #else
-#  error "Unsupported type sizes for lg_floor()"
+#  error "Unsupported type size for lg_floor()"
 #endif
 }
 #else
-JEMALLOC_INLINE size_t
+JEMALLOC_INLINE unsigned
 lg_floor(size_t x)
 {
 
 	assert(x != 0);
 
 	x |= (x >> 1);
 	x |= (x >> 2);
 	x |= (x >> 4);
 	x |= (x >> 8);
 	x |= (x >> 16);
-#if (LG_SIZEOF_PTR == 3 && LG_SIZEOF_PTR == LG_SIZEOF_LONG)
+#if (LG_SIZEOF_PTR == 3)
 	x |= (x >> 32);
-	if (x == KZU(0xffffffffffffffff))
-		return (63);
+#endif
+	if (x == SIZE_T_MAX)
+		return ((8 << LG_SIZEOF_PTR) - 1);
 	x++;
-	return (jemalloc_ffsl(x) - 2);
-#elif (LG_SIZEOF_PTR == 2)
-	if (x == KZU(0xffffffff))
-		return (31);
-	x++;
-	return (jemalloc_ffs(x) - 2);
-#else
-#  error "Unsupported type sizes for lg_floor()"
-#endif
+	return (ffs_zu(x) - 2);
 }
 #endif
 
 /* Set error code. */
 JEMALLOC_INLINE void
 set_errno(int errnum)
 {
 
--- a/memory/jemalloc/src/include/jemalloc/jemalloc_defs.h.in
+++ b/memory/jemalloc/src/include/jemalloc/jemalloc_defs.h.in
@@ -28,10 +28,18 @@
 
 /*
  * If defined, specify throw() for the public function prototypes when compiling
  * with C++.  The only justification for this is to match the prototypes that
  * glibc defines.
  */
 #undef JEMALLOC_USE_CXX_THROW
 
+#ifdef _MSC_VER
+#  ifdef _WIN64
+#    define LG_SIZEOF_PTR_WIN 3
+#  else
+#    define LG_SIZEOF_PTR_WIN 2
+#  endif
+#endif
+
 /* sizeof(void *) == 2^LG_SIZEOF_PTR. */
 #undef LG_SIZEOF_PTR
--- a/memory/jemalloc/src/include/jemalloc/jemalloc_macros.h.in
+++ b/memory/jemalloc/src/include/jemalloc/jemalloc_macros.h.in
@@ -6,22 +6,23 @@
 
 #define	JEMALLOC_VERSION "@jemalloc_version@"
 #define	JEMALLOC_VERSION_MAJOR @jemalloc_version_major@
 #define	JEMALLOC_VERSION_MINOR @jemalloc_version_minor@
 #define	JEMALLOC_VERSION_BUGFIX @jemalloc_version_bugfix@
 #define	JEMALLOC_VERSION_NREV @jemalloc_version_nrev@
 #define	JEMALLOC_VERSION_GID "@jemalloc_version_gid@"
 
-#  define MALLOCX_LG_ALIGN(la)	(la)
+#  define MALLOCX_LG_ALIGN(la)	((int)(la))
 #  if LG_SIZEOF_PTR == 2
-#    define MALLOCX_ALIGN(a)	(ffs(a)-1)
+#    define MALLOCX_ALIGN(a)	((int)(ffs(a)-1))
 #  else
 #    define MALLOCX_ALIGN(a)						\
-	 ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31)
+       ((int)(((a) < (size_t)INT_MAX) ? ffs((int)(a))-1 :		\
+       ffs((int)((a)>>32))+31))
 #  endif
 #  define MALLOCX_ZERO	((int)0x40)
 /*
  * Bias tcache index bits so that 0 encodes "automatic tcache management", and 1
  * encodes MALLOCX_TCACHE_NONE.
  */
 #  define MALLOCX_TCACHE(tc)	((int)(((tc)+2) << 8))
 #  define MALLOCX_TCACHE_NONE	MALLOCX_TCACHE(-1)
--- a/memory/jemalloc/src/include/msvc_compat/strings.h
+++ b/memory/jemalloc/src/include/msvc_compat/strings.h
@@ -16,14 +16,44 @@ static __forceinline int ffsl(long x)
 }
 
 static __forceinline int ffs(int x)
 {
 
 	return (ffsl(x));
 }
 
+#  ifdef  _M_X64
+#    pragma intrinsic(_BitScanForward64)
+#  endif
+
+static __forceinline int ffsll(unsigned __int64 x)
+{
+	unsigned long i;
+#ifdef  _M_X64
+	if (_BitScanForward64(&i, x))
+		return (i + 1);
+	return (0);
 #else
+// Fallback for 32-bit build where 64-bit version not available
+// assuming little endian
+	union {
+		unsigned __int64 ll;
+		unsigned   long l[2];
+	} s;
+
+	s.ll = x;
+
+	if (_BitScanForward(&i, s.l[0]))
+		return (i + 1);
+	else if(_BitScanForward(&i, s.l[1]))
+		return (i + 33);
+	return (0);
+#endif
+}
+
+#else
+#  define ffsll(x) __builtin_ffsll(x)
 #  define ffsl(x) __builtin_ffsl(x)
 #  define ffs(x) __builtin_ffs(x)
 #endif
 
 #endif /* strings_h */
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/msvc/ReadMe.txt
@@ -0,0 +1,24 @@
+
+How to build jemalloc for Windows
+=================================
+
+1. Install Cygwin with at least the following packages:
+   * autoconf
+   * autogen
+   * gawk
+   * grep
+   * sed
+
+2. Install Visual Studio 2015 with Visual C++
+
+3. Add Cygwin\bin to the PATH environment variable
+
+4. Open "VS2015 x86 Native Tools Command Prompt"
+   (note: x86/x64 doesn't matter at this point)
+
+5. Generate header files:
+   sh -c "./autogen.sh CC=cl --enable-lazy-lock=no"
+
+6. Now the project can be opened and built in Visual Studio:
+   msvc\jemalloc_vc2015.sln
+
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/msvc/jemalloc_vc2015.sln
@@ -0,0 +1,63 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 14
+VisualStudioVersion = 14.0.24720.0
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{70A99006-6DE9-472B-8F83-4CEE6C616DF3}"
+	ProjectSection(SolutionItems) = preProject
+		ReadMe.txt = ReadMe.txt
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jemalloc", "projects\vc2015\jemalloc\jemalloc.vcxproj", "{8D6BB292-9E1C-413D-9F98-4864BDC1514A}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test_threads", "projects\vc2015\test_threads\test_threads.vcxproj", "{09028CFD-4EB7-491D-869C-0708DB97ED44}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|x64 = Debug|x64
+		Debug|x86 = Debug|x86
+		Debug-static|x64 = Debug-static|x64
+		Debug-static|x86 = Debug-static|x86
+		Release|x64 = Release|x64
+		Release|x86 = Release|x86
+		Release-static|x64 = Release-static|x64
+		Release-static|x86 = Release-static|x86
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.ActiveCfg = Debug|x64
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.Build.0 = Debug|x64
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.ActiveCfg = Debug|Win32
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.Build.0 = Debug|Win32
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.ActiveCfg = Debug-static|x64
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.Build.0 = Debug-static|x64
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.ActiveCfg = Debug-static|Win32
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.Build.0 = Debug-static|Win32
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.ActiveCfg = Release|x64
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.Build.0 = Release|x64
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.ActiveCfg = Release|Win32
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.Build.0 = Release|Win32
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.ActiveCfg = Release-static|x64
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.Build.0 = Release-static|x64
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.ActiveCfg = Release-static|Win32
+		{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.Build.0 = Release-static|Win32
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.ActiveCfg = Debug|x64
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.Build.0 = Debug|x64
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.ActiveCfg = Debug|Win32
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.Build.0 = Debug|Win32
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.ActiveCfg = Debug-static|x64
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.Build.0 = Debug-static|x64
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.ActiveCfg = Debug-static|Win32
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.Build.0 = Debug-static|Win32
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.ActiveCfg = Release|x64
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.Build.0 = Release|x64
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.ActiveCfg = Release|Win32
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.Build.0 = Release|Win32
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.ActiveCfg = Release-static|x64
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.Build.0 = Release-static|x64
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.ActiveCfg = Release-static|Win32
+		{09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.Build.0 = Release-static|Win32
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
@@ -0,0 +1,392 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug-static|Win32">
+      <Configuration>Debug-static</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug-static|x64">
+      <Configuration>Debug-static</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release-static|Win32">
+      <Configuration>Release-static</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release-static|x64">
+      <Configuration>Release-static</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\arena.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\assert.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\atomic.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\base.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\bitmap.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\chunk.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\chunk_dss.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\chunk_mmap.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\ckh.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\ctl.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\extent.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\hash.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\huge.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\jemalloc_internal.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\jemalloc_internal_decls.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\jemalloc_internal_defs.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\jemalloc_internal_macros.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\mb.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\mutex.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\pages.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\private_namespace.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\private_unnamespace.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\prng.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\prof.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\public_namespace.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\public_unnamespace.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\ql.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\qr.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\quarantine.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\rb.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\rtree.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\size_classes.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\stats.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\tcache.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\tsd.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\util.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\valgrind.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_defs.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_macros.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_mangle.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_protos.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_protos_jet.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_rename.h" />
+    <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_typedefs.h" />
+    <ClInclude Include="..\..\..\..\include\msvc_compat\C99\stdbool.h" />
+    <ClInclude Include="..\..\..\..\include\msvc_compat\C99\stdint.h" />
+    <ClInclude Include="..\..\..\..\include\msvc_compat\strings.h" />
+    <ClInclude Include="..\..\..\..\include\msvc_compat\windows_extra.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\..\src\arena.c" />
+    <ClCompile Include="..\..\..\..\src\atomic.c" />
+    <ClCompile Include="..\..\..\..\src\base.c" />
+    <ClCompile Include="..\..\..\..\src\bitmap.c" />
+    <ClCompile Include="..\..\..\..\src\chunk.c" />
+    <ClCompile Include="..\..\..\..\src\chunk_dss.c" />
+    <ClCompile Include="..\..\..\..\src\chunk_mmap.c" />
+    <ClCompile Include="..\..\..\..\src\ckh.c" />
+    <ClCompile Include="..\..\..\..\src\ctl.c" />
+    <ClCompile Include="..\..\..\..\src\extent.c" />
+    <ClCompile Include="..\..\..\..\src\hash.c" />
+    <ClCompile Include="..\..\..\..\src\huge.c" />
+    <ClCompile Include="..\..\..\..\src\jemalloc.c" />
+    <ClCompile Include="..\..\..\..\src\mb.c" />
+    <ClCompile Include="..\..\..\..\src\mutex.c" />
+    <ClCompile Include="..\..\..\..\src\nstime.c" />
+    <ClCompile Include="..\..\..\..\src\pages.c" />
+    <ClCompile Include="..\..\..\..\src\prof.c" />
+    <ClCompile Include="..\..\..\..\src\quarantine.c" />
+    <ClCompile Include="..\..\..\..\src\rtree.c" />
+    <ClCompile Include="..\..\..\..\src\stats.c" />
+    <ClCompile Include="..\..\..\..\src\tcache.c" />
+    <ClCompile Include="..\..\..\..\src\tsd.c" />
+    <ClCompile Include="..\..\..\..\src\util.c" />
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{8D6BB292-9E1C-413D-9F98-4864BDC1514A}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>jemalloc</RootNamespace>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)d</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)-$(PlatformToolset)-$(Configuration)</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)-$(PlatformToolset)-$(Configuration)</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)d</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)-$(PlatformToolset)-$(Configuration)</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)-$(PlatformToolset)-$(Configuration)</TargetName>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>_REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
+      <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
+      <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>_REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
+      <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
+      <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>_REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
+      <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>_REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
+      <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>_REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
+      <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>_REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <DisableSpecificWarnings>4090;4146;4244;4267;4334</DisableSpecificWarnings>
+      <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
@@ -0,0 +1,245 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="Header Files\internal">
+      <UniqueIdentifier>{5697dfa3-16cf-4932-b428-6e0ec6e9f98e}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Header Files\msvc_compat">
+      <UniqueIdentifier>{0cbd2ca6-42a7-4f82-8517-d7e7a14fd986}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Header Files\msvc_compat\C99">
+      <UniqueIdentifier>{0abe6f30-49b5-46dd-8aca-6e33363fa52c}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_defs.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_macros.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_mangle.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_protos.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_protos_jet.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_rename.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\jemalloc_typedefs.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\arena.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\assert.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\atomic.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\base.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\bitmap.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\chunk.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\chunk_dss.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\chunk_mmap.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\ckh.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\ctl.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\extent.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\hash.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\huge.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\jemalloc_internal.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\jemalloc_internal_decls.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\jemalloc_internal_defs.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\jemalloc_internal_macros.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\mb.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\mutex.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\pages.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\private_namespace.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\private_unnamespace.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\prng.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\prof.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\public_namespace.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\public_unnamespace.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\ql.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\qr.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\quarantine.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\rb.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\rtree.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\size_classes.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\stats.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\tcache.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\tsd.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\util.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\jemalloc\internal\valgrind.h">
+      <Filter>Header Files\internal</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\msvc_compat\strings.h">
+      <Filter>Header Files\msvc_compat</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\msvc_compat\windows_extra.h">
+      <Filter>Header Files\msvc_compat</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\msvc_compat\C99\stdbool.h">
+      <Filter>Header Files\msvc_compat\C99</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\..\include\msvc_compat\C99\stdint.h">
+      <Filter>Header Files\msvc_compat\C99</Filter>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\..\src\arena.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\atomic.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\base.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\bitmap.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\chunk.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\chunk_dss.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\chunk_mmap.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\ckh.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\ctl.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\extent.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\hash.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\huge.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\jemalloc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\mb.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\mutex.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\pages.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\prof.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\quarantine.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\rtree.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\stats.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\tcache.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\tsd.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\util.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\..\src\nstime.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/msvc/projects/vc2015/test_threads/test_threads.cpp
@@ -0,0 +1,89 @@
+// jemalloc C++ threaded test
+// Author: Rustam Abdullaev
+// Public Domain
+
+#include <atomic>
+#include <functional>
+#include <future>
+#include <random>
+#include <thread>
+#include <vector>
+#include <stdio.h>
+#include <jemalloc/jemalloc.h>
+
+using std::vector;
+using std::thread;
+using std::uniform_int_distribution;
+using std::minstd_rand;
+
+int test_threads()
+{
+  je_malloc_conf = "narenas:3";
+  int narenas = 0;
+  size_t sz = sizeof(narenas);
+  je_mallctl("opt.narenas", &narenas, &sz, NULL, 0);
+  if (narenas != 3) {
+    printf("Error: unexpected number of arenas: %d\n", narenas);
+    return 1;
+  }
+  static const int sizes[] = { 7, 16, 32, 60, 91, 100, 120, 144, 169, 199, 255, 400, 670, 900, 917, 1025, 3333, 5190, 13131, 49192, 99999, 123123, 255265, 2333111 };
+  static const int numSizes = (int)(sizeof(sizes) / sizeof(sizes[0]));
+  vector<thread> workers;
+  static const int numThreads = narenas + 1, numAllocsMax = 25, numIter1 = 50, numIter2 = 50;
+  je_malloc_stats_print(NULL, NULL, NULL);
+  size_t allocated1;
+  size_t sz1 = sizeof(allocated1);
+  je_mallctl("stats.active", &allocated1, &sz1, NULL, 0);
+  printf("\nPress Enter to start threads...\n");
+  getchar();
+  printf("Starting %d threads x %d x %d iterations...\n", numThreads, numIter1, numIter2);
+  for (int i = 0; i < numThreads; i++) {
+    workers.emplace_back([tid=i]() {
+      uniform_int_distribution<int> sizeDist(0, numSizes - 1);
+      minstd_rand rnd(tid * 17);
+      uint8_t* ptrs[numAllocsMax];
+      int ptrsz[numAllocsMax];
+      for (int i = 0; i < numIter1; ++i) {
+        thread t([&]() {
+          for (int i = 0; i < numIter2; ++i) {
+            const int numAllocs = numAllocsMax - sizeDist(rnd);
+            for (int j = 0; j < numAllocs; j += 64) {
+              const int x = sizeDist(rnd);
+              const int sz = sizes[x];
+              ptrsz[j] = sz;
+              ptrs[j] = (uint8_t*)je_malloc(sz);
+              if (!ptrs[j]) {
+                printf("Unable to allocate %d bytes in thread %d, iter %d, alloc %d. %d\n", sz, tid, i, j, x);
+                exit(1);
+              }
+              for (int k = 0; k < sz; k++)
+                ptrs[j][k] = tid + k;
+            }
+            for (int j = 0; j < numAllocs; j += 64) {
+              for (int k = 0, sz = ptrsz[j]; k < sz; k++)
+                if (ptrs[j][k] != (uint8_t)(tid + k)) {
+                  printf("Memory error in thread %d, iter %d, alloc %d @ %d : %02X!=%02X\n", tid, i, j, k, ptrs[j][k], (uint8_t)(tid + k));
+                  exit(1);
+                }
+              je_free(ptrs[j]);
+            }
+          }
+        });
+        t.join();
+      }
+    });
+  }
+  for (thread& t : workers) {
+    t.join();
+  }
+  je_malloc_stats_print(NULL, NULL, NULL);
+  size_t allocated2;
+  je_mallctl("stats.active", &allocated2, &sz1, NULL, 0);
+  size_t leaked = allocated2 - allocated1;
+  printf("\nDone. Leaked: %zd bytes\n", leaked);
+  bool failed = leaked > 65536; // in case C++ runtime allocated something (e.g. iostream locale or facet)
+  printf("\nTest %s!\n", (failed ? "FAILED" : "successful"));
+  printf("\nPress Enter to continue...\n");
+  getchar();
+  return failed ? 1 : 0;
+}
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/msvc/projects/vc2015/test_threads/test_threads.h
@@ -0,0 +1,3 @@
+#pragma once
+
+int test_threads();
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/msvc/projects/vc2015/test_threads/test_threads.vcxproj
@@ -0,0 +1,327 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug-static|Win32">
+      <Configuration>Debug-static</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug-static|x64">
+      <Configuration>Debug-static</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release-static|Win32">
+      <Configuration>Release-static</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release-static|x64">
+      <Configuration>Release-static</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{09028CFD-4EB7-491D-869C-0708DB97ED44}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>test_threads</RootNamespace>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="test_threads.cpp" />
+    <ClCompile Include="test_threads_main.cpp" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\jemalloc\jemalloc.vcxproj">
+      <Project>{8d6bb292-9e1c-413d-9f98-4864bdc1514a}</Project>
+    </ProjectReference>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="test_threads.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="test_threads.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="test_threads_main.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="test_threads.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/src/msvc/projects/vc2015/test_threads/test_threads_main.cpp
@@ -0,0 +1,12 @@
+#include "test_threads.h"
+#include <future>
+#include <functional>
+#include <chrono>
+
+using namespace std::chrono_literals;
+
+int main(int argc, char** argv)
+{
+  int rc = test_threads();
+  return rc;
+}
--- a/memory/jemalloc/src/src/arena.c
+++ b/memory/jemalloc/src/src/arena.c
@@ -1,103 +1,88 @@
 #define	JEMALLOC_ARENA_C_
 #include "jemalloc/internal/jemalloc_internal.h"
 
 /******************************************************************************/
 /* Data. */
 
+purge_mode_t	opt_purge = PURGE_DEFAULT;
+const char	*purge_mode_names[] = {
+	"ratio",
+	"decay",
+	"N/A"
+};
 ssize_t		opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT;
 static ssize_t	lg_dirty_mult_default;
+ssize_t		opt_decay_time = DECAY_TIME_DEFAULT;
+static ssize_t	decay_time_default;
+
 arena_bin_info_t	arena_bin_info[NBINS];
 
 size_t		map_bias;
 size_t		map_misc_offset;
 size_t		arena_maxrun; /* Max run size for arenas. */
 size_t		large_maxclass; /* Max large size class. */
-static size_t	small_maxrun; /* Max run size used for small size classes. */
+size_t		run_quantize_max; /* Max run_quantize_*() input. */
+static size_t	small_maxrun; /* Max run size for small size classes. */
 static bool	*small_run_tab; /* Valid small run page multiples. */
+static size_t	*run_quantize_floor_tab; /* run_quantize_floor() memoization. */
+static size_t	*run_quantize_ceil_tab; /* run_quantize_ceil() memoization. */
 unsigned	nlclasses; /* Number of large size classes. */
 unsigned	nhclasses; /* Number of huge size classes. */
+static szind_t	runs_avail_bias; /* Size index for first runs_avail tree. */
+static szind_t	runs_avail_nclasses; /* Number of runs_avail trees. */
 
 /******************************************************************************/
 /*
  * Function prototypes for static functions that are referenced prior to
  * definition.
  */
 
-static void	arena_purge(arena_t *arena, bool all);
+static void	arena_purge_to_limit(arena_t *arena, size_t ndirty_limit);
 static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty,
     bool cleaned, bool decommitted);
 static void	arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk,
     arena_run_t *run, arena_bin_t *bin);
 static void	arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk,
     arena_run_t *run, arena_bin_t *bin);
 
 /******************************************************************************/
 
-#define	CHUNK_MAP_KEY		((uintptr_t)0x1U)
-
-JEMALLOC_INLINE_C arena_chunk_map_misc_t *
-arena_miscelm_key_create(size_t size)
-{
-
-	return ((arena_chunk_map_misc_t *)(arena_mapbits_size_encode(size) |
-	    CHUNK_MAP_KEY));
-}
-
-JEMALLOC_INLINE_C bool
-arena_miscelm_is_key(const arena_chunk_map_misc_t *miscelm)
-{
-
-	return (((uintptr_t)miscelm & CHUNK_MAP_KEY) != 0);
-}
-
-#undef CHUNK_MAP_KEY
-
-JEMALLOC_INLINE_C size_t
-arena_miscelm_key_size_get(const arena_chunk_map_misc_t *miscelm)
-{
-
-	assert(arena_miscelm_is_key(miscelm));
-
-	return (arena_mapbits_size_decode((uintptr_t)miscelm));
-}
-
 JEMALLOC_INLINE_C size_t
 arena_miscelm_size_get(const arena_chunk_map_misc_t *miscelm)
 {
 	arena_chunk_t *chunk;
 	size_t pageind, mapbits;
 
-	assert(!arena_miscelm_is_key(miscelm));
-
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm);
 	pageind = arena_miscelm_to_pageind(miscelm);
 	mapbits = arena_mapbits_get(chunk, pageind);
 	return (arena_mapbits_size_decode(mapbits));
 }
 
 JEMALLOC_INLINE_C int
-arena_run_comp(const arena_chunk_map_misc_t *a, const arena_chunk_map_misc_t *b)
+arena_run_addr_comp(const arena_chunk_map_misc_t *a,
+    const arena_chunk_map_misc_t *b)
 {
 	uintptr_t a_miscelm = (uintptr_t)a;
 	uintptr_t b_miscelm = (uintptr_t)b;
 
 	assert(a != NULL);
 	assert(b != NULL);
 
 	return ((a_miscelm > b_miscelm) - (a_miscelm < b_miscelm));
 }
 
 /* Generate red-black tree functions. */
 rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_misc_t,
-    rb_link, arena_run_comp)
+    rb_link, arena_run_addr_comp)
 
 static size_t
-run_quantize(size_t size)
+run_quantize_floor_compute(size_t size)
 {
 	size_t qsize;
 
 	assert(size != 0);
 	assert(size == PAGE_CEILING(size));
 
 	/* Don't change sizes that are valid small run sizes. */
 	if (size <= small_maxrun && small_run_tab[size >> LG_PAGE])
@@ -105,23 +90,23 @@ run_quantize(size_t size)
 
 	/*
 	 * Round down to the nearest run size that can actually be requested
 	 * during normal large allocation.  Add large_pad so that cache index
 	 * randomization can offset the allocation from the page boundary.
 	 */
 	qsize = index2size(size2index(size - large_pad + 1) - 1) + large_pad;
 	if (qsize <= SMALL_MAXCLASS + large_pad)
-		return (run_quantize(size - large_pad));
+		return (run_quantize_floor_compute(size - large_pad));
 	assert(qsize <= size);
 	return (qsize);
 }
 
 static size_t
-run_quantize_next(size_t size)
+run_quantize_ceil_compute_hard(size_t size)
 {
 	size_t large_run_size_next;
 
 	assert(size != 0);
 	assert(size == PAGE_CEILING(size));
 
 	/*
 	 * Return the next quantized size greater than the input size.
@@ -145,89 +130,112 @@ run_quantize_next(size_t size)
 			if (large_run_size_next < size)
 				return (large_run_size_next);
 			return (size);
 		}
 	}
 }
 
 static size_t
-run_quantize_first(size_t size)
+run_quantize_ceil_compute(size_t size)
 {
-	size_t qsize = run_quantize(size);
+	size_t qsize = run_quantize_floor_compute(size);
 
 	if (qsize < size) {
 		/*
 		 * Skip a quantization that may have an adequately large run,
 		 * because under-sized runs may be mixed in.  This only happens
 		 * when an unusual size is requested, i.e. for aligned
 		 * allocation, and is just one of several places where linear
 		 * search would potentially find sufficiently aligned available
 		 * memory somewhere lower.
 		 */
-		qsize = run_quantize_next(size);
+		qsize = run_quantize_ceil_compute_hard(qsize);
 	}
 	return (qsize);
 }
 
-JEMALLOC_INLINE_C int
-arena_avail_comp(const arena_chunk_map_misc_t *a,
-    const arena_chunk_map_misc_t *b)
+#ifdef JEMALLOC_JET
+#undef run_quantize_floor
+#define	run_quantize_floor JEMALLOC_N(run_quantize_floor_impl)
+#endif
+static size_t
+run_quantize_floor(size_t size)
 {
-	int ret;
-	uintptr_t a_miscelm = (uintptr_t)a;
-	size_t a_qsize = run_quantize(arena_miscelm_is_key(a) ?
-	    arena_miscelm_key_size_get(a) : arena_miscelm_size_get(a));
-	size_t b_qsize = run_quantize(arena_miscelm_size_get(b));
-
-	/*
-	 * Compare based on quantized size rather than size, in order to sort
-	 * equally useful runs only by address.
-	 */
-	ret = (a_qsize > b_qsize) - (a_qsize < b_qsize);
-	if (ret == 0) {
-		if (!arena_miscelm_is_key(a)) {
-			uintptr_t b_miscelm = (uintptr_t)b;
-
-			ret = (a_miscelm > b_miscelm) - (a_miscelm < b_miscelm);
-		} else {
-			/*
-			 * Treat keys as if they are lower than anything else.
-			 */
-			ret = -1;
-		}
-	}
-
+	size_t ret;
+
+	assert(size > 0);
+	assert(size <= run_quantize_max);
+	assert((size & PAGE_MASK) == 0);
+
+	ret = run_quantize_floor_tab[(size >> LG_PAGE) - 1];
+	assert(ret == run_quantize_floor_compute(size));
 	return (ret);
 }
-
-/* Generate red-black tree functions. */
-rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t,
-    arena_chunk_map_misc_t, rb_link, arena_avail_comp)
+#ifdef JEMALLOC_JET
+#undef run_quantize_floor
+#define	run_quantize_floor JEMALLOC_N(run_quantize_floor)
+run_quantize_t *run_quantize_floor = JEMALLOC_N(run_quantize_floor_impl);
+#endif
+
+#ifdef JEMALLOC_JET
+#undef run_quantize_ceil
+#define	run_quantize_ceil JEMALLOC_N(run_quantize_ceil_impl)
+#endif
+static size_t
+run_quantize_ceil(size_t size)
+{
+	size_t ret;
+
+	assert(size > 0);
+	assert(size <= run_quantize_max);
+	assert((size & PAGE_MASK) == 0);
+
+	ret = run_quantize_ceil_tab[(size >> LG_PAGE) - 1];
+	assert(ret == run_quantize_ceil_compute(size));
+	return (ret);
+}
+#ifdef JEMALLOC_JET
+#undef run_quantize_ceil
+#define	run_quantize_ceil JEMALLOC_N(run_quantize_ceil)
+run_quantize_t *run_quantize_ceil = JEMALLOC_N(run_quantize_ceil_impl);
+#endif
+
+static arena_run_tree_t *
+arena_runs_avail_get(arena_t *arena, szind_t ind)
+{
+
+	assert(ind >= runs_avail_bias);
+	assert(ind - runs_avail_bias < runs_avail_nclasses);
+
+	return (&arena->runs_avail[ind - runs_avail_bias]);
+}
 
 static void
 arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
     size_t npages)
 {
-
+	szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get(
+	    arena_miscelm_get(chunk, pageind))));
 	assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
 	    LG_PAGE));
-	arena_avail_tree_insert(&arena->runs_avail, arena_miscelm_get(chunk,
-	    pageind));
+	arena_run_tree_insert(arena_runs_avail_get(arena, ind),
+	    arena_miscelm_get(chunk, pageind));
 }
 
 static void
 arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
     size_t npages)
 {
-
+	szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get(
+	    arena_miscelm_get(chunk, pageind))));
 	assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
 	    LG_PAGE));
-	arena_avail_tree_remove(&arena->runs_avail, arena_miscelm_get(chunk,
-	    pageind));
+	arena_run_tree_remove(arena_runs_avail_get(arena, ind),
+	    arena_miscelm_get(chunk, pageind));
 }
 
 static void
 arena_run_dirty_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
     size_t npages)
 {
 	arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind);
 
@@ -288,41 +296,41 @@ arena_chunk_cache_maybe_remove(arena_t *
 		arena->ndirty -= arena_chunk_dirty_npages(node);
 	}
 }
 
 JEMALLOC_INLINE_C void *
 arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info)
 {
 	void *ret;
-	unsigned regind;
+	size_t regind;
 	arena_chunk_map_misc_t *miscelm;
 	void *rpages;
 
 	assert(run->nfree > 0);
 	assert(!bitmap_full(run->bitmap, &bin_info->bitmap_info));
 
-	regind = bitmap_sfu(run->bitmap, &bin_info->bitmap_info);
+	regind = (unsigned)bitmap_sfu(run->bitmap, &bin_info->bitmap_info);
 	miscelm = arena_run_to_miscelm(run);
 	rpages = arena_miscelm_to_rpages(miscelm);
 	ret = (void *)((uintptr_t)rpages + (uintptr_t)bin_info->reg0_offset +
 	    (uintptr_t)(bin_info->reg_interval * regind));
 	run->nfree--;
 	return (ret);
 }
 
 JEMALLOC_INLINE_C void
 arena_run_reg_dalloc(arena_run_t *run, void *ptr)
 {
 	arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
 	size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
 	size_t mapbits = arena_mapbits_get(chunk, pageind);
 	szind_t binind = arena_ptr_small_binind_get(ptr, mapbits);
 	arena_bin_info_t *bin_info = &arena_bin_info[binind];
-	unsigned regind = arena_run_regind(run, bin_info, ptr);
+	size_t regind = arena_run_regind(run, bin_info, ptr);
 
 	assert(run->nfree < bin_info->nregs);
 	/* Freeing an interior pointer can cause assertion failure. */
 	assert(((uintptr_t)ptr -
 	    ((uintptr_t)arena_miscelm_to_rpages(arena_run_to_miscelm(run)) +
 	    (uintptr_t)bin_info->reg0_offset)) %
 	    (uintptr_t)bin_info->reg_interval == 0);
 	assert((uintptr_t)ptr >=
@@ -360,26 +368,40 @@ arena_run_page_validate_zeroed(arena_chu
 	UNUSED size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << LG_PAGE));
 
 	arena_run_page_mark_zeroed(chunk, run_ind);
 	for (i = 0; i < PAGE / sizeof(size_t); i++)
 		assert(p[i] == 0);
 }
 
 static void
-arena_cactive_update(arena_t *arena, size_t add_pages, size_t sub_pages)
+arena_nactive_add(arena_t *arena, size_t add_pages)
 {
 
 	if (config_stats) {
-		ssize_t cactive_diff = CHUNK_CEILING((arena->nactive + add_pages
-		    - sub_pages) << LG_PAGE) - CHUNK_CEILING(arena->nactive <<
+		size_t cactive_add = CHUNK_CEILING((arena->nactive +
+		    add_pages) << LG_PAGE) - CHUNK_CEILING(arena->nactive <<
 		    LG_PAGE);
-		if (cactive_diff != 0)
-			stats_cactive_add(cactive_diff);
+		if (cactive_add != 0)
+			stats_cactive_add(cactive_add);
 	}
+	arena->nactive += add_pages;
+}
+
+static void
+arena_nactive_sub(arena_t *arena, size_t sub_pages)
+{
+
+	if (config_stats) {
+		size_t cactive_sub = CHUNK_CEILING(arena->nactive << LG_PAGE) -
+		    CHUNK_CEILING((arena->nactive - sub_pages) << LG_PAGE);
+		if (cactive_sub != 0)
+			stats_cactive_sub(cactive_sub);
+	}
+	arena->nactive -= sub_pages;
 }
 
 static void
 arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind,
     size_t flag_dirty, size_t flag_decommitted, size_t need_pages)
 {
 	size_t total_pages, rem_pages;
 
@@ -390,18 +412,17 @@ arena_run_split_remove(arena_t *arena, a
 	assert(arena_mapbits_dirty_get(chunk, run_ind+total_pages-1) ==
 	    flag_dirty);
 	assert(need_pages <= total_pages);
 	rem_pages = total_pages - need_pages;
 
 	arena_avail_remove(arena, chunk, run_ind, total_pages);
 	if (flag_dirty != 0)
 		arena_run_dirty_remove(arena, chunk, run_ind, total_pages);
-	arena_cactive_update(arena, need_pages, 0);
-	arena->nactive += need_pages;
+	arena_nactive_add(arena, need_pages);
 
 	/* Keep track of trailing unused pages for later use. */
 	if (rem_pages > 0) {
 		size_t flags = flag_dirty | flag_decommitted;
 		size_t flag_unzeroed_mask = (flags == 0) ?  CHUNK_MAP_UNZEROED :
 		    0;
 
 		arena_mapbits_unallocated_set(chunk, run_ind+need_pages,
@@ -707,17 +728,16 @@ arena_chunk_alloc(arena_t *arena)
 	if (arena->spare != NULL)
 		chunk = arena_chunk_init_spare(arena);
 	else {
 		chunk = arena_chunk_init_hard(arena);
 		if (chunk == NULL)
 			return (NULL);
 	}
 
-	/* Insert the run into the runs_avail tree. */
 	arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias);
 
 	return (chunk);
 }
 
 static void
 arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk)
 {
@@ -728,20 +748,17 @@ arena_chunk_dalloc(arena_t *arena, arena
 	    arena_maxrun);
 	assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) ==
 	    arena_maxrun);
 	assert(arena_mapbits_dirty_get(chunk, map_bias) ==
 	    arena_mapbits_dirty_get(chunk, chunk_npages-1));
 	assert(arena_mapbits_decommitted_get(chunk, map_bias) ==
 	    arena_mapbits_decommitted_get(chunk, chunk_npages-1));
 
-	/*
-	 * Remove run from the runs_avail tree, so that the arena does not use
-	 * it.
-	 */
+	/* Remove run from runs_avail, so that the arena does not use it. */
 	arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias);
 
 	if (arena->spare != NULL) {
 		arena_chunk_t *spare = arena->spare;
 		chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
 		bool committed;
 
 		arena->spare = chunk;
@@ -884,17 +901,17 @@ arena_chunk_alloc_huge_hard(arena_t *are
 	    zero, &commit);
 	if (ret == NULL) {
 		/* Revert optimistic stats updates. */
 		malloc_mutex_lock(&arena->lock);
 		if (config_stats) {
 			arena_huge_malloc_stats_update_undo(arena, usize);
 			arena->stats.mapped -= usize;
 		}
-		arena->nactive -= (usize >> LG_PAGE);
+		arena_nactive_sub(arena, usize >> LG_PAGE);
 		malloc_mutex_unlock(&arena->lock);
 	}
 
 	return (ret);
 }
 
 void *
 arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment,
@@ -906,91 +923,79 @@ arena_chunk_alloc_huge(arena_t *arena, s
 
 	malloc_mutex_lock(&arena->lock);
 
 	/* Optimistically update stats. */
 	if (config_stats) {
 		arena_huge_malloc_stats_update(arena, usize);
 		arena->stats.mapped += usize;
 	}
-	arena->nactive += (usize >> LG_PAGE);
+	arena_nactive_add(arena, usize >> LG_PAGE);
 
 	ret = chunk_alloc_cache(arena, &chunk_hooks, NULL, csize, alignment,
 	    zero, true);
 	malloc_mutex_unlock(&arena->lock);
 	if (ret == NULL) {
 		ret = arena_chunk_alloc_huge_hard(arena, &chunk_hooks, usize,
 		    alignment, zero, csize);
 	}
 
-	if (config_stats && ret != NULL)
-		stats_cactive_add(usize);
 	return (ret);
 }
 
 void
 arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize)
 {
 	chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
 	size_t csize;
 
 	csize = CHUNK_CEILING(usize);
 	malloc_mutex_lock(&arena->lock);
 	if (config_stats) {
 		arena_huge_dalloc_stats_update(arena, usize);
 		arena->stats.mapped -= usize;
-		stats_cactive_sub(usize);
 	}
-	arena->nactive -= (usize >> LG_PAGE);
+	arena_nactive_sub(arena, usize >> LG_PAGE);
 
 	chunk_dalloc_cache(arena, &chunk_hooks, chunk, csize, true);
 	malloc_mutex_unlock(&arena->lock);
 }
 
 void
 arena_chunk_ralloc_huge_similar(arena_t *arena, void *chunk, size_t oldsize,
     size_t usize)
 {
 
 	assert(CHUNK_CEILING(oldsize) == CHUNK_CEILING(usize));
 	assert(oldsize != usize);
 
 	malloc_mutex_lock(&arena->lock);
 	if (config_stats)
 		arena_huge_ralloc_stats_update(arena, oldsize, usize);
-	if (oldsize < usize) {
-		size_t udiff = usize - oldsize;
-		arena->nactive += udiff >> LG_PAGE;
-		if (config_stats)
-			stats_cactive_add(udiff);
-	} else {
-		size_t udiff = oldsize - usize;
-		arena->nactive -= udiff >> LG_PAGE;
-		if (config_stats)
-			stats_cactive_sub(udiff);
-	}
+	if (oldsize < usize)
+		arena_nactive_add(arena, (usize - oldsize) >> LG_PAGE);
+	else
+		arena_nactive_sub(arena, (oldsize - usize) >> LG_PAGE);
 	malloc_mutex_unlock(&arena->lock);
 }
 
 void
 arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, size_t oldsize,
     size_t usize)
 {
 	size_t udiff = oldsize - usize;
 	size_t cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize);
 
 	malloc_mutex_lock(&arena->lock);
 	if (config_stats) {
 		arena_huge_ralloc_stats_update(arena, oldsize, usize);
-		if (cdiff != 0) {
+		if (cdiff != 0)
 			arena->stats.mapped -= cdiff;
-			stats_cactive_sub(udiff);
-		}
 	}
-	arena->nactive -= udiff >> LG_PAGE;
+	arena_nactive_sub(arena, udiff >> LG_PAGE);
 
 	if (cdiff != 0) {
 		chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
 		void *nchunk = (void *)((uintptr_t)chunk +
 		    CHUNK_CEILING(usize));
 
 		chunk_dalloc_cache(arena, &chunk_hooks, nchunk, cdiff, true);
 	}
@@ -1010,17 +1015,17 @@ arena_chunk_ralloc_huge_expand_hard(aren
 	if (err) {
 		/* Revert optimistic stats updates. */
 		malloc_mutex_lock(&arena->lock);
 		if (config_stats) {
 			arena_huge_ralloc_stats_update_undo(arena, oldsize,
 			    usize);
 			arena->stats.mapped -= cdiff;
 		}
-		arena->nactive -= (udiff >> LG_PAGE);
+		arena_nactive_sub(arena, udiff >> LG_PAGE);
 		malloc_mutex_unlock(&arena->lock);
 	} else if (chunk_hooks->merge(chunk, CHUNK_CEILING(oldsize), nchunk,
 	    cdiff, true, arena->ind)) {
 		chunk_dalloc_arena(arena, chunk_hooks, nchunk, cdiff, *zero,
 		    true);
 		err = true;
 	}
 	return (err);
@@ -1038,52 +1043,54 @@ arena_chunk_ralloc_huge_expand(arena_t *
 
 	malloc_mutex_lock(&arena->lock);
 
 	/* Optimistically update stats. */
 	if (config_stats) {
 		arena_huge_ralloc_stats_update(arena, oldsize, usize);
 		arena->stats.mapped += cdiff;
 	}
-	arena->nactive += (udiff >> LG_PAGE);
+	arena_nactive_add(arena, udiff >> LG_PAGE);
 
 	err = (chunk_alloc_cache(arena, &arena->chunk_hooks, nchunk, cdiff,
 	    chunksize, zero, true) == NULL);
 	malloc_mutex_unlock(&arena->lock);
 	if (err) {
 		err = arena_chunk_ralloc_huge_expand_hard(arena, &chunk_hooks,
 		    chunk, oldsize, usize, zero, nchunk, udiff,
 		    cdiff);
 	} else if (chunk_hooks.merge(chunk, CHUNK_CEILING(oldsize), nchunk,
 	    cdiff, true, arena->ind)) {
 		chunk_dalloc_arena(arena, &chunk_hooks, nchunk, cdiff, *zero,
 		    true);
 		err = true;
 	}
 
-	if (config_stats && !err)
-		stats_cactive_add(udiff);
 	return (err);
 }
 
 /*
  * Do first-best-fit run selection, i.e. select the lowest run that best fits.
- * Run sizes are quantized, so not all candidate runs are necessarily exactly
- * the same size.
+ * Run sizes are indexed, so not all candidate runs are necessarily exactly the
+ * same size.
  */
 static arena_run_t *
 arena_run_first_best_fit(arena_t *arena, size_t size)
 {
-	size_t search_size = run_quantize_first(size);
-	arena_chunk_map_misc_t *key = arena_miscelm_key_create(search_size);
-	arena_chunk_map_misc_t *miscelm =
-	    arena_avail_tree_nsearch(&arena->runs_avail, key);
-	if (miscelm == NULL)
-		return (NULL);
-	return (&miscelm->run);
+	szind_t ind, i;
+
+	ind = size2index(run_quantize_ceil(size));
+	for (i = ind; i < runs_avail_nclasses + runs_avail_bias; i++) {
+		arena_chunk_map_misc_t *miscelm = arena_run_tree_first(
+		    arena_runs_avail_get(arena, i));
+		if (miscelm != NULL)
+			return (&miscelm->run);
+	}
+
+	return (NULL);
 }
 
 static arena_run_t *
 arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero)
 {
 	arena_run_t *run = arena_run_first_best_fit(arena, s2u(size));
 	if (run != NULL) {
 		if (arena_run_split_large(arena, run, size, zero))
@@ -1200,42 +1207,269 @@ arena_lg_dirty_mult_set(arena_t *arena, 
 	malloc_mutex_lock(&arena->lock);
 	arena->lg_dirty_mult = lg_dirty_mult;
 	arena_maybe_purge(arena);
 	malloc_mutex_unlock(&arena->lock);
 
 	return (false);
 }
 
-void
-arena_maybe_purge(arena_t *arena)
+static void
+arena_decay_deadline_init(arena_t *arena)
+{
+
+	assert(opt_purge == purge_mode_decay);
+
+	/*
+	 * Generate a new deadline that is uniformly random within the next
+	 * epoch after the current one.
+	 */
+	nstime_copy(&arena->decay_deadline, &arena->decay_epoch);
+	nstime_add(&arena->decay_deadline, &arena->decay_interval);
+	if (arena->decay_time > 0) {
+		nstime_t jitter;
+
+		nstime_init(&jitter, prng_range(&arena->decay_jitter_state,
+		    nstime_ns(&arena->decay_interval)));
+		nstime_add(&arena->decay_deadline, &jitter);
+	}
+}
+
+static bool
+arena_decay_deadline_reached(const arena_t *arena, const nstime_t *time)
+{
+
+	assert(opt_purge == purge_mode_decay);
+
+	return (nstime_compare(&arena->decay_deadline, time) <= 0);
+}
+
+static size_t
+arena_decay_backlog_npages_limit(const arena_t *arena)
+{
+	static const uint64_t h_steps[] = {
+#define	STEP(step, h, x, y) \
+		h,
+		SMOOTHSTEP
+#undef STEP
+	};
+	uint64_t sum;
+	size_t npages_limit_backlog;
+	unsigned i;
+
+	assert(opt_purge == purge_mode_decay);
+
+	/*
+	 * For each element of decay_backlog, multiply by the corresponding
+	 * fixed-point smoothstep decay factor.  Sum the products, then divide
+	 * to round down to the nearest whole number of pages.
+	 */
+	sum = 0;
+	for (i = 0; i < SMOOTHSTEP_NSTEPS; i++)
+		sum += arena->decay_backlog[i] * h_steps[i];
+	npages_limit_backlog = (sum >> SMOOTHSTEP_BFP);
+
+	return (npages_limit_backlog);
+}
+
+static void
+arena_decay_epoch_advance(arena_t *arena, const nstime_t *time)
 {
+	uint64_t nadvance;
+	nstime_t delta;
+	size_t ndirty_delta;
+
+	assert(opt_purge == purge_mode_decay);
+	assert(arena_decay_deadline_reached(arena, time));
+
+	nstime_copy(&delta, time);
+	nstime_subtract(&delta, &arena->decay_epoch);
+	nadvance = nstime_divide(&delta, &arena->decay_interval);
+	assert(nadvance > 0);
+
+	/* Add nadvance decay intervals to epoch. */
+	nstime_copy(&delta, &arena->decay_interval);
+	nstime_imultiply(&delta, nadvance);
+	nstime_add(&arena->decay_epoch, &delta);
+
+	/* Set a new deadline. */
+	arena_decay_deadline_init(arena);
+
+	/* Update the backlog. */
+	if (nadvance >= SMOOTHSTEP_NSTEPS) {
+		memset(arena->decay_backlog, 0, (SMOOTHSTEP_NSTEPS-1) *
+		    sizeof(size_t));
+	} else {
+		memmove(arena->decay_backlog, &arena->decay_backlog[nadvance],
+		    (SMOOTHSTEP_NSTEPS - nadvance) * sizeof(size_t));
+		if (nadvance > 1) {
+			memset(&arena->decay_backlog[SMOOTHSTEP_NSTEPS -
+			    nadvance], 0, (nadvance-1) * sizeof(size_t));
+		}
+	}
+	ndirty_delta = (arena->ndirty > arena->decay_ndirty) ? arena->ndirty -
+	    arena->decay_ndirty : 0;
+	arena->decay_ndirty = arena->ndirty;
+	arena->decay_backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta;
+	arena->decay_backlog_npages_limit =
+	    arena_decay_backlog_npages_limit(arena);
+}
+
+static size_t
+arena_decay_npages_limit(arena_t *arena)
+{
+	size_t npages_limit;
+
+	assert(opt_purge == purge_mode_decay);
+
+	npages_limit = arena->decay_backlog_npages_limit;
+
+	/* Add in any dirty pages created during the current epoch. */
+	if (arena->ndirty > arena->decay_ndirty)
+		npages_limit += arena->ndirty - arena->decay_ndirty;
+
+	return (npages_limit);
+}
+
+static void
+arena_decay_init(arena_t *arena, ssize_t decay_time)
+{
+
+	arena->decay_time = decay_time;
+	if (decay_time > 0) {
+		nstime_init2(&arena->decay_interval, decay_time, 0);
+		nstime_idivide(&arena->decay_interval, SMOOTHSTEP_NSTEPS);
+	}
+
+	nstime_init(&arena->decay_epoch, 0);
+	nstime_update(&arena->decay_epoch);
+	arena->decay_jitter_state = (uint64_t)(uintptr_t)arena;
+	arena_decay_deadline_init(arena);
+	arena->decay_ndirty = arena->ndirty;
+	arena->decay_backlog_npages_limit = 0;
+	memset(arena->decay_backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t));
+}
+
+static bool
+arena_decay_time_valid(ssize_t decay_time)
+{
+
+	return (decay_time >= -1 && decay_time <= NSTIME_SEC_MAX);
+}
+
+ssize_t
+arena_decay_time_get(arena_t *arena)
+{
+	ssize_t decay_time;
+
+	malloc_mutex_lock(&arena->lock);
+	decay_time = arena->decay_time;
+	malloc_mutex_unlock(&arena->lock);
+
+	return (decay_time);
+}
+
+bool
+arena_decay_time_set(arena_t *arena, ssize_t decay_time)
+{
+
+	if (!arena_decay_time_valid(decay_time))
+		return (true);
+
+	malloc_mutex_lock(&arena->lock);
+	/*
+	 * Restart decay backlog from scratch, which may cause many dirty pages
+	 * to be immediately purged.  It would conceptually be possible to map
+	 * the old backlog onto the new backlog, but there is no justification
+	 * for such complexity since decay_time changes are intended to be
+	 * infrequent, either between the {-1, 0, >0} states, or a one-time
+	 * arbitrary change during initial arena configuration.
+	 */
+	arena_decay_init(arena, decay_time);
+	arena_maybe_purge(arena);
+	malloc_mutex_unlock(&arena->lock);
+
+	return (false);
+}
+
+static void
+arena_maybe_purge_ratio(arena_t *arena)
+{
+
+	assert(opt_purge == purge_mode_ratio);
 
 	/* Don't purge if the option is disabled. */
 	if (arena->lg_dirty_mult < 0)
 		return;
-	/* Don't recursively purge. */
-	if (arena->purging)
-		return;
+
 	/*
 	 * Iterate, since preventing recursive purging could otherwise leave too
 	 * many dirty pages.
 	 */
 	while (true) {
 		size_t threshold = (arena->nactive >> arena->lg_dirty_mult);
 		if (threshold < chunk_npages)
 			threshold = chunk_npages;
 		/*
 		 * Don't purge unless the number of purgeable pages exceeds the
 		 * threshold.
 		 */
 		if (arena->ndirty <= threshold)
 			return;
-		arena_purge(arena, false);
+		arena_purge_to_limit(arena, threshold);
+	}
+}
+
+static void
+arena_maybe_purge_decay(arena_t *arena)
+{
+	nstime_t time;
+	size_t ndirty_limit;
+
+	assert(opt_purge == purge_mode_decay);
+
+	/* Purge all or nothing if the option is disabled. */
+	if (arena->decay_time <= 0) {
+		if (arena->decay_time == 0)
+			arena_purge_to_limit(arena, 0);
+		return;
+	}
+
+	nstime_copy(&time, &arena->decay_epoch);
+	if (unlikely(nstime_update(&time))) {
+		/* Time went backwards.  Force an epoch advance. */
+		nstime_copy(&time, &arena->decay_deadline);
 	}
+
+	if (arena_decay_deadline_reached(arena, &time))
+		arena_decay_epoch_advance(arena, &time);
+
+	ndirty_limit = arena_decay_npages_limit(arena);
+
+	/*
+	 * Don't try to purge unless the number of purgeable pages exceeds the
+	 * current limit.
+	 */
+	if (arena->ndirty <= ndirty_limit)
+		return;
+	arena_purge_to_limit(arena, ndirty_limit);
+}
+
+void
+arena_maybe_purge(arena_t *arena)
+{
+
+	/* Don't recursively purge. */
+	if (arena->purging)
+		return;
+
+	if (opt_purge == purge_mode_ratio)
+		arena_maybe_purge_ratio(arena);
+	else
+		arena_maybe_purge_decay(arena);
 }
 
 static size_t
 arena_dirty_count(arena_t *arena)
 {
 	size_t ndirty = 0;
 	arena_runs_dirty_link_t *rdelm;
 	extent_node_t *chunkselm;
@@ -1263,83 +1497,72 @@ arena_dirty_count(arena_t *arena)
 		}
 		ndirty += npages;
 	}
 
 	return (ndirty);
 }
 
 static size_t
-arena_compute_npurge(arena_t *arena, bool all)
-{
-	size_t npurge;
-
-	/*
-	 * Compute the minimum number of pages that this thread should try to
-	 * purge.
-	 */
-	if (!all) {
-		size_t threshold = (arena->nactive >> arena->lg_dirty_mult);
-		threshold = threshold < chunk_npages ? chunk_npages : threshold;
-
-		npurge = arena->ndirty - threshold;
-	} else
-		npurge = arena->ndirty;
-
-	return (npurge);
-}
-
-static size_t
-arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, bool all,
-    size_t npurge, arena_runs_dirty_link_t *purge_runs_sentinel,
+arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks,
+    size_t ndirty_limit, arena_runs_dirty_link_t *purge_runs_sentinel,
     extent_node_t *purge_chunks_sentinel)
 {
 	arena_runs_dirty_link_t *rdelm, *rdelm_next;
 	extent_node_t *chunkselm;
 	size_t nstashed = 0;
 
-	/* Stash at least npurge pages. */
+	/* Stash runs/chunks according to ndirty_limit. */
 	for (rdelm = qr_next(&arena->runs_dirty, rd_link),
 	    chunkselm = qr_next(&arena->chunks_cache, cc_link);
 	    rdelm != &arena->runs_dirty; rdelm = rdelm_next) {
 		size_t npages;
 		rdelm_next = qr_next(rdelm, rd_link);
 
 		if (rdelm == &chunkselm->rd) {
 			extent_node_t *chunkselm_next;
 			bool zero;
 			UNUSED void *chunk;
 
+			npages = extent_node_size_get(chunkselm) >> LG_PAGE;
+			if (opt_purge == purge_mode_decay && arena->ndirty -
+			    (nstashed + npages) < ndirty_limit)
+				break;
+
 			chunkselm_next = qr_next(chunkselm, cc_link);
 			/*
 			 * Allocate.  chunkselm remains valid due to the
 			 * dalloc_node=false argument to chunk_alloc_cache().
 			 */
 			zero = false;
 			chunk = chunk_alloc_cache(arena, chunk_hooks,
 			    extent_node_addr_get(chunkselm),
 			    extent_node_size_get(chunkselm), chunksize, &zero,
 			    false);
 			assert(chunk == extent_node_addr_get(chunkselm));
 			assert(zero == extent_node_zeroed_get(chunkselm));
 			extent_node_dirty_insert(chunkselm, purge_runs_sentinel,
 			    purge_chunks_sentinel);
-			npages = extent_node_size_get(chunkselm) >> LG_PAGE;
+			assert(npages == (extent_node_size_get(chunkselm) >>
+			    LG_PAGE));
 			chunkselm = chunkselm_next;
 		} else {
 			arena_chunk_t *chunk =
 			    (arena_chunk_t *)CHUNK_ADDR2BASE(rdelm);
 			arena_chunk_map_misc_t *miscelm =
 			    arena_rd_to_miscelm(rdelm);
 			size_t pageind = arena_miscelm_to_pageind(miscelm);
 			arena_run_t *run = &miscelm->run;
 			size_t run_size =
 			    arena_mapbits_unallocated_size_get(chunk, pageind);
 
 			npages = run_size >> LG_PAGE;
+			if (opt_purge == purge_mode_decay && arena->ndirty -
+			    (nstashed + npages) < ndirty_limit)
+				break;
 
 			assert(pageind + npages <= chunk_npages);
 			assert(arena_mapbits_dirty_get(chunk, pageind) ==
 			    arena_mapbits_dirty_get(chunk, pageind+npages-1));
 
 			/*
 			 * If purging the spare chunk's run, make it available
 			 * prior to allocation.
@@ -1355,17 +1578,18 @@ arena_stash_dirty(arena_t *arena, chunk_
 			else {
 				assert(qr_next(rdelm, rd_link) == rdelm);
 				assert(qr_prev(rdelm, rd_link) == rdelm);
 			}
 			qr_meld(purge_runs_sentinel, rdelm, rd_link);
 		}
 
 		nstashed += npages;
-		if (!all && nstashed >= npurge)
+		if (opt_purge == purge_mode_ratio && arena->ndirty - nstashed <=
+		    ndirty_limit)
 			break;
 	}
 
 	return (nstashed);
 }
 
 static size_t
 arena_purge_stashed(arena_t *arena, chunk_hooks_t *chunk_hooks,
@@ -1495,61 +1719,75 @@ arena_unstash_purged(arena_t *arena, chu
 			    pageind) != 0);
 			arena_run_t *run = &miscelm->run;
 			qr_remove(rdelm, rd_link);
 			arena_run_dalloc(arena, run, false, true, decommitted);
 		}
 	}
 }
 
+/*
+ * NB: ndirty_limit is interpreted differently depending on opt_purge:
+ *   - purge_mode_ratio: Purge as few dirty run/chunks as possible to reach the
+ *                       desired state:
+ *                       (arena->ndirty <= ndirty_limit)
+ *   - purge_mode_decay: Purge as many dirty runs/chunks as possible without
+ *                       violating the invariant:
+ *                       (arena->ndirty >= ndirty_limit)
+ */
 static void
-arena_purge(arena_t *arena, bool all)
+arena_purge_to_limit(arena_t *arena, size_t ndirty_limit)
 {
 	chunk_hooks_t chunk_hooks = chunk_hooks_get(arena);
-	size_t npurge, npurgeable, npurged;
+	size_t npurge, npurged;
 	arena_runs_dirty_link_t purge_runs_sentinel;
 	extent_node_t purge_chunks_sentinel;
 
 	arena->purging = true;
 
 	/*
 	 * Calls to arena_dirty_count() are disabled even for debug builds
 	 * because overhead grows nonlinearly as memory usage increases.
 	 */
 	if (false && config_debug) {
 		size_t ndirty = arena_dirty_count(arena);
 		assert(ndirty == arena->ndirty);
 	}
-	assert((arena->nactive >> arena->lg_dirty_mult) < arena->ndirty || all);
+	assert(opt_purge != purge_mode_ratio || (arena->nactive >>
+	    arena->lg_dirty_mult) < arena->ndirty || ndirty_limit == 0);
+
+	qr_new(&purge_runs_sentinel, rd_link);
+	extent_node_dirty_linkage_init(&purge_chunks_sentinel);
+
+	npurge = arena_stash_dirty(arena, &chunk_hooks, ndirty_limit,
+	    &purge_runs_sentinel, &purge_chunks_sentinel);
+	if (npurge == 0)
+		goto label_return;
+	npurged = arena_purge_stashed(arena, &chunk_hooks, &purge_runs_sentinel,
+	    &purge_chunks_sentinel);
+	assert(npurged == npurge);
+	arena_unstash_purged(arena, &chunk_hooks, &purge_runs_sentinel,
+	    &purge_chunks_sentinel);
 
 	if (config_stats)
 		arena->stats.npurge++;
 
-	npurge = arena_compute_npurge(arena, all);
-	qr_new(&purge_runs_sentinel, rd_link);
-	extent_node_dirty_linkage_init(&purge_chunks_sentinel);
-
-	npurgeable = arena_stash_dirty(arena, &chunk_hooks, all, npurge,
-	    &purge_runs_sentinel, &purge_chunks_sentinel);
-	assert(npurgeable >= npurge);
-	npurged = arena_purge_stashed(arena, &chunk_hooks, &purge_runs_sentinel,
-	    &purge_chunks_sentinel);
-	assert(npurged == npurgeable);
-	arena_unstash_purged(arena, &chunk_hooks, &purge_runs_sentinel,
-	    &purge_chunks_sentinel);
-
+label_return:
 	arena->purging = false;
 }
 
 void
-arena_purge_all(arena_t *arena)
+arena_purge(arena_t *arena, bool all)
 {
 
 	malloc_mutex_lock(&arena->lock);
-	arena_purge(arena, true);
+	if (all)
+		arena_purge_to_limit(arena, 0);
+	else
+		arena_maybe_purge(arena);
 	malloc_mutex_unlock(&arena->lock);
 }
 
 static void
 arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size,
     size_t *p_run_ind, size_t *p_run_pages, size_t flag_dirty,
     size_t flag_decommitted)
 {
@@ -1671,18 +1909,17 @@ arena_run_dalloc(arena_t *arena, arena_r
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
 	miscelm = arena_run_to_miscelm(run);
 	run_ind = arena_miscelm_to_pageind(miscelm);
 	assert(run_ind >= map_bias);
 	assert(run_ind < chunk_npages);
 	size = arena_run_size_get(arena, chunk, run, run_ind);
 	run_pages = (size >> LG_PAGE);
-	arena_cactive_update(arena, 0, run_pages);
-	arena->nactive -= run_pages;
+	arena_nactive_sub(arena, run_pages);
 
 	/*
 	 * The run is dirty if the caller claims to have dirtied it, as well as
 	 * if it was already dirty before being allocated and the caller
 	 * doesn't claim to have cleaned it.
 	 */
 	assert(arena_mapbits_dirty_get(chunk, run_ind) ==
 	    arena_mapbits_dirty_get(chunk, run_ind+run_pages-1));
@@ -1961,18 +2198,18 @@ arena_bin_malloc_hard(arena_t *arena, ar
 	bin->runcur = run;
 
 	assert(bin->runcur->nfree > 0);
 
 	return (arena_run_reg_alloc(bin->runcur, bin_info));
 }
 
 void
-arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, szind_t binind,
-    uint64_t prof_accumbytes)
+arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin,
+    szind_t binind, uint64_t prof_accumbytes)
 {
 	unsigned i, nfill;
 	arena_bin_t *bin;
 
 	assert(tbin->ncached == 0);
 
 	if (config_prof && arena_prof_accum(arena, prof_accumbytes))
 		prof_idump();
@@ -2009,16 +2246,17 @@ arena_tcache_fill_small(arena_t *arena, 
 		bin->stats.nmalloc += i;
 		bin->stats.nrequests += tbin->tstats.nrequests;
 		bin->stats.curregs += i;
 		bin->stats.nfills++;
 		tbin->tstats.nrequests = 0;
 	}
 	malloc_mutex_unlock(&bin->lock);
 	tbin->ncached = i;
+	arena_decay_tick(tsd, arena);
 }
 
 void
 arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero)
 {
 
 	if (zero) {
 		size_t redzone_size = bin_info->redzone_size;
@@ -2118,26 +2356,27 @@ arena_quarantine_junk_small(void *ptr, s
 	assert(opt_quarantine);
 	assert(usize <= SMALL_MAXCLASS);
 
 	binind = size2index(usize);
 	bin_info = &arena_bin_info[binind];
 	arena_redzones_validate(ptr, bin_info, true);
 }
 
-void *
-arena_malloc_small(arena_t *arena, size_t size, szind_t binind, bool zero)
+static void *
+arena_malloc_small(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero)
 {
 	void *ret;
 	arena_bin_t *bin;
+	size_t usize;
 	arena_run_t *run;
 
 	assert(binind < NBINS);
 	bin = &arena->bins[binind];
-	size = index2size(binind);
+	usize = index2size(binind);
 
 	malloc_mutex_lock(&bin->lock);
 	if ((run = bin->runcur) != NULL && run->nfree > 0)
 		ret = arena_run_reg_alloc(run, &arena_bin_info[binind]);
 	else
 		ret = arena_bin_malloc_hard(arena, bin);
 
 	if (ret == NULL) {
@@ -2146,42 +2385,43 @@ arena_malloc_small(arena_t *arena, size_
 	}
 
 	if (config_stats) {
 		bin->stats.nmalloc++;
 		bin->stats.nrequests++;
 		bin->stats.curregs++;
 	}
 	malloc_mutex_unlock(&bin->lock);
-	if (config_prof && !isthreaded && arena_prof_accum(arena, size))
+	if (config_prof && !isthreaded && arena_prof_accum(arena, usize))
 		prof_idump();
 
 	if (!zero) {
 		if (config_fill) {
 			if (unlikely(opt_junk_alloc)) {
 				arena_alloc_junk_small(ret,
 				    &arena_bin_info[binind], false);
 			} else if (unlikely(opt_zero))
-				memset(ret, 0, size);
+				memset(ret, 0, usize);
 		}
-		JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
+		JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, usize);
 	} else {
 		if (config_fill && unlikely(opt_junk_alloc)) {
 			arena_alloc_junk_small(ret, &arena_bin_info[binind],
 			    true);
 		}
-		JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
-		memset(ret, 0, size);
+		JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, usize);
+		memset(ret, 0, usize);
 	}
 
+	arena_decay_tick(tsd, arena);
 	return (ret);
 }
 
 void *
-arena_malloc_large(arena_t *arena, size_t size, szind_t binind, bool zero)
+arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero)
 {
 	void *ret;
 	size_t usize;
 	uintptr_t random_offset;
 	arena_run_t *run;
 	arena_chunk_map_misc_t *miscelm;
 	UNUSED bool idump;
 
@@ -2191,19 +2431,17 @@ arena_malloc_large(arena_t *arena, size_
 	if (config_cache_oblivious) {
 		uint64_t r;
 
 		/*
 		 * Compute a uniformly distributed offset within the first page
 		 * that is a multiple of the cacheline size, e.g. [0 .. 63) * 64
 		 * for 4 KiB pages and 64-byte cachelines.
 		 */
-		prng64(r, LG_PAGE - LG_CACHELINE, arena->offset_state,
-		    UINT64_C(6364136223846793009),
-		    UINT64_C(1442695040888963409));
+		r = prng_lg_range(&arena->offset_state, LG_PAGE - LG_CACHELINE);
 		random_offset = ((uintptr_t)r) << LG_CACHELINE;
 	} else
 		random_offset = 0;
 	run = arena_run_alloc_large(arena, usize + large_pad, zero);
 	if (run == NULL) {
 		malloc_mutex_unlock(&arena->lock);
 		return (NULL);
 	}
@@ -2230,19 +2468,36 @@ arena_malloc_large(arena_t *arena, size_
 		if (config_fill) {
 			if (unlikely(opt_junk_alloc))
 				memset(ret, 0xa5, usize);
 			else if (unlikely(opt_zero))
 				memset(ret, 0, usize);
 		}
 	}
 
+	arena_decay_tick(tsd, arena);
 	return (ret);
 }
 
+void *
+arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind,
+    bool zero, tcache_t *tcache)
+{
+
+	arena = arena_choose(tsd, arena);
+	if (unlikely(arena == NULL))
+		return (NULL);
+
+	if (likely(size <= SMALL_MAXCLASS))
+		return (arena_malloc_small(tsd, arena, ind, zero));
+	if (likely(size <= large_maxclass))
+		return (arena_malloc_large(tsd, arena, ind, zero));
+	return (huge_malloc(tsd, arena, index2size(ind), zero, tcache));
+}
+
 /* Only handles large allocations that require more than page alignment. */
 static void *
 arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment,
     bool zero)
 {
 	void *ret;
 	size_t alloc_size, leadsize, trailsize;
 	arena_run_t *run;
@@ -2316,16 +2571,17 @@ arena_palloc_large(tsd_t *tsd, arena_t *
 	malloc_mutex_unlock(&arena->lock);
 
 	if (config_fill && !zero) {
 		if (unlikely(opt_junk_alloc))
 			memset(ret, 0xa5, usize);
 		else if (unlikely(opt_zero))
 			memset(ret, 0, usize);
 	}
+	arena_decay_tick(tsd, arena);
 	return (ret);
 }
 
 void *
 arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment,
     bool zero, tcache_t *tcache)
 {
 	void *ret;
@@ -2502,28 +2758,29 @@ arena_dalloc_bin(arena_t *arena, arena_c
 	run = &arena_miscelm_get(chunk, rpages_ind)->run;
 	bin = &arena->bins[run->binind];
 	malloc_mutex_lock(&bin->lock);
 	arena_dalloc_bin_locked_impl(arena, chunk, ptr, bitselm, false);
 	malloc_mutex_unlock(&bin->lock);
 }
 
 void
-arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr,
+arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr,
     size_t pageind)
 {
 	arena_chunk_map_bits_t *bitselm;
 
 	if (config_debug) {
 		/* arena_ptr_small_binind_get() does extra sanity checking. */
 		assert(arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk,
 		    pageind)) != BININD_INVALID);
 	}
 	bitselm = arena_bitselm_get(chunk, pageind);
 	arena_dalloc_bin(arena, chunk, ptr, pageind, bitselm);
+	arena_decay_tick(tsd, arena);
 }
 
 #ifdef JEMALLOC_JET
 #undef arena_dalloc_junk_large
 #define	arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large_impl)
 #endif
 void
 arena_dalloc_junk_large(void *ptr, size_t usize)
@@ -2570,22 +2827,23 @@ void
 arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk,
     void *ptr)
 {
 
 	arena_dalloc_large_locked_impl(arena, chunk, ptr, true);
 }
 
 void
-arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr)
+arena_dalloc_large(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr)
 {
 
 	malloc_mutex_lock(&arena->lock);
 	arena_dalloc_large_locked_impl(arena, chunk, ptr, false);
 	malloc_mutex_unlock(&arena->lock);
+	arena_decay_tick(tsd, arena);
 }
 
 static void
 arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr,
     size_t oldsize, size_t size)
 {
 	size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
 	arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind);
@@ -2776,81 +3034,90 @@ arena_ralloc_large(void *ptr, size_t old
 	assert(oldsize > usize_max);
 	/* Fill before shrinking in order avoid a race. */
 	arena_ralloc_junk_large(ptr, oldsize, usize_max);
 	arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, usize_max);
 	return (false);
 }
 
 bool
-arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra,
-    bool zero)
+arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
+    size_t extra, bool zero)
 {
 	size_t usize_min, usize_max;
 
+	/* Calls with non-zero extra had to clamp extra. */
+	assert(extra == 0 || size + extra <= HUGE_MAXCLASS);
+
+	if (unlikely(size > HUGE_MAXCLASS))
+		return (true);
+
 	usize_min = s2u(size);
 	usize_max = s2u(size + extra);
 	if (likely(oldsize <= large_maxclass && usize_min <= large_maxclass)) {
+		arena_chunk_t *chunk;
+
 		/*
 		 * Avoid moving the allocation if the size class can be left the
 		 * same.
 		 */
 		if (oldsize <= SMALL_MAXCLASS) {
 			assert(arena_bin_info[size2index(oldsize)].reg_size ==
 			    oldsize);
-			if ((usize_max <= SMALL_MAXCLASS &&
-			    size2index(usize_max) == size2index(oldsize)) ||
-			    (size <= oldsize && usize_max >= oldsize))
-				return (false);
+			if ((usize_max > SMALL_MAXCLASS ||
+			    size2index(usize_max) != size2index(oldsize)) &&
+			    (size > oldsize || usize_max < oldsize))
+				return (true);
 		} else {
-			if (usize_max > SMALL_MAXCLASS) {
-				if (!arena_ralloc_large(ptr, oldsize, usize_min,
-				    usize_max, zero))
-					return (false);
-			}
+			if (usize_max <= SMALL_MAXCLASS)
+				return (true);
+			if (arena_ralloc_large(ptr, oldsize, usize_min,
+			    usize_max, zero))
+				return (true);
 		}
 
-		/* Reallocation would require a move. */
-		return (true);
+		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+		arena_decay_tick(tsd, extent_node_arena_get(&chunk->node));
+		return (false);
 	} else {
-		return (huge_ralloc_no_move(ptr, oldsize, usize_min, usize_max,
-		    zero));
+		return (huge_ralloc_no_move(tsd, ptr, oldsize, usize_min,
+		    usize_max, zero));
 	}
 }
 
 static void *
 arena_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize,
     size_t alignment, bool zero, tcache_t *tcache)
 {
 
 	if (alignment == 0)
 		return (arena_malloc(tsd, arena, usize, size2index(usize), zero,
 		    tcache, true));
 	usize = sa2u(usize, alignment);
-	if (usize == 0)
+	if (unlikely(usize == 0 || usize > HUGE_MAXCLASS))
 		return (NULL);
 	return (ipalloct(tsd, usize, alignment, zero, tcache, arena));
 }
 
 void *
 arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size,
     size_t alignment, bool zero, tcache_t *tcache)
 {
 	void *ret;
 	size_t usize;
 
 	usize = s2u(size);
-	if (usize == 0)
+	if (unlikely(usize == 0 || size > HUGE_MAXCLASS))
 		return (NULL);
 
 	if (likely(usize <= large_maxclass)) {
 		size_t copysize;
 
 		/* Try to avoid moving the allocation. */
-		if (!arena_ralloc_no_move(ptr, oldsize, usize, 0, zero))
+		if (!arena_ralloc_no_move(tsd, ptr, oldsize, usize, 0, zero))
 			return (ptr);
 
 		/*
 		 * size and oldsize are different enough that we need to move
 		 * the object.  In that case, fall back to allocating new space
 		 * and copying.
 		 */
 		ret = arena_ralloc_move_helper(tsd, arena, usize, alignment,
@@ -2903,35 +3170,82 @@ arena_lg_dirty_mult_default_get(void)
 
 	return ((ssize_t)atomic_read_z((size_t *)&lg_dirty_mult_default));
 }
 
 bool
 arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult)
 {
 
+	if (opt_purge != purge_mode_ratio)
+		return (true);
 	if (!arena_lg_dirty_mult_valid(lg_dirty_mult))
 		return (true);
 	atomic_write_z((size_t *)&lg_dirty_mult_default, (size_t)lg_dirty_mult);
 	return (false);
 }
 
+ssize_t
+arena_decay_time_default_get(void)
+{
+
+	return ((ssize_t)atomic_read_z((size_t *)&decay_time_default));
+}
+
+bool
+arena_decay_time_default_set(ssize_t decay_time)
+{
+
+	if (opt_purge != purge_mode_decay)
+		return (true);
+	if (!arena_decay_time_valid(decay_time))
+		return (true);
+	atomic_write_z((size_t *)&decay_time_default, (size_t)decay_time);
+	return (false);
+}
+
+static void
+arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads,
+    const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time,
+    size_t *nactive, size_t *ndirty)
+{
+
+	*nthreads += arena_nthreads_get(arena);
+	*dss = dss_prec_names[arena->dss_prec];
+	*lg_dirty_mult = arena->lg_dirty_mult;
+	*decay_time = arena->decay_time;
+	*nactive += arena->nactive;
+	*ndirty += arena->ndirty;
+}
+
 void
-arena_stats_merge(arena_t *arena, const char **dss, ssize_t *lg_dirty_mult,
-    size_t *nactive, size_t *ndirty, arena_stats_t *astats,
-    malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats,
-    malloc_huge_stats_t *hstats)
+arena_basic_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss,
+    ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive,
+    size_t *ndirty)
+{
+
+	malloc_mutex_lock(&arena->lock);
+	arena_basic_stats_merge_locked(arena, nthreads, dss, lg_dirty_mult,
+	    decay_time, nactive, ndirty);
+	malloc_mutex_unlock(&arena->lock);
+}
+
+void
+arena_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss,
+    ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive,
+    size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats,
+    malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats)
 {
 	unsigned i;
 
+	cassert(config_stats);
+
 	malloc_mutex_lock(&arena->lock);
-	*dss = dss_prec_names[arena->dss_prec];
-	*lg_dirty_mult = arena->lg_dirty_mult;
-	*nactive += arena->nactive;
-	*ndirty += arena->ndirty;
+	arena_basic_stats_merge_locked(arena, nthreads, dss, lg_dirty_mult,
+	    decay_time, nactive, ndirty);
 
 	astats->mapped += arena->stats.mapped;
 	astats->npurge += arena->stats.npurge;
 	astats->nmadvise += arena->stats.nmadvise;
 	astats->purged += arena->stats.purged;
 	astats->metadata_mapped += arena->stats.metadata_mapped;
 	astats->metadata_allocated += arena_metadata_allocated_get(arena);
 	astats->allocated_large += arena->stats.allocated_large;
@@ -2970,49 +3284,74 @@ arena_stats_merge(arena_t *arena, const 
 		}
 		bstats[i].nruns += bin->stats.nruns;
 		bstats[i].reruns += bin->stats.reruns;
 		bstats[i].curruns += bin->stats.curruns;
 		malloc_mutex_unlock(&bin->lock);
 	}
 }
 
+unsigned
+arena_nthreads_get(arena_t *arena)
+{
+
+	return (atomic_read_u(&arena->nthreads));
+}
+
+void
+arena_nthreads_inc(arena_t *arena)
+{
+
+	atomic_add_u(&arena->nthreads, 1);
+}
+
+void
+arena_nthreads_dec(arena_t *arena)
+{
+
+	atomic_sub_u(&arena->nthreads, 1);
+}
+
 arena_t *
 arena_new(unsigned ind)
 {
 	arena_t *arena;
+	size_t arena_size;
 	unsigned i;
 	arena_bin_t *bin;
 
+	/* Compute arena size to incorporate sufficient runs_avail elements. */
+	arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_run_tree_t) *
+	    runs_avail_nclasses);
 	/*
 	 * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly
 	 * because there is no way to clean up if base_alloc() OOMs.
 	 */
 	if (config_stats) {
-		arena = (arena_t *)base_alloc(CACHELINE_CEILING(sizeof(arena_t))
-		    + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t) +
+		arena = (arena_t *)base_alloc(CACHELINE_CEILING(arena_size) +
+		    QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t) +
 		    nhclasses) * sizeof(malloc_huge_stats_t));
 	} else
-		arena = (arena_t *)base_alloc(sizeof(arena_t));
+		arena = (arena_t *)base_alloc(arena_size);
 	if (arena == NULL)
 		return (NULL);
 
 	arena->ind = ind;
 	arena->nthreads = 0;
 	if (malloc_mutex_init(&arena->lock))
 		return (NULL);
 
 	if (config_stats) {
 		memset(&arena->stats, 0, sizeof(arena_stats_t));
 		arena->stats.lstats = (malloc_large_stats_t *)((uintptr_t)arena
-		    + CACHELINE_CEILING(sizeof(arena_t)));
+		    + CACHELINE_CEILING(arena_size));
 		memset(arena->stats.lstats, 0, nlclasses *
 		    sizeof(malloc_large_stats_t));
 		arena->stats.hstats = (malloc_huge_stats_t *)((uintptr_t)arena
-		    + CACHELINE_CEILING(sizeof(arena_t)) +
+		    + CACHELINE_CEILING(arena_size) +
 		    QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t)));
 		memset(arena->stats.hstats, 0, nhclasses *
 		    sizeof(malloc_huge_stats_t));
 		if (config_tcache)
 			ql_new(&arena->tcache_ql);
 	}
 
 	if (config_prof)
@@ -3034,20 +3373,24 @@ arena_new(unsigned ind)
 
 	arena->spare = NULL;
 
 	arena->lg_dirty_mult = arena_lg_dirty_mult_default_get();
 	arena->purging = false;
 	arena->nactive = 0;
 	arena->ndirty = 0;
 
-	arena_avail_tree_new(&arena->runs_avail);
+	for(i = 0; i < runs_avail_nclasses; i++)
+		arena_run_tree_new(&arena->runs_avail[i]);
 	qr_new(&arena->runs_dirty, rd_link);
 	qr_new(&arena->chunks_cache, cc_link);
 
+	if (opt_purge == purge_mode_decay)
+		arena_decay_init(arena, arena_decay_time_default_get());
+
 	ql_new(&arena->huge);
 	if (malloc_mutex_init(&arena->huge_mtx))
 		return (NULL);
 
 	extent_tree_szad_new(&arena->chunks_szad_cached);
 	extent_tree_ad_new(&arena->chunks_ad_cached);
 	extent_tree_szad_new(&arena->chunks_szad_retained);
 	extent_tree_ad_new(&arena->chunks_ad_retained);
@@ -3092,18 +3435,17 @@ bin_info_run_size_calc(arena_bin_info_t 
 	/*
 	 * Determine redzone size based on minimum alignment and minimum
 	 * redzone size.  Add padding to the end of the run if it is needed to
 	 * align the regions.  The padding allows each redzone to be half the
 	 * minimum alignment; without the padding, each redzone would have to
 	 * be twice as large in order to maintain alignment.
 	 */
 	if (config_fill && unlikely(opt_redzone)) {
-		size_t align_min = ZU(1) << (jemalloc_ffs(bin_info->reg_size) -
-		    1);
+		size_t align_min = ZU(1) << (ffs_zu(bin_info->reg_size) - 1);
 		if (align_min <= REDZONE_MINSIZE) {
 			bin_info->redzone_size = REDZONE_MINSIZE;
 			pad_size = 0;
 		} else {
 			bin_info->redzone_size = align_min >> 1;
 			pad_size = bin_info->redzone_size;
 		}
 	} else {
@@ -3113,59 +3455,60 @@ bin_info_run_size_calc(arena_bin_info_t 
 	bin_info->reg_interval = bin_info->reg_size +
 	    (bin_info->redzone_size << 1);
 
 	/*
 	 * Compute run size under ideal conditions (no redzones, no limit on run
 	 * size).
 	 */
 	try_run_size = PAGE;
-	try_nregs = try_run_size / bin_info->reg_size;
+	try_nregs = (uint32_t)(try_run_size / bin_info->reg_size);
 	do {
 		perfect_run_size = try_run_size;
 		perfect_nregs = try_nregs;
 
 		try_run_size += PAGE;
-		try_nregs = try_run_size / bin_info->reg_size;
+		try_nregs = (uint32_t)(try_run_size / bin_info->reg_size);
 	} while (perfect_run_size != perfect_nregs * bin_info->reg_size);
 	assert(perfect_nregs <= RUN_MAXREGS);
 
 	actual_run_size = perfect_run_size;
-	actual_nregs = (actual_run_size - pad_size) / bin_info->reg_interval;
+	actual_nregs = (uint32_t)((actual_run_size - pad_size) /
+	    bin_info->reg_interval);
 
 	/*
 	 * Redzones can require enough padding that not even a single region can
 	 * fit within the number of pages that would normally be dedicated to a
 	 * run for this size class.  Increase the run size until at least one
 	 * region fits.
 	 */
 	while (actual_nregs == 0) {
 		assert(config_fill && unlikely(opt_redzone));
 
 		actual_run_size += PAGE;
-		actual_nregs = (actual_run_size - pad_size) /
-		    bin_info->reg_interval;
+		actual_nregs = (uint32_t)((actual_run_size - pad_size) /
+		    bin_info->reg_interval);
 	}
 
 	/*
 	 * Make sure that the run will fit within an arena chunk.
 	 */
 	while (actual_run_size > arena_maxrun) {
 		actual_run_size -= PAGE;
-		actual_nregs = (actual_run_size - pad_size) /
-		    bin_info->reg_interval;
+		actual_nregs = (uint32_t)((actual_run_size - pad_size) /
+		    bin_info->reg_interval);
 	}
 	assert(actual_nregs > 0);
 	assert(actual_run_size == s2u(actual_run_size));
 
 	/* Copy final settings. */
 	bin_info->run_size = actual_run_size;
 	bin_info->nregs = actual_nregs;
-	bin_info->reg0_offset = actual_run_size - (actual_nregs *
-	    bin_info->reg_interval) - pad_size + bin_info->redzone_size;
+	bin_info->reg0_offset = (uint32_t)(actual_run_size - (actual_nregs *
+	    bin_info->reg_interval) - pad_size + bin_info->redzone_size);
 
 	if (actual_run_size > small_maxrun)
 		small_maxrun = actual_run_size;
 
 	assert(bin_info->reg0_offset - bin_info->redzone_size + (bin_info->nregs
 	    * bin_info->reg_interval) + pad_size == bin_info->run_size);
 }
 
@@ -3209,22 +3552,52 @@ small_run_size_init(void)
 	SIZE_CLASSES
 #undef TAB_INIT_bin_yes
 #undef TAB_INIT_bin_no
 #undef SC
 
 	return (false);
 }
 
+static bool
+run_quantize_init(void)
+{
+	unsigned i;
+
+	run_quantize_max = chunksize + large_pad;
+
+	run_quantize_floor_tab = (size_t *)base_alloc(sizeof(size_t) *
+	    (run_quantize_max >> LG_PAGE));
+	if (run_quantize_floor_tab == NULL)
+		return (true);
+
+	run_quantize_ceil_tab = (size_t *)base_alloc(sizeof(size_t) *
+	    (run_quantize_max >> LG_PAGE));
+	if (run_quantize_ceil_tab == NULL)
+		return (true);
+
+	for (i = 1; i <= run_quantize_max >> LG_PAGE; i++) {
+		size_t run_size = i << LG_PAGE;
+
+		run_quantize_floor_tab[i-1] =
+		    run_quantize_floor_compute(run_size);
+		run_quantize_ceil_tab[i-1] =
+		    run_quantize_ceil_compute(run_size);
+	}
+
+	return (false);
+}
+
 bool
 arena_boot(void)
 {
 	unsigned i;
 
 	arena_lg_dirty_mult_default_set(opt_lg_dirty_mult);
+	arena_decay_time_default_set(opt_decay_time);
 
 	/*
 	 * Compute the header size such that it is large enough to contain the
 	 * page map.  The page map is biased to omit entries for the header
 	 * itself, so some iteration is necessary to compute the map bias.
 	 *
 	 * 1) Compute safe header_size and map_bias values that include enough
 	 *    space for an unbiased page map.
@@ -3256,17 +3629,25 @@ arena_boot(void)
 		 */
 		large_maxclass = arena_maxrun;
 	}
 	assert(large_maxclass > 0);
 	nlclasses = size2index(large_maxclass) - size2index(SMALL_MAXCLASS);
 	nhclasses = NSIZES - nlclasses - NBINS;
 
 	bin_info_init();
-	return (small_run_size_init());
+	if (small_run_size_init())
+		return (true);
+	if (run_quantize_init())
+		return (true);
+
+	runs_avail_bias = size2index(PAGE);
+	runs_avail_nclasses = size2index(run_quantize_max)+1 - runs_avail_bias;
+
+	return (false);
 }
 
 void
 arena_prefork(arena_t *arena)
 {
 	unsigned i;
 
 	malloc_mutex_prefork(&arena->lock);
--- a/memory/jemalloc/src/src/bitmap.c
+++ b/memory/jemalloc/src/src/bitmap.c
@@ -1,13 +1,15 @@
 #define	JEMALLOC_BITMAP_C_
 #include "jemalloc/internal/jemalloc_internal.h"
 
 /******************************************************************************/
 
+#ifdef USE_TREE
+
 void
 bitmap_info_init(bitmap_info_t *binfo, size_t nbits)
 {
 	unsigned i;
 	size_t group_count;
 
 	assert(nbits > 0);
 	assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS));
@@ -27,52 +29,86 @@ bitmap_info_init(bitmap_info_t *binfo, s
 	}
 	binfo->levels[i].group_offset = binfo->levels[i-1].group_offset
 	    + group_count;
 	assert(binfo->levels[i].group_offset <= BITMAP_GROUPS_MAX);
 	binfo->nlevels = i;
 	binfo->nbits = nbits;
 }
 
-size_t
+static size_t
 bitmap_info_ngroups(const bitmap_info_t *binfo)
 {
 
-	return (binfo->levels[binfo->nlevels].group_offset << LG_SIZEOF_BITMAP);
-}
-
-size_t
-bitmap_size(size_t nbits)
-{
-	bitmap_info_t binfo;
-
-	bitmap_info_init(&binfo, nbits);
-	return (bitmap_info_ngroups(&binfo));
+	return (binfo->levels[binfo->nlevels].group_offset);
 }
 
 void
 bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo)
 {
 	size_t extra;
 	unsigned i;
 
 	/*
 	 * Bits are actually inverted with regard to the external bitmap
 	 * interface, so the bitmap starts out with all 1 bits, except for
 	 * trailing unused bits (if any).  Note that each group uses bit 0 to
 	 * correspond to the first logical bit in the group, so extra bits
 	 * are the most significant bits of the last group.
 	 */
-	memset(bitmap, 0xffU, binfo->levels[binfo->nlevels].group_offset <<
-	    LG_SIZEOF_BITMAP);
+	memset(bitmap, 0xffU, bitmap_size(binfo));
 	extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK))
 	    & BITMAP_GROUP_NBITS_MASK;
 	if (extra != 0)
 		bitmap[binfo->levels[1].group_offset - 1] >>= extra;
 	for (i = 1; i < binfo->nlevels; i++) {
 		size_t group_count = binfo->levels[i].group_offset -
 		    binfo->levels[i-1].group_offset;
 		extra = (BITMAP_GROUP_NBITS - (group_count &
 		    BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK;
 		if (extra != 0)
 			bitmap[binfo->levels[i+1].group_offset - 1] >>= extra;
 	}
 }
+
+#else /* USE_TREE */
+
+void
+bitmap_info_init(bitmap_info_t *binfo, size_t nbits)
+{
+	size_t i;
+
+	assert(nbits > 0);
+	assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS));
+
+	i = nbits >> LG_BITMAP_GROUP_NBITS;
+	if (nbits % BITMAP_GROUP_NBITS != 0)
+		i++;
+	binfo->ngroups = i;
+	binfo->nbits = nbits;
+}
+
+static size_t
+bitmap_info_ngroups(const bitmap_info_t *binfo)
+{
+
+	return (binfo->ngroups);
+}
+
+void
+bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo)
+{
+	size_t extra;
+
+	memset(bitmap, 0xffU, bitmap_size(binfo));
+	extra = (binfo->nbits % (binfo->ngroups * BITMAP_GROUP_NBITS));
+	if (extra != 0)
+		bitmap[binfo->ngroups - 1] >>= (BITMAP_GROUP_NBITS - extra);
+}
+
+#endif /* USE_TREE */
+
+size_t
+bitmap_size(const bitmap_info_t *binfo)
+{
+
+	return (bitmap_info_ngroups(binfo) << LG_SIZEOF_BITMAP);
+}
--- a/memory/jemalloc/src/src/chunk.c
+++ b/memory/jemalloc/src/src/chunk.c
@@ -327,40 +327,30 @@ chunk_recycle(arena_t *arena, chunk_hook
  * advantage of this to avoid demanding zeroed chunks, but taking advantage of
  * them if they are returned.
  */
 static void *
 chunk_alloc_core(arena_t *arena, void *new_addr, size_t size, size_t alignment,
     bool *zero, bool *commit, dss_prec_t dss_prec)
 {
 	void *ret;
-	chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
 
 	assert(size != 0);
 	assert((size & chunksize_mask) == 0);
 	assert(alignment != 0);
 	assert((alignment & chunksize_mask) == 0);
 
-	/* Retained. */
-	if ((ret = chunk_recycle(arena, &chunk_hooks,
-	    &arena->chunks_szad_retained, &arena->chunks_ad_retained, false,
-	    new_addr, size, alignment, zero, commit, true)) != NULL)
-		return (ret);
-
 	/* "primary" dss. */
 	if (have_dss && dss_prec == dss_prec_primary && (ret =
 	    chunk_alloc_dss(arena, new_addr, size, alignment, zero, commit)) !=
 	    NULL)
 		return (ret);
-	/*
-	 * mmap.  Requesting an address is not implemented for
-	 * chunk_alloc_mmap(), so only call it if (new_addr == NULL).
-	 */
-	if (new_addr == NULL && (ret = chunk_alloc_mmap(size, alignment, zero,
-	    commit)) != NULL)
+	/* mmap. */
+	if ((ret = chunk_alloc_mmap(new_addr, size, alignment, zero, commit)) !=
+	    NULL)
 		return (ret);
 	/* "secondary" dss. */
 	if (have_dss && dss_prec == dss_prec_secondary && (ret =
 	    chunk_alloc_dss(arena, new_addr, size, alignment, zero, commit)) !=
 	    NULL)
 		return (ret);
 
 	/* All strategies for allocation failed. */
@@ -375,17 +365,17 @@ chunk_alloc_base(size_t size)
 
 	/*
 	 * Directly call chunk_alloc_mmap() rather than chunk_alloc_core()
 	 * because it's critical that chunk_alloc_base() return untouched
 	 * demand-zeroed virtual memory.
 	 */
 	zero = true;
 	commit = true;
-	ret = chunk_alloc_mmap(size, chunksize, &zero, &commit);
+	ret = chunk_alloc_mmap(NULL, size, chunksize, &zero, &commit);
 	if (ret == NULL)
 		return (NULL);
 	if (config_valgrind)
 		JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
 
 	return (ret);
 }
 
@@ -413,19 +403,17 @@ chunk_alloc_cache(arena_t *arena, chunk_
 	return (ret);
 }
 
 static arena_t *
 chunk_arena_get(unsigned arena_ind)
 {
 	arena_t *arena;
 
-	/* Dodge tsd for a0 in order to avoid bootstrapping issues. */
-	arena = (arena_ind == 0) ? a0get() : arena_get(tsd_fetch(), arena_ind,
-	     false, true);
+	arena = arena_get(arena_ind, false);
 	/*
 	 * The arena we're allocating on behalf of must have been initialized
 	 * already.
 	 */
 	assert(arena != NULL);
 	return (arena);
 }
 
@@ -442,27 +430,48 @@ chunk_alloc_default(void *new_addr, size
 	if (ret == NULL)
 		return (NULL);
 	if (config_valgrind)
 		JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
 
 	return (ret);
 }
 
+static void *
+chunk_alloc_retained(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr,
+    size_t size, size_t alignment, bool *zero, bool *commit)
+{
+
+	assert(size != 0);
+	assert((size & chunksize_mask) == 0);
+	assert(alignment != 0);
+	assert((alignment & chunksize_mask) == 0);
+
+	return (chunk_recycle(arena, chunk_hooks, &arena->chunks_szad_retained,
+	    &arena->chunks_ad_retained, false, new_addr, size, alignment, zero,
+	    commit, true));
+}
+
 void *
 chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr,
     size_t size, size_t alignment, bool *zero, bool *commit)
 {
 	void *ret;
 
 	chunk_hooks_assure_initialized(arena, chunk_hooks);
-	ret = chunk_hooks->alloc(new_addr, size, alignment, zero, commit,
-	    arena->ind);
-	if (ret == NULL)
-		return (NULL);
+
+	ret = chunk_alloc_retained(arena, chunk_hooks, new_addr, size,
+	    alignment, zero, commit);
+	if (ret == NULL) {
+		ret = chunk_hooks->alloc(new_addr, size, alignment, zero,
+		    commit, arena->ind);
+		if (ret == NULL)
+			return (NULL);
+	}
+
 	if (config_valgrind && chunk_hooks->alloc != chunk_alloc_default)
 		JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, chunksize);
 	return (ret);
 }
 
 static void
 chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks,
     extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache,
@@ -711,34 +720,34 @@ chunk_boot(void)
 	if (info.dwPageSize & ((1U << LG_PAGE) - 1))
 		return (true);
 
 	/*
 	 * Configure chunksize (if not set) to match granularity (usually 64K),
 	 * so pages_map will always take fast path.
 	 */
 	if (!opt_lg_chunk) {
-		opt_lg_chunk = jemalloc_ffs((int)info.dwAllocationGranularity)
+		opt_lg_chunk = ffs_u((unsigned)info.dwAllocationGranularity)
 		    - 1;
 	}
 #else
 	if (!opt_lg_chunk)
 		opt_lg_chunk = LG_CHUNK_DEFAULT;
 #endif
 
 	/* Set variables according to the value of opt_lg_chunk. */
 	chunksize = (ZU(1) << opt_lg_chunk);
 	assert(chunksize >= PAGE);
 	chunksize_mask = chunksize - 1;
 	chunk_npages = (chunksize >> LG_PAGE);
 
 	if (have_dss && chunk_dss_boot())
 		return (true);
-	if (rtree_new(&chunks_rtree, (ZU(1) << (LG_SIZEOF_PTR+3)) -
-	    opt_lg_chunk, chunks_rtree_node_alloc, NULL))
+	if (rtree_new(&chunks_rtree, (unsigned)((ZU(1) << (LG_SIZEOF_PTR+3)) -
+	    opt_lg_chunk), chunks_rtree_node_alloc, NULL))
 		return (true);
 
 	return (false);
 }
 
 void
 chunk_prefork(void)
 {
--- a/memory/jemalloc/src/src/chunk_mmap.c
+++ b/memory/jemalloc/src/src/chunk_mmap.c
@@ -27,17 +27,18 @@ chunk_alloc_mmap_slow(size_t size, size_
 	assert(ret != NULL);
 	*zero = true;
 	if (!*commit)
 		*commit = pages_decommit(ret, size);
 	return (ret);
 }
 
 void *
-chunk_alloc_mmap(size_t size, size_t alignment, bool *zero, bool *commit)
+chunk_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero,
+    bool *commit)
 {
 	void *ret;
 	size_t offset;
 
 	/*
 	 * Ideally, there would be a way to specify alignment to mmap() (like
 	 * NetBSD has), but in the absence of such a feature, we have to work
 	 * hard to efficiently create aligned mappings.  The reliable, but
@@ -48,19 +49,20 @@ chunk_alloc_mmap(size_t size, size_t ali
 	 * Optimistically try mapping precisely the right amount before falling
 	 * back to the slow method, with the expectation that the optimistic
 	 * approach works most of the time.
 	 */
 
 	assert(alignment != 0);
 	assert((alignment & chunksize_mask) == 0);
 
-	ret = pages_map(NULL, size);
-	if (ret == NULL)
-		return (NULL);
+	ret = pages_map(new_addr, size);
+	if (ret == NULL || ret == new_addr)
+		return (ret);
+	assert(new_addr == NULL);
 	offset = ALIGNMENT_ADDR2OFFSET(ret, alignment);
 	if (offset != 0) {
 		pages_unmap(ret, size);
 		return (chunk_alloc_mmap_slow(size, alignment, zero, commit));
 	}
 
 	assert(ret != NULL);
 	*zero = true;
--- a/memory/jemalloc/src/src/ckh.c
+++ b/memory/jemalloc/src/src/ckh.c
@@ -94,17 +94,17 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t
 {
 	ckhc_t *cell;
 	unsigned offset, i;
 
 	/*
 	 * Cycle through the cells in the bucket, starting at a random position.
 	 * The randomness avoids worst-case search overhead as buckets fill up.
 	 */
-	prng32(offset, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C);
+	offset = (unsigned)prng_lg_range(&ckh->prng_state, LG_CKH_BUCKET_CELLS);
 	for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
 		cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) +
 		    ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))];
 		if (cell->key == NULL) {
 			cell->key = key;
 			cell->data = data;
 			ckh->count++;
 			return (false);
@@ -136,17 +136,18 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_
 		/*
 		 * Choose a random item within the bucket to evict.  This is
 		 * critical to correct function, because without (eventually)
 		 * evicting all items within a bucket during iteration, it
 		 * would be possible to get stuck in an infinite loop if there
 		 * were an item for which both hashes indicated the same
 		 * bucket.
 		 */
-		prng32(i, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C);
+		i = (unsigned)prng_lg_range(&ckh->prng_state,
+		    LG_CKH_BUCKET_CELLS);
 		cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
 		assert(cell->key != NULL);
 
 		/* Swap cell->{key,data} and {key,data} (evict). */
 		tkey = cell->key; tdata = cell->data;
 		cell->key = key; cell->data = data;
 		key = tkey; data = tdata;
 
@@ -242,18 +243,17 @@ ckh_rebuild(ckh_t *ckh, ckhc_t *aTab)
 	return (false);
 }
 
 static bool
 ckh_grow(tsd_t *tsd, ckh_t *ckh)
 {
 	bool ret;
 	ckhc_t *tab, *ttab;
-	size_t lg_curcells;
-	unsigned lg_prevbuckets;
+	unsigned lg_prevbuckets, lg_curcells;
 
 #ifdef CKH_COUNT
 	ckh->ngrows++;
 #endif
 
 	/*
 	 * It is possible (though unlikely, given well behaved hashes) that the
 	 * table will have to be doubled more than once in order to create a
@@ -261,17 +261,17 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh)
 	 */
 	lg_prevbuckets = ckh->lg_curbuckets;
 	lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS;
 	while (true) {
 		size_t usize;
 
 		lg_curcells++;
 		usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE);
-		if (usize == 0) {
+		if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) {
 			ret = true;
 			goto label_return;
 		}
 		tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL,
 		    true, NULL);
 		if (tab == NULL) {
 			ret = true;
 			goto label_return;
@@ -297,27 +297,27 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh)
 label_return:
 	return (ret);
 }
 
 static void
 ckh_shrink(tsd_t *tsd, ckh_t *ckh)
 {
 	ckhc_t *tab, *ttab;
-	size_t lg_curcells, usize;
-	unsigned lg_prevbuckets;
+	size_t usize;
+	unsigned lg_prevbuckets, lg_curcells;
 
 	/*
 	 * It is possible (though unlikely, given well behaved hashes) that the
 	 * table rebuild will fail.
 	 */
 	lg_prevbuckets = ckh->lg_curbuckets;
 	lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1;
 	usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE);
-	if (usize == 0)
+	if (unlikely(usize == 0 || usize > HUGE_MAXCLASS))
 		return;
 	tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true,
 	    NULL);
 	if (tab == NULL) {
 		/*
 		 * An OOM error isn't worth propagating, since it doesn't
 		 * prevent this or future operations from proceeding.
 		 */
@@ -382,17 +382,17 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t m
 	    lg_mincells++)
 		; /* Do nothing. */
 	ckh->lg_minbuckets = lg_mincells - LG_CKH_BUCKET_CELLS;
 	ckh->lg_curbuckets = lg_mincells - LG_CKH_BUCKET_CELLS;
 	ckh->hash = hash;
 	ckh->keycomp = keycomp;
 
 	usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE);
-	if (usize == 0) {
+	if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) {
 		ret = true;
 		goto label_return;
 	}
 	ckh->tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true,
 	    NULL);
 	if (ckh->tab == NULL) {
 		ret = true;
 		goto label_return;
--- a/memory/jemalloc/src/src/ctl.c
+++ b/memory/jemalloc/src/src/ctl.c
@@ -19,17 +19,17 @@ static ctl_stats_t	ctl_stats;
 JEMALLOC_INLINE_C const ctl_named_node_t *
 ctl_named_node(const ctl_node_t *node)
 {
 
 	return ((node->named) ? (const ctl_named_node_t *)node : NULL);
 }
 
 JEMALLOC_INLINE_C const ctl_named_node_t *
-ctl_named_children(const ctl_named_node_t *node, int index)
+ctl_named_children(const ctl_named_node_t *node, size_t index)
 {
 	const ctl_named_node_t *children = ctl_named_node(node->children);
 
 	return (children ? &children[index] : NULL);
 }
 
 JEMALLOC_INLINE_C const ctl_indexed_node_t *
 ctl_indexed_node(const ctl_node_t *node)
@@ -72,31 +72,34 @@ CTL_PROTO(thread_arena)
 CTL_PROTO(thread_allocated)
 CTL_PROTO(thread_allocatedp)
 CTL_PROTO(thread_deallocated)
 CTL_PROTO(thread_deallocatedp)
 CTL_PROTO(config_cache_oblivious)
 CTL_PROTO(config_debug)
 CTL_PROTO(config_fill)
 CTL_PROTO(config_lazy_lock)
+CTL_PROTO(config_malloc_conf)
 CTL_PROTO(config_munmap)
 CTL_PROTO(config_prof)
 CTL_PROTO(config_prof_libgcc)
 CTL_PROTO(config_prof_libunwind)
 CTL_PROTO(config_stats)
 CTL_PROTO(config_tcache)
 CTL_PROTO(config_tls)
 CTL_PROTO(config_utrace)
 CTL_PROTO(config_valgrind)
 CTL_PROTO(config_xmalloc)
 CTL_PROTO(opt_abort)
 CTL_PROTO(opt_dss)
 CTL_PROTO(opt_lg_chunk)
 CTL_PROTO(opt_narenas)
+CTL_PROTO(opt_purge)
 CTL_PROTO(opt_lg_dirty_mult)
+CTL_PROTO(opt_decay_time)
 CTL_PROTO(opt_stats_print)
 CTL_PROTO(opt_junk)
 CTL_PROTO(opt_zero)
 CTL_PROTO(opt_quarantine)
 CTL_PROTO(opt_redzone)
 CTL_PROTO(opt_utrace)
 CTL_PROTO(opt_xmalloc)
 CTL_PROTO(opt_tcache)
@@ -109,33 +112,36 @@ CTL_PROTO(opt_lg_prof_sample)
 CTL_PROTO(opt_lg_prof_interval)
 CTL_PROTO(opt_prof_gdump)
 CTL_PROTO(opt_prof_final)
 CTL_PROTO(opt_prof_leak)
 CTL_PROTO(opt_prof_accum)
 CTL_PROTO(tcache_create)
 CTL_PROTO(tcache_flush)
 CTL_PROTO(tcache_destroy)
+static void	arena_i_purge(unsigned arena_ind, bool all);
 CTL_PROTO(arena_i_purge)
-static void	arena_i_purge(unsigned arena_ind);
+CTL_PROTO(arena_i_decay)
 CTL_PROTO(arena_i_dss)
 CTL_PROTO(arena_i_lg_dirty_mult)
+CTL_PROTO(arena_i_decay_time)
 CTL_PROTO(arena_i_chunk_hooks)
 INDEX_PROTO(arena_i)
 CTL_PROTO(arenas_bin_i_size)
 CTL_PROTO(arenas_bin_i_nregs)
 CTL_PROTO(arenas_bin_i_run_size)
 INDEX_PROTO(arenas_bin_i)
 CTL_PROTO(arenas_lrun_i_size)
 INDEX_PROTO(arenas_lrun_i)
 CTL_PROTO(arenas_hchunk_i_size)
 INDEX_PROTO(arenas_hchunk_i)
 CTL_PROTO(arenas_narenas)
 CTL_PROTO(arenas_initialized)
 CTL_PROTO(arenas_lg_dirty_mult)
+CTL_PROTO(arenas_decay_time)
 CTL_PROTO(arenas_quantum)
 CTL_PROTO(arenas_page)
 CTL_PROTO(arenas_tcache_max)
 CTL_PROTO(arenas_nbins)
 CTL_PROTO(arenas_nhbins)
 CTL_PROTO(arenas_nlruns)
 CTL_PROTO(arenas_nhchunks)
 CTL_PROTO(arenas_extend)
@@ -176,16 +182,17 @@ INDEX_PROTO(stats_arenas_i_lruns_j)
 CTL_PROTO(stats_arenas_i_hchunks_j_nmalloc)
 CTL_PROTO(stats_arenas_i_hchunks_j_ndalloc)
 CTL_PROTO(stats_arenas_i_hchunks_j_nrequests)
 CTL_PROTO(stats_arenas_i_hchunks_j_curhchunks)
 INDEX_PROTO(stats_arenas_i_hchunks_j)
 CTL_PROTO(stats_arenas_i_nthreads)
 CTL_PROTO(stats_arenas_i_dss)
 CTL_PROTO(stats_arenas_i_lg_dirty_mult)
+CTL_PROTO(stats_arenas_i_decay_time)
 CTL_PROTO(stats_arenas_i_pactive)
 CTL_PROTO(stats_arenas_i_pdirty)
 CTL_PROTO(stats_arenas_i_mapped)
 CTL_PROTO(stats_arenas_i_npurge)
 CTL_PROTO(stats_arenas_i_nmadvise)
 CTL_PROTO(stats_arenas_i_purged)
 CTL_PROTO(stats_arenas_i_metadata_mapped)
 CTL_PROTO(stats_arenas_i_metadata_allocated)
@@ -236,16 +243,17 @@ static const ctl_named_node_t	thread_nod
 	{NAME("prof"),		CHILD(named, thread_prof)}
 };
 
 static const ctl_named_node_t	config_node[] = {
 	{NAME("cache_oblivious"), CTL(config_cache_oblivious)},
 	{NAME("debug"),		CTL(config_debug)},
 	{NAME("fill"),		CTL(config_fill)},
 	{NAME("lazy_lock"),	CTL(config_lazy_lock)},
+	{NAME("malloc_conf"),	CTL(config_malloc_conf)},
 	{NAME("munmap"),	CTL(config_munmap)},
 	{NAME("prof"),		CTL(config_prof)},
 	{NAME("prof_libgcc"),	CTL(config_prof_libgcc)},
 	{NAME("prof_libunwind"), CTL(config_prof_libunwind)},
 	{NAME("stats"),		CTL(config_stats)},
 	{NAME("tcache"),	CTL(config_tcache)},
 	{NAME("tls"),		CTL(config_tls)},
 	{NAME("utrace"),	CTL(config_utrace)},
@@ -253,17 +261,19 @@ static const ctl_named_node_t	config_nod
 	{NAME("xmalloc"),	CTL(config_xmalloc)}
 };
 
 static const ctl_named_node_t opt_node[] = {
 	{NAME("abort"),		CTL(opt_abort)},
 	{NAME("dss"),		CTL(opt_dss)},
 	{NAME("lg_chunk"),	CTL(opt_lg_chunk)},
 	{NAME("narenas"),	CTL(opt_narenas)},
+	{NAME("purge"),		CTL(opt_purge)},
 	{NAME("lg_dirty_mult"),	CTL(opt_lg_dirty_mult)},
+	{NAME("decay_time"),	CTL(opt_decay_time)},
 	{NAME("stats_print"),	CTL(opt_stats_print)},
 	{NAME("junk"),		CTL(opt_junk)},
 	{NAME("zero"),		CTL(opt_zero)},
 	{NAME("quarantine"),	CTL(opt_quarantine)},
 	{NAME("redzone"),	CTL(opt_redzone)},
 	{NAME("utrace"),	CTL(opt_utrace)},
 	{NAME("xmalloc"),	CTL(opt_xmalloc)},
 	{NAME("tcache"),	CTL(opt_tcache)},
@@ -283,18 +293,20 @@ static const ctl_named_node_t opt_node[]
 static const ctl_named_node_t	tcache_node[] = {
 	{NAME("create"),	CTL(tcache_create)},
 	{NAME("flush"),		CTL(tcache_flush)},
 	{NAME("destroy"),	CTL(tcache_destroy)}
 };
 
 static const ctl_named_node_t arena_i_node[] = {
 	{NAME("purge"),		CTL(arena_i_purge)},
+	{NAME("decay"),		CTL(arena_i_decay)},
 	{NAME("dss"),		CTL(arena_i_dss)},
 	{NAME("lg_dirty_mult"),	CTL(arena_i_lg_dirty_mult)},
+	{NAME("decay_time"),	CTL(arena_i_decay_time)},
 	{NAME("chunk_hooks"),	CTL(arena_i_chunk_hooks)}
 };
 static const ctl_named_node_t super_arena_i_node[] = {
 	{NAME(""),		CHILD(named, arena_i)}
 };
 
 static const ctl_indexed_node_t arena_node[] = {
 	{INDEX(arena_i)}
@@ -334,16 +346,17 @@ static const ctl_named_node_t super_aren
 static const ctl_indexed_node_t arenas_hchunk_node[] = {
 	{INDEX(arenas_hchunk_i)}
 };
 
 static const ctl_named_node_t arenas_node[] = {
 	{NAME("narenas"),	CTL(arenas_narenas)},
 	{NAME("initialized"),	CTL(arenas_initialized)},
 	{NAME("lg_dirty_mult"),	CTL(arenas_lg_dirty_mult)},
+	{NAME("decay_time"),	CTL(arenas_decay_time)},
 	{NAME("quantum"),	CTL(arenas_quantum)},
 	{NAME("page"),		CTL(arenas_page)},
 	{NAME("tcache_max"),	CTL(arenas_tcache_max)},
 	{NAME("nbins"),		CTL(arenas_nbins)},
 	{NAME("nhbins"),	CTL(arenas_nhbins)},
 	{NAME("bin"),		CHILD(indexed, arenas_bin)},
 	{NAME("nlruns"),	CTL(arenas_nlruns)},
 	{NAME("lrun"),		CHILD(indexed, arenas_lrun)},
@@ -434,16 +447,17 @@ static const ctl_named_node_t super_stat
 static const ctl_indexed_node_t stats_arenas_i_hchunks_node[] = {
 	{INDEX(stats_arenas_i_hchunks_j)}
 };
 
 static const ctl_named_node_t stats_arenas_i_node[] = {
 	{NAME("nthreads"),	CTL(stats_arenas_i_nthreads)},
 	{NAME("dss"),		CTL(stats_arenas_i_dss)},
 	{NAME("lg_dirty_mult"),	CTL(stats_arenas_i_lg_dirty_mult)},
+	{NAME("decay_time"),	CTL(stats_arenas_i_decay_time)},
 	{NAME("pactive"),	CTL(stats_arenas_i_pactive)},
 	{NAME("pdirty"),	CTL(stats_arenas_i_pdirty)},
 	{NAME("mapped"),	CTL(stats_arenas_i_mapped)},
 	{NAME("npurge"),	CTL(stats_arenas_i_npurge)},
 	{NAME("nmadvise"),	CTL(stats_arenas_i_nmadvise)},
 	{NAME("purged"),	CTL(stats_arenas_i_purged)},
 	{NAME("metadata"),	CHILD(named, stats_arenas_i_metadata)},
 	{NAME("small"),		CHILD(named, stats_arenas_i_small)},
@@ -514,18 +528,20 @@ ctl_arena_init(ctl_arena_stats_t *astats
 
 	return (false);
 }
 
 static void
 ctl_arena_clear(ctl_arena_stats_t *astats)
 {
 
+	astats->nthreads = 0;
 	astats->dss = dss_prec_names[dss_prec_limit];
 	astats->lg_dirty_mult = -1;
+	astats->decay_time = -1;
 	astats->pactive = 0;
 	astats->pdirty = 0;
 	if (config_stats) {
 		memset(&astats->astats, 0, sizeof(arena_stats_t));
 		astats->allocated_small = 0;
 		astats->nmalloc_small = 0;
 		astats->ndalloc_small = 0;
 		astats->nrequests_small = 0;
@@ -537,108 +553,116 @@ ctl_arena_clear(ctl_arena_stats_t *astat
 	}
 }
 
 static void
 ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena)
 {
 	unsigned i;
 
-	arena_stats_merge(arena, &cstats->dss, &cstats->lg_dirty_mult,
-	    &cstats->pactive, &cstats->pdirty, &cstats->astats, cstats->bstats,
-	    cstats->lstats, cstats->hstats);
+	if (config_stats) {
+		arena_stats_merge(arena, &cstats->nthreads, &cstats->dss,
+		    &cstats->lg_dirty_mult, &cstats->decay_time,
+		    &cstats->pactive, &cstats->pdirty, &cstats->astats,
+		    cstats->bstats, cstats->lstats, cstats->hstats);
 
-	for (i = 0; i < NBINS; i++) {
-		cstats->allocated_small += cstats->bstats[i].curregs *
-		    index2size(i);
-		cstats->nmalloc_small += cstats->bstats[i].nmalloc;
-		cstats->ndalloc_small += cstats->bstats[i].ndalloc;
-		cstats->nrequests_small += cstats->bstats[i].nrequests;
+		for (i = 0; i < NBINS; i++) {
+			cstats->allocated_small += cstats->bstats[i].curregs *
+			    index2size(i);
+			cstats->nmalloc_small += cstats->bstats[i].nmalloc;
+			cstats->ndalloc_small += cstats->bstats[i].ndalloc;
+			cstats->nrequests_small += cstats->bstats[i].nrequests;
+		}
+	} else {
+		arena_basic_stats_merge(arena, &cstats->nthreads, &cstats->dss,
+		    &cstats->lg_dirty_mult, &cstats->decay_time,
+		    &cstats->pactive, &cstats->pdirty);
 	}
 }
 
 static void
 ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats)
 {
 	unsigned i;
 
+	sstats->nthreads += astats->nthreads;
 	sstats->pactive += astats->pactive;
 	sstats->pdirty += astats->pdirty;
 
-	sstats->astats.mapped += astats->astats.mapped;
-	sstats->astats.npurge += astats->astats.npurge;
-	sstats->astats.nmadvise += astats->astats.nmadvise;
-	sstats->astats.purged += astats->astats.purged;
+	if (config_stats) {
+		sstats->astats.mapped += astats->astats.mapped;
+		sstats->astats.npurge += astats->astats.npurge;
+		sstats->astats.nmadvise += astats->astats.nmadvise;
+		sstats->astats.purged += astats->astats.purged;
 
-	sstats->astats.metadata_mapped += astats->astats.metadata_mapped;
-	sstats->astats.metadata_allocated += astats->astats.metadata_allocated;
+		sstats->astats.metadata_mapped +=
+		    astats->astats.metadata_mapped;
+		sstats->astats.metadata_allocated +=
+		    astats->astats.metadata_allocated;
 
-	sstats->allocated_small += astats->allocated_small;
-	sstats->nmalloc_small += astats->nmalloc_small;
-	sstats->ndalloc_small += astats->ndalloc_small;
-	sstats->nrequests_small += astats->nrequests_small;
+		sstats->allocated_small += astats->allocated_small;
+		sstats->nmalloc_small += astats->nmalloc_small;
+		sstats->ndalloc_small += astats->ndalloc_small;
+		sstats->nrequests_small += astats->nrequests_small;
 
-	sstats->astats.allocated_large += astats->astats.allocated_large;
-	sstats->astats.nmalloc_large += astats->astats.nmalloc_large;
-	sstats->astats.ndalloc_large += astats->astats.ndalloc_large;
-	sstats->astats.nrequests_large += astats->astats.nrequests_large;
+		sstats->astats.allocated_large +=
+		    astats->astats.allocated_large;
+		sstats->astats.nmalloc_large += astats->astats.nmalloc_large;
+		sstats->astats.ndalloc_large += astats->astats.ndalloc_large;
+		sstats->astats.nrequests_large +=
+		    astats->astats.nrequests_large;
 
-	sstats->astats.allocated_huge += astats->astats.allocated_huge;
-	sstats->astats.nmalloc_huge += astats->astats.nmalloc_huge;
-	sstats->astats.ndalloc_huge += astats->astats.ndalloc_huge;
+		sstats->astats.allocated_huge += astats->astats.allocated_huge;
+		sstats->astats.nmalloc_huge += astats->astats.nmalloc_huge;
+		sstats->astats.ndalloc_huge += astats->astats.ndalloc_huge;
 
-	for (i = 0; i < NBINS; i++) {
-		sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc;
-		sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc;
-		sstats->bstats[i].nrequests += astats->bstats[i].nrequests;
-		sstats->bstats[i].curregs += astats->bstats[i].curregs;
-		if (config_tcache) {
-			sstats->bstats[i].nfills += astats->bstats[i].nfills;
-			sstats->bstats[i].nflushes +=
-			    astats->bstats[i].nflushes;
+		for (i = 0; i < NBINS; i++) {
+			sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc;
+			sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc;
+			sstats->bstats[i].nrequests +=
+			    astats->bstats[i].nrequests;
+			sstats->bstats[i].curregs += astats->bstats[i].curregs;
+			if (config_tcache) {
+				sstats->bstats[i].nfills +=
+				    astats->bstats[i].nfills;
+				sstats->bstats[i].nflushes +=
+				    astats->bstats[i].nflushes;
+			}
+			sstats->bstats[i].nruns += astats->bstats[i].nruns;
+			sstats->bstats[i].reruns += astats->bstats[i].reruns;
+			sstats->bstats[i].curruns += astats->bstats[i].curruns;
 		}
-		sstats->bstats[i].nruns += astats->bstats[i].nruns;
-		sstats->bstats[i].reruns += astats->bstats[i].reruns;
-		sstats->bstats[i].curruns += astats->bstats[i].curruns;
-	}
 
-	for (i = 0; i < nlclasses; i++) {
-		sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc;
-		sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc;
-		sstats->lstats[i].nrequests += astats->lstats[i].nrequests;
-		sstats->lstats[i].curruns += astats->lstats[i].curruns;
-	}
+		for (i = 0; i < nlclasses; i++) {
+			sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc;
+			sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc;
+			sstats->lstats[i].nrequests +=
+			    astats->lstats[i].nrequests;
+			sstats->lstats[i].curruns += astats->lstats[i].curruns;
+		}
 
-	for (i = 0; i < nhclasses; i++) {
-		sstats->hstats[i].nmalloc += astats->hstats[i].nmalloc;
-		sstats->hstats[i].ndalloc += astats->hstats[i].ndalloc;
-		sstats->hstats[i].curhchunks += astats->hstats[i].curhchunks;
+		for (i = 0; i < nhclasses; i++) {
+			sstats->hstats[i].nmalloc += astats->hstats[i].nmalloc;
+			sstats->hstats[i].ndalloc += astats->hstats[i].ndalloc;
+			sstats->hstats[i].curhchunks +=
+			    astats->hstats[i].curhchunks;
+		}
 	}
 }
 
 static void
 ctl_arena_refresh(arena_t *arena, unsigned i)
 {
 	ctl_arena_stats_t *astats = &ctl_stats.arenas[i];
 	ctl_arena_stats_t *sstats = &ctl_stats.arenas[ctl_stats.narenas];
 
 	ctl_arena_clear(astats);
-
-	sstats->nthreads += astats->nthreads;
-	if (config_stats) {
-		ctl_arena_stats_amerge(astats, arena);
-		/* Merge into sum stats as well. */
-		ctl_arena_stats_smerge(sstats, astats);
-	} else {
-		astats->pactive += arena->nactive;
-		astats->pdirty += arena->ndirty;
-		/* Merge into sum stats as well. */
-		sstats->pactive += arena->nactive;
-		sstats->pdirty += arena->ndirty;
-	}
+	ctl_arena_stats_amerge(astats, arena);
+	/* Merge into sum stats as well. */
+	ctl_arena_stats_smerge(sstats, astats);
 }
 
 static bool
 ctl_grow(void)
 {
 	ctl_arena_stats_t *astats;
 
 	/* Initialize new arena. */
@@ -674,43 +698,27 @@ ctl_grow(void)
 	ctl_stats.narenas++;
 
 	return (false);
 }
 
 static void
 ctl_refresh(void)
 {
-	tsd_t *tsd;
 	unsigned i;
-	bool refreshed;
 	VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas);
 
 	/*
 	 * Clear sum stats, since they will be merged into by
 	 * ctl_arena_refresh().
 	 */
-	ctl_stats.arenas[ctl_stats.narenas].nthreads = 0;
 	ctl_arena_clear(&ctl_stats.arenas[ctl_stats.narenas]);
 
-	tsd = tsd_fetch();
-	for (i = 0, refreshed = false; i < ctl_stats.narenas; i++) {
-		tarenas[i] = arena_get(tsd, i, false, false);
-		if (tarenas[i] == NULL && !refreshed) {
-			tarenas[i] = arena_get(tsd, i, false, true);
-			refreshed = true;
-		}
-	}
-
-	for (i = 0; i < ctl_stats.narenas; i++) {
-		if (tarenas[i] != NULL)
-			ctl_stats.arenas[i].nthreads = arena_nbound(i);
-		else
-			ctl_stats.arenas[i].nthreads = 0;
-	}
+	for (i = 0; i < ctl_stats.narenas; i++)
+		tarenas[i] = arena_get(i, false);
 
 	for (i = 0; i < ctl_stats.narenas; i++) {
 		bool initialized = (tarenas[i] != NULL);
 
 		ctl_stats.arenas[i].initialized = initialized;
 		if (initialized)
 			ctl_arena_refresh(tarenas[i], i);
 	}
@@ -955,17 +963,17 @@ ctl_bymib(const size_t *mib, size_t mibl
 
 	/* Iterate down the tree. */
 	node = super_root_node;
 	for (i = 0; i < miblen; i++) {
 		assert(node);
 		assert(node->nchildren > 0);
 		if (ctl_named_node(node->children) != NULL) {
 			/* Children are named. */
-			if (node->nchildren <= mib[i]) {
+			if (node->nchildren <= (unsigned)mib[i]) {
 				ret = ENOENT;
 				goto label_return;
 			}
 			node = ctl_named_children(node, mib[i]);
 		} else {
 			const ctl_indexed_node_t *inode;
 
 			/* Indexed element. */
@@ -1194,27 +1202,27 @@ n##_ctl(const size_t *mib, size_t miblen
 	oldval = (m(tsd));						\
 	READ(oldval, t);						\
 									\
 	ret = 0;							\
 label_return:								\
 	return (ret);							\
 }
 
-#define	CTL_RO_BOOL_CONFIG_GEN(n)					\
+#define	CTL_RO_CONFIG_GEN(n, t)						\
 static int								\
 n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
     void *newp, size_t newlen)						\
 {									\
 	int ret;							\
-	bool oldval;							\
+	t oldval;							\
 									\
 	READONLY();							\
 	oldval = n;							\
-	READ(oldval, bool);						\
+	READ(oldval, t);						\
 									\
 	ret = 0;							\
 label_return:								\
 	return (ret);							\
 }
 
 /******************************************************************************/
 
@@ -1236,38 +1244,41 @@ epoch_ctl(const size_t *mib, size_t mibl
 	ret = 0;
 label_return:
 	malloc_mutex_unlock(&ctl_mtx);
 	return (ret);
 }
 
 /******************************************************************************/
 
-CTL_RO_BOOL_CONFIG_GEN(config_cache_oblivious)
-CTL_RO_BOOL_CONFIG_GEN(config_debug)
-CTL_RO_BOOL_CONFIG_GEN(config_fill)
-CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock)
-CTL_RO_BOOL_CONFIG_GEN(config_munmap)
-CTL_RO_BOOL_CONFIG_GEN(config_prof)
-CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc)
-CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind)
-CTL_RO_BOOL_CONFIG_GEN(config_stats)
-CTL_RO_BOOL_CONFIG_GEN(config_tcache)
-CTL_RO_BOOL_CONFIG_GEN(config_tls)
-CTL_RO_BOOL_CONFIG_GEN(config_utrace)
-CTL_RO_BOOL_CONFIG_GEN(config_valgrind)
-CTL_RO_BOOL_CONFIG_GEN(config_xmalloc)
+CTL_RO_CONFIG_GEN(config_cache_oblivious, bool)
+CTL_RO_CONFIG_GEN(config_debug, bool)
+CTL_RO_CONFIG_GEN(config_fill, bool)
+CTL_RO_CONFIG_GEN(config_lazy_lock, bool)
+CTL_RO_CONFIG_GEN(config_malloc_conf, const char *)
+CTL_RO_CONFIG_GEN(config_munmap, bool)
+CTL_RO_CONFIG_GEN(config_prof, bool)
+CTL_RO_CONFIG_GEN(config_prof_libgcc, bool)
+CTL_RO_CONFIG_GEN(config_prof_libunwind, bool)
+CTL_RO_CONFIG_GEN(config_stats, bool)
+CTL_RO_CONFIG_GEN(config_tcache, bool)
+CTL_RO_CONFIG_GEN(config_tls, bool)
+CTL_RO_CONFIG_GEN(config_utrace, bool)
+CTL_RO_CONFIG_GEN(config_valgrind, bool)
+CTL_RO_CONFIG_GEN(config_xmalloc, bool)
 
 /******************************************************************************/
 
 CTL_RO_NL_GEN(opt_abort, opt_abort, bool)
 CTL_RO_NL_GEN(opt_dss, opt_dss, const char *)
 CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t)
-CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t)
+CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned)
+CTL_RO_NL_GEN(opt_purge, purge_mode_names[opt_purge], const char *)
 CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t)
+CTL_RO_NL_GEN(opt_decay_time, opt_decay_time, ssize_t)
 CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool)
 CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, const char *)
 CTL_RO_NL_CGEN(config_fill, opt_quarantine, opt_quarantine, size_t)
 CTL_RO_NL_CGEN(config_fill, opt_redzone, opt_redzone, bool)
 CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool)
 CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool)
 CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool)
 CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool)
@@ -1309,17 +1320,17 @@ thread_arena_ctl(const size_t *mib, size
 
 		if (newind >= ctl_stats.narenas) {
 			/* New arena index is out of range. */
 			ret = EFAULT;
 			goto label_return;
 		}
 
 		/* Initialize arena if necessary. */
-		newarena = arena_get(tsd, newind, true, true);
+		newarena = arena_get(newind, true);
 		if (newarena == NULL) {
 			ret = EAGAIN;
 			goto label_return;
 		}
 		/* Set new arena/tcache associations. */
 		arena_migrate(tsd, oldind, newin