Bug 1073662 - Part 6: Add JEMALLOC_RECYCLE, a mode that keeps a limited amount of chunks alive. r=glandium
author Emanuel Hoogeveen <emanuel.hoogeveen@gmail.com>
Thu, 13 Nov 2014 22:55:00 +0100
changeset 215829 562222233fbf9bacf81513c9fbe073ce0a6419ec
parent 215828 980888a644bd00928a5bd669eada6b27a0721957
child 215830 6e664f1aed21dd69ab4aa203106d5868f356f4d2
push id 27827
push user ryanvm@gmail.com
push date Fri, 14 Nov 2014 22:48:07 +0000
treeherder mozilla-central@acbd7b68fa8c
reviewers glandium
bugs 1073662
milestone 36.0a1
Bug 1073662 - Part 6: Add JEMALLOC_RECYCLE, a mode that keeps a limited amount of chunks alive. r=glandium
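
In short, the new mode sits between the two existing behaviours, never unmapping (JEMALLOC_MUNMAP undefined) and always unmapping: freed chunks are parked in the recycle trees until a fixed byte cap is reached, after which they are unmapped as before. The sketch below is illustrative only and is not part of the patch; should_keep_chunk() is a hypothetical helper that restates the condition added to chunk_dalloc_mmap further down.

/* Hypothetical helper, for illustration only: restates the decision made in
 * chunk_dalloc_mmap(). Returning true means the chunk is kept around for
 * reuse instead of being unmapped. */
static bool
should_keep_chunk(size_t size)
{
	if (!config_munmap)
		return true;	/* unmapping disabled: always keep chunks */
	if (!config_recycle)
		return false;	/* recycling disabled: always unmap */
	/* Recycle only sizes we can handle, and only below the byte cap. */
	return CAN_RECYCLE(size) &&
	    load_acquire_z(&recycled_size) < recycle_limit;
}
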
memory/mozjemalloc/jemalloc.c
--- a/memory/mozjemalloc/jemalloc.c
+++ b/memory/mozjemalloc/jemalloc.c
@@ -285,16 +285,18 @@ typedef long ssize_t;
 #endif
 
 /*
  * Allow unmapping pages on all platforms. Note that if this is disabled,
  * jemalloc will never unmap anything, instead recycling pages for later use.
  */
 #define JEMALLOC_MUNMAP
 
+#undef JEMALLOC_RECYCLE
+
 #ifndef MOZ_MEMORY_WINDOWS
 #ifndef MOZ_MEMORY_SOLARIS
 #include <sys/cdefs.h>
 #endif
 #ifndef __DECONST
 #  define __DECONST(type, var)	((type)(uintptr_t)(const void *)(var))
 #endif
 #ifndef MOZ_MEMORY
@@ -1065,16 +1067,22 @@ static unsigned		ncpus;
 #endif
 
 #ifdef JEMALLOC_MUNMAP
 static const bool config_munmap = true;
 #else
 static const bool config_munmap = false;
 #endif
 
+#ifdef JEMALLOC_RECYCLE
+static const bool config_recycle = true;
+#else
+static const bool config_recycle = false;
+#endif
+
 /*
  * When MALLOC_STATIC_SIZES is defined most of the parameters
  * controlling the malloc behavior are defined as compile-time constants
  * for best performance and cannot be altered at runtime.
  */
 #if !defined(__ia64__) && !defined(__sparc__) && !defined(__mips__)
 #define MALLOC_STATIC_SIZES 1
 #endif
@@ -1159,30 +1167,41 @@ static size_t		quantum_mask; /* (quantum
 #define calculate_arena_header_pages()					\
 	((calculate_arena_header_size() >> pagesize_2pow) +		\
 	 ((calculate_arena_header_size() & pagesize_mask) ? 1 : 0))
 
 /* Max size class for arenas. */
 #define calculate_arena_maxclass()					\
 	(chunksize - (arena_chunk_header_npages << pagesize_2pow))
 
+/*
+ * Recycle at most 128 chunks. With 1 MiB chunks, this means we retain at most
+ * 6.25% of the process address space on a 32-bit OS for later use.
+ */
+#define CHUNK_RECYCLE_LIMIT 128
+
 #ifdef MALLOC_STATIC_SIZES
 #define CHUNKSIZE_DEFAULT		((size_t) 1 << CHUNK_2POW_DEFAULT)
 static const size_t	chunksize =	CHUNKSIZE_DEFAULT;
 static const size_t	chunksize_mask =CHUNKSIZE_DEFAULT - 1;
 static const size_t	chunk_npages =	CHUNKSIZE_DEFAULT >> pagesize_2pow;
 #define arena_chunk_header_npages	calculate_arena_header_pages()
 #define arena_maxclass			calculate_arena_maxclass()
+static const size_t	recycle_limit = CHUNK_RECYCLE_LIMIT * CHUNKSIZE_DEFAULT;
 #else
 static size_t		chunksize;
 static size_t		chunksize_mask; /* (chunksize - 1). */
 static size_t		chunk_npages;
 static size_t		arena_chunk_header_npages;
 static size_t		arena_maxclass; /* Max size class for arenas. */
-#endif
+static size_t		recycle_limit;
+#endif
+
+/* The current amount of recycled bytes, updated atomically. */
+static size_t recycled_size;
 
 /********/
 /*
  * Chunks.
  */
 
 #ifdef MALLOC_VALIDATE
 static malloc_rtree_t *chunk_rtree;
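
For concreteness, the numbers behind the new constants work out as follows; the 2 GiB figure is an assumption about the default user-mode address space of a 32-bit Windows process, not something stated in the patch.

/* Worked numbers for the default configuration (chunksize = 1 MiB):
 *
 *   recycle_limit = CHUNK_RECYCLE_LIMIT * chunksize
 *                 = 128 * (1 << 20) bytes = 128 MiB
 *
 * Against a 2 GiB user-mode address space (a typical default for a 32-bit
 * Windows process), 128 MiB / 2048 MiB = 6.25%, matching the comment above.
 * With MALLOC_STATIC_SIZES the limit is a compile-time constant; otherwise it
 * is computed during initialization once opt_chunk_2pow is known (see the
 * hunk near the end of this patch).
 */
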
@@ -1480,16 +1499,34 @@ static const bool osx_use_jemalloc = tru
 
 #endif
 
 /*
  * End function prototypes.
  */
 /******************************************************************************/
 
+static inline size_t
+load_acquire_z(size_t *p)
+{
+	volatile size_t result = *p;
+#  ifdef MOZ_MEMORY_WINDOWS
+	/*
+	 * We use InterlockedExchange with a dummy value to insert a memory
+	 * barrier. This has been confirmed to generate the right instruction
+	 * and is also used by MinGW.
+	 */
+	volatile long dummy = 0;
+	InterlockedExchange(&dummy, 1);
+#  else
+	__sync_synchronize();
+#  endif
+	return result;
+}
+
 /*
  * umax2s() provides minimal integer printing functionality, which is
  * especially useful for situations where allocation in vsnprintf() calls would
  * potentially cause deadlock.
  */
 #define	UMAX2S_BUFSIZE	65
 char *
 umax2s(uintmax_t x, unsigned base, char *s)
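
load_acquire_z() gives an acquire-style read: the plain load is followed by a full memory barrier, so later accesses cannot be reordered ahead of it, and a value published by another thread (here, under chunks_mtx) is read safely without taking the lock. A minimal usage sketch follows; the counter variable and counter_below() helper are hypothetical and not part of the patch.

/* Hypothetical example: read a counter that other threads only modify while
 * holding a mutex, without acquiring that mutex on the read side. */
static size_t counter;		/* written elsewhere, under some mutex */

static bool
counter_below(size_t limit)
{
	/* The barrier in load_acquire_z() keeps subsequent memory accesses
	 * from being hoisted above this load of counter. */
	return load_acquire_z(&counter) < limit;
}
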
@@ -2814,16 +2851,20 @@ chunk_recycle(extent_tree_t *chunks_szad
 		}
 		node->addr = (void *)((uintptr_t)(ret) + size);
 		node->size = trailsize;
 		node->zeroed = zeroed;
 		extent_tree_szad_insert(chunks_szad, node);
 		extent_tree_ad_insert(chunks_ad, node);
 		node = NULL;
 	}
+
+	if (config_munmap && config_recycle)
+		recycled_size -= size;
+
 	malloc_mutex_unlock(&chunks_mtx);
 
 	if (node != NULL)
 		base_node_dealloc(node);
 #ifdef MALLOC_DECOMMIT
 	pages_commit(ret, size);
 #endif
 	if (*zero) {
@@ -2837,27 +2878,39 @@ chunk_recycle(extent_tree_t *chunks_szad
 			for (i = 0; i < size / sizeof(size_t); i++)
 				assert(p[i] == 0);
 		}
 #endif
 	}
 	return (ret);
 }
 
+#ifdef MOZ_MEMORY_WINDOWS
+/*
+ * On Windows, calls to VirtualAlloc and VirtualFree must be matched, making it
+ * awkward to recycle allocations of varying sizes. Therefore we only allow
+ * recycling when the size equals the chunksize, unless deallocation is entirely
+ * disabled.
+ */
+#define CAN_RECYCLE(size) (size == chunksize)
+#else
+#define CAN_RECYCLE(size) true
+#endif
+
 static void *
 chunk_alloc(size_t size, size_t alignment, bool base, bool zero)
 {
 	void *ret;
 
 	assert(size != 0);
 	assert((size & chunksize_mask) == 0);
 	assert(alignment != 0);
 	assert((alignment & chunksize_mask) == 0);
 
-	if (!config_munmap) {
+	if (!config_munmap || (config_recycle && CAN_RECYCLE(size))) {
 		ret = chunk_recycle(&chunks_szad_mmap, &chunks_ad_mmap,
 			size, alignment, base, &zero);
 		if (ret != NULL)
 			goto RETURN;
 	}
 	ret = chunk_alloc_mmap(size, alignment);
 	if (ret != NULL) {
 		goto RETURN;
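
The Windows restriction mentioned in the comment above comes from VirtualFree: with MEM_RELEASE it only accepts the exact base address returned by VirtualAlloc together with a size of zero, so a reservation cannot be released piecemeal. A standalone illustration, not code from the patch:

/* Why recycling arbitrary sizes is awkward on Windows: a reservation can
 * only be released as a whole. */
static void
release_example(void)
{
	void *base = VirtualAlloc(NULL, 2 * chunksize,
	    MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);

	/* Releasing just the second chunk is not possible; MEM_RELEASE
	 * requires the original base address and a dwSize of 0, so
	 *   VirtualFree((char *)base + chunksize, chunksize, MEM_RELEASE);
	 * would simply fail. */

	/* The whole region must be released exactly as it was reserved. */
	VirtualFree(base, 0, MEM_RELEASE);
}
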
@@ -2950,38 +3003,44 @@ chunk_record(extent_tree_t *chunks_szad,
 		node->addr = prev->addr;
 		node->size += prev->size;
 		node->zeroed = (node->zeroed && prev->zeroed);
 		extent_tree_szad_insert(chunks_szad, node);
 
 		xprev = prev;
 	}
 
+	if (config_munmap && config_recycle)
+		recycled_size += size;
+
 label_return:
 	malloc_mutex_unlock(&chunks_mtx);
 	/*
 	 * Deallocate xnode and/or xprev after unlocking chunks_mtx in order to
 	 * avoid potential deadlock.
 	 */
 	if (xnode != NULL)
 		base_node_dealloc(xnode);
 	if (xprev != NULL)
 		base_node_dealloc(xprev);
 }
 
 static bool
 chunk_dalloc_mmap(void *chunk, size_t size)
 {
-	if (!config_munmap)
+	if (!config_munmap || (config_recycle && CAN_RECYCLE(size) &&
+			load_acquire_z(&recycled_size) < recycle_limit))
 		return true;
 
 	pages_unmap(chunk, size);
 	return false;
 }
 
+#undef CAN_RECYCLE
+
 static void
 chunk_dealloc(void *chunk, size_t size)
 {
 
 	assert(chunk != NULL);
 	assert(CHUNK_ADDR2BASE(chunk) == chunk);
 	assert(size != 0);
 	assert((size & chunksize_mask) == 0);
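
Putting the bookkeeping together with some made-up numbers (chunksize = 1 MiB, so recycle_limit = 128 MiB); the sequence below is illustrative only.

/* Hypothetical sequence showing how recycled_size tracks the cap:
 *
 *   free a 1 MiB chunk while recycled_size = 127 MiB:
 *       127 MiB < 128 MiB, so chunk_dalloc_mmap() returns true, the chunk is
 *       kept for reuse and chunk_record() raises recycled_size to 128 MiB.
 *   free another 1 MiB chunk while recycled_size = 128 MiB:
 *       the limit is reached, so the chunk is unmapped with pages_unmap().
 *   allocate a 1 MiB chunk:
 *       chunk_recycle() hands back a kept chunk and lowers recycled_size to
 *       127 MiB, making room to recycle again.
 */
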
@@ -5805,17 +5864,21 @@ MALLOC_OUT:
 
 	/* Set variables according to the value of opt_chunk_2pow. */
 	chunksize = (1LU << opt_chunk_2pow);
 	chunksize_mask = chunksize - 1;
 	chunk_npages = (chunksize >> pagesize_2pow);
 
 	arena_chunk_header_npages = calculate_arena_header_pages();
 	arena_maxclass = calculate_arena_maxclass();
-#endif
+
+	recycle_limit = CHUNK_RECYCLE_LIMIT * chunksize;
+#endif
+
+	recycled_size = 0;
 
 #ifdef JEMALLOC_USES_MAP_ALIGN
 	/*
 	 * When using MAP_ALIGN, the alignment parameter must be a power of two
 	 * multiple of the system pagesize, or mmap will fail.
 	 */
 	assert((chunksize % pagesize) == 0);
 	assert((1 << (ffs(chunksize / pagesize) - 1)) == (chunksize/pagesize));
@@ -6675,17 +6738,17 @@ MOZ_JEMALLOC_API void
 jemalloc_purge_freed_pages_impl()
 {
 	size_t i;
 	for (i = 0; i < narenas; i++) {
 		arena_t *arena = arenas[i];
 		if (arena != NULL)
 			hard_purge_arena(arena);
 	}
-	if (!config_munmap) {
+	if (!config_munmap || config_recycle) {
 		extent_node_t *node = extent_tree_szad_first(&chunks_szad_mmap);
 		while (node) {
 			pages_decommit(node->addr, node->size);
 			pages_commit(node->addr, node->size);
 			node->zeroed = true;
 			node = extent_tree_szad_next(&chunks_szad_mmap, node);
 		}
 	}