bug 423036. decommit for huge allocations. patch from Jason Evans. r/sr=me a=vlad
author      pavlov@pavlov.net
date        Fri, 14 Mar 2008 20:51:44 -0700
changeset   13110 bd0768970b981f0c64549dc4f1eece9f6276527f
parent      13109 550c5061ec27a2d16217a756fb92a04265a50387
child       13111 254bdfcff2cacf86e83b3c5d3d90351cb807a324
push id     unknown
push user   unknown
push date   unknown
reviewers   vlad
bugs        423036
milestone   1.9b5pre
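Note (not part of the patch): the diff below adds pages_commit()/pages_decommit() primitives and uses them so that only the first PAGE_CEILING(size) bytes of each huge allocation stay committed, while the tail up to CHUNK_CEILING(size) is handed back to the OS. The standalone Unix-only sketch that follows illustrates the same over-map-with-PROT_NONE idea in isolation; the demo_* names and the 1 MiB chunk size are illustrative stand-ins, not jemalloc's own.

/*
 * Standalone sketch (Unix only, illustrative names): map a "chunk", keep only
 * the first page-aligned prefix committed, and decommit the rest by
 * over-mapping it with PROT_NONE anonymous pages, as the patch's Unix
 * pages_decommit() does.
 */
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

#define DEMO_CHUNK_SIZE	((size_t)1 << 20)	/* 1 MiB "chunk", demo value only. */

static size_t
demo_page_ceiling(size_t s)
{
	size_t pagesize = (size_t)sysconf(_SC_PAGESIZE);

	return ((s + pagesize - 1) & ~(pagesize - 1));
}

/* Analogue of the patch's Unix pages_decommit(): drop the physical backing. */
static void
demo_pages_decommit(void *addr, size_t size)
{

	if (mmap(addr, size, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON,
	    -1, 0) == MAP_FAILED)
		abort();
}

int
main(void)
{
	size_t size = 300 * 1024;			/* Requested size. */
	size_t psize = demo_page_ceiling(size);		/* Bytes left committed. */
	void *chunk = mmap(NULL, DEMO_CHUNK_SIZE, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON, -1, 0);

	assert(chunk != MAP_FAILED);

	/* Decommit the unused tail, as huge_malloc() does after this patch. */
	if (DEMO_CHUNK_SIZE - psize > 0)
		demo_pages_decommit((void *)((uintptr_t)chunk + psize),
		    DEMO_CHUNK_SIZE - psize);

	((char *)chunk)[0] = 1;		/* The committed prefix is still usable. */
	munmap(chunk, DEMO_CHUNK_SIZE);
	return (0);
}

On Windows the patch gets the same effect with VirtualAlloc(..., MEM_COMMIT, PAGE_READWRITE) and VirtualFree(..., MEM_DECOMMIT), since over-mapping a range with anonymous PROT_NONE pages is a Unix idiom.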
memory/jemalloc/jemalloc.c
--- a/memory/jemalloc/jemalloc.c
+++ b/memory/jemalloc/jemalloc.c
@@ -150,20 +150,17 @@
 #define	MALLOC_DSS
 #endif
 
 #ifdef MOZ_MEMORY_LINUX
 #define	_GNU_SOURCE /* For mremap(2). */
 #define	issetugid() 0
 #if 0 /* Enable in order to test decommit code on Linux. */
 #  define MALLOC_DECOMMIT
-/*
- * The decommit code for Unix doesn't bother to make sure deallocated DSS
- * chunks are writable.
- */
+/* The decommit code for Unix doesn't support DSS chunks. */
 #  undef MALLOC_DSS
 #endif
 #endif
 
 #include <sys/types.h>
 
 #include <errno.h>
 #include <limits.h>
@@ -982,16 +979,19 @@ static size_t		huge_allocated;
 
 /*
  * Current pages that are being used for internal memory allocations.  These
  * pages are carved up in cacheline-size quanta, so that there is no chance of
  * false cache line sharing.
  */
 static void		*base_pages;
 static void		*base_next_addr;
+#ifdef MALLOC_DECOMMIT
+static void		*base_next_decommitted;
+#endif
 static void		*base_past_addr; /* Addr immediately past base_pages. */
 static extent_node_t	*base_nodes;
 static malloc_mutex_t	base_mtx;
 #ifdef MALLOC_STATS
 static size_t		base_mapped;
 #endif
 
 /********/
@@ -1620,16 +1620,44 @@ umax2s(uintmax_t x, char *s)
 		x /= 10;
 	} while (x > 0);
 
 	return (&s[i]);
 }
 
 /******************************************************************************/
 
+#ifdef MALLOC_DECOMMIT
+static inline void
+pages_decommit(void *addr, size_t size)
+{
+
+#ifdef MOZ_MEMORY_WINDOWS
+	VirtualFree(addr, size, MEM_DECOMMIT);
+#else
+	if (mmap(addr, size, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1,
+	    0) == MAP_FAILED)
+		abort();
+#endif
+}
+
+static inline void
+pages_commit(void *addr, size_t size)
+{
+
+#  ifdef MOZ_MEMORY_WINDOWS
+	VirtualAlloc(addr, size, MEM_COMMIT, PAGE_READWRITE);
+#  else
+	if (mmap(addr, size, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_PRIVATE |
+	    MAP_ANON, -1, 0) == MAP_FAILED)
+		abort();
+#  endif
+}
+#endif
+
 #ifdef MALLOC_DSS
 static bool
 base_pages_alloc_dss(size_t minsize)
 {
 
 	/*
 	 * Do special DSS allocation here, since base allocations don't need to
 	 * be chunk-aligned.
@@ -1674,24 +1702,37 @@ base_pages_alloc_dss(size_t minsize)
 	return (true);
 }
 #endif
 
 static bool
 base_pages_alloc_mmap(size_t minsize)
 {
 	size_t csize;
+#ifdef MALLOC_DECOMMIT
+	size_t pminsize;
+#endif
 
 	assert(minsize != 0);
 	csize = PAGE_CEILING(minsize);
 	base_pages = pages_map(NULL, csize);
 	if (base_pages == NULL)
 		return (true);
 	base_next_addr = base_pages;
 	base_past_addr = (void *)((uintptr_t)base_pages + csize);
+#ifdef MALLOC_DECOMMIT
+	/*
+	 * Leave enough pages for minsize committed, since otherwise they would
+	 * have to be immediately recommitted.
+	 */
+	pminsize = PAGE_CEILING(minsize);
+	base_next_decommitted = (void *)((uintptr_t)base_pages + pminsize);
+	if (pminsize < csize)
+		pages_decommit(base_next_decommitted, csize - pminsize);
+#endif
 #ifdef MALLOC_STATS
 	base_mapped += csize;
 #endif
 
 	return (false);
 }
 
 static bool
@@ -1727,16 +1768,27 @@ base_alloc(size_t size)
 	/* Make sure there's enough space for the allocation. */
 	if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
 		if (base_pages_alloc(csize))
 			return (NULL);
 	}
 	/* Allocate. */
 	ret = base_next_addr;
 	base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
+#ifdef MALLOC_DECOMMIT
+	/* Make sure enough pages are committed for the new allocation. */
+	if ((uintptr_t)base_next_addr > (uintptr_t)base_next_decommitted) {
+		void *pbase_next_addr =
+		    (void *)(PAGE_CEILING((uintptr_t)base_next_addr));
+
+		pages_commit(base_next_decommitted, (uintptr_t)pbase_next_addr -
+		    (uintptr_t)base_next_decommitted);
+		base_next_decommitted = pbase_next_addr;
+	}
+#endif
 	malloc_mutex_unlock(&base_mtx);
 
 	return (ret);
 }
 
 static void *
 base_calloc(size_t number, size_t size)
 {
@@ -2064,44 +2116,16 @@ pages_unmap(void *addr, size_t size)
 		_malloc_message(_getprogname(),
 		    ": (malloc) Error in munmap(): ", buf, "\n");
 		if (opt_abort)
 			abort();
 	}
 }
 #endif
 
-#ifdef MALLOC_DECOMMIT
-static inline void
-pages_decommit(void *addr, size_t size)
-{
-
-#ifdef MOZ_MEMORY_WINDOWS
-	VirtualFree(addr, size, MEM_DECOMMIT);
-#else
-	if (mmap(addr, size, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1,
-	    0) == MAP_FAILED)
-		abort();
-#endif
-}
-
-static inline void
-pages_commit(void *addr, size_t size)
-{
-
-#  ifdef MOZ_MEMORY_WINDOWS
-	VirtualAlloc(addr, size, MEM_COMMIT, PAGE_READWRITE);
-#  else
-	if (mmap(addr, size, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_PRIVATE |
-	    MAP_ANON, -1, 0) == MAP_FAILED)
-		abort();
-#  endif
-}
-#endif
-
 #ifdef MALLOC_DSS
 static void *
 chunk_alloc_dss(size_t size)
 {
 
 	malloc_mutex_lock(&dss_mtx);
 	if (dss_prev != (void *)-1) {
 		intptr_t incr;
@@ -4536,16 +4560,19 @@ arenas_extend(unsigned ind)
  * Begin general internal functions.
  */
 
 static void *
 huge_malloc(size_t size, bool zero)
 {
 	void *ret;
 	size_t csize;
+#ifdef MALLOC_DECOMMIT
+	size_t psize;
+#endif
 	extent_node_t *node;
 
 	/* Allocate one or more contiguous chunks for this request. */
 
 	csize = CHUNK_CEILING(size);
 	if (csize == 0) {
 		/* size is large enough to cause size_t wrap-around. */
 		return (NULL);
@@ -4559,44 +4586,69 @@ huge_malloc(size_t size, bool zero)
 	ret = chunk_alloc(csize, zero);
 	if (ret == NULL) {
 		base_node_dealloc(node);
 		return (NULL);
 	}
 
 	/* Insert node into huge. */
 	node->addr = ret;
+#ifdef MALLOC_DECOMMIT
+	psize = PAGE_CEILING(size);
+	node->size = psize;
+#else
 	node->size = csize;
+#endif
 
 	malloc_mutex_lock(&huge_mtx);
 	RB_INSERT(extent_tree_ad_s, &huge, node);
 #ifdef MALLOC_STATS
 	huge_nmalloc++;
+#  ifdef MALLOC_DECOMMIT
+	huge_allocated += psize;
+#  else
 	huge_allocated += csize;
+#  endif
 #endif
 	malloc_mutex_unlock(&huge_mtx);
 
+#ifdef MALLOC_DECOMMIT
+	if (csize - psize > 0)
+		pages_decommit((void *)((uintptr_t)ret + psize), csize - psize);
+#endif
+
 #ifdef MALLOC_FILL
 	if (zero == false) {
 		if (opt_junk)
+#  ifdef MALLOC_DECOMMIT
+			memset(ret, 0xa5, psize);
+#  else
 			memset(ret, 0xa5, csize);
+#  endif
 		else if (opt_zero)
+#  ifdef MALLOC_DECOMMIT
+			memset(ret, 0, psize);
+#  else
 			memset(ret, 0, csize);
+#  endif
 	}
 #endif
 
 	return (ret);
 }
 
 /* Only handles large allocations that require more than chunk alignment. */
 static void *
 huge_palloc(size_t alignment, size_t size)
 {
 	void *ret;
 	size_t alloc_size, chunk_size, offset;
+#ifdef MALLOC_DECOMMIT
+	size_t psize;
+#endif
 	extent_node_t *node;
 
 	/*
 	 * This allocation requires alignment that is even larger than chunk
 	 * alignment.  This means that huge_malloc() isn't good enough.
 	 *
 	 * Allocate almost twice as many chunks as are demanded by the size or
 	 * alignment, in order to assure the alignment can be achieved, then
@@ -4611,16 +4663,43 @@ huge_palloc(size_t alignment, size_t siz
 	else
 		alloc_size = (alignment << 1) - chunksize;
 
 	/* Allocate an extent node with which to track the chunk. */
 	node = base_node_alloc();
 	if (node == NULL)
 		return (NULL);
 
+#ifdef MOZ_MEMORY_WINDOWS
+	/*
+	 * Windows requires that there be a 1:1 mapping between VM
+	 * allocation/deallocation operations.  Therefore, take care here to
+	 * acquire the final result via one mapping operation.
+	 */
+	do {
+		void *over;
+
+		over = chunk_alloc(alloc_size, false);
+		if (over == NULL) {
+			base_node_dealloc(node);
+			return (NULL);
+		}
+
+		offset = (uintptr_t)over & (alignment - 1);
+		assert((offset & chunksize_mask) == 0);
+		assert(offset < alloc_size);
+		ret = (void *)((uintptr_t)over + offset);
+		chunk_dealloc(over, alloc_size);
+		ret = pages_map(ret, chunk_size);
+		/*
+		 * Failure here indicates a race with another thread, so try
+		 * again.
+		 */
+	} while (ret == NULL);
+#else
 	ret = chunk_alloc(alloc_size, false);
 	if (ret == NULL) {
 		base_node_dealloc(node);
 		return (NULL);
 	}
 
 	offset = (uintptr_t)ret & (alignment - 1);
 	assert((offset & chunksize_mask) == 0);
@@ -4640,53 +4719,122 @@ huge_palloc(size_t alignment, size_t siz
 		trailsize = alloc_size - (alignment - offset) - chunk_size;
 		if (trailsize != 0) {
 		    /* Trim trailing space. */
 		    assert(trailsize < alloc_size);
 		    chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
 			trailsize);
 		}
 	}
+#endif
 
 	/* Insert node into huge. */
 	node->addr = ret;
+#ifdef MALLOC_DECOMMIT
+	psize = PAGE_CEILING(size);
+	node->size = psize;
+#else
 	node->size = chunk_size;
+#endif
 
 	malloc_mutex_lock(&huge_mtx);
 	RB_INSERT(extent_tree_ad_s, &huge, node);
 #ifdef MALLOC_STATS
 	huge_nmalloc++;
+#  ifdef MALLOC_DECOMMIT
+	huge_allocated += psize;
+#  else
 	huge_allocated += chunk_size;
+#  endif
 #endif
 	malloc_mutex_unlock(&huge_mtx);
 
+#ifdef MALLOC_DECOMMIT
+	if (chunk_size - psize > 0) {
+		pages_decommit((void *)((uintptr_t)ret + psize),
+		    chunk_size - psize);
+	}
+#endif
+
 #ifdef MALLOC_FILL
 	if (opt_junk)
+#  ifdef MALLOC_DECOMMIT
+		memset(ret, 0xa5, psize);
+#  else
 		memset(ret, 0xa5, chunk_size);
+#  endif
 	else if (opt_zero)
+#  ifdef MALLOC_DECOMMIT
+		memset(ret, 0, psize);
+#  else
 		memset(ret, 0, chunk_size);
+#  endif
 #endif
 
 	return (ret);
 }
 
 static void *
 huge_ralloc(void *ptr, size_t size, size_t oldsize)
 {
 	void *ret;
 	size_t copysize;
 
 	/* Avoid moving the allocation if the size class would not change. */
+
 	if (oldsize > arena_maxclass &&
 	    CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
+#ifdef MALLOC_DECOMMIT
+		size_t psize = PAGE_CEILING(size);
+#endif
 #ifdef MALLOC_FILL
 		if (opt_junk && size < oldsize) {
 			memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
 			    - size);
-		} else if (opt_zero && size > oldsize) {
+		}
+#endif
+#ifdef MALLOC_DECOMMIT
+		if (psize < oldsize) {
+			extent_node_t *node, key;
+
+			pages_decommit((void *)((uintptr_t)ptr + psize),
+			    oldsize - psize);
+
+			/* Update recorded size. */
+			malloc_mutex_lock(&huge_mtx);
+			key.addr = __DECONST(void *, ptr);
+			node = RB_FIND(extent_tree_ad_s, &huge, &key);
+			assert(node != NULL);
+			assert(node->size == oldsize);
+#  ifdef MALLOC_STATS
+			huge_allocated -= oldsize - psize;
+#  endif
+			node->size = psize;
+			malloc_mutex_unlock(&huge_mtx);
+		} else if (psize > oldsize) {
+			extent_node_t *node, key;
+
+			pages_commit((void *)((uintptr_t)ptr + oldsize),
+			    psize - oldsize);
+
+			/* Update recorded size. */
+			malloc_mutex_lock(&huge_mtx);
+			key.addr = __DECONST(void *, ptr);
+			node = RB_FIND(extent_tree_ad_s, &huge, &key);
+			assert(node != NULL);
+			assert(node->size == oldsize);
+#  ifdef MALLOC_STATS
+			huge_allocated += psize - oldsize;
+#  endif
+			node->size = psize;
+			malloc_mutex_unlock(&huge_mtx);
+		}
+#endif
+#ifdef MALLOC_FILL
+		if (opt_zero && size > oldsize) {
 			memset((void *)((uintptr_t)ptr + oldsize), 0, size
 			    - oldsize);
 		}
 #endif
 		return (ptr);
 	}
 
 	/*
@@ -4732,17 +4880,21 @@ huge_dalloc(void *ptr)
 
 	/* Unmap chunk. */
 #ifdef MALLOC_DSS
 #ifdef MALLOC_FILL
 	if (opt_dss && opt_junk)
 		memset(node->addr, 0x5a, node->size);
 #endif
 #endif
+#ifdef MALLOC_DECOMMIT
+	chunk_dealloc(node->addr, CHUNK_CEILING(node->size));
+#else
 	chunk_dealloc(node->addr, node->size);
+#endif
 
 	base_node_dealloc(node);
 }
 
 #ifdef MOZ_MEMORY_BSD
 static inline unsigned
 malloc_ncpus(void)
 {