Bug 1397101 - Reduce the number of dirty pages we allow to be kept in thread local arenas. r=njn
author Mike Hommey <mh+mozilla@glandium.org>
Thu, 14 Sep 2017 07:26:30 +0900
changeset 430288 60f6c456364517795ccc0ccfdbf981e57c224e25
parent 430287 e3f4a1ad4928e9fc3a3ec03dbb4b59fdd4a52e57
child 430289 d9efe6d06931e47b020fb9a39e18ff8a13882f33
push id 7761
push user jlund@mozilla.com
push date Fri, 15 Sep 2017 00:19:52 +0000
treeherder mozilla-beta@c38455951db4
reviewers njn
bugs 1397101, 1361258
milestone 57.0a1
Until bug 1361258, there was only ever one mozjemalloc arena, and the number of dirty pages we allow to be kept around, fixed to 1MB per arena, was in fact 1MB for the entire process.

With stylo using thread local arenas, we can now have multiple arenas per process, multiplying that number of dirty pages.

Those dirty pages may be reused when other allocations end up filling them later. However, a relatively large number of them is kept around for each stylo thread (in proportion to the amount of memory ever allocated by stylo), and stylo's memory use depends on the workload generated by the pages being visited, so those dirty pages may very well go unused for a rather long time. This is less of a problem with the main arena, which is used for most everything else.

So, for each arena except the main one, we decrease the number of dirty pages we allow to be kept around to 1/8 of the current value. We do this by introducing a per-arena configuration of that maximum number.
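For orientation before the diff, here is a minimal standalone sketch of the per-arena dirty-page cap this change introduces. The names (Arena, kOptDirtyMax) are made up for illustration and are not mozjemalloc code; only the arithmetic mirrors the patch: thread local arenas are capped at 1/8 of the process-wide default, while the main arena keeps the full limit.

#include <cstddef>

// Stand-in for mozjemalloc's opt_dirty_max (the default, 1MB worth of pages).
static const std::size_t kOptDirtyMax = 256;  // 256 pages * 4KiB = 1MiB

struct Arena {
  std::size_t ndirty;     // dirty pages for which madvise() has not been called yet
  std::size_t dirty_max;  // per-arena cap on ndirty

  explicit Arena(bool is_main)
    : ndirty(0),
      // Thread local arenas only keep 1/8 of the default number of dirty
      // pages around; the main arena keeps the full default.
      dirty_max(is_main ? kOptDirtyMax : (kOptDirtyMax >> 3)) {}
};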
memory/build/mozjemalloc.cpp
--- a/memory/build/mozjemalloc.cpp
+++ b/memory/build/mozjemalloc.cpp
@@ -728,16 +728,20 @@ struct arena_t {
 
 	/*
 	 * Current count of pages within unused runs that are potentially
 	 * dirty, and for which madvise(... MADV_FREE) has not been called.  By
 	 * tracking this, we can institute a limit on how much dirty unused
 	 * memory is mapped for each arena.
 	 */
 	size_t			ndirty;
+	/*
+	 * Maximum value allowed for ndirty.
+	 */
+	size_t			dirty_max;
 
 	/*
 	 * Size/address-ordered tree of this arena's available runs.  This tree
 	 * is used for first-best-fit run allocation.
 	 */
 	arena_avail_tree_t	runs_avail;
 
 	/*
@@ -2792,26 +2796,26 @@ arena_run_alloc(arena_t *arena, arena_bi
 }
 
 static void
 arena_purge(arena_t *arena, bool all)
 {
 	arena_chunk_t *chunk;
 	size_t i, npages;
 	/* If all is set purge all dirty pages. */
-	size_t dirty_max = all ? 1 : opt_dirty_max;
+	size_t dirty_max = all ? 1 : arena->dirty_max;
 #ifdef MOZ_DEBUG
 	size_t ndirty = 0;
 	rb_foreach_begin(arena_chunk_t, link_dirty, &arena->chunks_dirty,
 	    chunk) {
 		ndirty += chunk->ndirty;
 	} rb_foreach_end(arena_chunk_t, link_dirty, &arena->chunks_dirty, chunk)
 	MOZ_ASSERT(ndirty == arena->ndirty);
 #endif
-	MOZ_DIAGNOSTIC_ASSERT(all || (arena->ndirty > opt_dirty_max));
+	MOZ_DIAGNOSTIC_ASSERT(all || (arena->ndirty > arena->dirty_max));
 
 	/*
 	 * Iterate downward through chunks until enough dirty memory has been
 	 * purged.  Terminate as soon as possible in order to minimize the
 	 * number of system calls, even if a chunk has only been partially
 	 * purged.
 	 */
 	while (arena->ndirty > (dirty_max >> 1)) {
@@ -2981,18 +2985,18 @@ arena_run_dalloc(arena_t *arena, arena_r
 	/* Insert into runs_avail, now that coalescing is complete. */
 	arena_avail_tree_insert(&arena->runs_avail, &chunk->map[run_ind]);
 
 	/* Deallocate chunk if it is now completely unused. */
 	if ((chunk->map[arena_chunk_header_npages].bits & (~pagesize_mask |
 	    CHUNK_MAP_ALLOCATED)) == arena_maxclass)
 		arena_chunk_dealloc(arena, chunk);
 
-	/* Enforce opt_dirty_max. */
-	if (arena->ndirty > opt_dirty_max)
+	/* Enforce dirty_max. */
+	if (arena->ndirty > arena->dirty_max)
 		arena_purge(arena, false);
 }
 
 static void
 arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
     size_t oldsize, size_t newsize)
 {
 	size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> pagesize_2pow;
@@ -4021,16 +4025,19 @@ arena_new(arena_t *arena)
 	/* Initialize chunks. */
 	arena_chunk_tree_dirty_new(&arena->chunks_dirty);
 #ifdef MALLOC_DOUBLE_PURGE
 	new (&arena->chunks_madvised) mozilla::DoublyLinkedList<arena_chunk_t>();
 #endif
 	arena->spare = nullptr;
 
 	arena->ndirty = 0;
+	// Reduce the maximum number of dirty pages we allow to be kept in
+	// thread local arenas. TODO: make this more flexible.
+	arena->dirty_max = opt_dirty_max >> 3;
 
 	arena_avail_tree_new(&arena->runs_avail);
 
 	/* Initialize bins. */
 	prev_run_size = pagesize;
 
 	/* (2^n)-spaced tiny bins. */
 	for (i = 0; i < ntbins; i++) {
@@ -4615,16 +4622,20 @@ MALLOC_OUT:
    */
   arenas_extend();
   if (!arenas || !arenas[0]) {
 #ifndef XP_WIN
     malloc_mutex_unlock(&init_lock);
 #endif
     return true;
   }
+  /* arena_new() sets this to a lower value for thread local arenas;
+   * reset to the default value for the main arena. */
+  arenas[0]->dirty_max = opt_dirty_max;
+
 #ifndef NO_TLS
   /*
    * Assign the initial arena to the initial thread.
    */
   thread_arena.set(arenas[0]);
 #endif
 
   chunk_rtree = malloc_rtree_new((SIZEOF_PTR << 3) - opt_chunk_2pow);
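
For completeness, a toy model of the enforcement path the patch adjusts; the names are hypothetical and only the thresholds mirror the real code. Deallocation starts a purge once the per-arena cap is exceeded, and purging uses hysteresis: it releases dirty pages until the count drops to half the cap, so purges happen in batches rather than on every deallocation.

#include <cstddef>

struct Arena {
  std::size_t ndirty = 0;  // dirty pages not yet returned to the OS
  std::size_t dirty_max;   // per-arena cap (set as in arena_new above)

  explicit Arena(std::size_t max) : dirty_max(max) {}

  // Stand-in for releasing one dirty page (madvise in the real allocator).
  void PurgeOnePage() { --ndirty; }

  // Mirrors arena_purge(): purge until ndirty drops to half the cap, or to
  // zero when 'all' is set.
  void Purge(bool all) {
    std::size_t limit = all ? 1 : dirty_max;
    while (ndirty > (limit >> 1)) {
      PurgeOnePage();
    }
  }

  // Mirrors the check in arena_run_dalloc(): only purge once the cap is
  // exceeded.
  void OnPagesDirtied(std::size_t npages) {
    ndirty += npages;
    if (ndirty > dirty_max) {
      Purge(false);
    }
  }
};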