Bug 1399031 - Use mozilla/ThreadLocal.h in mozjemalloc. r=njn
author Mike Hommey <mh+mozilla@glandium.org>
Tue, 12 Sep 2017 16:29:11 +0900
changeset 430259 0eb129729d90b4c03b0a88e34398578a97f59828
parent 430258 3025f5550677377d6234f296dc0d903b4da8176c
child 430260 72a9bca22654ea28ba17a50d9dd9d078e14fa309
push id 7761
push user jlund@mozilla.com
push date Fri, 15 Sep 2017 00:19:52 +0000
treeherder mozilla-beta@c38455951db4
reviewers njn
bugs 1399031
milestone 57.0a1
Bug 1399031 - Use mozilla/ThreadLocal.h in mozjemalloc. r=njn
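
This changeset replaces three platform-specific TLS code paths (a __thread
variable on Linux, pthread_{get,set}specific on OSX, Tls{Get,Set}Value on
Windows) with the wrapper from mozilla/ThreadLocal.h. A minimal sketch of
that wrapper's API as the patch uses it; a standalone illustration, not part
of the change itself:

  #include "mozilla/ThreadLocal.h"

  // Expands to a native thread_local slot where available, and to a
  // key-based slot (pthread key / TlsAlloc index) otherwise.
  static MOZ_THREAD_LOCAL(int*) sSlot;

  void Example()
  {
    // init() must run once before the first get()/set(); on the key-based
    // path this is where the key gets allocated.
    if (!sSlot.init()) {
      return;
    }
    sSlot.set(nullptr);
    int* value = sSlot.get();  // nullptr until set() runs on this thread
    (void)value;
  }
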
memory/build/mozjemalloc.cpp
--- a/memory/build/mozjemalloc.cpp
+++ b/memory/build/mozjemalloc.cpp
@@ -159,20 +159,16 @@
 #define _CRT_SPINCOUNT 5000
 #include <io.h>
 #include <windows.h>
 #include <intrin.h>
 
 #define	SIZE_T_MAX SIZE_MAX
 #define	STDERR_FILENO 2
 
-#ifndef NO_TLS
-static unsigned long tlsIndex = 0xffffffff;
-#endif
-
 /* use MSVC intrinsics */
 #pragma intrinsic(_BitScanForward)
 static __forceinline int
 ffs(int x)
 {
 	unsigned long i;
 
 	if (_BitScanForward(&i, x) != 0)
@@ -247,16 +243,17 @@ typedef long ssize_t;
 #include <mach/mach_error.h>
 #include <mach/mach_init.h>
 #include <mach/vm_map.h>
 #include <malloc/malloc.h>
 #endif
 
 #endif
 
+#include "mozilla/ThreadLocal.h"
 #include "mozjemalloc_types.h"
 
 /* Some tools, such as /dev/dsp wrappers and LD_PRELOAD libraries, happen
  * to override mmap() and call dlsym() from their overridden mmap(). The
  * problem is that dlsym() calls malloc(), and this ends up in a deadlock
  * in jemalloc.
  * On these systems, we prefer to directly use the system call.
  * We do that for Linux systems and kfreebsd with GNU userland.
@@ -300,20 +297,16 @@ void *_mmap(void *addr, size_t length, i
 #endif
 #endif
 }
 #define mmap _mmap
 #define munmap(a, l) syscall(SYS_munmap, a, l)
 #endif
 #endif
 
-#ifdef XP_DARWIN
-static pthread_key_t tlsIndex;
-#endif
-
 #ifdef XP_WIN
    /* MSVC++ does not support C99 variable-length arrays. */
 #  define RB_NO_C99_VARARRAYS
 #endif
 #include "rb.h"
 
 #ifdef MOZ_DEBUG
    /* Disable inlining to make debugging easier. */
@@ -958,21 +951,27 @@ static size_t		base_committed;
  * arenas array are necessarily used; arenas are created lazily as needed.
  */
 static arena_t		**arenas;
 static unsigned		narenas;
 static malloc_spinlock_t arenas_lock; /* Protects arenas initialization. */
 
 #ifndef NO_TLS
 /*
- * Map of pthread_self() --> arenas[???], used for selecting an arena to use
- * for allocations.
+ * The arena associated with the current thread (per jemalloc_thread_local_arena).
+ * On OSX, __thread/thread_local circles back to malloc to allocate storage on
+ * first access on each thread, which leads to an infinite loop, but
+ * pthread-based TLS somehow doesn't have this problem.
+ * On Windows, we use Tls{Get,Set}Value-based TLS for historical reasons.
+ * TODO: we may want to use native TLS instead.
  */
 #if !defined(XP_WIN) && !defined(XP_DARWIN)
-static __thread arena_t	*arenas_map;
+static MOZ_THREAD_LOCAL(arena_t*) thread_arena;
+#else
+static mozilla::detail::ThreadLocal<arena_t*, mozilla::detail::ThreadLocalKeyStorage> thread_arena;
 #endif
 #endif
 
 /*******************************/
 /*
  * Runtime configuration options.
  */
 const uint8_t kAllocJunk = 0xe4;
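
The two declarations above select different storage policies for the same
variable: MOZ_THREAD_LOCAL uses native __thread/thread_local where first
access is safe, while Windows and OSX get the key-based ThreadLocalKeyStorage
flavor, whose accesses never allocate. The key-based flavor behaves roughly
like this hand-rolled POSIX stand-in (a hypothetical illustration, not the
mfbt implementation):

  #include <pthread.h>

  // One pthread key per variable, created once by init(); get()/set() never
  // call malloc(), which is what makes this usable inside a bootstrapping
  // allocator.
  class KeyBackedSlot
  {
    pthread_key_t mKey;

  public:
    bool init() { return pthread_key_create(&mKey, nullptr) == 0; }
    void set(void* aValue) { pthread_setspecific(mKey, aValue); }
    void* get() const { return pthread_getspecific(mKey); }
  };
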
@@ -2246,80 +2245,67 @@ chunk_dealloc(void *chunk, size_t size, 
 /*
  * Begin arena.
  */
 
 static inline arena_t *
 thread_local_arena(bool enabled)
 {
 #ifndef NO_TLS
-	arena_t *arena;
-
-	if (enabled) {
-		/* The arena will essentially be leaked if this function is
-		 * called with `false`, but it doesn't matter at the moment.
-		 * because in practice nothing actually calls this function
-		 * with `false`, except maybe at shutdown. */
-		arena = arenas_extend();
-	} else {
-		malloc_spin_lock(&arenas_lock);
-		arena = arenas[0];
-		malloc_spin_unlock(&arenas_lock);
-	}
-#ifdef XP_WIN
-	TlsSetValue(tlsIndex, arena);
-#elif defined(XP_DARWIN)
-	pthread_setspecific(tlsIndex, arena);
+  arena_t *arena;
+
+  if (enabled) {
+    /* The arena will essentially be leaked if this function is
+     * called with `false`, but it doesn't matter at the moment,
+     * because in practice nothing actually calls this function
+     * with `false`, except maybe at shutdown. */
+    arena = arenas_extend();
+  } else {
+    malloc_spin_lock(&arenas_lock);
+    arena = arenas[0];
+    malloc_spin_unlock(&arenas_lock);
+  }
+  thread_arena.set(arena);
+  return arena;
 #else
-	arenas_map = arena;
-#endif
-
-	return arena;
-#else
-	return arenas[0];
+  return arenas[0];
 #endif
 }
 
 template<> inline void
 MozJemalloc::jemalloc_thread_local_arena(bool aEnabled)
 {
   thread_local_arena(aEnabled);
 }
 
 /*
  * Choose an arena based on a per-thread value.
  */
 static inline arena_t *
 choose_arena(void)
 {
-	arena_t *ret;
-
-	/*
-	 * We can only use TLS if this is a PIC library, since for the static
-	 * library version, libc's malloc is used by TLS allocation, which
-	 * introduces a bootstrapping issue.
-	 */
+  arena_t *ret;
+
+  /*
+   * We can only use TLS if this is a PIC library, since for the static
+   * library version, libc's malloc is used by TLS allocation, which
+   * introduces a bootstrapping issue.
+   */
 #ifndef NO_TLS
 
-#  ifdef XP_WIN
-	ret = (arena_t*)TlsGetValue(tlsIndex);
-#  elif defined(XP_DARWIN)
-	ret = (arena_t*)pthread_getspecific(tlsIndex);
-#  else
-	ret = arenas_map;
-#  endif
-
-	if (!ret) {
-                ret = thread_local_arena(false);
-	}
+  ret = thread_arena.get();
+
+  if (!ret) {
+    ret = thread_local_arena(false);
+  }
 #else
-	ret = arenas[0];
+  ret = arenas[0];
 #endif
-	MOZ_DIAGNOSTIC_ASSERT(ret);
-	return (ret);
+  MOZ_DIAGNOSTIC_ASSERT(ret);
+  return (ret);
 }
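
Together, thread_local_arena() and choose_arena() implement a lazy per-thread
binding: a thread's first allocation finds an empty slot and binds it to
arenas[0]. A condensed sketch of that flow, with simplified hypothetical
names and the arenas_lock handling elided:

  struct Arena;
  static MOZ_THREAD_LOCAL(Arena*) sThreadArena;
  static Arena* sDefaultArena;  // stands in for arenas[0]

  static Arena* ChooseArena()
  {
    Arena* arena = sThreadArena.get();  // nullptr on a thread's first call
    if (!arena) {
      arena = sDefaultArena;            // the thread_local_arena(false) path
      sThreadArena.set(arena);          // remember the binding
    }
    return arena;
  }
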
 
 static inline int
 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
 {
 	uintptr_t a_chunk = (uintptr_t)a;
 	uintptr_t b_chunk = (uintptr_t)b;
 
@@ -4411,267 +4397,261 @@ GetKernelPageSize()
 }
 
 #if !defined(XP_WIN)
 static
 #endif
 bool
 malloc_init_hard(void)
 {
-	unsigned i;
-	const char *opts;
-	long result;
+  unsigned i;
+  const char *opts;
+  long result;
 
 #ifndef XP_WIN
-	malloc_mutex_lock(&init_lock);
+  malloc_mutex_lock(&init_lock);
 #endif
 
-	if (malloc_initialized) {
-		/*
-		 * Another thread initialized the allocator before this one
-		 * acquired init_lock.
-		 */
+  if (malloc_initialized) {
+    /*
+     * Another thread initialized the allocator before this one
+     * acquired init_lock.
+     */
 #ifndef XP_WIN
-		malloc_mutex_unlock(&init_lock);
+    malloc_mutex_unlock(&init_lock);
 #endif
-		return (false);
-	}
-
-#ifdef XP_WIN
-	/* get a thread local storage index */
-	tlsIndex = TlsAlloc();
-#elif defined(XP_DARWIN)
-	pthread_key_create(&tlsIndex, nullptr);
+    return false;
+  }
+
+#ifndef NO_TLS
+  if (!thread_arena.init()) {
+    return false;
+  }
 #endif
 
-	/* Get page size and number of CPUs */
-	result = GetKernelPageSize();
-	/* We assume that the page size is a power of 2. */
-	MOZ_ASSERT(((result - 1) & result) == 0);
+  /* Get page size and number of CPUs */
+  result = GetKernelPageSize();
+  /* We assume that the page size is a power of 2. */
+  MOZ_ASSERT(((result - 1) & result) == 0);
 #ifdef MALLOC_STATIC_SIZES
-	if (pagesize % (size_t) result) {
-		_malloc_message(_getprogname(),
-				"Compile-time page size does not divide the runtime one.\n");
-		MOZ_CRASH();
-	}
+  if (pagesize % (size_t) result) {
+    _malloc_message(_getprogname(),
+        "Compile-time page size does not divide the runtime one.\n");
+    MOZ_CRASH();
+  }
 #else
-	pagesize = (size_t) result;
-	pagesize_mask = (size_t) result - 1;
-	pagesize_2pow = ffs((int)result) - 1;
+  pagesize = (size_t) result;
+  pagesize_mask = (size_t) result - 1;
+  pagesize_2pow = ffs((int)result) - 1;
 #endif
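
For concreteness (illustrative arithmetic, not part of the patch): with a
4 KiB kernel page, result = 4096 = 2^12, so the power-of-two assertion holds
(4096 & 4095 == 0), pagesize_mask = 4095 = 0xfff, and pagesize_2pow =
ffs(4096) - 1 = 12.
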
 
-	/* Get runtime configuration. */
-	if ((opts = getenv("MALLOC_OPTIONS"))) {
-		for (i = 0; opts[i] != '\0'; i++) {
-			unsigned j, nreps;
-			bool nseen;
-
-			/* Parse repetition count, if any. */
-			for (nreps = 0, nseen = false;; i++, nseen = true) {
-				switch (opts[i]) {
-					case '0': case '1': case '2': case '3':
-					case '4': case '5': case '6': case '7':
-					case '8': case '9':
-						nreps *= 10;
-						nreps += opts[i] - '0';
-						break;
-					default:
-						goto MALLOC_OUT;
-				}
-			}
+  /* Get runtime configuration. */
+  if ((opts = getenv("MALLOC_OPTIONS"))) {
+    for (i = 0; opts[i] != '\0'; i++) {
+      unsigned j, nreps;
+      bool nseen;
+
+      /* Parse repetition count, if any. */
+      for (nreps = 0, nseen = false;; i++, nseen = true) {
+        switch (opts[i]) {
+          case '0': case '1': case '2': case '3':
+          case '4': case '5': case '6': case '7':
+          case '8': case '9':
+            nreps *= 10;
+            nreps += opts[i] - '0';
+            break;
+          default:
+            goto MALLOC_OUT;
+        }
+      }
 MALLOC_OUT:
-			if (nseen == false)
-				nreps = 1;
-
-			for (j = 0; j < nreps; j++) {
-				switch (opts[i]) {
-				case 'f':
-					opt_dirty_max >>= 1;
-					break;
-				case 'F':
-					if (opt_dirty_max == 0)
-						opt_dirty_max = 1;
-					else if ((opt_dirty_max << 1) != 0)
-						opt_dirty_max <<= 1;
-					break;
+      if (nseen == false)
+        nreps = 1;
+
+      for (j = 0; j < nreps; j++) {
+        switch (opts[i]) {
+        case 'f':
+          opt_dirty_max >>= 1;
+          break;
+        case 'F':
+          if (opt_dirty_max == 0)
+            opt_dirty_max = 1;
+          else if ((opt_dirty_max << 1) != 0)
+            opt_dirty_max <<= 1;
+          break;
 #ifdef MOZ_DEBUG
-				case 'j':
-					opt_junk = false;
-					break;
-				case 'J':
-					opt_junk = true;
-					break;
+        case 'j':
+          opt_junk = false;
+          break;
+        case 'J':
+          opt_junk = true;
+          break;
 #endif
 #ifndef MALLOC_STATIC_SIZES
-				case 'k':
-					/*
-					 * Chunks always require at least one
-					 * header page, so chunks can never be
-					 * smaller than two pages.
-					 */
-					if (opt_chunk_2pow > pagesize_2pow + 1)
-						opt_chunk_2pow--;
-					break;
-				case 'K':
-					if (opt_chunk_2pow + 1 <
-					    (sizeof(size_t) << 3))
-						opt_chunk_2pow++;
-					break;
+        case 'k':
+          /*
+           * Chunks always require at least one
+           * header page, so chunks can never be
+           * smaller than two pages.
+           */
+          if (opt_chunk_2pow > pagesize_2pow + 1)
+            opt_chunk_2pow--;
+          break;
+        case 'K':
+          if (opt_chunk_2pow + 1 < (sizeof(size_t) << 3))
+            opt_chunk_2pow++;
+          break;
 #endif
 #ifndef MALLOC_STATIC_SIZES
-				case 'q':
-					if (opt_quantum_2pow > QUANTUM_2POW_MIN)
-						opt_quantum_2pow--;
-					break;
-				case 'Q':
-					if (opt_quantum_2pow < pagesize_2pow -
-					    1)
-						opt_quantum_2pow++;
-					break;
-				case 's':
-					if (opt_small_max_2pow >
-					    QUANTUM_2POW_MIN)
-						opt_small_max_2pow--;
-					break;
-				case 'S':
-					if (opt_small_max_2pow < pagesize_2pow
-					    - 1)
-						opt_small_max_2pow++;
-					break;
+        case 'q':
+          if (opt_quantum_2pow > QUANTUM_2POW_MIN)
+            opt_quantum_2pow--;
+          break;
+        case 'Q':
+          if (opt_quantum_2pow < pagesize_2pow - 1)
+            opt_quantum_2pow++;
+          break;
+        case 's':
+          if (opt_small_max_2pow > QUANTUM_2POW_MIN)
+            opt_small_max_2pow--;
+          break;
+        case 'S':
+          if (opt_small_max_2pow < pagesize_2pow - 1)
+            opt_small_max_2pow++;
+          break;
 #endif
 #ifdef MOZ_DEBUG
-				case 'z':
-					opt_zero = false;
-					break;
-				case 'Z':
-					opt_zero = true;
-					break;
+        case 'z':
+          opt_zero = false;
+          break;
+        case 'Z':
+          opt_zero = true;
+          break;
 #endif
-				default: {
-					char cbuf[2];
-
-					cbuf[0] = opts[i];
-					cbuf[1] = '\0';
-					_malloc_message(_getprogname(),
-					    ": (malloc) Unsupported character "
-					    "in malloc options: '", cbuf,
-					    "'\n");
-				}
-				}
-			}
-		}
-	}
+        default: {
+          char cbuf[2];
+
+          cbuf[0] = opts[i];
+          cbuf[1] = '\0';
+          _malloc_message(_getprogname(),
+              ": (malloc) Unsupported character "
+              "in malloc options: '", cbuf,
+              "'\n");
+        }
+        }
+      }
+    }
+  }
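
A worked example of the grammar parsed above (illustrative, not from the
patch): each flag may be preceded by a decimal repetition count, which the
inner digit loop consumes before jumping to MALLOC_OUT; without one, nseen
stays false and nreps defaults to 1. MALLOC_OPTIONS=3f thus applies 'f' three
times, cutting opt_dirty_max to one eighth of its default, while
MALLOC_OPTIONS=fF halves it and then doubles it, a net no-op at the default
value.
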
 
 #ifndef MALLOC_STATIC_SIZES
-	/* Set variables according to the value of opt_small_max_2pow. */
-	if (opt_small_max_2pow < opt_quantum_2pow)
-		opt_small_max_2pow = opt_quantum_2pow;
-	small_max = (1U << opt_small_max_2pow);
-
-	/* Set bin-related variables. */
-	bin_maxclass = (pagesize >> 1);
-	MOZ_ASSERT(opt_quantum_2pow >= TINY_MIN_2POW);
-	ntbins = opt_quantum_2pow - TINY_MIN_2POW;
-	MOZ_ASSERT(ntbins <= opt_quantum_2pow);
-	nqbins = (small_max >> opt_quantum_2pow);
-	nsbins = pagesize_2pow - opt_small_max_2pow - 1;
-
-	/* Set variables according to the value of opt_quantum_2pow. */
-	quantum = (1U << opt_quantum_2pow);
-	quantum_mask = quantum - 1;
-	if (ntbins > 0)
-		small_min = (quantum >> 1) + 1;
-	else
-		small_min = 1;
-	MOZ_ASSERT(small_min <= quantum);
-
-	/* Set variables according to the value of opt_chunk_2pow. */
-	chunksize = (1LU << opt_chunk_2pow);
-	chunksize_mask = chunksize - 1;
-	chunk_npages = (chunksize >> pagesize_2pow);
-
-	arena_chunk_header_npages = calculate_arena_header_pages();
-	arena_maxclass = calculate_arena_maxclass();
-
-	recycle_limit = CHUNK_RECYCLE_LIMIT * chunksize;
+  /* Set variables according to the value of opt_small_max_2pow. */
+  if (opt_small_max_2pow < opt_quantum_2pow) {
+    opt_small_max_2pow = opt_quantum_2pow;
+  }
+  small_max = (1U << opt_small_max_2pow);
+
+  /* Set bin-related variables. */
+  bin_maxclass = (pagesize >> 1);
+  MOZ_ASSERT(opt_quantum_2pow >= TINY_MIN_2POW);
+  ntbins = opt_quantum_2pow - TINY_MIN_2POW;
+  MOZ_ASSERT(ntbins <= opt_quantum_2pow);
+  nqbins = (small_max >> opt_quantum_2pow);
+  nsbins = pagesize_2pow - opt_small_max_2pow - 1;
+
+  /* Set variables according to the value of opt_quantum_2pow. */
+  quantum = (1U << opt_quantum_2pow);
+  quantum_mask = quantum - 1;
+  if (ntbins > 0) {
+    small_min = (quantum >> 1) + 1;
+  } else {
+    small_min = 1;
+  }
+  MOZ_ASSERT(small_min <= quantum);
+
+  /* Set variables according to the value of opt_chunk_2pow. */
+  chunksize = (1LU << opt_chunk_2pow);
+  chunksize_mask = chunksize - 1;
+  chunk_npages = (chunksize >> pagesize_2pow);
+
+  arena_chunk_header_npages = calculate_arena_header_pages();
+  arena_maxclass = calculate_arena_maxclass();
+
+  recycle_limit = CHUNK_RECYCLE_LIMIT * chunksize;
 #endif
 
-	recycled_size = 0;
-
-	/* Various sanity checks that regard configuration. */
-	MOZ_ASSERT(quantum >= sizeof(void *));
-	MOZ_ASSERT(quantum <= pagesize);
-	MOZ_ASSERT(chunksize >= pagesize);
-	MOZ_ASSERT(quantum * 4 <= chunksize);
-
-	/* Initialize chunks data. */
-	malloc_mutex_init(&chunks_mtx);
-	extent_tree_szad_new(&chunks_szad_mmap);
-	extent_tree_ad_new(&chunks_ad_mmap);
-
-	/* Initialize huge allocation data. */
-	malloc_mutex_init(&huge_mtx);
-	extent_tree_ad_new(&huge);
-	huge_nmalloc = 0;
-	huge_ndalloc = 0;
-	huge_allocated = 0;
-	huge_mapped = 0;
-
-	/* Initialize base allocation data structures. */
-	base_mapped = 0;
-	base_committed = 0;
-	base_nodes = nullptr;
-	malloc_mutex_init(&base_mtx);
-
-	malloc_spin_init(&arenas_lock);
-
-	/*
-	 * Initialize one arena here.
-	 */
-	arenas_extend();
-	if (!arenas || !arenas[0]) {
+  recycled_size = 0;
+
+  /* Various sanity checks that regard configuration. */
+  MOZ_ASSERT(quantum >= sizeof(void *));
+  MOZ_ASSERT(quantum <= pagesize);
+  MOZ_ASSERT(chunksize >= pagesize);
+  MOZ_ASSERT(quantum * 4 <= chunksize);
+
+  /* Initialize chunks data. */
+  malloc_mutex_init(&chunks_mtx);
+  extent_tree_szad_new(&chunks_szad_mmap);
+  extent_tree_ad_new(&chunks_ad_mmap);
+
+  /* Initialize huge allocation data. */
+  malloc_mutex_init(&huge_mtx);
+  extent_tree_ad_new(&huge);
+  huge_nmalloc = 0;
+  huge_ndalloc = 0;
+  huge_allocated = 0;
+  huge_mapped = 0;
+
+  /* Initialize base allocation data structures. */
+  base_mapped = 0;
+  base_committed = 0;
+  base_nodes = nullptr;
+  malloc_mutex_init(&base_mtx);
+
+  malloc_spin_init(&arenas_lock);
+
+  /*
+   * Initialize one arena here.
+   */
+  arenas_extend();
+  if (!arenas || !arenas[0]) {
 #ifndef XP_WIN
-		malloc_mutex_unlock(&init_lock);
+    malloc_mutex_unlock(&init_lock);
 #endif
-		return (true);
-	}
+    return true;
+  }
 #ifndef NO_TLS
-	/*
-	 * Assign the initial arena to the initial thread, in order to avoid
-	 * spurious creation of an extra arena if the application switches to
-	 * threaded mode.
-	 */
-#ifdef XP_WIN
-	TlsSetValue(tlsIndex, arenas[0]);
-#elif defined(XP_DARWIN)
-	pthread_setspecific(tlsIndex, arenas[0]);
-#else
-	arenas_map = arenas[0];
+  /*
+   * Assign the initial arena to the initial thread.
+   */
+  thread_arena.set(arenas[0]);
 #endif
-#endif
-
-	chunk_rtree = malloc_rtree_new((SIZEOF_PTR << 3) - opt_chunk_2pow);
-	if (!chunk_rtree)
-		return (true);
-
-	malloc_initialized = true;
+
+  chunk_rtree = malloc_rtree_new((SIZEOF_PTR << 3) - opt_chunk_2pow);
+  if (!chunk_rtree) {
+    return true;
+  }
+
+  malloc_initialized = true;
 
 #if !defined(XP_WIN) && !defined(XP_DARWIN)
-	/* Prevent potential deadlock on malloc locks after fork. */
-	pthread_atfork(_malloc_prefork, _malloc_postfork_parent, _malloc_postfork_child);
+  /* Prevent potential deadlock on malloc locks after fork. */
+  pthread_atfork(_malloc_prefork, _malloc_postfork_parent, _malloc_postfork_child);
 #endif
 
 #if defined(XP_DARWIN)
-	register_zone();
+  register_zone();
 #endif
 
 #ifndef XP_WIN
-	malloc_mutex_unlock(&init_lock);
+  malloc_mutex_unlock(&init_lock);
 #endif
-	return (false);
+  return false;
 }
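
The TLS-related pieces of malloc_init_hard() reduce to an init-then-seed
sequence: create the slot before anything can touch it, then bind the initial
thread to arenas[0] so choose_arena() never has to bind it lazily. A condensed
sketch with locking and the rest of the setup elided (note this sketch returns
true on success, whereas malloc_init_hard() itself returns true on failure):

  static bool InitThreadArenaSketch()
  {
  #ifndef NO_TLS
    // TlsAlloc()/pthread_key_create() happen here on the key-based path.
    if (!thread_arena.init()) {
      return false;
    }
  #endif
    arenas_extend();              // creates arenas[0]
  #ifndef NO_TLS
    thread_arena.set(arenas[0]);  // seed the initial thread's binding
  #endif
    return true;
  }
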
 
 /*
  * End general internal functions.
  */
 /******************************************************************************/
 /*
  * Begin malloc(3)-compatible functions.