Bug 414946 (part 1): Fix jemalloc on Mac, but leave disabled (r=pavlov)
author: Paul Biggar <pbiggar@mozilla.com>, Jason Evans <jasone@canonware.com> and Dale Kim <dalekim1@illinois.edu>
Sat, 21 May 2011 20:27:00 -0700
changeset 72464 2b2f584dc5fd8cedbbd49b3456a32adaf99627da
parent 72463 763ff2d737e77abd90524ae630b7c77724fe07bd
child 72465 1ad1fd67e97a67e04f47f050b30c0084936e86f1
push id: unknown
push user: unknown
push date: unknown
reviewers: pavlov
bugs: 414946
milestone: 8.0a1
Bug 414946 (part 1): Fix jemalloc on Mac, but leave disabled (r=pavlov) Overwrite the OSX default zone allocator, taking into account the malloc_zone_t version (supported versions are 3, 6 and 8, for Leopard, Snow Leopard and Lion respectively). jemalloc can be dynamically disabled for unknown malloc_zone_t versions, i.e. for OSX 10.8 and beyond. This changeset does not enable jemalloc, so that the change can be backed out easily if there's a problem; it will be enabled in the next changeset. This should be a 15-20% improvement in memory usage.
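In outline, the Darwin takeover added by this patch works as sketched below. This is an illustration of the approach, not code from the patch; create_zone() and szone2ozone() are the patch's functions, referenced here only in comments.

/* Sketch: version-gated takeover of the OSX default malloc zone. */
#include <malloc/malloc.h>
#include <stdbool.h>
#include <stdlib.h>

#define LION_MALLOC_ZONE_T_VERSION 8

static bool use_jemalloc = false;

void
init_zone_takeover(void)
{
	malloc_zone_t *default_zone = malloc_default_zone();

	/*
	 * Only take over struct layouts we know (versions 3, 6 and 8).
	 * On anything newer, keep the system allocator.
	 */
	use_jemalloc = (default_zone->version <= LION_MALLOC_ZONE_T_VERSION);
	if (getenv("NO_MAC_JEMALLOC"))
		use_jemalloc = false;
	if (!use_jemalloc)
		return;

	/*
	 * 1. Register a jemalloc-backed zone matching the detected version:
	 *        malloc_zone_register(create_zone(default_zone->version));
	 * 2. Rewrite the default zone in place (szone2ozone) so that old
	 *    szone allocations can still be freed while new allocations
	 *    come from jemalloc.
	 */
}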
browser/installer/package-manifest.in
configure.in
memory/jemalloc/jemalloc.c
memory/jemalloc/jemalloc.h
memory/jemalloc/jemalloc_types.h
memory/jemalloc/osx_zone_types.h
memory/mozalloc/Makefile.in
memory/mozalloc/mozalloc.cpp
memory/mozalloc/mozalloc.h
toolkit/library/libxul-rules.mk
xpcom/base/nsMemoryReporterManager.cpp
--- a/browser/installer/package-manifest.in
+++ b/browser/installer/package-manifest.in
@@ -40,16 +40,19 @@
 #ifndef MOZ_STATIC_JS
 @BINPATH@/@DLL_PREFIX@mozjs@DLL_SUFFIX@
 #endif
 @BINPATH@/@DLL_PREFIX@plc4@DLL_SUFFIX@
 @BINPATH@/@DLL_PREFIX@plds4@DLL_SUFFIX@
 @BINPATH@/@DLL_PREFIX@xpcom@DLL_SUFFIX@
 @BINPATH@/@DLL_PREFIX@nspr4@DLL_SUFFIX@
 @BINPATH@/@DLL_PREFIX@mozalloc@DLL_SUFFIX@
+#ifdef MOZ_MEMORY_DARWIN
+@BINPATH@/@DLL_PREFIX@jemalloc@DLL_SUFFIX@
+#endif
 #ifdef XP_MACOSX
 @BINPATH@/XUL
 #else
 @BINPATH@/@DLL_PREFIX@xul@DLL_SUFFIX@
 #endif
 #ifdef XP_MACOSX
 @BINPATH@/@MOZ_CHILD_PROCESS_NAME@.app/
 @BINPATH@/@DLL_PREFIX@plugin_child_interpose@DLL_SUFFIX@
--- a/configure.in
+++ b/configure.in
@@ -7454,22 +7454,22 @@ else
     DLLFLAGS="$DLLFLAGS $MOZ_MEMORY_LDFLAGS"
     export DLLFLAGS
     ;;
   *)
     AC_MSG_ERROR([--enable-jemalloc not supported on ${target}])
     ;;
   esac
 
-  if test "$OS_ARCH" != "Darwin"; then
+  if test "$OS_ARCH" != "WINNT"; then
     dnl NB: this must be kept in sync with jemalloc.h
     AC_DEFINE(HAVE_JEMALLOC_VALLOC)
-    AC_DEFINE(HAVE_JEMALLOC_POSIX_MEMALIGN)
-    AC_DEFINE(HAVE_JEMALLOC_MEMALIGN)
   fi
+  AC_DEFINE(HAVE_JEMALLOC_POSIX_MEMALIGN)
+  AC_DEFINE(HAVE_JEMALLOC_MEMALIGN)
 fi # MOZ_MEMORY
 AC_SUBST(MOZ_MEMORY)
 AC_SUBST(MOZ_MEMORY_LDFLAGS)
 AC_SUBST(WIN32_OLD_STYLE_JEMALLOC)
 AC_SUBST(WIN32_CRT_LIBS)
 AC_SUBST(WIN32_CRT_SRC_DIR)
 dnl Need to set this for make because NSS doesn't have configure
 AC_SUBST(DLLFLAGS)
--- a/memory/jemalloc/jemalloc.c
+++ b/memory/jemalloc/jemalloc.c
@@ -170,19 +170,16 @@
  */
 /* #define	MALLOC_BALANCE */
 
 #if (!defined(MOZ_MEMORY_WINDOWS) && !defined(MOZ_MEMORY_DARWIN))
    /*
     * MALLOC_PAGEFILE causes all mmap()ed memory to be backed by temporary
     * files, so that if a chunk is mapped, it is guaranteed to be swappable.
     * This avoids asynchronous OOM failures that are due to VM over-commit.
-    *
-    * XXX OS X over-commits, so we should probably use mmap() instead of
-    * vm_allocate(), so that MALLOC_PAGEFILE works.
     */
 #define MALLOC_PAGEFILE
 #endif
 
 #ifdef MALLOC_PAGEFILE
 /* Write size when initializing a page file. */
 #  define MALLOC_PAGEFILE_WRITE_SIZE 512
 #endif
@@ -1196,17 +1193,17 @@ typedef struct {
 #define	UTRACE(a, b, c)
 #endif
 
 /******************************************************************************/
 /*
  * Begin function prototypes for non-inline static functions.
  */
 
-static char	*umax2s(uintmax_t x, char *s);
+static char	*umax2s(uintmax_t x, unsigned base, char *s);
 static bool	malloc_mutex_init(malloc_mutex_t *mutex);
 static bool	malloc_spin_init(malloc_spinlock_t *lock);
 static void	wrtmessage(const char *p1, const char *p2, const char *p3,
 		const char *p4);
 #ifdef MALLOC_STATS
 #ifdef MOZ_MEMORY_DARWIN
 /* Avoid namespace collision with OS X's malloc APIs. */
 #define malloc_printf moz_malloc_printf
@@ -1272,42 +1269,121 @@ static void	*huge_palloc(size_t alignmen
 static void	*huge_ralloc(void *ptr, size_t size, size_t oldsize);
 static void	huge_dalloc(void *ptr);
 static void	malloc_print_stats(void);
 #ifndef MOZ_MEMORY_WINDOWS
 static
 #endif
 bool		malloc_init_hard(void);
 
-void		_malloc_prefork(void);
-void		_malloc_postfork(void);
+static void	_malloc_prefork(void);
+static void	_malloc_postfork(void);
+
+#ifdef MOZ_MEMORY_DARWIN
+/*
+ * MALLOC_ZONE_T_NOTE
+ *
+ * On Darwin, we hook into the memory allocator using a malloc_zone_t struct.
+ * We must be very careful around this struct because of different behaviour on
+ * different versions of OSX.
+ *
+ * OSX 10.5, 10.6 and 10.7 each use a different version of the struct
+ * (version numbers 3, 6 and 8 respectively). The binary we use on each of
+ * these platforms will not necessarily be built using the correct SDK [1].
+ * This means we need to statically know the correct struct size to use on all
+ * OSX releases, and have a fallback for unknown future versions. The struct
+ * sizes are defined in osx_zone_types.h.
+ *
+ * For OSX 10.8 and later, we expect the malloc_zone_t struct to change
+ * again, and need to account for this dynamically. Simply leaving the
+ * default zone alone is not enough, because mozalloc still calls into
+ * jemalloc directly; so we check use_jemalloc dynamically on each
+ * allocation, using the DARWIN_ONLY macro.
+ *
+ * [1] Mozilla is built as a universal binary on Mac, supporting i386 and
+ *     x86_64. The i386 target is built using the 10.5 SDK, even if it runs on
+ *     10.6. The x86_64 target is built using the 10.6 SDK, even if it runs on
+ *     10.7 or later.
+ *
+ * FIXME:
+ *   When later versions of OSX come out (10.8 and up), we need to check their
+ *   malloc_zone_t versions. If they're greater than 8, we need a new version
+ *   of malloc_zone_t adapted into osx_zone_types.h.
+ */
+
+#include "osx_zone_types.h"
+
+#define LEOPARD_MALLOC_ZONE_T_VERSION 3
+#define SNOW_LEOPARD_MALLOC_ZONE_T_VERSION 6
+#define LION_MALLOC_ZONE_T_VERSION 8
+
+/*
+ * Avoid lots of casts below by allowing access to l_jemalloc_zone through a
+ * malloc_zone_t pointer.
+ */
+static lion_malloc_zone l_jemalloc_zone, l_szone;
+static malloc_zone_t * const jemalloc_zone = (malloc_zone_t*)(&l_jemalloc_zone);
+static malloc_zone_t * szone = (malloc_zone_t*)(&l_szone);
+
+/* Likewise for l_zone_introspect. */
+static lion_malloc_introspection l_zone_introspect, l_ozone_introspect;
+static malloc_introspection_t * const zone_introspect =
+	(malloc_introspection_t*)(&l_zone_introspect);
+static malloc_introspection_t * const ozone_introspect =
+	(malloc_introspection_t*)(&l_ozone_introspect);
+static malloc_zone_t *create_zone(unsigned version);
+static void szone2ozone(malloc_zone_t *zone, size_t size);
+static size_t zone_version_size(int version);
+#endif
+
+/* On unknown future versions of OSX, dynamically decide not to use jemalloc. */
+static bool use_jemalloc = false;
+
 
 /*
  * End function prototypes.
  */
 /******************************************************************************/
 
 /*
  * umax2s() provides minimal integer printing functionality, which is
  * especially useful for situations where allocation in vsnprintf() calls would
  * potentially cause deadlock.
  */
-#define	UMAX2S_BUFSIZE	21
-static char *
-umax2s(uintmax_t x, char *s)
+#define	UMAX2S_BUFSIZE	65
+static char *
+umax2s(uintmax_t x, unsigned base, char *s)
 {
 	unsigned i;
 
 	i = UMAX2S_BUFSIZE - 1;
 	s[i] = '\0';
-	do {
-		i--;
-		s[i] = "0123456789"[x % 10];
-		x /= 10;
-	} while (x > 0);
+	switch (base) {
+	case 10:
+		do {
+			i--;
+			s[i] = "0123456789"[x % 10];
+			x /= 10;
+		} while (x > 0);
+		break;
+	case 16:
+		do {
+			i--;
+			s[i] = "0123456789abcdef"[x & 0xf];
+			x >>= 4;
+		} while (x > 0);
+		break;
+	default:
+		do {
+			i--;
+			s[i] = "0123456789abcdefghijklmnopqrstuvwxyz"[x % base];
+			x /= base;
+		} while (x > 0);
+	}
 
 	return (&s[i]);
 }
 
 static void
 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
 {
 #if defined(MOZ_MEMORY) && !defined(MOZ_MEMORY_WINDOWS)
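The base-aware umax2s() above can be exercised on its own. A minimal sketch (the conversion loop is the patch's generic-base case, collapsed for brevity; main() is illustrative):

#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define UMAX2S_BUFSIZE 65	/* enough for a 64-bit value in base 2 */

static char *
umax2s(uintmax_t x, unsigned base, char *s)
{
	unsigned i = UMAX2S_BUFSIZE - 1;

	s[i] = '\0';
	do {
		i--;
		s[i] = "0123456789abcdefghijklmnopqrstuvwxyz"[x % base];
		x /= base;
	} while (x > 0);
	return (&s[i]);
}

int
main(void)
{
	char buf[UMAX2S_BUFSIZE];

	/*
	 * The same formatting utrace() now uses: decimal pid, hex
	 * pointers, with no allocation behind the scenes.
	 */
	printf("%s", umax2s(getpid(), 10, buf));
	printf(" x USER 0x%s = malloc(...)\n",
	    umax2s((uintptr_t)buf, 16, buf));
	return 0;
}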
@@ -1323,17 +1399,17 @@ wrtmessage(const char *p1, const char *p
 
 void	(*_malloc_message)(const char *p1, const char *p2, const char *p3,
 	    const char *p4) = wrtmessage;
 
 #ifdef MALLOC_DEBUG
 #  define assert(e) do {						\
 	if (!(e)) {							\
 		char line_buf[UMAX2S_BUFSIZE];				\
-		_malloc_message(__FILE__, ":", umax2s(__LINE__,		\
+		_malloc_message(__FILE__, ":", umax2s(__LINE__, 10,	\
 		    line_buf), ": Failed assertion: ");			\
 		_malloc_message("\"", #e, "\"\n", "");			\
 		abort();						\
 	}								\
 } while (0)
 #else
 #define assert(e)
 #endif
@@ -1654,29 +1730,52 @@ static __thread uint32_t balance_x;
 PRN_DEFINE(balance, balance_x, 1297, 1301)
 #endif
 
 #ifdef MALLOC_UTRACE
 static int
 utrace(const void *addr, size_t len)
 {
 	malloc_utrace_t *ut = (malloc_utrace_t *)addr;
+	char buf_a[UMAX2S_BUFSIZE];
+	char buf_b[UMAX2S_BUFSIZE];
 
 	assert(len == sizeof(malloc_utrace_t));
 
-	if (ut->p == NULL && ut->s == 0 && ut->r == NULL)
-		malloc_printf("%d x USER malloc_init()\n", getpid());
-	else if (ut->p == NULL && ut->r != NULL) {
-		malloc_printf("%d x USER %p = malloc(%zu)\n", getpid(), ut->r,
-		    ut->s);
+	if (ut->p == NULL && ut->s == 0 && ut->r == NULL) {
+		_malloc_message(
+		    umax2s(getpid(), 10, buf_a),
+		    " x USER malloc_init()\n", "", "");
+	} else if (ut->p == NULL && ut->r != NULL) {
+		_malloc_message(
+		    umax2s(getpid(), 10, buf_a),
+		    " x USER 0x",
+		    umax2s((uintptr_t)ut->r, 16, buf_b),
+		    " = malloc(");
+		_malloc_message(
+		    umax2s(ut->s, 10, buf_a),
+		    ")\n", "", "");
 	} else if (ut->p != NULL && ut->r != NULL) {
-		malloc_printf("%d x USER %p = realloc(%p, %zu)\n", getpid(),
-		    ut->r, ut->p, ut->s);
-	} else
-		malloc_printf("%d x USER free(%p)\n", getpid(), ut->p);
+		_malloc_message(
+		    umax2s(getpid(), 10, buf_a),
+		    " x USER 0x",
+		    umax2s((uintptr_t)ut->r, 16, buf_b),
+		    " = realloc(0x");
+		_malloc_message(
+		    umax2s((uintptr_t)ut->p, 16, buf_a),
+		    ", ",
+		    umax2s(ut->s, 10, buf_b),
+		    ")\n");
+	} else {
+		_malloc_message(
+		    umax2s(getpid(), 10, buf_a),
+		    " x USER free(0x",
+		    umax2s((uintptr_t)ut->p, 16, buf_b),
+		    ")\n");
+	}
 
 	return (0);
 }
 #endif
 
 static inline const char *
 _getprogname(void)
 {
@@ -2066,69 +2165,17 @@ pages_unmap(void *addr, size_t size)
 {
 	if (VirtualFree(addr, 0, MEM_RELEASE) == 0) {
 		_malloc_message(_getprogname(),
 		    ": (malloc) Error in VirtualFree()\n", "", "");
 		if (opt_abort)
 			abort();
 	}
 }
-#elif (defined(MOZ_MEMORY_DARWIN))
-static void *
-pages_map(void *addr, size_t size, int pfd)
-{
-	void *ret;
-	kern_return_t err;
-	int flags;
-
-	if (addr != NULL) {
-		ret = addr;
-		flags = 0;
-	} else
-		flags = VM_FLAGS_ANYWHERE;
-
-	err = vm_allocate((vm_map_t)mach_task_self(), (vm_address_t *)&ret,
-	    (vm_size_t)size, flags);
-	if (err != KERN_SUCCESS)
-		ret = NULL;
-
-	assert(ret == NULL || (addr == NULL && ret != addr)
-	    || (addr != NULL && ret == addr));
-	return (ret);
-}
-
-static void
-pages_unmap(void *addr, size_t size)
-{
-	kern_return_t err;
-
-	err = vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)addr,
-	    (vm_size_t)size);
-	if (err != KERN_SUCCESS) {
-		malloc_message(_getprogname(),
-		    ": (malloc) Error in vm_deallocate(): ",
-		    mach_error_string(err), "\n");
-		if (opt_abort)
-			abort();
-	}
-}
-
-#define	VM_COPY_MIN (pagesize << 5)
-static inline void
-pages_copy(void *dest, const void *src, size_t n)
-{
-
-	assert((void *)((uintptr_t)dest & ~pagesize_mask) == dest);
-	assert(n >= VM_COPY_MIN);
-	assert((void *)((uintptr_t)src & ~pagesize_mask) == src);
-
-	vm_copy(mach_task_self(), (vm_address_t)src, (vm_size_t)n,
-	    (vm_address_t)dest);
-}
-#else /* MOZ_MEMORY_DARWIN */
+#else
 #ifdef JEMALLOC_USES_MAP_ALIGN
 static void *
 pages_map_align(size_t size, int pfd, size_t alignment)
 {
 	void *ret;
 
 	/*
 	 * We don't use MAP_FIXED here, because it can cause the *replacement*
@@ -2207,32 +2254,47 @@ pages_unmap(void *addr, size_t size)
 		_malloc_message(_getprogname(),
 		    ": (malloc) Error in munmap(): ", buf, "\n");
 		if (opt_abort)
 			abort();
 	}
 }
 #endif
 
+#ifdef MOZ_MEMORY_DARWIN
+#define	VM_COPY_MIN (pagesize << 5)
+static inline void
+pages_copy(void *dest, const void *src, size_t n)
+{
+
+	assert((void *)((uintptr_t)dest & ~pagesize_mask) == dest);
+	assert(n >= VM_COPY_MIN);
+	assert((void *)((uintptr_t)src & ~pagesize_mask) == src);
+
+	vm_copy(mach_task_self(), (vm_address_t)src, (vm_size_t)n,
+	    (vm_address_t)dest);
+}
+#endif
+
 #ifdef MALLOC_VALIDATE
 static inline malloc_rtree_t *
 malloc_rtree_new(unsigned bits)
 {
 	malloc_rtree_t *ret;
 	unsigned bits_per_level, height, i;
 
 	bits_per_level = ffs(pow2_ceil((MALLOC_RTREE_NODESIZE /
 	    sizeof(void *)))) - 1;
 	height = bits / bits_per_level;
 	if (height * bits_per_level != bits)
 		height++;
 	assert(height * bits_per_level >= bits);
 
-	ret = (malloc_rtree_t*)base_calloc(1, sizeof(malloc_rtree_t) + (sizeof(unsigned) *
-	    (height - 1)));
+	ret = (malloc_rtree_t*)base_calloc(1, sizeof(malloc_rtree_t) +
+	    (sizeof(unsigned) * (height - 1)));
 	if (ret == NULL)
 		return (NULL);
 
 	malloc_spin_init(&ret->lock);
 	ret->height = height;
 	if (bits_per_level * height > bits)
 		ret->level2bits[0] = bits % bits_per_level;
 	else
@@ -2247,47 +2309,84 @@ malloc_rtree_new(unsigned bits)
 		 * deallocation.
 		 */
 		return (NULL);
 	}
 
 	return (ret);
 }
 
-/* The least significant bits of the key are ignored. */
-static inline void *
-malloc_rtree_get(malloc_rtree_t *rtree, uintptr_t key)
-{
-	void *ret;
-	uintptr_t subkey;
-	unsigned i, lshift, height, bits;
-	void **node, **child;
-
-	malloc_spin_lock(&rtree->lock);
-	for (i = lshift = 0, height = rtree->height, node = rtree->root;
-	    i < height - 1;
-	    i++, lshift += bits, node = child) {
-		bits = rtree->level2bits[i];
-		subkey = (key << lshift) >> ((SIZEOF_PTR << 3) - bits);
-		child = (void**)node[subkey];
-		if (child == NULL) {
-			malloc_spin_unlock(&rtree->lock);
-			return (NULL);
-		}
-	}
-
-	/* node is a leaf, so it contains values rather than node pointers. */
-	bits = rtree->level2bits[i];
-	subkey = (key << lshift) >> ((SIZEOF_PTR << 3) - bits);
-	ret = node[subkey];
-	malloc_spin_unlock(&rtree->lock);
-
-	return (ret);
+#define	MALLOC_RTREE_GET_GENERATE(f)					\
+/* The least significant bits of the key are ignored. */		\
+static inline void *							\
+f(malloc_rtree_t *rtree, uintptr_t key)					\
+{									\
+	void *ret;							\
+	uintptr_t subkey;						\
+	unsigned i, lshift, height, bits;				\
+	void **node, **child;						\
+									\
+	MALLOC_RTREE_LOCK(&rtree->lock);				\
+	for (i = lshift = 0, height = rtree->height, node = rtree->root;\
+	    i < height - 1;						\
+	    i++, lshift += bits, node = child) {			\
+		bits = rtree->level2bits[i];				\
+		subkey = (key << lshift) >> ((SIZEOF_PTR << 3) - bits);	\
+		child = (void**)node[subkey];				\
+		if (child == NULL) {					\
+			MALLOC_RTREE_UNLOCK(&rtree->lock);		\
+			return (NULL);					\
+		}							\
+	}								\
+									\
+	/*								\
+	 * node is a leaf, so it contains values rather than node	\
+	 * pointers.							\
+	 */								\
+	bits = rtree->level2bits[i];					\
+	subkey = (key << lshift) >> ((SIZEOF_PTR << 3) - bits);		\
+	ret = node[subkey];						\
+	MALLOC_RTREE_UNLOCK(&rtree->lock);				\
+									\
+	MALLOC_RTREE_GET_VALIDATE					\
+	return (ret);							\
 }
 
+#ifdef MALLOC_DEBUG
+#  define MALLOC_RTREE_LOCK(l)		malloc_spin_lock(l)
+#  define MALLOC_RTREE_UNLOCK(l)	malloc_spin_unlock(l)
+#  define MALLOC_RTREE_GET_VALIDATE
+MALLOC_RTREE_GET_GENERATE(malloc_rtree_get_locked)
+#  undef MALLOC_RTREE_LOCK
+#  undef MALLOC_RTREE_UNLOCK
+#  undef MALLOC_RTREE_GET_VALIDATE
+#endif
+
+#define	MALLOC_RTREE_LOCK(l)
+#define	MALLOC_RTREE_UNLOCK(l)
+#ifdef MALLOC_DEBUG
+   /*
+    * Suppose that it were possible for a jemalloc-allocated chunk to be
+    * munmap()ped, followed by a different allocator in another thread re-using
+    * overlapping virtual memory, all without invalidating the cached rtree
+    * value.  The result would be a false positive (the rtree would claim that
+    * jemalloc owns memory that it had actually discarded).  I don't think this
+    * scenario is possible, but the following assertion is a prudent sanity
+    * check.
+    */
+#  define MALLOC_RTREE_GET_VALIDATE					\
+	assert(malloc_rtree_get_locked(rtree, key) == ret);
+#else
+#  define MALLOC_RTREE_GET_VALIDATE
+#endif
+MALLOC_RTREE_GET_GENERATE(malloc_rtree_get)
+#undef MALLOC_RTREE_LOCK
+#undef MALLOC_RTREE_UNLOCK
+#undef MALLOC_RTREE_GET_VALIDATE
+
 static inline bool
 malloc_rtree_set(malloc_rtree_t *rtree, uintptr_t key, void *val)
 {
 	uintptr_t subkey;
 	unsigned i, lshift, height, bits;
 	void **node, **child;
 
 	malloc_spin_lock(&rtree->lock);
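The MALLOC_RTREE_GET_GENERATE trick above is worth seeing in miniature: one macro body expands against whatever LOCK/UNLOCK/VALIDATE macros are in scope at the point of use, yielding a locked getter and a lock-free getter that, in debug builds, cross-checks itself against the locked one. A toy sketch of the same pattern, with made-up names:

#include <assert.h>
#include <pthread.h>

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static int table[16];

#define GET_GENERATE(f)						\
static int							\
f(unsigned i)							\
{								\
	int ret;						\
								\
	TABLE_LOCK();						\
	ret = table[i & 15];					\
	TABLE_UNLOCK();						\
	GET_VALIDATE						\
	return ret;						\
}

/* Locked variant (debug only in the rtree code). */
#define TABLE_LOCK()	pthread_mutex_lock(&table_lock)
#define TABLE_UNLOCK()	pthread_mutex_unlock(&table_lock)
#define GET_VALIDATE
GET_GENERATE(table_get_locked)
#undef TABLE_LOCK
#undef TABLE_UNLOCK
#undef GET_VALIDATE

/* Lock-free variant, asserting agreement with the locked one. */
#define TABLE_LOCK()
#define TABLE_UNLOCK()
#define GET_VALIDATE	assert(table_get_locked(i) == ret);
GET_GENERATE(table_get)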
@@ -5009,33 +5108,37 @@ malloc_print_stats(void)
 		_malloc_message(opt_xmalloc ? "X" : "x", "", "", "");
 #endif
 #ifdef MALLOC_FILL
 		_malloc_message(opt_zero ? "Z" : "z", "", "", "");
 #endif
 		_malloc_message("\n", "", "", "");
 
 #ifndef MOZ_MEMORY_NARENAS_DEFAULT_ONE
-		_malloc_message("CPUs: ", umax2s(ncpus, s), "\n", "");
-#endif
-		_malloc_message("Max arenas: ", umax2s(narenas, s), "\n", "");
+		_malloc_message("CPUs: ", umax2s(ncpus, 10, s), "\n", "");
+#endif
+		_malloc_message("Max arenas: ", umax2s(narenas, 10, s), "\n",
+		    "");
 #ifdef MALLOC_BALANCE
 		_malloc_message("Arena balance threshold: ",
-		    umax2s(opt_balance_threshold, s), "\n", "");
-#endif
-		_malloc_message("Pointer size: ", umax2s(sizeof(void *), s),
+		    umax2s(opt_balance_threshold, 10, s), "\n", "");
+#endif
+		_malloc_message("Pointer size: ", umax2s(sizeof(void *), 10, s),
+		    "\n", "");
+		_malloc_message("Quantum size: ", umax2s(quantum, 10, s), "\n",
+		    "");
+		_malloc_message("Max small size: ", umax2s(small_max, 10, s),
 		    "\n", "");
-		_malloc_message("Quantum size: ", umax2s(quantum, s), "\n", "");
-		_malloc_message("Max small size: ", umax2s(small_max, s), "\n",
+		_malloc_message("Max dirty pages per arena: ",
+		    umax2s(opt_dirty_max, 10, s), "\n", "");
+
+		_malloc_message("Chunk size: ", umax2s(chunksize, 10, s), "",
 		    "");
-		_malloc_message("Max dirty pages per arena: ",
-		    umax2s(opt_dirty_max, s), "\n", "");
-
-		_malloc_message("Chunk size: ", umax2s(chunksize, s), "", "");
-		_malloc_message(" (2^", umax2s(opt_chunk_2pow, s), ")\n", "");
+		_malloc_message(" (2^", umax2s(opt_chunk_2pow, 10, s), ")\n",
+		    "");
 
 #ifdef MALLOC_STATS
 		{
 			size_t allocated, mapped;
 #ifdef MALLOC_BALANCE
 			uint64_t nbalance = 0;
 #endif
 			unsigned i;
@@ -5151,16 +5254,19 @@ malloc_init_hard(void)
 {
 	unsigned i;
 	char buf[PATH_MAX + 1];
 	const char *opts;
 	long result;
 #ifndef MOZ_MEMORY_WINDOWS
 	int linklen;
 #endif
+#ifdef MOZ_MEMORY_DARWIN
+	malloc_zone_t *default_zone;
+#endif
 
 #ifndef MOZ_MEMORY_WINDOWS
 	malloc_mutex_lock(&init_lock);
 #endif
 
 	if (malloc_initialized) {
 		/*
 		 * Another thread initialized the allocator before this one
@@ -5677,16 +5783,49 @@ MALLOC_OUT:
 
 #ifdef MALLOC_VALIDATE
 	chunk_rtree = malloc_rtree_new((SIZEOF_PTR << 3) - opt_chunk_2pow);
 	if (chunk_rtree == NULL)
 		return (true);
 #endif
 
 	malloc_initialized = true;
+
+#ifdef MOZ_MEMORY_DARWIN
+	/*
+	 * Overwrite the default memory allocator to use jemalloc everywhere.
+	 */
+	default_zone = malloc_default_zone();
+
+	/* Don't use jemalloc on as-yet-unreleased versions of OSX. */
+	use_jemalloc = (default_zone->version <= LION_MALLOC_ZONE_T_VERSION);
+
+	/* Allow us to dynamically turn off jemalloc for testing. */
+	if (getenv("NO_MAC_JEMALLOC"))
+		use_jemalloc = false;
+
+	if (use_jemalloc) {
+		size_t size;
+
+		/* Register the custom zone. */
+		malloc_zone_register(create_zone(default_zone->version));
+
+		/*
+		 * Convert the default szone to an "overlay zone" that is
+		 * capable of deallocating szone-allocated objects while
+		 * allocating new objects from jemalloc.
+		 */
+		size = zone_version_size(default_zone->version);
+		szone2ozone(default_zone, size);
+	} else {
+		szone = default_zone;
+	}
+#endif
+
 #ifndef MOZ_MEMORY_WINDOWS
 	malloc_mutex_unlock(&init_lock);
 #endif
 	return (false);
 }
 
 /* XXX Why not just expose malloc_print_stats()? */
 #ifdef MOZ_MEMORY_WINDOWS
@@ -5702,66 +5841,74 @@ malloc_shutdown()
  * End general internal functions.
  */
 /******************************************************************************/
 /*
  * Begin malloc(3)-compatible functions.
  */
 
 /*
- * Inline the standard malloc functions if they are being subsumed by Darwin's
- * zone infrastructure.
+ * Mangle standard interfaces, in order to avoid linking problems.
  */
-#ifdef MOZ_MEMORY_DARWIN
-#  define ZONE_INLINE	inline
-#else
-#  define ZONE_INLINE
-#endif
-
-/* Mangle standard interfaces on Darwin and Android, 
-   in order to avoid linking problems. */
-#if defined(MOZ_MEMORY_DARWIN)
-#define	malloc(a)	moz_malloc(a)
-#define	valloc(a)	moz_valloc(a)
-#define	calloc(a, b)	moz_calloc(a, b)
-#define	realloc(a, b)	moz_realloc(a, b)
-#define	free(a)		moz_free(a)
-#endif
-
-#if defined(MOZ_MEMORY_ANDROID) || defined(WRAP_MALLOC) || defined(WIN32_NEW_STYLE_JEMALLOC)
+#if defined(MOZ_MEMORY_DARWIN) || defined(MOZ_MEMORY_ANDROID) || \
+    defined(WRAP_MALLOC) || defined(WIN32_NEW_STYLE_JEMALLOC)
 inline void sys_free(void* ptr) {return free(ptr);}
-#define	malloc(a)	je_malloc(a)
-#define	valloc(a)	je_valloc(a)
-#define	calloc(a, b)	je_calloc(a, b)
-#define	realloc(a, b)	je_realloc(a, b)
-#define	free(a)		je_free(a)
-#if defined(WIN32_NEW_STYLE_JEMALLOC)
-#define memalign(a, b) je_memalign(a, b)
-#endif
-#define posix_memalign(a, b, c)  je_posix_memalign(a, b, c)
-#define malloc_usable_size(a) je_malloc_usable_size(a)
+#define	malloc(a)               je_malloc(a)
+#if defined(WIN32_NEW_STYLE_JEMALLOC) || defined(MOZ_MEMORY_DARWIN)
+#define	memalign(a, b)          je_memalign(a, b)
+#endif
+#define	posix_memalign(a, b, c) je_posix_memalign(a, b, c)
+#define	valloc(a)               je_valloc(a)
+#define	calloc(a, b)            je_calloc(a, b)
+#define	realloc(a, b)           je_realloc(a, b)
+#define	free(a)                 je_free(a)
+#define	malloc_usable_size(a)   je_malloc_usable_size(a)
 
 char    *je_strndup(const char *src, size_t len) {
   char* dst = (char*)je_malloc(len + 1);
   if(dst)
     strncpy(dst, src, len + 1);
   return dst;
 }
 char    *je_strdup(const char *src) {
   size_t len = strlen(src);
   return je_strndup(src, len );
 }
 #endif
 
-ZONE_INLINE
+/*
+ * We cannot assume that we will be able to replace the OSX allocator with
+ * jemalloc on future, as-yet-unreleased versions of OSX. Despite this,
+ * mozalloc calls jemalloc functions directly. Since mixing the allocators is
+ * dangerous, the functions below must forward to the OSX allocator whenever
+ * use_jemalloc is not (dynamically) set.
+ *
+ * We call memalign from mozalloc, but the 10.5 SDK doesn't have a memalign
+ * function to forward the call to. However, use_jemalloc will _always_ be true
+ * on 10.5, so we just omit these checks statically. This allows us to build
+ * successfully on 10.5, and also makes it undetectably faster.
+ *
+ * FIXME:
+ *   This may lead to problems when using 32-bit plugins with a 64-bit process,
+ *   on OSX 10.8 or higher.
+ */
+#if defined(MOZ_MEMORY_DARWIN) && !defined(__i386__)
+#define DARWIN_ONLY(A) if (!use_jemalloc) { A; }
+#else
+#define DARWIN_ONLY(A)
+#endif
+
 void *
 malloc(size_t size)
 {
 	void *ret;
 
+	DARWIN_ONLY(return (szone->malloc)(szone, size));
+
 	if (malloc_init()) {
 		ret = NULL;
 		goto RETURN;
 	}
 
 	if (size == 0) {
 #ifdef MALLOC_SYSV
 		if (opt_sysv == false)
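Each malloc(3)-compatible entry point above starts with a DARWIN_ONLY escape hatch: if jemalloc was dynamically disabled, the call forwards to the stashed system zone before the jemalloc path is ever reached. The shape of the pattern in isolation (a sketch; fallback_malloc stands in for (szone->malloc)(szone, size)):

#include <stdbool.h>
#include <stdlib.h>

static bool use_jemalloc = true;

static void *
fallback_malloc(size_t size)
{
	return malloc(size);	/* stand-in for the stashed szone */
}

#define DARWIN_ONLY(A) if (!use_jemalloc) { A; }

static void *
wrapped_malloc(size_t size)
{
	DARWIN_ONLY(return fallback_malloc(size));

	/* ... the jemalloc fast path would follow here ... */
	return malloc(size);
}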
@@ -5830,16 +5977,18 @@ memalign(size_t alignment, size_t size);
 #define MEMALIGN memalign
 #endif
 
 void *
 MEMALIGN(size_t alignment, size_t size)
 {
 	void *ret;
 
+	DARWIN_ONLY(return (szone->memalign)(szone, alignment, size));
+
 	assert(((alignment - 1) & alignment) == 0);
 
 	if (malloc_init()) {
 		ret = NULL;
 		goto RETURN;
 	}
 
 	if (size == 0) {
@@ -5871,17 +6020,16 @@ RETURN:
 }
 
 #ifdef MOZ_MEMORY_ELF
 extern __typeof(memalign_internal)
         memalign __attribute__((alias ("memalign_internal"),
 				visibility ("default")));
 #endif
 
-ZONE_INLINE
 int
 posix_memalign(void **memptr, size_t alignment, size_t size)
 {
 	void *result;
 
 	/* Make sure that alignment is a large enough power of 2. */
 	if (((alignment - 1) & alignment) != 0 || alignment < sizeof(void *)) {
 #ifdef MALLOC_XMALLOC
@@ -5892,46 +6040,39 @@ posix_memalign(void **memptr, size_t ali
 			abort();
 		}
 #endif
 		return (EINVAL);
 	}
 
 	/* The 0-->1 size promotion is done in the memalign() call below */
 
-#ifdef MOZ_MEMORY_DARWIN
-	result = moz_memalign(alignment, size);
-#else
 	result = MEMALIGN(alignment, size);
-#endif
+
 	if (result == NULL)
 		return (ENOMEM);
 
 	*memptr = result;
 	return (0);
 }
 
-ZONE_INLINE
 void *
 valloc(size_t size)
 {
-#ifdef MOZ_MEMORY_DARWIN
-	return (moz_memalign(pagesize, size));
-#else
 	return (MEMALIGN(pagesize, size));
-#endif
 }
 
-ZONE_INLINE
 void *
 calloc(size_t num, size_t size)
 {
 	void *ret;
 	size_t num_size;
 
+	DARWIN_ONLY(return (szone->calloc)(szone, num, size));
+
 	if (malloc_init()) {
 		num_size = 0;
 		ret = NULL;
 		goto RETURN;
 	}
 
 	num_size = num * size;
 	if (num_size == 0) {
@@ -5971,22 +6112,23 @@ RETURN:
 #endif
 		errno = ENOMEM;
 	}
 
 	UTRACE(0, num_size, ret);
 	return (ret);
 }
 
-ZONE_INLINE
 void *
 realloc(void *ptr, size_t size)
 {
 	void *ret;
 
+	DARWIN_ONLY(return (szone->realloc)(szone, ptr, size));
+
 	if (size == 0) {
 #ifdef MALLOC_SYSV
 		if (opt_sysv == false)
 #endif
 			size = 1;
 #ifdef MALLOC_SYSV
 		else {
 			if (ptr != NULL)
@@ -6034,22 +6176,23 @@ realloc(void *ptr, size_t size)
 
 #ifdef MALLOC_SYSV
 RETURN:
 #endif
 	UTRACE(ptr, size, ret);
 	return (ret);
 }
 
-ZONE_INLINE
 void
 free(void *ptr)
 {
 	size_t offset;
 	
+	DARWIN_ONLY((szone->free)(szone, ptr); return);
+
 	UTRACE(ptr, 0, 0);
 
 	/*
 	 * A version of idalloc that checks for NULL pointer but only for
 	 * huge allocations assuming that CHUNK_ADDR2OFFSET(NULL) == 0.
 	 */
 	assert(CHUNK_ADDR2OFFSET(NULL) == 0);
 	offset = CHUNK_ADDR2OFFSET(ptr);
@@ -6069,16 +6212,17 @@ free(void *ptr)
 #ifdef MOZ_MEMORY_ANDROID
 size_t
 malloc_usable_size(void *ptr)
 #else
 size_t
 malloc_usable_size(const void *ptr)
 #endif
 {
+	DARWIN_ONLY(return (szone->size)(szone, ptr));
 
 #ifdef MALLOC_VALIDATE
 	return (isalloc_validate(ptr));
 #else
 	assert(ptr != NULL);
 
 	return (isalloc(ptr));
 #endif
@@ -6159,18 +6303,16 @@ jemalloc_stats(jemalloc_stats_t *stats)
 	stats->committed += base_committed;
 #endif
 	malloc_mutex_unlock(&base_mtx);
 
 	/* Iterate over arenas and their chunks. */
 	for (i = 0; i < narenas; i++) {
 		arena_t *arena = arenas[i];
 		if (arena != NULL) {
-			arena_chunk_t *chunk;
-
 			malloc_spin_lock(&arena->lock);
 			stats->allocated += arena->stats.allocated_small;
 			stats->allocated += arena->stats.allocated_large;
 #ifdef MALLOC_DECOMMIT
 			stats->committed += (arena->stats.committed <<
 			    pagesize_2pow);
 #endif
 			stats->dirty += (arena->ndirty << pagesize_2pow);
@@ -6236,17 +6378,17 @@ size_t
 /******************************************************************************/
 /*
  * Begin library-private functions, used by threading libraries for protection
  * of malloc during fork().  These functions are only called if the program is
  * running in threaded mode, so there is no need to check whether the program
  * is threaded here.
  */
 
-void
+static void
 _malloc_prefork(void)
 {
 	unsigned i;
 
 	/* Acquire all mutexes in a safe order. */
 
 	malloc_spin_lock(&arenas_lock);
 	for (i = 0; i < narenas; i++) {
@@ -6254,17 +6396,17 @@ void
 			malloc_spin_lock(&arenas[i]->lock);
 	}
 
 	malloc_mutex_lock(&base_mtx);
 
 	malloc_mutex_lock(&huge_mtx);
 }
 
-void
+static void
 _malloc_postfork(void)
 {
 	unsigned i;
 
 	/* Release all mutexes, now that fork() has completed. */
 
 	malloc_mutex_unlock(&huge_mtx);
 
@@ -6282,18 +6424,16 @@ void
  */
 /******************************************************************************/
 
 #ifdef HAVE_DLOPEN
 #  include <dlfcn.h>
 #endif
 
 #ifdef MOZ_MEMORY_DARWIN
-static malloc_zone_t zone;
-static struct malloc_introspection_t zone_introspect;
 
 static size_t
 zone_size(malloc_zone_t *zone, void *ptr)
 {
 
 	/*
 	 * There appear to be places within Darwin (such as setenv(3)) that
 	 * cause calls to this function with pointers that *no* zone owns.  If
@@ -6340,16 +6480,29 @@ zone_free(malloc_zone_t *zone, void *ptr
 static void *
 zone_realloc(malloc_zone_t *zone, void *ptr, size_t size)
 {
 
 	return (realloc(ptr, size));
 }
 
 static void *
+zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size)
+{
+	return (memalign(alignment, size));
+}
+
+static void
+zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size)
+{
+	assert(isalloc_validate(ptr) == size);
+	free(ptr);
+}
+
+static void *
 zone_destroy(malloc_zone_t *zone)
 {
 
 	/* This function should never be called. */
 	assert(false);
 	return (NULL);
 }
 
@@ -6384,72 +6537,241 @@ zone_force_lock(malloc_zone_t *zone)
 static void
 zone_force_unlock(malloc_zone_t *zone)
 {
 
 	_malloc_postfork();
 }
 
 static malloc_zone_t *
-create_zone(void)
+create_zone(unsigned version)
 {
-
 	assert(malloc_initialized);
 
-	zone.size = (void *)zone_size;
-	zone.malloc = (void *)zone_malloc;
-	zone.calloc = (void *)zone_calloc;
-	zone.valloc = (void *)zone_valloc;
-	zone.free = (void *)zone_free;
-	zone.realloc = (void *)zone_realloc;
-	zone.destroy = (void *)zone_destroy;
-	zone.zone_name = "jemalloc_zone";
-	zone.batch_malloc = NULL;
-	zone.batch_free = NULL;
-	zone.introspect = &zone_introspect;
-
-	zone_introspect.enumerator = NULL;
-	zone_introspect.good_size = (void *)zone_good_size;
-	zone_introspect.check = NULL;
-	zone_introspect.print = NULL;
-	zone_introspect.log = NULL;
-	zone_introspect.force_lock = (void *)zone_force_lock;
-	zone_introspect.force_unlock = (void *)zone_force_unlock;
-	zone_introspect.statistics = NULL;
-
-	return (&zone);
+	jemalloc_zone->size = (void *)zone_size;
+	jemalloc_zone->malloc = (void *)zone_malloc;
+	jemalloc_zone->calloc = (void *)zone_calloc;
+	jemalloc_zone->valloc = (void *)zone_valloc;
+	jemalloc_zone->free = (void *)zone_free;
+	jemalloc_zone->realloc = (void *)zone_realloc;
+	jemalloc_zone->destroy = (void *)zone_destroy;
+	jemalloc_zone->zone_name = "jemalloc_zone";
+	jemalloc_zone->version = version;
+	jemalloc_zone->batch_malloc = NULL;
+	jemalloc_zone->batch_free = NULL;
+	jemalloc_zone->introspect = zone_introspect;
+
+	zone_introspect->enumerator = NULL;
+	zone_introspect->good_size = (void *)zone_good_size;
+	zone_introspect->check = NULL;
+	zone_introspect->print = NULL;
+	zone_introspect->log = NULL;
+	zone_introspect->force_lock = (void *)zone_force_lock;
+	zone_introspect->force_unlock = (void *)zone_force_unlock;
+	zone_introspect->statistics = NULL;
+
+	/*
+	 * For these fields, see the comment labelled
+	 * MALLOC_ZONE_T_NOTE, above.
+	 */
+
+	/* Members added in 10.6. */
+	l_jemalloc_zone.m15 = (void (*)())zone_memalign;
+	l_jemalloc_zone.m16 = (void (*)())zone_free_definite_size;
+	l_zone_introspect.m9 = NULL;
+
+	/* Members added in 10.7. */
+	l_jemalloc_zone.m17 = NULL;
+	l_zone_introspect.m10 = NULL;
+	l_zone_introspect.m11 = NULL;
+	l_zone_introspect.m12 = NULL;
+	l_zone_introspect.m13 = NULL;
+
+	return jemalloc_zone;
+}
+
+static size_t
+ozone_size(malloc_zone_t *zone, void *ptr)
+{
+	size_t ret = isalloc_validate(ptr);
+	if (ret == 0)
+		ret = szone->size(zone, ptr);
+
+	return ret;
+}
+
+static void
+ozone_free(malloc_zone_t *zone, void *ptr)
+{
+	if (isalloc_validate(ptr) != 0)
+		free(ptr);
+	else {
+		size_t size = szone->size(zone, ptr);
+		if (size != 0)
+			(szone->free)(zone, ptr);
+		/* Otherwise we leak. */
+	}
+}
+
+static void *
+ozone_realloc(malloc_zone_t *zone, void *ptr, size_t size)
+{
+	size_t oldsize;
+	if (ptr == NULL)
+		return (malloc(size));
+
+	oldsize = isalloc_validate(ptr);
+	if (oldsize != 0)
+		return (realloc(ptr, size));
+	else {
+		oldsize = szone->size(zone, ptr);
+		if (oldsize == 0)
+			return (malloc(size));
+		else {
+			void *ret = malloc(size);
+			if (ret != NULL) {
+				memcpy(ret, ptr, (oldsize < size) ? oldsize :
+				    size);
+				(szone->free)(zone, ptr);
+			}
+			return (ret);
+		}
+	}
+}
+
+static unsigned
+ozone_batch_malloc(malloc_zone_t *zone, size_t size, void **results,
+    unsigned num_requested)
+{
+	/* Don't bother implementing this interface, since it isn't required. */
+	return 0;
+}
+
+static void
+ozone_batch_free(malloc_zone_t *zone, void **to_be_freed, unsigned num)
+{
+	unsigned i;
+
+	for (i = 0; i < num; i++)
+		ozone_free(zone, to_be_freed[i]);
+}
+
+static void
+ozone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size)
+{
+	if (isalloc_validate(ptr) != 0) {
+		assert(isalloc_validate(ptr) == size);
+		free(ptr);
+	} else {
+		assert(size == szone->size(zone, ptr));
+		l_szone.m16(zone, ptr, size);
+	}
+}
+
+static void
+ozone_force_lock(malloc_zone_t *zone)
+{
+	/* jemalloc locking is taken care of by the normal jemalloc zone. */
+	szone->introspect->force_lock(zone);
+}
+
+static void
+ozone_force_unlock(malloc_zone_t *zone)
+{
+	/* jemalloc locking is taken care of by the normal jemalloc zone. */
+	szone->introspect->force_unlock(zone);
+}
+
+static size_t
+zone_version_size(int version)
+{
+	switch (version) {
+	case SNOW_LEOPARD_MALLOC_ZONE_T_VERSION:
+		return sizeof(snow_leopard_malloc_zone);
+	case LEOPARD_MALLOC_ZONE_T_VERSION:
+		return sizeof(leopard_malloc_zone);
+	case LION_MALLOC_ZONE_T_VERSION:
+	default:
+		return sizeof(lion_malloc_zone);
+	}
+}
+
+/*
+ * Overlay the default scalable zone (szone) such that existing allocations are
+ * drained, and further allocations come from jemalloc. This is necessary
+ * because Core Foundation directly accesses and uses the szone before the
+ * jemalloc library is even loaded.
+ */
+static void
+szone2ozone(malloc_zone_t *default_zone, size_t size)
+{
+	lion_malloc_zone *l_zone;
+	assert(malloc_initialized);
+
+	/*
+	 * Stash a copy of the original szone so that we can call its
+	 * functions as needed. Note that internally, the szone stores its
+	 * bookkeeping data structures immediately following the malloc_zone_t
+	 * header, so when calling szone functions, we need to pass a pointer
+	 * to the original zone structure.
+	 */
+	memcpy(szone, default_zone, size);
+
+	/* OSX 10.7 allocates the default zone in protected memory. */
+	if (default_zone->version >= LION_MALLOC_ZONE_T_VERSION) {
+		void *start_of_page = (void *)((size_t)default_zone & ~pagesize_mask);
+		mprotect(start_of_page, size, PROT_READ | PROT_WRITE);
+	}
+
+	default_zone->size = (void *)ozone_size;
+	default_zone->malloc = (void *)zone_malloc;
+	default_zone->calloc = (void *)zone_calloc;
+	default_zone->valloc = (void *)zone_valloc;
+	default_zone->free = (void *)ozone_free;
+	default_zone->realloc = (void *)ozone_realloc;
+	default_zone->destroy = (void *)zone_destroy;
+	default_zone->zone_name = "jemalloc_ozone";
+	default_zone->batch_malloc = ozone_batch_malloc;
+	default_zone->batch_free = ozone_batch_free;
+	default_zone->introspect = ozone_introspect;
+
+	ozone_introspect->enumerator = NULL;
+	ozone_introspect->good_size = (void *)zone_good_size;
+	ozone_introspect->check = NULL;
+	ozone_introspect->print = NULL;
+	ozone_introspect->log = NULL;
+	ozone_introspect->force_lock = (void *)ozone_force_lock;
+	ozone_introspect->force_unlock = (void *)ozone_force_unlock;
+	ozone_introspect->statistics = NULL;
+
+	/* Platform-dependent structs */
+	l_zone = (lion_malloc_zone*)(default_zone);
+
+	if (default_zone->version >= SNOW_LEOPARD_MALLOC_ZONE_T_VERSION) {
+		l_zone->m15 = (void (*)())zone_memalign;
+		l_zone->m16 = (void (*)())ozone_free_definite_size;
+		l_ozone_introspect.m9 = NULL;
+	}
+
+	if (default_zone->version >= LION_MALLOC_ZONE_T_VERSION) {
+		l_zone->m17 = NULL;
+		l_ozone_introspect.m10 = NULL;
+		l_ozone_introspect.m11 = NULL;
+		l_ozone_introspect.m12 = NULL;
+		l_ozone_introspect.m13 = NULL;
+	}
 }
 
 __attribute__((constructor))
 void
 jemalloc_darwin_init(void)
 {
-	extern unsigned malloc_num_zones;
-	extern malloc_zone_t **malloc_zones;
-
 	if (malloc_init_hard())
 		abort();
-
-	/*
-	 * The following code is *not* thread-safe, so it's critical that
-	 * initialization be manually triggered.
-	 */
-
-	/* Register the custom zones. */
-	malloc_zone_register(create_zone());
-	assert(malloc_zones[malloc_num_zones - 1] == &zone);
-
-	/*
-	 * Shift malloc_zones around so that zone is first, which makes it the
-	 * default zone.
-	 */
-	assert(malloc_num_zones > 1);
-	memmove(&malloc_zones[1], &malloc_zones[0],
-		sizeof(malloc_zone_t *) * (malloc_num_zones - 1));
-	malloc_zones[0] = &zone;
 }
 
 #elif defined(__GLIBC__) && !defined(__UCLIBC__)
 /*
  * glibc provides the RTLD_DEEPBIND flag for dlopen which can make it possible
  * to inconsistently reference libc's malloc(3)-compatible functions
  * (bug 493541).
  *
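All of the ozone_* functions above reduce to one ownership test: isalloc_validate() returns a nonzero size only for jemalloc-owned pointers, and anything else is routed to the stashed szone. The dispatch shape in isolation (a sketch; owns_size and the szone_* names are stand-ins for the patch's functions):

#include <stddef.h>

extern size_t owns_size(const void *ptr);	/* isalloc_validate stand-in */
extern void jemalloc_free(void *ptr);
extern size_t szone_size(const void *ptr);
extern void szone_free(void *ptr);

static void
overlay_free(void *ptr)
{
	if (owns_size(ptr) != 0) {
		jemalloc_free(ptr);
	} else if (szone_size(ptr) != 0) {
		szone_free(ptr);
	}
	/* Owned by neither zone: leak rather than crash, as ozone_free does. */
}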
--- a/memory/jemalloc/jemalloc.h
+++ b/memory/jemalloc/jemalloc.h
@@ -45,17 +45,18 @@ extern const char *_malloc_options;
 void	*malloc(size_t size);
 void	*valloc(size_t size);
 void	*calloc(size_t num, size_t size);
 void	*realloc(void *ptr, size_t size);
 void	free(void *ptr);
 int	posix_memalign(void **memptr, size_t alignment, size_t size);
 #endif /* MOZ_MEMORY_DARWIN, MOZ_MEMORY_LINUX */
 
-#if defined(MOZ_MEMORY_ANDROID) || defined(WRAP_MALLOC) || defined(WIN32_NEW_STYLE_JEMALLOC)
+#if defined(MOZ_MEMORY_DARWIN) || defined(MOZ_MEMORY_ANDROID) || \
+    defined(WRAP_MALLOC) || defined(WIN32_NEW_STYLE_JEMALLOC)
 void	*je_malloc(size_t size);
 void	*je_valloc(size_t size);
 void	*je_calloc(size_t num, size_t size);
 void	*je_realloc(void *ptr, size_t size);
 void	je_free(void *ptr);
 void *je_memalign(size_t alignment, size_t size);
 int	je_posix_memalign(void **memptr, size_t alignment, size_t size);
 char    *je_strndup(const char *src, size_t len);
--- a/memory/jemalloc/jemalloc_types.h
+++ b/memory/jemalloc/jemalloc_types.h
@@ -68,17 +68,17 @@ typedef struct {
 	size_t	chunksize;	/* Size of each virtual memory mapping. */
 	size_t	dirty_max;	/* Max dirty pages per arena. */
 
 	/*
 	 * Current memory usage statistics.
 	 */
 	size_t	mapped;		/* Bytes mapped (not necessarily committed). */
 	size_t	committed;	/* Bytes committed (readable/writable). */
-	size_t	allocated;	/* Bytes allocted (in use by application). */
+	size_t	allocated;	/* Bytes allocated (in use by application). */
 	size_t	dirty;		/* Bytes dirty (committed unused pages). */
 } jemalloc_stats_t;
 
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
 
 #endif /* _JEMALLOC_TYPES_H_ */
new file mode 100644
--- /dev/null
+++ b/memory/jemalloc/osx_zone_types.h
@@ -0,0 +1,141 @@
+/* -*- Mode: C; tab-width: 8; c-basic-offset: 8 -*- */
+/* vim:set softtabstop=8 shiftwidth=8: */
+/*-
+ * Copyright (C) 2006-2008 Jason Evans <jasone@FreeBSD.org>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice(s), this list of conditions and the following disclaimer as
+ *    the first lines of this file unmodified other than the possible
+ *    addition of one or more copyright notices.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice(s), this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * The purpose of these structs is described in jemalloc.c, in the comment
+ * marked MALLOC_ZONE_T_NOTE.
+ *
+ * We need access to some structs that come with a specific version of OSX 
+ * but can't copy them here because of licensing restrictions (see bug
+ * 603655). The structs below are equivalent in that they'll always be
+ * compiled to the same representation on all platforms.
+ *
+ * `void*` and `void (*)()` may not be the same size on weird platforms, but
+ * the size of a function pointer shouldn't vary according to its parameters
+ * or return type.
+ */
+
+/*
+ * OSX 10.5 - Leopard
+ */
+typedef struct _leopard_malloc_zone {
+	void *m1;
+	void *m2;
+	void (*m3)();
+	void (*m4)();
+	void (*m5)();
+	void (*m6)();
+	void (*m7)();
+	void (*m8)();
+	void (*m9)();
+	void *m10;
+	void (*m11)();
+	void (*m12)();
+	void *m13;
+	unsigned m14;
+} leopard_malloc_zone;
+
+/*
+ * OSX 10.6 - Snow Leopard
+ */
+typedef struct _snow_leopard_malloc_zone {
+	void *m1;
+	void *m2;
+	void (*m3)();
+	void (*m4)();
+	void (*m5)();
+	void (*m6)();
+	void (*m7)();
+	void (*m8)();
+	void (*m9)();
+	void *m10;
+	void (*m11)();
+	void (*m12)();
+	void *m13;
+	unsigned m14;
+	void (*m15)(); // this member added in 10.6
+	void (*m16)(); // this member added in 10.6
+} snow_leopard_malloc_zone;
+
+typedef struct _snow_leopard_malloc_introspection {
+	void (*m1)();
+	void (*m2)();
+	void (*m3)();
+	void (*m4)();
+	void (*m5)();
+	void (*m6)();
+	void (*m7)();
+	void (*m8)();
+	void (*m9)(); // this member added in 10.6
+} snow_leopard_malloc_introspection;
+
+/*
+ * OSX 10.7 - Lion
+ */
+typedef struct _lion_malloc_zone {
+	void *m1;
+	void *m2;
+	void (*m3)();
+	void (*m4)();
+	void (*m5)();
+	void (*m6)();
+	void (*m7)();
+	void (*m8)();
+	void (*m9)();
+	void *m10;
+	void (*m11)();
+	void (*m12)();
+	void *m13;
+	unsigned m14;
+	void (*m15)();
+	void (*m16)();
+	void (*m17)(); // this member added in 10.7
+} lion_malloc_zone;
+
+typedef struct _lion_malloc_introspection {
+	void (*m1)();
+	void (*m2)();
+	void (*m3)();
+	void (*m4)();
+	void (*m5)();
+	void (*m6)();
+	void (*m7)();
+	void (*m8)();
+	void (*m9)();
+	void (*m10)(); // this member added in 10.7
+	void (*m11)(); // this member added in 10.7
+	void (*m12)(); // this member added in 10.7
+#ifdef __BLOCKS__
+	void (*m13)(); // this member added in 10.7
+#else
+	void *m13; // this member added in 10.7
+#endif
+} lion_malloc_introspection;
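Because these shadow structs must match Apple's layouts byte-for-byte, it is worth checking them against the real headers whenever a new SDK appears. A sketch of such a check (not part of the patch; compile once per SDK on a Mac):

#include <malloc/malloc.h>
#include <stdio.h>
#include "osx_zone_types.h"

int
main(void)
{
	/*
	 * Under a given SDK, sizeof(malloc_zone_t) should equal the shadow
	 * struct for that SDK's malloc_zone_t version.
	 */
	printf("malloc_zone_t:            %zu\n", sizeof(malloc_zone_t));
	printf("leopard_malloc_zone:      %zu\n", sizeof(leopard_malloc_zone));
	printf("snow_leopard_malloc_zone: %zu\n", sizeof(snow_leopard_malloc_zone));
	printf("lion_malloc_zone:         %zu\n", sizeof(lion_malloc_zone));
	return 0;
}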
--- a/memory/mozalloc/Makefile.in
+++ b/memory/mozalloc/Makefile.in
@@ -60,16 +60,19 @@ EXTRA_DSO_LDOPTS += $(DIST)/lib/libjemal
 WRAP_MALLOC_LIB=
 WRAP_MALLOC_CFLAGS=
 DEFINES += -DWRAP_MALLOC_WITH_JEMALLOC
 CSRCS = ld_malloc_wrappers.c
 ifeq (,$(filter-out Linux,$(OS_TARGET)))
 EXTRA_DSO_LDOPTS += -lpthread
 endif
 endif
+ifeq ($(OS_ARCH),Darwin)
+EXTRA_DSO_LDOPTS += -L$(DIST)/lib -ljemalloc
+endif
 endif
 
 ifeq (,$(filter-out OS2,$(OS_ARCH)))
 # The strndup declaration in string.h is in an ifdef __USE_GNU section
 DEFINES		+= -D_GNU_SOURCE
 endif
 
 EXPORTS_NAMESPACES 	= mozilla
--- a/memory/mozalloc/mozalloc.cpp
+++ b/memory/mozalloc/mozalloc.cpp
@@ -71,27 +71,36 @@
 #if defined(__GNUC__) && (__GNUC__ > 2)
 #define LIKELY(x)    (__builtin_expect(!!(x), 1))
 #define UNLIKELY(x)  (__builtin_expect(!!(x), 0))
 #else
 #define LIKELY(x)    (x)
 #define UNLIKELY(x)  (x)
 #endif
 
-#if defined(MOZ_MEMORY_ANDROID) || defined(WRAP_MALLOC_WITH_JEMALLOC)
+#if defined(MOZ_MEMORY_DARWIN) || defined(MOZ_MEMORY_ANDROID) || \
+    defined(WRAP_MALLOC_WITH_JEMALLOC)
 #include "jemalloc.h"
-#define malloc(a)     je_malloc(a)
-#define valloc(a)     je_valloc(a)
-#define calloc(a, b)  je_calloc(a, b)
-#define realloc(a, b) je_realloc(a, b)
-#define free(a)       je_free(a)
-#define strdup(a)     je_strdup(a)
-#define strndup(a, b) je_strndup(a, b)
-#define posix_memalign(a, b, c)  je_posix_memalign(a, b, c)
-#define malloc_usable_size(a) je_malloc_usable_size(a)
+#define malloc(a)               je_malloc(a)
+#define posix_memalign(a, b, c) je_posix_memalign(a, b, c)
+#define valloc(a)               je_valloc(a)
+#define calloc(a, b)            je_calloc(a, b)
+#ifndef MOZ_MEMORY_DARWIN
+   // On Darwin, these functions may be passed memory that was not allocated
+   // by jemalloc, so there we leave the system-provided functions in place;
+   // they forward to jemalloc when appropriate.
+#  define realloc(a, b)         je_realloc(a, b)
+#  define free(a)               je_free(a)
+#  define malloc_usable_size(a) je_malloc_usable_size(a)
+#endif
+#ifndef MOZ_MEMORY_ANDROID
+#define memalign(a, b)          je_memalign(a, b)
+#endif
+#define strdup(a)               je_strdup(a)
+#define strndup(a, b)           je_strndup(a, b)
 #endif
 
 void
 moz_free(void* ptr)
 {
     free(ptr);
 }
 
@@ -187,17 +196,33 @@ moz_xposix_memalign(void **ptr, size_t a
         return moz_xposix_memalign(ptr, alignment, size);
     }
     // else: (0 == err) or (EINVAL == err)
     return err;
 }
 int
 moz_posix_memalign(void **ptr, size_t alignment, size_t size)
 {
-    return posix_memalign(ptr, alignment, size);
+    int code = posix_memalign(ptr, alignment, size);
+    if (code)
+        return code;
+
+#if defined(XP_MACOSX)
+    // Work around a faulty OSX posix_memalign that sometimes returns
+    // misaligned memory yet reports success by returning 0.
+    size_t mask = alignment - 1;
+    if (((size_t)(*ptr) & mask) != 0) {
+        void* old = *ptr;
+        code = moz_posix_memalign(ptr, alignment, size);
+        free(old);
+    }
+#endif
+
+    return code;
 }
 #endif // if defined(HAVE_POSIX_MEMALIGN)
 
 #if defined(HAVE_MEMALIGN) || defined(HAVE_JEMALLOC_MEMALIGN)
 void*
 moz_xmemalign(size_t boundary, size_t size)
 {
     void* ptr = memalign(boundary, size);
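The retry in moz_posix_memalign above exists because posix_memalign on OSX can return 0 yet hand back a misaligned pointer; the detection is a simple mask test. In isolation (a sketch):

#include <stdbool.h>
#include <stddef.h>

static bool
is_aligned(const void *ptr, size_t alignment)
{
	/*
	 * alignment must be a power of two, as posix_memalign requires;
	 * an aligned pointer has no bits set below it.
	 */
	return ((size_t)ptr & (alignment - 1)) == 0;
}

/* Usage mirrors the workaround: if (!is_aligned(*ptr, alignment)),
 * allocate again and free the misaligned block. */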
@@ -210,17 +235,17 @@ moz_xmemalign(size_t boundary, size_t si
 }
 void*
 moz_memalign(size_t boundary, size_t size)
 {
     return memalign(boundary, size);
 }
 #endif // if defined(HAVE_MEMALIGN)
 
-#if defined(HAVE_VALLOC)
+#if defined(HAVE_VALLOC) || defined(HAVE_JEMALLOC_VALLOC)
 void*
 moz_xvalloc(size_t size)
 {
     void* ptr = valloc(size);
     if (UNLIKELY(!ptr)) {
         mozalloc_handle_oom();
         return moz_xvalloc(size);
     }
@@ -234,20 +259,20 @@ moz_valloc(size_t size)
 #endif // if defined(HAVE_VALLOC)
 
 size_t
 moz_malloc_usable_size(void *ptr)
 {
     if (!ptr)
         return 0;
 
-#if defined(MOZ_MEMORY)
+#if defined(XP_MACOSX)
+    return malloc_size(ptr);
+#elif defined(MOZ_MEMORY)
     return malloc_usable_size(ptr);
-#elif defined(XP_MACOSX)
-    return malloc_size(ptr);
 #elif defined(XP_WIN)
     return _msize(ptr);
 #else
     return 0;
 #endif
 }
 
 namespace mozilla {
--- a/memory/mozalloc/mozalloc.h
+++ b/memory/mozalloc/mozalloc.h
@@ -157,17 +157,17 @@ MOZALLOC_EXPORT int moz_posix_memalign(v
 MOZALLOC_EXPORT void* moz_xmemalign(size_t boundary, size_t size)
     NS_ATTR_MALLOC NS_WARN_UNUSED_RESULT;
 
 MOZALLOC_EXPORT void* moz_memalign(size_t boundary, size_t size)
     NS_ATTR_MALLOC NS_WARN_UNUSED_RESULT;
 #endif /* if defined(HAVE_MEMALIGN) */
 
 
-#if defined(HAVE_VALLOC)
+#if defined(HAVE_VALLOC) || defined(HAVE_JEMALLOC_VALLOC)
 MOZALLOC_EXPORT void* moz_xvalloc(size_t size)
     NS_ATTR_MALLOC NS_WARN_UNUSED_RESULT;
 
 MOZALLOC_EXPORT void* moz_valloc(size_t size)
     NS_ATTR_MALLOC NS_WARN_UNUSED_RESULT;
 #endif /* if defined(HAVE_VALLOC) */
 
 
--- a/toolkit/library/libxul-rules.mk
+++ b/toolkit/library/libxul-rules.mk
@@ -40,16 +40,22 @@
 LOCAL_INCLUDES += \
 	-I$(topsrcdir)/config \
 	-I$(topsrcdir)/widget/src/windows \
 	-I$(topsrcdir)/widget/src/build \
 	$(NULL)
 
 OS_LIBS += $(LIBICONV)
 
+ifdef MOZ_MEMORY
+ifeq ($(OS_ARCH),Darwin)
+EXTRA_DSO_LDOPTS += -L$(DIST)/lib -ljemalloc
+endif
+endif
+
 DEFINES += \
 	-D_IMPL_NS_COM \
 	-D_IMPL_NS_STRINGAPI \
 	-DEXPORT_XPT_API \
 	-DEXPORT_XPTC_API \
 	-D_IMPL_NS_GFX \
 	-D_IMPL_NS_WIDGET \
 	$(NULL)
--- a/xpcom/base/nsMemoryReporterManager.cpp
+++ b/xpcom/base/nsMemoryReporterManager.cpp
@@ -252,17 +252,17 @@ NS_MEMORY_REPORTER_IMPLEMENT(Resident,
 
 /**
  ** memory reporter implementation for jemalloc and OSX malloc,
  ** to obtain info on total memory in use (that we know about,
  ** at least -- on OSX, there are sometimes other zones in use).
  **/
 
 #if defined(MOZ_MEMORY)
-#  if defined(XP_WIN) || defined(SOLARIS) || defined(ANDROID)
+#  if defined(XP_WIN) || defined(SOLARIS) || defined(ANDROID) || defined(XP_MACOSX)
 #    define HAVE_JEMALLOC_STATS 1
 #    include "jemalloc.h"
 #  elif defined(XP_LINUX)
 #    define HAVE_JEMALLOC_STATS 1
 #    include "jemalloc_types.h"
 // jemalloc is directly linked into firefox-bin; libxul doesn't link
 // with it.  So if we tried to use jemalloc_stats directly here, it
 // wouldn't be defined.  Instead, we don't include the jemalloc header