bug 478044 and bug 485227 - enable jemalloc for windows ce, 3 patches rolled up
author      Brad Lassey <blassey@mozilla.com>
date        Wed, 15 Apr 2009 20:39:55 -0400
changeset   24769 8274e63ba0a58d09b24e1e8a0b7dc10f1fef3220
parent      24768 8421bdf725f39ec3d8c7be23378f99585cc3bd16
child       24770 d9a88ab363ea7a75bba8fded1373b8a29e5ac60c
push id     1213
push user   blassey@mozilla.com
push date   Thu, 16 Apr 2009 00:42:45 +0000
bugs        478044, 485227
milestone   1.9.1b4pre
bug 478044 and bug 485227 - enable jemalloc for windows ce, 3 patches rolled up:
bug 478044 - enable jemalloc for windows ce r=stuart, jasone
bug 478044 - Addresses problems with reallocating unaligned memory on windows ce r=pavlov, jasone
bug 485227 - Don't force link jemalloc on windows ce r=ted, crowder
build/wince/shunt/Makefile.in
build/wince/tools/arm-wince-gcc.c
build/wince/tools/arm-wince-link.c
js/src/Makefile.in
memory/jemalloc/Makefile.in
memory/jemalloc/jemalloc.c
--- a/build/wince/shunt/Makefile.in
+++ b/build/wince/shunt/Makefile.in
@@ -65,18 +65,19 @@ EXPORTS = include/windows.h \
   include/process.h \
   include/signal.h \
   $(NULL)
 
 DIRS += include/sys
 
 ifdef MOZ_MEMORY
 REQUIRES = jemalloc 
-JEMALLOC_LIB = $(DIST)/lib/jemalloc.lib 
 CFLAGS += -DMOZ_MEMORY
 CXXFLAGS += -DMOZ_MEMORY
 endif
 
+EXTRA_LIBS += $(JEMALLOC_LIBS)
+
 CPPSRCS = map.cpp
 
 DEFINES += -DMOZCE_SHUNT_EXPORTS
 
 include $(topsrcdir)/config/rules.mk
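
Note: together with the two linker-wrapper hunks and the js/src hunk below, this change stops force-linking jemalloc into every WinCE binary. The wrappers previously injected JEMALLOC_LIB unconditionally under MOZ_MEMORY; now each consumer that actually needs jemalloc (the shunt here, the JS shell below) pulls it in via EXTRA_LIBS += $(JEMALLOC_LIBS). The definition of JEMALLOC_LIBS itself is not part of this changeset; presumably the build's autoconf.mk supplies it.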
--- a/build/wince/tools/arm-wince-gcc.c
+++ b/build/wince/tools/arm-wince-gcc.c
@@ -76,19 +76,16 @@ main(int argc, char **argv)
 
       args[i++] = "/ENTRY:main";
 
       args[i++] = "/SUBSYSTEM:WINDOWSCE,5.02";
 
       args[i++] = "/LIBPATH:\"" WCE_LIB "\"";
       args[i++] = "/LIBPATH:\"" WCE_CRT "\"";
       args[i++] = "/NODEFAULTLIB";
-#ifdef MOZ_MEMORY
-      args[i++] = JEMALLOC_LIB;
-#endif
 #ifdef HAVE_SHUNT   // simple test to see if we're in configure or not
       if(!getenv("NO_SHUNT")) {
 	args[i++] = "/LIBPATH:\"" SHUNT_LIB "\"";
 	args[i++] = "mozce_shunt.lib";
       }
 #endif
       args[i++] = "winsock.lib";
       args[i++] = "corelibc.lib";
--- a/build/wince/tools/arm-wince-link.c
+++ b/build/wince/tools/arm-wince-link.c
@@ -10,19 +10,16 @@ main(int argc, char **argv)
   int k = 0;
   int s = 0;
   args[i++] = LINK_PATH;
 
   args[i++] = "/LIBPATH:\"" WCE_LIB "\"";
   args[i++] = "/LIBPATH:\"" WCE_CRT "\"";
   args[i++] = "/NODEFAULTLIB";
 
-#ifdef MOZ_MEMORY
-  args[i++] = JEMALLOC_LIB;
-#endif
 #ifdef HAVE_SHUNT   // simple test to see if we're in configure or not
   if(getenv("NO_SHUNT") == NULL) {
     args[i++] = "/LIBPATH:\"" SHUNT_LIB "\"";
     args[i++] = "mozce_shunt.lib";
   }
 #endif
 
   args[i++] = "corelibc.lib";
--- a/js/src/Makefile.in
+++ b/js/src/Makefile.in
@@ -51,16 +51,20 @@ GRE_MODULE	= 1
 
 PROGRAM         = js$(BIN_SUFFIX)
 # The shell uses some 'HIDDEN' symbols to produce statistics, so we
 # link directly against the .o files, not against the JS shared
 # library.
 PROGOBJS	= js.$(OBJ_SUFFIX) $(OBJS)
 LIBS		= $(NSPR_LIBS)
 
+ifdef WINCE
+EXTRA_LIBS += $(JEMALLOC_LIBS)
+endif
+
 ifdef GNU_CXX
 ifdef INTEL_CXX
 # icc gets special optimize flags
 ifdef MOZ_PROFILE_GENERATE
 MODULE_OPTIMIZE_FLAGS = -O0
 else
 MODULE_OPTIMIZE_FLAGS = -O2 -ip
 #XXX: do we want different INTERP_OPTIMIZER flags here?
--- a/memory/jemalloc/Makefile.in
+++ b/memory/jemalloc/Makefile.in
@@ -44,16 +44,20 @@ VPATH		= @srcdir@
 include $(DEPTH)/config/autoconf.mk
 
 MODULE		= jemalloc
 
 # jemalloc.c properly uses 'static', so don't burden it with manually exposing
 # symbols.
 VISIBILITY_FLAGS=
 
+ifeq (WINCE,$(OS_TARGET))
+DEFINES+= -DMOZCE_SHUNT_H
+endif
+
 ifeq (WINNT,$(OS_TARGET))
 # Two options for Windows, either you build the CRT from source,
 # or you use a pre-built DLL.
 ifneq (,$(WIN32_CRT_SRC_DIR))
 # Building the CRT from source
 CRT_OBJ_DIR=./$(shell basename "$(WIN32_CRT_SRC_DIR)")
 libs:: $(CRT_OBJ_DIR)/build/intel/mozcrt19.dll
 	$(INSTALL) $< $(FINAL_TARGET)
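
Note: MOZCE_SHUNT_H looks like the include guard of the WinCE shunt header, so pre-defining it here would keep the shunt's malloc/free remapping out of jemalloc's own compilation, since jemalloc must define those symbols rather than call the shunt's. (This reading is inferred from the guard name; the patch does not spell it out.)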
--- a/memory/jemalloc/jemalloc.c
+++ b/memory/jemalloc/jemalloc.c
@@ -185,47 +185,61 @@
 #ifdef MOZ_MEMORY_LINUX
 #define	_GNU_SOURCE /* For mremap(2). */
 #define	issetugid() 0
 #if 0 /* Enable in order to test decommit code on Linux. */
 #  define MALLOC_DECOMMIT
 #endif
 #endif
 
+#ifndef MOZ_MEMORY_WINCE
 #include <sys/types.h>
 
 #include <errno.h>
+#include <stdlib.h>
+#endif
 #include <limits.h>
 #include <stdarg.h>
 #include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
 
 #ifdef MOZ_MEMORY_WINDOWS
+#ifndef MOZ_MEMORY_WINCE
 #include <cruntime.h>
 #include <internal.h>
+#include <io.h>
+#else
+#include <crtdefs.h>
+#define SIZE_MAX UINT_MAX
+#endif
 #include <windows.h>
-#include <io.h>
 
 #pragma warning( disable: 4267 4996 4146 )
 
 #define	false FALSE
 #define	true TRUE
 #define	inline __inline
 #define	SIZE_T_MAX SIZE_MAX
 #define	STDERR_FILENO 2
 #define	PATH_MAX MAX_PATH
 #define	vsnprintf _vsnprintf
 
+#ifndef NO_TLS
 static unsigned long tlsIndex = 0xffffffff;
+#endif 
 
 #define	__thread
+#ifdef MOZ_MEMORY_WINCE
+#define	_pthread_self() GetCurrentThreadId()
+#else
 #define	_pthread_self() __threadid()
+#endif
 #define	issetugid() 0
 
+#ifndef MOZ_MEMORY_WINCE
 /* use MSVC intrinsics */
 #pragma intrinsic(_BitScanForward)
 static __forceinline int
 ffs(int x)
 {
 	unsigned long i;
 
 	if (_BitScanForward(&i, x) != 0)
@@ -243,16 +257,50 @@ getenv(const char *name)
 {
 
 	if (GetEnvironmentVariableA(name, (LPSTR)&mozillaMallocOptionsBuf,
 		    sizeof(mozillaMallocOptionsBuf)) > 0)
 		return (mozillaMallocOptionsBuf);
 
 	return (NULL);
 }
+#else
+
+static void abort() {
+	DebugBreak();
+	exit(-3);
+}
+
+static int errno = 0;
+#define ENOMEM          12
+#define EINVAL          22
+
+static char *
+getenv(const char *name)
+{
+	return (NULL);
+}
+
+static int
+ffs(int x)
+{
+        int ret;
+
+        if (x == 0)
+                return 0;
+        ret = 2;
+        if ((x & 0x0000ffff) == 0) { ret += 16; x >>= 16;}
+        if ((x & 0x000000ff) == 0) { ret += 8;  x >>= 8;}
+        if ((x & 0x0000000f) == 0) { ret += 4;  x >>= 4;}
+        if ((x & 0x00000003) == 0) { ret += 2;  x >>= 2;}
+        ret -= (x & 1);
+
+        return (ret);
+}
+#endif
 
 typedef unsigned char uint8_t;
 typedef unsigned uint32_t;
 typedef unsigned long long uint64_t;
 typedef unsigned long long uintmax_t;
 typedef long ssize_t;
 
 #define	MALLOC_DECOMMIT
@@ -343,16 +391,20 @@ typedef long ssize_t;
 #ifdef MOZ_MEMORY_DARWIN
 static const bool __isthreaded = true;
 #endif
 
 #if defined(MOZ_MEMORY_SOLARIS) && defined(MAP_ALIGN) && !defined(JEMALLOC_NEVER_USES_MAP_ALIGN)
 #define JEMALLOC_USES_MAP_ALIGN	 /* Required on Solaris 10. Might improve performance elsewhere. */
 #endif
 
+#if defined(MOZ_MEMORY_WINCE)
+#define JEMALLOC_USES_MAP_ALIGN	 /* Required for Windows CE */
+#endif
+
 #define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var))
 
 #include "qr.h"
 #include "ql.h"
 #ifdef MOZ_MEMORY_WINDOWS
    /* MSVC++ does not support C99 variable-length arrays. */
 #  define RB_NO_C99_VARARRAYS
 #endif
@@ -442,18 +494,21 @@ static const bool __isthreaded = true;
 #    undef MALLOC_BALANCE
 #  endif
 #endif
 
 /*
  * Size and alignment of memory chunks that are allocated by the OS's virtual
  * memory system.
  */
+#ifdef MOZ_MEMORY_WINCE
+#define	CHUNK_2POW_DEFAULT	21
+#else
 #define	CHUNK_2POW_DEFAULT	20
-
+#endif
 /* Maximum number of dirty pages per arena. */
 #define	DIRTY_MAX_DEFAULT	(1U << 10)
 
 /* Default reserve chunks. */
 #define	RESERVE_MIN_2POW_DEFAULT	1
 /*
  * Default range (in chunks) between reserve_min and reserve_max, in addition
  * to the mandatory one chunk per arena.
@@ -1318,23 +1373,36 @@ umax2s(uintmax_t x, char *s)
 	} while (x > 0);
 
 	return (&s[i]);
 }
 
 static void
 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
 {
+#ifdef MOZ_MEMORY_WINCE
+       wchar_t buf[1024];
+#define WRT_PRINT(s) \
+       MultiByteToWideChar(CP_ACP, 0, s, -1, buf, 1024); \
+       OutputDebugStringW(buf)
+
+       WRT_PRINT(p1);
+       WRT_PRINT(p2);
+       WRT_PRINT(p3);
+       WRT_PRINT(p4);
+#else
 #if defined(MOZ_MEMORY) && !defined(MOZ_MEMORY_WINDOWS)
 #define	_write	write
 #endif
 	_write(STDERR_FILENO, p1, (unsigned int) strlen(p1));
 	_write(STDERR_FILENO, p2, (unsigned int) strlen(p2));
 	_write(STDERR_FILENO, p3, (unsigned int) strlen(p3));
 	_write(STDERR_FILENO, p4, (unsigned int) strlen(p4));
+#endif
+
 }
 
 #define _malloc_message malloc_message
 
 void	(*_malloc_message)(const char *p1, const char *p2, const char *p3,
 	    const char *p4) = wrtmessage;
 
 #ifdef MALLOC_DEBUG
@@ -1356,17 +1424,19 @@ void	(*_malloc_message)(const char *p1, 
  * Begin mutex.  We can't use normal pthread mutexes in all places, because
  * they require malloc()ed memory, which causes bootstrapping issues in some
  * cases.
  */
 
 static bool
 malloc_mutex_init(malloc_mutex_t *mutex)
 {
-#if defined(MOZ_MEMORY_WINDOWS)
+#if defined(MOZ_MEMORY_WINCE)
+	InitializeCriticalSection(mutex);
+#elif defined(MOZ_MEMORY_WINDOWS)
 	if (__isthreaded)
 		if (! __crtInitCritSecAndSpinCount(mutex, _CRT_SPINCOUNT))
 			return (true);
 #elif defined(MOZ_MEMORY_DARWIN)
 	mutex->lock = OS_SPINLOCK_INIT;
 #elif defined(MOZ_MEMORY_LINUX)
 	pthread_mutexattr_t attr;
 	if (pthread_mutexattr_init(&attr) != 0)
@@ -1418,17 +1488,19 @@ malloc_mutex_unlock(malloc_mutex_t *mute
 	if (__isthreaded)
 		_SPINUNLOCK(&mutex->lock);
 #endif
 }
 
 static bool
 malloc_spin_init(malloc_spinlock_t *lock)
 {
-#if defined(MOZ_MEMORY_WINDOWS)
+#if defined(MOZ_MEMORY_WINCE)
+	InitializeCriticalSection(lock);
+#elif defined(MOZ_MEMORY_WINDOWS)
 	if (__isthreaded)
 		if (! __crtInitCritSecAndSpinCount(lock, _CRT_SPINCOUNT))
 			return (true);
 #elif defined(MOZ_MEMORY_DARWIN)
 	lock->lock = OS_SPINLOCK_INIT;
 #elif defined(MOZ_MEMORY_LINUX)
 	pthread_mutexattr_t attr;
 	if (pthread_mutexattr_init(&attr) != 0)
@@ -1699,23 +1771,25 @@ static inline const char *
 
 #ifdef MALLOC_STATS
 /*
  * Print to stderr in such a way as to (hopefully) avoid memory allocation.
  */
 static void
 malloc_printf(const char *format, ...)
 {
+#ifndef WINCE
 	char buf[4096];
 	va_list ap;
 
 	va_start(ap, format);
 	vsnprintf(buf, sizeof(buf), format, ap);
 	va_end(ap);
 	_malloc_message(buf, "", "", "");
+#endif
 }
 #endif
 
 /******************************************************************************/
 
 #ifdef MALLOC_DECOMMIT
 static inline void
 pages_decommit(void *addr, size_t size)
@@ -2084,32 +2158,89 @@ rb_wrap(static, extent_tree_ad_, extent_
  * End extent tree code.
  */
 /******************************************************************************/
 /*
  * Begin chunk management functions.
  */
 
 #ifdef MOZ_MEMORY_WINDOWS
+#ifdef MOZ_MEMORY_WINCE
+#define ALIGN_ADDR2OFFSET(al, ad) \
+	((uintptr_t)ad & (al - 1))
+static void *
+pages_map_align(size_t size, int pfd, size_t alignment)
+{
+	
+	void *ret; 
+	int offset;
+	if (size % alignment)
+		size += (alignment - (size % alignment));
+	assert(size >= alignment);
+	ret = pages_map(NULL, size, pfd);
+	offset = ALIGN_ADDR2OFFSET(alignment, ret);
+	if (offset) {  
+		/* try to over-allocate by the amount we're offset */
+		void *tmp;
+		pages_unmap(ret, size);
+		tmp = VirtualAlloc(NULL, size + alignment - offset, 
+					 MEM_RESERVE, PAGE_NOACCESS);
+		if (offset == ALIGN_ADDR2OFFSET(alignment, tmp))
+			ret = VirtualAlloc((void*)((intptr_t)tmp + alignment 
+						   - offset), size, MEM_COMMIT,
+					   PAGE_READWRITE);
+		else 
+			VirtualFree(tmp, 0, MEM_RELEASE);
+		offset = ALIGN_ADDR2OFFSET(alignment, ret);
+		
+	
+		if (offset) {  
+			/* over-allocate to ensure we have an aligned region */
+			ret = VirtualAlloc(NULL, size + alignment, MEM_RESERVE, 
+					   PAGE_NOACCESS);
+			offset = ALIGN_ADDR2OFFSET(alignment, ret);
+			ret = VirtualAlloc((void*)((intptr_t)ret + 
+						   alignment - offset),
+					   size, MEM_COMMIT, PAGE_READWRITE);
+		}
+	}
+	return (ret);
+}
+#endif
+
 static void *
 pages_map(void *addr, size_t size, int pfd)
 {
-	void *ret;
-
+	void *ret = NULL;
+#if defined(MOZ_MEMORY_WINCE)
+	void *va_ret;
+	assert(addr == NULL);
+	va_ret = VirtualAlloc(addr, size, MEM_RESERVE, PAGE_NOACCESS);
+	if (va_ret)
+		ret = VirtualAlloc(va_ret, size, MEM_COMMIT, PAGE_READWRITE);
+	assert(va_ret == ret);
+#elif defined(MOZ_MEMORY_WINDOWS)
 	ret = VirtualAlloc(addr, size, MEM_COMMIT | MEM_RESERVE,
 	    PAGE_READWRITE);
-
+#endif
 	return (ret);
 }
 
 static void
 pages_unmap(void *addr, size_t size)
 {
-
 	if (VirtualFree(addr, 0, MEM_RELEASE) == 0) {
+#ifdef MOZ_MEMORY_WINCE
+		if (GetLastError() == ERROR_INVALID_PARAMETER) {
+			MEMORY_BASIC_INFORMATION info;
+			VirtualQuery(addr, &info, sizeof(info));
+			if (VirtualFree(info.AllocationBase, 0, MEM_RELEASE))
+				return;
+		}
+#endif
 		_malloc_message(_getprogname(),
 		    ": (malloc) Error in VirtualFree()\n", "", "");
 		if (opt_abort)
 			abort();
 	}
 }
 #elif (defined(MOZ_MEMORY_DARWIN))
 static void *
@@ -2161,32 +2292,32 @@ pages_copy(void *dest, const void *src, 
 	assert((void *)((uintptr_t)src & ~pagesize_mask) == src);
 
 	vm_copy(mach_task_self(), (vm_address_t)src, (vm_size_t)n,
 	    (vm_address_t)dest);
 }
 #else /* MOZ_MEMORY_DARWIN */
 #ifdef JEMALLOC_USES_MAP_ALIGN
 static void *
-pages_map_align(size_t size, int pfd)
+pages_map_align(size_t size, int pfd, size_t alignment)
 {
 	void *ret;
 
 	/*
 	 * We don't use MAP_FIXED here, because it can cause the *replacement*
 	 * of existing mappings, and we only want to create new mappings.
 	 */
 #ifdef MALLOC_PAGEFILE
 	if (pfd != -1) {
-		ret = mmap((void *)chunksize, size, PROT_READ | PROT_WRITE, MAP_PRIVATE |
+		ret = mmap((void *)alignment, size, PROT_READ | PROT_WRITE, MAP_PRIVATE |
 		    MAP_NOSYNC | MAP_ALIGN, pfd, 0);
 	} else
 #endif
 	       {
-		ret = mmap((void *)chunksize, size, PROT_READ | PROT_WRITE, MAP_PRIVATE |
+		ret = mmap((void *)alignment, size, PROT_READ | PROT_WRITE, MAP_PRIVATE |
 		    MAP_NOSYNC | MAP_ALIGN | MAP_ANON, -1, 0);
 	}
 	assert(ret != NULL);
 
 	if (ret == MAP_FAILED)
 		ret = NULL;
 	return (ret);
 }
@@ -2261,31 +2392,31 @@ malloc_rtree_new(unsigned bits)
 
 	bits_per_level = ffs(pow2_ceil((MALLOC_RTREE_NODESIZE /
 	    sizeof(void *)))) - 1;
 	height = bits / bits_per_level;
 	if (height * bits_per_level != bits)
 		height++;
 	assert(height * bits_per_level >= bits);
 
-	ret = base_calloc(1, sizeof(malloc_rtree_t) + (sizeof(unsigned) *
+	ret = (malloc_rtree_t*)base_calloc(1, sizeof(malloc_rtree_t) + (sizeof(unsigned) *
 	    (height - 1)));
 	if (ret == NULL)
 		return (NULL);
 
 	malloc_spin_init(&ret->lock);
 	ret->height = height;
 	if (bits_per_level * height > bits)
 		ret->level2bits[0] = bits % bits_per_level;
 	else
 		ret->level2bits[0] = bits_per_level;
 	for (i = 1; i < height; i++)
 		ret->level2bits[i] = bits_per_level;
 
-	ret->root = base_calloc(1, sizeof(void *) << ret->level2bits[0]);
+	ret->root = (void**)base_calloc(1, sizeof(void *) << ret->level2bits[0]);
 	if (ret->root == NULL) {
 		/*
 		 * We leak the rtree here, since there's no generic base
 		 * deallocation.
 		 */
 		return (NULL);
 	}
 
@@ -2302,17 +2433,17 @@ malloc_rtree_get(malloc_rtree_t *rtree, 
 	void **node, **child;
 
 	malloc_spin_lock(&rtree->lock);
 	for (i = lshift = 0, height = rtree->height, node = rtree->root;
 	    i < height - 1;
 	    i++, lshift += bits, node = child) {
 		bits = rtree->level2bits[i];
 		subkey = (key << lshift) >> ((SIZEOF_PTR << 3) - bits);
-		child = node[subkey];
+		child = (void**)node[subkey];
 		if (child == NULL) {
 			malloc_spin_unlock(&rtree->lock);
 			return (NULL);
 		}
 	}
 
 	/* node is a leaf, so it contains values rather than node pointers. */
 	bits = rtree->level2bits[i];
@@ -2331,19 +2462,19 @@ malloc_rtree_set(malloc_rtree_t *rtree, 
 	void **node, **child;
 
 	malloc_spin_lock(&rtree->lock);
 	for (i = lshift = 0, height = rtree->height, node = rtree->root;
 	    i < height - 1;
 	    i++, lshift += bits, node = child) {
 		bits = rtree->level2bits[i];
 		subkey = (key << lshift) >> ((SIZEOF_PTR << 3) - bits);
-		child = node[subkey];
+		child = (void**)node[subkey];
 		if (child == NULL) {
-			child = base_calloc(1, sizeof(void *) <<
+			child = (void**)base_calloc(1, sizeof(void *) <<
 			    rtree->level2bits[i+1]);
 			if (child == NULL) {
 				malloc_spin_unlock(&rtree->lock);
 				return (true);
 			}
 			node[subkey] = child;
 		}
 	}
@@ -2382,17 +2513,17 @@ chunk_alloc_mmap(size_t size, bool pagef
 	 * acquire the final result via one mapping operation.  This means
 	 * unmapping any preliminary result that is not correctly aligned.
 	 *
 	 * The MALLOC_PAGEFILE code also benefits from this mapping algorithm,
 	 * since it reduces the number of page files.
 	 */
 
 #ifdef JEMALLOC_USES_MAP_ALIGN
-	ret = pages_map_align(size, pfd);
+	ret = pages_map_align(size, pfd, chunksize);
 #else
 	ret = pages_map(NULL, size, pfd);
 	if (ret == NULL)
 		goto RETURN;
 
 	offset = CHUNK_ADDR2OFFSET(ret);
 	if (offset != 0) {
 		/* Deallocate, then try to allocate at (ret + size - offset). */
@@ -2420,17 +2551,16 @@ chunk_alloc_mmap(size_t size, bool pagef
 				    chunksize - offset), size, pfd);
 			}
 			/*
 			 * Failure here indicates a race with another thread, so
 			 * try again.
 			 */
 		}
 	}
-
 RETURN:
 #endif
 #ifdef MALLOC_PAGEFILE
 	if (pfd != -1)
 		pagefile_close(pfd);
 #endif
 #ifdef MALLOC_STATS
 	if (ret != NULL)
@@ -2774,17 +2904,17 @@ choose_arena(void)
 	 */
 #ifndef NO_TLS
 	if (__isthreaded == false) {
 	    /* Avoid the overhead of TLS for single-threaded operation. */
 	    return (arenas[0]);
 	}
 
 #  ifdef MOZ_MEMORY_WINDOWS
-	ret = TlsGetValue(tlsIndex);
+	ret = (arena_t*)TlsGetValue(tlsIndex);
 #  else
 	ret = arenas_map;
 #  endif
 
 	if (ret == NULL) {
 		ret = choose_arena_hard();
 		assert(ret != NULL);
 	}
@@ -3319,17 +3449,17 @@ arena_run_alloc(arena_t *arena, arena_bi
 	assert((size & pagesize_mask) == 0);
 
 	chunk = NULL;
 	while (true) {
 		/* Search the arena's chunks for the lowest best fit. */
 		key.bits = size | CHUNK_MAP_KEY;
 		mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key);
 		if (mapelm != NULL) {
-			arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
+			arena_chunk_t *run_chunk = (arena_chunk_t*)CHUNK_ADDR2BASE(mapelm);
 			size_t pageind = ((uintptr_t)mapelm -
 			    (uintptr_t)run_chunk->map) /
 			    sizeof(arena_chunk_map_t);
 
 			if (chunk != NULL)
 				chunk_dealloc(chunk, chunksize);
 			run = (arena_run_t *)((uintptr_t)run_chunk + (pageind
 			    << pagesize_2pow));
@@ -4051,32 +4181,32 @@ arena_palloc(arena_t *arena, size_t alig
 	}
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
 
 	offset = (uintptr_t)ret & (alignment - 1);
 	assert((offset & pagesize_mask) == 0);
 	assert(offset < alloc_size);
 	if (offset == 0)
-		arena_run_trim_tail(arena, chunk, ret, alloc_size, size, false);
+		arena_run_trim_tail(arena, chunk, (arena_run_t*)ret, alloc_size, size, false);
 	else {
 		size_t leadsize, trailsize;
 
 		leadsize = alignment - offset;
 		if (leadsize > 0) {
-			arena_run_trim_head(arena, chunk, ret, alloc_size,
+			arena_run_trim_head(arena, chunk, (arena_run_t*)ret, alloc_size,
 			    alloc_size - leadsize);
 			ret = (void *)((uintptr_t)ret + leadsize);
 		}
 
 		trailsize = alloc_size - leadsize - size;
 		if (trailsize != 0) {
 			/* Trim trailing space. */
 			assert(trailsize < alloc_size);
-			arena_run_trim_tail(arena, chunk, ret, size + trailsize,
+			arena_run_trim_tail(arena, chunk, (arena_run_t*)ret, size + trailsize,
 			    size, false);
 		}
 	}
 
 #ifdef MALLOC_STATS
 	arena->stats.nmalloc_large++;
 	arena->stats.allocated_large += size;
 #endif
@@ -4344,17 +4474,17 @@ arena_dalloc_small(arena_t *arena, arena
 		 * non-full run, if one exists.
 		 */
 		if (bin->runcur == NULL)
 			bin->runcur = run;
 		else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
 			/* Switch runcur. */
 			if (bin->runcur->nfree > 0) {
 				arena_chunk_t *runcur_chunk =
-				    CHUNK_ADDR2BASE(bin->runcur);
+				    (arena_chunk_t*)CHUNK_ADDR2BASE(bin->runcur);
 				size_t runcur_pageind =
 				    (((uintptr_t)bin->runcur -
 				    (uintptr_t)runcur_chunk)) >> pagesize_2pow;
 				arena_chunk_map_t *runcur_mapelm =
 				    &runcur_chunk->map[runcur_pageind];
 
 				/* Insert runcur. */
 				assert(arena_run_tree_search(&bin->runs,
@@ -4910,16 +5040,19 @@ huge_palloc(size_t alignment, size_t siz
 #ifdef MALLOC_PAGEFILE
 	if (opt_pagefile) {
 		pfd = pagefile_init(size);
 		if (pfd == -1)
 			return (NULL);
 	} else
 #endif
 		pfd = -1;
+#ifdef JEMALLOC_USES_MAP_ALIGN
+	ret = pages_map_align(chunk_size, pfd, alignment);
+#else
 	do {
 		void *over;
 
 		over = chunk_alloc(alloc_size, false, false);
 		if (over == NULL) {
 			base_node_dealloc(node);
 			ret = NULL;
 			goto RETURN;
@@ -4931,17 +5064,17 @@ huge_palloc(size_t alignment, size_t siz
 		ret = (void *)((uintptr_t)over + offset);
 		chunk_dealloc(over, alloc_size);
 		ret = pages_map(ret, chunk_size, pfd);
 		/*
 		 * Failure here indicates a race with another thread, so try
 		 * again.
 		 */
 	} while (ret == NULL);
-
+#endif
 	/* Insert node into huge. */
 	node->addr = ret;
 #ifdef MALLOC_DECOMMIT
 	psize = PAGE_CEILING(size);
 	node->size = psize;
 #else
 	node->size = chunk_size;
 #endif
@@ -5388,31 +5521,31 @@ malloc_print_stats(void)
 	}
 }
 
 /*
  * FreeBSD's pthreads implementation calls malloc(3), so the malloc
  * implementation has to take pains to avoid infinite recursion during
  * initialization.
  */
-#if (defined(MOZ_MEMORY_WINDOWS) || defined(MOZ_MEMORY_DARWIN))
+#if (defined(MOZ_MEMORY_WINDOWS) || defined(MOZ_MEMORY_DARWIN)) && !defined(MOZ_MEMORY_WINCE)
 #define	malloc_init() false
 #else
 static inline bool
 malloc_init(void)
 {
 
 	if (malloc_initialized == false)
 		return (malloc_init_hard());
 
 	return (false);
 }
 #endif
 
-#ifndef MOZ_MEMORY_WINDOWS
+#if !defined(MOZ_MEMORY_WINDOWS) || defined(MOZ_MEMORY_WINCE) 
 static
 #endif
 bool
 malloc_init_hard(void)
 {
 	unsigned i;
 	char buf[PATH_MAX + 1];
 	const char *opts;
@@ -6002,18 +6135,19 @@ malloc_shutdown()
  * zone infrastructure.
  */
 #ifdef MOZ_MEMORY_DARWIN
 #  define ZONE_INLINE	inline
 #else
 #  define ZONE_INLINE
 #endif
 
-/* Mangle standard interfaces on Darwin, in order to avoid linking problems. */
-#ifdef MOZ_MEMORY_DARWIN
+/* Mangle standard interfaces on Darwin and Windows CE, 
+   in order to avoid linking problems. */
+#if defined(MOZ_MEMORY_DARWIN) || defined(MOZ_MEMORY_WINCE)
 #define	malloc(a)	moz_malloc(a)
 #define	valloc(a)	moz_valloc(a)
 #define	calloc(a, b)	moz_calloc(a, b)
 #define	realloc(a, b)	moz_realloc(a, b)
 #define	free(a)		moz_free(a)
 #endif
 
 ZONE_INLINE
@@ -6811,17 +6945,17 @@ reserve_min_set(size_t min)
 		/* Force the reserve to grow by allocating/deallocating. */
 		malloc_mutex_unlock(&reserve_mtx);
 #ifdef MALLOC_DECOMMIT
 		{
 			void **chunks;
 			size_t i, n;
 
 			n = size >> opt_chunk_2pow;
-			chunks = imalloc(n * sizeof(void *));
+			chunks = (void**)imalloc(n * sizeof(void *));
 			if (chunks == NULL)
 				return (true);
 			for (i = 0; i < n; i++) {
 				chunks[i] = huge_malloc(chunksize, false);
 				if (chunks[i] == NULL) {
 					size_t j;
 
 					for (j = 0; j < i; j++) {
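
Two notes on the WinCE-specific techniques above.

Aligned mapping: Windows CE offers no aligned-allocation primitive, so the fallback path of the new pages_map_align() reserves size + alignment bytes and commits size bytes at the first alignment boundary inside the reservation. (With CHUNK_2POW_DEFAULT bumped to 21 on CE, the chunksize alignment is 2^21 = 2 MiB, versus 2^20 = 1 MiB elsewhere.) A minimal standalone sketch of that trick, with a hypothetical name and error handling elided:

/* map_aligned() is illustrative, not from the patch. */
#include <windows.h>

static void *
map_aligned(size_t size, size_t alignment)  /* power of two, >= page size */
{
	unsigned char *base, *aligned;
	size_t offset;

	/* Reserve enough address space to contain an aligned subrange. */
	base = (unsigned char *)VirtualAlloc(NULL, size + alignment,
	    MEM_RESERVE, PAGE_NOACCESS);
	if (base == NULL)
		return (NULL);

	/* Round up to the next alignment boundary inside the reservation. */
	offset = (size_t)base & (alignment - 1);
	aligned = offset ? base + (alignment - offset) : base;

	/* Commit only the aligned subrange; the slack stays reserved. */
	return (VirtualAlloc(aligned, size, MEM_COMMIT, PAGE_READWRITE));
}

The flip side shows up in pages_unmap(): MEM_RELEASE must be handed the allocation base, so when VirtualFree() on the committed address fails with ERROR_INVALID_PARAMETER, the patch queries info.AllocationBase with VirtualQuery() and releases that instead.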
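
The ffs() fallback: the CE build cannot rely on the _BitScanForward intrinsic, so the patch substitutes a branchy binary search for the lowest set bit (skip the low 16 bits if clear, then 8, 4, 2, and adjust by the final bit). A quick self-contained check of that logic; the harness is illustrative, not part of the patch:

#include <stdio.h>

static int
ffs_fallback(int x)
{
	int ret;

	if (x == 0)
		return (0);
	ret = 2;
	if ((x & 0x0000ffff) == 0) { ret += 16; x >>= 16; }
	if ((x & 0x000000ff) == 0) { ret += 8;  x >>= 8; }
	if ((x & 0x0000000f) == 0) { ret += 4;  x >>= 4; }
	if ((x & 0x00000003) == 0) { ret += 2;  x >>= 2; }
	ret -= (x & 1);

	return (ret);
}

int
main(void)
{
	/* Expected: 1 4 17 0 (bit positions are 1-based; ffs(0) == 0). */
	printf("%d %d %d %d\n", ffs_fallback(1), ffs_fallback(8),
	    ffs_fallback(0x10000), ffs_fallback(0));
	return (0);
}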