Fix race saving/restoring malloc hooks by going back to overriding malloc, etc., plus some additional cleanup. b=392009 r+a1.9=brendan
authordbaron@dbaron.org
Fri, 28 Sep 2007 15:39:59 -0700
changeset 6413 530e99db7a23194926ab30fa3722133b2fa81a84
parent 6412 7493bc3a8046a1656347024316834eeec1572aa3
child 6414 920a8767d2b580b387bbc88b574b5871678edb94
push idunknown
push userunknown
push dateunknown
bugs392009
milestone1.9a9pre
Fix race saving/restoring malloc hooks by going back to overriding malloc, etc., plus some additional cleanup. b=392009 r+a1.9=brendan
tools/trace-malloc/lib/nsTraceMalloc.c
tools/trace-malloc/lib/nsWinTraceMalloc.cpp
--- a/tools/trace-malloc/lib/nsTraceMalloc.c
+++ b/tools/trace-malloc/lib/nsTraceMalloc.c
@@ -83,19 +83,21 @@
 #include <malloc.h>
 
 #define WRITE_FLAGS "w"
 
 #ifdef WRAP_SYSTEM_INCLUDES
 #pragma GCC visibility push(default)
 #endif
 extern __ptr_t __libc_malloc(size_t);
+extern __ptr_t __libc_calloc(size_t, size_t);
 extern __ptr_t __libc_realloc(__ptr_t, size_t);
 extern void    __libc_free(__ptr_t);
 extern __ptr_t __libc_memalign(size_t, size_t);
+extern __ptr_t __libc_valloc(size_t);
 #ifdef WRAP_SYSTEM_INCLUDES
 #pragma GCC visibility pop
 #endif
 
 #elif defined(XP_WIN32)
 
 #include <sys/timeb.h>                  /* for timeb */
 #include <sys/stat.h>                   /* for fstat */
@@ -142,38 +144,56 @@ static char      *sdlogname = NULL; /* f
 
 /*
  * This enables/disables trace-malloc logging.
  *
  * It is separate from suppress_tracing so that we do not have to pay
  * the performance cost of repeated TM_TLS_GET_DATA calls when
  * trace-malloc is disabled (which is not as bad as the locking we used
  * to have).
+ *
+ * It must default to zero, since it can be tested by the Linux malloc
+ * hooks before NS_TraceMallocStartup sets it.
  */
-static uint32 tracing_enabled = 1;
+static uint32 tracing_enabled = 0;
 
 /*
  * This lock must be held while manipulating the calltree, the
  * allocations table, the log, or the tmstats.
  *
  * Callers should not *enter* the lock without checking suppress_tracing
  * first; otherwise they risk trying to re-enter on the same thread.
  */
-#define TM_ENTER_LOCK()                                                       \
+#define TM_ENTER_LOCK(t)                                                      \
     PR_BEGIN_MACRO                                                            \
+        PR_ASSERT(t->suppress_tracing != 0);                                  \
         if (tmlock)                                                           \
             PR_Lock(tmlock);                                                  \
     PR_END_MACRO
 
-#define TM_EXIT_LOCK()                                                        \
+#define TM_EXIT_LOCK(t)                                                       \
     PR_BEGIN_MACRO                                                            \
+        PR_ASSERT(t->suppress_tracing != 0);                                  \
         if (tmlock)                                                           \
             PR_Unlock(tmlock);                                                \
     PR_END_MACRO
 
+#define TM_SUPPRESS_TRACING_AND_ENTER_LOCK(t)                                 \
+    PR_BEGIN_MACRO                                                            \
+        t->suppress_tracing++;                                                \
+        TM_ENTER_LOCK(t);                                                     \
+    PR_END_MACRO
+
+#define TM_EXIT_LOCK_AND_UNSUPPRESS_TRACING(t)                                \
+    PR_BEGIN_MACRO                                                            \
+        TM_EXIT_LOCK(t);                                                      \
+        t->suppress_tracing--;                                                \
+    PR_END_MACRO
+
+
 /*
  * Thread-local storage.
  *
  * We can't use NSPR thread-local storage for this because it mallocs
  * within PR_GetThreadPrivate (the first time) and PR_SetThreadPrivate
  * (which can be worked around by protecting all uses of those functions
  * with a monitor, ugh) and because it calls malloc/free when the
  * thread-local storage is in an inconsistent state within
@@ -571,17 +591,17 @@ static PLHashTable *libraries = NULL;
 
 /* Table of filename pathnames mapped to logged 'G' record serial numbers. */
 static PLHashTable *filenames = NULL;
 
 /* Table mapping method names to logged 'N' record serial numbers. */
 static PLHashTable *methods = NULL;
 
 static callsite *
-calltree(void **stack, size_t num_stack_entries)
+calltree(void **stack, size_t num_stack_entries, tm_thread *t)
 {
     logfile *fp = logfp;
     void *pc;
     uint32 nkids;
     callsite *parent, *site, **csp, *tmp;
     int maxstack;
     uint32 library_serial, method_serial, filename_serial;
     const char *library, *method, *filename;
@@ -593,17 +613,17 @@ calltree(void **stack, size_t num_stack_
     nsCodeAddressDetails details;
     nsresult rv;
 
     /*
      * FIXME bug 391749: We should really lock only the minimum amount
      * that we need to in this function, because it makes some calls
      * that could lock in the system's shared library loader.
      */
-    TM_ENTER_LOCK();
+    TM_ENTER_LOCK(t);
 
     maxstack = (num_stack_entries > tmstats.calltree_maxstack);
     if (maxstack) {
         /* these two are the same, although that used to be less clear */
         tmstats.calltree_maxstack = num_stack_entries;
         tmstats.calltree_maxdepth = num_stack_entries;
     }
 
@@ -667,19 +687,19 @@ calltree(void **stack, size_t num_stack_
          * we have to exit tmlock around this call.  For details, see
          * https://bugzilla.mozilla.org/show_bug.cgi?id=363334#c3
          *
          * We could be more efficient by building the nodes in the
          * calltree, exiting the monitor once to describe all of them,
          * and then filling in the descriptions for any that hadn't been
          * described already.  But this is easier for now.
          */
-        TM_EXIT_LOCK();
+        TM_EXIT_LOCK(t);
         rv = NS_DescribeCodeAddress(pc, &details);
-        TM_ENTER_LOCK();
+        TM_ENTER_LOCK(t);
         if (NS_FAILED(rv)) {
             tmstats.dladdr_failures++;
             goto fail;
         }
 
         /* Check whether we need to emit a library trace record. */
         library_serial = 0;
         library = NULL;
@@ -857,21 +877,21 @@ calltree(void **stack, size_t num_stack_
 
       upward:
         parent = site;
     } while (stack_index > 0);
 
     if (maxstack)
         calltree_maxstack_top = site;
 
-    TM_EXIT_LOCK();
+    TM_EXIT_LOCK(t);
     return site;
 
   fail:
-    TM_EXIT_LOCK();
+    TM_EXIT_LOCK(t);
     return NULL;
 }
 
 /*
  * Buffer the stack from top at low index to bottom at high, so that we can
  * reverse it in calltree.
  */
 PR_STATIC_CALLBACK(void)
@@ -936,25 +956,25 @@ backtrace(tm_thread *t, int skip)
         PR_ASSERT(info->entries * 2 == new_stack_buffer_size); /* same stack */
     }
 
     if (info->entries == 0) {
         t->suppress_tracing--;
         return NULL;
     }
 
-    site = calltree(info->buffer, info->entries);
+    site = calltree(info->buffer, info->entries, t);
 
-    TM_ENTER_LOCK();
+    TM_ENTER_LOCK(t);
     tmstats.backtrace_calls++;
     if (!site) {
         tmstats.backtrace_failures++;
         PR_ASSERT(tmstats.backtrace_failures < 100);
     }
-    TM_EXIT_LOCK();
+    TM_EXIT_LOCK(t);
 
     t->suppress_tracing--;
     return site;
 }
 
 typedef struct allocation {
     PLHashEntry entry;
     size_t      size;
@@ -1077,130 +1097,182 @@ static void
 ShutdownHooker(void)
 {
     PR_ASSERT(malloc_logger == my_malloc_logger);
     malloc_logger = NULL;
 }
 
 #elif defined(XP_UNIX)
 
-static __ptr_t (*old_malloc_hook)(size_t size, __const __malloc_ptr_t caller);
-static __ptr_t (*old_realloc_hook)(__ptr_t ptr, size_t size, __const __malloc_ptr_t caller);
-static __ptr_t (*old_memalign_hook)(size_t boundary, size_t size, __const __malloc_ptr_t caller);
-static void (*old_free_hook)(__ptr_t ptr, __const __malloc_ptr_t caller);
+/*
+ * We can't use glibc's malloc hooks because they can't be used in a
+ * threadsafe manner.  They require unsetting the hooks to call into the
+ * original malloc implementation, and then resetting them when the
+ * original implementation returns.  If another thread calls the same
+ * allocation function while the hooks are unset, we have no chance to
+ * intercept the call.
+ */
 
-static __ptr_t
-my_malloc_hook(size_t size, __const __malloc_ptr_t caller)
+NS_EXTERNAL_VIS_(__ptr_t)
+malloc(size_t size)
 {
-    tm_thread *t;
     PRUint32 start, end;
     __ptr_t ptr;
+    tm_thread *t;
 
-    PR_ASSERT(tracing_enabled);
-    t = tm_get_thread();
+    if (!tracing_enabled || !PR_Initialized() ||
+        (t = tm_get_thread())->suppress_tracing != 0) {
+        return __libc_malloc(size);
+    }
+
     t->suppress_tracing++;
-    __malloc_hook = old_malloc_hook;
     start = PR_IntervalNow();
     ptr = __libc_malloc(size);
     end = PR_IntervalNow();
-    __malloc_hook = my_malloc_hook;
     t->suppress_tracing--;
+
     MallocCallback(ptr, size, start, end, t);
+
     return ptr;
 }
 
-static __ptr_t
-my_realloc_hook(__ptr_t oldptr, size_t size, __const __malloc_ptr_t caller)
+NS_EXTERNAL_VIS_(__ptr_t)
+calloc(size_t count, size_t size)
 {
-    tm_thread *t;
     PRUint32 start, end;
     __ptr_t ptr;
+    tm_thread *t;
 
-    PR_ASSERT(tracing_enabled);
-    t = tm_get_thread();
+    if (!tracing_enabled || !PR_Initialized() ||
+        (t = tm_get_thread())->suppress_tracing != 0) {
+        return __libc_calloc(count, size);
+    }
+
     t->suppress_tracing++;
-    __realloc_hook = old_realloc_hook;
     start = PR_IntervalNow();
+    ptr = __libc_calloc(count, size);
+    end = PR_IntervalNow();
+    t->suppress_tracing--;
 
-    /*
-     * __libc_realloc(NULL, size) recurs into my_malloc_hook, so it's
-     * important that we've incremented t->suppress_tracing here.
-     */
+    CallocCallback(ptr, count, size, start, end, t);
+
+    return ptr;
+}
+
+NS_EXTERNAL_VIS_(__ptr_t)
+realloc(__ptr_t oldptr, size_t size)
+{
+    PRUint32 start, end;
+    __ptr_t ptr;
+    tm_thread *t;
+
+    if (!tracing_enabled || !PR_Initialized() ||
+        (t = tm_get_thread())->suppress_tracing != 0) {
+        return __libc_realloc(oldptr, size);
+    }
+
+    t->suppress_tracing++;
+    start = PR_IntervalNow();
     ptr = __libc_realloc(oldptr, size);
     end = PR_IntervalNow();
-    __realloc_hook = my_realloc_hook;
     t->suppress_tracing--;
+
+    /* FIXME bug 392008: We could race with reallocation of oldptr. */
     ReallocCallback(oldptr, ptr, size, start, end, t);
+
     return ptr;
 }
 
-static __ptr_t
-my_memalign_hook(size_t boundary, size_t size, __const __malloc_ptr_t caller)
+NS_EXTERNAL_VIS_(void*)
+valloc(size_t size)
 {
-    tm_thread *t;
     PRUint32 start, end;
     __ptr_t ptr;
+    tm_thread *t;
 
-    PR_ASSERT(tracing_enabled);
-    t = tm_get_thread();
+    if (!tracing_enabled || !PR_Initialized() ||
+        (t = tm_get_thread())->suppress_tracing != 0) {
+        return __libc_valloc(size);
+    }
+
     t->suppress_tracing++;
-    __memalign_hook = old_memalign_hook;
     start = PR_IntervalNow();
-    ptr = __libc_memalign(boundary, size);
+    ptr = __libc_valloc(size);
     end = PR_IntervalNow();
-    __memalign_hook = my_memalign_hook;
     t->suppress_tracing--;
+
     MallocCallback(ptr, size, start, end, t);
+
     return ptr;
 }
 
-static void
-my_free_hook(__ptr_t ptr, __const __malloc_ptr_t caller)
+NS_EXTERNAL_VIS_(void*)
+memalign(size_t boundary, size_t size)
 {
-    tm_thread *t;
     PRUint32 start, end;
+    __ptr_t ptr;
+    tm_thread *t;
 
-    PR_ASSERT(tracing_enabled);
-    t = tm_get_thread();
+    if (!tracing_enabled || !PR_Initialized() ||
+        (t = tm_get_thread())->suppress_tracing != 0) {
+        return __libc_memalign(boundary, size);
+    }
+
     t->suppress_tracing++;
-    __free_hook = old_free_hook;
+    start = PR_IntervalNow();
+    ptr = __libc_memalign(boundary, size);
+    end = PR_IntervalNow();
+    t->suppress_tracing--;
+
+    MallocCallback(ptr, size, start, end, t);
+
+    return ptr;
+}
+
+NS_EXTERNAL_VIS_(int)
+posix_memalign(void **memptr, size_t alignment, size_t size)
+{
+    __ptr_t ptr = memalign(alignment, size);
+    if (!ptr)
+        return ENOMEM;
+    *memptr = ptr;
+    return 0;
+}
+
+NS_EXTERNAL_VIS_(void)
+free(__ptr_t ptr)
+{
+    PRUint32 start, end;
+    tm_thread *t;
+
+    if (!tracing_enabled || !PR_Initialized() ||
+        (t = tm_get_thread())->suppress_tracing != 0) {
+        __libc_free(ptr);
+        return;
+    }
+
+    t->suppress_tracing++;
     start = PR_IntervalNow();
     __libc_free(ptr);
     end = PR_IntervalNow();
-    __free_hook = my_free_hook;
     t->suppress_tracing--;
+
+    /* FIXME bug 392008: We could race with reallocation of ptr. */
+
     FreeCallback(ptr, start, end, t);
 }
 
-static void
-StartupHooker(void)
+NS_EXTERNAL_VIS_(void)
+cfree(void *ptr)
 {
-    PR_ASSERT(__malloc_hook != my_malloc_hook);
-
-    old_malloc_hook = __malloc_hook;
-    old_realloc_hook = __realloc_hook;
-    old_memalign_hook = __memalign_hook;
-    old_free_hook = __free_hook;
-
-    __malloc_hook = my_malloc_hook;
-    __realloc_hook = my_realloc_hook;
-    __memalign_hook = my_memalign_hook;
-    __free_hook = my_free_hook;
+    free(ptr);
 }
 
-static void
-ShutdownHooker(void)
-{
-    PR_ASSERT(__malloc_hook == my_malloc_hook);
-
-    __malloc_hook = old_malloc_hook;
-    __realloc_hook = old_realloc_hook;
-    __memalign_hook = old_memalign_hook;
-    __free_hook = old_free_hook;
-}
+#define StartupHooker()                 PR_BEGIN_MACRO PR_END_MACRO
+#define ShutdownHooker()                PR_BEGIN_MACRO PR_END_MACRO
 
 #elif defined(XP_WIN32)
 
 /* See nsWinTraceMalloc.cpp. */
 
 #endif
 
 static const char magic[] = NS_TRACE_MALLOC_MAGIC;
@@ -1212,17 +1284,17 @@ log_header(int logfd)
     (void) write(logfd, magic, NS_TRACE_MALLOC_MAGIC_SIZE);
     (void) write(logfd, &ticksPerSec, sizeof ticksPerSec);
 }
 
 PR_IMPLEMENT(void)
 NS_TraceMallocStartup(int logfd)
 {
     /* We must be running on the primordial thread. */
-    PR_ASSERT(tracing_enabled == 1);
+    PR_ASSERT(tracing_enabled == 0);
     PR_ASSERT(logfp == &default_logfile);
     tracing_enabled = (logfd >= 0);
 
     if (tracing_enabled) {
         PR_ASSERT(logfp->simsize == 0); /* didn't overflow startup buffer */
 
         /* Log everything in logfp (aka default_logfile)'s buffer to logfd. */
         logfp->fd = logfd;
@@ -1440,66 +1512,59 @@ NS_TraceMallocDisable(void)
     logfile *fp;
     uint32 sample;
 
     /* Robustify in case of duplicate call. */
     PR_ASSERT(tracing_enabled);
     if (tracing_enabled == 0)
         return;
 
-    t->suppress_tracing++;
-    TM_ENTER_LOCK();
+    TM_SUPPRESS_TRACING_AND_ENTER_LOCK(t);
     for (fp = logfile_list; fp; fp = fp->next)
         flush_logfile(fp);
     sample = --tracing_enabled;
-    TM_EXIT_LOCK();
-    t->suppress_tracing--;
+    TM_EXIT_LOCK_AND_UNSUPPRESS_TRACING(t);
     if (sample == 0)
         ShutdownHooker();
 }
 
 PR_IMPLEMENT(void)
 NS_TraceMallocEnable(void)
 {
     tm_thread *t = tm_get_thread();
     uint32 sample;
 
-    t->suppress_tracing++;
-    TM_ENTER_LOCK();
+    TM_SUPPRESS_TRACING_AND_ENTER_LOCK(t);
     sample = ++tracing_enabled;
-    TM_EXIT_LOCK();
-    t->suppress_tracing--;
+    TM_EXIT_LOCK_AND_UNSUPPRESS_TRACING(t);
     if (sample == 1)
         StartupHooker();
 }
 
 PR_IMPLEMENT(int)
 NS_TraceMallocChangeLogFD(int fd)
 {
     logfile *oldfp, *fp;
     struct stat sb;
     tm_thread *t = tm_get_thread();
 
-    t->suppress_tracing++;
-    TM_ENTER_LOCK();
+    TM_SUPPRESS_TRACING_AND_ENTER_LOCK(t);
     oldfp = logfp;
     if (oldfp->fd != fd) {
         flush_logfile(oldfp);
         fp = get_logfile(fd);
         if (!fp) {
-            TM_EXIT_LOCK();
-            t->suppress_tracing--;
+            TM_EXIT_LOCK_AND_UNSUPPRESS_TRACING(t);
             return -2;
         }
         if (fd >= 0 && fstat(fd, &sb) == 0 && sb.st_size == 0)
             log_header(fd);
         logfp = fp;
     }
-    TM_EXIT_LOCK();
-    t->suppress_tracing--;
+    TM_EXIT_LOCK_AND_UNSUPPRESS_TRACING(t);
     return oldfp->fd;
 }
 
 static PRIntn
 lfd_clr_enumerator(PLHashEntry *he, PRIntn i, void *arg)
 {
     lfdset_entry *le = (lfdset_entry*) he;
     logfile *fp = (logfile*) arg;
@@ -1519,18 +1584,17 @@ lfd_clr_walk(callsite *site, logfile *fp
 }
 
 PR_IMPLEMENT(void)
 NS_TraceMallocCloseLogFD(int fd)
 {
     logfile *fp;
     tm_thread *t = tm_get_thread();
 
-    t->suppress_tracing++;
-    TM_ENTER_LOCK();
+    TM_SUPPRESS_TRACING_AND_ENTER_LOCK(t);
 
     fp = get_logfile(fd);
     if (fp) {
         flush_logfile(fp);
         if (fp == &default_logfile) {
             /* Leave default_logfile in logfile_list with an fd of -1. */
             fp->fd = -1;
 
@@ -1551,53 +1615,50 @@ NS_TraceMallocCloseLogFD(int fd)
 
             /* Reset logfp if we must, then free fp. */
             if (fp == logfp)
                 logfp = &default_logfile;
             free((void*) fp);
         }
     }
 
-    TM_EXIT_LOCK();
-    t->suppress_tracing--;
+    TM_EXIT_LOCK_AND_UNSUPPRESS_TRACING(t);
     close(fd);
 }
 
 PR_IMPLEMENT(void)
 NS_TraceMallocLogTimestamp(const char *caption)
 {
     logfile *fp;
 #ifdef XP_UNIX
     struct timeval tv;
 #endif
 #ifdef XP_WIN32
     struct _timeb tb;
 #endif
     tm_thread *t = tm_get_thread();
 
-    t->suppress_tracing++;
-    TM_ENTER_LOCK();
+    TM_SUPPRESS_TRACING_AND_ENTER_LOCK(t);
 
     fp = logfp;
     log_byte(fp, TM_EVENT_TIMESTAMP);
 
 #ifdef XP_UNIX
     gettimeofday(&tv, NULL);
     log_uint32(fp, (uint32) tv.tv_sec);
     log_uint32(fp, (uint32) tv.tv_usec);
 #endif
 #ifdef XP_WIN32
     _ftime(&tb);
     log_uint32(fp, (uint32) tb.time);
     log_uint32(fp, (uint32) tb.millitm);
 #endif
     log_string(fp, caption);
 
-    TM_EXIT_LOCK();
-    t->suppress_tracing--;
+    TM_EXIT_LOCK_AND_UNSUPPRESS_TRACING(t);
 }
 
 static PRIntn
 allocation_enumerator(PLHashEntry *he, PRIntn i, void *arg)
 {
     allocation *alloc = (allocation*) he;
     FILE *ofp = (FILE*) arg;
     callsite *site = (callsite*) he->value;
@@ -1646,92 +1707,85 @@ NS_TraceStack(int skip, FILE *ofp)
 PR_IMPLEMENT(int)
 NS_TraceMallocDumpAllocations(const char *pathname)
 {
     FILE *ofp;
     int rv;
 
     tm_thread *t = tm_get_thread();
 
-    t->suppress_tracing++;
-    TM_ENTER_LOCK();
+    TM_SUPPRESS_TRACING_AND_ENTER_LOCK(t);
 
     ofp = fopen(pathname, WRITE_FLAGS);
     if (ofp) {
         if (allocations) {
             PL_HashTableEnumerateEntries(allocations, allocation_enumerator,
                                          ofp);
         }
         rv = ferror(ofp) ? -1 : 0;
         fclose(ofp);
     } else {
         rv = -1;
     }
 
-    TM_EXIT_LOCK();
-    t->suppress_tracing--;
+    TM_EXIT_LOCK_AND_UNSUPPRESS_TRACING(t);
 
     return rv;
 }
 
 PR_IMPLEMENT(void)
 NS_TraceMallocFlushLogfiles(void)
 {
     logfile *fp;
     tm_thread *t = tm_get_thread();
 
-    t->suppress_tracing++;
-    TM_ENTER_LOCK();
+    TM_SUPPRESS_TRACING_AND_ENTER_LOCK(t);
 
     for (fp = logfile_list; fp; fp = fp->next)
         flush_logfile(fp);
 
-    TM_EXIT_LOCK();
-    t->suppress_tracing--;
+    TM_EXIT_LOCK_AND_UNSUPPRESS_TRACING(t);
 }
 
 PR_IMPLEMENT(void)
 NS_TrackAllocation(void* ptr, FILE *ofp)
 {
     allocation *alloc;
     tm_thread *t = tm_get_thread();
 
     fprintf(ofp, "Trying to track %p\n", (void*) ptr);
     setlinebuf(ofp);
 
-    t->suppress_tracing++;
-    TM_ENTER_LOCK();
+    TM_SUPPRESS_TRACING_AND_ENTER_LOCK(t);
     if (get_allocations()) {
         alloc = (allocation*)
                 *PL_HashTableRawLookup(allocations, hash_pointer(ptr), ptr);
         if (alloc) {
             fprintf(ofp, "Tracking %p\n", (void*) ptr);
             alloc->trackfp = ofp;
         } else {
             fprintf(ofp, "Not tracking %p\n", (void*) ptr);
         }
     }
-    TM_EXIT_LOCK();
-    t->suppress_tracing--;
+    TM_EXIT_LOCK_AND_UNSUPPRESS_TRACING(t);
 }
 
 PR_IMPLEMENT(void)
 MallocCallback(void *ptr, size_t size, PRUint32 start, PRUint32 end, tm_thread *t)
 {
     callsite *site;
     PLHashEntry *he;
     allocation *alloc;
 
     if (!tracing_enabled || t->suppress_tracing != 0)
         return;
 
     site = backtrace(t, 2);
 
-    t->suppress_tracing++;
-    TM_ENTER_LOCK();
+    TM_SUPPRESS_TRACING_AND_ENTER_LOCK(t);
     tmstats.malloc_calls++;
     if (!ptr) {
         tmstats.malloc_failures++;
     } else {
         if (site) {
             log_event5(logfp, TM_EVENT_MALLOC,
                        site->serial, start, end - start,
                        (uint32)NS_PTR_TO_INT32(ptr), size);
@@ -1740,34 +1794,32 @@ MallocCallback(void *ptr, size_t size, P
             he = PL_HashTableAdd(allocations, ptr, site);
             if (he) {
                 alloc = (allocation*) he;
                 alloc->size = size;
                 alloc->trackfp = NULL;
             }
         }
     }
-    TM_EXIT_LOCK();
-    t->suppress_tracing--;
+    TM_EXIT_LOCK_AND_UNSUPPRESS_TRACING(t);
 }
 
 PR_IMPLEMENT(void)
 CallocCallback(void *ptr, size_t count, size_t size, PRUint32 start, PRUint32 end, tm_thread *t)
 {
     callsite *site;
     PLHashEntry *he;
     allocation *alloc;
 
     if (!tracing_enabled || t->suppress_tracing != 0)
         return;
 
     site = backtrace(t, 2);
 
-    t->suppress_tracing++;
-    TM_ENTER_LOCK();
+    TM_SUPPRESS_TRACING_AND_ENTER_LOCK(t);
     tmstats.calloc_calls++;
     if (!ptr) {
         tmstats.calloc_failures++;
     } else {
         size *= count;
         if (site) {
             log_event5(logfp, TM_EVENT_CALLOC,
                        site->serial, start, end - start,
@@ -1777,18 +1829,17 @@ CallocCallback(void *ptr, size_t count, 
             he = PL_HashTableAdd(allocations, ptr, site);
             if (he) {
                 alloc = (allocation*) he;
                 alloc->size = size;
                 alloc->trackfp = NULL;
             }
         }
     }
-    TM_EXIT_LOCK();
-    t->suppress_tracing--;
+    TM_EXIT_LOCK_AND_UNSUPPRESS_TRACING(t);
 }
 
 PR_IMPLEMENT(void)
 ReallocCallback(void * oldptr, void *ptr, size_t size,
                 PRUint32 start, PRUint32 end, tm_thread *t)
 {
     callsite *oldsite, *site;
     size_t oldsize;
@@ -1797,18 +1848,17 @@ ReallocCallback(void * oldptr, void *ptr
     allocation *alloc;
     FILE *trackfp = NULL;
 
     if (!tracing_enabled || t->suppress_tracing != 0)
         return;
 
     site = backtrace(t, 2);
 
-    t->suppress_tracing++;
-    TM_ENTER_LOCK();
+    TM_SUPPRESS_TRACING_AND_ENTER_LOCK(t);
     tmstats.realloc_calls++;
     oldsite = NULL;
     oldsize = 0;
     hep = NULL;
     he = NULL;
     if (oldptr && get_allocations()) {
         hash = hash_pointer(oldptr);
         hep = PL_HashTableRawLookup(allocations, hash, oldptr);
@@ -1862,32 +1912,30 @@ ReallocCallback(void * oldptr, void *ptr
             }
             if (he) {
                 alloc = (allocation*) he;
                 alloc->size = size;
                 alloc->trackfp = trackfp;
             }
         }
     }
-    TM_EXIT_LOCK();
-    t->suppress_tracing--;
+    TM_EXIT_LOCK_AND_UNSUPPRESS_TRACING(t);
 }
 
 PR_IMPLEMENT(void)
 FreeCallback(void * ptr, PRUint32 start, PRUint32 end, tm_thread *t)
 {
     PLHashEntry **hep, *he;
     callsite *site;
     allocation *alloc;
 
     if (!tracing_enabled || t->suppress_tracing != 0)
         return;
 
-    t->suppress_tracing++;
-    TM_ENTER_LOCK();
+    TM_SUPPRESS_TRACING_AND_ENTER_LOCK(t);
     tmstats.free_calls++;
     if (!ptr) {
         tmstats.null_free_calls++;
     } else {
         if (get_allocations()) {
             hep = PL_HashTableRawLookup(allocations, hash_pointer(ptr), ptr);
             he = *hep;
             if (he) {
@@ -1902,13 +1950,12 @@ FreeCallback(void * ptr, PRUint32 start,
                     log_event5(logfp, TM_EVENT_FREE,
                                site->serial, start, end - start,
                                (uint32)NS_PTR_TO_INT32(ptr), alloc->size);
                 }
                 PL_HashTableRawRemove(allocations, hep, he);
             }
         }
     }
-    TM_EXIT_LOCK();
-    t->suppress_tracing--;
+    TM_EXIT_LOCK_AND_UNSUPPRESS_TRACING(t);
 }
 
 #endif /* NS_TRACE_MALLOC */
--- a/tools/trace-malloc/lib/nsWinTraceMalloc.cpp
+++ b/tools/trace-malloc/lib/nsWinTraceMalloc.cpp
@@ -116,16 +116,17 @@ DHWImportHooker &getFreeHooker()
 void __cdecl dhw_free( void* p )
 {
     tm_thread *t = tm_get_thread();
     ++t->suppress_tracing;
     PRUint32 start = PR_IntervalNow();
     DHW_ORIGINAL(FREE_, getFreeHooker())(p);
     PRUint32 end = PR_IntervalNow();
     --t->suppress_tracing;
+    /* FIXME bug 392008: We could race with reallocation of p. */
     FreeCallback(p, start, end, t);
 }
 
 
 DHW_DECLARE_FUN_TYPE_AND_PROTO(dhw_realloc, void*, __cdecl, REALLOC_, (void*, size_t));
 DHWImportHooker &getReallocHooker()
 {
   static DHWImportHooker gReallocHooker(NS_DEBUG_CRT, "realloc", (PROC) dhw_realloc);
@@ -135,16 +136,17 @@ DHWImportHooker &getReallocHooker()
 void * __cdecl dhw_realloc(void * pin, size_t size)
 {
     tm_thread *t = tm_get_thread();
     ++t->suppress_tracing;
     PRUint32 start = PR_IntervalNow();
     void* pout = DHW_ORIGINAL(REALLOC_, getReallocHooker())(pin, size);
     PRUint32 end = PR_IntervalNow();
     --t->suppress_tracing;
+    /* FIXME bug 392008: We could race with reallocation of pin. */
     ReallocCallback(pin, pout, size, start, end, t);
     return pout;
 }
 
 // Note the mangled name!
 DHW_DECLARE_FUN_TYPE_AND_PROTO(dhw_new, void*, __cdecl, NEW_, (size_t));
 DHWImportHooker &getNewHooker()
 {