Use thread-local storage for tracing suppression. b=376874 r+a=brendan
author dbaron@dbaron.org
Fri, 10 Aug 2007 15:19:14 -0700
changeset 4495 eb2bc90a3583863bbcbaacd663e219f3443f35e6
parent 4494 0821dbf06bdeb757b7d8068c43d90df500028365
child 4496 e5e746415b05e19865589239ceaa5c096493f1ba
push id unknown
push user unknown
push date unknown
bugs 376874
milestone1.9a8pre
Use thread-local storage for tracing suppression. b=376874 r+a=brendan
tools/trace-malloc/lib/nsTraceMalloc.c
tools/trace-malloc/lib/nsWinTraceMalloc.cpp
--- a/tools/trace-malloc/lib/nsTraceMalloc.c
+++ b/tools/trace-malloc/lib/nsTraceMalloc.c
@@ -62,16 +62,17 @@
 #include "prlog.h"
 #include "prmon.h"
 #include "prprf.h"
 #include "prenv.h"
 #include "prnetdb.h"
 #include "nsTraceMalloc.h"
 #include "nscore.h"
 #include "prinit.h"
+#include "prthread.h"
 
 #ifdef XP_WIN32
 #include "nsStackFrameWin.h"
 #include <sys/timeb.h>/*for timeb*/
 #include <sys/stat.h>/*for fstat*/
 
 #include <io.h> /*for write*/
 #include "nsTraceMallocCallbacks.h"
@@ -294,22 +295,16 @@ static logfile   default_logfile =
                    {-1, 0, default_buf, STARTUP_TMBUFSIZE, 0, 0, 0, NULL, NULL};
 static logfile   *logfile_list = NULL;
 static logfile   **logfile_tail = &logfile_list;
 static logfile   *logfp = &default_logfile;
 static PRMonitor *tmmon = NULL;
 static char      *sdlogname = NULL; /* filename for shutdown leak log */
 
 /*
- * This counter suppresses tracing, in case any tracing code needs to malloc,
- * and it must be tested and manipulated only within tmmon.
- */
-static uint32 suppress_tracing = 0;
-
-/*
  * This enables/disables trace-malloc logging.
  *
  * It is separate from suppress_tracing so that we do not have to pay
  * the performance cost of repeated PR_EnterMonitor/PR_ExitMonitor and
  * PR_IntervalNow calls when trace-malloc is disabled.
  */
 static int tracing_enabled = 1;
 
@@ -320,16 +315,111 @@ static int tracing_enabled = 1;
     PR_END_MACRO
 
 #define TM_EXIT_MONITOR()                                                     \
     PR_BEGIN_MACRO                                                            \
         if (tmmon)                                                            \
             PR_ExitMonitor(tmmon);                                            \
     PR_END_MACRO
 
+/*
+ * Thread-local storage.
+ *
+ * We can't use NSPR thread-local storage for this because it mallocs
+ * within PR_GetThreadPrivate (the first time) and PR_SetThreadPrivate
+ * (which can be worked around by protecting all uses of those functions
+ * with a monitor, ugh) and because it calls malloc/free when the
+ * thread-local storage is in an inconsistent state within
+ * PR_SetThreadPrivate (when expanding the thread-local storage array)
+ * and _PRI_DetachThread (when and after deleting the thread-local
+ * storage array).
+ */
+
+#ifdef XP_WIN32
+
+#include <windows.h>
+
+#define TM_TLS_INDEX_TYPE               DWORD
+#define TM_CREATE_TLS_INDEX(i_)         PR_BEGIN_MACRO                        \
+                                          (i_) = TlsAlloc();                  \
+                                        PR_END_MACRO
+#define TM_DESTROY_TLS_INDEX(i_)        TlsFree((i_))
+#define TM_GET_TLS_DATA(i_)             TlsGetValue((i_))
+#define TM_SET_TLS_DATA(i_, v_)         TlsSetValue((i_), (v_))
+
+#else
+
+#include <pthread.h>
+
+#define TM_TLS_INDEX_TYPE               pthread_key_t
+#define TM_CREATE_TLS_INDEX(i_)         pthread_key_create(&(i_), NULL)
+#define TM_DESTROY_TLS_INDEX(i_)        pthread_key_delete((i_))
+#define TM_GET_TLS_DATA(i_)             pthread_getspecific((i_))
+#define TM_SET_TLS_DATA(i_, v_)         pthread_setspecific((i_), (v_))
+
+#endif
+
+typedef struct tm_thread tm_thread;
+struct tm_thread {
+    /*
+     * This counter suppresses tracing, in case any tracing code needs
+     * to malloc.
+     */
+    uint32 suppress_tracing;
+};
+
+static TM_TLS_INDEX_TYPE tls_index;
+static tm_thread main_thread; /* 0-initialization is correct */
+
+/* FIXME (maybe): This is currently unused; we leak the thread-local data. */
+#if 0
+PR_STATIC_CALLBACK(void)
+free_tm_thread(void *priv)
+{
+    tm_thread *t = (tm_thread*) priv;
+
+    PR_ASSERT(t->suppress_tracing == 0);
+
+    if (t->in_heap) {
+        t->suppress_tracing = 1;
+        __libc_free(t);
+    }
+}
+#endif
+
+static tm_thread *
+get_tm_thread(void)
+{
+    tm_thread *t;
+    tm_thread stack_tm_thread;
+
+    if (!tmmon) {
+        return &main_thread;
+    }
+
+    t = TM_GET_TLS_DATA(tls_index);
+
+    if (!t) {
+        /*
+         * First, store a tm_thread on the stack to suppress for the
+         * malloc below
+         */
+        stack_tm_thread.suppress_tracing = 1;
+        TM_SET_TLS_DATA(tls_index, &stack_tm_thread);
+
+        t = (tm_thread*) __libc_malloc(sizeof(tm_thread));
+        t->suppress_tracing = 0;
+        TM_SET_TLS_DATA(tls_index, t);
+
+        PR_ASSERT(stack_tm_thread.suppress_tracing == 1); /* balanced */
+    }
+
+    return t;
+}
+
 /* We don't want more than 32 logfiles open at once, ok? */
 typedef uint32          lfd_set;
 
 #define LFD_SET_STATIC_INITIALIZER 0
 #define LFD_SET_SIZE    32
 
 #define LFD_ZERO(s)     (*(s) = 0)
 #define LFD_BIT(i)      ((uint32)1 << (i))
@@ -1350,45 +1440,45 @@ static callsite *calltree(void **bp)
     return site;
 }
 
 #endif
 
 #ifdef XP_WIN32
 
 callsite *
-backtrace(int skip)
+backtrace(tm_thread *t, int skip)
 {
     callsite *site;
 
     tmstats.backtrace_calls++;
-    suppress_tracing++;
+    t->suppress_tracing++;
 
     site = calltree(skip);
     if (!site) {
         tmstats.backtrace_failures++;
         /* PR_ASSERT(tmstats.backtrace_failures < 100); */
     }
-    suppress_tracing--;
+    t->suppress_tracing--;
     return site;
 }
 
 #else /*XP_UNIX*/
 
 callsite *
-backtrace(int skip)
+backtrace(tm_thread *t, int skip)
 {
     void **bp, **bpdown;
     callsite *site, **key;
     PLHashNumber hash;
     PLHashEntry **hep, *he;
     int i, n;
 
     tmstats.backtrace_calls++;
-    suppress_tracing++;
+    t->suppress_tracing++;
 
     /* Stack walking code adapted from Kipp's "leaky". */
 #if defined(__i386) 
     __asm__( "movl %%ebp, %0" : "=g"(bp));
 #elif defined(__x86_64__)
     __asm__( "movq %%rbp, %0" : "=g"(bp));
 #else
     /*
@@ -1405,17 +1495,17 @@ backtrace(int skip)
         bp = bpdown;
     }
 
     site = calltree(bp);
     if (!site) {
         tmstats.backtrace_failures++;
         PR_ASSERT(tmstats.backtrace_failures < 100);
     }
-    suppress_tracing--;
+    t->suppress_tracing--;
     return site;
 }
 
 
 #endif /* XP_UNIX */
 
 
 typedef struct allocation {
@@ -1496,38 +1586,41 @@ static PLHashTable *new_allocations(void
 NS_EXTERNAL_VIS_(__ptr_t)
 malloc(size_t size)
 {
     PRUint32 start, end;
     __ptr_t ptr;
     callsite *site;
     PLHashEntry *he;
     allocation *alloc;
+    tm_thread *t;
 
     if (!tracing_enabled || !PR_Initialized()) {
         return __libc_malloc(size);
     }
 
+    t = get_tm_thread();
+
     start = PR_IntervalNow();
     ptr = __libc_malloc(size);
     end = PR_IntervalNow();
     TM_ENTER_MONITOR();
     tmstats.malloc_calls++;
     if (!ptr) {
         tmstats.malloc_failures++;
-    } else if (suppress_tracing == 0) {
-        site = backtrace(1);
+    } else if (t->suppress_tracing == 0) {
+        site = backtrace(t, 1);
         if (site)
             log_event5(logfp, TM_EVENT_MALLOC,
                        site->serial, start, end - start,
                        (uint32)NS_PTR_TO_INT32(ptr), size);
         if (get_allocations()) {
-            suppress_tracing++;
+            t->suppress_tracing++;
             he = PL_HashTableAdd(allocations, ptr, site);
-            suppress_tracing--;
+            t->suppress_tracing--;
             if (he) {
                 alloc = (allocation*) he;
                 alloc->size = size;
                 alloc->trackfp = NULL;
             }
         }
     }
     TM_EXIT_MONITOR();
@@ -1537,51 +1630,54 @@ malloc(size_t size)
 NS_EXTERNAL_VIS_(__ptr_t)
 calloc(size_t count, size_t size)
 {
     PRUint32 start, end;
     __ptr_t ptr;
     callsite *site;
     PLHashEntry *he;
     allocation *alloc;
+    tm_thread *t;
 
     /**
      * During the initialization of the glibc/libpthread, and
      * before main() is running, ld-linux.so.2 tries to allocate memory
      * using calloc (call from _dl_tls_setup).
      *
      * Thus, our calloc replacement is invoked too early, tries to
      * initialize NSPR, which calls dlopen, which calls into the dl
      * -> crash.
      *
      * Delaying NSPR calls until NSPR is initialized helps.
      */
     if (!tracing_enabled || !PR_Initialized()) {
         return __libc_calloc(count, size);
     }
 
+    t = get_tm_thread();
+
     start = PR_IntervalNow();
     ptr = __libc_calloc(count, size);
     end = PR_IntervalNow();
     TM_ENTER_MONITOR();
     tmstats.calloc_calls++;
     if (!ptr) {
         tmstats.calloc_failures++;
-    } else if (suppress_tracing == 0) {
-        site = backtrace(1);
+    } else if (t->suppress_tracing == 0) {
+        site = backtrace(t, 1);
         size *= count;
         if (site) {
             log_event5(logfp, TM_EVENT_CALLOC,
                        site->serial, start, end - start,
                        (uint32)NS_PTR_TO_INT32(ptr), size);
         }
         if (get_allocations()) {
-            suppress_tracing++;
+            t->suppress_tracing++;
             he = PL_HashTableAdd(allocations, ptr, site);
-            suppress_tracing--;
+            t->suppress_tracing--;
             if (he) {
                 alloc = (allocation*) he;
                 alloc->size = size;
                 alloc->trackfp = NULL;
             }
         }
     }
     TM_EXIT_MONITOR();
@@ -1594,24 +1690,27 @@ realloc(__ptr_t ptr, size_t size)
     PRUint32 start, end;
     __ptr_t oldptr;
     callsite *oldsite, *site;
     size_t oldsize;
     PLHashNumber hash;
     PLHashEntry **hep, *he;
     allocation *alloc;
     FILE *trackfp = NULL;
+    tm_thread *t;
 
     if (!tracing_enabled || !PR_Initialized()) {
         return __libc_realloc(ptr, size);
     }
 
+    t = get_tm_thread();
+
     TM_ENTER_MONITOR();
     tmstats.realloc_calls++;
-    if (suppress_tracing == 0) {
+    if (t->suppress_tracing == 0) {
         oldptr = ptr;
         oldsite = NULL;
         oldsize = 0;
         he = NULL;
         if (oldptr && get_allocations()) {
             hash = hash_pointer(oldptr);
             hep = PL_HashTableRawLookup(allocations, hash, oldptr);
             he = *hep;
@@ -1638,27 +1737,27 @@ realloc(__ptr_t ptr, size_t size)
 
     TM_ENTER_MONITOR();
     if (!ptr && size) {
         /*
          * When realloc() fails, the original block is not freed or moved, so
          * we'll leave the allocation entry untouched.
          */
         tmstats.realloc_failures++;
-    } else if (suppress_tracing == 0) {
-        site = backtrace(1);
+    } else if (t->suppress_tracing == 0) {
+        site = backtrace(t, 1);
         if (site) {
             log_event8(logfp, TM_EVENT_REALLOC,
                        site->serial, start, end - start,
                        (uint32)NS_PTR_TO_INT32(ptr), size,
                        oldsite ? oldsite->serial : 0,
                        (uint32)NS_PTR_TO_INT32(oldptr), oldsize);
         }
         if (ptr && allocations) {
-            suppress_tracing++;
+            t->suppress_tracing++;
             if (ptr != oldptr) {
                 /*
                  * If we're reallocating (not merely allocating new space by
                  * passing null to realloc) and realloc has moved the block,
                  * free oldptr.
                  */
                 if (he)
                     PL_HashTableRemove(allocations, oldptr);
@@ -1668,17 +1767,17 @@ realloc(__ptr_t ptr, size_t size)
             } else {
                 /*
                  * If we haven't yet recorded an allocation (possibly due to
                  * a temporary memory shortage), do it now.
                  */
                 if (!he)
                     he = PL_HashTableAdd(allocations, ptr, site);
             }
-            suppress_tracing--;
+            t->suppress_tracing--;
             if (he) {
                 alloc = (allocation*) he;
                 alloc->size = size;
                 alloc->trackfp = trackfp;
             }
         }
     }
     TM_EXIT_MONITOR();
@@ -1688,38 +1787,41 @@ realloc(__ptr_t ptr, size_t size)
 NS_EXTERNAL_VIS_(void*)
 valloc(size_t size)
 {
     PRUint32 start, end;
     __ptr_t ptr;
     callsite *site;
     PLHashEntry *he;
     allocation *alloc;
+    tm_thread *t;
 
     if (!tracing_enabled || !PR_Initialized()) {
         return __libc_valloc(size);
     }
 
+    t = get_tm_thread();
+
     start = PR_IntervalNow();
     ptr = __libc_valloc(size);
     end = PR_IntervalNow();
     TM_ENTER_MONITOR();
     tmstats.malloc_calls++; /* XXX valloc_calls ? */
     if (!ptr) {
         tmstats.malloc_failures++; /* XXX valloc_failures ? */
-    } else if (suppress_tracing == 0) {
-        site = backtrace(1);
+    } else if (t->suppress_tracing == 0) {
+        site = backtrace(t, 1);
         if (site)
             log_event5(logfp, TM_EVENT_MALLOC, /* XXX TM_EVENT_VALLOC? */
                        site->serial, start, end - start,
                        (uint32)NS_PTR_TO_INT32(ptr), size);
         if (get_allocations()) {
-            suppress_tracing++;
+            t->suppress_tracing++;
             he = PL_HashTableAdd(allocations, ptr, site);
-            suppress_tracing--;
+            t->suppress_tracing--;
             if (he) {
                 alloc = (allocation*) he;
                 alloc->size = size;
                 alloc->trackfp = NULL;
             }
         }
     }
     TM_EXIT_MONITOR();
@@ -1729,39 +1831,42 @@ valloc(size_t size)
 NS_EXTERNAL_VIS_(void*)
 memalign(size_t boundary, size_t size)
 {
     PRUint32 start, end;
     __ptr_t ptr;
     callsite *site;
     PLHashEntry *he;
     allocation *alloc;
+    tm_thread *t;
 
     if (!tracing_enabled || !PR_Initialized()) {
         return __libc_memalign(boundary, size);
     }
 
+    t = get_tm_thread();
+
     start = PR_IntervalNow();
     ptr = __libc_memalign(boundary, size);
     end = PR_IntervalNow();
     TM_ENTER_MONITOR();
     tmstats.malloc_calls++; /* XXX memalign_calls ? */
     if (!ptr) {
         tmstats.malloc_failures++; /* XXX memalign_failures ? */
-    } else if (suppress_tracing == 0) {
-        site = backtrace(1);
+    } else if (t->suppress_tracing == 0) {
+        site = backtrace(t, 1);
         if (site) {
             log_event5(logfp, TM_EVENT_MALLOC, /* XXX TM_EVENT_MEMALIGN? */
                        site->serial, start, end - start,
                        (uint32)NS_PTR_TO_INT32(ptr), size);
         }
         if (get_allocations()) {
-            suppress_tracing++;
+            t->suppress_tracing++;
             he = PL_HashTableAdd(allocations, ptr, site);
-            suppress_tracing--;
+            t->suppress_tracing--;
             if (he) {
                 alloc = (allocation*) he;
                 alloc->size = size;
                 alloc->trackfp = NULL;
             }
         }
     }
     TM_EXIT_MONITOR();
@@ -1781,27 +1886,30 @@ posix_memalign(void **memptr, size_t ali
 NS_EXTERNAL_VIS_(void)
 free(__ptr_t ptr)
 {
     PLHashEntry **hep, *he;
     callsite *site;
     allocation *alloc;
     uint32 serial = 0, size = 0;
     PRUint32 start, end;
+    tm_thread *t;
 
     if (!tracing_enabled || !PR_Initialized()) {
         __libc_free(ptr);
         return;
     }
 
+    t = get_tm_thread();
+
     TM_ENTER_MONITOR();
     tmstats.free_calls++;
     if (!ptr) {
         tmstats.null_free_calls++;
-    } else if (suppress_tracing == 0) {
+    } else if (t->suppress_tracing == 0) {
         if (get_allocations()) {
             hep = PL_HashTableRawLookup(allocations, hash_pointer(ptr), ptr);
             he = *hep;
             if (he) {
                 site = (callsite*) he->value;
                 if (site) {
                     alloc = (allocation*) he;
                     serial = site->serial;
@@ -1847,34 +1955,44 @@ log_header(int logfd)
     uint32 ticksPerSec = PR_htonl(PR_TicksPerSecond());
     (void) write(logfd, magic, NS_TRACE_MALLOC_MAGIC_SIZE);
     (void) write(logfd, &ticksPerSec, sizeof ticksPerSec);
 }
 
 PR_IMPLEMENT(void) NS_TraceMallocStartup(int logfd)
 {
     /* We must be running on the primordial thread. */
-    PR_ASSERT(suppress_tracing == 0);
     PR_ASSERT(tracing_enabled == 1);
     PR_ASSERT(logfp == &default_logfile);
     tracing_enabled = (logfd >= 0);
 
     if (tracing_enabled) {
         PR_ASSERT(logfp->simsize == 0); /* didn't overflow startup buffer */
 
         /* Log everything in logfp (aka default_logfile)'s buffer to logfd. */
         logfp->fd = logfd;
         logfile_list = &default_logfile;
         logfp->prevp = &logfile_list;
         logfile_tail = &logfp->next;
         log_header(logfd);
     }
 
     atexit(NS_TraceMallocShutdown);
+
+    /*
+     * We only allow one thread until NS_TraceMallocStartup is called.
+     * When it is, we have to initialize tls_index before allocating tmmon
+     * since get_tm_thread uses NULL-tmmon to detect tls_index being
+     * uninitialized.
+     */
+    main_thread.suppress_tracing++;
+    TM_CREATE_TLS_INDEX(tls_index);
+    TM_SET_TLS_DATA(tls_index, &main_thread);
     tmmon = PR_NewMonitor();
+    main_thread.suppress_tracing--;
 
 #ifdef XP_WIN32
     /* Register listeners for win32. */
     if (tracing_enabled) {
         StartupHooker();
     }
 #endif
 }
@@ -2221,18 +2339,19 @@ allocation_enumerator(PLHashEntry *he, P
     fputc('\n', ofp);
     return HT_ENUMERATE_NEXT;
 }
 
 PR_IMPLEMENT(void)
 NS_TraceStack(int skip, FILE *ofp)
 {
     callsite *site;
+    tm_thread *t = get_tm_thread();
 
-    site = backtrace(skip + 1);
+    site = backtrace(t, skip + 1);
     while (site) {
         if (site->name || site->parent) {
             fprintf(ofp, "%s[%s +0x%X]\n",
                     site->name, site->library, site->offset);
         }
         site = site->parent;
     }
 }
@@ -2291,68 +2410,74 @@ NS_TrackAllocation(void* ptr, FILE *ofp)
 #ifdef XP_WIN32
 
 PR_IMPLEMENT(void)
 MallocCallback(void *ptr, size_t size, PRUint32 start, PRUint32 end)
 {
     callsite *site;
     PLHashEntry *he;
     allocation *alloc;
+    tm_thread *t;
 
     if (!tracing_enabled)
         return;
 
+    t = get_tm_thread();
+
     TM_ENTER_MONITOR();
     tmstats.malloc_calls++;
     if (!ptr) {
         tmstats.malloc_failures++;
-    } else if (suppress_tracing == 0) {
-        site = backtrace(4);
+    } else if (t->suppress_tracing == 0) {
+        site = backtrace(t, 4);
         if (site)
             log_event5(logfp, TM_EVENT_MALLOC,
                        site->serial, start, end - start,
                        (uint32)NS_PTR_TO_INT32(ptr), size);
         if (get_allocations()) {
-            suppress_tracing++;
+            t->suppress_tracing++;
             he = PL_HashTableAdd(allocations, ptr, site);
-            suppress_tracing--;
+            t->suppress_tracing--;
             if (he) {
                 alloc = (allocation*) he;
                 alloc->size = size;
             }
         }
     }
     TM_EXIT_MONITOR();
 }
 
 PR_IMPLEMENT(void)
 CallocCallback(void *ptr, size_t count, size_t size, PRUint32 start, PRUint32 end)
 {
     callsite *site;
     PLHashEntry *he;
     allocation *alloc;
+    tm_thread *t;
 
     if (!tracing_enabled)
         return;
 
+    t = get_tm_thread();
+
     TM_ENTER_MONITOR();
     tmstats.calloc_calls++;
     if (!ptr) {
         tmstats.calloc_failures++;
-    } else if (suppress_tracing == 0) {
-        site = backtrace(1);
+    } else if (t->suppress_tracing == 0) {
+        site = backtrace(t, 1);
         size *= count;
         if (site)
             log_event5(logfp, TM_EVENT_CALLOC,
                        site->serial, start, end - start,
                        (uint32)NS_PTR_TO_INT32(ptr), size);
         if (get_allocations()) {
-            suppress_tracing++;
+            t->suppress_tracing++;
             he = PL_HashTableAdd(allocations, ptr, site);
-            suppress_tracing--;
+            t->suppress_tracing--;
             if (he) {
                 alloc = (allocation*) he;
                 alloc->size = size;
             }
         }
     }
     TM_EXIT_MONITOR();
 }
@@ -2360,23 +2485,26 @@ CallocCallback(void *ptr, size_t count, 
 PR_IMPLEMENT(void)
 ReallocCallback(void * oldptr, void *ptr, size_t size, PRUint32 start, PRUint32 end)
 {
     callsite *oldsite, *site;
     size_t oldsize;
     PLHashNumber hash;
     PLHashEntry **hep, *he;
     allocation *alloc;
+    tm_thread *t;
 
     if (!tracing_enabled)
         return;
 
+    t = get_tm_thread();
+
     TM_ENTER_MONITOR();
     tmstats.realloc_calls++;
-    if (suppress_tracing == 0) {
+    if (t->suppress_tracing == 0) {
         oldsite = NULL;
         oldsize = 0;
         he = NULL;
         if (oldptr && get_allocations()) {
             hash = hash_pointer(oldptr);
             hep = PL_HashTableRawLookup(allocations, hash, oldptr);
             he = *hep;
             if (he) {
@@ -2388,27 +2516,27 @@ ReallocCallback(void * oldptr, void *ptr
     }
     if (!ptr && size) {
         tmstats.realloc_failures++;
 
         /*
          * When realloc() fails, the original block is not freed or moved, so
          * we'll leave the allocation entry untouched.
          */
-    } else if (suppress_tracing == 0) {
-        site = backtrace(1);
+    } else if (t->suppress_tracing == 0) {
+        site = backtrace(t, 1);
         if (site) {
             log_event8(logfp, TM_EVENT_REALLOC,
                        site->serial, start, end - start,
                        (uint32)NS_PTR_TO_INT32(ptr), size,
                        oldsite ? oldsite->serial : 0,
                        (uint32)NS_PTR_TO_INT32(oldptr), oldsize);
         }
         if (ptr && allocations) {
-            suppress_tracing++;
+            t->suppress_tracing++;
             if (ptr != oldptr) {
                 /*
                  * If we're reallocating (not allocating new space by passing
                  * null to realloc) and realloc moved the block, free oldptr.
                  */
                 if (he)
                     PL_HashTableRawRemove(allocations, hep, he);
 
@@ -2417,41 +2545,44 @@ ReallocCallback(void * oldptr, void *ptr
             } else {
                 /*
                  * If we haven't yet recorded an allocation (possibly due to a
                  * temporary memory shortage), do it now.
                  */
                 if (!he)
                     he = PL_HashTableAdd(allocations, ptr, site);
             }
-            suppress_tracing--;
+            t->suppress_tracing--;
             if (he) {
                 alloc = (allocation*) he;
                 alloc->size = size;
             }
         }
     }
     TM_EXIT_MONITOR();
 }
 
 PR_IMPLEMENT(void)
 FreeCallback(void * ptr, PRUint32 start, PRUint32 end)
 {
     PLHashEntry **hep, *he;
     callsite *site;
     allocation *alloc;
+    tm_thread *t;
 
     if (!tracing_enabled)
         return;
 
+    t = get_tm_thread();
+
     TM_ENTER_MONITOR();
     tmstats.free_calls++;
     if (!ptr) {
         tmstats.null_free_calls++;
-    } else if (suppress_tracing == 0) {
+    } else if (t->suppress_tracing == 0) {
         if (get_allocations()) {
             hep = PL_HashTableRawLookup(allocations, hash_pointer(ptr), ptr);
             he = *hep;
             if (he) {
                 site = (callsite*) he->value;
                 if (site) {
                     alloc = (allocation*) he;
                     log_event5(logfp, TM_EVENT_FREE,
--- a/tools/trace-malloc/lib/nsWinTraceMalloc.cpp
+++ b/tools/trace-malloc/lib/nsWinTraceMalloc.cpp
@@ -135,172 +135,113 @@ int call1(char c, int i, double d, ... )
 #elif _MSC_VER == 1400
 #define NS_DEBUG_CRT "msvcr80d.dll"
 #elif _MSC_VER == 1500
 #define NS_DEBUG_CRT "msvcr90d.dll"
 #else
 #error "Don't know filename of MSVC debug library."
 #endif
 
-static BOOL g_lockOut = FALSE; //stop reentrancy
-
 DHW_DECLARE_FUN_TYPE_AND_PROTO(dhw_malloc, void*, __cdecl, MALLOC_, (size_t));
 
 DHWImportHooker &getMallocHooker()
 {
   static DHWImportHooker gMallocHooker(NS_DEBUG_CRT, "malloc", (PROC) dhw_malloc);
   return gMallocHooker;
 }
 
 void * __cdecl dhw_malloc( size_t size )
 {
     PRUint32 start = PR_IntervalNow();
     void* result = DHW_ORIGINAL(MALLOC_, getMallocHooker())(size);
     PRUint32 end = PR_IntervalNow();
-    if (g_lockOut)
-      return result;
-    g_lockOut = TRUE;
-#ifdef VERBOSE
-    printf("* malloc called to get %d bytes. returned %#x\n", size, result);
-#endif
     MallocCallback(result, size, start, end);
-//    dumpStack(); 
-//    printf("\n");
-    g_lockOut = FALSE;
     return result;    
 }
 
 DHW_DECLARE_FUN_TYPE_AND_PROTO(dhw_calloc, void*, __cdecl, CALLOC_, (size_t,size_t));
 
 DHWImportHooker &getCallocHooker()
 {
   static DHWImportHooker gCallocHooker(NS_DEBUG_CRT, "calloc", (PROC) dhw_calloc);
   return gCallocHooker;
 }
 
 void * __cdecl dhw_calloc( size_t count, size_t size )
 {
     PRUint32 start = PR_IntervalNow();
     void* result = DHW_ORIGINAL(CALLOC_, getCallocHooker())(count,size);
     PRUint32 end = PR_IntervalNow();
-    if (g_lockOut)
-      return result;
-    g_lockOut = TRUE;
-#ifdef VERBOSE
-    printf("* calloc called to get %d many of %d bytes. returned %#x\n", count, size, result);
-#endif
     CallocCallback(result, count, size, start, end);
-//    dumpStack(); 
-//    printf("\n");
-    g_lockOut = FALSE;
     return result;    
 }
 
 DHW_DECLARE_FUN_TYPE_AND_PROTO(dhw_free, void, __cdecl, FREE_, (void*));
 DHWImportHooker &getFreeHooker()
 {
   static DHWImportHooker gFreeHooker(NS_DEBUG_CRT, "free", (PROC) dhw_free);
   return gFreeHooker;
 }
 
 void __cdecl dhw_free( void* p )
 {
     PRUint32 start = PR_IntervalNow();
     DHW_ORIGINAL(FREE_, getFreeHooker())(p);
     PRUint32 end = PR_IntervalNow();
-    if (g_lockOut)
-      return;
-    g_lockOut = TRUE;
-#ifdef VERBOSE
-    printf("* free called for %#x\n", p);
-#endif
     FreeCallback(p, start, end);
-//    dumpStack(); 
-//    printf("\n");
-    g_lockOut = FALSE;
 }
 
 
 DHW_DECLARE_FUN_TYPE_AND_PROTO(dhw_realloc, void*, __cdecl, REALLOC_, (void*, size_t));
 DHWImportHooker &getReallocHooker()
 {
   static DHWImportHooker gReallocHooker(NS_DEBUG_CRT, "realloc", (PROC) dhw_realloc);
   return gReallocHooker;
 }
 
 void * __cdecl dhw_realloc(void * pin, size_t size)
 {
     PRUint32 start = PR_IntervalNow();
     void* pout = DHW_ORIGINAL(REALLOC_, getReallocHooker())(pin, size);
     PRUint32 end = PR_IntervalNow();
-    if (g_lockOut)
-      return pout;
-    g_lockOut = TRUE;
-
-#ifdef VERBOSE
-    printf("* realloc called to resize to %d. old ptr: %#x. new ptr: %#x\n", 
-           size, pin, pout);
-#endif
     ReallocCallback(pin, pout, size, start, end);
-//    dumpStack(); 
-//    printf("\n");
-    g_lockOut = FALSE;
     return pout;
 }
 
 // Note the mangled name!
 DHW_DECLARE_FUN_TYPE_AND_PROTO(dhw_new, void*, __cdecl, NEW_, (size_t));
 DHWImportHooker &getNewHooker()
 {
   static DHWImportHooker gNewHooker(NS_DEBUG_CRT, "??2@YAPAXI@Z", (PROC) dhw_new);
   return gNewHooker;
 }
 
 void * __cdecl dhw_new(size_t size)
 {
     PRUint32 start = PR_IntervalNow();
     void* result = DHW_ORIGINAL(NEW_, getNewHooker())(size);
     PRUint32 end = PR_IntervalNow();
-    if (g_lockOut)
-      return result;
-    g_lockOut = TRUE;
-
-#ifdef VERBOSE
-    printf("* new called to get %d bytes. returned %#x\n", size, result);
-    dumpStack(); 
-#endif
     MallocCallback(result, size, start, end);//do we need a different one for new?
-//    printf("\n");
-    g_lockOut = FALSE;
     return result;
 }
 
 // Note the mangled name!
 DHW_DECLARE_FUN_TYPE_AND_PROTO(dhw_delete, void, __cdecl, DELETE_, (void*));
 DHWImportHooker &getDeleteHooker()
 {
   static DHWImportHooker gDeleteHooker(NS_DEBUG_CRT, "??3@YAXPAX@Z", (PROC) dhw_delete);
   return gDeleteHooker;
 }
 
 void __cdecl dhw_delete(void* p)
 {
     PRUint32 start = PR_IntervalNow();
     DHW_ORIGINAL(DELETE_, getDeleteHooker())(p);
     PRUint32 end = PR_IntervalNow();
-    if (g_lockOut)
-      return;
-    g_lockOut = TRUE;
-#ifdef VERBOSE
-    printf("* delete called for %#x\n", p);
-    dumpStack(); 
-#endif
     FreeCallback(p, start, end);
-//    printf("\n");
-    g_lockOut = FALSE;
 }
 
 
 
 
 /***************************************************************************/
 // A demonstration of using the _CrtSetAllocHook based hooking.
 // This system sucks because you don't get to see the allocated pointer.