Start reducing the scope of locking in trace-malloc, and convert the PRMonitor to a PRLock. b=376874 r+a=brendan
author dbaron@dbaron.org
Fri, 10 Aug 2007 15:19:32 -0700
changeset 4496 e5e746415b05e19865589239ceaa5c096493f1ba
parent 4495 eb2bc90a3583863bbcbaacd663e219f3443f35e6
child 4497 552013b33ac14984247e71356d13dab5c02fec91
push id unknown
push user unknown
push date unknown
bugs 376874
milestone 1.9a8pre
Start reducing the scope of locking in trace-malloc, and convert the PRMonitor to a PRLock. b=376874 r+a=brendan
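
The heart of this patch is swapping NSPR's monitor for its plain lock. A minimal standalone sketch (not from this patch) of the two primitives shows the behavioral difference that forces all the suppress_tracing bookkeeping below: a PRMonitor is re-entrant, a PRLock is not.

    #include "prmon.h"
    #include "prlock.h"

    static void monitor_style(PRMonitor *mon)
    {
        PR_EnterMonitor(mon);   /* re-entrant: the owning thread may */
        PR_EnterMonitor(mon);   /* enter again without deadlocking   */
        PR_ExitMonitor(mon);
        PR_ExitMonitor(mon);
    }

    static void lock_style(PRLock *lock)
    {
        PR_Lock(lock);          /* not re-entrant: a second PR_Lock on the
                                 * same thread deadlocks, so re-entry must
                                 * be prevented by the caller */
        PR_Unlock(lock);
    }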
tools/trace-malloc/lib/nsTraceMalloc.c
--- a/tools/trace-malloc/lib/nsTraceMalloc.c
+++ b/tools/trace-malloc/lib/nsTraceMalloc.c
@@ -55,16 +55,17 @@
 #ifdef XP_UNIX
 #include <unistd.h>
 #include <sys/stat.h>
 #include <sys/time.h>
 #endif
 #include "plhash.h"
 #include "pratom.h"
 #include "prlog.h"
+#include "prlock.h"
 #include "prmon.h"
 #include "prprf.h"
 #include "prenv.h"
 #include "prnetdb.h"
 #include "nsTraceMalloc.h"
 #include "nscore.h"
 #include "prinit.h"
 #include "prthread.h"
@@ -291,38 +292,46 @@ struct logfile {
 };
 
 static char      default_buf[STARTUP_TMBUFSIZE];
 static logfile   default_logfile =
                    {-1, 0, default_buf, STARTUP_TMBUFSIZE, 0, 0, 0, NULL, NULL};
 static logfile   *logfile_list = NULL;
 static logfile   **logfile_tail = &logfile_list;
 static logfile   *logfp = &default_logfile;
-static PRMonitor *tmmon = NULL;
+static PRLock    *tmlock = NULL;
 static char      *sdlogname = NULL; /* filename for shutdown leak log */
 
 /*
  * This enables/disables trace-malloc logging.
  *
  * It is separate from suppress_tracing so that we do not have to pay
- * the performance cost of repeated PR_EnterMonitor/PR_ExitMonitor and
- * PR_IntervalNow calls when trace-malloc is disabled.
+ * the performance cost of repeated PR_GetThreadPrivate calls when
+ * trace-malloc is disabled (a smaller cost than the locking we used
+ * to have, but still one worth avoiding).
  */
 static int tracing_enabled = 1;
 
-#define TM_ENTER_MONITOR()                                                    \
+/*
+ * This lock must be held while manipulating the calltree, the
+ * allocations table, the log, or the tmstats.
+ *
+ * Callers must check suppress_tracing before entering the lock;
+ * otherwise they risk re-entering it on the same thread, which
+ * deadlocks because a PRLock, unlike a PRMonitor, is not re-entrant.
+ */
+#define TM_ENTER_LOCK()                                                       \
     PR_BEGIN_MACRO                                                            \
-        if (tmmon)                                                            \
-            PR_EnterMonitor(tmmon);                                           \
+        if (tmlock)                                                           \
+            PR_Lock(tmlock);                                                  \
     PR_END_MACRO
 
-#define TM_EXIT_MONITOR()                                                     \
+#define TM_EXIT_LOCK()                                                        \
     PR_BEGIN_MACRO                                                            \
-        if (tmmon)                                                            \
-            PR_ExitMonitor(tmmon);                                            \
+        if (tmlock)                                                           \
+            PR_Unlock(tmlock);                                                \
     PR_END_MACRO
 
 /*
  * Thread-local storage.
  *
  * We can't use NSPR thread-local storage for this because it mallocs
  * within PR_GetThreadPrivate (the first time) and PR_SetThreadPrivate
  * (which can be worked around by protecting all uses of those functions
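
Taken together, the macros and the lock comment above imply a calling pattern like this hypothetical helper (not part of the patch): check suppress_tracing, raise it so the tracer's own allocations cannot recurse, and only then take the lock.

    static void
    touch_tracer_state(tm_thread *t)
    {
        if (t->suppress_tracing != 0)
            return;              /* already inside the tracer on this thread */
        t->suppress_tracing++;   /* our own mallocs must not re-enter */
        TM_ENTER_LOCK();
        /* ... manipulate the calltree, allocations table, log, or tmstats ... */
        TM_EXIT_LOCK();
        t->suppress_tracing--;
    }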
@@ -386,17 +395,17 @@ free_tm_thread(void *priv)
 #endif
 
 static tm_thread *
 get_tm_thread(void)
 {
     tm_thread *t;
     tm_thread stack_tm_thread;
 
-    if (!tmmon) {
+    if (!tmlock) {
         return &main_thread;
     }
 
     t = TM_GET_TLS_DATA(tls_index);
 
     if (!t) {
         /*
          * First, store a tm_thread on the stack to suppress for the
@@ -628,17 +637,17 @@ struct callsite {
     char        *name;
     const char  *library;
     int         offset;
     callsite    *parent;
     callsite    *siblings;
     callsite    *kids;
 };
 
-/* NB: these counters are incremented and decremented only within tmmon. */
+/* NB: these counters are incremented and decremented only within tmlock. */
 static uint32 library_serial_generator = 0;
 static uint32 method_serial_generator = 0;
 static uint32 callsite_serial_generator = 0;
 static uint32 tmstats_serial_generator = 0;
 static uint32 filename_serial_generator = 0;
 
 /* Root of the tree of callsites, the sum of all (cycle-compressed) stacks. */
 static callsite calltree_root =
@@ -825,16 +834,22 @@ static callsite *calltree(int skip)
 
         /*
          * Time to increase the number of stack frames?
          */
         if (framenum >= MAX_STACKFRAMES)
             break;
     }
 
+    /*
+     * FIXME: We should exit the lock while making some of the below
+     * calls into the system.  This will be fixed in bug 374829.
+     */
+    TM_ENTER_LOCK();
+
     depth = framenum;
     maxstack = (depth > tmstats.calltree_maxstack);
     if (maxstack)
         tmstats.calltree_maxstack = depth;
 
     /* Reverse the stack again, finding and building a path in the tree. */
     parent = &calltree_root;
     while (0 < framenum) {
@@ -910,17 +925,17 @@ static callsite *calltree(int skip)
         library_serial = 0;
         if (library) {
            if (!libraries) {
                 libraries = PL_NewHashTable(100, PL_HashString,
                                             PL_CompareStrings, PL_CompareValues,
                                             &lfdset_hashallocops, NULL);
                 if (!libraries) {
                     tmstats.btmalloc_failures++;
-                    return NULL;
+                    goto fail;
                 }
             }
             hash = PL_HashString(library);
             hep = PL_HashTableRawLookup(libraries, hash, library);
             he = *hep;
             library = strdup(library); /* strdup it always? */
             if (he) {
                 library_serial = (uint32) NS_PTR_TO_INT32(he->value);
@@ -933,17 +948,17 @@ static callsite *calltree(int skip)
 /*                library = strdup(library); */
                 if (library) {
                     library_serial = ++library_serial_generator;
                     he = PL_HashTableRawAdd(libraries, hep, hash, library,
                                             (void*) library_serial);
                 }
                 if (!he) {
                     tmstats.btmalloc_failures++;
-                    return NULL;
+                    goto fail;
                 }
                 le = (lfdset_entry *) he;
             }
             if (le) {
                 /* Need to log an event to fp for this lib. */
                 slash = strrchr(library, '/');
                 if (slash)
                     library = slash + 1;
@@ -957,17 +972,17 @@ static callsite *calltree(int skip)
         filename_serial = 0;
         if (filename) {
             if (!filenames) {
                 filenames = PL_NewHashTable(100, PL_HashString,
                                             PL_CompareStrings, PL_CompareValues,
                                             &lfdset_hashallocops, NULL);
                 if (!filenames) {
                     tmstats.btmalloc_failures++;
-                    return NULL;
+                    goto fail;
                 }
             }
             hash = PL_HashString(filename);
             hep = PL_HashTableRawLookup(filenames, hash, filename);
             he = *hep;
             if (he) {
                 filename_serial = (uint32) NS_PTR_TO_INT32(he->value);
                 le = (lfdset_entry *) he;
@@ -979,17 +994,17 @@ static callsite *calltree(int skip)
                 filename = strdup(filename);
                 if (filename) {
                     filename_serial = ++filename_serial_generator;
                     he = PL_HashTableRawAdd(filenames, hep, hash, filename,
                                             (void*) filename_serial);
                 }
                 if (!he) {
                     tmstats.btmalloc_failures++;
-                    return NULL;
+                    goto fail;
                 }
                 le = (lfdset_entry *) he;
             }
             if (le) {
                 /* Need to log an event to fp for this filename. */
                 log_event1(fp, TM_EVENT_FILENAME, filename_serial);
                 log_filename(fp, filename);
                 LFD_SET(fp->lfd, &le->lfdset);
@@ -1006,17 +1021,17 @@ static callsite *calltree(int skip)
                                 &displacement,
                                 symbol);
 
         /* Now find the demangled method name and pc offset in it. */
         if (0 != getSymRes) {
             demangledname = (char *)malloc(MAX_UNMANGLED_NAME_LEN);
             if (!_SymUnDName(symbol,demangledname,MAX_UNMANGLED_NAME_LEN)) {
                 free(demangledname);
-                return 0;
+                goto fail;
             }
             method = demangledname;
             offset = (char*)pc - (char*)(symbol->Address);
         }
         else {
             method = noname;
             offset = pc;
         }
@@ -1027,17 +1042,17 @@ static callsite *calltree(int skip)
             methods = PL_NewHashTable(10000, PL_HashString,
                                       PL_CompareStrings, PL_CompareValues,
                                       &lfdset_hashallocops, NULL);
             if (!methods) {
                 tmstats.btmalloc_failures++;
                 if (method != noname) {
                     free((void*) method);
                 }
-                return NULL;
+                goto fail;
             }
         }
         hash = PL_HashString(method);
         hep = PL_HashTableRawLookup(methods, hash, method);
         he = *hep;
         if (he) {
             method_serial = (uint32) NS_PTR_TO_INT32(he->value);
             if (method != noname) {
@@ -1053,33 +1068,33 @@ static callsite *calltree(int skip)
             method_serial = ++method_serial_generator;
             he = PL_HashTableRawAdd(methods, hep, hash, method,
                                     (void*) method_serial);
             if (!he) {
                 tmstats.btmalloc_failures++;
                 if (method != noname) {
                     free((void*) method);
                 }
-                return NULL;
+                goto fail;
             }
             le = (lfdset_entry *) he;
         }
         if (le) {
             log_event4(fp, TM_EVENT_METHOD, method_serial, library_serial,
                        filename_serial, linenumber);
             log_string(fp, method);
             LFD_SET(fp->lfd, &le->lfdset);
         }
 
         /* Create a new callsite record. */
         if (!site) {
             site = malloc(sizeof(callsite));
             if (!site) {
                 tmstats.btmalloc_failures++;
-                return NULL;
+                goto fail;
             }
 
             /* Update parent and max-kids-per-parent stats. */
             if (!parent->kids)
                 tmstats.calltree_parents++;
             nkids = 1;
             for (tmp = parent->kids; tmp; tmp = tmp->siblings)
                 nkids++;
@@ -1112,17 +1127,23 @@ static callsite *calltree(int skip)
 
     if (maxstack)
         calltree_maxstack_top = site;
     depth = 0;
     for (tmp = site; tmp; tmp = tmp->parent)
         depth++;
     if (depth > tmstats.calltree_maxdepth)
         tmstats.calltree_maxdepth = depth;
+
+    TM_EXIT_LOCK();
     return site;
+
+  fail:
+    TM_EXIT_LOCK();
+    return NULL;
 }
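
Each "return NULL" in this function became "goto fail" because calltree now runs with tmlock held; every error path must funnel through a single unlock. Distilled into a hypothetical sketch:

    static callsite *
    locked_build(void)
    {
        callsite *site;

        TM_ENTER_LOCK();
        /* the caller (backtrace) has already raised suppress_tracing,
         * so this malloc will not re-enter the tracer */
        site = malloc(sizeof(callsite));
        if (!site)
            goto fail;
        TM_EXIT_LOCK();
        return site;

      fail:
        TM_EXIT_LOCK();
        return NULL;
    }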
 
 #else /*XP_UNIX*/
 
 static callsite *calltree(void **bp)
 {
     logfile *fp = logfp;
     void **bpup, **bpdown, *pc;
@@ -1135,16 +1156,23 @@ static callsite *calltree(void **bp)
     char *method, *slash;
     PLHashNumber hash;
     PLHashEntry **hep, *he;
     lfdset_entry *le;
     uint32 filename_serial;
     uint32 linenumber;
     const char* filename;
 
+    /*
+     * FIXME bug 391749: We should really hold the lock for only the
+     * minimum time necessary in this function, since it makes some
+     * calls that can acquire locks inside the system's shared library
+     * loader.
+     */
+    TM_ENTER_LOCK();
+
     /* Reverse the stack frame list to avoid recursion. */
     bpup = NULL;
     for (depth = 0; ; depth++) {
         bpdown = (void**) bp[0];
         bp[0] = (void*) bpup;
         if ((void**) bpdown[0] < bpdown)
             break;
         bpup = bp;
@@ -1206,31 +1234,31 @@ static callsite *calltree(void **bp)
          * Not in tree at all, or not logged to fp: let's find our symbolic
          * callsite info.  XXX static syms are masked by nearest lower global
          */
         info.dli_fname = info.dli_sname = NULL;
 
         /*
          * dladdr can acquire a lock inside the shared library loader.
          * Another thread might call malloc while holding that lock
-         * (when loading a shared library).  So we have to exit tmmon
+         * (when loading a shared library).  So we have to exit tmlock
          * around this call.  For details, see
          * https://bugzilla.mozilla.org/show_bug.cgi?id=363334#c3
          *
          * We could be more efficient by building the nodes in the
          * calltree, exiting the monitor once to describe all of them,
          * and then filling in the descriptions for any that hadn't been
          * described already.  But this is easier for now.
          */
-        TM_EXIT_MONITOR();
+        TM_EXIT_LOCK();
         ok = my_dladdr((void*) pc, &info);
-        TM_ENTER_MONITOR();
+        TM_ENTER_LOCK();
         if (ok < 0) {
             tmstats.dladdr_failures++;
-            return NULL;
+            goto fail;
         }
 
         /*
          * One day, if someone figures out how to get filename and line
          *   number info, this is the place to fill it all in.
          */
         filename = "noname";
         linenumber = 0;
@@ -1240,17 +1268,17 @@ static callsite *calltree(void **bp)
         library = info.dli_fname;
         if (library) {
             if (!libraries) {
                 libraries = PL_NewHashTable(100, PL_HashString,
                                             PL_CompareStrings, PL_CompareValues,
                                             &lfdset_hashallocops, NULL);
                 if (!libraries) {
                     tmstats.btmalloc_failures++;
-                    return NULL;
+                    goto fail;
                 }
             }
             hash = PL_HashString(library);
             hep = PL_HashTableRawLookup(libraries, hash, library);
             he = *hep;
             if (he) {
                 library_serial = (uint32) NS_PTR_TO_INT32(he->value);
                 le = (lfdset_entry *) he;
@@ -1262,17 +1290,17 @@ static callsite *calltree(void **bp)
                 library = strdup(library);
                 if (library) {
                     library_serial = ++library_serial_generator;
                     he = PL_HashTableRawAdd(libraries, hep, hash, library,
                                             (void*) library_serial);
                 }
                 if (!he) {
                     tmstats.btmalloc_failures++;
-                    return NULL;
+                    goto fail;
                 }
                 le = (lfdset_entry *) he;
             }
             if (le) {
                 /* Need to log an event to fp for this lib. */
                 slash = strrchr(library, '/');
                 if (slash)
                     library = slash + 1;
@@ -1286,17 +1314,17 @@ static callsite *calltree(void **bp)
         filename_serial = 0;
         if (filename) {
             if (!filenames) {
                 filenames = PL_NewHashTable(100, PL_HashString,
                                             PL_CompareStrings, PL_CompareValues,
                                             &lfdset_hashallocops, NULL);
                 if (!filenames) {
                     tmstats.btmalloc_failures++;
-                    return NULL;
+                    goto fail;
                 }
             }
             hash = PL_HashString(filename);
             hep = PL_HashTableRawLookup(filenames, hash, filename);
             he = *hep;
             if (he) {
                 filename_serial = (uint32) NS_PTR_TO_INT32(he->value);
                 le = (lfdset_entry *) he;
@@ -1307,17 +1335,17 @@ static callsite *calltree(void **bp)
             } else {
                 if (filename) {
                     filename_serial = ++filename_serial_generator;
                     he = PL_HashTableRawAdd(filenames, hep, hash, filename,
                                             (void*) filename_serial);
                 }
                 if (!he) {
                     tmstats.btmalloc_failures++;
-                    return NULL;
+                    goto fail;
                 }
                 le = (lfdset_entry *) he;
             }
             if (le) {
                 /* Need to log an event to fp for this filename. */
                 log_event1(fp, TM_EVENT_FILENAME, filename_serial);
                 log_filename(fp, filename);
                 LFD_SET(fp->lfd, &le->lfdset);
@@ -1337,29 +1365,29 @@ static callsite *calltree(void **bp)
             method = symbol
                      ? strdup(symbol)
                      : PR_smprintf("%s+%X",
                                    info.dli_fname ? info.dli_fname : "main",
                                    (char*)pc - (char*)info.dli_fbase);
         }
         if (!method) {
             tmstats.btmalloc_failures++;
-            return NULL;
+            goto fail;
         }
 
         /* Emit an 'N' (for New method, 'M' is for malloc!) event if needed. */
         method_serial = 0;
         if (!methods) {
             methods = PL_NewHashTable(10000, PL_HashString,
                                       PL_CompareStrings, PL_CompareValues,
                                       &lfdset_hashallocops, NULL);
             if (!methods) {
                 tmstats.btmalloc_failures++;
                 free((void*) method);
-                return NULL;
+                goto fail;
             }
         }
         hash = PL_HashString(method);
         hep = PL_HashTableRawLookup(methods, hash, method);
         he = *hep;
         if (he) {
             method_serial = (uint32) NS_PTR_TO_INT32(he->value);
             free((void*) method);
@@ -1371,33 +1399,33 @@ static callsite *calltree(void **bp)
             }
         } else {
             method_serial = ++method_serial_generator;
             he = PL_HashTableRawAdd(methods, hep, hash, method,
                                     (void*) method_serial);
             if (!he) {
                 tmstats.btmalloc_failures++;
                 free((void*) method);
-                return NULL;
+                goto fail;
             }
             le = (lfdset_entry *) he;
         }
         if (le) {
             log_event4(fp, TM_EVENT_METHOD, method_serial, library_serial,
                        filename_serial, linenumber);
             log_string(fp, method);
             LFD_SET(fp->lfd, &le->lfdset);
         }
 
         /* Create a new callsite record. */
         if (!site) {
             site = __libc_malloc(sizeof(callsite));
             if (!site) {
                 tmstats.btmalloc_failures++;
-                return NULL;
+                goto fail;
             }
 
             /* Update parent and max-kids-per-parent stats. */
             if (!parent->kids)
                 tmstats.calltree_parents++;
             nkids = 1;
             for (tmp = parent->kids; tmp; tmp = tmp->siblings)
                 nkids++;
@@ -1432,52 +1460,64 @@ static callsite *calltree(void **bp)
 
     if (maxstack)
         calltree_maxstack_top = site;
     depth = 0;
     for (tmp = site; tmp; tmp = tmp->parent)
         depth++;
     if (depth > tmstats.calltree_maxdepth)
         tmstats.calltree_maxdepth = depth;
+
+    TM_EXIT_LOCK();
+
     return site;
+  fail:
+    TM_EXIT_LOCK();
+    return NULL;
 }
 
 #endif
 
+/*
+ * The caller MUST NOT be holding tmlock when calling backtrace.
+ */
+
 #ifdef XP_WIN32
 
 callsite *
 backtrace(tm_thread *t, int skip)
 {
     callsite *site;
 
-    tmstats.backtrace_calls++;
     t->suppress_tracing++;
 
     site = calltree(skip);
+
+    TM_ENTER_LOCK();
+    tmstats.backtrace_calls++;
     if (!site) {
         tmstats.backtrace_failures++;
         /* PR_ASSERT(tmstats.backtrace_failures < 100); */
     }
+    TM_EXIT_LOCK();
     t->suppress_tracing--;
     return site;
 }
 
 #else /*XP_UNIX*/
 
 callsite *
 backtrace(tm_thread *t, int skip)
 {
     void **bp, **bpdown;
     callsite *site, **key;
     PLHashNumber hash;
     PLHashEntry **hep, *he;
     int i, n;
 
-    tmstats.backtrace_calls++;
     t->suppress_tracing++;
 
     /* Stack walking code adapted from Kipp's "leaky". */
 #if defined(__i386) 
     __asm__( "movl %%ebp, %0" : "=g"(bp));
 #elif defined(__x86_64__)
     __asm__( "movq %%rbp, %0" : "=g"(bp));
 #else
@@ -1491,20 +1531,23 @@ backtrace(tm_thread *t, int skip)
     while (--skip >= 0) {
         bpdown = (void**) bp[0];
         if (bpdown < bp)
             break;
         bp = bpdown;
     }
 
     site = calltree(bp);
+    TM_ENTER_LOCK();
+    tmstats.backtrace_calls++;
     if (!site) {
         tmstats.backtrace_failures++;
         PR_ASSERT(tmstats.backtrace_failures < 100);
     }
+    TM_EXIT_LOCK();
     t->suppress_tracing--;
     return site;
 }
 
 
 #endif /* XP_UNIX */
 
 
@@ -1588,47 +1631,49 @@ malloc(size_t size)
 {
     PRUint32 start, end;
     __ptr_t ptr;
     callsite *site;
     PLHashEntry *he;
     allocation *alloc;
     tm_thread *t;
 
-    if (!tracing_enabled || !PR_Initialized()) {
+    if (!tracing_enabled || !PR_Initialized() ||
+        (t = get_tm_thread())->suppress_tracing != 0) {
         return __libc_malloc(size);
     }
 
-    t = get_tm_thread();
-
     start = PR_IntervalNow();
     ptr = __libc_malloc(size);
     end = PR_IntervalNow();
-    TM_ENTER_MONITOR();
+
+    site = backtrace(t, 1);
+
+    t->suppress_tracing++;
+    TM_ENTER_LOCK();
     tmstats.malloc_calls++;
     if (!ptr) {
         tmstats.malloc_failures++;
-    } else if (t->suppress_tracing == 0) {
-        site = backtrace(t, 1);
+    } else {
         if (site)
             log_event5(logfp, TM_EVENT_MALLOC,
                        site->serial, start, end - start,
                        (uint32)NS_PTR_TO_INT32(ptr), size);
         if (get_allocations()) {
-            t->suppress_tracing++;
             he = PL_HashTableAdd(allocations, ptr, site);
-            t->suppress_tracing--;
             if (he) {
                 alloc = (allocation*) he;
                 alloc->size = size;
                 alloc->trackfp = NULL;
             }
         }
     }
-    TM_EXIT_MONITOR();
+    TM_EXIT_LOCK();
+    t->suppress_tracing--;
+
     return ptr;
 }
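
Distilled from the hunk above (hypothetical, not part of the patch), the wrapper now has the shape below; calloc, realloc, valloc, and memalign follow the same pattern. The real allocation and the stack walk both run with no lock held, leaving only a short critical section for logging and bookkeeping.

    NS_EXTERNAL_VIS_(__ptr_t)
    wrapped_malloc(size_t size)
    {
        tm_thread *t;
        __ptr_t ptr;
        callsite *site;

        if (!tracing_enabled || !PR_Initialized() ||
            (t = get_tm_thread())->suppress_tracing != 0)
            return __libc_malloc(size);   /* fast path: no lock taken at all */

        ptr = __libc_malloc(size);        /* allocate outside the lock */
        site = backtrace(t, 1);           /* takes tmlock internally */

        t->suppress_tracing++;            /* our bookkeeping must not recurse */
        TM_ENTER_LOCK();
        /* ... update tmstats, log the event, record the allocation ... */
        TM_EXIT_LOCK();
        t->suppress_tracing--;
        return ptr;
    }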
 
 NS_EXTERNAL_VIS_(__ptr_t)
 calloc(size_t count, size_t size)
 {
     PRUint32 start, end;
     __ptr_t ptr;
@@ -1643,74 +1688,75 @@ calloc(size_t count, size_t size)
      * using calloc (call from _dl_tls_setup).
      *
      * Thus, our calloc replacement is invoked too early, tries to
      * initialize NSPR, which calls dlopen, which calls into the dl
      * -> crash.
      *
      * Delaying NSPR calls until NSPR is initialized helps.
      */
-    if (!tracing_enabled || !PR_Initialized()) {
+    if (!tracing_enabled || !PR_Initialized() ||
+        (t = get_tm_thread())->suppress_tracing != 0) {
         return __libc_calloc(count, size);
     }
 
-    t = get_tm_thread();
-
     start = PR_IntervalNow();
     ptr = __libc_calloc(count, size);
     end = PR_IntervalNow();
-    TM_ENTER_MONITOR();
+
+    site = backtrace(t, 1);
+
+    t->suppress_tracing++;
+    TM_ENTER_LOCK();
     tmstats.calloc_calls++;
     if (!ptr) {
         tmstats.calloc_failures++;
-    } else if (t->suppress_tracing == 0) {
-        site = backtrace(t, 1);
+    } else {
         size *= count;
         if (site) {
             log_event5(logfp, TM_EVENT_CALLOC,
                        site->serial, start, end - start,
                        (uint32)NS_PTR_TO_INT32(ptr), size);
         }
         if (get_allocations()) {
-            t->suppress_tracing++;
             he = PL_HashTableAdd(allocations, ptr, site);
-            t->suppress_tracing--;
             if (he) {
                 alloc = (allocation*) he;
                 alloc->size = size;
                 alloc->trackfp = NULL;
             }
         }
     }
-    TM_EXIT_MONITOR();
+    TM_EXIT_LOCK();
+    t->suppress_tracing--;
     return ptr;
 }
 
 NS_EXTERNAL_VIS_(__ptr_t)
 realloc(__ptr_t ptr, size_t size)
 {
     PRUint32 start, end;
     __ptr_t oldptr;
     callsite *oldsite, *site;
     size_t oldsize;
     PLHashNumber hash;
     PLHashEntry **hep, *he;
     allocation *alloc;
     FILE *trackfp = NULL;
     tm_thread *t;
 
-    if (!tracing_enabled || !PR_Initialized()) {
+    if (!tracing_enabled || !PR_Initialized() ||
+        (t = get_tm_thread())->suppress_tracing != 0) {
         return __libc_realloc(ptr, size);
     }
 
-    t = get_tm_thread();
-
-    TM_ENTER_MONITOR();
+    t->suppress_tracing++;
+    TM_ENTER_LOCK();
     tmstats.realloc_calls++;
-    if (t->suppress_tracing == 0) {
+    if (PR_TRUE) {
         oldptr = ptr;
         oldsite = NULL;
         oldsize = 0;
         he = NULL;
         if (oldptr && get_allocations()) {
             hash = hash_pointer(oldptr);
             hep = PL_HashTableRawLookup(allocations, hash, oldptr);
             he = *hep;
@@ -1724,40 +1770,42 @@ realloc(__ptr_t ptr, size_t size)
                             "\nrealloc(%p, %lu), oldsize %lu, alloc site %p\n",
                             (void*) ptr, (unsigned long) size,
                             (unsigned long) oldsize, (void*) oldsite);
                     NS_TraceStack(1, trackfp);
                 }
             }
         }
     }
-    TM_EXIT_MONITOR();
+    TM_EXIT_LOCK();
+    t->suppress_tracing--;
 
     start = PR_IntervalNow();
     ptr = __libc_realloc(ptr, size);
     end = PR_IntervalNow();
 
-    TM_ENTER_MONITOR();
+    site = backtrace(t, 1);
+
+    t->suppress_tracing++;
+    TM_ENTER_LOCK();
     if (!ptr && size) {
         /*
          * When realloc() fails, the original block is not freed or moved, so
          * we'll leave the allocation entry untouched.
          */
         tmstats.realloc_failures++;
-    } else if (t->suppress_tracing == 0) {
-        site = backtrace(t, 1);
+    } else {
         if (site) {
             log_event8(logfp, TM_EVENT_REALLOC,
                        site->serial, start, end - start,
                        (uint32)NS_PTR_TO_INT32(ptr), size,
                        oldsite ? oldsite->serial : 0,
                        (uint32)NS_PTR_TO_INT32(oldptr), oldsize);
         }
         if (ptr && allocations) {
-            t->suppress_tracing++;
             if (ptr != oldptr) {
                 /*
                  * If we're reallocating (not merely allocating new space by
                  * passing null to realloc) and realloc has moved the block,
                  * free oldptr.
                  */
                 if (he)
                     PL_HashTableRemove(allocations, oldptr);
@@ -1767,114 +1815,116 @@ realloc(__ptr_t ptr, size_t size)
             } else {
                 /*
                  * If we haven't yet recorded an allocation (possibly due to
                  * a temporary memory shortage), do it now.
                  */
                 if (!he)
                     he = PL_HashTableAdd(allocations, ptr, site);
             }
-            t->suppress_tracing--;
             if (he) {
                 alloc = (allocation*) he;
                 alloc->size = size;
                 alloc->trackfp = trackfp;
             }
         }
     }
-    TM_EXIT_MONITOR();
+    TM_EXIT_LOCK();
+    t->suppress_tracing--;
     return ptr;
 }
 
 NS_EXTERNAL_VIS_(void*)
 valloc(size_t size)
 {
     PRUint32 start, end;
     __ptr_t ptr;
     callsite *site;
     PLHashEntry *he;
     allocation *alloc;
     tm_thread *t;
 
-    if (!tracing_enabled || !PR_Initialized()) {
+    if (!tracing_enabled || !PR_Initialized() ||
+        (t = get_tm_thread())->suppress_tracing != 0) {
         return __libc_valloc(size);
     }
 
-    t = get_tm_thread();
-
     start = PR_IntervalNow();
     ptr = __libc_valloc(size);
     end = PR_IntervalNow();
-    TM_ENTER_MONITOR();
+
+    site = backtrace(t, 1);
+
+    t->suppress_tracing++;
+    TM_ENTER_LOCK();
     tmstats.malloc_calls++; /* XXX valloc_calls ? */
     if (!ptr) {
         tmstats.malloc_failures++; /* XXX valloc_failures ? */
-    } else if (t->suppress_tracing == 0) {
-        site = backtrace(t, 1);
+    } else {
         if (site)
             log_event5(logfp, TM_EVENT_MALLOC, /* XXX TM_EVENT_VALLOC? */
                        site->serial, start, end - start,
                        (uint32)NS_PTR_TO_INT32(ptr), size);
         if (get_allocations()) {
-            t->suppress_tracing++;
             he = PL_HashTableAdd(allocations, ptr, site);
-            t->suppress_tracing--;
             if (he) {
                 alloc = (allocation*) he;
                 alloc->size = size;
                 alloc->trackfp = NULL;
             }
         }
     }
-    TM_EXIT_MONITOR();
+    TM_EXIT_LOCK();
+    t->suppress_tracing--;
     return ptr;
 }
 
 NS_EXTERNAL_VIS_(void*)
 memalign(size_t boundary, size_t size)
 {
     PRUint32 start, end;
     __ptr_t ptr;
     callsite *site;
     PLHashEntry *he;
     allocation *alloc;
     tm_thread *t;
 
-    if (!tracing_enabled || !PR_Initialized()) {
+    if (!tracing_enabled || !PR_Initialized() ||
+        (t = get_tm_thread())->suppress_tracing != 0) {
         return __libc_memalign(boundary, size);
     }
 
-    t = get_tm_thread();
-
     start = PR_IntervalNow();
     ptr = __libc_memalign(boundary, size);
     end = PR_IntervalNow();
-    TM_ENTER_MONITOR();
+
+    site = backtrace(t, 1);
+
+    t->suppress_tracing++;
+    TM_ENTER_LOCK();
     tmstats.malloc_calls++; /* XXX memalign_calls ? */
     if (!ptr) {
         tmstats.malloc_failures++; /* XXX memalign_failures ? */
-    } else if (t->suppress_tracing == 0) {
-        site = backtrace(t, 1);
+    } else {
         if (site) {
             log_event5(logfp, TM_EVENT_MALLOC, /* XXX TM_EVENT_MEMALIGN? */
                        site->serial, start, end - start,
                        (uint32)NS_PTR_TO_INT32(ptr), size);
         }
         if (get_allocations()) {
-            t->suppress_tracing++;
             he = PL_HashTableAdd(allocations, ptr, site);
-            t->suppress_tracing--;
             if (he) {
                 alloc = (allocation*) he;
                 alloc->size = size;
                 alloc->trackfp = NULL;
             }
         }
     }
-    TM_EXIT_MONITOR();
+    TM_EXIT_LOCK();
+    t->suppress_tracing--;
     return ptr;
 }
 
 NS_EXTERNAL_VIS_(int)
 posix_memalign(void **memptr, size_t alignment, size_t size)
 {
     __ptr_t ptr = memalign(alignment, size);
     if (!ptr)
@@ -1888,28 +1938,28 @@ free(__ptr_t ptr)
 {
     PLHashEntry **hep, *he;
     callsite *site;
     allocation *alloc;
     uint32 serial = 0, size = 0;
     PRUint32 start, end;
     tm_thread *t;
 
-    if (!tracing_enabled || !PR_Initialized()) {
+    if (!tracing_enabled || !PR_Initialized() ||
+        (t = get_tm_thread())->suppress_tracing != 0) {
         __libc_free(ptr);
         return;
     }
 
-    t = get_tm_thread();
-
-    TM_ENTER_MONITOR();
+    t->suppress_tracing++;
+    TM_ENTER_LOCK();
     tmstats.free_calls++;
     if (!ptr) {
         tmstats.null_free_calls++;
-    } else if (t->suppress_tracing == 0) {
+    } else {
         if (get_allocations()) {
             hep = PL_HashTableRawLookup(allocations, hash_pointer(ptr), ptr);
             he = *hep;
             if (he) {
                 site = (callsite*) he->value;
                 if (site) {
                     alloc = (allocation*) he;
                     serial = site->serial;
@@ -1919,28 +1969,31 @@ free(__ptr_t ptr)
                                 (void*) ptr, (void*) site);
                         NS_TraceStack(1, alloc->trackfp);
                     }
                 }
                 PL_HashTableRawRemove(allocations, hep, he);
             }
         }
     }
-    TM_EXIT_MONITOR();
+    TM_EXIT_LOCK();
+    t->suppress_tracing--;
 
     start = PR_IntervalNow();
     __libc_free(ptr);
     end = PR_IntervalNow();
 
     if (size != 0) {
-        TM_ENTER_MONITOR();
+        t->suppress_tracing++;
+        TM_ENTER_LOCK();
         log_event5(logfp, TM_EVENT_FREE,
                    serial, start, end - start,
                    (uint32)NS_PTR_TO_INT32(ptr), size);
-        TM_EXIT_MONITOR();
+        TM_EXIT_LOCK();
+        t->suppress_tracing--;
     }
 }
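
free() is now split into three phases so that __libc_free itself runs unlocked: capture the allocation's serial and size under the lock, do the real free with no lock held, then re-enter the lock briefly to log. In outline (hypothetical sketch of the flow above):

    static void
    traced_free(tm_thread *t, __ptr_t ptr)
    {
        uint32 serial = 0, size = 0;

        t->suppress_tracing++;
        TM_ENTER_LOCK();
        /* look up ptr in allocations, capture serial and size, remove it */
        TM_EXIT_LOCK();
        t->suppress_tracing--;

        __libc_free(ptr);                 /* the real free runs unlocked */

        if (size != 0) {                  /* only if we had it recorded */
            t->suppress_tracing++;
            TM_ENTER_LOCK();
            /* log_event5(logfp, TM_EVENT_FREE, serial, ...) */
            TM_EXIT_LOCK();
            t->suppress_tracing--;
        }
    }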
 
 NS_EXTERNAL_VIS_(void)
 cfree(void *ptr)
 {
     free(ptr);
 }
@@ -1974,24 +2027,24 @@ PR_IMPLEMENT(void) NS_TraceMallocStartup
         logfile_tail = &logfp->next;
         log_header(logfd);
     }
 
     atexit(NS_TraceMallocShutdown);
 
     /*
      * We only allow one thread until NS_TraceMallocStartup is called.
-     * When it is, we have to initialize tls_index before allocating tmmon
-     * since get_tm_index uses NULL-tmmon to detect tls_index being
+     * When it is, we have to initialize tls_index before allocating tmlock,
+     * since get_tm_thread uses a null tmlock to detect tls_index being
      * uninitialized.
      */
     main_thread.suppress_tracing++;
     TM_CREATE_TLS_INDEX(tls_index);
     TM_SET_TLS_DATA(tls_index, &main_thread);
-    tmmon = PR_NewMonitor();
+    tmlock = PR_NewLock();
     main_thread.suppress_tracing--;
 
 #ifdef XP_WIN32
     /* Register listeners for win32. */
     if (tracing_enabled) {
         StartupHooker();
     }
 #endif
@@ -2163,63 +2216,76 @@ PR_IMPLEMENT(void) NS_TraceMallocShutdow
             fp->fd = -1;
         }
         if (fp != &default_logfile) {
             if (fp == logfp)
                 logfp = &default_logfile;
             free((void*) fp);
         }
     }
-    if (tmmon) {
-        PRMonitor *mon = tmmon;
-        tmmon = NULL;
-        PR_DestroyMonitor(mon);
+    if (tmlock) {
+        PRLock *lock = tmlock;
+        tmlock = NULL;
+        PR_DestroyLock(lock);
     }
 #ifdef XP_WIN32
     if (tracing_enabled) {
         ShutdownHooker();
     }
 #endif
 }
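
Shutdown keeps the idiom the PRMonitor version used: clear the global first, so that TM_ENTER_LOCK's NULL check turns late callers into no-ops, then destroy the lock. Annotated (hypothetical; assumes no thread is mid-acquire at shutdown, as before):

    if (tmlock) {
        PRLock *lock = tmlock;
        tmlock = NULL;          /* TM_ENTER_LOCK() now falls through, so */
                                /* no thread can newly acquire the lock  */
        PR_DestroyLock(lock);   /* safe under the shutdown assumption    */
    }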
 
 PR_IMPLEMENT(void) NS_TraceMallocDisable()
 {
     logfile *fp;
+    tm_thread *t = get_tm_thread();
 
-    TM_ENTER_MONITOR();
+    t->suppress_tracing++;
+    TM_ENTER_LOCK();
     for (fp = logfile_list; fp; fp = fp->next)
         flush_logfile(fp);
     tracing_enabled = 0;
-    TM_EXIT_MONITOR();
+    TM_EXIT_LOCK();
+    t->suppress_tracing--;
 }
 
 PR_IMPLEMENT(void) NS_TraceMallocEnable()
 {
-    TM_ENTER_MONITOR();
+    tm_thread *t = get_tm_thread();
+
+    t->suppress_tracing++;
+    TM_ENTER_LOCK();
     tracing_enabled = 1;
-    TM_EXIT_MONITOR();
+    TM_EXIT_LOCK();
+    t->suppress_tracing--;
 }
 
 PR_IMPLEMENT(int) NS_TraceMallocChangeLogFD(int fd)
 {
     logfile *oldfp, *fp;
     struct stat sb;
+    tm_thread *t = get_tm_thread();
 
-    TM_ENTER_MONITOR();
+    t->suppress_tracing++;
+    TM_ENTER_LOCK();
     oldfp = logfp;
     if (oldfp->fd != fd) {
         flush_logfile(oldfp);
         fp = get_logfile(fd);
-        if (!fp)
+        if (!fp) {
+            TM_EXIT_LOCK();
+            t->suppress_tracing--;
             return -2;
+        }
         if (fd >= 0 && fstat(fd, &sb) == 0 && sb.st_size == 0)
             log_header(fd);
         logfp = fp;
     }
-    TM_EXIT_MONITOR();
+    TM_EXIT_LOCK();
+    t->suppress_tracing--;
     return oldfp->fd;
 }
 
 static PRIntn
 lfd_clr_enumerator(PLHashEntry *he, PRIntn i, void *arg)
 {
     lfdset_entry *le = (lfdset_entry*) he;
     logfile *fp = (logfile*) arg;
@@ -2237,18 +2303,20 @@ lfd_clr_walk(callsite *site, logfile *fp
     for (kid = site->kids; kid; kid = kid->siblings)
         lfd_clr_walk(kid, fp);
 }
 
 PR_IMPLEMENT(void)
 NS_TraceMallocCloseLogFD(int fd)
 {
     logfile *fp;
+    tm_thread *t = get_tm_thread();
 
-    TM_ENTER_MONITOR();
+    t->suppress_tracing++;
+    TM_ENTER_LOCK();
 
     fp = get_logfile(fd);
     if (fp) {
         flush_logfile(fp);
         if (fp == &default_logfile) {
             /* Leave default_logfile in logfile_list with an fd of -1. */
             fp->fd = -1;
 
@@ -2269,49 +2337,53 @@ NS_TraceMallocCloseLogFD(int fd)
 
             /* Reset logfp if we must, then free fp. */
             if (fp == logfp)
                 logfp = &default_logfile;
             free((void*) fp);
         }
     }
 
-    TM_EXIT_MONITOR();
+    TM_EXIT_LOCK();
+    t->suppress_tracing--;
     close(fd);
 }
 
 PR_IMPLEMENT(void)
 NS_TraceMallocLogTimestamp(const char *caption)
 {
     logfile *fp;
 #ifdef XP_UNIX
     struct timeval tv;
 #endif
 #ifdef XP_WIN32
     struct _timeb tb;
 #endif
+    tm_thread *t = get_tm_thread();
 
-    TM_ENTER_MONITOR();
+    t->suppress_tracing++;
+    TM_ENTER_LOCK();
 
     fp = logfp;
     log_byte(fp, TM_EVENT_TIMESTAMP);
 
 #ifdef XP_UNIX
     gettimeofday(&tv, NULL);
     log_uint32(fp, (uint32) tv.tv_sec);
     log_uint32(fp, (uint32) tv.tv_usec);
 #endif
 #ifdef XP_WIN32
     _ftime(&tb);
     log_uint32(fp, (uint32) tb.time);
     log_uint32(fp, (uint32) tb.millitm);
 #endif
     log_string(fp, caption);
 
-    TM_EXIT_MONITOR();
+    TM_EXIT_LOCK();
+    t->suppress_tracing--;
 }
 
 static PRIntn
 allocation_enumerator(PLHashEntry *he, PRIntn i, void *arg)
 {
     allocation *alloc = (allocation*) he;
     FILE *ofp = (FILE*) arg;
     callsite *site = (callsite*) he->value;
@@ -2370,141 +2442,146 @@ NS_TraceMallocDumpAllocations(const char
     fclose(ofp);
     return rv;
 }
 
 PR_IMPLEMENT(void)
 NS_TraceMallocFlushLogfiles()
 {
     logfile *fp;
+    tm_thread *t = get_tm_thread();
 
-    TM_ENTER_MONITOR();
+    t->suppress_tracing++;
+    TM_ENTER_LOCK();
 
     for (fp = logfile_list; fp; fp = fp->next)
         flush_logfile(fp);
 
-    TM_EXIT_MONITOR();
+    TM_EXIT_LOCK();
+    t->suppress_tracing--;
 }
 
 PR_IMPLEMENT(void)
 NS_TrackAllocation(void* ptr, FILE *ofp)
 {
     PLHashEntry **hep;
     allocation *alloc;
+    tm_thread *t = get_tm_thread();
 
     fprintf(ofp, "Trying to track %p\n", (void*) ptr);
     setlinebuf(ofp);
 
-    TM_ENTER_MONITOR();
+    t->suppress_tracing++;
+    TM_ENTER_LOCK();
     if (get_allocations()) {
         hep = PL_HashTableRawLookup(allocations, hash_pointer(ptr), ptr);
         alloc = (allocation*) *hep;
         if (alloc) {
             fprintf(ofp, "Tracking %p\n", (void*) ptr);
             alloc->trackfp = ofp;
         } else {
             fprintf(ofp, "Not tracking %p\n", (void*) ptr);
         }
     }
-    TM_EXIT_MONITOR();
+    TM_EXIT_LOCK();
+    t->suppress_tracing--;
 }
 
 #ifdef XP_WIN32
 
 PR_IMPLEMENT(void)
 MallocCallback(void *ptr, size_t size, PRUint32 start, PRUint32 end)
 {
     callsite *site;
     PLHashEntry *he;
     allocation *alloc;
     tm_thread *t;
 
-    if (!tracing_enabled)
+    if (!tracing_enabled || (t = get_tm_thread())->suppress_tracing != 0)
         return;
 
-    t = get_tm_thread();
+    site = backtrace(t, 4);
 
-    TM_ENTER_MONITOR();
+    t->suppress_tracing++;
+    TM_ENTER_LOCK();
     tmstats.malloc_calls++;
     if (!ptr) {
         tmstats.malloc_failures++;
-    } else if (t->suppress_tracing == 0) {
-        site = backtrace(t, 4);
+    } else {
         if (site)
             log_event5(logfp, TM_EVENT_MALLOC,
                        site->serial, start, end - start,
                        (uint32)NS_PTR_TO_INT32(ptr), size);
         if (get_allocations()) {
-            t->suppress_tracing++;
             he = PL_HashTableAdd(allocations, ptr, site);
-            t->suppress_tracing--;
             if (he) {
                 alloc = (allocation*) he;
                 alloc->size = size;
             }
         }
     }
-    TM_EXIT_MONITOR();
+    TM_EXIT_LOCK();
+    t->suppress_tracing--;
 }
 
 PR_IMPLEMENT(void)
 CallocCallback(void *ptr, size_t count, size_t size, PRUint32 start, PRUint32 end)
 {
     callsite *site;
     PLHashEntry *he;
     allocation *alloc;
     tm_thread *t;
 
-    if (!tracing_enabled)
+    if (!tracing_enabled || (t = get_tm_thread())->suppress_tracing != 0)
         return;
 
-    t = get_tm_thread();
+    site = backtrace(t, 1);
 
-    TM_ENTER_MONITOR();
+    t->suppress_tracing++;
+    TM_ENTER_LOCK();
     tmstats.calloc_calls++;
     if (!ptr) {
         tmstats.calloc_failures++;
-    } else if (t->suppress_tracing == 0) {
-        site = backtrace(t, 1);
+    } else {
         size *= count;
         if (site)
             log_event5(logfp, TM_EVENT_CALLOC,
                        site->serial, start, end - start,
                        (uint32)NS_PTR_TO_INT32(ptr), size);
         if (get_allocations()) {
-            t->suppress_tracing++;
             he = PL_HashTableAdd(allocations, ptr, site);
-            t->suppress_tracing--;
             if (he) {
                 alloc = (allocation*) he;
                 alloc->size = size;
             }
         }
     }
-    TM_EXIT_MONITOR();
+    TM_EXIT_LOCK();
+    t->suppress_tracing--;
 }
 
 PR_IMPLEMENT(void)
 ReallocCallback(void * oldptr, void *ptr, size_t size, PRUint32 start, PRUint32 end)
 {
     callsite *oldsite, *site;
     size_t oldsize;
     PLHashNumber hash;
     PLHashEntry **hep, *he;
     allocation *alloc;
     tm_thread *t;
 
-    if (!tracing_enabled)
+    if (!tracing_enabled || (t = get_tm_thread())->suppress_tracing != 0)
         return;
 
-    t = get_tm_thread();
+    site = backtrace(t, 1);
 
-    TM_ENTER_MONITOR();
+    t->suppress_tracing++;
+    TM_ENTER_LOCK();
     tmstats.realloc_calls++;
-    if (t->suppress_tracing == 0) {
+    if (PR_TRUE) {
         oldsite = NULL;
         oldsize = 0;
         he = NULL;
         if (oldptr && get_allocations()) {
             hash = hash_pointer(oldptr);
             hep = PL_HashTableRawLookup(allocations, hash, oldptr);
             he = *hep;
             if (he) {
@@ -2516,27 +2593,25 @@ ReallocCallback(void * oldptr, void *ptr
     }
     if (!ptr && size) {
         tmstats.realloc_failures++;
 
         /*
          * When realloc() fails, the original block is not freed or moved, so
          * we'll leave the allocation entry untouched.
          */
-    } else if (t->suppress_tracing == 0) {
-        site = backtrace(t, 1);
+    } else {
         if (site) {
             log_event8(logfp, TM_EVENT_REALLOC,
                        site->serial, start, end - start,
                        (uint32)NS_PTR_TO_INT32(ptr), size,
                        oldsite ? oldsite->serial : 0,
                        (uint32)NS_PTR_TO_INT32(oldptr), oldsize);
         }
         if (ptr && allocations) {
-            t->suppress_tracing++;
             if (ptr != oldptr) {
                 /*
                  * If we're reallocating (not allocating new space by passing
                  * null to realloc) and realloc moved the block, free oldptr.
                  */
                 if (he)
                     PL_HashTableRawRemove(allocations, hep, he);
 
@@ -2545,57 +2620,57 @@ ReallocCallback(void * oldptr, void *ptr
             } else {
                 /*
                  * If we haven't yet recorded an allocation (possibly due to a
                  * temporary memory shortage), do it now.
                  */
                 if (!he)
                     he = PL_HashTableAdd(allocations, ptr, site);
             }
-            t->suppress_tracing--;
             if (he) {
                 alloc = (allocation*) he;
                 alloc->size = size;
             }
         }
     }
-    TM_EXIT_MONITOR();
+    TM_EXIT_LOCK();
+    t->suppress_tracing--;
 }
 
 PR_IMPLEMENT(void)
 FreeCallback(void * ptr, PRUint32 start, PRUint32 end)
 {
     PLHashEntry **hep, *he;
     callsite *site;
     allocation *alloc;
     tm_thread *t;
 
-    if (!tracing_enabled)
+    if (!tracing_enabled || (t = get_tm_thread())->suppress_tracing != 0)
         return;
 
-    t = get_tm_thread();
-
-    TM_ENTER_MONITOR();
+    t->suppress_tracing++;
+    TM_ENTER_LOCK();
     tmstats.free_calls++;
     if (!ptr) {
         tmstats.null_free_calls++;
-    } else if (t->suppress_tracing == 0) {
+    } else {
         if (get_allocations()) {
             hep = PL_HashTableRawLookup(allocations, hash_pointer(ptr), ptr);
             he = *hep;
             if (he) {
                 site = (callsite*) he->value;
                 if (site) {
                     alloc = (allocation*) he;
                     log_event5(logfp, TM_EVENT_FREE,
                                site->serial, start, end - start,
                                (uint32)NS_PTR_TO_INT32(ptr), alloc->size);
                 }
                 PL_HashTableRawRemove(allocations, hep, he);
             }
         }
     }
-    TM_EXIT_MONITOR();
+    TM_EXIT_LOCK();
+    t->suppress_tracing--;
 }
 
 #endif /*XP_WIN32*/
 
 #endif /* NS_TRACE_MALLOC */