Bug 1300948 - Add thread identifier to LogAlloc output. r=njn
authorMike Hommey <mh+mozilla@glandium.org>
Wed, 07 Sep 2016 10:22:46 +0900
changeset 313164 4c87875a0befd207544a65313cf3b1a603c11f4f
parent 313163 c3771535a47ebb8260de3362188c83790831867e
child 313165 f3aec6a563866b1ea2e8621b3db91513ed889e60
push id30673
push usercbook@mozilla.com
push dateThu, 08 Sep 2016 10:01:33 +0000
treeherdermozilla-central@938ce16be25f [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersnjn
bugs1300948
milestone51.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1300948 - Add thread identifier to LogAlloc output. r=njn
memory/replace/logalloc/LogAlloc.cpp
memory/replace/logalloc/README
memory/replace/logalloc/replay/Replay.cpp
memory/replace/logalloc/replay/logalloc_munge.py
memory/replace/logalloc/replay/replay.log
--- a/memory/replace/logalloc/LogAlloc.cpp
+++ b/memory/replace/logalloc/LogAlloc.cpp
@@ -33,16 +33,32 @@ prefork() {
   sLock.Acquire();
 }
 
 static void
 postfork() {
   sLock.Release();
 }
 
+static size_t
+GetPid()
+{
+  return size_t(getpid());
+}
+
+static size_t
+GetTid()
+{
+#if defined(_WIN32)
+  return size_t(GetCurrentThreadId());
+#else
+  return size_t(pthread_self());
+#endif
+}
+
 #ifdef ANDROID
 /* See mozglue/android/APKOpen.cpp */
 extern "C" MOZ_EXPORT __attribute__((weak))
 void* __dso_handle;
 
 /* Android doesn't have pthread_atfork defined in pthread.h */
 extern "C" MOZ_EXPORT
 int pthread_atfork(void (*)(void), void (*)(void), void (*)(void));
@@ -139,100 +155,101 @@ replace_get_bridge()
  */
 
 void*
 replace_malloc(size_t aSize)
 {
   AutoLock lock(sLock);
   void* ptr = sFuncs->malloc(aSize);
   if (ptr) {
-    FdPrintf(sFd, "%zu malloc(%zu)=%p\n", size_t(getpid()), aSize, ptr);
+    FdPrintf(sFd, "%zu %zu malloc(%zu)=%p\n", GetPid(), GetTid(), aSize, ptr);
   }
   return ptr;
 }
 
 int
 replace_posix_memalign(void** aPtr, size_t aAlignment, size_t aSize)
 {
   AutoLock lock(sLock);
   int ret = sFuncs->posix_memalign(aPtr, aAlignment, aSize);
   if (ret == 0) {
-    FdPrintf(sFd, "%zu posix_memalign(%zu,%zu)=%p\n", size_t(getpid()),
+    FdPrintf(sFd, "%zu %zu posix_memalign(%zu,%zu)=%p\n", GetPid(), GetTid(),
              aAlignment, aSize, *aPtr);
   }
   return ret;
 }
 
 void*
 replace_aligned_alloc(size_t aAlignment, size_t aSize)
 {
   AutoLock lock(sLock);
   void* ptr = sFuncs->aligned_alloc(aAlignment, aSize);
   if (ptr) {
-    FdPrintf(sFd, "%zu aligned_alloc(%zu,%zu)=%p\n", size_t(getpid()),
+    FdPrintf(sFd, "%zu %zu aligned_alloc(%zu,%zu)=%p\n", GetPid(), GetTid(),
              aAlignment, aSize, ptr);
   }
   return ptr;
 }
 
 void*
 replace_calloc(size_t aNum, size_t aSize)
 {
   AutoLock lock(sLock);
   void* ptr = sFuncs->calloc(aNum, aSize);
   if (ptr) {
-    FdPrintf(sFd, "%zu calloc(%zu,%zu)=%p\n", size_t(getpid()), aNum, aSize, ptr);
+    FdPrintf(sFd, "%zu %zu calloc(%zu,%zu)=%p\n", GetPid(), GetTid(), aNum,
+             aSize, ptr);
   }
   return ptr;
 }
 
 void*
 replace_realloc(void* aPtr, size_t aSize)
 {
   AutoLock lock(sLock);
   void* new_ptr = sFuncs->realloc(aPtr, aSize);
   if (new_ptr || !aSize) {
-    FdPrintf(sFd, "%zu realloc(%p,%zu)=%p\n", size_t(getpid()), aPtr, aSize,
-             new_ptr);
+    FdPrintf(sFd, "%zu %zu realloc(%p,%zu)=%p\n", GetPid(), GetTid(), aPtr,
+             aSize, new_ptr);
   }
   return new_ptr;
 }
 
 void
 replace_free(void* aPtr)
 {
   AutoLock lock(sLock);
   if (aPtr) {
-    FdPrintf(sFd, "%zu free(%p)\n", size_t(getpid()), aPtr);
+    FdPrintf(sFd, "%zu %zu free(%p)\n", GetPid(), GetTid(), aPtr);
   }
   sFuncs->free(aPtr);
 }
 
 void*
 replace_memalign(size_t aAlignment, size_t aSize)
 {
   AutoLock lock(sLock);
   void* ptr = sFuncs->memalign(aAlignment, aSize);
   if (ptr) {
-    FdPrintf(sFd, "%zu memalign(%zu,%zu)=%p\n", size_t(getpid()), aAlignment,
-             aSize, ptr);
+    FdPrintf(sFd, "%zu %zu memalign(%zu,%zu)=%p\n", GetPid(), GetTid(),
+             aAlignment, aSize, ptr);
   }
   return ptr;
 }
 
 void*
 replace_valloc(size_t aSize)
 {
   AutoLock lock(sLock);
   void* ptr = sFuncs->valloc(aSize);
   if (ptr) {
-    FdPrintf(sFd, "%zu valloc(%zu)=%p\n", size_t(getpid()), aSize, ptr);
+    FdPrintf(sFd, "%zu %zu valloc(%zu)=%p\n", GetPid(), GetTid(), aSize, ptr);
   }
   return ptr;
 }
 
 void
 replace_jemalloc_stats(jemalloc_stats_t* aStats)
 {
   AutoLock lock(sLock);
   sFuncs->jemalloc_stats(aStats);
-  FdPrintf(sFd, "%zu jemalloc_stats()\n", size_t(getpid()));
+  FdPrintf(sFd, "%zu %zu jemalloc_stats()\n", GetPid(), GetTid());
 }
--- a/memory/replace/logalloc/README
+++ b/memory/replace/logalloc/README
@@ -37,71 +37,71 @@ by shell redirections, such as:
 
   MALLOC_LOG=3 firefox 3>&1 1>&2 | gzip -c > log.gz
 
 (3>&1 copies the `| gzip` pipe file descriptor to file descriptor #3, 1>&2
 then copies stderr to stdout. This leads to: fd1 and fd2 sending to stderr
 of the parent process (the shell), and fd3 sending to gzip.)
 
 Each line of the allocations log is formatted as follows:
-  <pid> <function>([<args>])[=<result>]
+  <pid> <tid> <function>([<args>])[=<result>]
 where <args> is a comma separated list of values. The number of <args> and
 the presence of <result> depend on the <function>.
 
 Example log:
-  18545 malloc(32)=0x7f90495120e0
-  18545 calloc(1,148)=0x7f9049537480
-  18545 realloc(0x7f90495120e0,64)=0x7f9049536680
-  18545 posix_memalign(256,240)=0x7f9049583300
-  18545 jemalloc_stats()
-  18545 free(0x7f9049536680)
+  18545 18545 malloc(32)=0x7f90495120e0
+  18545 18545 calloc(1,148)=0x7f9049537480
+  18545 18545 realloc(0x7f90495120e0,64)=0x7f9049536680
+  18545 18545 posix_memalign(256,240)=0x7f9049583300
+  18545 18545 jemalloc_stats()
+  18545 18545 free(0x7f9049536680)
 
 This log can be replayed with the logalloc-replay tool in
 memory/replace/logalloc/replay. However, as the goal of that tool is to
 reproduce the recorded memory allocations, it needs to avoid as much as
 possible doing its own allocations for bookkeeping. Reading the logs as
 they are would require data structures and memory allocations. As a
 consequence, the logs need to be preprocessed beforehand.
 
 The logalloc_munge.py script is responsible for that preprocessing. It simply
 takes a raw log on its stdin, and outputs the preprocessed log on its stdout.
 It replaces pointer addresses with indexes the logalloc-replay tool can use
 in a large (almost) linear array of allocation tracking slots (prefixed with
 '#'). It also replaces the pids with numbers starting from 1 (such as the
 first seen pid number is 1, the second is 2, etc.).
 
 The above example log would become the following, once preprocessed:
-  1 malloc(32)=#1
-  1 calloc(1,148)=#2
-  1 realloc(#1,64)=#1
-  1 posix_memalign(256,240)=#3
-  1 jemalloc_stats()
-  1 free(#1)
+  1 1 malloc(32)=#1
+  1 1 calloc(1,148)=#2
+  1 1 realloc(#1,64)=#1
+  1 1 posix_memalign(256,240)=#3
+  1 1 jemalloc_stats()
+  1 1 free(#1)
 
 The logalloc-replay tool then takes the preprocessed log on its stdin and
 replays the allocations printed there, but will only replay those with the
 same process id as the first line (which normally is 1).
 
 As the log files are simple text files, though, it is easy to separate out
 the different processes log with e.g. grep, and feed the separate processes
 logs to logalloc-replay.
 
 The logalloc-replay program won't output anything unless jemalloc_stats
 records appears in the log. You can expect those to be recorded when going
 to about:memory in Firefox, but they can also be added after preprocessing.
 
 Here is an example of what one can do:
 
   gunzip -c log.gz | python logalloc_munge.py | \
-  awk '$1 == "2" { print $0 } !(NR % 10000) { print "2 jemalloc_stats()" }' | \
+  awk '$1 == "2" { print $0 } !(NR % 10000) { print "2 1 jemalloc_stats()" }' | \
     ./logalloc-replay
 
 The above command replays the allocations of process #2, with some stats
 output every 10000 records.
 
 The logalloc-replay tool itself being hooked with replace-malloc, it is possible
 to set LD_PRELOAD/DYLD_INSERT_LIBRARIES/MOZ_REPLACE_MALLOC_LIB and replay a log
 through a different allocator. For example:
 
   LD_PRELOAD=libreplace_jemalloc.so logalloc-replay < log
 
-Will replay the log against jemalloc3 (which is, as of writing, what
+Will replay the log against jemalloc4 (which is, as of writing, what
 libreplace_jemalloc.so contains).
--- a/memory/replace/logalloc/replay/Replay.cpp
+++ b/memory/replace/logalloc/replay/Replay.cpp
@@ -504,16 +504,20 @@ main()
     }
 
     /* The log may contain data for several processes, only entries for the
      * very first that appears are treated. */
     if (first_pid != pid) {
       continue;
     }
 
+    /* The log contains thread ids for manual analysis, but we just ignore them
+     * for now. */
+    parseNumber(line.SplitChar(' '));
+
     Buffer func = line.SplitChar('(');
     Buffer args = line.SplitChar(')');
 
     /* jemalloc_stats and free are functions with no result. */
     if (func == Buffer("jemalloc_stats")) {
       replay.jemalloc_stats(args);
       continue;
     } else if (func == Buffer("free")) {
--- a/memory/replace/logalloc/replay/logalloc_munge.py
+++ b/memory/replace/logalloc/replay/logalloc_munge.py
@@ -56,26 +56,34 @@ class IdMapping(object):
         return value == 0 or value in self._values
 
 
 class Ignored(Exception): pass
 
 
 def split_log_line(line):
     try:
+        # The format for each line is:
+        # <pid> [<tid>] <function>([<args>])[=<result>]
+        #
+        # The original format didn't include the tid, so we try to parse
+        # lines whether they have one or not.
         pid, func_call = line.split(' ', 1)
-        # func_call format is <function>([<args>])[=<result>]
         call, result = func_call.split(')')
         func, args = call.split('(')
         args = args.split(',') if args else []
         if result:
             if result[0] != '=':
                 raise Ignored('Malformed input')
             result = result[1:]
-        return pid, func, args, result
+        if ' ' in func:
+            tid, func = func.split(' ', 1)
+        else:
+            tid = pid
+        return pid, tid, func, args, result
     except:
         raise Ignored('Malformed input')
 
 
 NUM_ARGUMENTS = {
     'jemalloc_stats': 0,
     'free': 1,
     'malloc': 1,
@@ -86,24 +94,26 @@ NUM_ARGUMENTS = {
     'memalign': 2,
     'valloc': 1,
 }
 
 
 def main():
     process_pointers = defaultdict(IdMapping)
     pids = IdMapping()
+    tids = IdMapping()
     for line in sys.stdin:
         line = line.strip()
 
         try:
-            pid, func, args, result = split_log_line(line)
+            pid, tid, func, args, result = split_log_line(line)
 
             # Replace pid with an id.
             pid = pids[int(pid)]
+            tid = tids[int(tid)]
 
             pointers = process_pointers[pid]
 
             if func not in NUM_ARGUMENTS:
                 raise Ignored('Unknown function')
 
             if len(args) != NUM_ARGUMENTS[func]:
                 raise Ignored('Malformed input')
@@ -119,17 +129,17 @@ def main():
                 del pointers[ptr]
 
             if result:
                 result = int(result, 16)
                 if not result:
                     raise Ignored('Result is NULL')
                 result = "#%d" % pointers[result]
 
-            print('%d %s(%s)%s' % (pid, func, ','.join(args),
+            print('%d %d %s(%s)%s' % (pid, tid, func, ','.join(args),
                 '=%s' % result if result else ''))
 
         except Exception as e:
             print('Ignored "%s": %s' % (line, e.message), file=sys.stderr)
 
 
 if __name__ == '__main__':
     main()
--- a/memory/replace/logalloc/replay/replay.log
+++ b/memory/replace/logalloc/replay/replay.log
@@ -1,17 +1,17 @@
-1 malloc(42)=#1
-1 malloc(24)=#2
-2 malloc(42)=#1
-1 free(#1)
-1 posix_memalign(4096,1024)=#1
-1 calloc(4,42)=#3
-1 free(#2)
-1 realloc(#3,84)=#2
-1 aligned_alloc(512,1024)=#3
-1 memalign(512,1024)=#4
-1 valloc(1024)=#5
-1 jemalloc_stats()
-1 free(#5)
-1 free(#4)
-1 free(#3)
-1 free(#2)
-1 free(#1)
+1 1 malloc(42)=#1
+1 1 malloc(24)=#2
+2 2 malloc(42)=#1
+1 1 free(#1)
+1 1 posix_memalign(4096,1024)=#1
+1 1 calloc(4,42)=#3
+1 1 free(#2)
+1 1 realloc(#3,84)=#2
+1 1 aligned_alloc(512,1024)=#3
+1 1 memalign(512,1024)=#4
+1 1 valloc(1024)=#5
+1 1 jemalloc_stats()
+1 1 free(#5)
+1 1 free(#4)
+1 1 free(#3)
+1 1 free(#2)
+1 1 free(#1)