Bug 693586: Add circular buffer support to jprof r=dbaron
authorRandell Jesup <rjesup@wgate.com>
Tue, 08 Nov 2011 00:34:40 -0500
changeset 79970 bcf9c9ec97a04c122d9156483f8f8300e1bf8364
parent 79969 5c0861ec147f45cb4bee620c2381a2c26dcf227d
child 79971 c6d7a275acaa30f84a442431fbbf4ad102f8f297
push id3230
push userrjesup@wgate.com
push dateTue, 08 Nov 2011 05:55:12 +0000
treeherdermozilla-inbound@bcf9c9ec97a0 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersdbaron
bugs693586
milestone10.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 693586: Add circular buffer support to jprof r=dbaron
dom/base/nsJSEnvironment.cpp
tools/jprof/README.html
tools/jprof/leaky.cpp
tools/jprof/stub/libmalloc.cpp
--- a/dom/base/nsJSEnvironment.cpp
+++ b/dom/base/nsJSEnvironment.cpp
@@ -2856,53 +2856,58 @@ static JSFunctionSpec TraceMallocFunctio
 #ifdef MOZ_JPROF
 
 #include <signal.h>
 
 inline bool
 IsJProfAction(struct sigaction *action)
 {
     return (action->sa_sigaction &&
-            action->sa_flags == (SA_RESTART | SA_SIGINFO));
+            (action->sa_flags & (SA_RESTART | SA_SIGINFO)) == (SA_RESTART | SA_SIGINFO));
 }
 
 void NS_JProfStartProfiling();
 void NS_JProfStopProfiling();
+void NS_JProfClearCircular();
 
 static JSBool
 JProfStartProfilingJS(JSContext *cx, uintN argc, jsval *vp)
 {
   NS_JProfStartProfiling();
   return JS_TRUE;
 }
 
 void NS_JProfStartProfiling()
 {
     // Figure out whether we're dealing with SIGPROF, SIGALRM, or
     // SIGPOLL profiling (SIGALRM for JP_REALTIME, SIGPOLL for
     // JP_RTC_HZ)
     struct sigaction action;
 
+    // Must check ALRM before PROF since both are enabled for real-time
     sigaction(SIGALRM, nsnull, &action);
+    //printf("SIGALRM: %p, flags = %x\n",action.sa_sigaction,action.sa_flags);
     if (IsJProfAction(&action)) {
-        printf("Beginning real-time jprof profiling.\n");
+        //printf("Beginning real-time jprof profiling.\n");
         raise(SIGALRM);
         return;
     }
 
     sigaction(SIGPROF, nsnull, &action);
+    //printf("SIGPROF: %p, flags = %x\n",action.sa_sigaction,action.sa_flags);
     if (IsJProfAction(&action)) {
-        printf("Beginning process-time jprof profiling.\n");
+        //printf("Beginning process-time jprof profiling.\n");
         raise(SIGPROF);
         return;
     }
 
     sigaction(SIGPOLL, nsnull, &action);
+    //printf("SIGPOLL: %p, flags = %x\n",action.sa_sigaction,action.sa_flags);
     if (IsJProfAction(&action)) {
-        printf("Beginning rtc-based jprof profiling.\n");
+        //printf("Beginning rtc-based jprof profiling.\n");
         raise(SIGPOLL);
         return;
     }
 
     printf("Could not start jprof-profiling since JPROF_FLAGS was not set.\n");
 }
 
 static JSBool
@@ -2911,22 +2916,47 @@ JProfStopProfilingJS(JSContext *cx, uint
   NS_JProfStopProfiling();
   return JS_TRUE;
 }
 
 void
 NS_JProfStopProfiling()
 {
     raise(SIGUSR1);
-    printf("Stopped jprof profiling.\n");
+    //printf("Stopped jprof profiling.\n");
+}
+
+static JSBool
+JProfClearCircularJS(JSContext *cx, uintN argc, jsval *vp)
+{
+  NS_JProfClearCircular();
+  return JS_TRUE;
+}
+
+void
+NS_JProfClearCircular()
+{
+    raise(SIGUSR2);
+    //printf("cleared jprof buffer\n");
+}
+
+static JSBool
+JProfSaveCircularJS(JSContext *cx, uintN argc, jsval *vp)
+{
+  // Not ideal...
+  NS_JProfStopProfiling();
+  NS_JProfStartProfiling();
+  return JS_TRUE;
 }
 
 static JSFunctionSpec JProfFunctions[] = {
     {"JProfStartProfiling",        JProfStartProfilingJS,      0, 0},
     {"JProfStopProfiling",         JProfStopProfilingJS,       0, 0},
+    {"JProfClearCircular",         JProfClearCircularJS,       0, 0},
+    {"JProfSaveCircular",          JProfSaveCircularJS,        0, 0},
     {nsnull,                       nsnull,                     0, 0}
 };
 
 #endif /* defined(MOZ_JPROF) */
 
 #ifdef MOZ_TRACEVIS
 static JSFunctionSpec EthogramFunctions[] = {
     {"initEthogram",               js_InitEthogram,            0, 0},
--- a/tools/jprof/README.html
+++ b/tools/jprof/README.html
@@ -98,30 +98,39 @@ which have the following meanings:
     time.  This option overrides JP_PERIOD.  <code>freq</code> is the frequency
     at which the timer should fire, measured in Hz. It must be a power of 2.
     The maximal frequency allowed by the kernel can be changed by writing to
     <code>/proc/sys/dev/rtc/max-user-freq</code>; the maximum value it can be
     set to is 8192.  Note that <code>/dev/rtc</code> will need to be readable
     by the Firefox process; making that file world-readable is a simple way to
     accomplish that.
 
+    <li> <b>JP_CIRCULAR=size</b> : This tells jprof to store samples in a
+    circular buffer of the given size, which then will be saved (appended)
+    to disk when SIGUSR1 is received or JProfStopProfiling is done.  If the
+    buffer overflows, the oldest entries will be evicted until there's
+    space for the new entry.<p>
+	 
+    SIGUSR2 will cause the circular buffer to be cleared.
+
     <li> <b>JP_FILENAME=basefilename</b> : This is the filename used for
     saving the log files to; the default is "jprof-log".  If Electrolysis
     is used, each process after the first will have the process ID
     added ("jprof-log-3212");
 
 </ul>
 
 <h4>Starting and stopping jprof from JavaScript</h4>
 <p>
-A build with jprof enabled adds two functions to the Window object:
-<code>JProfStartProfiling()</code> and <code>JProfStopProfiling()</code>.  When used with JP_DEFER, these
+A build with jprof enabled adds four functions to the Window object:<p>
+<code>JProfStartProfiling()</code> and <code>JProfStopProfiling()</code>:  When used with JP_DEFER, these
 allow one to start and stop the timer just around whatever critical section is
-being profiled
-</p>
+being profiled.</p><p>
+<code>JProfClearCircular()</code> and <code>JProfSaveCircular()</code>:
+These clear the circular buffer and save the buffer (without stopping), respectively.</p>
 
 <h4>Examples of JPROF_FLAGS usage</h4>
 <ul>
 
   <li>To make the timer start firing 3 seconds after the program is started and
   fire every 25 milliseconds of program time use:
     <pre>
         setenv JPROF_FLAGS "JP_START JP_FIRST=3 JP_PERIOD=0.025" </pre>
@@ -136,28 +145,40 @@ being profiled
     <pre>
         setenv JPROF_FLAGS "JP_DEFER JP_PERIOD=0.010 JP_REALTIME" </pre>
 
   <li>To make the timer start on your signal and fire at 8192 Hz in wall-clock
   time use:
     <pre>
         setenv JPROF_FLAGS "JP_DEFER JP_RTC_HZ=8192" </pre>
   
+  <li>To make the timer start on JProfStartProfiling() and run continously
+      with a 1ms sample rate until told to stop, then save the last 1MB of
+      data:
+    <pre>
+        setenv JPROF_FLAGS "JP_DEFER JP_CIRCULAR=1048576 JP_PERIOD=0.001" </pre>
+
 </ul>
 
 <h4>Pausing profiles</h4>
 
 <P>jprof can be paused at any time by sending a SIGUSR1 to mozilla (<code>kill
 -USR1</code>).  This will cause the timer signals to stop and jprof-map to be
 written, but it will not close jprof-log.  Combining SIGUSR1 with the JP_DEFER
 option allows profiling of one sequence of actions by starting the timer right
 before starting the actions and stopping the timer right afterward.
 
 <P>After a SIGUSR1, sending another timer signal (SIGPROF, SIGALRM, or SIGPOLL (aka SIGIO),
-depending on the mode) can be used to continue writing data to the same output.
+depending on the mode) can be used to continue writing data to the same
+output.
+
+<P>SIGUSR2 will cause the circular buffer to be cleared, if it's in use.
+This is useful right before running a test when you're using a large,
+continuous circular buffer, or programmatically at the start of an action
+which might take too long (JProfClearCircular()).
 
 <h4>Looking at the results</h4>
 
 Now that we have <code>jprof-log</code> and <code>jprof-map</code> files, we
 can use the jprof executable is used to turn them into readable output.  To do
 this jprof needs the name of the mozilla binary and the log file.  It deduces
 the name of the map file:
 
--- a/tools/jprof/leaky.cpp
+++ b/tools/jprof/leaky.cpp
@@ -652,17 +652,17 @@ void leaky::generateReportHTML(FILE *fp,
     }
   }
 
   // Ok, We are sorted now.  Let's go through the table until we get to
   // functions that were never called.  Right now we don't do much inside
   // this loop.  Later we can get callers and callees into it like gprof
   // does
   fprintf(fp,
-	  "<h2><A NAME=hier_%d></A><center><a href=\"http://lxr.mozilla.org/mozilla/source/tools/jprof/README.html#hier\">Hierarchical Profile</a></center></h2><hr>\n",
+          "<h2><A NAME=hier_%d></A><center><a href=\"http://mxr.mozilla.org/mozilla-central/source/tools/jprof/README.html#hier\">Hierarchical Profile</a></center></h2><hr>\n",
           thread);
   fprintf(fp, "<pre>\n");
   fprintf(fp, "%6s %6s         %4s      %s\n",
           "index", "Count", "Hits", "Function Name");
 
   for(i=0; i<usefulSymbols && countArray[rankingTable[i]]>0; i++) {
     Symbol *sp=&externalSymbols[rankingTable[i]];
     
@@ -715,17 +715,17 @@ void leaky::generateReportHTML(FILE *fp,
   }
   if (totalTimerHits == 0)
     totalTimerHits = 1;
 
   if (totalTimerHits != count)
     fprintf(stderr,"Hit count mismatch: count=%d; totalTimerHits=%d",
             count,totalTimerHits);
 
-  fprintf(fp,"<h2><A NAME=flat_%d></A><center><a href=\"http://lxr.mozilla.org/mozilla/source/tools/jprof/README.html#flat\">Flat Profile</a></center></h2><br>\n",
+  fprintf(fp,"<h2><A NAME=flat_%d></A><center><a href=\"http://mxr.mozilla.org/mozilla-central/source/tools/jprof/README.html#flat\">Flat Profile</a></center></h2><br>\n",
           thread);
   fprintf(fp, "<pre>\n");
 
   fprintf(fp, "Total hit count: %d\n", totalTimerHits);
   fprintf(fp, "Count %%Total  Function Name\n");
   // Now loop for as long as we have timer hits
   for(i=0;
     i<usefulSymbols && externalSymbols[rankingTable[i]].timerHit>0; i++) {
--- a/tools/jprof/stub/libmalloc.cpp
+++ b/tools/jprof/stub/libmalloc.cpp
@@ -67,16 +67,28 @@
 #include <execinfo.h>
 
 #include "libmalloc.h"
 #include "jprof.h"
 #include <string.h>
 #include <errno.h>
 #include <dlfcn.h>
 
+#include "mozilla/mozalloc_undef_macro_wrappers.h"
+
+// Must define before including jprof.h
+void *moz_xmalloc(size_t size)
+{
+    return malloc(size);
+}
+
+void moz_xfree(void *mem)
+{
+    free(mem);
+}
 
 #ifdef NTO
 #include <sys/link.h>
 extern r_debug _r_debug;
 #else
 #include <link.h>
 #endif
 
@@ -172,16 +184,17 @@ JPROF_STATIC void CrawlStack(malloc_log_
 #endif
 }
 #endif
 
 //----------------------------------------------------------------------
 
 static int rtcHz;
 static int rtcFD = -1;
+static bool circular = false;
 
 #if defined(linux) || defined(NTO)
 static void DumpAddressMap()
 {
   // Turn off the timer so we don't get interrupts during shutdown
 #if defined(linux)
   if (rtcHz) {
     enableRTCSignals(false);
@@ -210,23 +223,217 @@ static void DumpAddressMap()
     }
     close(mfd);
   }
 }
 #endif
 
 static bool was_paused = true;
 
+JPROF_STATIC void JprofBufferDump();
+JPROF_STATIC void JprofBufferClear();
+
+static void ClearProfilingHook(int signum)
+{
+    if (circular) {
+        JprofBufferClear();
+        puts("Jprof: cleared circular buffer.");
+    }
+}
+
 static void EndProfilingHook(int signum)
 {
+    if (circular)
+        JprofBufferDump();
+
     DumpAddressMap();
     was_paused = true;
     puts("Jprof: profiling paused.");
 }
 
+
+
+//----------------------------------------------------------------------
+// proper usage would be a template, including the function to find the
+// size of an entry, or include a size header explicitly to each entry.
+#if defined(linux)
+#define DUMB_LOCK()   pthread_mutex_lock(&mutex);
+#define DUMB_UNLOCK() pthread_mutex_unlock(&mutex);
+#else
+#define DUMB_LOCK()   FIXME()
+#define DUMB_UNLOCK() FIXME()
+#endif
+
+
+class DumbCircularBuffer
+{
+public:
+    DumbCircularBuffer(size_t init_buffer_size) {
+        used = 0;
+        buffer_size = init_buffer_size;
+        buffer = (unsigned char *) malloc(buffer_size);
+        head = tail = buffer;
+
+#if defined(linux)
+        pthread_mutexattr_t mAttr;
+        pthread_mutexattr_settype(&mAttr, PTHREAD_MUTEX_RECURSIVE_NP);
+        pthread_mutex_init(&mutex, &mAttr);
+        pthread_mutexattr_destroy(&mAttr);
+#endif
+    }
+    ~DumbCircularBuffer() {
+        free(buffer);
+#if defined(linux)
+        pthread_mutex_destroy (&mutex);
+#endif
+    }
+
+    void clear() {
+        DUMB_LOCK();
+        head = tail;
+        used = 0;
+        DUMB_UNLOCK();
+    }
+
+    bool empty() {
+        return head == tail;
+    }
+
+    size_t space_available() {
+        size_t result;
+        DUMB_LOCK();
+        if (tail > head)
+            result = buffer_size - (tail-head) - 1;
+        else
+            result = head-tail - 1;
+        DUMB_UNLOCK();
+        return result;
+    }
+
+    void drop(size_t size) {
+        // assumes correctness!
+        DUMB_LOCK();
+        head += size;
+        if (head >= &buffer[buffer_size])
+            head -= buffer_size;
+        used--;
+        DUMB_UNLOCK();
+    }
+
+    bool insert(void *data, size_t size) {
+        // can fail if not enough space in the entire buffer
+        DUMB_LOCK();
+        if (space_available() < size)
+            return false;
+
+        size_t max_without_wrap = &buffer[buffer_size] - tail;
+        size_t initial = size > max_without_wrap ? max_without_wrap : size;
+#if DEBUG_CIRCULAR
+        fprintf(stderr,"insert(%d): max_without_wrap %d, size %d, initial %d\n",used,max_without_wrap,size,initial);
+#endif
+        memcpy(tail,data,initial);
+        tail += initial;
+        data = ((char *)data)+initial;
+        size -= initial;
+        if (size != 0) {
+#if DEBUG_CIRCULAR
+            fprintf(stderr,"wrapping by %d bytes\n",size);
+#endif
+            memcpy(buffer,data,size);
+            tail = &(((unsigned char *)buffer)[size]);
+        }
+            
+        used++;
+        DUMB_UNLOCK();
+
+        return true;
+    }
+
+    // for external access to the buffer (saving)
+    void lock() {
+        DUMB_LOCK();
+    }
+
+    void unlock() {
+        DUMB_UNLOCK();
+    }
+
+    // XXX These really shouldn't be public...
+    unsigned char *head;
+    unsigned char *tail;
+    unsigned int used;
+    unsigned char *buffer;
+    size_t buffer_size;
+
+private:
+    pthread_mutex_t mutex;
+};
+
+class DumbCircularBuffer *JprofBuffer;
+
+JPROF_STATIC void
+JprofBufferInit(size_t size)
+{
+    JprofBuffer = new DumbCircularBuffer(size);
+}
+
+JPROF_STATIC void
+JprofBufferClear()
+{
+    fprintf(stderr,"Told to clear JPROF circular buffer\n");
+    JprofBuffer->clear();
+}
+
+JPROF_STATIC size_t
+JprofEntrySizeof(malloc_log_entry *me)
+{
+    return offsetof(malloc_log_entry, pcs) + me->numpcs*sizeof(char*);
+}
+
+JPROF_STATIC void
+JprofBufferAppend(malloc_log_entry *me)
+{
+    size_t size = JprofEntrySizeof(me);
+
+    do {
+        while (JprofBuffer->space_available() < size &&
+               JprofBuffer->used > 0) {
+#if DEBUG_CIRCULAR
+            fprintf(stderr,"dropping entry: %d in use, %d free, need %d, size_to_free = %d\n",
+                    JprofBuffer->used,JprofBuffer->space_available(),size,JprofEntrySizeof((malloc_log_entry *) JprofBuffer->head));
+#endif
+            JprofBuffer->drop(JprofEntrySizeof((malloc_log_entry *) JprofBuffer->head));
+        }
+        if (JprofBuffer->space_available() < size)
+            return;
+
+    } while (!JprofBuffer->insert(me,size));
+}
+
+JPROF_STATIC void
+JprofBufferDump()
+{
+    JprofBuffer->lock();
+#if DEBUG_CIRCULAR
+    fprintf(stderr,"dumping JP_CIRCULAR buffer, %d of %d bytes\n",
+            JprofBuffer->tail > JprofBuffer->head ? 
+              JprofBuffer->tail - JprofBuffer->head :
+              JprofBuffer->buffer_size + JprofBuffer->tail - JprofBuffer->head,
+            JprofBuffer->buffer_size);
+#endif
+    if (JprofBuffer->tail >= JprofBuffer->head) {
+        write(gLogFD, JprofBuffer->head, JprofBuffer->tail - JprofBuffer->head);
+    } else {
+        write(gLogFD, JprofBuffer->head, &(JprofBuffer->buffer[JprofBuffer->buffer_size]) - JprofBuffer->head);
+        write(gLogFD, JprofBuffer->buffer, JprofBuffer->tail - JprofBuffer->buffer);
+    }
+    JprofBuffer->clear();
+    JprofBuffer->unlock();
+}
+
 //----------------------------------------------------------------------
 
 JPROF_STATIC void
 JprofLog(u_long aTime, void* stack_top, void* top_instr_ptr)
 {
   // Static is simply to make debugging tolerable
   static malloc_log_entry me;
 
@@ -237,19 +444,23 @@ JprofLog(u_long aTime, void* stack_top, 
       was_paused = 0;
   } else {
       me.flags = 0;
   }
 
   CrawlStack(&me, stack_top, top_instr_ptr);
 
 #ifndef NTO
-  write(gLogFD, &me, offsetof(malloc_log_entry, pcs) + me.numpcs*sizeof(char*));
+  if (circular) {
+      JprofBufferAppend(&me);
+  } else {
+      write(gLogFD, &me, JprofEntrySizeof(&me));
+  }
 #else
-  printf("Neutrino is missing the pcs member of malloc_log_entry!! \n");
+      printf("Neutrino is missing the pcs member of malloc_log_entry!! \n");
 #endif
 }
 
 static int realTime;
 
 /* Lets interrupt at 10 Hz.  This is so my log files don't get too large.
  * This can be changed to a faster value latter.  This timer is not
  * programmed to reset, even though it is capable of doing so.  This is
@@ -419,19 +630,23 @@ NS_EXPORT_(void) setupProfilingStuff(voi
 	 *               rather than time used by the process (and the
 	 *               system for the process).  This is useful for
 	 *               finding time spent by the X server.
          *   JP_APPEND -> Append to jprof-log rather than overwriting it.
          *               This is somewhat risky since it depends on the
          *               address map staying constant across multiple runs.
          *   JP_FILENAME -> base filename to use when saving logs.  Note that
          *               this does not affect the mapfile.
+         *   JP_CIRCULAR -> use a circular buffer of size N, write/clear on SIGUSR1
          *
          * JPROF_SLAVE is set if this is not the first process.
 	*/
+
+        circular = false;
+
 	if(tst) {
 	    if(strstr(tst, "JP_DEFER"))
 	    {
 		doNotStart = 0;
 		startTimer = 0;
 	    }
 	    if(strstr(tst, "JP_START")) doNotStart = 0;
 	    if(strstr(tst, "JP_REALTIME")) realTime = 1;
@@ -445,16 +660,30 @@ NS_EXPORT_(void) setupProfilingStuff(voi
                 } else {
                     fprintf(stderr,
                             "JP_PERIOD of %g less than 0.001 (1ms), using 1ms\n",
                             tmp);
                     timerMiliSec = 1;
                 }
 	    }
 
+	    char *circular_op = strstr(tst,"JP_CIRCULAR=");
+	    if(circular_op) {
+                size_t size = atol(circular_op+strlen("JP_CIRCULAR="));
+                if (size < 1000) {
+                    fprintf(stderr,
+                            "JP_CIRCULAR of %d less than 1000, using 10000\n",
+                            size);
+                    size = 10000;
+                }
+                JprofBufferInit(size);
+                fprintf(stderr,"JP_CIRCULAR buffer of %d bytes\n",size);
+                circular = true;
+	    }
+
 	    char *first = strstr(tst, "JP_FIRST=");
 	    if(first) {
                 firstDelay = atol(first+strlen("JP_FIRST="));
 	    }
 
             char *rtc = strstr(tst, "JP_RTC_HZ=");
             if (rtc) {
 #if defined(linux)
@@ -508,16 +737,18 @@ NS_EXPORT_(void) setupProfilingStuff(voi
 
 		    // Dump out the address map when we terminate
 		    RegisterJprofShutdown();
 
 		    main_thread = pthread_self();
                     //fprintf(stderr,"jprof: main_thread = %u\n",
                     //        (unsigned int)main_thread);
 
+                    // FIX!  probably should block these against each other
+                    // Very unlikely.
 		    sigemptyset(&mset);
 		    action.sa_handler = NULL;
 		    action.sa_sigaction = StackHook;
 		    action.sa_mask  = mset;
 		    action.sa_flags = SA_RESTART | SA_SIGINFO;
 #if defined(linux)
                     if (rtcHz) {
                         if (!setupRTCSignals(rtcHz, &action)) {
@@ -543,16 +774,23 @@ NS_EXPORT_(void) setupProfilingStuff(voi
 		    // later be reopened).
 
 		    struct sigaction stop_action;
 		    stop_action.sa_handler = EndProfilingHook;
 		    stop_action.sa_mask  = mset;
 		    stop_action.sa_flags = SA_RESTART;
 		    sigaction(SIGUSR1, &stop_action, NULL);
 
+		    // make it so a SIGUSR2 will clear the circular buffer
+
+		    stop_action.sa_handler = ClearProfilingHook;
+		    stop_action.sa_mask  = mset;
+		    stop_action.sa_flags = SA_RESTART;
+		    sigaction(SIGUSR2, &stop_action, NULL);
+
                     printf("Jprof: Initialized signal handler and set "
                            "timer for %lu %s, %d s "
                            "initial delay\n",
                            rtcHz ? rtcHz : timerMiliSec, 
                            rtcHz ? "Hz" : "ms",
                            firstDelay);
 
 		    if(startTimer) {