author | Mike Hommey <mh+mozilla@glandium.org> |
Thu, 11 May 2017 13:23:07 +0900 | |
changeset 358521 | faae5fbdd01ddaf171810c446a645ea00085aa54 |
parent 358520 | 0e6e8a7b9973fea4fdeca1e7c49df2674d2a984a |
child 358522 | a7de58787e46f1d1ba71a8d40674387b55108c1a |
push id | 31827 |
push user | cbook@mozilla.com |
push date | Tue, 16 May 2017 10:34:19 +0000 |
treeherder | mozilla-central@49365d675cbb [default view] [failures only] |
perfherder | [talos] [build metrics] [platform microbench] (compared to previous push) |
reviewers | njn |
bugs | 1363992 |
milestone | 55.0a1 |
first release with | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
last release without | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
--- a/.clang-format-ignore +++ b/.clang-format-ignore @@ -55,17 +55,16 @@ ^media/libvpx/.* ^media/libyuv/.* ^media/mtransport/third_party/.* ^media/openmax_dl/.* ^media/pocketsphinx/.* ^media/sphinxbase/.* ^media/webrtc/trunk/.* ^media/webrtc/signaling/src/sdp/sipcc/.* -^memory/jemalloc/src/.* ^mfbt/decimal/.* ^mfbt/double-conversion/source/.* ^mfbt/lz4.* ^mobile/android/thirdparty/.* ^modules/brotli/.* ^modules/fdlibm/.* ^modules/freetype2/.* ^modules/libbz2/.*
--- a/aclocal.m4 +++ b/aclocal.m4 @@ -22,17 +22,16 @@ builtin(include, build/autoconf/compiler builtin(include, build/autoconf/expandlibs.m4)dnl builtin(include, build/autoconf/arch.m4)dnl builtin(include, build/autoconf/android.m4)dnl builtin(include, build/autoconf/zlib.m4)dnl builtin(include, build/autoconf/icu.m4)dnl builtin(include, build/autoconf/clang-plugin.m4)dnl builtin(include, build/autoconf/alloc.m4)dnl builtin(include, build/autoconf/ios.m4)dnl -builtin(include, build/autoconf/jemalloc.m4)dnl builtin(include, build/autoconf/sanitize.m4)dnl MOZ_PROG_CHECKMSYS() # Read the user's .mozconfig script. We can't do this in # configure.in: autoconf puts the argument parsing code above anything # expanded from configure.in, and we need to get the configure options # from .mozconfig in place before that argument parsing code.
deleted file mode 100644 --- a/build/autoconf/jemalloc.m4 +++ /dev/null @@ -1,84 +0,0 @@ -dnl This Source Code Form is subject to the terms of the Mozilla Public -dnl License, v. 2.0. If a copy of the MPL was not distributed with this -dnl file, You can obtain one at http://mozilla.org/MPL/2.0/. - -AC_DEFUN([MOZ_SUBCONFIGURE_JEMALLOC], [ - -if test "$MOZ_BUILD_APP" != js -o -n "$JS_STANDALONE"; then - - # Run jemalloc configure script - - if test "$MOZ_MEMORY" && test -n "$MOZ_REPLACE_MALLOC"; then - ac_configure_args="--build=$build --host=$target --enable-stats --with-jemalloc-prefix=je_ --disable-valgrind" - if test -n "$MOZ_DEBUG"; then - ac_configure_args="$ac_configure_args --enable-debug" - fi - # We're using memalign for _aligned_malloc in memory/build/mozmemory_wrap.c - # on Windows, so just export memalign on all platforms. - ac_configure_args="$ac_configure_args ac_cv_func_memalign=yes" - if test -n "$MOZ_REPLACE_MALLOC"; then - # When using replace_malloc, we always want valloc exported from jemalloc. - ac_configure_args="$ac_configure_args ac_cv_func_valloc=yes" - if test "${OS_ARCH}" = Darwin; then - # We also need to enable pointer validation on Mac because jemalloc's - # zone allocator is not used. - ac_configure_args="$ac_configure_args --enable-ivsalloc" - fi - fi - MANGLE="malloc posix_memalign aligned_alloc calloc realloc free memalign valloc malloc_usable_size" - JEMALLOC_WRAPPER=replace_ - if test -n "$MANGLE"; then - MANGLED= - for mangle in ${MANGLE}; do - if test -n "$MANGLED"; then - MANGLED="$mangle:$JEMALLOC_WRAPPER$mangle,$MANGLED" - else - MANGLED="$mangle:$JEMALLOC_WRAPPER$mangle" - fi - done - ac_configure_args="$ac_configure_args --with-mangling=$MANGLED" - fi - unset CONFIG_FILES - if test -z "$MOZ_TLS"; then - ac_configure_args="$ac_configure_args --disable-tls" - fi - EXTRA_CFLAGS="$CFLAGS" - for var in AS CC CXX CPP AR RANLIB STRIP CPPFLAGS EXTRA_CFLAGS LDFLAGS; do - ac_configure_args="$ac_configure_args $var='`eval echo \\${${var}}`'" - done - - # jemalloc's configure assumes that if you have CFLAGS set at all, you set - # all the flags necessary to configure jemalloc, which is not likely to be - # the case on Windows if someone is building Firefox with flags set in - # their mozconfig. - if test "$_MSC_VER"; then - ac_configure_args="$ac_configure_args CFLAGS=" - fi - - # Force disable DSS support in jemalloc. - ac_configure_args="$ac_configure_args ac_cv_func_sbrk=false" - - # Make Linux builds munmap freed chunks instead of recycling them. - ac_configure_args="$ac_configure_args --enable-munmap" - - # Disable cache oblivious behavior that appears to have a performance - # impact on Firefox. - ac_configure_args="$ac_configure_args --disable-cache-oblivious" - - if ! test -e memory/jemalloc; then - mkdir -p memory/jemalloc - fi - - # jemalloc's configure runs git to determine the version. But when building - # from a gecko git clone, the git commands it uses is going to pick gecko's - # information, not jemalloc's, which is useless. So pretend we don't have git - # at all. That will make jemalloc's configure pick the in-tree VERSION file. - (PATH="$srcdir/memory/jemalloc/helper:$PATH"; - AC_OUTPUT_SUBDIRS(memory/jemalloc/src) - ) || exit 1 - ac_configure_args="$_SUBDIR_CONFIG_ARGS" - fi - -fi - -])
--- a/js/src/aclocal.m4 +++ b/js/src/aclocal.m4 @@ -20,17 +20,16 @@ builtin(include, ../../build/autoconf/fr builtin(include, ../../build/autoconf/compiler-opts.m4)dnl builtin(include, ../../build/autoconf/expandlibs.m4)dnl builtin(include, ../../build/autoconf/arch.m4)dnl builtin(include, ../../build/autoconf/android.m4)dnl builtin(include, ../../build/autoconf/zlib.m4)dnl builtin(include, ../../build/autoconf/icu.m4)dnl builtin(include, ../../build/autoconf/clang-plugin.m4)dnl builtin(include, ../../build/autoconf/alloc.m4)dnl -builtin(include, ../../build/autoconf/jemalloc.m4)dnl builtin(include, ../../build/autoconf/sanitize.m4)dnl builtin(include, ../../build/autoconf/ios.m4)dnl define([__MOZ_AC_INIT_PREPARE], defn([AC_INIT_PREPARE])) define([AC_INIT_PREPARE], [if test -z "$srcdir"; then srcdir=`dirname "[$]0"` fi
--- a/js/src/make-source-package.sh +++ b/js/src/make-source-package.sh @@ -142,17 +142,16 @@ case $cmd in ${TOPSRCDIR}/mozglue/misc \ ${TOPSRCDIR}/mozglue/moz.build \ ${tgtpath}/mozglue ${MKDIR} -p ${tgtpath}/memory cp -pPR \ ${TOPSRCDIR}/memory/moz.build \ ${TOPSRCDIR}/memory/build \ ${TOPSRCDIR}/memory/fallible \ - ${TOPSRCDIR}/memory/jemalloc \ ${TOPSRCDIR}/memory/mozalloc \ ${TOPSRCDIR}/memory/mozjemalloc \ ${tgtpath}/memory # remove *.pyc and *.pyo files if any find ${tgtpath} -type f -name "*.pyc" -o -name "*.pyo" |xargs rm -f # copy or create INSTALL
--- a/js/src/old-configure.in +++ b/js/src/old-configure.in @@ -2160,18 +2160,16 @@ if test -n "$GNU_CC"; then JS_CONFIG_MOZ_JS_LIBS='-L${libdir} -l${JS_LIBRARY_NAME}' else JS_CONFIG_MOZ_JS_LIBS='${libdir}/${JS_LIBRARY_NAME}.lib' fi AC_SUBST(JS_LIBRARY_NAME) AC_SUBST(JS_CONFIG_MOZ_JS_LIBS) AC_SUBST(JS_CONFIG_LIBS) -MOZ_SUBCONFIGURE_JEMALLOC() - # Avoid using obsolete NSPR features AC_DEFINE(NO_NSPR_10_SUPPORT) dnl Spit out some output dnl ======================================================== MOZ_CREATE_CONFIG_STATUS() if test "$JS_STANDALONE"; then
deleted file mode 100644 --- a/memory/build/mozjemalloc_compat.c +++ /dev/null @@ -1,183 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this file, - * You can obtain one at http://mozilla.org/MPL/2.0/. */ - -#ifndef MOZ_JEMALLOC4 -# error Should only compile this file when building with jemalloc 3 -#endif - -#define MOZ_JEMALLOC_IMPL - -#include "mozmemory_wrap.h" -#include "jemalloc_types.h" -#include "mozilla/Types.h" - -#include <stdbool.h> - -#include "jemalloc/jemalloc.h" - -/* - * CTL_* macros are from memory/jemalloc/src/src/stats.c with changes: - * - drop `t' argument to avoid redundancy in calculating type size - * - require `i' argument for arena number explicitly - */ - -#define CTL_GET(n, v) do { \ - size_t sz = sizeof(v); \ - je_(mallctl)(n, &v, &sz, NULL, 0); \ -} while (0) - -#define CTL_I_GET(n, v, i) do { \ - size_t mib[6]; \ - size_t miblen = sizeof(mib) / sizeof(mib[0]); \ - size_t sz = sizeof(v); \ - je_(mallctlnametomib)(n, mib, &miblen); \ - mib[2] = i; \ - je_(mallctlbymib)(mib, miblen, &v, &sz, NULL, 0); \ -} while (0) - -#define CTL_IJ_GET(n, v, i, j) do { \ - size_t mib[6]; \ - size_t miblen = sizeof(mib) / sizeof(mib[0]); \ - size_t sz = sizeof(v); \ - je_(mallctlnametomib)(n, mib, &miblen); \ - mib[2] = i; \ - mib[4] = j; \ - je_(mallctlbymib)(mib, miblen, &v, &sz, NULL, 0); \ -} while (0) - -/* - * VARIABLE_ARRAY is copied from - * memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal.h.in - */ -#if __STDC_VERSION__ < 199901L -# ifdef _MSC_VER -# include <malloc.h> -# define alloca _alloca -# else -# ifdef HAVE_ALLOCA_H -# include <alloca.h> -# else -# include <stdlib.h> -# endif -# endif -# define VARIABLE_ARRAY(type, name, count) \ - type *name = alloca(sizeof(type) * (count)) -#else -# define VARIABLE_ARRAY(type, name, count) type name[(count)] -#endif - -MOZ_MEMORY_API size_t -malloc_good_size_impl(size_t size) -{ - /* je_nallocx crashes when given a size of 0. As - * malloc_usable_size(malloc(0)) and malloc_usable_size(malloc(1)) - * return the same value, use a size of 1. */ - if (size == 0) - size = 1; - return je_(nallocx)(size, 0); -} - -static void -compute_bin_unused_and_bookkeeping(jemalloc_stats_t *stats, unsigned int narenas) -{ - size_t bin_unused = 0; - - uint32_t nregs; // number of regions per run in the j-th bin - size_t reg_size; // size of regions served by the j-th bin - size_t curruns; // number of runs belonging to a bin - size_t curregs; // number of allocated regions in a bin - - unsigned int nbins; // number of bins per arena - unsigned int i, j; - - size_t stats_metadata; - size_t stats_ametadata = 0; // total internal allocations in all arenas - - // narenas also counts uninitialized arenas, and initialized arenas - // are not guaranteed to be adjacent - VARIABLE_ARRAY(bool, initialized, narenas); - size_t isz = sizeof(initialized) / sizeof(initialized[0]); - - je_(mallctl)("arenas.initialized", initialized, &isz, NULL, 0); - CTL_GET("arenas.nbins", nbins); - - for (j = 0; j < nbins; j++) { - CTL_I_GET("arenas.bin.0.nregs", nregs, j); - CTL_I_GET("arenas.bin.0.size", reg_size, j); - - for (i = 0; i < narenas; i++) { - if (!initialized[i]) { - continue; - } - - CTL_IJ_GET("stats.arenas.0.bins.0.curruns", curruns, i, j); - CTL_IJ_GET("stats.arenas.0.bins.0.curregs", curregs, i, j); - - bin_unused += (nregs * curruns - curregs) * reg_size; - } - } - - CTL_GET("stats.metadata", stats_metadata); - - /* get the summation for all arenas, i == narenas */ - CTL_I_GET("stats.arenas.0.metadata.allocated", stats_ametadata, narenas); - - stats->bookkeeping = stats_metadata - stats_ametadata; - stats->bin_unused = bin_unused; -} - -MOZ_JEMALLOC_API void -jemalloc_stats_impl(jemalloc_stats_t *stats) -{ - unsigned narenas; - size_t active, allocated, mapped, page, pdirty; - size_t lg_chunk; - - // Refresh jemalloc's stats by updating its epoch, see ctl_refresh in - // src/ctl.c - uint64_t epoch = 0; - size_t esz = sizeof(epoch); - je_(mallctl)("epoch", &epoch, &esz, &epoch, esz); - - CTL_GET("arenas.narenas", narenas); - CTL_GET("arenas.page", page); - CTL_GET("stats.active", active); - CTL_GET("stats.allocated", allocated); - CTL_GET("stats.mapped", mapped); - CTL_GET("opt.lg_chunk", lg_chunk); - - /* get the summation for all arenas, i == narenas */ - CTL_I_GET("stats.arenas.0.pdirty", pdirty, narenas); - - stats->chunksize = (size_t) 1 << lg_chunk; - stats->mapped = mapped; - stats->allocated = allocated; - stats->waste = active - allocated; - stats->page_cache = pdirty * page; - compute_bin_unused_and_bookkeeping(stats, narenas); - stats->waste -= stats->bin_unused; -} - -MOZ_JEMALLOC_API void -jemalloc_purge_freed_pages_impl() -{ -} - -MOZ_JEMALLOC_API void -jemalloc_free_dirty_pages_impl() -{ - unsigned narenas; - size_t mib[3]; - size_t miblen = sizeof(mib) / sizeof(mib[0]); - - CTL_GET("arenas.narenas", narenas); - je_(mallctlnametomib)("arena.0.purge", mib, &miblen); - mib[1] = narenas; - je_(mallctlbymib)(mib, miblen, NULL, NULL, NULL, 0); -} - -MOZ_JEMALLOC_API void -jemalloc_thread_local_arena_impl(jemalloc_bool enabled) -{ -}
--- a/memory/build/mozmemory_wrap.h +++ b/memory/build/mozmemory_wrap.h @@ -32,18 +32,17 @@ * - strdup * - wcsdup (Windows only) * * - jemalloc specific functions: * - jemalloc_stats * - jemalloc_purge_freed_pages * - jemalloc_free_dirty_pages * - jemalloc_thread_local_arena - * (these functions are native to mozjemalloc, and have compatibility - * implementations for jemalloc3) + * (these functions are native to mozjemalloc) * * These functions are all exported as part of libmozglue (see * $(topsrcdir)/mozglue/build/Makefile.in), with a few implementation * peculiarities: * * - On Windows, the malloc implementation functions are all prefixed with * "je_", the duplication functions are prefixed with "wrap_", and jemalloc * specific functions are left unprefixed. All these functions are however @@ -83,31 +82,26 @@ * * * Within libmozglue (when MOZ_MEMORY_IMPL is defined), all the functions * should be suffixed with "_impl" both for declarations and use. * That is, the implementation declaration for e.g. strdup would look like: * char* strdup_impl(const char *) * That implementation would call malloc by using "malloc_impl". * - * While mozjemalloc uses these "_impl" suffixed helpers, jemalloc3, being - * third-party code, doesn't, but instead has an elaborate way to mangle - * individual functions. See under "Run jemalloc configure script" in - * $(topsrcdir)/configure.in. - * * * When building with replace-malloc support, the above still holds, but * the malloc implementation and jemalloc specific functions are the * replace-malloc functions from replace_malloc.c. * - * The actual jemalloc/mozjemalloc implementation is prefixed with "je_". + * The actual mozjemalloc implementation is prefixed with "je_". * * Thus, when MOZ_REPLACE_MALLOC is defined, the "_impl" suffixed macros - * expand to "je_" prefixed function when building mozjemalloc or - * jemalloc3/mozjemalloc_compat, where MOZ_JEMALLOC_IMPL is defined. + * expand to "je_" prefixed function when building mozjemalloc, where + * MOZ_JEMALLOC_IMPL is defined. * * In other cases, the "_impl" suffixed macros follow the original scheme, * except on Windows and MacOSX, where they would expand to "je_" prefixed * functions. Instead, they are left unmodified (malloc_impl expands to * malloc_impl). */ #ifndef MOZ_MEMORY @@ -149,19 +143,16 @@ # endif # endif # endif # ifdef XP_WIN # define mozmem_dup_impl(a) wrap_ ## a # endif #endif -/* All other jemalloc3 functions are prefixed with "je_" */ -#define je_(a) je_ ## a - #if !defined(MOZ_MEMORY_IMPL) # define MOZ_MEMORY_API MFBT_API # define MOZ_JEMALLOC_API MFBT_API #endif #ifndef MOZ_MEMORY_API # define MOZ_MEMORY_API #endif
deleted file mode 100644 --- a/memory/jemalloc/README.mozilla +++ /dev/null @@ -1,9 +0,0 @@ -This is a copy of the jemalloc source code. It is intended to be left pristine. -Modifications to this code without coordinating with upstream are unacceptable, -and will be reverted. Integration modifications should be made in memory/build -whenever possible. - -The canonical repository for this source code is https://github.com/jemalloc/jemalloc. -The information about the upstream repository and revision lives in upstream.info. -In order to update the code, you can run the update.sh script located in -the same directory.
deleted file mode 100755 --- a/memory/jemalloc/helper/git +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh - -# jemalloc's configure runs git to determine the version. But when building -# from a gecko git clone, the git commands it uses is going to pick gecko's -# information, not jemalloc's, which is useless. So pretend we don't have git -# at all. That will make jemalloc's configure pick the in-tree VERSION file. - -exit 1
deleted file mode 100644 --- a/memory/jemalloc/moz.build +++ /dev/null @@ -1,82 +0,0 @@ -# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- -# vim: set filetype=python: -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -# When built with --enable-debug, jemalloc4 makes headers define functions that -# are normally inlined, and that prevents unified sources from working. -if CONFIG['MOZ_DEBUG']: - maybe_unified_sources = SOURCES -else: - maybe_unified_sources = UNIFIED_SOURCES - -maybe_unified_sources += [ - 'src/src/arena.c', - 'src/src/atomic.c', - 'src/src/base.c', - 'src/src/bitmap.c', - 'src/src/chunk.c', - 'src/src/chunk_dss.c', - 'src/src/chunk_mmap.c', - 'src/src/ckh.c', - 'src/src/ctl.c', - 'src/src/extent.c', - 'src/src/hash.c', - 'src/src/huge.c', - 'src/src/jemalloc.c', - 'src/src/mb.c', - 'src/src/mutex.c', - 'src/src/nstime.c', - 'src/src/pages.c', - 'src/src/prng.c', - 'src/src/prof.c', - 'src/src/quarantine.c', - 'src/src/rtree.c', - 'src/src/spin.c', - 'src/src/stats.c', - 'src/src/tcache.c', - 'src/src/ticker.c', - 'src/src/tsd.c', - 'src/src/util.c', - 'src/src/witness.c', -] - -# Only OSX needs the zone allocation implementation, -# but only if replace-malloc is not enabled. -if CONFIG['OS_TARGET'] == 'Darwin' and not CONFIG['MOZ_REPLACE_MALLOC']: - maybe_unified_sources += [ - 'src/src/zone.c', - ] - -FINAL_LIBRARY = 'replace_jemalloc' - -if CONFIG['MOZ_GLUE_IN_PROGRAM']: - DIST_INSTALL = True - -if CONFIG['_MSC_VER']: - DEFINES['DLLEXPORT'] = True - LOCAL_INCLUDES += ['src/include/msvc_compat'] - if not CONFIG['HAVE_INTTYPES_H']: - LOCAL_INCLUDES += ['src/include/msvc_compat/C99'] - -if CONFIG['OS_TARGET'] == 'Linux': - # For mremap - DEFINES['_GNU_SOURCE'] = True - -if CONFIG['GNU_CC']: - CFLAGS += ['-std=gnu99'] - -DEFINES['abort'] = 'moz_abort' -DEFINES['JEMALLOC_ALLOC_JUNK'] = '((uint8_t)0xe4)' -DEFINES['JEMALLOC_FREE_JUNK'] = '((uint8_t)0xe5)' - -LOCAL_INCLUDES += [ - '!src/include', - 'src/include', -] - -# We allow warnings for third-party code that can be updated from upstream. -ALLOW_COMPILER_WARNINGS = True - -OS_LIBS += CONFIG['REALTIME_LIBS']
deleted file mode 100644 --- a/memory/jemalloc/src/.appveyor.yml +++ /dev/null @@ -1,42 +0,0 @@ -version: '{build}' - -environment: - matrix: - - MSYSTEM: MINGW64 - CPU: x86_64 - MSVC: amd64 - - MSYSTEM: MINGW32 - CPU: i686 - MSVC: x86 - - MSYSTEM: MINGW64 - CPU: x86_64 - - MSYSTEM: MINGW32 - CPU: i686 - - MSYSTEM: MINGW64 - CPU: x86_64 - MSVC: amd64 - CONFIG_FLAGS: --enable-debug - - MSYSTEM: MINGW32 - CPU: i686 - MSVC: x86 - CONFIG_FLAGS: --enable-debug - - MSYSTEM: MINGW64 - CPU: x86_64 - CONFIG_FLAGS: --enable-debug - - MSYSTEM: MINGW32 - CPU: i686 - CONFIG_FLAGS: --enable-debug - -install: - - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% - - if defined MSVC call "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %MSVC% - - if defined MSVC pacman --noconfirm -Rsc mingw-w64-%CPU%-gcc gcc - - pacman --noconfirm -Suy mingw-w64-%CPU%-make - -build_script: - - bash -c "autoconf" - - bash -c "./configure $CONFIG_FLAGS" - - mingw32-make -j3 - - file lib/jemalloc.dll - - mingw32-make -j3 tests - - mingw32-make -k check
deleted file mode 100644 --- a/memory/jemalloc/src/.travis.yml +++ /dev/null @@ -1,95 +0,0 @@ -language: generic - -matrix: - include: - - os: linux - env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="" - - os: osx - env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="" - - os: linux - env: CC=clang COMPILER_FLAGS="" CONFIGURE_FLAGS="" - - os: linux - env: CC=gcc COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" - addons: - apt: - packages: - - gcc-multilib - - os: linux - env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" - - os: linux - env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" - - os: linux - env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" - - os: linux - env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-tcache" - - os: osx - env: CC=clang COMPILER_FLAGS="" CONFIGURE_FLAGS="" - - os: osx - env: CC=gcc COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" - - os: osx - env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" - - os: osx - env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" - - os: osx - env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-tcache" - - os: linux - env: CC=clang COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" - addons: - apt: - packages: - - gcc-multilib - - os: linux - env: CC=clang COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" - - os: linux - env: CC=clang COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" - - os: linux - env: CC=clang COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" - - os: linux - env: CC=clang COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-tcache" - - os: linux - env: CC=gcc COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug" - addons: - apt: - packages: - - gcc-multilib - - os: linux - env: CC=gcc COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof" - addons: - apt: - packages: - - gcc-multilib - - os: linux - env: CC=gcc COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-stats" - addons: - apt: - packages: - - gcc-multilib - - os: linux - env: CC=gcc COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-tcache" - addons: - apt: - packages: - - gcc-multilib - - os: linux - env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-prof" - - os: linux - env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-stats" - - os: linux - env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-tcache" - - os: linux - env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-stats" - - os: linux - env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-tcache" - - os: linux - env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --disable-tcache" - - -before_script: - - autoconf - - ./configure ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" } $CONFIGURE_FLAGS - - make -j3 - - make -j3 tests - -script: - - make check -
deleted file mode 100644 --- a/memory/jemalloc/src/COPYING +++ /dev/null @@ -1,27 +0,0 @@ -Unless otherwise specified, files in the jemalloc source distribution are -subject to the following license: --------------------------------------------------------------------------------- -Copyright (C) 2002-2016 Jason Evans <jasone@canonware.com>. -All rights reserved. -Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. -Copyright (C) 2009-2016 Facebook, Inc. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright notice(s), - this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice(s), - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS -OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO -EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT, -INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF -ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ---------------------------------------------------------------------------------
deleted file mode 100644 --- a/memory/jemalloc/src/ChangeLog +++ /dev/null @@ -1,1043 +0,0 @@ -Following are change highlights associated with official releases. Important -bug fixes are all mentioned, but some internal enhancements are omitted here for -brevity. Much more detail can be found in the git revision history: - - https://github.com/jemalloc/jemalloc - -* 4.5.0 (February 28, 2017) - - This is the first release to benefit from much broader continuous integration - testing, thanks to @davidtgoldblatt. Had we had this testing infrastructure - in place for prior releases, it would have caught all of the most serious - regressions fixed by this release. - - New features: - - Add --disable-thp and the opt.thp to provide opt-out mechanisms for - transparent huge page integration. (@jasone) - - Update zone allocator integration to work with macOS 10.12. (@glandium) - - Restructure *CFLAGS configuration, so that CFLAGS behaves typically, and - EXTRA_CFLAGS provides a way to specify e.g. -Werror during building, but not - during configuration. (@jasone, @ronawho) - - Bug fixes: - - Fix DSS (sbrk(2)-based) allocation. This regression was first released in - 4.3.0. (@jasone) - - Handle race in per size class utilization computation. This functionality - was first released in 4.0.0. (@interwq) - - Fix lock order reversal during gdump. (@jasone) - - Fix-refactor tcache synchronization. This regression was first released in - 4.0.0. (@jasone) - - Fix various JSON-formatted malloc_stats_print() bugs. This functionality - was first released in 4.3.0. (@jasone) - - Fix huge-aligned allocation. This regression was first released in 4.4.0. - (@jasone) - - When transparent huge page integration is enabled, detect what state pages - start in according to the kernel's current operating mode, and only convert - arena chunks to non-huge during purging if that is not their initial state. - This functionality was first released in 4.4.0. (@jasone) - - Fix lg_chunk clamping for the --enable-cache-oblivious --disable-fill case. - This regression was first released in 4.0.0. (@jasone, @428desmo) - - Properly detect sparc64 when building for Linux. (@glaubitz) - -* 4.4.0 (December 3, 2016) - - New features: - - Add configure support for *-*-linux-android. (@cferris1000, @jasone) - - Add the --disable-syscall configure option, for use on systems that place - security-motivated limitations on syscall(2). (@jasone) - - Add support for Debian GNU/kFreeBSD. (@thesam) - - Optimizations: - - Add extent serial numbers and use them where appropriate as a sort key that - is higher priority than address, so that the allocation policy prefers older - extents. This tends to improve locality (decrease fragmentation) when - memory grows downward. (@jasone) - - Refactor madvise(2) configuration so that MADV_FREE is detected and utilized - on Linux 4.5 and newer. (@jasone) - - Mark partially purged arena chunks as non-huge-page. This improves - interaction with Linux's transparent huge page functionality. (@jasone) - - Bug fixes: - - Fix size class computations for edge conditions involving extremely large - allocations. This regression was first released in 4.0.0. (@jasone, - @ingvarha) - - Remove overly restrictive assertions related to the cactive statistic. This - regression was first released in 4.1.0. (@jasone) - - Implement a more reliable detection scheme for os_unfair_lock on macOS. - (@jszakmeister) - -* 4.3.1 (November 7, 2016) - - Bug fixes: - - Fix a severe virtual memory leak. This regression was first released in - 4.3.0. (@interwq, @jasone) - - Refactor atomic and prng APIs to restore support for 32-bit platforms that - use pre-C11 toolchains, e.g. FreeBSD's mips. (@jasone) - -* 4.3.0 (November 4, 2016) - - This is the first release that passes the test suite for multiple Windows - configurations, thanks in large part to @glandium setting up continuous - integration via AppVeyor (and Travis CI for Linux and OS X). - - New features: - - Add "J" (JSON) support to malloc_stats_print(). (@jasone) - - Add Cray compiler support. (@ronawho) - - Optimizations: - - Add/use adaptive spinning for bootstrapping and radix tree node - initialization. (@jasone) - - Bug fixes: - - Fix large allocation to search starting in the optimal size class heap, - which can substantially reduce virtual memory churn and fragmentation. This - regression was first released in 4.0.0. (@mjp41, @jasone) - - Fix stats.arenas.<i>.nthreads accounting. (@interwq) - - Fix and simplify decay-based purging. (@jasone) - - Make DSS (sbrk(2)-related) operations lockless, which resolves potential - deadlocks during thread exit. (@jasone) - - Fix over-sized allocation of radix tree leaf nodes. (@mjp41, @ogaun, - @jasone) - - Fix over-sized allocation of arena_t (plus associated stats) data - structures. (@jasone, @interwq) - - Fix EXTRA_CFLAGS to not affect configuration. (@jasone) - - Fix a Valgrind integration bug. (@ronawho) - - Disallow 0x5a junk filling when running in Valgrind. (@jasone) - - Fix a file descriptor leak on Linux. This regression was first released in - 4.2.0. (@vsarunas, @jasone) - - Fix static linking of jemalloc with glibc. (@djwatson) - - Use syscall(2) rather than {open,read,close}(2) during boot on Linux. This - works around other libraries' system call wrappers performing reentrant - allocation. (@kspinka, @Whissi, @jasone) - - Fix OS X default zone replacement to work with OS X 10.12. (@glandium, - @jasone) - - Fix cached memory management to avoid needless commit/decommit operations - during purging, which resolves permanent virtual memory map fragmentation - issues on Windows. (@mjp41, @jasone) - - Fix TSD fetches to avoid (recursive) allocation. This is relevant to - non-TLS and Windows configurations. (@jasone) - - Fix malloc_conf overriding to work on Windows. (@jasone) - - Forcibly disable lazy-lock on Windows (was forcibly *enabled*). (@jasone) - -* 4.2.1 (June 8, 2016) - - Bug fixes: - - Fix bootstrapping issues for configurations that require allocation during - tsd initialization (e.g. --disable-tls). (@cferris1000, @jasone) - - Fix gettimeofday() version of nstime_update(). (@ronawho) - - Fix Valgrind regressions in calloc() and chunk_alloc_wrapper(). (@ronawho) - - Fix potential VM map fragmentation regression. (@jasone) - - Fix opt_zero-triggered in-place huge reallocation zeroing. (@jasone) - - Fix heap profiling context leaks in reallocation edge cases. (@jasone) - -* 4.2.0 (May 12, 2016) - - New features: - - Add the arena.<i>.reset mallctl, which makes it possible to discard all of - an arena's allocations in a single operation. (@jasone) - - Add the stats.retained and stats.arenas.<i>.retained statistics. (@jasone) - - Add the --with-version configure option. (@jasone) - - Support --with-lg-page values larger than actual page size. (@jasone) - - Optimizations: - - Use pairing heaps rather than red-black trees for various hot data - structures. (@djwatson, @jasone) - - Streamline fast paths of rtree operations. (@jasone) - - Optimize the fast paths of calloc() and [m,d,sd]allocx(). (@jasone) - - Decommit unused virtual memory if the OS does not overcommit. (@jasone) - - Specify MAP_NORESERVE on Linux if [heuristic] overcommit is active, in order - to avoid unfortunate interactions during fork(2). (@jasone) - - Bug fixes: - - Fix chunk accounting related to triggering gdump profiles. (@jasone) - - Link against librt for clock_gettime(2) if glibc < 2.17. (@jasone) - - Scale leak report summary according to sampling probability. (@jasone) - -* 4.1.1 (May 3, 2016) - - This bugfix release resolves a variety of mostly minor issues, though the - bitmap fix is critical for 64-bit Windows. - - Bug fixes: - - Fix the linear scan version of bitmap_sfu() to shift by the proper amount - even when sizeof(long) is not the same as sizeof(void *), as on 64-bit - Windows. (@jasone) - - Fix hashing functions to avoid unaligned memory accesses (and resulting - crashes). This is relevant at least to some ARM-based platforms. - (@rkmisra) - - Fix fork()-related lock rank ordering reversals. These reversals were - unlikely to cause deadlocks in practice except when heap profiling was - enabled and active. (@jasone) - - Fix various chunk leaks in OOM code paths. (@jasone) - - Fix malloc_stats_print() to print opt.narenas correctly. (@jasone) - - Fix MSVC-specific build/test issues. (@rustyx, @yuslepukhin) - - Fix a variety of test failures that were due to test fragility rather than - core bugs. (@jasone) - -* 4.1.0 (February 28, 2016) - - This release is primarily about optimizations, but it also incorporates a lot - of portability-motivated refactoring and enhancements. Many people worked on - this release, to an extent that even with the omission here of minor changes - (see git revision history), and of the people who reported and diagnosed - issues, so much of the work was contributed that starting with this release, - changes are annotated with author credits to help reflect the collaborative - effort involved. - - New features: - - Implement decay-based unused dirty page purging, a major optimization with - mallctl API impact. This is an alternative to the existing ratio-based - unused dirty page purging, and is intended to eventually become the sole - purging mechanism. New mallctls: - + opt.purge - + opt.decay_time - + arena.<i>.decay - + arena.<i>.decay_time - + arenas.decay_time - + stats.arenas.<i>.decay_time - (@jasone, @cevans87) - - Add --with-malloc-conf, which makes it possible to embed a default - options string during configuration. This was motivated by the desire to - specify --with-malloc-conf=purge:decay , since the default must remain - purge:ratio until the 5.0.0 release. (@jasone) - - Add MS Visual Studio 2015 support. (@rustyx, @yuslepukhin) - - Make *allocx() size class overflow behavior defined. The maximum - size class is now less than PTRDIFF_MAX to protect applications against - numerical overflow, and all allocation functions are guaranteed to indicate - errors rather than potentially crashing if the request size exceeds the - maximum size class. (@jasone) - - jeprof: - + Add raw heap profile support. (@jasone) - + Add --retain and --exclude for backtrace symbol filtering. (@jasone) - - Optimizations: - - Optimize the fast path to combine various bootstrapping and configuration - checks and execute more streamlined code in the common case. (@interwq) - - Use linear scan for small bitmaps (used for small object tracking). In - addition to speeding up bitmap operations on 64-bit systems, this reduces - allocator metadata overhead by approximately 0.2%. (@djwatson) - - Separate arena_avail trees, which substantially speeds up run tree - operations. (@djwatson) - - Use memoization (boot-time-computed table) for run quantization. Separate - arena_avail trees reduced the importance of this optimization. (@jasone) - - Attempt mmap-based in-place huge reallocation. This can dramatically speed - up incremental huge reallocation. (@jasone) - - Incompatible changes: - - Make opt.narenas unsigned rather than size_t. (@jasone) - - Bug fixes: - - Fix stats.cactive accounting regression. (@rustyx, @jasone) - - Handle unaligned keys in hash(). This caused problems for some ARM systems. - (@jasone, @cferris1000) - - Refactor arenas array. In addition to fixing a fork-related deadlock, this - makes arena lookups faster and simpler. (@jasone) - - Move retained memory allocation out of the default chunk allocation - function, to a location that gets executed even if the application installs - a custom chunk allocation function. This resolves a virtual memory leak. - (@buchgr) - - Fix a potential tsd cleanup leak. (@cferris1000, @jasone) - - Fix run quantization. In practice this bug had no impact unless - applications requested memory with alignment exceeding one page. - (@jasone, @djwatson) - - Fix LinuxThreads-specific bootstrapping deadlock. (Cosmin Paraschiv) - - jeprof: - + Don't discard curl options if timeout is not defined. (@djwatson) - + Detect failed profile fetches. (@djwatson) - - Fix stats.arenas.<i>.{dss,lg_dirty_mult,decay_time,pactive,pdirty} for - --disable-stats case. (@jasone) - -* 4.0.4 (October 24, 2015) - - This bugfix release fixes another xallocx() regression. No other regressions - have come to light in over a month, so this is likely a good starting point - for people who prefer to wait for "dot one" releases with all the major issues - shaken out. - - Bug fixes: - - Fix xallocx(..., MALLOCX_ZERO to zero the last full trailing page of large - allocations that have been randomly assigned an offset of 0 when - --enable-cache-oblivious configure option is enabled. - -* 4.0.3 (September 24, 2015) - - This bugfix release continues the trend of xallocx() and heap profiling fixes. - - Bug fixes: - - Fix xallocx(..., MALLOCX_ZERO) to zero all trailing bytes of large - allocations when --enable-cache-oblivious configure option is enabled. - - Fix xallocx(..., MALLOCX_ZERO) to zero trailing bytes of huge allocations - when resizing from/to a size class that is not a multiple of the chunk size. - - Fix prof_tctx_dump_iter() to filter out nodes that were created after heap - profile dumping started. - - Work around a potentially bad thread-specific data initialization - interaction with NPTL (glibc's pthreads implementation). - -* 4.0.2 (September 21, 2015) - - This bugfix release addresses a few bugs specific to heap profiling. - - Bug fixes: - - Fix ixallocx_prof_sample() to never modify nor create sampled small - allocations. xallocx() is in general incapable of moving small allocations, - so this fix removes buggy code without loss of generality. - - Fix irallocx_prof_sample() to always allocate large regions, even when - alignment is non-zero. - - Fix prof_alloc_rollback() to read tdata from thread-specific data rather - than dereferencing a potentially invalid tctx. - -* 4.0.1 (September 15, 2015) - - This is a bugfix release that is somewhat high risk due to the amount of - refactoring required to address deep xallocx() problems. As a side effect of - these fixes, xallocx() now tries harder to partially fulfill requests for - optional extra space. Note that a couple of minor heap profiling - optimizations are included, but these are better thought of as performance - fixes that were integral to disovering most of the other bugs. - - Optimizations: - - Avoid a chunk metadata read in arena_prof_tctx_set(), since it is in the - fast path when heap profiling is enabled. Additionally, split a special - case out into arena_prof_tctx_reset(), which also avoids chunk metadata - reads. - - Optimize irallocx_prof() to optimistically update the sampler state. The - prior implementation appears to have been a holdover from when - rallocx()/xallocx() functionality was combined as rallocm(). - - Bug fixes: - - Fix TLS configuration such that it is enabled by default for platforms on - which it works correctly. - - Fix arenas_cache_cleanup() and arena_get_hard() to handle - allocation/deallocation within the application's thread-specific data - cleanup functions even after arenas_cache is torn down. - - Fix xallocx() bugs related to size+extra exceeding HUGE_MAXCLASS. - - Fix chunk purge hook calls for in-place huge shrinking reallocation to - specify the old chunk size rather than the new chunk size. This bug caused - no correctness issues for the default chunk purge function, but was - visible to custom functions set via the "arena.<i>.chunk_hooks" mallctl. - - Fix heap profiling bugs: - + Fix heap profiling to distinguish among otherwise identical sample sites - with interposed resets (triggered via the "prof.reset" mallctl). This bug - could cause data structure corruption that would most likely result in a - segfault. - + Fix irealloc_prof() to prof_alloc_rollback() on OOM. - + Make one call to prof_active_get_unlocked() per allocation event, and use - the result throughout the relevant functions that handle an allocation - event. Also add a missing check in prof_realloc(). These fixes protect - allocation events against concurrent prof_active changes. - + Fix ixallocx_prof() to pass usize_max and zero to ixallocx_prof_sample() - in the correct order. - + Fix prof_realloc() to call prof_free_sampled_object() after calling - prof_malloc_sample_object(). Prior to this fix, if tctx and old_tctx were - the same, the tctx could have been prematurely destroyed. - - Fix portability bugs: - + Don't bitshift by negative amounts when encoding/decoding run sizes in - chunk header maps. This affected systems with page sizes greater than 8 - KiB. - + Rename index_t to szind_t to avoid an existing type on Solaris. - + Add JEMALLOC_CXX_THROW to the memalign() function prototype, in order to - match glibc and avoid compilation errors when including both - jemalloc/jemalloc.h and malloc.h in C++ code. - + Don't assume that /bin/sh is appropriate when running size_classes.sh - during configuration. - + Consider __sparcv9 a synonym for __sparc64__ when defining LG_QUANTUM. - + Link tests to librt if it contains clock_gettime(2). - -* 4.0.0 (August 17, 2015) - - This version contains many speed and space optimizations, both minor and - major. The major themes are generalization, unification, and simplification. - Although many of these optimizations cause no visible behavior change, their - cumulative effect is substantial. - - New features: - - Normalize size class spacing to be consistent across the complete size - range. By default there are four size classes per size doubling, but this - is now configurable via the --with-lg-size-class-group option. Also add the - --with-lg-page, --with-lg-page-sizes, --with-lg-quantum, and - --with-lg-tiny-min options, which can be used to tweak page and size class - settings. Impacts: - + Worst case performance for incrementally growing/shrinking reallocation - is improved because there are far fewer size classes, and therefore - copying happens less often. - + Internal fragmentation is limited to 20% for all but the smallest size - classes (those less than four times the quantum). (1B + 4 KiB) - and (1B + 4 MiB) previously suffered nearly 50% internal fragmentation. - + Chunk fragmentation tends to be lower because there are fewer distinct run - sizes to pack. - - Add support for explicit tcaches. The "tcache.create", "tcache.flush", and - "tcache.destroy" mallctls control tcache lifetime and flushing, and the - MALLOCX_TCACHE(tc) and MALLOCX_TCACHE_NONE flags to the *allocx() API - control which tcache is used for each operation. - - Implement per thread heap profiling, as well as the ability to - enable/disable heap profiling on a per thread basis. Add the "prof.reset", - "prof.lg_sample", "thread.prof.name", "thread.prof.active", - "opt.prof_thread_active_init", "prof.thread_active_init", and - "thread.prof.active" mallctls. - - Add support for per arena application-specified chunk allocators, configured - via the "arena.<i>.chunk_hooks" mallctl. - - Refactor huge allocation to be managed by arenas, so that arenas now - function as general purpose independent allocators. This is important in - the context of user-specified chunk allocators, aside from the scalability - benefits. Related new statistics: - + The "stats.arenas.<i>.huge.allocated", "stats.arenas.<i>.huge.nmalloc", - "stats.arenas.<i>.huge.ndalloc", and "stats.arenas.<i>.huge.nrequests" - mallctls provide high level per arena huge allocation statistics. - + The "arenas.nhchunks", "arenas.hchunk.<i>.size", - "stats.arenas.<i>.hchunks.<j>.nmalloc", - "stats.arenas.<i>.hchunks.<j>.ndalloc", - "stats.arenas.<i>.hchunks.<j>.nrequests", and - "stats.arenas.<i>.hchunks.<j>.curhchunks" mallctls provide per size class - statistics. - - Add the 'util' column to malloc_stats_print() output, which reports the - proportion of available regions that are currently in use for each small - size class. - - Add "alloc" and "free" modes for for junk filling (see the "opt.junk" - mallctl), so that it is possible to separately enable junk filling for - allocation versus deallocation. - - Add the jemalloc-config script, which provides information about how - jemalloc was configured, and how to integrate it into application builds. - - Add metadata statistics, which are accessible via the "stats.metadata", - "stats.arenas.<i>.metadata.mapped", and - "stats.arenas.<i>.metadata.allocated" mallctls. - - Add the "stats.resident" mallctl, which reports the upper limit of - physically resident memory mapped by the allocator. - - Add per arena control over unused dirty page purging, via the - "arenas.lg_dirty_mult", "arena.<i>.lg_dirty_mult", and - "stats.arenas.<i>.lg_dirty_mult" mallctls. - - Add the "prof.gdump" mallctl, which makes it possible to toggle the gdump - feature on/off during program execution. - - Add sdallocx(), which implements sized deallocation. The primary - optimization over dallocx() is the removal of a metadata read, which often - suffers an L1 cache miss. - - Add missing header includes in jemalloc/jemalloc.h, so that applications - only have to #include <jemalloc/jemalloc.h>. - - Add support for additional platforms: - + Bitrig - + Cygwin - + DragonFlyBSD - + iOS - + OpenBSD - + OpenRISC/or1k - - Optimizations: - - Maintain dirty runs in per arena LRUs rather than in per arena trees of - dirty-run-containing chunks. In practice this change significantly reduces - dirty page purging volume. - - Integrate whole chunks into the unused dirty page purging machinery. This - reduces the cost of repeated huge allocation/deallocation, because it - effectively introduces a cache of chunks. - - Split the arena chunk map into two separate arrays, in order to increase - cache locality for the frequently accessed bits. - - Move small run metadata out of runs, into arena chunk headers. This reduces - run fragmentation, smaller runs reduce external fragmentation for small size - classes, and packed (less uniformly aligned) metadata layout improves CPU - cache set distribution. - - Randomly distribute large allocation base pointer alignment relative to page - boundaries in order to more uniformly utilize CPU cache sets. This can be - disabled via the --disable-cache-oblivious configure option, and queried via - the "config.cache_oblivious" mallctl. - - Micro-optimize the fast paths for the public API functions. - - Refactor thread-specific data to reside in a single structure. This assures - that only a single TLS read is necessary per call into the public API. - - Implement in-place huge allocation growing and shrinking. - - Refactor rtree (radix tree for chunk lookups) to be lock-free, and make - additional optimizations that reduce maximum lookup depth to one or two - levels. This resolves what was a concurrency bottleneck for per arena huge - allocation, because a global data structure is critical for determining - which arenas own which huge allocations. - - Incompatible changes: - - Replace --enable-cc-silence with --disable-cc-silence to suppress spurious - warnings by default. - - Assure that the constness of malloc_usable_size()'s return type matches that - of the system implementation. - - Change the heap profile dump format to support per thread heap profiling, - rename pprof to jeprof, and enhance it with the --thread=<n> option. As a - result, the bundled jeprof must now be used rather than the upstream - (gperftools) pprof. - - Disable "opt.prof_final" by default, in order to avoid atexit(3), which can - internally deadlock on some platforms. - - Change the "arenas.nlruns" mallctl type from size_t to unsigned. - - Replace the "stats.arenas.<i>.bins.<j>.allocated" mallctl with - "stats.arenas.<i>.bins.<j>.curregs". - - Ignore MALLOC_CONF in set{uid,gid,cap} binaries. - - Ignore MALLOCX_ARENA(a) in dallocx(), in favor of using the - MALLOCX_TCACHE(tc) and MALLOCX_TCACHE_NONE flags to control tcache usage. - - Removed features: - - Remove the *allocm() API, which is superseded by the *allocx() API. - - Remove the --enable-dss options, and make dss non-optional on all platforms - which support sbrk(2). - - Remove the "arenas.purge" mallctl, which was obsoleted by the - "arena.<i>.purge" mallctl in 3.1.0. - - Remove the unnecessary "opt.valgrind" mallctl; jemalloc automatically - detects whether it is running inside Valgrind. - - Remove the "stats.huge.allocated", "stats.huge.nmalloc", and - "stats.huge.ndalloc" mallctls. - - Remove the --enable-mremap option. - - Remove the "stats.chunks.current", "stats.chunks.total", and - "stats.chunks.high" mallctls. - - Bug fixes: - - Fix the cactive statistic to decrease (rather than increase) when active - memory decreases. This regression was first released in 3.5.0. - - Fix OOM handling in memalign() and valloc(). A variant of this bug existed - in all releases since 2.0.0, which introduced these functions. - - Fix an OOM-related regression in arena_tcache_fill_small(), which could - cause cache corruption on OOM. This regression was present in all releases - from 2.2.0 through 3.6.0. - - Fix size class overflow handling for malloc(), posix_memalign(), memalign(), - calloc(), and realloc() when profiling is enabled. - - Fix the "arena.<i>.dss" mallctl to return an error if "primary" or - "secondary" precedence is specified, but sbrk(2) is not supported. - - Fix fallback lg_floor() implementations to handle extremely large inputs. - - Ensure the default purgeable zone is after the default zone on OS X. - - Fix latent bugs in atomic_*(). - - Fix the "arena.<i>.dss" mallctl to handle read-only calls. - - Fix tls_model configuration to enable the initial-exec model when possible. - - Mark malloc_conf as a weak symbol so that the application can override it. - - Correctly detect glibc's adaptive pthread mutexes. - - Fix the --without-export configure option. - -* 3.6.0 (March 31, 2014) - - This version contains a critical bug fix for a regression present in 3.5.0 and - 3.5.1. - - Bug fixes: - - Fix a regression in arena_chunk_alloc() that caused crashes during - small/large allocation if chunk allocation failed. In the absence of this - bug, chunk allocation failure would result in allocation failure, e.g. NULL - return from malloc(). This regression was introduced in 3.5.0. - - Fix backtracing for gcc intrinsics-based backtracing by specifying - -fno-omit-frame-pointer to gcc. Note that the application (and all the - libraries it links to) must also be compiled with this option for - backtracing to be reliable. - - Use dss allocation precedence for huge allocations as well as small/large - allocations. - - Fix test assertion failure message formatting. This bug did not manifest on - x86_64 systems because of implementation subtleties in va_list. - - Fix inconsequential test failures for hash and SFMT code. - - New features: - - Support heap profiling on FreeBSD. This feature depends on the proc - filesystem being mounted during heap profile dumping. - -* 3.5.1 (February 25, 2014) - - This version primarily addresses minor bugs in test code. - - Bug fixes: - - Configure Solaris/Illumos to use MADV_FREE. - - Fix junk filling for mremap(2)-based huge reallocation. This is only - relevant if configuring with the --enable-mremap option specified. - - Avoid compilation failure if 'restrict' C99 keyword is not supported by the - compiler. - - Add a configure test for SSE2 rather than assuming it is usable on i686 - systems. This fixes test compilation errors, especially on 32-bit Linux - systems. - - Fix mallctl argument size mismatches (size_t vs. uint64_t) in the stats unit - test. - - Fix/remove flawed alignment-related overflow tests. - - Prevent compiler optimizations that could change backtraces in the - prof_accum unit test. - -* 3.5.0 (January 22, 2014) - - This version focuses on refactoring and automated testing, though it also - includes some non-trivial heap profiling optimizations not mentioned below. - - New features: - - Add the *allocx() API, which is a successor to the experimental *allocm() - API. The *allocx() functions are slightly simpler to use because they have - fewer parameters, they directly return the results of primary interest, and - mallocx()/rallocx() avoid the strict aliasing pitfall that - allocm()/rallocm() share with posix_memalign(). Note that *allocm() is - slated for removal in the next non-bugfix release. - - Add support for LinuxThreads. - - Bug fixes: - - Unless heap profiling is enabled, disable floating point code and don't link - with libm. This, in combination with e.g. EXTRA_CFLAGS=-mno-sse on x64 - systems, makes it possible to completely disable floating point register - use. Some versions of glibc neglect to save/restore caller-saved floating - point registers during dynamic lazy symbol loading, and the symbol loading - code uses whatever malloc the application happens to have linked/loaded - with, the result being potential floating point register corruption. - - Report ENOMEM rather than EINVAL if an OOM occurs during heap profiling - backtrace creation in imemalign(). This bug impacted posix_memalign() and - aligned_alloc(). - - Fix a file descriptor leak in a prof_dump_maps() error path. - - Fix prof_dump() to close the dump file descriptor for all relevant error - paths. - - Fix rallocm() to use the arena specified by the ALLOCM_ARENA(s) flag for - allocation, not just deallocation. - - Fix a data race for large allocation stats counters. - - Fix a potential infinite loop during thread exit. This bug occurred on - Solaris, and could affect other platforms with similar pthreads TSD - implementations. - - Don't junk-fill reallocations unless usable size changes. This fixes a - violation of the *allocx()/*allocm() semantics. - - Fix growing large reallocation to junk fill new space. - - Fix huge deallocation to junk fill when munmap is disabled. - - Change the default private namespace prefix from empty to je_, and change - --with-private-namespace-prefix so that it prepends an additional prefix - rather than replacing je_. This reduces the likelihood of applications - which statically link jemalloc experiencing symbol name collisions. - - Add missing private namespace mangling (relevant when - --with-private-namespace is specified). - - Add and use JEMALLOC_INLINE_C so that static inline functions are marked as - static even for debug builds. - - Add a missing mutex unlock in a malloc_init_hard() error path. In practice - this error path is never executed. - - Fix numerous bugs in malloc_strotumax() error handling/reporting. These - bugs had no impact except for malformed inputs. - - Fix numerous bugs in malloc_snprintf(). These bugs were not exercised by - existing calls, so they had no impact. - -* 3.4.1 (October 20, 2013) - - Bug fixes: - - Fix a race in the "arenas.extend" mallctl that could cause memory corruption - of internal data structures and subsequent crashes. - - Fix Valgrind integration flaws that caused Valgrind warnings about reads of - uninitialized memory in: - + arena chunk headers - + internal zero-initialized data structures (relevant to tcache and prof - code) - - Preserve errno during the first allocation. A readlink(2) call during - initialization fails unless /etc/malloc.conf exists, so errno was typically - set during the first allocation prior to this fix. - - Fix compilation warnings reported by gcc 4.8.1. - -* 3.4.0 (June 2, 2013) - - This version is essentially a small bugfix release, but the addition of - aarch64 support requires that the minor version be incremented. - - Bug fixes: - - Fix race-triggered deadlocks in chunk_record(). These deadlocks were - typically triggered by multiple threads concurrently deallocating huge - objects. - - New features: - - Add support for the aarch64 architecture. - -* 3.3.1 (March 6, 2013) - - This version fixes bugs that are typically encountered only when utilizing - custom run-time options. - - Bug fixes: - - Fix a locking order bug that could cause deadlock during fork if heap - profiling were enabled. - - Fix a chunk recycling bug that could cause the allocator to lose track of - whether a chunk was zeroed. On FreeBSD, NetBSD, and OS X, it could cause - corruption if allocating via sbrk(2) (unlikely unless running with the - "dss:primary" option specified). This was completely harmless on Linux - unless using mlockall(2) (and unlikely even then, unless the - --disable-munmap configure option or the "dss:primary" option was - specified). This regression was introduced in 3.1.0 by the - mlockall(2)/madvise(2) interaction fix. - - Fix TLS-related memory corruption that could occur during thread exit if the - thread never allocated memory. Only the quarantine and prof facilities were - susceptible. - - Fix two quarantine bugs: - + Internal reallocation of the quarantined object array leaked the old - array. - + Reallocation failure for internal reallocation of the quarantined object - array (very unlikely) resulted in memory corruption. - - Fix Valgrind integration to annotate all internally allocated memory in a - way that keeps Valgrind happy about internal data structure access. - - Fix building for s390 systems. - -* 3.3.0 (January 23, 2013) - - This version includes a few minor performance improvements in addition to the - listed new features and bug fixes. - - New features: - - Add clipping support to lg_chunk option processing. - - Add the --enable-ivsalloc option. - - Add the --without-export option. - - Add the --disable-zone-allocator option. - - Bug fixes: - - Fix "arenas.extend" mallctl to output the number of arenas. - - Fix chunk_recycle() to unconditionally inform Valgrind that returned memory - is undefined. - - Fix build break on FreeBSD related to alloca.h. - -* 3.2.0 (November 9, 2012) - - In addition to a couple of bug fixes, this version modifies page run - allocation and dirty page purging algorithms in order to better control - page-level virtual memory fragmentation. - - Incompatible changes: - - Change the "opt.lg_dirty_mult" default from 5 to 3 (32:1 to 8:1). - - Bug fixes: - - Fix dss/mmap allocation precedence code to use recyclable mmap memory only - after primary dss allocation fails. - - Fix deadlock in the "arenas.purge" mallctl. This regression was introduced - in 3.1.0 by the addition of the "arena.<i>.purge" mallctl. - -* 3.1.0 (October 16, 2012) - - New features: - - Auto-detect whether running inside Valgrind, thus removing the need to - manually specify MALLOC_CONF=valgrind:true. - - Add the "arenas.extend" mallctl, which allows applications to create - manually managed arenas. - - Add the ALLOCM_ARENA() flag for {,r,d}allocm(). - - Add the "opt.dss", "arena.<i>.dss", and "stats.arenas.<i>.dss" mallctls, - which provide control over dss/mmap precedence. - - Add the "arena.<i>.purge" mallctl, which obsoletes "arenas.purge". - - Define LG_QUANTUM for hppa. - - Incompatible changes: - - Disable tcache by default if running inside Valgrind, in order to avoid - making unallocated objects appear reachable to Valgrind. - - Drop const from malloc_usable_size() argument on Linux. - - Bug fixes: - - Fix heap profiling crash if sampled object is freed via realloc(p, 0). - - Remove const from __*_hook variable declarations, so that glibc can modify - them during process forking. - - Fix mlockall(2)/madvise(2) interaction. - - Fix fork(2)-related deadlocks. - - Fix error return value for "thread.tcache.enabled" mallctl. - -* 3.0.0 (May 11, 2012) - - Although this version adds some major new features, the primary focus is on - internal code cleanup that facilitates maintainability and portability, most - of which is not reflected in the ChangeLog. This is the first release to - incorporate substantial contributions from numerous other developers, and the - result is a more broadly useful allocator (see the git revision history for - contribution details). Note that the license has been unified, thanks to - Facebook granting a license under the same terms as the other copyright - holders (see COPYING). - - New features: - - Implement Valgrind support, redzones, and quarantine. - - Add support for additional platforms: - + FreeBSD - + Mac OS X Lion - + MinGW - + Windows (no support yet for replacing the system malloc) - - Add support for additional architectures: - + MIPS - + SH4 - + Tilera - - Add support for cross compiling. - - Add nallocm(), which rounds a request size up to the nearest size class - without actually allocating. - - Implement aligned_alloc() (blame C11). - - Add the "thread.tcache.enabled" mallctl. - - Add the "opt.prof_final" mallctl. - - Update pprof (from gperftools 2.0). - - Add the --with-mangling option. - - Add the --disable-experimental option. - - Add the --disable-munmap option, and make it the default on Linux. - - Add the --enable-mremap option, which disables use of mremap(2) by default. - - Incompatible changes: - - Enable stats by default. - - Enable fill by default. - - Disable lazy locking by default. - - Rename the "tcache.flush" mallctl to "thread.tcache.flush". - - Rename the "arenas.pagesize" mallctl to "arenas.page". - - Change the "opt.lg_prof_sample" default from 0 to 19 (1 B to 512 KiB). - - Change the "opt.prof_accum" default from true to false. - - Removed features: - - Remove the swap feature, including the "config.swap", "swap.avail", - "swap.prezeroed", "swap.nfds", and "swap.fds" mallctls. - - Remove highruns statistics, including the - "stats.arenas.<i>.bins.<j>.highruns" and - "stats.arenas.<i>.lruns.<j>.highruns" mallctls. - - As part of small size class refactoring, remove the "opt.lg_[qc]space_max", - "arenas.cacheline", "arenas.subpage", "arenas.[tqcs]space_{min,max}", and - "arenas.[tqcs]bins" mallctls. - - Remove the "arenas.chunksize" mallctl. - - Remove the "opt.lg_prof_tcmax" option. - - Remove the "opt.lg_prof_bt_max" option. - - Remove the "opt.lg_tcache_gc_sweep" option. - - Remove the --disable-tiny option, including the "config.tiny" mallctl. - - Remove the --enable-dynamic-page-shift configure option. - - Remove the --enable-sysv configure option. - - Bug fixes: - - Fix a statistics-related bug in the "thread.arena" mallctl that could cause - invalid statistics and crashes. - - Work around TLS deallocation via free() on Linux. This bug could cause - write-after-free memory corruption. - - Fix a potential deadlock that could occur during interval- and - growth-triggered heap profile dumps. - - Fix large calloc() zeroing bugs due to dropping chunk map unzeroed flags. - - Fix chunk_alloc_dss() to stop claiming memory is zeroed. This bug could - cause memory corruption and crashes with --enable-dss specified. - - Fix fork-related bugs that could cause deadlock in children between fork - and exec. - - Fix malloc_stats_print() to honor 'b' and 'l' in the opts parameter. - - Fix realloc(p, 0) to act like free(p). - - Do not enforce minimum alignment in memalign(). - - Check for NULL pointer in malloc_usable_size(). - - Fix an off-by-one heap profile statistics bug that could be observed in - interval- and growth-triggered heap profiles. - - Fix the "epoch" mallctl to update cached stats even if the passed in epoch - is 0. - - Fix bin->runcur management to fix a layout policy bug. This bug did not - affect correctness. - - Fix a bug in choose_arena_hard() that potentially caused more arenas to be - initialized than necessary. - - Add missing "opt.lg_tcache_max" mallctl implementation. - - Use glibc allocator hooks to make mixed allocator usage less likely. - - Fix build issues for --disable-tcache. - - Don't mangle pthread_create() when --with-private-namespace is specified. - -* 2.2.5 (November 14, 2011) - - Bug fixes: - - Fix huge_ralloc() race when using mremap(2). This is a serious bug that - could cause memory corruption and/or crashes. - - Fix huge_ralloc() to maintain chunk statistics. - - Fix malloc_stats_print(..., "a") output. - -* 2.2.4 (November 5, 2011) - - Bug fixes: - - Initialize arenas_tsd before using it. This bug existed for 2.2.[0-3], as - well as for --disable-tls builds in earlier releases. - - Do not assume a 4 KiB page size in test/rallocm.c. - -* 2.2.3 (August 31, 2011) - - This version fixes numerous bugs related to heap profiling. - - Bug fixes: - - Fix a prof-related race condition. This bug could cause memory corruption, - but only occurred in non-default configurations (prof_accum:false). - - Fix off-by-one backtracing issues (make sure that prof_alloc_prep() is - excluded from backtraces). - - Fix a prof-related bug in realloc() (only triggered by OOM errors). - - Fix prof-related bugs in allocm() and rallocm(). - - Fix prof_tdata_cleanup() for --disable-tls builds. - - Fix a relative include path, to fix objdir builds. - -* 2.2.2 (July 30, 2011) - - Bug fixes: - - Fix a build error for --disable-tcache. - - Fix assertions in arena_purge() (for real this time). - - Add the --with-private-namespace option. This is a workaround for symbol - conflicts that can inadvertently arise when using static libraries. - -* 2.2.1 (March 30, 2011) - - Bug fixes: - - Implement atomic operations for x86/x64. This fixes compilation failures - for versions of gcc that are still in wide use. - - Fix an assertion in arena_purge(). - -* 2.2.0 (March 22, 2011) - - This version incorporates several improvements to algorithms and data - structures that tend to reduce fragmentation and increase speed. - - New features: - - Add the "stats.cactive" mallctl. - - Update pprof (from google-perftools 1.7). - - Improve backtracing-related configuration logic, and add the - --disable-prof-libgcc option. - - Bug fixes: - - Change default symbol visibility from "internal", to "hidden", which - decreases the overhead of library-internal function calls. - - Fix symbol visibility so that it is also set on OS X. - - Fix a build dependency regression caused by the introduction of the .pic.o - suffix for PIC object files. - - Add missing checks for mutex initialization failures. - - Don't use libgcc-based backtracing except on x64, where it is known to work. - - Fix deadlocks on OS X that were due to memory allocation in - pthread_mutex_lock(). - - Heap profiling-specific fixes: - + Fix memory corruption due to integer overflow in small region index - computation, when using a small enough sample interval that profiling - context pointers are stored in small run headers. - + Fix a bootstrap ordering bug that only occurred with TLS disabled. - + Fix a rallocm() rsize bug. - + Fix error detection bugs for aligned memory allocation. - -* 2.1.3 (March 14, 2011) - - Bug fixes: - - Fix a cpp logic regression (due to the "thread.{de,}allocatedp" mallctl fix - for OS X in 2.1.2). - - Fix a "thread.arena" mallctl bug. - - Fix a thread cache stats merging bug. - -* 2.1.2 (March 2, 2011) - - Bug fixes: - - Fix "thread.{de,}allocatedp" mallctl for OS X. - - Add missing jemalloc.a to build system. - -* 2.1.1 (January 31, 2011) - - Bug fixes: - - Fix aligned huge reallocation (affected allocm()). - - Fix the ALLOCM_LG_ALIGN macro definition. - - Fix a heap dumping deadlock. - - Fix a "thread.arena" mallctl bug. - -* 2.1.0 (December 3, 2010) - - This version incorporates some optimizations that can't quite be considered - bug fixes. - - New features: - - Use Linux's mremap(2) for huge object reallocation when possible. - - Avoid locking in mallctl*() when possible. - - Add the "thread.[de]allocatedp" mallctl's. - - Convert the manual page source from roff to DocBook, and generate both roff - and HTML manuals. - - Bug fixes: - - Fix a crash due to incorrect bootstrap ordering. This only impacted - --enable-debug --enable-dss configurations. - - Fix a minor statistics bug for mallctl("swap.avail", ...). - -* 2.0.1 (October 29, 2010) - - Bug fixes: - - Fix a race condition in heap profiling that could cause undefined behavior - if "opt.prof_accum" were disabled. - - Add missing mutex unlocks for some OOM error paths in the heap profiling - code. - - Fix a compilation error for non-C99 builds. - -* 2.0.0 (October 24, 2010) - - This version focuses on the experimental *allocm() API, and on improved - run-time configuration/introspection. Nonetheless, numerous performance - improvements are also included. - - New features: - - Implement the experimental {,r,s,d}allocm() API, which provides a superset - of the functionality available via malloc(), calloc(), posix_memalign(), - realloc(), malloc_usable_size(), and free(). These functions can be used to - allocate/reallocate aligned zeroed memory, ask for optional extra memory - during reallocation, prevent object movement during reallocation, etc. - - Replace JEMALLOC_OPTIONS/JEMALLOC_PROF_PREFIX with MALLOC_CONF, which is - more human-readable, and more flexible. For example: - JEMALLOC_OPTIONS=AJP - is now: - MALLOC_CONF=abort:true,fill:true,stats_print:true - - Port to Apple OS X. Sponsored by Mozilla. - - Make it possible for the application to control thread-->arena mappings via - the "thread.arena" mallctl. - - Add compile-time support for all TLS-related functionality via pthreads TSD. - This is mainly of interest for OS X, which does not support TLS, but has a - TSD implementation with similar performance. - - Override memalign() and valloc() if they are provided by the system. - - Add the "arenas.purge" mallctl, which can be used to synchronously purge all - dirty unused pages. - - Make cumulative heap profiling data optional, so that it is possible to - limit the amount of memory consumed by heap profiling data structures. - - Add per thread allocation counters that can be accessed via the - "thread.allocated" and "thread.deallocated" mallctls. - - Incompatible changes: - - Remove JEMALLOC_OPTIONS and malloc_options (see MALLOC_CONF above). - - Increase default backtrace depth from 4 to 128 for heap profiling. - - Disable interval-based profile dumps by default. - - Bug fixes: - - Remove bad assertions in fork handler functions. These assertions could - cause aborts for some combinations of configure settings. - - Fix strerror_r() usage to deal with non-standard semantics in GNU libc. - - Fix leak context reporting. This bug tended to cause the number of contexts - to be underreported (though the reported number of objects and bytes were - correct). - - Fix a realloc() bug for large in-place growing reallocation. This bug could - cause memory corruption, but it was hard to trigger. - - Fix an allocation bug for small allocations that could be triggered if - multiple threads raced to create a new run of backing pages. - - Enhance the heap profiler to trigger samples based on usable size, rather - than request size. - - Fix a heap profiling bug due to sometimes losing track of requested object - size for sampled objects. - -* 1.0.3 (August 12, 2010) - - Bug fixes: - - Fix the libunwind-based implementation of stack backtracing (used for heap - profiling). This bug could cause zero-length backtraces to be reported. - - Add a missing mutex unlock in library initialization code. If multiple - threads raced to initialize malloc, some of them could end up permanently - blocked. - -* 1.0.2 (May 11, 2010) - - Bug fixes: - - Fix junk filling of large objects, which could cause memory corruption. - - Add MAP_NORESERVE support for chunk mapping, because otherwise virtual - memory limits could cause swap file configuration to fail. Contributed by - Jordan DeLong. - -* 1.0.1 (April 14, 2010) - - Bug fixes: - - Fix compilation when --enable-fill is specified. - - Fix threads-related profiling bugs that affected accuracy and caused memory - to be leaked during thread exit. - - Fix dirty page purging race conditions that could cause crashes. - - Fix crash in tcache flushing code during thread destruction. - -* 1.0.0 (April 11, 2010) - - This release focuses on speed and run-time introspection. Numerous - algorithmic improvements make this release substantially faster than its - predecessors. - - New features: - - Implement autoconf-based configuration system. - - Add mallctl*(), for the purposes of introspection and run-time - configuration. - - Make it possible for the application to manually flush a thread's cache, via - the "tcache.flush" mallctl. - - Base maximum dirty page count on proportion of active memory. - - Compute various additional run-time statistics, including per size class - statistics for large objects. - - Expose malloc_stats_print(), which can be called repeatedly by the - application. - - Simplify the malloc_message() signature to only take one string argument, - and incorporate an opaque data pointer argument for use by the application - in combination with malloc_stats_print(). - - Add support for allocation backed by one or more swap files, and allow the - application to disable over-commit if swap files are in use. - - Implement allocation profiling and leak checking. - - Removed features: - - Remove the dynamic arena rebalancing code, since thread-specific caching - reduces its utility. - - Bug fixes: - - Modify chunk allocation to work when address space layout randomization - (ASLR) is in use. - - Fix thread cleanup bugs related to TLS destruction. - - Handle 0-size allocation requests in posix_memalign(). - - Fix a chunk leak. The leaked chunks were never touched, so this impacted - virtual memory usage, but not physical memory usage. - -* linux_2008082[78]a (August 27/28, 2008) - - These snapshot releases are the simple result of incorporating Linux-specific - support into the FreeBSD malloc sources. - --------------------------------------------------------------------------------- -vim:filetype=text:textwidth=80
deleted file mode 100644 --- a/memory/jemalloc/src/INSTALL +++ /dev/null @@ -1,434 +0,0 @@ -Building and installing a packaged release of jemalloc can be as simple as -typing the following while in the root directory of the source tree: - - ./configure - make - make install - -If building from unpackaged developer sources, the simplest command sequence -that might work is: - - ./autogen.sh - make dist - make - make install - -Note that documentation is not built by the default target because doing so -would create a dependency on xsltproc in packaged releases, hence the -requirement to either run 'make dist' or avoid installing docs via the various -install_* targets documented below. - -=== Advanced configuration ===================================================== - -The 'configure' script supports numerous options that allow control of which -functionality is enabled, where jemalloc is installed, etc. Optionally, pass -any of the following arguments (not a definitive list) to 'configure': - ---help - Print a definitive list of options. - ---prefix=<install-root-dir> - Set the base directory in which to install. For example: - - ./configure --prefix=/usr/local - - will cause files to be installed into /usr/local/include, /usr/local/lib, - and /usr/local/man. - ---with-version=<major>.<minor>.<bugfix>-<nrev>-g<gid> - Use the specified version string rather than trying to generate one (if in - a git repository) or use existing the VERSION file (if present). - ---with-rpath=<colon-separated-rpath> - Embed one or more library paths, so that libjemalloc can find the libraries - it is linked to. This works only on ELF-based systems. - ---with-mangling=<map> - Mangle public symbols specified in <map> which is a comma-separated list of - name:mangled pairs. - - For example, to use ld's --wrap option as an alternative method for - overriding libc's malloc implementation, specify something like: - - --with-mangling=malloc:__wrap_malloc,free:__wrap_free[...] - - Note that mangling happens prior to application of the prefix specified by - --with-jemalloc-prefix, and mangled symbols are then ignored when applying - the prefix. - ---with-jemalloc-prefix=<prefix> - Prefix all public APIs with <prefix>. For example, if <prefix> is - "prefix_", API changes like the following occur: - - malloc() --> prefix_malloc() - malloc_conf --> prefix_malloc_conf - /etc/malloc.conf --> /etc/prefix_malloc.conf - MALLOC_CONF --> PREFIX_MALLOC_CONF - - This makes it possible to use jemalloc at the same time as the system - allocator, or even to use multiple copies of jemalloc simultaneously. - - By default, the prefix is "", except on OS X, where it is "je_". On OS X, - jemalloc overlays the default malloc zone, but makes no attempt to actually - replace the "malloc", "calloc", etc. symbols. - ---without-export - Don't export public APIs. This can be useful when building jemalloc as a - static library, or to avoid exporting public APIs when using the zone - allocator on OSX. - ---with-private-namespace=<prefix> - Prefix all library-private APIs with <prefix>je_. For shared libraries, - symbol visibility mechanisms prevent these symbols from being exported, but - for static libraries, naming collisions are a real possibility. By - default, <prefix> is empty, which results in a symbol prefix of je_ . - ---with-install-suffix=<suffix> - Append <suffix> to the base name of all installed files, such that multiple - versions of jemalloc can coexist in the same installation directory. For - example, libjemalloc.so.0 becomes libjemalloc<suffix>.so.0. - ---with-malloc-conf=<malloc_conf> - Embed <malloc_conf> as a run-time options string that is processed prior to - the malloc_conf global variable, the /etc/malloc.conf symlink, and the - MALLOC_CONF environment variable. For example, to change the default chunk - size to 256 KiB: - - --with-malloc-conf=lg_chunk:18 - ---disable-cc-silence - Disable code that silences non-useful compiler warnings. This is mainly - useful during development when auditing the set of warnings that are being - silenced. - ---enable-debug - Enable assertions and validation code. This incurs a substantial - performance hit, but is very useful during application development. - Implies --enable-ivsalloc. - ---enable-code-coverage - Enable code coverage support, for use during jemalloc test development. - Additional testing targets are available if this option is enabled: - - coverage - coverage_unit - coverage_integration - coverage_stress - - These targets do not clear code coverage results from previous runs, and - there are interactions between the various coverage targets, so it is - usually advisable to run 'make clean' between repeated code coverage runs. - ---disable-stats - Disable statistics gathering functionality. See the "opt.stats_print" - option documentation for usage details. - ---enable-ivsalloc - Enable validation code, which verifies that pointers reside within - jemalloc-owned chunks before dereferencing them. This incurs a minor - performance hit. - ---enable-prof - Enable heap profiling and leak detection functionality. See the "opt.prof" - option documentation for usage details. When enabled, there are several - approaches to backtracing, and the configure script chooses the first one - in the following list that appears to function correctly: - - + libunwind (requires --enable-prof-libunwind) - + libgcc (unless --disable-prof-libgcc) - + gcc intrinsics (unless --disable-prof-gcc) - ---enable-prof-libunwind - Use the libunwind library (http://www.nongnu.org/libunwind/) for stack - backtracing. - ---disable-prof-libgcc - Disable the use of libgcc's backtracing functionality. - ---disable-prof-gcc - Disable the use of gcc intrinsics for backtracing. - ---with-static-libunwind=<libunwind.a> - Statically link against the specified libunwind.a rather than dynamically - linking with -lunwind. - ---disable-tcache - Disable thread-specific caches for small objects. Objects are cached and - released in bulk, thus reducing the total number of mutex operations. See - the "opt.tcache" option for usage details. - ---disable-thp - Disable transparent huge page (THP) integration. On systems with THP - support, THPs are explicitly disabled as a side effect of unused dirty page - purging for chunks that back small and/or large allocations, because such - chunks typically comprise active, unused dirty, and untouched clean - pages. - ---disable-munmap - Disable virtual memory deallocation via munmap(2); instead keep track of - the virtual memory for later use. munmap() is disabled by default (i.e. - --disable-munmap is implied) on Linux, which has a quirk in its virtual - memory allocation algorithm that causes semi-permanent VM map holes under - normal jemalloc operation. - ---disable-fill - Disable support for junk/zero filling of memory, quarantine, and redzones. - See the "opt.junk", "opt.zero", "opt.quarantine", and "opt.redzone" option - documentation for usage details. - ---disable-valgrind - Disable support for Valgrind. - ---disable-zone-allocator - Disable zone allocator for Darwin. This means jemalloc won't be hooked as - the default allocator on OSX/iOS. - ---enable-utrace - Enable utrace(2)-based allocation tracing. This feature is not broadly - portable (FreeBSD has it, but Linux and OS X do not). - ---enable-xmalloc - Enable support for optional immediate termination due to out-of-memory - errors, as is commonly implemented by "xmalloc" wrapper function for malloc. - See the "opt.xmalloc" option documentation for usage details. - ---enable-lazy-lock - Enable code that wraps pthread_create() to detect when an application - switches from single-threaded to multi-threaded mode, so that it can avoid - mutex locking/unlocking operations while in single-threaded mode. In - practice, this feature usually has little impact on performance unless - thread-specific caching is disabled. - ---disable-tls - Disable thread-local storage (TLS), which allows for fast access to - thread-local variables via the __thread keyword. If TLS is available, - jemalloc uses it for several purposes. - ---disable-cache-oblivious - Disable cache-oblivious large allocation alignment for large allocation - requests with no alignment constraints. If this feature is disabled, all - large allocations are page-aligned as an implementation artifact, which can - severely harm CPU cache utilization. However, the cache-oblivious layout - comes at the cost of one extra page per large allocation, which in the - most extreme case increases physical memory usage for the 16 KiB size class - to 20 KiB. - ---disable-syscall - Disable use of syscall(2) rather than {open,read,write,close}(2). This is - intended as a workaround for systems that place security limitations on - syscall(2). - ---with-xslroot=<path> - Specify where to find DocBook XSL stylesheets when building the - documentation. - ---with-lg-page=<lg-page> - Specify the base 2 log of the system page size. This option is only useful - when cross compiling, since the configure script automatically determines - the host's page size by default. - ---with-lg-page-sizes=<lg-page-sizes> - Specify the comma-separated base 2 logs of the page sizes to support. This - option may be useful when cross-compiling in combination with - --with-lg-page, but its primary use case is for integration with FreeBSD's - libc, wherein jemalloc is embedded. - ---with-lg-size-class-group=<lg-size-class-group> - Specify the base 2 log of how many size classes to use for each doubling in - size. By default jemalloc uses <lg-size-class-group>=2, which results in - e.g. the following size classes: - - [...], 64, - 80, 96, 112, 128, - 160, [...] - - <lg-size-class-group>=3 results in e.g. the following size classes: - - [...], 64, - 72, 80, 88, 96, 104, 112, 120, 128, - 144, [...] - - The minimal <lg-size-class-group>=0 causes jemalloc to only provide size - classes that are powers of 2: - - [...], - 64, - 128, - 256, - [...] - - An implementation detail currently limits the total number of small size - classes to 255, and a compilation error will result if the - <lg-size-class-group> you specify cannot be supported. The limit is - roughly <lg-size-class-group>=4, depending on page size. - ---with-lg-quantum=<lg-quantum> - Specify the base 2 log of the minimum allocation alignment. jemalloc needs - to know the minimum alignment that meets the following C standard - requirement (quoted from the April 12, 2011 draft of the C11 standard): - - The pointer returned if the allocation succeeds is suitably aligned so - that it may be assigned to a pointer to any type of object with a - fundamental alignment requirement and then used to access such an object - or an array of such objects in the space allocated [...] - - This setting is architecture-specific, and although jemalloc includes known - safe values for the most commonly used modern architectures, there is a - wrinkle related to GNU libc (glibc) that may impact your choice of - <lg-quantum>. On most modern architectures, this mandates 16-byte alignment - (<lg-quantum>=4), but the glibc developers chose not to meet this - requirement for performance reasons. An old discussion can be found at - https://sourceware.org/bugzilla/show_bug.cgi?id=206 . Unlike glibc, - jemalloc does follow the C standard by default (caveat: jemalloc - technically cheats if --with-lg-tiny-min is smaller than - --with-lg-quantum), but the fact that Linux systems already work around - this allocator noncompliance means that it is generally safe in practice to - let jemalloc's minimum alignment follow glibc's lead. If you specify - --with-lg-quantum=3 during configuration, jemalloc will provide additional - size classes that are not 16-byte-aligned (24, 40, and 56, assuming - --with-lg-size-class-group=2). - ---with-lg-tiny-min=<lg-tiny-min> - Specify the base 2 log of the minimum tiny size class to support. Tiny - size classes are powers of 2 less than the quantum, and are only - incorporated if <lg-tiny-min> is less than <lg-quantum> (see - --with-lg-quantum). Tiny size classes technically violate the C standard - requirement for minimum alignment, and crashes could conceivably result if - the compiler were to generate instructions that made alignment assumptions, - both because illegal instruction traps could result, and because accesses - could straddle page boundaries and cause segmentation faults due to - accessing unmapped addresses. - - The default of <lg-tiny-min>=3 works well in practice even on architectures - that technically require 16-byte alignment, probably for the same reason - --with-lg-quantum=3 works. Smaller tiny size classes can, and will, cause - crashes (see https://bugzilla.mozilla.org/show_bug.cgi?id=691003 for an - example). - - This option is rarely useful, and is mainly provided as documentation of a - subtle implementation detail. If you do use this option, specify a - value in [3, ..., <lg-quantum>]. - -The following environment variables (not a definitive list) impact configure's -behavior: - -CFLAGS="?" - Pass these flags to the C compiler. Any flags set by the configure script - are prepended, which means explicitly set flags generally take precedence. - Take care when specifying flags such as -Werror, because configure tests may - be affected in undesirable ways. - -EXTRA_CFLAGS="?" - Append these flags to CFLAGS, without passing them to the compiler during - configuration. This makes it possible to add flags such as -Werror, while - allowing the configure script to determine what other flags are appropriate - for the specified configuration. - -CPPFLAGS="?" - Pass these flags to the C preprocessor. Note that CFLAGS is not passed to - 'cpp' when 'configure' is looking for include files, so you must use - CPPFLAGS instead if you need to help 'configure' find header files. - -LD_LIBRARY_PATH="?" - 'ld' uses this colon-separated list to find libraries. - -LDFLAGS="?" - Pass these flags when linking. - -PATH="?" - 'configure' uses this to find programs. - -In some cases it may be necessary to work around configuration results that do -not match reality. For example, Linux 4.5 added support for the MADV_FREE flag -to madvise(2), which can cause problems if building on a host with MADV_FREE -support and deploying to a target without. To work around this, use a cache -file to override the relevant configuration variable defined in configure.ac, -e.g.: - - echo "je_cv_madv_free=no" > config.cache && ./configure -C - -=== Advanced compilation ======================================================= - -To build only parts of jemalloc, use the following targets: - - build_lib_shared - build_lib_static - build_lib - build_doc_html - build_doc_man - build_doc - -To install only parts of jemalloc, use the following targets: - - install_bin - install_include - install_lib_shared - install_lib_static - install_lib - install_doc_html - install_doc_man - install_doc - -To clean up build results to varying degrees, use the following make targets: - - clean - distclean - relclean - -=== Advanced installation ====================================================== - -Optionally, define make variables when invoking make, including (not -exclusively): - -INCLUDEDIR="?" - Use this as the installation prefix for header files. - -LIBDIR="?" - Use this as the installation prefix for libraries. - -MANDIR="?" - Use this as the installation prefix for man pages. - -DESTDIR="?" - Prepend DESTDIR to INCLUDEDIR, LIBDIR, DATADIR, and MANDIR. This is useful - when installing to a different path than was specified via --prefix. - -CC="?" - Use this to invoke the C compiler. - -CFLAGS="?" - Pass these flags to the compiler. - -CPPFLAGS="?" - Pass these flags to the C preprocessor. - -LDFLAGS="?" - Pass these flags when linking. - -PATH="?" - Use this to search for programs used during configuration and building. - -=== Development ================================================================ - -If you intend to make non-trivial changes to jemalloc, use the 'autogen.sh' -script rather than 'configure'. This re-generates 'configure', enables -configuration dependency rules, and enables re-generation of automatically -generated source files. - -The build system supports using an object directory separate from the source -tree. For example, you can create an 'obj' directory, and from within that -directory, issue configuration and build commands: - - autoconf - mkdir obj - cd obj - ../configure --enable-autogen - make - -=== Documentation ============================================================== - -The manual page is generated in both html and roff formats. Any web browser -can be used to view the html manual. The roff manual page can be formatted -prior to installation via the following command: - - nroff -man -t doc/jemalloc.3
deleted file mode 100644 --- a/memory/jemalloc/src/Makefile.in +++ /dev/null @@ -1,512 +0,0 @@ -# Clear out all vpaths, then set just one (default vpath) for the main build -# directory. -vpath -vpath % . - -# Clear the default suffixes, so that built-in rules are not used. -.SUFFIXES : - -SHELL := /bin/sh - -CC := @CC@ - -# Configuration parameters. -DESTDIR = -BINDIR := $(DESTDIR)@BINDIR@ -INCLUDEDIR := $(DESTDIR)@INCLUDEDIR@ -LIBDIR := $(DESTDIR)@LIBDIR@ -DATADIR := $(DESTDIR)@DATADIR@ -MANDIR := $(DESTDIR)@MANDIR@ -srcroot := @srcroot@ -objroot := @objroot@ -abs_srcroot := @abs_srcroot@ -abs_objroot := @abs_objroot@ - -# Build parameters. -CPPFLAGS := @CPPFLAGS@ -I$(srcroot)include -I$(objroot)include -CONFIGURE_CFLAGS := @CONFIGURE_CFLAGS@ -SPECIFIED_CFLAGS := @SPECIFIED_CFLAGS@ -EXTRA_CFLAGS := @EXTRA_CFLAGS@ -CFLAGS := $(strip $(CONFIGURE_CFLAGS) $(SPECIFIED_CFLAGS) $(EXTRA_CFLAGS)) -LDFLAGS := @LDFLAGS@ -EXTRA_LDFLAGS := @EXTRA_LDFLAGS@ -LIBS := @LIBS@ -RPATH_EXTRA := @RPATH_EXTRA@ -SO := @so@ -IMPORTLIB := @importlib@ -O := @o@ -A := @a@ -EXE := @exe@ -LIBPREFIX := @libprefix@ -REV := @rev@ -install_suffix := @install_suffix@ -ABI := @abi@ -XSLTPROC := @XSLTPROC@ -AUTOCONF := @AUTOCONF@ -_RPATH = @RPATH@ -RPATH = $(if $(1),$(call _RPATH,$(1))) -cfghdrs_in := $(addprefix $(srcroot),@cfghdrs_in@) -cfghdrs_out := @cfghdrs_out@ -cfgoutputs_in := $(addprefix $(srcroot),@cfgoutputs_in@) -cfgoutputs_out := @cfgoutputs_out@ -enable_autogen := @enable_autogen@ -enable_code_coverage := @enable_code_coverage@ -enable_prof := @enable_prof@ -enable_valgrind := @enable_valgrind@ -enable_zone_allocator := @enable_zone_allocator@ -MALLOC_CONF := @JEMALLOC_CPREFIX@MALLOC_CONF -link_whole_archive := @link_whole_archive@ -DSO_LDFLAGS = @DSO_LDFLAGS@ -SOREV = @SOREV@ -PIC_CFLAGS = @PIC_CFLAGS@ -CTARGET = @CTARGET@ -LDTARGET = @LDTARGET@ -TEST_LD_MODE = @TEST_LD_MODE@ -MKLIB = @MKLIB@ -AR = @AR@ -ARFLAGS = @ARFLAGS@ -CC_MM = @CC_MM@ -LM := @LM@ -INSTALL = @INSTALL@ - -ifeq (macho, $(ABI)) -TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH="$(objroot)lib" -else -ifeq (pecoff, $(ABI)) -TEST_LIBRARY_PATH := PATH="$(PATH):$(objroot)lib" -else -TEST_LIBRARY_PATH := -endif -endif - -LIBJEMALLOC := $(LIBPREFIX)jemalloc$(install_suffix) - -# Lists of files. -BINS := $(objroot)bin/jemalloc-config $(objroot)bin/jemalloc.sh $(objroot)bin/jeprof -C_HDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h -C_SRCS := $(srcroot)src/jemalloc.c \ - $(srcroot)src/arena.c \ - $(srcroot)src/atomic.c \ - $(srcroot)src/base.c \ - $(srcroot)src/bitmap.c \ - $(srcroot)src/chunk.c \ - $(srcroot)src/chunk_dss.c \ - $(srcroot)src/chunk_mmap.c \ - $(srcroot)src/ckh.c \ - $(srcroot)src/ctl.c \ - $(srcroot)src/extent.c \ - $(srcroot)src/hash.c \ - $(srcroot)src/huge.c \ - $(srcroot)src/mb.c \ - $(srcroot)src/mutex.c \ - $(srcroot)src/nstime.c \ - $(srcroot)src/pages.c \ - $(srcroot)src/prng.c \ - $(srcroot)src/prof.c \ - $(srcroot)src/quarantine.c \ - $(srcroot)src/rtree.c \ - $(srcroot)src/stats.c \ - $(srcroot)src/spin.c \ - $(srcroot)src/tcache.c \ - $(srcroot)src/ticker.c \ - $(srcroot)src/tsd.c \ - $(srcroot)src/util.c \ - $(srcroot)src/witness.c -ifeq ($(enable_valgrind), 1) -C_SRCS += $(srcroot)src/valgrind.c -endif -ifeq ($(enable_zone_allocator), 1) -C_SRCS += $(srcroot)src/zone.c -endif -ifeq ($(IMPORTLIB),$(SO)) -STATIC_LIBS := $(objroot)lib/$(LIBJEMALLOC).$(A) -endif -ifdef PIC_CFLAGS -STATIC_LIBS += $(objroot)lib/$(LIBJEMALLOC)_pic.$(A) -else -STATIC_LIBS += $(objroot)lib/$(LIBJEMALLOC)_s.$(A) -endif -DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV) -ifneq ($(SOREV),$(SO)) -DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO) -endif -ifeq (1, $(link_whole_archive)) -LJEMALLOC := -Wl,--whole-archive -L$(objroot)lib -l$(LIBJEMALLOC) -Wl,--no-whole-archive -else -LJEMALLOC := $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) -endif -PC := $(objroot)jemalloc.pc -MAN3 := $(objroot)doc/jemalloc$(install_suffix).3 -DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml -DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.html) -DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.3) -DOCS := $(DOCS_HTML) $(DOCS_MAN3) -C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \ - $(srcroot)test/src/btalloc_1.c $(srcroot)test/src/math.c \ - $(srcroot)test/src/mtx.c $(srcroot)test/src/mq.c \ - $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ - $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c -ifeq (1, $(link_whole_archive)) -C_UTIL_INTEGRATION_SRCS := -else -C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/util.c -endif -TESTS_UNIT := \ - $(srcroot)test/unit/a0.c \ - $(srcroot)test/unit/arena_reset.c \ - $(srcroot)test/unit/atomic.c \ - $(srcroot)test/unit/bitmap.c \ - $(srcroot)test/unit/ckh.c \ - $(srcroot)test/unit/decay.c \ - $(srcroot)test/unit/extent_quantize.c \ - $(srcroot)test/unit/fork.c \ - $(srcroot)test/unit/hash.c \ - $(srcroot)test/unit/junk.c \ - $(srcroot)test/unit/junk_alloc.c \ - $(srcroot)test/unit/junk_free.c \ - $(srcroot)test/unit/lg_chunk.c \ - $(srcroot)test/unit/mallctl.c \ - $(srcroot)test/unit/math.c \ - $(srcroot)test/unit/mq.c \ - $(srcroot)test/unit/mtx.c \ - $(srcroot)test/unit/pack.c \ - $(srcroot)test/unit/pages.c \ - $(srcroot)test/unit/ph.c \ - $(srcroot)test/unit/prng.c \ - $(srcroot)test/unit/prof_accum.c \ - $(srcroot)test/unit/prof_active.c \ - $(srcroot)test/unit/prof_gdump.c \ - $(srcroot)test/unit/prof_idump.c \ - $(srcroot)test/unit/prof_reset.c \ - $(srcroot)test/unit/prof_thread_name.c \ - $(srcroot)test/unit/ql.c \ - $(srcroot)test/unit/qr.c \ - $(srcroot)test/unit/quarantine.c \ - $(srcroot)test/unit/rb.c \ - $(srcroot)test/unit/rtree.c \ - $(srcroot)test/unit/run_quantize.c \ - $(srcroot)test/unit/SFMT.c \ - $(srcroot)test/unit/size_classes.c \ - $(srcroot)test/unit/smoothstep.c \ - $(srcroot)test/unit/stats.c \ - $(srcroot)test/unit/stats_print.c \ - $(srcroot)test/unit/ticker.c \ - $(srcroot)test/unit/nstime.c \ - $(srcroot)test/unit/tsd.c \ - $(srcroot)test/unit/util.c \ - $(srcroot)test/unit/witness.c \ - $(srcroot)test/unit/zero.c -TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ - $(srcroot)test/integration/allocated.c \ - $(srcroot)test/integration/sdallocx.c \ - $(srcroot)test/integration/mallocx.c \ - $(srcroot)test/integration/MALLOCX_ARENA.c \ - $(srcroot)test/integration/overflow.c \ - $(srcroot)test/integration/posix_memalign.c \ - $(srcroot)test/integration/rallocx.c \ - $(srcroot)test/integration/thread_arena.c \ - $(srcroot)test/integration/thread_tcache_enabled.c \ - $(srcroot)test/integration/xallocx.c \ - $(srcroot)test/integration/chunk.c -TESTS_STRESS := $(srcroot)test/stress/microbench.c -TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_STRESS) - -C_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.$(O)) -C_PIC_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.pic.$(O)) -C_JET_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.jet.$(O)) -C_TESTLIB_UNIT_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.unit.$(O)) -C_TESTLIB_INTEGRATION_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.integration.$(O)) -C_UTIL_INTEGRATION_OBJS := $(C_UTIL_INTEGRATION_SRCS:$(srcroot)%.c=$(objroot)%.integration.$(O)) -C_TESTLIB_STRESS_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.stress.$(O)) -C_TESTLIB_OBJS := $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(C_TESTLIB_STRESS_OBJS) - -TESTS_UNIT_OBJS := $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%.$(O)) -TESTS_INTEGRATION_OBJS := $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%.$(O)) -TESTS_STRESS_OBJS := $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%.$(O)) -TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_STRESS_OBJS) - -.PHONY: all dist build_doc_html build_doc_man build_doc -.PHONY: install_bin install_include install_lib -.PHONY: install_doc_html install_doc_man install_doc install -.PHONY: tests check clean distclean relclean - -.SECONDARY : $(TESTS_OBJS) - -# Default target. -all: build_lib - -dist: build_doc - -$(objroot)doc/%.html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl - $(XSLTPROC) -o $@ $(objroot)doc/html.xsl $< - -$(objroot)doc/%.3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl - $(XSLTPROC) -o $@ $(objroot)doc/manpages.xsl $< - -build_doc_html: $(DOCS_HTML) -build_doc_man: $(DOCS_MAN3) -build_doc: $(DOCS) - -# -# Include generated dependency files. -# -ifdef CC_MM --include $(C_OBJS:%.$(O)=%.d) --include $(C_PIC_OBJS:%.$(O)=%.d) --include $(C_JET_OBJS:%.$(O)=%.d) --include $(C_TESTLIB_OBJS:%.$(O)=%.d) --include $(TESTS_OBJS:%.$(O)=%.d) -endif - -$(C_OBJS): $(objroot)src/%.$(O): $(srcroot)src/%.c -$(C_PIC_OBJS): $(objroot)src/%.pic.$(O): $(srcroot)src/%.c -$(C_PIC_OBJS): CFLAGS += $(PIC_CFLAGS) -$(C_JET_OBJS): $(objroot)src/%.jet.$(O): $(srcroot)src/%.c -$(C_JET_OBJS): CFLAGS += -DJEMALLOC_JET -$(C_TESTLIB_UNIT_OBJS): $(objroot)test/src/%.unit.$(O): $(srcroot)test/src/%.c -$(C_TESTLIB_UNIT_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST -$(C_TESTLIB_INTEGRATION_OBJS): $(objroot)test/src/%.integration.$(O): $(srcroot)test/src/%.c -$(C_TESTLIB_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST -$(C_UTIL_INTEGRATION_OBJS): $(objroot)src/%.integration.$(O): $(srcroot)src/%.c -$(C_TESTLIB_STRESS_OBJS): $(objroot)test/src/%.stress.$(O): $(srcroot)test/src/%.c -$(C_TESTLIB_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST -DJEMALLOC_STRESS_TESTLIB -$(C_TESTLIB_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include -$(TESTS_UNIT_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST -$(TESTS_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST -$(TESTS_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST -$(TESTS_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c -$(TESTS_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include -ifneq ($(IMPORTLIB),$(SO)) -$(C_OBJS) $(C_JET_OBJS): CPPFLAGS += -DDLLEXPORT -endif - -ifndef CC_MM -# Dependencies. -HEADER_DIRS = $(srcroot)include/jemalloc/internal \ - $(objroot)include/jemalloc $(objroot)include/jemalloc/internal -HEADERS = $(wildcard $(foreach dir,$(HEADER_DIRS),$(dir)/*.h)) -$(C_OBJS) $(C_PIC_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): $(HEADERS) -$(TESTS_OBJS): $(objroot)test/include/test/jemalloc_test.h -endif - -$(C_OBJS) $(C_PIC_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): %.$(O): - @mkdir -p $(@D) - $(CC) $(CFLAGS) -c $(CPPFLAGS) $(CTARGET) $< -ifdef CC_MM - @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.$(O)=%.d) $< -endif - -ifneq ($(SOREV),$(SO)) -%.$(SO) : %.$(SOREV) - @mkdir -p $(@D) - ln -sf $(<F) $@ -endif - -$(objroot)lib/$(LIBJEMALLOC).$(SOREV) : $(if $(PIC_CFLAGS),$(C_PIC_OBJS),$(C_OBJS)) - @mkdir -p $(@D) - $(CC) $(DSO_LDFLAGS) $(call RPATH,$(RPATH_EXTRA)) $(LDTARGET) $+ $(LDFLAGS) $(LIBS) $(EXTRA_LDFLAGS) - -$(objroot)lib/$(LIBJEMALLOC)_pic.$(A) : $(C_PIC_OBJS) -$(objroot)lib/$(LIBJEMALLOC).$(A) : $(C_OBJS) -$(objroot)lib/$(LIBJEMALLOC)_s.$(A) : $(C_OBJS) - -$(STATIC_LIBS): - @mkdir -p $(@D) - $(AR) $(ARFLAGS)@AROUT@ $+ - -$(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) - @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) - -$(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) - @mkdir -p $(@D) - $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LJEMALLOC) $(LDFLAGS) $(filter-out -lm,$(filter -lrt -lpthread,$(LIBS))) $(LM) $(EXTRA_LDFLAGS) - -$(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) - @mkdir -p $(@D) - $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) - -build_lib_shared: $(DSOS) -build_lib_static: $(STATIC_LIBS) -build_lib: build_lib_shared build_lib_static - -install_bin: - $(INSTALL) -d $(BINDIR) - @for b in $(BINS); do \ - echo "$(INSTALL) -m 755 $$b $(BINDIR)"; \ - $(INSTALL) -m 755 $$b $(BINDIR); \ -done - -install_include: - $(INSTALL) -d $(INCLUDEDIR)/jemalloc - @for h in $(C_HDRS); do \ - echo "$(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc"; \ - $(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc; \ -done - -install_lib_shared: $(DSOS) - $(INSTALL) -d $(LIBDIR) - $(INSTALL) -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR) -ifneq ($(SOREV),$(SO)) - ln -sf $(LIBJEMALLOC).$(SOREV) $(LIBDIR)/$(LIBJEMALLOC).$(SO) -endif - -install_lib_static: $(STATIC_LIBS) - $(INSTALL) -d $(LIBDIR) - @for l in $(STATIC_LIBS); do \ - echo "$(INSTALL) -m 755 $$l $(LIBDIR)"; \ - $(INSTALL) -m 755 $$l $(LIBDIR); \ -done - -install_lib_pc: $(PC) - $(INSTALL) -d $(LIBDIR)/pkgconfig - @for l in $(PC); do \ - echo "$(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig"; \ - $(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig; \ -done - -install_lib: install_lib_shared install_lib_static install_lib_pc - -install_doc_html: - $(INSTALL) -d $(DATADIR)/doc/jemalloc$(install_suffix) - @for d in $(DOCS_HTML); do \ - echo "$(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \ - $(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \ -done - -install_doc_man: - $(INSTALL) -d $(MANDIR)/man3 - @for d in $(DOCS_MAN3); do \ - echo "$(INSTALL) -m 644 $$d $(MANDIR)/man3"; \ - $(INSTALL) -m 644 $$d $(MANDIR)/man3; \ -done - -install_doc: install_doc_html install_doc_man - -install: install_bin install_include install_lib install_doc - -tests_unit: $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%$(EXE)) -tests_integration: $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%$(EXE)) -tests_stress: $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%$(EXE)) -tests: tests_unit tests_integration tests_stress - -check_unit_dir: - @mkdir -p $(objroot)test/unit -check_integration_dir: - @mkdir -p $(objroot)test/integration -stress_dir: - @mkdir -p $(objroot)test/stress -check_dir: check_unit_dir check_integration_dir - -check_unit: tests_unit check_unit_dir - $(MALLOC_CONF)="purge:ratio" $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) - $(MALLOC_CONF)="purge:decay" $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) -check_integration_prof: tests_integration check_integration_dir -ifeq ($(enable_prof), 1) - $(MALLOC_CONF)="prof:true" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) - $(MALLOC_CONF)="prof:true,prof_active:false" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) -endif -check_integration_decay: tests_integration check_integration_dir - $(MALLOC_CONF)="purge:decay,decay_time:-1" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) - $(MALLOC_CONF)="purge:decay,decay_time:0" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) - $(MALLOC_CONF)="purge:decay" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) -check_integration: tests_integration check_integration_dir - $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) -stress: tests_stress stress_dir - $(SHELL) $(objroot)test/test.sh $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%) -check: check_unit check_integration check_integration_decay check_integration_prof - -ifeq ($(enable_code_coverage), 1) -coverage_unit: check_unit - $(SHELL) $(srcroot)coverage.sh $(srcroot)src jet $(C_JET_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src unit $(C_TESTLIB_UNIT_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)test/unit unit $(TESTS_UNIT_OBJS) - -coverage_integration: check_integration - $(SHELL) $(srcroot)coverage.sh $(srcroot)src pic $(C_PIC_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)src integration $(C_UTIL_INTEGRATION_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src integration $(C_TESTLIB_INTEGRATION_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)test/integration integration $(TESTS_INTEGRATION_OBJS) - -coverage_stress: stress - $(SHELL) $(srcroot)coverage.sh $(srcroot)src pic $(C_PIC_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)src jet $(C_JET_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src stress $(C_TESTLIB_STRESS_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)test/stress stress $(TESTS_STRESS_OBJS) - -coverage: check - $(SHELL) $(srcroot)coverage.sh $(srcroot)src pic $(C_PIC_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)src jet $(C_JET_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)src integration $(C_UTIL_INTEGRATION_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src unit $(C_TESTLIB_UNIT_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src integration $(C_TESTLIB_INTEGRATION_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src stress $(C_TESTLIB_STRESS_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)test/unit unit $(TESTS_UNIT_OBJS) $(TESTS_UNIT_AUX_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)test/integration integration $(TESTS_INTEGRATION_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)test/stress integration $(TESTS_STRESS_OBJS) -endif - -clean: - rm -f $(C_OBJS) - rm -f $(C_PIC_OBJS) - rm -f $(C_JET_OBJS) - rm -f $(C_TESTLIB_OBJS) - rm -f $(C_OBJS:%.$(O)=%.d) - rm -f $(C_OBJS:%.$(O)=%.gcda) - rm -f $(C_OBJS:%.$(O)=%.gcno) - rm -f $(C_PIC_OBJS:%.$(O)=%.d) - rm -f $(C_PIC_OBJS:%.$(O)=%.gcda) - rm -f $(C_PIC_OBJS:%.$(O)=%.gcno) - rm -f $(C_JET_OBJS:%.$(O)=%.d) - rm -f $(C_JET_OBJS:%.$(O)=%.gcda) - rm -f $(C_JET_OBJS:%.$(O)=%.gcno) - rm -f $(C_TESTLIB_OBJS:%.$(O)=%.d) - rm -f $(C_TESTLIB_OBJS:%.$(O)=%.gcda) - rm -f $(C_TESTLIB_OBJS:%.$(O)=%.gcno) - rm -f $(TESTS_OBJS:%.$(O)=%$(EXE)) - rm -f $(TESTS_OBJS) - rm -f $(TESTS_OBJS:%.$(O)=%.d) - rm -f $(TESTS_OBJS:%.$(O)=%.gcda) - rm -f $(TESTS_OBJS:%.$(O)=%.gcno) - rm -f $(TESTS_OBJS:%.$(O)=%.out) - rm -f $(DSOS) $(STATIC_LIBS) - rm -f $(objroot)*.gcov.* - -distclean: clean - rm -f $(objroot)bin/jemalloc-config - rm -f $(objroot)bin/jemalloc.sh - rm -f $(objroot)bin/jeprof - rm -f $(objroot)config.log - rm -f $(objroot)config.status - rm -f $(objroot)config.stamp - rm -f $(cfghdrs_out) - rm -f $(cfgoutputs_out) - -relclean: distclean - rm -f $(objroot)configure - rm -f $(objroot)VERSION - rm -f $(DOCS_HTML) - rm -f $(DOCS_MAN3) - -#=============================================================================== -# Re-configuration rules. - -ifeq ($(enable_autogen), 1) -$(srcroot)configure : $(srcroot)configure.ac - cd ./$(srcroot) && $(AUTOCONF) - -$(objroot)config.status : $(srcroot)configure - ./$(objroot)config.status --recheck - -$(srcroot)config.stamp.in : $(srcroot)configure.ac - echo stamp > $(srcroot)config.stamp.in - -$(objroot)config.stamp : $(cfgoutputs_in) $(cfghdrs_in) $(srcroot)configure - ./$(objroot)config.status - @touch $@ - -# There must be some action in order for make to re-read Makefile when it is -# out of date. -$(cfgoutputs_out) $(cfghdrs_out) : $(objroot)config.stamp - @true -endif
deleted file mode 100644 --- a/memory/jemalloc/src/README +++ /dev/null @@ -1,20 +0,0 @@ -jemalloc is a general purpose malloc(3) implementation that emphasizes -fragmentation avoidance and scalable concurrency support. jemalloc first came -into use as the FreeBSD libc allocator in 2005, and since then it has found its -way into numerous applications that rely on its predictable behavior. In 2010 -jemalloc development efforts broadened to include developer support features -such as heap profiling, Valgrind integration, and extensive monitoring/tuning -hooks. Modern jemalloc releases continue to be integrated back into FreeBSD, -and therefore versatility remains critical. Ongoing development efforts trend -toward making jemalloc among the best allocators for a broad range of demanding -applications, and eliminating/mitigating weaknesses that have practical -repercussions for real world applications. - -The COPYING file contains copyright and licensing information. - -The INSTALL file contains information on how to configure, build, and install -jemalloc. - -The ChangeLog file contains a brief summary of changes for each release. - -URL: http://jemalloc.net/
deleted file mode 100644 --- a/memory/jemalloc/src/VERSION +++ /dev/null @@ -1,1 +0,0 @@ -4.5.0-0-g04380e79f1e2428bd0ad000bbc6e3d2dfc6b66a5
deleted file mode 100755 --- a/memory/jemalloc/src/autogen.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/sh - -for i in autoconf; do - echo "$i" - $i - if [ $? -ne 0 ]; then - echo "Error $? in $i" - exit 1 - fi -done - -echo "./configure --enable-autogen $@" -./configure --enable-autogen $@ -if [ $? -ne 0 ]; then - echo "Error $? in ./configure" - exit 1 -fi
deleted file mode 100644 --- a/memory/jemalloc/src/bin/jemalloc-config.in +++ /dev/null @@ -1,79 +0,0 @@ -#!/bin/sh - -usage() { - cat <<EOF -Usage: - @BINDIR@/jemalloc-config <option> -Options: - --help | -h : Print usage. - --version : Print jemalloc version. - --revision : Print shared library revision number. - --config : Print configure options used to build jemalloc. - --prefix : Print installation directory prefix. - --bindir : Print binary installation directory. - --datadir : Print data installation directory. - --includedir : Print include installation directory. - --libdir : Print library installation directory. - --mandir : Print manual page installation directory. - --cc : Print compiler used to build jemalloc. - --cflags : Print compiler flags used to build jemalloc. - --cppflags : Print preprocessor flags used to build jemalloc. - --ldflags : Print library flags used to build jemalloc. - --libs : Print libraries jemalloc was linked against. -EOF -} - -prefix="@prefix@" -exec_prefix="@exec_prefix@" - -case "$1" in ---help | -h) - usage - exit 0 - ;; ---version) - echo "@jemalloc_version@" - ;; ---revision) - echo "@rev@" - ;; ---config) - echo "@CONFIG@" - ;; ---prefix) - echo "@PREFIX@" - ;; ---bindir) - echo "@BINDIR@" - ;; ---datadir) - echo "@DATADIR@" - ;; ---includedir) - echo "@INCLUDEDIR@" - ;; ---libdir) - echo "@LIBDIR@" - ;; ---mandir) - echo "@MANDIR@" - ;; ---cc) - echo "@CC@" - ;; ---cflags) - echo "@CFLAGS@" - ;; ---cppflags) - echo "@CPPFLAGS@" - ;; ---ldflags) - echo "@LDFLAGS@ @EXTRA_LDFLAGS@" - ;; ---libs) - echo "@LIBS@" - ;; -*) - usage - exit 1 -esac
deleted file mode 100644 --- a/memory/jemalloc/src/bin/jemalloc.sh.in +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/sh - -prefix=@prefix@ -exec_prefix=@exec_prefix@ -libdir=@libdir@ - -@LD_PRELOAD_VAR@=${libdir}/libjemalloc.@SOREV@ -export @LD_PRELOAD_VAR@ -exec "$@"
deleted file mode 100644 --- a/memory/jemalloc/src/bin/jeprof.in +++ /dev/null @@ -1,5611 +0,0 @@ -#! /usr/bin/env perl - -# Copyright (c) 1998-2007, Google Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following disclaimer -# in the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Google Inc. nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -# --- -# Program for printing the profile generated by common/profiler.cc, -# or by the heap profiler (common/debugallocation.cc) -# -# The profile contains a sequence of entries of the form: -# <count> <stack trace> -# This program parses the profile, and generates user-readable -# output. -# -# Examples: -# -# % tools/jeprof "program" "profile" -# Enters "interactive" mode -# -# % tools/jeprof --text "program" "profile" -# Generates one line per procedure -# -# % tools/jeprof --gv "program" "profile" -# Generates annotated call-graph and displays via "gv" -# -# % tools/jeprof --gv --focus=Mutex "program" "profile" -# Restrict to code paths that involve an entry that matches "Mutex" -# -# % tools/jeprof --gv --focus=Mutex --ignore=string "program" "profile" -# Restrict to code paths that involve an entry that matches "Mutex" -# and does not match "string" -# -# % tools/jeprof --list=IBF_CheckDocid "program" "profile" -# Generates disassembly listing of all routines with at least one -# sample that match the --list=<regexp> pattern. The listing is -# annotated with the flat and cumulative sample counts at each line. -# -# % tools/jeprof --disasm=IBF_CheckDocid "program" "profile" -# Generates disassembly listing of all routines with at least one -# sample that match the --disasm=<regexp> pattern. The listing is -# annotated with the flat and cumulative sample counts at each PC value. -# -# TODO: Use color to indicate files? - -use strict; -use warnings; -use Getopt::Long; - -my $JEPROF_VERSION = "@jemalloc_version@"; -my $PPROF_VERSION = "2.0"; - -# These are the object tools we use which can come from a -# user-specified location using --tools, from the JEPROF_TOOLS -# environment variable, or from the environment. -my %obj_tool_map = ( - "objdump" => "objdump", - "nm" => "nm", - "addr2line" => "addr2line", - "c++filt" => "c++filt", - ## ConfigureObjTools may add architecture-specific entries: - #"nm_pdb" => "nm-pdb", # for reading windows (PDB-format) executables - #"addr2line_pdb" => "addr2line-pdb", # ditto - #"otool" => "otool", # equivalent of objdump on OS X -); -# NOTE: these are lists, so you can put in commandline flags if you want. -my @DOT = ("dot"); # leave non-absolute, since it may be in /usr/local -my @GV = ("gv"); -my @EVINCE = ("evince"); # could also be xpdf or perhaps acroread -my @KCACHEGRIND = ("kcachegrind"); -my @PS2PDF = ("ps2pdf"); -# These are used for dynamic profiles -my @URL_FETCHER = ("curl", "-s", "--fail"); - -# These are the web pages that servers need to support for dynamic profiles -my $HEAP_PAGE = "/pprof/heap"; -my $PROFILE_PAGE = "/pprof/profile"; # must support cgi-param "?seconds=#" -my $PMUPROFILE_PAGE = "/pprof/pmuprofile(?:\\?.*)?"; # must support cgi-param - # ?seconds=#&event=x&period=n -my $GROWTH_PAGE = "/pprof/growth"; -my $CONTENTION_PAGE = "/pprof/contention"; -my $WALL_PAGE = "/pprof/wall(?:\\?.*)?"; # accepts options like namefilter -my $FILTEREDPROFILE_PAGE = "/pprof/filteredprofile(?:\\?.*)?"; -my $CENSUSPROFILE_PAGE = "/pprof/censusprofile(?:\\?.*)?"; # must support cgi-param - # "?seconds=#", - # "?tags_regexp=#" and - # "?type=#". -my $SYMBOL_PAGE = "/pprof/symbol"; # must support symbol lookup via POST -my $PROGRAM_NAME_PAGE = "/pprof/cmdline"; - -# These are the web pages that can be named on the command line. -# All the alternatives must begin with /. -my $PROFILES = "($HEAP_PAGE|$PROFILE_PAGE|$PMUPROFILE_PAGE|" . - "$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|" . - "$FILTEREDPROFILE_PAGE|$CENSUSPROFILE_PAGE)"; - -# default binary name -my $UNKNOWN_BINARY = "(unknown)"; - -# There is a pervasive dependency on the length (in hex characters, -# i.e., nibbles) of an address, distinguishing between 32-bit and -# 64-bit profiles. To err on the safe size, default to 64-bit here: -my $address_length = 16; - -my $dev_null = "/dev/null"; -if (! -e $dev_null && $^O =~ /MSWin/) { # $^O is the OS perl was built for - $dev_null = "nul"; -} - -# A list of paths to search for shared object files -my @prefix_list = (); - -# Special routine name that should not have any symbols. -# Used as separator to parse "addr2line -i" output. -my $sep_symbol = '_fini'; -my $sep_address = undef; - -##### Argument parsing ##### - -sub usage_string { - return <<EOF; -Usage: -jeprof [options] <program> <profiles> - <profiles> is a space separated list of profile names. -jeprof [options] <symbolized-profiles> - <symbolized-profiles> is a list of profile files where each file contains - the necessary symbol mappings as well as profile data (likely generated - with --raw). -jeprof [options] <profile> - <profile> is a remote form. Symbols are obtained from host:port$SYMBOL_PAGE - - Each name can be: - /path/to/profile - a path to a profile file - host:port[/<service>] - a location of a service to get profile from - - The /<service> can be $HEAP_PAGE, $PROFILE_PAGE, /pprof/pmuprofile, - $GROWTH_PAGE, $CONTENTION_PAGE, /pprof/wall, - $CENSUSPROFILE_PAGE, or /pprof/filteredprofile. - For instance: - jeprof http://myserver.com:80$HEAP_PAGE - If /<service> is omitted, the service defaults to $PROFILE_PAGE (cpu profiling). -jeprof --symbols <program> - Maps addresses to symbol names. In this mode, stdin should be a - list of library mappings, in the same format as is found in the heap- - and cpu-profile files (this loosely matches that of /proc/self/maps - on linux), followed by a list of hex addresses to map, one per line. - - For more help with querying remote servers, including how to add the - necessary server-side support code, see this filename (or one like it): - - /usr/doc/gperftools-$PPROF_VERSION/pprof_remote_servers.html - -Options: - --cum Sort by cumulative data - --base=<base> Subtract <base> from <profile> before display - --interactive Run in interactive mode (interactive "help" gives help) [default] - --seconds=<n> Length of time for dynamic profiles [default=30 secs] - --add_lib=<file> Read additional symbols and line info from the given library - --lib_prefix=<dir> Comma separated list of library path prefixes - -Reporting Granularity: - --addresses Report at address level - --lines Report at source line level - --functions Report at function level [default] - --files Report at source file level - -Output type: - --text Generate text report - --callgrind Generate callgrind format to stdout - --gv Generate Postscript and display - --evince Generate PDF and display - --web Generate SVG and display - --list=<regexp> Generate source listing of matching routines - --disasm=<regexp> Generate disassembly of matching routines - --symbols Print demangled symbol names found at given addresses - --dot Generate DOT file to stdout - --ps Generate Postcript to stdout - --pdf Generate PDF to stdout - --svg Generate SVG to stdout - --gif Generate GIF to stdout - --raw Generate symbolized jeprof data (useful with remote fetch) - -Heap-Profile Options: - --inuse_space Display in-use (mega)bytes [default] - --inuse_objects Display in-use objects - --alloc_space Display allocated (mega)bytes - --alloc_objects Display allocated objects - --show_bytes Display space in bytes - --drop_negative Ignore negative differences - -Contention-profile options: - --total_delay Display total delay at each region [default] - --contentions Display number of delays at each region - --mean_delay Display mean delay at each region - -Call-graph Options: - --nodecount=<n> Show at most so many nodes [default=80] - --nodefraction=<f> Hide nodes below <f>*total [default=.005] - --edgefraction=<f> Hide edges below <f>*total [default=.001] - --maxdegree=<n> Max incoming/outgoing edges per node [default=8] - --focus=<regexp> Focus on backtraces with nodes matching <regexp> - --thread=<n> Show profile for thread <n> - --ignore=<regexp> Ignore backtraces with nodes matching <regexp> - --scale=<n> Set GV scaling [default=0] - --heapcheck Make nodes with non-0 object counts - (i.e. direct leak generators) more visible - --retain=<regexp> Retain only nodes that match <regexp> - --exclude=<regexp> Exclude all nodes that match <regexp> - -Miscellaneous: - --tools=<prefix or binary:fullpath>[,...] \$PATH for object tool pathnames - --test Run unit tests - --help This message - --version Version information - -Environment Variables: - JEPROF_TMPDIR Profiles directory. Defaults to \$HOME/jeprof - JEPROF_TOOLS Prefix for object tools pathnames - -Examples: - -jeprof /bin/ls ls.prof - Enters "interactive" mode -jeprof --text /bin/ls ls.prof - Outputs one line per procedure -jeprof --web /bin/ls ls.prof - Displays annotated call-graph in web browser -jeprof --gv /bin/ls ls.prof - Displays annotated call-graph via 'gv' -jeprof --gv --focus=Mutex /bin/ls ls.prof - Restricts to code paths including a .*Mutex.* entry -jeprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof - Code paths including Mutex but not string -jeprof --list=getdir /bin/ls ls.prof - (Per-line) annotated source listing for getdir() -jeprof --disasm=getdir /bin/ls ls.prof - (Per-PC) annotated disassembly for getdir() - -jeprof http://localhost:1234/ - Enters "interactive" mode -jeprof --text localhost:1234 - Outputs one line per procedure for localhost:1234 -jeprof --raw localhost:1234 > ./local.raw -jeprof --text ./local.raw - Fetches a remote profile for later analysis and then - analyzes it in text mode. -EOF -} - -sub version_string { - return <<EOF -jeprof (part of jemalloc $JEPROF_VERSION) -based on pprof (part of gperftools $PPROF_VERSION) - -Copyright 1998-2007 Google Inc. - -This is BSD licensed software; see the source for copying conditions -and license information. -There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A -PARTICULAR PURPOSE. -EOF -} - -sub usage { - my $msg = shift; - print STDERR "$msg\n\n"; - print STDERR usage_string(); - print STDERR "\nFATAL ERROR: $msg\n"; # just as a reminder - exit(1); -} - -sub Init() { - # Setup tmp-file name and handler to clean it up. - # We do this in the very beginning so that we can use - # error() and cleanup() function anytime here after. - $main::tmpfile_sym = "/tmp/jeprof$$.sym"; - $main::tmpfile_ps = "/tmp/jeprof$$"; - $main::next_tmpfile = 0; - $SIG{'INT'} = \&sighandler; - - # Cache from filename/linenumber to source code - $main::source_cache = (); - - $main::opt_help = 0; - $main::opt_version = 0; - - $main::opt_cum = 0; - $main::opt_base = ''; - $main::opt_addresses = 0; - $main::opt_lines = 0; - $main::opt_functions = 0; - $main::opt_files = 0; - $main::opt_lib_prefix = ""; - - $main::opt_text = 0; - $main::opt_callgrind = 0; - $main::opt_list = ""; - $main::opt_disasm = ""; - $main::opt_symbols = 0; - $main::opt_gv = 0; - $main::opt_evince = 0; - $main::opt_web = 0; - $main::opt_dot = 0; - $main::opt_ps = 0; - $main::opt_pdf = 0; - $main::opt_gif = 0; - $main::opt_svg = 0; - $main::opt_raw = 0; - - $main::opt_nodecount = 80; - $main::opt_nodefraction = 0.005; - $main::opt_edgefraction = 0.001; - $main::opt_maxdegree = 8; - $main::opt_focus = ''; - $main::opt_thread = undef; - $main::opt_ignore = ''; - $main::opt_scale = 0; - $main::opt_heapcheck = 0; - $main::opt_retain = ''; - $main::opt_exclude = ''; - $main::opt_seconds = 30; - $main::opt_lib = ""; - - $main::opt_inuse_space = 0; - $main::opt_inuse_objects = 0; - $main::opt_alloc_space = 0; - $main::opt_alloc_objects = 0; - $main::opt_show_bytes = 0; - $main::opt_drop_negative = 0; - $main::opt_interactive = 0; - - $main::opt_total_delay = 0; - $main::opt_contentions = 0; - $main::opt_mean_delay = 0; - - $main::opt_tools = ""; - $main::opt_debug = 0; - $main::opt_test = 0; - - # These are undocumented flags used only by unittests. - $main::opt_test_stride = 0; - - # Are we using $SYMBOL_PAGE? - $main::use_symbol_page = 0; - - # Files returned by TempName. - %main::tempnames = (); - - # Type of profile we are dealing with - # Supported types: - # cpu - # heap - # growth - # contention - $main::profile_type = ''; # Empty type means "unknown" - - GetOptions("help!" => \$main::opt_help, - "version!" => \$main::opt_version, - "cum!" => \$main::opt_cum, - "base=s" => \$main::opt_base, - "seconds=i" => \$main::opt_seconds, - "add_lib=s" => \$main::opt_lib, - "lib_prefix=s" => \$main::opt_lib_prefix, - "functions!" => \$main::opt_functions, - "lines!" => \$main::opt_lines, - "addresses!" => \$main::opt_addresses, - "files!" => \$main::opt_files, - "text!" => \$main::opt_text, - "callgrind!" => \$main::opt_callgrind, - "list=s" => \$main::opt_list, - "disasm=s" => \$main::opt_disasm, - "symbols!" => \$main::opt_symbols, - "gv!" => \$main::opt_gv, - "evince!" => \$main::opt_evince, - "web!" => \$main::opt_web, - "dot!" => \$main::opt_dot, - "ps!" => \$main::opt_ps, - "pdf!" => \$main::opt_pdf, - "svg!" => \$main::opt_svg, - "gif!" => \$main::opt_gif, - "raw!" => \$main::opt_raw, - "interactive!" => \$main::opt_interactive, - "nodecount=i" => \$main::opt_nodecount, - "nodefraction=f" => \$main::opt_nodefraction, - "edgefraction=f" => \$main::opt_edgefraction, - "maxdegree=i" => \$main::opt_maxdegree, - "focus=s" => \$main::opt_focus, - "thread=s" => \$main::opt_thread, - "ignore=s" => \$main::opt_ignore, - "scale=i" => \$main::opt_scale, - "heapcheck" => \$main::opt_heapcheck, - "retain=s" => \$main::opt_retain, - "exclude=s" => \$main::opt_exclude, - "inuse_space!" => \$main::opt_inuse_space, - "inuse_objects!" => \$main::opt_inuse_objects, - "alloc_space!" => \$main::opt_alloc_space, - "alloc_objects!" => \$main::opt_alloc_objects, - "show_bytes!" => \$main::opt_show_bytes, - "drop_negative!" => \$main::opt_drop_negative, - "total_delay!" => \$main::opt_total_delay, - "contentions!" => \$main::opt_contentions, - "mean_delay!" => \$main::opt_mean_delay, - "tools=s" => \$main::opt_tools, - "test!" => \$main::opt_test, - "debug!" => \$main::opt_debug, - # Undocumented flags used only by unittests: - "test_stride=i" => \$main::opt_test_stride, - ) || usage("Invalid option(s)"); - - # Deal with the standard --help and --version - if ($main::opt_help) { - print usage_string(); - exit(0); - } - - if ($main::opt_version) { - print version_string(); - exit(0); - } - - # Disassembly/listing/symbols mode requires address-level info - if ($main::opt_disasm || $main::opt_list || $main::opt_symbols) { - $main::opt_functions = 0; - $main::opt_lines = 0; - $main::opt_addresses = 1; - $main::opt_files = 0; - } - - # Check heap-profiling flags - if ($main::opt_inuse_space + - $main::opt_inuse_objects + - $main::opt_alloc_space + - $main::opt_alloc_objects > 1) { - usage("Specify at most on of --inuse/--alloc options"); - } - - # Check output granularities - my $grains = - $main::opt_functions + - $main::opt_lines + - $main::opt_addresses + - $main::opt_files + - 0; - if ($grains > 1) { - usage("Only specify one output granularity option"); - } - if ($grains == 0) { - $main::opt_functions = 1; - } - - # Check output modes - my $modes = - $main::opt_text + - $main::opt_callgrind + - ($main::opt_list eq '' ? 0 : 1) + - ($main::opt_disasm eq '' ? 0 : 1) + - ($main::opt_symbols == 0 ? 0 : 1) + - $main::opt_gv + - $main::opt_evince + - $main::opt_web + - $main::opt_dot + - $main::opt_ps + - $main::opt_pdf + - $main::opt_svg + - $main::opt_gif + - $main::opt_raw + - $main::opt_interactive + - 0; - if ($modes > 1) { - usage("Only specify one output mode"); - } - if ($modes == 0) { - if (-t STDOUT) { # If STDOUT is a tty, activate interactive mode - $main::opt_interactive = 1; - } else { - $main::opt_text = 1; - } - } - - if ($main::opt_test) { - RunUnitTests(); - # Should not return - exit(1); - } - - # Binary name and profile arguments list - $main::prog = ""; - @main::pfile_args = (); - - # Remote profiling without a binary (using $SYMBOL_PAGE instead) - if (@ARGV > 0) { - if (IsProfileURL($ARGV[0])) { - $main::use_symbol_page = 1; - } elsif (IsSymbolizedProfileFile($ARGV[0])) { - $main::use_symbolized_profile = 1; - $main::prog = $UNKNOWN_BINARY; # will be set later from the profile file - } - } - - if ($main::use_symbol_page || $main::use_symbolized_profile) { - # We don't need a binary! - my %disabled = ('--lines' => $main::opt_lines, - '--disasm' => $main::opt_disasm); - for my $option (keys %disabled) { - usage("$option cannot be used without a binary") if $disabled{$option}; - } - # Set $main::prog later... - scalar(@ARGV) || usage("Did not specify profile file"); - } elsif ($main::opt_symbols) { - # --symbols needs a binary-name (to run nm on, etc) but not profiles - $main::prog = shift(@ARGV) || usage("Did not specify program"); - } else { - $main::prog = shift(@ARGV) || usage("Did not specify program"); - scalar(@ARGV) || usage("Did not specify profile file"); - } - - # Parse profile file/location arguments - foreach my $farg (@ARGV) { - if ($farg =~ m/(.*)\@([0-9]+)(|\/.*)$/ ) { - my $machine = $1; - my $num_machines = $2; - my $path = $3; - for (my $i = 0; $i < $num_machines; $i++) { - unshift(@main::pfile_args, "$i.$machine$path"); - } - } else { - unshift(@main::pfile_args, $farg); - } - } - - if ($main::use_symbol_page) { - unless (IsProfileURL($main::pfile_args[0])) { - error("The first profile should be a remote form to use $SYMBOL_PAGE\n"); - } - CheckSymbolPage(); - $main::prog = FetchProgramName(); - } elsif (!$main::use_symbolized_profile) { # may not need objtools! - ConfigureObjTools($main::prog) - } - - # Break the opt_lib_prefix into the prefix_list array - @prefix_list = split (',', $main::opt_lib_prefix); - - # Remove trailing / from the prefixes, in the list to prevent - # searching things like /my/path//lib/mylib.so - foreach (@prefix_list) { - s|/+$||; - } -} - -sub FilterAndPrint { - my ($profile, $symbols, $libs, $thread) = @_; - - # Get total data in profile - my $total = TotalProfile($profile); - - # Remove uniniteresting stack items - $profile = RemoveUninterestingFrames($symbols, $profile); - - # Focus? - if ($main::opt_focus ne '') { - $profile = FocusProfile($symbols, $profile, $main::opt_focus); - } - - # Ignore? - if ($main::opt_ignore ne '') { - $profile = IgnoreProfile($symbols, $profile, $main::opt_ignore); - } - - my $calls = ExtractCalls($symbols, $profile); - - # Reduce profiles to required output granularity, and also clean - # each stack trace so a given entry exists at most once. - my $reduced = ReduceProfile($symbols, $profile); - - # Get derived profiles - my $flat = FlatProfile($reduced); - my $cumulative = CumulativeProfile($reduced); - - # Print - if (!$main::opt_interactive) { - if ($main::opt_disasm) { - PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm); - } elsif ($main::opt_list) { - PrintListing($total, $libs, $flat, $cumulative, $main::opt_list, 0); - } elsif ($main::opt_text) { - # Make sure the output is empty when have nothing to report - # (only matters when --heapcheck is given but we must be - # compatible with old branches that did not pass --heapcheck always): - if ($total != 0) { - printf("Total%s: %s %s\n", - (defined($thread) ? " (t$thread)" : ""), - Unparse($total), Units()); - } - PrintText($symbols, $flat, $cumulative, -1); - } elsif ($main::opt_raw) { - PrintSymbolizedProfile($symbols, $profile, $main::prog); - } elsif ($main::opt_callgrind) { - PrintCallgrind($calls); - } else { - if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { - if ($main::opt_gv) { - RunGV(TempName($main::next_tmpfile, "ps"), ""); - } elsif ($main::opt_evince) { - RunEvince(TempName($main::next_tmpfile, "pdf"), ""); - } elsif ($main::opt_web) { - my $tmp = TempName($main::next_tmpfile, "svg"); - RunWeb($tmp); - # The command we run might hand the file name off - # to an already running browser instance and then exit. - # Normally, we'd remove $tmp on exit (right now), - # but fork a child to remove $tmp a little later, so that the - # browser has time to load it first. - delete $main::tempnames{$tmp}; - if (fork() == 0) { - sleep 5; - unlink($tmp); - exit(0); - } - } - } else { - cleanup(); - exit(1); - } - } - } else { - InteractiveMode($profile, $symbols, $libs, $total); - } -} - -sub Main() { - Init(); - $main::collected_profile = undef; - @main::profile_files = (); - $main::op_time = time(); - - # Printing symbols is special and requires a lot less info that most. - if ($main::opt_symbols) { - PrintSymbols(*STDIN); # Get /proc/maps and symbols output from stdin - return; - } - - # Fetch all profile data - FetchDynamicProfiles(); - - # this will hold symbols that we read from the profile files - my $symbol_map = {}; - - # Read one profile, pick the last item on the list - my $data = ReadProfile($main::prog, pop(@main::profile_files)); - my $profile = $data->{profile}; - my $pcs = $data->{pcs}; - my $libs = $data->{libs}; # Info about main program and shared libraries - $symbol_map = MergeSymbols($symbol_map, $data->{symbols}); - - # Add additional profiles, if available. - if (scalar(@main::profile_files) > 0) { - foreach my $pname (@main::profile_files) { - my $data2 = ReadProfile($main::prog, $pname); - $profile = AddProfile($profile, $data2->{profile}); - $pcs = AddPcs($pcs, $data2->{pcs}); - $symbol_map = MergeSymbols($symbol_map, $data2->{symbols}); - } - } - - # Subtract base from profile, if specified - if ($main::opt_base ne '') { - my $base = ReadProfile($main::prog, $main::opt_base); - $profile = SubtractProfile($profile, $base->{profile}); - $pcs = AddPcs($pcs, $base->{pcs}); - $symbol_map = MergeSymbols($symbol_map, $base->{symbols}); - } - - # Collect symbols - my $symbols; - if ($main::use_symbolized_profile) { - $symbols = FetchSymbols($pcs, $symbol_map); - } elsif ($main::use_symbol_page) { - $symbols = FetchSymbols($pcs); - } else { - # TODO(csilvers): $libs uses the /proc/self/maps data from profile1, - # which may differ from the data from subsequent profiles, especially - # if they were run on different machines. Use appropriate libs for - # each pc somehow. - $symbols = ExtractSymbols($libs, $pcs); - } - - if (!defined($main::opt_thread)) { - FilterAndPrint($profile, $symbols, $libs); - } - if (defined($data->{threads})) { - foreach my $thread (sort { $a <=> $b } keys(%{$data->{threads}})) { - if (defined($main::opt_thread) && - ($main::opt_thread eq '*' || $main::opt_thread == $thread)) { - my $thread_profile = $data->{threads}{$thread}; - FilterAndPrint($thread_profile, $symbols, $libs, $thread); - } - } - } - - cleanup(); - exit(0); -} - -##### Entry Point ##### - -Main(); - -# Temporary code to detect if we're running on a Goobuntu system. -# These systems don't have the right stuff installed for the special -# Readline libraries to work, so as a temporary workaround, we default -# to using the normal stdio code, rather than the fancier readline-based -# code -sub ReadlineMightFail { - if (-e '/lib/libtermcap.so.2') { - return 0; # libtermcap exists, so readline should be okay - } else { - return 1; - } -} - -sub RunGV { - my $fname = shift; - my $bg = shift; # "" or " &" if we should run in background - if (!system(ShellEscape(@GV, "--version") . " >$dev_null 2>&1")) { - # Options using double dash are supported by this gv version. - # Also, turn on noantialias to better handle bug in gv for - # postscript files with large dimensions. - # TODO: Maybe we should not pass the --noantialias flag - # if the gv version is known to work properly without the flag. - system(ShellEscape(@GV, "--scale=$main::opt_scale", "--noantialias", $fname) - . $bg); - } else { - # Old gv version - only supports options that use single dash. - print STDERR ShellEscape(@GV, "-scale", $main::opt_scale) . "\n"; - system(ShellEscape(@GV, "-scale", "$main::opt_scale", $fname) . $bg); - } -} - -sub RunEvince { - my $fname = shift; - my $bg = shift; # "" or " &" if we should run in background - system(ShellEscape(@EVINCE, $fname) . $bg); -} - -sub RunWeb { - my $fname = shift; - print STDERR "Loading web page file:///$fname\n"; - - if (`uname` =~ /Darwin/) { - # OS X: open will use standard preference for SVG files. - system("/usr/bin/open", $fname); - return; - } - - # Some kind of Unix; try generic symlinks, then specific browsers. - # (Stop once we find one.) - # Works best if the browser is already running. - my @alt = ( - "/etc/alternatives/gnome-www-browser", - "/etc/alternatives/x-www-browser", - "google-chrome", - "firefox", - ); - foreach my $b (@alt) { - if (system($b, $fname) == 0) { - return; - } - } - - print STDERR "Could not load web browser.\n"; -} - -sub RunKcachegrind { - my $fname = shift; - my $bg = shift; # "" or " &" if we should run in background - print STDERR "Starting '@KCACHEGRIND " . $fname . $bg . "'\n"; - system(ShellEscape(@KCACHEGRIND, $fname) . $bg); -} - - -##### Interactive helper routines ##### - -sub InteractiveMode { - $| = 1; # Make output unbuffered for interactive mode - my ($orig_profile, $symbols, $libs, $total) = @_; - - print STDERR "Welcome to jeprof! For help, type 'help'.\n"; - - # Use ReadLine if it's installed and input comes from a console. - if ( -t STDIN && - !ReadlineMightFail() && - defined(eval {require Term::ReadLine}) ) { - my $term = new Term::ReadLine 'jeprof'; - while ( defined ($_ = $term->readline('(jeprof) '))) { - $term->addhistory($_) if /\S/; - if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) { - last; # exit when we get an interactive command to quit - } - } - } else { # don't have readline - while (1) { - print STDERR "(jeprof) "; - $_ = <STDIN>; - last if ! defined $_ ; - s/\r//g; # turn windows-looking lines into unix-looking lines - - # Save some flags that might be reset by InteractiveCommand() - my $save_opt_lines = $main::opt_lines; - - if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) { - last; # exit when we get an interactive command to quit - } - - # Restore flags - $main::opt_lines = $save_opt_lines; - } - } -} - -# Takes two args: orig profile, and command to run. -# Returns 1 if we should keep going, or 0 if we were asked to quit -sub InteractiveCommand { - my($orig_profile, $symbols, $libs, $total, $command) = @_; - $_ = $command; # just to make future m//'s easier - if (!defined($_)) { - print STDERR "\n"; - return 0; - } - if (m/^\s*quit/) { - return 0; - } - if (m/^\s*help/) { - InteractiveHelpMessage(); - return 1; - } - # Clear all the mode options -- mode is controlled by "$command" - $main::opt_text = 0; - $main::opt_callgrind = 0; - $main::opt_disasm = 0; - $main::opt_list = 0; - $main::opt_gv = 0; - $main::opt_evince = 0; - $main::opt_cum = 0; - - if (m/^\s*(text|top)(\d*)\s*(.*)/) { - $main::opt_text = 1; - - my $line_limit = ($2 ne "") ? int($2) : 10; - - my $routine; - my $ignore; - ($routine, $ignore) = ParseInteractiveArgs($3); - - my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore); - my $reduced = ReduceProfile($symbols, $profile); - - # Get derived profiles - my $flat = FlatProfile($reduced); - my $cumulative = CumulativeProfile($reduced); - - PrintText($symbols, $flat, $cumulative, $line_limit); - return 1; - } - if (m/^\s*callgrind\s*([^ \n]*)/) { - $main::opt_callgrind = 1; - - # Get derived profiles - my $calls = ExtractCalls($symbols, $orig_profile); - my $filename = $1; - if ( $1 eq '' ) { - $filename = TempName($main::next_tmpfile, "callgrind"); - } - PrintCallgrind($calls, $filename); - if ( $1 eq '' ) { - RunKcachegrind($filename, " & "); - $main::next_tmpfile++; - } - - return 1; - } - if (m/^\s*(web)?list\s*(.+)/) { - my $html = (defined($1) && ($1 eq "web")); - $main::opt_list = 1; - - my $routine; - my $ignore; - ($routine, $ignore) = ParseInteractiveArgs($2); - - my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore); - my $reduced = ReduceProfile($symbols, $profile); - - # Get derived profiles - my $flat = FlatProfile($reduced); - my $cumulative = CumulativeProfile($reduced); - - PrintListing($total, $libs, $flat, $cumulative, $routine, $html); - return 1; - } - if (m/^\s*disasm\s*(.+)/) { - $main::opt_disasm = 1; - - my $routine; - my $ignore; - ($routine, $ignore) = ParseInteractiveArgs($1); - - # Process current profile to account for various settings - my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore); - my $reduced = ReduceProfile($symbols, $profile); - - # Get derived profiles - my $flat = FlatProfile($reduced); - my $cumulative = CumulativeProfile($reduced); - - PrintDisassembly($libs, $flat, $cumulative, $routine); - return 1; - } - if (m/^\s*(gv|web|evince)\s*(.*)/) { - $main::opt_gv = 0; - $main::opt_evince = 0; - $main::opt_web = 0; - if ($1 eq "gv") { - $main::opt_gv = 1; - } elsif ($1 eq "evince") { - $main::opt_evince = 1; - } elsif ($1 eq "web") { - $main::opt_web = 1; - } - - my $focus; - my $ignore; - ($focus, $ignore) = ParseInteractiveArgs($2); - - # Process current profile to account for various settings - my $profile = ProcessProfile($total, $orig_profile, $symbols, - $focus, $ignore); - my $reduced = ReduceProfile($symbols, $profile); - - # Get derived profiles - my $flat = FlatProfile($reduced); - my $cumulative = CumulativeProfile($reduced); - - if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { - if ($main::opt_gv) { - RunGV(TempName($main::next_tmpfile, "ps"), " &"); - } elsif ($main::opt_evince) { - RunEvince(TempName($main::next_tmpfile, "pdf"), " &"); - } elsif ($main::opt_web) { - RunWeb(TempName($main::next_tmpfile, "svg")); - } - $main::next_tmpfile++; - } - return 1; - } - if (m/^\s*$/) { - return 1; - } - print STDERR "Unknown command: try 'help'.\n"; - return 1; -} - - -sub ProcessProfile { - my $total_count = shift; - my $orig_profile = shift; - my $symbols = shift; - my $focus = shift; - my $ignore = shift; - - # Process current profile to account for various settings - my $profile = $orig_profile; - printf("Total: %s %s\n", Unparse($total_count), Units()); - if ($focus ne '') { - $profile = FocusProfile($symbols, $profile, $focus); - my $focus_count = TotalProfile($profile); - printf("After focusing on '%s': %s %s of %s (%0.1f%%)\n", - $focus, - Unparse($focus_count), Units(), - Unparse($total_count), ($focus_count*100.0) / $total_count); - } - if ($ignore ne '') { - $profile = IgnoreProfile($symbols, $profile, $ignore); - my $ignore_count = TotalProfile($profile); - printf("After ignoring '%s': %s %s of %s (%0.1f%%)\n", - $ignore, - Unparse($ignore_count), Units(), - Unparse($total_count), - ($ignore_count*100.0) / $total_count); - } - - return $profile; -} - -sub InteractiveHelpMessage { - print STDERR <<ENDOFHELP; -Interactive jeprof mode - -Commands: - gv - gv [focus] [-ignore1] [-ignore2] - Show graphical hierarchical display of current profile. Without - any arguments, shows all samples in the profile. With the optional - "focus" argument, restricts the samples shown to just those where - the "focus" regular expression matches a routine name on the stack - trace. - - web - web [focus] [-ignore1] [-ignore2] - Like GV, but displays profile in your web browser instead of using - Ghostview. Works best if your web browser is already running. - To change the browser that gets used: - On Linux, set the /etc/alternatives/gnome-www-browser symlink. - On OS X, change the Finder association for SVG files. - - list [routine_regexp] [-ignore1] [-ignore2] - Show source listing of routines whose names match "routine_regexp" - - weblist [routine_regexp] [-ignore1] [-ignore2] - Displays a source listing of routines whose names match "routine_regexp" - in a web browser. You can click on source lines to view the - corresponding disassembly. - - top [--cum] [-ignore1] [-ignore2] - top20 [--cum] [-ignore1] [-ignore2] - top37 [--cum] [-ignore1] [-ignore2] - Show top lines ordered by flat profile count, or cumulative count - if --cum is specified. If a number is present after 'top', the - top K routines will be shown (defaults to showing the top 10) - - disasm [routine_regexp] [-ignore1] [-ignore2] - Show disassembly of routines whose names match "routine_regexp", - annotated with sample counts. - - callgrind - callgrind [filename] - Generates callgrind file. If no filename is given, kcachegrind is called. - - help - This listing - quit or ^D - End jeprof - -For commands that accept optional -ignore tags, samples where any routine in -the stack trace matches the regular expression in any of the -ignore -parameters will be ignored. - -Further pprof details are available at this location (or one similar): - - /usr/doc/gperftools-$PPROF_VERSION/cpu_profiler.html - /usr/doc/gperftools-$PPROF_VERSION/heap_profiler.html - -ENDOFHELP -} -sub ParseInteractiveArgs { - my $args = shift; - my $focus = ""; - my $ignore = ""; - my @x = split(/ +/, $args); - foreach $a (@x) { - if ($a =~ m/^(--|-)lines$/) { - $main::opt_lines = 1; - } elsif ($a =~ m/^(--|-)cum$/) { - $main::opt_cum = 1; - } elsif ($a =~ m/^-(.*)/) { - $ignore .= (($ignore ne "") ? "|" : "" ) . $1; - } else { - $focus .= (($focus ne "") ? "|" : "" ) . $a; - } - } - if ($ignore ne "") { - print STDERR "Ignoring samples in call stacks that match '$ignore'\n"; - } - return ($focus, $ignore); -} - -##### Output code ##### - -sub TempName { - my $fnum = shift; - my $ext = shift; - my $file = "$main::tmpfile_ps.$fnum.$ext"; - $main::tempnames{$file} = 1; - return $file; -} - -# Print profile data in packed binary format (64-bit) to standard out -sub PrintProfileData { - my $profile = shift; - - # print header (64-bit style) - # (zero) (header-size) (version) (sample-period) (zero) - print pack('L*', 0, 0, 3, 0, 0, 0, 1, 0, 0, 0); - - foreach my $k (keys(%{$profile})) { - my $count = $profile->{$k}; - my @addrs = split(/\n/, $k); - if ($#addrs >= 0) { - my $depth = $#addrs + 1; - # int(foo / 2**32) is the only reliable way to get rid of bottom - # 32 bits on both 32- and 64-bit systems. - print pack('L*', $count & 0xFFFFFFFF, int($count / 2**32)); - print pack('L*', $depth & 0xFFFFFFFF, int($depth / 2**32)); - - foreach my $full_addr (@addrs) { - my $addr = $full_addr; - $addr =~ s/0x0*//; # strip off leading 0x, zeroes - if (length($addr) > 16) { - print STDERR "Invalid address in profile: $full_addr\n"; - next; - } - my $low_addr = substr($addr, -8); # get last 8 hex chars - my $high_addr = substr($addr, -16, 8); # get up to 8 more hex chars - print pack('L*', hex('0x' . $low_addr), hex('0x' . $high_addr)); - } - } - } -} - -# Print symbols and profile data -sub PrintSymbolizedProfile { - my $symbols = shift; - my $profile = shift; - my $prog = shift; - - $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $symbol_marker = $&; - - print '--- ', $symbol_marker, "\n"; - if (defined($prog)) { - print 'binary=', $prog, "\n"; - } - while (my ($pc, $name) = each(%{$symbols})) { - my $sep = ' '; - print '0x', $pc; - # We have a list of function names, which include the inlined - # calls. They are separated (and terminated) by --, which is - # illegal in function names. - for (my $j = 2; $j <= $#{$name}; $j += 3) { - print $sep, $name->[$j]; - $sep = '--'; - } - print "\n"; - } - print '---', "\n"; - - my $profile_marker; - if ($main::profile_type eq 'heap') { - $HEAP_PAGE =~ m,[^/]+$,; # matches everything after the last slash - $profile_marker = $&; - } elsif ($main::profile_type eq 'growth') { - $GROWTH_PAGE =~ m,[^/]+$,; # matches everything after the last slash - $profile_marker = $&; - } elsif ($main::profile_type eq 'contention') { - $CONTENTION_PAGE =~ m,[^/]+$,; # matches everything after the last slash - $profile_marker = $&; - } else { # elsif ($main::profile_type eq 'cpu') - $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash - $profile_marker = $&; - } - - print '--- ', $profile_marker, "\n"; - if (defined($main::collected_profile)) { - # if used with remote fetch, simply dump the collected profile to output. - open(SRC, "<$main::collected_profile"); - while (<SRC>) { - print $_; - } - close(SRC); - } else { - # --raw/http: For everything to work correctly for non-remote profiles, we - # would need to extend PrintProfileData() to handle all possible profile - # types, re-enable the code that is currently disabled in ReadCPUProfile() - # and FixCallerAddresses(), and remove the remote profile dumping code in - # the block above. - die "--raw/http: jeprof can only dump remote profiles for --raw\n"; - # dump a cpu-format profile to standard out - PrintProfileData($profile); - } -} - -# Print text output -sub PrintText { - my $symbols = shift; - my $flat = shift; - my $cumulative = shift; - my $line_limit = shift; - - my $total = TotalProfile($flat); - - # Which profile to sort by? - my $s = $main::opt_cum ? $cumulative : $flat; - - my $running_sum = 0; - my $lines = 0; - foreach my $k (sort { GetEntry($s, $b) <=> GetEntry($s, $a) || $a cmp $b } - keys(%{$cumulative})) { - my $f = GetEntry($flat, $k); - my $c = GetEntry($cumulative, $k); - $running_sum += $f; - - my $sym = $k; - if (exists($symbols->{$k})) { - $sym = $symbols->{$k}->[0] . " " . $symbols->{$k}->[1]; - if ($main::opt_addresses) { - $sym = $k . " " . $sym; - } - } - - if ($f != 0 || $c != 0) { - printf("%8s %6s %6s %8s %6s %s\n", - Unparse($f), - Percent($f, $total), - Percent($running_sum, $total), - Unparse($c), - Percent($c, $total), - $sym); - } - $lines++; - last if ($line_limit >= 0 && $lines >= $line_limit); - } -} - -# Callgrind format has a compression for repeated function and file -# names. You show the name the first time, and just use its number -# subsequently. This can cut down the file to about a third or a -# quarter of its uncompressed size. $key and $val are the key/value -# pair that would normally be printed by callgrind; $map is a map from -# value to number. -sub CompressedCGName { - my($key, $val, $map) = @_; - my $idx = $map->{$val}; - # For very short keys, providing an index hurts rather than helps. - if (length($val) <= 3) { - return "$key=$val\n"; - } elsif (defined($idx)) { - return "$key=($idx)\n"; - } else { - # scalar(keys $map) gives the number of items in the map. - $idx = scalar(keys(%{$map})) + 1; - $map->{$val} = $idx; - return "$key=($idx) $val\n"; - } -} - -# Print the call graph in a way that's suiteable for callgrind. -sub PrintCallgrind { - my $calls = shift; - my $filename; - my %filename_to_index_map; - my %fnname_to_index_map; - - if ($main::opt_interactive) { - $filename = shift; - print STDERR "Writing callgrind file to '$filename'.\n" - } else { - $filename = "&STDOUT"; - } - open(CG, ">$filename"); - printf CG ("events: Hits\n\n"); - foreach my $call ( map { $_->[0] } - sort { $a->[1] cmp $b ->[1] || - $a->[2] <=> $b->[2] } - map { /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/; - [$_, $1, $2] } - keys %$calls ) { - my $count = int($calls->{$call}); - $call =~ /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/; - my ( $caller_file, $caller_line, $caller_function, - $callee_file, $callee_line, $callee_function ) = - ( $1, $2, $3, $5, $6, $7 ); - - # TODO(csilvers): for better compression, collect all the - # caller/callee_files and functions first, before printing - # anything, and only compress those referenced more than once. - printf CG CompressedCGName("fl", $caller_file, \%filename_to_index_map); - printf CG CompressedCGName("fn", $caller_function, \%fnname_to_index_map); - if (defined $6) { - printf CG CompressedCGName("cfl", $callee_file, \%filename_to_index_map); - printf CG CompressedCGName("cfn", $callee_function, \%fnname_to_index_map); - printf CG ("calls=$count $callee_line\n"); - } - printf CG ("$caller_line $count\n\n"); - } -} - -# Print disassembly for all all routines that match $main::opt_disasm -sub PrintDisassembly { - my $libs = shift; - my $flat = shift; - my $cumulative = shift; - my $disasm_opts = shift; - - my $total = TotalProfile($flat); - - foreach my $lib (@{$libs}) { - my $symbol_table = GetProcedureBoundaries($lib->[0], $disasm_opts); - my $offset = AddressSub($lib->[1], $lib->[3]); - foreach my $routine (sort ByName keys(%{$symbol_table})) { - my $start_addr = $symbol_table->{$routine}->[0]; - my $end_addr = $symbol_table->{$routine}->[1]; - # See if there are any samples in this routine - my $length = hex(AddressSub($end_addr, $start_addr)); - my $addr = AddressAdd($start_addr, $offset); - for (my $i = 0; $i < $length; $i++) { - if (defined($cumulative->{$addr})) { - PrintDisassembledFunction($lib->[0], $offset, - $routine, $flat, $cumulative, - $start_addr, $end_addr, $total); - last; - } - $addr = AddressInc($addr); - } - } - } -} - -# Return reference to array of tuples of the form: -# [start_address, filename, linenumber, instruction, limit_address] -# E.g., -# ["0x806c43d", "/foo/bar.cc", 131, "ret", "0x806c440"] -sub Disassemble { - my $prog = shift; - my $offset = shift; - my $start_addr = shift; - my $end_addr = shift; - - my $objdump = $obj_tool_map{"objdump"}; - my $cmd = ShellEscape($objdump, "-C", "-d", "-l", "--no-show-raw-insn", - "--start-address=0x$start_addr", - "--stop-address=0x$end_addr", $prog); - open(OBJDUMP, "$cmd |") || error("$cmd: $!\n"); - my @result = (); - my $filename = ""; - my $linenumber = -1; - my $last = ["", "", "", ""]; - while (<OBJDUMP>) { - s/\r//g; # turn windows-looking lines into unix-looking lines - chop; - if (m|\s*([^:\s]+):(\d+)\s*$|) { - # Location line of the form: - # <filename>:<linenumber> - $filename = $1; - $linenumber = $2; - } elsif (m/^ +([0-9a-f]+):\s*(.*)/) { - # Disassembly line -- zero-extend address to full length - my $addr = HexExtend($1); - my $k = AddressAdd($addr, $offset); - $last->[4] = $k; # Store ending address for previous instruction - $last = [$k, $filename, $linenumber, $2, $end_addr]; - push(@result, $last); - } - } - close(OBJDUMP); - return @result; -} - -# The input file should contain lines of the form /proc/maps-like -# output (same format as expected from the profiles) or that looks -# like hex addresses (like "0xDEADBEEF"). We will parse all -# /proc/maps output, and for all the hex addresses, we will output -# "short" symbol names, one per line, in the same order as the input. -sub PrintSymbols { - my $maps_and_symbols_file = shift; - - # ParseLibraries expects pcs to be in a set. Fine by us... - my @pclist = (); # pcs in sorted order - my $pcs = {}; - my $map = ""; - foreach my $line (<$maps_and_symbols_file>) { - $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines - if ($line =~ /\b(0x[0-9a-f]+)\b/i) { - push(@pclist, HexExtend($1)); - $pcs->{$pclist[-1]} = 1; - } else { - $map .= $line; - } - } - - my $libs = ParseLibraries($main::prog, $map, $pcs); - my $symbols = ExtractSymbols($libs, $pcs); - - foreach my $pc (@pclist) { - # ->[0] is the shortname, ->[2] is the full name - print(($symbols->{$pc}->[0] || "??") . "\n"); - } -} - - -# For sorting functions by name -sub ByName { - return ShortFunctionName($a) cmp ShortFunctionName($b); -} - -# Print source-listing for all all routines that match $list_opts -sub PrintListing { - my $total = shift; - my $libs = shift; - my $flat = shift; - my $cumulative = shift; - my $list_opts = shift; - my $html = shift; - - my $output = \*STDOUT; - my $fname = ""; - - if ($html) { - # Arrange to write the output to a temporary file - $fname = TempName($main::next_tmpfile, "html"); - $main::next_tmpfile++; - if (!open(TEMP, ">$fname")) { - print STDERR "$fname: $!\n"; - return; - } - $output = \*TEMP; - print $output HtmlListingHeader(); - printf $output ("<div class=\"legend\">%s<br>Total: %s %s</div>\n", - $main::prog, Unparse($total), Units()); - } - - my $listed = 0; - foreach my $lib (@{$libs}) { - my $symbol_table = GetProcedureBoundaries($lib->[0], $list_opts); - my $offset = AddressSub($lib->[1], $lib->[3]); - foreach my $routine (sort ByName keys(%{$symbol_table})) { - # Print if there are any samples in this routine - my $start_addr = $symbol_table->{$routine}->[0]; - my $end_addr = $symbol_table->{$routine}->[1]; - my $length = hex(AddressSub($end_addr, $start_addr)); - my $addr = AddressAdd($start_addr, $offset); - for (my $i = 0; $i < $length; $i++) { - if (defined($cumulative->{$addr})) { - $listed += PrintSource( - $lib->[0], $offset, - $routine, $flat, $cumulative, - $start_addr, $end_addr, - $html, - $output); - last; - } - $addr = AddressInc($addr); - } - } - } - - if ($html) { - if ($listed > 0) { - print $output HtmlListingFooter(); - close($output); - RunWeb($fname); - } else { - close($output); - unlink($fname); - } - } -} - -sub HtmlListingHeader { - return <<'EOF'; -<DOCTYPE html> -<html> -<head> -<title>Pprof listing</title> -<style type="text/css"> -body { - font-family: sans-serif; -} -h1 { - font-size: 1.5em; - margin-bottom: 4px; -} -.legend { - font-size: 1.25em; -} -.line { - color: #aaaaaa; -} -.nop { - color: #aaaaaa; -} -.unimportant { - color: #cccccc; -} -.disasmloc { - color: #000000; -} -.deadsrc { - cursor: pointer; -} -.deadsrc:hover { - background-color: #eeeeee; -} -.livesrc { - color: #0000ff; - cursor: pointer; -} -.livesrc:hover { - background-color: #eeeeee; -} -.asm { - color: #008800; - display: none; -} -</style> -<script type="text/javascript"> -function jeprof_toggle_asm(e) { - var target; - if (!e) e = window.event; - if (e.target) target = e.target; - else if (e.srcElement) target = e.srcElement; - - if (target) { - var asm = target.nextSibling; - if (asm && asm.className == "asm") { - asm.style.display = (asm.style.display == "block" ? "" : "block"); - e.preventDefault(); - return false; - } - } -} -</script> -</head> -<body> -EOF -} - -sub HtmlListingFooter { - return <<'EOF'; -</body> -</html> -EOF -} - -sub HtmlEscape { - my $text = shift; - $text =~ s/&/&/g; - $text =~ s/</</g; - $text =~ s/>/>/g; - return $text; -} - -# Returns the indentation of the line, if it has any non-whitespace -# characters. Otherwise, returns -1. -sub Indentation { - my $line = shift; - if (m/^(\s*)\S/) { - return length($1); - } else { - return -1; - } -} - -# If the symbol table contains inlining info, Disassemble() may tag an -# instruction with a location inside an inlined function. But for -# source listings, we prefer to use the location in the function we -# are listing. So use MapToSymbols() to fetch full location -# information for each instruction and then pick out the first -# location from a location list (location list contains callers before -# callees in case of inlining). -# -# After this routine has run, each entry in $instructions contains: -# [0] start address -# [1] filename for function we are listing -# [2] line number for function we are listing -# [3] disassembly -# [4] limit address -# [5] most specific filename (may be different from [1] due to inlining) -# [6] most specific line number (may be different from [2] due to inlining) -sub GetTopLevelLineNumbers { - my ($lib, $offset, $instructions) = @_; - my $pcs = []; - for (my $i = 0; $i <= $#{$instructions}; $i++) { - push(@{$pcs}, $instructions->[$i]->[0]); - } - my $symbols = {}; - MapToSymbols($lib, $offset, $pcs, $symbols); - for (my $i = 0; $i <= $#{$instructions}; $i++) { - my $e = $instructions->[$i]; - push(@{$e}, $e->[1]); - push(@{$e}, $e->[2]); - my $addr = $e->[0]; - my $sym = $symbols->{$addr}; - if (defined($sym)) { - if ($#{$sym} >= 2 && $sym->[1] =~ m/^(.*):(\d+)$/) { - $e->[1] = $1; # File name - $e->[2] = $2; # Line number - } - } - } -} - -# Print source-listing for one routine -sub PrintSource { - my $prog = shift; - my $offset = shift; - my $routine = shift; - my $flat = shift; - my $cumulative = shift; - my $start_addr = shift; - my $end_addr = shift; - my $html = shift; - my $output = shift; - - # Disassemble all instructions (just to get line numbers) - my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr); - GetTopLevelLineNumbers($prog, $offset, \@instructions); - - # Hack 1: assume that the first source file encountered in the - # disassembly contains the routine - my $filename = undef; - for (my $i = 0; $i <= $#instructions; $i++) { - if ($instructions[$i]->[2] >= 0) { - $filename = $instructions[$i]->[1]; - last; - } - } - if (!defined($filename)) { - print STDERR "no filename found in $routine\n"; - return 0; - } - - # Hack 2: assume that the largest line number from $filename is the - # end of the procedure. This is typically safe since if P1 contains - # an inlined call to P2, then P2 usually occurs earlier in the - # source file. If this does not work, we might have to compute a - # density profile or just print all regions we find. - my $lastline = 0; - for (my $i = 0; $i <= $#instructions; $i++) { - my $f = $instructions[$i]->[1]; - my $l = $instructions[$i]->[2]; - if (($f eq $filename) && ($l > $lastline)) { - $lastline = $l; - } - } - - # Hack 3: assume the first source location from "filename" is the start of - # the source code. - my $firstline = 1; - for (my $i = 0; $i <= $#instructions; $i++) { - if ($instructions[$i]->[1] eq $filename) { - $firstline = $instructions[$i]->[2]; - last; - } - } - - # Hack 4: Extend last line forward until its indentation is less than - # the indentation we saw on $firstline - my $oldlastline = $lastline; - { - if (!open(FILE, "<$filename")) { - print STDERR "$filename: $!\n"; - return 0; - } - my $l = 0; - my $first_indentation = -1; - while (<FILE>) { - s/\r//g; # turn windows-looking lines into unix-looking lines - $l++; - my $indent = Indentation($_); - if ($l >= $firstline) { - if ($first_indentation < 0 && $indent >= 0) { - $first_indentation = $indent; - last if ($first_indentation == 0); - } - } - if ($l >= $lastline && $indent >= 0) { - if ($indent >= $first_indentation) { - $lastline = $l+1; - } else { - last; - } - } - } - close(FILE); - } - - # Assign all samples to the range $firstline,$lastline, - # Hack 4: If an instruction does not occur in the range, its samples - # are moved to the next instruction that occurs in the range. - my $samples1 = {}; # Map from line number to flat count - my $samples2 = {}; # Map from line number to cumulative count - my $running1 = 0; # Unassigned flat counts - my $running2 = 0; # Unassigned cumulative counts - my $total1 = 0; # Total flat counts - my $total2 = 0; # Total cumulative counts - my %disasm = (); # Map from line number to disassembly - my $running_disasm = ""; # Unassigned disassembly - my $skip_marker = "---\n"; - if ($html) { - $skip_marker = ""; - for (my $l = $firstline; $l <= $lastline; $l++) { - $disasm{$l} = ""; - } - } - my $last_dis_filename = ''; - my $last_dis_linenum = -1; - my $last_touched_line = -1; # To detect gaps in disassembly for a line - foreach my $e (@instructions) { - # Add up counts for all address that fall inside this instruction - my $c1 = 0; - my $c2 = 0; - for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) { - $c1 += GetEntry($flat, $a); - $c2 += GetEntry($cumulative, $a); - } - - if ($html) { - my $dis = sprintf(" %6s %6s \t\t%8s: %s ", - HtmlPrintNumber($c1), - HtmlPrintNumber($c2), - UnparseAddress($offset, $e->[0]), - CleanDisassembly($e->[3])); - - # Append the most specific source line associated with this instruction - if (length($dis) < 80) { $dis .= (' ' x (80 - length($dis))) }; - $dis = HtmlEscape($dis); - my $f = $e->[5]; - my $l = $e->[6]; - if ($f ne $last_dis_filename) { - $dis .= sprintf("<span class=disasmloc>%s:%d</span>", - HtmlEscape(CleanFileName($f)), $l); - } elsif ($l ne $last_dis_linenum) { - # De-emphasize the unchanged file name portion - $dis .= sprintf("<span class=unimportant>%s</span>" . - "<span class=disasmloc>:%d</span>", - HtmlEscape(CleanFileName($f)), $l); - } else { - # De-emphasize the entire location - $dis .= sprintf("<span class=unimportant>%s:%d</span>", - HtmlEscape(CleanFileName($f)), $l); - } - $last_dis_filename = $f; - $last_dis_linenum = $l; - $running_disasm .= $dis; - $running_disasm .= "\n"; - } - - $running1 += $c1; - $running2 += $c2; - $total1 += $c1; - $total2 += $c2; - my $file = $e->[1]; - my $line = $e->[2]; - if (($file eq $filename) && - ($line >= $firstline) && - ($line <= $lastline)) { - # Assign all accumulated samples to this line - AddEntry($samples1, $line, $running1); - AddEntry($samples2, $line, $running2); - $running1 = 0; - $running2 = 0; - if ($html) { - if ($line != $last_touched_line && $disasm{$line} ne '') { - $disasm{$line} .= "\n"; - } - $disasm{$line} .= $running_disasm; - $running_disasm = ''; - $last_touched_line = $line; - } - } - } - - # Assign any leftover samples to $lastline - AddEntry($samples1, $lastline, $running1); - AddEntry($samples2, $lastline, $running2); - if ($html) { - if ($lastline != $last_touched_line && $disasm{$lastline} ne '') { - $disasm{$lastline} .= "\n"; - } - $disasm{$lastline} .= $running_disasm; - } - - if ($html) { - printf $output ( - "<h1>%s</h1>%s\n<pre onClick=\"jeprof_toggle_asm()\">\n" . - "Total:%6s %6s (flat / cumulative %s)\n", - HtmlEscape(ShortFunctionName($routine)), - HtmlEscape(CleanFileName($filename)), - Unparse($total1), - Unparse($total2), - Units()); - } else { - printf $output ( - "ROUTINE ====================== %s in %s\n" . - "%6s %6s Total %s (flat / cumulative)\n", - ShortFunctionName($routine), - CleanFileName($filename), - Unparse($total1), - Unparse($total2), - Units()); - } - if (!open(FILE, "<$filename")) { - print STDERR "$filename: $!\n"; - return 0; - } - my $l = 0; - while (<FILE>) { - s/\r//g; # turn windows-looking lines into unix-looking lines - $l++; - if ($l >= $firstline - 5 && - (($l <= $oldlastline + 5) || ($l <= $lastline))) { - chop; - my $text = $_; - if ($l == $firstline) { print $output $skip_marker; } - my $n1 = GetEntry($samples1, $l); - my $n2 = GetEntry($samples2, $l); - if ($html) { - # Emit a span that has one of the following classes: - # livesrc -- has samples - # deadsrc -- has disassembly, but with no samples - # nop -- has no matching disasembly - # Also emit an optional span containing disassembly. - my $dis = $disasm{$l}; - my $asm = ""; - if (defined($dis) && $dis ne '') { - $asm = "<span class=\"asm\">" . $dis . "</span>"; - } - my $source_class = (($n1 + $n2 > 0) - ? "livesrc" - : (($asm ne "") ? "deadsrc" : "nop")); - printf $output ( - "<span class=\"line\">%5d</span> " . - "<span class=\"%s\">%6s %6s %s</span>%s\n", - $l, $source_class, - HtmlPrintNumber($n1), - HtmlPrintNumber($n2), - HtmlEscape($text), - $asm); - } else { - printf $output( - "%6s %6s %4d: %s\n", - UnparseAlt($n1), - UnparseAlt($n2), - $l, - $text); - } - if ($l == $lastline) { print $output $skip_marker; } - }; - } - close(FILE); - if ($html) { - print $output "</pre>\n"; - } - return 1; -} - -# Return the source line for the specified file/linenumber. -# Returns undef if not found. -sub SourceLine { - my $file = shift; - my $line = shift; - - # Look in cache - if (!defined($main::source_cache{$file})) { - if (100 < scalar keys(%main::source_cache)) { - # Clear the cache when it gets too big - $main::source_cache = (); - } - - # Read all lines from the file - if (!open(FILE, "<$file")) { - print STDERR "$file: $!\n"; - $main::source_cache{$file} = []; # Cache the negative result - return undef; - } - my $lines = []; - push(@{$lines}, ""); # So we can use 1-based line numbers as indices - while (<FILE>) { - push(@{$lines}, $_); - } - close(FILE); - - # Save the lines in the cache - $main::source_cache{$file} = $lines; - } - - my $lines = $main::source_cache{$file}; - if (($line < 0) || ($line > $#{$lines})) { - return undef; - } else { - return $lines->[$line]; - } -} - -# Print disassembly for one routine with interspersed source if available -sub PrintDisassembledFunction { - my $prog = shift; - my $offset = shift; - my $routine = shift; - my $flat = shift; - my $cumulative = shift; - my $start_addr = shift; - my $end_addr = shift; - my $total = shift; - - # Disassemble all instructions - my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr); - - # Make array of counts per instruction - my @flat_count = (); - my @cum_count = (); - my $flat_total = 0; - my $cum_total = 0; - foreach my $e (@instructions) { - # Add up counts for all address that fall inside this instruction - my $c1 = 0; - my $c2 = 0; - for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) { - $c1 += GetEntry($flat, $a); - $c2 += GetEntry($cumulative, $a); - } - push(@flat_count, $c1); - push(@cum_count, $c2); - $flat_total += $c1; - $cum_total += $c2; - } - - # Print header with total counts - printf("ROUTINE ====================== %s\n" . - "%6s %6s %s (flat, cumulative) %.1f%% of total\n", - ShortFunctionName($routine), - Unparse($flat_total), - Unparse($cum_total), - Units(), - ($cum_total * 100.0) / $total); - - # Process instructions in order - my $current_file = ""; - for (my $i = 0; $i <= $#instructions; ) { - my $e = $instructions[$i]; - - # Print the new file name whenever we switch files - if ($e->[1] ne $current_file) { - $current_file = $e->[1]; - my $fname = $current_file; - $fname =~ s|^\./||; # Trim leading "./" - - # Shorten long file names - if (length($fname) >= 58) { - $fname = "..." . substr($fname, -55); - } - printf("-------------------- %s\n", $fname); - } - - # TODO: Compute range of lines to print together to deal with - # small reorderings. - my $first_line = $e->[2]; - my $last_line = $first_line; - my %flat_sum = (); - my %cum_sum = (); - for (my $l = $first_line; $l <= $last_line; $l++) { - $flat_sum{$l} = 0; - $cum_sum{$l} = 0; - } - - # Find run of instructions for this range of source lines - my $first_inst = $i; - while (($i <= $#instructions) && - ($instructions[$i]->[2] >= $first_line) && - ($instructions[$i]->[2] <= $last_line)) { - $e = $instructions[$i]; - $flat_sum{$e->[2]} += $flat_count[$i]; - $cum_sum{$e->[2]} += $cum_count[$i]; - $i++; - } - my $last_inst = $i - 1; - - # Print source lines - for (my $l = $first_line; $l <= $last_line; $l++) { - my $line = SourceLine($current_file, $l); - if (!defined($line)) { - $line = "?\n"; - next; - } else { - $line =~ s/^\s+//; - } - printf("%6s %6s %5d: %s", - UnparseAlt($flat_sum{$l}), - UnparseAlt($cum_sum{$l}), - $l, - $line); - } - - # Print disassembly - for (my $x = $first_inst; $x <= $last_inst; $x++) { - my $e = $instructions[$x]; - printf("%6s %6s %8s: %6s\n", - UnparseAlt($flat_count[$x]), - UnparseAlt($cum_count[$x]), - UnparseAddress($offset, $e->[0]), - CleanDisassembly($e->[3])); - } - } -} - -# Print DOT graph -sub PrintDot { - my $prog = shift; - my $symbols = shift; - my $raw = shift; - my $flat = shift; - my $cumulative = shift; - my $overall_total = shift; - - # Get total - my $local_total = TotalProfile($flat); - my $nodelimit = int($main::opt_nodefraction * $local_total); - my $edgelimit = int($main::opt_edgefraction * $local_total); - my $nodecount = $main::opt_nodecount; - - # Find nodes to include - my @list = (sort { abs(GetEntry($cumulative, $b)) <=> - abs(GetEntry($cumulative, $a)) - || $a cmp $b } - keys(%{$cumulative})); - my $last = $nodecount - 1; - if ($last > $#list) { - $last = $#list; - } - while (($last >= 0) && - (abs(GetEntry($cumulative, $list[$last])) <= $nodelimit)) { - $last--; - } - if ($last < 0) { - print STDERR "No nodes to print\n"; - return 0; - } - - if ($nodelimit > 0 || $edgelimit > 0) { - printf STDERR ("Dropping nodes with <= %s %s; edges with <= %s abs(%s)\n", - Unparse($nodelimit), Units(), - Unparse($edgelimit), Units()); - } - - # Open DOT output file - my $output; - my $escaped_dot = ShellEscape(@DOT); - my $escaped_ps2pdf = ShellEscape(@PS2PDF); - if ($main::opt_gv) { - my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "ps")); - $output = "| $escaped_dot -Tps2 >$escaped_outfile"; - } elsif ($main::opt_evince) { - my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "pdf")); - $output = "| $escaped_dot -Tps2 | $escaped_ps2pdf - $escaped_outfile"; - } elsif ($main::opt_ps) { - $output = "| $escaped_dot -Tps2"; - } elsif ($main::opt_pdf) { - $output = "| $escaped_dot -Tps2 | $escaped_ps2pdf - -"; - } elsif ($main::opt_web || $main::opt_svg) { - # We need to post-process the SVG, so write to a temporary file always. - my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "svg")); - $output = "| $escaped_dot -Tsvg >$escaped_outfile"; - } elsif ($main::opt_gif) { - $output = "| $escaped_dot -Tgif"; - } else { - $output = ">&STDOUT"; - } - open(DOT, $output) || error("$output: $!\n"); - - # Title - printf DOT ("digraph \"%s; %s %s\" {\n", - $prog, - Unparse($overall_total), - Units()); - if ($main::opt_pdf) { - # The output is more printable if we set the page size for dot. - printf DOT ("size=\"8,11\"\n"); - } - printf DOT ("node [width=0.375,height=0.25];\n"); - - # Print legend - printf DOT ("Legend [shape=box,fontsize=24,shape=plaintext," . - "label=\"%s\\l%s\\l%s\\l%s\\l%s\\l\"];\n", - $prog, - sprintf("Total %s: %s", Units(), Unparse($overall_total)), - sprintf("Focusing on: %s", Unparse($local_total)), - sprintf("Dropped nodes with <= %s abs(%s)", - Unparse($nodelimit), Units()), - sprintf("Dropped edges with <= %s %s", - Unparse($edgelimit), Units()) - ); - - # Print nodes - my %node = (); - my $nextnode = 1; - foreach my $a (@list[0..$last]) { - # Pick font size - my $f = GetEntry($flat, $a); - my $c = GetEntry($cumulative, $a); - - my $fs = 8; - if ($local_total > 0) { - $fs = 8 + (50.0 * sqrt(abs($f * 1.0 / $local_total))); - } - - $node{$a} = $nextnode++; - my $sym = $a; - $sym =~ s/\s+/\\n/g; - $sym =~ s/::/\\n/g; - - # Extra cumulative info to print for non-leaves - my $extra = ""; - if ($f != $c) { - $extra = sprintf("\\rof %s (%s)", - Unparse($c), - Percent($c, $local_total)); - } - my $style = ""; - if ($main::opt_heapcheck) { - if ($f > 0) { - # make leak-causing nodes more visible (add a background) - $style = ",style=filled,fillcolor=gray" - } elsif ($f < 0) { - # make anti-leak-causing nodes (which almost never occur) - # stand out as well (triple border) - $style = ",peripheries=3" - } - } - - printf DOT ("N%d [label=\"%s\\n%s (%s)%s\\r" . - "\",shape=box,fontsize=%.1f%s];\n", - $node{$a}, - $sym, - Unparse($f), - Percent($f, $local_total), - $extra, - $fs, - $style, - ); - } - - # Get edges and counts per edge - my %edge = (); - my $n; - my $fullname_to_shortname_map = {}; - FillFullnameToShortnameMap($symbols, $fullname_to_shortname_map); - foreach my $k (keys(%{$raw})) { - # TODO: omit low %age edges - $n = $raw->{$k}; - my @translated = TranslateStack($symbols, $fullname_to_shortname_map, $k); - for (my $i = 1; $i <= $#translated; $i++) { - my $src = $translated[$i]; - my $dst = $translated[$i-1]; - #next if ($src eq $dst); # Avoid self-edges? - if (exists($node{$src}) && exists($node{$dst})) { - my $edge_label = "$src\001$dst"; - if (!exists($edge{$edge_label})) { - $edge{$edge_label} = 0; - } - $edge{$edge_label} += $n; - } - } - } - - # Print edges (process in order of decreasing counts) - my %indegree = (); # Number of incoming edges added per node so far - my %outdegree = (); # Number of outgoing edges added per node so far - foreach my $e (sort { $edge{$b} <=> $edge{$a} } keys(%edge)) { - my @x = split(/\001/, $e); - $n = $edge{$e}; - - # Initialize degree of kept incoming and outgoing edges if necessary - my $src = $x[0]; - my $dst = $x[1]; - if (!exists($outdegree{$src})) { $outdegree{$src} = 0; } - if (!exists($indegree{$dst})) { $indegree{$dst} = 0; } - - my $keep; - if ($indegree{$dst} == 0) { - # Keep edge if needed for reachability - $keep = 1; - } elsif (abs($n) <= $edgelimit) { - # Drop if we are below --edgefraction - $keep = 0; - } elsif ($outdegree{$src} >= $main::opt_maxdegree || - $indegree{$dst} >= $main::opt_maxdegree) { - # Keep limited number of in/out edges per node - $keep = 0; - } else { - $keep = 1; - } - - if ($keep) { - $outdegree{$src}++; - $indegree{$dst}++; - - # Compute line width based on edge count - my $fraction = abs($local_total ? (3 * ($n / $local_total)) : 0); - if ($fraction > 1) { $fraction = 1; } - my $w = $fraction * 2; - if ($w < 1 && ($main::opt_web || $main::opt_svg)) { - # SVG output treats line widths < 1 poorly. - $w = 1; - } - - # Dot sometimes segfaults if given edge weights that are too large, so - # we cap the weights at a large value - my $edgeweight = abs($n) ** 0.7; - if ($edgeweight > 100000) { $edgeweight = 100000; } - $edgeweight = int($edgeweight); - - my $style = sprintf("setlinewidth(%f)", $w); - if ($x[1] =~ m/\(inline\)/) { - $style .= ",dashed"; - } - - # Use a slightly squashed function of the edge count as the weight - printf DOT ("N%s -> N%s [label=%s, weight=%d, style=\"%s\"];\n", - $node{$x[0]}, - $node{$x[1]}, - Unparse($n), - $edgeweight, - $style); - } - } - - print DOT ("}\n"); - close(DOT); - - if ($main::opt_web || $main::opt_svg) { - # Rewrite SVG to be more usable inside web browser. - RewriteSvg(TempName($main::next_tmpfile, "svg")); - } - - return 1; -} - -sub RewriteSvg { - my $svgfile = shift; - - open(SVG, $svgfile) || die "open temp svg: $!"; - my @svg = <SVG>; - close(SVG); - unlink $svgfile; - my $svg = join('', @svg); - - # Dot's SVG output is - # - # <svg width="___" height="___" - # viewBox="___" xmlns=...> - # <g id="graph0" transform="..."> - # ... - # </g> - # </svg> - # - # Change it to - # - # <svg width="100%" height="100%" - # xmlns=...> - # $svg_javascript - # <g id="viewport" transform="translate(0,0)"> - # <g id="graph0" transform="..."> - # ... - # </g> - # </g> - # </svg> - - # Fix width, height; drop viewBox. - $svg =~ s/(?s)<svg width="[^"]+" height="[^"]+"(.*?)viewBox="[^"]+"/<svg width="100%" height="100%"$1/; - - # Insert script, viewport <g> above first <g> - my $svg_javascript = SvgJavascript(); - my $viewport = "<g id=\"viewport\" transform=\"translate(0,0)\">\n"; - $svg =~ s/<g id="graph\d"/$svg_javascript$viewport$&/; - - # Insert final </g> above </svg>. - $svg =~ s/(.*)(<\/svg>)/$1<\/g>$2/; - $svg =~ s/<g id="graph\d"(.*?)/<g id="viewport"$1/; - - if ($main::opt_svg) { - # --svg: write to standard output. - print $svg; - } else { - # Write back to temporary file. - open(SVG, ">$svgfile") || die "open $svgfile: $!"; - print SVG $svg; - close(SVG); - } -} - -sub SvgJavascript { - return <<'EOF'; -<script type="text/ecmascript"><![CDATA[ -// SVGPan -// http://www.cyberz.org/blog/2009/12/08/svgpan-a-javascript-svg-panzoomdrag-library/ -// Local modification: if(true || ...) below to force panning, never moving. - -/** - * SVGPan library 1.2 - * ==================== - * - * Given an unique existing element with id "viewport", including the - * the library into any SVG adds the following capabilities: - * - * - Mouse panning - * - Mouse zooming (using the wheel) - * - Object dargging - * - * Known issues: - * - * - Zooming (while panning) on Safari has still some issues - * - * Releases: - * - * 1.2, Sat Mar 20 08:42:50 GMT 2010, Zeng Xiaohui - * Fixed a bug with browser mouse handler interaction - * - * 1.1, Wed Feb 3 17:39:33 GMT 2010, Zeng Xiaohui - * Updated the zoom code to support the mouse wheel on Safari/Chrome - * - * 1.0, Andrea Leofreddi - * First release - * - * This code is licensed under the following BSD license: - * - * Copyright 2009-2010 Andrea Leofreddi <a.leofreddi@itcharm.com>. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, are - * permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, this list - * of conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY Andrea Leofreddi ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Andrea Leofreddi OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * The views and conclusions contained in the software and documentation are those of the - * authors and should not be interpreted as representing official policies, either expressed - * or implied, of Andrea Leofreddi. - */ - -var root = document.documentElement; - -var state = 'none', stateTarget, stateOrigin, stateTf; - -setupHandlers(root); - -/** - * Register handlers - */ -function setupHandlers(root){ - setAttributes(root, { - "onmouseup" : "add(evt)", - "onmousedown" : "handleMouseDown(evt)", - "onmousemove" : "handleMouseMove(evt)", - "onmouseup" : "handleMouseUp(evt)", - //"onmouseout" : "handleMouseUp(evt)", // Decomment this to stop the pan functionality when dragging out of the SVG element - }); - - if(navigator.userAgent.toLowerCase().indexOf('webkit') >= 0) - window.addEventListener('mousewheel', handleMouseWheel, false); // Chrome/Safari - else - window.addEventListener('DOMMouseScroll', handleMouseWheel, false); // Others - - var g = svgDoc.getElementById("svg"); - g.width = "100%"; - g.height = "100%"; -} - -/** - * Instance an SVGPoint object with given event coordinates. - */ -function getEventPoint(evt) { - var p = root.createSVGPoint(); - - p.x = evt.clientX; - p.y = evt.clientY; - - return p; -} - -/** - * Sets the current transform matrix of an element. - */ -function setCTM(element, matrix) { - var s = "matrix(" + matrix.a + "," + matrix.b + "," + matrix.c + "," + matrix.d + "," + matrix.e + "," + matrix.f + ")"; - - element.setAttribute("transform", s); -} - -/** - * Dumps a matrix to a string (useful for debug). - */ -function dumpMatrix(matrix) { - var s = "[ " + matrix.a + ", " + matrix.c + ", " + matrix.e + "\n " + matrix.b + ", " + matrix.d + ", " + matrix.f + "\n 0, 0, 1 ]"; - - return s; -} - -/** - * Sets attributes of an element. - */ -function setAttributes(element, attributes){ - for (i in attributes) - element.setAttributeNS(null, i, attributes[i]); -} - -/** - * Handle mouse move event. - */ -function handleMouseWheel(evt) { - if(evt.preventDefault) - evt.preventDefault(); - - evt.returnValue = false; - - var svgDoc = evt.target.ownerDocument; - - var delta; - - if(evt.wheelDelta) - delta = evt.wheelDelta / 3600; // Chrome/Safari - else - delta = evt.detail / -90; // Mozilla - - var z = 1 + delta; // Zoom factor: 0.9/1.1 - - var g = svgDoc.getElementById("viewport"); - - var p = getEventPoint(evt); - - p = p.matrixTransform(g.getCTM().inverse()); - - // Compute new scale matrix in current mouse position - var k = root.createSVGMatrix().translate(p.x, p.y).scale(z).translate(-p.x, -p.y); - - setCTM(g, g.getCTM().multiply(k)); - - stateTf = stateTf.multiply(k.inverse()); -} - -/** - * Handle mouse move event. - */ -function handleMouseMove(evt) { - if(evt.preventDefault) - evt.preventDefault(); - - evt.returnValue = false; - - var svgDoc = evt.target.ownerDocument; - - var g = svgDoc.getElementById("viewport"); - - if(state == 'pan') { - // Pan mode - var p = getEventPoint(evt).matrixTransform(stateTf); - - setCTM(g, stateTf.inverse().translate(p.x - stateOrigin.x, p.y - stateOrigin.y)); - } else if(state == 'move') { - // Move mode - var p = getEventPoint(evt).matrixTransform(g.getCTM().inverse()); - - setCTM(stateTarget, root.createSVGMatrix().translate(p.x - stateOrigin.x, p.y - stateOrigin.y).multiply(g.getCTM().inverse()).multiply(stateTarget.getCTM())); - - stateOrigin = p; - } -} - -/** - * Handle click event. - */ -function handleMouseDown(evt) { - if(evt.preventDefault) - evt.preventDefault(); - - evt.returnValue = false; - - var svgDoc = evt.target.ownerDocument; - - var g = svgDoc.getElementById("viewport"); - - if(true || evt.target.tagName == "svg") { - // Pan mode - state = 'pan'; - - stateTf = g.getCTM().inverse(); - - stateOrigin = getEventPoint(evt).matrixTransform(stateTf); - } else { - // Move mode - state = 'move'; - - stateTarget = evt.target; - - stateTf = g.getCTM().inverse(); - - stateOrigin = getEventPoint(evt).matrixTransform(stateTf); - } -} - -/** - * Handle mouse button release event. - */ -function handleMouseUp(evt) { - if(evt.preventDefault) - evt.preventDefault(); - - evt.returnValue = false; - - var svgDoc = evt.target.ownerDocument; - - if(state == 'pan' || state == 'move') { - // Quit pan mode - state = ''; - } -} - -]]></script> -EOF -} - -# Provides a map from fullname to shortname for cases where the -# shortname is ambiguous. The symlist has both the fullname and -# shortname for all symbols, which is usually fine, but sometimes -- -# such as overloaded functions -- two different fullnames can map to -# the same shortname. In that case, we use the address of the -# function to disambiguate the two. This function fills in a map that -# maps fullnames to modified shortnames in such cases. If a fullname -# is not present in the map, the 'normal' shortname provided by the -# symlist is the appropriate one to use. -sub FillFullnameToShortnameMap { - my $symbols = shift; - my $fullname_to_shortname_map = shift; - my $shortnames_seen_once = {}; - my $shortnames_seen_more_than_once = {}; - - foreach my $symlist (values(%{$symbols})) { - # TODO(csilvers): deal with inlined symbols too. - my $shortname = $symlist->[0]; - my $fullname = $symlist->[2]; - if ($fullname !~ /<[0-9a-fA-F]+>$/) { # fullname doesn't end in an address - next; # the only collisions we care about are when addresses differ - } - if (defined($shortnames_seen_once->{$shortname}) && - $shortnames_seen_once->{$shortname} ne $fullname) { - $shortnames_seen_more_than_once->{$shortname} = 1; - } else { - $shortnames_seen_once->{$shortname} = $fullname; - } - } - - foreach my $symlist (values(%{$symbols})) { - my $shortname = $symlist->[0]; - my $fullname = $symlist->[2]; - # TODO(csilvers): take in a list of addresses we care about, and only - # store in the map if $symlist->[1] is in that list. Saves space. - next if defined($fullname_to_shortname_map->{$fullname}); - if (defined($shortnames_seen_more_than_once->{$shortname})) { - if ($fullname =~ /<0*([^>]*)>$/) { # fullname has address at end of it - $fullname_to_shortname_map->{$fullname} = "$shortname\@$1"; - } - } - } -} - -# Return a small number that identifies the argument. -# Multiple calls with the same argument will return the same number. -# Calls with different arguments will return different numbers. -sub ShortIdFor { - my $key = shift; - my $id = $main::uniqueid{$key}; - if (!defined($id)) { - $id = keys(%main::uniqueid) + 1; - $main::uniqueid{$key} = $id; - } - return $id; -} - -# Translate a stack of addresses into a stack of symbols -sub TranslateStack { - my $symbols = shift; - my $fullname_to_shortname_map = shift; - my $k = shift; - - my @addrs = split(/\n/, $k); - my @result = (); - for (my $i = 0; $i <= $#addrs; $i++) { - my $a = $addrs[$i]; - - # Skip large addresses since they sometimes show up as fake entries on RH9 - if (length($a) > 8 && $a gt "7fffffffffffffff") { - next; - } - - if ($main::opt_disasm || $main::opt_list) { - # We want just the address for the key - push(@result, $a); - next; - } - - my $symlist = $symbols->{$a}; - if (!defined($symlist)) { - $symlist = [$a, "", $a]; - } - - # We can have a sequence of symbols for a particular entry - # (more than one symbol in the case of inlining). Callers - # come before callees in symlist, so walk backwards since - # the translated stack should contain callees before callers. - for (my $j = $#{$symlist}; $j >= 2; $j -= 3) { - my $func = $symlist->[$j-2]; - my $fileline = $symlist->[$j-1]; - my $fullfunc = $symlist->[$j]; - if (defined($fullname_to_shortname_map->{$fullfunc})) { - $func = $fullname_to_shortname_map->{$fullfunc}; - } - if ($j > 2) { - $func = "$func (inline)"; - } - - # Do not merge nodes corresponding to Callback::Run since that - # causes confusing cycles in dot display. Instead, we synthesize - # a unique name for this frame per caller. - if ($func =~ m/Callback.*::Run$/) { - my $caller = ($i > 0) ? $addrs[$i-1] : 0; - $func = "Run#" . ShortIdFor($caller); - } - - if ($main::opt_addresses) { - push(@result, "$a $func $fileline"); - } elsif ($main::opt_lines) { - if ($func eq '??' && $fileline eq '??:0') { - push(@result, "$a"); - } else { - push(@result, "$func $fileline"); - } - } elsif ($main::opt_functions) { - if ($func eq '??') { - push(@result, "$a"); - } else { - push(@result, $func); - } - } elsif ($main::opt_files) { - if ($fileline eq '??:0' || $fileline eq '') { - push(@result, "$a"); - } else { - my $f = $fileline; - $f =~ s/:\d+$//; - push(@result, $f); - } - } else { - push(@result, $a); - last; # Do not print inlined info - } - } - } - - # print join(",", @addrs), " => ", join(",", @result), "\n"; - return @result; -} - -# Generate percent string for a number and a total -sub Percent { - my $num = shift; - my $tot = shift; - if ($tot != 0) { - return sprintf("%.1f%%", $num * 100.0 / $tot); - } else { - return ($num == 0) ? "nan" : (($num > 0) ? "+inf" : "-inf"); - } -} - -# Generate pretty-printed form of number -sub Unparse { - my $num = shift; - if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { - if ($main::opt_inuse_objects || $main::opt_alloc_objects) { - return sprintf("%d", $num); - } else { - if ($main::opt_show_bytes) { - return sprintf("%d", $num); - } else { - return sprintf("%.1f", $num / 1048576.0); - } - } - } elsif ($main::profile_type eq 'contention' && !$main::opt_contentions) { - return sprintf("%.3f", $num / 1e9); # Convert nanoseconds to seconds - } else { - return sprintf("%d", $num); - } -} - -# Alternate pretty-printed form: 0 maps to "." -sub UnparseAlt { - my $num = shift; - if ($num == 0) { - return "."; - } else { - return Unparse($num); - } -} - -# Alternate pretty-printed form: 0 maps to "" -sub HtmlPrintNumber { - my $num = shift; - if ($num == 0) { - return ""; - } else { - return Unparse($num); - } -} - -# Return output units -sub Units { - if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { - if ($main::opt_inuse_objects || $main::opt_alloc_objects) { - return "objects"; - } else { - if ($main::opt_show_bytes) { - return "B"; - } else { - return "MB"; - } - } - } elsif ($main::profile_type eq 'contention' && !$main::opt_contentions) { - return "seconds"; - } else { - return "samples"; - } -} - -##### Profile manipulation code ##### - -# Generate flattened profile: -# If count is charged to stack [a,b,c,d], in generated profile, -# it will be charged to [a] -sub FlatProfile { - my $profile = shift; - my $result = {}; - foreach my $k (keys(%{$profile})) { - my $count = $profile->{$k}; - my @addrs = split(/\n/, $k); - if ($#addrs >= 0) { - AddEntry($result, $addrs[0], $count); - } - } - return $result; -} - -# Generate cumulative profile: -# If count is charged to stack [a,b,c,d], in generated profile, -# it will be charged to [a], [b], [c], [d] -sub CumulativeProfile { - my $profile = shift; - my $result = {}; - foreach my $k (keys(%{$profile})) { - my $count = $profile->{$k}; - my @addrs = split(/\n/, $k); - foreach my $a (@addrs) { - AddEntry($result, $a, $count); - } - } - return $result; -} - -# If the second-youngest PC on the stack is always the same, returns -# that pc. Otherwise, returns undef. -sub IsSecondPcAlwaysTheSame { - my $profile = shift; - - my $second_pc = undef; - foreach my $k (keys(%{$profile})) { - my @addrs = split(/\n/, $k); - if ($#addrs < 1) { - return undef; - } - if (not defined $second_pc) { - $second_pc = $addrs[1]; - } else { - if ($second_pc ne $addrs[1]) { - return undef; - } - } - } - return $second_pc; -} - -sub ExtractSymbolLocation { - my $symbols = shift; - my $address = shift; - # 'addr2line' outputs "??:0" for unknown locations; we do the - # same to be consistent. - my $location = "??:0:unknown"; - if (exists $symbols->{$address}) { - my $file = $symbols->{$address}->[1]; - if ($file eq "?") { - $file = "??:0" - } - $location = $file . ":" . $symbols->{$address}->[0]; - } - return $location; -} - -# Extracts a graph of calls. -sub ExtractCalls { - my $symbols = shift; - my $profile = shift; - - my $calls = {}; - while( my ($stack_trace, $count) = each %$profile ) { - my @address = split(/\n/, $stack_trace); - my $destination = ExtractSymbolLocation($symbols, $address[0]); - AddEntry($calls, $destination, $count); - for (my $i = 1; $i <= $#address; $i++) { - my $source = ExtractSymbolLocation($symbols, $address[$i]); - my $call = "$source -> $destination"; - AddEntry($calls, $call, $count); - $destination = $source; - } - } - - return $calls; -} - -sub FilterFrames { - my $symbols = shift; - my $profile = shift; - - if ($main::opt_retain eq '' && $main::opt_exclude eq '') { - return $profile; - } - - my $result = {}; - foreach my $k (keys(%{$profile})) { - my $count = $profile->{$k}; - my @addrs = split(/\n/, $k); - my @path = (); - foreach my $a (@addrs) { - my $sym; - if (exists($symbols->{$a})) { - $sym = $symbols->{$a}->[0]; - } else { - $sym = $a; - } - if ($main::opt_retain ne '' && $sym !~ m/$main::opt_retain/) { - next; - } - if ($main::opt_exclude ne '' && $sym =~ m/$main::opt_exclude/) { - next; - } - push(@path, $a); - } - if (scalar(@path) > 0) { - my $reduced_path = join("\n", @path); - AddEntry($result, $reduced_path, $count); - } - } - - return $result; -} - -sub RemoveUninterestingFrames { - my $symbols = shift; - my $profile = shift; - - # List of function names to skip - my %skip = (); - my $skip_regexp = 'NOMATCH'; - if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { - foreach my $name ('calloc', - 'cfree', - 'malloc', - 'free', - 'memalign', - 'posix_memalign', - 'aligned_alloc', - 'pvalloc', - 'valloc', - 'realloc', - 'mallocx', # jemalloc - 'rallocx', # jemalloc - 'xallocx', # jemalloc - 'dallocx', # jemalloc - 'sdallocx', # jemalloc - 'tc_calloc', - 'tc_cfree', - 'tc_malloc', - 'tc_free', - 'tc_memalign', - 'tc_posix_memalign', - 'tc_pvalloc', - 'tc_valloc', - 'tc_realloc', - 'tc_new', - 'tc_delete', - 'tc_newarray', - 'tc_deletearray', - 'tc_new_nothrow', - 'tc_newarray_nothrow', - 'do_malloc', - '::do_malloc', # new name -- got moved to an unnamed ns - '::do_malloc_or_cpp_alloc', - 'DoSampledAllocation', - 'simple_alloc::allocate', - '__malloc_alloc_template::allocate', - '__builtin_delete', - '__builtin_new', - '__builtin_vec_delete', - '__builtin_vec_new', - 'operator new', - 'operator new[]', - # The entry to our memory-allocation routines on OS X - 'malloc_zone_malloc', - 'malloc_zone_calloc', - 'malloc_zone_valloc', - 'malloc_zone_realloc', - 'malloc_zone_memalign', - 'malloc_zone_free', - # These mark the beginning/end of our custom sections - '__start_google_malloc', - '__stop_google_malloc', - '__start_malloc_hook', - '__stop_malloc_hook') { - $skip{$name} = 1; - $skip{"_" . $name} = 1; # Mach (OS X) adds a _ prefix to everything - } - # TODO: Remove TCMalloc once everything has been - # moved into the tcmalloc:: namespace and we have flushed - # old code out of the system. - $skip_regexp = "TCMalloc|^tcmalloc::"; - } elsif ($main::profile_type eq 'contention') { - foreach my $vname ('base::RecordLockProfileData', - 'base::SubmitMutexProfileData', - 'base::SubmitSpinLockProfileData', - 'Mutex::Unlock', - 'Mutex::UnlockSlow', - 'Mutex::ReaderUnlock', - 'MutexLock::~MutexLock', - 'SpinLock::Unlock', - 'SpinLock::SlowUnlock', - 'SpinLockHolder::~SpinLockHolder') { - $skip{$vname} = 1; - } - } elsif ($main::profile_type eq 'cpu') { - # Drop signal handlers used for CPU profile collection - # TODO(dpeng): this should not be necessary; it's taken - # care of by the general 2nd-pc mechanism below. - foreach my $name ('ProfileData::Add', # historical - 'ProfileData::prof_handler', # historical - 'CpuProfiler::prof_handler', - '__FRAME_END__', - '__pthread_sighandler', - '__restore') { - $skip{$name} = 1; - } - } else { - # Nothing skipped for unknown types - } - - if ($main::profile_type eq 'cpu') { - # If all the second-youngest program counters are the same, - # this STRONGLY suggests that it is an artifact of measurement, - # i.e., stack frames pushed by the CPU profiler signal handler. - # Hence, we delete them. - # (The topmost PC is read from the signal structure, not from - # the stack, so it does not get involved.) - while (my $second_pc = IsSecondPcAlwaysTheSame($profile)) { - my $result = {}; - my $func = ''; - if (exists($symbols->{$second_pc})) { - $second_pc = $symbols->{$second_pc}->[0]; - } - print STDERR "Removing $second_pc from all stack traces.\n"; - foreach my $k (keys(%{$profile})) { - my $count = $profile->{$k}; - my @addrs = split(/\n/, $k); - splice @addrs, 1, 1; - my $reduced_path = join("\n", @addrs); - AddEntry($result, $reduced_path, $count); - } - $profile = $result; - } - } - - my $result = {}; - foreach my $k (keys(%{$profile})) { - my $count = $profile->{$k}; - my @addrs = split(/\n/, $k); - my @path = (); - foreach my $a (@addrs) { - if (exists($symbols->{$a})) { - my $func = $symbols->{$a}->[0]; - if ($skip{$func} || ($func =~ m/$skip_regexp/)) { - # Throw away the portion of the backtrace seen so far, under the - # assumption that previous frames were for functions internal to the - # allocator. - @path = (); - next; - } - } - push(@path, $a); - } - my $reduced_path = join("\n", @path); - AddEntry($result, $reduced_path, $count); - } - - $result = FilterFrames($symbols, $result); - - return $result; -} - -# Reduce profile to granularity given by user -sub ReduceProfile { - my $symbols = shift; - my $profile = shift; - my $result = {}; - my $fullname_to_shortname_map = {}; - FillFullnameToShortnameMap($symbols, $fullname_to_shortname_map); - foreach my $k (keys(%{$profile})) { - my $count = $profile->{$k}; - my @translated = TranslateStack($symbols, $fullname_to_shortname_map, $k); - my @path = (); - my %seen = (); - $seen{''} = 1; # So that empty keys are skipped - foreach my $e (@translated) { - # To avoid double-counting due to recursion, skip a stack-trace - # entry if it has already been seen - if (!$seen{$e}) { - $seen{$e} = 1; - push(@path, $e); - } - } - my $reduced_path = join("\n", @path); - AddEntry($result, $reduced_path, $count); - } - return $result; -} - -# Does the specified symbol array match the regexp? -sub SymbolMatches { - my $sym = shift; - my $re = shift; - if (defined($sym)) { - for (my $i = 0; $i < $#{$sym}; $i += 3) { - if ($sym->[$i] =~ m/$re/ || $sym->[$i+1] =~ m/$re/) { - return 1; - } - } - } - return 0; -} - -# Focus only on paths involving specified regexps -sub FocusProfile { - my $symbols = shift; - my $profile = shift; - my $focus = shift; - my $result = {}; - foreach my $k (keys(%{$profile})) { - my $count = $profile->{$k}; - my @addrs = split(/\n/, $k); - foreach my $a (@addrs) { - # Reply if it matches either the address/shortname/fileline - if (($a =~ m/$focus/) || SymbolMatches($symbols->{$a}, $focus)) { - AddEntry($result, $k, $count); - last; - } - } - } - return $result; -} - -# Focus only on paths not involving specified regexps -sub IgnoreProfile { - my $symbols = shift; - my $profile = shift; - my $ignore = shift; - my $result = {}; - foreach my $k (keys(%{$profile})) { - my $count = $profile->{$k}; - my @addrs = split(/\n/, $k); - my $matched = 0; - foreach my $a (@addrs) { - # Reply if it matches either the address/shortname/fileline - if (($a =~ m/$ignore/) || SymbolMatches($symbols->{$a}, $ignore)) { - $matched = 1; - last; - } - } - if (!$matched) { - AddEntry($result, $k, $count); - } - } - return $result; -} - -# Get total count in profile -sub TotalProfile { - my $profile = shift; - my $result = 0; - foreach my $k (keys(%{$profile})) { - $result += $profile->{$k}; - } - return $result; -} - -# Add A to B -sub AddProfile { - my $A = shift; - my $B = shift; - - my $R = {}; - # add all keys in A - foreach my $k (keys(%{$A})) { - my $v = $A->{$k}; - AddEntry($R, $k, $v); - } - # add all keys in B - foreach my $k (keys(%{$B})) { - my $v = $B->{$k}; - AddEntry($R, $k, $v); - } - return $R; -} - -# Merges symbol maps -sub MergeSymbols { - my $A = shift; - my $B = shift; - - my $R = {}; - foreach my $k (keys(%{$A})) { - $R->{$k} = $A->{$k}; - } - if (defined($B)) { - foreach my $k (keys(%{$B})) { - $R->{$k} = $B->{$k}; - } - } - return $R; -} - - -# Add A to B -sub AddPcs { - my $A = shift; - my $B = shift; - - my $R = {}; - # add all keys in A - foreach my $k (keys(%{$A})) { - $R->{$k} = 1 - } - # add all keys in B - foreach my $k (keys(%{$B})) { - $R->{$k} = 1 - } - return $R; -} - -# Subtract B from A -sub SubtractProfile { - my $A = shift; - my $B = shift; - - my $R = {}; - foreach my $k (keys(%{$A})) { - my $v = $A->{$k} - GetEntry($B, $k); - if ($v < 0 && $main::opt_drop_negative) { - $v = 0; - } - AddEntry($R, $k, $v); - } - if (!$main::opt_drop_negative) { - # Take care of when subtracted profile has more entries - foreach my $k (keys(%{$B})) { - if (!exists($A->{$k})) { - AddEntry($R, $k, 0 - $B->{$k}); - } - } - } - return $R; -} - -# Get entry from profile; zero if not present -sub GetEntry { - my $profile = shift; - my $k = shift; - if (exists($profile->{$k})) { - return $profile->{$k}; - } else { - return 0; - } -} - -# Add entry to specified profile -sub AddEntry { - my $profile = shift; - my $k = shift; - my $n = shift; - if (!exists($profile->{$k})) { - $profile->{$k} = 0; - } - $profile->{$k} += $n; -} - -# Add a stack of entries to specified profile, and add them to the $pcs -# list. -sub AddEntries { - my $profile = shift; - my $pcs = shift; - my $stack = shift; - my $count = shift; - my @k = (); - - foreach my $e (split(/\s+/, $stack)) { - my $pc = HexExtend($e); - $pcs->{$pc} = 1; - push @k, $pc; - } - AddEntry($profile, (join "\n", @k), $count); -} - -##### Code to profile a server dynamically ##### - -sub CheckSymbolPage { - my $url = SymbolPageURL(); - my $command = ShellEscape(@URL_FETCHER, $url); - open(SYMBOL, "$command |") or error($command); - my $line = <SYMBOL>; - $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines - close(SYMBOL); - unless (defined($line)) { - error("$url doesn't exist\n"); - } - - if ($line =~ /^num_symbols:\s+(\d+)$/) { - if ($1 == 0) { - error("Stripped binary. No symbols available.\n"); - } - } else { - error("Failed to get the number of symbols from $url\n"); - } -} - -sub IsProfileURL { - my $profile_name = shift; - if (-f $profile_name) { - printf STDERR "Using local file $profile_name.\n"; - return 0; - } - return 1; -} - -sub ParseProfileURL { - my $profile_name = shift; - - if (!defined($profile_name) || $profile_name eq "") { - return (); - } - - # Split profile URL - matches all non-empty strings, so no test. - $profile_name =~ m,^(https?://)?([^/]+)(.*?)(/|$PROFILES)?$,; - - my $proto = $1 || "http://"; - my $hostport = $2; - my $prefix = $3; - my $profile = $4 || "/"; - - my $host = $hostport; - $host =~ s/:.*//; - - my $baseurl = "$proto$hostport$prefix"; - return ($host, $baseurl, $profile); -} - -# We fetch symbols from the first profile argument. -sub SymbolPageURL { - my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]); - return "$baseURL$SYMBOL_PAGE"; -} - -sub FetchProgramName() { - my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]); - my $url = "$baseURL$PROGRAM_NAME_PAGE"; - my $command_line = ShellEscape(@URL_FETCHER, $url); - open(CMDLINE, "$command_line |") or error($command_line); - my $cmdline = <CMDLINE>; - $cmdline =~ s/\r//g; # turn windows-looking lines into unix-looking lines - close(CMDLINE); - error("Failed to get program name from $url\n") unless defined($cmdline); - $cmdline =~ s/\x00.+//; # Remove argv[1] and latters. - $cmdline =~ s!\n!!g; # Remove LFs. - return $cmdline; -} - -# Gee, curl's -L (--location) option isn't reliable at least -# with its 7.12.3 version. Curl will forget to post data if -# there is a redirection. This function is a workaround for -# curl. Redirection happens on borg hosts. -sub ResolveRedirectionForCurl { - my $url = shift; - my $command_line = ShellEscape(@URL_FETCHER, "--head", $url); - open(CMDLINE, "$command_line |") or error($command_line); - while (<CMDLINE>) { - s/\r//g; # turn windows-looking lines into unix-looking lines - if (/^Location: (.*)/) { - $url = $1; - } - } - close(CMDLINE); - return $url; -} - -# Add a timeout flat to URL_FETCHER. Returns a new list. -sub AddFetchTimeout { - my $timeout = shift; - my @fetcher = @_; - if (defined($timeout)) { - if (join(" ", @fetcher) =~ m/\bcurl -s/) { - push(@fetcher, "--max-time", sprintf("%d", $timeout)); - } elsif (join(" ", @fetcher) =~ m/\brpcget\b/) { - push(@fetcher, sprintf("--deadline=%d", $timeout)); - } - } - return @fetcher; -} - -# Reads a symbol map from the file handle name given as $1, returning -# the resulting symbol map. Also processes variables relating to symbols. -# Currently, the only variable processed is 'binary=<value>' which updates -# $main::prog to have the correct program name. -sub ReadSymbols { - my $in = shift; - my $map = {}; - while (<$in>) { - s/\r//g; # turn windows-looking lines into unix-looking lines - # Removes all the leading zeroes from the symbols, see comment below. - if (m/^0x0*([0-9a-f]+)\s+(.+)/) { - $map->{$1} = $2; - } elsif (m/^---/) { - last; - } elsif (m/^([a-z][^=]*)=(.*)$/ ) { - my ($variable, $value) = ($1, $2); - for ($variable, $value) { - s/^\s+//; - s/\s+$//; - } - if ($variable eq "binary") { - if ($main::prog ne $UNKNOWN_BINARY && $main::prog ne $value) { - printf STDERR ("Warning: Mismatched binary name '%s', using '%s'.\n", - $main::prog, $value); - } - $main::prog = $value; - } else { - printf STDERR ("Ignoring unknown variable in symbols list: " . - "'%s' = '%s'\n", $variable, $value); - } - } - } - return $map; -} - -sub URLEncode { - my $str = shift; - $str =~ s/([^A-Za-z0-9\-_.!~*'()])/ sprintf "%%%02x", ord $1 /eg; - return $str; -} - -sub AppendSymbolFilterParams { - my $url = shift; - my @params = (); - if ($main::opt_retain ne '') { - push(@params, sprintf("retain=%s", URLEncode($main::opt_retain))); - } - if ($main::opt_exclude ne '') { - push(@params, sprintf("exclude=%s", URLEncode($main::opt_exclude))); - } - if (scalar @params > 0) { - $url = sprintf("%s?%s", $url, join("&", @params)); - } - return $url; -} - -# Fetches and processes symbols to prepare them for use in the profile output -# code. If the optional 'symbol_map' arg is not given, fetches symbols from -# $SYMBOL_PAGE for all PC values found in profile. Otherwise, the raw symbols -# are assumed to have already been fetched into 'symbol_map' and are simply -# extracted and processed. -sub FetchSymbols { - my $pcset = shift; - my $symbol_map = shift; - - my %seen = (); - my @pcs = grep { !$seen{$_}++ } keys(%$pcset); # uniq - - if (!defined($symbol_map)) { - my $post_data = join("+", sort((map {"0x" . "$_"} @pcs))); - - open(POSTFILE, ">$main::tmpfile_sym"); - print POSTFILE $post_data; - close(POSTFILE); - - my $url = SymbolPageURL(); - - my $command_line; - if (join(" ", @URL_FETCHER) =~ m/\bcurl -s/) { - $url = ResolveRedirectionForCurl($url); - $url = AppendSymbolFilterParams($url); - $command_line = ShellEscape(@URL_FETCHER, "-d", "\@$main::tmpfile_sym", - $url); - } else { - $url = AppendSymbolFilterParams($url); - $command_line = (ShellEscape(@URL_FETCHER, "--post", $url) - . " < " . ShellEscape($main::tmpfile_sym)); - } - # We use c++filt in case $SYMBOL_PAGE gives us mangled symbols. - my $escaped_cppfilt = ShellEscape($obj_tool_map{"c++filt"}); - open(SYMBOL, "$command_line | $escaped_cppfilt |") or error($command_line); - $symbol_map = ReadSymbols(*SYMBOL{IO}); - close(SYMBOL); - } - - my $symbols = {}; - foreach my $pc (@pcs) { - my $fullname; - # For 64 bits binaries, symbols are extracted with 8 leading zeroes. - # Then /symbol reads the long symbols in as uint64, and outputs - # the result with a "0x%08llx" format which get rid of the zeroes. - # By removing all the leading zeroes in both $pc and the symbols from - # /symbol, the symbols match and are retrievable from the map. - my $shortpc = $pc; - $shortpc =~ s/^0*//; - # Each line may have a list of names, which includes the function - # and also other functions it has inlined. They are separated (in - # PrintSymbolizedProfile), by --, which is illegal in function names. - my $fullnames; - if (defined($symbol_map->{$shortpc})) { - $fullnames = $symbol_map->{$shortpc}; - } else { - $fullnames = "0x" . $pc; # Just use addresses - } - my $sym = []; - $symbols->{$pc} = $sym; - foreach my $fullname (split("--", $fullnames)) { - my $name = ShortFunctionName($fullname); - push(@{$sym}, $name, "?", $fullname); - } - } - return $symbols; -} - -sub BaseName { - my $file_name = shift; - $file_name =~ s!^.*/!!; # Remove directory name - return $file_name; -} - -sub MakeProfileBaseName { - my ($binary_name, $profile_name) = @_; - my ($host, $baseURL, $path) = ParseProfileURL($profile_name); - my $binary_shortname = BaseName($binary_name); - return sprintf("%s.%s.%s", - $binary_shortname, $main::op_time, $host); -} - -sub FetchDynamicProfile { - my $binary_name = shift; - my $profile_name = shift; - my $fetch_name_only = shift; - my $encourage_patience = shift; - - if (!IsProfileURL($profile_name)) { - return $profile_name; - } else { - my ($host, $baseURL, $path) = ParseProfileURL($profile_name); - if ($path eq "" || $path eq "/") { - # Missing type specifier defaults to cpu-profile - $path = $PROFILE_PAGE; - } - - my $profile_file = MakeProfileBaseName($binary_name, $profile_name); - - my $url = "$baseURL$path"; - my $fetch_timeout = undef; - if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE/) { - if ($path =~ m/[?]/) { - $url .= "&"; - } else { - $url .= "?"; - } - $url .= sprintf("seconds=%d", $main::opt_seconds); - $fetch_timeout = $main::opt_seconds * 1.01 + 60; - # Set $profile_type for consumption by PrintSymbolizedProfile. - $main::profile_type = 'cpu'; - } else { - # For non-CPU profiles, we add a type-extension to - # the target profile file name. - my $suffix = $path; - $suffix =~ s,/,.,g; - $profile_file .= $suffix; - # Set $profile_type for consumption by PrintSymbolizedProfile. - if ($path =~ m/$HEAP_PAGE/) { - $main::profile_type = 'heap'; - } elsif ($path =~ m/$GROWTH_PAGE/) { - $main::profile_type = 'growth'; - } elsif ($path =~ m/$CONTENTION_PAGE/) { - $main::profile_type = 'contention'; - } - } - - my $profile_dir = $ENV{"JEPROF_TMPDIR"} || ($ENV{HOME} . "/jeprof"); - if (! -d $profile_dir) { - mkdir($profile_dir) - || die("Unable to create profile directory $profile_dir: $!\n"); - } - my $tmp_profile = "$profile_dir/.tmp.$profile_file"; - my $real_profile = "$profile_dir/$profile_file"; - - if ($fetch_name_only > 0) { - return $real_profile; - } - - my @fetcher = AddFetchTimeout($fetch_timeout, @URL_FETCHER); - my $cmd = ShellEscape(@fetcher, $url) . " > " . ShellEscape($tmp_profile); - if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE|$CENSUSPROFILE_PAGE/){ - print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n ${real_profile}\n"; - if ($encourage_patience) { - print STDERR "Be patient...\n"; - } - } else { - print STDERR "Fetching $path profile from $url to\n ${real_profile}\n"; - } - - (system($cmd) == 0) || error("Failed to get profile: $cmd: $!\n"); - (system("mv", $tmp_profile, $real_profile) == 0) || error("Unable to rename profile\n"); - print STDERR "Wrote profile to $real_profile\n"; - $main::collected_profile = $real_profile; - return $main::collected_profile; - } -} - -# Collect profiles in parallel -sub FetchDynamicProfiles { - my $items = scalar(@main::pfile_args); - my $levels = log($items) / log(2); - - if ($items == 1) { - $main::profile_files[0] = FetchDynamicProfile($main::prog, $main::pfile_args[0], 0, 1); - } else { - # math rounding issues - if ((2 ** $levels) < $items) { - $levels++; - } - my $count = scalar(@main::pfile_args); - for (my $i = 0; $i < $count; $i++) { - $main::profile_files[$i] = FetchDynamicProfile($main::prog, $main::pfile_args[$i], 1, 0); - } - print STDERR "Fetching $count profiles, Be patient...\n"; - FetchDynamicProfilesRecurse($levels, 0, 0); - $main::collected_profile = join(" \\\n ", @main::profile_files); - } -} - -# Recursively fork a process to get enough processes -# collecting profiles -sub FetchDynamicProfilesRecurse { - my $maxlevel = shift; - my $level = shift; - my $position = shift; - - if (my $pid = fork()) { - $position = 0 | ($position << 1); - TryCollectProfile($maxlevel, $level, $position); - wait; - } else { - $position = 1 | ($position << 1); - TryCollectProfile($maxlevel, $level, $position); - cleanup(); - exit(0); - } -} - -# Collect a single profile -sub TryCollectProfile { - my $maxlevel = shift; - my $level = shift; - my $position = shift; - - if ($level >= ($maxlevel - 1)) { - if ($position < scalar(@main::pfile_args)) { - FetchDynamicProfile($main::prog, $main::pfile_args[$position], 0, 0); - } - } else { - FetchDynamicProfilesRecurse($maxlevel, $level+1, $position); - } -} - -##### Parsing code ##### - -# Provide a small streaming-read module to handle very large -# cpu-profile files. Stream in chunks along a sliding window. -# Provides an interface to get one 'slot', correctly handling -# endian-ness differences. A slot is one 32-bit or 64-bit word -# (depending on the input profile). We tell endianness and bit-size -# for the profile by looking at the first 8 bytes: in cpu profiles, -# the second slot is always 3 (we'll accept anything that's not 0). -BEGIN { - package CpuProfileStream; - - sub new { - my ($class, $file, $fname) = @_; - my $self = { file => $file, - base => 0, - stride => 512 * 1024, # must be a multiple of bitsize/8 - slots => [], - unpack_code => "", # N for big-endian, V for little - perl_is_64bit => 1, # matters if profile is 64-bit - }; - bless $self, $class; - # Let unittests adjust the stride - if ($main::opt_test_stride > 0) { - $self->{stride} = $main::opt_test_stride; - } - # Read the first two slots to figure out bitsize and endianness. - my $slots = $self->{slots}; - my $str; - read($self->{file}, $str, 8); - # Set the global $address_length based on what we see here. - # 8 is 32-bit (8 hexadecimal chars); 16 is 64-bit (16 hexadecimal chars). - $address_length = ($str eq (chr(0)x8)) ? 16 : 8; - if ($address_length == 8) { - if (substr($str, 6, 2) eq chr(0)x2) { - $self->{unpack_code} = 'V'; # Little-endian. - } elsif (substr($str, 4, 2) eq chr(0)x2) { - $self->{unpack_code} = 'N'; # Big-endian - } else { - ::error("$fname: header size >= 2**16\n"); - } - @$slots = unpack($self->{unpack_code} . "*", $str); - } else { - # If we're a 64-bit profile, check if we're a 64-bit-capable - # perl. Otherwise, each slot will be represented as a float - # instead of an int64, losing precision and making all the - # 64-bit addresses wrong. We won't complain yet, but will - # later if we ever see a value that doesn't fit in 32 bits. - my $has_q = 0; - eval { $has_q = pack("Q", "1") ? 1 : 1; }; - if (!$has_q) { - $self->{perl_is_64bit} = 0; - } - read($self->{file}, $str, 8); - if (substr($str, 4, 4) eq chr(0)x4) { - # We'd love to use 'Q', but it's a) not universal, b) not endian-proof. - $self->{unpack_code} = 'V'; # Little-endian. - } elsif (substr($str, 0, 4) eq chr(0)x4) { - $self->{unpack_code} = 'N'; # Big-endian - } else { - ::error("$fname: header size >= 2**32\n"); - } - my @pair = unpack($self->{unpack_code} . "*", $str); - # Since we know one of the pair is 0, it's fine to just add them. - @$slots = (0, $pair[0] + $pair[1]); - } - return $self; - } - - # Load more data when we access slots->get(X) which is not yet in memory. - sub overflow { - my ($self) = @_; - my $slots = $self->{slots}; - $self->{base} += $#$slots + 1; # skip over data we're replacing - my $str; - read($self->{file}, $str, $self->{stride}); - if ($address_length == 8) { # the 32-bit case - # This is the easy case: unpack provides 32-bit unpacking primitives. - @$slots = unpack($self->{unpack_code} . "*", $str); - } else { - # We need to unpack 32 bits at a time and combine. - my @b32_values = unpack($self->{unpack_code} . "*", $str); - my @b64_values = (); - for (my $i = 0; $i < $#b32_values; $i += 2) { - # TODO(csilvers): if this is a 32-bit perl, the math below - # could end up in a too-large int, which perl will promote - # to a double, losing necessary precision. Deal with that. - # Right now, we just die. - my ($lo, $hi) = ($b32_values[$i], $b32_values[$i+1]); - if ($self->{unpack_code} eq 'N') { # big-endian - ($lo, $hi) = ($hi, $lo); - } - my $value = $lo + $hi * (2**32); - if (!$self->{perl_is_64bit} && # check value is exactly represented - (($value % (2**32)) != $lo || int($value / (2**32)) != $hi)) { - ::error("Need a 64-bit perl to process this 64-bit profile.\n"); - } - push(@b64_values, $value); - } - @$slots = @b64_values; - } - } - - # Access the i-th long in the file (logically), or -1 at EOF. - sub get { - my ($self, $idx) = @_; - my $slots = $self->{slots}; - while ($#$slots >= 0) { - if ($idx < $self->{base}) { - # The only time we expect a reference to $slots[$i - something] - # after referencing $slots[$i] is reading the very first header. - # Since $stride > |header|, that shouldn't cause any lookback - # errors. And everything after the header is sequential. - print STDERR "Unexpected look-back reading CPU profile"; - return -1; # shrug, don't know what better to return - } elsif ($idx > $self->{base} + $#$slots) { - $self->overflow(); - } else { - return $slots->[$idx - $self->{base}]; - } - } - # If we get here, $slots is [], which means we've reached EOF - return -1; # unique since slots is supposed to hold unsigned numbers - } -} - -# Reads the top, 'header' section of a profile, and returns the last -# line of the header, commonly called a 'header line'. The header -# section of a profile consists of zero or more 'command' lines that -# are instructions to jeprof, which jeprof executes when reading the -# header. All 'command' lines start with a %. After the command -# lines is the 'header line', which is a profile-specific line that -# indicates what type of profile it is, and perhaps other global -# information about the profile. For instance, here's a header line -# for a heap profile: -# heap profile: 53: 38236 [ 5525: 1284029] @ heapprofile -# For historical reasons, the CPU profile does not contain a text- -# readable header line. If the profile looks like a CPU profile, -# this function returns "". If no header line could be found, this -# function returns undef. -# -# The following commands are recognized: -# %warn -- emit the rest of this line to stderr, prefixed by 'WARNING:' -# -# The input file should be in binmode. -sub ReadProfileHeader { - local *PROFILE = shift; - my $firstchar = ""; - my $line = ""; - read(PROFILE, $firstchar, 1); - seek(PROFILE, -1, 1); # unread the firstchar - if ($firstchar !~ /[[:print:]]/) { # is not a text character - return ""; - } - while (defined($line = <PROFILE>)) { - $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines - if ($line =~ /^%warn\s+(.*)/) { # 'warn' command - # Note this matches both '%warn blah\n' and '%warn\n'. - print STDERR "WARNING: $1\n"; # print the rest of the line - } elsif ($line =~ /^%/) { - print STDERR "Ignoring unknown command from profile header: $line"; - } else { - # End of commands, must be the header line. - return $line; - } - } - return undef; # got to EOF without seeing a header line -} - -sub IsSymbolizedProfileFile { - my $file_name = shift; - if (!(-e $file_name) || !(-r $file_name)) { - return 0; - } - # Check if the file contains a symbol-section marker. - open(TFILE, "<$file_name"); - binmode TFILE; - my $firstline = ReadProfileHeader(*TFILE); - close(TFILE); - if (!$firstline) { - return 0; - } - $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $symbol_marker = $&; - return $firstline =~ /^--- *$symbol_marker/; -} - -# Parse profile generated by common/profiler.cc and return a reference -# to a map: -# $result->{version} Version number of profile file -# $result->{period} Sampling period (in microseconds) -# $result->{profile} Profile object -# $result->{threads} Map of thread IDs to profile objects -# $result->{map} Memory map info from profile -# $result->{pcs} Hash of all PC values seen, key is hex address -sub ReadProfile { - my $prog = shift; - my $fname = shift; - my $result; # return value - - $CONTENTION_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $contention_marker = $&; - $GROWTH_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $growth_marker = $&; - $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $symbol_marker = $&; - $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $profile_marker = $&; - $HEAP_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $heap_marker = $&; - - # Look at first line to see if it is a heap or a CPU profile. - # CPU profile may start with no header at all, and just binary data - # (starting with \0\0\0\0) -- in that case, don't try to read the - # whole firstline, since it may be gigabytes(!) of data. - open(PROFILE, "<$fname") || error("$fname: $!\n"); - binmode PROFILE; # New perls do UTF-8 processing - my $header = ReadProfileHeader(*PROFILE); - if (!defined($header)) { # means "at EOF" - error("Profile is empty.\n"); - } - - my $symbols; - if ($header =~ m/^--- *$symbol_marker/o) { - # Verify that the user asked for a symbolized profile - if (!$main::use_symbolized_profile) { - # we have both a binary and symbolized profiles, abort - error("FATAL ERROR: Symbolized profile\n $fname\ncannot be used with " . - "a binary arg. Try again without passing\n $prog\n"); - } - # Read the symbol section of the symbolized profile file. - $symbols = ReadSymbols(*PROFILE{IO}); - # Read the next line to get the header for the remaining profile. - $header = ReadProfileHeader(*PROFILE) || ""; - } - - if ($header =~ m/^--- *($heap_marker|$growth_marker)/o) { - # Skip "--- ..." line for profile types that have their own headers. - $header = ReadProfileHeader(*PROFILE) || ""; - } - - $main::profile_type = ''; - - if ($header =~ m/^heap profile:.*$growth_marker/o) { - $main::profile_type = 'growth'; - $result = ReadHeapProfile($prog, *PROFILE, $header); - } elsif ($header =~ m/^heap profile:/) { - $main::profile_type = 'heap'; - $result = ReadHeapProfile($prog, *PROFILE, $header); - } elsif ($header =~ m/^heap/) { - $main::profile_type = 'heap'; - $result = ReadThreadedHeapProfile($prog, $fname, $header); - } elsif ($header =~ m/^--- *$contention_marker/o) { - $main::profile_type = 'contention'; - $result = ReadSynchProfile($prog, *PROFILE); - } elsif ($header =~ m/^--- *Stacks:/) { - print STDERR - "Old format contention profile: mistakenly reports " . - "condition variable signals as lock contentions.\n"; - $main::profile_type = 'contention'; - $result = ReadSynchProfile($prog, *PROFILE); - } elsif ($header =~ m/^--- *$profile_marker/) { - # the binary cpu profile data starts immediately after this line - $main::profile_type = 'cpu'; - $result = ReadCPUProfile($prog, $fname, *PROFILE); - } else { - if (defined($symbols)) { - # a symbolized profile contains a format we don't recognize, bail out - error("$fname: Cannot recognize profile section after symbols.\n"); - } - # no ascii header present -- must be a CPU profile - $main::profile_type = 'cpu'; - $result = ReadCPUProfile($prog, $fname, *PROFILE); - } - - close(PROFILE); - - # if we got symbols along with the profile, return those as well - if (defined($symbols)) { - $result->{symbols} = $symbols; - } - - return $result; -} - -# Subtract one from caller pc so we map back to call instr. -# However, don't do this if we're reading a symbolized profile -# file, in which case the subtract-one was done when the file -# was written. -# -# We apply the same logic to all readers, though ReadCPUProfile uses an -# independent implementation. -sub FixCallerAddresses { - my $stack = shift; - # --raw/http: Always subtract one from pc's, because PrintSymbolizedProfile() - # dumps unadjusted profiles. - { - $stack =~ /(\s)/; - my $delimiter = $1; - my @addrs = split(' ', $stack); - my @fixedaddrs; - $#fixedaddrs = $#addrs; - if ($#addrs >= 0) { - $fixedaddrs[0] = $addrs[0]; - } - for (my $i = 1; $i <= $#addrs; $i++) { - $fixedaddrs[$i] = AddressSub($addrs[$i], "0x1"); - } - return join $delimiter, @fixedaddrs; - } -} - -# CPU profile reader -sub ReadCPUProfile { - my $prog = shift; - my $fname = shift; # just used for logging - local *PROFILE = shift; - my $version; - my $period; - my $i; - my $profile = {}; - my $pcs = {}; - - # Parse string into array of slots. - my $slots = CpuProfileStream->new(*PROFILE, $fname); - - # Read header. The current header version is a 5-element structure - # containing: - # 0: header count (always 0) - # 1: header "words" (after this one: 3) - # 2: format version (0) - # 3: sampling period (usec) - # 4: unused padding (always 0) - if ($slots->get(0) != 0 ) { - error("$fname: not a profile file, or old format profile file\n"); - } - $i = 2 + $slots->get(1); - $version = $slots->get(2); - $period = $slots->get(3); - # Do some sanity checking on these header values. - if ($version > (2**32) || $period > (2**32) || $i > (2**32) || $i < 5) { - error("$fname: not a profile file, or corrupted profile file\n"); - } - - # Parse profile - while ($slots->get($i) != -1) { - my $n = $slots->get($i++); - my $d = $slots->get($i++); - if ($d > (2**16)) { # TODO(csilvers): what's a reasonable max-stack-depth? - my $addr = sprintf("0%o", $i * ($address_length == 8 ? 4 : 8)); - print STDERR "At index $i (address $addr):\n"; - error("$fname: stack trace depth >= 2**32\n"); - } - if ($slots->get($i) == 0) { - # End of profile data marker - $i += $d; - last; - } - - # Make key out of the stack entries - my @k = (); - for (my $j = 0; $j < $d; $j++) { - my $pc = $slots->get($i+$j); - # Subtract one from caller pc so we map back to call instr. - $pc--; - $pc = sprintf("%0*x", $address_length, $pc); - $pcs->{$pc} = 1; - push @k, $pc; - } - - AddEntry($profile, (join "\n", @k), $n); - $i += $d; - } - - # Parse map - my $map = ''; - seek(PROFILE, $i * 4, 0); - read(PROFILE, $map, (stat PROFILE)[7]); - - my $r = {}; - $r->{version} = $version; - $r->{period} = $period; - $r->{profile} = $profile; - $r->{libs} = ParseLibraries($prog, $map, $pcs); - $r->{pcs} = $pcs; - - return $r; -} - -sub HeapProfileIndex { - my $index = 1; - if ($main::opt_inuse_space) { - $index = 1; - } elsif ($main::opt_inuse_objects) { - $index = 0; - } elsif ($main::opt_alloc_space) { - $index = 3; - } elsif ($main::opt_alloc_objects) { - $index = 2; - } - return $index; -} - -sub ReadMappedLibraries { - my $fh = shift; - my $map = ""; - # Read the /proc/self/maps data - while (<$fh>) { - s/\r//g; # turn windows-looking lines into unix-looking lines - $map .= $_; - } - return $map; -} - -sub ReadMemoryMap { - my $fh = shift; - my $map = ""; - # Read /proc/self/maps data as formatted by DumpAddressMap() - my $buildvar = ""; - while (<PROFILE>) { - s/\r//g; # turn windows-looking lines into unix-looking lines - # Parse "build=<dir>" specification if supplied - if (m/^\s*build=(.*)\n/) { - $buildvar = $1; - } - - # Expand "$build" variable if available - $_ =~ s/\$build\b/$buildvar/g; - - $map .= $_; - } - return $map; -} - -sub AdjustSamples { - my ($sample_adjustment, $sampling_algorithm, $n1, $s1, $n2, $s2) = @_; - if ($sample_adjustment) { - if ($sampling_algorithm == 2) { - # Remote-heap version 2 - # The sampling frequency is the rate of a Poisson process. - # This means that the probability of sampling an allocation of - # size X with sampling rate Y is 1 - exp(-X/Y) - if ($n1 != 0) { - my $ratio = (($s1*1.0)/$n1)/($sample_adjustment); - my $scale_factor = 1/(1 - exp(-$ratio)); - $n1 *= $scale_factor; - $s1 *= $scale_factor; - } - if ($n2 != 0) { - my $ratio = (($s2*1.0)/$n2)/($sample_adjustment); - my $scale_factor = 1/(1 - exp(-$ratio)); - $n2 *= $scale_factor; - $s2 *= $scale_factor; - } - } else { - # Remote-heap version 1 - my $ratio; - $ratio = (($s1*1.0)/$n1)/($sample_adjustment); - if ($ratio < 1) { - $n1 /= $ratio; - $s1 /= $ratio; - } - $ratio = (($s2*1.0)/$n2)/($sample_adjustment); - if ($ratio < 1) { - $n2 /= $ratio; - $s2 /= $ratio; - } - } - } - return ($n1, $s1, $n2, $s2); -} - -sub ReadHeapProfile { - my $prog = shift; - local *PROFILE = shift; - my $header = shift; - - my $index = HeapProfileIndex(); - - # Find the type of this profile. The header line looks like: - # heap profile: 1246: 8800744 [ 1246: 8800744] @ <heap-url>/266053 - # There are two pairs <count: size>, the first inuse objects/space, and the - # second allocated objects/space. This is followed optionally by a profile - # type, and if that is present, optionally by a sampling frequency. - # For remote heap profiles (v1): - # The interpretation of the sampling frequency is that the profiler, for - # each sample, calculates a uniformly distributed random integer less than - # the given value, and records the next sample after that many bytes have - # been allocated. Therefore, the expected sample interval is half of the - # given frequency. By default, if not specified, the expected sample - # interval is 128KB. Only remote-heap-page profiles are adjusted for - # sample size. - # For remote heap profiles (v2): - # The sampling frequency is the rate of a Poisson process. This means that - # the probability of sampling an allocation of size X with sampling rate Y - # is 1 - exp(-X/Y) - # For version 2, a typical header line might look like this: - # heap profile: 1922: 127792360 [ 1922: 127792360] @ <heap-url>_v2/524288 - # the trailing number (524288) is the sampling rate. (Version 1 showed - # double the 'rate' here) - my $sampling_algorithm = 0; - my $sample_adjustment = 0; - chomp($header); - my $type = "unknown"; - if ($header =~ m"^heap profile:\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\](\s*@\s*([^/]*)(/(\d+))?)?") { - if (defined($6) && ($6 ne '')) { - $type = $6; - my $sample_period = $8; - # $type is "heapprofile" for profiles generated by the - # heap-profiler, and either "heap" or "heap_v2" for profiles - # generated by sampling directly within tcmalloc. It can also - # be "growth" for heap-growth profiles. The first is typically - # found for profiles generated locally, and the others for - # remote profiles. - if (($type eq "heapprofile") || ($type !~ /heap/) ) { - # No need to adjust for the sampling rate with heap-profiler-derived data - $sampling_algorithm = 0; - } elsif ($type =~ /_v2/) { - $sampling_algorithm = 2; # version 2 sampling - if (defined($sample_period) && ($sample_period ne '')) { - $sample_adjustment = int($sample_period); - } - } else { - $sampling_algorithm = 1; # version 1 sampling - if (defined($sample_period) && ($sample_period ne '')) { - $sample_adjustment = int($sample_period)/2; - } - } - } else { - # We detect whether or not this is a remote-heap profile by checking - # that the total-allocated stats ($n2,$s2) are exactly the - # same as the in-use stats ($n1,$s1). It is remotely conceivable - # that a non-remote-heap profile may pass this check, but it is hard - # to imagine how that could happen. - # In this case it's so old it's guaranteed to be remote-heap version 1. - my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4); - if (($n1 == $n2) && ($s1 == $s2)) { - # This is likely to be a remote-heap based sample profile - $sampling_algorithm = 1; - } - } - } - - if ($sampling_algorithm > 0) { - # For remote-heap generated profiles, adjust the counts and sizes to - # account for the sample rate (we sample once every 128KB by default). - if ($sample_adjustment == 0) { - # Turn on profile adjustment. - $sample_adjustment = 128*1024; - print STDERR "Adjusting heap profiles for 1-in-128KB sampling rate\n"; - } else { - printf STDERR ("Adjusting heap profiles for 1-in-%d sampling rate\n", - $sample_adjustment); - } - if ($sampling_algorithm > 1) { - # We don't bother printing anything for the original version (version 1) - printf STDERR "Heap version $sampling_algorithm\n"; - } - } - - my $profile = {}; - my $pcs = {}; - my $map = ""; - - while (<PROFILE>) { - s/\r//g; # turn windows-looking lines into unix-looking lines - if (/^MAPPED_LIBRARIES:/) { - $map .= ReadMappedLibraries(*PROFILE); - last; - } - - if (/^--- Memory map:/) { - $map .= ReadMemoryMap(*PROFILE); - last; - } - - # Read entry of the form: - # <count1>: <bytes1> [<count2>: <bytes2>] @ a1 a2 a3 ... an - s/^\s*//; - s/\s*$//; - if (m/^\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\]\s+@\s+(.*)$/) { - my $stack = $5; - my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4); - my @counts = AdjustSamples($sample_adjustment, $sampling_algorithm, - $n1, $s1, $n2, $s2); - AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]); - } - } - - my $r = {}; - $r->{version} = "heap"; - $r->{period} = 1; - $r->{profile} = $profile; - $r->{libs} = ParseLibraries($prog, $map, $pcs); - $r->{pcs} = $pcs; - return $r; -} - -sub ReadThreadedHeapProfile { - my ($prog, $fname, $header) = @_; - - my $index = HeapProfileIndex(); - my $sampling_algorithm = 0; - my $sample_adjustment = 0; - chomp($header); - my $type = "unknown"; - # Assuming a very specific type of header for now. - if ($header =~ m"^heap_v2/(\d+)") { - $type = "_v2"; - $sampling_algorithm = 2; - $sample_adjustment = int($1); - } - if ($type ne "_v2" || !defined($sample_adjustment)) { - die "Threaded heap profiles require v2 sampling with a sample rate\n"; - } - - my $profile = {}; - my $thread_profiles = {}; - my $pcs = {}; - my $map = ""; - my $stack = ""; - - while (<PROFILE>) { - s/\r//g; - if (/^MAPPED_LIBRARIES:/) { - $map .= ReadMappedLibraries(*PROFILE); - last; - } - - if (/^--- Memory map:/) { - $map .= ReadMemoryMap(*PROFILE); - last; - } - - # Read entry of the form: - # @ a1 a2 ... an - # t*: <count1>: <bytes1> [<count2>: <bytes2>] - # t1: <count1>: <bytes1> [<count2>: <bytes2>] - # ... - # tn: <count1>: <bytes1> [<count2>: <bytes2>] - s/^\s*//; - s/\s*$//; - if (m/^@\s+(.*)$/) { - $stack = $1; - } elsif (m/^\s*(t(\*|\d+)):\s+(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\]$/) { - if ($stack eq "") { - # Still in the header, so this is just a per-thread summary. - next; - } - my $thread = $2; - my ($n1, $s1, $n2, $s2) = ($3, $4, $5, $6); - my @counts = AdjustSamples($sample_adjustment, $sampling_algorithm, - $n1, $s1, $n2, $s2); - if ($thread eq "*") { - AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]); - } else { - if (!exists($thread_profiles->{$thread})) { - $thread_profiles->{$thread} = {}; - } - AddEntries($thread_profiles->{$thread}, $pcs, - FixCallerAddresses($stack), $counts[$index]); - } - } - } - - my $r = {}; - $r->{version} = "heap"; - $r->{period} = 1; - $r->{profile} = $profile; - $r->{threads} = $thread_profiles; - $r->{libs} = ParseLibraries($prog, $map, $pcs); - $r->{pcs} = $pcs; - return $r; -} - -sub ReadSynchProfile { - my $prog = shift; - local *PROFILE = shift; - my $header = shift; - - my $map = ''; - my $profile = {}; - my $pcs = {}; - my $sampling_period = 1; - my $cyclespernanosec = 2.8; # Default assumption for old binaries - my $seen_clockrate = 0; - my $line; - - my $index = 0; - if ($main::opt_total_delay) { - $index = 0; - } elsif ($main::opt_contentions) { - $index = 1; - } elsif ($main::opt_mean_delay) { - $index = 2; - } - - while ( $line = <PROFILE> ) { - $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines - if ( $line =~ /^\s*(\d+)\s+(\d+) \@\s*(.*?)\s*$/ ) { - my ($cycles, $count, $stack) = ($1, $2, $3); - - # Convert cycles to nanoseconds - $cycles /= $cyclespernanosec; - - # Adjust for sampling done by application - $cycles *= $sampling_period; - $count *= $sampling_period; - - my @values = ($cycles, $count, $cycles / $count); - AddEntries($profile, $pcs, FixCallerAddresses($stack), $values[$index]); - - } elsif ( $line =~ /^(slow release).*thread \d+ \@\s*(.*?)\s*$/ || - $line =~ /^\s*(\d+) \@\s*(.*?)\s*$/ ) { - my ($cycles, $stack) = ($1, $2); - if ($cycles !~ /^\d+$/) { - next; - } - - # Convert cycles to nanoseconds - $cycles /= $cyclespernanosec; - - # Adjust for sampling done by application - $cycles *= $sampling_period; - - AddEntries($profile, $pcs, FixCallerAddresses($stack), $cycles); - - } elsif ( $line =~ m/^([a-z][^=]*)=(.*)$/ ) { - my ($variable, $value) = ($1,$2); - for ($variable, $value) { - s/^\s+//; - s/\s+$//; - } - if ($variable eq "cycles/second") { - $cyclespernanosec = $value / 1e9; - $seen_clockrate = 1; - } elsif ($variable eq "sampling period") { - $sampling_period = $value; - } elsif ($variable eq "ms since reset") { - # Currently nothing is done with this value in jeprof - # So we just silently ignore it for now - } elsif ($variable eq "discarded samples") { - # Currently nothing is done with this value in jeprof - # So we just silently ignore it for now - } else { - printf STDERR ("Ignoring unnknown variable in /contention output: " . - "'%s' = '%s'\n",$variable,$value); - } - } else { - # Memory map entry - $map .= $line; - } - } - - if (!$seen_clockrate) { - printf STDERR ("No cycles/second entry in profile; Guessing %.1f GHz\n", - $cyclespernanosec); - } - - my $r = {}; - $r->{version} = 0; - $r->{period} = $sampling_period; - $r->{profile} = $profile; - $r->{libs} = ParseLibraries($prog, $map, $pcs); - $r->{pcs} = $pcs; - return $r; -} - -# Given a hex value in the form "0x1abcd" or "1abcd", return either -# "0001abcd" or "000000000001abcd", depending on the current (global) -# address length. -sub HexExtend { - my $addr = shift; - - $addr =~ s/^(0x)?0*//; - my $zeros_needed = $address_length - length($addr); - if ($zeros_needed < 0) { - printf STDERR "Warning: address $addr is longer than address length $address_length\n"; - return $addr; - } - return ("0" x $zeros_needed) . $addr; -} - -##### Symbol extraction ##### - -# Aggressively search the lib_prefix values for the given library -# If all else fails, just return the name of the library unmodified. -# If the lib_prefix is "/my/path,/other/path" and $file is "/lib/dir/mylib.so" -# it will search the following locations in this order, until it finds a file: -# /my/path/lib/dir/mylib.so -# /other/path/lib/dir/mylib.so -# /my/path/dir/mylib.so -# /other/path/dir/mylib.so -# /my/path/mylib.so -# /other/path/mylib.so -# /lib/dir/mylib.so (returned as last resort) -sub FindLibrary { - my $file = shift; - my $suffix = $file; - - # Search for the library as described above - do { - foreach my $prefix (@prefix_list) { - my $fullpath = $prefix . $suffix; - if (-e $fullpath) { - return $fullpath; - } - } - } while ($suffix =~ s|^/[^/]+/|/|); - return $file; -} - -# Return path to library with debugging symbols. -# For libc libraries, the copy in /usr/lib/debug contains debugging symbols -sub DebuggingLibrary { - my $file = shift; - if ($file =~ m|^/|) { - if (-f "/usr/lib/debug$file") { - return "/usr/lib/debug$file"; - } elsif (-f "/usr/lib/debug$file.debug") { - return "/usr/lib/debug$file.debug"; - } - } - return undef; -} - -# Parse text section header of a library using objdump -sub ParseTextSectionHeaderFromObjdump { - my $lib = shift; - - my $size = undef; - my $vma; - my $file_offset; - # Get objdump output from the library file to figure out how to - # map between mapped addresses and addresses in the library. - my $cmd = ShellEscape($obj_tool_map{"objdump"}, "-h", $lib); - open(OBJDUMP, "$cmd |") || error("$cmd: $!\n"); - while (<OBJDUMP>) { - s/\r//g; # turn windows-looking lines into unix-looking lines - # Idx Name Size VMA LMA File off Algn - # 10 .text 00104b2c 420156f0 420156f0 000156f0 2**4 - # For 64-bit objects, VMA and LMA will be 16 hex digits, size and file - # offset may still be 8. But AddressSub below will still handle that. - my @x = split; - if (($#x >= 6) && ($x[1] eq '.text')) { - $size = $x[2]; - $vma = $x[3]; - $file_offset = $x[5]; - last; - } - } - close(OBJDUMP); - - if (!defined($size)) { - return undef; - } - - my $r = {}; - $r->{size} = $size; - $r->{vma} = $vma; - $r->{file_offset} = $file_offset; - - return $r; -} - -# Parse text section header of a library using otool (on OS X) -sub ParseTextSectionHeaderFromOtool { - my $lib = shift; - - my $size = undef; - my $vma = undef; - my $file_offset = undef; - # Get otool output from the library file to figure out how to - # map between mapped addresses and addresses in the library. - my $command = ShellEscape($obj_tool_map{"otool"}, "-l", $lib); - open(OTOOL, "$command |") || error("$command: $!\n"); - my $cmd = ""; - my $sectname = ""; - my $segname = ""; - foreach my $line (<OTOOL>) { - $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines - # Load command <#> - # cmd LC_SEGMENT - # [...] - # Section - # sectname __text - # segname __TEXT - # addr 0x000009f8 - # size 0x00018b9e - # offset 2552 - # align 2^2 (4) - # We will need to strip off the leading 0x from the hex addresses, - # and convert the offset into hex. - if ($line =~ /Load command/) { - $cmd = ""; - $sectname = ""; - $segname = ""; - } elsif ($line =~ /Section/) { - $sectname = ""; - $segname = ""; - } elsif ($line =~ /cmd (\w+)/) { - $cmd = $1; - } elsif ($line =~ /sectname (\w+)/) { - $sectname = $1; - } elsif ($line =~ /segname (\w+)/) { - $segname = $1; - } elsif (!(($cmd eq "LC_SEGMENT" || $cmd eq "LC_SEGMENT_64") && - $sectname eq "__text" && - $segname eq "__TEXT")) { - next; - } elsif ($line =~ /\baddr 0x([0-9a-fA-F]+)/) { - $vma = $1; - } elsif ($line =~ /\bsize 0x([0-9a-fA-F]+)/) { - $size = $1; - } elsif ($line =~ /\boffset ([0-9]+)/) { - $file_offset = sprintf("%016x", $1); - } - if (defined($vma) && defined($size) && defined($file_offset)) { - last; - } - } - close(OTOOL); - - if (!defined($vma) || !defined($size) || !defined($file_offset)) { - return undef; - } - - my $r = {}; - $r->{size} = $size; - $r->{vma} = $vma; - $r->{file_offset} = $file_offset; - - return $r; -} - -sub ParseTextSectionHeader { - # obj_tool_map("otool") is only defined if we're in a Mach-O environment - if (defined($obj_tool_map{"otool"})) { - my $r = ParseTextSectionHeaderFromOtool(@_); - if (defined($r)){ - return $r; - } - } - # If otool doesn't work, or we don't have it, fall back to objdump - return ParseTextSectionHeaderFromObjdump(@_); -} - -# Split /proc/pid/maps dump into a list of libraries -sub ParseLibraries { - return if $main::use_symbol_page; # We don't need libraries info. - my $prog = shift; - my $map = shift; - my $pcs = shift; - - my $result = []; - my $h = "[a-f0-9]+"; - my $zero_offset = HexExtend("0"); - - my $buildvar = ""; - foreach my $l (split("\n", $map)) { - if ($l =~ m/^\s*build=(.*)$/) { - $buildvar = $1; - } - - my $start; - my $finish; - my $offset; - my $lib; - if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib|bundle)((\.\d+)+\w*(\.\d+){0,3})?)$/i) { - # Full line from /proc/self/maps. Example: - # 40000000-40015000 r-xp 00000000 03:01 12845071 /lib/ld-2.3.2.so - $start = HexExtend($1); - $finish = HexExtend($2); - $offset = HexExtend($3); - $lib = $4; - $lib =~ s|\\|/|g; # turn windows-style paths into unix-style paths - } elsif ($l =~ /^\s*($h)-($h):\s*(\S+\.so(\.\d+)*)/) { - # Cooked line from DumpAddressMap. Example: - # 40000000-40015000: /lib/ld-2.3.2.so - $start = HexExtend($1); - $finish = HexExtend($2); - $offset = $zero_offset; - $lib = $3; - } - # FreeBSD 10.0 virtual memory map /proc/curproc/map as defined in - # function procfs_doprocmap (sys/fs/procfs/procfs_map.c) - # - # Example: - # 0x800600000 0x80061a000 26 0 0xfffff800035a0000 r-x 75 33 0x1004 COW NC vnode /libexec/ld-elf.s - # o.1 NCH -1 - elsif ($l =~ /^(0x$h)\s(0x$h)\s\d+\s\d+\s0x$h\sr-x\s\d+\s\d+\s0x\d+\s(COW|NCO)\s(NC|NNC)\svnode\s(\S+\.so(\.\d+)*)/) { - $start = HexExtend($1); - $finish = HexExtend($2); - $offset = $zero_offset; - $lib = FindLibrary($5); - - } else { - next; - } - - # Expand "$build" variable if available - $lib =~ s/\$build\b/$buildvar/g; - - $lib = FindLibrary($lib); - - # Check for pre-relocated libraries, which use pre-relocated symbol tables - # and thus require adjusting the offset that we'll use to translate - # VM addresses into symbol table addresses. - # Only do this if we're not going to fetch the symbol table from a - # debugging copy of the library. - if (!DebuggingLibrary($lib)) { - my $text = ParseTextSectionHeader($lib); - if (defined($text)) { - my $vma_offset = AddressSub($text->{vma}, $text->{file_offset}); - $offset = AddressAdd($offset, $vma_offset); - } - } - - if($main::opt_debug) { printf STDERR "$start:$finish ($offset) $lib\n"; } - push(@{$result}, [$lib, $start, $finish, $offset]); - } - - # Append special entry for additional library (not relocated) - if ($main::opt_lib ne "") { - my $text = ParseTextSectionHeader($main::opt_lib); - if (defined($text)) { - my $start = $text->{vma}; - my $finish = AddressAdd($start, $text->{size}); - - push(@{$result}, [$main::opt_lib, $start, $finish, $start]); - } - } - - # Append special entry for the main program. This covers - # 0..max_pc_value_seen, so that we assume pc values not found in one - # of the library ranges will be treated as coming from the main - # program binary. - my $min_pc = HexExtend("0"); - my $max_pc = $min_pc; # find the maximal PC value in any sample - foreach my $pc (keys(%{$pcs})) { - if (HexExtend($pc) gt $max_pc) { $max_pc = HexExtend($pc); } - } - push(@{$result}, [$prog, $min_pc, $max_pc, $zero_offset]); - - return $result; -} - -# Add two hex addresses of length $address_length. -# Run jeprof --test for unit test if this is changed. -sub AddressAdd { - my $addr1 = shift; - my $addr2 = shift; - my $sum; - - if ($address_length == 8) { - # Perl doesn't cope with wraparound arithmetic, so do it explicitly: - $sum = (hex($addr1)+hex($addr2)) % (0x10000000 * 16); - return sprintf("%08x", $sum); - - } else { - # Do the addition in 7-nibble chunks to trivialize carry handling. - - if ($main::opt_debug and $main::opt_test) { - print STDERR "AddressAdd $addr1 + $addr2 = "; - } - - my $a1 = substr($addr1,-7); - $addr1 = substr($addr1,0,-7); - my $a2 = substr($addr2,-7); - $addr2 = substr($addr2,0,-7); - $sum = hex($a1) + hex($a2); - my $c = 0; - if ($sum > 0xfffffff) { - $c = 1; - $sum -= 0x10000000; - } - my $r = sprintf("%07x", $sum); - - $a1 = substr($addr1,-7); - $addr1 = substr($addr1,0,-7); - $a2 = substr($addr2,-7); - $addr2 = substr($addr2,0,-7); - $sum = hex($a1) + hex($a2) + $c; - $c = 0; - if ($sum > 0xfffffff) { - $c = 1; - $sum -= 0x10000000; - } - $r = sprintf("%07x", $sum) . $r; - - $sum = hex($addr1) + hex($addr2) + $c; - if ($sum > 0xff) { $sum -= 0x100; } - $r = sprintf("%02x", $sum) . $r; - - if ($main::opt_debug and $main::opt_test) { print STDERR "$r\n"; } - - return $r; - } -} - - -# Subtract two hex addresses of length $address_length. -# Run jeprof --test for unit test if this is changed. -sub AddressSub { - my $addr1 = shift; - my $addr2 = shift; - my $diff; - - if ($address_length == 8) { - # Perl doesn't cope with wraparound arithmetic, so do it explicitly: - $diff = (hex($addr1)-hex($addr2)) % (0x10000000 * 16); - return sprintf("%08x", $diff); - - } else { - # Do the addition in 7-nibble chunks to trivialize borrow handling. - # if ($main::opt_debug) { print STDERR "AddressSub $addr1 - $addr2 = "; } - - my $a1 = hex(substr($addr1,-7)); - $addr1 = substr($addr1,0,-7); - my $a2 = hex(substr($addr2,-7)); - $addr2 = substr($addr2,0,-7); - my $b = 0; - if ($a2 > $a1) { - $b = 1; - $a1 += 0x10000000; - } - $diff = $a1 - $a2; - my $r = sprintf("%07x", $diff); - - $a1 = hex(substr($addr1,-7)); - $addr1 = substr($addr1,0,-7); - $a2 = hex(substr($addr2,-7)) + $b; - $addr2 = substr($addr2,0,-7); - $b = 0; - if ($a2 > $a1) { - $b = 1; - $a1 += 0x10000000; - } - $diff = $a1 - $a2; - $r = sprintf("%07x", $diff) . $r; - - $a1 = hex($addr1); - $a2 = hex($addr2) + $b; - if ($a2 > $a1) { $a1 += 0x100; } - $diff = $a1 - $a2; - $r = sprintf("%02x", $diff) . $r; - - # if ($main::opt_debug) { print STDERR "$r\n"; } - - return $r; - } -} - -# Increment a hex addresses of length $address_length. -# Run jeprof --test for unit test if this is changed. -sub AddressInc { - my $addr = shift; - my $sum; - - if ($address_length == 8) { - # Perl doesn't cope with wraparound arithmetic, so do it explicitly: - $sum = (hex($addr)+1) % (0x10000000 * 16); - return sprintf("%08x", $sum); - - } else { - # Do the addition in 7-nibble chunks to trivialize carry handling. - # We are always doing this to step through the addresses in a function, - # and will almost never overflow the first chunk, so we check for this - # case and exit early. - - # if ($main::opt_debug) { print STDERR "AddressInc $addr1 = "; } - - my $a1 = substr($addr,-7); - $addr = substr($addr,0,-7); - $sum = hex($a1) + 1; - my $r = sprintf("%07x", $sum); - if ($sum <= 0xfffffff) { - $r = $addr . $r; - # if ($main::opt_debug) { print STDERR "$r\n"; } - return HexExtend($r); - } else { - $r = "0000000"; - } - - $a1 = substr($addr,-7); - $addr = substr($addr,0,-7); - $sum = hex($a1) + 1; - $r = sprintf("%07x", $sum) . $r; - if ($sum <= 0xfffffff) { - $r = $addr . $r; - # if ($main::opt_debug) { print STDERR "$r\n"; } - return HexExtend($r); - } else { - $r = "00000000000000"; - } - - $sum = hex($addr) + 1; - if ($sum > 0xff) { $sum -= 0x100; } - $r = sprintf("%02x", $sum) . $r; - - # if ($main::opt_debug) { print STDERR "$r\n"; } - return $r; - } -} - -# Extract symbols for all PC values found in profile -sub ExtractSymbols { - my $libs = shift; - my $pcset = shift; - - my $symbols = {}; - - # Map each PC value to the containing library. To make this faster, - # we sort libraries by their starting pc value (highest first), and - # advance through the libraries as we advance the pc. Sometimes the - # addresses of libraries may overlap with the addresses of the main - # binary, so to make sure the libraries 'win', we iterate over the - # libraries in reverse order (which assumes the binary doesn't start - # in the middle of a library, which seems a fair assumption). - my @pcs = (sort { $a cmp $b } keys(%{$pcset})); # pcset is 0-extended strings - foreach my $lib (sort {$b->[1] cmp $a->[1]} @{$libs}) { - my $libname = $lib->[0]; - my $start = $lib->[1]; - my $finish = $lib->[2]; - my $offset = $lib->[3]; - - # Use debug library if it exists - my $debug_libname = DebuggingLibrary($libname); - if ($debug_libname) { - $libname = $debug_libname; - } - - # Get list of pcs that belong in this library. - my $contained = []; - my ($start_pc_index, $finish_pc_index); - # Find smallest finish_pc_index such that $finish < $pc[$finish_pc_index]. - for ($finish_pc_index = $#pcs + 1; $finish_pc_index > 0; - $finish_pc_index--) { - last if $pcs[$finish_pc_index - 1] le $finish; - } - # Find smallest start_pc_index such that $start <= $pc[$start_pc_index]. - for ($start_pc_index = $finish_pc_index; $start_pc_index > 0; - $start_pc_index--) { - last if $pcs[$start_pc_index - 1] lt $start; - } - # This keeps PC values higher than $pc[$finish_pc_index] in @pcs, - # in case there are overlaps in libraries and the main binary. - @{$contained} = splice(@pcs, $start_pc_index, - $finish_pc_index - $start_pc_index); - # Map to symbols - MapToSymbols($libname, AddressSub($start, $offset), $contained, $symbols); - } - - return $symbols; -} - -# Map list of PC values to symbols for a given image -sub MapToSymbols { - my $image = shift; - my $offset = shift; - my $pclist = shift; - my $symbols = shift; - - my $debug = 0; - - # Ignore empty binaries - if ($#{$pclist} < 0) { return; } - - # Figure out the addr2line command to use - my $addr2line = $obj_tool_map{"addr2line"}; - my $cmd = ShellEscape($addr2line, "-f", "-C", "-e", $image); - if (exists $obj_tool_map{"addr2line_pdb"}) { - $addr2line = $obj_tool_map{"addr2line_pdb"}; - $cmd = ShellEscape($addr2line, "--demangle", "-f", "-C", "-e", $image); - } - - # If "addr2line" isn't installed on the system at all, just use - # nm to get what info we can (function names, but not line numbers). - if (system(ShellEscape($addr2line, "--help") . " >$dev_null 2>&1") != 0) { - MapSymbolsWithNM($image, $offset, $pclist, $symbols); - return; - } - - # "addr2line -i" can produce a variable number of lines per input - # address, with no separator that allows us to tell when data for - # the next address starts. So we find the address for a special - # symbol (_fini) and interleave this address between all real - # addresses passed to addr2line. The name of this special symbol - # can then be used as a separator. - $sep_address = undef; # May be filled in by MapSymbolsWithNM() - my $nm_symbols = {}; - MapSymbolsWithNM($image, $offset, $pclist, $nm_symbols); - if (defined($sep_address)) { - # Only add " -i" to addr2line if the binary supports it. - # addr2line --help returns 0, but not if it sees an unknown flag first. - if (system("$cmd -i --help >$dev_null 2>&1") == 0) { - $cmd .= " -i"; - } else { - $sep_address = undef; # no need for sep_address if we don't support -i - } - } - - # Make file with all PC values with intervening 'sep_address' so - # that we can reliably detect the end of inlined function list - open(ADDRESSES, ">$main::tmpfile_sym") || error("$main::tmpfile_sym: $!\n"); - if ($debug) { print("---- $image ---\n"); } - for (my $i = 0; $i <= $#{$pclist}; $i++) { - # addr2line always reads hex addresses, and does not need '0x' prefix. - if ($debug) { printf STDERR ("%s\n", $pclist->[$i]); } - printf ADDRESSES ("%s\n", AddressSub($pclist->[$i], $offset)); - if (defined($sep_address)) { - printf ADDRESSES ("%s\n", $sep_address); - } - } - close(ADDRESSES); - if ($debug) { - print("----\n"); - system("cat", $main::tmpfile_sym); - print("----\n"); - system("$cmd < " . ShellEscape($main::tmpfile_sym)); - print("----\n"); - } - - open(SYMBOLS, "$cmd <" . ShellEscape($main::tmpfile_sym) . " |") - || error("$cmd: $!\n"); - my $count = 0; # Index in pclist - while (<SYMBOLS>) { - # Read fullfunction and filelineinfo from next pair of lines - s/\r?\n$//g; - my $fullfunction = $_; - $_ = <SYMBOLS>; - s/\r?\n$//g; - my $filelinenum = $_; - - if (defined($sep_address) && $fullfunction eq $sep_symbol) { - # Terminating marker for data for this address - $count++; - next; - } - - $filelinenum =~ s|\\|/|g; # turn windows-style paths into unix-style paths - - my $pcstr = $pclist->[$count]; - my $function = ShortFunctionName($fullfunction); - my $nms = $nm_symbols->{$pcstr}; - if (defined($nms)) { - if ($fullfunction eq '??') { - # nm found a symbol for us. - $function = $nms->[0]; - $fullfunction = $nms->[2]; - } else { - # MapSymbolsWithNM tags each routine with its starting address, - # useful in case the image has multiple occurrences of this - # routine. (It uses a syntax that resembles template paramters, - # that are automatically stripped out by ShortFunctionName().) - # addr2line does not provide the same information. So we check - # if nm disambiguated our symbol, and if so take the annotated - # (nm) version of the routine-name. TODO(csilvers): this won't - # catch overloaded, inlined symbols, which nm doesn't see. - # Better would be to do a check similar to nm's, in this fn. - if ($nms->[2] =~ m/^\Q$function\E/) { # sanity check it's the right fn - $function = $nms->[0]; - $fullfunction = $nms->[2]; - } - } - } - - # Prepend to accumulated symbols for pcstr - # (so that caller comes before callee) - my $sym = $symbols->{$pcstr}; - if (!defined($sym)) { - $sym = []; - $symbols->{$pcstr} = $sym; - } - unshift(@{$sym}, $function, $filelinenum, $fullfunction); - if ($debug) { printf STDERR ("%s => [%s]\n", $pcstr, join(" ", @{$sym})); } - if (!defined($sep_address)) { - # Inlining is off, so this entry ends immediately - $count++; - } - } - close(SYMBOLS); -} - -# Use nm to map the list of referenced PCs to symbols. Return true iff we -# are able to read procedure information via nm. -sub MapSymbolsWithNM { - my $image = shift; - my $offset = shift; - my $pclist = shift; - my $symbols = shift; - - # Get nm output sorted by increasing address - my $symbol_table = GetProcedureBoundaries($image, "."); - if (!%{$symbol_table}) { - return 0; - } - # Start addresses are already the right length (8 or 16 hex digits). - my @names = sort { $symbol_table->{$a}->[0] cmp $symbol_table->{$b}->[0] } - keys(%{$symbol_table}); - - if ($#names < 0) { - # No symbols: just use addresses - foreach my $pc (@{$pclist}) { - my $pcstr = "0x" . $pc; - $symbols->{$pc} = [$pcstr, "?", $pcstr]; - } - return 0; - } - - # Sort addresses so we can do a join against nm output - my $index = 0; - my $fullname = $names[0]; - my $name = ShortFunctionName($fullname); - foreach my $pc (sort { $a cmp $b } @{$pclist}) { - # Adjust for mapped offset - my $mpc = AddressSub($pc, $offset); - while (($index < $#names) && ($mpc ge $symbol_table->{$fullname}->[1])){ - $index++; - $fullname = $names[$index]; - $name = ShortFunctionName($fullname); - } - if ($mpc lt $symbol_table->{$fullname}->[1]) { - $symbols->{$pc} = [$name, "?", $fullname]; - } else { - my $pcstr = "0x" . $pc; - $symbols->{$pc} = [$pcstr, "?", $pcstr]; - } - } - return 1; -} - -sub ShortFunctionName { - my $function = shift; - while ($function =~ s/\([^()]*\)(\s*const)?//g) { } # Argument types - while ($function =~ s/<[^<>]*>//g) { } # Remove template arguments - $function =~ s/^.*\s+(\w+::)/$1/; # Remove leading type - return $function; -} - -# Trim overly long symbols found in disassembler output -sub CleanDisassembly { - my $d = shift; - while ($d =~ s/\([^()%]*\)(\s*const)?//g) { } # Argument types, not (%rax) - while ($d =~ s/(\w+)<[^<>]*>/$1/g) { } # Remove template arguments - return $d; -} - -# Clean file name for display -sub CleanFileName { - my ($f) = @_; - $f =~ s|^/proc/self/cwd/||; - $f =~ s|^\./||; - return $f; -} - -# Make address relative to section and clean up for display -sub UnparseAddress { - my ($offset, $address) = @_; - $address = AddressSub($address, $offset); - $address =~ s/^0x//; - $address =~ s/^0*//; - return $address; -} - -##### Miscellaneous ##### - -# Find the right versions of the above object tools to use. The -# argument is the program file being analyzed, and should be an ELF -# 32-bit or ELF 64-bit executable file. The location of the tools -# is determined by considering the following options in this order: -# 1) --tools option, if set -# 2) JEPROF_TOOLS environment variable, if set -# 3) the environment -sub ConfigureObjTools { - my $prog_file = shift; - - # Check for the existence of $prog_file because /usr/bin/file does not - # predictably return error status in prod. - (-e $prog_file) || error("$prog_file does not exist.\n"); - - my $file_type = undef; - if (-e "/usr/bin/file") { - # Follow symlinks (at least for systems where "file" supports that). - my $escaped_prog_file = ShellEscape($prog_file); - $file_type = `/usr/bin/file -L $escaped_prog_file 2>$dev_null || - /usr/bin/file $escaped_prog_file`; - } elsif ($^O == "MSWin32") { - $file_type = "MS Windows"; - } else { - print STDERR "WARNING: Can't determine the file type of $prog_file"; - } - - if ($file_type =~ /64-bit/) { - # Change $address_length to 16 if the program file is ELF 64-bit. - # We can't detect this from many (most?) heap or lock contention - # profiles, since the actual addresses referenced are generally in low - # memory even for 64-bit programs. - $address_length = 16; - } - - if ($file_type =~ /MS Windows/) { - # For windows, we provide a version of nm and addr2line as part of - # the opensource release, which is capable of parsing - # Windows-style PDB executables. It should live in the path, or - # in the same directory as jeprof. - $obj_tool_map{"nm_pdb"} = "nm-pdb"; - $obj_tool_map{"addr2line_pdb"} = "addr2line-pdb"; - } - - if ($file_type =~ /Mach-O/) { - # OS X uses otool to examine Mach-O files, rather than objdump. - $obj_tool_map{"otool"} = "otool";