# massdiff.py
# author Alon Zakai <azakai@mozilla.com>
# Wed, 30 May 2012 16:31:59 -0700
# changeset 585 694acee4d0b4
# parent 492 38154e736dc8
# permissions -rw-r--r--
# work
'''
Parse a massif ms_print dump
============================

Usage: this_script.py FILE SNAPSHOT_1 SNAPSHOT_2

where

  FILE is an ms_print dump (note - not the raw massif output, but what ms_print renders from it!)
  SNAPSHOT_1 and SNAPSHOT_2 are the two snapshots to diff, given as 0-based positions in the dump
'''

import os, sys, re

def commify(x):
  '''
  Format a number with an explicit sign and comma thousands separators.

  The sign is always present: '+' for zero and positive values, '-' for
  negative ones. For example 1234567 becomes '+1,234,567'.
  '''
  sign = '+' if x >= 0 else '-'
  digits = str(abs(x))

  # Slice off groups of three characters, working from the right-hand end,
  # so that only the leftmost group can be shorter than three.
  groups = []
  end = len(digits)
  while end > 0:
    groups.append(digits[max(end - 3, 0):end])
    end -= 3

  return sign + ','.join(reversed(groups))

class Snapshot:
  '''
  Plain attribute bag for one snapshot row of the ms_print table.

  The class defines nothing itself; the script attaches attributes to
  instances as it parses (n, time, total, useful_heap, extra_heap, stacks,
  lines) and as it builds the call tree (roots).
  '''
class SnapshotLine:
  '''
  One detail line (one call-tree entry) of a snapshot.

  Instances carry no attributes of their own; __str__ expects the caller to
  have set indent, text and visual_bytes.
  '''
  def __str__(self):
    '''
    Render the line as "<indentation><text> - <bytes>".

    Indentation is two spaces per level, the byte count is right-aligned in
    a field of 10 characters, and the whole result is cut at 130 characters.
    '''
    padding = '  ' * self.indent
    rendered = '%s%s - %10s' % (padding, self.text, self.visual_bytes)
    return rendered[:130]

# Parse the ms_print dump named by the first command-line argument into
# `snapshots`: one Snapshot per table row, each with a flat `lines` list of
# SnapshotLine objects in file order.

snapshots = []
started = False

# A row of the snapshot table: six comma-grouped numeric columns.
TITLE_RE = re.compile(r' +(?P<n>[\d,]+) +(?P<time>[\d,]+) +(?P<total>[\d,]+) +(?P<useful_heap>[\d,]+) +(?P<extra_heap>[\d,]+) +(?P<stacks>[\d,]+)')
# Top of a detail tree: "<percent>% (<bytes>B) <text>" with no prefix.
ROOT_RE = re.compile(r'(?P<percent>[\d\.]+)\% \((?P<bytes>[\d,]+)B\) (?P<text>.*)')
# Nested detail line: the same, preceded by '|'/space padding and '->'.
CHILD_RE = re.compile(r'(?P<indent>[\| ]*)->(?P<percent>[\d\.]+)\% \((?P<bytes>[\d,]+)B\) (?P<text>.*)')

# 'with' guarantees the file is closed even if parsing raises.
with open(sys.argv[1], 'r') as dump_file:
  for line in dump_file:
    line = line.rstrip('\n')

    # Ignore everything up to and including the table header line(s).
    header = '  n       time(' in line
    started = started or header
    if not started or header: continue

    if line == '--------------------------------------------------------------------------------': continue

    # Snapshot title
    m = TITLE_RE.match(line)
    if m:
      snapshot = Snapshot()
      # Column values are kept as the original strings (commas included).
      for field in ('n', 'time', 'total', 'useful_heap', 'extra_heap', 'stacks'):
        setattr(snapshot, field, m.group(field))
      snapshot.lines = []
      snapshots.append(snapshot)
      # A title row starts with spaces followed by a digit, so it can never
      # match either detail pattern below.
      continue

    # Snapshot detail line
    m = ROOT_RE.match(line)
    if m:
      indent = 0
    else:
      m = CHILD_RE.match(line)
      if not m: continue
      # Each nesting level adds two characters of padding before '->'.
      # Floor division keeps the depth an int on Python 3 as well.
      indent = len(m.group('indent')) // 2 + 1

    snapshot_line = SnapshotLine()
    snapshot_line.indent = indent
    # float() rather than eval(): this text comes straight from the input
    # file and must never be executed as code.
    snapshot_line.percent = float(m.group('percent'))
    snapshot_line.bytes = int(m.group('bytes').replace(',', ''))
    # Keep the comma-grouped form as well, for display.
    snapshot_line.visual_bytes = m.group('bytes')
    snapshot_line.text = m.group('text')
    # Detail lines belong to the most recently seen snapshot row.
    snapshots[-1].lines.append(snapshot_line)


# Generate tree structure

# Link each snapshot's flat list of lines into a tree: depth-0 lines go into
# snapshot.roots, every other line into its parent's children, both keyed by
# the line text.
for snapshot in snapshots:
  roots = snapshot.roots = {}
  entries = snapshot.lines
  for position, entry in enumerate(entries):
    entry.children = {}
    if entry.indent == 0:
      roots[entry.text] = entry
      continue

    # The parent is the nearest preceding line that is one level shallower.
    parent_depth = entry.indent - 1
    cursor = position - 1
    while entries[cursor].indent != parent_depth:
      cursor -= 1
    entries[cursor].children[entry.text] = entry
  #print snapshot.n, snapshot.roots


# Dump tree

def dump_lines(lines):
  '''
  Debug helper: print a line, or every line in a collection, with all of its
  descendants.

  lines: a single line object, or an iterable of them (list, tuple, dict
         values view, ...). A single line is recognised by its `children`
         attribute.

  Each printed row is the first 40 characters of the line's __dict__,
  indented by one extra space per tree level. Nothing is returned.
  '''
  def printit(line, indent):
    # A single-argument print(...) is a print statement with a parenthesised
    # expression on Python 2 and a function call on Python 3; both emit the
    # same text.
    print('%s %s' % (indent, str(line.__dict__)[:40]))
    for child in line.children.values():
      printit(child, indent + ' ')

  # Checking for `children` rather than for the exact list/tuple type lets
  # dict views (what .values() returns on Python 3) be iterated too.
  if hasattr(lines, 'children'):
    lines = [lines]
  for line in lines:
    printit(line, '   ')

def dump_tree():
  '''
  Debug helper: print every parsed snapshot's `n` and roots dict, followed
  by its whole call tree via dump_lines(). Nothing is returned.
  '''
  # Single-argument print(...) emits the same text on Python 2 and 3.
  print('Tree:')
  for snapshot in snapshots:
    print('%s %s' % (snapshot.n, snapshot.roots))
    # list(...) so that dump_lines receives a real list on Python 3 too,
    # where .values() is a view object.
    dump_lines(list(snapshot.roots.values()))

#dump_tree()


# Diff two snapshots

def diff(i, j):
  '''
  Print the byte differences between two parsed snapshots, tree level by
  tree level.

  i, j: positions in the module-level `snapshots` list. Snapshot i is the
        "before" side (its lines are printed with '-') and snapshot j the
        "after" side (printed with '+').

  Entries present on only one side are printed on their own. Entries
  present on both sides are printed only when their byte counts differ,
  together with the signed difference, but their children are always
  compared. Within a level, entries are ordered by difference, largest
  growth first. Nothing is returned; the report goes to stdout.
  '''
  # Every print below passes one pre-formatted string, which is valid and
  # emits the same text on both Python 2 and Python 3.
  print('Diffing snapshots %s %s \n' % (i, j))
  a = snapshots[i]
  b = snapshots[j]

  def diff_dicts(d1, d2):
    # d1 and d2 map line text -> line object for one level of each tree.
    def delta_for(key):
      if key not in d2:
        return -d1[key].bytes
      if key not in d1:
        return d2[key].bytes
      return d2[key].bytes - d1[key].bytes

    # Set union works whether .keys() gives lists or views.
    data = [(key, delta_for(key)) for key in set(d1) | set(d2)]
    # Largest growth first; equal differences are ordered by text so the
    # report does not depend on set iteration order.
    data.sort(key=lambda datum: (-datum[1], datum[0]))

    for key, delta in data:
      if key not in d2:
        print('- %s \n' % (d1[key],))
      elif key not in d1:
        print('+ %s \n' % (d2[key],))
      else:
        if d1[key].bytes != d2[key].bytes:
          print('- %s' % (d1[key],))
          print('  '*(d1[key].indent+2) + '[ diff: %s ]' % commify(delta))
          print('+ %s \n' % (d2[key],))
        # Recurse even when this level is unchanged.
        diff_dicts(d1[key].children, d2[key].children)

  diff_dicts(a.roots, b.roots)

# Script entry point: the second and third command-line arguments select the
# two snapshots to compare.
first_snapshot = int(sys.argv[2])
second_snapshot = int(sys.argv[3])
diff(first_snapshot, second_snapshot)