unittest-log.py
author Ted Mielczarek <ted.mielczarek@gmail.com>
Mon, 10 Aug 2009 13:55:05 -0400
changeset 9 f130c2a6c5c00054d1ae3f026c412b84cd29415b
parent 8 1eb0b82cd788720944afb5467a1e68be0894d98b
child 10 e0b688516ffd4fb4e5cf9bf23532176ece98eb0e
permissions -rw-r--r--
change to use simplejson or py2.6 built-in json module

#!/usr/bin/env python
#

import re, os, sys, urllib, logging
import sqlite3 as sqlite
from time import time, tzset, mktime, localtime, strptime, strftime, sleep
from math import ceil
from optparse import OptionParser
from gzip import GzipFile
try:
  import simplejson as json
except ImportError:
  import json

# download data in chunks so as not to overwhelm the tinderbox server
chunksize = 1.0/7* 168 * 3600
# seconds between requests
sleeptime = 1

class OS():
  Windows = 0
  Mac = 1
  Linux = 2
  Unknown = 3

class BuildStatus():
  Success = 0
  TestFailed = 1
  Burning = 2
  Exception = 3
  Unknown = 4

csetre = re.compile("rev/([0-9A-Za-z]+)")
def FindChangesetInScrape(scrape):
    for line in scrape:
        m = csetre.search(line)
        if m:
            return m.group(1)
    return None

def OSFromBuilderName(name):
  if name.startswith("WINNT"):
    return OS.Windows
  elif name.startswith("Linux"):
    return OS.Linux
  elif name.startswith("OS X") or name.startswith("MacOSX"):
    return OS.Mac
  return OS.Unknown

def BuildStatusFromText(status):
  if status == "success":
    return BuildStatus.Success
  if status == "testfailed":
    return BuildStatus.TestFailed
  if status == "busted":
    return BuildStatus.Burning
  if status == "exception":
    return BuildStatus.Exception
  return BuildStatus.Unknown

def CreateDB(conn):
  conn.execute("CREATE TABLE trees (id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT)")
  conn.execute("CREATE TABLE builds (id INTEGER PRIMARY KEY AUTOINCREMENT, treeid INTEGER, os INTEGER, starttime INTEGER, status INTEGER, logfile TEXT, changeset TEXT)")
  conn.execute("CREATE TABLE tests (buildid INTEGER, name TEXT, description TEXT)")
  conn.execute("CREATE INDEX builds_starttime ON builds (starttime)")
  conn.execute("CREATE INDEX tests_name ON tests (name)")
  conn.commit()

def GetOrInsertTree(conn, tree):
  """Get an id for a tree named |tree|. If it's not already in the trees
  table, insert a new row and return the id."""
  rows = conn.execute("SELECT id from trees WHERE name = ?", (tree,)).fetchall()
  if len(rows) > 0:
    return rows[0][0]
  # need to insert it
  cur = conn.execute("INSERT INTO trees (name) VALUES(?)", (tree,))
  conn.commit()
  return cur.lastrowid

def HaveBuild(conn, treeid, os, starttime):
  """See if we already have this build in our database."""
  return conn.execute("SELECT COUNT(*) from builds WHERE treeid = ? AND os = ? AND starttime = ?", (treeid, os, starttime)).fetchone()[0] == 1

def UpdateLogfile(conn, treeid, os, starttime, logfile):
  """Update the logfile for a given build."""
  conn.execute("UPDATE builds SET logfile=? WHERE treeid=? AND os=? AND starttime=?", (logfile, treeid, os, starttime))

def InsertBuild(conn, treeid, os, starttime, status, logfile, changeset):
  """Insert a build into the builds table and return the id."""
  cur = conn.execute("INSERT INTO builds (treeid, os, starttime, status, logfile, changeset) VALUES(?, ?, ?, ?, ?, ?)", (treeid, os, starttime, status, logfile, changeset))
  return cur.lastrowid

def InsertTest(conn, buildid, name, description):
  conn.execute("INSERT INTO tests (buildid, name, description) VALUES(?, ?, ?)", (buildid, name, description))

def fix_tbox_json(s):
    """Fixes up tinderbox json.

    Tinderbox returns strings as single-quoted strings, and occasionally
    includes the unquoted substring 'undef' (with quotes) in the output, e.g.

    {'key': 'hello 'undef' world'}
    
    should return a dictionary 

    {'key': 'hello \'undef\' world'}
    """
    json_data = re.sub(r"^tinderbox_data\s*=\s*", "", s)
    json_data = re.sub(r";$", "", json_data)
    retval = []
    in_str = False
    in_esc = False
    skip = 0
    for i,c in enumerate(json_data):
        if skip > 0:
            skip -= 1
            continue
        if in_str:
            if in_esc:
                if c == "'":
                    retval.append("'")
                else:
                    retval.append("\\")
                    retval.append(c)
                in_esc = False
            elif c == "\\":
                in_esc = True
            elif c == "\"":
                retval.append("\\\"")
            elif c == "'":
                if json_data[i:i+7] == "'undef'":
                    retval.append("'undef'")
                    skip = 7
                else:
                    retval.append("\"")
                    in_str = False
            else:
                retval.append(c)
        else:
            if c == "'":
                retval.append("\"")
                in_str = True
            else:
                retval.append(c)

    retval = "".join(retval)
    return retval

parser = OptionParser()
parser.add_option("-s", "--span", action="store",
                  dest="timespan", default="7d",
                  help="Period of time to fetch data for (N[y,m,w,d,h], default=%default)")
parser.add_option("-t", "--tree", action="store",
                  dest="tree", default="Firefox",
                  help="Tinderbox tree to fetch data from (default=%default)")
parser.add_option("-d", "--database", action="store",
                  dest="db", default="unittest.db",
                  help="Database filename (default=%default)")
parser.add_option("-v", "--verbose", action="store_true",
                  dest="verbose", default="False",
                  help="Enable verbose logging")

(options, args) = parser.parse_args()

logging.basicConfig(level=options.verbose and logging.DEBUG or logging.WARNING)

os.environ['TZ'] = "US/Pacific"
tzset()
endtime = time()

m = re.match("(\d+)([ymwdh])", options.timespan)
if m is None:
    print >>sys.stderr, "Bad timespan %s!" % options.timespan
    sys.exit(1)

timespan = int(m.group(1)) * {'y': 3600*24*365,
                              'm': 3600*24*30,
                              'w': 3600*24*7,
                              'd': 3600*24,
                              'h': 3600}[m.group(2)]
starttime = endtime - timespan

db = os.path.abspath(options.db)
createdb = False
if not os.path.exists(db):
  createdb = True
conn = sqlite.connect(db)
if createdb:
  CreateDB(conn)

treeid = GetOrInsertTree(conn, options.tree)
logging.info("Reading tinderbox data...")
curtime = starttime
chunk = 0
# add a fudge factor here, since builds can take up to 3 hours to finish,
# and we can't get the changeset unless we ask for time up to the end of the
# build
endtime += 3 * 3600
timespan += 3 * 3600
totalchunks = ceil(timespan/chunksize)

while curtime < endtime and chunk < totalchunks:
    chunk += 1
    if (endtime - curtime) < chunksize:
        chunksize = endtime - curtime
    tboxurl = "http://tinderbox.mozilla.org/showbuilds.cgi?tree=%(tree)s&maxdate=%(curtime)d&noignore=1&hours=%(chunk)d&json=1" \
        % {'tree': options.tree,
           'curtime': int(curtime + chunksize), # tbox wants the end time
           'chunk': int(chunksize/3600)}
    logging.info("chunk %d/%d" % (chunk, totalchunks))
    u = urllib.urlopen(tboxurl)
    tboxjson = ''.join(u.readlines())
    u.close()
    tboxjson = fix_tbox_json(tboxjson)
    try:
      tboxdata = json.loads(tboxjson)
    except Exception, inst:
      print >>sys.stderr, "Error parsing JSON: ", inst
      continue
    # we only care about unit test boxes
    unittest_indices = [tboxdata['build_name_index'][x] for x in tboxdata['build_name_index'] if re.search("unit test$", x)]
    # read build table
    for build in tboxdata['build_table']:
        for index in unittest_indices:
            if index >= len(build) or build[index] == -1:
                continue
            if 'buildname' not in build[index]:
                continue
            # only look at success/testfail builds (ignore burning/building)
            if build[index]['buildstatus'] == 'building':
                continue
            name = build[index]['buildname']
            os = OSFromBuilderName(name)
            starttime = int(build[index]['buildtime'])
            # skip builds we've already seen
            if HaveBuild(conn, treeid, os, starttime):
              logging.info("Skipping already seen build %s at %d" % (name, starttime))
              # update logfile anyway...
              UpdateLogfile(conn, treeid, os, starttime, build[index]['logfile'])
              continue
            # must have scrape data for changeset
            if 'logfile' not in build[index]:
                continue
            if build[index]['logfile'] not in tboxdata['scrape']:
                continue
            changeset = FindChangesetInScrape(tboxdata['scrape'][build[index]['logfile']])
            if changeset is None:
                continue

            status = BuildStatusFromText(build[index]['buildstatus'])
            buildid = InsertBuild(conn, treeid, os, starttime, status, build[index]['logfile'], changeset)
            # grab the build log and look for test failures
            if status != BuildStatus.Success:
              logging.info("Checking build log for %s at %d" % (name, starttime))
              try:
                log, headers = urllib.urlretrieve("http://tinderbox.mozilla.org/%s/%s" % (options.tree, build[index]['logfile']))
                gz = GzipFile(log)
                for line in gz:
                  if line.find("TEST-UNEXPECTED") != -1:
                    bits = line.split('|', 2)[1:]
                    test = bits[0]
                    if len(bits) == 2:
                      text = bits[1]
                    else:
                      text = ''
                    InsertTest(conn, buildid, test.strip(), text.strip())
              except:
                logging.error("Unexpected error:", sys.exc_info()[0])
                #XXX: handle me?
    conn.commit()
    if chunk < totalchunks:
        sleep(sleeptime)
    curtime += chunksize

conn.close()
logging.info("Done")