warning-parser.py
author Benjamin Smedberg <benjamin@smedbergs.us>
Fri, 25 Sep 2009 12:23:53 -0400
branch newdbschema
changeset 20 5c4f22089101fd8f02dfcf0b2c96544a48f56c61
parent 13 e352aa637c0db2eb3d8bb8717646d128cf0beb30
child 24 19e7a98a8dc454bdf3c33eba3ea07b1f3e1cebdf
permissions -rw-r--r--
The new DB schema, but slow. Checkpoint.

#!/usr/bin/env python

"""
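Reads the build log named on the command line. Parses warning messages (from
GCC and elsewhere), looks up blame information for each one (from Mercurial,
falling back to a sqlite database of CVS blame), and prints one "WARN-DB:"
record per message for later consumption.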

Uses gmake "Entering directory" and "Leaving directory" messages to
keep track of the working directory. Resolves relative file paths against
these working directories, and also follows symlinks back into the
source tree.
"""

import os
import re
import sqlite3
import sys

import mercurial.hg
import mercurial.node
import mercurial.revlog
import mercurial.ui

# Command line: <srcdir> <blamedb> <logfile>.  Check the argument count up
# front so a bad invocation gets a usage message instead of a traceback from
# the tuple unpacking below.
if len(sys.argv) != 4:
    sys.stderr.write("Usage: %s <srcdir> <blamedb> <logfile>\n" % sys.argv[0])
    sys.exit(2)

(srcdir, blamedb, logfile) = sys.argv[1:]

# Canonical source root with a trailing slash, so that a plain prefix test
# against a realpath()ed file name tells whether that file is in the tree.
srcdir = os.path.realpath(srcdir) + '/'

# From here on blamedb is the open sqlite connection rather than its path;
# blamecur is the cursor used for the CVS blame queries.
blamedb = sqlite3.connect(blamedb)
blamecur = blamedb.cursor()

# Blame is computed against the changeset Mercurial resolves '.' to in the
# repository at srcdir.
repo = mercurial.hg.repository(mercurial.ui.ui(quiet=True, interactive=False), srcdir)
ctx = repo.changectx('.')

# Returned by the blame helpers when no blame is available; it has the same
# five slots as a real result: (type, who, path, rev, line).
emptyBlame = (None, None, None, None, None)

def getCVSBlame(file, line):
    """
    Look up, in the blame database, the CVS blame range that covers a line
    of a file. Used for lines that Mercurial attributes to the initial
    import of CVS code.

    @returns a tuple ('cvs', who, path, rev, line), or emptyBlame (after
             printing a diagnostic) when no stored range covers the line
    """

    query = '''SELECT blame.who, blame.rev
                        FROM blame, files
                        WHERE
                          files.file = ? AND
                          files.id = blame.fileid AND
                          blame.minline <= ? AND blame.maxline >= ?'''
    blamecur.execute(query, (file, line, line))

    row = blamecur.fetchone()
    if row is not None:
        author, revision = row
        # The recorded name has '%' where the reported one gets '@'.
        return ('cvs', author.replace('%', '@'), file, revision, line)

    print("Couldn't find CVS blame for %s:%i" % (file, line))
    return emptyBlame

# Binary node id that getBlame compares annotate results against: a line
# blamed on this changeset is looked up in the CVS blame database instead.
_CVS_IMPORT_HEX = '9b2a99adc05e53cd4010de512f50118594756650'
cvsimportrev = mercurial.node.bin(_CVS_IMPORT_HEX)

def getBlame(file, line):
    """
    Find who is blamed for a line of a file in the source tree.

    @param file  path of the file, relative to the repository root
    @param line  1-based line number, or None when the warning had none

    @returns a tuple (type, who, path, rev, line) where type is 'hg' for
             Mercurial blame or 'cvs' for blame taken from the CVS blame
             database, or emptyBlame when the line cannot be blamed (no
             usable line number, file not found in the changeset, or line
             past the end of the annotated file)
    """
    # Warnings do not always carry a line number, and an index of 0 or less
    # would silently wrap around to the end of the file.
    if line is None or line < 1:
        return emptyBlame

    try:
        fctx = ctx[file]
    except mercurial.revlog.LookupError:
        # The changeset has no such file, so there is nothing to annotate.
        return emptyBlame

    annotated = fctx.annotate(follow=True, linenumber=True)
    if line > len(annotated):
        # The warning points past the end of the annotated revision.
        return emptyBlame

    # Each entry is ((context, line number), line text); only the first
    # part is needed here.
    blamectx, blameline = annotated[line - 1][0]
    if blamectx.node() == cvsimportrev:
        # Walk backwards into CVS history
        return getCVSBlame(blamectx.path(), blameline)

    return ('hg', blamectx.user(), blamectx.path(),
            mercurial.node.hex(blamectx.node()), blameline)

# make announces directory changes as "make[1]: Entering directory `/x'".
# GNU make 4.0 and later quote the path as '/x' instead, so either opening
# quote is accepted.
cwdre = re.compile(r"g?make(\[\d+\])?: (Entering|Leaving) directory [`'](.*)'$")
# First line of a warning: "file:line:col: warning: message", where the line
# and column parts are optional.
warningre = re.compile(r'(?P<file>[-/\.\w<>]+):((?P<line>\d+):)?(\d+:)? warning: (?P<msg>[^ ].*)$')
# Follow-up line of a warning: the same prefix, with the message preceded by
# three spaces.
continuere = re.compile(r'(?P<file>[-/\.\w<>]+):((?P<line>\d+):)?(\d+:)?( warning:)?   (?P<msg>.*)$')

# Stack of working directories; the top entry is where make currently is.
cwdstack = [os.getcwd()]

# curid numbers the warnings; curord numbers the lines within one warning
# and is -1 while no warning is in progress.
curid = -1
curord = -1

# The with block closes the log even if parsing fails part-way.
with open(logfile) as log:
    for line in log:
        line = line.strip()

        m = cwdre.match(line)
        if m is not None:
            e, wd = m.group(2, 3)
            if e == "Entering":
                cwdstack.append(wd)
            elif len(cwdstack) > 1:
                # Never pop the base entry: an unmatched "Leaving" (e.g. a
                # truncated log) would otherwise leave the stack empty and
                # break the cwdstack[-1] lookup below.
                cwdstack.pop()
            continue

        m = warningre.match(line)
        if m is not None:
            # A new warning starts here.
            curid += 1
            curord = 0
        else:
            m = continuere.match(line)
            if m is None:
                if 'warning' in line:
                    print("Found unexpected 'warning': %s" % line)

                # Any unrelated line ends the warning in progress.
                curord = -1
                continue

            if curord == -1:
                # Looks like a continuation, but no warning is in progress.
                continue

        path, lineno, msg = m.group('file', 'line', 'msg')
        # Resolve against make's current directory and follow symlinks back
        # into the source tree.
        path = os.path.join(cwdstack[-1], path)
        path = os.path.realpath(path)

        if lineno is not None:
            lineno = int(lineno)

        blametype = blamewho = blamepath = blamerev = blameline = None

        if path.startswith(srcdir):
            path = path[len(srcdir):]
            # Blame is per line, so a warning without a line number keeps
            # the empty blame fields.
            if lineno is not None:
                blametype, blamewho, blamepath, blamerev, blameline = getBlame(path, lineno)

        print("WARN-DB: %r" % ( (curid,
                                 curord,
                                 path,
                                 lineno,
                                 msg,
                                 blametype,
                                 blamewho,
                                 blamepath,
                                 blamerev,
                                 blameline), ))

        curord += 1