Use a better database schema with warning signatures... this makes checking the prior build for the same signature very simple indeed
authorBenjamin Smedberg <benjamin@smedbergs.us>
Tue, 09 Dec 2008 15:56:49 -0500
changeset 5 79214df79809
parent 4 923e095ab9e7
child 6 03664a1472ce
push id3
push userbsmedberg@mozilla.com
push date2008-12-10 20:06 +0000
Use a better database schema with warning signatures... this makes checking the prior build for the same signature very simple indeed
warningstep.py
--- a/warningstep.py
+++ b/warningstep.py
@@ -1,48 +1,49 @@
 from buildbot.process.buildstep import SUCCESS, FAILURE
 from buildbot.steps.shell import ShellCommand
-import sqlite3
+import sqlite3, os.path
 
 class WarningParserStep(ShellCommand):
     """
     A subclass of shellcommand used for running warning-parser.py.
 
     This parses the output and saves interesting information to the
     master-side sqlite database. It produces output about "new" warnings, etc.
 
-    dbfile should be pre-created with the following schema:
+    If the dbfile does not exist, it will be created the first time it is needed.
+    """
 
+    _createSchema = """
     CREATE TABLE builds (
-      id INTEGER NOT NULL PRIMARY KEY,
+      buildnumber INTEGER NOT NULL PRIMARY KEY,
       rev TEXT NOT NULL
     );
 
-    CREATE TABLE wdata (
-      build INTEGER NOT NULL,
-      id INTEGER NOT NULL,
-      ord INTEGER NOT NULL,
+    CREATE TABLE warnings (
+      buildnumber INTEGER NOT NULL CONSTRAINT warnings_buildnumer_fkey REFERENCES builds (buildnumber),
+      signature TEXT NOT NULL,
+      count INTEGER NOT NULL,
+      CONSTRAINT warnings_pkey PRIMARY KEY (buildnumber, signature)
+    );
+
+    CREATE TABLE wlines (
+      buildnumber INTEGER NOT NULL,
+      signature TEXT NOT NULL,
+      wline INTEGER NOT NULL,
       file TEXT NOT NULL,
-      line INTEGER,
+      lineno INTEGER,
       msg TEXT NOT NULL,
       blametype TEXT,
-      blamewho TEXT,
-      blamepath TEXT,
+      blamefile TEXT,
       blamerev TEXT,
       blameline INTEGER,
-      CONSTRAINT pkey PRIMARY KEY (build, id, ord)
-    );
-
-    CREATE INDEX wdata_blame ON wdata (
-      build,
-      ord,
-      blametype,
-      blamepath,
-      blamerev,
-      blameline
+      blamewho TEXT,
+      CONSTRAINT wlines_refwarning_fkey FOREIGN KEY (buildnumber, signature) REFERENCES warnings (buildnumber, signature),
+      CONSTRAINT wlines_pkey PRIMARY KEY (buildnumber, signature, wline)
     );
     """
 
     def __init__(self, dbfile, **kwargs):
         ShellCommand.__init__(self, **kwargs)
 
         self.dbfile = dbfile
         self.addFactoryArguments(dbfile=dbfile)
@@ -50,98 +51,124 @@ class WarningParserStep(ShellCommand):
     def createSummary(self, log):
         stepresults = self.step_status.getBuild().getSteps()
         for sr in stepresults:
             result, str = sr.getResults()
             if result == FAILURE:
                 self.addCompleteLog('skipped', 'Skipped analyzing warnings because a prior step failed.')
                 return
 
+        createSchema = not os.path.exists(self.dbfile)
+
         db = sqlite3.connect(self.dbfile)
         cur = db.cursor()
 
+        if createSchema:
+            cur.executescript(self._createSchema)
+
         buildnumber = self.getProperty('buildnumber')
 
-        cur.execute('INSERT INTO builds (id, rev) VALUES ( ?, ? )',
+        cur.execute('INSERT INTO builds (buildnumber, rev) VALUES ( ?, ? )',
                     (buildnumber, self.getProperty('got_revision')))
 
-        def getData():
-            for line in log.getText().split("\n"):
-                if line.startswith('WARN-DB: '):
-                    id, ord, file, lineno, msg, blametype, blamewho, blamepath, blamerev, blameline = eval(line[9:].strip())
-                    yield (buildnumber, id, ord, file, lineno, msg, blametype, blamewho, blamepath, blamerev, blameline) 
+        def lineSignature(line):
+            """
+            Given a line tuple, return a string signature for the line. This signature is based
+            on the blame location if available, else the reported location.
+            """
+            file, lineno, msg, blametype, blamewho, blamepath, blamerev, blameline = line
+
+            if blametype is None:
+                return "%s:%s:%s" % (file, lineno, msg[:30])
+
+            return "%s:%s:%s:%s:%s" % (blametype, blamepath, blamerev[:12], blameline, msg[:30])
+
+        def processWarning(lines):
+            """
+            process a warning. A warning is a list of lines. Each line is a tuple of
+            (file, lineno, msg, blametype, blamewho, blamepath, blamerev, blameline)
+            """
+
+            signature = "\t".join( (lineSignature(line) for line in lines) )
 
-        cur.executemany('''INSERT INTO wdata (
-                             build, id, ord, file, line, msg,
-                             blametype, blamewho, blamepath, blamerev, blameline
-                           ) VALUES (?,?,?,?,?,?,?,?,?,?,?)''', getData())
+            cur.execute('''UPDATE warnings
+                           SET count = count + 1
+                           WHERE buildnumber = ? AND signature = ?''', (buildnumber, signature))
+            if cur.rowcount == 0:
+                cur.execute('INSERT INTO warnings (buildnumber, signature, count) VALUES (?, ?, ?)', (buildnumber, signature, 1))
+
+                for i in xrange(0, len(lines)):
+                    file, lineno, msg, blametype, blamewho, blamepath, blamerev, blameline = lines[i]
+                    cur.execute('''INSERT INTO wlines (buildnumber, signature, wline, file, lineno, msg, blametype, blamefile, blamerev, blameline, blamewho)
+                                   VALUES             (?,           ?,         ?,     ?,    ?,      ?,   ?,         ?,         ?,        ?,         ?)''',
+                                (buildnumber, signature, i, file, lineno, msg, blametype, blamepath, blamerev, blameline, blamewho))
+
+
+        curwarning = None
+        for line in log.getText().split("\n"):
+            if line.startswith('WARN-DB: '):
+                id, ord, file, lineno, msg, blametype, blamewho, blamepath, blamerev, blameline = eval(line[9:].strip())
+
+                if ord == 0:
+                    if curwarning is not None:
+                        processWarning(curwarning)
+                    curwarning = []
+
+                curwarning.append( (file, lineno, msg, blametype, blamewho, blamepath, blamerev, blameline) )
+
+        if curwarning is not None:
+            processWarning(curwarning)
 
         db.commit()
 
         # A list of line to stick in the comparison log
         comparison = []
 
-        cur.execute('SELECT count(*) FROM wdata WHERE build = ? AND ord = 0', (buildnumber,))
+        cur.execute('SELECT sum(count) FROM warnings WHERE buildnumber = ?', (buildnumber,))
         wcount, = cur.fetchone()
-        comparison.append('TinderboxPrint:warn:%i' % wcount)
+        comparison.append('TinderboxPrint:warn:%s' % wcount)
 
-        cur.execute('SELECT max(id) FROM builds WHERE id < ?', (buildnumber,))
+        cur.execute('SELECT max(buildnumber) FROM builds WHERE buildnumber < ?', (buildnumber,))
         r = cur.fetchone()
         if r is None:
             comparison.append('no prior build found: skipping comparison')
         else:
             prevbuildnumber, = r
 
-            # For each warning in this build, see if the same warning was
-            # present in a prior build. I'm sorry this query is arcane, but
-            # it EXPLAINs well, and avoids lots of client traffic.
-
-            cur.execute('''
-              SELECT id
-              FROM (SELECT id, build, count(*) AS newnumlines
-                    FROM wdata
-                    WHERE build = ?
-                    GROUP BY id) AS widlist
-              WHERE NOT EXISTS (
-                SELECT count(*) as oldnumlines
-                FROM
-                 wdata AS oldwarn, wdata as newwarn
-                WHERE
-                  oldwarn.ord = newwarn.ord AND
-                  oldwarn.blametype = newwarn.blametype AND
-                  oldwarn.blamepath = newwarn.blamepath AND
-                  oldwarn.blamerev = newwarn.blamerev AND
-                  oldwarn.blameline = newwarn.blameline AND
-                  oldwarn.build = ? AND
-                  newwarn.build = widlist.build
-                GROUP BY oldwarn.id
-                HAVING oldnumlines = newnumlines)
-             ''', (buildnumber, prevbuildnumber))
-
+            cur.execute('''SELECT signature
+                           FROM warnings
+                           WHERE buildnumber = ? AND
+                             NOT EXISTS (SELECT *
+                                         FROM warnings AS oldwarnings
+                                         WHERE oldwarnings.signature = warnings.signature AND
+                                           oldwarnings.buildnumber = ?)''', (buildnumber, prevbuildnumber))
             cur2 = db.cursor()
 
-            for id, in cur:
-                cur2.execute('''SELECT file, line, msg, blametype, blamewho, blamepath, blamerev, blameline FROM wdata WHERE build = ? AND id = ? ORDER BY ORD''',
-                             (buildnumber, id))
+            for signature, in cur:
+                cur2.execute('''SELECT file, lineno, msg, blametype, blamefile, blamerev, blameline, blamewho
+                                FROM wlines
+                                WHERE buildnumber = ? AND signature = ?
+                                ORDER BY wline''',
+                             (buildnumber, signature))
                 comparison.append('NEW WARNING:')
 
-                for file, line, msg, blametype, blamewho, blamepath, blamerev, blameline in cur2.fetchall():
+                for file, lineno, msg, blametype, blamefile, blamerev, blameline, blamewho in cur2:
                     out = "  %s" % file
                     if line is not None:
-                        out += ":%i" % line
+                        out += ":%i" % lineno
 
                     out += ": %s" % msg
 
                     if blamewho is not None:
                         out += ' - Blamed on %s' % blamewho
 
                     if blametype == 'cvs':
-                        out += ' -  http://bonsai.mozilla.org/cvslog.cgi?file=mozilla/%s&rev=HG_REPO_INITIAL_IMPORT&mark=%s#%s' % (blamepath, blameline, blameline)
+                        out += ' -  http://bonsai.mozilla.org/cvslog.cgi?file=mozilla/%s&rev=HG_REPO_INITIAL_IMPORT&mark=%s#%s' % (blamefile, blameline, blameline)
                     elif blametype == 'hg':
-                        out += ' - http://hg.mozilla.org/mozilla-central/annotate/%s/%s#l%i' % (blamerev, blamepath, blameline)
+                        out += ' - http://hg.mozilla.org/mozilla-central/annotate/%s/%s#l%i' % (blamerev, blamefile, blameline)
 
                     comparison.append(out)
 
         self.addCompleteLog('comparison', '\n'.join(comparison))
 
         cur.close()
         db.close()