Bug 858756 (2/2) - Blame the changeset whose t score is a local maximum [r=catlee]
author    Matt Brubeck <mbrubeck@mozilla.com>
date      Wed, 17 Apr 2013 12:11:05 -0700
changeset 926 75f91c5b30379373fb92f140ecc22729c7454024
parent 925 1d982ff52990598d1c6e758bf0faffb5ab259c44
child 927 e328fe4e58c4bda41b00bce80244093cdfe0addf
push id   357
push user mbrubeck@mozilla.com
push date Wed, 17 Apr 2013 19:12:41 +0000
reviewers catlee
bugs      858756
Bug 858756 (2/2) - Blame the changeset whose t score is a local maximum [r=catlee]

When several changesets in a row are potential causes of a regression, blame the one with the highest t-test score rather than the first one.
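The rule this patch implements can be summarized with a minimal standalone sketch (not part of the changeset; blame_local_maxima, scores, and threshold are illustrative names only): among consecutive points whose t scores exceed the threshold, blame only a point whose score is at least as high as both of its neighbors.

    def blame_local_maxima(scores, threshold):
        """Return the indices of points blamed for a regression."""
        blamed = []
        # Skip the first and last points: a candidate needs both neighbors.
        for i in range(1, len(scores) - 1):
            t = scores[i]
            if t <= threshold:
                continue
            # Blame only if neither neighbor has a higher score.
            if scores[i - 1] <= t and scores[i + 1] <= t:
                blamed.append(i)
        return blamed

    # Example: 3, 5, and 4 all exceed a threshold of 2, but only the middle
    # point (the local maximum, index 2) is blamed.
    print(blame_local_maxima([0, 3, 5, 4, 0], 2))  # -> [2]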
server/analysis/analyze.py
server/analysis/analyze_talos.py
server/analysis/test_analyze.py
server/analysis/test_analyze_talos.py
--- a/server/analysis/analyze.py
+++ b/server/analysis/analyze.py
@@ -21,33 +21,37 @@ def calc_t(w1, w2):
     if s1['variance'] == 0 and s2['variance'] == 0:
         return float('inf')
 
     return delta_s / (((s1['variance'] / s1['n']) + (s2['variance'] / s2['n'])) ** 0.5)
 
 class PerfDatum(object):
     __slots__ = ('testrun_id', 'machine_id', 'timestamp', 'value', 'buildid',
             'time', 'revision', 'run_number', 'last_other', 'historical_stats',
-            'forward_stats')
+            'forward_stats', 't', 'state')
     def __init__(self, testrun_id, machine_id, timestamp, value, buildid, time,
-            revision=None):
+            revision=None, state='good'):
         # Which test run was this
         self.testrun_id = testrun_id
         # Which machine is this
         self.machine_id = machine_id
         # Talos timestamp
         self.timestamp = timestamp
         # Value of this point
         self.value = value
         # Which build was this
         self.buildid = buildid
         # Date code was pushed
         self.time = time
         # What revision this data is for
         self.revision = revision
+        # t-test score
+        self.t = 0
+        # Whether a machine issue or perf regression is found
+        self.state = state
 
     def __cmp__(self, o):
         return cmp(
                 (self.time, self.timestamp),
                 (o.time, o.timestamp),
                 )
 
     def __eq__(self, o):
@@ -81,17 +85,18 @@ class TalosAnalyzer:
         for d in self.machine_history.values():
             d.sort()
 
     def analyze_t(self, j, k, threshold, machine_threshold, machine_history_size):
         # Use T-Tests
         # Analyze test data using T-Tests, comparing data[i-j:i] to data[i:i+k]
         good_data = []
 
-        for i in range(len(self.data)-k+1):
+        num_points = len(self.data) - k + 1
+        for i in range(num_points):
             di = self.data[i]
             jw = [d.value for d in good_data[-j:]]
             kw = [d.value for d in self.data[i:i+k]]
 
             my_history = self.machine_history[di.machine_id]
             my_history_index = my_history.index(di)
             my_data = [d.value for d in self.machine_history[di.machine_id][my_history_index-machine_history_size+1:my_history_index+1]]
             other_data = []
@@ -101,37 +106,55 @@ class TalosAnalyzer:
                 if dl.machine_id != di.machine_id:
                     other_data.insert(0, dl.value)
                 l -= 1
 
             di.historical_stats = analyze(jw)
             di.forward_stats = analyze(kw)
 
             if len(jw) >= j:
-                t = calc_t(jw, kw)
+                di.t = abs(calc_t(jw, kw))
             else:
                 # Assume it's ok, we don't have enough data
-                t = 0
+                di.t = 0
 
             if len(other_data) >= k*2 and len(my_data) >= machine_history_size:
                 m_t = calc_t(other_data, my_data)
             else:
                 m_t = 0
 
             if abs(m_t) >= machine_threshold:
                 l = len(good_data)-1
                 while l >= 0:
                     dl = good_data[l]
                     if dl.machine_id != di.machine_id:
                         di.last_other = dl
                         break
                     l -= 1
                 # We think this machine is bad, so don't add its data to the
                 # set of good data
-                yield di, "machine"
-            elif abs(t) <= threshold:
-                good_data.append(di)
-                yield di, "good"
+                di.state = 'machine'
             else:
-                # By including the data point as part of the "good" data, we slowly
-                # adjust to the new baseline.
                 good_data.append(di)
-                yield di, "regression"
+
+        # Now that the t-test scores are calculated, go back through the data to
+        # find where regressions most likely happened.
+        for i in range(1, len(good_data) - 1):
+            di = good_data[i]
+            if di.t <= threshold:
+                continue
+
+            # Check the adjacent points
+            prev = good_data[i-1]
+            if prev.t > di.t:
+                continue
+            next = good_data[i+1]
+            if next.t > di.t:
+                continue
+
+            # This datapoint has a t value higher than the threshold and no
+            # lower than either neighbor.  Mark it as the cause of a regression.
+            di.state = 'regression'
+
+        # Return all but the first and last points whose scores we calculated,
+        # since we can only produce a final decision for a point whose scores
+        # were compared to both of its neighbors.
+        return self.data[1:num_points-1]
--- a/server/analysis/analyze_talos.py
+++ b/server/analysis/analyze_talos.py
@@ -672,24 +672,24 @@ class AnalysisRunner:
         good_data = []
         regressions = []
         bad_machines = {}
         graph_dir = self.config.get('main', 'graph_dir')
         test_name = series.test_name.replace("/", "_")
         basename = "%s/%s-%s-%s" % (graph_dir,
                 series.branch_name, series.os_name, test_name)
 
-        for d, state, skip, last_good in series_data:
+        for d, skip, last_good in series_data:
             graph_point = (d.time * 1000, d.value)
             all_data.append(graph_point)
-            if state == "good":
+            if d.state == "good":
                 good_data.append(graph_point)
-            elif state == "regression":
+            elif d.state == "regression":
                 regressions.append(graph_point)
-            elif state == "machine":
+            elif d.state == "machine":
                 bad_machines.setdefault(d.machine_id, []).append(graph_point)
 
         log.debug("Creating graph %s", basename)
 
         graphs = []
         graphs.append({"label": "Value", "data": all_data})
 
         graphs.append({"label": "Smooth Value", "data": good_data, "color": "green"})
@@ -823,64 +823,54 @@ class AnalysisRunner:
             self.warning_history[s.branch_name] = {}
         if s.os_name not in self.warning_history[s.branch_name]:
             self.warning_history[s.branch_name][s.os_name] = {}
         if s.test_name not in self.warning_history[s.branch_name][s.os_name]:
             self.warning_history[s.branch_name][s.os_name][s.test_name] = []
         warnings = self.warning_history[s.branch_name][s.os_name][s.test_name]
 
         series_data = self.processSeries(analysis_gen, warnings)
-        for d, state, skip, last_good in series_data:
-            self.handleData(s, d, state, skip, last_good)
+        for d, skip, last_good in series_data:
+            self.handleData(s, d, d.state, skip, last_good)
 
         if self.config.has_option('main', 'graph_dir'):
             self.outputGraphs(s, series_data)
 
     def processSeries(self, analysis_gen, warnings):
         last_good = None
-        last_err = None
-        last_err_good = None
         # Uncomment this for debugging!
         #cutoff = self.options.start_time
         cutoff = time.time() - 7*24*3600
         series_data = []
-        for d, state in analysis_gen:
+        for d in analysis_gen:
             skip = False
             if d.timestamp < cutoff:
                 continue
 
-            if state != "good":
+            if d.state == "good":
+                last_good = d
+            else:
                 # Skip warnings about regressions we've already
                 # warned people about
                 if (d.buildid, d.timestamp) in warnings:
                     skip = True
                 else:
                     warnings.append((d.buildid, d.timestamp))
-                    if state == "machine":
+                    if d.state == "machine":
                         machine_name = self.source.getMachineName(d.machine_id)
                         if 'bad_machines' not in self.warning_history:
                             self.warning_history['bad_machines'] = {}
                         # When did we last warn about this machine?
                         if self.warning_history['bad_machines'].get(machine_name, 0) > time.time() - 7*24*3600:
                             skip = True
                         else:
                             # If it was over a week ago, then send another warning
                             self.warning_history['bad_machines'][machine_name] = time.time()
 
-                if not last_err:
-                    last_err = d
-                    last_err_good = last_good
-                elif last_err_good == last_good:
-                    skip = True
-
-            else:
-                last_err = None
-                last_good = d
-
-            series_data.append((d, state, skip, last_good))
+            series_data.append((d, skip, last_good))
 
         return series_data
 
 
     def loadSeries(self):
         start_time = self.options.start_time
         if self.config.has_option('cache', 'last_run_file'):
             try:
--- a/server/analysis/test_analyze.py
+++ b/server/analysis/test_analyze.py
@@ -25,25 +25,23 @@ class TestTalosAnalyzer(unittest.TestCas
         return [PerfDatum(t, t, t, float(v), t, t) for t, v in zip(times, values)]
 
     def test_analyze_t(self):
         a = TalosAnalyzer()
 
         data = self.get_data()
         a.addData(data)
 
-        result = [(d.time, state) for d, state in a.analyze_t(5, 5, 2, 15, 5)]
+        result = [(d.time, d.state) for d in a.analyze_t(5, 5, 2, 15, 5)]
         self.assertEqual(result, [
-            (0, 'good'),
             (1, 'good'),
             (2, 'good'),
             (3, 'good'),
             (4, 'good'),
             (5, 'good'),
-            (6, 'regression'),
-            (7, 'regression'),
+            (6, 'good'),
+            (7, 'good'),
             (8, 'regression'),
-            (9, 'regression'),
-            (10, 'regression'),
-            (11, 'good')])
+            (9, 'good'),
+            (10, 'good')])
 
 if __name__ == '__main__':
     unittest.main()
--- a/server/analysis/test_analyze_talos.py
+++ b/server/analysis/test_analyze_talos.py
@@ -21,36 +21,36 @@ class TestAnalysisRunner(unittest.TestCa
         config.set('main', 'threshold', '9')
         config.set('main', 'percentage_threshold', '9')
         config.set('main', 'machine_threshold', '9')
         config.set('main', 'machine_history_size', '0')
         return options, config
 
     def get_data(self):
         return [
-            (PerfDatum(0, 0, time() + 0, 0.0, 0, 0), 'good'),
-            (PerfDatum(1, 1, time() + 1, 0.0, 1, 1), 'good'),
-            (PerfDatum(2, 2, time() + 2, 0.0, 2, 2), 'good'),
-            (PerfDatum(3, 3, time() + 3, 0.0, 3, 3), 'good'),
-            (PerfDatum(4, 4, time() + 4, 1.0, 4, 4), 'regression'),
-            (PerfDatum(5, 5, time() + 5, 1.0, 5, 5), 'good'),
-            (PerfDatum(6, 6, time() + 6, 1.0, 6, 6), 'good'),
-            (PerfDatum(7, 7, time() + 7, 1.0, 7, 7), 'good'),
+            PerfDatum(0, 0, time() + 0, 0.0, 0, 0, state='good'),
+            PerfDatum(1, 1, time() + 1, 0.0, 1, 1, state='good'),
+            PerfDatum(2, 2, time() + 2, 0.0, 2, 2, state='good'),
+            PerfDatum(3, 3, time() + 3, 0.0, 3, 3, state='good'),
+            PerfDatum(4, 4, time() + 4, 1.0, 4, 4, state='regression'),
+            PerfDatum(5, 5, time() + 5, 1.0, 5, 5, state='good'),
+            PerfDatum(6, 6, time() + 6, 1.0, 6, 6, state='good'),
+            PerfDatum(7, 7, time() + 7, 1.0, 7, 7, state='good'),
         ]
 
     def test_processSeries(self):
         options, config = self.get_config()
         runner = AnalysisRunner(options, config)
 
         data = self.get_data()
         results = runner.processSeries(data, [])
         self.assertEqual(len(results), 8)
 
-        skipped = filter(lambda (d, state, skip, last_good): skip, results)
+        skipped = filter(lambda (d, skip, last_good): skip, results)
         self.assertEqual(len(skipped), 0)
 
-        self.assertEqual(results[3], (data[3][0], 'good', False, data[3][0]))
-        self.assertEqual(results[4], (data[4][0], 'regression', False, data[3][0]))
-        self.assertEqual(results[5], (data[5][0], 'good', False, data[5][0]))
+        self.assertEqual(results[3], (data[3], False, data[3]))
+        self.assertEqual(results[4], (data[4], False, data[3]))
+        self.assertEqual(results[5], (data[5], False, data[5]))
 
 
 if __name__ == '__main__':
     unittest.main()