Bug 1469720 - Add a 'mach test-info' subcommand to find long-running tasks; r=jmaher
authorGeoff Brown <gbrown@mozilla.com>
Tue, 07 Aug 2018 10:05:37 -0600
changeset 430398 43fea7881cff86fb7ed63db47291bd3a362f0275
parent 430397 658f019ace0e7b91c52c491a1a8363d1264573e5
child 430399 4af8de99d32106d2ab5c91649c499aafca85776d
push id: 34403
push user: cbrindusan@mozilla.com
push date: Tue, 07 Aug 2018 21:52:23 +0000
treeherder: mozilla-central@d9e6ce390607 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: jmaher
bugs: 1469720
milestone: 63.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1469720 - Add a 'mach test-info' subcommand to find long-running tasks; r=jmaher
testing/mach_commands.py
--- a/testing/mach_commands.py
+++ b/testing/mach_commands.py
@@ -13,16 +13,17 @@ import tempfile
 import subprocess
 import shutil
 
 from mach.decorators import (
     CommandArgument,
     CommandProvider,
     Command,
     SettingsProvider,
+    SubCommand,
 )
 
 from mozbuild.base import (
     BuildEnvironmentNotFoundException,
     MachCommandBase,
     MachCommandConditions as conditions,
 )
 from moztest.resolve import TEST_SUITES
@@ -981,8 +982,104 @@ class TestInfoCommand(MachCommandBase):
         response.raise_for_status()
         json_response = response.json()
         print("\nBugzilla quick search for '%s':" % search)
         if 'bugs' in json_response:
             for bug in json_response['bugs']:
                 print("Bug %s: %s" % (bug['id'], bug['summary']))
         else:
             print("No bugs found.")
+
+    @SubCommand('test-info', 'long-tasks',
+                description='Find tasks approaching their taskcluster max-run-time.')
+    @CommandArgument('--branches',
+                     default='mozilla-central,mozilla-inbound,autoland',
+                     help='Report for named branches '
+                          '(default: mozilla-central,mozilla-inbound,autoland)')
+    @CommandArgument('--start',
+                     default=(date.today() - timedelta(7)
+                              ).strftime("%Y-%m-%d"),
+                     help='Start date (YYYY-MM-DD)')
+    @CommandArgument('--end',
+                     default=date.today().strftime("%Y-%m-%d"),
+                     help='End date (YYYY-MM-DD)')
+    @CommandArgument('--max-threshold-pct',
+                     default=90.0,
+                     help='Count tasks exceeding this percentage of max-run-time.')
+    @CommandArgument('--filter-threshold-pct',
+                     default=0.5,
+                     help='Report tasks exceeding this percentage of long tasks.')
+    @CommandArgument('--verbose', action='store_true',
+                     help='Enable debug logging.')
+    def report_long_running_tasks(self, **params):
+        def get_long_running_ratio(record):
+            count = record['count']
+            tasks_gt_pct = record['tasks_gt_pct']
+            return count / tasks_gt_pct
+
+        branches = params['branches']
+        start = params['start']
+        end = params['end']
+        self.verbose = params['verbose']
+        threshold_pct = float(params['max_threshold_pct'])
+        filter_threshold_pct = float(params['filter_threshold_pct'])
+
+        # Search test durations in ActiveData for long-running tests
+        query = {
+            "from": "task",
+            "format": "list",
+            "groupby": ["run.name"],
+            "limit": 1000,
+            "select": [
+                {
+                    "value": "task.maxRunTime",
+                    "aggregate": "median",
+                    "name": "max_run_time"
+                },
+                {
+                    "aggregate": "count"
+                },
+                {
+                    "value": {
+                        "when": {
+                            "gt": [
+                                {
+                                    "div": ["action.duration", "task.maxRunTime"]
+                                }, threshold_pct/100.0
+                            ]
+                        },
+                        "then": 1
+                    },
+                    "aggregate": "sum",
+                    "name": "tasks_gt_pct"
+                },
+            ],
+            "where": {"and": [
+                {"in": {"build.branch": branches.split(',')}},
+                {"gt": {"task.run.start_time": {"date": start}}},
+                {"lte": {"task.run.start_time": {"date": end}}},
+                {"eq": {"state": "completed"}},
+            ]}
+        }
+        data = self.submit(query)
+        print("\nTasks nearing their max-run-time on %s between %s and %s" %
+              (branches, start, end))
+        if data and len(data) > 0:
+            filtered = []
+            for record in data:
+                if 'tasks_gt_pct' in record:
+                    count = record['count']
+                    tasks_gt_pct = record['tasks_gt_pct']
+                    if tasks_gt_pct / count > filter_threshold_pct / 100.0:
+                        filtered.append(record)
+            filtered.sort(key=get_long_running_ratio)
+            if not filtered:
+                print("No long running tasks found.")
+            for record in filtered:
+                name = record['run']['name']
+                count = record['count']
+                max_run_time = record['max_run_time']
+                tasks_gt_pct = record['tasks_gt_pct']
+                print("%-55s: %d of %d runs (%.1f%%) exceeded %d%% of max-run-time (%d s)" %
+                      (name, tasks_gt_pct, count, tasks_gt_pct * 100 / count,
+                       threshold_pct, max_run_time))
+        else:
+            print("No tasks found.")