Bug 1422133 - Generate runnable-jobs.json.gz file as part of the Gecko decision task run. r=dustin
author Armen Zambrano G. <armenzg@mozilla.com>
Thu, 30 Nov 2017 16:05:53 -0500
changeset 394924 3881a65bd690ded3723d43e65123de00ad22fc9d
parent 394923 baada68cad47311154f570973c53ccf8db177efc
child 394925 f7292e7fee0e84e26a164cbbe2a38f5eedf44ceb
push id 33025
push user shindli@mozilla.com
push date Tue, 05 Dec 2017 09:57:50 +0000
treeherder mozilla-central@390c1aad9d4d [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers dustin
bugs 1422133
milestone 59.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1422133 - Generate runnable-jobs.json.gz file as part of the Gecko decision task run. r=dustin The runnable jobs API on Treeherder is timing out quite frequently. This is because it downloads the full-task-graph.json artifact from the Gecko decision task. This file has grown to be massive (over 30MB) and takes too long to fetch. The API times out after 20 seconds. The data we need from the artifact is minimal. We can generate a file with just the minimum amount of data needed. This code change adds logic to generate a runnable-jobs.json.gz file after the full-task-graph.json artifact is generated. MozReview-Commit-ID: 9u2H7HbUAcl
taskcluster/docs/taskgraph.rst
taskcluster/taskgraph/decision.py
taskcluster/taskgraph/test/python.ini
taskcluster/taskgraph/test/test_util_runnable_jobs.py
--- a/taskcluster/docs/taskgraph.rst
+++ b/taskcluster/docs/taskgraph.rst
@@ -140,19 +140,26 @@ scheduled.
 This task invokes ``mach taskgraph action-task`` which builds up a task graph of
 the requested tasks. This graph is optimized using the tasks running initially in
 the same push, due to the decision task.
 
 So for instance, if you had already requested a build task in the ``try`` command,
 and you wish to add a test which depends on this build, the original build task
 is re-used.
 
-Action Tasks are currently scheduled by
-[pulse_actions](https://github.com/mozilla/pulse_actions). This feature is only
-present on ``try`` pushes for now.
+
+Runnable jobs
+-------------
+As part of the execution of the Gecko decision task we generate a
+``public/runnable-jobs.json.gz`` file. It contains a subset of all the data
+contained within the ``full-task-graph.json``.
+
+This file has the minimum amount of data needed by Treeherder to show all
+tasks that can be scheduled on a push.
+
 
 Task Parameterization
 ---------------------
 
 A few components of tasks are only known at the very end of the decision task
 -- just before the ``queue.createTask`` call is made.  These are specified
 using simple parameterized values, as follows:
 
--- a/taskcluster/taskgraph/decision.py
+++ b/taskcluster/taskgraph/decision.py
@@ -84,16 +84,35 @@ PER_PROJECT_PARAMETERS = {
     'default': {
         'target_tasks_method': 'default',
         'optimize_target_tasks': True,
         'include_nightly': False,
     }
 }
 
 
+def full_task_graph_to_runnable_jobs(full_task_json):
+    runnable_jobs = {}
+    for label, node in full_task_json.iteritems():
+        if not ('extra' in node['task'] and 'treeherder' in node['task']['extra']):
+            continue
+
+        th = node['task']['extra']['treeherder']
+        runnable_jobs[label] = {
+            'symbol': th['symbol']
+        }
+
+        for i in ('groupName', 'groupSymbol', 'collection'):
+            if i in th:
+                runnable_jobs[label][i] = th[i]
+        if th.get('machine', {}).get('platform'):
+            runnable_jobs[label]['platform'] = th['machine']['platform']
+    return runnable_jobs
+
+
 def taskgraph_decision(options, parameters=None):
     """
     Run the decision task.  This function implements `mach taskgraph decision`,
     and is responsible for
 
      * processing decision task command-line options into parameters
      * running task-graph generation exactly the same way the other `mach
        taskgraph` commands do
@@ -113,16 +132,19 @@ def taskgraph_decision(options, paramete
 
     # write out the public/actions.json file
     write_artifact('actions.json', render_actions_json(parameters))
 
     # write out the full graph for reference
     full_task_json = tgg.full_task_graph.to_json()
     write_artifact('full-task-graph.json', full_task_json)
 
+    # write out the public/runnable-jobs.json.gz file
+    write_artifact('runnable-jobs.json.gz', full_task_graph_to_runnable_jobs(full_task_json))
+
     # this is just a test to check whether the from_json() function is working
     _, _ = TaskGraph.from_json(full_task_json)
 
     # write out the target task set to allow reproducing this as input
     write_artifact('target-tasks.json', tgg.target_task_set.tasks.keys())
 
     # write out the optimized task graph to describe what will actually happen,
     # and the map of labels to taskids
@@ -254,10 +276,14 @@ def write_artifact(filename, data):
         os.mkdir(ARTIFACTS_DIR)
     path = os.path.join(ARTIFACTS_DIR, filename)
     if filename.endswith('.yml'):
         with open(path, 'w') as f:
             yaml.safe_dump(data, f, allow_unicode=True, default_flow_style=False)
     elif filename.endswith('.json'):
         with open(path, 'w') as f:
             json.dump(data, f, sort_keys=True, indent=2, separators=(',', ': '))
+    elif filename.endswith('.gz'):
+        import gzip
+        with gzip.open(path, 'wb') as f:
+            f.write(json.dumps(data))
     else:
         raise TypeError("Don't know how to write to {}".format(filename))
--- a/taskcluster/taskgraph/test/python.ini
+++ b/taskcluster/taskgraph/test/python.ini
@@ -13,13 +13,14 @@ subsuite = taskgraph
 [test_target_tasks.py]
 [test_taskgraph.py]
 [test_transforms_base.py]
 [test_try_option_syntax.py]
 [test_util_attributes.py]
 [test_util_docker.py]
 [test_util_parameterization.py]
 [test_util_python_path.py]
+[test_util_runnable_jobs.py]
 [test_util_schema.py]
 [test_util_templates.py]
 [test_util_time.py]
 [test_util_treeherder.py]
 [test_util_yaml.py]
new file mode 100644
--- /dev/null
+++ b/taskcluster/taskgraph/test/test_util_runnable_jobs.py
@@ -0,0 +1,83 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import
+
+import unittest
+
+from taskgraph.decision import full_task_graph_to_runnable_jobs
+from taskgraph.graph import Graph
+from taskgraph.taskgraph import TaskGraph
+from taskgraph.task import Task
+from mozunit import main
+
+
+class TestRunnableJobs(unittest.TestCase):
+
+    tasks = [
+        {
+            'kind': 'build',
+            'label': 'a',
+            'attributes': {},
+            'task': {
+                'extra': {
+                    'treeherder': {
+                        'symbol': 'B'
+                    }
+                },
+            }
+        },
+        {
+            'kind': 'test',
+            'label': 'b',
+            'attributes': {},
+            'task': {
+                'extra': {
+                    'treeherder': {
+                        'collection': {
+                            'opt': True
+                        },
+                        'groupName': 'Some group',
+                        'groupSymbol': 'GS',
+                        'machine': {
+                            'platform': 'linux64'
+                        },
+                        'symbol': 't'
+                    }
+                },
+            }
+        },
+    ]
+
+    def make_taskgraph(self, tasks):
+        label_to_taskid = {k: k + '-tid' for k in tasks}
+        for label, task_id in label_to_taskid.iteritems():
+            tasks[label].task_id = task_id
+        graph = Graph(nodes=set(tasks), edges=set())
+        taskgraph = TaskGraph(tasks, graph)
+        return taskgraph, label_to_taskid
+
+    def test_taskgraph_to_runnable_jobs(self):
+        tg, label_to_taskid = self.make_taskgraph({
+            t['label']: Task(**t) for t in self.tasks[:]
+        })
+
+        res = full_task_graph_to_runnable_jobs(tg.to_json())
+
+        self.assertEqual(res, {
+            'a': {
+                'symbol': 'B'
+            },
+            'b': {
+                'collection': {'opt': True},
+                'groupName': 'Some group',
+                'groupSymbol': 'GS',
+                'symbol': 't',
+                'platform': 'linux64'
+            }
+        })
+
+
+if __name__ == '__main__':
+    main()