Bug 1277417: output task information in JSON or just labels; r=ahal
author: Dustin J. Mitchell <dustin@mozilla.com>
Tue, 07 Jun 2016 03:09:48 +0000
changeset 300967 cf46a59ce5e1b11baa0581dd3cfb6085ec82f443
parent 300966 2836ccae457cfa6e3ecdc9a838274243bf4875ab
child 300968 234044e5e8045ccc1c0e4be8ac7c55ceb33de38c
push id: 30324
push user: cbook@mozilla.com
push date: Wed, 08 Jun 2016 09:58:15 +0000
treeherder: mozilla-central@f8ad071a6e14 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1277417: output task information in JSON or just labels; r=ahal

The JSON output is suitable for processing with `jq` to extract features of interest.

MozReview-Commit-ID: 5wpV7sXlOz3
--- a/taskcluster/docs/taskgraph.rst
+++ b/taskcluster/docs/taskgraph.rst
@@ -146,42 +146,51 @@ parameter file.  The parameter keys and 
 Finally, the ``mach taskgraph decision`` subcommand performs the entire
 task-graph generation process, then creates the tasks.  This command should
 only be used within a decision task, as it assumes it is running in that
 Taskgraph JSON Format
-Each task graph artifact is represented as a JSON object.  The object's
-properties are the task labels or taskIds (see below), and the value of each
-property describes a task in an object with the following attributes:
+Each task in the graph is represented as a JSON object.  The output is suitable
+for processing with the `jq <https://stedolan.github.io/jq/>`_ utility.
+Each task has the following properties:
+   The task's taskId (only for optimized task graphs)
-   The task's label (never a taskId).
+   The task's label
    The task's attributes
-   The task's in-graph dependencies, each represented as a pair ``[name, label]``
-   giving the dependency name and the label for the required task.
+   The task's in-graph dependencies, represented as an object mapping
+   dependency name to label (or to taskId for optimized task graphs)
    The task's TaskCluster task definition.
 The task definition may contain "task references" of the form
 ``{"task-reference": "string containing <task-label>"}``.  These will be
 replaced during the optimization step, with the appropriate taskId substituted
 for ``<task-label>`` in the string.  Multiple labels may be substituted in a
 single string, and ``<<>`` can be used to escape a literal ``<``.
 The results from each command are in the same format, but with some differences
 in the content:
 * The ``tasks`` and ``target`` subcommands both return graphs with no edges.
   That is, just collections of tasks without any dependencies indicated.
-* The ``optimized`` subcommand returns a graph keyed by taskId rather than
-  label.  The dependencies array, too, contains taskIds instead of labels.
-  Dependencies on optimized tasks are omitted.  However, the
-  ``task.dependencies`` array is populated with the full list of dependency
-  taskIds.  All task references are resolved in the optimized graph.
+* The ``optimized`` subcommand returns tasks that have been assigned taskIds.
+  The dependencies object, too, contains taskIds instead of labels, with
+  dependencies on optimized tasks omitted.  However, the ``task.dependencies``
+  array is populated with the full list of dependency taskIds.  All task
+  references are resolved in the optimized graph.
+The graph artifacts produced by the decision task are JSON objects, keyed by
+label (``full-task-graph.json`` and ``target-tasks.json``) or by taskId
+(``task-graph.json``).  For convenience, the decision task also writes out
+``label-to-taskid.json`` containing a mapping from label to taskId.
--- a/taskcluster/mach_commands.py
+++ b/taskcluster/mach_commands.py
@@ -1,16 +1,17 @@
 # -*- coding: utf-8 -*-
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 from __future__ import absolute_import, print_function, unicode_literals
+import json
 import logging
 import sys
 import traceback
 from mach.decorators import (
@@ -27,16 +28,22 @@ class ShowTaskGraphSubCommand(SubCommand
         after = SubCommand.__call__(self, func)
         args = [
             CommandArgument('--root', '-r', default='taskcluster/ci',
                             help="root of the taskgraph definition relative to topsrcdir"),
             CommandArgument('--quiet', '-q', action="store_true",
                             help="suppress all logging output"),
             CommandArgument('--verbose', '-v', action="store_true",
                             help="include debug-level logging output"),
+            CommandArgument('--json', '-J', action="store_const",
+                            dest="format", const="json",
+                            help="Output each task in the task graph as a JSON object"),
+            CommandArgument('--labels', '-L', action="store_const",
+                            dest="format", const="labels",
+                            help="Output the label for each task in the task graph (default)"),
             CommandArgument('--parameters', '-p', required=True,
                             help="parameters file (.yml or .json; see "
             CommandArgument('--no-optimize', dest="optimize", action="store_false",
                             help="do not remove tasks from the graph that are found in the "
                             "index (a.k.a. optimize the graph)"),
@@ -176,13 +183,23 @@ class MachCommands(MachCommandBase):
             target_tasks_method = taskgraph.target_tasks.get_method(target_tasks_method)
             tgg = taskgraph.generator.TaskGraphGenerator(
             tg = getattr(tgg, graph_attr)
-            for label in tg.graph.visit_postorder():
-                print(tg.tasks[label])
+            show_method = getattr(self, 'show_taskgraph_' + (options['format'] or 'labels'))
+            show_method(tg)
         except Exception as e:
+    def show_taskgraph_labels(self, taskgraph):
+        for label in taskgraph.graph.visit_postorder():
+            print(label)
+    def show_taskgraph_json(self, taskgraph):
+        # JSON output is a sequence of JSON objects, rather than a single object, so
+        # disassemble the dictionary
+        for task in taskgraph.to_json().itervalues():
+            print(json.dumps(task))
--- a/taskcluster/taskgraph/decision.py
+++ b/taskcluster/taskgraph/decision.py
@@ -60,27 +60,24 @@ def taskgraph_decision(options):
     # write out the parameters used to generate this graph
     write_artifact('parameters.yml', dict(**parameters))
     # write out the full graph for reference
-    write_artifact('full-task-graph.json',
-                   taskgraph_to_json(tgg.full_task_graph))
+    write_artifact('full-task-graph.json', tgg.full_task_graph.to_json())
     # write out the target task set to allow reproducing this as input
-    write_artifact('target-tasks.json',
-                   tgg.target_task_set.tasks.keys())
+    write_artifact('target-tasks.json', tgg.target_task_set.tasks.keys())
     # write out the optimized task graph to describe what will actually happen,
     # and the map of labels to taskids
-    write_artifact('task-graph.json',
-                   taskgraph_to_json(tgg.optimized_task_graph))
+    write_artifact('task-graph.json', tgg.optimized_task_graph.to_json())
     write_artifact('label-to-taskid.json', tgg.label_to_taskid)
     # actually create the graph
     create_tasks(tgg.optimized_task_graph, tgg.label_to_taskid)
 def get_decision_parameters(options):
@@ -109,35 +106,16 @@ def get_decision_parameters(options):
         logger.warning("using default project parameters; add {} to "
               "PER_PROJECT_PARAMETERS in {} to customize behavior "
               "for this project".format(project, __file__))
     return Parameters(parameters)
-def taskgraph_to_json(taskgraph):
-    tasks = taskgraph.tasks
-    def tojson(task):
-        return {
-            'label': task.label,
-            'task': task.task,
-            'attributes': task.attributes,
-            'dependencies': []
-        }
-    rv = {label: tojson(tasks[label]) for label in taskgraph.graph.nodes}
-    # add dependencies with one trip through the graph edges
-    for (left, right, name) in taskgraph.graph.edges:
-        rv[left]['dependencies'].append((name, right))
-    return rv
 def write_artifact(filename, data):
     logger.info('writing artifact file `{}`'.format(filename))
     if not os.path.isdir(ARTIFACTS_DIR):
     path = os.path.join(ARTIFACTS_DIR, filename)
     if filename.endswith('.yml'):
         with open(path, 'w') as f:
             yaml.safe_dump(data, f, allow_unicode=True, default_flow_style=False)
--- a/taskcluster/taskgraph/test/test_decision.py
+++ b/taskcluster/taskgraph/test/test_decision.py
@@ -28,23 +28,23 @@ class TestDecision(unittest.TestCase):
         res = decision.taskgraph_to_json(taskgraph)
         self.assertEqual(res, {
             'a': {
                 'label': 'a',
                 'attributes': {'attr': 'a-task'},
                 'task': {},
-                'dependencies': [('edgelabel', 'b')],
+                'dependencies': {'edgelabel': 'b'},
             'b': {
                 'label': 'b',
                 'attributes': {},
                 'task': {'task': 'def'},
-                'dependencies': [],
+                'dependencies': {},
     def test_write_artifact_json(self):
         data = [{'some': 'data'}]
         tmpdir = tempfile.mkdtemp()
--- a/taskcluster/taskgraph/types.py
+++ b/taskcluster/taskgraph/types.py
@@ -48,16 +48,34 @@ class TaskGraph(object):
     by label.  TaskGraph instances should be treated as immutable.
     def __init__(self, tasks, graph):
         assert set(tasks) == graph.nodes
         self.tasks = tasks
         self.graph = graph
+    def to_json(self):
+        "Return a JSON-able object representing the task graph, as documented"
+        named_links_dict = self.graph.named_links_dict()
+        # this dictionary may be keyed by label or by taskid, so let's just call it 'key'
+        tasks = {}
+        for key in self.graph.visit_postorder():
+            task = self.tasks[key]
+            task_json = {
+                'label': task.label,
+                'attributes': task.attributes,
+                'dependencies': named_links_dict.get(key, {}),
+                'task': task.task
+            }
+            if task.task_id:
+                task_json['task_id'] = task.task_id
+            tasks[key] = task_json
+        return tasks
     def __getitem__(self, label):
         "Get a task by label"
         return self.tasks[label]
     def __iter__(self):
         "Iterate over tasks in undefined order"
         return self.tasks.itervalues()