Bug 1527895 - Add soft-dependencies to taskgraph, r=ahal,marco,tomprince,dustin
author Bastien Abadie <bastien@mozilla.com>
Mon, 04 Mar 2019 17:07:34 +0000
changeset 520117 5caf48a420eb337a296c8b337332f2933884f467
parent 520116 8b3fe0426ffc1b3a2ad044ef6cdde6c4f736f8e2
child 520118 0ecb667b6b046767759ca6fda321d5ee2385d3e6
push id 10862
push user ffxbld-merge
push date Mon, 11 Mar 2019 13:01:11 +0000
treeherder mozilla-beta@a2e7f5c935da [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers ahal, marco, tomprince, dustin
bugs 1527895
milestone 67.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1527895 - Add soft-dependencies to taskgraph, r=ahal,marco,tomprince,dustin Differential Revision: https://phabricator.services.mozilla.com/D19791
taskcluster/docs/optimization-process.rst
taskcluster/docs/taskgraph.rst
taskcluster/taskgraph/generator.py
taskcluster/taskgraph/optimize.py
taskcluster/taskgraph/task.py
taskcluster/taskgraph/test/test_taskgraph.py
taskcluster/taskgraph/transforms/job/__init__.py
taskcluster/taskgraph/transforms/task.py
--- a/taskcluster/docs/optimization-process.rst
+++ b/taskcluster/docs/optimization-process.rst
@@ -60,16 +60,19 @@ The first two phases annotate each task 
 fate: removed, replaced, or retained. The tasks that are replaced also have a
 replacement taskId.
 
 The last phase constructs a subgraph containing the retained tasks, and
 simultaneously rewrites all dependencies to refer to taskIds instead of labels.
 To do so, it assigns a taskId to each retained task and uses the replacement
 taskId for all replaced tasks.
 
+The `soft-dependencies` of each task are then resolved: every label in that
+list whose task remains in the subgraph is added to the task's `dependencies`.
+
 The result is an optimized taskgraph with tasks named by taskId instead of
 label. At this phase, the edges in the task graph diverge from the
 ``task.dependencies`` attributes, as the latter may contain dependencies
 outside of the taskgraph (for replacement tasks).
 
 As a side-effect, this phase also expands all ``{"task-reference": ".."}`` and
 ``{"artifact-reference": ".."}`` objects within the task definitions.
 
--- a/taskcluster/docs/taskgraph.rst
+++ b/taskcluster/docs/taskgraph.rst
@@ -60,16 +60,28 @@ the bread-and-butter build and test conf
 Dependencies
 ------------
 
 Dependencies between tasks are represented as labeled edges in the task graph.
 For example, a test task must depend on the build task creating the artifact it
 tests, and this dependency edge is named 'build'.  The task graph generation
 process later resolves these dependencies to specific taskIds.
 
+Dependencies are typically used to ensure that prerequisites to a task, such as
+creation of binary artifacts, are completed before that task runs. But
+dependencies can also be used to schedule follow-up work such as summarizing
+test results. In the latter case, the summarization task will "pull in" all of
+the tasks it depends on, even if those tasks might otherwise be optimized away.
+The fix for this situation is "soft dependencies".
+To make a task depend only on tasks that remain after the optimization process
+has completed, list the labels of those tasks in its `soft-dependencies`.
+This is useful for tasks that should not pull other tasks into the graph, but do
+need to run after them if they are in the graph (e.g. a signing task after an
+optional build, or a task reporting on other tasks' outputs).
+
 Decision Task
 -------------
 
 The decision task is the first task created when a new graph begins.  It is
 responsible for creating the rest of the task graph.
 
 The decision task for pushes is defined in-tree, in ``.taskcluster.yml``.  That
 task description invokes ``mach taskcluster decision`` with some metadata about
--- a/taskcluster/taskgraph/generator.py
+++ b/taskcluster/taskgraph/generator.py
@@ -66,16 +66,17 @@ class Kind(object):
         trans_config = TransformConfig(self.name, self.path, config, parameters,
                                        kind_dependencies_tasks, self.graph_config)
         tasks = [Task(self.name,
                       label=task_dict['label'],
                       attributes=task_dict['attributes'],
                       task=task_dict['task'],
                       optimization=task_dict.get('optimization'),
                       dependencies=task_dict.get('dependencies'),
+                      soft_dependencies=task_dict.get('soft-dependencies'),
                       release_artifacts=task_dict.get('release-artifacts'),
                       )
                  for task_dict in transforms(trans_config, inputs)]
         return tasks
 
     @classmethod
     def load(cls, root_dir, graph_config, kind_name):
         path = os.path.join(root_dir, kind_name)
--- a/taskcluster/taskgraph/optimize.py
+++ b/taskcluster/taskgraph/optimize.py
@@ -209,16 +209,25 @@ def get_subgraph(target_task_graph, remo
     omit = removed_tasks | replaced_tasks
     for label, task in target_task_graph.tasks.iteritems():
         if label in omit:
             continue
         task.task_id = label_to_taskid[label]
         named_task_dependencies = {
             name: label_to_taskid[label]
             for name, label in named_links_dict.get(label, {}).iteritems()}
+
+        # Add remaining soft dependencies
+        if task.soft_dependencies:
+            named_task_dependencies.update({
+                label: label_to_taskid[label]
+                for label in task.soft_dependencies
+                if label in label_to_taskid and label not in omit
+            })
+
         task.task = resolve_task_references(task.label, task.task, named_task_dependencies)
         deps = task.task.setdefault('dependencies', [])
         deps.extend(sorted(named_task_dependencies.itervalues()))
         tasks_by_taskid[task.task_id] = task
 
     # resolve edges to taskIds
     edges_by_taskid = (
         (label_to_taskid.get(left), label_to_taskid.get(right), name)
--- a/taskcluster/taskgraph/task.py
+++ b/taskcluster/taskgraph/task.py
@@ -14,46 +14,50 @@ class Task(object):
 
     - kind: the name of the task kind
     - label; the label for this task
     - attributes: a dictionary of attributes for this task (used for filtering)
     - task: the task definition (JSON-able dictionary)
     - optimization: optimization to apply to the task (see taskgraph.optimize)
     - dependencies: tasks this one depends on, in the form {name: label}, for example
       {'build': 'build-linux64/opt', 'docker-image': 'build-docker-image-desktop-test'}
+    - soft_dependencies: tasks this one may depend on if they are still present
+      after optimization, given as a list of task labels.
 
     And later, as the task-graph processing proceeds:
 
     - task_id -- TaskCluster taskId under which this task will be created
 
     This class is just a convenience wrapper for the data type and managing
     display, comparison, serialization, etc. It has no functionality of its own.
     """
 
     kind = attr.ib()
     label = attr.ib()
     attributes = attr.ib()
     task = attr.ib()
     task_id = attr.ib(default=None, init=False)
     optimization = attr.ib(default=None)
     dependencies = attr.ib(factory=dict)
+    soft_dependencies = attr.ib(factory=list)
     release_artifacts = attr.ib(
         converter=attr.converters.optional(frozenset),
         default=None,
     )
 
     def __attrs_post_init__(self):
         self.attributes['kind'] = self.kind
 
     def to_json(self):
         rv = {
             'kind': self.kind,
             'label': self.label,
             'attributes': self.attributes,
             'dependencies': self.dependencies,
+            'soft_dependencies': self.soft_dependencies,
             'optimization': self.optimization,
             'task': self.task,
         }
         if self.task_id:
             rv['task_id'] = self.task_id
         if self.release_artifacts:
             rv['release_artifacts'] = sorted(self.release_artifacts)
         return rv
@@ -67,13 +71,14 @@ class Task(object):
         """
         rv = cls(
             kind=task_dict['kind'],
             label=task_dict['label'],
             attributes=task_dict['attributes'],
             task=task_dict['task'],
             optimization=task_dict['optimization'],
             dependencies=task_dict.get('dependencies'),
+            soft_dependencies=task_dict.get('soft_dependencies'),
             release_artifacts=task_dict.get('release-artifacts'),
         )
         if 'task_id' in task_dict:
             rv.task_id = task_dict['task_id']
         return rv
--- a/taskcluster/taskgraph/test/test_taskgraph.py
+++ b/taskcluster/taskgraph/test/test_taskgraph.py
@@ -36,24 +36,26 @@ class TestTaskGraph(unittest.TestCase):
 
         self.assertEqual(res, {
             'a': {
                 'kind': 'test',
                 'label': 'a',
                 'attributes': {'attr': 'a-task', 'kind': 'test'},
                 'task': {'taskdef': True},
                 'dependencies': {'edgelabel': 'b'},
+                'soft_dependencies': [],
                 'optimization': None,
             },
             'b': {
                 'kind': 'test',
                 'label': 'b',
                 'attributes': {'kind': 'test'},
                 'task': {'task': 'def'},
                 'dependencies': {},
+                'soft_dependencies': [],
                 'optimization': {'seta': None},
             }
         })
 
     def test_round_trip(self):
         graph = TaskGraph(tasks={
             'a': Task(
                 kind='fancy',
--- a/taskcluster/taskgraph/transforms/job/__init__.py
+++ b/taskcluster/taskgraph/transforms/job/__init__.py
@@ -43,16 +43,17 @@ job_description_schema = Schema({
 
     # the following fields are passed directly through to the task description,
     # possibly modified by the run implementation.  See
     # taskcluster/taskgraph/transforms/task.py for the schema details.
     Required('description'): task_description_schema['description'],
     Optional('attributes'): task_description_schema['attributes'],
     Optional('job-from'): task_description_schema['job-from'],
     Optional('dependencies'): task_description_schema['dependencies'],
+    Optional('soft-dependencies'): task_description_schema['soft-dependencies'],
     Optional('expires-after'): task_description_schema['expires-after'],
     Optional('routes'): task_description_schema['routes'],
     Optional('scopes'): task_description_schema['scopes'],
     Optional('tags'): task_description_schema['tags'],
     Optional('extra'): task_description_schema['extra'],
     Optional('treeherder'): task_description_schema['treeherder'],
     Optional('index'): task_description_schema['index'],
     Optional('run-on-projects'): task_description_schema['run-on-projects'],
@@ -242,16 +243,17 @@ def make_task_description(config, jobs):
         if job['run']['using'] != 'always-optimized':
             job['run'].setdefault('workdir', '/builds/worker')
 
         taskdesc = copy.deepcopy(job)
 
         # fill in some empty defaults to make run implementations easier
         taskdesc.setdefault('attributes', {})
         taskdesc.setdefault('dependencies', {})
+        taskdesc.setdefault('soft-dependencies', [])
         taskdesc.setdefault('routes', [])
         taskdesc.setdefault('scopes', [])
         taskdesc.setdefault('extra', {})
 
         # give the function for job.run.using on this worker implementation a
         # chance to set up the task description.
         configure_taskdesc_for_run(config, job, taskdesc, impl)
         del taskdesc['run']
--- a/taskcluster/taskgraph/transforms/task.py
+++ b/taskcluster/taskgraph/transforms/task.py
@@ -63,16 +63,19 @@ task_description_schema = Schema({
     # relative path (from config.path) to the file task was defined in
     Optional('job-from'): basestring,
 
     # dependencies of this task, keyed by name; these are passed through
     # verbatim and subject to the interpretation of the Task's get_dependencies
     # method.
     Optional('dependencies'): {basestring: object},
 
+    # Soft dependencies of this task, as a list of task labels
+    Optional('soft-dependencies'): [basestring],
+
     Optional('requires'): Any('all-completed', 'all-resolved'),
 
     # expiration and deadline times, relative to task creation, with units
     # (e.g., "14 days").  Defaults are set based on the project.
     Optional('expires-after'): basestring,
     Optional('deadline-after'): basestring,
 
     # custom routes for this task; the default treeherder routes will be added
@@ -1766,16 +1769,17 @@ def build_task(config, tasks):
             if payload:
                 env = payload.setdefault('env', {})
                 env['MOZ_AUTOMATION'] = '1'
 
         yield {
             'label': task['label'],
             'task': task_def,
             'dependencies': task.get('dependencies', {}),
+            'soft-dependencies': task.get('soft-dependencies', []),
             'attributes': attributes,
             'optimization': task.get('optimization', None),
             'release-artifacts': task.get('release-artifacts', []),
         }
 
 
 @transforms.add
 def chain_of_trust(config, tasks):