Bug 1408352 - [taskgraph] Implement 'always_target' attribute, r=dustin
author Andrew Halberstadt <ahalberstadt@mozilla.com>
Mon, 30 Oct 2017 09:41:51 -0400
changeset 443166 4d233eb8b619bcba41a42ef74086576c2c1850ca
parent 443165 e982afd4f073a55188f4351cd2cfc9bab1f36bf2
child 443167 6fa4468dd31edf46f34a95d947e0798b06ae8e50
push id 1618
push user Callek@gmail.com
push date Thu, 11 Jan 2018 17:45:48 +0000
treeherder mozilla-release@882ca853e05a [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers dustin
bugs 1408352
milestone 58.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1408352 - [taskgraph] Implement 'always_target' attribute, r=dustin Tasks that have the 'always_target' attribute set will always be included in the target_task_graph, regardless of target task filtering. Furthermore, if they were only added because of this attribute (i.e., the filters would have excluded the task), then the task will be a candidate for optimization even if the 'optimize_target_tasks' parameter is False. MozReview-Commit-ID: 9eoVJ5qpAMO
taskcluster/docs/attributes.rst
taskcluster/taskgraph/generator.py
taskcluster/taskgraph/test/test_generator.py
taskcluster/taskgraph/transforms/job/__init__.py
taskcluster/taskgraph/transforms/task.py
--- a/taskcluster/docs/attributes.rst
+++ b/taskcluster/docs/attributes.rst
@@ -158,19 +158,31 @@ specific locale involved. Currently this
 ``beetmover`` and ``balrog`` kinds.
 
 signed
 ======
 Signals that the output of this task contains signed artifacts.
 
 repackage_type
 ==============
-This is the type of repackage. Can be ``repackage`` or 
+This is the type of repackage. Can be ``repackage`` or
 ``repackage_signing``.
 
 toolchain-artifact
 ==================
 For toolchain jobs, this is the path to the artifact for that toolchain.
 
 toolchain-alias
 ===============
 For toolchain jobs, this optionally gives an alias that can be used instead of the
 real toolchain job name in the toolchains list for build jobs.
+
+always_target
+=============
+
+Tasks with this attribute will be included in the ``target_task_graph`` regardless
+of any target task filtering that occurs. When a task is included in this manner
+(i.e. it otherwise would have been filtered out), it will be considered for
+optimization even if the ``optimize_target_tasks`` parameter is False.
+
+This is meant to be used for tasks which a developer would almost always want to
+run. Typically these tasks will be short running and have a high risk of causing
+a backout. Examples include ``lint`` and ``python-unittest`` tasks.
--- a/taskcluster/taskgraph/generator.py
+++ b/taskcluster/taskgraph/generator.py
@@ -252,17 +252,23 @@ class TaskGraphGenerator(object):
                 len(target_tasks)))
 
         yield verifications('target_task_set', target_task_set)
 
         logger.info("Generating target task graph")
         # include all docker-image build tasks here, in case they are needed for a graph morph
         docker_image_tasks = set(t.label for t in full_task_graph.tasks.itervalues()
                                  if t.attributes['kind'] == 'docker-image')
-        target_graph = full_task_graph.graph.transitive_closure(target_tasks | docker_image_tasks)
+        # include all tasks with `always_target` set
+        always_target_tasks = set(t.label for t in full_task_graph.tasks.itervalues()
+                                  if t.attributes.get('always_target'))
+        logger.info('Adding %d tasks with `always_target` attribute' % (
+                    len(always_target_tasks) - len(always_target_tasks & target_tasks)))
+        target_graph = full_task_graph.graph.transitive_closure(
+            target_tasks | docker_image_tasks | always_target_tasks)
         target_task_graph = TaskGraph(
             {l: all_tasks[l] for l in target_graph.nodes},
             target_graph)
         yield verifications('target_task_graph', target_task_graph)
 
         logger.info("Generating optimized task graph")
         existing_tasks = self.parameters.get('existing_tasks')
         do_not_optimize = set(self.parameters.get('do_not_optimize', []))
--- a/taskcluster/taskgraph/test/test_generator.py
+++ b/taskcluster/taskgraph/test/test_generator.py
@@ -1,84 +1,123 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
+import pytest
 import unittest
+from mozunit import main
 
 from taskgraph.generator import TaskGraphGenerator, Kind
-from taskgraph import graph, target_tasks as target_tasks_mod
-from mozunit import main
+from taskgraph.optimize import OptimizationStrategy
+from taskgraph.util.templates import merge
+from taskgraph import (
+    graph,
+    optimize as optimize_mod,
+    target_tasks as target_tasks_mod,
+)
 
 
 def fake_loader(kind, path, config, parameters, loaded_tasks):
     for i in range(3):
         dependencies = {}
         if i >= 1:
             dependencies['prev'] = '{}-t-{}'.format(kind, i-1)
-        yield {'kind': kind,
-               'label': '{}-t-{}'.format(kind, i),
-               'attributes': {'_tasknum': str(i)},
-               'task': {'i': i},
-               'dependencies': dependencies}
+
+        task = {
+            'kind': kind,
+            'label': '{}-t-{}'.format(kind, i),
+            'attributes': {'_tasknum': str(i)},
+            'task': {'i': i},
+            'dependencies': dependencies,
+        }
+        if 'job-defaults' in config:
+            task = merge(config['job-defaults'], task)
+        yield task
 
 
 class FakeKind(Kind):
 
     def _get_loader(self):
         return fake_loader
 
     def load_tasks(self, parameters, loaded_tasks):
         FakeKind.loaded_kinds.append(self.name)
         return super(FakeKind, self).load_tasks(parameters, loaded_tasks)
 
 
 class WithFakeKind(TaskGraphGenerator):
 
     def _load_kinds(self):
-        for kind_name, deps in self.parameters['_kinds']:
+        for kind_name, cfg in self.parameters['_kinds']:
             config = {
                 'transforms': [],
             }
-            if deps:
-                config['kind-dependencies'] = deps
+            if cfg:
+                config.update(cfg)
             yield FakeKind(kind_name, '/fake', config)
 
 
 class FakeParameters(dict):
     strict = True
 
 
+class FakeOptimization(OptimizationStrategy):
+    def __init__(self, mode, *args, **kwargs):
+        super(FakeOptimization, self).__init__(*args, **kwargs)
+        self.mode = mode
+
+    def should_remove_task(self, task, params, arg):
+        if self.mode == 'always':
+            return True
+        if self.mode == 'even':
+            return task.task['i'] % 2 == 0
+        if self.mode == 'odd':
+            return task.task['i'] % 2 != 0
+        return False
+
+
 class TestGenerator(unittest.TestCase):
 
-    def maketgg(self, target_tasks=None, kinds=[('_fake', [])]):
+    @pytest.fixture(autouse=True)
+    def patch(self, monkeypatch):
+        self.patch = monkeypatch
+
+    def maketgg(self, target_tasks=None, kinds=[('_fake', [])], params=None):
+        params = params or {}
         FakeKind.loaded_kinds = []
         self.target_tasks = target_tasks or []
 
         def target_tasks_method(full_task_graph, parameters):
             return self.target_tasks
 
+        def make_fake_strategies():
+            return {mode: FakeOptimization(mode)
+                    for mode in ('always', 'never', 'even', 'odd')}
+
         target_tasks_mod._target_task_methods['test_method'] = target_tasks_method
+        self.patch.setattr(optimize_mod, '_make_default_strategies', make_fake_strategies)
 
         parameters = FakeParameters({
             '_kinds': kinds,
             'target_tasks_method': 'test_method',
             'try_mode': None,
         })
+        parameters.update(params)
 
         return WithFakeKind('/root', parameters)
 
     def test_kind_ordering(self):
         "When task kinds depend on each other, they are loaded in postorder"
         self.tgg = self.maketgg(kinds=[
-            ('_fake3', ['_fake2', '_fake1']),
-            ('_fake2', ['_fake1']),
-            ('_fake1', []),
+            ('_fake3', {'kind-dependencies': ['_fake2', '_fake1']}),
+            ('_fake2', {'kind-dependencies': ['_fake1']}),
+            ('_fake1', {'kind-dependencies': []}),
         ])
         self.tgg._run_until('full_task_set')
         self.assertEqual(FakeKind.loaded_kinds, ['_fake1', '_fake2', '_fake3'])
 
     def test_full_task_set(self):
         "The full_task_set property has all tasks"
         self.tgg = self.maketgg()
         self.assertEqual(self.tgg.full_task_set.graph,
@@ -110,16 +149,40 @@ class TestGenerator(unittest.TestCase):
         "The target_task_graph property has the targeted tasks and deps"
         self.tgg = self.maketgg(['_fake-t-1'])
         self.assertEqual(self.tgg.target_task_graph.graph,
                          graph.Graph({'_fake-t-0', '_fake-t-1'},
                                      {('_fake-t-1', '_fake-t-0', 'prev')}))
         self.assertEqual(sorted(self.tgg.target_task_graph.tasks.keys()),
                          sorted(['_fake-t-0', '_fake-t-1']))
 
+    def test_always_target_tasks(self):
+        "The target_task_graph includes tasks with 'always_target'"
+        tgg_args = {
+            'target_tasks': ['_fake-t-0', '_fake-t-1', '_ignore-t-0', '_ignore-t-1'],
+            'kinds': [
+                ('_fake', {'job-defaults': {'optimization': {'odd': None}}}),
+                ('_ignore', {'job-defaults': {
+                    'attributes': {'always_target': True},
+                    'optimization': {'even': None},
+                }}),
+            ],
+            'params': {'optimize_target_tasks': False},
+        }
+        self.tgg = self.maketgg(**tgg_args)
+        self.assertEqual(
+            sorted(self.tgg.target_task_set.tasks.keys()),
+            sorted(['_fake-t-0', '_fake-t-1', '_ignore-t-0', '_ignore-t-1']))
+        self.assertEqual(
+            sorted(self.tgg.target_task_graph.tasks.keys()),
+            sorted(['_fake-t-0', '_fake-t-1', '_ignore-t-0', '_ignore-t-1', '_ignore-t-2']))
+        self.assertEqual(
+            sorted([t.label for t in self.tgg.optimized_task_graph.tasks.values()]),
+            sorted(['_fake-t-0', '_fake-t-1', '_ignore-t-0', '_ignore-t-1']))
+
     def test_optimized_task_graph(self):
         "The optimized task graph contains task ids"
         self.tgg = self.maketgg(['_fake-t-2'])
         tid = self.tgg.label_to_taskid
         self.assertEqual(
             self.tgg.optimized_task_graph.graph,
             graph.Graph({tid['_fake-t-0'], tid['_fake-t-1'], tid['_fake-t-2']}, {
                 (tid['_fake-t-1'], tid['_fake-t-0'], 'prev'),
--- a/taskcluster/taskgraph/transforms/job/__init__.py
+++ b/taskcluster/taskgraph/transforms/job/__init__.py
@@ -56,16 +56,17 @@ job_description_schema = Schema({
     Optional('scopes'): task_description_schema['scopes'],
     Optional('tags'): task_description_schema['tags'],
     Optional('extra'): task_description_schema['extra'],
     Optional('notifications'): task_description_schema['notifications'],
     Optional('treeherder'): task_description_schema['treeherder'],
     Optional('index'): task_description_schema['index'],
     Optional('run-on-projects'): task_description_schema['run-on-projects'],
     Optional('coalesce'): task_description_schema['coalesce'],
+    Optional('always-target'): task_description_schema['always-target'],
     Exclusive('optimization', 'optimization'): task_description_schema['optimization'],
     Optional('needs-sccache'): task_description_schema['needs-sccache'],
 
     # The "when" section contains descriptions of the circumstances under which
     # this task should be included in the task graph.  This will be converted
     # into an optimization, so it cannot be specified in a job description that
     # also gives 'optimization'.
     Exclusive('when', 'optimization'): Any({
--- a/taskcluster/taskgraph/transforms/task.py
+++ b/taskcluster/taskgraph/transforms/task.py
@@ -186,16 +186,23 @@ task_description_schema = Schema({
         # tasks.
         'age': int,
 
         # The minimum number of backlogged tasks with the same coalescing key,
         # before the coalescing service will return tasks.
         'size': int,
     },
 
+    # The `always-target` attribute will cause the task to be included in the
+    # target_task_graph regardless of filtering. Tasks included in this manner
+    # will be candidates for optimization even when `optimize_target_tasks` is
+    # False, unless the task was also explicitly chosen by the target_tasks
+    # method.
+    Required('always-target', default=False): bool,
+
     # Optimization to perform on this task during the optimization phase.
     # Optimizations are defined in taskcluster/taskgraph/optimize.py.
     Required('optimization', default=None): Any(
         # always run this task (default)
         None,
         # search the index for the given index namespaces, and replace this task if found
         # the search occurs in order, with the first match winning
         {'index-search': [basestring]},
@@ -1317,16 +1324,17 @@ def build_task(config, tasks):
             task_def['metadata']['description'] += ' ([Treeherder push]({}))'.format(
                 th_push_link)
 
         # add the payload and adjust anything else as required (e.g., scopes)
         payload_builders[task['worker']['implementation']](config, task, task_def)
 
         attributes = task.get('attributes', {})
         attributes['run_on_projects'] = task.get('run-on-projects', ['all'])
+        attributes['always_target'] = task['always-target']
 
         # Set MOZ_AUTOMATION on all jobs.
         if task['worker']['implementation'] in (
             'generic-worker',
             'docker-engine',
             'native-engine',
             'docker-worker',
         ):