Bug 1383880: add support for optimizing tasks based on SCHEDULES; r=ahal
author: Dustin J. Mitchell <dustin@mozilla.com>
Wed, 23 Aug 2017 16:21:06 +0000
changeset 382221 a418182c67b93ba87cfd03538f6d1accc932ea35
parent 382220 33e1e1b4acd5403e6d32b19aed1c8a50d919bec8
child 382222 ab8f1fe0716313514d5b74d4145f13c8ec3a28f8
push id: 51794
push user: gszorc@mozilla.com
push date: Thu, 21 Sep 2017 15:24:42 +0000
treeherder: autoland@b0d1cd898a0b [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1383880: add support for optimizing tasks based on SCHEDULES; r=ahal

This adds some new optimization strategies. For tests, we use Either(SETA, SkipUnlessSchedules), thereby giving both mechanisms a chance to skip tasks. On try, SETA is omitted.

MozReview-Commit-ID: GL4tlwyeBa6
--- a/build/sparse-profiles/taskgraph
+++ b/build/sparse-profiles/taskgraph
@@ -17,11 +17,15 @@ path:taskcluster/
 # them all in.
 # for new-style try pushes
+# Moz.build files are read in filesystem mode
 # Tooltool manifests also need to be opened. Assume they
 # are all somewhere in "tooltool-manifests" directories.
--- a/taskcluster/taskgraph/optimize.py
+++ b/taskcluster/taskgraph/optimize.py
@@ -19,20 +19,24 @@ import requests
 from collections import defaultdict
 from .graph import Graph
 from . import files_changed
 from .taskgraph import TaskGraph
 from .util.seta import is_low_value_task
 from .util.taskcluster import find_task_id
 from .util.parameterization import resolve_task_references
+from mozbuild.util import memoize
 from slugid import nice as slugid
+from mozbuild.frontend import reader
 logger = logging.getLogger(__name__)
+TOPSRCDIR = os.path.abspath(os.path.join(__file__, '../../../'))
 def optimize_task_graph(target_task_graph, params, do_not_optimize,
                         existing_tasks=None, strategies=None):
     Perform task optimization, returning a taskgraph and a map from label to
     assigned taskId, including replacement tasks.
     label_to_taskid = {}
@@ -66,16 +70,18 @@ def optimize_task_graph(target_task_grap
 def _make_default_strategies():
     return {
         'never': OptimizationStrategy(),  # "never" is the default behavior
         'index-search': IndexSearch(),
         'seta': SETA(),
         'skip-unless-changed': SkipUnlessChanged(),
+        'skip-unless-schedules': SkipUnlessSchedules(),
+        'skip-unless-schedules-or-seta': Either(SkipUnlessSchedules(), SETA()),
 def _get_optimizations(target_task_graph, strategies):
     def optimizations(label):
         task = target_task_graph.tasks[label]
         if task.optimization:
             opt_by, arg = task.optimization.items()[0]
@@ -239,16 +245,47 @@ class OptimizationStrategy(object):
     def should_replace_task(self, task, params, arg):
         """Determine whether to optimize this task by replacing it.

         Returns a taskId to replace this task with, True to replace it with
         nothing, or False to keep the task.  This base implementation always
         returns False, i.e. it never replaces a task."""
         return False
+class Either(OptimizationStrategy):
+    """Given one or more optimization strategies, remove a task if any of them
+    says to, and replace with a task if any finds a replacement (preferring the
+    earliest).  By default, each substrategy gets the same arg, but split_args
+    can return a list of args for each strategy, if desired."""
+    def __init__(self, *substrategies, **kwargs):
+        self.substrategies = substrategies
+        self.split_args = kwargs.pop('split_args', None)
+        if not self.split_args:
+            self.split_args = lambda arg: [arg] * len(substrategies)
+        if kwargs:
+            raise TypeError("unexpected keyword args")
+    def _for_substrategies(self, arg, fn):
+        for sub, arg in zip(self.substrategies, self.split_args(arg)):
+            rv = fn(sub, arg)
+            if rv:
+                return rv
+        return False
+    def should_remove_task(self, task, params, arg):
+        return self._for_substrategies(
+            arg,
+            lambda sub, arg: sub.should_remove_task(task, params, arg))
+    def should_replace_task(self, task, params, arg):
+        return self._for_substrategies(
+            arg,
+            lambda sub, arg: sub.should_replace_task(task, params, arg))
 class IndexSearch(OptimizationStrategy):
     def should_remove_task(self, task, params, index_paths):
         "If this task has no dependencies, don't run it.."
         return True
     def should_replace_task(self, task, params, index_paths):
         "Look for a task with one of the given index paths"
         for index_path in index_paths:
@@ -295,8 +332,35 @@ class SkipUnlessChanged(OptimizationStra
             return False
         changed = files_changed.check(params, file_patterns)
         if not changed:
             logger.debug('no files found matching a pattern in `skip-unless-changed` for ' +
             return True
         return False
+class SkipUnlessSchedules(OptimizationStrategy):
+    @memoize
+    def scheduled_by_push(self, repository, revision):
+        changed_files = files_changed.get_changed_files(repository, revision)
+        config = reader.EmptyConfig(TOPSRCDIR)
+        rdr = reader.BuildReader(config)
+        components = set()
+        for p, m in rdr.files_info(changed_files).items():
+            components |= set(m['SCHEDULES'].components)
+        return components
+    def should_remove_task(self, task, params, conditions):
+        if params.get('pushlog_id') == -1:
+            return False
+        scheduled = self.scheduled_by_push(params['head_repository'], params['head_rev'])
+        conditions = set(conditions)
+        # if *any* of the condition components are scheduled, do not optimize
+        if conditions & scheduled:
+            return False
+        return True
--- a/taskcluster/taskgraph/transforms/task.py
+++ b/taskcluster/taskgraph/transforms/task.py
@@ -13,16 +13,17 @@ from __future__ import absolute_import, 
 import hashlib
 import json
 import os
 import re
 import time
 from copy import deepcopy
 from mozbuild.util import memoize
+from mozbuild import schedules
 from taskgraph.util.attributes import TRUNK_PROJECTS
 from taskgraph.util.hash import hash_path
 from taskgraph.util.treeherder import split_symbol
 from taskgraph.transforms.base import TransformSequence
 from taskgraph.util.schema import validate_schema, Schema
 from taskgraph.util.scriptworker import get_release_config
 from voluptuous import Any, Required, Optional, Extra
 from taskgraph import GECKO
@@ -174,16 +175,20 @@ task_description_schema = Schema({
         # search the index for the given index namespaces, and replace this task if found
         # the search occurs in order, with the first match winning
         {'index-search': [basestring]},
         # consult SETA and skip this task if it is low-value
         {'seta': None},
         # skip this task if none of the given file patterns match
         {'skip-unless-changed': [basestring]},
+        # skip this task unless the changed files' SCHEDULES contain any of these components
+        {'skip-unless-schedules': list(schedules.ALL_COMPONENTS)},
+        # skip if SETA or skip-unless-schedules says to
+        {'skip-unless-schedules-or-seta': list(schedules.ALL_COMPONENTS)},
     # the provisioner-id/worker-type for the task.  The following parameters will
     # be substituted in this string:
     #  {level} -- the scm level of this push
     'worker-type': basestring,
     # Whether the job should use sccache compiler caching.