Bug 1383880: add support for optimizing tasks based on SCHEDULES; r=ahal
☠☠ backed out by d88e5dec2638 ☠ ☠
author Dustin J. Mitchell <dustin@mozilla.com>
Wed, 23 Aug 2017 16:21:06 +0000
changeset 431650 ebdd6ccbcfca7f7672040f7227da31594d60f737
parent 431649 ebcc9d20981a491ab1cb6b71bff43225a90169e8
child 431651 b354fdf6e233ae06118ac8f1a975d0d4a53db4a9
push id 7785
push user ryanvm@gmail.com
push date Thu, 21 Sep 2017 13:39:55 +0000
treeherder mozilla-beta@06d4034a8a03 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1383880: add support for optimizing tasks based on SCHEDULES; r=ahal

This adds some new optimization strategies. For tests, we use Either(SETA, SkipUnlessSchedules), thereby giving both mechanisms a chance to skip tasks. On try, SETA is omitted.

MozReview-Commit-ID: GL4tlwyeBa6
--- a/build/sparse-profiles/taskgraph
+++ b/build/sparse-profiles/taskgraph
@@ -17,11 +17,15 @@ path:taskcluster/
 # them all in.
 # for new-style try pushes
+# Moz.build files are read in filesystem mode
 # Tooltool manifests also need to be opened. Assume they
 # are all somewhere in "tooltool-manifests" directories.
--- a/taskcluster/taskgraph/optimize.py
+++ b/taskcluster/taskgraph/optimize.py
@@ -19,20 +19,24 @@ import requests
 from collections import defaultdict
 from .graph import Graph
 from . import files_changed
 from .taskgraph import TaskGraph
 from .util.seta import is_low_value_task
 from .util.taskcluster import find_task_id
 from .util.parameterization import resolve_task_references
+from mozbuild.util import memoize
 from slugid import nice as slugid
+from mozbuild.frontend import reader
 logger = logging.getLogger(__name__)
+TOPSRCDIR = os.path.abspath(os.path.join(__file__, '../../../'))
 def optimize_task_graph(target_task_graph, params, do_not_optimize,
                         existing_tasks=None, strategies=None):
     Perform task optimization, returning a taskgraph and a map from label to
     assigned taskId, including replacement tasks.
     label_to_taskid = {}
@@ -66,16 +70,18 @@ def optimize_task_graph(target_task_grap
 def _make_default_strategies():
     return {
         'never': OptimizationStrategy(),  # "never" is the default behavior
         'index-search': IndexSearch(),
         'seta': SETA(),
         'skip-unless-changed': SkipUnlessChanged(),
+        'skip-unless-schedules': SkipUnlessSchedules(),
+        'skip-unless-schedules-or-seta': Either(SkipUnlessSchedules(), SETA()),
 def _get_optimizations(target_task_graph, strategies):
     def optimizations(label):
         task = target_task_graph.tasks[label]
         if task.optimization:
             opt_by, arg = task.optimization.items()[0]
@@ -239,16 +245,47 @@ class OptimizationStrategy(object):
     def should_replace_task(self, task, params, arg):
         """Determine whether to optimize this task by replacing it.  Returns a
         taskId to replace this task, True to replace with nothing, or False to
         keep the task."""
         return False
+class Either(OptimizationStrategy):
+    """Given one or more optimization strategies, remove a task if any of them
+    says to, and replace with a task if any finds a replacement (preferring the
+    earliest).  By default, each substrategy gets the same arg, but split_args
+    can return a list of args for each strategy, if desired."""
+    # Substrategies are given positionally.  The only keyword accepted is
+    # `split_args`: a callable taking the task's optimization arg and
+    # returning a sequence of args, paired positionally with the substrategies.
+    def __init__(self, *substrategies, **kwargs):
+        self.substrategies = substrategies
+        self.split_args = kwargs.pop('split_args', None)
+        if not self.split_args:
+            # default: hand the same arg to every substrategy
+            self.split_args = lambda arg: [arg] * len(substrategies)
+        # `split_args` was popped above, so anything left over is unsupported
+        if kwargs:
+            raise TypeError("unexpected keyword args")
+    # Call fn(substrategy, arg) for each substrategy in order and return the
+    # first truthy result; return False if no substrategy produced one.
+    def _for_substrategies(self, arg, fn):
+        # split_args(arg) is evaluated once, before the loop rebinds `arg` to
+        # the per-substrategy value.  zip() stops at the shorter sequence, so
+        # split_args is expected to return one arg per substrategy.
+        for sub, arg in zip(self.substrategies, self.split_args(arg)):
+            rv = fn(sub, arg)
+            if rv:
+                return rv
+        return False
+    # Truthy as soon as any substrategy says the task should be removed.
+    def should_remove_task(self, task, params, arg):
+        return self._for_substrategies(
+            arg,
+            lambda sub, arg: sub.should_remove_task(task, params, arg))
+    # Returns the first truthy should_replace_task result among the
+    # substrategies (earlier substrategies win), or False if none has one.
+    def should_replace_task(self, task, params, arg):
+        return self._for_substrategies(
+            arg,
+            lambda sub, arg: sub.should_replace_task(task, params, arg))
 class IndexSearch(OptimizationStrategy):
     def should_remove_task(self, task, params, index_paths):
         "If this task has no dependencies, don't run it.."
         return True
     def should_replace_task(self, task, params, index_paths):
         "Look for a task with one of the given index paths"
         for index_path in index_paths:
@@ -295,8 +332,35 @@ class SkipUnlessChanged(OptimizationStra
             return False
         changed = files_changed.check(params, file_patterns)
         if not changed:
             logger.debug('no files found matching a pattern in `skip-unless-changed` for ' +
             return True
         return False
+class SkipUnlessSchedules(OptimizationStrategy):
+    """Remove a task unless at least one of the components given as its
+    optimization arg appears in the SCHEDULES components of the files changed
+    by the push."""
+    # NOTE(review): @memoize presumably caches the result per argument tuple
+    # (including `self`) -- confirm against mozbuild.util.memoize.
+    @memoize
+    def scheduled_by_push(self, repository, revision):
+        """Return the set of SCHEDULES components collected from the files
+        changed at `revision` of `repository`."""
+        changed_files = files_changed.get_changed_files(repository, revision)
+        # read moz.build metadata from TOPSRCDIR using an EmptyConfig
+        config = reader.EmptyConfig(TOPSRCDIR)
+        rdr = reader.BuildReader(config)
+        components = set()
+        # only the per-file metadata (m) is used; the path (p) is ignored
+        for p, m in rdr.files_info(changed_files).items():
+            components |= set(m['SCHEDULES'].components)
+        return components
+    def should_remove_task(self, task, params, conditions):
+        """Return False (keep the task) if any component in `conditions` is
+        scheduled by the push, and True (remove it) otherwise."""
+        # NOTE(review): a pushlog_id of -1 presumably means there is no
+        # pushlog entry to derive changed files from -- confirm.  In that case
+        # the task is always kept.
+        if params.get('pushlog_id') == -1:
+            return False
+        scheduled = self.scheduled_by_push(params['head_repository'], params['head_rev'])
+        conditions = set(conditions)
+        # if *any* of the condition components are scheduled, do not optimize
+        if conditions & scheduled:
+            return False
+        return True
--- a/taskcluster/taskgraph/transforms/task.py
+++ b/taskcluster/taskgraph/transforms/task.py
@@ -13,16 +13,17 @@ from __future__ import absolute_import, 
 import hashlib
 import json
 import os
 import re
 import time
 from copy import deepcopy
 from mozbuild.util import memoize
+from mozbuild import schedules
 from taskgraph.util.attributes import TRUNK_PROJECTS
 from taskgraph.util.hash import hash_path
 from taskgraph.util.treeherder import split_symbol
 from taskgraph.transforms.base import TransformSequence
 from taskgraph.util.schema import validate_schema, Schema
 from taskgraph.util.scriptworker import get_release_config
 from voluptuous import Any, Required, Optional, Extra
 from taskgraph import GECKO
@@ -174,16 +175,20 @@ task_description_schema = Schema({
         # search the index for the given index namespaces, and replace this task if found
         # the search occurs in order, with the first match winning
         {'index-search': [basestring]},
         # consult SETA and skip this task if it is low-value
         {'seta': None},
         # skip this task if none of the given file patterns match
         {'skip-unless-changed': [basestring]},
+        # skip this task unless the changed files' SCHEDULES contains any of these components
+        {'skip-unless-schedules': list(schedules.ALL_COMPONENTS)},
+        # skip if SETA or skip-unless-schedules says to
+        {'skip-unless-schedules-or-seta': list(schedules.ALL_COMPONENTS)},
     # the provisioner-id/worker-type for the task.  The following parameters will
     # be substituted in this string:
     #  {level} -- the scm level of this push
     'worker-type': basestring,
     # Whether the job should use sccache compiler caching.