Bug 1415619: Factor out toolchain cache index calculations. r=dustin,mshal
authorTom Prince <mozilla@hocat.ca>
Mon, 06 Nov 2017 13:37:00 -0700
changeset 437073 44579aec5f277bf69548365d95ed73d075e72c1e
parent 437072 1e7c4b2344521ad4d6e3e6039ed214df968b8b46
child 437074 4a3811a038dcfb2ef0ada6787f67527030a5fdec
push id: 117
push user: fmarier@mozilla.com
push date: Tue, 28 Nov 2017 20:17:16 +0000
reviewers: dustin, mshal
bugs: 1415619
milestone: 59.0a1
Bug 1415619: Factor out toolchain cache index calculations. r=dustin,mshal MozReview-Commit-ID: KSvwr94j5QB
taskcluster/taskgraph/transforms/job/toolchain.py
taskcluster/taskgraph/util/cached_tasks.py
--- a/taskcluster/taskgraph/transforms/job/toolchain.py
+++ b/taskcluster/taskgraph/transforms/job/toolchain.py
@@ -2,35 +2,34 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 """
 Support for running toolchain-building jobs via dedicated scripts
 """
 
 from __future__ import absolute_import, print_function, unicode_literals
 
-import hashlib
-
 from mozbuild.shellutil import quote as shell_quote
 
 from taskgraph.util.schema import Schema
 from voluptuous import Optional, Required, Any
 
 from taskgraph.transforms.job import run_job_using
 from taskgraph.transforms.job.common import (
     docker_worker_add_gecko_vcs_env_vars,
     docker_worker_add_public_artifacts,
     docker_worker_add_tooltool,
     support_vcs_checkout,
 )
 from taskgraph.util.hash import hash_paths
 from taskgraph import GECKO
+from taskgraph.util.cached_tasks import add_optimization
 
 
-TOOLCHAIN_INDEX = 'gecko.cache.level-{level}.toolchains.v1.{name}.{digest}'
+CACHE_TYPE = 'toolchains.v1'
 
 toolchain_run_schema = Schema({
     Required('using'): 'toolchain-script',
 
     # The script (in taskcluster/scripts/misc) to run.
     # Python scripts are invoked with `mach python` so vendored libraries
     # are available.
     Required('script'): basestring,
@@ -61,17 +60,17 @@ toolchain_run_schema = Schema({
     Required('toolchain-artifact'): basestring,
 
     # An alias that can be used instead of the real toolchain job name in
     # the toolchains list for build jobs.
     Optional('toolchain-alias'): basestring,
 })
 
 
-def add_optimization(config, run, taskdesc):
+def get_digest_data(config, run, taskdesc):
     files = list(run.get('resources', []))
     # This file
     files.append('taskcluster/taskgraph/transforms/job/toolchain.py')
     # The script
     files.append('taskcluster/scripts/misc/{}'.format(run['script']))
     # Tooltool manifest if any is defined:
     tooltool_manifest = taskdesc['worker']['env'].get('TOOLTOOL_MANIFEST')
     if tooltool_manifest:
@@ -87,34 +86,17 @@ def add_optimization(config, run, taskde
     deps = taskdesc['dependencies']
     if deps:
         data.extend(sorted(deps.values()))
 
     # Likewise script arguments should influence the index.
     args = run.get('arguments')
     if args:
         data.extend(args)
-
-    label = taskdesc['label']
-    subs = {
-        'name': label.replace('%s-' % config.kind, ''),
-        'digest': hashlib.sha256('\n'.join(data)).hexdigest()
-    }
-
-    # We'll try to find a cached version of the toolchain at levels above
-    # and including the current level, starting at the highest level.
-    index_routes = []
-    for level in reversed(range(int(config.params['level']), 4)):
-        subs['level'] = level
-        index_routes.append(TOOLCHAIN_INDEX.format(**subs))
-    taskdesc['optimization'] = {'index-search': index_routes}
-
-    # ... and cache at the lowest level.
-    taskdesc.setdefault('routes', []).append(
-        'index.{}'.format(TOOLCHAIN_INDEX.format(**subs)))
+    return data
 
 
 @run_job_using("docker-worker", "toolchain-script", schema=toolchain_run_schema)
 def docker_worker_toolchain(config, job, taskdesc):
     run = job['run']
     taskdesc['run-on-projects'] = ['trunk', 'try']
 
     worker = taskdesc['worker']
@@ -168,17 +150,23 @@ def docker_worker_toolchain(config, job,
             wrapper, run['script'], args)
     ]
 
     attributes = taskdesc.setdefault('attributes', {})
     attributes['toolchain-artifact'] = run['toolchain-artifact']
     if 'toolchain-alias' in run:
         attributes['toolchain-alias'] = run['toolchain-alias']
 
-    add_optimization(config, run, taskdesc)
+    name = taskdesc['label'].replace('{}-'.format(config.kind), '', 1)
+    add_optimization(
+        config, taskdesc,
+        cache_type=CACHE_TYPE,
+        cache_name=name,
+        digest_data=get_digest_data(config, run, taskdesc),
+    )
 
 
 @run_job_using("generic-worker", "toolchain-script", schema=toolchain_run_schema)
 def windows_toolchain(config, job, taskdesc):
     run = job['run']
     taskdesc['run-on-projects'] = ['trunk', 'try']
 
     worker = taskdesc['worker']
@@ -224,9 +212,15 @@ def windows_toolchain(config, job, taskd
             bash, run['script'], args)
     ]
 
     attributes = taskdesc.setdefault('attributes', {})
     attributes['toolchain-artifact'] = run['toolchain-artifact']
     if 'toolchain-alias' in run:
         attributes['toolchain-alias'] = run['toolchain-alias']
 
-    add_optimization(config, run, taskdesc)
+    name = taskdesc['label'].replace('{}-'.format(config.kind), '', 1)
+    add_optimization(
+        config, taskdesc,
+        cache_type=CACHE_TYPE,
+        cache_name=name,
+        digest_data=get_digest_data(config, run, taskdesc),
+    )
new file mode 100644
--- /dev/null
+++ b/taskcluster/taskgraph/util/cached_tasks.py
@@ -0,0 +1,55 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+import hashlib
+
+
+TARGET_CACHE_INDEX = (
+    'gecko.cache.level-{level}.{type}.{name}.{digest}'
+)
+
+
+def add_optimization(config, taskdesc, cache_type, cache_name, digest=None, digest_data=None):
+    """
+    Allow the results of this task to be cached. This adds index routes to the
+    task so it can be looked up for future runs, and optimization hints so that
+    cached artifacts can be found. Exactly one of `digest` and `digest_data`
+    must be passed.
+
+    :param TransformConfig config: The configuration for the kind being transformed.
+    :param dict taskdesc: The description of the current task.
+    :param str cache_type: The type of task result being cached.
+    :param str cache_name: The name of the object being cached.
+    :param digest: A unique string identifying this version of the artifacts
+        being generated. Typically this will be the hash of inputs to the task.
+    :type digest: bytes or None
+    :param digest_data: A list of bytes representing the inputs of this task.
+        They will be concatenated and hashed to create the digest for this
+        task.
+    :type digest_data: list of bytes or None
+    """
+    if (digest is None) == (digest_data is None):
+        raise Exception("Must pass exactly one of `digest` and `digest_data`.")
+    if digest is None:
+        digest = hashlib.sha256('\n'.join(digest_data)).hexdigest()
+
+    subs = {
+        'type': cache_type,
+        'name': cache_name,
+        'digest': digest,
+    }
+
+    # We'll try to find a cached version of the toolchain at levels above
+    # and including the current level, starting at the highest level.
+    index_routes = []
+    for level in reversed(range(int(config.params['level']), 4)):
+        subs['level'] = level
+        index_routes.append(TARGET_CACHE_INDEX.format(**subs))
+    taskdesc['optimization'] = {'index-search': index_routes}
+
+    # ... and cache at the lowest level.
+    taskdesc.setdefault('routes', []).append(
+        'index.{}'.format(TARGET_CACHE_INDEX.format(**subs)))