Bug 1335651 - Setup an index path in the gecko.cache namespace for toolchain builds. r=dustin
author Mike Hommey <mh+mozilla@glandium.org>
Wed, 01 Feb 2017 09:27:31 +0900
changeset 343183 e7e02e3c2e56edb9ba8bff03716052ae261ebb26
parent 343182 671410de0b248dc65f0dbbd4782a9253fb9c58f2
child 343184 8c8b54b13be7ec12cb8e104b772162a80b524497
push id 37462
push user mh@glandium.org
push date Thu, 16 Feb 2017 04:38:25 +0000
treeherder autoland@e7e02e3c2e56 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers dustin
bugs 1335651
milestone 54.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1335651 - Setup an index path in the gecko.cache namespace for toolchain builds. r=dustin This allows finding them and optimizing them out during the taskgraph optimization phase, and will allow developers to get toolchain artifacts through a mach command. The index path is generated similarly to git trees or mercurial manifests, and allows finding the right task corresponding to the contents of the files listed in the task's `run.resources`, along with the toolchain scripts. `when.files-changed` is not used when a task has index paths because we need tasks to run independently of whether there were changes to those files when the index or artifacts expire.
taskcluster/ci/toolchain/linux.yml
taskcluster/ci/toolchain/macosx.yml
taskcluster/ci/toolchain/windows.yml
taskcluster/taskgraph/task/transform.py
taskcluster/taskgraph/transforms/job/toolchain.py
taskcluster/taskgraph/transforms/task.py
taskcluster/taskgraph/util/hash.py
--- a/taskcluster/ci/toolchain/linux.yml
+++ b/taskcluster/ci/toolchain/linux.yml
@@ -4,134 +4,129 @@
 
 linux64-clang/opt:
     description: "Clang toolchain build"
     treeherder:
         kind: build
         platform: toolchains/opt
         symbol: TL(clang)
         tier: 1
-    run:
-        using: toolchain-script
-        script: build-clang-linux.sh
-        tooltool-downloads: public
     worker-type: aws-provisioner-v1/gecko-{level}-b-linux
     worker:
         implementation: docker-worker
         docker-image: {in-tree: desktop-build}
         max-run-time: 36000
-    when:
-        files-changed:
+    run:
+        using: toolchain-script
+        script: build-clang-linux.sh
+        tooltool-downloads: public
+        resources:
             - 'build/build-clang/**'
 
 linux64-clang-tidy/opt:
     description: "Clang-tidy build"
     index:
         product: static-analysis
         job-name: linux64-clang-tidy
     treeherder:
         kind: build
         platform: toolchains/opt
         symbol: TL(clang-tidy)
         tier: 1
-    run:
-        using: toolchain-script
-        script: build-clang-tidy-linux.sh
-        tooltool-downloads: public
     worker-type: aws-provisioner-v1/gecko-{level}-b-linux
     worker:
         implementation: docker-worker
         docker-image: {in-tree: desktop-build}
         max-run-time: 36000
-    when:
-        files-changed:
+    run:
+        using: toolchain-script
+        script: build-clang-tidy-linux.sh
+        tooltool-downloads: public
+        resources:
             - 'build/clang-plugin/**'
             - 'build/build-clang/**'
 
 linux64-gcc/opt:
     description: "GCC toolchain build"
     treeherder:
         kind: build
         platform: toolchains/opt
         symbol: TL(gcc)
         tier: 1
-    run:
-        using: toolchain-script
-        script: build-gcc-linux.sh
     worker-type: aws-provisioner-v1/gecko-{level}-b-linux
     worker:
         implementation: docker-worker
         docker-image: {in-tree: desktop-build}
         max-run-time: 36000
-    when:
-        files-changed:
+    run:
+        using: toolchain-script
+        script: build-gcc-linux.sh
+        resources:
             - 'build/unix/build-gcc/**'
 
 linux64-binutils/opt:
     description: "Binutils toolchain build"
     treeherder:
         kind: build
         platform: toolchains/opt
         symbol: TL(binutil)
         tier: 1
-    run:
-        using: toolchain-script
-        script: build-binutils-linux.sh
     worker-type: aws-provisioner-v1/gecko-{level}-b-linux
     worker:
         implementation: docker-worker
         docker-image: {in-tree: desktop-build}
         max-run-time: 36000
-    when:
-        files-changed:
+    run:
+        using: toolchain-script
+        script: build-binutils-linux.sh
+        resources:
             - 'build/unix/build-binutils/**'
 
 linux64-cctools-port/opt:
     description: "cctools-port toolchain build"
     treeherder:
         kind: build
         platform: toolchains/opt
         symbol: TL(cctools)
         tier: 1
-    run:
-        using: toolchain-script
-        script: build-cctools-port.sh
     worker-type: aws-provisioner-v1/gecko-{level}-b-linux
     worker:
         implementation: docker-worker
         docker-image: {in-tree: desktop-build}
         max-run-time: 36000
+    run:
+        using: toolchain-script
+        script: build-cctools-port.sh
 
 linux64-hfsplus/opt:
     description: "hfsplus toolchain build"
     treeherder:
         kind: build
         platform: toolchains/opt
         symbol: TL(hfs+)
         tier: 1
-    run:
-        using: toolchain-script
-        script: build-hfsplus-linux.sh
-        tooltool-downloads: public
     worker-type: aws-provisioner-v1/gecko-{level}-b-linux
     worker:
         implementation: docker-worker
         docker-image: {in-tree: desktop-build}
         max-run-time: 36000
-    when:
-        files-changed:
+    run:
+        using: toolchain-script
+        script: build-hfsplus-linux.sh
+        tooltool-downloads: public
+        resources:
             - 'build/unix/build-hfsplus/**'
 
 linux64-libdmg/opt:
     description: "libdmg-hfsplus toolchain build"
     treeherder:
         kind: build
         platform: toolchains/opt
         symbol: TL(libdmg-hfs+)
         tier: 1
-    run:
-        using: toolchain-script
-        script: build-libdmg-hfsplus.sh
     worker-type: aws-provisioner-v1/gecko-{level}-b-linux
     worker:
         implementation: docker-worker
         docker-image: {in-tree: desktop-build}
         max-run-time: 36000
+    run:
+        using: toolchain-script
+        script: build-libdmg-hfsplus.sh
--- a/taskcluster/ci/toolchain/macosx.yml
+++ b/taskcluster/ci/toolchain/macosx.yml
@@ -4,61 +4,59 @@
 
 macosx64-clang/opt:
     description: "Clang toolchain build"
     treeherder:
         kind: build
         platform: toolchains/opt
         symbol: TM(clang)
         tier: 1
-    run:
-        using: toolchain-script
-        script: build-clang-macosx.sh
-        tooltool-downloads: internal
     worker-type: aws-provisioner-v1/gecko-{level}-b-macosx64
     worker:
         implementation: docker-worker
         docker-image: {in-tree: desktop-build}
         max-run-time: 36000
-    when:
-        files-changed:
+    run:
+        using: toolchain-script
+        script: build-clang-macosx.sh
+        tooltool-downloads: internal
+        resources:
             - 'build/build-clang/**'
 
 macosx64-clang-tidy/opt:
     description: "Clang-tidy build"
     index:
         product: static-analysis
         job-name: macosx64-clang-tidy
     treeherder:
         kind: build
         platform: toolchains/opt
         symbol: TM(clang-tidy)
         tier: 1
-    run:
-        using: toolchain-script
-        script: build-clang-tidy-macosx.sh
-        tooltool-downloads: internal
     worker-type: aws-provisioner-v1/gecko-{level}-b-macosx64
     worker:
         implementation: docker-worker
         docker-image: {in-tree: desktop-build}
         max-run-time: 36000
-    when:
-        files-changed:
+    run:
+        using: toolchain-script
+        script: build-clang-tidy-macosx.sh
+        tooltool-downloads: internal
+        resources:
             - 'build/clang-plugin/**'
             - 'build/build-clang/**'
 
 macosx64-cctools-port/opt:
     description: "cctools-port toolchain build"
     treeherder:
         kind: build
         platform: toolchains/opt
         symbol: TM(cctools)
         tier: 1
-    run:
-        using: toolchain-script
-        script: build-cctools-port-macosx.sh
-        tooltool-downloads: internal
     worker-type: aws-provisioner-v1/gecko-{level}-b-macosx64
     worker:
         implementation: docker-worker
         docker-image: {in-tree: desktop-build}
         max-run-time: 36000
+    run:
+        using: toolchain-script
+        script: build-cctools-port-macosx.sh
+        tooltool-downloads: internal
--- a/taskcluster/ci/toolchain/windows.yml
+++ b/taskcluster/ci/toolchain/windows.yml
@@ -11,36 +11,34 @@ win32-clang-cl/opt:
         tier: 2
     worker-type: aws-provisioner-v1/gecko-{level}-b-win2012
     worker:
         implementation: generic-worker
         max-run-time: 36000
     run:
         using: toolchain-script
         script: build-clang32-windows.sh
-    when:
-        files-changed:
+        resources:
             - 'build/build-clang/**'
 
 win64-clang-cl/opt:
     description: "Clang-cl toolchain build"
     treeherder:
         kind: build
         platform: toolchains/opt
         symbol: TW64(clang-cl)
         tier: 2
     worker-type: aws-provisioner-v1/gecko-{level}-b-win2012
     worker:
         implementation: generic-worker
         max-run-time: 36000
     run:
         using: toolchain-script
         script: build-clang64-windows.sh
-    when:
-        files-changed:
+        resources:
             - 'build/build-clang/**'
 
 win32-clang-tidy/opt:
     description: "Clang-tidy toolchain build"
     index:
         product: static-analysis
         job-name: win32-clang-tidy
     treeherder:
@@ -50,18 +48,17 @@ win32-clang-tidy/opt:
         tier: 2
     worker-type: aws-provisioner-v1/gecko-{level}-b-win2012
     worker:
         implementation: generic-worker
         max-run-time: 36000
     run:
         using: toolchain-script
         script: build-clang-tidy32-windows.sh
-    when:
-        files-changed:
+        resources:
             - 'build/build-clang/**'
 
 win64-clang-tidy/opt:
     description: "Clang-tidy toolchain build"
     index:
         product: static-analysis
         job-name: win64-clang-tidy
     treeherder:
@@ -71,11 +68,10 @@ win64-clang-tidy/opt:
         tier: 2
     worker-type: aws-provisioner-v1/gecko-{level}-b-win2012
     worker:
         implementation: generic-worker
         max-run-time: 36000
     run:
         using: toolchain-script
         script: build-clang-tidy64-windows.sh
-    when:
-        files-changed:
+        resources:
             - 'build/build-clang/**'
--- a/taskcluster/taskgraph/task/transform.py
+++ b/taskcluster/taskgraph/task/transform.py
@@ -75,23 +75,29 @@ class TransformTask(base.Task):
         trans_config = TransformConfig(kind, path, config, params)
         tasks = [cls(kind, t) for t in transforms(trans_config, inputs)]
         return tasks
 
     def __init__(self, kind, task):
         self.dependencies = task['dependencies']
         self.when = task['when']
         super(TransformTask, self).__init__(kind, task['label'],
-                                            task['attributes'], task['task'])
+                                            task['attributes'], task['task'],
+                                            index_paths=task.get('index-paths'))
 
     def get_dependencies(self, taskgraph):
         return [(label, name) for name, label in self.dependencies.items()]
 
     def optimize(self, params):
-        if 'files-changed' in self.when:
+        if self.index_paths:
+            optimized, taskId = super(TransformTask, self).optimize(params)
+            if optimized:
+                return optimized, taskId
+
+        elif 'files-changed' in self.when:
             changed = files_changed.check(
                 params, self.when['files-changed'])
             if not changed:
                 logger.debug('no files found matching a pattern in `when.files-changed` for ' +
                              self.label)
                 return True, None
 
         # we would like to return 'False, None' while it's high_value_task
--- a/taskcluster/taskgraph/transforms/job/toolchain.py
+++ b/taskcluster/taskgraph/transforms/job/toolchain.py
@@ -2,48 +2,77 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 """
 Support for running toolchain-building jobs via dedicated scripts
 """
 
 from __future__ import absolute_import, print_function, unicode_literals
 
-from voluptuous import Schema, Required, Any
+import os
+
+from voluptuous import Schema, Optional, Required, Any
 
 from taskgraph.transforms.job import run_job_using
 from taskgraph.transforms.job.common import (
     docker_worker_add_tc_vcs_cache,
     docker_worker_add_gecko_vcs_env_vars,
     docker_worker_support_vcs_checkout,
 )
+from taskgraph.util.hash import hash_paths
+
+
+GECKO = os.path.realpath(os.path.join(__file__, '..', '..', '..', '..', '..'))
+TOOLCHAIN_INDEX = 'gecko.cache.level-{level}.toolchains.v1.{name}.{digest}'
 
 toolchain_run_schema = Schema({
     Required('using'): 'toolchain-script',
 
     # the script (in taskcluster/scripts/misc) to run
     Required('script'): basestring,
 
     # If not false, tooltool downloads will be enabled via relengAPIProxy
     # for either just public files, or all files.  Not supported on Windows
     Required('tooltool-downloads', default=False): Any(
         False,
         'public',
         'internal',
     ),
+
+    # Paths/patterns pointing to files that influence the outcome of a
+    # toolchain build.
+    Optional('resources'): [basestring],
 })
 
 
-def add_files_changed(run, taskdesc):
-    files = taskdesc.setdefault('when', {}).setdefault('files-changed', [])
+def add_index_paths(config, run, taskdesc):
+    files = list(run.get('resources', []))
     # This file
     files.append('taskcluster/taskgraph/transforms/job/toolchain.py')
     # The script
     files.append('taskcluster/scripts/misc/{}'.format(run['script']))
 
+    label = taskdesc['label']
+    subs = {
+        'name': label.replace('toolchain-', '').split('/')[0],
+        'digest': hash_paths(GECKO, files),
+    }
+
+    index_paths = taskdesc.setdefault('index-paths', [])
+
+    # We'll try to find a cached version of the toolchain at levels above
+    # and including the current level, starting at the highest level.
+    for level in reversed(range(int(config.params['level']), 4)):
+        subs['level'] = level
+        index_paths.append(TOOLCHAIN_INDEX.format(**subs))
+
+    # ... and cache at the lowest level.
+    taskdesc.setdefault('routes', []).append(
+        'index.{}'.format(TOOLCHAIN_INDEX.format(**subs)))
+
 
 @run_job_using("docker-worker", "toolchain-script", schema=toolchain_run_schema)
 def docker_worker_toolchain(config, job, taskdesc):
     run = job['run']
 
     worker = taskdesc['worker']
     worker['artifacts'] = []
     worker['caches'] = []
@@ -92,17 +121,17 @@ def docker_worker_toolchain(config, job,
         '--',
         'bash',
         '-c',
         'cd /home/worker && '
         './workspace/build/src/taskcluster/scripts/misc/{}'.format(
             run['script'])
     ]
 
-    add_files_changed(run, taskdesc)
+    add_index_paths(config, run, taskdesc)
 
 
 @run_job_using("generic-worker", "toolchain-script", schema=toolchain_run_schema)
 def windows_toolchain(config, job, taskdesc):
     run = job['run']
 
     worker = taskdesc['worker']
 
@@ -141,9 +170,9 @@ def windows_toolchain(config, job, taskd
 
     bash = r'c:\mozilla-build\msys\bin\bash'
     worker['command'] = [
         ' '.join(hg_command),
         # do something intelligent.
         r'{} -c ./build/src/taskcluster/scripts/misc/{}'.format(bash, run['script'])
     ]
 
-    add_files_changed(run, taskdesc)
+    add_index_paths(config, run, taskdesc)
--- a/taskcluster/taskgraph/transforms/task.py
+++ b/taskcluster/taskgraph/transforms/task.py
@@ -46,16 +46,20 @@ task_description_schema = Schema({
     # (e.g., "14 days").  Defaults are set based on the project.
     Optional('expires-after'): basestring,
     Optional('deadline-after'): basestring,
 
     # custom routes for this task; the default treeherder routes will be added
     # automatically
     Optional('routes'): [basestring],
 
+    # The index paths where this task may be cached. Transforms are expected to
+    # fill these automatically when wanted.
+    Optional('index-paths'): [basestring],
+
     # custom scopes for this task; any scopes required for the worker will be
     # added automatically
     Optional('scopes'): [basestring],
 
     # custom "task.extra" content
     Optional('extra'): {basestring: object},
 
     # treeherder-related information; see
@@ -850,16 +854,17 @@ def build_task(config, tasks):
         attributes = task.get('attributes', {})
         attributes['run_on_projects'] = task.get('run-on-projects', ['all'])
 
         yield {
             'label': task['label'],
             'task': task_def,
             'dependencies': task.get('dependencies', {}),
             'attributes': attributes,
+            'index-paths': task.get('index-paths'),
             'when': task.get('when', {}),
         }
 
 
 # Check that the v2 route templates match those used by Mozharness.  This can
 # go away once Mozharness builds are no longer performed in Buildbot, and the
 # Mozharness code referencing routes.json is deleted.
 def check_v2_routes():
new file mode 100644
--- /dev/null
+++ b/taskcluster/taskgraph/util/hash.py
@@ -0,0 +1,38 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, print_function, unicode_literals
+from mozbuild.util import memoize
+from mozpack.files import FileFinder
+import mozpack.path as mozpath
+import hashlib
+
+
+@memoize
+def _hash_path(path):
+    """Return the hex SHA-256 digest of the contents of the file at `path`."""
+    # Binary mode: the digest must not depend on platform newline handling.
+    with open(path, 'rb') as fh:
+        return hashlib.sha256(fh.read()).hexdigest()
+
+
+def hash_paths(base_path, patterns):
+    """
+    Given a list of path patterns, return a digest of the contents of all
+    the corresponding files, similarly to git tree objects or mercurial
+    manifests.
+
+    Each file is hashed. The list of all hashes and file paths (relative to
+    `base_path`) is then itself hashed to produce the result.
+    """
+    finder = FileFinder(base_path)
+    h = hashlib.sha256()
+    files = {}
+    for pattern in patterns:
+        files.update(finder.find(pattern))
+    for path in sorted(files):
+        digest = _hash_path(mozpath.abspath(mozpath.join(base_path, path)))
+        # Hash the relative path only, so the digest is checkout-independent.
+        h.update('{} {}\n'.format(digest, mozpath.normsep(path)).encode('utf-8'))
+    return h.hexdigest()