Bug 1506920 - [taskgraph] Track parent images in docker image digests. r=dustin, a=release
author: Tom Prince <mozilla@hocat.ca>
Wed, 14 Nov 2018 14:29:26 +0000
changeset 492994 3e36063212e3f68ea6be99f943fd89eb3db64983
parent 492993 1b78230f1e90aaec18928b98a3b555ccbfa25410
child 492995 c7d6ec9d4a1c7312f60974b7a05755a08d0fa96f
push id: 1855
push user: ryanvm@gmail.com
push date: Wed, 14 Nov 2018 19:35:08 +0000
treeherder: mozilla-release@3e36063212e3 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: dustin, release
bugs: 1506920
milestone: 63.0.3
Bug 1506920 - [taskgraph] Track parent images in docker image digests. r=dustin, a=release The digest for a docker image task did not include the digest for the parent image in it, and so in particular did not depend on the versions of packages included in a parent image. If two branches have a docker image with identical docker files, but different parents, this would lead to them both getting the same digest, leading to unexpected interference between the branches. This fixes things by including the digest of the parent image as input into the digest of child images. Differential Revision: https://phabricator.services.mozilla.com/D11807
taskcluster/docs/attributes.rst
taskcluster/taskgraph/transforms/docker_image.py
taskcluster/taskgraph/util/cached_tasks.py
--- a/taskcluster/docs/attributes.rst
+++ b/taskcluster/docs/attributes.rst
@@ -232,8 +232,19 @@ In automation, full crashsymbol package 
 build kinds where the full crashsymbols should be enabled, set this attribute
 to True. The full symbol packages will then be generated and uploaded on
 release branches and on try.
 
 cron
 ====
 Indicates that a task is meant to be run via cron tasks, and should not be run
 on push.
+
+cache_digest
+============
+Some tasks generate artifacts that are cached between pushes. This is the unique string used
+to identify the current version of the artifacts. See :py:mod:`taskgraph.util.cached_tasks`.
+
+cache_type
+==========
+Some tasks generate artifacts that are cached between pushes. This is the type of cache that is
+used for this task. See :py:mod:`taskgraph.util.cached_tasks`.
+
--- a/taskcluster/taskgraph/transforms/docker_image.py
+++ b/taskcluster/taskgraph/transforms/docker_image.py
@@ -86,25 +86,20 @@ def order_image_tasks(config, tasks):
 
 @transforms.add
 def fill_template(config, tasks):
     available_packages = {}
     for task in config.kind_dependencies_tasks:
         if task.kind != 'packages':
             continue
         name = task.label.replace('packages-', '')
-        for route in task.task.get('routes', []):
-            if route.startswith('index.') and '.hash.' in route:
-                # Only keep the hash part of the route.
-                h = route.rsplit('.', 1)[1]
-                assert DIGEST_RE.match(h)
-                available_packages[name] = h
-                break
+        available_packages[name] = task.attributes['cache_digest']
 
     context_hashes = {}
+    image_digests = {}
 
     for task in order_image_tasks(config, tasks):
         image_name = task.pop('name')
         job_symbol = task.pop('symbol')
         args = task.pop('args', {})
         definition = task.pop('definition', image_name)
         packages = task.pop('packages', [])
         parent = task.pop('parent', None)
@@ -125,16 +120,19 @@ def fill_template(config, tasks):
             args['DOCKER_IMAGE_PARENT'] = '{}:{}'.format(parent, context_hashes[parent])
 
         context_path = os.path.join('taskcluster', 'docker', definition)
         context_hash = generate_context_hash(
             GECKO, context_path, image_name, args)
         digest_data = [context_hash]
         context_hashes[image_name] = context_hash
 
+        if parent:
+            digest_data += [image_digests[parent]]
+
         description = 'Build the docker image {} for use by dependent tasks'.format(
             image_name)
 
         # Adjust the zstandard compression level based on the execution level.
         # We use faster compression for level 1 because we care more about
         # end-to-end times. We use slower/better compression for other levels
         # because images are read more often and it is worth the trade-off to
         # burn more CPU once to reduce image size.
@@ -240,9 +238,11 @@ def fill_template(config, tasks):
             kwargs = {'digest': digest_data[0]}
         add_optimization(
             config, taskdesc,
             cache_type="docker-images.v1",
             cache_name=image_name,
             **kwargs
         )
 
+        image_digests[image_name] = taskdesc['attributes']['cache_digest']
+
         yield taskdesc
--- a/taskcluster/taskgraph/util/cached_tasks.py
+++ b/taskcluster/taskgraph/util/cached_tasks.py
@@ -63,16 +63,19 @@ def add_optimization(config, taskdesc, c
     # ... and add some extra routes for humans
     subs['build_date_long'] = time.strftime("%Y.%m.%d.%Y%m%d%H%M%S",
                                             time.gmtime(config.params['build_date']))
     taskdesc['routes'].extend([
         'index.{}'.format(route.format(**subs))
         for route in EXTRA_CACHE_INDEXES
     ])
 
+    taskdesc['attributes']['cache_digest'] = digest
+    taskdesc['attributes']['cache_type'] = cache_type
+
 
 def cached_index_path(level, trust_domain, cache_type, cache_name, digest=None, digest_data=None):
     """
     Get the index path needed to locate the task that would be created by
     :func:`add_optimization`.
 
     :param int level: The SCM level of the task to look for.
     :param str trust_domain: The trust domain to look for the task in.