Bug 1430037 - Use the in-tree image builder image to build docker images. r?dustin draft
author       Mike Hommey <mh+mozilla@glandium.org>
date         Thu, 11 Jan 2018 17:50:00 +0900
changeset    721339 94606338eed42bdcaa533e3f2c2974e2c0638296
parent       720739 a3887394965f161d011eebc74e8987a653366e4b
child        721340 ca25f7b1943016a808c8b7d197e584188e569231
push id      95807
push user    bmo:mh+mozilla@glandium.org
push date    Wed, 17 Jan 2018 04:31:10 +0000
reviewers    dustin
bugs         1430037
milestone    59.0a1
Bug 1430037 - Use the in-tree image builder image to build docker images. r?dustin

The image builder image we use to build docker images is updated manually, and not necessarily when changes land in the tree that should be reflected in a new image builder image. For instance, its copy of run-task is currently outdated. Not outdated enough to be an actual problem yet, but it could rapidly become one.

There is also a lot of friction when trying to change how docker images are built. The last time I tried, I couldn't make the changes I wanted because the docker version on the host is too old, and this is already the second time I've tried to improve things and hit a wall because the image builder is essentially fixed in stone on the docker hub.

So with this change, all docker images are built with the in-tree image builder image, except the image builder image itself, obviously. That one uses the last version that was uploaded to the docker hub. We may want to update that snapshot at some point, but not doing so only affects building the image builder image itself, not the other images.
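For reference, the worker setup added to the docker_image transform boils down to the following rough sketch; the helper name and its arguments are illustrative only, the real code is in taskcluster/taskgraph/transforms/docker_image.py in the diff below.

    # Sketch of the image/cache selection introduced by this patch
    # (helper name and signature are illustrative, not part of the patch).

    # Digest of the last image_builder image uploaded to the docker hub,
    # only used to bootstrap the image_builder image itself.
    IMAGE_BUILDER_DIGEST = (
        'sha256:24ce54a1602453bc93515aecd9d4ad25a22115fbc4b209ddb5541377e9a37315'
    )

    def docker_image_worker_bits(image_name, level, run_task_suffix):
        """Pick the docker image and cache used to build ``image_name``."""
        if image_name == 'image_builder':
            # image_builder can't be built with itself; fall back to the
            # pinned snapshot from the docker hub.
            docker_image = 'taskcluster/image_builder@' + IMAGE_BUILDER_DIGEST
            cache_name = 'imagebuilder-v1'
        else:
            # Everything else is built with the in-tree image_builder, which
            # uses run-task, so the cache name carries run-task's hash suffix.
            docker_image = {'in-tree': 'image_builder'}
            cache_name = 'imagebuilder-{}'.format(run_task_suffix)
        return {
            'docker-image': docker_image,
            'caches': [{
                'type': 'persistent',
                'name': 'level-{}-{}'.format(level, cache_name),
                'mount-point': '/builds/worker/checkouts',
            }],
        }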
taskcluster/ci/docker-image/kind.yml
taskcluster/docker/image_builder/HASH
taskcluster/docker/image_builder/VERSION
taskcluster/taskgraph/transforms/docker_image.py
taskcluster/taskgraph/transforms/task.py
--- a/taskcluster/ci/docker-image/kind.yml
+++ b/taskcluster/ci/docker-image/kind.yml
@@ -12,16 +12,18 @@ transforms:
   - taskgraph.transforms.task:transforms
 
 # make a task for each docker-image we might want.  For the moment, since we
 # write artifacts for each, these are whitelisted, but ideally that will change
 # (to use subdirectory clones of the proper directory), at which point we can
 # generate tasks for every docker image in the directory, secure in the
 # knowledge that unnecessary images will be omitted from the target task graph
 jobs:
+  image_builder:
+    symbol: I(ib)
   desktop1604-test:
     symbol: I(dt16t)
   desktop-build:
     symbol: I(db)
   valgrind-build:
     symbol: I(vb)
   toolchain-build:
     symbol: I(toolchain)
deleted file mode 100644
--- a/taskcluster/docker/image_builder/HASH
+++ /dev/null
@@ -1,1 +0,0 @@
-sha256:24ce54a1602453bc93515aecd9d4ad25a22115fbc4b209ddb5541377e9a37315
deleted file mode 100644
--- a/taskcluster/docker/image_builder/VERSION
+++ /dev/null
@@ -1,1 +0,0 @@
-2.0.0
--- a/taskcluster/taskgraph/transforms/docker_image.py
+++ b/taskcluster/taskgraph/transforms/docker_image.py
@@ -3,19 +3,19 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 import os
 import re
 
 from taskgraph.transforms.base import TransformSequence
+from taskgraph.transforms.task import _run_task_suffix
 from .. import GECKO
 from taskgraph.util.docker import (
-    docker_image,
     generate_context_hash,
 )
 from taskgraph.util.cached_tasks import add_optimization
 from taskgraph.util.schema import (
     Schema,
     validate_schema,
 )
 from voluptuous import (
@@ -91,16 +91,17 @@ def fill_template(config, tasks):
         # containing a hash to get the overall docker image hash, so changes
         # to packages will be reflected in the docker image hash.
         args['DOCKER_IMAGE_PACKAGES'] = ' '.join('<{}>'.format(p)
                                                  for p in packages)
 
         context_path = os.path.join('taskcluster', 'docker', definition)
         context_hash = generate_context_hash(
             GECKO, context_path, image_name, args)
+        digest_data = [context_hash]
 
         description = 'Build the docker image {} for use by dependent tasks'.format(
             image_name)
 
         # Adjust the zstandard compression level based on the execution level.
         # We use faster compression for level 1 because we care more about
         # end-to-end times. We use slower/better compression for other levels
         # because images are read more often and it is worth the trade-off to
@@ -119,70 +120,87 @@ def fill_template(config, tasks):
                 'symbol': job_symbol,
                 'platform': 'taskcluster-images/opt',
                 'kind': 'other',
                 'tier': 1,
             },
             'run-on-projects': [],
             'worker-type': 'aws-provisioner-v1/gecko-{}-images'.format(
                 config.params['level']),
-            # can't use {in-tree: ..} here, otherwise we might try to build
-            # this image..
             'worker': {
                 'implementation': 'docker-worker',
                 'os': 'linux',
-                'docker-image': docker_image('image_builder'),
-                'caches': [{
-                    'type': 'persistent',
-                    'name': 'level-{}-imagebuilder-v1'.format(config.params['level']),
-                    'mount-point': '/builds/worker/checkouts',
-                }],
-                'volumes': [
-                    # Keep in sync with Dockerfile and TASKCLUSTER_VOLUMES
-                    '/builds/worker/checkouts',
-                    '/builds/worker/workspace',
-                ],
                 'artifacts': [{
                     'type': 'file',
                     'path': '/builds/worker/workspace/artifacts/image.tar.zst',
                     'name': 'public/image.tar.zst',
                 }],
                 'env': {
                     'HG_STORE_PATH': '/builds/worker/checkouts/hg-store',
                     'HASH': context_hash,
                     'PROJECT': config.params['project'],
                     'IMAGE_NAME': image_name,
                     'DOCKER_IMAGE_ZSTD_LEVEL': zstd_level,
                     'GECKO_BASE_REPOSITORY': config.params['base_repository'],
                     'GECKO_HEAD_REPOSITORY': config.params['head_repository'],
                     'GECKO_HEAD_REV': config.params['head_rev'],
-                    'TASKCLUSTER_VOLUMES': '/builds/worker/checkouts;/builds/worker/workspace',
                 },
                 'chain-of-trust': True,
                 'docker-in-docker': True,
                 'taskcluster-proxy': True,
                 'max-run-time': 7200,
             },
         }
 
+        worker = taskdesc['worker']
+
+        # We use the in-tree image_builder image to build docker images, but
+        # that can't be used to build the image_builder image itself,
+        # obviously. So we fall back to the last snapshot of the image that
+        # was uploaded to docker hub.
+        if image_name == 'image_builder':
+            worker['docker-image'] = 'taskcluster/image_builder@sha256:' + \
+                '24ce54a1602453bc93515aecd9d4ad25a22115fbc4b209ddb5541377e9a37315'
+            # Keep in sync with the Dockerfile used to generate the
+            # docker image whose digest is referenced above.
+            worker['volumes'] = [
+                '/builds/worker/checkouts',
+                '/builds/worker/workspace',
+            ]
+            cache_name = 'imagebuilder-v1'
+        else:
+            worker['docker-image'] = {'in-tree': 'image_builder'}
+            cache_name = 'imagebuilder-{}'.format(_run_task_suffix())
+            # Force images built against the in-tree image builder to
+            # have a different digest by adding a fixed string to the
+            # hashed data.
+            digest_data.append('image_builder')
+
+        worker['caches'] = [{
+            'type': 'persistent',
+            'name': 'level-{}-{}'.format(config.params['level'], cache_name),
+            'mount-point': '/builds/worker/checkouts',
+        }]
+
         for k, v in args.items():
             if k == 'DOCKER_IMAGE_PACKAGES':
-                taskdesc['worker']['env'][k] = {'task-reference': v}
+                worker['env'][k] = {'task-reference': v}
             else:
-                taskdesc['worker']['env'][k] = v
+                worker['env'][k] = v
 
         if packages:
             deps = taskdesc.setdefault('dependencies', {})
-            digest_data = [context_hash]
             for p in sorted(packages):
                 deps[p] = 'packages-{}'.format(p)
                 digest_data.append(available_packages[p])
+
+        if len(digest_data) > 1:
             kwargs = {'digest_data': digest_data}
         else:
-            kwargs = {'digest': context_hash}
+            kwargs = {'digest': digest_data[0]}
         add_optimization(
             config, taskdesc,
             cache_type="docker-images.v1",
             cache_name=image_name,
             **kwargs
         )
 
         yield taskdesc
--- a/taskcluster/taskgraph/transforms/task.py
+++ b/taskcluster/taskgraph/transforms/task.py
@@ -825,27 +825,26 @@ def build_docker_worker_payload(config, 
         for artifact in worker['artifacts']:
             artifacts[artifact['name']] = {
                 'path': artifact['path'],
                 'type': artifact['type'],
                 'expires': task_def['expires'],  # always expire with the task
             }
         payload['artifacts'] = artifacts
 
+    run_task = payload.get('command', [''])[0].endswith('run-task')
+
     if isinstance(worker.get('docker-image'), basestring):
         out_of_tree_image = worker['docker-image']
+        run_task = run_task or out_of_tree_image.startswith(
+            'taskcluster/image_builder')
     else:
         out_of_tree_image = None
-
-    run_task = any([
-        payload.get('command', [''])[0].endswith('run-task'),
-        # image_builder is special and doesn't get detected like other tasks.
-        # It uses run-task so it needs our cache manipulations.
-        (out_of_tree_image or '').startswith('taskcluster/image_builder'),
-    ])
+        image = worker.get('docker-image', {}).get('in-tree')
+        run_task = run_task or image == 'image_builder'
 
     if 'caches' in worker:
         caches = {}
 
         # run-task knows how to validate caches.
         #
         # To help ensure new run-task features and bug fixes don't interfere
         # with existing caches, we seed the hash of run-task into cache names.