Bug 1419638 - Allow passing arguments to docker when building docker-images. r=dustin
author: Mike Hommey <mh+mozilla@glandium.org>
Sun, 24 Dec 2017 07:51:29 +0900
changeset 397792 7b087c4f2aa7fac5900c92a0f3d37faac8bbe890
parent 397791 48faf408be3b34f11b62c2a7c9816821a40e5b67
child 397793 ab480c23769ba548febf955c0b0f63c85dfdba6b
push id: 98618
push user: apavel@mozilla.com
push date: Thu, 04 Jan 2018 21:34:32 +0000
treeherder: mozilla-inbound@ea7269f28ea0 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1419638 - Allow passing arguments to docker when building docker-images. r=dustin Ideally, we'd simply use the --build-arg docker argument along with ARG in the Dockerfile, but that's only supported from Docker API 1.21, and we're stuck on 1.18 for the moment. So we add another hack to how we handle the Dockerfile, by adding a commented syntax that allows declaring arguments in the Dockerfile. The arguments can be defined in the docker images kind.yml file through the `args` keyword. Under the hood, they are passed down to the docker image task through the environment. The mach taskcluster-build-image command then uses the corresponding values from the environment to generate a "preprocessed" Dockerfile for its context.
--- a/taskcluster/mach_commands.py
+++ b/taskcluster/mach_commands.py
@@ -412,19 +412,19 @@ class TaskClusterImagesProvider(object):
                      help="File name the context tarball should be written to."
                           "with this option it will only build the context.tar.",
     def build_image(self, image_name, context_only):
         from taskgraph.docker import build_image, build_context
             if context_only is None:
-                build_image(image_name)
+                build_image(image_name, os.environ)
-                build_context(image_name, context_only)
+                build_context(image_name, context_only, os.environ)
         except Exception:
 class TaskClusterPartialsData(object):
     @Command('release-history', category="ci",
--- a/taskcluster/taskgraph/docker.py
+++ b/taskcluster/taskgraph/docker.py
@@ -48,32 +48,32 @@ def load_image_by_task_id(task_id, tag=N
     if tag:
         print("Re-tagged as: {}".format(tag))
         tag = '{}:{}'.format(result['image'], result['tag'])
     print("Try: docker run -ti --rm {} bash".format(tag))
     return True
-def build_context(name, outputFile):
+def build_context(name, outputFile, args=None):
     """Build a context.tar for image with specified name.
     if not name:
         raise ValueError('must provide a Docker image name')
     if not outputFile:
         raise ValueError('must provide a outputFile')
     image_dir = os.path.join(docker.IMAGE_DIR, name)
     if not os.path.isdir(image_dir):
         raise Exception('image directory does not exist: %s' % image_dir)
-    docker.create_context_tar(GECKO, image_dir, outputFile, "")
+    docker.create_context_tar(GECKO, image_dir, outputFile, "", args)
-def build_image(name):
+def build_image(name, args=None):
     """Build a Docker image of specified name.
     Output from image building process will be printed to stdout.
     if not name:
         raise ValueError('must provide a Docker image name')
     image_dir = os.path.join(docker.IMAGE_DIR, name)
@@ -93,17 +93,17 @@ def build_image(name):
     # We obtain a context archive and build from that. Going through the
     # archive creation is important: it normalizes things like file owners
     # and mtimes to increase the chances that image generation is
     # deterministic.
     fd, context_path = tempfile.mkstemp()
-        docker.create_context_tar(GECKO, image_dir, context_path, name)
+        docker.create_context_tar(GECKO, image_dir, context_path, name, args)
         docker.build_from_context(docker_bin, context_path, name, tag)
     print('Successfully built %s and tagged with %s' % (name, tag))
     if tag.endswith(':latest'):
         print('*' * 50)
--- a/taskcluster/taskgraph/transforms/docker_image.py
+++ b/taskcluster/taskgraph/transforms/docker_image.py
@@ -29,35 +29,40 @@ docker_image_schema = Schema({
     Required('name'): basestring,
     # Treeherder symbol.
     Required('symbol'): basestring,
     # relative path (from config.path) to the file the docker image was defined
     # in.
     Optional('job-from'): basestring,
+    # Arguments to use for the Dockerfile.
+    Optional('args'): {basestring: basestring},
 def validate(config, tasks):
     for task in tasks:
         yield validate_schema(
             docker_image_schema, task,
             "In docker image {!r}:".format(task.get('name', 'unknown')))
 def fill_template(config, tasks):
     for task in tasks:
         image_name = task.pop('name')
         job_symbol = task.pop('symbol')
+        args = task.pop('args', {})
         context_path = os.path.join('taskcluster', 'docker', image_name)
-        context_hash = generate_context_hash(GECKO, context_path, image_name)
+        context_hash = generate_context_hash(
+            GECKO, context_path, image_name, args)
         description = 'Build the docker image {} for use by dependent tasks'.format(
         # Adjust the zstandard compression level based on the execution level.
         # We use faster compression for level 1 because we care more about
         # end-to-end times. We use slower/better compression for other levels
         # because images are read more often and it is worth the trade-off to
@@ -115,16 +120,19 @@ def fill_template(config, tasks):
                 'chain-of-trust': True,
                 'docker-in-docker': True,
                 'taskcluster-proxy': True,
                 'max-run-time': 7200,
+        for k, v in args.items():
+            taskdesc['worker']['env'][k] = v
             config, taskdesc,
         yield taskdesc
--- a/taskcluster/taskgraph/util/docker.py
+++ b/taskcluster/taskgraph/util/docker.py
@@ -1,22 +1,24 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 from __future__ import absolute_import, print_function, unicode_literals
 import hashlib
 import os
+import re
 import shutil
 import subprocess
 import tarfile
 import tempfile
 from mozbuild.util import memoize
+from mozpack.files import GeneratedFile
 from mozpack.archive import (
 from .. import GECKO
 IMAGE_DIR = os.path.join(GECKO, 'taskcluster', 'docker')
@@ -44,60 +46,79 @@ def docker_image(name, by_tag=False):
         with open(os.path.join(IMAGE_DIR, name, 'VERSION')) as f:
             tag = f.read().strip()
     except IOError:
         tag = 'latest'
     return '{}/{}:{}'.format(registry, name, tag)
-def generate_context_hash(topsrcdir, image_path, image_name):
+def generate_context_hash(topsrcdir, image_path, image_name, args=None):
     """Generates a sha256 hash for context directory used to build an image."""
     # It is a bit unfortunate we have to create a temp file here - it would
     # be nicer to use an in-memory buffer.
     fd, p = tempfile.mkstemp()
-        return create_context_tar(topsrcdir, image_path, p, image_name)
+        return create_context_tar(topsrcdir, image_path, p, image_name, args)
-def create_context_tar(topsrcdir, context_dir, out_path, prefix):
+def create_context_tar(topsrcdir, context_dir, out_path, prefix, args=None):
     """Create a context tarball.
     A directory ``context_dir`` containing a Dockerfile will be assembled into
     a gzipped tar file at ``out_path``. Files inside the archive will be
     prefixed by directory ``prefix``.
     We also scan the source Dockerfile for special syntax that influences
     context generation.
     If a line in the Dockerfile has the form ``# %include <path>``,
     the relative path specified on that line will be matched against
     files in the source repository and added to the context under the
     path ``topsrcdir/``. If an entry is a directory, we add all files
     under that directory.
+    If a line in the Dockerfile has the form ``# %ARG <name>``, occurrences of
+    the string ``$<name>`` in subsequent lines are replaced with the value
+    found in the ``args`` argument. Exception: this doesn't apply to VOLUME
+    definitions.
     Returns the SHA-256 hex digest of the created archive.
     archive_files = {}
+    replace = []
     for root, dirs, files in os.walk(context_dir):
         for f in files:
             source_path = os.path.join(root, f)
             rel = source_path[len(context_dir) + 1:]
             archive_path = os.path.join(prefix, rel)
             archive_files[archive_path] = source_path
     # Parse Dockerfile for special syntax of extra files to include.
+    content = []
     with open(os.path.join(context_dir, 'Dockerfile'), 'rb') as fh:
         for line in fh:
-            line = line.rstrip()
+            if line.startswith('# %ARG'):
+                p = line[len('# %ARG '):].strip()
+                if not args or p not in args:
+                    raise Exception('missing argument: {}'.format(p))
+                replace.append((re.compile(r'\${}\b'.format(p)),
+                                args[p].encode('ascii')))
+                continue
+            for regexp, s in replace:
+                line = re.sub(regexp, s, line)
+            content.append(line)
             if not line.startswith('# %include'):
             p = line[len('# %include '):].strip()
             if os.path.isabs(p):
                 raise Exception('extra include path cannot be absolute: %s' % p)
             fs_path = os.path.normpath(os.path.join(topsrcdir, p))
@@ -114,16 +135,19 @@ def create_context_tar(topsrcdir, contex
                         source_path = os.path.join(root, f)
                         rel = source_path[len(fs_path) + 1:]
                         archive_path = os.path.join(prefix, 'topsrcdir', p, rel)
                         archive_files[archive_path] = source_path
                 archive_path = os.path.join(prefix, 'topsrcdir', p)
                 archive_files[archive_path] = fs_path
+    archive_files[os.path.join(prefix, 'Dockerfile')] = \
+        GeneratedFile(b''.join(content))
     with open(out_path, 'wb') as fh:
         create_tar_gz_from_files(fh, archive_files, '%s.tar.gz' % prefix)
     h = hashlib.sha256()
     with open(out_path, 'rb') as fh:
         while True:
             data = fh.read(32768)
             if not data:
@@ -169,16 +193,17 @@ def build_from_context(docker_bin, conte
 def parse_volumes(image):
     """Parse VOLUME entries from a Dockerfile for an image."""
     volumes = set()
     with open(os.path.join(IMAGE_DIR, image, 'Dockerfile'), 'rb') as fh:
         for line in fh:
             line = line.strip()
+            # We assume VOLUME definitions don't use %ARGS.
             if not line.startswith(b'VOLUME '):
             v = line.split(None, 1)[1]
             if v.startswith(b'['):
                 raise ValueError('cannot parse array syntax for VOLUME; '
                                  'convert to multiple entries')