Bug 1316446 - Improve mach taskcluster-load-image r=gps
authorJonas Finnemann Jensen <jopsen@gmail.com>
Thu, 10 Nov 2016 18:05:52 -0800
changeset 439669 b660931ce41d684d37848ff6646129b0fe99e3f1
parent 439668 5d62e2bc7b593ed79ef048618350aaed7ab0a94a
child 439670 6d4c987877fa4136b4f5e35ca525155e5124c505
push id36064
push userrthijssen@mozilla.com
push dateWed, 16 Nov 2016 13:38:27 +0000
reviewersgps
bugs1316446
milestone53.0a1
Bug 1316446 - Improve mach taskcluster-load-image r=gps MozReview-Commit-ID: 3Y8fI5WXP1Y
taskcluster/mach_commands.py
taskcluster/taskgraph/docker.py
--- a/taskcluster/mach_commands.py
+++ b/taskcluster/mach_commands.py
@@ -243,32 +243,36 @@ class MachCommands(MachCommandBase):
               sort_keys=True, indent=2, separators=(',', ': ')))
 
 
 @CommandProvider
 class TaskClusterImagesProvider(object):
     @Command('taskcluster-load-image', category="ci",
              description="Load a pre-built Docker image")
     @CommandArgument('--task-id',
-                     help="Load the image at public/image.tar in this task,"
+                     help="Load the image at public/image.tar.zst in this task,"
                           "rather than searching the index")
+    @CommandArgument('-t', '--tag',
+                     help="tag that the image should be loaded as. If not "
+                          "image will be loaded with tag from the tarball",
+                     metavar="name:tag")
     @CommandArgument('image_name', nargs='?',
                      help="Load the image of this name based on the current"
                           "contents of the tree (as built for mozilla-central"
                           "or mozilla-inbound)")
-    def load_image(self, image_name, task_id):
+    def load_image(self, image_name, task_id, tag):
         from taskgraph.docker import load_image_by_name, load_image_by_task_id
         if not image_name and not task_id:
             print("Specify either IMAGE-NAME or TASK-ID")
             sys.exit(1)
         try:
             if task_id:
-                ok = load_image_by_task_id(task_id)
+                ok = load_image_by_task_id(task_id, tag)
             else:
-                ok = load_image_by_name(image_name)
+                ok = load_image_by_name(image_name, tag)
             if not ok:
                 sys.exit(1)
         except Exception:
             traceback.print_exc()
             sys.exit(1)
 
     @Command('taskcluster-build-image', category='ci',
              description='Build a Docker image')
--- a/taskcluster/taskgraph/docker.py
+++ b/taskcluster/taskgraph/docker.py
@@ -3,77 +3,54 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 import json
 import os
+import sys
 import subprocess
 import tarfile
 import tempfile
 import urllib2
 import which
+from subprocess import Popen, PIPE
+from io import BytesIO
 
 from taskgraph.util import docker
 
 GECKO = os.path.realpath(os.path.join(__file__, '..', '..', '..'))
 IMAGE_DIR = os.path.join(GECKO, 'testing', 'docker')
 INDEX_URL = 'https://index.taskcluster.net/v1/task/' + docker.INDEX_PREFIX + '.{}.{}.hash.{}'
 ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
 
 
-def load_image_by_name(image_name):
+def load_image_by_name(image_name, tag=None):
     context_path = os.path.join(GECKO, 'testing', 'docker', image_name)
     context_hash = docker.generate_context_hash(GECKO, context_path, image_name)
 
-    image_index_url = INDEX_URL.format('mozilla-central', image_name, context_hash)
+    image_index_url = INDEX_URL.format('level-3', image_name, context_hash)
     print("Fetching", image_index_url)
     task = json.load(urllib2.urlopen(image_index_url))
 
-    return load_image_by_task_id(task['taskId'])
+    return load_image_by_task_id(task['taskId'], tag)
 
 
-def load_image_by_task_id(task_id):
-    # because we need to read this file twice (and one read is not all the way
-    # through), it is difficult to stream it.  So we download to disk and then
-    # read it back.
-    filename = 'temp-docker-image.tar'
-
+def load_image_by_task_id(task_id, tag=None):
     artifact_url = ARTIFACT_URL.format(task_id, 'public/image.tar.zst')
-    print("Downloading", artifact_url)
-    tempfilename = 'temp-docker-image.tar.zst'
-    subprocess.check_call(['curl', '-#', '-L', '-o', tempfilename, artifact_url])
-    print("Decompressing")
-    subprocess.check_call(['zstd', '-d', tempfilename, '-o', filename])
-    print("Deleting temporary file")
-    os.unlink(tempfilename)
-
-    print("Determining image name")
-    tf = tarfile.open(filename)
-    repositories = json.load(tf.extractfile('repositories'))
-    name = repositories.keys()[0]
-    tag = repositories[name].keys()[0]
-    name = '{}:{}'.format(name, tag)
-    print("Image name:", name)
-
-    print("Loading image into docker")
-    try:
-        subprocess.check_call(['docker', 'load', '-i', filename])
-    except subprocess.CalledProcessError:
-        print("*** `docker load` failed.  You may avoid re-downloading that tarball by fixing the")
-        print("*** problem and running `docker load < {}`.".format(filename))
-        raise
-
-    print("Deleting temporary file")
-    os.unlink(filename)
-
-    print("The requested docker image is now available as", name)
-    print("Try: docker run -ti --rm {} bash".format(name))
+    result = load_image(artifact_url, tag)
+    print("Found docker image: {}:{}".format(result['image'], result['tag']))
+    if tag:
+        print("Re-tagged as: {}".format(tag))
+    else:
+        tag = '{}:{}'.format(result['image'], result['tag'])
+    print("Try: docker run -ti --rm {} bash".format(tag))
+    return True
 
 
 def build_context(name, outputFile):
     """Build a context.tar for image with specified name.
     """
     if not name:
         raise ValueError('must provide a Docker image name')
     if not outputFile:
@@ -125,8 +102,101 @@ def build_image(name):
 
     if tag.endswith(':latest'):
         print('*' * 50)
         print('WARNING: no VERSION file found in image directory.')
         print('Image is not suitable for deploying/pushing.')
         print('Create an image suitable for deploying/pushing by creating')
         print('a VERSION file in the image directory.')
         print('*' * 50)
+
+
+def load_image(url, imageName=None, imageTag=None):
+    """
+    Load docker image from URL as imageName:tag, if no imageName or tag is given
+    it will use whatever is inside the zstd compressed tarball.
+
+    Returns an object with properties 'image', 'tag' and 'layer'.
+    """
+    # If imageName is given and we don't have an imageTag
+    # we parse out the imageTag from imageName, or default it to 'latest'
+    # if no imageName and no imageTag is given, 'repositories' won't be rewritten
+    if imageName and not imageTag:
+        if ':' in imageName:
+            imageName, imageTag = imageName.split(':', 1)
+        else:
+            imageTag = 'latest'
+
+    curl, zstd, docker = None, None, None
+    image, tag, layer = None, None, None
+    error = None
+    try:
+        # Setup piping: curl | zstd | tarin
+        curl = Popen(['curl', '-#', '--fail', '-L', '--retry', '8', url], stdout=PIPE)
+        zstd = Popen(['zstd', '-d'], stdin=curl.stdout, stdout=PIPE)
+        tarin = tarfile.open(mode='r|', fileobj=zstd.stdout)
+        # Seutp piping: tarout | docker
+        docker = Popen(['docker', 'load'], stdin=PIPE)
+        tarout = tarfile.open(mode='w|', fileobj=docker.stdin, format=tarfile.GNU_FORMAT)
+
+        # Read from tarin and write to tarout
+        for member in tarin:
+            # Write non-file members directly (don't use extractfile on links)
+            if not member.isfile():
+                tarout.addfile(member)
+                continue
+
+            # Open reader for the member
+            reader = tarin.extractfile(member)
+
+            # If member is repository, we parse and possibly rewrite the image tags
+            if member.name == 'repositories':
+                # Read and parse repositories
+                repos = json.loads(reader.read())
+                reader.close()
+
+                # If there is more than one image or tag, we can't handle it here
+                if len(repos.keys()) > 1:
+                    raise Exception('file contains more than one image')
+                image = repos.keys()[0]
+                if len(repos[image].keys()) > 1:
+                    raise Exception('file contains more than one tag')
+                tag = repos[image].keys()[0]
+                layer = repos[image][tag]
+
+                # Rewrite the repositories file
+                data = json.dumps({imageName or image: {imageTag or tag: layer}})
+                reader = BytesIO(data)
+                member.size = len(data)
+
+            # Add member and reader
+            tarout.addfile(member, reader)
+            reader.close()
+        tarout.close()
+    except Exception:
+        error = sys.exc_info()[0]
+    finally:
+        def trykill(proc):
+            try:
+                proc.kill()
+            except:
+                pass
+
+        # Check that all subprocesses finished correctly
+        if curl and curl.wait() != 0:
+            trykill(zstd)
+            trykill(docker)
+            raise Exception('failed to download from url: {}'.format(url))
+        if zstd and zstd.wait() != 0:
+            trykill(docker)
+            raise Exception('zstd decompression failed')
+        if docker:
+            docker.stdin.close()
+        if docker and docker.wait() != 0:
+            raise Exception('loading into docker failed')
+        if error:
+            raise error
+
+    # Check that we found a repositories file
+    if not image or not tag or not layer:
+        raise Exception('No repositories file found!')
+
+    return {'image': image, 'tag': tag, 'layer': layer}