Bug 1275409: remove taskcluster_graph.image_builder; r=wcosta
authorDustin J. Mitchell <dustin@mozilla.com>
Mon, 06 Jun 2016 18:55:10 +0000
changeset 341114 48d7584daf9e7fb7c97b582e7e1209fb8c5e70a0
parent 341113 eb228c042e377917c70e35c5a905a00d3b389f30
child 341115 4ddcce18639bd8dc726301467478042f62210e0e
push id6389
push userraliiev@mozilla.com
push dateMon, 19 Sep 2016 13:38:22 +0000
treeherdermozilla-beta@01d67bfe6c81 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerswcosta
bugs1275409
milestone50.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1275409: remove taskcluster_graph.image_builder; r=wcosta MozReview-Commit-ID: 21HBtcDVPXC
taskcluster/mach_commands.py
taskcluster/taskgraph/docker.py
taskcluster/taskgraph/kind/docker_image.py
taskcluster/taskgraph/kind/legacy.py
taskcluster/taskgraph/test/test_kind_docker_image.py
taskcluster/taskgraph/test/test_util_docker.py
taskcluster/taskgraph/util/docker.py
testing/taskcluster/taskcluster_graph/image_builder.py
--- a/taskcluster/mach_commands.py
+++ b/taskcluster/mach_commands.py
@@ -213,29 +213,22 @@ class LoadImage(object):
         description="Load a pre-built Docker image")
     @CommandArgument('--task-id',
         help="Load the image at public/image.tar in this task, rather than "
              "searching the index")
     @CommandArgument('image_name', nargs='?',
         help="Load the image of this name based on the current contents of the tree "
              "(as built for mozilla-central or mozilla-inbound)")
     def load_image(self, image_name, task_id):
-        from taskcluster_graph.image_builder import (
-            task_id_for_image,
-            docker_load_from_url
-        )
-
+        from taskgraph.docker import load_image_by_name, load_image_by_task_id
         if not image_name and not task_id:
             print("Specify either IMAGE-NAME or TASK-ID")
             sys.exit(1)
-
-        if not task_id:
-            task_id = task_id_for_image({}, 'mozilla-inbound', image_name, create=False)
-            if not task_id:
-                print("No task found in the TaskCluster index for", image_name)
+        try:
+            if task_id:
+                ok = load_image_by_task_id(task_id)
+            else:
+                ok = load_image_by_name(image_name)
+            if not ok:
                 sys.exit(1)
-
-        print("Task ID:", task_id)
-
-        ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
-        image_name = docker_load_from_url(ARTIFACT_URL.format(task_id, 'public/image.tar'))
-
-        print("Loaded image is named", image_name)
+        except Exception as e:
+            traceback.print_exc()
+            sys.exit(1)
new file mode 100644
--- /dev/null
+++ b/taskcluster/taskgraph/docker.py
@@ -0,0 +1,63 @@
+# -*- coding: utf-8 -*-
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+import json
+import os
+import subprocess
+import tarfile
+import urllib2
+
+from taskgraph.util import docker
+
+GECKO = os.path.realpath(os.path.join(__file__, '..', '..', '..'))
+INDEX_URL = 'https://index.taskcluster.net/v1/task/docker.images.v1.{}.{}.hash.{}'
+ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
+
+
+def load_image_by_name(image_name):
+    context_path = os.path.join(GECKO, 'testing', 'docker', image_name)
+    context_hash = docker.generate_context_hash(context_path)
+
+    image_index_url = INDEX_URL.format('mozilla-central', image_name, context_hash)
+    print("Fetching", image_index_url)
+    task = json.load(urllib2.urlopen(image_index_url))
+
+    return load_image_by_task_id(task['taskId'])
+
+
+def load_image_by_task_id(task_id):
+    # because we need to read this file twice (and one read is not all the way
+    # through), it is difficult to stream it.  So we download to disk and then
+    # read it back.
+    filename = 'temp-docker-image.tar'
+
+    artifact_url = ARTIFACT_URL.format(task_id, 'public/image.tar')
+    print("Downloading", artifact_url)
+    subprocess.check_call(['curl', '-#', '-L', '-o', filename, artifact_url])
+
+    print("Determining image name")
+    tf = tarfile.open(filename)
+    repositories = json.load(tf.extractfile('repositories'))
+    name = repositories.keys()[0]
+    tag = repositories[name].keys()[0]
+    name = '{}:{}'.format(name, tag)
+    print("Image name:", name)
+
+    print("Loading image into docker")
+    try:
+        subprocess.check_call(['docker', 'load', '-i', filename])
+    except subprocess.CalledProcessError:
+        print("*** `docker load` failed.  You may avoid re-downloading that tarball by fixing the")
+        print("*** problem and running `docker load < {}`.".format(filename))
+        raise
+
+    print("Deleting temporary file")
+    os.unlink(filename)
+
+    print("The requested docker image is now available as", name)
+    print("Try: docker run -ti --rm {} bash".format(name))
--- a/taskcluster/taskgraph/kind/docker_image.py
+++ b/taskcluster/taskgraph/kind/docker_image.py
@@ -9,17 +9,20 @@ import json
 import os
 import urllib2
 import hashlib
 import tarfile
 import time
 
 from . import base
 from ..types import Task
-from taskgraph.util.docker import docker_image
+from taskgraph.util.docker import (
+    docker_image,
+    generate_context_hash
+)
 from taskgraph.util.templates import Templates
 from taskgraph.util.time import (
     json_time_from_now,
     current_json_time,
 )
 
 logger = logging.getLogger(__name__)
 GECKO = os.path.realpath(os.path.join(__file__, '..', '..', '..', '..'))
@@ -53,17 +56,17 @@ class DockerImageKind(base.Kind):
             'source': '{repo}file/{rev}/testing/taskcluster/tasks/image.yml'
                     .format(repo=params['head_repository'], rev=params['head_rev']),
         }
 
         tasks = []
         templates = Templates(self.path)
         for image_name in self.config['images']:
             context_path = os.path.join('testing', 'docker', image_name)
-            context_hash = self.generate_context_hash(context_path)
+            context_hash = generate_context_hash(context_path)
 
             image_parameters = dict(parameters)
             image_parameters['context_hash'] = context_hash
             image_parameters['context_path'] = context_path
             image_parameters['artifact_path'] = 'public/image.tar'
             image_parameters['image_name'] = image_name
 
             image_artifact_path = "public/decision_task/image_contexts/{}/context.tar.gz".format(image_name)
@@ -127,34 +130,8 @@ class DockerImageKind(base.Kind):
     def create_context_tar(self, context_dir, destination, image_name):
         'Creates a tar file of a particular context directory.'
         destination = os.path.abspath(destination)
         if not os.path.exists(os.path.dirname(destination)):
             os.makedirs(os.path.dirname(destination))
 
         with tarfile.open(destination, 'w:gz') as tar:
             tar.add(context_dir, arcname=image_name)
-
-    def generate_context_hash(self, image_path):
-        '''Generates a sha256 hash for context directory used to build an image.
-
-        Contents of the directory are sorted alphabetically, contents of each file is hashed,
-        and then a hash is created for both the file hashes as well as their paths.
-
-        This ensures that hashs are consistent and also change based on if file locations
-        within the context directory change.
-        '''
-        context_hash = hashlib.sha256()
-        files = []
-
-        for dirpath, dirnames, filenames in os.walk(os.path.join(GECKO, image_path)):
-            for filename in filenames:
-                files.append(os.path.join(dirpath, filename))
-
-        for filename in sorted(files):
-            relative_filename = filename.replace(GECKO, '')
-            with open(filename, 'rb') as f:
-                file_hash = hashlib.sha256()
-                data = f.read()
-                file_hash.update(data)
-                context_hash.update(file_hash.hexdigest() + '\t' + relative_filename + '\n')
-
-        return context_hash.hexdigest()
--- a/taskcluster/taskgraph/kind/legacy.py
+++ b/taskcluster/taskgraph/kind/legacy.py
@@ -53,20 +53,16 @@ TREEHERDER_ROUTES = {
 # time after which a try build's results will expire
 TRY_EXPIRATION = "14 days"
 
 logger = logging.getLogger(__name__)
 
 def mklabel():
     return TASKID_PLACEHOLDER.format(slugid())
 
-# monkey-patch mklabel into image_builder, as well
-from taskcluster_graph import image_builder
-image_builder.mklabel = mklabel
-
 def set_expiration(task, timestamp):
     task_def = task['task']
     task_def['expires'] = timestamp
     if task_def.get('deadline', timestamp) > timestamp:
         task_def['deadline'] = timestamp
 
     try:
         artifacts = task_def['payload']['artifacts']
--- a/taskcluster/taskgraph/test/test_kind_docker_image.py
+++ b/taskcluster/taskgraph/test/test_kind_docker_image.py
@@ -29,26 +29,10 @@ class TestDockerImageKind(unittest.TestC
 
     def test_create_context_tar(self):
         image_dir = os.path.join(docker_image.GECKO, 'testing', 'docker', 'image_builder')
         tarball = tempfile.mkstemp()[1]
         self.kind.create_context_tar(image_dir, tarball, 'image_builder')
         self.failUnless(os.path.exists(tarball))
         os.unlink(tarball)
 
-    def test_generate_context_hash(self):
-        tmpdir = tempfile.mkdtemp()
-        old_GECKO = docker_image.GECKO
-        docker_image.GECKO = tmpdir
-        try:
-            os.makedirs(os.path.join(tmpdir, 'docker', 'my-image'))
-            with open(os.path.join(tmpdir, 'docker', 'my-image', 'Dockerfile'), "w") as f:
-                f.write("FROM node\nADD a-file\n")
-            with open(os.path.join(tmpdir, 'docker', 'my-image', 'a-file'), "w") as f:
-                f.write("data\n")
-            self.assertEqual(self.kind.generate_context_hash('docker/my-image'),
-                    '781143fcc6cc72c9024b058665265cb6bae3fb8031cad7227dd169ffbfced434')
-        finally:
-            docker_image.GECKO = old_GECKO
-            shutil.rmtree(tmpdir)
-
 if __name__ == '__main__':
     main()
--- a/taskcluster/taskgraph/test/test_util_docker.py
+++ b/taskcluster/taskgraph/test/test_util_docker.py
@@ -1,27 +1,46 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
+import os
+import shutil
+import tempfile
 import unittest
 
-from ..util.docker import docker_image, DOCKER_ROOT
+from ..util import docker
 from mozunit import main, MockedOpen
 
 
-class TestDockerImage(unittest.TestCase):
+class TestDocker(unittest.TestCase):
+
+    def test_generate_context_hash(self):
+        tmpdir = tempfile.mkdtemp()
+        old_GECKO = docker.GECKO
+        docker.GECKO = tmpdir
+        try:
+            os.makedirs(os.path.join(tmpdir, 'docker', 'my-image'))
+            with open(os.path.join(tmpdir, 'docker', 'my-image', 'Dockerfile'), "w") as f:
+                f.write("FROM node\nADD a-file\n")
+            with open(os.path.join(tmpdir, 'docker', 'my-image', 'a-file'), "w") as f:
+                f.write("data\n")
+            self.assertEqual(docker.generate_context_hash('docker/my-image'),
+                    '781143fcc6cc72c9024b058665265cb6bae3fb8031cad7227dd169ffbfced434')
+        finally:
+            docker.GECKO = old_GECKO
+            shutil.rmtree(tmpdir)
 
     def test_docker_image_explicit_registry(self):
         files = {}
-        files["{}/myimage/REGISTRY".format(DOCKER_ROOT)] = "cool-images"
-        files["{}/myimage/VERSION".format(DOCKER_ROOT)] = "1.2.3"
+        files["{}/myimage/REGISTRY".format(docker.DOCKER_ROOT)] = "cool-images"
+        files["{}/myimage/VERSION".format(docker.DOCKER_ROOT)] = "1.2.3"
         with MockedOpen(files):
-            self.assertEqual(docker_image('myimage'), "cool-images/myimage:1.2.3")
+            self.assertEqual(docker.docker_image('myimage'), "cool-images/myimage:1.2.3")
 
     def test_docker_image_default_registry(self):
         files = {}
-        files["{}/REGISTRY".format(DOCKER_ROOT)] = "mozilla"
-        files["{}/myimage/VERSION".format(DOCKER_ROOT)] = "1.2.3"
+        files["{}/REGISTRY".format(docker.DOCKER_ROOT)] = "mozilla"
+        files["{}/myimage/VERSION".format(docker.DOCKER_ROOT)] = "1.2.3"
         with MockedOpen(files):
-            self.assertEqual(docker_image('myimage'), "mozilla/myimage:1.2.3")
+            self.assertEqual(docker.docker_image('myimage'), "mozilla/myimage:1.2.3")
--- a/taskcluster/taskgraph/util/docker.py
+++ b/taskcluster/taskgraph/util/docker.py
@@ -1,26 +1,54 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
+import hashlib
 import os
 
 GECKO = os.path.realpath(os.path.join(__file__, '..', '..', '..', '..'))
 DOCKER_ROOT = os.path.join(GECKO, 'testing', 'docker')
+ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
 
 def docker_image(name):
     '''Determine the docker image name, including repository and tag, from an
     in-tree docker file.'''
     try:
         with open(os.path.join(DOCKER_ROOT, name, 'REGISTRY')) as f:
             registry = f.read().strip()
     except IOError:
         with open(os.path.join(DOCKER_ROOT, 'REGISTRY')) as f:
             registry = f.read().strip()
 
     with open(os.path.join(DOCKER_ROOT, name, 'VERSION')) as f:
         version = f.read().strip()
 
     return '{}/{}:{}'.format(registry, name, version)
 
+
+def generate_context_hash(image_path):
+    '''Generates a sha256 hash for context directory used to build an image.
+
+    Contents of the directory are sorted alphabetically, contents of each file is hashed,
+    and then a hash is created for both the file hashs as well as their paths.
+
+    This ensures that hashes are consistent and also change if file locations
+    within the context directory change.
+    '''
+    context_hash = hashlib.sha256()
+    files = []
+
+    for dirpath, dirnames, filenames in os.walk(os.path.join(GECKO, image_path)):
+        for filename in filenames:
+            files.append(os.path.join(dirpath, filename))
+
+    for filename in sorted(files):
+        relative_filename = filename.replace(GECKO, '')
+        with open(filename, 'rb') as f:
+            file_hash = hashlib.sha256()
+            data = f.read()
+            file_hash.update(data)
+            context_hash.update(file_hash.hexdigest() + '\t' + relative_filename + '\n')
+
+    return context_hash.hexdigest()
deleted file mode 100644
--- a/testing/taskcluster/taskcluster_graph/image_builder.py
+++ /dev/null
@@ -1,153 +0,0 @@
-import hashlib
-import json
-import os
-import subprocess
-import tarfile
-import urllib2
-
-from slugid import nice as slugid
-from taskgraph.util.templates import Templates
-
-TASKCLUSTER_ROOT = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
-IMAGE_BUILD_TASK = os.path.join(TASKCLUSTER_ROOT, 'tasks', 'image.yml')
-GECKO = os.path.realpath(os.path.join(TASKCLUSTER_ROOT, '..', '..'))
-DOCKER_ROOT = os.path.join(GECKO, 'testing', 'docker')
-REGISTRY = open(os.path.join(DOCKER_ROOT, 'REGISTRY')).read().strip()
-INDEX_URL = 'https://index.taskcluster.net/v1/task/docker.images.v1.{}.{}.hash.{}'
-ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
-DEFINE_TASK = 'queue:define-task:aws-provisioner-v1/{}'
-
-def is_docker_registry_image(registry_path):
-    return os.path.isfile(registry_path)
-
-# make a task label; in old decision tasks, this is a regular slugid, but when called
-# from the taskgraph generator's legacy kind, this is monkey-patched to return a label
-# (`TaskLabel==..`)
-def mklabel():
-    return slugid()
-
-def task_id_for_image(seen_images, project, name, create=True):
-    if name in seen_images:
-        return seen_images[name]['taskId']
-
-    context_path = os.path.join('testing', 'docker', name)
-    context_hash = generate_context_hash(context_path)
-    task_id = get_task_id_for_namespace(project, name, context_hash)
-
-    if task_id:
-        seen_images[name] = {'taskId': task_id}
-        return task_id
-
-    if not create:
-        return None
-
-    task_id = mklabel()
-    seen_images[name] = {
-        'taskId': task_id,
-        'path': context_path,
-        'hash': context_hash
-    }
-
-    return task_id
-
-def image_artifact_exists_for_task_id(task_id, path):
-    ''' Verifies that the artifact exists for the task ID '''
-    try:
-        request = urllib2.Request(ARTIFACT_URL.format(task_id, path))
-        request.get_method = lambda : 'HEAD'
-        urllib2.urlopen(request)
-        return True
-    except urllib2.HTTPError,e:
-        return False
-
-def get_task_id_for_namespace(project, name, context_hash):
-    '''
-    Determine the Task ID for an indexed image.
-
-    As an optimization, if the context hash exists for mozilla-central, that image
-    task ID will be used.  The reasoning behind this is that eventually everything ends
-    up on mozilla-central at some point if most tasks use this as a common image
-    for a given context hash, a worker within Taskcluster does not need to contain
-    the same image per branch.
-    '''
-    for p in ['mozilla-central', project]:
-        image_index_url = INDEX_URL.format(p, name, context_hash)
-        try:
-            task = json.load(urllib2.urlopen(image_index_url))
-            # Ensure that the artifact exists for the task and hasn't expired
-            artifact_exists = image_artifact_exists_for_task_id(task['taskId'],
-                                                                'public/image.tar')
-            # Only return the task ID if the artifact exists for the indexed
-            # task.  Otherwise, continue on looking at each of the branches.  Method
-            # continues trying other branches in case mozilla-central has an expired
-            # artifact, but 'project' might not. Only return no task ID if all
-            # branches have been tried
-            if artifact_exists:
-                return task['taskId']
-        except urllib2.HTTPError:
-            pass
-
-    return None
-
-def generate_context_hash(image_path):
-    '''
-    Generates a sha256 hash for context directory used to build an image.
-
-    Contents of the directory are sorted alphabetically, contents of each file is hashed,
-    and then a hash is created for both the file hashs as well as their paths.
-
-    This ensures that hashs are consistent and also change based on if file locations
-    within the context directory change.
-    '''
-    context_hash = hashlib.sha256()
-    files = []
-
-    for dirpath, dirnames, filenames in os.walk(os.path.join(GECKO, image_path)):
-        for filename in filenames:
-            files.append(os.path.join(dirpath, filename))
-
-    for filename in sorted(files):
-        relative_filename = filename.replace(GECKO, '')
-        with open(filename, 'rb') as f:
-            file_hash = hashlib.sha256()
-            while True:
-                data = f.read()
-                if not data:
-                    break
-                file_hash.update(data)
-            context_hash.update(file_hash.hexdigest() + '\t' + relative_filename + '\n')
-
-    return context_hash.hexdigest()
-
-def docker_load_from_url(url):
-    """Get a docker image from a `docker save` tarball at the given URL,
-    loading it into the running daemon and returning the image name."""
-
-    # because we need to read this file twice (and one read is not all the way
-    # through), it is difficult to stream it.  So we downlaod to disk and then
-    # read it back.
-    filename = 'temp-docker-image.tar'
-
-    print("Downloading {}".format(url))
-    subprocess.check_call(['curl', '-#', '-L', '-o', filename, url])
-
-    print("Determining image name")
-    tf = tarfile.open(filename)
-    repositories = json.load(tf.extractfile('repositories'))
-    name = repositories.keys()[0]
-    tag = repositories[name].keys()[0]
-    name = '{}:{}'.format(name, tag)
-    print("Image name: {}".format(name))
-
-    print("Loading image into docker")
-    try:
-        subprocess.check_call(['docker', 'load', '-i', filename])
-    except subprocess.CalledProcessError:
-        print("*** `docker load` failed.  You may avoid re-downloading that tarball by fixing the")
-        print("*** problem and running `docker load < {}`.".format(filename))
-        raise
-
-    print("Deleting temporary file")
-    os.unlink(filename)
-
-    return name