Bug 1244189: add 'mach taskcluster-load-image'; r=garndt
author Dustin J. Mitchell <dustin@mozilla.com>
Fri, 25 Mar 2016 21:49:05 +0000
changeset 290702 4840ae780ac1b30464fd93be2c1170939a512887
parent 290701 f1cf3b00918f9b08d28f675f49198681855599cd
child 290703 073bbd96f75766ec2843bb905da2f7d1f0eaa0d2
push id 19656
push user gwagner@mozilla.com
push date Mon, 04 Apr 2016 13:43:23 +0000
treeherder b2g-inbound@e99061fde28a
reviewers garndt
bugs 1244189
milestone 48.0a1
Bug 1244189: add 'mach taskcluster-load-image'; r=garndt

The command downloads a docker image created by a taskcluster task, which can be helpful when trying to replicate subtle bugs that only occur in-tree.

This also fixes a bug in hashing Dockerfile directories, where the full pathname was taken into account. While that pathname is consistent from one decision task to the next, it differs across developers' checkouts. This change omits the directory prefix, which will cause a one-time shift in all directory hashes.

MozReview-Commit-ID: EamQzUGG5qY
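A minimal sketch of the hashing issue described above (the paths, prefixes, and file contents here are purely illustrative, not taken from the tree): when the absolute path is folded into the context hash, identical Dockerfile contents hash differently on different machines, and stripping the checkout prefix makes the hashes agree.

import hashlib

def context_hash(filenames, strip_prefix=''):
    # Simplified stand-in for generate_context_hash(): hash
    # "<file hash>\t<name>\n" per file, optionally stripping the
    # checkout prefix from the name.
    h = hashlib.sha256()
    for filename in sorted(filenames):
        file_hash = hashlib.sha256(b'FROM ubuntu:14.04\n').hexdigest()
        name = filename.replace(strip_prefix, '')
        h.update((file_hash + '\t' + name + '\n').encode('utf-8'))
    return h.hexdigest()

# Hypothetical checkouts on a developer machine and in automation.
dev = ['/home/dev/gecko/testing/docker/lint/Dockerfile']
ci = ['/builds/gecko/testing/docker/lint/Dockerfile']

# Old behaviour: the absolute path is part of the hash, so the hashes differ.
assert context_hash(dev) != context_hash(ci)

# New behaviour: stripping the checkout prefix makes the hashes agree.
assert context_hash(dev, '/home/dev/gecko') == context_hash(ci, '/builds/gecko')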
testing/taskcluster/mach_commands.py
testing/taskcluster/taskcluster_graph/image_builder.py
--- a/testing/taskcluster/mach_commands.py
+++ b/testing/taskcluster/mach_commands.py
@@ -222,16 +222,50 @@ class DecisionTask(object):
             'as_slugid': SlugidJar(),
             'from_now': json_time_from_now,
             'now': current_json_time()
         }.items())
         task = templates.load(params['task'], parameters)
         print(json.dumps(task, indent=4))
 
 @CommandProvider
+class LoadImage(object):
+    @Command('taskcluster-load-image', category="ci",
+        description="Load a pre-built Docker image")
+    @CommandArgument('--task-id',
+        help="Load the image at public/image.tar in this task, rather than "
+             "searching the index")
+    @CommandArgument('image_name', nargs='?',
+        help="Load the image of this name based on the current contents of the tree "
+             "(as built for mozilla-central or mozilla-inbound)")
+    def load_image(self, image_name, task_id):
+        from taskcluster_graph.image_builder import (
+            task_id_for_image,
+            docker_load_from_url
+        )
+
+        if not image_name and not task_id:
+            print("Specify either IMAGE-NAME or TASK-ID")
+            sys.exit(1)
+
+        if not task_id:
+            task_id = task_id_for_image({}, 'mozilla-inbound', image_name, create=False)
+            if not task_id:
+                print("No task found in the TaskCluster index for {}".format(image_name))
+                sys.exit(1)
+
+        print("Task ID: {}".format(task_id))
+
+        ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
+        image_name = docker_load_from_url(ARTIFACT_URL.format(task_id, 'public/image.tar'))
+
+        print("Loaded image is named {}".format(image_name))
+
+
+@CommandProvider
 class Graph(object):
     @Command('taskcluster-graph', category="ci",
         description="Create taskcluster task graph")
     @CommandArgument('--base-repository',
         default=os.environ.get('GECKO_BASE_REPOSITORY'),
         help='URL for "base" repository to clone')
     @CommandArgument('--head-repository',
         default=os.environ.get('GECKO_HEAD_REPOSITORY'),
--- a/testing/taskcluster/taskcluster_graph/image_builder.py
+++ b/testing/taskcluster/taskcluster_graph/image_builder.py
@@ -1,11 +1,12 @@
 import hashlib
 import json
 import os
+import subprocess
 import tarfile
 import urllib2
 
 import taskcluster_graph.transform.routes as routes_transform
 import taskcluster_graph.transform.treeherder as treeherder_transform
 from slugid import nice as slugid
 from taskcluster_graph.templates import Templates
 
@@ -28,28 +29,31 @@ def docker_image(name):
 
     version = open(os.path.join(DOCKER_ROOT, name, 'VERSION')).read().strip()
 
     if os.path.isfile(repository_path):
         repository = open(repository_path).read().strip()
 
     return '{}/{}:{}'.format(repository, name, version)
 
-def task_id_for_image(seen_images, project, name):
+def task_id_for_image(seen_images, project, name, create=True):
     if name in seen_images:
         return seen_images[name]['taskId']
 
     context_path = os.path.join('testing', 'docker', name)
     context_hash = generate_context_hash(context_path)
     task_id = get_task_id_for_namespace(project, name, context_hash)
 
     if task_id:
         seen_images[name] = {'taskId': task_id}
         return task_id
 
+    if not create:
+        return None
+
     task_id = slugid()
     seen_images[name] = {
         'taskId': task_id,
         'path': context_path,
         'hash': context_hash
     }
 
     return task_id
@@ -106,24 +110,25 @@ def generate_context_hash(image_path):
     context_hash = hashlib.sha256()
     files = []
 
     for dirpath, dirnames, filenames in os.walk(os.path.join(GECKO, image_path)):
         for filename in filenames:
             files.append(os.path.join(dirpath, filename))
 
     for filename in sorted(files):
+        relative_filename = filename.replace(GECKO, '')
         with open(filename, 'rb') as f:
             file_hash = hashlib.sha256()
             while True:
                 data = f.read()
                 if not data:
                     break
                 file_hash.update(data)
-            context_hash.update(file_hash.hexdigest() + '\t' + filename + '\n')
+            context_hash.update(file_hash.hexdigest() + '\t' + relative_filename + '\n')
 
     return context_hash.hexdigest()
 
 def create_context_tar(context_dir, destination, image_name):
     ''' Creates a tar file of a particular context directory '''
     if not os.path.exists(os.path.dirname(destination)):
         os.makedirs(os.path.dirname(destination))
 
@@ -224,8 +229,41 @@ def normalize_image_details(graph, task,
     )
 
     graph['scopes'].add(define_task)
     graph['scopes'] |= set(image_task['task'].get('scopes', []))
     route_scopes = map(lambda route: 'queue:route:' + route, image_task['task'].get('routes', []))
     graph['scopes'] |= set(route_scopes)
 
     details['required'] = True
+
+def docker_load_from_url(url):
+    """Get a docker image from a `docker save` tarball at the given URL,
+    loading it into the running daemon and returning the image name."""
+
+    # because we need to read this file twice (and one read is not all the way
+    # through), it is difficult to stream it.  So we download to disk and then
+    # read it back.
+    filename = 'temp-docker-image.tar'
+
+    print("Downloading {}".format(url))
+    subprocess.check_call(['curl', '-#', '-L', '-o', filename, url])
+
+    print("Determining image name")
+    tf = tarfile.open(filename)
+    repositories = json.load(tf.extractfile('repositories'))
+    name = repositories.keys()[0]
+    tag = repositories[name].keys()[0]
+    name = '{}:{}'.format(name, tag)
+    print("Image name: {}".format(name))
+
+    print("Loading image into docker")
+    try:
+        subprocess.check_call(['docker', 'load', '-i', filename])
+    except subprocess.CalledProcessError:
+        print("*** `docker load` failed.  You may avoid re-downloading that tarball by fixing the")
+        print("*** problem and running `docker load < {}`.".format(filename))
+        raise
+
+    print("Deleting temporary file")
+    os.unlink(filename)
+
+    return name
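For reference, a small sketch of how docker_load_from_url derives the image name from the 'repositories' member of a `docker save` tarball; the repository name, tag, and layer id below are hypothetical examples, not values from any real task.

import json

# Illustrative contents of the 'repositories' file inside a `docker save` tarball.
repositories = json.loads('{"taskcluster/desktop-test": {"0.4.4": "deadbeef"}}')

name = list(repositories.keys())[0]       # "taskcluster/desktop-test"
tag = list(repositories[name].keys())[0]  # "0.4.4"
print('{}:{}'.format(name, tag))          # taskcluster/desktop-test:0.4.4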