Bug 1288567 - Use create_context_tar in generate_context_hash; r?dustin draft
author Gregory Szorc <gps@mozilla.com>
Fri, 22 Jul 2016 12:46:06 -0700
changeset 392506 853f5bf425bb70f9a8ca05aa4512eb4d3f7b8efe
parent 392505 41c83e6c6aa3c4286ab37ef9482ed0c316d1c9b2
child 392507 6124f28061d9fe4be430749a658f6c5a3e78c268
push id 24042
push user bmo:gps@mozilla.com
push date Mon, 25 Jul 2016 18:25:42 +0000
reviewers dustin
bugs 1288567
milestone 50.0a1
Bug 1288567 - Use create_context_tar in generate_context_hash; r?dustin

This restores order to only having a single hash for a context directory.

Using a tempfile here is a bit unfortunate. It can be optimized later, if needed.

MozReview-Commit-ID: LMNsvt3fDYx
taskcluster/taskgraph/docker.py
taskcluster/taskgraph/task/docker_image.py
taskcluster/taskgraph/test/test_util_docker.py
taskcluster/taskgraph/util/docker.py
--- a/taskcluster/taskgraph/docker.py
+++ b/taskcluster/taskgraph/docker.py
@@ -16,17 +16,17 @@ from taskgraph.util import docker
 
 GECKO = os.path.realpath(os.path.join(__file__, '..', '..', '..'))
 INDEX_URL = 'https://index.taskcluster.net/v1/task/docker.images.v1.{}.{}.hash.{}'
 ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
 
 
 def load_image_by_name(image_name):
     context_path = os.path.join(GECKO, 'testing', 'docker', image_name)
-    context_hash = docker.generate_context_hash(context_path)
+    context_hash = docker.generate_context_hash(GECKO, context_path, image_name)
 
     image_index_url = INDEX_URL.format('mozilla-central', image_name, context_hash)
     print("Fetching", image_index_url)
     task = json.load(urllib2.urlopen(image_index_url))
 
     return load_image_by_task_id(task['taskId'])
 
 
--- a/taskcluster/taskgraph/task/docker_image.py
+++ b/taskcluster/taskgraph/task/docker_image.py
@@ -80,17 +80,17 @@ class DockerImageTask(base.Task):
                     os.environ['TASK_ID'], image_artifact_path)
                 context_hash = cls.create_context_tar(context_path, destination,
                                                       image_name)
             else:
                 # skip context generation since this isn't a decision task
                 # TODO: generate context tarballs using subdirectory clones in
                 # the image-building task so we don't have to worry about this.
                 image_parameters['context_url'] = 'file:///tmp/' + image_artifact_path
-                context_hash = generate_context_hash(context_path)
+                context_hash = generate_context_hash(GECKO, context_path, image_name)
 
             image_parameters['context_hash'] = context_hash
 
             image_task = templates.load('image.yml', image_parameters)
 
             attributes = {'image_name': image_name}
 
             # As an optimization, if the context hash exists for mozilla-central, that image
--- a/taskcluster/taskgraph/test/test_util_docker.py
+++ b/taskcluster/taskgraph/test/test_util_docker.py
@@ -26,18 +26,18 @@ class TestDocker(unittest.TestCase):
         docker.GECKO = tmpdir
         try:
             os.makedirs(os.path.join(tmpdir, 'docker', 'my-image'))
             with open(os.path.join(tmpdir, 'docker', 'my-image', 'Dockerfile'), "w") as f:
                 f.write("FROM node\nADD a-file\n")
             with open(os.path.join(tmpdir, 'docker', 'my-image', 'a-file'), "w") as f:
                 f.write("data\n")
             self.assertEqual(
-                docker.generate_context_hash('docker/my-image'),
-                '781143fcc6cc72c9024b058665265cb6bae3fb8031cad7227dd169ffbfced434'
+                docker.generate_context_hash(docker.GECKO, 'docker/my-image', 'my-image'),
+                '872d76a656f47ea17c043023ecc9ae6a222ba6d2a8df67b75498bba382e4fb07'
                 )
         finally:
             docker.GECKO = old_GECKO
             shutil.rmtree(tmpdir)
 
     def test_docker_image_explicit_registry(self):
         files = {}
         files["{}/myimage/REGISTRY".format(docker.DOCKER_ROOT)] = "cool-images"
--- a/taskcluster/taskgraph/util/docker.py
+++ b/taskcluster/taskgraph/util/docker.py
@@ -1,16 +1,17 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 import hashlib
 import os
+import tempfile
 
 from mozpack.archive import (
     create_tar_gz_from_files,
 )
 
 
 GECKO = os.path.realpath(os.path.join(__file__, '..', '..', '..', '..'))
 DOCKER_ROOT = os.path.join(GECKO, 'testing', 'docker')
@@ -28,41 +29,27 @@ def docker_image(name):
             registry = f.read().strip()
 
     with open(os.path.join(DOCKER_ROOT, name, 'VERSION')) as f:
         version = f.read().strip()
 
     return '{}/{}:{}'.format(registry, name, version)
 
 
-def generate_context_hash(image_path):
-    '''Generates a sha256 hash for context directory used to build an image.
-
-    Contents of the directory are sorted alphabetically, contents of each file is hashed,
-    and then a hash is created for both the file hashs as well as their paths.
-
-    This ensures that hashs are consistent and also change based on if file locations
-    within the context directory change.
-    '''
-    context_hash = hashlib.sha256()
-    files = []
+def generate_context_hash(topsrcdir, image_path, image_name):
+    """Generates a sha256 hash for context directory used to build an image."""
 
-    for dirpath, dirnames, filenames in os.walk(os.path.join(GECKO, image_path)):
-        for filename in filenames:
-            files.append(os.path.join(dirpath, filename))
-
-    for filename in sorted(files):
-        relative_filename = filename.replace(GECKO, '')
-        with open(filename, 'rb') as f:
-            file_hash = hashlib.sha256()
-            data = f.read()
-            file_hash.update(data)
-            context_hash.update(file_hash.hexdigest() + '\t' + relative_filename + '\n')
-
-    return context_hash.hexdigest()
+    # It is a bit unfortunate we have to create a temp file here - it would
+    # be nicer to use an in-memory buffer.
+    fd, p = tempfile.mkstemp()
+    os.close(fd)
+    try:
+        return create_context_tar(topsrcdir, image_path, p, image_name)
+    finally:
+        os.unlink(p)
 
 
 def create_context_tar(topsrcdir, context_dir, out_path, prefix):
     """Create a context tarball.
 
     A directory ``context_dir`` containing a Dockerfile will be assembled into
     a gzipped tar file at ``out_path``. Files inside the archive will be
     prefixed by directory ``prefix``.