Bug 1288567 - Use deterministic tar archive generation; r=dustin
author Gregory Szorc <gps@mozilla.com>
Fri, 22 Jul 2016 10:29:58 -0700
changeset 331558 ce408d28753e15e523a9a3b3e2b4404dd1a88836
parent 331557 f36727c412bc19f62cf0ad54a8149db9c32f88b5
child 331559 df2587962e55286f9f46070df84620fc92d3cf50
push id 9858
push user jlund@mozilla.com
push date Mon, 01 Aug 2016 14:37:10 +0000
treeherder mozilla-aurora@203106ef6cb6 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers dustin
bugs 1288567
milestone 50.0a1
Bug 1288567 - Use deterministic tar archive generation; r=dustin

We recently implemented code in mozpack for performing deterministic tar file creation. It normalizes things like uids, gids, and mtimes that creep into archives.

MozReview-Commit-ID: 1tn5eXkqACQ
taskcluster/taskgraph/util/docker.py
--- a/taskcluster/taskgraph/util/docker.py
+++ b/taskcluster/taskgraph/util/docker.py
@@ -1,17 +1,21 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 import hashlib
 import os
-import tarfile
+
+from mozpack.archive import (
+    create_tar_gz_from_files,
+)
+
 
 GECKO = os.path.realpath(os.path.join(__file__, '..', '..', '..', '..'))
 DOCKER_ROOT = os.path.join(GECKO, 'testing', 'docker')
 ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
 
 
 def docker_image(name):
     '''Determine the docker image name, including repository and tag, from an
@@ -60,18 +64,27 @@ def create_context_tar(context_dir, out_
     """Create a context tarball.
 
     A directory ``context_dir`` containing a Dockerfile will be assembled into
     a gzipped tar file at ``out_path``. Files inside the archive will be
     prefixed by directory ``prefix``.
 
     Returns the SHA-256 hex digest of the created archive.
     """
-    with tarfile.open(out_path, 'w:gz') as tar:
-        tar.add(context_dir, arcname=prefix)
+    archive_files = {}
+
+    for root, dirs, files in os.walk(context_dir):
+        for f in files:
+            source_path = os.path.join(root, f)
+            rel = source_path[len(context_dir) + 1:]
+            archive_path = os.path.join(prefix, rel)
+            archive_files[archive_path] = source_path
+
+    with open(out_path, 'wb') as fh:
+        create_tar_gz_from_files(fh, archive_files, '%s.tar.gz' % prefix)
 
     h = hashlib.sha256()
     with open(out_path, 'rb') as fh:
         while True:
             data = fh.read(32768)
             if not data:
                 break
             h.update(data)