Bug 1288567 - Use deterministic tar archive generation; r=dustin
author Gregory Szorc <gps@mozilla.com>
Fri, 22 Jul 2016 10:29:58 -0700
changeset 348594 ce408d28753e15e523a9a3b3e2b4404dd1a88836
parent 348593 f36727c412bc19f62cf0ad54a8149db9c32f88b5
child 348595 df2587962e55286f9f46070df84620fc92d3cf50
push id 1230
push user jlund@mozilla.com
push date Mon, 31 Oct 2016 18:13:35 +0000
treeherder mozilla-release@5e06e3766db2 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1288567 - Use deterministic tar archive generation; r=dustin

We recently implemented code in mozpack for performing deterministic tar file creation. It normalizes things like uids, gids, and mtimes that creep into archives.

MozReview-Commit-ID: 1tn5eXkqACQ
--- a/taskcluster/taskgraph/util/docker.py
+++ b/taskcluster/taskgraph/util/docker.py
@@ -1,17 +1,21 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 from __future__ import absolute_import, print_function, unicode_literals
 import hashlib
 import os
-import tarfile
+from mozpack.archive import (
+    create_tar_gz_from_files,
 GECKO = os.path.realpath(os.path.join(__file__, '..', '..', '..', '..'))
 DOCKER_ROOT = os.path.join(GECKO, 'testing', 'docker')
 ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
 def docker_image(name):
     '''Determine the docker image name, including repository and tag, from an
@@ -60,18 +64,27 @@ def create_context_tar(context_dir, out_
     """Create a context tarball.
     A directory ``context_dir`` containing a Dockerfile will be assembled into
     a gzipped tar file at ``out_path``. Files inside the archive will be
     prefixed by directory ``prefix``.
     Returns the SHA-256 hex digest of the created archive.
-    with tarfile.open(out_path, 'w:gz') as tar:
-        tar.add(context_dir, arcname=prefix)
+    archive_files = {}
+    for root, dirs, files in os.walk(context_dir):
+        for f in files:
+            source_path = os.path.join(root, f)
+            rel = source_path[len(context_dir) + 1:]
+            archive_path = os.path.join(prefix, rel)
+            archive_files[archive_path] = source_path
+    with open(out_path, 'wb') as fh:
+        create_tar_gz_from_files(fh, archive_files, '%s.tar.gz' % prefix)
     h = hashlib.sha256()
     with open(out_path, 'rb') as fh:
         while True:
             data = fh.read(32768)
             if not data: