docker: support adding a subset of files to the tar archive
author Gregory Szorc <>
Tue, 10 Mar 2015 11:44:29 -0700
changeset 360524 2931ea56d7c4b5a4fc901d87ff3a181d3633196c
parent 360523 b35135180867b933cbad788600bda52607a95afc
child 360525 cccdde2d9f45aff35ac583701de64f8567c0adfc
push id 16998
push date Mon, 02 May 2016 19:42:03 +0000
docker: support adding a subset of files to the tar archive

Profiling revealed that adding all files from v-c-t to the tar archive used for the build context slowed down image generation. This patch adds support for specifying a subset of files to include in the archive.
--- a/testing/vcttesting/
+++ b/testing/vcttesting/
@@ -147,39 +147,43 @@ class Docker(object):
                 'recommended to use Python 2.7.8 until a workaround is '
                 'identified: %s' % e.message)
             return False
         except requests.exceptions.RequestException as e:
             return False
-    def ensure_built(self, name, verbose=False, add_vct=False):
+    def ensure_built(self, name, verbose=False, add_vct=False, vct_paths=None):
         """Ensure a Docker image from a builder directory is built and up to date.
         This function is docker build++. Under the hood, it talks to the same
         ``build`` Docker API. However, it does one important thing differently:
         it builds the context archive manually.
         We supplement all contexts with the content of the source in this
         repository related to building Docker containers. If ``add_vct`` is
         True, we add the entire source repository to the Docker context.
+        If ``vct_paths`` is an iterable, we add only the paths specified to the
+        context.
+        If an entry in ``vct_paths`` ends in a ``/``, we add all files under
+        that directory. Otherwise, we assume it is a literal match and only add
+        a single file.
         This added content can be ``ADD``ed to the produced image inside the
         Dockerfile. If the content changes, the Docker image ID changes and the
         cache is invalidated. This effectively allows downstream consumers to
         call ``ensure_built()`` as there *is the image up to date* check.
         p = os.path.join(self._ddir, 'builder-%s' % name)
         if not os.path.isdir(p):
             raise Exception('Unknown Docker builder name: %s' % name)
-        # TODO create a lock to avoid race conditions.
         # We build the build context for the image manually because we need to
         # include things outside of the directory containing the Dockerfile.
         buf = BytesIO()
         tar ='w', fileobj=buf)
         for root, dirs, files in os.walk(p):
             for f in files:
                 if f == '.dockerignore':
@@ -196,29 +200,53 @@ class Docker(object):
         # Add ourself.
         tar.add(os.path.join(HERE, ''), 'extra/vcttesting/')
         # Add the script for managing docker. This shouldn't be needed, but you
         # never know.
         tar.add(os.path.join(HERE, '..', ''),
-        if add_vct:
+        if add_vct or vct_paths:
             # We grab the set of tracked files in this repository.
             hg = os.path.join(ROOT, 'venv', 'bin', 'hg')
             env = dict(os.environ)
             env['HGRCPATH'] = '/dev/null'
             args = [hg, '-R', ROOT, 'locate', '-r', '.']
             null = open(os.devnull, 'wb')
             output = subprocess.check_output(args, env=env, cwd='/',
-            # And add them to the archive.
-            for line in output.splitlines():
-                filename = line.strip()
-                tar.add(os.path.join(ROOT, filename), 'extra/vct/%s' % filename)
+            vct_files = output.splitlines()
+            if add_vct:
+                for f in vct_files:
+                    f = f.strip()
+                    tar.add(os.path.join(ROOT, f), 'extra/vct/%s' % f)
+            else:
+                added = set()
+                for p in vct_paths:
+                    ap = os.path.join(ROOT, p)
+                    if not os.path.exists(ap):
+                        raise Exception('specified path not under version '
+                                        'control: %s' % p)
+                    if p.endswith('/'):
+                        for f in vct_files:
+                            if not f.startswith(p):
+                                continue
+                            full = os.path.join(ROOT, f)
+                            rel = 'extra/vct/%s' % f
+                            if full in added:
+                                continue
+                            tar.add(full, rel)
+                    else:
+                        full = os.path.join(ROOT, p)
+                        if full in added:
+                            continue
+                        rel = 'extra/vct/%s' % p
+                        tar.add(full, rel)
         # Need to seek to beginning so .read() inside docker.client will return
         # data.
         # The API here is wonky, possibly due to buggy behavior in