docker: support adding a subset of files to the tar archive
author Gregory Szorc <gps@mozilla.com>
Tue, 10 Mar 2015 11:44:29 -0700
changeset 360524 2931ea56d7c4b5a4fc901d87ff3a181d3633196c
parent 360523 b35135180867b933cbad788600bda52607a95afc
child 360525 cccdde2d9f45aff35ac583701de64f8567c0adfc
push id 16998
push user rwood@mozilla.com
push date Mon, 02 May 2016 19:42:03 +0000
docker: support adding a subset of files to the tar archive

Profiling revealed that adding all files from v-c-t to the tar archive used for the build context slowed down image generation. This patch adds support for specifying a subset of files to include in the archive.
testing/vcttesting/docker.py
--- a/testing/vcttesting/docker.py
+++ b/testing/vcttesting/docker.py
@@ -147,39 +147,43 @@ class Docker(object):
                 'recommended to use Python 2.7.8 until a workaround is '
                 'identified: %s' % e.message)
 
             return False
 
         except requests.exceptions.RequestException as e:
             return False
 
-    def ensure_built(self, name, verbose=False, add_vct=False):
+    def ensure_built(self, name, verbose=False, add_vct=False, vct_paths=None):
         """Ensure a Docker image from a builder directory is built and up to date.
 
         This function is docker build++. Under the hood, it talks to the same
         ``build`` Docker API. However, it does one important thing differently:
         it builds the context archive manually.
 
         We supplement all contexts with the content of the source in this
         repository related to building Docker containers. If ``add_vct`` is
         True, we add the entire source repository to the Docker context.
+        If ``vct_paths`` is an iterable, we add only the paths specified to the
+        context.
+
+        If an entry in ``vct_paths`` ends in a ``/``, we add all files under
+        that directory. Otherwise, we assume it is a literal match and only add
+        a single file.
 
         This added content can be ``ADD``ed to the produced image inside the
         Dockerfile. If the content changes, the Docker image ID changes and the
         cache is invalidated. This effectively allows downstream consumers to
         call ``ensure_built()`` as there *is the image up to date* check.
         """
 
         p = os.path.join(self._ddir, 'builder-%s' % name)
         if not os.path.isdir(p):
             raise Exception('Unknown Docker builder name: %s' % name)
 
-        # TODO create a lock to avoid race conditions.
-
         # We build the build context for the image manually because we need to
         # include things outside of the directory containing the Dockerfile.
         buf = BytesIO()
         tar = tarfile.open(mode='w', fileobj=buf)
 
         for root, dirs, files in os.walk(p):
             for f in files:
                 if f == '.dockerignore':
@@ -196,29 +200,53 @@ class Docker(object):
         # Add ourself.
         tar.add(os.path.join(HERE, 'docker.py'), 'extra/vcttesting/docker.py')
 
         # Add the script for managing docker. This shouldn't be needed, but you
         # never know.
         tar.add(os.path.join(HERE, '..', 'docker-control.py'),
                 'extra/docker-control.py')
 
-        if add_vct:
+        if add_vct or vct_paths:
             # We grab the set of tracked files in this repository.
             hg = os.path.join(ROOT, 'venv', 'bin', 'hg')
             env = dict(os.environ)
             env['HGRCPATH'] = '/dev/null'
             args = [hg, '-R', ROOT, 'locate', '-r', '.']
             null = open(os.devnull, 'wb')
             output = subprocess.check_output(args, env=env, cwd='/',
                                              stderr=null)
-            # And add them to the archive.
-            for line in output.splitlines():
-                filename = line.strip()
-                tar.add(os.path.join(ROOT, filename), 'extra/vct/%s' % filename)
+
+            vct_files = output.splitlines()
+            if add_vct:
+                for f in vct_files:
+                    f = f.strip()
+                    tar.add(os.path.join(ROOT, f), 'extra/vct/%s' % f)
+            else:
+                added = set()
+                for p in vct_paths:
+                    ap = os.path.join(ROOT, p)
+                    if not os.path.exists(ap):
+                        raise Exception('specified path not under version '
+                                        'control: %s' % p)
+                    if p.endswith('/'):
+                        for f in vct_files:
+                            if not f.startswith(p):
+                                continue
+                            full = os.path.join(ROOT, f)
+                            rel = 'extra/vct/%s' % f
+                            if full in added:
+                                continue
+                            tar.add(full, rel)
+                    else:
+                        full = os.path.join(ROOT, p)
+                        if full in added:
+                            continue
+                        rel = 'extra/vct/%s' % p
+                        tar.add(full, rel)
 
         tar.close()
 
         # Need to seek to beginning so .read() inside docker.client will return
         # data.
         buf.seek(0)
 
         # The API here is wonky, possibly due to buggy behavior in