Bug 1329282 - mach qemu commands draft
authorJonas Finnemann Jensen <jopsen@gmail.com>
Mon, 11 Sep 2017 12:54:16 -0700
changeset 664956 761a156acecc825edf4d063bded93449cb13c4f9
parent 662594 15128312c02a238ac158589c59fd3ee3e384ce80
child 664957 38062cb33d90eb54b7ce37d947076cceca4a59b7
push id79876
push userjojensen@mozilla.com
push dateThu, 14 Sep 2017 17:51:34 +0000
bugs1329282
milestone57.0a1
Bug 1329282 - mach qemu commands This adds the following mach commands for working with QEMU images. * `mach qemu setup` to verify KVM and docker setup, * `mach qemu images` to list images cached locally, * `mach qemu build` to build images locally (using QEMU inside docker), * `mach qemu pull` to download images from indexed tasks or S3, * `mach qemu push` to upload manually built images to S3, * `mach qemu run` to execute a command within a VM locally, * `mach qemu shell` to obtain an interactie shell into a VM locally, * `mach qemu export` to export an image file from local cache, * `mach qemu import` to import an image file into local cache, * `mach qemu purge` to clear local image cache, * `mach qemu clear-cache` to clear local cache of build dependencies. These commands help manage a local cache of QEMU images, where images have names and are tagged by the content-hash of the recipe from which they where built. These commands helps ensure that the correct images are referenced when image recipes are modified. MozReview-Commit-ID: AyunGd9ltMp
taskcluster/mach_commands.py
taskcluster/taskgraph/qemu/__init__.py
taskcluster/taskgraph/qemu/buildimage.py
taskcluster/taskgraph/qemu/httpcache.py
taskcluster/taskgraph/qemu/imagecache.py
taskcluster/taskgraph/qemu/imagehash.py
taskcluster/taskgraph/qemu/imageschema.py
taskcluster/taskgraph/qemu/run.py
taskcluster/taskgraph/util/schema.py
--- a/taskcluster/mach_commands.py
+++ b/taskcluster/mach_commands.py
@@ -5,28 +5,29 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 import json
 import logging
 import os
+import re
 import sys
 import traceback
-import re
 
 from mach.decorators import (
     CommandArgument,
     CommandProvider,
     Command,
     SubCommand,
 )
 
 from mozbuild.base import MachCommandBase
+from mozbuild.util import memoize
 
 
 class ShowTaskGraphSubCommand(SubCommand):
     """A SubCommand with TaskGraph-specific arguments"""
 
     def __call__(self, func):
         after = SubCommand.__call__(self, func)
         args = [
@@ -499,8 +500,389 @@ class TaskClusterImagesProvider(object):
         try:
             if context_only is None:
                 build_image(image_name)
             else:
                 build_context(image_name, context_only)
         except Exception:
             traceback.print_exc()
             sys.exit(1)
+
+
+@CommandProvider
+class QEMUCommands(MachCommandBase):
+    @Command('qemu', category='ci',
+             description='taskcluster QEMU image development commands.')
+    def qemu(self):
+        """Develop and test QEMU images for use in automation on Taskcluster.
+
+        Use |mach qemu| to assists in building, managing, testing, uploading and
+        downloading QEMU images for use in Taskcluster. Towards this end images are
+        cached locally in ''MOZBUILD_STATE_PATH'' which defaults to ''~/.mozbuild''.
+
+        When working with images you should notice that an ''image_hash'' will be
+        computed from ''image.yml'', this hash will change whenever a command or
+        file referenced in ''image.yml'' changes. Image cached locally will be
+        cached as ''<image>-<image_hash>.tar.zst'', and all these commands operate
+        on the current ''image_hash'', that is the hash computed from the ''image.yml''
+        as is in the current checkout. To operator on older versions of an image
+        you must checkout an older version of gecko.
+
+        Note: These commands will only work on linux, and requires certain
+        tools to be installed and certain permissions to be configured, use
+        |mach qemu setup| to validate your setup.
+        """
+        self.list_images(None, None)
+        print('\nUsage see |mach help qemu|')
+
+    @property
+    @memoize
+    def http_file_cache(self):
+        from taskgraph.qemu.httpcache import HTTPFileCache
+        return HTTPFileCache(os.path.join(self._mach_context.state_dir, 'tc-qemu-cache'))
+
+    @property
+    @memoize
+    def image_cache(self):
+        from taskgraph.qemu.imagecache import ImageCache
+        return ImageCache(self._mach_context.state_dir)
+
+    @SubCommand('qemu', 'setup', description='Setup environment using QEMU')
+    def setup(self):
+        import subprocess
+        import platform
+        from which import which
+        from taskgraph.qemu.run import DOCKER_IMAGE
+        # Check that we're on linux
+        if platform.system() != 'Linux':
+            print('|mach qemu| commands only works on Linux')
+            return 1
+        # Check that we have kvm
+        if 'kvm' not in (s.split(' ')[0] for s in open('/proc/modules', 'r').read().split('\n')):
+            print('|mach qemu| commands requires kvm to be loaded')
+            return 1
+        if not os.path.exists('/dev/kvm'):
+            print('|mach qemu| commands requires /dev/kvm to be present')
+            return 1
+        # Check that we have docker
+        try:
+            which('docker')
+        except Exception:
+            print('|mach qemu| commands requires docker to be installed')
+            return 1
+        data = json.loads(subprocess.check_output(['docker', 'version', '--format', '{{json .}}']))
+        major, minor = data.get('Server', {}).get('ApiVersion').split('.')
+        if int(major) != 1 or int(minor) < 29:
+            print('requires docker API version >= 1.29')
+            return 1
+        print('pulling docker image used for building images:')
+        if subprocess.call(['docker', 'pull', DOCKER_IMAGE]) != 0:
+            print('failed to pull docker image: {}'.format(DOCKER_IMAGE))
+            return 1
+        print('|mach qemu| commands are ready for use')
+
+    @SubCommand('qemu', 'images', description='List cached QEMU images')
+    @CommandArgument('image_name', nargs='?', help='List only instance of given image')
+    @CommandArgument('--json', required=False, action='store_true',
+                     help='dump result as JSON to stdout')
+    def list_images(self, image_name, json):
+        """
+        List QEMU images present in your cache. This will be listed as a table
+        or json object if ''--json'' is given. Optionally, an image name
+        can be given to only list said image.
+        """
+        rows = [image for image in self.image_cache.list_images()
+                if image_name in (None, image['image'])]
+        rows = sorted(rows, key=lambda row: (row['image'], row['origin'], row['image_hash']))
+        if json:
+            from json import dumps
+            print(dumps({'images': rows}, sort_keys=True, indent=2))
+        else:
+            max_name = max([8] + [len(row['image']) for row in rows]) + 1
+            fmt = '{:<' + str(max_name) + '} {:<32}  {:<16} {:>10}'
+            print(fmt.format('image', 'image-hash (short)', 'origin', 'size'))
+            for row in rows:
+                size = float(row['size'])
+                if size > 1024 * 1024 * 1024:
+                    size = '{0:.1f} GiB'.format(size / (1024 * 1024 * 1024))
+                if size > 1024 * 1024:
+                    size = '{0:.1f} MiB'.format(size / (1024 * 1024))
+                elif size > 1024:
+                    size = '{0:.1f} KiB'.format(size / 1024)
+                elif size == 0:
+                    size = ' - '
+                else:
+                    size = '{0:.1f} B'.format(size)
+                print(fmt.format(
+                    ('*' if row['current'] else ' ') + row['image'],
+                    row['image_hash'][:32],
+                    row['origin'],
+                    size,
+                ))
+
+    @SubCommand('qemu', 'build', description='Build QEMU image')
+    @CommandArgument('image_name', help='Name of the image to build')
+    @CommandArgument('--vnc', required=False, action='store_true',
+                     help='open VNC display while building image')
+    @CommandArgument('--force', required=False, action='store_true',
+                     help='overwrite existing image with same image_hash, if present')
+    @CommandArgument('--output', required=False,
+                     help='File to write to output, defaults to mach qemu image cache')
+    def build_image(self, image_name, vnc, force, output):
+        """
+        Build an image from in-tree recipe. If an image with matching ''image_hash''
+        is present in local cache you must specify ''--force''.
+
+        Unless ''--output'' is specified the image built will be placed in local
+        cache, from where you can push it to S3, run it locally, or export it as
+        a file.
+        """
+        from taskgraph.qemu.buildimage import build_image
+        from taskgraph.qemu.imagehash import compute_image_hash
+        image_hash = compute_image_hash(image_name)
+        if not (force or output) and self.image_cache.has_image(image_name, image_hash):
+            print('image {} with image-hash {} already exists, use --force to overwrite'.format(
+                image_name, image_hash[:32],
+            ))
+            return 1
+        if output:
+            targetFile = output
+        else:
+            targetFile = self.image_cache.get_temporary_filename()
+        build_image(image_name, targetFile, vnc, self.http_file_cache)
+        if not output:
+            self.image_cache.insert_from_temporary_file(
+                image_name, image_hash, targetFile, 'built-locally',
+            )
+        print('built {}@sha256:{}'.format(image_name, image_hash))
+
+    @SubCommand('qemu', 'pull', description='Pull image into local cache')
+    @CommandArgument('image_name', help='Name of the image to pull')
+    @CommandArgument('--image-hash', required=False,
+                     help='image-hash to pull, defaults to current image-hash')
+    @CommandArgument('--level', required=False, type=int,
+                     help='force pulling from index with given commit-level')
+    @CommandArgument('--force', required=False, action='store_true',
+                     help='overwrite existing image with same image_hash, if present')
+    def pull_image(self, image_name, image_hash, level, force):
+        """
+        Pull image from S3 or artifact under which it has been indexed by ''image_hash''.
+        This command uses the current ''image_hash'' from ''image.yml'' at the current
+        revision, and will fail if it's not present on either S3 or as artifact on
+        an indexed task.
+
+        If ''--level <level>'' is specified it will only download from task indexed
+        for said ''<level>'', and not attempt to download from S3, which will
+        otherwise be preferred.
+
+        If an image with current ''image_hash'' already exists in the local image
+        cache, the ''--force'' option must be passed.
+        """
+        self._activate_virtualenv()
+        self.virtualenv_manager.install_pip_package('taskcluster==1.3.4')
+        from taskgraph.qemu.imagehash import compute_image_hash
+        from taskgraph.qemu.imageschema import load_reference
+        if not image_hash:
+            image_hash = compute_image_hash(image_name)
+        else:
+            image_hash = self.image_cache.resolve_image_hash(image_name, image_hash)
+        if not force and self.image_cache.has_image(image_name, image_hash):
+            print('image {} with image-hash {} exists in cache, use --force to overwrite'.format(
+                image_name, image_hash[:32],
+            ))
+            return 1
+        if not level:
+            ref = load_reference(image_name)
+            if ref and ref['image-hash'] == image_hash:
+                self.image_cache.pull_from_url(image_name, image_hash, ref['url'],
+                                               origin='from-reference', sha256=ref['sha256'])
+                print('image {} with image-hash {} downloaded from S3'.format(
+                    image_name, image_hash[:32],
+                ))
+                return
+            elif ref:
+                print('reference.yml does not reference image-hash {} as requested'.format(
+                    image_hash[:32],
+                ))
+        level = self.image_cache.pull_index(image_name, image_hash, level or 1)
+        if not level:
+            print('image {} with image-hash {} was not found in index'.format(
+                image_name, image_hash[:32],
+            ))
+            return 1
+        print('image {} with image-hash {} downloaded from index with level-{}'.format(
+            image_name, image_hash[:32], level,
+        ))
+
+    @SubCommand('qemu', 'push', description='Push image from local cache to S3')
+    @CommandArgument('image_name', help='Name of the image to push')
+    @CommandArgument('--image-hash', required=False,
+                     help='image-hash to push, defaults to current image-hash')
+    @CommandArgument('--force', required=False, action='store_true',
+                     help='overwrite existing image with same image_hash, if present in S3')
+    def push_image(self, image_name, image_hash, force):
+        """
+        Push an image from local cache to S3, this requires that you have an image
+        in local cache with the current ''image_hash'' otherwise this command will
+        fail. Once uploaded this command will write a reference file at
+        ''/taskcluster/qemu/<image>/reference.yml'' which will contain the URL,
+        ''image_hash'' and content-hash of the image uploaded to S3.
+
+        If an image with the current ''image_hash'' already exists in S3, the
+        ''--force'' must be passed.
+
+        To upload to S3 you must have level 3 commit access, and have taskcluster
+        credentials in your environment variables: ''TASKCLUSTER_CLIENT_ID'',
+        ''TASKCLUSTER_ACCESS_TOKEN'', and optionally ''TASKCLUSTER_CERTIFICATE''.
+        """
+        self._activate_virtualenv()
+        self.virtualenv_manager.install_pip_package('taskcluster==1.3.4')
+        self.virtualenv_manager.install_pip_package('boto3==1.4.5')
+        from taskgraph.qemu.imagehash import compute_image_hash
+        if not image_hash:
+            image_hash = compute_image_hash(image_name)
+        else:
+            image_hash = self.image_cache.resolve_image_hash(image_name, image_hash)
+        self.image_cache.push_to_s3(image_name, image_hash, force)
+
+    @SubCommand('qemu', 'run', description='Run command inside VM')
+    @CommandArgument('image_name', help='Name of the image to start VM from')
+    @CommandArgument('--image-hash', required=False,
+                     help='image-hash to use, defaults to current image-hash')
+    @CommandArgument('--vnc', required=False, action='store_true',
+                     help='open VNC display to running image')
+    @CommandArgument('--keep-alive', required=False, action='store_true',
+                     help='Keep VM alive after command is done, terminate with SIGTERM')
+    @CommandArgument('--verbose', required=False, action='store_true',
+                     help='Print debug information logging from taskcluster-worker')
+    @CommandArgument('command', nargs='+', help='Command and arguments to run inside VM')
+    def run_vm(self, image_name, image_hash, vnc, keep_alive, verbose, command):
+        """
+        Run command in a new virtual machine using ''image'' with current
+        ''image_hash''. This will fail if no such image is available in local cache.
+
+        This is useful for testing that guest-tools have been installed correctly,
+        and that the image works as expected. Notice, that this command does not
+        allow you specify input for the virtual machine, it will merely run the
+        command passed and terminate while printing the log to stdout.
+
+        Use |mach qemu shell| to obtain an interactive shell into a new VM
+        created from an image. Notice that for simplicity and sanity
+        |mach qemu run| only allows running one VM at the time.
+        """
+        from taskgraph.qemu.imagehash import compute_image_hash
+        if not image_hash:
+            image_hash = compute_image_hash(image_name)
+        else:
+            image_hash = self.image_cache.resolve_image_hash(image_name, image_hash)
+        from taskgraph.qemu.run import qemu_run
+        image_file = self.image_cache.get_image_file(image_name, image_hash)
+        if not image_file:
+            print('image {} with image-hash {} was not found in local cache'.format(
+                image_name, image_hash[:32],
+            ))
+            return False
+        return qemu_run(image_file, command, vnc=vnc, keep_alive=keep_alive, verbose=verbose)
+
+    @SubCommand('qemu', 'shell', description='Start interactive shell inside VM')
+    @CommandArgument('command', nargs='*', help='Command and arguments for shell (optional)')
+    def shell_vm(self, command):
+        """
+        Start an interactive shell in a virtual machine started with |mach qemu run|.
+        For simplicity and sanity |mach qemu run| only allows running one VM at
+        the time.
+
+        If no ''command'' is passed this will start whatever default shell the image
+        is configured to provide. This will usually be ''/bin/bash'' or ''cmd.exe''
+        depending on platform.
+        """
+        from taskgraph.qemu.run import start_shell
+        return start_shell(command)
+
+    @SubCommand('qemu', 'export', description='Export image from local cache')
+    @CommandArgument('image_name', help='Name of the image to export')
+    @CommandArgument('output_file', help='Path to write outfile to')
+    @CommandArgument('--image-hash', required=False,
+                     help='image-hash to export, defaults to current image-hash')
+    @CommandArgument('--force', required=False, action='store_true',
+                     help='overwrite output_file if it already exists')
+    def export_image(self, image_name, output_file, image_hash, force):
+        """
+        Export ''image'' with current ''image_hash'' to a file, this will fail if
+        no such image is present in local cache.
+        """
+        from taskgraph.qemu.imagehash import compute_image_hash
+        import shutil
+        if not image_hash:
+            image_hash = compute_image_hash(image_name)
+        else:
+            image_hash = self.image_cache.resolve_image_hash(image_name, image_hash)
+        imgfile = self.image_cache.get_image_file(image_name, image_hash)
+        if not imgfile:
+            print('Error: image {} with image-hash {} is not present in cache'.format(
+                image_name, image_hash[:32],
+            ))
+            return 1
+        if not force and os.path.exists(output_file):
+            print('Error: output file {} already exists, use --force to overwrite'.format(
+                output_file))
+            return 1
+        shutil.copyfile(imgfile, output_file)
+
+    @SubCommand('qemu', 'import', description='Import image from file to local cache')
+    @CommandArgument('image_name', help='Image name to import as')
+    @CommandArgument('image_hash', help='image-hash to import as')
+    @CommandArgument('input_file', help='Path to file to import')
+    @CommandArgument('--force', required=False, action='store_true',
+                     help='overwrite existing image with same image_hash, if present')
+    def import_image(self, image_name, image_hash, input_file, force):
+        """
+        Import ''image'' as if it has current ''image_hash'' from a file.
+
+        WARNING: this can easily poison your local-cache, you should use this with care.
+        """
+        import shutil
+        image_hash = image_hash.lower()  # always use lower-case
+        if not re.match(r'^[0-9a-f]{64}$', image_hash):
+            print('image-hash "{}" must be a full hex encoded sha256')
+            return 1
+        if not force and self.image_cache.has_image(image_name, image_hash):
+            print('image {} with image-hash {} already exists, use --force to overwrite'.format(
+                image_name, image_hash[:32],
+            ))
+            return 1
+        tmp = self.image_cache.get_temporary_filename()  # insert transfers ownership
+        shutil.copyfile(input_file, tmp)
+        self.image_cache.insert_from_temporary_file(image_name, image_hash, tmp)
+
+    @SubCommand('qemu', 'purge', description='Purge images from local cache')
+    @CommandArgument('image_name', nargs='?',
+                     help='Name of image to purge, defaults to all')
+    @CommandArgument('image_hash', nargs='?',
+                     help='image-hash to purge, defaults to all')
+    @CommandArgument('--force', required=False, action='store_true',
+                     help='Remove all images without prompting for confirmation')
+    def purge_images(self, image_name, image_hash, force):
+        """
+        Purge the cache for a specific image and image-hash or all images if no
+        arguments are passed.
+        """
+        if image_hash:
+            image_hash = self.image_cache.resolve_image_hash(image_name, image_hash)
+        if not image_hash and not force:
+            if raw_input('CONFIRM: Remove all images (y/n)? [N]: ').lower() != 'y':
+                print('No images removed!')
+                return
+        for row in self.image_cache.purge(image_name, image_hash):
+            fmt = 'Removed: {:<32} {:<32} {:<15}'
+            print(fmt.format(row['image'], row['image_hash'][:32], row['origin']))
+
+    @SubCommand('qemu', 'clear-cache',
+                description='Clear blob cache used for image builds')
+    def clear_cache(self):
+        """
+        Clear blob cache used for building images.
+
+        This does not remove locally cached images, to do this use
+        |mach qemu purge|.
+        """
+        self.http_file_cache.clear()
+        self.image_cache.cleanup_temporary_files()
new file mode 100644
new file mode 100644
--- /dev/null
+++ b/taskcluster/taskgraph/qemu/buildimage.py
@@ -0,0 +1,207 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+import json
+import os
+import shutil
+import subprocess
+import tempfile
+import sys
+import time
+from copy import deepcopy
+
+from concurrent.futures import ThreadPoolExecutor
+
+from mach.base import FailedCommandError
+from taskgraph import GECKO
+from .httpcache import HTTPFileCache
+from .imageschema import IMAGE_DIR, load_and_validate_image
+from .run import docker_run, wait_for_docker_and_vnc
+
+
+def build_image(name, targetFile, vnc=False, file_cache=None):
+    image = load_and_validate_image(name)
+    # Create temporary to work in
+    workdir = tempfile.mkdtemp(prefix='mach-qemu-')
+
+    # Ensure we have a file cache object
+    if not file_cache:
+        file_cache = HTTPFileCache(os.path.join(workdir, 'blob_cache'))
+
+    def fetch_source(source, targetFile):
+        # Copy from source tree
+        if isinstance(source, basestring):
+            if source.startswith('./'):
+                source = os.path.join(IMAGE_DIR, name, source[2:])
+            else:
+                source = os.path.join(GECKO, source[1:])
+            try:
+                if os.path.isdir(source):
+                    shutil.copytree(source, targetFile)
+                else:
+                    shutil.copy(source, targetFile)
+            except Exception as e:
+                raise FailedCommandError('failed to fetch source: {}, error: {}'.format(source, e))
+            return
+
+        # Download from URL
+        if 'url' in source:
+            try:
+                shutil.copy2(file_cache.fetch_file(source['url'], source['sha256']), targetFile)
+            except Exception as e:
+                raise FailedCommandError('failed to fetch source: {}, error: {}'.format(source, e))
+
+        # Download from tooltool
+        if 'tooltool' in source:
+            # TODO: Figure out how to call tooltool programmatically, it can't
+            #       be right that it there is no way to just call:
+            #         tooltool(digest, algorithm, size, targetFile, url=...)
+            #       Or something like that would really nice.
+            manifest = os.path.join(workdir, 'manifest.tt')
+            with open(manifest, 'wb') as f:
+                json.dump(f, [{
+                    'size': source['size'],
+                    'digest': source['digest'],
+                    'algorithm': source['algorithm'],
+                    'filename': targetFile,
+                }], sort_keys=True)
+            url = []
+            if source['tooltool'] != "":
+                url = ['--url', source['tooltool']]
+            ret = subprocess.call([
+                'python', os.path.join(GECKO, 'python/mozbuild/mozbuild/action/tooltool.py'),
+                'fetch'] + url + ['-m', manifest])
+            os.unlink(manifest)
+            if ret != 0:
+                raise FailedCommandError('failed to fetch source: {} from tooltool'.format(source))
+
+    def produce_iso(steps, fileName):
+        if not isinstance(steps, list):
+            return fetch_source(steps, os.path.join(workdir, fileName))
+
+        # Create folder for the disk contents
+        diskdir = os.path.join(workdir, 'disk')
+        if os.path.exists(diskdir):
+            shutil.rmtree(diskdir)
+        os.mkdir(diskdir)
+
+        # Check that last step is a genisoimage
+        if 'genisoimage' not in steps[-1]:
+            raise FailedCommandError('Last step should always be genisoimage')
+
+        # For each step run the step
+        for index, step in enumerate(steps):
+            if 'copy' in step:
+                print('## Step {}: copy to {}'.format(index+1, step['target']))
+                fetch_source(step['copy'], os.path.join(diskdir, step['target'][1:]))
+
+            if 'extract' in step:
+                print('## Step {}: extract to {}'.format(index+1, step['target']))
+                archive = os.path.join(workdir, 'archive')
+                fetch_source(step['extract'], archive)
+                print('calling 7z to extract')
+                ok = docker_run([
+                    '7z',
+                    'x',
+                    '-t{}'.format(step['format']),
+                    '-o{}'.format(os.path.join('/disk-dir', step['target'][1:])),
+                    '/input-archive',
+                ], {
+                    diskdir: '/disk-dir',
+                    archive: '/input-archive',
+                }, stdout=open(os.devnull, 'w'), stderr=sys.stdout)
+                os.unlink(archive)
+                if not ok:
+                    raise FailedCommandError(
+                        'extract step: {} failed with exit code non-zero'.format(step))
+
+            if 'sed' in step:
+                print('## Step {}: sed on {}'.format(index+1, step['target']))
+                ok = docker_run([
+                    'sed', '-i', step['sed'],
+                    os.path.join('/disk-dir', step['target'][1:]),
+                ], {
+                    diskdir: '/disk-dir',
+                })
+                if not ok:
+                    raise FailedCommandError(
+                        'sed step: {} failed with exit code non-zero'.format(step))
+
+            if 'chmod' in step:
+                print('## Step {}: chmod on {}'.format(index+1, step['target']))
+                ok = docker_run([
+                    'chmod', step['chmod'],
+                    os.path.join('/disk-dir', step['target'][1:]),
+                ], {
+                    diskdir: '/disk-dir',
+                })
+                if not ok:
+                    raise FailedCommandError(
+                        'chmod step: {} failed with exit code non-zero'.format(step))
+
+            if 'genisoimage' in step:
+                print('## Step {}: genisoimage'.format(index+1))
+                ok = docker_run([
+                    'genisoimage'
+                ] + step['genisoimage'] + [
+                    '-o', os.path.join('/work-dir', fileName),
+                    '/disk-dir',
+                ], {
+                    workdir: '/work-dir',
+                    diskdir: '/disk-dir',
+                })
+                if not ok:
+                    raise FailedCommandError(
+                        'genisoimage step: {} failed with exit code non-zero'.format(step)
+                    )
+
+    try:
+        # Create machine.json
+        print('# Creating machine definition')
+        m = deepcopy(image['machine'])
+        m['version'] = 1
+        with open(os.path.join(workdir, 'machine.json'), 'wb') as f:
+            json.dump(m, f, sort_keys=True)
+
+        # Create isoA
+        print('# Producing ISO for cdromA:')
+        produce_iso(image['cdromA'], 'cdromA.iso')
+
+        # Create isoB, if there is a recipe
+        cdrom = []
+        if 'cdromB' in image:
+            print('# Producing ISO for cdromB:')
+            produce_iso(image['cdromB'], 'cdromB.iso')
+            cdrom = ['--cdrom', '/work-dir/cdromB.iso']
+
+        # Build the vm with disksize, machine, cdromA/B -> targetFile
+        vnc_opts = ['--vnc', '5900'] if vnc else []
+        print('# Building VM Image')
+        with ThreadPoolExecutor(max_workers=2) as e:
+            if vnc:
+                e.submit(wait_for_docker_and_vnc)
+                time.sleep(0.1)  # give docker events time to start
+            ok = docker_run([
+                'taskcluster-worker', 'qemu-build',
+                ] + vnc_opts + [
+                '--size', str(image['disksize']),
+                '--boot', '/work-dir/cdromA.iso',
+                ] + cdrom + [
+                'from-new', '/work-dir/machine.json', '/work-dir/image.tar.zst',
+            ], {
+                workdir: '/work-dir',
+            })
+            if not ok:
+                raise FailedCommandError('failed to build QEMU image, exit code non-zero')
+        shutil.copyfile(os.path.join(workdir, 'image.tar.zst'), targetFile)
+        print('# Build process completed')
+    finally:
+        # Remove everything using docker
+        subprocess.call(['docker', 'kill', 'mach-qemu-tc-worker'],
+                        stdout=open(os.devnull, 'w'), stderr=open(os.devnull, 'w'))
+        docker_run(['bash', '-c', 'rm -rf /work-dir/*'], {workdir: '/work-dir'},
+                   name='mach-qemu-cleanup')
+        shutil.rmtree(workdir)
new file mode 100644
--- /dev/null
+++ b/taskcluster/taskgraph/qemu/httpcache.py
@@ -0,0 +1,94 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, print_function, unicode_literals
+import hashlib
+import urllib2
+import shutil
+import os
+
+import redo
+from mach.base import FailedCommandError
+from math import floor
+from time import time, sleep
+
+CHUNK_SIZE = 256 * 1024
+
+
+def verify_file_sha256(file_path, sha256):
+    """ Verify file digest, returns False if not valid """
+    print('verifying {}'.format(sha256))
+    h = hashlib.sha256()
+    with open(file_path, 'rb') as f:
+        while True:
+            c = f.read(CHUNK_SIZE)
+            if not c:
+                break
+            h.update(c)
+    return h.hexdigest() == sha256
+
+
+class FetchFileException(FailedCommandError):
+    """Exception because fetch_file failed to fetch a file with retries exchausted"""
+    pass
+
+
+@redo.retriable(attempts=8, sleeptime=2, max_sleeptime=90, sleepscale=2, jitter=1,
+                retry_exceptions=(FetchFileException,))
+def fetch_file(url, targetFile, sha256=None, description=None):
+    """ Download url to targetFile validating that content hash matches ''sha256''"""
+    description = description or url
+    try:
+        req = urllib2.urlopen(url)
+        size = int(req.info().getheader('Content-Length').strip())
+        read = 0
+        updated = time()
+        with open(targetFile, 'wb') as f:
+            while True:
+                c = req.read(CHUNK_SIZE)
+                if not c:
+                    break
+                read += len(c)
+                f.write(c)
+                if time() - updated > 2:
+                    updated = time()
+                    print('downloading: {} at {} %'.format(
+                        description, int(floor((float(read) / size)*100))))
+        if read != size:
+            raise FetchFileException('retrying {} content-length mismatch'.format(url))
+        if sha256 and not verify_file_sha256(targetFile, sha256):
+            raise FetchFileException('retrying {} sha256 mismatch'.format(url))
+    except urllib2.HTTPError as e:
+        if 300 <= e.code < 400 or 400 <= e.code < 500:
+            raise FailedCommandError(
+                'failed to fetch {} retries exchausted, status: {}'.format(url, e.code))
+        raise FetchFileException('HTTP status {} error fetching {}'.format(e.code, url))
+    except urllib2.URLError as e:
+        raise FetchFileException('network error fetching {}'.format(url))
+    except IOError as e:
+        raise FetchFileException('network error fetching {}'.format(url))
+
+
+class HTTPFileCache(object):
+    """ Object that can cache things from HTTP in a local folder """
+    def __init__(self, folder):
+        self.folder = folder
+
+    def fetch_file(self, url, sha256):
+        """
+        Fetch a file from HTTP, requires sha256 hash
+
+        Note: it's not safe to fetch the same file concurrently.
+        """
+        if not os.path.isdir(self.folder):
+            os.mkdir(self.folder)
+        target = os.path.join(self.folder, 'sha256:{}'.format(sha256))
+        if not os.path.isfile(target) or not verify_file_sha256(target, sha256):
+            fetch_file(url, target, sha256)
+        return target
+
+    def clear(self):
+        """ Clear cache, delete all files in the cache """
+        if os.path.exists(self.folder):
+            shutil.rmtree(self.folder)
new file mode 100644
--- /dev/null
+++ b/taskcluster/taskgraph/qemu/imagecache.py
@@ -0,0 +1,352 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+import errno
+import json
+import os
+import uuid
+import sys
+import yaml
+import urllib2
+from time import time
+
+from mach.base import FailedCommandError
+from taskgraph import GECKO
+from .imageschema import load_and_validate_image, IMAGE_DIR, list_images
+from .imagehash import file_sha256, compute_image_hash
+from .httpcache import fetch_file
+
+# Location where store images in S3
+S3_BUCKET = 'public-qemu-images'
+S3_REGION = 'us-west-2'
+S3_PREFIX = 'repository/hg.mozilla.org/mozilla-central/'
+
+# Index prefix pattern
+_INDEX_PREFIX_PATTERN = 'gecko.cache.level-{level}.qemu-images.v1.{name}'
+
+# Artifact names for images stored in index
+PRIVATE_ARTIFACT_NAME = 'repo/hg.mozilla.org/mozilla-central/qemu-images/image.tar.zst'
+PUBLIC_ARTIFACT_NAME = 'public/qemu-images/image.tar.zst'
+
+
+def _ensureFolder(folder):
+    """Ensure that folder exists by creating it if it doesn't"""
+    try:
+        os.makedirs(folder)
+    except OSError as e:
+        if e.errno != errno.EEXIST:
+            raise
+
+
+def index_namespace(image_name, image_hash, level):
+    return (_INDEX_PREFIX_PATTERN + '.hash.{hash}').format(
+        name=image_name,
+        hash=image_hash,
+        level=level,
+    )
+
+
+def auxiliary_index_namespaces(image_name, level, moz_build_date):
+    return [(_INDEX_PREFIX_PATTERN + suffix).format(
+        level=level,
+        name=image_name,
+        pushtime=moz_build_date[8:],
+        year=moz_build_date[0:4],
+        month=moz_build_date[4:6],
+        day=moz_build_date[6:8],
+    ) for suffix in [
+        '.latest',
+        '.pushdate.{year}.{month}-{day}-{pushtime}',
+    ]]
+
+
+def _signed_index_artifact_url(namespace, artifact):
+    """Build signed indexed artifact URL using taskcluster credentials from
+    environment variables
+    """
+    import taskcluster
+    try:
+        return taskcluster.Index().buildSignedUrl('findArtifactFromTask', namespace, artifact)
+    except taskcluster.exceptions.TaskclusterAuthFailure:
+        raise FailedCommandError(
+            'taskcluster credentials required, please set TASKCLUSTER_CLIENT_ID ' +
+            'TASKCLUSTER_ACCESS_TOKEN', reason='missing-credentials')
+
+
+class Progress(object):
+    """Progress printer for boto S3 operations"""
+    def __init__(self, fmt, size):
+        import threading
+        self.fmt = fmt
+        self.size = size
+        self.count = 0
+        self.lock = threading.Lock()
+        self.updated = time()
+
+    def __call__(self, count):
+        with self.lock:
+            self.count += count
+            # Update progress every 2s
+            if time() - self.updated > 2:
+                self.updated = time()
+                print(self.fmt.format(int((float(self.count) / self.size) * 100)))
+                sys.stdout.flush()
+
+
+class ImageCache(object):
+    """Object that manages the QEMU image cache"""
+    def __init__(self, mach_state_folder):
+        self.folder = os.path.join(mach_state_folder, 'qemu-images-v1')
+        _ensureFolder(self.folder)
+        self.cleanup_temporary_files()
+
+    def _image_path(self, image_name, image_hash):
+        return os.path.join(self.folder, '{}@sha256:{}.tar.zst'.format(image_name, image_hash))
+
+    def _meta_path(self, image_name, image_hash):
+        return os.path.join(self.folder, '{}@sha256:{}.json'.format(image_name, image_hash))
+
+    def get_temporary_filename(self):
+        """Get temporary filename inside the cache folder, for use when building,
+        Downloading and other things, notably these can be atomically moved to
+        another filename.
+        """
+        return os.path.join(self.folder, 'tmp_' + str(uuid.uuid4()))
+
+    def cleanup_temporary_files(self):
+        """Cleanup old temporary files"""
+        for fname in os.listdir(self.folder):
+            path = os.path.join(self.folder, fname)
+            if not fname.startswith('tmp_'):
+                continue
+            try:
+                mtime = os.path.getmtime(path)
+            except OSError:
+                continue
+            # If older than 7 days we delete it
+            if time() - mtime > 7 * 24 * 60 * 60:
+                try:
+                    os.unlink(path)
+                except OSError as e:
+                    if e.errno != errno.EEXIST:
+                        raise
+
+    def insert_from_temporary_file(self, image_name, image_hash, image_file,
+                                   origin='imported-locally', sha256=None):
+        """
+        Insert image_name with image_hash from image_file,
+
+        Notice, the image_file must have been created by ''get_temporary_filename()'',
+        in-order to allow for atomic renaming.
+        """
+        if not sha256:
+            sha256 = file_sha256(image_file)
+        img_file = self._image_path(image_name, image_hash)
+        meta_file = self._meta_path(image_name, image_hash)
+        if os.path.exists(img_file):
+            os.unlink(img_file)
+        with open(meta_file, 'wb') as f:
+            json.dump({
+                'origin': origin,
+                'sha256': sha256,
+            }, f, sort_keys=True)
+        os.rename(image_file, img_file)  # atomic rename for the file of record
+
+    def pull_from_url(self, image_name, image_hash, url, origin, sha256=None):
+        """Insert image_name with image_hash from url"""
+        tmp_file = self.get_temporary_filename()
+        try:
+            fetch_file(url, tmp_file, sha256, description='{}@sha256:{}'.format(
+                image_name, image_hash[:32],
+            ))
+            self.insert_from_temporary_file(image_name, image_hash, tmp_file, origin, sha256)
+        finally:
+            try:  # Always try to cleanup
+                os.unlink(tmp_file)
+            except OSError:
+                pass  # Ignore failure to cleanup
+
+    def pull_index(self, image_name, image_hash, min_level, auth_proxy_prefix=None):
+        """Pull image_name with image_hash from task index with minimum min_level.
+
+        Returns level that the image as pulled from, None, if it was not found.
+
+        If the image is private this requires ''taskcluster'' module to be installed,
+        and ''TASKCLUSTER_CLIENT_ID'' and ''TASKCLUSTER_ACCESS_TOKEN'' environment variables.
+        Alternatively, the ''auth_proxy_prefix'' can be specified, under docker-worker
+        this would be ''http://taskcluster/''.
+        """
+        image = load_and_validate_image(image_name)
+        for level in reversed(range(int(min_level), 3+1)):
+            ns = index_namespace(image_name, image_hash, level)
+            if image['private']:
+                if auth_proxy_prefix:
+                    url = '{prefix}/index.taskcluster.net/v1/task/{ns}/artifacts/{name}'.format(
+                        prefix=auth_proxy_prefix.rstrip('/'),
+                        ns=ns,
+                        name=PRIVATE_ARTIFACT_NAME,
+                    )
+                else:
+                    import taskcluster  # only used locally
+                    url = taskcluster.Index().buildSignedUrl(
+                        'findArtifactFromTask', ns,
+                        PRIVATE_ARTIFACT_NAME,
+                    )
+            else:
+                url = 'https://index.taskcluster.net/v1/task/{ns}/artifacts/{name}'.format(
+                    ns=ns,
+                    name=PUBLIC_ARTIFACT_NAME,
+                )
+            try:
+                self.pull_from_url(image_name, image_hash, url, 'index-level-{}'.format(level))
+            except urllib2.HTTPError as e:
+                if e.code == 404:
+                    continue  # Try the next level
+                raise
+            return level
+        return None
+
+    def push_to_s3(self, image_name, image_hash, allow_overwrite=False):
+        """Push ''image_name'' with ''image_hash'' to S3.
+
+        This only works for public images and requires ''boto3'' and ''taskcluster''
+        to be installed as well as taskcluster credentials in environment variables
+        ''TASKCLUSTER_CLIENT_ID'' and ''TASKCLUSTER_ACCESS_TOKEN''.
+        """
+        import boto3
+        import botocore
+        import taskcluster
+        image = load_and_validate_image(image_name)
+        if image['private']:
+            raise FailedCommandError('image is private, pushing to S3 not supported')
+        image_file = self.get_image_file(image_name, image_hash)
+        if not image_file:
+            raise FailedCommandError('image {} with image-hash {} does not exist'.format(
+                image_name, image_hash[:32],
+            ))
+        creds = taskcluster.Auth().awsS3Credentials('read-write', S3_BUCKET, S3_PREFIX + '*')
+        s3 = boto3.client(
+            's3', region_name=S3_REGION,
+            aws_access_key_id=creds['credentials']['accessKeyId'],
+            aws_secret_access_key=creds['credentials']['secretAccessKey'],
+            aws_session_token=creds['credentials']['sessionToken'],
+        )
+        key = '{}{}@sha256:{}.tar.zstd'.format(S3_PREFIX, image_name, image_hash)
+        if not allow_overwrite:
+            try:  # Check that object does not exist
+                s3.head_object(Bucket=S3_BUCKET, Key=key)
+                # if it does exist raise exception
+                raise FailedCommandError('Image already exists remotely, use --force to overwrite')
+            except botocore.exceptions.ClientError as e:
+                if int(e.response['Error']['Code']) != 404:
+                    raise
+        s3.upload_file(image_file, S3_BUCKET, key, Callback=Progress(
+            'uploading {}@sha256:{} at {}'.format(image_name, image_hash[:32], '{} %'),
+            os.path.getsize(image_file),
+        ))
+        ref_file = os.path.join(IMAGE_DIR, image_name, 'reference.yml')
+        with open(ref_file, 'wb') as f:
+            f.write('# generated by |mach push {}|\n'.format(image_name))
+            yaml.safe_dump({
+                'image-hash': image_hash,
+                'sha256': file_sha256(image_file),
+                'url': 'https://s3-{region}.amazonaws.com/{bucket}/{key}'.format(
+                    region=S3_REGION, bucket=S3_BUCKET, key=key,
+                ),
+            }, f, allow_unicode=True, default_flow_style=False)
+        print('image {} with image-hash {} has been uploaded and is now referenced in:'.format(
+            image_name, image_hash[:32],
+        ))
+        print('  {}'.format(os.path.relpath(ref_file, GECKO)))
+        print('Please commit reference.yml')
+
+    def list_images(self):
+        """List QEMU images from cache and source tree as object on the form:
+        {image, image_hash, origin, size, current}
+        """
+        images = []
+        for fname in os.listdir(self.folder):
+            if not fname.endswith('.tar.zst'):
+                continue
+            image, image_hash = fname[:-len('.tar.zst')].split('@sha256:')
+            with open(self._meta_path(image, image_hash), 'rb') as f:
+                metadata = json.load(f)
+            images.append({
+                'image': image,
+                'image_hash': image_hash,
+                'origin': metadata['origin'],
+                'size': os.path.getsize(self._image_path(image, image_hash)),
+                'current': False,
+            })
+        for image_name in list_images():
+            image_hash = compute_image_hash(image_name)
+            for img in images:
+                if img['image'] == image_name and img['image_hash'] == image_hash:
+                    img['current'] = True
+                    break
+            else:
+                images.append({
+                    'image': image_name,
+                    'image_hash': image_hash,
+                    'origin': 'not-present',
+                    'size': 0,
+                    'current': True,
+                })
+        return images
+
+    def purge(self, image_name=None, image_hash=None):
+        """Purge cache for specific image_name and image_hash,
+        specifying image_hash=None to purge all image_hashes, and image_name=None to
+        purge all images.
+
+        returns a list of images removed.
+        """
+        removed = []
+        for img in self.list_images():
+            if img['origin'] == 'not-present':
+                continue
+            if image_name in (img['image'], None) and image_hash in (img['image_hash'], None):
+                removed.append(img)
+                try:
+                    os.unlink(self._image_path(img['image'], img['image_hash']))
+                except OSError as e:
+                    if e.errno != errno.EEXIST:
+                        raise
+                    continue
+                # if image file was removed, we must remove meta file
+                os.unlink(self._meta_path(img['image'], img['image_hash']))
+        return removed
+
+    def resolve_image_hash(self, image_name, image_hash_prefix):
+        """Resolve image_hash given an image_hash_prefix"""
+        image_hash_prefix = image_hash_prefix.lower()  # always convert to lower-case
+        image_hash = None
+        for img in self.list_images():
+            if img['image'] == image_name and img['image_hash'].startswith(image_hash_prefix):
+                if image_hash:
+                    raise FailedCommandError((
+                        'image-hash prefix: "{}" does not uniquely identify a ' +
+                        'full image-hash for image: {}'
+                    ).format(image_hash_prefix, image_name))
+                image_hash = img['image_hash']
+        if not image_hash:
+            raise FailedCommandError(
+                'image-hash prefix: "{}" does not identify an image-hash for image: {}'.format(
+                    image_hash_prefix, image_name,
+                )
+            )
+        return image_hash
+
+    def has_image(self, image_name, image_hash):
+        """Returns true if the image_name with given image_hash is in the cache"""
+        return os.path.exists(self._image_path(image_name, image_hash))
+
+    def get_image_file(self, image_name, image_hash):
+        """Returns filepath to the image_name file, or None if not present in the cache"""
+        img_file = self._image_path(image_name, image_hash)
+        if os.path.exists(img_file):
+            return img_file
+        return None
new file mode 100644
--- /dev/null
+++ b/taskcluster/taskgraph/qemu/imagehash.py
@@ -0,0 +1,78 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+import hashlib
+import json
+import os
+from copy import deepcopy
+
+from mozbuild.util import memoize
+from taskgraph import GECKO
+
+from .imageschema import IMAGE_DIR, load_and_validate_image
+
+CHUNK_SIZE = 4 * 1024
+
+
+def _hash_json(data):
+    """Dump JSON with sorted keys and return a hash"""
+    return hashlib.sha256(json.dumps(data, sort_keys=True)).hexdigest()
+
+
+def file_sha256(filepath):
+    """Return sha256 of a file"""
+    h = hashlib.sha256()
+    with open(filepath, 'rb') as f:
+        while True:
+            c = f.read(CHUNK_SIZE)
+            if not c:
+                break
+            h.update(c)
+    return h.hexdigest()
+
+
+@memoize
+def compute_image_hash(image_name):
+    """ Compute hash from image """
+    image = deepcopy(load_and_validate_image(image_name))
+    context_path = os.path.join(IMAGE_DIR, image_name)
+    # Note that while changing the hashing algorithm causes cached images to
+    # be rebuilt. So while it's not idea to change the hashing logic, the cost
+    # low, and we shouldn't be afraid to change it when adding new features in
+    # in the future
+
+    # Hash a source declaration
+    def hash_source(source):
+        if not isinstance(source, basestring):
+            return source
+        if source.startswith('./'):
+            source = os.path.join(context_path, source[2:])
+        else:
+            source = os.path.join(GECKO, source[1:])
+        if os.path.isdir(source):
+            files = sorted([os.path.relpath(os.path.join(d, f), GECKO)
+                            for d, _, files in os.walk(source) for f in files])
+            return [[f, file_sha256(os.path.join(GECKO, f))] for f in files]
+        else:
+            return file_sha256(source)
+
+    # Hash an ISO image step
+    def hash_step(step):
+        if 'copy' in step:
+            step['copy'] = hash_source(step['copy'])
+        if 'extract' in step:
+            step['extract'] = hash_source(step['extract'])
+        return step
+
+    # Hash a list of steps
+    def hash_steps(steps):
+        if not isinstance(steps, list):
+            return hash_source(steps)
+        return [hash_step(step) for step in steps]
+    image['cdromA'] = hash_steps(image['cdromA'])
+    if 'cdromB' in image:
+        image['cdromB'] = hash_steps(image['cdromB'])
+    return _hash_json(image)
new file mode 100644
--- /dev/null
+++ b/taskcluster/taskgraph/qemu/imageschema.py
@@ -0,0 +1,153 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+import os
+import errno
+import yaml
+from copy import deepcopy
+
+from mozbuild.util import memoize
+from taskgraph import GECKO
+from voluptuous import Any, Match, Optional, Range, Required, Schema
+
+from ..util.schema import validate_schema
+
+IMAGE_DIR = os.path.join(GECKO, 'taskcluster', 'qemu')
+
+# Source for something that is refernced, either a path relative to gecko
+# defined as string /<...> or relative current file ./<...>
+Source = Any(Match(r'^/|./'), {
+    # File to be downloaded from tooltool
+    Required('tooltool'): basestring,
+    Required('digest'): basestring,
+    Required('algorithm'): 'sha512',
+    Required('size'): int,
+}, {
+    # File to be downloaded from a url
+    Required('url'): basestring,
+    Required('sha256'): basestring,
+})
+
+# Step for construction of ISO
+Step = Any({
+    # Copy file from source to target path in ISO image
+    Required('copy'): Source,
+    Required('target'): Match(r'^/'),
+}, {
+    # Copy file from source to target path in ISO image
+    Required('extract'): Source,
+    Required('format'): Any(
+        'tar.xz', 'zip', 'cab', 'rar', 'ar',
+        'tar.gz', 'tar.bz2', 'tar', 'rpm', 'iso',
+    ),
+    Required('target'): Match(r'^/'),
+}, {
+    # GNU sed script to run on file in target path (useful for tweaking grub menu, etc)
+    Required('sed'): basestring,
+    Required('target'): Match(r'^/'),
+}, {
+    # chmod to run on file in target path
+    Required('chmod'): basestring,
+    Required('target'): Match(r'^/'),
+}, {
+    # Run genisoimage [options] -o TARGET INPUT_FOLDER
+    Required('genisoimage'): [basestring],
+})
+
+# Image for construction for a QEMU image by creating two ISOs and booting an
+# empty VM with those in the cd drive.
+Image = Schema({
+    Required('symbol'): basestring,  # treeherder symbol
+    Required('private'): bool,  # true, if image is private
+    Required('description'): basestring,  # markdown explaining what this image is
+    Required('disksize'): Range(1, 60),  # disk size in GB
+    Required('machine'): {
+        # For documentation see:
+        # https://github.com/taskcluster/taskcluster-worker/blob/387b1fd7c0cad9512052d1c760d9a2580135611e/engines/qemu/vm/machine.go
+        Optional('uuid'): Match(r'^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}' +
+                                '-[89ab][0-9a-f]{3}-[0-9a-f]{12}$'),
+        Optional('chipset'): Any('pc-i440fx-2.8'),
+        Optional('cpu'): Any('host'),
+        Optional('flags'): [],
+        Optional('threads'): Range(min=1, max=255),
+        Optional('cores'): Range(min=1, max=255),
+        Optional('sockets'): Range(min=1, max=255),
+        Optional('memory'): Range(min=1, max=1024*1024),  # memory in mb
+        Optional('usb'): Any('piix3-usb-uhci', 'piix4-usb-uhci', 'nec-usb-xhci'),
+        Optional('network'): Any('rtl8139', 'e1000'),
+        Optional('mac'): Match(r'^[0-9a-f][26ae](:[0-9a-f]{2}){5}$'),
+        Optional('storage'): Any('virtio-blk-pci'),
+        Optional('graphics'): Any('VGA', 'vmware-svga', 'qxl-vga', 'virtio-vga'),
+        Optional('sound'): Any('none', 'AC97', 'ES1370',
+                               'hda-duplex/intel-hda', 'hda-micro/intel-hda',
+                               'hda-output/intel-hda',
+                               'hda-duplex/ich9-intel-hda', 'hda-micro/ich9-intel-hda',
+                               'hda-output/ich9-intel-hda'),
+        Optional('keyboard'): Any('usb-kbd', 'PS/2'),
+        Optional('keyboardLayout'): Any('ar', 'da', 'de', 'de-ch', 'en-gb', 'en-us', 'es', 'et',
+                                        'fi', 'fo', 'fr', 'fr-be', 'fr-ca', 'fr-ch', 'hr', 'hu',
+                                        'is', 'it', 'ja', 'lt', 'lv', 'mk', 'nl', 'nl-be', 'no',
+                                        'pl', 'pt', 'pt-br', 'ru', 'sl', 'sv', 'th', 'tr'),
+        Optional('mouse'): Any('usb-mouse', 'PS/2'),
+        Optional('tablet'): Any('usb-tablet', 'none'),
+    },
+    Required('cdromA'): Any(Source, [Step]),
+    Optional('cdromB'): Any(Source, [Step]),
+})
+
+
+# Schema for reference.yml used to indicate an image is stored in S3
+Reference = Schema({
+    Required('url'): basestring,
+    Required('sha256'): basestring,
+    Required('image-hash'): basestring,
+})
+
+
+@memoize
+def _load_and_validate_image(image_name):
+    # Load and validate the image file
+    imagePath = os.path.join(IMAGE_DIR, image_name, 'image.yml')
+    with open(imagePath, 'rb') as f:
+        image = yaml.load(f)
+    return validate_schema(Image, image, 'invalid image file: {}'.format(imagePath))
+
+
+def load_and_validate_image(image_name):
+    """Load and validate image meta-data from image.yml"""
+    return deepcopy(_load_and_validate_image(image_name))
+
+
+def list_images():
+    """List all images located in-tree"""
+    for image_name in os.listdir(IMAGE_DIR):
+        if os.path.isfile(os.path.join(IMAGE_DIR, image_name, 'image.yml')):
+            yield image_name
+
+
+def has_image(image_name):
+    """True, if image_name exists in-tree"""
+    image_file_path = os.path.join(IMAGE_DIR, image_name, 'image.yml')
+    return os.path.isfile(image_file_path)
+
+
+def has_reference(image_name):
+    """True, if image_name exists and has a reference.yml"""
+    ref_file_path = os.path.join(IMAGE_DIR, image_name, 'reference.yml')
+    return has_image(image_name) and os.path.isfile(ref_file_path)
+
+
+def load_reference(image_name):
+    """Load ''reference.yml'' from image folder, returns None if not present"""
+    ref_file = os.path.join(IMAGE_DIR, image_name, 'reference.yml')
+    try:
+        with open(ref_file, 'rb') as f:
+            ref = yaml.load(f)
+    except OSError as e:
+        if e.errno == errno.EEXIST:
+            return None
+        raise
+    return validate_schema(Reference, ref, 'invalid reference file: {}'.format(ref_file))
new file mode 100644
--- /dev/null
+++ b/taskcluster/taskgraph/qemu/run.py
@@ -0,0 +1,128 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+import subprocess
+import json
+import time
+import socket
+import sys
+import os
+from uuid import uuid4
+
+from concurrent.futures import ThreadPoolExecutor
+
+DOCKER_IMAGE_HASH = '9fad51caf737befabcaf1827946a85cac7513594758ac10d408de60532c03685'
+DOCKER_IMAGE = 'taskcluster/tc-worker@sha256:{}'.format(DOCKER_IMAGE_HASH)
+
+
+def docker_run(args, mounts={}, interactive=False, stdout=sys.stdout, stderr=sys.stderr,
+               name='mach-qemu-tc-worker'):
+    opts = []
+    for src, dst in mounts.iteritems():
+        if isinstance(dst, tuple):
+            dst, flag = dst
+            opts += ['--mount', 'type=bind,src={},dst={},{}'.format(src, dst, flag)]
+        else:
+            opts += ['--mount', 'type=bind,src={},dst={}'.format(src, dst)]
+    stdin = None
+    if interactive:
+        opts += ['--interactive']
+        stdin = sys.stdin
+    return subprocess.call([
+        'docker', 'run',
+        '--rm',
+        '--privileged',
+        '--tty',
+        ] + opts + [
+        '--entrypoint', args[0],
+        '--name', name,
+        DOCKER_IMAGE,
+    ] + args[1:], stdin=stdin, stdout=stdout, stderr=stderr) == 0
+
+
+def wait_for_docker_and_vnc():
+    # run "docker events" waiting for the "start" event.
+    # thus, we only need to read the first line, then we can kill the events
+    # process.
+    proc = subprocess.Popen([
+        'docker', 'events',
+        '-f', 'name=mach-qemu-tc-worker',
+        '-f', 'event=start',
+        '--format', '{{json .}}',
+    ], stdout=subprocess.PIPE)
+    proc.stdout.readline()  # wait for first "start" event, then kill proc
+    proc.kill()
+    data = json.loads(subprocess.check_output([
+        'docker', 'inspect', 'mach-qemu-tc-worker',
+    ]))
+    ip = data[0]['NetworkSettings']['IPAddress']
+    while True:
+        try:
+            sock = socket.socket()
+            sock.connect((ip, 5900))
+            sock.close()
+            break
+        except socket.error:
+            time.sleep(0.5)
+    print('VNC socket exposed on {}:5900'.format(ip))
+    viewer = None
+    if subprocess.call(['which', 'vinagre'], stdout=open(os.devnull, 'w')) == 0:
+        viewer = 'vinagre'
+    elif subprocess.call(['which', 'xtightvncviewer'], stdout=open(os.devnull, 'w')) == 0:
+        viewer = 'xtightvncviewer'
+    else:
+        print('No VNC viewer found')
+    if viewer:
+        print('Starting {} {}:5900'.format(viewer, ip))
+        subprocess.call([viewer, '{}:5900'.format(ip)],
+                        stdout=open(os.devnull, 'w'),  stderr=open(os.devnull, 'w'))
+
+
+def qemu_run(image_file, command, vnc=False, keep_alive=False, verbose=False):
+    """Create VM using ''image_file'' running ''command''"""
+    keep_alive_flag = ['--keep-alive'] if keep_alive else []
+    try:
+        with ThreadPoolExecutor(max_workers=2) as e:
+            if vnc:
+                e.submit(wait_for_docker_and_vnc)
+                time.sleep(0.1)  # give docker events time to start
+            return docker_run([
+                'taskcluster-worker', 'qemu-run',
+                '--vnc', '5900',
+                '--meta', '80',
+                ] + keep_alive_flag + [
+                '--log-level', 'debug' if verbose else 'warning',
+                '/image.tar.zst',
+                '--',
+            ] + command, {
+                image_file: ('/image.tar.zst', 'readonly'),
+            })
+    except KeyboardInterrupt:
+        if not keep_alive:
+            raise
+        subprocess.call(['docker', 'stop', 'mach-qemu-tc-worker'],
+                        stdout=open(os.devnull, 'w'), stderr=open(os.devnull, 'w'))
+    finally:
+        # Remove everything using docker
+        subprocess.call(['docker', 'kill', 'mach-qemu-tc-worker'],
+                        stdout=open(os.devnull, 'w'), stderr=open(os.devnull, 'w'))
+
+
+def start_shell(command=[]):
+    shell_id = 'mach-qemu-shell-{}'.format(str(uuid4())[:8])
+    try:
+        data = json.loads(subprocess.check_output([
+            'docker', 'inspect', 'mach-qemu-tc-worker',
+        ]))
+        ip = data[0]['NetworkSettings']['IPAddress']
+        return docker_run([
+            'taskcluster-worker', 'shell',
+            'ws://{}/shell/'.format(ip),
+            '--',
+        ] + command, name=shell_id, interactive=True)
+    finally:
+        subprocess.call(['docker', 'kill', shell_id],
+                        stdout=open(os.devnull, 'w'), stderr=open(os.devnull, 'w'))
--- a/taskcluster/taskgraph/util/schema.py
+++ b/taskcluster/taskgraph/util/schema.py
@@ -5,32 +5,34 @@
 from __future__ import absolute_import, print_function, unicode_literals
 
 import re
 import copy
 import pprint
 import collections
 import voluptuous
 
+from mach.base import FailedCommandError
 from .attributes import keymatch
 
 
 def validate_schema(schema, obj, msg_prefix):
     """
     Validate that object satisfies schema.  If not, generate a useful exception
     beginning with msg_prefix.
     """
     try:
         # deep copy the result since it may include mutable defaults
         return copy.deepcopy(schema(obj))
     except voluptuous.MultipleInvalid as exc:
         msg = [msg_prefix]
         for error in exc.errors:
             msg.append(str(error))
-        raise Exception('\n'.join(msg) + '\n' + pprint.pformat(obj))
+        raise FailedCommandError('\n'.join(msg) + '\n' + pprint.pformat(obj),
+                                 reason='schema-validation-failed')
 
 
 def optionally_keyed_by(*arguments):
     """
     Mark a schema value as optionally keyed by any of a number of fields.  The
     schema is the last argument, and the remaining fields are taken to be the
     field names.  For example: