Bug 1356524 - Add a `mach artifact toolchain` option to get artifacts from taskcluster builds. r=gps
author Mike Hommey <mh+mozilla@glandium.org>
Fri, 14 Apr 2017 11:34:53 +0900
changeset 563295 98c08360e61b83fe1eeefd42c93a9099ebf86806
parent 563294 be3413a130d8a0c42c936b8a2bd6e3d119ad6ede
child 563296 54f9c9d1a0948d615d4105d753568bc502bae52b
push id 54258
push user cpeterson@mozilla.com
push date Sun, 16 Apr 2017 05:52:14 +0000
reviewers gps
bugs 1356524, 1328454
milestone 55.0a1
Bug 1356524 - Add a `mach artifact toolchain` option to get artifacts from taskcluster builds. r=gps

Now that we have automated build jobs that produce toolchains, we want to avoid the burden of uploading them to tooltool and then updating the tooltool manifests. But we don't have build jobs for all the possible toolchains, so we allow `mach artifact toolchain` to get a mix of tooltool and taskcluster artifacts.

For taskcluster artifacts, we can give a list of job names (conveniently normalized automatically to begin with 'toolchain-' and end with '/opt') for which the artifacts will be downloaded, in place of any tooltool package with the same name (if a tooltool manifest is given). The taskcluster artifacts that we download are the ones matching the contents of the tree the command is run from, per the resources declared for the corresponding toolchain build job (in taskcluster/ci/toolchain*.yml).

So, for example, a linux64 build could run the following command:

  mach artifact toolchain --tooltool-manifest \
    browser/config/tooltool-manifests/linux64/releng.manifest \
    --from-build linux64-gcc

and get the right gcc corresponding to the build-gcc script in the tree, along with the other non-gcc files from the tooltool manifest.

Things are planned to become even more convenient, but some commands can already benefit from this form (even without a tooltool manifest); see e.g. bug 1328454.
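As an illustration of the `--from-build` name normalization described above, here is a minimal standalone sketch that mirrors the logic added in the patch below; the helper name `normalize_build_name` is only used for illustration and is not part of the patch:

  def normalize_build_name(name):
      # Short names like 'linux64-gcc' are expanded to the full
      # toolchain task name, e.g. 'toolchain-linux64-gcc/opt'.
      if '/' not in name:
          name = '{}/opt'.format(name)
      if not name.startswith('toolchain-'):
          name = 'toolchain-{}'.format(name)
      return name

  assert normalize_build_name('linux64-gcc') == 'toolchain-linux64-gcc/opt'
  assert normalize_build_name('toolchain-linux64-clang/opt') == 'toolchain-linux64-clang/opt'

So `--from-build linux64-gcc` ends up looking for a toolchain task named `toolchain-linux64-gcc/opt`.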
python/mozbuild/mozbuild/mach_commands.py
--- a/python/mozbuild/mozbuild/mach_commands.py
+++ b/python/mozbuild/mozbuild/mach_commands.py
@@ -9,16 +9,18 @@ import errno
 import itertools
 import json
 import logging
 import operator
 import os
 import subprocess
 import sys
 
+from collections import OrderedDict
+
 import mozpack.path as mozpath
 
 from mach.decorators import (
     CommandArgument,
     CommandArgumentGroup,
     CommandProvider,
     Command,
     SubCommand,
@@ -1536,45 +1538,56 @@ class PackageFrontend(MachCommandBase):
     @SubCommand('artifact', 'toolchain')
     @CommandArgument('--verbose', '-v', action='store_true',
         help='Print verbose output.')
     @CommandArgument('--cache-dir', metavar='DIR',
         help='Directory where to store the artifacts cache')
     @CommandArgument('--skip-cache', action='store_true',
         help='Skip all local caches to force re-fetching remote artifacts.',
         default=False)
+    @CommandArgument('--from-build', metavar='BUILD', nargs='+',
+        help='Get toolchains resulting from the given build(s)')
     @CommandArgument('--tooltool-manifest', metavar='MANIFEST',
         help='Explicit tooltool manifest to process')
     @CommandArgument('--authentication-file', metavar='FILE',
         help='Use the RelengAPI token found in the given file to authenticate')
     @CommandArgument('--tooltool-url', metavar='URL',
         help='Use the given url as tooltool server')
     @CommandArgument('--no-unpack', action='store_true',
         help='Do not unpack any downloaded file')
     @CommandArgument('--retry', type=int, default=0,
         help='Number of times to retry failed downloads')
     @CommandArgument('files', nargs='*',
         help='Only download the given file names (you may use file name stems)')
     def artifact_toolchain(self, verbose=False, cache_dir=None,
-                          skip_cache=False, tooltool_manifest=None,
-                          authentication_file=None, tooltool_url=None,
-                          no_unpack=False, retry=None, files=()):
+                          skip_cache=False, from_build=(),
+                          tooltool_manifest=None, authentication_file=None,
+                          tooltool_url=None, no_unpack=False, retry=None,
+                          files=()):
         '''Download, cache and install pre-built toolchains.
         '''
         from mozbuild.artifacts import ArtifactCache
         from mozbuild.action.tooltool import (
             FileRecord,
             open_manifest,
             unpack_file,
         )
         from requests.adapters import HTTPAdapter
         import redo
         import requests
         import shutil
 
+        from taskgraph.generator import Kind
+        from taskgraph.optimize import optimize_task
+        from taskgraph.util.taskcluster import (
+            get_artifact_url,
+            list_artifacts,
+        )
+        import yaml
+
         self._set_log_level(verbose)
         # Normally, we'd use self.log_manager.enable_unstructured(),
         # but that enables all logging, while we only really want tooltool's
         # and it also makes structured log output twice.
         # So we manually do what it does, and limit that to the tooltool
         # logger.
         if self.log_manager.terminal_handler:
             logging.getLogger('mozbuild.action.tooltool').addHandler(
@@ -1601,35 +1614,119 @@ class PackageFrontend(MachCommandBase):
                     request.headers['Authorization'] = \
                         'Bearer {}'.format(token)
                     return super(TooltoolAuthenticator, self).send(
                         request, *args, **kwargs)
 
             cache._download_manager.session.mount(
                 tooltool_url, TooltoolAuthenticator())
 
-        manifest = open_manifest(tooltool_manifest)
-        downloaded_files = {}
+        class DownloadRecord(FileRecord):
+            def __init__(self, url, *args, **kwargs):
+                super(DownloadRecord, self).__init__(*args, **kwargs)
+                self.url = url
+                self.basename = self.filename
+
+            def fetch_with(self, cache):
+                self.filename = cache.fetch(self.url)
+                return self.filename
+
+            def validate(self):
+                if self.size is None and self.digest is None:
+                    return True
+                return super(DownloadRecord, self).validate()
+
+        records = OrderedDict()
+        downloaded = []
+
+        if tooltool_manifest:
+            manifest = open_manifest(tooltool_manifest)
+            for record in manifest.file_records:
+                url = '{}/{}/{}'.format(tooltool_url, record.algorithm,
+                                        record.digest)
+                records[record.filename] = DownloadRecord(
+                    url, record.filename, record.size, record.digest,
+                    record.algorithm, unpack=record.unpack,
+                    version=record.version, visibility=record.visibility,
+                    setup=record.setup)
+
+        if from_build:
+            params = {
+                'message': '',
+                'project': '',
+                'level': os.environ.get('MOZ_SCM_LEVEL', '3'),
+                'base_repository': '',
+                'head_repository': '',
+                'head_rev': '',
+                'moz_build_date': '',
+                'build_date': 0,
+                'pushlog_id': 0,
+                'owner': '',
+            }
 
-        for record in manifest.file_records:
-            if files and not any(record.filename == f or
-                                      record.filename.startswith('%s.' % f)
+            # TODO: move to the taskcluster package
+            def tasks(kind):
+                kind_path = mozpath.join('taskcluster', 'ci', kind)
+                with open(mozpath.join(self.topsrcdir, kind_path, 'kind.yml')) as f:
+                    config = yaml.load(f)
+                    tasks = Kind(kind, kind_path, config).load_tasks(params, {})
+                    return {
+                        task.task['metadata']['name']: task
+                        for task in tasks
+                    }
+
+            toolchains = tasks('toolchain')
+
+            for b in from_build:
+                user_value = b
+
+                if '/' not in b:
+                    b = '{}/opt'.format(b)
+
+                if not b.startswith('toolchain-'):
+                    b = 'toolchain-{}'.format(b)
+
+                task = toolchains.get(b)
+                if not task:
+                    self.log(logging.ERROR, 'artifact', {'build': user_value},
+                             'Could not find a toolchain build named `{build}`')
+                    return 1
+
+                optimized, task_id = optimize_task(task, {})
+                if not optimized:
+                    self.log(logging.ERROR, 'artifact', {'build': user_value},
+                             'Could not find artifacts for a toolchain build '
+                             'named `{build}`')
+                    return 1
+
+                for artifact in list_artifacts(task_id):
+                    name = artifact['name']
+                    if not name.startswith('public/'):
+                        continue
+                    name = name[len('public/'):]
+                    if name.startswith('logs/'):
+                        continue
+                    records[name] = DownloadRecord(
+                        get_artifact_url(task_id, 'public/{}'.format(name)),
+                        name, None, None, None, unpack=True)
+
+        for record in records.itervalues():
+            if files and not any(record.basename == f or
+                                      record.basename.startswith('%s.' % f)
                                       for f in files):
                 continue
 
-            self.log(logging.INFO, 'artifact', {'name': record.filename},
+            self.log(logging.INFO, 'artifact', {'name': record.basename},
                      'Downloading {name}')
-            url = '{}/{}/{}'.format(tooltool_url, record.algorithm,
-                                    record.digest)
             valid = False
             # sleeptime is 60 per retry.py, used by tooltool_wrapper.sh
             for attempt, _ in enumerate(redo.retrier(attempts=retry+1,
                                                      sleeptime=60)):
                 try:
-                    downloaded = cache.fetch(url)
+                    record.fetch_with(cache)
                 except requests.exceptions.HTTPError as e:
                     status = e.response.status_code
                     # The relengapi proxy likes to return error 400 bad request
                     # which seems improbable to be due to our (simple) GET
                     # being borked.
                     should_retry = status >= 500 or status == 400
                     if should_retry or attempt < retry:
                         level = logging.WARN
@@ -1637,62 +1734,52 @@ class PackageFrontend(MachCommandBase):
                         level = logging.ERROR
                     self.log(level, 'artifact', {}, e.message)
                     if not should_retry:
                         break
                     if attempt < retry:
                         self.log(logging.INFO, 'artifact', {},
                                  'Will retry in a moment...')
                     continue
-                validate_record = FileRecord(
-                    os.path.basename(downloaded), record.size, record.digest,
-                    record.algorithm)
-                # FileRecord.validate needs the file in the current directory
-                # (https://github.com/mozilla/build-tooltool/issues/38)
-                curdir = os.getcwd()
-                os.chdir(os.path.dirname(downloaded))
                 try:
-                    valid = validate_record.validate()
-                finally:
-                    os.chdir(curdir)
+                    valid = record.validate()
+                except Exception:
+                    pass
                 if not valid:
-                    os.unlink(downloaded)
+                    os.unlink(record.filename)
                     if attempt < retry:
                         self.log(logging.INFO, 'artifact', {},
                                  'Will retry in a moment...')
                     continue
 
-                downloaded_files[record.filename] = downloaded
+                downloaded.append(record)
                 break
 
             if not valid:
-                self.log(logging.ERROR, 'artifact', {'name': record.filename},
+                self.log(logging.ERROR, 'artifact', {'name': record.basename},
                          'Failed to download {name}')
                 return 1
 
-        for record in manifest.file_records:
-            downloaded = downloaded_files.get(record.filename)
-            if not downloaded:
-                continue
-            local = os.path.join(os.getcwd(), record.filename)
+        for record in downloaded:
+            local = os.path.join(os.getcwd(), record.basename)
             if os.path.exists(local):
                 os.unlink(local)
             # unpack_file needs the file with its final name to work
             # (https://github.com/mozilla/build-tooltool/issues/38), so we
             # need to copy it, even though we remove it later. Use hard links
             # when possible.
             try:
-                os.link(downloaded, local)
-            except:
-                shutil.copy(downloaded, local)
+                os.link(record.filename, local)
+            except Exception:
+                shutil.copy(record.filename, local)
             if record.unpack and not no_unpack:
                 unpack_file(local, record.setup)
                 os.unlink(local)
 
-        if not downloaded_files:
+        if not downloaded:
             self.log(logging.ERROR, 'artifact', {}, 'Nothing to download')
             return 1
 
         return 0
 
 
 @CommandProvider
 class Vendor(MachCommandBase):