Bug 1421734 - Download and unpack toolchain artifacts in parallel; r=nalexander
☠☠ backed out by ff525890cbff ☠☠
author: Chris AtLee <catlee@mozilla.com>
Mon, 12 Feb 2018 15:07:36 -0500
changeset 461881 5ceb1365ae756eabe0763de42cb64fd9dc3e79dc
parent 461880 a50ae6e8b614d0c1c1cc0196b691f7732def60f4
child 461882 7d40106bafebdd868ca7f6d188b5f0e30cb6f0e3
push id: 1683
push user: sfraser@mozilla.com
push date: Thu, 26 Apr 2018 16:43:40 +0000
treeherder: mozilla-release@5af6cb21869d [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: nalexander
bugs: 1421734
milestone: 60.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1421734 - Download and unpack toolchain artifacts in parallel; r=nalexander MozReview-Commit-ID: BMe6zqIqNHP
python/mozbuild/mozbuild/artifacts.py
python/mozbuild/mozbuild/mach_commands.py
--- a/python/mozbuild/mozbuild/artifacts.py
+++ b/python/mozbuild/mozbuild/artifacts.py
@@ -773,29 +773,29 @@ class ArtifactCache(object):
     def __init__(self, cache_dir, log=None, skip_cache=False):
         mkdir(cache_dir, not_indexed=True)
         self._cache_dir = cache_dir
         self._log = log
         self._skip_cache = skip_cache
         self._persist_limit = ArtifactPersistLimit(log)
         self._download_manager = DownloadManager(
             self._cache_dir, persist_limit=self._persist_limit)
-        self._last_dl_update = -1
 
     def log(self, *args, **kwargs):
         if self._log:
             self._log(*args, **kwargs)
 
     def fetch(self, url, force=False):
         fname = os.path.basename(url)
         try:
             # Use the file name from the url if it looks like a hash digest.
             if len(fname) not in (32, 40, 56, 64, 96, 128):
                 raise TypeError()
             binascii.unhexlify(fname)
+            basename = fname
         except TypeError:
             # We download to a temporary name like HASH[:16]-basename to
             # differentiate among URLs with the same basenames.  We used to then
             # extract the build ID from the downloaded artifact and use it to make a
             # human readable unique name, but extracting build IDs is time consuming
             # (especially on Mac OS X, where we must mount a large DMG file).
             hash = hashlib.sha256(url).hexdigest()[:16]
             # Strip query string and fragments.
@@ -807,46 +807,44 @@ class ArtifactCache(object):
             self.log(logging.DEBUG, 'artifact',
                 {'path': path},
                 'Skipping cache: removing cached downloaded artifact {path}')
             os.remove(path)
 
         self.log(logging.INFO, 'artifact',
             {'path': path},
             'Downloading to temporary location {path}')
-        try:
-            dl = self._download_manager.download(url, fname)
+        dl = self._download_manager.download(url, fname)
 
-            def download_progress(dl, bytes_so_far, total_size):
-                if not total_size:
-                    return
-                percent = (float(bytes_so_far) / total_size) * 100
-                now = int(percent / 5)
-                if now == self._last_dl_update:
-                    return
-                self._last_dl_update = now
-                self.log(logging.INFO, 'artifact',
-                         {'bytes_so_far': bytes_so_far, 'total_size': total_size, 'percent': percent},
-                         'Downloading... {percent:02.1f} %')
+        _last_dl_update = [-1]
 
-            if dl:
-                dl.set_progress(download_progress)
-                dl.wait()
-            else:
-                # Avoid the file being removed if it was in the cache already.
-                path = os.path.join(self._cache_dir, fname)
-                self._persist_limit.register_file(path)
+        def download_progress(dl, bytes_so_far, total_size):
+            if not total_size:
+                return
+            percent = (float(bytes_so_far) / total_size) * 100
+            now = int(percent / 10)
+            if now == _last_dl_update[0]:
+                return
+            _last_dl_update[0] = now
+            self.log(logging.INFO, 'artifact',
+                     {'bytes_so_far': bytes_so_far, 'total_size': total_size, 'percent': percent, 'basename': basename},
+                     'Downloading {basename}... {percent:02.1f} %')
 
-            self.log(logging.INFO, 'artifact',
-                {'path': os.path.abspath(mozpath.join(self._cache_dir, fname))},
-                'Downloaded artifact to {path}')
-            return os.path.abspath(mozpath.join(self._cache_dir, fname))
-        finally:
-            # Cancel any background downloads in progress.
-            self._download_manager.cancel()
+        if dl:
+            dl.set_progress(download_progress)
+            dl.wait()
+        else:
+            # Avoid the file being removed if it was in the cache already.
+            path = os.path.join(self._cache_dir, fname)
+            self._persist_limit.register_file(path)
+
+        self.log(logging.INFO, 'artifact',
+            {'path': os.path.abspath(mozpath.join(self._cache_dir, fname))},
+            'Downloaded artifact to {path}')
+        return os.path.abspath(mozpath.join(self._cache_dir, fname))
 
     def clear_cache(self):
         if self._skip_cache:
             self.log(logging.DEBUG, 'artifact',
                 {},
                 'Skipping cache: ignoring clear_cache!')
             return
 
--- a/python/mozbuild/mozbuild/mach_commands.py
+++ b/python/mozbuild/mozbuild/mach_commands.py
@@ -37,16 +37,18 @@ from mozbuild.base import (
     MozbuildObject,
 )
 from mozbuild.util import ensureParentDir
 
 from mozbuild.backend import (
     backends,
 )
 
+from concurrent.futures import ThreadPoolExecutor
+
 
 BUILD_WHAT_HELP = '''
 What to build. Can be a top-level make target or a relative directory. If
 multiple options are provided, they will be built serially. Takes dependency
 information from `topsrcdir/build/dumbmake-dependencies` to build additional
 targets as needed. BUILDING ONLY PARTS OF THE TREE CAN RESULT IN BAD TREE
 STATE. USE AT YOUR OWN RISK.
 '''.strip()
@@ -1206,24 +1208,26 @@ class PackageFrontend(MachCommandBase):
     @CommandArgument('--tooltool-url', metavar='URL',
         help='Use the given url as tooltool server')
     @CommandArgument('--no-unpack', action='store_true',
         help='Do not unpack any downloaded file')
     @CommandArgument('--retry', type=int, default=0,
         help='Number of times to retry failed downloads')
     @CommandArgument('--artifact-manifest', metavar='FILE',
         help='Store a manifest about the downloaded taskcluster artifacts')
+    @CommandArgument('--jobs', '-j', default='4', metavar='jobs', type=int,
+        help='Number of artifacts to fetch concurrently. Default is 4.')
     @CommandArgument('files', nargs='*',
         help='A list of files to download, in the form path@task-id, in '
              'addition to the files listed in the tooltool manifest.')
     def artifact_toolchain(self, verbose=False, cache_dir=None,
                           skip_cache=False, from_build=(),
                           tooltool_manifest=None, authentication_file=None,
                           tooltool_url=None, no_unpack=False, retry=None,
-                          artifact_manifest=None, files=()):
+                          artifact_manifest=None, jobs=4, files=()):
         '''Download, cache and install pre-built toolchains.
         '''
         from mozbuild.artifacts import ArtifactCache
         from mozbuild.action.tooltool import (
             FileRecord,
             open_manifest,
             unpack_file,
         )
@@ -1396,17 +1400,19 @@ class PackageFrontend(MachCommandBase):
             if '@' not in f:
                 self.log(logging.ERROR, 'artifact', {},
                          'Expected a list of files of the form path@task-id')
                 return 1
             name, task_id = f.rsplit('@', 1)
             record = ArtifactRecord(task_id, name)
             records[record.filename] = record
 
-        for record in records.itervalues():
+        artifacts = {} if artifact_manifest else None
+
+        def _fetch_and_unpack_record(record):
             self.log(logging.INFO, 'artifact', {'name': record.basename},
                      'Downloading {name}')
             valid = False
             # sleeptime is 60 per retry.py, used by tooltool_wrapper.sh
             for attempt, _ in enumerate(redo.retrier(attempts=retry+1,
                                                      sleeptime=60)):
                 try:
                     record.fetch_with(cache)
@@ -1441,27 +1447,23 @@ class PackageFrontend(MachCommandBase):
                     pass
                 if not valid:
                     os.unlink(record.filename)
                     if attempt < retry:
                         self.log(logging.INFO, 'artifact', {},
                                  'Will retry in a moment...')
                     continue
 
-                downloaded.append(record)
                 break
 
             if not valid:
                 self.log(logging.ERROR, 'artifact', {'name': record.basename},
                          'Failed to download {name}')
-                return 1
+                raise Exception('Failed to download {name}'.format(name=record.basename))
 
-        artifacts = {} if artifact_manifest else None
-
-        for record in downloaded:
             local = os.path.join(os.getcwd(), record.basename)
             if os.path.exists(local):
                 os.unlink(local)
             # unpack_file needs the file with its final name to work
             # (https://github.com/mozilla/build-tooltool/issues/38), so we
             # need to copy it, even though we remove it later. Use hard links
             # when possible.
             try:
@@ -1480,28 +1482,34 @@ class PackageFrontend(MachCommandBase):
                         h.update(data)
                 artifacts[record.url] = {
                     'sha256': h.hexdigest(),
                 }
             if record.unpack and not no_unpack:
                 unpack_file(local, record.setup)
                 os.unlink(local)
 
+            return record
+
+        with ThreadPoolExecutor(jobs) as pool:
+            downloaded = pool.map(_fetch_and_unpack_record, records.values())
+
         if not downloaded:
             self.log(logging.ERROR, 'artifact', {}, 'Nothing to download')
             if files:
                 return 1
 
         if artifacts:
             ensureParentDir(artifact_manifest)
             with open(artifact_manifest, 'w') as fh:
                 json.dump(artifacts, fh, indent=4, sort_keys=True)
 
         return 0
 
+
 class StaticAnalysisSubCommand(SubCommand):
     def __call__(self, func):
         after = SubCommand.__call__(self, func)
         args = [
             CommandArgument('--verbose', '-v', action='store_true',
                             help='Print verbose output.'),
         ]
         for arg in args: