Bug 1533670 - extend download_file_from_url so it can extract archive r=Bebe
author Tarek Ziadé <tarek@mozilla.com>
Fri, 08 Mar 2019 13:25:09 +0000
changeset 521157 622e735af62d
parent 521156 5833aea8a83b
child 521158 db0f22709ad1
push id 10862
push user ffxbld-merge
push date Mon, 11 Mar 2019 13:01:11 +0000
treeherder mozilla-beta@a2e7f5c935da [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers Bebe
bugs 1533670
milestone 67.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1533670 - extend download_file_from_url so it can extract archive r=Bebe

Adds archive extraction support to download_file_from_url (and also makes it py3 compatible).

Differential Revision: https://phabricator.services.mozilla.com/D22662
testing/mozbase/mozproxy/mozproxy/utils.py
testing/mozbase/mozproxy/tests/archive.tar.gz
testing/mozbase/mozproxy/tests/manifest.ini
testing/mozbase/mozproxy/tests/test_utils.py
--- a/testing/mozbase/mozproxy/mozproxy/utils.py
+++ b/testing/mozbase/mozproxy/mozproxy/utils.py
@@ -1,19 +1,32 @@
 """Utility functions for Raptor"""
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import
 
+import subprocess
+import time
+import bz2
+import gzip
 import os
 import signal
 import sys
-import urllib
+from six.moves.urllib.request import urlretrieve
+
+try:
+    import zstandard
+except ImportError:
+    zstandard = None
+try:
+    import lzma
+except ImportError:
+    lzma = None
 
 from mozlog import get_proxy_logger
 from mozprocess import ProcessHandler
 from mozproxy import mozharness_dir
 
 
 LOG = get_proxy_logger(component="mozproxy")
 
@@ -51,16 +64,17 @@ def transform_platform(str_to_transform,
         else:
             str_to_transform = str_to_transform.replace("{x64}", "")
 
     return str_to_transform
 
 
 def tooltool_download(manifest, run_local, raptor_dir):
     """Download a file from tooltool using the provided tooltool manifest"""
+
     def outputHandler(line):
         LOG.info(line)
 
     if run_local:
         command = [sys.executable, TOOLTOOL_PATH, "fetch", "-o", "-m", manifest]
     else:
         # we want to use the tooltool cache in production
         if os.environ.get("TOOLTOOLCACHE") is not None:
@@ -90,19 +104,128 @@ def tooltool_download(manifest, run_loca
 
     try:
         proc.wait()
     except Exception:
         if proc.poll() is None:
             proc.kill(signal.SIGTERM)
 
 
-def download_file_from_url(url, local_dest):
+def archive_type(path):
+    """Return the kind of archive path points to, based on its file name.
+
+    Only the last two extensions are considered, so a compressed tarball
+    ("foo.tar.gz") is detected while other dots in the name ("foo-1.0.zip")
+    do not hide the real extension.
+
+    Returns "tar", "zip", or None when the name is not a known archive.
+    """
+    filename, extension = os.path.splitext(path)
+    if extension == ".zip":
+        return "zip"
+    if extension == ".tar":
+        return "tar"
+    # compressed tarball: ".tar" sits right before the compression suffix
+    if os.path.splitext(filename)[1] == ".tar":
+        return "tar"
+    return None
+
+
+def extract_archive(path, dest_dir, typ):
+    """Extract an archive to a destination directory.
+
+    path is the archive file, dest_dir the directory the extraction command
+    is run from, and typ the kind of archive ("tar" or "zip", as returned by
+    archive_type()). Tarballs are decompressed in Python and streamed to
+    `tar`; zip files are handed over to `unzip` directly.
+
+    Raises ValueError when the archive format is unknown or when the Python
+    module needed to decompress it is missing, and Exception when the
+    extraction command exits with a non-zero code.
+    """
+
+    # Resolve paths to absolute variants.
+    path = os.path.abspath(path)
+    dest_dir = os.path.abspath(dest_dir)
+    suffix = os.path.splitext(path)[-1]
+
+    # We pipe input to the decompressor program so that we can apply
+    # custom decompressors that the program may not know about.
+    if typ == "tar":
+        if suffix == ".bz2":
+            # bz2.open() is Python 3 only; BZ2File exists on 2 and 3.
+            ifh = bz2.BZ2File(path, "rb")
+        elif suffix == ".gz":
+            ifh = gzip.open(path, "rb")
+        elif suffix == ".xz":
+            if not lzma:
+                raise ValueError("lzma Python package not available")
+            ifh = lzma.open(path, "rb")
+        elif suffix == ".zst":
+            if not zstandard:
+                raise ValueError("zstandard Python package not available")
+            dctx = zstandard.ZstdDecompressor()
+            # path is a plain string, it has no open() method.
+            ifh = dctx.stream_reader(open(path, "rb"))
+        elif suffix == ".tar":
+            ifh = open(path, "rb")
+        else:
+            raise ValueError("unknown archive format for tar file: %s" % path)
+        args = ["tar", "xf", "-"]
+        pipe_stdin = True
+    elif typ == "zip":
+        # unzip from stdin has wonky behavior. We don't use a pipe for it.
+        ifh = open(os.devnull, "rb")
+        args = ["unzip", "-o", path]
+        pipe_stdin = False
+    else:
+        raise ValueError("unknown archive format: %s" % path)
+
+    LOG.info("Extracting %s to %s using %r" % (path, dest_dir, args))
+    t0 = time.time()
+    with ifh:
+        p = subprocess.Popen(args, cwd=dest_dir, bufsize=0, stdin=subprocess.PIPE)
+        if pipe_stdin:
+            while True:
+                chunk = ifh.read(131072)
+                if not chunk:
+                    break
+                p.stdin.write(chunk)
+        # communicate() closes stdin and waits for the command to finish
+        p.communicate()
+
+    if p.returncode:
+        raise Exception("%r exited %d" % (args, p.returncode))
+    LOG.info("%s extracted in %.3fs" % (path, time.time() - t0))
+
+
+def download_file_from_url(url, local_dest, extract=False):
-    """Receive a file in a URL and download it, i.e. for the hostutils tooltool manifest
-    the url received would be formatted like this:
-    https://hg.mozilla.org/try/raw-file/acb5abf52c04da7d4548fa13bd6c6848a90c32b8/testing/
-      config/tooltool-manifests/linux64/hostutils.manifest"""
+    """Download the file found at url to local_dest, and optionally extract it.
+
+    For the hostutils tooltool manifest, the url received would be formatted
+    like this:
+      https://hg.mozilla.org/try/raw-file/<revision>/testing/
+      config/tooltool-manifests/linux64/hostutils.manifest
+
+    The download is skipped when local_dest already exists. When extract is
+    True, the file is then unpacked into the directory containing local_dest.
+
+    Returns True on success, False when local_dest is missing after the
+    download or when extraction was asked on an unsupported archive type.
+    """
     if os.path.exists(local_dest):
         LOG.info("file already exists at: %s" % local_dest)
-        return True
-    LOG.info("downloading: %s to %s" % (url, local_dest))
-    _file, _headers = urllib.urlretrieve(url, local_dest)
-    return os.path.exists(local_dest)
+    else:
+        LOG.info("downloading: %s to %s" % (url, local_dest))
+        urlretrieve(url, local_dest)
+
+    if not os.path.exists(local_dest):
+        return False
+    if not extract:
+        return True
+
+    typ = archive_type(local_dest)
+    if typ is None:
+        LOG.info("unsupported archive type: %s" % local_dest)
+        return False
+
+    extract_archive(local_dest, os.path.dirname(local_dest), typ)
+    return True
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..b4f9461b09c43b9915bc31e263c390af10235655
GIT binary patch
literal 184
zc$|~(=3v;M+Z4mV{Pw&pAEP6~(T@{zP3}1OxI3p^uaP;-YSQIt=6IOP>f)}(EN=0K
z3V92z%@S2nx_Qd&^MBFLlV*nR@%+1N{#xTe)umIeMv1*#Q<?JN)By=e{w-lv8(jmB
zcAENmguYz0{Hc=k`NLBRetCN4Ppz`ot^EII%Ab#?PHp*KTKMLzo6@d%S-k~?=kgCn
iUVi%i*frO7@72n?>Th!}AcG%e?Em@$9x`Y!FaQ8Oh*P)#
--- a/testing/mozbase/mozproxy/tests/manifest.ini
+++ b/testing/mozbase/mozproxy/tests/manifest.ini
@@ -1,3 +1,4 @@
 [DEFAULT]
 subsuite = mozbase
 [test_proxy.py]
+[test_utils.py]
new file mode 100644
--- /dev/null
+++ b/testing/mozbase/mozproxy/tests/test_utils.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+from __future__ import absolute_import, print_function
+
+import os
+import shutil
+import contextlib
+import mock
+import mozunit
+import tempfile
+
+from mozproxy.utils import download_file_from_url
+
+here = os.path.dirname(os.path.abspath(__file__))
+
+
+@contextlib.contextmanager
+def tempdir():
+    """Provide a temporary directory that is removed when the block exits."""
+    dest_dir = tempfile.mkdtemp()
+    try:
+        yield dest_dir
+    finally:
+        # clean up even when the body of the with block raises,
+        # e.g. on a failed assertion
+        shutil.rmtree(dest_dir, ignore_errors=True)
+
+
+def urlretrieve(*args, **kw):
+    """Build the fake urlretrieve that mock.patch installs via new_callable."""
+
+    def _urlretrieve(url, local_dest):
+        # simply copy over our tarball
+        shutil.copyfile(os.path.join(here, "archive.tar.gz"), local_dest)
+        return local_dest, {}
+
+    return _urlretrieve
+
+
+@mock.patch("mozproxy.utils.urlretrieve", new_callable=urlretrieve)
+def test_download_file(*args):
+    """download_file_from_url() fetches the archive and extracts it in place."""
+    with tempdir() as dest_dir:
+        dest = os.path.join(dest_dir, "archive.tar.gz")
+        assert download_file_from_url(
+            "http://example.com/archive.tar.gz", dest, extract=True
+        )
+        # archive.tar.gz contains hey.txt, if it worked we should see it
+        assert os.path.exists(os.path.join(dest_dir, "hey.txt"))
+
+
+if __name__ == "__main__":
+    mozunit.main(runwith="pytest")