Bug 1258539 - [mozharness] Use ZipFile and TarFile classes for unpacking archives. r=jlund
☠☠ backed out by ff43d3e4c4fc ☠ ☠
authorHenrik Skupin <mail@hskupin.info>
Mon, 18 Jan 2016 19:50:26 +0100
changeset 349533 cc2996a53b7107a36433b265318340469cc5eb6f
parent 349532 3f66f98ebf11bfd0933a145cad375c7a0785a46b
child 349534 8322ffecd9d90d31ff451cd9763fe2cfa5ae09b4
push id1230
push userjlund@mozilla.com
push dateMon, 31 Oct 2016 18:13:35 +0000
treeherdermozilla-release@5e06e3766db2 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersjlund
bugs1258539
milestone50.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1258539 - [mozharness] Use ZipFile and TarFile classes for unpacking archives. r=jlund Get rid of external unpack tools (unzip and tar) and use the Python internal classes instead. This patch only changes this behavior for the base script class but not for custom code in other test scripts and modules, which would make it too complex. A follow-up bug will be filed instead. MozReview-Commit-ID: L0eoITlqTdC
testing/mozharness/mozharness/base/script.py
testing/mozharness/mozharness/mozilla/testing/firefox_ui_tests.py
testing/mozharness/mozharness/mozilla/testing/testbase.py
testing/mozharness/test/helper_files/archives/archive.tar
testing/mozharness/test/helper_files/archives/archive.tar.bz2
testing/mozharness/test/helper_files/archives/archive.tar.gz
testing/mozharness/test/helper_files/archives/archive.zip
testing/mozharness/test/helper_files/archives/archive_invalid_filename.zip
testing/mozharness/test/helper_files/archives/reference/bin/script.sh
testing/mozharness/test/helper_files/archives/reference/lorem.txt
testing/mozharness/test/helper_files/create_archives.sh
testing/mozharness/test/test_base_script.py
--- a/testing/mozharness/mozharness/base/script.py
+++ b/testing/mozharness/mozharness/base/script.py
@@ -10,29 +10,34 @@
 script.py, along with config.py and log.py, represents the core of
 mozharness.
 """
 
 import codecs
 from contextlib import contextmanager
 import datetime
 import errno
+import fnmatch
+import functools
 import gzip
 import inspect
+import itertools
 import os
 import platform
 import pprint
 import re
 import shutil
 import socket
 import subprocess
 import sys
+import tarfile
 import time
 import traceback
 import urllib2
+import zipfile
 import httplib
 import urlparse
 import hashlib
 if os.name == 'nt':
     try:
         import win32file
         import win32api
         PYWIN32 = True
@@ -42,20 +47,20 @@ if os.name == 'nt':
 try:
     import simplejson as json
     assert json
 except ImportError:
     import json
 
 from mozprocess import ProcessHandler
 from mozharness.base.config import BaseConfig
-from mozharness.base.errors import ZipErrorList
 from mozharness.base.log import SimpleFileLogger, MultiFileLogger, \
     LogMixin, OutputParser, DEBUG, INFO, ERROR, FATAL
 
+
 def platform_name():
     pm = PlatformMixin()
 
     if pm._is_linux() and pm._is_64_bit():
         return 'linux64'
     elif pm._is_linux() and not pm._is_64_bit():
         return 'linux'
     elif pm._is_darwin():
@@ -448,49 +453,35 @@ class ScriptMixin(PlatformMixin):
             kwargs = {"url": url, "file_name": file_name}
 
         return self.retry(
             download_func,
             kwargs=kwargs,
             **retry_args
         )
 
-    def download_unzip(self, url, parent_dir, target_unzip_dirs=None, halt_on_failure=True):
+    def download_unzip(self, url, parent_dir, target_unzip_dirs=None):
         """Generic method to download and extract a zip file.
 
         The downloaded file will always be saved to the working directory and is not getting
         deleted after extracting.
 
         Args:
             url (str): URL where the file to be downloaded is located.
             parent_dir (str): directory where the downloaded file will
                               be extracted to.
             target_unzip_dirs (list, optional): directories inside the zip file to extract.
                                                 Defaults to `None`.
-            halt_on_failure (bool, optional): whether or not to redefine the
-                                              log level as `FATAL` on errors. Defaults to True.
 
         """
         dirs = self.query_abs_dirs()
         zipfile = self.download_file(url, parent_dir=dirs['abs_work_dir'],
                                      error_level=FATAL)
 
-        command = self.query_exe('unzip', return_type='list')
-        # Always overwrite to not get an input in a hidden pipe if files already exist
-        command.extend(['-q', '-o', zipfile, '-d', parent_dir])
-        if target_unzip_dirs:
-            command.extend(target_unzip_dirs)
-        # TODO error_list: http://www.info-zip.org/mans/unzip.html#DIAGNOSTICS
-        # unzip return code 11 is 'no matching files were found'
-        self.run_command(command,
-                         error_list=ZipErrorList,
-                         halt_on_failure=halt_on_failure,
-                         fatal_exit_code=3,
-                         success_codes=[0, 11],
-                         )
+        self.unpack(zipfile, parent_dir, target_unzip_dirs)
 
     def load_json_url(self, url, error_level=None, *args, **kwargs):
         """ Returns a json object from a url (it retries). """
         contents = self._retry_download(
             url=url, error_level=error_level, *args, **kwargs
         )
         return json.loads(contents.read())
 
@@ -1100,17 +1091,17 @@ class ScriptMixin(PlatformMixin):
             throw_exception (bool, optional): whether or not to raise an
               exception if the return value of the command doesn't match
               any of the `success_codes`. Defaults to False.
             output_parser (OutputParser, optional): lets you provide an
               instance of your own OutputParser subclass. Defaults to `OutputParser`.
             output_timeout (int): amount of seconds to wait for output before
               the process is killed.
             fatal_exit_code (int, optional): call `self.fatal` if the return value
-              of the command is not on in `success_codes`. Defaults to 2.
+              of the command is not in `success_codes`. Defaults to 2.
             error_level (str, optional): log level name to use on error. Defaults
               to `ERROR`.
             **kwargs: Arbitrary keyword arguments.
 
         Returns:
             int: -1 on error.
             Any: `command` return value is returned otherwise.
         """
@@ -1392,36 +1383,85 @@ class ScriptMixin(PlatformMixin):
         except OSError:
             try:
                 open(file_name, 'w').close()
             except IOError as e:
                 msg = "I/O error(%s): %s" % (e.errno, e.strerror)
                 self.log(msg, error_level=error_level)
         os.utime(file_name, times)
 
-    def unpack(self, filename, extract_to):
-        '''
+    def unpack(self, filename, extract_to, extract_dirs=None,
+               error_level=ERROR, halt_on_failure=True, fatal_exit_code=2,
+               verbose=False):
+        """
         This method allows us to extract a file regardless of its extension
 
         Args:
             filename (str): filename of the compressed file.
             extract_to (str): where to extract the compressed file.
-        '''
-        # XXX: Make sure that filename has a extension of one of our supported file formats
-        m = re.search('\.tar\.(bz2|gz)$', filename)
-        if m:
-            command = self.query_exe('tar', return_type='list')
-            tar_cmd = "jxfv"
-            if m.group(1) == "gz":
-                tar_cmd = "zxfv"
-            command.extend([tar_cmd, filename, "-C", extract_to])
-            self.run_command(command, halt_on_failure=True)
+            extract_dirs (list, optional): directories inside the archive file to extract.
+                                           Defaults to `None`.
+            halt_on_failure (bool, optional): whether or not to redefine the
+                                              log level as `FATAL` on errors. Defaults to True.
+            fatal_exit_code (int, optional): call `self.fatal` if the return value
+              of the command is not in `success_codes`. Defaults to 2.
+            verbose (bool, optional): whether or not extracted content should be displayed.
+                                      Defaults to False.
+
+        Raises:
+            IOError: on `filename` file not found.
+
+        """
+        def _filter_entries(namelist):
+            """Filter entries of the archive based on the specified list of to extract dirs."""
+            filter_partial = functools.partial(fnmatch.filter, namelist)
+            for entry in itertools.chain(*map(filter_partial, extract_dirs or ['*'])):
+                yield entry
+
+        if not os.path.isfile(filename):
+            raise IOError('Could not find file to extract: %s' % filename)
+
+        level = FATAL if halt_on_failure else error_level
+
+        if zipfile.is_zipfile(filename):
+            try:
+                self.info('Using ZipFile to extract {} to {}'.format(filename, extract_to))
+                with zipfile.ZipFile(filename) as bundle:
+                    for entry in _filter_entries(bundle.namelist()):
+                        if verbose:
+                            self.info(' %s' % entry)
+                        bundle.extract(entry, path=extract_to)
+
+                        # ZipFile doesn't preserve permissions during extraction:
+                        # http://bugs.python.org/issue15795
+                        fname = os.path.realpath(os.path.join(extract_to, entry))
+                        mode = bundle.getinfo(entry).external_attr >> 16 & 0x1FF
+                        # Only set permissions if attributes are available. Otherwise all
+                        # permissions will be removed eg. on Windows.
+                        if mode:
+                            os.chmod(fname, mode)
+            except zipfile.BadZipfile as e:
+                self.log('%s (%s)' % (e.message, filename),
+                         level=level, exit_code=fatal_exit_code)
+
+        # Bug 1211882 - is_tarfile cannot be trusted for dmg files
+        elif tarfile.is_tarfile(filename) and not filename.lower().endswith('.dmg'):
+            try:
+                self.info('Using TarFile to extract {} to {}'.format(filename, extract_to))
+                with tarfile.open(filename) as bundle:
+                    for entry in _filter_entries(bundle.getnames()):
+                        if verbose:
+                            self.info(' %s' % entry)
+                        bundle.extract(entry, path=extract_to)
+            except tarfile.TarError as e:
+                self.log('%s (%s)' % (e.message, filename),
+                         level=level, exit_code=fatal_exit_code)
         else:
-            # XXX implement
-            pass
+            self.log('No extraction method found for: %s' % filename,
+                     level=level, exit_code=fatal_exit_code)
 
 
 def PreScriptRun(func):
     """Decorator for methods that will be called before script execution.
 
     Each method on a BaseScript having this decorator will be called at the
     beginning of BaseScript.run().
 
--- a/testing/mozharness/mozharness/mozilla/testing/firefox_ui_tests.py
+++ b/testing/mozharness/mozharness/mozilla/testing/firefox_ui_tests.py
@@ -294,63 +294,16 @@ class FirefoxUITests(TestingMixin, VCSTo
 
     def run_tests(self):
         """Run all the tests"""
         return self.run_test(
             binary_path=self.binary_path,
             env=self.query_env(),
         )
 
-    def download_unzip(self, url, parent_dir, target_unzip_dirs=None, halt_on_failure=True):
-        """Overwritten method from BaseScript until bug 1258539 is fixed.
-
-        The downloaded file will always be saved to the working directory and is not getting
-        deleted after extracting.
-
-        Args:
-            url (str): URL where the file to be downloaded is located.
-            parent_dir (str): directory where the downloaded file will
-                              be extracted to.
-            target_unzip_dirs (list, optional): directories inside the zip file to extract.
-                                                Defaults to `None`.
-            halt_on_failure (bool, optional): whether or not to redefine the
-                                              log level as `FATAL` on errors. Defaults to True.
-
-        """
-        import fnmatch
-        import itertools
-        import functools
-        import zipfile
-
-        def _filter_entries(namelist):
-            """Filter entries of the archive based on the specified list of extract_dirs."""
-            filter_partial = functools.partial(fnmatch.filter, namelist)
-            for entry in itertools.chain(*map(filter_partial, target_unzip_dirs or ['*'])):
-                yield entry
-
-        dirs = self.query_abs_dirs()
-        zip = self.download_file(url, parent_dir=dirs['abs_work_dir'],
-                                 error_level=FATAL)
-
-        try:
-            self.info('Using ZipFile to extract {0} to {1}'.format(zip, parent_dir))
-            with zipfile.ZipFile(zip) as bundle:
-                for entry in _filter_entries(bundle.namelist()):
-                    bundle.extract(entry, path=parent_dir)
-
-                    # ZipFile doesn't preserve permissions: http://bugs.python.org/issue15795
-                    fname = os.path.realpath(os.path.join(parent_dir, entry))
-                    mode = bundle.getinfo(entry).external_attr >> 16 & 0x1FF
-                    # Only set permissions if attributes are available.
-                    if mode:
-                        os.chmod(fname, mode)
-        except zipfile.BadZipfile as e:
-            self.log('{0} ({1})'.format(e.message, zip),
-                     level=FATAL, exit_code=2)
-
 
 class FirefoxUIFunctionalTests(FirefoxUITests):
 
     cli_script = 'cli_functional.py'
     default_tests = [
         os.path.join('puppeteer', 'manifest.ini'),
         os.path.join('functional', 'manifest.ini'),
     ]
--- a/testing/mozharness/mozharness/mozilla/testing/testbase.py
+++ b/testing/mozharness/mozharness/mozilla/testing/testbase.py
@@ -396,27 +396,16 @@ You can set this by:
 
 1. specifying --test-url URL, or
 2. running via buildbot and running the read-buildbot-config action
 
 """
         if message:
             self.fatal(message + "Can't run download-and-extract... exiting")
 
-        if self.config.get("developer_mode") and self._is_darwin():
-            # Bug 1066700 only affects Mac users that try to run mozharness locally
-            version = self._query_binary_version(
-                    regex=re.compile("UnZip\ (\d+\.\d+)\ .*", re.MULTILINE),
-                    cmd=[self.query_exe('unzip'), '-v']
-            )
-            if not version >= 6:
-                self.fatal("We require a more recent version of unzip to unpack our tests.zip files.\n"
-                        "You are currently using version %s. Please update to at least 6.0.\n"
-                        "You can visit http://www.info-zip.org/UnZip.html" % version)
-
     def _read_packages_manifest(self):
         dirs = self.query_abs_dirs()
         source = self.download_file(self.test_packages_url,
                                     parent_dir=dirs['abs_work_dir'],
                                     error_level=FATAL)
 
         with self.opened(os.path.realpath(source)) as (fh, err):
             package_requirements = json.load(fh)
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..c34000ebe229e790837d7f582e4397c3dbd96080
GIT binary patch
literal 10240
zc%1FkO>V;=5Cve?KE*`cW3c(TLKeM*lOP$@4iONw$1gOBlucT7L8_?l+hAcNMtW$5
z_pCWlSgY$QtroZI6_bNiN{(h6$~h^tGK_Tl(il6ZJVhclyJPfk_s{+G|4H$F3DK=Q
zf8=I&)PRjq_wygsHGiq~Q1Ty?w2VZvif;V#{9kMO<WK3m(jk<$K*?vCR+oAA*R?@=
zyx;j_;~DUa{x4PCm*qpC|3AVd|Fn<(lzEz)+;Pd@e$)SL$=_(H%itet|1Wbuq5nU^
r%jW9=?4~q#o!ifWwr*Jhy@$9QHW&c_0000000000000000MFtR!?a|K
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..969e6de4447ff5a99c1a355108a643899e8ace17
GIT binary patch
literal 268
zc$@(Y0rUPsT4*^jL0KkKSs)5+;{X7Ge}%%30RU(L|9}8QA`yOX-oO9?AOHXuFaY*g
z5~EWEJxrMm0BC4!L6abqQ}qajngc^aMt}eWq7Xo2^#da#8Y3WSaBuHPhbZw6#rFnh
zpcor9a;?BhhFQf_UI8Ezf`S7Bd*{~>Mi_$w%lr;(IsK13+BRQPD$Juc6>>m$32o4a
z07!sPP$VstsNi)e$i>N!2|?Tmw%na4Nv*Y~l|mrkMl;ItTG<A^ErVF3CIM_#YS09%
zC1`uX_jZ-1a4dN^NItj<6%=*gB@|sx<`_Ll1uyiw*NcaQRpbhidz)5($0*e{pn~!U
Saoh?I;_gVN3K9eXO}t=Q6KN#?
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..7ddfe68015d5a5c27c1464540949ecc3fad06c55
GIT binary patch
literal 276
zc$@(g0qg!BiwFqL#E@4217UJwXlZt3E_7jX0PWRHZo?oD1z^@b#YEj>u=%+{7QKX%
zAQ{yT5fHV<FEomjO<Hw9s;KYVU|}RidT56CtT|CwtLrMQ7PspalY>=Cj%FOnIVrR<
zjCA|b7(1stMItu4WAtzL&;9iON%4LO(XBjx<Ysr&fQ?c2^B>hUf2s9Q@*kA6j6}1F
zZv6B7Uu*j0PwBkUA(Xd3$!D5YmwETswLyEl-}z(X8Ssn#FIC-_<wKzVKf)#dw2%Ij
zd77Ktamn9))BkPB-)O1J;2&%MFLOYl|3AXZ=Ia6MrZjh*+s}cvZdn4ohqxRz7y$qP
a000000000000000&*Bs9m#(A$C;$L4^@;TW
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..118be7a48e2a7292f0a99a0d3a0d3f36910cd464
GIT binary patch
literal 517
zc$^FHW@h1H0D<c8ZypRtfQ3PZAt^IYKQx4sfw}X+#BdNUt>9*0WO>01lm-z2D7sB{
zU7IftWP>m-RCjT5QD#AjUU3FKW0V#3L28o{i!->mQj;_C6+BXNa`F|*^NVs)6uD4s
zIsY&?Tn5MnVNM{KlV6mYt5;G{g3ksYkRpZ5g5uI#g_L}t7KP%>5{1Ov)Dk_Q@r+Dz
z%(wzU0?HC#`0EIwAwj|lu?ZtMP)$M&5Qs^PK>WAS4r&ro@DOJts_zj-ZfRTsB$2`b
TmvO9YAT7*5_y$CS^fCYds?T(B
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..20bdc5acdf11e3c466c5b59090d1aaa52b721d31
GIT binary patch
literal 166
zc$^FHW@h1H00F+TuO19YfP+DX!9LDPMX#iyBs7GRfqC|k3E?1ITEWf0$nt`jfdNbe
mcr!A|G2=2r0?yvj2qF<CvO-M6FpZTBq>d2?{eZL+ST6u72^ygQ
new file mode 100755
--- /dev/null
+++ b/testing/mozharness/test/helper_files/archives/reference/bin/script.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+echo Hello world!
new file mode 100644
--- /dev/null
+++ b/testing/mozharness/test/helper_files/archives/reference/lorem.txt
@@ -0,0 +1,1 @@
+Lorem ipsum dolor sit amet.
new file mode 100755
--- /dev/null
+++ b/testing/mozharness/test/helper_files/create_archives.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# Script to auto-generate the different archive types under the archives directory.
+
+cd archives
+
+rm archive.*
+
+tar cf archive.tar -C reference .
+gzip -fk archive.tar >archive.tar.gz
+bzip2 -fk archive.tar >archive.tar.bz2
+cd reference && zip ../archive.zip -r * && cd ..
--- a/testing/mozharness/test/test_base_script.py
+++ b/testing/mozharness/test/test_base_script.py
@@ -1,12 +1,14 @@
 import gc
 import mock
 import os
 import re
+import shutil
+import tempfile
 import types
 import unittest
 PYWIN32 = False
 if os.name == 'nt':
     try:
         import win32file
         PYWIN32 = True
     except:
@@ -14,32 +16,37 @@ if os.name == 'nt':
 
 
 import mozharness.base.errors as errors
 import mozharness.base.log as log
 from mozharness.base.log import DEBUG, INFO, WARNING, ERROR, CRITICAL, FATAL, IGNORE
 import mozharness.base.script as script
 from mozharness.base.config import parse_config_file
 
+
+here = os.path.dirname(os.path.abspath(__file__))
+
 test_string = '''foo
 bar
 baz'''
 
 
 class CleanupObj(script.ScriptMixin, log.LogMixin):
     def __init__(self):
         super(CleanupObj, self).__init__()
         self.log_obj = None
         self.config = {'log_level': ERROR}
 
 
-def cleanup():
+def cleanup(files=None):
+    files = files or []
+    files.extend(('test_logs', 'test_dir', 'tmpfile_stdout', 'tmpfile_stderr'))
     gc.collect()
     c = CleanupObj()
-    for f in ('test_logs', 'test_dir', 'tmpfile_stdout', 'tmpfile_stderr'):
+    for f in files:
         c.rmtree(f)
 
 
 def get_debug_script_obj():
     s = script.BaseScript(config={'log_type': 'multi',
                                   'log_level': DEBUG},
                           initial_config_file='test/test.json')
     return s
@@ -51,22 +58,23 @@ def _post_fatal(self, **kwargs):
     fh.close()
 
 
 # TestScript {{{1
 class TestScript(unittest.TestCase):
     def setUp(self):
         cleanup()
         self.s = None
+        self.tmpdir = tempfile.mkdtemp(suffix='.mozharness')
 
     def tearDown(self):
         # Close the logfile handles, or windows can't remove the logs
         if hasattr(self, 's') and isinstance(self.s, object):
             del(self.s)
-        cleanup()
+        cleanup([self.tmpdir])
 
     # test _dump_config_hierarchy() when --dump-config-hierarchy is passed
     def test_dump_config_hierarchy_valid_files_len(self):
         try:
             self.s = script.BaseScript(
                 initial_config_file='test/test.json',
                 option_args=['--cfg', 'test/test_override.py,test/test_override2.py'],
                 config={'dump_config_hierarchy': True}
@@ -246,16 +254,48 @@ class TestScript(unittest.TestCase):
                 'regex': re.compile(',$'), 'level': IGNORE,
             }, {
                 'substr': ']$', 'level': WARNING,
             }])
         error_logsize = os.path.getsize("test_logs/test_error.log")
         self.assertTrue(error_logsize > 0,
                         msg="error list not working properly")
 
+    def test_unpack(self):
+        self.s = get_debug_script_obj()
+
+        archives_path = os.path.join(here, 'helper_files', 'archives')
+
+        # Test basic decompression
+        for archive in ('archive.tar', 'archive.tar.bz2', 'archive.tar.gz', 'archive.zip'):
+            self.s.unpack(os.path.join(archives_path, archive), self.tmpdir)
+            self.assertIn('script.sh', os.listdir(os.path.join(self.tmpdir, 'bin')))
+            self.assertIn('lorem.txt', os.listdir(self.tmpdir))
+            shutil.rmtree(self.tmpdir)
+
+        # Test permissions for extracted entries from zip archive
+        self.s.unpack(os.path.join(archives_path, 'archive.zip'), self.tmpdir)
+        file_stats = os.stat(os.path.join(self.tmpdir, 'bin', 'script.sh'))
+        orig_fstats = os.stat(os.path.join(archives_path, 'reference', 'bin', 'script.sh'))
+        self.assertEqual(file_stats.st_mode, orig_fstats.st_mode)
+        shutil.rmtree(self.tmpdir)
+
+        # Test extract specific dirs only
+        self.s.unpack(os.path.join(archives_path, 'archive.zip'), self.tmpdir,
+                      extract_dirs=['bin/*'])
+        self.assertIn('bin', os.listdir(self.tmpdir))
+        self.assertNotIn('lorem.txt', os.listdir(self.tmpdir))
+        shutil.rmtree(self.tmpdir)
+
+        # Test for invalid filenames (Windows only)
+        if PYWIN32:
+            with self.assertRaises(IOError):
+                self.s.unpack(os.path.join(archives_path, 'archive_invalid_filename.zip'),
+                              self.tmpdir)
+
 
 # TestHelperFunctions {{{1
 class TestHelperFunctions(unittest.TestCase):
     temp_file = "test_dir/mozilla"
 
     def setUp(self):
         cleanup()
         self.s = None