Bug 1646427 - [mozfile] Add ability to ignore file patterns while extracting, r=gbrown
author Andrew Halberstadt <ahalberstadt@mozilla.com>
Thu, 18 Jun 2020 19:39:14 +0000
changeset 536624 97e16bc7c6041592170c4a45252593ecb866d3d5
parent 536623 1bafbdf97b9b7f1cb58a4940546437a8e8d031c1
child 536625 eba33e59bc8f5c2a42a8e19e7de84ab62e21f048
push id 119582
push user ahalberstadt@mozilla.com
push date Mon, 22 Jun 2020 18:42:21 +0000
treeherder autoland@eba33e59bc8f [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers gbrown
bugs 1646427
milestone 79.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1646427 - [mozfile] Add ability to ignore file patterns while extracting, r=gbrown

Differential Revision: https://phabricator.services.mozilla.com/D80208
testing/mozbase/mozfile/mozfile/mozfile.py
testing/mozbase/mozfile/tests/test_extract.py
--- a/testing/mozbase/mozfile/mozfile/mozfile.py
+++ b/testing/mozbase/mozfile/mozfile/mozfile.py
@@ -5,87 +5,94 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 # We don't import all modules at the top for performance reasons. See Bug 1008943
 
 from __future__ import absolute_import, print_function
 
 import errno
 import os
+import re
 import stat
 import sys
 import time
 import warnings
 from contextlib import contextmanager
 
 from six.moves import urllib
 
 
 __all__ = [
     "extract_tarball",
     "extract_zip",
     "extract",
     "is_url",
     "load",
     "copy_contents",
+    "match",
     "move",
     "remove",
     "rmtree",
     "tree",
     "which",
     "NamedTemporaryFile",
     "TemporaryDirectory",
 ]
 
 # utilities for extracting archives
 
 
-def extract_tarball(src, dest):
+def extract_tarball(src, dest, ignore=None):
     """extract a .tar file"""
 
     import tarfile
 
     with tarfile.open(src) as bundle:
         namelist = []
 
         for m in bundle:
+            if ignore and any(match(m.name, i) for i in ignore):
+                continue
             bundle.extract(m, path=dest)
             namelist.append(m.name)
 
     return namelist
 
 
-def extract_zip(src, dest):
+def extract_zip(src, dest, ignore=None):
     """extract a zip file"""
 
     import zipfile
 
     if isinstance(src, zipfile.ZipFile):
         bundle = src
     else:
         try:
             bundle = zipfile.ZipFile(src)
         except Exception:
             print("src: %s" % src)
             raise
 
     namelist = bundle.namelist()
 
     for name in namelist:
+        if ignore and any(match(name, i) for i in ignore):
+            continue
+
         bundle.extract(name, dest)
         filename = os.path.realpath(os.path.join(dest, name))
         mode = bundle.getinfo(name).external_attr >> 16 & 0x1FF
         # Only update permissions if attributes are set. Otherwise fallback to the defaults.
         if mode:
             os.chmod(filename, mode)
     bundle.close()
     return namelist
 
 
-def extract(src, dest=None):
+def extract(src, dest=None, ignore=None):
     """
     Takes in a tar or zip file and extracts it to dest
 
     If dest is not specified, extracts to os.path.dirname(src)
 
     Returns the list of top level files that were extracted
     """
 
@@ -96,19 +103,19 @@ def extract(src, dest=None):
 
     if dest is None:
         dest = os.path.dirname(src)
     elif not os.path.isdir(dest):
         os.makedirs(dest)
     assert not os.path.isfile(dest), "dest cannot be a file"
 
     if tarfile.is_tarfile(src):
-        namelist = extract_tarball(src, dest)
+        namelist = extract_tarball(src, dest, ignore=ignore)
     elif zipfile.is_zipfile(src):
-        namelist = extract_zip(src, dest)
+        namelist = extract_zip(src, dest, ignore=ignore)
     else:
         raise Exception("mozfile.extract: no archive format found for '%s'" % src)
 
     # namelist returns paths with forward slashes even in windows
     top_level_files = [
         os.path.join(dest, name.rstrip("/"))
         for name in namelist
         if len(name.rstrip("/").split("/")) == 1
@@ -554,8 +561,53 @@ def load(resource):
     if resource.startswith("file://"):
         resource = resource[len("file://"):]
 
     if not is_url(resource):
         # if no scheme is given, it is a file path
         return open(resource)
 
     return urllib.request.urlopen(resource)
+
+
+# We can't depend on mozpack.path here, so copy the 'match' function over.
+
+re_cache = {}
+# Python versions < 3.7 return r'\/' for re.escape('/').
+if re.escape('/') == '/':
+    MATCH_STAR_STAR_RE = re.compile(r'(^|/)\\\*\\\*/')
+    MATCH_STAR_STAR_END_RE = re.compile(r'(^|/)\\\*\\\*$')
+else:
+    MATCH_STAR_STAR_RE = re.compile(r'(^|\\\/)\\\*\\\*\\\/')
+    MATCH_STAR_STAR_END_RE = re.compile(r'(^|\\\/)\\\*\\\*$')
+
+
+def match(path, pattern):
+    '''
+    Return whether the given path matches the given pattern.
+    An asterisk can be used to match any string, including the null string, in
+    one part of the path:
+
+        ``foo`` matches ``*``, ``f*`` or ``fo*o``
+
+    However, an asterisk matching a subdirectory may not match the null string:
+
+        ``foo/bar`` does *not* match ``foo/*/bar``
+
+    If the pattern matches one of the ancestor directories of the path, the
+    path is considered matching:
+
+        ``foo/bar`` matches ``foo``
+
+    Two adjacent asterisks can be used to match files and zero or more
+    directories and subdirectories.
+
+        ``foo/bar`` matches ``foo/**/bar``, or ``**/bar``
+    '''
+    if not pattern:
+        return True
+    if pattern not in re_cache:
+        p = re.escape(pattern)
+        p = MATCH_STAR_STAR_RE.sub(r'\1(?:.+/)?', p)
+        p = MATCH_STAR_STAR_END_RE.sub(r'(?:\1.+)?', p)
+        p = p.replace(r'\*', '[^/]*') + '(?:/.*)?$'
+        re_cache[pattern] = re.compile(p)
+    return re_cache[pattern].match(path) is not None
--- a/testing/mozbase/mozfile/tests/test_extract.py
+++ b/testing/mozbase/mozfile/tests/test_extract.py
@@ -114,10 +114,18 @@ def test_extract_non_archive(tarpath, zi
         exception = exc
     finally:
         os.remove(filename)
         os.rmdir(dest)
 
     assert isinstance(exception, Exception)
 
 
+def test_extract_ignore(tmpdir, bundlepath):
+    dest = tmpdir.mkdir('dest').strpath
+    ignore = ('foo', '**/fleem.txt', 'read*.txt')
+    mozfile.extract(bundlepath, dest, ignore=ignore)
+
+    assert sorted(os.listdir(dest)) == ['bar.txt', 'foo.txt']
+
+
 if __name__ == '__main__':
     mozunit.main()