Bug 1494069 - [mozlint] Collapse exclude paths into their smallest possible set, r=egao
authorAndrew Halberstadt <ahalberstadt@mozilla.com>
Fri, 12 Oct 2018 15:57:42 +0000
changeset 496645 b7e586708ecccea803d5c4b77e3f9d7bbcb912f6
parent 496644 85575fc37555213a204b8565bbadef7270edd19e
child 496646 2064477895c3d93ec55cab9dede17ba01a434b21
push id9984
push userffxbld-merge
push dateMon, 15 Oct 2018 21:07:35 +0000
treeherdermozilla-beta@183d27ea8570 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersegao
bugs1494069
milestone64.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1494069 - [mozlint] Collapse exclude paths into their smallest possible set, r=egao Often we specify globs in our exclude patterns, e.g: exclude: - **/node_modules - obj* However, these globs get expanded out to *every* file that matches them. This can sometimes be thousands or even tens of thousands of files. We then pass these paths on to the underlying linters and tell them to exclude them all. This causes a lot of overhead and slows down performance. This commit implements a "collapse" function. Given a set of paths, it'll collapse them into the smallest set of parent directories that contain the original set, and that don't contain any extra files. For example, given a directory structure like this: a -- foo.txt -- b -- bar.txt -- baz.txt -- c -- ham.txt -- d -- spam.txt Then the following will happen: >>> collapse(['a/foo.txt', 'a/b/bar.txt', 'a/c/ham.txt', 'a/c/d/spam.txt']) ['a/foo.txt', 'b/bar.txt', 'c'] Since all files under directory 'c' are specified by the original set (both 'c/ham.txt' and 'c/d/spam.txt'), we can collapse it down to just 'c'. However not all files under 'b' are specified (we're missing 'a/b/baz.txt'), so we can't collapse 'b' (and by extension also can't collapse 'a'). If we had included 'a/b/baz.txt': >>> collapse(['a/foo.txt', 'a/b/bar.txt', 'a/b/baz.txt', 'a/c/ham.txt', 'a/c/d/spam.txt']) ['a'] In both cases, the smallest set of paths that contains the original set (and only the original set) is computed. The collapse function has a little bit of overhead but it's not too bad. For example collapsing all files matched by '**/node_modules' takes ~0.015s. Collapsing two full objdirs, takes ~0.6s. But a follow up commit is planned to make sure we stop using 'obj*' to reduce that overhead. Depends on D7738 Differential Revision: https://phabricator.services.mozilla.com/D7739
python/mozlint/mozlint/pathutils.py
python/mozlint/test/test_pathutils.py
tools/lint/mach_commands.py
--- a/python/mozlint/mozlint/pathutils.py
+++ b/python/mozlint/mozlint/pathutils.py
@@ -7,17 +7,17 @@ from __future__ import unicode_literals,
 import os
 
 from mozpack import path as mozpath
 from mozpack.files import FileFinder
 
 
 class FilterPath(object):
     """Helper class to make comparing and matching file paths easier."""
-    def __init__(self, path, exclude=None):
+    def __init__(self, path):
         self.path = os.path.normpath(path)
         self._finder = None
 
     @property
     def finder(self):
         if self._finder:
             return self._finder
         self._finder = FileFinder(mozpath.normsep(self.path))
@@ -62,16 +62,73 @@ class FilterPath(object):
         if b.startswith(a):
             return True
         return False
 
     def __repr__(self):
         return repr(self.path)
 
 
+def collapse(paths, base=None, dotfiles=False):
+    """Given an iterable of paths, collapse them into the smallest possible set
+    of paths that contain the original set (without containing any extra paths).
+
+    For example, if directory 'a' contains two files b.txt and c.txt, calling:
+
+        collapse(['a/b.txt', 'a/c.txt'])
+
+    returns ['a']. But if a third file d.txt also exists, then it will return
+    ['a/b.txt', 'a/c.txt'] since ['a'] would also include that extra file.
+
+    :param paths: An iterable of paths (files and directories) to collapse.
+    :returns: The smallest set of paths (files and directories) that contain
+              the original set of paths and only the original set.
+    """
+    if not paths:
+        if not base:
+            return []
+
+        # Need to test whether directory chain is empty. If it is then bubble
+        # the base back up so that it counts as 'covered'.
+        for _, _, names in os.walk(base):
+            if names:
+                return []
+        return [base]
+
+    if not base:
+        paths = map(mozpath.abspath, paths)
+        base = mozpath.commonprefix(paths)
+
+        if not os.path.isdir(base):
+            base = os.path.dirname(base)
+
+    covered = set()
+    full = set()
+    for name in os.listdir(base):
+        if not dotfiles and name[0] == '.':
+            continue
+
+        path = mozpath.join(base, name)
+        full.add(path)
+
+        if path in paths:
+            # This path was explicitly specified, so just bubble it back up
+            # without recursing down into it (if it was a directory).
+            covered.add(path)
+        elif os.path.isdir(path):
+            new_paths = [p for p in paths if p.startswith(path)]
+            covered.update(collapse(new_paths, base=path, dotfiles=dotfiles))
+
+    if full == covered:
+        # Every file under this base was covered, so we can collapse them all
+        # up into the base path.
+        return [base]
+    return covered
+
+
 def filterpaths(paths, linter, **lintargs):
     """Filters a list of paths.
 
     Given a list of paths, and a linter definition plus extra
     arguments, return the set of paths that should be linted.
 
     :param paths: A starting list of paths to possibly lint.
     :param linter: A linter definition.
@@ -138,17 +195,17 @@ def filterpaths(paths, linter, **lintarg
 
         # Next expand excludes with globs in them so we can add them to
         # the set of files to discard.
         for pattern in excludeglobs:
             for p, f in path.finder.find(pattern):
                 discard.add(path.join(p))
 
     # Only pass paths we couldn't exclude here to the underlying linter
-    lintargs['exclude'] = [f.path for f in discard]
+    lintargs['exclude'] = collapse([f.path for f in discard])
     return [f.path for f in keep]
 
 
 def findobject(path):
     """
     Find a Python object given a path of the form <modulepath>:<objectpath>.
     Conceptually equivalent to
 
--- a/python/mozlint/test/test_pathutils.py
+++ b/python/mozlint/test/test_pathutils.py
@@ -1,16 +1,17 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
-from __future__ import absolute_import
+from __future__ import absolute_import, print_function
 
 import os
 from copy import deepcopy
+from fnmatch import fnmatch
 
 import mozunit
 import pytest
 
 from mozlint import pathutils
 
 here = os.path.abspath(os.path.dirname(__file__))
 root = os.path.join(here, 'filter')
@@ -97,10 +98,34 @@ TEST_CASES = (
 @pytest.mark.parametrize('test', TEST_CASES)
 def test_filterpaths(filterpaths, test):
     expected = test.pop('expected')
 
     paths = filterpaths(**test)
     assert_paths(paths, expected)
 
 
+@pytest.mark.parametrize('paths,expected', [
+    (['subdir1/*'], ['subdir1']),
+    (['subdir2/*'], ['subdir2']),
+    (['subdir1/*.*', 'subdir1/subdir3/*', 'subdir2/*'], ['subdir1', 'subdir2']),
+    ([root + '/*', 'subdir1/*.*', 'subdir1/subdir3/*', 'subdir2/*'], [root]),
+    (['subdir1/b.py', 'subdir1/subdir3'], ['subdir1/b.py', 'subdir1/subdir3']),
+    (['subdir1/b.py', 'subdir1/b.js'], ['subdir1/b.py', 'subdir1/b.js']),
+])
+def test_collapse(paths, expected):
+    inputs = []
+    for path in paths:
+        base, name = os.path.split(path)
+        if '*' in name:
+            for n in os.listdir(base):
+                if not fnmatch(n, name):
+                    continue
+                inputs.append(os.path.join(base, n))
+        else:
+            inputs.append(path)
+
+    print("inputs: {}".format(inputs))
+    assert_paths(pathutils.collapse(inputs), expected)
+
+
 if __name__ == '__main__':
     mozunit.main()
--- a/tools/lint/mach_commands.py
+++ b/tools/lint/mach_commands.py
@@ -31,21 +31,22 @@ def setup_argument_parser():
 class MachCommands(MachCommandBase):
 
     @Command(
         'lint', category='devenv',
         description='Run linters.',
         parser=setup_argument_parser)
     def lint(self, *runargs, **lintargs):
         """Run linters."""
+        self._activate_virtualenv()
+
         from mozlint import cli
         lintargs.setdefault('root', self.topsrcdir)
         lintargs['exclude'] = ['obj*', 'tools/lint/test/files']
         cli.SEARCH_PATHS.append(here)
-        self._activate_virtualenv()
         return cli.run(*runargs, **lintargs)
 
     @Command('eslint', category='devenv',
              description='Run eslint or help configure eslint for optimal development.')
     @CommandArgument('paths', default=None, nargs='*',
                      help="Paths to file or directories to lint, like "
                           "'browser/' Defaults to the "
                           "current directory if not given.")