includematcher: separate "parents" from "dirs"
authorspectral <spectral@google.com>
Mon, 06 Aug 2018 12:52:22 -0700
changeset 47448 5a7df82de1421a9780e32d7a086741c43d40dbe1
parent 47447 987d3a4b989f2ee8d5bddee4b78843f5f6b3d45c
child 47449 081cc9a95b652cef0ba1be9d37bdfb9338ffa45d
push id844
push usergszorc@mozilla.com
push dateWed, 08 Aug 2018 16:10:08 +0000
includematcher: separate "parents" from "dirs" A future patch will make use of this separation so that we can make more intelligent decisions about what to investigate/load when the matcher is in use. Currently, even with this patch, we typically use the 'visitdir' call to identify if we can skip some directory, something along the lines of: for f in all_items: if match.visitdir(f): <do stuff> This can be slower than we'd like if there are a lot of items; it requires N calls to match.visitdir in the best case. Commonly, especially with 'narrow', we have a situation where we do some work for the directory, possibly just loading it from disk (when using treemanifests) and then check if we should be interacting with it at all, which can be a huge slowdown in some pathological cases. Differential Revision: https://phab.mercurial-scm.org/D4129
mercurial/match.py
--- a/mercurial/match.py
+++ b/mercurial/match.py
@@ -440,28 +440,32 @@ class patternmatcher(basematcher):
 class includematcher(basematcher):
 
     def __init__(self, root, cwd, kindpats, listsubrepos=False, badfn=None):
         super(includematcher, self).__init__(root, cwd, badfn)
 
         self._pats, self.matchfn = _buildmatch(kindpats, '(?:/|$)',
                                                listsubrepos, root)
         self._prefix = _prefix(kindpats)
-        roots, dirs = _rootsanddirs(kindpats)
+        roots, dirs, parents = _rootsdirsandparents(kindpats)
         # roots are directories which are recursively included.
         self._roots = set(roots)
         # dirs are directories which are non-recursively included.
         self._dirs = set(dirs)
+        # parents are directories which are non-recursively included because
+        # they are needed to get to items in _dirs or _roots.
+        self._parents = set(parents)
 
     def visitdir(self, dir):
         if self._prefix and dir in self._roots:
             return 'all'
         return ('.' in self._roots or
                 dir in self._roots or
                 dir in self._dirs or
+                dir in self._parents or
                 any(parentdir in self._roots
                     for parentdir in util.finddirs(dir)))
 
     @encoding.strmethod
     def __repr__(self):
         return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
 
 class exactmatcher(basematcher):
@@ -999,50 +1003,51 @@ def _patternrootsanddirs(kindpats):
             r.append('.')
     return r, d
 
 def _roots(kindpats):
     '''Returns root directories to match recursively from the given patterns.'''
     roots, dirs = _patternrootsanddirs(kindpats)
     return roots
 
-def _rootsanddirs(kindpats):
+def _rootsdirsandparents(kindpats):
     '''Returns roots and exact directories from patterns.
 
     roots are directories to match recursively, whereas exact directories should
     be matched non-recursively. The returned (roots, dirs) tuple will also
     include directories that need to be implicitly considered as either, such as
     parent directories.
 
-    >>> _rootsanddirs(
+    >>> _rootsdirsandparents(
     ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
     ...      (b'glob', b'g*', b'')])
-    (['g/h', 'g/h', '.'], ['g', '.'])
-    >>> _rootsanddirs(
+    (['g/h', 'g/h', '.'], [], ['g', '.'])
+    >>> _rootsdirsandparents(
     ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
-    ([], ['g/h', '.', 'g', '.'])
-    >>> _rootsanddirs(
+    ([], ['g/h', '.'], ['g', '.'])
+    >>> _rootsdirsandparents(
     ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
     ...      (b'path', b'', b'')])
-    (['r', 'p/p', '.'], ['p', '.'])
-    >>> _rootsanddirs(
+    (['r', 'p/p', '.'], [], ['p', '.'])
+    >>> _rootsdirsandparents(
     ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
     ...      (b'relre', b'rr', b'')])
-    (['.', '.', '.'], ['.'])
+    (['.', '.', '.'], [], ['.'])
     '''
     r, d = _patternrootsanddirs(kindpats)
 
+    p = []
     # Append the parents as non-recursive/exact directories, since they must be
     # scanned to get to either the roots or the other exact directories.
-    d.extend(util.dirs(d))
-    d.extend(util.dirs(r))
+    p.extend(util.dirs(d))
+    p.extend(util.dirs(r))
     # util.dirs() does not include the root directory, so add it manually
-    d.append('.')
+    p.append('.')
 
-    return r, d
+    return r, d, p
 
 def _explicitfiles(kindpats):
     '''Returns the potential explicit filenames from the patterns.
 
     >>> _explicitfiles([(b'path', b'foo/bar', b'')])
     ['foo/bar']
     >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
     []