Bug 1108293 - mach dependencies: analyze build system dependencies draft
author Gregory Szorc <gps@mozilla.com>
Sat, 06 Dec 2014 21:45:55 -0800
changeset 809322 685be29e4c23cb01ff4f60eb1a5cbb11134e8456
parent 808986 3b66a441d832b077dde52a5a2692d1b07fde3f0e
push id 113639
push user bmo:cmanchester@mozilla.com
push date Thu, 21 Jun 2018 20:55:52 +0000
bugs 1108293
milestone 62.0a1
Bug 1108293 - mach dependencies: analyze build system dependencies Developers often want to know how header files and other files factor into build dependencies. This patch introduces the |mach dependencies| command for querying dependency info from the build system. It only offers basic querying so far. Functionality can be expanded as requested by developers. * * * Bug 1108293 - part 1a - resolve symbolic links whenever possible Entering: mach dependencies targets $HEADER tells you what depends on $HEADER...unless $HEADER is exported. Then you get different answers depending on whether you pass an absolute-srcdir or an absolute-objdir path. This patch at least makes the answers what you expect when you pass an absolute-srcdir path, as we now normalize symbolic links when recording dependencies. (This patch doesn't help systems where we don't have symbolic links, and it also makes the absolute-objdir paths for such files impossible to query data for. We can at least partially solve the latter problem in a future patch.) Because resolving symbolic links repeatedly can be expensive, we include a cache for the resolved results. This cache makes things noticeably faster, even on an SSD. * * * part 1b - add some DWIM to |mach dependencies targets| We have enough information in mach commands to permit both srcdir- and objdir-relative pathnames; we should permit both for convenience. Given that we resolve symbolic links in mozbuild.analyze.Dependencies, we should do the same before passing the path into Dependencies.get_targets, so that objdir paths Just Work. MozReview-Commit-ID: 7bBKWkh8rYi
python/mozbuild/mozbuild/analyze.py
python/mozbuild/mozbuild/mach_commands.py
new file mode 100644
--- /dev/null
+++ b/python/mozbuild/mozbuild/analyze.py
@@ -0,0 +1,203 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import unicode_literals
+
+import os
+
+from mozbuild.makeutil import read_dep_makefile
+import mozpack.path as mozpath
+from mozpack.copier import FileRegistry
+from mozpack.manifests import InstallManifest
+
+def find_deps_files(path):
+    """Find paths to Make dependency files.
+
+    This is an iterator of (objdir, deps_path).
+    """
+    for root, dirs, files in os.walk(path):
+        root = mozpath.normpath(root)
+        if mozpath.basename(root) != '.deps':
+            continue
+
+        parent = os.path.dirname(root)
+
+        for f in files:
+            if f.endswith('.pp'):
+                yield parent, mozpath.join(root, f)
+
+class Dependencies(object):
+    """Data structure to hold basic build system dependencies.
+
+    This isn't a truly generic class. It is optimized to hold results from
+    ``load_deps_files()``.
+    """
+
+    def __init__(self, topsrcdir, topobjdir):
+        self.topsrcdir = topsrcdir
+        self.topobjdir = topobjdir
+        self.targets = {}
+        self.dependencies = {}
+        self._realpath_cache = {}
+        self._filemap = None
+
+    def load_deps_file(self, objdir, fh):
+        """Load a single dependency file."""
+        for rule in read_dep_makefile(fh):
+            for target in rule.targets():
+                full_target = mozpath.normpath(mozpath.join(objdir,
+                    target))
+
+                normalized_deps = []
+                for d in rule.dependencies():
+                    full_depend = mozpath.join(objdir, d)
+                    # Resolve symbolic links from $objdir/dist/include and
+                    # the like to their srcdir equivalents.  Don't use
+                    # _realpath_cache.get(full_depend, os.path.realpath(...)),
+                    # as the whole point of this cache is to avoid hitting
+                    # the filesystem if we don't have to.
+                    if full_depend in self._realpath_cache:
+                        full_depend = self._realpath_cache[full_depend]
+                    else:
+                        resolved = os.path.realpath(full_depend)
+                        self._realpath_cache[full_depend] = resolved
+                        full_depend = resolved
+                    normalized_deps.append(full_depend)
+                    self.dependencies.setdefault(full_depend, set()).add(full_target)
+
+                assert full_target not in self.targets
+                self.targets[full_target] = normalized_deps
+
+    def prune_system_paths(self):
+        """Obtain a Dependencies with system paths pruned."""
+        allowed = (self.topsrcdir, self.topobjdir)
+
+        newtargets = {}
+        newdepends = {}
+
+        for target, depends in self.targets.iteritems():
+            if not target.startswith(allowed):
+                continue
+
+            depends = [d for d in depends if d.startswith(allowed)]
+            newtargets[target] = depends
+
+        for depend, targets in self.dependencies.iteritems():
+            if not depend.startswith(allowed):
+                continue
+
+            targets = {t for t in targets if t.startswith(allowed)}
+            newdepends[depend] = targets
+
+        deps = Dependencies(self.topsrcdir, self.topobjdir)
+        deps.targets = newtargets
+        deps.dependencies = newdepends
+
+        return deps
+
+    def resolve_srcdir_paths(self):
+        """Obtain a Dependencies with objdir paths resolved to srcdir equivalents.
+
+        Dependencies often reference paths in the objdir. For example, headers
+        are often included from ``objdir/dist/include``. Calling this function
+        will rewrite paths to the srcdir equivalent, where possible.
+        """
+        files = self.filemap
+        newtargets = {}
+        newdepends = {}
+
+        for target, depends in self.targets.iteritems():
+            target = files.get(target, target)
+            depends = [files.get(d, d) for d in depends]
+            newtargets[target] = depends
+
+        for depend, targets in self.dependencies.iteritems():
+            depend = files.get(depend, depend)
+            targets = {files.get(t, t) for t in targets}
+            newdepends[depend] = targets
+
+        deps = Dependencies(self.topsrcdir, self.topobjdir)
+        deps.targets = newtargets
+        deps.dependencies = newdepends
+
+        return deps
+
+    @property
+    def filemap(self):
+        if self._filemap is None:
+            self._filemap = get_srcdir_objdir_file_map(self.topobjdir)
+
+        return self._filemap
+
+    def get_source_file(self, target):
+        """Try to obtain the corresponding source file for a target."""
+        deps = self.targets.get(target)
+        if not deps:
+            return None
+
+        source = deps[0]
+        # The suffix list should probably come from elsewhere.
+        if not source.endswith(('.c', '.cpp', '.cc', '.cxx', '.m', '.mm', '.s', '.S')):
+            return None
+
+        return source
+
+    def get_targets(self, path, resolve_source=False):
+        targets = self.dependencies.get(path, set())
+        resolved = self.resolve_srcdir_paths()
+        targets |= resolved.dependencies.get(path, set())
+
+        # Find references to objdir equivalent to this file or vice-versa.
+        if path.startswith(self.topobjdir):
+            # We would have addressed this above.
+            pass
+        elif path.startswith(self.topsrcdir):
+            reversemap = {v: k for k, v in self.filemap.iteritems()}
+            obj_path = reversemap.get(path)
+            if obj_path:
+                targets |= self.dependencies.get(obj_path, set())
+
+        if resolve_source:
+            newtargets = set()
+            for t in targets:
+                s = self.get_source_file(t)
+                if s:
+                    t = s
+
+                newtargets.add(t)
+
+            targets = newtargets
+
+        return targets
+
+def get_install_manifests(topobjdir):
+    """Obtain InstallManifest and prefix metadata."""
+    man_dir = mozpath.join(topobjdir, '_build_manifests', 'install')
+    j = mozpath.join
+    for f in sorted(os.listdir(man_dir)):
+        full = mozpath.join(man_dir, f)
+
+        if f in ('_tests', '_test_files'):
+            yield (j(topobjdir, '_tests'), InstallManifest(path=full))
+        elif f == 'xpidl':
+            yield (j(topobjdir, 'config/makefiles/xpidl'), InstallManifest(path=full))
+        elif f.startswith('dist_'):
+            yield (j(topobjdir, f.replace('_', '/')), InstallManifest(path=full))
+        else:
+            raise Exception('Unknown install manifest encountered: %s' % f)
+
+def get_srcdir_objdir_file_map(topobjdir):
+    """Obtains a mapping of filenames in the objdir to their srcdir equivalent."""
+    filemap = {}
+    for root, m in get_install_manifests(topobjdir):
+        r = FileRegistry()
+        m.populate_registry(r)
+        for p, f in r:
+            full_objdir = mozpath.normpath(mozpath.join(root, p))
+            if not hasattr(f, 'path'):
+                continue
+
+            filemap[full_objdir] = f.path
+
+    return filemap
--- a/python/mozbuild/mozbuild/mach_commands.py
+++ b/python/mozbuild/mozbuild/mach_commands.py
@@ -458,16 +458,98 @@ class Logs(MachCommandBase):
             # descriptor for less's input. Replacing sys.stdout's file
             # descriptor with what it was before we replaced it will properly
             # close less's input.
             os.dup2(output_fd, sys.stdout.fileno())
             less.wait()
 
 
 @CommandProvider
+class DependenciesProvider(MachCommandBase):
+    @Command('dependencies', category='post-build',
+        description='Analyze build dependencies')
+    def analyze_dependencies(self):
+        pass
+
+    @SubCommand('dependencies', 'counts',
+        description='Print counts of how often dependencies are used.')
+    @CommandArgument('--show-all', action='store_true',
+        help='Show paths outside the source and object directories.')
+    @CommandArgument('--dont-resolve-paths', action='store_true',
+        help='Do not attempt to resolve paths to their original file. '
+            '(Show raw paths from build dependencies)')
+    @CommandArgument('--min', type=int,
+        help='Filter items without this many dependent targets.')
+    def dependency_counts(self, min=None, **kwargs):
+        deps = self._get_deps(**kwargs).dependencies
+
+        deps = {self._normalize_path(k): v for k, v in deps.iteritems()}
+
+        for d in sorted(deps, key=lambda k: len(deps[k]), reverse=True):
+            if isinstance(min, int) and len(deps[d]) < min:
+                continue
+
+            print('%d\t%s' % (len(deps[d]), d))
+
+    @SubCommand('dependencies', 'targets',
+        description='Print targets having a dependency.')
+    @CommandArgument('--source', action='store_true',
+        help='Try to resolve the source file instead of the actual target')
+    @CommandArgument('path',
+        help='Find targets that depend on a certain file')
+    def targets(self, path, source=False):
+        if not os.path.isabs(path):
+            # Accommodate srcdir-relative paths from within the objdir.
+            candidate = mozpath.join(self.topsrcdir, path)
+            if os.path.exists(candidate):
+                path = candidate
+            else:
+                # Assume we're running from the objdir, and abspath is as
+                # good as mozpath.join(self.topobjdir, ...)
+                path = mozpath.abspath(path)
+        if not os.path.exists(path):
+            print('specified path does not exist: %s' % path)
+            return 1
+
+        # We resolve symbolic links in Dependencies.  Do the same here so
+        # we get consistent results.
+        path = os.path.realpath(path)
+
+        deps = self._get_deps(show_all=True, dont_resolve_paths=False)
+
+        targets = deps.get_targets(path, resolve_source=source)
+        for t in sorted(self._normalize_path(t) for t in targets):
+            print(t)
+
+    def _get_deps(self, **kwargs):
+        from mozbuild.analyze import Dependencies, find_deps_files
+
+        deps = Dependencies(self.topsrcdir, self.topobjdir)
+        for objdir, path in find_deps_files(self.topobjdir):
+            with open(path, 'rb') as fh:
+                deps.load_deps_file(objdir, fh)
+
+        if not kwargs.get('show_all'):
+            deps = deps.prune_system_paths()
+
+        if not kwargs.get('dont_resolve_paths'):
+            deps = deps.resolve_srcdir_paths()
+
+        return deps
+
+    def _normalize_path(self, path):
+        if mozpath.basedir(path, [self.topobjdir]):
+            return mozpath.join('<objdir>', mozpath.relpath(path, self.topobjdir))
+        elif mozpath.basedir(path, [self.topsrcdir]):
+            return mozpath.relpath(path, self.topsrcdir)
+
+        return path
+
+
+@CommandProvider
 class Warnings(MachCommandBase):
     """Provide commands for inspecting warnings."""
 
     @property
     def database_path(self):
         return self._get_state_filename('warnings.json')
 
     @property