bug 1058662 - Add a mozbuild.util.capture_deps() context manager that wraps open() and import to capture input dependencies in Python scripts. draft
author Chris Manchester <cmanchester@mozilla.com>
Tue, 26 Aug 2014 12:07:06 -0400
changeset 307659 b3f3c6976de206bb6533c14b67fce77a4e38ac49
parent 307616 e1ef2be156de1dad31bb4189a51b178b12b23340
child 307660 06fcb5c06d26714815b34a8ada9da5408411dd7c
push id 7337
push user cmanchester@mozilla.com
push date Mon, 09 Nov 2015 19:32:15 +0000
bugs 1058662
milestone 45.0a1
bug 1058662 - Add a mozbuild.util.capture_deps() context manager that wraps open() and import to capture input dependencies in Python scripts.
python/mozbuild/mozbuild/test/test_util.py
python/mozbuild/mozbuild/util.py
--- a/python/mozbuild/mozbuild/test/test_util.py
+++ b/python/mozbuild/mozbuild/test/test_util.py
@@ -15,16 +15,17 @@ import tempfile
 
 from mozfile.mozfile import NamedTemporaryFile
 from mozunit import (
     main,
     MockedOpen,
 )
 
 from mozbuild.util import (
+    capture_deps,
     FileAvoidWrite,
     group_unified_files,
     hash_file,
     memoize,
     memoized_property,
     resolve_target_to_make,
     MozbuildDeletionError,
     HierarchicalStringList,
@@ -466,16 +467,91 @@ class TestHierarchicalStringListWithFlag
         self.assertEqual(l.x['x'].bar, 42)
 
         l.x['y'].foo = True
         self.assertEqual(l.x['y'].foo, True)
 
         with self.assertRaises(AttributeError):
             l.x['y'].baz = False
 
+class TestCaptureDeps(unittest.TestCase):
+    '''Tests for the capture_deps() context manager.'''
+    def setUp(self):
+        self.tempdir = tempfile.mkdtemp()
+        self._old_sys_path = list(sys.path)  # copy: mutated in place below
+        sys.path.append(self.tempdir)
+
+    def tearDown(self):
+        sys.path[:] = self._old_sys_path
+        shutil.rmtree(self.tempdir)
+
+    def get_file(self, name):
+        return os.path.join(self.tempdir, name)
+
+    def write_file(self, path, content):
+        '''Write content to path, creating parent directories as needed.'''
+        try:
+            os.makedirs(os.path.dirname(path))
+        except OSError:
+            # Best effort: the directory exists or the path contains '..'.
+            pass
+        with open(path, 'w') as fh:
+            fh.write(content)
+
+    def test_capture_deps(self):
+        files = [self.get_file(f) for f in ['a', 'b/c', 'd/../e', 'f']]
+        for f in files:
+            self.write_file(f, 'foo')
+
+        deps = set()
+        with capture_deps(deps):
+            open(files[0], 'r').close()
+            open(files[1], 'a+').close()
+            open(files[2], 'rb').close()
+            open(files[3], 'rU').close()
+            # Not opened for reading, shouldn't be in the set.
+            open(self.get_file('xyz'), 'w').close()
+
+        self.assertEqual(deps, set(os.path.normpath(f) for f in files))
+
+    def test_capture_deps_import(self):
+        py_file = self.get_file("pyfile.py")
+        self.write_file(py_file, "def member(): pass")
+
+        deps = set()
+        with capture_deps(deps):
+            from pyfile import member; member()
+
+        self.assertEqual(deps, {py_file})
+
+    def test_capture_deps_import_package(self):
+        files = {
+            self.get_file(os.path.join("m", "c.py")): "def f(): pass",
+            self.get_file(os.path.join("m", "__init__.py")): "###",
+            self.get_file(os.path.join("m", "s", "__init__.py")): "def h(): pass",
+            self.get_file(os.path.join("m", "s", "b.py")): "def i(): pass"
+        }
+        for name, content in files.items():
+            self.write_file(name, content)
+
+        deps = set()
+        with capture_deps(deps):
+            import m.c; m.c.f()
+            import m.s.b; m.s.b.i(); m.s.h()
+
+        self.assertTrue(set(files.keys()) <= deps,
+                        "%s not detected" % (set(files.keys()) - deps))
+        deps = set()
+        with capture_deps(deps):
+            from m import s; s.h()
+            from m.c import f; f()
+            from m.s import b; b.i()
+
+        self.assertTrue(set(files.keys()) <= deps,
+                        "%s not detected" % (set(files.keys()) - deps))
 
 class TestMemoize(unittest.TestCase):
     def test_memoize(self):
         self._count = 0
         @memoize
         def wrapped(a, b):
             self._count += 1
             return a + b
--- a/python/mozbuild/mozbuild/util.py
+++ b/python/mozbuild/mozbuild/util.py
@@ -3,31 +3,32 @@
 # You can obtain one at http://mozilla.org/MPL/2.0/.
 
 # This file contains miscellaneous utility functions that don't belong anywhere
 # in particular.
 
 from __future__ import absolute_import, unicode_literals
 
 import collections
+import contextlib
 import difflib
 import errno
 import functools
 import hashlib
 import itertools
 import os
 import stat
 import sys
 import time
 import types
 
-from collections import (
-    defaultdict,
-    OrderedDict,
-)
+import mozpack.path as mozpath
+
+from collections import OrderedDict
+
 from io import (
     StringIO,
     BytesIO,
 )
 
 
 if sys.version_info[0] == 3:
     str_type = str
@@ -908,16 +909,80 @@ def TypedList(type, base_class=List):
         @staticmethod
         def normalize(e):
             if not isinstance(e, type):
                 e = type(e)
             return e
 
     return _TypedList
 
+@contextlib.contextmanager
+def capture_deps(dep_set):
+    '''A context manager that wraps the open builtin and the import hook to
+    capture the set of all files read or imported while it is active.
+
+    The hooks are installed on the __builtin__ module itself and are removed
+    again on exit, including when the body of the with statement raises.
+
+    :param dep_set: A set that is populated with any files opened for reading
+                    or imported in this context.
+    '''
+    import __builtin__
+
+    real_open = __builtin__.open
+    real_import = __builtin__.__import__
+
+    def _add_module_file(name):
+        # Record the source file backing sys.modules[name], if there is one.
+        module = sys.modules.get(name)
+        fname = getattr(module, '__file__', None)
+        if not fname:
+            return
+        # If this is a bytecode file, provide the corresponding source.
+        if fname.endswith(('.pyc', '.pyo')):
+            fname = fname[:-1]
+        dep_set.add(mozpath.normpath(os.path.abspath(fname)))
+
+    def fake_import(name, globals=None, locals=None, fromlist=None, level=-1):
+        mod = real_import(name, globals, locals, fromlist, level)
+
+        # We can't trust mod.__file__ here, because it will just point to an
+        # __init__.py if we're importing within a package. A combination of
+        # the originally requested name and fromlist can be used to find the
+        # entries in sys.modules that would have been accessed during this
+        # import, which themselves have the __file__ attribute we care about.
+        for e in fromlist or ():
+            _add_module_file(name + '.' + e)
+
+        # Check each prefix of a dotted name import. This is based on the
+        # strategy for importing hierarchical names in cpython's
+        # import_module_level.
+        parts = name.split('.')
+        for i in range(1, len(parts) + 1):
+            _add_module_file('.'.join(parts[:i]))
+
+        return mod
+
+    def fake_open(name, mode='r', buffering=-1):
+        # Modes starting with 'r', containing 'U', or equal to 'a+' are reads.
+        if mode.startswith('r') or 'U' in mode or mode == 'a+':
+            dep_set.add(mozpath.normpath(os.path.abspath(name)))
+        return real_open(name, mode, buffering)
+
+    __builtin__.__import__ = fake_import
+    __builtin__.open = fake_open
+    try:
+        yield None
+    finally:
+        # Always undo the patching, even if the with body raised; otherwise
+        # every later open()/import in the process would keep being hooked.
+        __builtin__.open = real_open
+        __builtin__.__import__ = real_import
+
+
 def group_unified_files(files, unified_prefix, unified_suffix,
                         files_per_unified_file):
     """Return an iterator of (unified_filename, source_filenames) tuples.
 
     We compile most C and C++ files in "unified mode"; instead of compiling
     ``a.cpp``, ``b.cpp``, and ``c.cpp`` separately, we compile a single file
     that looks approximately like::