Bug 920938 - [manifestparser] handle symlinks in populate_directory and from directories; r=jmaher
authorJulien Pagès <j.parkouss@gmail.com>
Fri, 05 Dec 2014 09:59:00 +0100
changeset 218655 00c9dd32fd19b39389e21966e670ef493ae6b508
parent 218654 2957dcebbfff3a760ca3af4049dd2c5b876cbff8
child 218656 1a33f3c968bde82a741eec7a512bb79060de5ed0
push id52605
push usercbook@mozilla.com
push dateMon, 08 Dec 2014 07:50:30 +0000
treeherdermozilla-inbound@00c9dd32fd19 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersjmaher
bugs920938
milestone37.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 920938 - [manifestparser] handle symlinks in populate_directory and from directories; r=jmaher
testing/mozbase/manifestparser/manifestparser/manifestparser.py
testing/mozbase/manifestparser/tests/test_convert_directory.py
testing/mozbase/manifestparser/tests/test_convert_symlinks.py
--- a/testing/mozbase/manifestparser/manifestparser/manifestparser.py
+++ b/testing/mozbase/manifestparser/manifestparser/manifestparser.py
@@ -872,118 +872,78 @@ class ManifestParser(object):
                     print >> sys.stderr, message + " Skipping."
                     continue
                 destination = os.path.join(rootdir, _relpath)
                 shutil.copy(source, destination)
 
     ### directory importers
 
     @classmethod
-    def _walk_directories(cls, directories, function, pattern=None, ignore=()):
+    def _walk_directories(cls, directories, callback, pattern=None, ignore=()):
         """
         internal function to import directories
         """
 
-        class FilteredDirectoryContents(object):
-            """class to filter directory contents"""
-
-            sort = sorted
-
-            def __init__(self, pattern=pattern, ignore=ignore, cache=None):
-                if pattern is None:
-                    pattern = set()
-                if isinstance(pattern, basestring):
-                    pattern = [pattern]
-                self.patterns = pattern
-                self.ignore = set(ignore)
-
-                # cache of (dirnames, filenames) keyed on directory real path
-                # assumes volume is frozen throughout scope
-                self._cache = cache or {}
-
-            def __call__(self, directory):
-                """returns 2-tuple: dirnames, filenames"""
-                directory = os.path.realpath(directory)
-                if directory not in self._cache:
-                    dirnames, filenames = self.contents(directory)
+        if isinstance(pattern, basestring):
+            patterns = [pattern]
+        else:
+            patterns = pattern
+        ignore = set(ignore)
 
-                    # filter out directories without progeny
-                    # XXX recursive: should keep track of seen directories
-                    dirnames = [ dirname for dirname in dirnames
-                                 if not self.empty(os.path.join(directory, dirname)) ]
-
-                    self._cache[directory] = (tuple(dirnames), filenames)
-
-                # return cached values
-                return self._cache[directory]
+        if not patterns:
+            accept_filename = lambda filename: True
+        else:
+            def accept_filename(filename):
+                for pattern in patterns:
+                    if fnmatch.fnmatch(filename, pattern):
+                        return True
 
-            def empty(self, directory):
-                """
-                returns if a directory and its descendents are empty
-                """
-                return self(directory) == ((), ())
-
-            def contents(self, directory, sort=None):
-                """
-                return directory contents as (dirnames, filenames)
-                with `ignore` and `pattern` applied
-                """
-
-                if sort is None:
-                    sort = self.sort
+        if not ignore:
+            accept_dirname = lambda dirname: True
+        else:
+            accept_dirname = lambda dirname: dirname not in ignore
 
-                # split directories and files
-                dirnames = []
-                filenames = []
-                for item in os.listdir(directory):
-                    path = os.path.join(directory, item)
-                    if os.path.isdir(path):
-                        dirnames.append(item)
-                    else:
-                        # XXX not sure what to do if neither a file or directory
-                        # (if anything)
-                        assert os.path.isfile(path)
-                        filenames.append(item)
+        rootdirectories = directories[:]
+        seen_directories = set()
+        for rootdirectory in rootdirectories:
+            # let's recurse directories using list
+            directories = [os.path.realpath(rootdirectory)]
+            while directories:
+                directory = directories.pop(0)
+                if directory in seen_directories:
+                    # eliminate possible infinite recursion due to
+                    # symbolic links
+                    continue
+                seen_directories.add(directory)
 
-                # filter contents;
-                # this could be done in situ re the above for loop
-                # but it is really disparate in intent
-                # and could conceivably go to a separate method
-                dirnames = [dirname for dirname in dirnames
-                            if dirname not in self.ignore]
-                filenames = set(filenames)
-                # we use set functionality to filter filenames
-                if self.patterns:
-                    matches = set()
-                    matches.update(*[fnmatch.filter(filenames, pattern)
-                                     for pattern in self.patterns])
-                    filenames = matches
+                files = []
+                subdirs = []
+                for name in sorted(os.listdir(directory)):
+                    path = os.path.join(directory, name)
+                    if os.path.isfile(path):
+                        # os.path.isfile follow symbolic links, we don't
+                        # need to handle them here.
+                        if accept_filename(name):
+                            files.append(name)
+                        continue
+                    elif os.path.islink(path):
+                        # eliminate symbolic links
+                        path = os.path.realpath(path)
 
-                if sort is not None:
-                    # sort dirnames, filenames
-                    dirnames = sort(dirnames)
-                    filenames = sort(filenames)
-
-                return (tuple(dirnames), tuple(filenames))
-
-        # make a filtered directory object
-        directory_contents = FilteredDirectoryContents(pattern=pattern, ignore=ignore)
+                    # we must have a directory here
+                    if accept_dirname(name):
+                        subdirs.append(name)
+                        # this subdir is added for recursion
+                        directories.insert(0, path)
 
-        # walk the directories, generating manifests
-        for index, directory in enumerate(directories):
-
-            for dirpath, dirnames, filenames in os.walk(directory):
+                # here we got all subdirs and files filtered, we can
+                # call the callback function if directory is not empty
+                if subdirs or files:
+                    callback(rootdirectory, directory, subdirs, files)
 
-                # get the directory contents from the caching object
-                _dirnames, filenames = directory_contents(dirpath)
-                # filter out directory names
-                dirnames[:] = _dirnames
-
-                # call callback function
-                function(directory, dirpath, dirnames, filenames)
 
     @classmethod
     def populate_directory_manifests(cls, directories, filename, pattern=None, ignore=(), overwrite=False):
         """
         walks directories and writes manifests of name `filename` in-place; returns `cls` instance populated
         with the given manifests
 
         filename -- filename of manifests to write
--- a/testing/mozbase/manifestparser/tests/test_convert_directory.py
+++ b/testing/mozbase/manifestparser/tests/test_convert_directory.py
@@ -9,25 +9,37 @@ import shutil
 import tempfile
 import unittest
 
 from manifestparser import convert
 from manifestparser import ManifestParser
 
 here = os.path.dirname(os.path.abspath(__file__))
 
+# In some cases tempfile.mkdtemp() may returns a path which contains
+# symlinks. Some tests here will then break, as the manifestparser.convert
+# function returns paths that does not contains symlinks.
+#
+# Workaround is to use the following function, if absolute path of temp dir
+# must be compared.
+def create_realpath_tempdir():
+    """
+    Create a tempdir without symlinks.
+    """
+    return os.path.realpath(tempfile.mkdtemp())
+
 class TestDirectoryConversion(unittest.TestCase):
     """test conversion of a directory tree to a manifest structure"""
 
     def create_stub(self, directory=None):
         """stub out a directory with files in it"""
 
         files = ('foo', 'bar', 'fleem')
         if directory is None:
-            directory = tempfile.mkdtemp()
+            directory = create_realpath_tempdir()
         for i in files:
             file(os.path.join(directory, i), 'w').write(i)
         subdir = os.path.join(directory, 'subdir')
         os.mkdir(subdir)
         file(os.path.join(subdir, 'subfile'), 'w').write('baz')
         return directory
 
     def test_directory_to_manifest(self):
@@ -122,22 +134,22 @@ subsuite =
 
     def test_update(self):
         """
         Test our ability to update tests from a manifest and a directory of
         files
         """
 
         # boilerplate
-        tempdir = tempfile.mkdtemp()
+        tempdir = create_realpath_tempdir()
         for i in range(10):
             file(os.path.join(tempdir, str(i)), 'w').write(str(i))
 
         # otherwise empty directory with a manifest file
-        newtempdir = tempfile.mkdtemp()
+        newtempdir = create_realpath_tempdir()
         manifest_file = os.path.join(newtempdir, 'manifest.ini')
         manifest_contents = str(convert([tempdir], relative_to=tempdir))
         with file(manifest_file, 'w') as f:
             f.write(manifest_contents)
 
         # get the manifest
         manifest = ManifestParser(manifests=(manifest_file,))
 
--- a/testing/mozbase/manifestparser/tests/test_convert_symlinks.py
+++ b/testing/mozbase/manifestparser/tests/test_convert_symlinks.py
@@ -4,27 +4,23 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this file,
 # You can obtain one at http://mozilla.org/MPL/2.0/.
 
 import os
 import shutil
 import tempfile
 import unittest
 
-from manifestparser import convert
+from manifestparser import convert, ManifestParser
 
 class TestSymlinkConversion(unittest.TestCase):
     """
     test conversion of a directory tree with symlinks to a manifest structure
     """
 
-    # Currently broken: see
-    # https://bugzilla.mozilla.org/show_bug.cgi?id=902610
-    # https://bugzilla.mozilla.org/show_bug.cgi?id=920938
-
     def create_stub(self, directory=None):
         """stub out a directory with files in it"""
 
         files = ('foo', 'bar', 'fleem')
         if directory is None:
             directory = tempfile.mkdtemp()
         for i in files:
             file(os.path.join(directory, i), 'w').write(i)
@@ -47,33 +43,31 @@ class TestSymlinkConversion(unittest.Tes
             self.assertEqual([i['name'] for i in parser.tests],
                              files)
         except:
             raise
         finally:
             shutil.rmtree(stub)
             os.chdir(oldcwd)
 
+    @unittest.skipIf(not hasattr(os, 'symlink'),
+                     "symlinks unavailable on this platform")
     def test_relpath_symlink(self):
         """
         Ensure `relative_to` works in a symlink.
         Not available on windows.
         """
 
-        symlink = getattr(os, 'symlink', None)
-        if symlink is None:
-            return # symlinks unavailable on this platform
-
         oldcwd = os.getcwd()
         workspace = tempfile.mkdtemp()
         try:
             tmpdir = os.path.join(workspace, 'directory')
             os.makedirs(tmpdir)
             linkdir = os.path.join(workspace, 'link')
-            symlink(tmpdir, linkdir)
+            os.symlink(tmpdir, linkdir)
             self.create_stub(tmpdir)
 
             # subdir with in-memory manifest
             files = ['../bar', '../fleem', '../foo', 'subfile']
             subdir = os.path.join(linkdir, 'subdir')
             os.chdir(os.path.realpath(subdir))
             for directory in (tmpdir, linkdir):
                 parser = convert([directory], relative_to='.')
@@ -85,31 +79,59 @@ class TestSymlinkConversion(unittest.Tes
 
         # a more complicated example
         oldcwd = os.getcwd()
         workspace = tempfile.mkdtemp()
         try:
             tmpdir = os.path.join(workspace, 'directory')
             os.makedirs(tmpdir)
             linkdir = os.path.join(workspace, 'link')
-            symlink(tmpdir, linkdir)
+            os.symlink(tmpdir, linkdir)
             self.create_stub(tmpdir)
             files = ['../bar', '../fleem', '../foo', 'subfile']
             subdir = os.path.join(linkdir, 'subdir')
             subsubdir = os.path.join(subdir, 'sub')
             os.makedirs(subsubdir)
             linksubdir = os.path.join(linkdir, 'linky')
             linksubsubdir = os.path.join(subsubdir, 'linky')
-            symlink(subdir, linksubdir)
-            symlink(subdir, linksubsubdir)
+            os.symlink(subdir, linksubdir)
+            os.symlink(subdir, linksubsubdir)
             for dest in (subdir,):
                 os.chdir(dest)
                 for directory in (tmpdir, linkdir):
                     parser = convert([directory], relative_to='.')
                     self.assertEqual([i['name'] for i in parser.tests],
                                      files)
         finally:
             shutil.rmtree(workspace)
             os.chdir(oldcwd)
 
+    @unittest.skipIf(not hasattr(os, 'symlink'),
+                     "symlinks unavailable on this platform")
+    def test_recursion_symlinks(self):
+        workspace = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, workspace)
+
+        # create two dirs
+        os.makedirs(os.path.join(workspace, 'dir1'))
+        os.makedirs(os.path.join(workspace, 'dir2'))
+
+        # create cyclical symlinks
+        os.symlink(os.path.join('..', 'dir1'),
+                   os.path.join(workspace, 'dir2', 'ldir1'))
+        os.symlink(os.path.join('..', 'dir2'),
+                   os.path.join(workspace, 'dir1', 'ldir2'))
+
+        # create one file in each dir
+        open(os.path.join(workspace, 'dir1', 'f1.txt'), 'a').close()
+        open(os.path.join(workspace, 'dir1', 'ldir2', 'f2.txt'), 'a').close()
+
+        data = []
+        def callback(rootdirectory, directory, subdirs, files):
+            for f in files:
+                data.append(f)
+
+        ManifestParser._walk_directories([workspace], callback)
+        self.assertEqual(sorted(data), ['f1.txt', 'f2.txt'])
+
 
 if __name__ == '__main__':
     unittest.main()