Bug 920938 - [ManifestDestiny] handle symlinks in populate_directory and from directories; r=jmaher
☠☠ backed out by 1ff8792d5db2 ☠ ☠
authorJulien Pagès <j.parkouss@gmail.com>
Mon, 03 Nov 2014 04:24:00 +0100
changeset 241215 d917152c263d52832535fc43cbed1bb7f9bd8dc2
parent 241214 790f6eefaa6af564be8145a8b3fd2a9a4a587926
child 241216 68a47aac2628b822027134bc30de30429a24eb60
push id4311
push userraliiev@mozilla.com
push dateMon, 12 Jan 2015 19:37:41 +0000
treeherdermozilla-beta@150c9fed433b [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersjmaher
bugs920938
milestone36.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 920938 - [ManifestDestiny] handle symlinks in populate_directory and from directories; r=jmaher
testing/mozbase/manifestparser/manifestparser/manifestparser.py
testing/mozbase/manifestparser/tests/test_convert_symlinks.py
--- a/testing/mozbase/manifestparser/manifestparser/manifestparser.py
+++ b/testing/mozbase/manifestparser/manifestparser/manifestparser.py
@@ -872,118 +872,78 @@ class ManifestParser(object):
                     print >> sys.stderr, message + " Skipping."
                     continue
                 destination = os.path.join(rootdir, _relpath)
                 shutil.copy(source, destination)
 
     ### directory importers
 
     @classmethod
-    def _walk_directories(cls, directories, function, pattern=None, ignore=()):
+    def _walk_directories(cls, directories, callback, pattern=None, ignore=()):
         """
         internal function to import directories
         """
 
-        class FilteredDirectoryContents(object):
-            """class to filter directory contents"""
-
-            sort = sorted
-
-            def __init__(self, pattern=pattern, ignore=ignore, cache=None):
-                if pattern is None:
-                    pattern = set()
-                if isinstance(pattern, basestring):
-                    pattern = [pattern]
-                self.patterns = pattern
-                self.ignore = set(ignore)
-
-                # cache of (dirnames, filenames) keyed on directory real path
-                # assumes volume is frozen throughout scope
-                self._cache = cache or {}
-
-            def __call__(self, directory):
-                """returns 2-tuple: dirnames, filenames"""
-                directory = os.path.realpath(directory)
-                if directory not in self._cache:
-                    dirnames, filenames = self.contents(directory)
+        if isinstance(pattern, basestring):
+            patterns = [pattern]
+        else:
+            patterns = pattern
+        ignore = set(ignore)
 
-                    # filter out directories without progeny
-                    # XXX recursive: should keep track of seen directories
-                    dirnames = [ dirname for dirname in dirnames
-                                 if not self.empty(os.path.join(directory, dirname)) ]
-
-                    self._cache[directory] = (tuple(dirnames), filenames)
-
-                # return cached values
-                return self._cache[directory]
+        if not patterns:
+            accept_filename = lambda filename: True
+        else:
+            def accept_filename(filename):
+                for pattern in patterns:
+                    if fnmatch.fnmatch(filename, pattern):
+                        return True
 
-            def empty(self, directory):
-                """
-                returns if a directory and its descendents are empty
-                """
-                return self(directory) == ((), ())
-
-            def contents(self, directory, sort=None):
-                """
-                return directory contents as (dirnames, filenames)
-                with `ignore` and `pattern` applied
-                """
-
-                if sort is None:
-                    sort = self.sort
+        if not ignore:
+            accept_dirname = lambda dirname: True
+        else:
+            accept_dirname = lambda dirname: dirname not in ignore
 
-                # split directories and files
-                dirnames = []
-                filenames = []
-                for item in os.listdir(directory):
-                    path = os.path.join(directory, item)
-                    if os.path.isdir(path):
-                        dirnames.append(item)
-                    else:
-                        # XXX not sure what to do if neither a file or directory
-                        # (if anything)
-                        assert os.path.isfile(path)
-                        filenames.append(item)
+        rootdirectories = directories[:]
+        seen_directories = set()
+        for rootdirectory in rootdirectories:
+            # let's recurse directories using list
+            directories = [os.path.realpath(rootdirectory)]
+            while directories:
+                directory = directories.pop(0)
+                if directory in seen_directories:
+                    # eliminate possible infinite recursion due to
+                    # symbolic links
+                    continue
+                seen_directories.add(directory)
 
-                # filter contents;
-                # this could be done in situ re the above for loop
-                # but it is really disparate in intent
-                # and could conceivably go to a separate method
-                dirnames = [dirname for dirname in dirnames
-                            if dirname not in self.ignore]
-                filenames = set(filenames)
-                # we use set functionality to filter filenames
-                if self.patterns:
-                    matches = set()
-                    matches.update(*[fnmatch.filter(filenames, pattern)
-                                     for pattern in self.patterns])
-                    filenames = matches
+                files = []
+                subdirs = []
+                for name in sorted(os.listdir(directory)):
+                    path = os.path.join(directory, name)
+                    if os.path.isfile(path):
+                        # os.path.isfile follow symbolic links, we don't
+                        # need to handle them here.
+                        if accept_filename(name):
+                            files.append(name)
+                        continue
+                    elif os.path.islink(path):
+                        # eliminate symbolic links
+                        path = os.path.realpath(path)
 
-                if sort is not None:
-                    # sort dirnames, filenames
-                    dirnames = sort(dirnames)
-                    filenames = sort(filenames)
-
-                return (tuple(dirnames), tuple(filenames))
-
-        # make a filtered directory object
-        directory_contents = FilteredDirectoryContents(pattern=pattern, ignore=ignore)
+                    # we must have a directory here
+                    if accept_dirname(name):
+                        subdirs.append(name)
+                        # this subdir is added for recursion
+                        directories.insert(0, path)
 
-        # walk the directories, generating manifests
-        for index, directory in enumerate(directories):
-
-            for dirpath, dirnames, filenames in os.walk(directory):
+                # here we got all subdirs and files filtered, we can
+                # call the callback function if directory is not empty
+                if subdirs or files:
+                    callback(rootdirectory, directory, subdirs, files)
 
-                # get the directory contents from the caching object
-                _dirnames, filenames = directory_contents(dirpath)
-                # filter out directory names
-                dirnames[:] = _dirnames
-
-                # call callback function
-                function(directory, dirpath, dirnames, filenames)
 
     @classmethod
     def populate_directory_manifests(cls, directories, filename, pattern=None, ignore=(), overwrite=False):
         """
         walks directories and writes manifests of name `filename` in-place; returns `cls` instance populated
         with the given manifests
 
         filename -- filename of manifests to write
--- a/testing/mozbase/manifestparser/tests/test_convert_symlinks.py
+++ b/testing/mozbase/manifestparser/tests/test_convert_symlinks.py
@@ -4,27 +4,23 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this file,
 # You can obtain one at http://mozilla.org/MPL/2.0/.
 
 import os
 import shutil
 import tempfile
 import unittest
 
-from manifestparser import convert
+from manifestparser import convert, ManifestParser
 
 class TestSymlinkConversion(unittest.TestCase):
     """
     test conversion of a directory tree with symlinks to a manifest structure
     """
 
-    # Currently broken: see
-    # https://bugzilla.mozilla.org/show_bug.cgi?id=902610
-    # https://bugzilla.mozilla.org/show_bug.cgi?id=920938
-
     def create_stub(self, directory=None):
         """stub out a directory with files in it"""
 
         files = ('foo', 'bar', 'fleem')
         if directory is None:
             directory = tempfile.mkdtemp()
         for i in files:
             file(os.path.join(directory, i), 'w').write(i)
@@ -47,33 +43,31 @@ class TestSymlinkConversion(unittest.Tes
             self.assertEqual([i['name'] for i in parser.tests],
                              files)
         except:
             raise
         finally:
             shutil.rmtree(stub)
             os.chdir(oldcwd)
 
+    @unittest.skipIf(not hasattr(os, 'symlink'),
+                     "symlinks unavailable on this platform")
     def test_relpath_symlink(self):
         """
         Ensure `relative_to` works in a symlink.
         Not available on windows.
         """
 
-        symlink = getattr(os, 'symlink', None)
-        if symlink is None:
-            return # symlinks unavailable on this platform
-
         oldcwd = os.getcwd()
         workspace = tempfile.mkdtemp()
         try:
             tmpdir = os.path.join(workspace, 'directory')
             os.makedirs(tmpdir)
             linkdir = os.path.join(workspace, 'link')
-            symlink(tmpdir, linkdir)
+            os.symlink(tmpdir, linkdir)
             self.create_stub(tmpdir)
 
             # subdir with in-memory manifest
             files = ['../bar', '../fleem', '../foo', 'subfile']
             subdir = os.path.join(linkdir, 'subdir')
             os.chdir(os.path.realpath(subdir))
             for directory in (tmpdir, linkdir):
                 parser = convert([directory], relative_to='.')
@@ -85,31 +79,59 @@ class TestSymlinkConversion(unittest.Tes
 
         # a more complicated example
         oldcwd = os.getcwd()
         workspace = tempfile.mkdtemp()
         try:
             tmpdir = os.path.join(workspace, 'directory')
             os.makedirs(tmpdir)
             linkdir = os.path.join(workspace, 'link')
-            symlink(tmpdir, linkdir)
+            os.symlink(tmpdir, linkdir)
             self.create_stub(tmpdir)
             files = ['../bar', '../fleem', '../foo', 'subfile']
             subdir = os.path.join(linkdir, 'subdir')
             subsubdir = os.path.join(subdir, 'sub')
             os.makedirs(subsubdir)
             linksubdir = os.path.join(linkdir, 'linky')
             linksubsubdir = os.path.join(subsubdir, 'linky')
-            symlink(subdir, linksubdir)
-            symlink(subdir, linksubsubdir)
+            os.symlink(subdir, linksubdir)
+            os.symlink(subdir, linksubsubdir)
             for dest in (subdir,):
                 os.chdir(dest)
                 for directory in (tmpdir, linkdir):
                     parser = convert([directory], relative_to='.')
                     self.assertEqual([i['name'] for i in parser.tests],
                                      files)
         finally:
             shutil.rmtree(workspace)
             os.chdir(oldcwd)
 
+    @unittest.skipIf(not hasattr(os, 'symlink'),
+                     "symlinks unavailable on this platform")
+    def test_recursion_symlinks(self):
+        workspace = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, workspace)
+
+        # create two dirs
+        os.makedirs(os.path.join(workspace, 'dir1'))
+        os.makedirs(os.path.join(workspace, 'dir2'))
+
+        # create cyclical symlinks
+        os.symlink(os.path.join('..', 'dir1'),
+                   os.path.join(workspace, 'dir2', 'ldir1'))
+        os.symlink(os.path.join('..', 'dir2'),
+                   os.path.join(workspace, 'dir1', 'ldir2'))
+
+        # create one file in each dir
+        open(os.path.join(workspace, 'dir1', 'f1.txt'), 'a').close()
+        open(os.path.join(workspace, 'dir1', 'ldir2', 'f2.txt'), 'a').close()
+
+        data = []
+        def callback(rootdirectory, directory, subdirs, files):
+            for f in files:
+                data.append(f)
+
+        ManifestParser._walk_directories([workspace], callback)
+        self.assertEqual(sorted(data), ['f1.txt', 'f2.txt'])
+
 
 if __name__ == '__main__':
     unittest.main()