Bug 1497898 - Add a custom implementation of os.walk, r=ato
author: James Graham <james@hoppipolla.co.uk>
Fri, 16 Nov 2018 18:48:28 +0000
changeset: 503253 10e90d3295ee7a29b094b42912cdb584dfd47e98
parent: 503252 9d61715037440d58adf237622245bb847b9a898c
child: 503254 fe2d962a8ed2e722266a417df17f7085aeabad3b
push id: 10290
push user: ffxbld-merge
push date: Mon, 03 Dec 2018 16:23:23 +0000
treeherder: mozilla-beta@700bed2445e6 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: ato
bugs: 1497898
milestone: 65.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1497898 - Add a custom implementation of os.walk, r=ato

Compared to the normal os.walk this has a couple of differences:
* It returns lists of (name, stat) for filenames and directories, allowing callers to reuse the stat data without going back to the system to re-request it.
* Directories are always returned as paths relative to the root, and the root itself is returned as the empty string.
* It is non-recursive.

There are also a few features missing that aren't required for our use cases.

Depends on D8226

Differential Revision: https://phabricator.services.mozilla.com/D8227
testing/web-platform/tests/tools/lint/tests/test_lint.py
testing/web-platform/tests/tools/manifest/vcs.py
--- a/testing/web-platform/tests/tools/lint/tests/test_lint.py
+++ b/testing/web-platform/tests/tools/lint/tests/test_lint.py
@@ -395,23 +395,23 @@ def test_check_css_globally_unique_ignor
             assert rv == 0
             assert mocked_check_path.call_count == 1
             assert mocked_check_file_contents.call_count == 1
     assert caplog.text == ""
 
 
 def test_all_filesystem_paths():
     with mock.patch(
-            'os.walk',
-            return_value=[('.',
-                           ['dir_a', 'dir_b'],
-                           ['file_a', 'file_b']),
-                          (os.path.join('.', 'dir_a'),
+            'tools.lint.lint.walk',
+            return_value=[('',
+                           [('dir_a', None), ('dir_b', None)],
+                           [('file_a', None), ('file_b', None)]),
+                          ('dir_a',
                            [],
-                           ['file_c', 'file_d'])]
+                           [('file_c', None), ('file_d', None)])]
     ):
         got = list(lint_mod.all_filesystem_paths('.'))
         assert got == ['file_a',
                        'file_b',
                        os.path.join('dir_a', 'file_c'),
                        os.path.join('dir_a', 'file_d')]
 
 
--- a/testing/web-platform/tests/tools/manifest/vcs.py
+++ b/testing/web-platform/tests/tools/manifest/vcs.py
@@ -186,8 +186,61 @@ class GitIgnoreCache(CacheFile):
 
     def __getitem__(self, key):
         return self.data[key]
 
     def __setitem__(self, key, value):
         if self.data.get(key) != value:
             self.modified = True
             self.data[key] = value
+
+
+def walk(root):
+    """Iteratively walk the directory tree below ``root``.
+
+    This has a similar interface to os.walk, yielding
+    ``(dirpath, dirnames, filenames)`` triples, with these differences:
+
+    * ``dirnames`` and ``filenames`` are lists of ``(name, stat)`` tuples,
+      where ``stat`` is the result of os.stat for that entry. This lets
+      the caller reuse the stat data instead of requesting it again.
+    * ``dirpath`` is always relative to ``root``; the root itself is
+      yielded as the empty string.
+    * The implementation is not recursive; pending directories are kept
+      in a FIFO queue, so the tree is visited breadth-first.
+
+    Directories that cannot be listed, and entries that cannot be
+    stat'ed, are silently skipped.
+
+    :param root: Path of the directory to walk.
+    :returns: Generator of ``(rel_path, dirs, non_dirs)`` tuples.
+    """
+
+    # Bind the helpers to locals once; they are called for every entry.
+    get_stat = os.stat
+    listdir = os.listdir
+    join = os.path.join
+    is_dir = stat.S_ISDIR
+    is_link = stat.S_ISLNK
+    relpath = os.path.relpath
+
+    root = os.path.abspath(root)
+    stack = deque([(root, "")])
+
+    while stack:
+        dir_path, rel_path = stack.popleft()
+        try:
+            names = listdir(dir_path)
+        except OSError:
+            # Unreadable or vanished directory: skip it rather than abort.
+            continue
+
+        dirs, non_dirs = [], []
+        for name in names:
+            path = join(dir_path, name)
+            try:
+                path_stat = get_stat(path)
+            except OSError:
+                # E.g. a broken symlink or a file removed mid-walk.
+                continue
+            if is_dir(path_stat.st_mode):
+                dirs.append((name, path_stat))
+            else:
+                non_dirs.append((name, path_stat))
+
+        yield rel_path, dirs, non_dirs
+        for name, path_stat in dirs:
+            new_path = join(dir_path, name)
+            # NOTE(review): os.stat follows symlinks, so this check is not
+            # expected to be true for its results; confirm whether
+            # symlinked directories are meant to be descended into.
+            if not is_link(path_stat.st_mode):
+                # Compute the path relative to root, not to the current
+                # working directory (the default for os.path.relpath).
+                stack.append((new_path, relpath(new_path, root)))