Bug 1361172 - Rewrite code for finding files in VCS checkout. r=glandium, a=release
authorGregory Szorc <gps@mozilla.com>
Thu, 18 May 2017 16:06:49 -0700
changeset 355916 91f75b4b32b7f809b4b7ad360d36616e01c8166c
parent 355915 cbad82e6e31bb8626b1fab205692b21e8e1ba346
child 355917 6e644bc1a57fddaea427a544e00ccd49a06240af
push id7134
push userryanvm@gmail.com
push dateWed, 31 May 2017 00:55:41 +0000
treeherdermozilla-esr52@91f75b4b32b7 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersglandium, release
bugs1361172
milestone52.1.3
Bug 1361172 - Rewrite code for finding files in VCS checkout. r=glandium, a=release We're getting an intermittent failure running `hg manifest` in CI. I have no clue why. What I do know is that we now have the mozversioncontrol Python package for containing utility code for interacting with version control. It is slightly more robust and I'm willing to support it more than I am check_utils.py. This commit adds a new API to our abstract repository class to obtain the files in the working directory by querying version control. Since I suspect cwd was coming into play in automation, I've also added a utility function to mozversioncontrol to attempt to find a version control checkout from the current working directory. It simply traces ancestor paths looking for a .hg or .git directory. Finally, I've ported all callers of the now-deleted API to the new one. The old code had some "../.." paths in it, meaning it only worked when cwd was just right. Since we resolve the absolute path to the checkout and store it on the repo object, I've updated the code so it should work no matter what cwd is as long as a repo can be found. I'm not 100% confident I found all consumers assuming cwd. But it's a start. I'm not 100% confident this will fix the intermittent issues in CI. But at least we should get a better error message and at least we'll be running less hacky code. MozReview-Commit-ID: AmCraHXcTEX
config/check_js_msg_encoding.py
config/check_macroassembler_style.py
config/check_spidermonkey_style.py
config/check_utils.py
python/mozversioncontrol/mozversioncontrol/__init__.py
--- a/config/check_js_msg_encoding.py
+++ b/config/check_js_msg_encoding.py
@@ -8,17 +8,19 @@
 #
 # JSErrorFormatString.format member should be in ASCII encoding.
 #----------------------------------------------------------------------------
 
 from __future__ import print_function
 
 import os
 import sys
-from check_utils import get_all_toplevel_filenames
+
+from mozversioncontrol import get_repository_from_env
+
 
 scriptname = os.path.basename(__file__);
 expected_encoding = 'ascii'
 
 # The following files don't define JSErrorFormatString.
 ignore_files = [
     'dom/base/domerr.msg',
     'js/xpconnect/src/xpc.msg',
@@ -40,20 +42,23 @@ def check_single_file(filename):
             log_fail(filename, 'not in {} encoding'.format(expected_encoding))
 
     log_pass(filename, 'ok')
     return True
 
 def check_files():
     result = True
 
-    for filename in get_all_toplevel_filenames():
+    repo = get_repository_from_env()
+    root = repo.path
+
+    for filename in repo.get_files_in_working_directory():
         if filename.endswith('.msg'):
             if filename not in ignore_files:
-                if not check_single_file(filename):
+                if not check_single_file(os.path.join(root, filename)):
                     result = False
 
     return result
 
 def main():
     if not check_files():
         sys.exit(1)
 
--- a/config/check_macroassembler_style.py
+++ b/config/check_macroassembler_style.py
@@ -20,19 +20,20 @@
 # proper methods annotations.
 #----------------------------------------------------------------------------
 
 from __future__ import print_function
 
 import difflib
 import os
 import re
-import subprocess
 import sys
-from check_utils import get_all_toplevel_filenames
+
+from mozversioncontrol import get_repository_from_env
+
 
 architecture_independent = set([ 'generic' ])
 all_architecture_names = set([ 'x86', 'x64', 'arm', 'arm64', 'mips32', 'mips64' ])
 all_shared_architecture_names = set([ 'x86_shared', 'mips_shared', 'arm', 'arm64' ])
 
 reBeforeArg = "(?<=[(,\s])"
 reArgType = "(?P<type>[\w\s:*&]+)"
 reArgName = "(?P<name>\s\w+)"
@@ -126,17 +127,17 @@ def get_macroassembler_definitions(filen
         fileAnnot = get_file_annotation(filename)
     except:
         return []
 
     style_section = False
     code_section = False
     lines = ''
     signatures = []
-    with open(os.path.join('../..', filename)) as f:
+    with open(filename) as f:
         for line in f:
             if '//{{{ check_macroassembler_style' in line:
                 style_section = True
             elif '//}}} check_macroassembler_style' in line:
                 style_section = False
             if not style_section:
                 continue
 
@@ -166,17 +167,17 @@ def get_macroassembler_definitions(filen
                 continue
 
     return signatures
 
 def get_macroassembler_declaration(filename):
     style_section = False
     lines = ''
     signatures = []
-    with open(os.path.join('../..', filename)) as f:
+    with open(filename) as f:
         for line in f:
             if '//{{{ check_macroassembler_style' in line:
                 style_section = True
             elif '//}}} check_macroassembler_style' in line:
                 style_section = False
             if not style_section:
                 continue
 
@@ -233,23 +234,27 @@ def generate_file_content(signatures):
 
 def check_style():
     # We read from the header file the signature of each function.
     decls = dict()      # type: dict(signature => ['x86', 'x64'])
 
     # We infer from each file the signature of each MacroAssembler function.
     defs = dict()       # type: dict(signature => ['x86', 'x64'])
 
+    repo = get_repository_from_env()
+
     # Select the appropriate files.
-    for filename in get_all_toplevel_filenames():
+    for filename in repo.get_files_in_working_directory():
         if not filename.startswith('js/src/jit/'):
             continue
         if 'MacroAssembler' not in filename:
             continue
 
+        filename = os.path.join(repo.path, filename)
+
         if filename.endswith('MacroAssembler.h'):
             decls = append_signatures(decls, get_macroassembler_declaration(filename))
         else:
             defs = append_signatures(defs, get_macroassembler_definitions(filename))
 
     # Compare declarations and definitions output.
     difflines = difflib.unified_diff(generate_file_content(decls),
                                      generate_file_content(defs),
--- a/config/check_spidermonkey_style.py
+++ b/config/check_spidermonkey_style.py
@@ -35,20 +35,20 @@
 #   isolation, but don't try to do any order checking between such blocks.
 #----------------------------------------------------------------------------
 
 from __future__ import print_function
 
 import difflib
 import os
 import re
-import subprocess
 import sys
-import traceback
-from check_utils import get_all_toplevel_filenames
+
+from mozversioncontrol import get_repository_from_env
+
 
 # We don't bother checking files in these directories, because they're (a) auxiliary or (b)
 # imported code that doesn't follow our coding style.
 ignored_js_src_dirs = [
    'js/src/config/',            # auxiliary stuff
    'js/src/ctypes/libffi/',     # imported code
    'js/src/devtools/',          # auxiliary stuff
    'js/src/editline/',          # imported code
@@ -236,18 +236,20 @@ def check_style():
     # - "js/src/vm/String.h"        -> "vm/String.h"
 
     non_js_dirnames = ('mfbt/',
                        'memory/mozalloc/',
                        'mozglue/')  # type: tuple(str)
     non_js_inclnames = set()        # type: set(inclname)
     js_names = dict()               # type: dict(filename, inclname)
 
+    repo = get_repository_from_env()
+
     # Select the appropriate files.
-    for filename in get_all_toplevel_filenames():
+    for filename in repo.get_files_in_working_directory():
         for non_js_dir in non_js_dirnames:
             if filename.startswith(non_js_dir) and filename.endswith('.h'):
                 inclname = 'mozilla/' + filename.split('/')[-1]
                 non_js_inclnames.add(inclname)
 
         if filename.startswith('js/public/') and filename.endswith('.h'):
             inclname = 'js/' + filename[len('js/public/'):]
             js_names[filename] = inclname
@@ -272,17 +274,17 @@ def check_style():
         inclname = js_names[filename]
         file_kind = FileKind.get(filename)
         if file_kind == FileKind.C or file_kind == FileKind.CPP or \
            file_kind == FileKind.H or file_kind == FileKind.INL_H:
             included_h_inclnames = set()    # type: set(inclname)
 
             # This script is run in js/src/, so prepend '../../' to get to the root of the Mozilla
             # source tree.
-            with open(os.path.join('../..', filename)) as f:
+            with open(os.path.join(repo.path, filename)) as f:
                 do_file(filename, inclname, file_kind, f, all_inclnames, included_h_inclnames)
 
         edges[inclname] = included_h_inclnames
 
     find_cycles(all_inclnames, edges)
 
     # Compare expected and actual output.
     difflines = difflib.unified_diff(expected_output, actual_output,
deleted file mode 100644
--- a/config/check_utils.py
+++ /dev/null
@@ -1,31 +0,0 @@
-# vim: set ts=8 sts=4 et sw=4 tw=99:
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-import subprocess
-
-def get_all_toplevel_filenames():
-    '''Get a list of all the files in the (Mercurial or Git) repository.'''
-    failed_cmds = []
-    try:
-        cmd = ['hg', 'manifest', '-q']
-        all_filenames = subprocess.check_output(cmd, universal_newlines=True,
-                                                stderr=subprocess.PIPE).split('\n')
-        return all_filenames
-    except:
-        failed_cmds.append(cmd)
-
-    try:
-        # Get the relative path to the top-level directory.
-        cmd = ['git', 'rev-parse', '--show-cdup']
-        top_level = subprocess.check_output(cmd, universal_newlines=True,
-                                                stderr=subprocess.PIPE).split('\n')[0]
-        cmd = ['git', 'ls-files', '--full-name', top_level]
-        all_filenames = subprocess.check_output(cmd, universal_newlines=True,
-                                                stderr=subprocess.PIPE).split('\n')
-        return all_filenames
-    except:
-        failed_cmds.append(cmd)
-
-    raise Exception('failed to run any of the repo manifest commands', failed_cmds)
--- a/python/mozversioncontrol/mozversioncontrol/__init__.py
+++ b/python/mozversioncontrol/mozversioncontrol/__init__.py
@@ -61,45 +61,84 @@ class Repository(object):
         working copy.'''
         raise NotImplementedError
 
     def add_remove_files(self, path):
         '''Add and remove files under `path` in this repository's working copy.
         '''
         raise NotImplementedError
 
+    def get_files_in_working_directory(self):
+        """Obtain a list of managed files in the working directory."""
+        raise NotImplementedError
+
+
 class HgRepository(Repository):
     '''An implementation of `Repository` for Mercurial repositories.'''
     def __init__(self, path):
         super(HgRepository, self).__init__(path, 'hg')
         self._env[b'HGPLAIN'] = b'1'
 
     def get_modified_files(self):
         return [line.strip().split()[1] for line in self._run('status', '--modified').splitlines()]
 
     def add_remove_files(self, path):
         args = ['addremove', path]
         if self.tool_version >= b'3.9':
             args = ['--config', 'extensions.automv='] + args
         self._run(*args)
 
+    def get_files_in_working_directory(self):
+        # Can return backslashes on Windows. Normalize to forward slashes.
+        return list(p.replace('\\', '/') for p in
+                    self._run('files', '-0').split('\0'))
+
+
 class GitRepository(Repository):
     '''An implementation of `Repository` for Git repositories.'''
     def __init__(self, path):
         super(GitRepository, self).__init__(path, 'git')
 
     def get_modified_files(self):
         # This is a little wonky, but it's good enough for this purpose.
         return [bits[1] for bits in map(lambda line: line.strip().split(), self._run('status', '--porcelain').splitlines()) if 'M' in bits[0]]
 
     def add_remove_files(self, path):
         self._run('add', path)
 
+    def get_files_in_working_directory(self):
+        return self._run('ls-files', '-z').split('\0')
+
+
+class InvalidRepoPath(Exception):
+    """Represents a failure to find a VCS repo at a specified path."""
+
+
 def get_repository_object(path):
     '''Get a repository object for the repository at `path`.
     If `path` is not a known VCS repository, raise an exception.
     '''
     if os.path.isdir(os.path.join(path, '.hg')):
         return HgRepository(path)
     elif os.path.isdir(os.path.join(path, '.git')):
         return GitRepository(path)
     else:
-        raise Exception('Unknown VCS, or not a source checkout: %s' % path)
+        raise InvalidRepoPath('Unknown VCS, or not a source checkout: %s' %
+                              path)
+
+
+def get_repository_from_env():
+    """Obtain a repository object by looking at the environment."""
+    def ancestors(path):
+        while path:
+            yield path
+            path, child = os.path.split(path)
+            if child == '':
+                break
+
+    for path in ancestors(os.getcwd()):
+        try:
+            return get_repository_object(path)
+        except InvalidRepoPath:
+            continue
+
+    raise Exception('Could not find Mercurial or Git checkout for %s' %
+                    os.getcwd())