Bug 794472 - Allow pymake to run more commands without sending them to a shell. r=ted
authorMike Hommey <mh+mozilla@glandium.org>
Thu, 18 Oct 2012 14:46:33 +0200
changeset 110788 d9bd08b3cfb11944d2a65d8901fe574fe86c2c0d
parent 110787 6511fd29e7d841f661d9da3a74837f0e2f0244a1
child 110789 d243048bc966c3657962ba32c93db77089667abc
push id93
push usernmatsakis@mozilla.com
push dateWed, 31 Oct 2012 21:26:57 +0000
reviewersted
bugs794472
milestone19.0a1
Bug 794472 - Allow pymake to run more commands without sending them to a shell. r=ted
build/pymake/pymake/data.py
build/pymake/pymake/functions.py
build/pymake/pymake/process.py
--- a/build/pymake/pymake/data.py
+++ b/build/pymake/pymake/data.py
@@ -1399,34 +1399,34 @@ class _CommandWrapper(object):
         self.usercb = cb
         process.call(self.cline, loc=self.loc, cb=self._cb, context=self.context, **self.kwargs)
 
 class _NativeWrapper(_CommandWrapper):
     def __init__(self, cline, ignoreErrors, loc, context,
                  pycommandpath, **kwargs):
         _CommandWrapper.__init__(self, cline, ignoreErrors, loc, context,
                                  **kwargs)
-        # get the module and method to call
-        parts, badchar = process.clinetoargv(cline, blacklist_gray=False)
-        if parts is None:
-            raise DataError("native command '%s': shell metacharacter '%s' in command line" % (cline, badchar), self.loc)
-        if len(parts) < 2:
-            raise DataError("native command '%s': no method name specified" % cline, self.loc)
         if pycommandpath:
             self.pycommandpath = re.split('[%s\s]+' % os.pathsep,
                                           pycommandpath)
         else:
             self.pycommandpath = None
-        self.module = parts[0]
-        self.method = parts[1]
-        self.cline_list = parts[2:]
 
     def __call__(self, cb):
+        # get the module and method to call
+        parts, badchar = process.clinetoargv(self.cline, self.kwargs['cwd'])
+        if parts is None:
+            raise DataError("native command '%s': shell metacharacter '%s' in command line" % (cline, badchar), self.loc)
+        if len(parts) < 2:
+            raise DataError("native command '%s': no method name specified" % cline, self.loc)
+        module = parts[0]
+        method = parts[1]
+        cline_list = parts[2:]
         self.usercb = cb
-        process.call_native(self.module, self.method, self.cline_list,
+        process.call_native(module, method, cline_list,
                             loc=self.loc, cb=self._cb, context=self.context,
                             pycommandpath=self.pycommandpath, **self.kwargs)
 
 def getcommandsforrule(rule, target, makefile, prerequisites, stem):
     v = Variables(parent=target.variables)
     setautomaticvariables(v, makefile, target, prerequisites)
     if stem is not None:
         setautomatic(v, '*', [stem])
--- a/build/pymake/pymake/functions.py
+++ b/build/pymake/pymake/functions.py
@@ -1,14 +1,14 @@
 """
 Makefile functions.
 """
 
 import parser, util
-import subprocess, os, logging
+import subprocess, os, logging, sys
 from globrelative import glob
 from cStringIO import StringIO
 
 log = logging.getLogger('pymake.data')
 
 def emit_expansions(descend, *expansions):
     """Helper function to emit all expansions within an input set."""
     for expansion in expansions:
@@ -761,26 +761,38 @@ class FlavorFunction(Function):
 class ShellFunction(Function):
     name = 'shell'
     minargs = 1
     maxargs = 1
 
     __slots__ = Function.__slots__
 
     def resolve(self, makefile, variables, fd, setting):
-        #TODO: call this once up-front somewhere and save the result?
-        shell, msys = util.checkmsyscompat()
+        from process import prepare_command
         cline = self._arguments[0].resolvestr(makefile, variables, setting)
+        executable, cline = prepare_command(cline, makefile.workdir, self.loc)
+
+        # subprocess.Popen doesn't use the PATH set in the env argument for
+        # finding the executable on some platforms (but strangely it does on
+        # others!), so set os.environ['PATH'] explicitly.
+        oldpath = os.environ['PATH']
+        if makefile.env is not None and 'PATH' in makefile.env:
+            os.environ['PATH'] = makefile.env['PATH']
 
-        log.debug("%s: running shell command '%s'" % (self.loc, cline))
-        cline = [shell, "-c", cline]
-        p = subprocess.Popen(cline, env=makefile.env, shell=False,
-                             stdout=subprocess.PIPE, cwd=makefile.workdir)
+        log.debug("%s: running command '%s'" % (self.loc, ' '.join(cline)))
+        try:
+            p = subprocess.Popen(cline, executable=executable, env=makefile.env, shell=False,
+                                 stdout=subprocess.PIPE, cwd=makefile.workdir)
+        except OSError, e:
+            print >>sys.stderr, "Error executing command %s" % cline[0], e
+            return
+        finally:
+            os.environ['PATH'] = oldpath
+
         stdout, stderr = p.communicate()
-
         stdout = stdout.replace('\r\n', '\n')
         if stdout.endswith('\n'):
             stdout = stdout[:-1]
         stdout = stdout.replace('\n', ' ')
 
         fd.write(stdout)
 
 class ErrorFunction(Function):
--- a/build/pymake/pymake/process.py
+++ b/build/pymake/pymake/process.py
@@ -10,118 +10,267 @@ import subprocess, shlex, re, logging, s
 subprocess._cleanup = lambda: None
 import command, util
 if sys.platform=='win32':
     import win32process
 
 _log = logging.getLogger('pymake.process')
 
 _escapednewlines = re.compile(r'\\\n')
-# Characters that most likely indicate a shell script and that native commands
-# should reject
-_blacklist = re.compile(r'[$><;\[~`|&]' +
-    r'|\${|(?:^|\s){(?:$|\s)')  # Blacklist ${foo} and { commands }
-# Characters that probably indicate a shell script, but that native commands
-# shouldn't just reject
-_graylist = re.compile(r'[()]')
-# Characters that indicate we need to glob
-_needsglob = re.compile(r'[\*\?]')
+
+def tokens2re(tokens):
+    # Create a pattern for non-escaped tokens, in the form:
+    #   (?<!\\)(?:a|b|c...)
+    # This is meant to match patterns a, b, or c, or ... if they are not
+    # preceded by a backslash.
+    # where a, b, c... are in the form
+    #   (?P<name>pattern)
+    # which matches the pattern and captures it in a named match group.
+    # The group names and patterns come are given as a dict in the function
+    # argument.
+    nonescaped = r'(?<!\\)(?:%s)' % '|'.join('(?P<%s>%s)' % (name, value) for name, value in tokens.iteritems())
+    # The final pattern matches either the above pattern, or an escaped
+    # backslash, captured in the "escape" match group.
+    return re.compile('(?:%s|%s)' % (nonescaped, r'(?P<escape>\\\\)'))
+
+_unquoted_tokens = tokens2re({
+  'whitespace': r'[\t\r\n ]',
+  'quote': r'[\'"]',
+  'comment': '#',
+  'special': r'[<>&|`~(){}$;]',
+  'backslashed': r'\\[^\\]',
+  'glob': r'[\*\?]',
+})
+
+_doubly_quoted_tokens = tokens2re({
+  'quote': '"',
+  'backslashedquote': r'\\"',
+  'special': '\$',
+  'backslashed': r'\\[^\\"]',
+})
+
+class MetaCharacterException(Exception):
+    def __init__(self, char):
+        self.char = char
+
+class ClineSplitter(list):
+    """
+    Parses a given command line string and creates a list of command
+    and arguments, with wildcard expansion.
+    """
+    def __init__(self, cline, cwd):
+        self.cwd = cwd
+        self.arg = ''
+        self.cline = cline
+        self.glob = False
+        self._parse_unquoted()
+
+    def _push(self, str):
+        """
+        Push the given string as part of the current argument
+        """
+        self.arg += str
+
+    def _next(self):
+        """
+        Finalize current argument, effectively adding it to the list.
+        Perform globbing if needed.
+        """
+        if not self.arg:
+            return
+        if self.glob:
+            if os.path.isabs(self.arg):
+                path = self.arg
+            else:
+                path = os.path.join(self.cwd, self.arg)
+            globbed = glob.glob(path)
+            if not globbed:
+                # If globbing doesn't find anything, the literal string is
+                # used.
+                self.append(self.arg)
+            else:
+                self.extend(f[len(path)-len(self.arg):] for f in globbed)
+            self.glob = False
+        else:
+            self.append(self.arg)
+        self.arg = ''
 
-def clinetoargv(cline, blacklist_gray):
+    def _parse_unquoted(self):
+        """
+        Parse command line remainder in the context of an unquoted string.
+        """
+        while self.cline:
+            # Find the next token
+            m = _unquoted_tokens.search(self.cline)
+            # If we find none, the remainder of the string can be pushed to
+            # the current argument and the argument finalized
+            if not m:
+                self._push(self.cline)
+                break
+            # The beginning of the string, up to the found token, is part of
+            # the current argument
+            self._push(self.cline[:m.start()])
+            self.cline = self.cline[m.end():]
+
+            match = dict([(name, value) for name, value in m.groupdict().items() if value])
+            if 'quote' in match:
+                # " or ' start a quoted string
+                if match['quote'] == '"':
+                    self._parse_doubly_quoted()
+                else:
+                    self._parse_quoted()
+            elif 'comment' in match:
+                # Comments are ignored. The current argument can be finalized,
+                # and parsing stopped.
+                break
+            elif 'special' in match:
+                # Unquoted, non-escaped special characters need to be sent to a
+                # shell.
+                raise MetaCharacterException, match['special']
+            elif 'whitespace' in match:
+                # Whitespaces terminate current argument.
+                self._next()
+            elif 'escape' in match:
+                # Escaped backslashes turn into a single backslash
+                self._push('\\')
+            elif 'backslashed' in match:
+                # Backslashed characters are unbackslashed
+                # e.g. echo \a -> a
+                self._push(match['backslashed'][1])
+            elif 'glob' in match:
+                # ? or * will need globbing
+                self.glob = True
+                self._push(m.group(0))
+            else:
+                raise Exception, "Shouldn't reach here"
+        self._next()
+
+    def _parse_quoted(self):
+        # Single quoted strings are preserved, except for the final quote
+        index = self.cline.find("'")
+        if index == -1:
+            raise Exception, 'Unterminated quoted string in command'
+        self._push(self.cline[:index])
+        self.cline = self.cline[index+1:]
+
+    def _parse_doubly_quoted(self):
+        if not self.cline:
+            raise Exception, 'Unterminated quoted string in command'
+        while self.cline:
+            m = _doubly_quoted_tokens.search(self.cline)
+            if not m:
+                raise Exception, 'Unterminated quoted string in command'
+            self._push(self.cline[:m.start()])
+            self.cline = self.cline[m.end():]
+            match = dict([(name, value) for name, value in m.groupdict().items() if value])
+            if 'quote' in match:
+                # a double quote ends the quoted string, so go back to
+                # unquoted parsing
+                return
+            elif 'special' in match:
+                # Unquoted, non-escaped special characters in a doubly quoted
+                # string still have a special meaning and need to be sent to a
+                # shell.
+                raise MetaCharacterException, match['special']
+            elif 'escape' in match:
+                # Escaped backslashes turn into a single backslash
+                self._push('\\')
+            elif 'backslashedquote' in match:
+                # Backslashed double quotes are un-backslashed
+                self._push('"')
+            elif 'backslashed' in match:
+                # Backslashed characters are kept backslashed
+                self._push(match['backslashed'])
+
+def clinetoargv(cline, cwd):
     """
     If this command line can safely skip the shell, return an argv array.
     @returns argv, badchar
     """
     str = _escapednewlines.sub('', cline)
-    m = _blacklist.search(str)
-    if m is not None:
-        return None, m.group(0)
-    if blacklist_gray:
-        m = _graylist.search(str)
-        if m is not None:
-            return None, m.group(0)
-
-    args = shlex.split(str, comments=True)
+    try:
+        args = ClineSplitter(str, cwd)
+    except MetaCharacterException, e:
+        return None, e.char
 
     if len(args) and args[0].find('=') != -1:
         return None, '='
 
     return args, None
 
-def doglobbing(args, cwd):
-    """
-    Perform any needed globbing on the argument list passed in
-    """
-    globbedargs = []
-    for arg in args:
-        if _needsglob.search(arg):
-            globbedargs.extend(glob.glob(os.path.join(cwd, arg)))
-        else:
-            globbedargs.append(arg)
-
-    return globbedargs
-
+# shellwords contains a set of shell builtin commands that need to be
+# executed within a shell. It also contains a set of commands that are known
+# to be giving problems when run directly instead of through the msys shell.
 shellwords = (':', '.', 'break', 'cd', 'continue', 'exec', 'exit', 'export',
               'getopts', 'hash', 'pwd', 'readonly', 'return', 'shift', 
               'test', 'times', 'trap', 'umask', 'unset', 'alias',
               'set', 'bind', 'builtin', 'caller', 'command', 'declare',
               'echo', 'enable', 'help', 'let', 'local', 'logout', 
               'printf', 'read', 'shopt', 'source', 'type', 'typeset',
-              'ulimit', 'unalias', 'set')
+              'ulimit', 'unalias', 'set', 'find')
 
-def call(cline, env, cwd, loc, cb, context, echo, justprint=False):
+def prepare_command(cline, cwd, loc):
+    """
+    Returns a list of command and arguments for the given command line string.
+    If the command needs to be run through a shell for some reason, the
+    returned list contains the shell invocation.
+    """
+
     #TODO: call this once up-front somewhere and save the result?
     shell, msys = util.checkmsyscompat()
 
     shellreason = None
+    executable = None
     if msys and cline.startswith('/'):
         shellreason = "command starts with /"
     else:
-        argv, badchar = clinetoargv(cline, blacklist_gray=True)
+        argv, badchar = clinetoargv(cline, cwd)
         if argv is None:
             shellreason = "command contains shell-special character '%s'" % (badchar,)
         elif len(argv) and argv[0] in shellwords:
             shellreason = "command starts with shell primitive '%s'" % (argv[0],)
-        else:
-            argv = doglobbing(argv, cwd)
+        elif argv and (os.sep in argv[0] or os.altsep and os.altsep in argv[0]):
+            executable = util.normaljoin(cwd, argv[0])
+            # Avoid "%1 is not a valid Win32 application" errors, assuming
+            # that if the executable path is to be resolved with PATH, it will
+            # be a Win32 executable.
+            if sys.platform == 'win32' and os.path.isfile(executable) and open(executable, 'rb').read(2) == "#!":
+                shellreason = "command executable starts with a hashbang"
 
     if shellreason is not None:
         _log.debug("%s: using shell: %s: '%s'", loc, shellreason, cline)
         if msys:
             if len(cline) > 3 and cline[1] == ':' and cline[2] == '/':
                 cline = '/' + cline[0] + cline[2:]
-        cline = [shell, "-c", cline]
-        context.call(cline, shell=False, env=env, cwd=cwd, cb=cb, echo=echo,
-                     justprint=justprint)
-        return
+        argv = [shell, "-c", cline]
+        executable = None
+
+    return executable, argv
+
+def call(cline, env, cwd, loc, cb, context, echo, justprint=False):
+    executable, argv = prepare_command(cline, cwd, loc)
 
     if not len(argv):
         cb(res=0)
         return
 
     if argv[0] == command.makepypath:
         command.main(argv[1:], env, cwd, cb)
         return
 
     if argv[0:2] == [sys.executable.replace('\\', '/'),
                      command.makepypath.replace('\\', '/')]:
         command.main(argv[2:], env, cwd, cb)
         return
 
-    if argv[0].find('/') != -1:
-        executable = util.normaljoin(cwd, argv[0])
-    else:
-        executable = None
-
     context.call(argv, executable=executable, shell=False, env=env, cwd=cwd, cb=cb,
                  echo=echo, justprint=justprint)
 
 def call_native(module, method, argv, env, cwd, loc, cb, context, echo, justprint=False,
                 pycommandpath=None):
-    argv = doglobbing(argv, cwd)
     context.call_native(module, method, argv, env=env, cwd=cwd, cb=cb,
                         echo=echo, justprint=justprint, pycommandpath=pycommandpath)
 
 def statustoresult(status):
     """
     Convert the status returned from waitpid into a prettier numeric result.
     """
     sig = status & 0xFF