I noticed Expansion.resolve still comes up really high on perf charts. This patch makes it much easier to resolve expansions which are just literals, which is very common for variable names. Unfortunately, this makes the code a fair bit more complex, and doesn't help nearly as much as I'd like. I'm beginning to wonder if getting gmake performance parity is impossible, or at least improbable, given the current architecture, but I can't think of an alternate architecture that is better.

"""
Skipping shell invocations is good, when possible. This wrapper around subprocess does the dirty work of
parsing command lines into argv and making sure that no shell magic is being used.
"""

import subprocess, shlex, re, logging, sys, traceback, os
import command, util
if sys.platform=='win32':
    import win32process

# Module-level logger; call() reports at debug level whenever it has to fall back to the shell.
_log = logging.getLogger('pymake.process')

# Characters that need a real shell to interpret. A command line containing
# any of them is rejected by clinetoargv().
_blacklist = re.compile(r'[=\\$><;*?[{~`|&]')
def clinetoargv(cline):
    """
    If this command line can safely skip the shell, return an argv array.

    The line is scanned for the shell-special characters in _blacklist. When
    one is found the command cannot bypass the shell, and the first offending
    character is reported so the caller can explain why. Otherwise the line is
    split with shlex, with '#' comments stripped.

    @param cline: the command line, as a single string
    @returns argv, badchar -- (list of words, None) when the shell can be
             skipped, or (None, offending character) when it cannot
    """

    m = _blacklist.search(cline)
    if m is not None:
        return None, m.group(0)

    return shlex.split(cline, comments=True), None

# Commands implemented by the shell itself. A command line whose first word is
# one of these cannot be executed directly and must be handed to the shell.
# (The list previously named 'set' twice; each word now appears once.)
shellwords = (':', '.', 'break', 'cd', 'continue', 'exec', 'exit', 'export',
              'getopts', 'hash', 'pwd', 'readonly', 'return', 'shift',
              'test', 'times', 'trap', 'umask', 'unset', 'alias',
              'set', 'bind', 'builtin', 'caller', 'command', 'declare',
              'echo', 'enable', 'help', 'let', 'local', 'logout',
              'printf', 'read', 'shopt', 'source', 'type', 'typeset',
              'ulimit', 'unalias')

def call(cline, env, cwd, loc, cb, context, echo):
    """
    Run the command line `cline`, skipping the shell whenever that is safe.

    The command goes through the shell when, under msys, it starts with '/',
    when it contains a shell-special character (see clinetoargv), or when its
    first word is a shell primitive (see shellwords). Otherwise it is split
    into argv and either dispatched to command.main (when it invokes
    command.makepypath, directly or through sys.executable) or queued on
    `context` for direct execution.

    @param cline: the command line, as a single string
    @param env: environment passed through to the process
    @param cwd: working directory; also used to resolve a program name that
                contains a '/'
    @param loc: location of the command, used only in the debug log message
    @param cb: completion callback, forwarded to whichever path runs the command
    @param context: object whose call() method queues the process
                    (see ParallelContext.call)
    @param echo: text to print before the process starts, or None
    """
    #TODO: call this once up-front somewhere and save the result?
    # NOTE(review): presumably (path of the shell, whether we run under msys) -- confirm in util
    shell, msys = util.checkmsyscompat()

    shellreason = None
    if msys and cline.startswith('/'):
        shellreason = "command starts with /"
    else:
        argv, badchar = clinetoargv(cline)
        if argv is None:
            shellreason = "command contains shell-special character '%s'" % (badchar,)
        elif len(argv) and argv[0] in shellwords:
            shellreason = "command starts with shell primitive '%s'" % (argv[0],)

    if shellreason is not None:
        _log.debug("%s: using shell: %s: '%s'", loc, shellreason, cline)
        if msys:
            # Under msys, invoke the shell explicitly rather than via shell=True.
            cline = [shell, "-c", cline]
        context.call(cline, shell=not msys, env=env, cwd=cwd, cb=cb, echo=echo)
        return

    if not len(argv):
        # Nothing to execute (empty or comment-only line).
        # NOTE(review): the completion call was missing from this branch; reporting
        # success as cb(0) is an assumption -- confirm the callback signature.
        cb(0)
        return

    if argv[0] == command.makepypath:
        command.main(argv[1:], env, cwd, context, cb)
        return

    if argv[0:2] == [sys.executable.replace('\\', '/'),
                     command.makepypath.replace('\\', '/')]:
        command.main(argv[2:], env, cwd, context, cb)
        return

    # A program named with a path component is resolved against cwd; a bare
    # name is left for the normal executable lookup.
    if argv[0].find('/') != -1:
        executable = os.path.join(cwd, argv[0])
    else:
        executable = None

    context.call(argv, executable=executable, shell=False, env=env, cwd=cwd, cb=cb, echo=echo)

def statustoresult(status):
    """
    Convert the status returned from waitpid into a prettier numeric result.

    @param status: integer status word, as returned by os.waitpid
    @returns the negated low byte of `status` when that byte is non-zero
             (treated as a signal number), otherwise `status` shifted right by
             eight bits (the exit code)
    """
    # NOTE(review): the whole low byte is used, so a status with the 0x80 bit
    # set yields a value below -128 -- confirm this is intended.
    sig = status & 0xFF
    if sig:
        return -sig

    return status >> 8

def getcontext(jcount):
    """
    Create the ParallelContext that will run up to `jcount` processes at once.

    @param jcount: maximum number of simultaneous processes; must be positive
    @returns a new ParallelContext
    """
    assert jcount > 0
    context = ParallelContext(jcount)
    return context

class ParallelContext(object):
    """
    Manages the parallel execution of processes.

    Work is queued with defer()/call() and started by run(), which keeps at
    most `jcount` processes running at once. The static spin() loop drives
    every live context and hands each finished process's result to its
    callback.
    """

    # Every context between __init__() and finish(); spin() iterates this set.
    _allcontexts = set()

    def __init__(self, jcount):
        """
        @param jcount: maximum number of processes this context runs at once
        """
        self.jcount = jcount
        # Not read anywhere in this class -- presumably consulted by callers.
        self.exit = False

        self.pending = [] # list of (cb, args, kwargs)
        self.running = [] # list of (subprocess, cb)

        # Register with the class so that spin() can find this context.
        self._allcontexts.add(self)

    def finish(self):
        """
        Retire this context. It must have no pending or running work left.
        """
        assert len(self.pending) == 0 and len(self.running) == 0, "pending: %i running: %i" % (len(self.pending), len(self.running))
        self._allcontexts.remove(self)

    def run(self):
        """
        Invoke queued callbacks, oldest first, while fewer than `jcount`
        processes are running.
        """
        while len(self.pending) and len(self.running) < self.jcount:
            cb, args, kwargs = self.pending.pop(0)
            cb(*args, **kwargs)

    def defer(self, cb, *args, **kwargs):
        """
        Queue cb(*args, **kwargs) to be invoked by a later run().
        """
        self.pending.append((cb, args, kwargs))

    def _docall(self, argv, executable, shell, env, cwd, cb, echo):
        """
        Start the process now and record it as running.

        If the process cannot be started, the error is printed to stderr and
        `cb` is told immediately; nothing is added to `running`.
        """
        if echo is not None:
            print(echo)
        try:
            p = subprocess.Popen(argv, executable=executable, shell=shell, env=env, cwd=cwd)
        except OSError:
            # sys.exc_info() instead of "except OSError, e" so that the same
            # source parses on every Python version.
            e = sys.exc_info()[1]
            sys.stderr.write("%s\n" % (e,))
            # NOTE(review): the failure report was missing from this handler;
            # -127 as the "could not launch" result is an assumption -- confirm.
            cb(-127)
            return

        self.running.append((p, cb))

    def call(self, argv, shell, env, cwd, cb, echo, executable=None):
        """
        Asynchronously call the process

        The launch is only queued here; the process is started by a later
        run(), and `cb` receives its result once it has finished.
        """

        self.defer(self._docall, argv, executable, shell, env, cwd, cb, echo)

    if sys.platform == 'win32':
        @staticmethod
        def _waitany():
            """Block until one running process of any context finishes."""
            return win32process.WaitForAnyProcess([p for c in ParallelContext._allcontexts for p, cb in c.running])

        @staticmethod
        def _comparepid(pid, process):
            """Tell whether the value reported by _waitany() is `process`."""
            return pid == process
    else:
        @staticmethod
        def _waitany():
            """Block until any child process finishes; returns (pid, status)."""
            return os.waitpid(-1, 0)

        @staticmethod
        def _comparepid(pid, process):
            """Tell whether `pid`, as reported by _waitany(), belongs to `process`."""
            return pid == process.pid

    @staticmethod
    def spin():
        """
        Spin the 'event loop', and never return.

        Each pass starts whatever pending work fits, then reports at most one
        finished process to its callback before starting over.
        """

        while True:
            clist = list(ParallelContext._allcontexts)
            for c in clist:
                c.run()

            # In python 2.4, subprocess instances wait on child processes under the hood when they are created... this
            # unfortunate behavior means that before using os.waitpid, we need to check the status using .poll()
            found = False
            for c in clist:
                for i, (p, cb) in enumerate(c.running):
                    result = p.poll()
                    if result is not None:
                        # Stop scanning right after the deletion: the list has
                        # just been modified and the callback may change it further.
                        del c.running[i]
                        cb(result)
                        found = True
                        break

                if found: break
            if found: continue

            dowait = util.any((len(c.running) for c in ParallelContext._allcontexts))

            # NOTE(review): when nothing is running this loop goes round again
            # without blocking -- confirm that is acceptable when no work is pending.
            if dowait:
                pid, status = ParallelContext._waitany()
                result = statustoresult(status)

                for c in ParallelContext._allcontexts:
                    for i, (p, cb) in enumerate(c.running):
                        if ParallelContext._comparepid(pid, p):
                            del c.running[i]
                            cb(result)
                            found = True
                            break

                    if found: break

def makedeferrable(usercb, **userkwargs):
    """
    Wrap `usercb` so that the keyword arguments given here accompany every call.

    @param usercb: the callable to wrap
    @param userkwargs: keyword arguments added to each invocation; they take
                       precedence over same-named keywords passed at call time
    @returns a callable that forwards its own arguments, plus `userkwargs`,
             to `usercb` and returns its result
    """
    def cb(*args, **kwargs):
        # kwargs is a fresh dict on every call, so updating it in place does
        # not disturb userkwargs or the caller's data.
        kwargs.update(userkwargs)
        return usercb(*args, **kwargs)

    return cb