python/mozbuild/mozbuild/shellutil.py
author Chris Manchester <cmanchester@mozilla.com>
Thu, 12 May 2016 11:55:59 -0700
changeset 323517 5d4213512f1af0c13fb721db996b6f440bb3e789
parent 318986 adf8fb3e8d48a4704bde59394c6c51a6ff7e40be
permissions -rw-r--r--
Bug 1257326 - Move MOZ_SERVICES_SYNC to Python configure. r=glandium MozReview-Commit-ID: DCDoSgHfwVY

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

import re


def _tokens2re(**tokens):
    # Create a pattern for non-escaped tokens, in the form:
    #   (?<!\\)(?:a|b|c...)
    # This is meant to match patterns a, b, or c, or ... if they are not
    # preceded by a backslash.
    # where a, b, c... are in the form
    #   (?P<name>pattern)
    # which matches the pattern and captures it in a named match group.
    # The group names and patterns are given as arguments.
    all_tokens = '|'.join('(?P<%s>%s)' % (name, value)
                          for name, value in tokens.iteritems())
    nonescaped = r'(?<!\\)(?:%s)' % all_tokens

    # The final pattern matches either the above pattern, or an escaped
    # backslash, captured in the "escape" match group.
    return re.compile('(?:%s|%s)' % (nonescaped, r'(?P<escape>\\\\)'))

# Tokens that are significant outside of any quoting. Each keyword is the
# name of the match group _ClineSplitter._parse_unquoted dispatches on.
UNQUOTED_TOKENS_RE = _tokens2re(
  whitespace=r'[\t\r\n ]+',
  quote=r'[\'"]',
  comment='#',
  special=r'[<>&|`~(){}$;\*\?]',
  # A backslash followed by any character other than a backslash; a pair of
  # backslashes is captured by the "escape" group that _tokens2re adds.
  backslashed=r'\\[^\\]',
)

# Tokens that are significant inside a double-quoted string. Each keyword is
# the name of the match group _ClineSplitter._parse_doubly_quoted dispatches
# on.
DOUBLY_QUOTED_TOKENS_RE = _tokens2re(
  quote='"',
  backslashedquote=r'\\"',
  # Raw string: '\$' in a regular string literal is an invalid escape
  # sequence, which Python 3 warns about. The pattern itself is unchanged.
  special=r'\$',
  backslashed=r'\\[^\\"]',
)

# Matches a backslash immediately followed by a newline. split() removes
# these sequences from the command line before parsing it.
ESCAPED_NEWLINES_RE = re.compile(r'\\\n')

# This regexp contains the same characters as all those listed in
# UNQUOTED_TOKENS_RE, plus the backslash itself. Please keep in sync.
# _quote() encloses a string in single quotes when any of them is found.
SHELL_QUOTE_RE = re.compile(r'[\\\t\r\n \'\"#<>&|`~(){}$;\*\?]')


class MetaCharacterException(Exception):
    '''
    Raised while splitting a command line when an unquoted, non-escaped
    character with a special meaning for the shell is found.

    The offending character is available in the `char` attribute.
    '''
    def __init__(self, char):
        # Forward the character to Exception so that it shows up in args,
        # str() and repr(), and so that the exception can be rebuilt from
        # its args (copy/pickle call the constructor with them).
        super(MetaCharacterException, self).__init__(char)
        self.char = char


class _ClineSplitter(object):
    '''
    Parses a given command line string and creates a list of command
    and arguments.

    Parsing happens in the constructor; the list of arguments is then
    available in the `result` attribute. No expansion of any kind is done:
    unquoted shell metacharacters (including the wildcards * and ?) raise
    MetaCharacterException instead.
    '''
    def __init__(self, cline):
        # Argument currently being accumulated. None means no argument has
        # been started; '' is a started, but so far empty, argument (e.g.
        # from an empty pair of quotes).
        self.arg = None
        # Remainder of the command line that is still to be parsed.
        self.cline = cline
        self.result = []
        self._parse_unquoted()

    def _push(self, str):
        '''
        Push the given string as part of the current argument
        '''
        if self.arg is None:
            self.arg = ''
        self.arg += str

    def _next(self):
        '''
        Finalize current argument, effectively adding it to the list.
        Does nothing when no argument has been started.
        '''
        if self.arg is None:
            return
        self.result.append(self.arg)
        self.arg = None

    def _parse_unquoted(self):
        '''
        Parse command line remainder in the context of an unquoted string.

        Raises MetaCharacterException on an unquoted, non-escaped special
        character, and Exception on an unterminated quoted string.
        '''
        while self.cline:
            # Find the next token
            m = UNQUOTED_TOKENS_RE.search(self.cline)
            # If we find none, the remainder of the string can be pushed to
            # the current argument and the argument finalized
            if not m:
                self._push(self.cline)
                break
            # The beginning of the string, up to the found token, is part of
            # the current argument
            if m.start():
                self._push(self.cline[:m.start()])
            self.cline = self.cline[m.end():]

            # Keep only the group that actually matched
            match = {name: value
                     for name, value in m.groupdict().items() if value}
            if 'quote' in match:
                # " or ' start a quoted string
                if match['quote'] == '"':
                    self._parse_doubly_quoted()
                else:
                    self._parse_quoted()
            elif 'comment' in match:
                # Comments are ignored. The current argument can be finalized,
                # and parsing stopped.
                break
            elif 'special' in match:
                # Unquoted, non-escaped special characters need to be sent to a
                # shell.
                raise MetaCharacterException(match['special'])
            elif 'whitespace' in match:
                # Whitespaces terminate current argument.
                self._next()
            elif 'escape' in match:
                # Escaped backslashes turn into a single backslash
                self._push('\\')
            elif 'backslashed' in match:
                # Backslashed characters are unbackslashed
                # e.g. echo \a -> a
                self._push(match['backslashed'][1])
            else:
                raise Exception("Shouldn't reach here")
        # Finalize whatever argument is pending. _next() ignores the "no
        # argument started" case (None) by itself; testing the truthiness of
        # self.arg here would also drop a trailing empty argument such as the
        # one in `a ''`, while the same argument is kept in `a '' b`.
        self._next()

    def _parse_quoted(self):
        '''
        Parse command line remainder in the context of a single-quoted
        string, the opening quote having been consumed already.

        Raises Exception when there is no closing quote.
        '''
        # Single quoted strings are preserved, except for the final quote
        index = self.cline.find("'")
        if index == -1:
            raise Exception('Unterminated quoted string in command')
        self._push(self.cline[:index])
        self.cline = self.cline[index+1:]

    def _parse_doubly_quoted(self):
        '''
        Parse command line remainder in the context of a double-quoted
        string, the opening quote having been consumed already.

        Raises MetaCharacterException on a non-escaped special character,
        and Exception when there is no closing quote.
        '''
        # The only regular way out of this loop is finding the closing quote.
        # Running out of input, whether immediately or right after an escape
        # token such as in `"a\"`, leaves nothing to match and is reported as
        # an unterminated string.
        while True:
            m = DOUBLY_QUOTED_TOKENS_RE.search(self.cline)
            if not m:
                raise Exception('Unterminated quoted string in command')
            # Pushed even when empty, so that "" starts an (empty) argument
            self._push(self.cline[:m.start()])
            self.cline = self.cline[m.end():]
            # Keep only the group that actually matched
            match = {name: value
                     for name, value in m.groupdict().items() if value}
            if 'quote' in match:
                # a double quote ends the quoted string, so go back to
                # unquoted parsing
                return
            elif 'special' in match:
                # Unquoted, non-escaped special characters in a doubly quoted
                # string still have a special meaning and need to be sent to a
                # shell.
                raise MetaCharacterException(match['special'])
            elif 'escape' in match:
                # Escaped backslashes turn into a single backslash
                self._push('\\')
            elif 'backslashedquote' in match:
                # Backslashed double quotes are un-backslashed
                self._push('"')
            elif 'backslashed' in match:
                # Backslashed characters are kept backslashed
                self._push(match['backslashed'])


def split(cline):
    '''
    Break the given command line string into a list of arguments.

    Backslash-newline sequences are stripped from the string first; the
    remainder is then parsed by _ClineSplitter, whose exceptions are left to
    propagate to the caller.
    '''
    without_continuations = ESCAPED_NEWLINES_RE.sub('', cline)
    splitter = _ClineSplitter(without_continuations)
    return splitter.result


def _quote(s):
    '''Given a string, returns a version that can be used literally on a shell
    command line, enclosing it with single quotes if necessary.

    As a special case, if given an int, returns a string containing the int,
    not enclosed in quotes.
    '''
    if type(s) == int:
        return '%d' % s

    # Empty strings need to be quoted to have any significance
    if s and not SHELL_QUOTE_RE.search(s):
        return s

    # Single quoted strings can contain any characters unescaped except the
    # single quote itself, which can't even be escaped, so the string needs to
    # be closed, an escaped single quote added, and reopened.
    t = type(s)
    return t("'%s'") % s.replace(t("'"), t("'\\''"))


def quote(*strings):
    '''Quote each of the given strings for literal use on a shell command
    line and return them joined with single spaces.

        >>> quote('a', 'b')
        'a b'
        >>> quote('a b', 'c')
        "'a b' c"
    '''
    quoted = [_quote(arg) for arg in strings]
    return ' '.join(quoted)


# Names exported by `from mozbuild.shellutil import *`; the underscore-prefixed
# helpers above are not part of it.
__all__ = ['MetaCharacterException', 'split', 'quote']