python/mozbuild/mozbuild/shellutil.py
author Hubert Boma Manilla <hmanilla@mozilla.com>
Sun, 27 Nov 2022 16:18:37 +0000
changeset 643736 f49e8eca9e344e5d8b9a5e67ff5859ba3afc3a4d
parent 554551 994ae8e4833c90447d91f0e26a718573cff5a514
permissions -rw-r--r--
Bug 1787198 - [devtools] Change caching of original and generated source text content r=ochameau Differential Revision: https://phabricator.services.mozilla.com/D160564

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

from __future__ import absolute_import, print_function, unicode_literals

import re


def _tokens2re(**tokens):
    """Compile a regex that recognizes any of the given named tokens.

    Each keyword argument maps a group name to a regex fragment. The
    returned compiled pattern also always defines an "escape" group.
    """
    # Wrap every fragment in a named capture group:
    #   (?P<name>pattern)
    named_groups = ["(?P<%s>%s)" % item for item in tokens.items()]

    # Alternation of all the groups, only allowed to match when the token is
    # not immediately preceded by a backslash:
    #   (?<!\\)(?:a|b|c...)
    unescaped = r"(?<!\\)(?:%s)" % "|".join(named_groups)

    # An escaped backslash (two consecutive backslashes) is reported through
    # its own "escape" group.
    escaped_backslash = r"(?P<escape>\\\\)"

    # The final pattern matches either an unescaped token or an escaped
    # backslash.
    return re.compile("(?:%s|%s)" % (unescaped, escaped_backslash))


# Tokens that are significant while parsing outside of any quotes. Each
# keyword becomes a named group of the compiled pattern, and
# _ClineSplitter._parse_unquoted dispatches on the name of the group that
# matched.
UNQUOTED_TOKENS_RE = _tokens2re(
    # A run of tabs, carriage returns, newlines or spaces.
    whitespace=r"[\t\r\n ]+",
    # A single or a double quote.
    quote=r'[\'"]',
    comment="#",
    # Characters that make the parser raise MetaCharacterException.
    special=r"[<>&|`(){}$;\*\?]",
    # A backslash followed by any character other than a backslash.
    backslashed=r"\\[^\\]",
)

# Tokens that are significant while parsing inside a double-quoted string.
# Each keyword becomes a named group of the compiled pattern, and
# _ClineSplitter._parse_doubly_quoted dispatches on the name of the group
# that matched.
DOUBLY_QUOTED_TOKENS_RE = _tokens2re(
    # An unescaped double quote.
    quote='"',
    # A backslash followed by a double quote.
    backslashedquote=r'\\"',
    # A dollar sign. This must be a raw string: "\$" is an invalid escape
    # sequence in a regular string literal and triggers a warning on
    # Python 3.
    special=r"\$",
    # A backslash followed by anything other than a backslash or a double
    # quote.
    backslashed=r'\\[^\\"]',
)

# Matches a backslash immediately followed by a newline. split() removes
# every such pair from the command line before parsing it.
ESCAPED_NEWLINES_RE = re.compile(r"\\\n")

# This regexp contains the same characters as all those listed in
# UNQUOTED_TOKENS_RE (whitespace, quotes, comment marker, special characters
# and the backslash). Please keep in sync.
# _quote() uses it to decide whether a string has to be quoted.
SHELL_QUOTE_RE = re.compile(r"[\\\t\r\n \'\"#<>&|`(){}$;\*\?]")


class MetaCharacterException(Exception):
    """
    Raised when a command line contains an unquoted, unescaped character
    that the splitter treats as special, meaning the command has to be
    handed to a real shell instead of being split here.

    The offending character is available as the ``char`` attribute.
    """

    def __init__(self, char):
        # Forward the character to the base class so that ``args`` and
        # ``str(exc)`` carry it. Without this the exception has an empty
        # message, and copy/pickle cannot rebuild the instance because they
        # call the constructor with ``args``.
        super(MetaCharacterException, self).__init__(char)
        self.char = char


class _ClineSplitter(object):
    """
    Parses a given command line string and creates a list of command
    and arguments, available as the ``result`` attribute once the instance
    has been constructed.

    No expansion of any kind is performed: unquoted special characters
    (including the wildcards ``*`` and ``?``) and ``$`` inside double quotes
    raise MetaCharacterException instead.
    """

    def __init__(self, cline):
        # Argument currently being accumulated. None means no argument has
        # been started, which is different from "" (an argument that was
        # explicitly given as an empty quoted string).
        self.arg = None
        # Remainder of the command line that still has to be parsed.
        self.cline = cline
        # Arguments finalized so far.
        self.result = []
        self._parse_unquoted()

    def _push(self, text):
        """
        Push the given string as part of the current argument, starting a
        new argument if none is in progress.
        """
        if self.arg is None:
            self.arg = ""
        self.arg += text

    def _next(self):
        """
        Finalize current argument, effectively adding it to the list.
        Does nothing when no argument has been started.
        """
        if self.arg is None:
            return
        self.result.append(self.arg)
        self.arg = None

    def _parse_unquoted(self):
        """
        Parse command line remainder in the context of an unquoted string.

        Raises MetaCharacterException on an unquoted, unescaped special
        character.
        """
        while self.cline:
            # Find the next token
            m = UNQUOTED_TOKENS_RE.search(self.cline)
            # If we find none, the remainder of the string can be pushed to
            # the current argument and the argument finalized
            if not m:
                self._push(self.cline)
                break
            # The beginning of the string, up to the found token, is part of
            # the current argument
            if m.start():
                self._push(self.cline[: m.start()])
            self.cline = self.cline[m.end() :]

            # Only one named group matches at a time; keep just that one.
            match = {name: value for name, value in m.groupdict().items() if value}
            if "quote" in match:
                # " or ' start a quoted string
                if match["quote"] == '"':
                    self._parse_doubly_quoted()
                else:
                    self._parse_quoted()
            elif "comment" in match:
                # Comments are ignored. The current argument can be finalized,
                # and parsing stopped.
                break
            elif "special" in match:
                # Unquoted, non-escaped special characters need to be sent to a
                # shell.
                raise MetaCharacterException(match["special"])
            elif "whitespace" in match:
                # Whitespaces terminate current argument.
                self._next()
            elif "escape" in match:
                # Escaped backslashes turn into a single backslash
                self._push("\\")
            elif "backslashed" in match:
                # Backslashed characters are unbackslashed
                # e.g. echo \a -> a
                self._push(match["backslashed"][1])
            else:
                raise Exception("Shouldn't reach here")
        # Finalize whatever argument is in progress. _next() already ignores
        # the "no argument started" case, and testing the argument for
        # truthiness here would wrongly drop a trailing empty argument such
        # as the one produced by '' or "" at the end of the command line.
        self._next()

    def _parse_quoted(self):
        """
        Parse command line remainder in the context of a single-quoted
        string. Raises Exception when the closing quote is missing.
        """
        # Single quoted strings are preserved, except for the final quote
        index = self.cline.find("'")
        if index == -1:
            raise Exception("Unterminated quoted string in command")
        self._push(self.cline[:index])
        self.cline = self.cline[index + 1 :]

    def _parse_doubly_quoted(self):
        """
        Parse command line remainder in the context of a double-quoted
        string.

        Raises MetaCharacterException on an unescaped ``$`` and Exception
        when the closing quote is missing.
        """
        while self.cline:
            m = DOUBLY_QUOTED_TOKENS_RE.search(self.cline)
            if not m:
                break
            self._push(self.cline[: m.start()])
            self.cline = self.cline[m.end() :]
            match = {name: value for name, value in m.groupdict().items() if value}
            if "quote" in match:
                # a double quote ends the quoted string, so go back to
                # unquoted parsing
                return
            elif "special" in match:
                # Unquoted, non-escaped special characters in a doubly quoted
                # string still have a special meaning and need to be sent to a
                # shell.
                raise MetaCharacterException(match["special"])
            elif "escape" in match:
                # Escaped backslashes turn into a single backslash
                self._push("\\")
            elif "backslashedquote" in match:
                # Backslashed double quotes are un-backslashed
                self._push('"')
            elif "backslashed" in match:
                # Backslashed characters are kept backslashed
                self._push(match["backslashed"])
        # The only successful exit is the return on the closing quote above.
        # Getting here means the input was empty, held no further token, or
        # ran out right after a non-quote token (e.g. an escaped quote at the
        # very end): in every case the closing quote is missing.
        raise Exception("Unterminated quoted string in command")


def split(cline):
    """
    Split the given command line string into a list of arguments.

    Backslash-newline pairs are removed from the string first, then the
    result is parsed by _ClineSplitter, whose exceptions propagate to the
    caller.
    """
    joined = ESCAPED_NEWLINES_RE.sub("", cline)
    splitter = _ClineSplitter(joined)
    return splitter.result


def _quote(s):
    """Given a string, returns a version that can be used literally on a shell
    command line, enclosing it with single quotes if necessary.

    As a special case, if given an int (or an instance of an int subclass,
    which includes bool), returns a string containing its decimal value, not
    enclosed in quotes.
    """
    # isinstance rather than an exact type comparison, so that int subclasses
    # take this branch too instead of failing in the regexp search below.
    if isinstance(s, int):
        return "%d" % s

    # Empty strings need to be quoted to have any significance. Strings
    # starting with "~" are always quoted as well (presumably to keep the
    # shell from performing tilde expansion on them).
    if s and not SHELL_QUOTE_RE.search(s) and not s.startswith("~"):
        return s

    # Single quoted strings can contain any characters unescaped except the
    # single quote itself, which can't even be escaped, so the string needs to
    # be closed, an escaped single quote added, and reopened.
    # The literals are built with the type of the input so that the result
    # has the same string type as what was given.
    t = type(s)
    return t("'%s'") % s.replace(t("'"), t("'\\''"))


def quote(*strings):
    """Quote each of the given strings for literal use on a shell command
    line and return them joined with single spaces.

        >>> quote('a', 'b')
        'a b'
        >>> quote('a b', 'c')
        "'a b' c"

    Calling it without arguments returns an empty string.
    """
    quoted = [_quote(item) for item in strings]
    return " ".join(quoted)


# Names exported by `from ... import *`; the underscore-prefixed helpers and
# the module-level regexps are deliberately left out.
__all__ = ["MetaCharacterException", "split", "quote"]