python/mach/mach/sentry.py
author Emilio Cobos Álvarez <emilio@crisal.io>
Wed, 19 Jan 2022 20:51:45 +0100
changeset 604920 cc33400f0ff80f0eada6c3aa637f37d247a3ff46
parent 601799 c30d30105938446c596f333a8982eda9ee98a845
permissions -rw-r--r--
Bug 1747934 - Make marker-line-height pass reliably in all platforms. The line height might be font-family-dependent. MANUAL PUSH: Trivial orange fix CLOSED TREE

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from __future__ import absolute_import

import abc
import re

from pathlib import Path
from threading import Thread

import sentry_sdk
from mach.util import get_state_dir
from mach.telemetry import is_telemetry_enabled
from mozversioncontrol import (
    get_repository_object,
    InvalidRepoPath,
    MissingUpstreamRepo,
    MissingVCSTool,
)
from six import string_types

# https://sentry.prod.mozaws.net/operations/mach/
# DSN handed to ``sentry_sdk.init()`` by ``register_sentry()``.
_SENTRY_DSN = "https://8228c9aff64949c2ba4a2154dc515f55@sentry.prod.mozaws.net/525"


class ErrorReporter(abc.ABC):
    """Interface for objects that report exceptions to error-tracking software.

    Deriving from ``abc.ABC`` is what makes ``@abc.abstractmethod`` effective:
    the class itself, and any subclass that does not override
    ``report_exception``, cannot be instantiated.
    """

    @abc.abstractmethod
    def report_exception(self, exception):
        """Report the exception to remote error-tracking software."""


class SentryErrorReporter(ErrorReporter):
    """Error reporter that forwards exceptions to Sentry through ``sentry_sdk``."""

    def report_exception(self, exception):
        """Capture ``exception`` with the Sentry SDK and return the SDK's result."""
        capture_result = sentry_sdk.capture_exception(exception)
        return capture_result


class NoopErrorReporter(ErrorReporter):
    """Error reporter that silently discards every exception.

    ``register_sentry`` hands this out when telemetry is not enabled, so that
    callers can still invoke ``report_exception`` without anything being sent.
    """

    def report_exception(self, exception):
        """Ignore ``exception``; always returns ``None``."""
        return


def register_sentry(argv, settings, topsrcdir: Path):
    """Initialise Sentry reporting and return the matching error reporter.

    When ``is_telemetry_enabled(settings)`` is false nothing is initialised and
    a ``NoopErrorReporter`` is returned. Otherwise a daemon thread running
    ``_is_unmodified_mach_core`` is started, ``sentry_sdk`` is initialised with
    a ``before_send`` hook that routes events through ``_process_event``, the
    invoked command line is recorded as a breadcrumb, and a
    ``SentryErrorReporter`` is returned.
    """
    global _is_unmodified_mach_core_thread

    telemetry_on = is_telemetry_enabled(settings)
    if not telemetry_on:
        return NoopErrorReporter()

    # Kick off the "is mach unmodified?" check in the background; the result
    # is only needed if an event actually has to be sent.
    worker = Thread(target=_is_unmodified_mach_core, args=(topsrcdir,), daemon=True)
    _is_unmodified_mach_core_thread = worker
    worker.start()

    def _before_send(event, _hint):
        return _process_event(event, topsrcdir)

    sentry_sdk.init(_SENTRY_DSN, before_send=_before_send)

    command_line = " ".join(argv)
    sentry_sdk.add_breadcrumb(message="./mach {}".format(command_line))
    return SentryErrorReporter()


def _process_event(sentry_event, topsrcdir: Path):
    """Decide whether ``sentry_event`` is sent, scrubbing it if so.

    Returns ``None`` to drop the event (see
    https://docs.sentry.io/platforms/python/configuration/filtering/#using-beforesend),
    or the scrubbed event with its ``release`` field set.
    """
    repository = _get_repository_object(topsrcdir)
    if repository is None:
        # Without a repository the state of the mach files is unknown.
        return None

    hg_base_ref = repository.base_ref_as_hg()
    if not hg_base_ref:
        # An exception that can't be tied to a revision isn't worth sending.
        return None

    # Wait for the background modification check started by register_sentry.
    _is_unmodified_mach_core_thread.join()
    if not _is_unmodified_mach_core_result:
        return None

    scrubbed = _settle_mach_module_id(sentry_event, topsrcdir)
    scrubbed = _patch_absolute_paths(scrubbed, topsrcdir)
    scrubbed = _delete_server_name(scrubbed, topsrcdir)

    scrubbed["release"] = "hg-rev-{}".format(hg_base_ref)
    return scrubbed


def _settle_mach_module_id(sentry_event, _):
    # Sentry groups issues according to the stack frames and their associated
    # "module" properties. However, one of the modules is being reported
    # like "mach.commands.26a828ef5164403eaff4305ab4cb0fab" (with a generated id).
    # This function replaces that generated id with the static string "<generated>"
    # so that grouping behaves as expected

    stacktrace_frames = sentry_event["exception"]["values"][0]["stacktrace"]["frames"]
    for frame in stacktrace_frames:
        module = frame.get("module")
        if not module:
            continue

        module = re.sub(
            "mach\\.commands\\.[a-f0-9]{32}", "mach.commands.<generated>", module
        )
        frame["module"] = module
    return sentry_event


def _patch_absolute_paths(sentry_event, topsrcdir: Path):
    """Scrub well-known absolute paths out of every string in the event.

    As discussed here (https://bugzilla.mozilla.org/show_bug.cgi?id=1636251#c28),
    we remove usernames from file names with a best-effort basis. The most
    likely place for usernames to manifest in Sentry information is within
    absolute paths, such as: "/home/mitch/dev/firefox/mach".

    The state directory, ``topsrcdir`` and the user's home directory are
    replaced (in that order) with "<statedir>", "<topsrcdir>" and "~". Matching
    is case-insensitive, accepts either kind of slash for every separator, and
    treats every other character of the path literally.

    Returns the patched event. Dictionaries are patched in place; lists are
    rebuilt.
    """

    def recursive_patch(value, needle, replacement):
        if isinstance(value, list):
            return [recursive_patch(v, needle, replacement) for v in value]
        elif isinstance(value, dict):
            for key in list(value.keys()):
                next_value = value.pop(key)
                # Only string keys can hold a path; passing anything else to
                # the regex would raise a TypeError.
                if isinstance(key, string_types):
                    key = needle.sub(replacement, key)
                value[key] = recursive_patch(next_value, needle, replacement)
            return value
        elif isinstance(value, string_types):
            return needle.sub(replacement, value)
        else:
            return value

    # Regex fragment matching one path separator of either kind ("/" or "\").
    any_slash = r"[\\/]"
    slash_splitter = re.compile(any_slash)

    for (target_path, replacement) in (
        # str() keeps the repr() handling below valid even if the helper
        # hands back a path object rather than a string.
        (str(get_state_dir()), "<statedir>"),
        (str(topsrcdir), "<topsrcdir>"),
        (str(Path.home()), "~"),
    ):
        # Sentry converts "vars" to their "representations". When paths are in local
        # variables on Windows, "C:\Users\MozillaUser\Desktop" becomes
        # "'C:\\Users\\MozillaUser\\Desktop'". To still catch this case, we "repr"
        # the home directory and scrub the beginning and end quotes, then
        # find-and-replace on that.
        repr_path = repr(target_path)[1:-1]

        # When repr() leaves the path unchanged, a second pass with the same
        # pattern could not match anything new, so skip it.
        targets = [target_path]
        if repr_path != target_path:
            targets.append(repr_path)

        for target in targets:
            # Paths in the Sentry event aren't consistent:
            # * On *nix, they're mostly forward slashes.
            # * On *nix, not all absolute paths start with a leading forward slash.
            # * On Windows, they're mostly backslashes.
            # * On Windows, `.extra."sys.argv"` uses forward slashes.
            # * The Python variables in-scope captured by the Sentry report may be
            #   inconsistent, even for a single path. For example, on
            #   Windows, Mach calculates the state_dir as "C:\Users\<user>/.mozbuild".

            # A path made only of separators (or an empty one) would yield a
            # pattern that matches at every position of every string, which
            # would mangle the whole event. There is nothing to scrub anyway.
            if not target.strip("/\\"):
                continue

            # Escape each path component so characters such as ".", "+" or
            # "(" are matched literally, and join the components with a
            # pattern accepting both forward slashes and backslashes.
            pattern = any_slash.join(
                re.escape(component) for component in slash_splitter.split(target)
            )

            # Handle the case where not all absolute paths start with a leading
            # forward slash: make the initial slash optional in the search string.
            if target.startswith("/"):
                pattern = any_slash + "?" + pattern[len(any_slash) :]

            # Compile the regex and patch the event.
            needle_regex = re.compile(pattern, re.IGNORECASE)
            sentry_event = recursive_patch(sentry_event, needle_regex, replacement)
    return sentry_event


def _delete_server_name(sentry_event, _):
    sentry_event.pop("server_name")
    return sentry_event


def _get_repository_object(topsrcdir: Path):
    """Return the repository object for ``topsrcdir``, or ``None``.

    ``None`` is returned when ``get_repository_object`` raises
    ``InvalidRepoPath`` or ``MissingVCSTool``; other exceptions propagate.
    """
    source_root = str(topsrcdir)
    try:
        repository = get_repository_object(source_root)
    except (InvalidRepoPath, MissingVCSTool):
        return None
    return repository


def _is_unmodified_mach_core(topsrcdir: Path):
    """True if mach is unmodified compared to the public tree.

    To avoid submitting Sentry events for errors caused by user's
    local changes, we attempt to detect if mach (or code affecting mach)
    has been modified in the user's local state:
    * In a revision off of a "ancestor to central" revision, or:
    * In the working, uncommitted state.

    If "$topsrcdir/mach" and "*.py" haven't been touched, then we can be
    pretty confident that the Mach behaviour that caused the exception
    also exists in the public tree.

    The answer is not returned: it is stored in the module-level
    ``_is_unmodified_mach_core_result``. ``False`` is stored when the
    repository can't be obtained or its upstream is missing, since in
    those cases the state of the mach files is unknown.
    """
    global _is_unmodified_mach_core_result

    repo = _get_repository_object(topsrcdir)
    if repo is None:
        # No usable repository: we can't tell whether the mach files are
        # unmodified, and calling methods on None would raise.
        _is_unmodified_mach_core_result = False
        return

    try:
        files = set(repo.get_outgoing_files()) | set(repo.get_changed_files())
    except MissingUpstreamRepo:
        # If we don't know the upstream state, we don't know if the mach files
        # have been unmodified.
        _is_unmodified_mach_core_result = False
        return

    _is_unmodified_mach_core_result = not any(
        path == "mach" or path.endswith(".py") for path in files
    )


# Outcome of the modification check, written by ``_is_unmodified_mach_core()``
# and read by ``_process_event()``. Remains ``None`` until a result is stored.
_is_unmodified_mach_core_result = None
# Thread running ``_is_unmodified_mach_core()``. Created and started by
# ``register_sentry()``, joined by ``_process_event()``.
_is_unmodified_mach_core_thread = None