mozilla-central: build/moz.configure/lto-pgo.configure@df2495d1668868c4b99f85c68babc03a0f426bd1

# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
# vim: set filetype=python:
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# PGO
# ==============================================================


# --enable-profile-generate may be passed bare or with a single value
# (nargs="?"); "cross" is the only value accepted. The "cross" value is
# consumed by moz_pgo_rust further down in this file.
option(
    "--enable-profile-generate",
    env="MOZ_PROFILE_GENERATE",
    nargs="?",
    choices=("cross",),
    help="Build a PGO instrumented binary",
)

# Collapse the option to a plain True when it is enabled, whatever value (if
# any) was passed with it.
enable_profile_generate = depends_if("--enable-profile-generate")(lambda _: True)


# Requesting an instrumented build implies MOZ_PGO.
imply_option("MOZ_PGO", enable_profile_generate)

set_config("MOZ_PROFILE_GENERATE", enable_profile_generate)

set_define("MOZ_PROFILE_GENERATE", enable_profile_generate)

# --enable-profile-use has the same shape as --enable-profile-generate: an
# optional single value, with "cross" as the only accepted choice.
option(
    "--enable-profile-use",
    env="MOZ_PROFILE_USE",
    nargs="?",
    choices=("cross",),
    help="Use a generated profile during the build",
)
# Collapse the option to a plain True when it is enabled.
enable_profile_use = depends_if("--enable-profile-use")(lambda _: True)

# Requesting a profile-use build implies MOZ_PGO.
imply_option("MOZ_PGO", enable_profile_use)
set_config("MOZ_PROFILE_USE", enable_profile_use)

# llvm-profdata is allowed to be missing at this point (allow_missing=True);
# pgo_profile_path below dies if --with-pgo-profile-path is given without it.
llvm_profdata = check_prog(
    "LLVM_PROFDATA", ["llvm-profdata"], allow_missing=True, paths=clang_search_path
)


# --call-graph-profile-sort is the default behavior for lld, and it proves to
# be more efficient than a PGO-based orderfile. Don't pass it explicitly,
# because its option support differs from one version to the other (w/ or
# w/o argument, and the argument value).
#
# The result is truthy only when the selected linker is lld and the target is
# not OSX; when no linker was selected, the falsy linker value itself is
# returned.
@depends(select_linker, target)
def pgo_cg_sort(linker, target):
    if not linker:
        return linker
    return linker.KIND == "lld" and target.os != "OSX"


# Probe llvm-profdata for the 'order' subcommand by running `order --help`
# and checking for a zero exit status. Only evaluated for profile-use builds
# where pgo_cg_sort is not in effect.
@depends_if(llvm_profdata, when=enable_profile_use & ~pgo_cg_sort)
@checking("whether llvm-profdata supports 'order' subcommand")
def llvm_profdata_order(profdata):
    exit_status, _stdout, _stderr = get_cmd_output(profdata, "order", "--help")
    return exit_status == 0


# Takes exactly one value (nargs=1), which is validated by pgo_profile_path.
# NOTE(review): the help text mentions a directory of unmerged profile data,
# but pgo_profile_path dies on anything that is not a regular file -- confirm
# which of the two is intended.
option(
    "--with-pgo-profile-path",
    help="Path to the directory with unmerged profile data to use during the build"
    ", or to a merged profdata file",
    nargs=1,
)


# Resolve the profile data location used by profile-use builds.
#
# Without --with-pgo-profile-path, this is instrumented/merged.profdata under
# the top objdir (with a trailing "/js/src" stripped from the objdir first).
# With an explicit path, configure dies unless --enable-profile-use is on,
# llvm-profdata was found, and the path is an existing file given as an
# absolute path.
@depends(
    "--with-pgo-profile-path",
    "--enable-profile-use",
    llvm_profdata,
    build_environment,
)
@imports("os")
def pgo_profile_path(path, pgo_use, profdata, build_env):
    js_src_suffix = "/js/src"
    objdir = build_env.topobjdir
    if objdir.endswith(js_src_suffix):
        objdir = objdir[: -len(js_src_suffix)]

    # No explicit path: fall back to the default merged profile location.
    if not path:
        return os.path.join(objdir, "instrumented", "merged.profdata")

    # From here on an explicit path was given; validate it step by step.
    if not pgo_use:
        die("Pass --enable-profile-use to use --with-pgo-profile-path.")
    if not profdata:
        die("LLVM_PROFDATA must be set to process the pgo profile.")

    profile = path[0]
    if not os.path.isfile(profile):
        die("Argument to --with-pgo-profile-path must be a file.")
    if not os.path.isabs(profile):
        die("Argument to --with-pgo-profile-path must be an absolute path.")
    return profile


# Record the path resolved by pgo_profile_path under PGO_PROFILE_PATH.
set_config("PGO_PROFILE_PATH", pgo_profile_path)


# Generate a symbol order file from the PGO profile.
#
# Returns None unless this is a profile-use build and llvm-profdata supports
# the 'order' subcommand. Otherwise runs `llvm-profdata order <profile> -o
# <topobjdir>/orderfile.txt` at configure time and returns the path of the
# file it was asked to write.
@depends(
    "--enable-profile-use",
    pgo_profile_path,
    llvm_profdata,
    llvm_profdata_order,
    build_environment,
)
def orderfile_path(profile_use, path, profdata, profdata_order, build_env):
    if not (profile_use and profdata_order):
        return None

    destination = os.path.join(build_env.topobjdir, "orderfile.txt")
    check_cmd_output(profdata, "order", path, "-o", destination)
    return destination


# Check whether the C compiler accepts -pgo-temporal-instrumentation on top
# of -fprofile-generate. Only evaluated for instrumented builds where
# pgo_cg_sort is not in effect; pgo_flags adds the flag when this succeeds.
pgo_temporal = c_compiler.try_compile(
    flags=["-fprofile-generate", "-mllvm", "-pgo-temporal-instrumentation"],
    check_msg="whether the C compiler supports temporal instrumentation",
    when=enable_profile_generate & ~pgo_cg_sort,
)


# Compute the compiler and linker flags for both PGO phases.
#
# Returns a namespace with four flag lists:
#   gen_cflags / gen_ldflags: flags for the instrumented (profile-generate)
#       build; gen_ldflags is None for clang-cl.
#   use_cflags / use_ldflags: flags for the profile-use build.
# Nothing is returned (None) for compiler types other than gcc, clang and
# clang-cl.
#
# Note that `profdata` receives the value of pgo_profile_path, i.e. the
# location of the profile data, not the llvm-profdata tool. `orderfile` is
# the file produced by orderfile_path, or None when no order file is in use.
@depends(
    c_compiler,
    select_linker,
    target,
    pgo_profile_path,
    target_is_windows,
    pgo_temporal,
    orderfile_path,
)
def pgo_flags(
    compiler,
    linker,
    target,
    profdata,
    target_is_windows,
    pgo_temporal,
    orderfile,
):
    if compiler.type == "gcc":
        return namespace(
            gen_cflags=["-fprofile-generate"],
            gen_ldflags=["-fprofile-generate"],
            use_cflags=["-fprofile-use", "-fprofile-correction", "-Wcoverage-mismatch"],
            use_ldflags=["-fprofile-use"],
        )

    if compiler.type in ("clang-cl", "clang"):
        prefix = ""
        if compiler.type == "clang-cl":
            prefix = "/clang:"
            gen_ldflags = None
        else:
            gen_ldflags = ["-fprofile-generate"]

        # The selected linker may be unset (pgo_cg_sort guards against that
        # too), so never dereference it directly. Without a known linker
        # kind, no order-file flags are emitted for clang.
        linker_kind = linker.KIND if linker else None

        use_ldflags = []
        if orderfile:
            if compiler.type == "clang-cl":
                use_ldflags += [
                    "-ORDER:@" + orderfile,
                    "/ignore:4037",  # Disable warn missing order symbol
                ]
            elif linker_kind == "ld64" or (linker_kind == "lld" and target.os == "OSX"):
                use_ldflags += ["-Wl,-order_file", orderfile]
            elif linker_kind == "lld":
                use_ldflags += [
                    "-Wl,--symbol-ordering-file",
                    orderfile,
                    "-Wl,--no-warn-symbol-ordering",
                ]

            # Other linkers get no order-file flags, hence the extra check.
            if use_ldflags:
                log.info("Activating PGO-based orderfile")

        gen_cflags = [prefix + "-fprofile-generate"]

        if pgo_temporal:
            gen_cflags += ["-mllvm", "-pgo-temporal-instrumentation"]

        if target_is_windows:
            # native llvm-profdata.exe on Windows can't read profile data
            # if name compression is enabled (which cross-compiling enables
            # by default)
            gen_cflags += ["-mllvm", "-enable-name-compression=false"]

        return namespace(
            gen_cflags=gen_cflags,
            gen_ldflags=gen_ldflags,
            use_cflags=[
                prefix + "-fprofile-use=%s" % profdata,
                # Some error messages about mismatched profile data
                # come in via -Wbackend-plugin, so disable those too.
                "-Wno-error=backend-plugin",
            ],
            use_ldflags=use_ldflags,
        )


# Record each flag list computed by pgo_flags under its own config key.
set_config("PROFILE_GEN_CFLAGS", pgo_flags.gen_cflags)
set_config("PROFILE_GEN_LDFLAGS", pgo_flags.gen_ldflags)
set_config("PROFILE_USE_CFLAGS", pgo_flags.use_cflags)
set_config("PROFILE_USE_LDFLAGS", pgo_flags.use_ldflags)

# Takes exactly one value (nargs=1): the jarlog file named in the help text.
option(
    "--with-pgo-jarlog",
    help="Use the provided jarlog file when packaging during a profile-use " "build",
    nargs=1,
)

# When the option is given, its value is forwarded unchanged.
set_config("PGO_JARLOG_PATH", depends_if("--with-pgo-jarlog")(lambda p: p))


# Decide whether cross-language PGO was requested.
#
# Returns True only when MOZ_PGO is on and exactly one of
# --enable-profile-use / --enable-profile-generate was passed with the
# "cross" value. Dies when both options are enabled at once, or when "cross"
# is requested with GCC. Returns None in every other case.
@depends("MOZ_PGO", "--enable-profile-use", "--enable-profile-generate", c_compiler)
def moz_pgo_rust(pgo, profile_use, profile_generate, c_compiler):
    if not pgo:
        return

    # Enabling PGO through MOZ_PGO only and not --enable* flags.
    if not (profile_use or profile_generate):
        return

    if profile_use and profile_generate:
        die("Cannot build with --enable-profile-use and --enable-profile-generate.")

    # Each option carries at most one value; "cross" is what matters here.
    use_is_cross = len(profile_use) and profile_use[0] == "cross"
    generate_is_cross = len(profile_generate) and profile_generate[0] == "cross"
    if not (use_is_cross or generate_is_cross):
        return

    if c_compiler.type == "gcc":
        die("Cannot use cross-language PGO with GCC.")

    return True


# Record the result of moz_pgo_rust (True or unset) under MOZ_PGO_RUST.
set_config("MOZ_PGO_RUST", moz_pgo_rust)

# LTO
# ==============================================================

# --enable-lto accepts any number of values (nargs="*") among "full", "thin"
# and "cross". The lto function below rejects "full" together with "thin"
# and picks a default mode when neither is given.
option(
    "--enable-lto",
    env="MOZ_LTO",
    nargs="*",
    choices=("full", "thin", "cross"),
    help="Enable LTO",
)

# Environment-only option, read by the lto function below before it allows
# cross-language LTO with the ld64 linker on Darwin.
option(
    env="MOZ_LD64_KNOWN_GOOD",
    nargs=1,
    help="Indicate that ld64 is free of symbol aliasing bugs",
)

# MOZ_LD64_KNOWN_GOOD is implied by moz_automation.
imply_option("MOZ_LD64_KNOWN_GOOD", moz_automation)


# Translate --enable-lto into compiler and linker flags.
#
# Returns None when LTO was not requested, or when the build is instrumented
# (--enable-profile-generate), in which case LTO is disabled with a warning.
# Otherwise returns a namespace with:
#   enabled:  always True.
#   cflags:   LTO flags for the C/C++ compiler.
#   ldflags:  LTO flags for the linker.
#   rust_lto: "full" or "thin" when cross-language LTO was requested, and an
#             empty string otherwise.
# Dies on incompatible requests: "full" together with "thin", "cross" or
# "thin" with GCC, and "cross" on Darwin with an ld64 that is not flagged as
# known good.
@depends(
    "--enable-lto",
    c_compiler,
    select_linker,
    "MOZ_LD64_KNOWN_GOOD",
    target,
    "--enable-profile-generate",
    pass_manager.enabled,
    "--enable-profile-use",
    moz_automation,
)
@imports("multiprocessing")
def lto(
    values,
    c_compiler,
    select_linker,
    ld64_known_good,
    target,
    instrumented_build,
    pass_manager,
    pgo_build,
    moz_automation,
):
    if not values:
        return

    cflags = []
    ldflags = []

    # Sanitize LTO modes.
    if "full" in values and "thin" in values:
        die("incompatible --enable-lto choices 'full' and 'thin'")

    # If a value was given to --enable-lto, use that.  Otherwise, make the lto
    # mode explicit, using full with gcc, and full or thin with clang depending
    # on the performance benefit.
    # Defaulting to full LTO is costly in terms of compilation time, so we only
    # default to it if MOZ_AUTOMATION and PGO are on, and for some platforms.
    # Based on speedometer3 scores, full lto + pgo is beneficial for Linux and
    # Windows for x86_64 targets.
    if values == () or values == ("cross",):
        if c_compiler.type == "gcc":
            values += ("full",)
        elif (
            pgo_build
            and moz_automation
            and target.os in ("WINNT", "GNU")
            and target.cpu == "x86_64"
        ):
            values += ("full",)
        else:
            values += ("thin",)

    if instrumented_build:
        log.warning("Disabling LTO because --enable-profile-generate is specified")
        return

    if c_compiler.type == "gcc":
        if "cross" in values:
            die("Cross-language LTO is not supported with GCC.")
        if "thin" in values:
            die(
                "gcc does not support thin LTO. Use `--enable-lto` "
                "to enable full LTO for gcc."
            )

    if (
        target.kernel == "Darwin"
        and "cross" in values
        and select_linker.KIND == "ld64"
        and not ld64_known_good
    ):
        die(
            "The Mac linker is known to have a bug that affects cross-language "
            "LTO.  If you know that your linker is free from this bug, please "
            "set the environment variable `MOZ_LD64_KNOWN_GOOD=1` and re-run "
            "configure."
        )

    if c_compiler.type == "clang":
        if "full" in values:
            cflags.append("-flto")
            ldflags.append("-flto")
        else:
            cflags.append("-flto=thin")
            ldflags.append("-flto=thin")

        if target.os == "Android" and "cross" in values:
            # Work around https://github.com/rust-lang/rust/issues/90088
            # by enabling the highest level of SSE the rust targets default
            # to.
            # https://github.com/rust-lang/rust/blob/bdfcb88e8b6203ccb46a2fb6649979b773efc8ac/compiler/rustc_target/src/spec/i686_linux_android.rs#L13
            # https://github.com/rust-lang/rust/blob/8d1083e319841624f64400e1524805a40d725439/compiler/rustc_target/src/spec/x86_64_linux_android.rs#L7
            if target.cpu == "x86":
                ldflags.append("-Wl,-plugin-opt=-mattr=+ssse3")
            elif target.cpu == "x86_64":
                ldflags.append("-Wl,-plugin-opt=-mattr=+sse4.2")
    elif c_compiler.type == "clang-cl":
        if "full" in values:
            cflags.append("-flto")
        else:
            cflags.append("-flto=thin")
        # With clang-cl, -flto can only be used with -c or -fuse-ld=lld.
        # AC_TRY_LINKs during configure don't have -c, so pass -fuse-ld=lld.
        cflags.append("-fuse-ld=lld")

        # Explicitly set the CPU to optimize for so the linker doesn't
        # choose a poor default.  Rust compilation by default uses the
        # pentium4 CPU on x86:
        #
        # https://github.com/rust-lang/rust/blob/049a49b91151a88c95fa0d62a53fd0a0ac2c3af9/compiler/rustc_target/src/spec/i686_pc_windows_msvc.rs#L5
        #
        # which specifically supports "long" (multi-byte) nops.  See
        # https://bugzilla.mozilla.org/show_bug.cgi?id=1568450#c8 for details.
        #
        # The pentium4 seems like kind of a weird CPU to optimize for, but
        # it seems to have worked out OK thus far.  LLVM does not seem to
        # specifically schedule code for the pentium4's deep pipeline, so
        # that probably contributes to it being an OK default for our
        # purposes.
        if target.cpu == "x86":
            ldflags.append("-mllvm:-mcpu=pentium4")
        # This is also the CPU that Rust uses.  The LLVM source code
        # recommends this as the "generic 64-bit specific x86 processor model":
        #
        # https://github.com/llvm/llvm-project/blob/e7694f34ab6a12b8bb480cbfcb396d0a64fe965f/llvm/lib/Target/X86/X86.td#L1165-L1187
        if target.cpu == "x86_64":
            ldflags.append("-mllvm:-mcpu=x86-64")
        # We do not need special flags for arm64.  Hooray for fixed-length
        # instruction sets.
    else:
        # Remaining compiler types: the link step is given one LTO job per
        # CPU reported for the configure machine.
        num_cores = multiprocessing.cpu_count()
        cflags.append("-flto")
        cflags.append("-flifetime-dse=1")

        ldflags.append("-flto=%s" % num_cores)
        ldflags.append("-flifetime-dse=1")

    # Tell LTO not to inline functions above a certain size, to mitigate
    # binary size growth while still getting good performance.
    # (For hot functions, PGO will put a multiplier on this limit.)
    if c_compiler.type == "clang-cl":
        ldflags.append("-mllvm:-import-instr-limit=10")
    elif target.kernel == "Darwin":
        ldflags.append("-Wl,-mllvm,-import-instr-limit=10")
    elif c_compiler.type == "clang":
        ldflags.append("-Wl,-plugin-opt=-import-instr-limit=10")

    # If we're using the new pass manager, we can also enable the new PM
    # during LTO. Further we can use the resulting size savings to increase
    # the import limit in hot functions.
    if pass_manager:
        if c_compiler.type == "clang-cl":
            if c_compiler.version >= "12.0.0" and c_compiler.version < "13.0.0":
                ldflags.append("-opt:ltonewpassmanager")
            if c_compiler.version >= "12.0.0":
                ldflags.append("-mllvm:-import-hot-multiplier=30")
        elif target.kernel == "Darwin":
            ldflags.append("-Wl,-mllvm,-import-hot-multiplier=30")
        else:
            if c_compiler.version < "13.0.0":
                ldflags.append("-Wl,-plugin-opt=new-pass-manager")
            ldflags.append("-Wl,-plugin-opt=-import-hot-multiplier=30")

    # Pick Rust LTO mode in case of cross LTO. Thin is the default.
    if "cross" in values:
        rust_lto = "full" if "full" in values else "thin"
    else:
        rust_lto = ""

    return namespace(
        enabled=True,
        cflags=cflags,
        ldflags=ldflags,
        rust_lto=rust_lto,
    )


# Add -Wl,--gc-sections to the shared-library link options. The `when`
# condition limits this to GNU-compatible compilers using GNU ld, with
# developer options off and outside of instrumented (profile-generate)
# builds. The list held by dso_flags is modified in place; nothing is
# returned.
@depends(
    dso_flags,
    when=building_with_gnu_compatible_cc
    & gcc_use_gnu_ld
    & ~developer_options
    & ~enable_profile_generate,
)
def remove_dead_symbols(dso_flags):
    link_options = dso_flags.ldopts
    link_options.append("-Wl,--gc-sections")


# Record the fields of the namespace returned by lto. MOZ_LTO is both a
# config value and a define; the flag lists and the Rust LTO mode are config
# values only.
set_config("MOZ_LTO", lto.enabled)
set_define("MOZ_LTO", lto.enabled)
set_config("MOZ_LTO_CFLAGS", lto.cflags)
set_config("MOZ_LTO_LDFLAGS", lto.ldflags)
set_config("MOZ_LTO_RUST_CROSS", lto.rust_lto)
author	Updatebot <updatebot@mozilla.com>
	Sat, 19 Jul 2025 21:32:42 +0000 (21 hours ago)
changeset 797266	df2495d1668868c4b99f85c68babc03a0f426bd1
parent 796504	0320c24dfe3214d5e2c61af6003ddb0929954ef4
permissions	-rw-r--r--