Bug 1630809 - generate partner attribution task r=aki
☠☠ backed out by 8f28d9b7a86c ☠ ☠
authorNick Thomas <nthomas@mozilla.com>
Thu, 27 Aug 2020 05:46:51 +0000
changeset 546663 04628c1f98e9d3a5d2ac1b26a6a417857745e960
parent 546662 4b4d50e0b1bf48ae143964a9e5551601bde57f4c
child 546664 afb5df61943a735b4ace472e9c686cc57ab05ea4
push id37736
push userapavel@mozilla.com
push dateFri, 28 Aug 2020 15:31:26 +0000
treeherdermozilla-central@56166cae2e26 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersaki
bugs1630809
milestone82.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1630809 - generate partner attribution task r=aki A single task is created to do all partner attributions. The partner_attribution transform processes the configuration into an environment variable for the tools/attribution/attribute.py script to use. This is quite verbose so a large number of configurations may cause problems. Applies the same priority modification to attribution tasks as to partner repacks, to not impede the main part of the graph. Differential Revision: https://phabricator.services.mozilla.com/D87729
python/mozrelease/mozrelease/partner_attribution.py
taskcluster/ci/release-partner-attribution/kind.yml
taskcluster/docs/kinds.rst
taskcluster/taskgraph/transforms/partner_attribution.py
taskcluster/taskgraph/transforms/partner_repack.py
taskcluster/taskgraph/util/partners.py
new file mode 100644
--- /dev/null
+++ b/python/mozrelease/mozrelease/partner_attribution.py
@@ -0,0 +1,191 @@
+#! /usr/bin/env python
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, print_function
+
+import argparse
+import logging
+import mmap
+import json
+import os
+import shutil
+import struct
+import sys
+import tempfile
+import urllib.parse
+
+# Configure the root logger at import time: INFO and above, with each line
+# rendered as "<LEVEL> - <message>". `log` is the root logger itself.
+logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
+log = logging.getLogger()
+
+
+def write_attribution_data(filepath, data):
+    """Insert data into a prepared certificate in a signed PE file.
+
+    Returns True when the data was written. Returns False, without modifying
+    the file, if the file isn't a valid PE file, if it isn't signed, if the
+    necessary certificate was not found, or if the url-quoted data does not
+    fit in the space reserved for it.
+
+    This function assumes that somewhere in the given file's certificate table
+    there exists a 1024-byte space which begins with the tag "__MOZCUSTOM__:".
+    The given data will be url-quoted and inserted into the file following
+    this tag.
+
+    We don't bother updating the optional header checksum.
+    Windows doesn't check it for executables, only drivers and certain DLL's.
+    """
+    tag = b"__MOZCUSTOM__:"
+    # Size of the placeholder described above, tag included.
+    reserved_size = 1024
+
+    # convert to quoted-url byte-string for insertion
+    payload = urllib.parse.quote(data).encode("utf-8")
+
+    # The context managers release the mapping and the file on every exit
+    # path, including the successful one.
+    with open(filepath, "r+b") as file, mmap.mmap(
+        file.fileno(), 0, access=mmap.ACCESS_WRITE
+    ) as mapped:
+        try:
+            # Get the location of the PE header and the optional header
+            pe_header_offset = struct.unpack("<I", mapped[0x3C:0x40])[0]
+            optional_header_offset = pe_header_offset + 24
+
+            # Look up the magic number in the optional header,
+            # so we know if we have a 32 or 64-bit executable.
+            # We need to know that so that we can find the data directories.
+            pe_magic_number = struct.unpack(
+                "<H", mapped[optional_header_offset : optional_header_offset + 2]
+            )[0]
+        except struct.error:
+            # The file is too short to contain the headers
+            return False
+
+        if pe_magic_number == 0x10B:
+            # 32-bit
+            cert_dir_entry_offset = optional_header_offset + 128
+        elif pe_magic_number == 0x20B:
+            # 64-bit. Certain header fields are wider.
+            cert_dir_entry_offset = optional_header_offset + 144
+        else:
+            # Not any known PE format
+            return False
+
+        # The certificate table offset and length give us the valid range
+        # to search through for where we should put our data.
+        try:
+            cert_table_offset, cert_table_size = struct.unpack(
+                "<II", mapped[cert_dir_entry_offset : cert_dir_entry_offset + 8]
+            )
+        except struct.error:
+            # The file ends before the certificate directory entry
+            return False
+
+        if cert_table_offset == 0 or cert_table_size == 0:
+            # The file isn't signed
+            return False
+
+        cert_table_end = cert_table_offset + cert_table_size
+        tag_index = mapped.find(tag, cert_table_offset, cert_table_end)
+        if tag_index == -1:
+            return False
+
+        # Refuse to write past the reserved space or the certificate table;
+        # doing so would overwrite the data that follows the placeholder.
+        start = tag_index + len(tag)
+        end = start + len(payload)
+        if end > tag_index + reserved_size or end > cert_table_end:
+            return False
+
+        mapped[start:end] = payload
+        return True
+
+
+def validate_attribution_code(attribution):
+    """Check an attribution code, logging every problem that is found.
+
+    The code must be non-empty, no longer than 1010 characters, use only the
+    known keys, use each key at most once, and set all of source, medium,
+    campaign and content.
+
+    Returns True when every check passes and False otherwise. An empty code
+    returns False immediately; any other failure is logged and checking
+    continues so that all problems are reported in one run.
+    """
+    log.info("Checking attribution %s" % attribution)
+
+    if not len(attribution):
+        log.error("Attribution code has 0 length")
+        return False
+
+    known_keys = (
+        "source",
+        "medium",
+        "campaign",
+        "content",
+        "experiment",
+        "variation",
+        "ua",
+    )
+    required_keys = ("source", "medium", "campaign", "content")
+    is_valid = True
+
+    # Set to match https://searchfox.org/mozilla-central/rev/a92ed79b0bc746159fc31af1586adbfa9e45e264/browser/components/attribution/AttributionCode.jsm#24  # noqa
+    MAX_LENGTH = 1010
+    if len(attribution) > MAX_LENGTH:
+        log.error("Attribution code longer than %s chars" % MAX_LENGTH)
+        is_valid = False
+
+    seen_keys = set()
+    # this leaves out empty values like 'foo='
+    for key, _value in urllib.parse.parse_qsl(attribution):
+        # check for invalid keys
+        if key not in known_keys:
+            log.error("Invalid key %s" % key)
+            is_valid = False
+
+        # avoid ambiguity from repeated keys
+        if key in seen_keys:
+            log.error("Repeated key %s" % key)
+            is_valid = False
+            continue
+        seen_keys.add(key)
+
+        # TODO the service checks for valid source, should we do that here too ?
+
+    # some keys are required
+    for key in required_keys:
+        if key in seen_keys:
+            continue
+        log.error("key '%s' must be set, use '(not set)' if not needed" % key)
+        is_valid = False
+
+    return is_valid
+
+
+def main():
+    """Attribute one or more Windows installers and return an exit status.
+
+    The list of jobs is read from the ATTRIBUTION_CONFIG environment variable
+    (a JSON list of {"input", "output", "attribution"} objects) or, when that
+    is unset or empty, built from the --input/--output/--attribution
+    arguments. Each job copies the input installer to a temporary directory,
+    writes the attribution code into the copy and moves it to the output
+    location.
+
+    Returns 0 when every job succeeded, or 1 as soon as there is no
+    configuration, an attribution code fails validation, or the attribution
+    data cannot be written to an installer.
+    """
+    parser = argparse.ArgumentParser(
+        description="Add attribution to Windows installer(s).",
+        epilog="""
+        By default, configuration from envvar ATTRIBUTION_CONFIG is used, with
+        expected format
+          [{"input": "in/abc.exe", "output": "out/def.exe", "attribution": "abcdef"},
+           {"input": "in/ghi.exe", "output": "out/jkl.exe", "attribution": "ghijkl"}]
+        for 1 or more attributions. Or the script arguments may be used for a single attribution.
+
+        The attribution code should be a string which is not url-encoded.
+        """,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    parser.add_argument("--input", help="Source installer to attribute a copy of")
+    parser.add_argument("--output", help="Location to write the attributed installer")
+    parser.add_argument("--attribution", help="Attribution code")
+    args = parser.parse_args()
+
+    if os.environ.get("ATTRIBUTION_CONFIG"):
+        work = json.loads(os.environ["ATTRIBUTION_CONFIG"])
+    elif args.input and args.output and args.attribution:
+        work = [
+            {
+                "input": args.input,
+                "output": args.output,
+                "attribution": args.attribution,
+            }
+        ]
+    else:
+        log.error("No configuration found. Set ATTRIBUTION_CONFIG or pass arguments.")
+        return 1
+
+    # Many jobs share the same code, so only validate each distinct code once.
+    checked_codes = set()
+    for job in work:
+        code = job["attribution"]
+        if code not in checked_codes:
+            if not validate_attribution_code(code):
+                log.error("Failed attribution code check")
+                return 1
+            checked_codes.add(code)
+
+        with tempfile.TemporaryDirectory() as td:
+            log.info("Attributing installer %s ..." % job["input"])
+            tf = shutil.copy(job["input"], td)
+            if not write_attribution_data(tf, code):
+                # Fail loudly: carrying on would exit 0 with this output missing.
+                log.error("Failed to write attribution data for %s" % job["input"])
+                return 1
+
+            # os.makedirs("") raises, so skip it for an output path that has
+            # no directory component.
+            output_dir = os.path.dirname(job["output"])
+            if output_dir:
+                os.makedirs(output_dir, exist_ok=True)
+            shutil.move(tf, job["output"])
+            log.info("Wrote %s" % job["output"])
+
+    return 0
+
+
+# When executed as a script, run main() and use its return value as the
+# process exit status.
+if __name__ == "__main__":
+    sys.exit(main())
new file mode 100644
--- /dev/null
+++ b/taskcluster/ci/release-partner-attribution/kind.yml
@@ -0,0 +1,40 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+---
+loader: taskgraph.loader.transform:loader
+
+transforms:
+    - taskgraph.transforms.release_deps:transforms
+    - taskgraph.transforms.partner_attribution:transforms
+    - taskgraph.transforms.job:transforms
+    - taskgraph.transforms.task:transforms
+
+kind-dependencies:
+    - repackage-signing
+    - repackage-signing-l10n
+
+# TODO: decide whether these defaults should move into the single job below.
+job-defaults:
+    name: partner-attribution
+    description: Release Promotion partner attribution
+    run-on-projects: []  # to make sure this never runs as part of CI
+    shipping-product: firefox
+    shipping-phase: promote
+    worker-type: b-linux
+    worker:
+        docker-image:
+            in-tree: "partner-repack"
+        chain-of-trust: true
+        max-run-time: 1800
+    run:
+        using: mach
+        mach: python python/mozrelease/mozrelease/partner_attribution.py
+
+jobs:
+    partner-attribution:
+        attributes:
+            build_platform: linux-shippable
+            build_type: opt
+            artifact_prefix: releng/partner
+            shippable: true
--- a/taskcluster/docs/kinds.rst
+++ b/taskcluster/docs/kinds.rst
@@ -446,16 +446,20 @@ Generates source for the release
 release-source-signing
 ----------------------
 Signs source for the release
 
 release-partner-repack
 ----------------------
 Generates customized versions of releases for partners.
 
+release-partner-attribution
+---------------------------
+Generates attributed versions of releases for partners.
+
 release-partner-repack-chunking-dummy
 -------------------------------------
 Chunks the partner repacks by locale.
 
 release-partner-repack-signing
 ------------------------------
 Internal signing of partner repacks.
 
new file mode 100644
--- /dev/null
+++ b/taskcluster/taskgraph/transforms/partner_attribution.py
@@ -0,0 +1,133 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+"""
+Transform the partner attribution task into an actual task description.
+"""
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+from collections import defaultdict
+import json
+import logging
+
+import six
+
+from taskgraph.transforms.base import TransformSequence
+from taskgraph.util.partners import (
+    apply_partner_priority,
+    check_if_partners_enabled,
+    get_partner_config_by_kind,
+    generate_attribution_code,
+)
+
+log = logging.getLogger(__name__)
+
+# The transform sequence for this kind. The two shared helpers imported from
+# taskgraph.util.partners are registered ahead of the transform defined in
+# this module.
+transforms = TransformSequence()
+transforms.add(check_if_partners_enabled)
+transforms.add(apply_partner_priority)
+
+
+@transforms.add
+def add_command_arguments(config, tasks):
+    """Fill in dependencies, fetches, env and artifacts for the attribution task.
+
+    For every configured partner (restricted to the `release_partners`
+    parameter when that is set), platform and locale, this works out which
+    upstream repackage-signing task provides the installer, where the fetched
+    installer will be found, and where the attributed copy must be written.
+    The resulting list is passed to the attribution script as JSON in the
+    ATTRIBUTION_CONFIG environment variable.
+
+    Yields each incoming task with those settings applied and its label set
+    to the kind name. Yields nothing when there are no attributions to do.
+
+    Raises Exception if the upstream task for a platform/locale pair is not
+    among the kind dependencies.
+    """
+    enabled_partners = config.params.get("release_partners")
+    dependencies = {}
+    fetches = defaultdict(set)
+    attributions = []
+    release_artifacts = []
+    attribution_config = get_partner_config_by_kind(config, config.kind)
+
+    for partner_config in attribution_config.get("configs", []):
+        # we might only be interested in a subset of all partners, eg for a respin
+        if enabled_partners and partner_config["campaign"] not in enabled_partners:
+            continue
+        attribution_code = generate_attribution_code(
+            attribution_config["defaults"], partner_config
+        )
+        for platform in partner_config["platforms"]:
+            stage_platform = platform.replace("-shippable", "")
+            for locale in partner_config["locales"]:
+                # find the upstream, throw away locales we don't have, somehow. Skip ?
+                if locale == "en-US":
+                    upstream_label = "repackage-signing-{platform}/opt".format(
+                        platform=platform
+                    )
+                    upstream_artifact = "target.installer.exe"
+                else:
+                    upstream_label = "repackage-signing-l10n-{locale}-{platform}/opt".format(
+                        locale=locale, platform=platform
+                    )
+                    upstream_artifact = "{locale}/target.installer.exe".format(
+                        locale=locale
+                    )
+                if upstream_label not in config.kind_dependencies_tasks:
+                    raise Exception(
+                        "Can't find upstream task for {} {}".format(
+                            platform, locale
+                        )
+                    )
+                upstream = config.kind_dependencies_tasks[upstream_label]
+
+                # set the dependencies to just what we need rather than all of l10n
+                dependencies[upstream.label] = upstream.label
+
+                # a set, because several partners can need the same installer
+                fetches[upstream_label].add(
+                    (upstream_artifact, stage_platform, locale)
+                )
+
+                artifact_part = "{platform}/{locale}/target.installer.exe".format(
+                    platform=stage_platform, locale=locale
+                )
+                artifact = "releng/partner/{partner}/{sub_partner}/{artifact_part}".format(
+                    partner=partner_config["campaign"],
+                    sub_partner=partner_config["content"],
+                    artifact_part=artifact_part,
+                )
+                # config for script
+                # TODO - generalise input & output ??
+                #  add releng/partner prefix via get_artifact_prefix..()
+                attributions.append(
+                    {
+                        "input": "/builds/worker/fetches/{}".format(artifact_part),
+                        "output": "/builds/worker/artifacts/{}".format(artifact),
+                        "attribution": attribution_code,
+                    }
+                )
+                release_artifacts.append(artifact)
+
+    # bail-out early if we don't have any attributions to do
+    if not attributions:
+        return
+
+    for task in tasks:
+        # setdefault rather than get, so that a task without a "worker"
+        # section still has the settings below attached to it
+        worker = task.setdefault("worker", {})
+        worker["chain-of-trust"] = True
+
+        task.setdefault("dependencies", {}).update(dependencies)
+        task.setdefault("fetches", {})
+        for upstream_label, upstream_artifacts in fetches.items():
+            # sorted, because set iteration order is arbitrary and the task
+            # definition should be the same from one run to the next
+            task["fetches"][upstream_label] = [
+                {
+                    "artifact": artifact_name,
+                    "dest": "{platform}/{locale}".format(
+                        platform=fetch_platform, locale=fetch_locale
+                    ),
+                    "extract": False,
+                    "verify-hash": True,
+                }
+                for artifact_name, fetch_platform, fetch_locale in sorted(
+                    upstream_artifacts
+                )
+            ]
+        worker.setdefault("env", {})["ATTRIBUTION_CONFIG"] = six.ensure_text(
+            json.dumps(attributions, sort_keys=True)
+        )
+        worker["artifacts"] = [
+            {
+                "name": "releng/partner",
+                "path": "/builds/worker/artifacts/releng/partner",
+                "type": "directory",
+            }
+        ]
+        task["release-artifacts"] = release_artifacts
+        task["label"] = config.kind
+
+        yield task
--- a/taskcluster/taskgraph/transforms/partner_repack.py
+++ b/taskcluster/taskgraph/transforms/partner_repack.py
@@ -7,23 +7,25 @@ Transform the partner repack task into a
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 from taskgraph.transforms.base import TransformSequence
 from taskgraph.util.schema import resolve_keyed_by
 from taskgraph.util.scriptworker import get_release_config
 from taskgraph.util.partners import (
     check_if_partners_enabled,
+    get_partner_config_by_kind,
     get_partner_url_config,
     get_repack_ids_by_platform,
     apply_partner_priority,
 )
 
 
 transforms = TransformSequence()
+transforms.add(check_if_partners_enabled)
 transforms.add(apply_partner_priority)
 
 
 @transforms.add
 def skip_unnecessary_platforms(config, tasks):
     for task in tasks:
         if config.kind == "release-partner-repack":
             platform = task['attributes']['build_platform']
@@ -68,21 +70,24 @@ def make_label(config, tasks):
     for task in tasks:
         task['label'] = "{}-{}".format(config.kind, task['name'])
         yield task
 
 
 @transforms.add
 def add_command_arguments(config, tasks):
     release_config = get_release_config(config)
+
+    # staging releases - pass reduced set of locales to the repacking script
     all_locales = set()
-    for partner_class in config.params['release_partner_config'].values():
-        for partner in partner_class.values():
-            for sub_partner in partner.values():
-                all_locales.update(sub_partner.get('locales', []))
+    partner_config = get_partner_config_by_kind(config, config.kind)
+    for partner in partner_config.values():
+        for sub_partner in partner.values():
+            all_locales.update(sub_partner.get('locales', []))
+
     for task in tasks:
         # add the MOZHARNESS_OPTIONS, eg version=61.0, build-number=1, platform=win64
         if not task['attributes']['build_platform'].endswith('-shippable'):
             raise Exception(
                 "Unexpected partner repack platform: {}".format(
                     task['attributes']['build_platform'],
                 ),
             )
@@ -105,13 +110,8 @@ def add_command_arguments(config, tasks)
         task['worker']['env']['UPSTREAM_TASKIDS'] = {
             'task-reference': ' '.join(['<{}>'.format(dep) for dep in task['dependencies']])
         }
 
         # Forward the release type for bouncer product construction
         task['worker']['env']['RELEASE_TYPE'] = config.params['release_type']
 
         yield task
-
-
-# This needs to be run at the *end*, because the generators are called in
-# reverse order, when each downstream transform references `tasks`.
-transforms.add(check_if_partners_enabled)
--- a/taskcluster/taskgraph/util/partners.py
+++ b/taskcluster/taskgraph/util/partners.py
@@ -355,20 +355,26 @@ def get_partner_config_by_kind(config, k
     for k in partner_configs:
         if kind.startswith(k):
             kind_config = partner_configs[k]
             break
     else:
         return {}
     # if we're only interested in a subset of partners we remove the rest
     if partner_subset:
-        # TODO - should be fatal to have an unknown partner in partner_subset
-        for partner in [p for p in kind_config.keys() if p not in partner_subset]:
-            del(kind_config[partner])
-
+        if kind.startswith('release-partner-repack'):
+            # TODO - should be fatal to have an unknown partner in partner_subset
+            for partner in [p for p in kind_config.keys() if p not in partner_subset]:
+                del(kind_config[partner])
+        elif kind.startswith('release-partner-attribution'):
+            all_configs = deepcopy(kind_config["configs"])
+            kind_config["configs"] = []
+            for this_config in all_configs:
+                if this_config["campaign"] in partner_subset:
+                    kind_config["configs"].append(this_config)
     return kind_config
 
 
 def _fix_subpartner_locales(orig_config, all_locales):
     subpartner_config = deepcopy(orig_config)
     # Get an ordered list of subpartner locales that is a subset of all_locales
     subpartner_config['locales'] = sorted(list(
         set(orig_config['locales']) & set(all_locales)
@@ -474,15 +480,31 @@ def get_partners_to_be_published(config)
 def apply_partner_priority(config, jobs):
     priority = None
     # Reduce the priority of the partner repack jobs because they don't block QE. Meanwhile
     # leave EME-free jobs alone because they do, and they'll get the branch priority like the rest
     # of the release. Only bother with this in production, not on staging releases on try.
     # medium is the same as mozilla-central, see taskcluster/ci/config.yml. ie higher than
     # integration branches because we don't want to wait a lot for the graph to be done, but
     # for multiple releases the partner tasks always wait for non-partner.
-    if (config.kind.startswith('release-partner-repack') and
+    if (config.kind.startswith(('release-partner-repack', 'release-partner-attribution')) and
             config.params.release_level() == "production"):
         priority = 'medium'
     for job in jobs:
         if priority:
             job['priority'] = priority
         yield job
+
+
+def generate_attribution_code(defaults, partner):
+    """Build the url-encoded attribution code for one partner configuration.
+
+    `medium` and `source` are taken from `defaults`; `campaign` and `content`
+    are taken from `partner`. `variation` and `experiment` are appended only
+    when `partner` has a truthy value for them.
+
+    Returns the parameters as a url-encoded query string. Raises KeyError if
+    one of the four mandatory values is missing.
+    """
+    # A list of pairs rather than a dict, so that the parameters come out in
+    # this order on every interpreter instead of following dict ordering.
+    params = [
+        ("medium", defaults["medium"]),
+        ("source", defaults["source"]),
+        ("campaign", partner["campaign"]),
+        ("content", partner["content"]),
+    ]
+    for optional_key in ("variation", "experiment"):
+        if partner.get(optional_key):
+            params.append((optional_key, partner[optional_key]))
+
+    return six.moves.urllib.parse.urlencode(params)