Bug 1633866 - [taskgraph] Create a 'bugbug' based test manifest loader, r=marco
author Andrew Halberstadt <ahalberstadt@mozilla.com>
Thu, 04 Jun 2020 18:45:09 +0000
changeset 533969 69790da5ec2c40fa188feeee93801cf925af92f8
parent 533968 acace34eb88d62a102652e92cfb7290b18d9f5c9
child 533970 b39e4d67af924e5051d9371cd125cf4750799143
push id 37481
push user ncsoregi@mozilla.com
push date Fri, 05 Jun 2020 04:39:26 +0000
treeherder mozilla-central@fecffba489bd [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1633866 - [taskgraph] Create a 'bugbug' based test manifest loader, r=marco

Loads manifests using bugbug's push/schedules endpoint. For now we use a low confidence threshold to select manifests. This is to try to both: 1) improve the regression detection rate and 2) save resources at the same time. This way we theoretically shouldn't regress either dimension. Note that we still optimize with CT_MEDIUM. My thinking is that at least all of the "extra" stuff is still guaranteed to be relevant. Though we may want to consider making these two thresholds match.

Differential Revision: https://phabricator.services.mozilla.com/D76523
--- a/taskcluster/taskgraph/transforms/tests.py
+++ b/taskcluster/taskgraph/transforms/tests.py
@@ -1398,27 +1398,35 @@ def set_test_manifests(config, tasks):
         if manifests:
             if isinstance(manifests, list):
                 task['test-manifests'] = {'active': manifests, 'skipped': []}
             yield task
         mozinfo = guess_mozinfo_from_task(task)
-        loader = manifest_loaders[config.params['test_manifest_loader']]
+        loader_cls = manifest_loaders[config.params['test_manifest_loader']]
+        loader = loader_cls(config.params)
         task['test-manifests'] = loader.get_manifests(
         # Skip the task if the loader doesn't return any manifests for the
         # associated suite.
         if not task['test-manifests']['active'] and not task['test-manifests']['skipped']:
+        # The default loader loads all manifests. If we use a non-default
+        # loader, we'll only run some subset of manifests and the hardcoded
+        # chunk numbers will no longer be valid. Dynamic chunking should yield
+        # better results.
+        if config.params['test_manifest_loader'] != 'default':
+            task['chunks'] = "dynamic"
         yield task
 def resolve_dynamic_chunks(config, tasks):
     """Determine how many chunks are needed to handle the given set of manifests."""
     for task in tasks:
--- a/taskcluster/taskgraph/util/chunking.py
+++ b/taskcluster/taskgraph/util/chunking.py
@@ -17,16 +17,17 @@ from manifestparser.filters import chunk
 from mozbuild.util import memoize
 from moztest.resolve import (
 from taskgraph import GECKO
+from taskgraph.util.bugbug import CT_LOW, push_schedules
 here = os.path.abspath(os.path.dirname(__file__))
 resolver = TestResolver.from_environment(cwd=here, loader_cls=TestManifestLoader)
 def guess_mozinfo_from_task(task):
     """Attempt to build a mozinfo dict from a task definition.
@@ -88,17 +89,18 @@ def chunk_manifests(suite, platform, chu
         platform (str): Platform used to find runtime info.
         chunks (int): Number of chunks to split manifests into.
         manifests(list): Manifests to chunk.
         A list of length `chunks` where each item contains a list of manifests
         that run in that chunk.
-    runtimes = get_runtimes(platform, suite)
+    manifests = set(manifests)
+    runtimes = {k: v for k, v in get_runtimes(platform, suite).items() if k in manifests}
     if "web-platform-tests" not in suite:
         return [
             c[1] for c in chunk_by_runtime(
@@ -149,16 +151,20 @@ def chunk_manifests(suite, platform, chu
     chunked_manifests.sort(key=lambda x: (x[1], len(x[0])))
     # Return just the chunked test paths.
     return [c[0] for c in chunked_manifests]
 class BaseManifestLoader(object):
+    def __init__(self, params):
+        self.params = params
     def get_manifests(self, flavor, subsuite, mozinfo):
         """Compute which manifests should run for the given flavor, subsuite and mozinfo.
         This function returns skipped manifests separately so that more balanced
         chunks can be achieved by only considering "active" manifests in the
         chunking algorithm.
@@ -232,11 +238,29 @@ class DefaultLoader(BaseManifestLoader):
         m = TestManifest()
         m.tests = tests
         tests = m.active_tests(disabled=False, exists=False, **mozinfo)
         active = set(chunk_by_runtime.get_manifest(t) for t in tests)
         skipped = manifests - active
         return {"active": list(active), "skipped": list(skipped)}
+class BugbugLoader(DefaultLoader):
+    """Load manifests using metadata from the TestResolver, and then
+    filter them based on a query to bugbug."""
+    @memoize
+    def get_manifests(self, suite, mozinfo):
+        manifests = super(BugbugLoader, self).get_manifests(suite, mozinfo)
+        data = push_schedules(self.params['project'], self.params['head_rev'])
+        bugbug_manifests = {m for m, c in data.get('groups', {}).items()
+                            if c >= self.CONFIDENCE_THRESHOLD}
+        manifests['active'] = list(set(manifests['active']) & bugbug_manifests)
+        return manifests
 manifest_loaders = {
-    'default': DefaultLoader(),
+    'bugbug': BugbugLoader,
+    'default': DefaultLoader,