bug 1477747 - combine action task-graph-{suffix}.json into one. r=dustin draft
author Aki Sasaki <asasaki@mozilla.com>
Tue, 24 Jul 2018 10:21:51 -0700
changeset 822076 693591dd09b65bbfc2f27f23f47f8d7bba7e8ee2
parent 820131 5a8107262015714d2907a85abc24c847ad9b32d2
push id 117277
push user asasaki@mozilla.com
push date Tue, 24 Jul 2018 17:35:44 +0000
reviewers dustin
bugs 1477747
milestone 63.0a1
bug 1477747 - combine action task-graph-{suffix}.json into one. r=dustin MozReview-Commit-ID: J3uaiMcTOBb
taskcluster/taskgraph/actions/backfill.py
taskcluster/taskgraph/actions/retrigger.py
taskcluster/taskgraph/actions/util.py
taskcluster/taskgraph/decision.py
--- a/taskcluster/taskgraph/actions/backfill.py
+++ b/taskcluster/taskgraph/actions/backfill.py
@@ -7,17 +7,17 @@
 from __future__ import absolute_import, print_function, unicode_literals
 
 import logging
 
 import requests
 from requests.exceptions import HTTPError
 
 from .registry import register_callback_action
-from .util import find_decision_task, create_tasks
+from .util import find_decision_task, create_tasks, combine_task_graph_files
 from taskgraph.util.taskcluster import get_artifact_from_index
 from taskgraph.taskgraph import TaskGraph
 
 PUSHLOG_TMPL = '{}/json-pushes?version=2&startID={}&endID={}'
 INDEX_TMPL = 'gecko.v2.{}.pushlog-id.{}.decision'
 
 logger = logging.getLogger(__name__)
 
@@ -84,16 +84,17 @@ def backfill_action(parameters, graph_co
             break
 
         end_id = start_id - 1
         start_id -= depth
         if start_id < 0:
             break
 
     pushes = sorted(pushes)[-depth:]
+    backfill_pushes = []
 
     for push in pushes:
         try:
             full_task_graph = get_artifact_from_index(
                     INDEX_TMPL.format(parameters['project'], push),
                     'public/full-task-graph.json')
             _, full_task_graph = TaskGraph.from_json(full_task_graph)
             label_to_taskid = get_artifact_from_index(
@@ -176,18 +177,20 @@ def backfill_action(parameters, graph_co
                     task.task['extra']['suite']['flavor'] = 'test-verify'
 
                     task.task['extra']['treeherder']['symbol'] = symbol
                     del task.task['extra']['treeherder']['groupSymbol']
                 return task
 
             create_tasks([label], full_task_graph, label_to_taskid,
                          push_params, push_decision_task_id, push, modifier=modifier)
+            backfill_pushes.append(push)
         else:
             logging.info('Could not find {} on {}. Skipping.'.format(label, push))
+    combine_task_graph_files(backfill_pushes)
 
 
 def remove_args_from_command(cmd_parts, preamble_length=0, args_to_ignore=[]):
     """
        We need to remove all extra instances of command line arguments
        that are suite/job specific, like suite=jsreftest, subsuite=devtools
        and other ones like --total-chunk=X.
        args:
--- a/taskcluster/taskgraph/actions/retrigger.py
+++ b/taskcluster/taskgraph/actions/retrigger.py
@@ -4,16 +4,17 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 import logging
 
 from .util import (
+    combine_task_graph_files,
     create_tasks,
     fetch_graph_and_labels
 )
 from .registry import register_callback_action
 
 logger = logging.getLogger(__name__)
 
 
@@ -63,8 +64,9 @@ def retrigger_action(parameters, graph_c
         to_run = to_run & set(label_to_taskid.keys())
         with_downstream = ' (with downstream) '
 
     times = input.get('times', 1)
     for i in xrange(times):
         create_tasks(to_run, full_task_graph, label_to_taskid, parameters, decision_task_id, i)
 
         logger.info('Scheduled {}{}(time {}/{})'.format(label, with_downstream, i+1, times))
+    combine_task_graph_files(list(range(times)))
--- a/taskcluster/taskgraph/actions/util.py
+++ b/taskcluster/taskgraph/actions/util.py
@@ -9,17 +9,17 @@ from __future__ import absolute_import, 
 import copy
 import logging
 import requests
 import os
 
 from requests.exceptions import HTTPError
 
 from taskgraph import create
-from taskgraph.decision import write_artifact
+from taskgraph.decision import read_artifact, write_artifact
 from taskgraph.taskgraph import TaskGraph
 from taskgraph.optimize import optimize_task_graph
 from taskgraph.util.taskcluster import get_session, find_task_id, get_artifact, list_tasks
 
 logger = logging.getLogger(__name__)
 
 PUSHLOG_TMPL = '{}/json-pushes?version=2&changeset={}&tipsonly=1&full=1'
 
@@ -147,8 +147,20 @@ def create_tasks(to_run, full_task_graph
                                                                 params,
                                                                 to_run,
                                                                 label_to_taskid)
     write_artifact('task-graph{}.json'.format(suffix), optimized_task_graph.to_json())
     write_artifact('label-to-taskid{}.json'.format(suffix), label_to_taskid)
     write_artifact('to-run{}.json'.format(suffix), list(to_run))
     create.create_tasks(optimized_task_graph, label_to_taskid, params, decision_task_id)
     return label_to_taskid
+
+
+def combine_task_graph_files(suffixes):
+    """Combine task-graph-{suffix}.json files into a single task-graph.json file.
+
+    Chain of Trust verification requires one task-graph.json that contains all
+    children tasks, so the artifact for each suffix in `suffixes` is read and
+    merged (later suffixes win on duplicate keys) into a master file."""
+    combined = {}
+    for suffix in suffixes:
+        combined.update(read_artifact('task-graph-{}.json'.format(suffix)))
+    write_artifact('task-graph.json', combined)
--- a/taskcluster/taskgraph/decision.py
+++ b/taskcluster/taskgraph/decision.py
@@ -310,8 +310,24 @@ def write_artifact(filename, data):
         with open(path, 'w') as f:
             json.dump(data, f, sort_keys=True, indent=2, separators=(',', ': '))
     elif filename.endswith('.gz'):
         import gzip
         with gzip.open(path, 'wb') as f:
             f.write(json.dumps(data))
     else:
         raise TypeError("Don't know how to write to {}".format(filename))
+
+
+def read_artifact(filename):  # load an artifact from ARTIFACTS_DIR, parsed by extension
+    path = os.path.join(ARTIFACTS_DIR, filename)
+    if filename.endswith('.yml'):
+        with open(path, 'r') as f:
+            return yaml.load(f)  # NOTE(review): plain yaml.load; confirm input is trusted
+    elif filename.endswith('.json'):
+        with open(path, 'r') as f:
+            return json.load(f)
+    elif filename.endswith('.gz'):
+        import gzip
+        with gzip.open(path, 'rb') as f:
+            return json.load(f)  # any .gz artifact is parsed as gzip-compressed JSON
+    else:
+        raise TypeError("Don't know how to read {}".format(filename))