Bug 1536722 - combine all taskgraph artifacts, not just task-graph; r=tomprince
author Dustin J. Mitchell <dustin@mozilla.com>
Wed, 01 May 2019 12:58:39 +0000
changeset 472068 092d677f7181521cb8dce8ae7787c1d88645286f
parent 472067 6843d141c496ade289d5d162532d9fc6d479e78c
child 472069 a027a998b8b79bd4afa15930d7ca22c3acb7b16c
push id 35946
push user apavel@mozilla.com
push date Wed, 01 May 2019 15:54:31 +0000
treeherder mozilla-central@a027a998b8b7 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers tomprince
bugs 1536722
milestone 68.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1536722 - combine all taskgraph artifacts, not just task-graph; r=tomprince

This also adds an optimization for the case where there is only one result
(Which is common for actions where `times` defaults to 1)

Differential Revision: https://phabricator.services.mozilla.com/D28889
taskcluster/taskgraph/actions/util.py
taskcluster/taskgraph/decision.py
taskcluster/taskgraph/test/test_actions_util.py
--- a/taskcluster/taskgraph/actions/util.py
+++ b/taskcluster/taskgraph/actions/util.py
@@ -12,17 +12,17 @@ import logging
 import os
 import re
 
 from six import text_type
 
 from requests.exceptions import HTTPError
 
 from taskgraph import create
-from taskgraph.decision import read_artifact, write_artifact
+from taskgraph.decision import read_artifact, write_artifact, rename_artifact
 from taskgraph.taskgraph import TaskGraph
 from taskgraph.optimize import optimize_task_graph
 from taskgraph.util.taskcluster import (
     get_session,
     get_artifact,
     list_tasks,
     parse_time,
     CONCURRENCY,
@@ -164,26 +164,53 @@ def create_tasks(graph_config, to_run, f
         optimized_task_graph,
         label_to_taskid,
         params,
         decision_task_id,
     )
     return label_to_taskid
 
 
+def _update_reducer(accumulator, new_value):
+    "similar to set or dict `update` method, but returning the modified object"
+    accumulator.update(new_value)
+    return accumulator
+
 def combine_task_graph_files(suffixes):
     """Combine task-graph-{suffix}.json files into a single task-graph.json file.
 
     Since Chain of Trust verification requires a task-graph.json file that
     contains all children tasks, we can combine the various task-graph-0.json
-    type files into a master task-graph.json file at the end."""
-    all = {}
-    for suffix in suffixes:
-        all.update(read_artifact('task-graph-{}.json'.format(suffix)))
-    write_artifact('task-graph.json', all)
+    type files into a master task-graph.json file at the end.
+
+    Actions also look for various artifacts, so we combine those in a similar
+    fashion.
+
+    In the case where there is only one suffix, we simply rename it to avoid the
+    additional cost of uploading two copies of the same data.
+    """
+
+    if len(suffixes) == 1:
+        for filename in ['task-graph', 'label-to-taskid', 'to-run']:
+            rename_artifact(
+                "{}-{}.json".format(filename, suffixes[0]),
+                "{}.json".format(filename))
+        return
+
+    def combine(file_contents, base):
+        return reduce(_update_reducer, file_contents, base)
+
+    files = [read_artifact("task-graph-{}.json".format(suffix)) for suffix in suffixes]
+    write_artifact("task-graph.json", combine(files, dict()))
+
+    files = [read_artifact("label-to-taskid-{}.json".format(suffix)) for suffix in suffixes]
+    write_artifact("label-to-taskid.json", combine(files, dict()))
+
+    files = [read_artifact("to-run-{}.json".format(suffix)) for suffix in suffixes]
+    write_artifact("to-run.json", list(combine(files, set())))
 
 
 def relativize_datestamps(task_def):
     """
     Given a task definition as received from the queue, convert all datestamps
     to {relative_datestamp: ..} format, with the task creation time as "now".
     The result is useful for handing to ``create_task``.
     """
--- a/taskcluster/taskgraph/decision.py
+++ b/taskcluster/taskgraph/decision.py
@@ -400,8 +400,12 @@ def read_artifact(filename):
         with open(path, 'r') as f:
             return json.load(f)
     elif filename.endswith('.gz'):
         import gzip
         with gzip.open(path, 'rb') as f:
             return json.load(f)
     else:
         raise TypeError("Don't know how to read {}".format(filename))
+
+
+def rename_artifact(src, dest):
+    os.rename(os.path.join(ARTIFACTS_DIR, src), os.path.join(ARTIFACTS_DIR, dest))
--- a/taskcluster/taskgraph/test/test_actions_util.py
+++ b/taskcluster/taskgraph/test/test_actions_util.py
@@ -1,18 +1,22 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 import unittest
-from mozunit import main
+import json
+from mock import patch
+from mozunit import main, MockedOpen
+from taskgraph.decision import read_artifact
 from taskgraph.actions.util import (
-    relativize_datestamps
+    relativize_datestamps,
+    combine_task_graph_files,
 )
 
 TASK_DEF = {
     'created': '2017-10-10T18:33:03.460Z',
     # note that this is not an even number of seconds off!
     'deadline': '2017-10-11T18:33:03.461Z',
     'dependencies': [],
     'expires': '2018-10-10T18:33:04.461Z',
@@ -37,10 +41,49 @@ class TestRelativize(unittest.TestCase):
         pprint.pprint(rel)
         assert rel['created'] == {'relative-datestamp': '0 seconds'}
         assert rel['deadline'] == {'relative-datestamp': '86400 seconds'}
         assert rel['expires'] == {'relative-datestamp': '31536001 seconds'}
         assert rel['payload']['artifacts']['public']['expires'] == \
             {'relative-datestamp': '31536000 seconds'}
 
 
+class TestCombineTaskGraphFiles(unittest.TestCase):
+
+    def test_no_suffixes(self):
+        with MockedOpen({}):
+            combine_task_graph_files([])
+            self.assertRaises(Exception, open('artifacts/to-run.json'))
+
+    @patch('taskgraph.actions.util.rename_artifact')
+    def test_one_suffix(self, rename_artifact):
+        combine_task_graph_files(['0'])
+        rename_artifact.assert_any_call('task-graph-0.json', 'task-graph.json')
+        rename_artifact.assert_any_call('label-to-taskid-0.json', 'label-to-taskid.json')
+        rename_artifact.assert_any_call('to-run-0.json', 'to-run.json')
+
+    def test_several_suffixes(self):
+        files = {
+            'artifacts/task-graph-0.json': json.dumps({'taska': {}}),
+            'artifacts/label-to-taskid-0.json': json.dumps({'taska': 'TASKA'}),
+            'artifacts/to-run-0.json': json.dumps(['taska']),
+            'artifacts/task-graph-1.json': json.dumps({'taskb': {}}),
+            'artifacts/label-to-taskid-1.json': json.dumps({'taskb': 'TASKB'}),
+            'artifacts/to-run-1.json': json.dumps(['taskb']),
+        }
+        with MockedOpen(files):
+            combine_task_graph_files(['0', '1'])
+            self.assertEqual(read_artifact('task-graph.json'), {
+                'taska': {},
+                'taskb': {},
+            })
+            self.assertEqual(read_artifact('label-to-taskid.json'), {
+                'taska': 'TASKA',
+                'taskb': 'TASKB',
+            })
+            self.assertEqual(sorted(read_artifact('to-run.json')), [
+                'taska',
+                'taskb',
+            ])
+
+
 if __name__ == '__main__':
     main()