Bug 1507860: [taskgraph] Move most classes to use attrs; r=dustin
author Tom Prince <mozilla@hocat.ca>
Wed, 21 Nov 2018 23:50:04 +0000
changeset 504065 00e3e4c617a8f22df2e49a428eae8dbde2aea641
parent 504064 fab7024e765889bd2ea6d42e735e40b74e3ba166
child 504066 a85c1b95e53c768a284ea4238f288281abe6d04d
push id 10290
push user ffxbld-merge
push date Mon, 03 Dec 2018 16:23:23 +0000
treeherder mozilla-beta@700bed2445e6 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers dustin
bugs 1507860
milestone 65.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1507860: [taskgraph] Move most classes to use attrs; r=dustin This moves most of the low-hanging fruit to use attrs. Differential Revision: https://phabricator.services.mozilla.com/D1141
taskcluster/taskgraph/generator.py
taskcluster/taskgraph/graph.py
taskcluster/taskgraph/morph.py
taskcluster/taskgraph/task.py
taskcluster/taskgraph/taskgraph.py
taskcluster/taskgraph/transforms/base.py
taskcluster/taskgraph/util/seta.py
taskcluster/taskgraph/util/verify.py
--- a/taskcluster/taskgraph/generator.py
+++ b/taskcluster/taskgraph/generator.py
@@ -2,47 +2,48 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 import logging
 import os
 import yaml
 import copy
+import attr
 
 from . import filter_tasks
 from .graph import Graph
 from .taskgraph import TaskGraph
 from .task import Task
 from .optimize import optimize_task_graph
 from .morph import morph
 from .util.python_path import find_object
 from .transforms.base import TransformSequence, TransformConfig
 from .util.verify import (
     verify_docs,
     verifications,
 )
-from .config import load_graph_config
+from .config import load_graph_config, GraphConfig
 
 logger = logging.getLogger(__name__)
 
 
 class KindNotFound(Exception):
     """
     Raised when trying to load kind from a directory without a kind.yml.
     """
 
 
+@attr.s(frozen=True)
 class Kind(object):
 
-    def __init__(self, name, path, config, graph_config):
-        self.name = name
-        self.path = path
-        self.config = config
-        self.graph_config = graph_config
+    name = attr.ib(type=basestring)
+    path = attr.ib(type=basestring)
+    config = attr.ib(type=dict)
+    graph_config = attr.ib(type=GraphConfig)
 
     def _get_loader(self):
         try:
             loader = self.config['loader']
         except KeyError:
             raise KeyError("{!r} does not define `loader`".format(self.path))
         return find_object(loader)
 
--- a/taskcluster/taskgraph/graph.py
+++ b/taskcluster/taskgraph/graph.py
@@ -1,49 +1,37 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
+import attr
 import collections
 
 
+@attr.s(frozen=True)
 class Graph(object):
     """
     Generic representation of a directed acyclic graph with labeled edges
     connecting the nodes.  Graph operations are implemented in a functional
     manner, so the data structure is immutable.
 
     It permits at most one edge of a given name between any set of nodes.  The
     graph is not checked for cycles, and methods may hang or otherwise fail if
     given a cyclic graph.
 
     The `nodes` and `edges` attributes may be accessed in a read-only fashion.
     The `nodes` attribute is a set of node names, while `edges` is a set of
     `(left, right, name)` tuples representing an edge named `name` going from
     node `left` to node `right..
     """
 
-    def __init__(self, nodes, edges):
-        """
-        Create a graph.  Nodes and edges are both as described in the class
-        documentation.  Both values are used by reference, and should not be
-        modified after building a graph.
-        """
-        assert isinstance(nodes, set)
-        assert isinstance(edges, set)
-        self.nodes = nodes
-        self.edges = edges
-
-    def __eq__(self, other):
-        return self.nodes == other.nodes and self.edges == other.edges
-
-    def __repr__(self):
-        return "<Graph nodes={!r} edges={!r}>".format(self.nodes, self.edges)
+    nodes = attr.ib(converter=frozenset)
+    edges = attr.ib(converter=frozenset)
 
     def transitive_closure(self, nodes, reverse=False):
         """
         Return the transitive closure of <nodes>: the graph containing all
         specified nodes as well as any nodes reachable from them, and any
         intervening edges.
 
         If `reverse` is true, the "reachability" will be reversed and this
--- a/taskcluster/taskgraph/morph.py
+++ b/taskcluster/taskgraph/morph.py
@@ -32,17 +32,17 @@ from .taskgraph import TaskGraph
 here = os.path.abspath(os.path.dirname(__file__))
 logger = logging.getLogger(__name__)
 MAX_ROUTES = 10
 
 
 def amend_taskgraph(taskgraph, label_to_taskid, to_add):
     """Add the given tasks to the taskgraph, returning a new taskgraph"""
     new_tasks = taskgraph.tasks.copy()
-    new_edges = taskgraph.graph.edges.copy()
+    new_edges = set(taskgraph.graph.edges)
     for task in to_add:
         new_tasks[task.task_id] = task
         assert task.label not in label_to_taskid
         label_to_taskid[task.label] = task.task_id
         for depname, dep in task.dependencies.iteritems():
             new_edges.add((task.task_id, dep, depname))
 
     taskgraph = TaskGraph(new_tasks, Graph(set(new_tasks), new_edges))
--- a/taskcluster/taskgraph/task.py
+++ b/taskcluster/taskgraph/task.py
@@ -1,15 +1,18 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
+import attr
 
+
+@attr.s
 class Task(object):
     """
     Representation of a task in a TaskGraph.  Each Task has, at creation:
 
     - kind: the name of the task kind
     - label; the label for this task
     - attributes: a dictionary of attributes for this task (used for filtering)
     - task: the task definition (JSON-able dictionary)
@@ -19,77 +22,58 @@ class Task(object):
 
     And later, as the task-graph processing proceeds:
 
     - task_id -- TaskCluster taskId under which this task will be created
 
     This class is just a convenience wrapper for the data type and managing
     display, comparison, serialization, etc. It has no functionality of its own.
     """
-    def __init__(self, kind, label, attributes, task,
-                 optimization=None, dependencies=None,
-                 release_artifacts=None):
-        self.kind = kind
-        self.label = label
-        self.attributes = attributes
-        self.task = task
-
-        self.task_id = None
-
-        self.attributes['kind'] = kind
 
-        self.optimization = optimization
-        self.dependencies = dependencies or {}
-        if release_artifacts:
-            self.release_artifacts = frozenset(release_artifacts)
-        else:
-            self.release_artifacts = None
+    kind = attr.ib()
+    label = attr.ib()
+    attributes = attr.ib()
+    task = attr.ib()
+    task_id = attr.ib(default=None, init=False)
+    optimization = attr.ib(default=None)
+    dependencies = attr.ib(factory=dict)
+    release_artifacts = attr.ib(
+        converter=attr.converters.optional(frozenset),
+        default=None,
+    )
 
-    def __eq__(self, other):
-        return self.kind == other.kind and \
-            self.label == other.label and \
-            self.attributes == other.attributes and \
-            self.task == other.task and \
-            self.task_id == other.task_id and \
-            self.optimization == other.optimization and \
-            self.dependencies == other.dependencies and \
-            self.release_artifacts == other.release_artifacts
-
-    def __repr__(self):
-        return ('Task({kind!r}, {label!r}, {attributes!r}, {task!r}, '
-                'optimization={optimization!r}, '
-                'dependencies={dependencies!r}, '
-                'release_artifacts={release_artifacts!r})'.format(**self.__dict__))
+    def __attrs_post_init__(self):
+        self.attributes['kind'] = self.kind
 
     def to_json(self):
         rv = {
             'kind': self.kind,
             'label': self.label,
             'attributes': self.attributes,
             'dependencies': self.dependencies,
             'optimization': self.optimization,
             'task': self.task,
         }
         if self.task_id:
             rv['task_id'] = self.task_id
         if self.release_artifacts:
-            rv['release_artifacts'] = sorted(self.release_artifacts),
+            rv['release_artifacts'] = sorted(self.release_artifacts)
         return rv
 
     @classmethod
     def from_json(cls, task_dict):
         """
         Given a data structure as produced by taskgraph.to_json, re-construct
         the original Task object.  This is used to "resume" the task-graph
         generation process, for example in Action tasks.
         """
         rv = cls(
             kind=task_dict['kind'],
             label=task_dict['label'],
             attributes=task_dict['attributes'],
             task=task_dict['task'],
             optimization=task_dict['optimization'],
             dependencies=task_dict.get('dependencies'),
-            release_artifacts=task_dict.get('release-artifacts')
+            release_artifacts=task_dict.get('release-artifacts'),
         )
         if 'task_id' in task_dict:
             rv.task_id = task_dict['task_id']
         return rv
--- a/taskcluster/taskgraph/taskgraph.py
+++ b/taskcluster/taskgraph/taskgraph.py
@@ -2,29 +2,33 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 from .graph import Graph
 from .task import Task
 
+import attr
 
+
+@attr.s(frozen=True)
 class TaskGraph(object):
     """
     Representation of a task graph.
 
     A task graph is a combination of a Graph and a dictionary of tasks indexed
     by label. TaskGraph instances should be treated as immutable.
     """
 
-    def __init__(self, tasks, graph):
-        assert set(tasks) == graph.nodes
-        self.tasks = tasks
-        self.graph = graph
+    tasks = attr.ib()
+    graph = attr.ib()
+
+    def __attrs_post_init__(self):
+        assert set(self.tasks) == self.graph.nodes
 
     def for_each_task(self, f, *args, **kwargs):
         for task_label in self.graph.visit_postorder():
             task = self.tasks[task_label]
             f(task, self, *args, **kwargs)
 
     def __getitem__(self, label):
         "Get a task by label"
@@ -32,22 +36,16 @@ class TaskGraph(object):
 
     def __contains__(self, label):
         return label in self.tasks
 
     def __iter__(self):
         "Iterate over tasks in undefined order"
         return self.tasks.itervalues()
 
-    def __repr__(self):
-        return "<TaskGraph graph={!r} tasks={!r}>".format(self.graph, self.tasks)
-
-    def __eq__(self, other):
-        return self.tasks == other.tasks and self.graph == other.graph
-
     def to_json(self):
         "Return a JSON-able object representing the task graph, as documented"
         named_links_dict = self.graph.named_links_dict()
         # this dictionary may be keyed by label or by taskid, so let's just call it 'key'
         tasks = {}
         for key in self.graph.visit_postorder():
             tasks[key] = self.tasks[key].to_json()
             # overwrite dependencies with the information in the taskgraph's edges.
--- a/taskcluster/taskgraph/transforms/base.py
+++ b/taskcluster/taskgraph/transforms/base.py
@@ -1,74 +1,71 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 import attr
 
+from ..config import GraphConfig
+from ..parameters import Parameters
 from ..util.schema import Schema, validate_schema
 
 
+@attr.s(frozen=True)
 class TransformConfig(object):
-    """A container for configuration affecting transforms.  The `config`
-    argument to transforms is an instance of this class, possibly with
-    additional kind-specific attributes beyond those set here."""
-    def __init__(self, kind, path, config, params,
-                 kind_dependencies_tasks=None, graph_config=None):
-        # the name of the current kind
-        self.kind = kind
+    """
+    A container for configuration affecting transforms.  The `config` argument
+    to transforms is an instance of this class.
+    """
 
-        # the path to the kind configuration directory
-        self.path = path
+    # the name of the current kind
+    kind = attr.ib()
+
+    # the path to the kind configuration directory
+    path = attr.ib(type=basestring)
 
-        # the parsed contents of kind.yml
-        self.config = config
+    # the parsed contents of kind.yml
+    config = attr.ib(type=dict)
 
-        # the parameters for this task-graph generation run
-        self.params = params
+    # the parameters for this task-graph generation run
+    params = attr.ib(type=Parameters)
 
-        # a list of all the tasks associated with the kind dependencies of the
-        # current kind
-        self.kind_dependencies_tasks = kind_dependencies_tasks
+    # a list of all the tasks associated with the kind dependencies of the
+    # current kind
+    kind_dependencies_tasks = attr.ib()
 
-        # Global configuration of the taskgraph
-        self.graph_config = graph_config or {}
+    # Global configuration of the taskgraph
+    graph_config = attr.ib(type=GraphConfig)
 
 
+@attr.s()
 class TransformSequence(object):
     """
     Container for a sequence of transforms.  Each transform is represented as a
     callable taking (config, items) and returning a generator which will yield
     transformed items.  The resulting sequence has the same interface.
 
     This is convenient to use in a file full of transforms, as it provides a
     decorator, @transforms.add, that will add the decorated function to the
     sequence.
     """
 
-    def __init__(self, transforms=None):
-        self.transforms = transforms or []
+    _transforms = attr.ib(factory=list)
 
     def __call__(self, config, items):
-        for xform in self.transforms:
+        for xform in self._transforms:
             items = xform(config, items)
             if items is None:
                 raise Exception("Transform {} is not a generator".format(xform))
         return items
 
-    def __repr__(self):
-        return '\n'.join(
-            ['TransformSequence(['] +
-            [repr(x) for x in self.transforms] +
-            ['])'])
-
     def add(self, func):
-        self.transforms.append(func)
+        self._transforms.append(func)
         return func
 
     def add_validate(self, schema):
         self.add(ValidateSchema(schema))
 
 
 @attr.s
 class ValidateSchema(object):
--- a/taskcluster/taskgraph/util/seta.py
+++ b/taskcluster/taskgraph/util/seta.py
@@ -5,42 +5,44 @@
 from __future__ import absolute_import, print_function, unicode_literals
 
 import json
 import logging
 import requests
 from collections import defaultdict
 from redo import retry
 from requests import exceptions
+import attr
 
 logger = logging.getLogger(__name__)
 
 # It's a list of project name which SETA is useful on
 SETA_PROJECTS = ['mozilla-inbound', 'autoland']
 PROJECT_SCHEDULE_ALL_EVERY_PUSHES = {'mozilla-inbound': 5, 'autoland': 5}
 PROJECT_SCHEDULE_ALL_EVERY_MINUTES = {'mozilla-inbound': 60, 'autoland': 60}
 
 SETA_ENDPOINT = "https://treeherder.mozilla.org/api/project/%s/seta/" \
                 "job-priorities/?build_system_type=%s"
 PUSH_ENDPOINT = "https://hg.mozilla.org/integration/%s/json-pushes/?startID=%d&endID=%d"
 
 
+@attr.s(frozen=True)
 class SETA(object):
     """
     Interface to the SETA service, which defines low-value tasks that can be optimized out
     of the taskgraph.
     """
-    def __init__(self):
-        # cached low value tasks, by project
-        self.low_value_tasks = {}
-        self.low_value_bb_tasks = {}
-        # cached push dates by project
-        self.push_dates = defaultdict(dict)
-        # cached push_ids that failed to retrieve datetime for
-        self.failed_json_push_calls = []
+
+    # cached low value tasks, by project
+    low_value_tasks = attr.ib(factory=dict, init=False)
+    low_value_bb_tasks = attr.ib(factory=dict, init=False)
+    # cached push dates by project
+    push_dates = attr.ib(factory=lambda: defaultdict(dict), init=False)
+    # cached push_ids that failed to retrieve datetime for
+    failed_json_push_calls = attr.ib(factory=list, init=False)
 
     def _get_task_string(self, task_tuple):
         # convert task tuple to single task string, so the task label sent in can match
         # remove any empty parts of the tuple
         task_tuple = [x for x in task_tuple if len(x) != 0]
 
         if len(task_tuple) == 0:
             return ''
--- a/taskcluster/taskgraph/util/verify.py
+++ b/taskcluster/taskgraph/util/verify.py
@@ -5,43 +5,45 @@
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 import logging
 import re
 import os
 import sys
 
+import attr
+
 from .. import GECKO
 
 logger = logging.getLogger(__name__)
 base_path = os.path.join(GECKO, 'taskcluster', 'docs')
 
 
+@attr.s(frozen=True)
 class VerificationSequence(object):
     """
     Container for a sequence of verifications over a TaskGraph. Each
     verification is represented as a callable taking (task, taskgraph,
     scratch_pad), called for each task in the taskgraph, and one more
     time with no task but with the taskgraph and the same scratch_pad
     that was passed for each task.
     """
-    def __init__(self):
-        self.verifications = {}
+    _verifications = attr.ib(factory=dict)
 
     def __call__(self, graph_name, graph):
-        for verification in self.verifications.get(graph_name, []):
+        for verification in self._verifications.get(graph_name, []):
             scratch_pad = {}
             graph.for_each_task(verification, scratch_pad=scratch_pad)
             verification(None, graph, scratch_pad=scratch_pad)
         return graph_name, graph
 
     def add(self, graph_name):
         def wrap(func):
-            self.verifications.setdefault(graph_name, []).append(func)
+            self._verifications.setdefault(graph_name, []).append(func)
             return func
         return wrap
 
 
 verifications = VerificationSequence()
 
 
 def verify_docs(filename, identifiers, appearing_as):