Bug 1507860: [taskgraph] Move most classes to use attrs; r=dustin
☠☠ backed out by 28497e7f30ae ☠ ☠
author: Tom Prince <mozilla@hocat.ca>
Tue, 20 Nov 2018 21:52:24 +0000
changeset 503811 d1d4e3bf394557881a9371b8b851175d5aaec117
parent 503810 48d5a633e54ac0c601528a6532e3c7ff4043bd8c
child 503812 a1fc429db01a944e5c805a7c5dc363cb9b380a56
push id: 10290
push user: ffxbld-merge
push date: Mon, 03 Dec 2018 16:23:23 +0000
treeherder: mozilla-beta@700bed2445e6 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: dustin
bugs: 1507860
milestone: 65.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1507860: [taskgraph] Move most classes to use attrs; r=dustin This moves most of the low-hanging fruit to use attrs. Differential Revision: https://phabricator.services.mozilla.com/D1141
taskcluster/taskgraph/generator.py
taskcluster/taskgraph/graph.py
taskcluster/taskgraph/task.py
taskcluster/taskgraph/taskgraph.py
taskcluster/taskgraph/transforms/base.py
taskcluster/taskgraph/util/seta.py
taskcluster/taskgraph/util/verify.py
--- a/taskcluster/taskgraph/generator.py
+++ b/taskcluster/taskgraph/generator.py
@@ -2,47 +2,48 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 import logging
 import os
 import yaml
 import copy
+import attr
 
 from . import filter_tasks
 from .graph import Graph
 from .taskgraph import TaskGraph
 from .task import Task
 from .optimize import optimize_task_graph
 from .morph import morph
 from .util.python_path import find_object
 from .transforms.base import TransformSequence, TransformConfig
 from .util.verify import (
     verify_docs,
     verifications,
 )
-from .config import load_graph_config
+from .config import load_graph_config, GraphConfig
 
 logger = logging.getLogger(__name__)
 
 
 class KindNotFound(Exception):
     """
     Raised when trying to load kind from a directory without a kind.yml.
     """
 
 
+@attr.s(frozen=True)
 class Kind(object):
 
-    def __init__(self, name, path, config, graph_config):
-        self.name = name
-        self.path = path
-        self.config = config
-        self.graph_config = graph_config
+    name = attr.ib(type=basestring)
+    path = attr.ib(type=basestring)
+    config = attr.ib(type=dict)
+    graph_config = attr.ib(type=GraphConfig)
 
     def _get_loader(self):
         try:
             loader = self.config['loader']
         except KeyError:
             raise KeyError("{!r} does not define `loader`".format(self.path))
         return find_object(loader)
 
--- a/taskcluster/taskgraph/graph.py
+++ b/taskcluster/taskgraph/graph.py
@@ -1,49 +1,37 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
+import attr
 import collections
 
 
+@attr.s(frozen=True)
 class Graph(object):
     """
     Generic representation of a directed acyclic graph with labeled edges
     connecting the nodes.  Graph operations are implemented in a functional
     manner, so the data structure is immutable.
 
     It permits at most one edge of a given name between any set of nodes.  The
     graph is not checked for cycles, and methods may hang or otherwise fail if
     given a cyclic graph.
 
     The `nodes` and `edges` attributes may be accessed in a read-only fashion.
     The `nodes` attribute is a set of node names, while `edges` is a set of
     `(left, right, name)` tuples representing an edge named `name` going from
     node `left` to node `right..
     """
 
-    def __init__(self, nodes, edges):
-        """
-        Create a graph.  Nodes and edges are both as described in the class
-        documentation.  Both values are used by reference, and should not be
-        modified after building a graph.
-        """
-        assert isinstance(nodes, set)
-        assert isinstance(edges, set)
-        self.nodes = nodes
-        self.edges = edges
-
-    def __eq__(self, other):
-        return self.nodes == other.nodes and self.edges == other.edges
-
-    def __repr__(self):
-        return "<Graph nodes={!r} edges={!r}>".format(self.nodes, self.edges)
+    nodes = attr.ib(converter=frozenset)
+    edges = attr.ib(converter=frozenset)
 
     def transitive_closure(self, nodes, reverse=False):
         """
         Return the transitive closure of <nodes>: the graph containing all
         specified nodes as well as any nodes reachable from them, and any
         intervening edges.
 
         If `reverse` is true, the "reachability" will be reversed and this
--- a/taskcluster/taskgraph/task.py
+++ b/taskcluster/taskgraph/task.py
@@ -1,15 +1,18 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
+import attr
 
+
+@attr.s
 class Task(object):
     """
     Representation of a task in a TaskGraph.  Each Task has, at creation:
 
     - kind: the name of the task kind
     - label; the label for this task
     - attributes: a dictionary of attributes for this task (used for filtering)
     - task: the task definition (JSON-able dictionary)
@@ -19,77 +22,58 @@ class Task(object):
 
     And later, as the task-graph processing proceeds:
 
     - task_id -- TaskCluster taskId under which this task will be created
 
     This class is just a convenience wrapper for the data type and managing
     display, comparison, serialization, etc. It has no functionality of its own.
     """
-    def __init__(self, kind, label, attributes, task,
-                 optimization=None, dependencies=None,
-                 release_artifacts=None):
-        self.kind = kind
-        self.label = label
-        self.attributes = attributes
-        self.task = task
-
-        self.task_id = None
-
-        self.attributes['kind'] = kind
 
-        self.optimization = optimization
-        self.dependencies = dependencies or {}
-        if release_artifacts:
-            self.release_artifacts = frozenset(release_artifacts)
-        else:
-            self.release_artifacts = None
+    kind = attr.ib()
+    label = attr.ib()
+    attributes = attr.ib()
+    task = attr.ib()
+    task_id = attr.ib(default=None, init=False)
+    optimization = attr.ib(default=None)
+    dependencies = attr.ib(factory=dict)
+    release_artifacts = attr.ib(
+        converter=attr.converters.optional(frozenset),
+        default=None,
+    )
 
-    def __eq__(self, other):
-        return self.kind == other.kind and \
-            self.label == other.label and \
-            self.attributes == other.attributes and \
-            self.task == other.task and \
-            self.task_id == other.task_id and \
-            self.optimization == other.optimization and \
-            self.dependencies == other.dependencies and \
-            self.release_artifacts == other.release_artifacts
-
-    def __repr__(self):
-        return ('Task({kind!r}, {label!r}, {attributes!r}, {task!r}, '
-                'optimization={optimization!r}, '
-                'dependencies={dependencies!r}, '
-                'release_artifacts={release_artifacts!r})'.format(**self.__dict__))
+    def __attrs_post_init__(self):
+        self.attributes['kind'] = self.kind
 
     def to_json(self):
         rv = {
             'kind': self.kind,
             'label': self.label,
             'attributes': self.attributes,
             'dependencies': self.dependencies,
             'optimization': self.optimization,
             'task': self.task,
         }
         if self.task_id:
             rv['task_id'] = self.task_id
         if self.release_artifacts:
-            rv['release_artifacts'] = sorted(self.release_artifacts),
+            rv['release_artifacts'] = sorted(self.release_artifacts)
         return rv
 
     @classmethod
     def from_json(cls, task_dict):
         """
         Given a data structure as produced by taskgraph.to_json, re-construct
         the original Task object.  This is used to "resume" the task-graph
         generation process, for example in Action tasks.
         """
         rv = cls(
             kind=task_dict['kind'],
             label=task_dict['label'],
             attributes=task_dict['attributes'],
             task=task_dict['task'],
             optimization=task_dict['optimization'],
             dependencies=task_dict.get('dependencies'),
-            release_artifacts=task_dict.get('release-artifacts')
+            release_artifacts=task_dict.get('release-artifacts'),
         )
         if 'task_id' in task_dict:
             rv.task_id = task_dict['task_id']
         return rv
--- a/taskcluster/taskgraph/taskgraph.py
+++ b/taskcluster/taskgraph/taskgraph.py
@@ -2,29 +2,33 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 from .graph import Graph
 from .task import Task
 
+import attr
 
+
+@attr.s(frozen=True)
 class TaskGraph(object):
     """
     Representation of a task graph.
 
     A task graph is a combination of a Graph and a dictionary of tasks indexed
     by label. TaskGraph instances should be treated as immutable.
     """
 
-    def __init__(self, tasks, graph):
-        assert set(tasks) == graph.nodes
-        self.tasks = tasks
-        self.graph = graph
+    tasks = attr.ib()
+    graph = attr.ib()
+
+    def __attrs_post_init__(self):
+        assert set(self.tasks) == self.graph.nodes
 
     def for_each_task(self, f, *args, **kwargs):
         for task_label in self.graph.visit_postorder():
             task = self.tasks[task_label]
             f(task, self, *args, **kwargs)
 
     def __getitem__(self, label):
         "Get a task by label"
@@ -32,22 +36,16 @@ class TaskGraph(object):
 
     def __contains__(self, label):
         return label in self.tasks
 
     def __iter__(self):
         "Iterate over tasks in undefined order"
         return self.tasks.itervalues()
 
-    def __repr__(self):
-        return "<TaskGraph graph={!r} tasks={!r}>".format(self.graph, self.tasks)
-
-    def __eq__(self, other):
-        return self.tasks == other.tasks and self.graph == other.graph
-
     def to_json(self):
         "Return a JSON-able object representing the task graph, as documented"
         named_links_dict = self.graph.named_links_dict()
         # this dictionary may be keyed by label or by taskid, so let's just call it 'key'
         tasks = {}
         for key in self.graph.visit_postorder():
             tasks[key] = self.tasks[key].to_json()
             # overwrite dependencies with the information in the taskgraph's edges.
--- a/taskcluster/taskgraph/transforms/base.py
+++ b/taskcluster/taskgraph/transforms/base.py
@@ -1,63 +1,63 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
-
-class TransformConfig(object):
-    """A container for configuration affecting transforms.  The `config`
-    argument to transforms is an instance of this class, possibly with
-    additional kind-specific attributes beyond those set here."""
-    def __init__(self, kind, path, config, params,
-                 kind_dependencies_tasks=None, graph_config=None):
-        # the name of the current kind
-        self.kind = kind
-
-        # the path to the kind configuration directory
-        self.path = path
+import attr
 
-        # the parsed contents of kind.yml
-        self.config = config
-
-        # the parameters for this task-graph generation run
-        self.params = params
-
-        # a list of all the tasks associated with the kind dependencies of the
-        # current kind
-        self.kind_dependencies_tasks = kind_dependencies_tasks
-
-        # Global configuration of the taskgraph
-        self.graph_config = graph_config or {}
+from ..parameters import Parameters
+from ..config import GraphConfig
 
 
+@attr.s(frozen=True)
+class TransformConfig(object):
+    """
+    A container for configuration affecting transforms.  The `config` argument
+    to transforms is an instance of this class.
+    """
+
+    # the name of the current kind
+    kind = attr.ib()
+
+    # the path to the kind configuration directory
+    path = attr.ib(type=basestring)
+
+    # the parsed contents of kind.yml
+    config = attr.ib(type=dict)
+
+    # the parameters for this task-graph generation run
+    params = attr.ib(type=Parameters)
+
+    # a list of all the tasks associated with the kind dependencies of the
+    # current kind
+    kind_dependencies_tasks = attr.ib()
+
+    # Global configuration of the taskgraph
+    graph_config = attr.ib(type=GraphConfig)
+
+
+@attr.s()
 class TransformSequence(object):
     """
     Container for a sequence of transforms.  Each transform is represented as a
     callable taking (config, items) and returning a generator which will yield
     transformed items.  The resulting sequence has the same interface.
 
     This is convenient to use in a file full of transforms, as it provides a
     decorator, @transforms.add, that will add the decorated function to the
     sequence.
     """
 
-    def __init__(self, transforms=None):
-        self.transforms = transforms or []
+    _transforms = attr.ib(factory=list)
 
     def __call__(self, config, items):
-        for xform in self.transforms:
+        for xform in self._transforms:
             items = xform(config, items)
             if items is None:
                 raise Exception("Transform {} is not a generator".format(xform))
         return items
 
-    def __repr__(self):
-        return '\n'.join(
-            ['TransformSequence(['] +
-            [repr(x) for x in self.transforms] +
-            ['])'])
-
     def add(self, func):
-        self.transforms.append(func)
+        self._transforms.append(func)
         return func
--- a/taskcluster/taskgraph/util/seta.py
+++ b/taskcluster/taskgraph/util/seta.py
@@ -5,42 +5,44 @@
 from __future__ import absolute_import, print_function, unicode_literals
 
 import json
 import logging
 import requests
 from collections import defaultdict
 from redo import retry
 from requests import exceptions
+import attr
 
 logger = logging.getLogger(__name__)
 
 # It's a list of project name which SETA is useful on
 SETA_PROJECTS = ['mozilla-inbound', 'autoland']
 PROJECT_SCHEDULE_ALL_EVERY_PUSHES = {'mozilla-inbound': 5, 'autoland': 5}
 PROJECT_SCHEDULE_ALL_EVERY_MINUTES = {'mozilla-inbound': 60, 'autoland': 60}
 
 SETA_ENDPOINT = "https://treeherder.mozilla.org/api/project/%s/seta/" \
                 "job-priorities/?build_system_type=%s"
 PUSH_ENDPOINT = "https://hg.mozilla.org/integration/%s/json-pushes/?startID=%d&endID=%d"
 
 
+@attr.s(frozen=True)
 class SETA(object):
     """
     Interface to the SETA service, which defines low-value tasks that can be optimized out
     of the taskgraph.
     """
-    def __init__(self):
-        # cached low value tasks, by project
-        self.low_value_tasks = {}
-        self.low_value_bb_tasks = {}
-        # cached push dates by project
-        self.push_dates = defaultdict(dict)
-        # cached push_ids that failed to retrieve datetime for
-        self.failed_json_push_calls = []
+
+    # cached low value tasks, by project
+    low_value_tasks = attr.ib(factory=dict, init=False)
+    low_value_bb_tasks = attr.ib(factory=dict, init=False)
+    # cached push dates by project
+    push_dates = attr.ib(factory=lambda: defaultdict(dict), init=False)
+    # cached push_ids that failed to retrieve datetime for
+    failed_json_push_calls = attr.ib(factory=list, init=False)
 
     def _get_task_string(self, task_tuple):
         # convert task tuple to single task string, so the task label sent in can match
         # remove any empty parts of the tuple
         task_tuple = [x for x in task_tuple if len(x) != 0]
 
         if len(task_tuple) == 0:
             return ''
--- a/taskcluster/taskgraph/util/verify.py
+++ b/taskcluster/taskgraph/util/verify.py
@@ -5,43 +5,45 @@
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 import logging
 import re
 import os
 import sys
 
+import attr
+
 from .. import GECKO
 
 logger = logging.getLogger(__name__)
 base_path = os.path.join(GECKO, 'taskcluster', 'docs')
 
 
+@attr.s(frozen=True)
 class VerificationSequence(object):
     """
     Container for a sequence of verifications over a TaskGraph. Each
     verification is represented as a callable taking (task, taskgraph,
     scratch_pad), called for each task in the taskgraph, and one more
     time with no task but with the taskgraph and the same scratch_pad
     that was passed for each task.
     """
-    def __init__(self):
-        self.verifications = {}
+    _verifications = attr.ib(factory=dict)
 
     def __call__(self, graph_name, graph):
-        for verification in self.verifications.get(graph_name, []):
+        for verification in self._verifications.get(graph_name, []):
             scratch_pad = {}
             graph.for_each_task(verification, scratch_pad=scratch_pad)
             verification(None, graph, scratch_pad=scratch_pad)
         return graph_name, graph
 
     def add(self, graph_name):
         def wrap(func):
-            self.verifications.setdefault(graph_name, []).append(func)
+            self._verifications.setdefault(graph_name, []).append(func)
             return func
         return wrap
 
 
 verifications = VerificationSequence()
 
 
 def verify_docs(filename, identifiers, appearing_as):