Bug 1568277 - [taskgraph] Pass push and time intervals into SETA.is_low_value_task r=tomprince
author: Andrew Halberstadt <ahalberstadt@mozilla.com>
Thu, 15 Aug 2019 18:49:10 +0000
changeset: 488338 7b59ed5d703d3a9d6eae1860487da01a697106dd
parent: 488337 f8b41cbaaf8e4f5205aa3d429bd56ccd36a4ace1
child: 488339 d7e8f80e2c85288a5f59135f147475fe7ac73ce0
push id: 113906
push user: ncsoregi@mozilla.com
push date: Fri, 16 Aug 2019 04:07:24 +0000
treeherder: mozilla-inbound@d887276421d3 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: tomprince
bugs: 1568277
milestone: 70.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1568277 - [taskgraph] Pass push and time intervals into SETA.is_low_value_task r=tomprince

This will allow us to easily tweak these values from the optimization strategy.

Differential Revision: https://phabricator.services.mozilla.com/D40206
taskcluster/taskgraph/optimize/strategies.py
taskcluster/taskgraph/util/seta.py
--- a/taskcluster/taskgraph/optimize/strategies.py
+++ b/taskcluster/taskgraph/optimize/strategies.py
@@ -43,25 +43,30 @@ class IndexSearch(OptimizationStrategy):
                 # 404 will end up here and go on to the next index path
                 pass
 
         return False
 
 
 @register_strategy('seta')
 class SETA(OptimizationStrategy):
+    push_interval = 5
+    time_interval = 60
+
     def should_remove_task(self, task, params, _):
         label = task.label
 
         # we would like to return 'False, None' while it's high_value_task
         # and we wouldn't optimize it. Otherwise, it will return 'True, None'
         if is_low_value_task(label,
                              params.get('project'),
                              params.get('pushlog_id'),
-                             params.get('pushdate')):
+                             params.get('pushdate'),
+                             self.push_interval,
+                             self.time_interval):
             # Always optimize away low-value tasks
             return True
         else:
             return False
 
 
 @register_strategy("skip-unless-changed")
 class SkipUnlessChanged(OptimizationStrategy):
--- a/taskcluster/taskgraph/util/seta.py
+++ b/taskcluster/taskgraph/util/seta.py
@@ -11,18 +11,16 @@ from collections import defaultdict
 from redo import retry
 from requests import exceptions
 import attr
 
 logger = logging.getLogger(__name__)
 
 # It's a list of project name which SETA is useful on
 SETA_PROJECTS = ['mozilla-inbound', 'autoland']
-PROJECT_SCHEDULE_ALL_EVERY_PUSHES = {'mozilla-inbound': 5, 'autoland': 5}
-PROJECT_SCHEDULE_ALL_EVERY_MINUTES = {'mozilla-inbound': 60, 'autoland': 60}
 SETA_HIGH_PRIORITY = 1
 SETA_LOW_PRIORITY = 5
 
 SETA_ENDPOINT = "https://treeherder.mozilla.org/api/project/%s/seta/" \
                 "job-priorities/?build_system_type=%s&priority=%s"
 PUSH_ENDPOINT = "https://hg.mozilla.org/integration/%s/json-pushes/?startID=%d&endID=%d"
 
 
@@ -161,20 +159,20 @@ class SETA(object):
             logger.warning(error)
 
         # When we get invalid JSON (i.e. 500 error), it results in a ValueError (bug 1313426)
         except ValueError as error:
             logger.warning("Invalid JSON, possible server error: {}".format(error))
 
         return low_value_tasks
 
-    def minutes_between_pushes(self, project, cur_push_id, cur_push_date):
+    def minutes_between_pushes(self, project, cur_push_id, cur_push_date, time_interval):
         # figure out the minutes that have elapsed between the current push and previous one
         # defaulting to max min so if we can't get value, defaults to run the task
-        min_between_pushes = PROJECT_SCHEDULE_ALL_EVERY_MINUTES.get(project, 60)
+        min_between_pushes = time_interval
         prev_push_id = cur_push_id - 1
 
         # cache the pushdate for the current push so we can use it next time
         self.push_dates[project].update({cur_push_id: cur_push_date})
 
         # check if we already have the previous push id's datetime cached
         prev_push_date = self.push_dates[project].get(prev_push_id, 0)
 
@@ -227,32 +225,33 @@ class SETA(object):
 
         # We just print the error out as a debug message if we failed to catch the exception above
         except exceptions.RequestException as error:
             logger.warning(error)
             self.failed_json_push_calls.append(prev_push_id)
 
         return min_between_pushes
 
-    def is_low_value_task(self, label, project, pushlog_id, push_date):
+    def is_low_value_task(self, label, project, pushlog_id, push_date,
+                          push_interval, time_interval):
         # marking a task as low_value means it will be optimized out by tc
         if project not in SETA_PROJECTS:
             return False
 
-        schedule_all_every = PROJECT_SCHEDULE_ALL_EVERY_PUSHES.get(project, 5)
         # on every Nth push, want to run all tasks
-        if int(pushlog_id) % schedule_all_every == 0:
+        if int(pushlog_id) % push_interval == 0:
             return False
 
         # Nth push, so time to call seta based on number of pushes; however
         # we also want to ensure we run all tasks at least once per N minutes
         if self.minutes_between_pushes(
                 project,
                 int(pushlog_id),
-                int(push_date)) >= PROJECT_SCHEDULE_ALL_EVERY_MINUTES.get(project, 60):
+                int(push_date),
+                time_interval) >= time_interval:
             return False
 
         # cache the low value tasks per project to avoid repeated SETA server queries
         if project not in self.low_value_tasks:
             self.low_value_tasks[project] = self.query_low_value_tasks(project)
         return label in self.low_value_tasks[project]