Bug 1447460: Teach `mach taskgraph cron` to handle configuration in sub-repositories; r=dustin
author: Tom Prince <mozilla@hocat.ca>
Wed, 28 Mar 2018 11:34:20 -0700
changeset 466635 c4730aaaf5575dbca450a9819c13f30dedc7a98e
parent 466634 d87865851500487d86a67978c7ab498640a1a4d1
child 466636 c09b8a694cb5d0442aa293eace75a18058b675d5
push id: 1728
push user: jlund@mozilla.com
push date: Mon, 18 Jun 2018 21:12:27 +0000
treeherder: mozilla-release@c296fde26f5f [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: dustin
bugs: 1447460
milestone: 61.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1447460: Teach `mach taskgraph cron` to handle configuration in sub-repositories; r=dustin

Differential Revision: https://phabricator.services.mozilla.com/D815
taskcluster/mach_commands.py
taskcluster/taskgraph/cron/__init__.py
taskcluster/taskgraph/cron/decision.py
taskcluster/taskgraph/cron/util.py
--- a/taskcluster/mach_commands.py
+++ b/taskcluster/mach_commands.py
@@ -191,16 +191,19 @@ class MachCommands(MachCommandBase):
     @CommandArgument('--force-run',
                      required=False,
                      help='If given, force this cronjob to run regardless of time, '
                      'and run no others')
     @CommandArgument('--no-create',
                      required=False,
                      action='store_true',
                      help='Do not actually create tasks')
+    @CommandArgument('--root', '-r',
+                     required=False,
+                     help="root of the repository to get cron task definitions from")
     def taskgraph_cron(self, **options):
         """Run the cron task; this task creates zero or more decision tasks.  It is run
         from the hooks service on a regular basis."""
         import taskgraph.cron
         try:
             self.setup_logging()
             return taskgraph.cron.taskgraph_cron(options)
         except Exception:
--- a/taskcluster/taskgraph/cron/__init__.py
+++ b/taskcluster/taskgraph/cron/__init__.py
@@ -31,18 +31,18 @@ from taskgraph.util.taskcluster import g
 # createTask.
 JOB_TYPES = {
     'decision-task': decision.run_decision_task,
 }
 
 logger = logging.getLogger(__name__)
 
 
-def load_jobs(params):
-    with open(os.path.join(GECKO, '.cron.yml'), 'rb') as f:
+def load_jobs(params, root):
+    with open(os.path.join(root, '.cron.yml'), 'rb') as f:
         cron_yml = yaml.load(f)
     schema.validate(cron_yml)
 
     # resolve keyed_by fields in each job
     jobs = cron_yml['jobs']
 
     return {j['name']: j for j in jobs}
 
@@ -56,32 +56,33 @@ def should_run(job, params):
     resolve_keyed_by(job, 'when', 'Cron job ' + job['name'],
                      project=params['project'])
     if not any(match_utc(params, hour=sched.get('hour'), minute=sched.get('minute'))
                for sched in job.get('when', [])):
         return False
     return True
 
 
-def run_job(job_name, job, params):
+def run_job(job_name, job, params, root):
+    params = params.copy()
     params['job_name'] = job_name
 
     try:
         job_type = job['job']['type']
         if job_type in JOB_TYPES:
-            tasks = JOB_TYPES[job_type](job['job'], params)
+            tasks = JOB_TYPES[job_type](job['job'], params, root=root)
         else:
             raise Exception("job type {} not recognized".format(job_type))
         if params['no_create']:
             for task_id, task in tasks:
                 logger.info("Not creating task {} (--no-create):\n".format(task_id) +
                             json.dumps(task, sort_keys=True, indent=4, separators=(',', ': ')))
         else:
             for task_id, task in tasks:
-                create_task(get_session(), task_id, params['job_name'], task)
+                create_task(get_session(), task_id, job_name, task)
 
     except Exception:
         # report the exception, but don't fail the whole cron task, as that
         # would leave other jobs un-run.  NOTE: we could report job failure to
         # a responsible person here via tc-notify
         traceback.print_exc()
         logger.error("cron job {} run failed; continuing to next job".format(
             params['job_name']))
@@ -90,16 +91,17 @@ def run_job(job_name, job, params):
 def calculate_time(options):
     if 'TASK_ID' not in os.environ:
         # running in a development environment, so look for CRON_TIME or use
         # the current time
         if 'CRON_TIME' in os.environ:
             logger.warning("setting params['time'] based on $CRON_TIME")
             time = datetime.datetime.utcfromtimestamp(
                 int(os.environ['CRON_TIME']))
+            print(time)
         else:
             logger.warning("using current time for params['time']; try setting $CRON_TIME "
                            "to a timestamp")
             time = datetime.datetime.utcnow()
     else:
         # fetch this task from the queue
         res = get_session().get(
             'http://taskcluster/queue/v1/task/' + os.environ['TASK_ID'])
@@ -117,44 +119,43 @@ def calculate_time(options):
     # round down to the nearest 15m
     minute = time.minute - (time.minute % 15)
     time = time.replace(minute=minute, second=0, microsecond=0)
     logger.info("calculated cron schedule time is {}".format(time))
     return time
 
 
 def taskgraph_cron(options):
+    root = options.get('root') or GECKO
+
     params = {
-        # name of this cron job (set per job below)
-        'job_name': '..',
-
         # repositories
-        'head_repository': options['head_repository'],
+        'repository_url': options['head_repository'],
 
         # *calculated* head_rev; this is based on the current meaning of this
         # reference in the working copy
-        'head_rev': calculate_head_rev(options),
+        'head_rev': calculate_head_rev(root),
 
         # the project (short name for the repository) and its SCM level
         'project': options['project'],
         'level': options['level'],
 
         # if true, tasks will not actually be created
         'no_create': options['no_create'],
 
         # the time that this cron task was created (as a UTC datetime object)
         'time': calculate_time(options),
     }
 
-    jobs = load_jobs(params)
+    jobs = load_jobs(params, root=root)
 
     if options['force_run']:
         job_name = options['force_run']
         logger.info("force-running cron job {}".format(job_name))
-        run_job(job_name, jobs[job_name], params)
+        run_job(job_name, jobs[job_name], params, root)
         return
 
     for job_name, job in sorted(jobs.items()):
         if should_run(job, params):
             logger.info("running cron job {}".format(job_name))
-            run_job(job_name, job, params)
+            run_job(job_name, job, params, root)
         else:
             logger.info("not running cron job {}".format(job_name))
--- a/taskcluster/taskgraph/cron/decision.py
+++ b/taskcluster/taskgraph/cron/decision.py
@@ -11,31 +11,32 @@ import jsone
 import pipes
 import yaml
 import os
 import slugid
 
 from taskgraph.util.time import current_json_time
 
 
-def run_decision_task(job, params):
+def run_decision_task(job, params, root):
     arguments = []
     if 'target-tasks-method' in job:
         arguments.append('--target-tasks-method={}'.format(job['target-tasks-method']))
     return [
         make_decision_task(
             params,
             symbol=job['treeherder-symbol'],
-            arguments=arguments),
+            arguments=arguments,
+            root=root),
     ]
 
 
-def make_decision_task(params, symbol, arguments=[], head_rev=None):
+def make_decision_task(params, root, symbol, arguments=[], head_rev=None):
     """Generate a basic decision task, based on the root .taskcluster.yml"""
-    with open('.taskcluster.yml') as f:
+    with open(os.path.join(root, '.taskcluster.yml'), 'rb') as f:
         taskcluster_yml = yaml.load(f)
 
     if not head_rev:
         head_rev = params['head_rev']
 
     slugids = {}
 
     def as_slugid(name):
@@ -46,17 +47,17 @@ def make_decision_task(params, symbol, a
         return slugids[name]
 
     # provide a similar JSON-e context to what mozilla-taskcluster provides:
     # https://docs.taskcluster.net/reference/integrations/mozilla-taskcluster/docs/taskcluster-yml
     # but with a different tasks_for and an extra `cron` section
     context = {
         'tasks_for': 'cron',
         'repository': {
-            'url': params['head_repository'],
+            'url': params['repository_url'],
             'project': params['project'],
             'level': params['level'],
         },
         'push': {
             'revision': params['head_rev'],
             # remainder are fake values, but the decision task expects them anyway
             'pushlog_id': -1,
             'pushdate': 0,
--- a/taskcluster/taskgraph/cron/util.py
+++ b/taskcluster/taskgraph/cron/util.py
@@ -20,13 +20,13 @@ def match_utc(params, hour=None, minute=
         raise Exception("cron jobs only run on multiples of 15 minutes past the hour")
     if hour is not None and params['time'].hour != hour:
         return False
     if minute is not None and params['time'].minute != minute:
         return False
     return True
 
 
-def calculate_head_rev(options):
+def calculate_head_rev(root):
     # we assume that run-task has correctly checked out the revision indicated by
     # GECKO_HEAD_REF, so all that remains is to see what the current revision is.
     # Mercurial refers to that as `.`.
-    return subprocess.check_output(['hg', 'log', '-r', '.', '-T', '{node}'])
+    return subprocess.check_output(['hg', 'log', '-r', '.', '-T', '{node}'], cwd=root)