Bug 1328727: use json-e for cron decision tasks too; r=aki,jonasfj
author Dustin J. Mitchell <dustin@mozilla.com>
Fri, 21 Jul 2017 18:08:06 +0000
changeset 370619 2badbccc0c6e34cbdf542379a3c31db34dbb1cae
parent 370618 88784cbc71cc650ffa503e53da11c9e45e6b3564
child 370620 e5aad4922fba8855a4bfc979adf4e56c44d6db8c
push id 32231
push user cbook@mozilla.com
push date Tue, 25 Jul 2017 12:20:10 +0000
treeherder mozilla-central@80394cbcae0f [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers aki, jonasfj
bugs 1328727
milestone 56.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1328727: use json-e for cron decision tasks too; r=aki,jonasfj

Changes to cron decision tasks:
- drops some unnecessary routes
- drops tags.createdForUser: nobody@..
- more use of environment variables within the command line

MozReview-Commit-ID: 9zoqFvwrBRs
.taskcluster.yml
taskcluster/taskgraph/cron/decision.py
--- a/.taskcluster.yml
+++ b/.taskcluster.yml
@@ -1,60 +1,79 @@
-# This file is handled by mozilla-taskcluster; see
-# https://docs.taskcluster.net/reference/integrations/mozilla-taskcluster/docs/taskcluster-yml
+# This file is rendered via JSON-e by
+# - mozilla-taskcluster - https://docs.taskcluster.net/reference/integrations/mozilla-taskcluster/docs/taskcluster-yml
+# - cron tasks - taskcluster/taskgraph/cron/decision.py
 version: 1
 tasks:
   $let:
     # sometimes the push user is just `ffxbld` or the like, but we want an email-like field..
     ownerEmail: {$if: '"@" in push.owner', then: '${push.owner}', else: '${push.owner}@noreply.mozilla.org'}
+    # ensure there's no trailing `/` on the repo URL
+    repoUrl: {$if: 'repository.url[-1] == "/"', then: {$eval: 'repository.url[:-1]'}, else: {$eval: 'repository.url'}}
   in:
   - taskId: '${as_slugid("decision")}'
     taskGroupId: '${as_slugid("decision")}' # same as taskId; this is how automation identifies a decision task
     schedulerId: 'gecko-level-${repository.level}'
 
     created: {$fromNow: ''}
     deadline: {$fromNow: '1 day'}
     expires: {$fromNow: '1 year 1 second'} # 1 second so artifacts expire first, despite rounding errors
     metadata:
-      owner: "${ownerEmail}"
-      source: "${repository.url}/raw-file/${push.revision}/.taskcluster.yml"
-      name: "Gecko Decision Task"
-      description: |
-          The task that creates all of the other tasks in the task graph
+      $merge:
+        - owner: "${ownerEmail}"
+          source: "${repoUrl}/raw-file/${push.revision}/.taskcluster.yml"
+        - $if: 'tasks_for == "hg-push"'
+          then:
+            name: "Gecko Decision Task"
+            description: 'The task that creates all of the other tasks in the task graph'
+          else:
+            name: "Decision Task for cron job ${cron.job_name}"
+            description: 'Created by a [cron task](https://tools.taskcluster.net/tasks/${cron.task_id})'
 
     provisionerId: "aws-provisioner-v1"
     workerType: "gecko-decision"
 
     tags:
-      createdForUser: "${ownerEmail}"
+      $if: 'tasks_for == "hg-push"'
+      then: {createdForUser: "${ownerEmail}"}
 
     routes:
-      - "index.gecko.v2.${repository.project}.latest.firefox.decision"
-      - "index.gecko.v2.${repository.project}.pushlog-id.${push.pushlog_id}.decision"
-      - "tc-treeherder.v2.${repository.project}.${push.revision}.${push.pushlog_id}"
-      - "tc-treeherder-stage.v2.${repository.project}.${push.revision}.${push.pushlog_id}"
-      - "notify.email.${ownerEmail}.on-failed"
-      - "notify.email.${ownerEmail}.on-exception"
+      $if: 'tasks_for == "hg-push"'
+      then:
+        - "index.gecko.v2.${repository.project}.latest.firefox.decision"
+        - "index.gecko.v2.${repository.project}.pushlog-id.${push.pushlog_id}.decision"
+        - "tc-treeherder.v2.${repository.project}.${push.revision}.${push.pushlog_id}"
+        - "tc-treeherder-stage.v2.${repository.project}.${push.revision}.${push.pushlog_id}"
+        - "notify.email.${ownerEmail}.on-failed"
+        - "notify.email.${ownerEmail}.on-exception"
+      else:
+        - "index.gecko.v2.${repository.project}.latest.firefox.decision-${cron.job_name}"
+        - "tc-treeherder.v2.${repository.project}.${push.revision}.${push.pushlog_id}"
+        - "tc-treeherder-stage.v2.${repository.project}.${push.revision}.${push.pushlog_id}"
 
     scopes:
-      - 'assume:repo:${repository.url[8:-1]}:*'
-      - 'queue:route:notify.email.${ownerEmail}.*'
+      $if: 'tasks_for == "hg-push"'
+      then:
+        - 'assume:repo:${repoUrl[8:]}:*'
+        - 'queue:route:notify.email.${ownerEmail}.*'
+      else:
+        - 'assume:repo:${repoUrl[8:]}:cron:${cron.job_name}'
 
     dependencies: []
     requires: all-completed
 
     priority: lowest
     retries: 5
 
     payload:
       env:
         # checkout-gecko uses these to check out the source; the inputs
         # to `mach taskgraph decision` are all on the command line.
         GECKO_BASE_REPOSITORY: 'https://hg.mozilla.org/mozilla-unified'
-        GECKO_HEAD_REPOSITORY: '${repository.url}'
+        GECKO_HEAD_REPOSITORY: '${repoUrl}'
         GECKO_HEAD_REF: '${push.revision}'
         GECKO_HEAD_REV: '${push.revision}'
         GECKO_COMMIT_MSG: '${push.comment}'
         HG_STORE_PATH: /home/worker/checkouts/hg-store
 
       cache:
         level-${repository.level}-checkouts: /home/worker/checkouts
 
@@ -73,32 +92,40 @@ tasks:
       # TODO use mozilla-unified for the base repository once the tc-vcs
       # tar.gz archives are created or tc-vcs isn't being used.
       command:
         - /home/worker/bin/run-task
         - '--vcs-checkout=/home/worker/checkouts/gecko'
         - '--'
         - bash
         - -cx
-        - >
+        - $let:
+            extraArgs: {$if: 'tasks_for == "hg-push"', then: '', else: '${cron.quoted_args}'}
+          in: >
             cd /home/worker/checkouts/gecko &&
             ln -s /home/worker/artifacts artifacts &&
             ./mach --log-no-times taskgraph decision
             --pushlog-id='${push.pushlog_id}'
             --pushdate='${push.pushdate}'
             --project='${repository.project}'
             --message="$GECKO_COMMIT_MSG"
             --owner='${ownerEmail}'
             --level='${repository.level}'
             --base-repository="$GECKO_BASE_REPOSITORY"
             --head-repository="$GECKO_HEAD_REPOSITORY"
             --head-ref="$GECKO_HEAD_REF"
             --head-rev="$GECKO_HEAD_REV"
+            ${extraArgs}
 
       artifacts:
         'public':
           type: 'directory'
           path: '/home/worker/artifacts'
           expires: {$fromNow: '1 year'}
 
     extra:
       treeherder:
-        symbol: D
+        $if: 'tasks_for == "hg-push"'
+        then:
+          symbol: D
+        else:
+          groupSymbol: cron
+          symbol: "${cron.job_symbol}"
--- a/taskcluster/taskgraph/cron/decision.py
+++ b/taskcluster/taskgraph/cron/decision.py
@@ -2,99 +2,82 @@
 
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 
 from __future__ import absolute_import, print_function, unicode_literals
 
+import jsone
 import pipes
 import yaml
-import re
 import os
 import slugid
 
+from taskgraph.util.time import current_json_time
+
 
 def run_decision_task(job, params):
     arguments = []
     if 'target-tasks-method' in job:
         arguments.append('--target-tasks-method={}'.format(job['target-tasks-method']))
     return [
         make_decision_task(
             params,
             symbol=job['treeherder-symbol'],
             arguments=arguments),
     ]
 
 
 def make_decision_task(params, symbol, arguments=[], head_rev=None):
-    """Generate a basic decision task, based on the root
-    .taskcluster.yml"""
+    """Generate a basic decision task, based on the root .taskcluster.yml"""
     with open('.taskcluster.yml') as f:
-        taskcluster_yml = f.read()
+        taskcluster_yml = yaml.load(f)
 
     if not head_rev:
         head_rev = params['head_rev']
 
-    # do a cheap and dirty job of the template substitution that mozilla-taskcluster
-    # does when it reads .taskcluster.yml
-    comment = '"no push -- cron task \'{job_name}\'"'.format(**params),
-    replacements = {
-        '\'{{{?now}}}?\'': "{'relative-datestamp': '0 seconds'}",
-        '{{{?owner}}}?': 'nobody@mozilla.org',
-        '{{#shellquote}}{{{comment}}}{{/shellquote}}': comment,
-        '{{{?source}}}?': params['head_repository'],
-        '{{{?url}}}?': params['head_repository'],
-        '{{{?project}}}?': params['project'],
-        '{{{?level}}}?': params['level'],
-        '{{{?revision}}}?': head_rev,
-        '\'{{#from_now}}([^{]*){{/from_now}}\'': "{'relative-datestamp': '\\1'}",
-        '{{{?pushdate}}}?': '0',
-        # treeherder ignores pushlog_id, so set it to -1
-        '{{{?pushlog_id}}}?': '-1',
-        # omitted as unnecessary
-        # {{#as_slugid}}..{{/as_slugid}}
-    }
-    for pattern, replacement in replacements.iteritems():
-        taskcluster_yml = re.sub(pattern, replacement, taskcluster_yml)
+    slugids = {}
 
-    task = yaml.load(taskcluster_yml)['tasks'][0]['task']
-
-    # set some metadata
-    task['metadata']['name'] = 'Decision task for cron job ' + params['job_name']
-    cron_task_id = os.environ.get('TASK_ID', '<cron task id>')
-    descr_md = 'Created by a [cron task](https://tools.taskcluster.net/task-inspector/#{}/)'
-    task['metadata']['description'] = descr_md.format(cron_task_id)
+    def as_slugid(name):
+        # https://github.com/taskcluster/json-e/issues/164
+        name = name[0]
+        if name not in slugids:
+            slugids[name] = slugid.nice()
+        return slugids[name]
 
-    # create new indices so these aren't mixed in with regular decision tasks
-    for i, route in enumerate(task['routes']):
-        if route.startswith('index'):
-            task['routes'][i] = route + '-' + params['job_name']
-
-    th = task['extra']['treeherder']
-    th['groupSymbol'] = 'cron'
-    th['symbol'] = symbol
-
-    # add a scope based on the repository, with a cron:<job_name> suffix
-    match = re.match(r'https://(hg.mozilla.org)/(.*?)/?$', params['head_repository'])
-    if not match:
-        raise Exception('Unrecognized head_repository')
-    repo_scope = 'assume:repo:{}/{}:cron:{}'.format(
-        match.group(1), match.group(2), params['job_name'])
-    task.setdefault('scopes', []).append(repo_scope)
+    # provide a similar JSON-e context to what mozilla-taskcluster provides:
+    # https://docs.taskcluster.net/reference/integrations/mozilla-taskcluster/docs/taskcluster-yml
+    # but with a different tasks_for and an extra `cron` section
+    context = {
+        'tasks_for': 'cron',
+        'repository': {
+            'url': params['head_repository'],
+            'project': params['project'],
+            'level': params['level'],
+        },
+        'push': {
+            'revision': params['head_rev'],
+            # remainder are fake values, but the decision task expects them anyway
+            'pushlog_id': -1,
+            'pushdate': 0,
+            'owner': 'nobody',
+            'comment': '',
+        },
+        'cron': {
+            'task_id': os.environ.get('TASK_ID', '<cron task id>'),
+            'job_name': params['job_name'],
+            'job_symbol': symbol,
+            # args are shell-quoted since they are given to `bash -c`
+            'quoted_args': ' '.join(pipes.quote(a) for a in arguments),
+        },
+        'now': current_json_time(),
+        'as_slugid': as_slugid,
+    }
 
-    # append arguments, quoted, to the decision task command
-    shellcmd = task['payload']['command']
-    shellcmd[-1] = shellcmd[-1].rstrip('\n')  # strip yaml artifact
-    for arg in arguments:
-        shellcmd[-1] += ' ' + pipes.quote(arg)
-
-    task_id = slugid.nice()
+    rendered = jsone.render(taskcluster_yml, context)
+    if len(rendered['tasks']) != 1:
+        raise Exception("Expected .taskcluster.yml to only produce one cron task")
+    task = rendered['tasks'][0]
 
-    # set taskGroupid = taskId, as expected of decision tasks by other systems.
-    # This creates a new taskGroup for this graph.
-    task['taskGroupId'] = task_id
-
-    # set the schedulerId based on the level
-    task['schedulerId'] = 'gecko-level-{}'.format(params['level'])
-
+    task_id = task.pop('taskId')
     return (task_id, task)