Bug 1337360: check for schema elements that aren't dashed-identifiers, with whitelist; r=Callek
author: Dustin J. Mitchell <dustin@mozilla.com>
Wed, 22 Mar 2017 16:24:38 +0000
changeset 399708 cd66ec07e1a14b3827f1a4f674b4e3dc21dd201c
parent 399707 2fffcd4ce3e439eeb5f88cc524b69a78704f51fd
child 399709 14874597101be98a87e02b2a7e497e0d0ad1e840
push id: 1490
push user: mtabara@mozilla.com
push date: Mon, 31 Jul 2017 14:08:16 +0000
treeherder: mozilla-release@70e32e6bf15e [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: Callek
bugs: 1337360
milestone: 55.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1337360: check for schema elements that aren't dashed-identifiers, with whitelist; r=Callek MozReview-Commit-ID: FirYCrjFXAS
taskcluster/taskgraph/cron/schema.py
taskcluster/taskgraph/transforms/balrog.py
taskcluster/taskgraph/transforms/beetmover.py
taskcluster/taskgraph/transforms/beetmover_checksums.py
taskcluster/taskgraph/transforms/checksums_signing.py
taskcluster/taskgraph/transforms/job/__init__.py
taskcluster/taskgraph/transforms/job/hazard.py
taskcluster/taskgraph/transforms/job/mach.py
taskcluster/taskgraph/transforms/job/mozharness.py
taskcluster/taskgraph/transforms/job/mozharness_test.py
taskcluster/taskgraph/transforms/job/run_task.py
taskcluster/taskgraph/transforms/job/spidermonkey.py
taskcluster/taskgraph/transforms/job/toolchain.py
taskcluster/taskgraph/transforms/l10n.py
taskcluster/taskgraph/transforms/signing.py
taskcluster/taskgraph/transforms/task.py
taskcluster/taskgraph/transforms/tests.py
taskcluster/taskgraph/util/schema.py
--- a/taskcluster/taskgraph/cron/schema.py
+++ b/taskcluster/taskgraph/cron/schema.py
@@ -2,20 +2,21 @@
 
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 
 from __future__ import absolute_import, print_function, unicode_literals
 
-from voluptuous import Schema, Any, Required, All
+from voluptuous import Any, Required, All
 from taskgraph.util.schema import (
     optionally_keyed_by,
     validate_schema,
+    Schema,
 )
 
 
 def even_15_minutes(minutes):
     if minutes % 15 != 0:
         raise ValueError("minutes must be evenly divisible by 15")
 
 cron_yml_schema = Schema({
--- a/taskcluster/taskgraph/transforms/balrog.py
+++ b/taskcluster/taskgraph/transforms/balrog.py
@@ -3,20 +3,20 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 """
 Transform the beetmover task into an actual task description.
 """
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 from taskgraph.transforms.base import TransformSequence
-from taskgraph.util.schema import validate_schema
+from taskgraph.util.schema import validate_schema, Schema
 from taskgraph.util.scriptworker import get_balrog_server_scope
 from taskgraph.transforms.task import task_description_schema
-from voluptuous import Schema, Any, Required, Optional
+from voluptuous import Any, Required, Optional
 
 
 # Voluptuous uses marker objects as dictionary *keys*, but they are not
 # comparable, so we cast all of the keys back to regular strings
 task_description_schema = {str(k): v for k, v in task_description_schema.schema.iteritems()}
 
 transforms = TransformSequence()
 
--- a/taskcluster/taskgraph/transforms/beetmover.py
+++ b/taskcluster/taskgraph/transforms/beetmover.py
@@ -3,21 +3,21 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 """
 Transform the beetmover task into an actual task description.
 """
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 from taskgraph.transforms.base import TransformSequence
-from taskgraph.util.schema import validate_schema
+from taskgraph.util.schema import validate_schema, Schema
 from taskgraph.util.scriptworker import (get_beetmover_bucket_scope,
                                          get_beetmover_action_scope)
 from taskgraph.transforms.task import task_description_schema
-from voluptuous import Schema, Any, Required, Optional
+from voluptuous import Any, Required, Optional
 
 
 # For developers: if you are adding any new artifacts here that need to be
 # transfered to S3, please be aware you also need to follow-up with patch in
 # the actual beetmoverscript logic that lies under
 # https://github.com/mozilla-releng/beetmoverscript/. See example in bug
 # 1348286
 _DESKTOP_UPSTREAM_ARTIFACTS_UNSIGNED_EN_US = [
--- a/taskcluster/taskgraph/transforms/beetmover_checksums.py
+++ b/taskcluster/taskgraph/transforms/beetmover_checksums.py
@@ -3,21 +3,21 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 """
 Transform the checksums signing task into an actual task description.
 """
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 from taskgraph.transforms.base import TransformSequence
-from taskgraph.util.schema import validate_schema
+from taskgraph.util.schema import validate_schema, Schema
 from taskgraph.util.scriptworker import (get_beetmover_bucket_scope,
                                          get_beetmover_action_scope)
 from taskgraph.transforms.task import task_description_schema
-from voluptuous import Schema, Any, Required, Optional
+from voluptuous import Any, Required, Optional
 
 # Voluptuous uses marker objects as dictionary *keys*, but they are not
 # comparable, so we cast all of the keys back to regular strings
 task_description_schema = {str(k): v for k, v in task_description_schema.schema.iteritems()}
 
 transforms = TransformSequence()
 
 taskref_or_string = Any(
--- a/taskcluster/taskgraph/transforms/checksums_signing.py
+++ b/taskcluster/taskgraph/transforms/checksums_signing.py
@@ -3,20 +3,20 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 """
 Transform the checksums signing task into an actual task description.
 """
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 from taskgraph.transforms.base import TransformSequence
-from taskgraph.util.schema import validate_schema
+from taskgraph.util.schema import validate_schema, Schema
 from taskgraph.util.scriptworker import get_signing_cert_scope
 from taskgraph.transforms.task import task_description_schema
-from voluptuous import Schema, Any, Required, Optional
+from voluptuous import Any, Required, Optional
 
 # Voluptuous uses marker objects as dictionary *keys*, but they are not
 # comparable, so we cast all of the keys back to regular strings
 task_description_schema = {str(k): v for k, v in task_description_schema.schema.iteritems()}
 
 transforms = TransformSequence()
 
 taskref_or_string = Any(
--- a/taskcluster/taskgraph/transforms/job/__init__.py
+++ b/taskcluster/taskgraph/transforms/job/__init__.py
@@ -11,24 +11,26 @@ run-using handlers in `taskcluster/taskg
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 import copy
 import logging
 import os
 
 from taskgraph.transforms.base import TransformSequence
-from taskgraph.util.schema import validate_schema
+from taskgraph.util.schema import (
+    validate_schema,
+    Schema,
+)
 from taskgraph.transforms.task import task_description_schema
 from voluptuous import (
     Any,
     Extra,
     Optional,
     Required,
-    Schema,
 )
 
 logger = logging.getLogger(__name__)
 
 # Voluptuous uses marker objects as dictionary *keys*, but they are not
 # comparable, so we cast all of the keys back to regular strings
 task_description_schema = {str(k): v for k, v in task_description_schema.schema.iteritems()}
 
--- a/taskcluster/taskgraph/transforms/job/hazard.py
+++ b/taskcluster/taskgraph/transforms/job/hazard.py
@@ -2,17 +2,18 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 """
 Support for running hazard jobs via dedicated scripts
 """
 
 from __future__ import absolute_import, print_function, unicode_literals
 
-from voluptuous import Schema, Required, Optional, Any
+from taskgraph.util.schema import Schema
+from voluptuous import Required, Optional, Any
 
 from taskgraph.transforms.job import run_job_using
 from taskgraph.transforms.job.common import (
     docker_worker_add_workspace_cache,
     docker_worker_setup_secrets,
     docker_worker_add_public_artifacts,
     support_vcs_checkout,
 )
--- a/taskcluster/taskgraph/transforms/job/mach.py
+++ b/taskcluster/taskgraph/transforms/job/mach.py
@@ -7,17 +7,18 @@ Support for running mach tasks (via run-
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 from taskgraph.transforms.job import run_job_using
 from taskgraph.transforms.job.run_task import (
     docker_worker_run_task,
     native_engine_run_task,
 )
-from voluptuous import Schema, Required
+from taskgraph.util.schema import Schema
+from voluptuous import Required
 
 mach_schema = Schema({
     Required('using'): 'mach',
 
     # The mach command (omitting `./mach`) to run
     Required('mach'): basestring,
 
     # Whether the job requires a build artifact or not. If True, the task
--- a/taskcluster/taskgraph/transforms/job/mozharness.py
+++ b/taskcluster/taskgraph/transforms/job/mozharness.py
@@ -5,17 +5,18 @@
 
 Support for running jobs via mozharness.  Ideally, most stuff gets run this
 way, and certainly anything using mozharness should use this approach.
 
 """
 
 from __future__ import absolute_import, print_function, unicode_literals
 
-from voluptuous import Schema, Required, Optional, Any
+from taskgraph.util.schema import Schema
+from voluptuous import Required, Optional, Any
 
 from taskgraph.transforms.job import run_job_using
 from taskgraph.transforms.job.common import (
     docker_worker_add_workspace_cache,
     docker_worker_add_gecko_vcs_env_vars,
     docker_worker_setup_secrets,
     docker_worker_add_public_artifacts,
     support_vcs_checkout,
--- a/taskcluster/taskgraph/transforms/job/mozharness_test.py
+++ b/taskcluster/taskgraph/transforms/job/mozharness_test.py
@@ -1,17 +1,18 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
-from voluptuous import Schema, Required
+from voluptuous import Required
 from taskgraph.util.taskcluster import get_artifact_url
 from taskgraph.transforms.job import run_job_using
+from taskgraph.util.schema import Schema
 from taskgraph.transforms.tests import (
     test_description_schema,
     get_firefox_version,
     normpath
 )
 from taskgraph.transforms.job.common import (
     support_vcs_checkout,
 )
--- a/taskcluster/taskgraph/transforms/job/run_task.py
+++ b/taskcluster/taskgraph/transforms/job/run_task.py
@@ -3,21 +3,22 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 """
 Support for running jobs that are invoked via the `run-task` script.
 """
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 from taskgraph.transforms.job import run_job_using
+from taskgraph.util.schema import Schema
 from taskgraph.transforms.job.common import (
     add_build_dependency,
     support_vcs_checkout,
 )
-from voluptuous import Schema, Required, Any
+from voluptuous import Required, Any
 
 run_task_schema = Schema({
     Required('using'): 'run-task',
 
     # if true, add a cache at ~worker/.cache, which is where things like pip
     # tend to hide their caches.  This cache is never added for level-1 jobs.
     Required('cache-dotcache', default=False): bool,
 
--- a/taskcluster/taskgraph/transforms/job/spidermonkey.py
+++ b/taskcluster/taskgraph/transforms/job/spidermonkey.py
@@ -2,17 +2,18 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 """
 Support for running spidermonkey jobs via dedicated scripts
 """
 
 from __future__ import absolute_import, print_function, unicode_literals
 
-from voluptuous import Schema, Required, Optional, Any
+from taskgraph.util.schema import Schema
+from voluptuous import Required, Optional, Any
 
 from taskgraph.transforms.job import run_job_using
 from taskgraph.transforms.job.common import (
     docker_worker_add_public_artifacts,
     support_vcs_checkout,
 )
 
 sm_run_schema = Schema({
--- a/taskcluster/taskgraph/transforms/job/toolchain.py
+++ b/taskcluster/taskgraph/transforms/job/toolchain.py
@@ -2,17 +2,18 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 """
 Support for running toolchain-building jobs via dedicated scripts
 """
 
 from __future__ import absolute_import, print_function, unicode_literals
 
-from voluptuous import Schema, Optional, Required, Any
+from taskgraph.util.schema import Schema
+from voluptuous import Optional, Required, Any
 
 from taskgraph.transforms.job import run_job_using
 from taskgraph.transforms.job.common import (
     docker_worker_add_tc_vcs_cache,
     docker_worker_add_gecko_vcs_env_vars,
     support_vcs_checkout,
 )
 from taskgraph.util.hash import hash_paths
--- a/taskcluster/taskgraph/transforms/l10n.py
+++ b/taskcluster/taskgraph/transforms/l10n.py
@@ -13,24 +13,24 @@ import json
 from mozbuild.chunkify import chunkify
 from taskgraph.transforms.base import (
     TransformSequence,
 )
 from taskgraph.util.schema import (
     validate_schema,
     optionally_keyed_by,
     resolve_keyed_by,
+    Schema,
 )
 from taskgraph.util.treeherder import split_symbol, join_symbol
 from voluptuous import (
     Any,
     Extra,
     Optional,
     Required,
-    Schema,
 )
 
 
 def _by_platform(arg):
     return optionally_keyed_by('build-platform', arg)
 
 # shortcut for a string where task references are allowed
 taskref_or_string = Any(
--- a/taskcluster/taskgraph/transforms/signing.py
+++ b/taskcluster/taskgraph/transforms/signing.py
@@ -3,20 +3,20 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 """
 Transform the signing task into an actual task description.
 """
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 from taskgraph.transforms.base import TransformSequence
-from taskgraph.util.schema import validate_schema
+from taskgraph.util.schema import validate_schema, Schema
 from taskgraph.util.scriptworker import get_signing_cert_scope
 from taskgraph.transforms.task import task_description_schema
-from voluptuous import Schema, Any, Required, Optional
+from voluptuous import Any, Required, Optional
 
 
 # Voluptuous uses marker objects as dictionary *keys*, but they are not
 # comparable, so we cast all of the keys back to regular strings
 task_description_schema = {str(k): v for k, v in task_description_schema.schema.iteritems()}
 
 transforms = TransformSequence()
 
--- a/taskcluster/taskgraph/transforms/task.py
+++ b/taskcluster/taskgraph/transforms/task.py
@@ -10,19 +10,19 @@ complexities of worker implementations, 
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 import json
 import time
 
 from taskgraph.util.treeherder import split_symbol
 from taskgraph.transforms.base import TransformSequence
-from taskgraph.util.schema import validate_schema
+from taskgraph.util.schema import validate_schema, Schema
 from taskgraph.util.scriptworker import get_release_config
-from voluptuous import Schema, Any, Required, Optional, Extra
+from voluptuous import Any, Required, Optional, Extra
 
 from .gecko_v2_whitelist import JOB_NAME_WHITELIST, JOB_NAME_WHITELIST_ERROR
 
 
 # shortcut for a string where task references are allowed
 taskref_or_string = Any(
     basestring,
     {Required('task-reference'): basestring})
--- a/taskcluster/taskgraph/transforms/tests.py
+++ b/taskcluster/taskgraph/transforms/tests.py
@@ -20,22 +20,22 @@ for example - use `all_tests.py` instead
 from __future__ import absolute_import, print_function, unicode_literals
 
 from taskgraph.transforms.base import TransformSequence
 from taskgraph.util.schema import resolve_keyed_by
 from taskgraph.util.treeherder import split_symbol, join_symbol
 from taskgraph.util.schema import (
     validate_schema,
     optionally_keyed_by,
+    Schema,
 )
 from voluptuous import (
     Any,
     Optional,
     Required,
-    Schema,
 )
 
 import copy
 import logging
 
 WORKER_TYPE = {
     # default worker types keyed by instance-size
     'large': 'aws-provisioner-v1/gecko-t-linux-large',
--- a/taskcluster/taskgraph/util/schema.py
+++ b/taskcluster/taskgraph/util/schema.py
@@ -1,16 +1,18 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
+import re
 import copy
 import pprint
+import collections
 import voluptuous
 
 from .attributes import keymatch
 
 
 def validate_schema(schema, obj, msg_prefix):
     """
     Validate that object satisfies schema.  If not, generate a useful exception
@@ -121,8 +123,78 @@ def resolve_keyed_by(item, field, item_n
                     keyed_by, key, field, item_name))
         elif matches:
             value = container[subfield] = matches[0]
             continue
 
         raise Exception(
             "No {} matching {!r} nor 'default' found while determining item {} in {}".format(
                 keyed_by, key, field, item_name))
+
+# Schemas for YAML files should use dashed identifiers by default.  If there are
+# components of the schema for which there is a good reason to use another format,
+# they can be whitelisted here.
+WHITELISTED_SCHEMA_IDENTIFIERS = [
+    # upstream-artifacts are handed directly to scriptWorker, which expects interCaps
+    lambda path: "[u'upstream-artifacts']" in path,
+
+    # chainOfTrust (TODO)
+    lambda path: path.startswith("schema[u'chainOfTrust']"),
+
+    # attributes (TODO)
+    lambda path: path.startswith("schema[u'attributes']"),
+]
+
+
+def check_schema(schema):
+    """
+    Check that the string keys of a schema are dashed lower-case identifiers.
+
+    Walks `schema.schema`, descending into mappings, lists/tuples and the
+    alternatives of `voluptuous.Any`; other values are not descended into.
+    Keys wrapped in `voluptuous.Optional` or `voluptuous.Required` are converted
+    with `str()` before being checked.
+
+    Raises RuntimeError for the first key found that neither matches the
+    identifier pattern nor is whitelisted in WHITELISTED_SCHEMA_IDENTIFIERS.
+    """
+    # \Z rather than $: $ also matches just before a trailing newline
+    identifier_re = re.compile(r'^[a-z][a-z0-9-]*\Z')
+
+    def whitelisted(path):
+        return any(f(path) for f in WHITELISTED_SCHEMA_IDENTIFIERS)
+
+    # named `visit` rather than `iter` so the builtin is not shadowed
+    def visit(path, sch):
+        if isinstance(sch, collections.Mapping):
+            for k, v in sch.iteritems():
+                # the whitelist predicates are matched against this repr-based path
+                child = "{}[{!r}]".format(path, k)
+                if isinstance(k, (voluptuous.Optional, voluptuous.Required)):
+                    k = str(k)
+                if isinstance(k, basestring):
+                    if not identifier_re.match(k) and not whitelisted(child):
+                        raise RuntimeError(
+                            'YAML schemas should use dashed lower-case identifiers, '
+                            'not {!r} @ {}'.format(k, child))
+                visit(child, v)
+        elif isinstance(sch, (list, tuple)):
+            for i, v in enumerate(sch):
+                visit("{}[{}]".format(path, i), v)
+        elif isinstance(sch, voluptuous.Any):
+            # alternatives are checked under the same path as the Any itself
+            for v in sch.validators:
+                visit(path, v)
+    visit('schema', schema.schema)
+
+
+def Schema(*args, **kwargs):
+    """
+    Operates identically to voluptuous.Schema, but applying some taskgraph-specific checks
+    in the process.
+
+    The arguments are passed through to voluptuous.Schema, and the resulting schema is
+    returned after check_schema has accepted it (check_schema raises RuntimeError
+    otherwise).
+    """
+    schema = voluptuous.Schema(*args, **kwargs)
+    check_schema(schema)
+    return schema