Bug 1401612 - Telemetry histogram validator now prints multiple error messages at a time. r=chutten
author Aditya Bharti <adibhar97@gmail.com>
Fri, 15 Dec 2017 13:14:28 +0530
changeset 396762 92dbf32cc352
parent 396761 21bc5f64c339
child 396763 12214b5efbe4
child 396830 c9ce08c45635
push id 98373
push user ryanvm@gmail.com
push date Tue, 19 Dec 2017 02:12:55 +0000
treeherder mozilla-inbound@92dbf32cc352
reviewers chutten
bugs 1401612
milestone 59.0a1
Bug 1401612 - Telemetry histogram validator now prints multiple error messages at a time. r=chutten The histogram validator and the other probe parsers printed a single error at a time and halted the build. Extended the ParserError class to support printing multiple errors at a time. Changed the parse_histograms.py histogram validator and partitioned all ParserErrors into 'immediately fatal' and 'eventually fatal'. Eventual errors are now accumulated and printed in one go upon exit. Future work might involve changing the other probe parsers (scalars and events) to make use of the extended functionality.
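
To make the new error-handling flow easier to review, here is a minimal, self-contained sketch of the pattern this patch introduces. The ParserError methods mirror the API added to shared_telemetry_utils.py below; the check_histogram helper and its two checks are hypothetical and only illustrate how a parser is expected to use handle_later(), handle_now(), and the atexit-registered exit_func().

# Minimal sketch of the "immediately fatal" vs. "eventually fatal" error
# handling added by this patch. The ParserError API mirrors the class in
# shared_telemetry_utils.py; check_histogram and its checks are hypothetical.
from __future__ import print_function

import atexit
import sys


class ParserError(Exception):
    eventual_errors = []

    def handle_later(self):
        # Record the error and keep parsing so later errors are reported too.
        ParserError.eventual_errors.append(self)

    def handle_now(self):
        # Flush everything accumulated so far, then abort.
        ParserError.print_eventuals()
        print(str(self), file=sys.stderr)
        sys.exit(1)

    @classmethod
    def print_eventuals(cls):
        while cls.eventual_errors:
            print(str(cls.eventual_errors.pop(0)), file=sys.stderr)

    @classmethod
    def exit_func(cls):
        # Registered with atexit (as parse_histograms.py now does): if any
        # "eventual" errors were recorded, turn them into a fatal exit.
        if cls.eventual_errors:
            cls("Some errors occurred").handle_now()


atexit.register(ParserError.exit_func)


def check_histogram(name, definition):
    # Hypothetical checks; each problem is "eventually fatal", so a single
    # run reports every offending histogram instead of stopping at the first.
    if '#' in name:
        ParserError('Histogram "%s": "#" is not allowed.' % name).handle_later()
    if 'kind' not in definition:
        ParserError('Histogram "%s" is missing "kind".' % name).handle_later()


if __name__ == '__main__':
    check_histogram('BAD#NAME', {})
    check_histogram('ANOTHER_BAD_ONE', {})
    # Both errors are printed to stderr by exit_func when the process exits,
    # and the script exits with a non-zero status.
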
toolkit/components/telemetry/parse_histograms.py
toolkit/components/telemetry/shared_telemetry_utils.py
--- a/toolkit/components/telemetry/parse_histograms.py
+++ b/toolkit/components/telemetry/parse_histograms.py
@@ -4,20 +4,22 @@
 
 import collections
 import itertools
 import json
 import math
 import os
 import re
 import sys
+import atexit
 import shared_telemetry_utils as utils
 
 from shared_telemetry_utils import ParserError
 from collections import OrderedDict
+atexit.register(ParserError.exit_func)
 
 # Constants.
 MAX_LABEL_LENGTH = 20
 MAX_LABEL_COUNT = 100
 MAX_KEY_COUNT = 30
 MAX_KEY_LENGTH = 20
 MIN_CATEGORICAL_BUCKET_COUNT = 50
 CPP_IDENTIFIER_PATTERN = '^[a-z][a-z0-9_]+[a-z0-9]$'
@@ -95,20 +97,20 @@ def load_whitelist():
         whitelist_path = os.path.join(os.path.abspath(os.path.realpath(os.path.dirname(__file__))),
                                       'histogram-whitelists.json')
         with open(whitelist_path, 'r') as f:
             try:
                 whitelists = json.load(f)
                 for name, whitelist in whitelists.iteritems():
                     whitelists[name] = set(whitelist)
             except ValueError:
-                raise ParserError('Error parsing whitelist: %s' % whitelist_path)
+                ParserError('Error parsing whitelist: %s' % whitelist_path).handle_now()
     except IOError:
         whitelists = None
-        raise ParserError('Unable to parse whitelist: %s.' % whitelist_path)
+        ParserError('Unable to parse whitelist: %s.' % whitelist_path).handle_now()
 
 
 class Histogram:
     """A class for representing a histogram definition."""
 
     def __init__(self, name, definition, strict_type_checks=False):
         """Initialize a histogram named name with the given definition.
 definition is a dict-like object that must contain at least the keys:
@@ -213,34 +215,36 @@ associated with the histogram.  Returns 
             'count': linear_buckets,
             'enumerated': linear_buckets,
             'categorical': linear_buckets,
             'linear': linear_buckets,
             'exponential': exponential_buckets,
         }
 
         if self._kind not in bucket_fns:
-            raise ParserError('Unknown kind "%s" for histogram "%s".' % (self._kind, self._name))
+            ParserError('Unknown kind "%s" for histogram "%s".' %
+                        (self._kind, self._name)).handle_later()
 
         fn = bucket_fns[self._kind]
         return fn(self.low(), self.high(), self.n_buckets())
 
     def compute_bucket_parameters(self, definition):
         bucket_fns = {
             'boolean': Histogram.boolean_flag_bucket_parameters,
             'flag': Histogram.boolean_flag_bucket_parameters,
             'count': Histogram.boolean_flag_bucket_parameters,
             'enumerated': Histogram.enumerated_bucket_parameters,
             'categorical': Histogram.categorical_bucket_parameters,
             'linear': Histogram.linear_bucket_parameters,
             'exponential': Histogram.exponential_bucket_parameters,
         }
 
         if self._kind not in bucket_fns:
-            raise ParserError('Unknown kind "%s" for histogram "%s".' % (self._kind, self._name))
+            ParserError('Unknown kind "%s" for histogram "%s".' %
+                        (self._kind, self._name)).handle_later()
 
         fn = bucket_fns[self._kind]
         self.set_bucket_parameters(*fn(definition))
 
     def verify_attributes(self, name, definition):
         global ALWAYS_ALLOWED_KEYS
         general_keys = ALWAYS_ALLOWED_KEYS + ['low', 'high', 'n_buckets']
 
@@ -255,108 +259,110 @@ associated with the histogram.  Returns 
         }
         # We removed extended_statistics_ok on the client, but the server-side,
         # where _strict_type_checks==False, has to deal with historical data.
         if not self._strict_type_checks:
             table['exponential'].append('extended_statistics_ok')
 
         kind = definition['kind']
         if kind not in table:
-            raise ParserError('Unknown kind "%s" for histogram "%s".' % (kind, name))
+            ParserError('Unknown kind "%s" for histogram "%s".' % (kind, name)).handle_later()
         allowed_keys = table[kind]
 
         self.check_name(name)
         self.check_keys(name, definition, allowed_keys)
         self.check_keys_field(name, definition)
         self.check_field_types(name, definition)
         self.check_whitelisted_kind(name, definition)
         self.check_whitelistable_fields(name, definition)
         self.check_expiration(name, definition)
         self.check_label_values(name, definition)
         self.check_record_in_processes(name, definition)
 
     def check_name(self, name):
         if '#' in name:
-            raise ParserError('Error for histogram name "%s": "#" is not allowed.' % (name))
+            ParserError('Error for histogram name "%s": "#" is not allowed.' %
+                        (name)).handle_later()
 
         # Avoid C++ identifier conflicts between histogram enums and label enum names.
         if name.startswith("LABELS_"):
-            raise ParserError('Error for histogram name "%s":  can not start with "LABELS_".' %
-                              (name))
+            ParserError('Error for histogram name "%s":  can not start with "LABELS_".' %
+                        (name)).handle_later()
 
         # To make it easier to generate C++ identifiers from this etc., we restrict
         # the histogram names to a strict pattern.
         # We skip this on the server to avoid failures with old Histogram.json revisions.
         if self._strict_type_checks:
             pattern = '^[a-z][a-z0-9_]+[a-z0-9]$'
             if not re.match(pattern, name, re.IGNORECASE):
-                raise ParserError('Error for histogram name "%s": name does not conform to "%s"' %
-                                  (name, pattern))
+                ParserError('Error for histogram name "%s": name does not conform to "%s"' %
+                            (name, pattern)).handle_later()
 
     def check_expiration(self, name, definition):
         field = 'expires_in_version'
         expiration = definition.get(field)
 
         if not expiration:
             return
 
         # We forbid new probes from using "expires_in_version" : "default" field/value pair.
         # Old ones that use this are added to the whitelist.
         if expiration == "default" and \
            whitelists is not None and \
            name not in whitelists['expiry_default']:
-            raise ParserError('New histogram "%s" cannot have "default" %s value.' % (name, field))
+            ParserError('New histogram "%s" cannot have "default" %s value.' %
+                        (name, field)).handle_later()
 
         if expiration != "default" and not utils.validate_expiration_version(expiration):
-            raise ParserError(('Error for histogram {} - invalid {}: {}.'
-                               '\nSee: {}#expires-in-version')
-                              .format(name, field, expiration, HISTOGRAMS_DOC_URL))
+            ParserError(('Error for histogram {} - invalid {}: {}.'
+                        '\nSee: {}#expires-in-version')
+                        .format(name, field, expiration, HISTOGRAMS_DOC_URL)).handle_later()
 
         expiration = utils.add_expiration_postfix(expiration)
 
         definition[field] = expiration
 
     def check_label_values(self, name, definition):
         labels = definition.get('labels')
         if not labels:
             return
 
         invalid = filter(lambda l: len(l) > MAX_LABEL_LENGTH, labels)
         if len(invalid) > 0:
-            raise ParserError('Label values for "%s" exceed length limit of %d: %s' %
-                              (name, MAX_LABEL_LENGTH, ', '.join(invalid)))
+            ParserError('Label values for "%s" exceed length limit of %d: %s' %
+                        (name, MAX_LABEL_LENGTH, ', '.join(invalid))).handle_later()
 
         if len(labels) > MAX_LABEL_COUNT:
-            raise ParserError('Label count for "%s" exceeds limit of %d' %
-                              (name, MAX_LABEL_COUNT))
+            ParserError('Label count for "%s" exceeds limit of %d' %
+                        (name, MAX_LABEL_COUNT)).handle_now()
 
         # To make it easier to generate C++ identifiers from this etc., we restrict
         # the label values to a strict pattern.
         invalid = filter(lambda l: not re.match(CPP_IDENTIFIER_PATTERN, l, re.IGNORECASE), labels)
         if len(invalid) > 0:
-            raise ParserError('Label values for %s are not matching pattern "%s": %s' %
-                              (name, CPP_IDENTIFIER_PATTERN, ', '.join(invalid)))
+            ParserError('Label values for %s are not matching pattern "%s": %s' %
+                        (name, CPP_IDENTIFIER_PATTERN, ', '.join(invalid))).handle_later()
 
     def check_record_in_processes(self, name, definition):
         if not self._strict_type_checks:
             return
 
         field = 'record_in_processes'
         rip = definition.get(field)
 
         DOC_URL = HISTOGRAMS_DOC_URL + "#record-in-processes"
 
         if not rip:
-            raise ParserError('Histogram "%s" must have a "%s" field:\n%s'
-                              % (name, field, DOC_URL))
+            ParserError('Histogram "%s" must have a "%s" field:\n%s'
+                        % (name, field, DOC_URL)).handle_later()
 
         for process in rip:
             if not utils.is_valid_process_name(process):
-                raise ParserError('Histogram "%s" has unknown process "%s" in %s.\n%s' %
-                                  (name, process, field, DOC_URL))
+                ParserError('Histogram "%s" has unknown process "%s" in %s.\n%s' %
+                            (name, process, field, DOC_URL)).handle_later()
 
     def check_keys_field(self, name, definition):
         keys = definition.get('keys')
         if not self._strict_type_checks or keys is None:
             return
 
         if not definition.get('keyed', False):
             raise ValueError("'keys' field is not valid for %s; only allowed for keyed histograms."
@@ -390,43 +396,44 @@ associated with the histogram.  Returns 
         # don't support scalars there yet.
         hist_kind = definition.get("kind")
         android_cpp_guard =\
             definition.get("cpp_guard") in ["ANDROID", "MOZ_WIDGET_ANDROID"]
 
         if not android_cpp_guard and \
            hist_kind in ["flag", "count"] and \
            name not in whitelists["kind"]:
-            raise ParserError(('Unsupported kind "%s" for histogram "%s":\n'
-                               'New "%s" histograms are not supported on Desktop, you should'
-                               ' use scalars instead:\n'
-                               '%s\n'
-                               'Are you trying to add a histogram on Android?'
-                               ' Add "cpp_guard": "ANDROID" to your histogram definition.')
-                              % (hist_kind, name, hist_kind, SCALARS_DOC_URL))
+            ParserError(('Unsupported kind "%s" for histogram "%s":\n'
+                         'New "%s" histograms are not supported on Desktop, you should'
+                         ' use scalars instead:\n'
+                         '%s\n'
+                         'Are you trying to add a histogram on Android?'
+                         ' Add "cpp_guard": "ANDROID" to your histogram definition.')
+                        % (hist_kind, name, hist_kind, SCALARS_DOC_URL)).handle_now()
 
     # Check for the presence of fields that old histograms are whitelisted for.
     def check_whitelistable_fields(self, name, definition):
         # Use counters don't have any mechanism to add the fields checked here,
         # so skip the check for them.
         # We also don't need to run any of these checks on the server.
         if self._is_use_counter or not self._strict_type_checks:
             return
 
         # In the pipeline we don't have whitelists available.
         if whitelists is None:
             return
 
         for field in ['alert_emails', 'bug_numbers']:
             if field not in definition and name not in whitelists[field]:
-                raise ParserError('New histogram "%s" must have a "%s" field.' % (name, field))
+                ParserError('New histogram "%s" must have a "%s" field.' %
+                            (name, field)).handle_later()
             if field in definition and name in whitelists[field]:
                 msg = 'Histogram "%s" should be removed from the whitelist for "%s" in ' \
                       'histogram-whitelists.json.'
-                raise ParserError(msg % (name, field))
+                ParserError(msg % (name, field)).handle_later()
 
     def check_field_types(self, name, definition):
         # Define expected types for the histogram properties.
         type_checked_fields = {
             "n_buckets": int,
             "n_values": int,
             "low": int,
             "high": int,
@@ -475,47 +482,48 @@ associated with the histogram.  Returns 
             if t is basestring:
                 return "string"
             return t.__name__
 
         for key, key_type in type_checked_fields.iteritems():
             if key not in definition:
                 continue
             if not isinstance(definition[key], key_type):
-                raise ParserError('Value for key "{0}" in histogram "{1}" should be {2}.'
-                                  .format(key, name, nice_type_name(key_type)))
+                ParserError('Value for key "{0}" in histogram "{1}" should be {2}.'
+                            .format(key, name, nice_type_name(key_type))).handle_later()
 
         for key, key_type in type_checked_list_fields.iteritems():
             if key not in definition:
                 continue
             if not all(isinstance(x, key_type) for x in definition[key]):
-                raise ParserError('All values for list "{0}" in histogram "{1}" should be of type'
-                                  ' {2}.'.format(key, name, nice_type_name(key_type)))
+                ParserError('All values for list "{0}" in histogram "{1}" should be of type'
+                            ' {2}.'.format(key, name, nice_type_name(key_type))).handle_later()
 
     def check_keys(self, name, definition, allowed_keys):
         if not self._strict_type_checks:
             return
         for key in definition.iterkeys():
             if key not in allowed_keys:
-                raise ParserError('Key "%s" is not allowed for histogram "%s".' % (key, name))
+                ParserError('Key "%s" is not allowed for histogram "%s".' %
+                            (key, name)).handle_later()
 
     def set_bucket_parameters(self, low, high, n_buckets):
         self._low = low
         self._high = high
         self._n_buckets = n_buckets
         if whitelists is not None and self._n_buckets > 100 and type(self._n_buckets) is int:
             if self._name not in whitelists['n_buckets']:
-                raise ParserError(
+                ParserError(
                     'New histogram "%s" is not permitted to have more than 100 buckets.\n'
                     'Histograms with large numbers of buckets use disproportionately high'
                     ' amounts of resources. Contact a Telemetry peer (e.g. in #telemetry)'
                     ' if you think an exception ought to be made:\n'
                     'https://wiki.mozilla.org/Modules/Toolkit#Telemetry'
                     % self._name
-                    )
+                    ).handle_later()
 
     @staticmethod
     def boolean_flag_bucket_parameters(definition):
         return (1, 2, 3)
 
     @staticmethod
     def linear_bucket_parameters(definition):
         return (definition.get('low', 1),
@@ -552,57 +560,58 @@ associated with the histogram.  Returns 
             'count': 'COUNT',
             'enumerated': 'LINEAR',
             'categorical': 'CATEGORICAL',
             'linear': 'LINEAR',
             'exponential': 'EXPONENTIAL',
         }
 
         if self._kind not in types:
-            raise ParserError('Unknown kind "%s" for histogram "%s".' % (self._kind, self._name))
+            ParserError('Unknown kind "%s" for histogram "%s".' %
+                        (self._kind, self._name)).handle_later()
 
         self._nsITelemetry_kind = "nsITelemetry::HISTOGRAM_%s" % types[self._kind]
 
     def set_dataset(self, definition):
         datasets = {
             'opt-in': 'DATASET_RELEASE_CHANNEL_OPTIN',
             'opt-out': 'DATASET_RELEASE_CHANNEL_OPTOUT'
         }
 
         value = definition.get('releaseChannelCollection', 'opt-in')
         if value not in datasets:
-            raise ParserError('Unknown value for releaseChannelCollection'
-                              ' policy for histogram "%s".' % self._name)
+            ParserError('Unknown value for releaseChannelCollection'
+                        ' policy for histogram "%s".' % self._name).handle_later()
 
         self._dataset = "nsITelemetry::" + datasets[value]
 
 
 # This hook function loads the histograms into an OrderedDict.
 # It will raise a ParserError if duplicate keys are found.
 def load_histograms_into_dict(ordered_pairs, strict_type_checks):
     d = collections.OrderedDict()
     for key, value in ordered_pairs:
         if strict_type_checks and key in d:
-            raise ParserError("Found duplicate key in Histograms file: %s" % key)
+            ParserError("Found duplicate key in Histograms file: %s" % key).handle_later()
         d[key] = value
     return d
 
 
 # We support generating histograms from multiple different input files, not
 # just Histograms.json.  For each file's basename, we have a specific
 # routine to parse that file, and return a dictionary mapping histogram
 # names to histogram parameters.
 def from_Histograms_json(filename, strict_type_checks):
     with open(filename, 'r') as f:
         try:
             def hook(ps):
                 return load_histograms_into_dict(ps, strict_type_checks)
             histograms = json.load(f, object_pairs_hook=hook)
         except ValueError, e:
-            raise ParserError("error parsing histograms in %s: %s" % (filename, e.message))
+            ParserError("error parsing histograms in %s: %s" % (filename, e.message)).handle_now()
     return histograms
 
 
 def from_UseCounters_conf(filename, strict_type_checks):
     return usecounters.generate_histograms(filename)
 
 
 def from_nsDeprecatedOperationList(filename, strict_type_checks):
@@ -657,38 +666,39 @@ the histograms defined in filenames.
         parser = FILENAME_PARSERS[os.path.basename(filename)]
         histograms = parser(filename, strict_type_checks)
 
         # OrderedDicts are important, because then the iteration order over
         # the parsed histograms is stable, which makes the insertion into
         # all_histograms stable, which makes ordering in generated files
         # stable, which makes builds more deterministic.
         if not isinstance(histograms, OrderedDict):
-            raise ParserError("Histogram parser did not provide an OrderedDict.")
+            ParserError("Histogram parser did not provide an OrderedDict.").handle_now()
 
         for (name, definition) in histograms.iteritems():
             if name in all_histograms:
-                raise ParserError('Duplicate histogram name "%s".' % name)
+                ParserError('Duplicate histogram name "%s".' % name).handle_later()
             all_histograms[name] = definition
 
     # We require that all USE_COUNTER2_* histograms be defined in a contiguous
     # block.
     use_counter_indices = filter(lambda x: x[1].startswith("USE_COUNTER2_"),
                                  enumerate(all_histograms.iterkeys()))
     if use_counter_indices:
         lower_bound = use_counter_indices[0][0]
         upper_bound = use_counter_indices[-1][0]
         n_counters = upper_bound - lower_bound + 1
         if n_counters != len(use_counter_indices):
-            raise ParserError("Use counter histograms must be defined in a contiguous block.")
+            ParserError("Use counter histograms must be defined in a contiguous block."
+                        ).handle_later()
 
     # Check that histograms that were removed from Histograms.json etc.
     # are also removed from the whitelists.
     if whitelists is not None:
         all_whitelist_entries = itertools.chain.from_iterable(whitelists.itervalues())
         orphaned = set(all_whitelist_entries) - set(all_histograms.keys())
         if len(orphaned) > 0:
             msg = 'The following entries are orphaned and should be removed from ' \
                   'histogram-whitelists.json:\n%s'
-            raise ParserError(msg % (', '.join(sorted(orphaned))))
+            ParserError(msg % (', '.join(sorted(orphaned)))).handle_later()
 
     for (name, definition) in all_histograms.iteritems():
         yield Histogram(name, definition, strict_type_checks=strict_type_checks)
--- a/toolkit/components/telemetry/shared_telemetry_utils.py
+++ b/toolkit/components/telemetry/shared_telemetry_utils.py
@@ -4,33 +4,58 @@
 
 # This file contains utility functions shared by the scalars and the histogram generation
 # scripts.
 
 from __future__ import print_function
 
 import re
 import yaml
+import sys
 
 # This is a list of flags that determine which process a measurement is allowed
 # to record from.
 KNOWN_PROCESS_FLAGS = {
     'all': 'All',
     'all_childs': 'AllChilds',
     'main': 'Main',
     'content': 'Content',
     'gpu': 'Gpu',
 }
 
 PROCESS_ENUM_PREFIX = "mozilla::Telemetry::Common::RecordedProcessType::"
 
 
-# This is thrown by the different probe parsers.
 class ParserError(Exception):
-    pass
+    """Thrown by different probe parsers. Errors are partitioned into
+    'immediately fatal' and 'eventually fatal' so that the parser can print
+    multiple error messages at a time. See bug 1401612."""
+
+    eventual_errors = []
+
+    def __init__(self, *args):
+        Exception.__init__(self, *args)
+
+    def handle_later(self):
+        ParserError.eventual_errors.append(self)
+
+    def handle_now(self):
+        ParserError.print_eventuals()
+        print(self.message, file=sys.stderr)
+        sys.exit(1)
+
+    @classmethod
+    def print_eventuals(cls):
+        while cls.eventual_errors:
+            print(cls.eventual_errors.pop(0).message, file=sys.stderr)
+
+    @classmethod
+    def exit_func(cls):
+        if cls.eventual_errors:
+            cls("Some errors occurred").handle_now()
 
 
 def is_valid_process_name(name):
     return (name in KNOWN_PROCESS_FLAGS)
 
 
 def process_name_to_enum(name):
     return PROCESS_ENUM_PREFIX + KNOWN_PROCESS_FLAGS.get(name)