Bug 1188888 - Part 3 - Refactor the histogram script type & presence checking. r=chutten
authorGeorg Fritzsche <georg.fritzsche@googlemail.com>
Wed, 20 Jul 2016 17:10:24 +0200
changeset 330930 45b83bf04a2617969b8b5664f7dc3f11e118815f
parent 330929 58344005efca56b77c38459955f4099ad9f68f22
child 330931 a662e130c9ded8a98cf4b28ed10acd4fd7d1d78d
push id9858
push userjlund@mozilla.com
push dateMon, 01 Aug 2016 14:37:10 +0000
treeherdermozilla-aurora@203106ef6cb6 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerschutten
bugs1188888
milestone50.0a1
Bug 1188888 - Part 3 - Refactor the histogram script type & presence checking. r=chutten
toolkit/components/telemetry/histogram_tools.py
--- a/toolkit/components/telemetry/histogram_tools.py
+++ b/toolkit/components/telemetry/histogram_tools.py
@@ -211,80 +211,85 @@ associated with the histogram.  Returns 
         # We removed extended_statistics_ok on the client, but the server-side,
         # where _strict_type_checks==False, has to deal with historical data.
         if not self._strict_type_checks:
             table['exponential'].append('extended_statistics_ok')
 
         table_dispatch(definition['kind'], table,
                        lambda allowed_keys: Histogram.check_keys(name, definition, allowed_keys))
 
-        # Check for the alert_emails field. Use counters don't have any mechanism
-        # to add them, so skip the check for them.
-        if not self._is_use_counter:
-            if 'alert_emails' not in definition:
-                if whitelists is not None and name not in whitelists['alert_emails']:
-                    raise KeyError, 'New histogram "%s" must have an alert_emails field.' % name
-            elif not isinstance(definition['alert_emails'], list):
-                raise KeyError, 'alert_emails must be an array (in histogram "%s")' % name
+        self.check_name(name)
+        self.check_field_types(name, definition)
+        self.check_expiration(name, definition)
+        self.check_whitelistable_fields(name, definition)
 
-        Histogram.check_name(name)
-        self.check_field_types(name, definition)
-        Histogram.check_expiration(name, definition)
-        self.check_bug_numbers(name, definition)
-
-    @staticmethod
-    def check_name(name):
+    def check_name(self, name):
         if '#' in name:
             raise ValueError, '"#" not permitted for %s' % (name)
 
-    @staticmethod
-    def check_expiration(name, definition):
+    def check_expiration(self, name, definition):
         expiration = definition.get('expires_in_version')
 
         if not expiration:
             return
 
         if re.match(r'^[1-9][0-9]*$', expiration):
             expiration = expiration + ".0a1"
         elif re.match(r'^[1-9][0-9]*\.0$', expiration):
             expiration = expiration + "a1"
 
         definition['expires_in_version'] = expiration
 
+    # Check for the presence of fields that old histograms are whitelisted for.
+    def check_whitelistable_fields(self, name, definition):
+        # Use counters don't have any mechanism to add the fields checked here,
+        # so skip the check for them.
+        if self._is_use_counter:
+            return
+
+        # In the pipeline we don't have whitelists available.
+        if whitelists is None:
+            return
+
+        for field in ['alert_emails', 'bug_numbers']:
+            if field not in definition and name not in whitelists[field]:
+                raise KeyError, 'New histogram "%s" must have a %s field.' % (name, field)
+
     def check_bug_numbers(self, name, definition):
         # Use counters don't have any mechanism to add the bug numbers field.
         if self._is_use_counter:
             return
+
         bug_numbers = definition.get('bug_numbers')
         if not bug_numbers:
             if whitelists is None or name in whitelists['bug_numbers']:
                 return
             else:
                 raise KeyError, 'New histogram "%s" must have a bug_numbers field.' % name
 
-        if not isinstance(bug_numbers, list):
-            raise ValueError, 'bug_numbers field for "%s" should be an array' % (name)
-
-        if not all(type(num) is int for num in bug_numbers):
-            raise ValueError, 'bug_numbers array for "%s" should only contain integers' % (name)
-
     def check_field_types(self, name, definition):
         # Define expected types for the histogram properties.
         type_checked_fields = {
-                "n_buckets": int,
-                "n_values": int,
-                "low": int,
-                "high": int,
-                "keyed": bool,
-                "expires_in_version": basestring,
-                "kind": basestring,
-                "description": basestring,
-                "cpp_guard": basestring,
-                "releaseChannelCollection": basestring
-            }
+            "n_buckets": int,
+            "n_values": int,
+            "low": int,
+            "high": int,
+            "keyed": bool,
+            "expires_in_version": basestring,
+            "kind": basestring,
+            "description": basestring,
+            "cpp_guard": basestring,
+            "releaseChannelCollection": basestring,
+        }
+
+        # For list fields we check the items types.
+        type_checked_list_fields = {
+            "bug_numbers": int,
+            "alert_emails": basestring,
+        }
 
         # For the server-side, where _strict_type_checks==False, we want to
         # skip the stricter type checks for these fields for dealing with
         # historical data.
         coerce_fields = ["low", "high", "n_values", "n_buckets"]
         if not self._strict_type_checks:
             def try_to_coerce_to_number(v):
                 try:
@@ -292,26 +297,34 @@ associated with the histogram.  Returns 
                 except:
                     return v
             for key in [k for k in coerce_fields if k in definition]:
                 definition[key] = try_to_coerce_to_number(definition[key])
             # This handles old "keyed":"true" definitions (bug 1271986).
             if definition.get("keyed", None) == "true":
                 definition["keyed"] = True
 
+        def nice_type_name(t):
+            if t is basestring:
+                return "string"
+            return t.__name__
+
         for key, key_type in type_checked_fields.iteritems():
             if not key in definition:
                 continue
             if not isinstance(definition[key], key_type):
-                if key_type is basestring:
-                    type_name = "string"
-                else:
-                    type_name = key_type.__name__
                 raise ValueError, ('value for key "{0}" in Histogram "{1}" '
-                        'should be {2}').format(key, name, type_name)
+                        'should be {2}').format(key, name, nice_type_name(key_type))
+
+        for key, key_type in type_checked_list_fields.iteritems():
+            if not key in definition:
+                continue
+            if not all(isinstance(x, key_type) for x in definition[key]):
+                raise ValueError, ('all values for list "{0}" in Histogram "{1}" '
+                        'should be {2}').format(key, name, nice_type_name(key_type))
 
     @staticmethod
     def check_keys(name, definition, allowed_keys):
         for key in definition.iterkeys():
             if key not in allowed_keys:
                 raise KeyError, '%s not permitted for %s' % (key, name)
 
     def set_bucket_parameters(self, low, high, n_buckets):