Bug 748417 - double-check Python-generated range-information; r=taras
authorNathan Froyd <froydnj@mozilla.com>
Mon, 27 Aug 2012 16:47:32 -0400
changeset 105700 f8c53a2ecb2b490d5b9cbc64cc6989974032de8e
parent 105699 01b657335f1d9dd3ecd369a993f105e5540d7633
child 105701 e36689713d6c8246c1e948f1845df07e92702c05
push id55
push usershu@rfrn.org
push dateThu, 30 Aug 2012 01:33:09 +0000
reviewerstaras
bugs748417
milestone18.0a1
Bug 748417 - double-check Python-generated range-information; r=taras
toolkit/components/telemetry/Makefile.in
toolkit/components/telemetry/Telemetry.cpp
toolkit/components/telemetry/gen-histogram-data.py
toolkit/components/telemetry/histogram_tools.py
--- a/toolkit/components/telemetry/Makefile.in
+++ b/toolkit/components/telemetry/Makefile.in
@@ -60,16 +60,26 @@ endif
 include $(topsrcdir)/config/config.mk
 include $(topsrcdir)/config/rules.mk
 include $(topsrcdir)/ipc/chromium/chromium-config.mk
 
 histograms_file := $(srcdir)/Histograms.json
 histogram_enum_file := TelemetryHistogramEnums.h
 histogram_data_file := TelemetryHistogramData.inc
 
-$(histogram_enum_file): $(histograms_file) $(srcdir)/gen-histogram-enum.py
+enum_python_deps := \
+  $(srcdir)/gen-histogram-enum.py \
+  $(srcdir)/histogram_tools.py \
+  $(NULL)
+
+data_python_deps := \
+  $(srcdir)/gen-histogram-data.py \
+  $(srcdir)/histogram_tools.py \
+  $(NULL)
+
+$(histogram_enum_file): $(histograms_file) $(enum_python_deps)
 	$(PYTHON) $(srcdir)/gen-histogram-enum.py $< > $@
-$(histogram_data_file): $(histograms_file) $(srcdir)/gen-histogram-data.py
+$(histogram_data_file): $(histograms_file) $(data_python_deps)
 	$(PYTHON) $(srcdir)/gen-histogram-data.py $< > $@
 
 Telemetry.$(OBJ_SUFFIX): $(histogram_data_file)
 
 GARBAGE += $(histogram_data_file) $(histogram_enum_file)
--- a/toolkit/components/telemetry/Telemetry.cpp
+++ b/toolkit/components/telemetry/Telemetry.cpp
@@ -267,16 +267,30 @@ GetHistogramByEnumId(Telemetry::ID id, H
     return NS_OK;
   }
 
   const TelemetryHistogram &p = gHistograms[id];
   nsresult rv = HistogramGet(p.id, p.min, p.max, p.bucketCount, p.histogramType, &h);
   if (NS_FAILED(rv))
     return rv;
 
+#ifdef DEBUG
+  // Check that the C++ Histogram code computes the same ranges as the
+  // Python histogram code.
+  const struct bounds &b = gBucketLowerBoundIndex[id];
+  if (b.length != 0) {
+    MOZ_ASSERT(size_t(b.length) == h->bucket_count(),
+               "C++/Python bucket # mismatch");
+    for (int i = 0; i < b.length; ++i) {
+      MOZ_ASSERT(gBucketLowerBounds[b.offset + i] == h->ranges(i),
+                 "C++/Python bucket mismatch");
+    }
+  }
+#endif
+
   *ret = knownHistograms[id] = h;
   return NS_OK;
 }
 
 bool
 FillRanges(JSContext *cx, JSObject *array, Histogram *h)
 {
   for (size_t i = 0; i < h->bucket_count(); i++) {
--- a/toolkit/components/telemetry/gen-histogram-data.py
+++ b/toolkit/components/telemetry/gen-histogram-data.py
@@ -4,16 +4,17 @@
 
 # Write out histogram information for C++.  The histograms are defined
 # in a file provided as a command-line argument.
 
 from __future__ import with_statement
 
 import sys
 import histogram_tools
+import itertools
 
 banner = """/* This file is auto-generated, see gen-histogram-data.py.  */
 """
 
 # Write out the gHistograms array.
 
 def print_array_entry(histogram):
     cpp_guard = histogram.cpp_guard()
@@ -44,17 +45,17 @@ def static_asserts_for_boolean(histogram
     pass
 
 def static_asserts_for_flag(histogram):
     pass
 
 def static_asserts_for_enumerated(histogram):
     n_values = histogram.high()
     static_assert("%s > 2" % n_values,
-                  "Not enough values for %s" % histogram.name)
+                  "Not enough values for %s" % histogram.name())
 
 def shared_static_asserts(histogram):
     name = histogram.name()
     low = histogram.low()
     high = histogram.high()
     n_buckets = histogram.n_buckets()
     static_assert("%s < %s" % (low, high), "low >= high for %s" % name)
     static_assert("%s > 2" % n_buckets, "Not enough values for %s" % name)
@@ -79,18 +80,63 @@ def write_histogram_static_asserts(histo
         'linear' : static_asserts_for_linear,
         'exponential' : static_asserts_for_exponential,
         }
 
     for histogram in histograms:
         histogram_tools.table_dispatch(histogram.kind(), table,
                                        lambda f: f(histogram))
 
+def write_debug_histogram_ranges(histograms):
+    ranges_lengths = []
+
+    # Collect all the range information from individual histograms.
+    # Write that information out as well.
+    print "#ifdef DEBUG"
+    print "const int gBucketLowerBounds[] = {"
+    for histogram in histograms:
+        ranges = []
+        try:
+            ranges = histogram.ranges()
+        except histogram_tools.DefinitionException:
+            pass
+        ranges_lengths.append(len(ranges))
+        # Note that we do not test cpp_guard here.  We do this so we
+        # will have complete information about all the histograms in
+        # this array.  Just having information about the ranges of
+        # histograms is not platform-specific; if there are histograms
+        # that have platform-specific constants in their definitions,
+        # those histograms will fail in the .ranges() call above and
+        # we'll have a zero-length array to deal with here.
+        if len(ranges) > 0:
+            print ','.join(map(str, ranges)), ','
+        else:
+            print '/* Skipping %s */' % histogram.name()
+    print "};"
+
+    # Write the offsets into gBucketLowerBounds.
+    print "struct bounds { int offset; int length; };"
+    print "const struct bounds gBucketLowerBoundIndex[] = {"
+    offset = 0
+    for (histogram, range_length) in itertools.izip(histograms, ranges_lengths):
+        cpp_guard = histogram.cpp_guard()
+        # We do test cpp_guard here, so that histogram IDs are valid
+        # indexes into this array.
+        if cpp_guard:
+            print "#if defined(%s)" % cpp_guard
+        print "{ %d, %d }," % (offset, range_length)
+        if cpp_guard:
+            print "#endif"
+        offset += range_length
+    print "};"
+    print "#endif"
+
 def main(argv):
     filename = argv[0]
 
     histograms = list(histogram_tools.from_file(filename))
 
     print banner
     write_histogram_table(histograms)
     write_histogram_static_asserts(histograms)
+    write_debug_histogram_ranges(histograms)
 
 main(sys.argv[1:])
--- a/toolkit/components/telemetry/histogram_tools.py
+++ b/toolkit/components/telemetry/histogram_tools.py
@@ -1,23 +1,66 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import with_statement
 
+import math
 import simplejson as json
 
 def table_dispatch(kind, table, body):
     """Call body with table[kind] if it exists.  Raise an error otherwise."""
     if kind in table:
-        body(table[kind])
+        return body(table[kind])
     else:
         raise BaseException, "don't know how to handle a histogram of kind %s" % kind
 
+class DefinitionException(BaseException):
+    pass
+
+def check_numeric_limits(dmin, dmax, n_buckets):
+    if type(dmin) != int:
+        raise DefinitionException, "minimum is not a number"
+    if type(dmax) != int:
+        raise DefinitionException, "maximum is not a number"
+    if type(n_buckets) != int:
+        raise DefinitionException, "number of buckets is not a number"
+
+def linear_buckets(dmin, dmax, n_buckets):
+    check_numeric_limits(dmin, dmax, n_buckets)
+    if n_buckets == 2:
+        return [0, 1, 2]
+    ret_array = [0] * n_buckets
+    dmin = float(dmin)
+    dmax = float(dmax)
+    for i in range(1, n_buckets):
+        linear_range = (dmin * (n_buckets - 1 - i) + dmax * (i - 1)) / (n_buckets - 2)
+        ret_array[i] = int(linear_range + 0.5)
+    return ret_array
+
+def exponential_buckets(dmin, dmax, n_buckets):
+    check_numeric_limits(dmin, dmax, n_buckets)
+    log_max = math.log(dmax);
+    bucket_index = 2;
+    ret_array = [0] * n_buckets
+    current = dmin
+    ret_array[1] = current
+    for bucket_index in range(2, n_buckets):
+        log_current = math.log(current)
+        log_ratio = (log_max - log_current) / (n_buckets - bucket_index)
+        log_next = log_current + log_ratio
+        next_value = int(math.floor(math.exp(log_next) + 0.5))
+        if next_value > current:
+            current = next_value
+        else:
+            current = current + 1
+        ret_array[bucket_index] = current
+    return ret_array
+
 always_allowed_keys = ['kind', 'description', 'cpp_guard']
 
 class Histogram:
     """A class for representing a histogram definition."""
 
     def __init__(self, name, definition):
         """Initialize a histogram named name with the given definition.
 definition is a dict-like object that must contain at least the keys:
@@ -75,16 +118,26 @@ the histogram."""
         """Return the number of buckets in the histogram.  May be a string."""
         return self._n_buckets
 
     def cpp_guard(self):
         """Return the preprocessor symbol that should guard C/C++ definitions
 associated with the histogram.  Returns None if no guarding is necessary."""
         return self._cpp_guard
 
+    def ranges(self):
+        """Return an array of lower bounds for each bucket in the histogram."""
+        table = { 'boolean': linear_buckets,
+                  'flag': linear_buckets,
+                  'enumerated': linear_buckets,
+                  'linear': linear_buckets,
+                  'exponential': exponential_buckets }
+        return table_dispatch(self.kind(), table,
+                              lambda p: p(self.low(), self.high(), self.n_buckets()))
+
     def compute_bucket_parameters(self, definition):
         table = {
             'boolean': Histogram.boolean_flag_bucket_parameters,
             'flag': Histogram.boolean_flag_bucket_parameters,
             'enumerated': Histogram.enumerated_bucket_parameters,
             'linear': Histogram.linear_bucket_parameters,
             'exponential': Histogram.exponential_bucket_parameters
             }
@@ -107,17 +160,24 @@ associated with the histogram.  Returns 
 
     @staticmethod
     def check_keys(name, definition, allowed_keys):
         for key in definition.iterkeys():
             if key not in allowed_keys:
                 raise KeyError, '%s not permitted for %s' % (key, name)
 
     def set_bucket_parameters(self, low, high, n_buckets):
-        (self._low, self._high, self._n_buckets) = (low, high, n_buckets)
+        def try_to_coerce_to_number(v):
+            try:
+                return eval(v, {})
+            except:
+                return v
+        self._low = try_to_coerce_to_number(low)
+        self._high = try_to_coerce_to_number(high)
+        self._n_buckets = try_to_coerce_to_number(n_buckets)
 
     @staticmethod
     def boolean_flag_bucket_parameters(definition):
         return (0, 1, 2)
 
     @staticmethod
     def linear_bucket_parameters(definition):
         return (definition.get('low', 1),