Bug 972939 - Add documentation for mozbase structured logging. r=wlach
authorJames Graham <james@hoppipolla.co.uk>
Tue, 25 Feb 2014 17:32:07 +0000
changeset 170429 b9398216b902ad59054fe26c020e731fa123b527
parent 170428 c7d38d7390b1173acd1572720ffe4fc242cad53c
child 170430 052fd5d340735904fc2402c7de6fb5330b582b4a
push id40231
push userryanvm@gmail.com
push dateTue, 25 Feb 2014 18:57:27 +0000
treeherdermozilla-inbound@052fd5d34073 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerswlach
bugs972939
milestone30.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 972939 - Add documentation for mozbase structured logging. r=wlach
testing/mozbase/docs/loggingreporting.rst
testing/mozbase/docs/mozlog_structured.rst
testing/mozbase/mozlog/mozlog/structured/formatters/base.py
testing/mozbase/mozlog/mozlog/structured/formatters/machformatter.py
testing/mozbase/mozlog/mozlog/structured/formatters/unittest.py
testing/mozbase/mozlog/mozlog/structured/formatters/xunit.py
testing/mozbase/mozlog/mozlog/structured/handlers/__init__.py
testing/mozbase/mozlog/mozlog/structured/reader.py
testing/mozbase/mozlog/mozlog/structured/structuredlog.py
--- a/testing/mozbase/docs/loggingreporting.rst
+++ b/testing/mozbase/docs/loggingreporting.rst
@@ -4,9 +4,9 @@ Logging and reporting
 Ideally output between different types of testing system should be as
 uniform as possible, as well as making it easy to make things more or
 less verbose. We created some libraries to make doing this easy.
 
 .. toctree::
    :maxdepth: 2
 
    mozlog
-
+   mozlog_structured
new file mode 100644
--- /dev/null
+++ b/testing/mozbase/docs/mozlog_structured.rst
@@ -0,0 +1,270 @@
+:mod:`mozlog.structured` --- Structured logging for test output
+===============================================================
+
+``mozlog.structured`` is a library designed for logging the execution
+and results of test harnesses. The canonical output format is JSON,
+with one line of JSON per log entry. It is *not* based on the stdlib
+logging module, although it shares several concepts with this module.
+
+One notable difference between this module and the standard logging
+module is the way that loggers are created. The structured logging
+module does not require that loggers with a specific name are
+singleton objects accessed through a factory function. Instead the
+``StructuredLogger`` constructor may be used directly. However all
+loggers with the same name share the same internal state (the "Borg"
+pattern). In particular the list of handlers functions is the same for
+all loggers with the same name.
+
+Logging is threadsafe, with access to handlers protected by a
+``threading.Lock``. However it is `not` process-safe. This means that
+applications using multiple processes, e.g. via the
+``multiprocessing`` module, should arrange for all logging to happen in
+a single process.
+
+Data Format
+-----------
+
+Structured loggers produce messages in a simple format designed to be
+compatible with the JSON data model. Each message is a single object,
+with the type of message indicated by the ``action`` key. It is
+intended that the set of ``action`` values be closed; where there are
+use cases for additional values they should be integrated into this
+module rather than extended in an ad-hoc way. The set of keys present
+on on all messages is:
+
+``action``
+  The type of the message (string).
+
+``time``
+  The timestamp of the message in ms since the epoch (int).
+
+``thread``
+  The name of the thread emitting the message (string).
+
+``pid``
+  The pid of the process creating the message (int).
+
+``source``
+  Name of the logger creating the message (string).
+
+For each ``action`` there are is a further set of specific fields
+describing the details of the event that caused the message to be
+emitted:
+
+``suite_start``
+  Emitted when the testsuite starts running.
+
+  ``tests``
+    A list of test_ids (list).
+
+``suite_end``
+  Emitted when the testsuite is finished and no more results will be produced.
+
+``test_start``
+  Emitted when a test is being started.
+
+  ``test``
+    A unique id for the test (string or list of strings).
+
+``test_status``
+  Emitted for a test which has subtests to record the result of a
+  single subtest
+
+  ``test``
+    The same unique id for the test as in the ``test_start`` message.
+
+  ``subtest``
+    Name of the subtest (string).
+
+  ``status``
+    Result of the test (string enum; ``PASS``, ``FAIL``, ``TIMEOUT``,
+    ``NOTRUN``)
+
+  ``expected``
+    Expected result of the test. Omitted if the expected result is the
+    same as the actual result (string enum, same as ``status``).
+
+``test_end``
+  Emitted to give the result of a test with no subtests, or the status
+  of the overall file when there are subtests.
+
+  ``test``
+    The same unique id for the test as in the ``test_start`` message.
+
+  ``status``
+    Either result of the test (if there are no subtests) in which case
+    (string enum ``PASS``, ``FAIL``, ``TIMEOUT``, ``CRASH``,
+    ``ASSERT``, ``SKIP``) or the status of the overall file where
+    there are subtests (string enum ``OK``, ``ERROR``, ``TIMEOUT``,
+    ``CRASH``, ``ASSERT``, ``SKIP``).
+
+  ``expected``
+    The expected status, or emitted if the expected status matches the
+    actual status (string enum, same as ``status``).
+
+``process_output``
+  Output from a managed subprocess.
+
+  ``process``
+  pid of the subprocess.
+
+  ``command``
+  Command used to launch the subprocess.
+
+  ``data``
+  Data output by the subprocess.
+
+``log``
+  General human-readable logging message, used to debug the harnesses
+  themselves rather than to provide input to other tools.
+
+  ``level``
+    Level of the log message (string enum ``CRITICAL``, ``ERROR``,
+    ``WARNING``, ``INFO``, ``DEBUG``).
+
+  ``message``
+    Text of the log message.
+
+StructuredLogger Objects
+------------------------
+
+.. automodule:: mozlog.structured.structuredlog
+
+.. autoclass:: StructuredLogger
+   :members: add_handler, remove_handler, handlers, suite_start,
+             suite_end, test_start, test_status, test_end,
+             process_output, critical, error, warning, info, debug
+
+.. autoclass:: StructuredLogFileLike
+  :members:
+
+Handlers
+--------
+
+A handler is a callable that is called for each log message produced
+and is responsible for handling the processing of that
+message. The typical example of this is a ``StreamHandler`` which takes
+a log message, invokes a formatter which converts the log to a string,
+and writes it to a file.
+
+.. automodule:: mozlog.structured.handlers
+
+.. autoclass:: StreamHandler
+  :members:
+
+.. autoclass:: LogLevelFilter
+  :members:
+
+Formatters
+----------
+
+Formatters are callables that take a log message, and return either a
+string representation of that message, or ``None`` if that message
+should not appear in the output. This allows formatters to both
+exclude certain items and create internal buffers of the output so
+that, for example, a single string might be returned for a
+``test_end`` message indicating the overall result of the test,
+including data provided in the ``test_status`` messages.
+
+Formatter modules are written so that they can take raw input on stdin
+and write formatted output on stdout. This allows the formatters to be
+invoked as part of a command line for post-processing raw log files.
+
+.. automodule:: mozlog.structured.formatters.base
+
+.. autoclass:: BaseFormatter
+  :members:
+
+.. automodule:: mozlog.structured.formatters.unittest
+
+.. autoclass:: UnittestFormatter
+  :members:
+
+.. automodule:: mozlog.structured.formatters.xunit
+
+.. autoclass:: XUnitFormatter
+  :members:
+
+.. automodule:: mozlog.structured.formatters.html
+
+.. autoclass:: HTMLFormatter
+  :members:
+
+.. automodule:: mozlog.structured.formatters.machformatter
+
+.. autoclass:: MachFormatter
+  :members:
+
+.. autoclass:: MachTerminalFormatter
+  :members:
+
+Processing Log Files
+--------------------
+
+The ``mozlog.structured.reader`` module provides utilities for working
+with structured log files.
+
+.. automodule:: mozlog.structured.reader
+  :members:
+
+Integration with argparse
+-------------------------
+
+The `mozlog.structured.commandline` module provides integration with
+the `argparse` module to provide uniform logging-related command line
+arguments to programs using `mozlog.structured`. Each known formatter
+gets a command line argument of the form ``--log-{name}``, which takes
+the name of a file to log to with that format of `-` to indicate stdout.
+
+.. automodule:: mozlog.structured.commandline
+  :members:
+
+Examples
+--------
+
+Log to stdout::
+
+    from mozlog.structured import structuredlog
+    from mozlog.structured import handlers, formatters
+    logger = structuredlog.StructuredLogger("my-test-suite")
+    logger.add_handler(handlers.StreamHandler(sys.stdout,
+                                              formatters.JSONFormatter()))
+    logger.suite_start(["test-id-1"])
+    logger.test_start("test-id-1")
+    logger.info("This is a message with action='LOG' and level='INFO'")
+    logger.test_status("test-id-1", "subtest-1", "PASS")
+    logger.test_end("test-id-1", "OK")
+    logger.suite_end()
+
+
+Populate an ``argparse.ArgumentParser`` with logging options, and
+create a logger based on the value of those options, defaulting to
+JSON output on stdout if nothing else is supplied::
+
+   import argparse
+   from mozlog.structured import commandline
+
+   parser = argparse.ArgumentParser()
+   # Here one would populate the parser with other options
+   commandline.add_logging_group(parser)
+
+   args = parser.parse_args()
+   logger = commandline.setup_logging("testsuite-name", args, {"raw": sys.stdout})
+
+Count the number of tests that timed out in a testsuite::
+
+   from mozlog.structured import reader
+
+   class TestCounter(object):
+       def count(filename):
+           self.count = 0
+           with open(filename) as f:
+               reader.map_action(reader.read(f),
+                                 {"test_end":self.process_test_end})
+          return self.count
+
+       def process_test_end(self, data):
+           if data["status"] == "TIMEOUT":
+               self.count += 1
+
+   print TestCounter().count("my_test_run.log")
--- a/testing/mozbase/mozlog/mozlog/structured/formatters/base.py
+++ b/testing/mozbase/mozlog/mozlog/structured/formatters/base.py
@@ -1,16 +1,31 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 import json
 
 
 class BaseFormatter(object):
+    """Base class for implementing non-trivial formatters.
+
+    Subclasses are expected to provide a method for each action type they
+    wish to handle, each taking a single argument for the test data.
+    For example a trivial subclass that just produces the id of each test as
+    it starts might be::
+
+      class StartIdFormatter(BaseFormatter);
+          def test_start(data):
+              #For simplicity in the example pretend the id is always a string
+              return data["test"]
+    """
+    def __init__(self):
+        pass
+
     def __call__(self, data):
         if hasattr(self, data["action"]):
             handler = getattr(self, data["action"])
             return handler(data)
 
 
 def format_file(input_file, handler):
     while True:
--- a/testing/mozbase/mozlog/mozlog/structured/formatters/machformatter.py
+++ b/testing/mozbase/mozlog/mozlog/structured/formatters/machformatter.py
@@ -88,23 +88,44 @@ class BaseMachFormatter(base.BaseFormatt
             self.last_time = entry_time
         else:
             t = entry_time - self.start_time
 
         return t
 
 
 class MachFormatter(BaseMachFormatter):
+    """Formatter designed for producing human-redable output
+    when tests are running. This is principally intended for integration with
+    the ``mach`` command dispatch framework.
+
+    :param start_time: time.time() at which the testrun started
+    :param write_interval: bool indicating whether to include the interval since the
+                           last message
+    :param write_times: bool indicating whether to include the time since the testrun
+                        started.
+    """
     def __call__(self, data):
         s = BaseMachFormatter.__call__(self, data)
         if s is not None:
             return "%s %s" % (format_seconds(self._time(data)), s)
 
 
 class MachTerminalFormatter(BaseMachFormatter):
+    """Formatter designed to produce coloured output in a terminal when tests are
+    running. This is principally intended for integration with
+    the ``mach`` command dispatch framework.
+
+    :param start_time: time.time() at which the testrun started
+    :param write_interval: bool indicating whether to include the interval since the
+                           last message
+    :param write_times: bool indicating whether to include the time since the testrun
+                        started.
+    :param terminal: Terminal object from mach.
+    """
     def __init__(self, start_time=None, write_interval=False, write_times=True,
                  terminal=None):
         self.terminal = terminal
         BaseMachFormatter.__init__(self,
                                    start_time=start_time,
                                    write_interval=write_interval,
                                    write_times=write_times)
 
--- a/testing/mozbase/mozlog/mozlog/structured/formatters/unittest.py
+++ b/testing/mozbase/mozlog/mozlog/structured/formatters/unittest.py
@@ -5,16 +5,18 @@
 
 import sys
 
 import base
 from .. import handlers
 
 
 class UnittestFormatter(base.BaseFormatter):
+    """Formatter designed to produce output in a format like that used by
+    the ``unittest`` module in the standard library."""
     def __init__(self):
         self.fails = []
         self.errors = []
         self.tests_run = 0
         self.start_time = None
         self.end_time = None
 
     def suite_start(self, data):
--- a/testing/mozbase/mozlog/mozlog/structured/formatters/xunit.py
+++ b/testing/mozbase/mozlog/mozlog/structured/formatters/xunit.py
@@ -11,19 +11,28 @@ def format_test_id(test_id):
         #Not sure how to deal with reftests yet
         raise NotImplementedError
 
     #Turn a path into something like a class heirachy
     return test_id.replace('.', '_').replace('/', ".")
 
 
 class XUnitFormatter(base.BaseFormatter):
-    """The data model here isn't a great match. This implementation creates
-    one <testcase> element for each subtest and one more, with no @name
-    for each test"""
+    """Formatter that produces XUnit-style XML output.
+
+    The tree is created in-memory so this formatter may be problematic
+    with very large log files.
+
+    Note that the data model isn't a perfect match. In
+    particular XUnit assumes that each test has a unittest-style
+    class name and function name, which isn't the case for us. The
+    implementation currently replaces path names with something that
+    looks like class names, but this doesn't work for test types that
+    actually produce class names, or for test types that have multiple
+    components in their test id (e.g. reftests)."""
 
     def __init__(self):
         self.tree = ElementTree.ElementTree()
         self.root = None
         self.suite_start_time = None
         self.test_start_time = None
 
         self.tests_run = 0
--- a/testing/mozbase/mozlog/mozlog/structured/handlers/__init__.py
+++ b/testing/mozbase/mozlog/mozlog/structured/handlers/__init__.py
@@ -16,35 +16,50 @@ class BaseHandler(object):
     def remove_filter(self, filter_func):
         self.filters.remove(filter_func)
 
     def filter(self, data):
         return all(item(data) for item in self.filters)
 
 
 class LogLevelFilter(object):
+    """Handler that filters out messages with action:log and a level
+    lower than some specified level.
+
+    :param inner: Handler to use for messages that pass this filter
+    :param level: Minimum log level to process
+    """
     def __init__(self, inner, level):
         self.inner = inner
         self.level = log_levels[level.upper()]
 
     def __call__(self, item):
         if (item["action"] != "log" or
             log_levels[item["level"]] <= self.level):
             return self.inner(item)
 
 
 class StreamHandler(BaseHandler):
+    """Handler for writing to a file-like object
+
+    :param stream: File-like object to write log messages to
+    :param formatter: formatter to convert messages to string format
+    """
+
     _lock = Lock()
 
     def __init__(self,  stream, formatter):
         assert stream is not None
         self.stream = stream
         BaseHandler.__init__(self, formatter)
 
     def __call__(self, data):
+        """Write a log message.
+
+        :param data: Structured log message dictionary."""
         formatted = self.formatter(data)
         if not formatted:
             return
         with self._lock:
             #XXX Should encoding be the formatter's responsibility?
             try:
                 self.stream.write(formatted.encode("utf8"))
             except:
--- a/testing/mozbase/mozlog/mozlog/structured/reader.py
+++ b/testing/mozbase/mozlog/mozlog/structured/reader.py
@@ -1,17 +1,19 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 import json
 
 def read(log_f, raise_on_error=False):
-    """Return a generator that will return the entries in a structured log file
+    """Return a generator that will return the entries in a structured log file.
+    Note that the caller must not close the file whilst the generator is still
+    in use.
 
-    :param log_f: file-like object containing the log enteries, one per line
+    :param log_f: file-like object containing the raw log entries, one per line
     :param raise_on_error: boolean indicating whether ValueError should be raised
                            for lines that cannot be decoded."""
     for line in log_f:
         try:
             yield json.loads(line)
         except ValueError:
             if raise_on_error:
                 raise
--- a/testing/mozbase/mozlog/mozlog/structured/structuredlog.py
+++ b/testing/mozbase/mozlog/mozlog/structured/structuredlog.py
@@ -57,37 +57,38 @@ Subfields for all messages:
 
 log_levels = dict((k.upper(), v) for v, k in
                   enumerate(["critical", "error", "warning", "info", "debug"]))
 
 
 class StructuredLogger(object):
     _lock = Lock()
     _handlers = defaultdict(list)
+    """Create a structured logger with the given name
+
+    :param name: The name of the logger.
+    """
 
     def __init__(self, name):
-        """
-        Create a structured logger with the given name
-
-        :param name: The name of the logger.
-        """
         self.name = name
 
     def add_handler(self, handler):
+        """Add a handler to the current logger"""
         self._handlers[self.name].append(handler)
 
     def remove_handler(self, handler):
+        """Remove a handler from the current logger"""
         for i, candidate_handler in enumerate(self._handlers[self.name][:]):
             if candidate_handler == handler:
                 del self._handlers[self.name][i]
                 break
 
     @property
     def handlers(self):
-        """Get a list of handlers that will be called when a
+        """A list of handlers that will be called when a
         message is logged from this logger"""
         return self._handlers[self.name]
 
     def _log_data(self, action, data=None):
         if data is None:
             data = {}
         with self._lock:
             log_data = self._make_log_data(action, data)
@@ -99,30 +100,28 @@ class StructuredLogger(object):
                     "time": int(time.time() * 1000),
                     "thread": current_thread().name,
                     "pid": current_process().pid,
                     "source": self.name}
         all_data.update(data)
         return all_data
 
     def suite_start(self, tests):
-        """
-        Log a suite_start message
+        """Log a suite_start message
 
         :param tests: List of test identifiers that will be run in the suite.
         """
         self._log_data("suite_start", {"tests": tests})
 
     def suite_end(self):
         """Log a suite_end message"""
         self._log_data("suite_end")
 
     def test_start(self, test):
-        """
-        "Log a test_start message
+        """Log a test_start message
 
         :param test: Identifier of the test that will run.
         """
         self._log_data("test_start", {"test": test})
 
     def test_status(self, test, subtest, status, expected="PASS", message=None):
         """
         Log a test_status message indicating a subtest result. Tests that
@@ -183,38 +182,43 @@ class StructuredLogger(object):
         """
         data = {"process": process, "data": data}
         if command is not None:
             data["command"] = command
         self._log_data("process_output", data)
 
 
 def _log_func(level_name):
-    def log(self, message, params=None):
-        if params is None:
-            params = {}
+    def log(self, message):
         data = {"level": level_name, "message": message}
-        data.update(params)
         self._log_data("log", data)
+    log.__doc__ = """Log a message with level %s
+
+:param message: The string message to log
+""" % level_name
+    log.__name__ = str(level_name).lower()
     return log
 
 
 # Create all the methods on StructuredLog for debug levels
 for level_name in log_levels:
     setattr(StructuredLogger, level_name.lower(), _log_func(level_name))
 
 
 class StructuredLogFileLike(object):
-    """
-    Wrapper for file like objects to redirect output to logger
-    instead.
+    """Wrapper for file-like objects to redirect writes to logger
+    instead. Each call to `write` becomes a single log entry of type `log`.
 
     When using this it is important that the callees i.e. the logging
     handlers do not themselves try to write to the wrapped file as this
     will cause infinite recursion.
+
+    :param logger: `StructuredLogger` to which to redirect the file write operations.
+    :param level: log level to use for each write.
+    :param prefix: String prefix to prepend to each log entry.
     """
     def __init__(self, logger, level="info", prefix=None):
         self.logger = logger
         self.log_func = getattr(self.logger, level)
         self.prefix = prefix
 
     def write(self, data):
         if data.endswith("\n"):