Bug 1042921 - Explicitly type-convert all input to mozlog.structured.;r=ahal
authorJames Graham <james@hoppipolla.co.uk>
Mon, 04 Aug 2014 10:29:58 +0100
changeset 219305 7e0f0db0bb2c515d5e5df13f002446a7d081f320
parent 219304 b1552199cc7cee534308d4c82823699be3e42104
child 219306 a788782fc3d7de8f91785ed5d22b74bc49df8b0c
push id3979
push userraliiev@mozilla.com
push dateMon, 13 Oct 2014 16:35:44 +0000
treeherdermozilla-beta@30f2cc610691 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersahal
bugs1042921
milestone34.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1042921 - Explicitly type-convert all input to mozlog.structured.;r=ahal
testing/mochitest/runtests.py
testing/mochitest/tests/SimpleTest/TestRunner.js
testing/mozbase/mozlog/mozlog/structured/handlers/__init__.py
testing/mozbase/mozlog/mozlog/structured/logtypes.py
testing/mozbase/mozlog/mozlog/structured/structuredlog.py
testing/mozbase/mozlog/tests/test_structured.py
--- a/testing/mochitest/runtests.py
+++ b/testing/mochitest/runtests.py
@@ -1945,17 +1945,17 @@ class Mochitest(MochitestUtilsMixin):
       elif "Failed:" in line:
         self.harness.countfail += val
       elif "Todo:" in line:
         self.harness.counttodo += val
       return message
 
     def fix_stack(self, message):
       if message['action'] == 'log' and self.stackFixerFunction:
-        message['message'] = self.stackFixerFunction(message['message'].encode('ascii', 'replace'))
+        message['message'] = self.stackFixerFunction(message['message'].encode('utf-8', 'replace'))
       return message
 
     def record_last_test(self, message):
       """record last test on harness"""
       if message['action'] == 'test_start':
         self.harness.lastTestSeen = message['test']
       elif message['action'] == 'log' and 'TEST-START' in message['message'] and '|' in message['message']:
         self.harness.lastTestSeen = message['message'].split("|")[1].strip()
--- a/testing/mochitest/tests/SimpleTest/TestRunner.js
+++ b/testing/mochitest/tests/SimpleTest/TestRunner.js
@@ -238,17 +238,17 @@ function StructuredLogger(name) {
         data = typeof data !== "undefined" ? data : null;
 
         if (data === null) {
             data = {};
         }
 
         var allData = {action: action,
                        time: new Date().getTime(),
-                       thread: null,
+                       thread: "",
                        pid: null,
                        source: this.name};
 
         for (var attrname in data) {
             allData[attrname] = data[attrname];
         }
 
         this._dumpMessage(allData);
--- a/testing/mozbase/mozlog/mozlog/structured/handlers/__init__.py
+++ b/testing/mozbase/mozlog/mozlog/structured/handlers/__init__.py
@@ -1,13 +1,14 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from threading import Lock
+import codecs
 
 from ..structuredlog import log_levels
 
 
 class BaseHandler(object):
     def __init__(self, formatter=str):
         self.formatter = formatter
         self.filters = []
@@ -45,25 +46,37 @@ class StreamHandler(BaseHandler):
     :param stream: File-like object to write log messages to
     :param formatter: formatter to convert messages to string format
     """
 
     _lock = Lock()
 
     def __init__(self,  stream, formatter):
         assert stream is not None
+
+        # This is a hack to deal with the case where we are passed a
+        # StreamWriter (e.g. by mach for stdout). A StreamWriter requires
+        # the code to handle unicode in exactly the opposite way compared
+        # to a normal stream i.e. you always have to pass in a Unicode
+        # object rather than a string object. Cope with that by extracting
+        # the underlying raw stream.
+        if isinstance(stream, codecs.StreamWriter):
+            stream = stream.stream
+
         self.stream = stream
         BaseHandler.__init__(self, formatter)
 
     def __call__(self, data):
         """Write a log message.
 
         :param data: Structured log message dictionary."""
         formatted = self.formatter(data)
         if not formatted:
             return
         with self._lock:
-            #XXX Should encoding be the formatter's responsibility?
-            try:
-                self.stream.write(formatted.encode("utf8", "replace"))
-            except:
-                raise
+            if isinstance(formatted, unicode):
+                self.stream.write(formatted.encode("utf-8", "replace"))
+            elif isinstance(formatted, str):
+                self.stream.write(formatted)
+            else:
+                assert False, "Got output from the formatter of an unexpected type"
+
             self.stream.flush()
new file mode 100644
--- /dev/null
+++ b/testing/mozbase/mozlog/mozlog/structured/logtypes.py
@@ -0,0 +1,157 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+convertor_registry = {}
+missing = object()
+no_default = object()
+
+class log_action(object):
+    def __init__(self, *args):
+        self.args = {}
+
+        self.args_no_default = []
+        self.args_with_default = []
+
+        # These are the required fields in a log message that usually aren't
+        # supplied by the caller, but can be in the case of log_raw
+        self.default_args = [
+            Unicode("action"),
+            Int("time"),
+            Unicode("thread"),
+            Int("pid", default=None),
+            Unicode("source"),
+            Unicode("component")]
+
+        for arg in args:
+            if arg.default is no_default:
+                self.args_no_default.append(arg.name)
+            else:
+                self.args_with_default.append(arg.name)
+
+            if arg.name in self.args:
+                raise ValueError("Repeated argument name %s" % arg.name)
+
+            self.args[arg.name] = arg
+
+        for extra in self.default_args:
+            self.args[extra.name] = extra
+
+
+    def __call__(self, f):
+        convertor_registry[f.__name__] = self
+        converter = self
+
+        def inner(self, *args, **kwargs):
+            data = converter.convert(*args, **kwargs)
+            return f(self, data)
+
+        return inner
+
+    def convert(self, *args, **kwargs):
+        data = {}
+        values = {}
+        values.update(kwargs)
+
+        j = -1
+        for i, arg in enumerate(args):
+            j += 1
+            # Skip over non-defaulted args supplied as keyword arguments
+            while self.args_no_default[j] in values:
+                j += 1
+                if j == len(self.args_no_default):
+                    raise TypeError("Too many arguments")
+            values[self.args_no_default[j]] = arg
+
+        for k in range(j+1, len(self.args_no_default)):
+            if self.args_no_default[k] not in values:
+                raise TypeError("Missing required argument %s\n%r\n%r\n%r" %
+                                (self.args_no_default[k], args, kwargs, values))
+
+        # Fill in missing arguments
+        for name in self.args_with_default:
+            if name not in values:
+                values[name] = self.args[name].default
+
+        for key, value in values.iteritems():
+            out_value = self.args[key](value)
+            if out_value is not missing:
+                data[key] = out_value
+
+        return data
+
+    def convert_known(self, **kwargs):
+        known_kwargs = {name: value for name, value in kwargs.iteritems()
+                        if name in self.args}
+        return self.convert(**known_kwargs)
+
+class DataType(object):
+    def __init__(self, name, default=no_default, optional=False):
+        self.name = name
+        self.default = default
+
+        if default is no_default and optional is not False:
+            raise ValueError("optional arguments require a default value")
+
+        self.optional = optional
+
+    def __call__(self, value):
+        if value == self.default:
+            if self.optional:
+                return missing
+            return self.default
+
+        try:
+            return self.convert(value)
+        except:
+            raise ValueError("Failed to convert value %s of type %s for field %s to type %s" %
+                             (value, type(value).__name__, self.name, self.__class__.__name__))
+
+class Unicode(DataType):
+    def convert(self, data):
+        if isinstance(data, unicode):
+            return data
+        if isinstance(data, str):
+            return data.decode("utf8", "replace")
+        return unicode(data)
+
+class TestId(DataType):
+    def convert(self, data):
+        if isinstance(data, unicode):
+            return data
+        elif isinstance(data, str):
+            return data.decode("utf-8", "replace")
+        elif isinstance(data, tuple):
+            # This is really a bit of a hack; should really split out convertors from the
+            # fields they operate on
+            func = Unicode(None).convert
+            return tuple(func(item) for item in data)
+        else:
+            raise ValueError
+
+class Status(DataType):
+    allowed = ["PASS", "FAIL", "OK", "ERROR", "TIMEOUT", "CRASH", "ASSERT", "SKIP"]
+    def convert(self, data):
+        value = data.upper()
+        if value not in self.allowed:
+            raise ValueError
+        return value
+
+class SubStatus(Status):
+    allowed = ["PASS", "FAIL", "ERROR", "TIMEOUT", "CRASH", "ASSERT"]
+
+class Dict(DataType):
+    def convert(self, data):
+        return dict(data)
+
+class List(DataType):
+    def __init__(self, name, item_type, default=no_default, optional=False):
+        DataType.__init__(self, name, default, optional)
+        self.item_type = item_type(None)
+
+    def convert(self, data):
+        return [self.item_type.convert(item) for item in data]
+
+class Int(DataType):
+    def convert(self, data):
+        return int(data)
--- a/testing/mozbase/mozlog/mozlog/structured/structuredlog.py
+++ b/testing/mozbase/mozlog/mozlog/structured/structuredlog.py
@@ -1,20 +1,21 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import unicode_literals
 
-from collections import defaultdict
 from multiprocessing import current_process
 from threading import current_thread, Lock
 import json
 import time
 
+from logtypes import Unicode, TestId, Status, SubStatus, Dict, List, log_action, convertor_registry
+
 """Structured Logging for recording test results.
 
 Allowed actions, and subfields:
   suite_start
       tests  - List of test names
 
   suite_end
 
@@ -124,20 +125,33 @@ class StructuredLogger(object):
                 break
 
     @property
     def handlers(self):
         """A list of handlers that will be called when a
         message is logged from this logger"""
         return self._state.handlers
 
-    def log_raw(self, data):
-        if "action" not in data:
+    def log_raw(self, raw_data):
+        if "action" not in raw_data:
             raise ValueError
-        data = self._make_log_data(data['action'], data)
+
+        action = raw_data["action"]
+        converted_data = convertor_registry[action].convert_known(**raw_data)
+        for k, v in raw_data.iteritems():
+            if k not in converted_data:
+                converted_data[k] = v
+
+        data = self._make_log_data(action, converted_data)
+
+        if action in ("test_status", "test_end"):
+            if (data["expected"] == data["status"] or
+                data["status"] == "SKIP"):
+                del data["expected"]
+
         self._handle_log(data)
 
     def _log_data(self, action, data=None):
         if data is None:
             data = {}
 
         log_data = self._make_log_data(action, data)
         self._handle_log(log_data)
@@ -153,149 +167,145 @@ class StructuredLogger(object):
                     "thread": current_thread().name,
                     "pid": current_process().pid,
                     "source": self.name}
         if self.component:
             all_data['component'] = self.component
         all_data.update(data)
         return all_data
 
-    def suite_start(self, tests, run_info=None):
+    @log_action(List("tests", Unicode),
+                Dict("run_info", default=None, optional=True))
+    def suite_start(self, data):
         """Log a suite_start message
 
         :param tests: List of test identifiers that will be run in the suite.
         """
-
-        data = {"tests": tests}
-        if run_info is not None:
-            data["run_info"] = run_info
-
         if self._state.suite_started:
             self.error("Got second suite_start message before suite_end. Logged with data %s" %
                        json.dumps(data))
             return
 
         self._state.suite_started = True
 
         self._log_data("suite_start", data)
 
-    def suite_end(self):
+    @log_action()
+    def suite_end(self, data):
         """Log a suite_end message"""
         if not self._state.suite_started:
             self.error("Got suite_end message before suite_start.")
             return
 
         self._state.suite_started = False
 
         self._log_data("suite_end")
 
-    def test_start(self, test):
+    @log_action(TestId("test"))
+    def test_start(self, data):
         """Log a test_start message
 
         :param test: Identifier of the test that will run.
         """
         if not self._state.suite_started:
-            self.error("Got test_start message before suite_start for test %s" % (test,))
+            self.error("Got test_start message before suite_start for test %s" %
+                       data["test"])
             return
-        if test in self._state.running_tests:
-            self.error("test_start for %s logged while in progress." % (test,))
+        if data["test"] in self._state.running_tests:
+            self.error("test_start for %s logged while in progress." %
+                       data["test"])
             return
-        self._state.running_tests.add(test)
-        self._log_data("test_start", {"test": test})
+        self._state.running_tests.add(data["test"])
+        self._log_data("test_start", data)
 
-    def test_status(self, test, subtest, status, expected="PASS", message=None,
-                    stack=None, extra=None):
+    @log_action(TestId("test"),
+                Unicode("subtest"),
+                SubStatus("status"),
+                SubStatus("expected", default="PASS"),
+                Unicode("message", default=None, optional=True),
+                Unicode("stack", default=None, optional=True),
+                Dict("extra", default=None, optional=True))
+    def test_status(self, data):
         """
         Log a test_status message indicating a subtest result. Tests that
         do not have subtests are not expected to produce test_status messages.
 
         :param test: Identifier of the test that produced the result.
         :param subtest: Name of the subtest.
         :param status: Status string indicating the subtest result
         :param expected: Status string indicating the expected subtest result.
         :param message: String containing a message associated with the result.
         :param stack: a stack trace encountered during test execution.
         :param extra: suite-specific data associated with the test result.
         """
-        if status.upper() not in ["PASS", "FAIL", "TIMEOUT", "NOTRUN", "ASSERT"]:
-            raise ValueError("Unrecognised status %s" % status)
-        data = {"test": test,
-                "subtest": subtest,
-                "status": status.upper()}
-        if message is not None:
-            data["message"] = unicode(message)
-        if expected != data["status"]:
-            data["expected"] = expected
-        if stack is not None:
-            data["stack"] = stack
-        if extra is not None:
-            data["extra"] = extra
 
-        if test not in self._state.running_tests:
+        if (data["expected"] == data["status"] or
+            data["status"] == "SKIP"):
+            del data["expected"]
+
+        if data["test"] not in self._state.running_tests:
             self.error("test_status for %s logged while not in progress. "
-                       "Logged with data: %s" % (test, json.dumps(data)))
+                       "Logged with data: %s" % (data["test"], json.dumps(data)))
             return
 
         self._log_data("test_status", data)
 
-    def test_end(self, test, status, expected="OK", message=None, stack=None,
-                 extra=None):
+    @log_action(TestId("test"),
+                Status("status"),
+                Status("expected", default="OK"),
+                Unicode("message", default=None, optional=True),
+                Unicode("stack", default=None, optional=True),
+                Dict("extra", default=None, optional=True))
+    def test_end(self, data):
         """
         Log a test_end message indicating that a test completed. For tests
         with subtests this indicates whether the overall test completed without
         errors. For tests without subtests this indicates the test result
         directly.
 
         :param test: Identifier of the test that produced the result.
         :param status: Status string indicating the test result
         :param expected: Status string indicating the expected test result.
         :param message: String containing a message associated with the result.
         :param stack: a stack trace encountered during test execution.
         :param extra: suite-specific data associated with the test result.
         """
-        if status.upper() not in ["PASS", "FAIL", "OK", "ERROR", "TIMEOUT",
-                                  "CRASH", "ASSERT", "SKIP"]:
-            raise ValueError("Unrecognised status %s" % status)
-        data = {"test": test,
-                "status": status.upper()}
-        if message is not None:
-            data["message"] = unicode(message)
-        if expected != data["status"] and status != "SKIP":
-            data["expected"] = expected
-        if stack is not None:
-            data["stack"] = stack
-        if extra is not None:
-            data["extra"] = extra
+
+        if (data["expected"] == data["status"] or
+             data["status"] == "SKIP"):
+            del data["expected"]
 
-        if test not in self._state.running_tests:
+        if data["test"] not in self._state.running_tests:
             self.error("test_end for %s logged while not in progress. "
-                       "Logged with data: %s" % (test, json.dumps(data)))
+                       "Logged with data: %s" % (data["test"], json.dumps(data)))
         else:
-            self._state.running_tests.remove(test)
+            self._state.running_tests.remove(data["test"])
             self._log_data("test_end", data)
 
-    def process_output(self, process, data, command=None):
+    @log_action(Unicode("process"),
+                Unicode("data"),
+                Unicode("command", default=None, optional=True))
+    def process_output(self, data):
         """Log output from a managed process.
 
         :param process: A unique identifier for the process producing the output
                         (typically the pid)
         :param data: The output to log
         :param command: A string representing the full command line used to start
                         the process.
         """
-        data = {"process": process, "data": data}
-        if command is not None:
-            data["command"] = command
         self._log_data("process_output", data)
 
 
 def _log_func(level_name):
-    def log(self, message):
-        data = {"level": level_name, "message": unicode(message)}
+    @log_action(Unicode("message"))
+    def log(self, data):
+        data["level"] = level_name
         self._log_data("log", data)
+
     log.__doc__ = """Log a message with level %s
 
 :param message: The string message to log
 """ % level_name
     log.__name__ = str(level_name).lower()
     return log
 
 
--- a/testing/mozbase/mozlog/tests/test_structured.py
+++ b/testing/mozbase/mozlog/tests/test_structured.py
@@ -1,21 +1,24 @@
+# -*- coding: utf-8 -*-
 import argparse
 import optparse
 import os
 import unittest
 import StringIO
 import json
+import mozfile
 
 from mozlog.structured import (
     commandline,
     reader,
     structuredlog,
     stdadapter,
     handlers,
+    formatters,
 )
 
 
 class TestHandler(object):
     def __init__(self):
         self.items = []
 
     def __call__(self, data):
@@ -235,17 +238,17 @@ class TestStructuredLog(BaseStructuredTe
         logger1.test_start("test")
         last_item = self.pop_last_item()
         self.assertEquals(last_item["action"], "log")
         self.assertEquals(last_item["level"], "ERROR")
 
     def test_process(self):
         self.logger.process_output(1234, "test output")
         self.assert_log_equals({"action": "process_output",
-                                "process": 1234,
+                                "process": "1234",
                                 "data": "test output"})
 
     def test_log(self):
         for level in ["critical", "error", "warning", "info", "debug"]:
             getattr(self.logger, level)("message")
             self.assert_log_equals({"action": "log",
                                     "level": level.upper(),
                                     "message": "message"})
@@ -316,16 +319,55 @@ class TestStructuredLog(BaseStructuredTe
                                 "message": "line 3"})
 
         file_like.write("line 4\r\n")
 
         self.assert_log_equals({"action": "log",
                                 "level": "INFO",
                                 "message": "line 4"})
 
+class TestTypeconversions(BaseStructuredTest):
+    def test_raw(self):
+        self.logger.log_raw({"action":"suite_start", "tests":[1], "time": "1234"})
+        self.assert_log_equals({"action": "suite_start",
+                                "tests":["1"],
+                                "time": 1234})
+        self.logger.suite_end()
+
+    def test_tuple(self):
+        self.logger.suite_start([])
+        self.logger.test_start(("\xf0\x90\x8d\x84\xf0\x90\x8c\xb4\xf0\x90\x8d\x83\xf0\x90\x8d\x84", 42, u"\u16a4"))
+        self.assert_log_equals({"action": "test_start",
+                                "test": (u'\U00010344\U00010334\U00010343\U00010344', u"42", u"\u16a4")})
+        self.logger.suite_end()
+
+    def test_non_string_messages(self):
+        self.logger.suite_start([])
+        self.logger.info(1)
+        self.assert_log_equals({"action": "log",
+                                "message": "1",
+                                "level": "INFO"})
+        self.logger.info([1, (2, '3'), "s", "s" + chr(255)])
+        self.assert_log_equals({"action": "log",
+                                "message": "[1, (2, '3'), 's', 's\\xff']",
+                                "level": "INFO"})
+        self.logger.suite_end()
+
+    def test_utf8str_write(self):
+        with mozfile.NamedTemporaryFile() as logfile:
+            _fmt = formatters.TbplFormatter()
+            _handler = handlers.StreamHandler(logfile, _fmt)
+            self.logger.add_handler(_handler)
+            self.logger.suite_start([])
+            self.logger.info("☺")
+            with open(logfile.name) as f:
+                data = f.readlines()
+                self.assertEquals(data[-1], "☺\n")
+            self.logger.suite_end()
+
 class TestCommandline(unittest.TestCase):
     def test_setup_logging(self):
         parser = argparse.ArgumentParser()
         commandline.add_logging_group(parser)
         args = parser.parse_args(["--log-raw=-"])
         logger = commandline.setup_logging("test", args, {})
         self.assertEqual(len(logger.handlers), 1)