Bug 1352355 - Convert mozleak to structured logging, , r=ahal,mccr8
authorJames Graham <james@hoppipolla.co.uk>
Tue, 04 Dec 2018 22:18:23 +0000
changeset 508567 5441e0814fb901fa2ebb2d694eacf3b98b7bd30f
parent 508566 e134df617e0c7b95649dfc315a5ce537e9ca3fe4
child 508568 80a92104544fbab8e6506c86e6b1230a1da01dd4
push id1905
push userffxbld-merge
push dateMon, 21 Jan 2019 12:33:13 +0000
treeherdermozilla-release@c2fca1944d8c [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersahal, mccr8
bugs1352355
milestone65.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1352355 - Convert mozleak to structured logging, , r=ahal,mccr8 Moves mozleak to use structured logging. The logger gets two new actions, mozleak_object to indicate the name of an object that leaked in a specific process and mozleak_total to indicate the total number of bytes leaked in a process. The output from the TBPL formatter is expected to remain near-identical to the previous output from the logger, so there shouldn't be any effect on the ability to fail jobs if there are leaks. Additional features required by web-platform-tests are also added here; the leak thresholds are passed to the logger for mozleak_total and a list of any objects allowed to leak are passed for mozleak_object, so that a log consumer may decide whether a leak is unexpected. In addition, the scope attribute is used to specify the set of tests (or other tasks) running at the time of the leak, which may be used to associate a leak with a specific set of files. MozReview-Commit-ID: 19FsMxVQExH Depends on D12408 Differential Revision: https://phabricator.services.mozilla.com/D12409
layout/tools/reftest/selftest/test_reftest_output.py
testing/mochitest/tests/python/test_basic_mochitest_plain.py
testing/mozbase/mozleak/mozleak/leaklog.py
testing/mozbase/mozlog/mozlog/formatters/machformatter.py
testing/mozbase/mozlog/mozlog/formatters/tbplformatter.py
testing/mozbase/mozlog/mozlog/logtypes.py
testing/mozbase/mozlog/mozlog/structuredlog.py
--- a/layout/tools/reftest/selftest/test_reftest_output.py
+++ b/layout/tools/reftest/selftest/test_reftest_output.py
@@ -130,19 +130,19 @@ def test_output_leak(monkeypatch, runtes
         return old_process_leak_log(os.path.join(here, 'files', 'leaks.log'), *args[1:], **kwargs)
 
     monkeypatch.setattr('mozleak.process_leak_log', process_leak_log)
 
     status, lines = runtests('reftest-pass.list')
     assert status == 0
 
     tbpl_status, log_level, summary = get_mozharness_status(lines, status)
-    assert tbpl_status == TBPL_FAILURE
-    assert log_level == ERROR
+    assert tbpl_status == TBPL_WARNING
+    assert log_level == WARNING
 
-    errors = filter_action('log', lines)
-    errors = [e for e in errors if e['level'] == 'ERROR']
-    assert len(errors) == 1
-    assert 'leakcheck' in errors[0]['message']
+    leaks = filter_action('mozleak_total', lines)
+    assert len(leaks) == 1
+    assert leaks[0]['process'] == "default"
+    assert leaks[0]['bytes'] == 19915
 
 
 if __name__ == '__main__':
     mozunit.main()
--- a/testing/mochitest/tests/python/test_basic_mochitest_plain.py
+++ b/testing/mochitest/tests/python/test_basic_mochitest_plain.py
@@ -112,19 +112,19 @@ def test_output_leak(monkeypatch, runtes
 
     monkeypatch.setattr('mozleak.process_leak_log', process_leak_log)
 
     status, lines = runtests('test_pass.html')
     # TODO: mochitest should return non-zero here
     assert status == 0
 
     tbpl_status, log_level, summary = get_mozharness_status(lines, status)
-    assert tbpl_status == TBPL_FAILURE
-    assert log_level == ERROR
+    assert tbpl_status == TBPL_WARNING
+    assert log_level == WARNING
 
-    errors = filter_action('log', lines)
-    errors = [e for e in errors if e['level'] == 'ERROR']
-    assert len(errors) == 1
-    assert 'leakcheck' in errors[0]['message']
+    leaks = filter_action('mozleak_total', lines)
+    assert len(leaks) == 1
+    assert leaks[0]['process'] == "default"
+    assert leaks[0]['bytes'] == 19915
 
 
 if __name__ == '__main__':
     mozunit.main()
--- a/testing/mozbase/mozleak/mozleak/leaklog.py
+++ b/testing/mozbase/mozleak/mozleak/leaklog.py
@@ -15,38 +15,44 @@ def _get_default_logger():
     if not log:
         import logging
         log = logging.getLogger(__name__)
     return log
 
 
 def process_single_leak_file(leakLogFileName, processType, leakThreshold,
                              ignoreMissingLeaks, log=None,
-                             stackFixer=None):
+                             stackFixer=None, scope=None, allowed=None):
     """Process a single leak log.
     """
 
     #     |              |Per-Inst  Leaked|     Total  Rem|
     #   0 |TOTAL         |      17     192| 419115886    2|
     # 833 |nsTimerImpl   |      60     120|     24726    2|
     # 930 |Foo<Bar, Bar> |      32       8|       100    1|
     lineRe = re.compile(r"^\s*\d+ \|"
                         r"(?P<name>[^|]+)\|"
                         r"\s*(?P<size>-?\d+)\s+(?P<bytesLeaked>-?\d+)\s*\|"
                         r"\s*-?\d+\s+(?P<numLeaked>-?\d+)")
     # The class name can contain spaces. We remove trailing whitespace later.
 
     log = log or _get_default_logger()
 
+    if allowed is None:
+        allowed = {}
+
     processString = "%s process:" % processType
     crashedOnPurpose = False
     totalBytesLeaked = None
     leakedObjectAnalysis = []
     leakedObjectNames = []
     recordLeakedObjects = False
+    log.info("leakcheck | Processing leak log file %s"
+             % leakLogFileName)
+
     with open(leakLogFileName, "r") as leaks:
         for line in leaks:
             if line.find("purposefully crash") > -1:
                 crashedOnPurpose = True
             matches = lineRe.match(line)
             if not matches:
                 # eg: the leak table header row
                 strippedLine = line.rstrip()
@@ -76,69 +82,47 @@ def process_single_leak_file(leakLogFile
                     totalBytesLeaked = bytesLeaked
                     # Throw out the information we had about the previous bloat
                     # view.
                     leakedObjectNames = []
                     leakedObjectAnalysis = []
                     recordLeakedObjects = True
                 else:
                     recordLeakedObjects = False
-            if size < 0 or bytesLeaked < 0 or numLeaked < 0:
+            if (size < 0 or bytesLeaked < 0 or numLeaked < 0) and leakThreshold >= 0:
                 log.error("TEST-UNEXPECTED-FAIL | leakcheck | %s negative leaks caught!"
                           % processString)
                 continue
             if name != "TOTAL" and numLeaked != 0 and recordLeakedObjects:
                 leakedObjectNames.append(name)
-                leakedObjectAnalysis.append("TEST-INFO | leakcheck | %s leaked %d %s"
-                                            % (processString, numLeaked, name))
-
-    log.info('\n'.join(leakedObjectAnalysis))
-
-    if totalBytesLeaked is None:
-        # We didn't see a line with name 'TOTAL'
-        if crashedOnPurpose:
-            log.info("TEST-INFO | leakcheck | %s deliberate crash and thus no leak log"
-                     % processString)
-        elif ignoreMissingLeaks:
-            log.info("TEST-INFO | leakcheck | %s ignoring missing output line for total leaks"
-                     % processString)
-        else:
-            log.error("TEST-UNEXPECTED-FAIL | leakcheck | %s missing output line for total leaks!"
-                      % processString)
-            log.info("TEST-INFO | leakcheck | missing output line from log file %s"
-                     % leakLogFileName)
-        return
+                leakedObjectAnalysis.append((numLeaked, name))
 
-    if totalBytesLeaked == 0:
-        log.info("TEST-PASS | leakcheck | %s no leaks detected!" %
-                 processString)
-        return
+    for numLeaked, name in leakedObjectAnalysis:
+        leak_allowed = False
+        if name in allowed:
+            limit = leak_allowed[name]
+            leak_allowed = limit is None or numLeaked <= limit
+        log.mozleak_object(processType,
+                           numLeaked,
+                           name,
+                           scope=scope,
+                           allowed=leak_allowed)
 
-    # Create a comma delimited string of the first N leaked objects found,
-    # to aid with bug summary matching in TBPL. Note: The order of the objects
-    # had no significance (they're sorted alphabetically).
-    maxSummaryObjects = 5
-    leakedObjectSummary = ', '.join(leakedObjectNames[:maxSummaryObjects])
-    if len(leakedObjectNames) > maxSummaryObjects:
-        leakedObjectSummary += ', ...'
-
-    message = "leakcheck | %s %d bytes leaked (%s)" % (
-            processString, totalBytesLeaked, leakedObjectSummary)
-
-    # totalBytesLeaked will include any expected leaks, so it can be off
-    # by a few thousand bytes.
-    if totalBytesLeaked > leakThreshold:
-        log.error("TEST-UNEXPECTED-FAIL | %s" % message)
-    else:
-        log.warning(message)
+    log.mozleak_total(processType,
+                      totalBytesLeaked,
+                      leakThreshold,
+                      leakedObjectNames,
+                      scope=scope,
+                      induced_crash=crashedOnPurpose,
+                      ignore_missing=ignoreMissingLeaks)
 
 
 def process_leak_log(leak_log_file, leak_thresholds=None,
                      ignore_missing_leaks=None, log=None,
-                     stack_fixer=None):
+                     stack_fixer=None, scope=None, allowed=None):
     """Process the leak log, including separate leak logs created
     by child processes.
 
     Use this function if you want an additional PASS/FAIL summary.
     It must be used with the |XPCOM_MEM_BLOAT_LOG| environment variable.
 
     The base of leak_log_file for a non-default process needs to end with
       _proctype_pid12345.log
@@ -146,31 +130,40 @@ def process_leak_log(leak_log_file, leak
     be the result of calling XRE_ChildProcessTypeToString(). 12345 is
     a series of digits that is the pid for the process. The .log is
     optional.
 
     All other file names are treated as being for default processes.
 
     leak_thresholds should be a dict mapping process types to leak thresholds,
     in bytes. If a process type is not present in the dict the threshold
-    will be 0.
+    will be 0. If the threshold is a negative number we additionally ignore
+    the case where there's negative leaks.
+
+    allowed - A dictionary mapping process types to dictionaries containing
+    the number of objects of that type which are allowed to leak.
+
+    scope - An identifier for the set of tests run during the browser session
+            (e.g. a directory name)
 
     ignore_missing_leaks should be a list of process types. If a process
     creates a leak log without a TOTAL, then we report an error if it isn't
     in the list ignore_missing_leaks.
     """
-
     log = log or _get_default_logger()
 
     leakLogFile = leak_log_file
     if not os.path.exists(leakLogFile):
         log.warning(
             "leakcheck | refcount logging is off, so leaks can't be detected!")
         return
 
+    log.info("leakcheck | Processing log file %s%s" %
+             (leakLogFile, (" for scope %s" % scope) if scope is not None else ""))
+
     leakThresholds = leak_thresholds or {}
     ignoreMissingLeaks = ignore_missing_leaks or []
 
     # This list is based on kGeckoProcessTypeString. ipdlunittest processes likely
     # are not going to produce leak logs we will ever see.
     knownProcessTypes = ["default", "plugin", "tab", "geckomediaplugin", "gpu", "rdd"]
 
     for processType in knownProcessTypes:
@@ -198,9 +191,10 @@ def process_leak_log(leak_log_file, leak
             else:
                 processType = "default"
             if processType not in knownProcessTypes:
                 log.error("TEST-UNEXPECTED-FAIL | leakcheck | "
                           "Leak log with unknown process type %s" % processType)
             leakThreshold = leakThresholds.get(processType, 0)
             process_single_leak_file(thisFile, processType, leakThreshold,
                                      processType in ignoreMissingLeaks,
-                                     log=log, stackFixer=stack_fixer)
+                                     log=log, stackFixer=stack_fixer,
+                                     scope=scope, allowed=allowed)
--- a/testing/mozbase/mozlog/mozlog/formatters/machformatter.py
+++ b/testing/mozbase/mozlog/mozlog/formatters/machformatter.py
@@ -250,16 +250,53 @@ class MachFormatter(base.BaseFormatter):
             prefix = self.term.yellow("WARNING")
         else:
             prefix = self.term.red("ERROR")
 
         return ("%s | LeakSanitizer | "
                 "SUMMARY: AddressSanitizer: %d byte(s) leaked in %d allocation(s)." %
                 (prefix, data["bytes"], data["allocations"]))
 
+    def mozleak_object(self, data):
+        data_log = data.copy()
+        data_log["level"] = "INFO"
+        data_log["message"] = ("leakcheck: %s leaked %d %s" %
+                               (data["process"], data["bytes"], data["name"]))
+        return self.log(data_log)
+
+    def mozleak_total(self, data):
+        if data["bytes"] is None:
+            # We didn't see a line with name 'TOTAL'
+            if data.get("induced_crashed", False):
+                data_log = data.copy()
+                data_log["level"] = "INFO"
+                data_log["message"] = ("leakcheck: %s deliberate crash and thus no leak log\n"
+                                       % data["process"])
+                return self.log(data_log)
+            if data.get("ignore_missing", False):
+                return ("%s ignoring missing output line for total leaks\n" %
+                        data["process"])
+
+            status = self.term.red("FAIL")
+            return ("%s leakcheck: "
+                    "%s missing output line for total leaks!\n" %
+                    (status, data["process"]))
+
+        if data["bytes"] == 0:
+            return ("%s leakcheck: %s no leaks detected!\n" %
+                    (self.term.green("PASS"), data["process"]))
+
+        message = "leakcheck: %s %d bytes leaked\n" % (data["process"], data["bytes"])
+
+        # data["bytes"] will include any expected leaks, so it can be off
+        # by a few thousand bytes.
+        failure = data["bytes"] > data["threshold"]
+        status = self.term.red("UNEXPECTED-FAIL") if failure else self.term.yellow("FAIL")
+        return "%s %s\n" % (status, message)
+
     def test_status(self, data):
         test = self._get_test_id(data)
         if test not in self.status_buffer:
             self.status_buffer[test] = {"count": 0, "unexpected": 0, "pass": 0}
         self.status_buffer[test]["count"] += 1
 
         if data["status"] == "PASS":
             self.status_buffer[test]["pass"] += 1
--- a/testing/mozbase/mozlog/mozlog/formatters/tbplformatter.py
+++ b/testing/mozbase/mozlog/mozlog/formatters/tbplformatter.py
@@ -292,16 +292,57 @@ class TbplFormatter(BaseFormatter):
         return "%s | LeakSanitizer | leak at %s\n%s" % (prefix, frame_list, suffix)
 
     def lsan_summary(self, data):
         level = "INFO" if data.get("allowed", False) else "ERROR"
         return ("%s | LeakSanitizer | "
                 "SUMMARY: AddressSanitizer: %d byte(s) leaked in %d allocation(s)." %
                 (level, data["bytes"], data["allocations"]))
 
+    def mozleak_object(self, data):
+        return ("TEST-INFO | leakcheck | %s leaked %d %s\n" %
+                (data["process"], data["bytes"], data["name"]))
+
+    def mozleak_total(self, data):
+        if data["bytes"] is None:
+            # We didn't see a line with name 'TOTAL'
+            if data.get("induced_crashed", False):
+                return ("TEST-INFO | leakcheck | %s deliberate crash and thus no leak log\n"
+                        % data["process"])
+            if data.get("ignore_missing", False):
+                return ("TEST-INFO | leakcheck | "
+                        "%s ignoring missing output line for total leaks\n" %
+                        data["process"])
+
+            return ("TEST-UNEXPECTED-FAIL | leakcheck | "
+                    "%s missing output line for total leaks!\n" %
+                    data["process"])
+
+        if data["bytes"] == 0:
+            return ("TEST-PASS | leakcheck | %s no leaks detected!\n" %
+                    data["process"])
+
+        # Create a comma delimited string of the first N leaked objects found,
+        # to aid with bug summary matching in TBPL. Note: The order of the objects
+        # had no significance (they're sorted alphabetically).
+        max_objects = 5
+        object_summary = ', '.join(data["objects"][:max_objects])
+        if len(data["objects"]) > max_objects:
+            object_summary += ', ...'
+
+        message = "leakcheck | %s %d bytes leaked (%s)\n" % (
+            data["process"], data["bytes"], object_summary)
+
+        # data["bytes"] will include any expected leaks, so it can be off
+        # by a few thousand bytes.
+        if data["bytes"] > data["threshold"]:
+            return "TEST-UNEXPECTED-FAIL | %s\n" % message
+        else:
+            return "WARNING | %s\n" % message
+
     def _format_suite_summary(self, suite, summary):
         counts = summary['counts']
         logs = summary['unexpected_logs']
 
         total = sum(self.summary.aggregate('count', counts).values())
         expected = sum(self.summary.aggregate('expected', counts).values())
         status_str = "{}/{}".format(expected, total)
         rv = ["{}: {}".format(suite, status_str)]
--- a/testing/mozbase/mozlog/mozlog/logtypes.py
+++ b/testing/mozbase/mozlog/mozlog/logtypes.py
@@ -260,8 +260,15 @@ class Tuple(ContainerType):
             return [superfmt(t) for t in item_type]
         return (superfmt(item_type),)
 
     def convert(self, data):
         if len(data) != len(self.item_type):
             raise ValueError("Expected %i items got %i" % (len(self.item_type), len(data)))
         return tuple(item_type.convert(value)
                      for item_type, value in zip(self.item_type, data))
+
+
+class Nullable(ContainerType):
+    def convert(self, data):
+        if data is None:
+            return data
+        return self.item_type.convert(data)
--- a/testing/mozbase/mozlog/mozlog/structuredlog.py
+++ b/testing/mozbase/mozlog/mozlog/structuredlog.py
@@ -7,17 +7,17 @@ from __future__ import absolute_import, 
 from multiprocessing import current_process
 from threading import current_thread, Lock
 import json
 import sys
 import time
 import traceback
 
 from .logtypes import (Unicode, TestId, TestList, Status, SubStatus, Dict, List, Int, Any, Tuple,
-                       Boolean)
+                       Boolean, Nullable)
 from .logtypes import log_action, convertor_registry
 import six
 
 """Structured Logging for recording test results.
 
 Allowed actions, and subfields:
   suite_start
       tests  - List of test names
@@ -61,16 +61,24 @@ Allowed actions, and subfields:
       allowed_match - A stack frame in the list that matched a rule meaning the
                       leak is expected
 
   lsan_summary
       bytes - Number of bytes leaked
       allocations - Number of allocations
       allowed - Boolean indicating whether all detected leaks matched allow rules
 
+  mozleak_object
+     process - Process that leaked
+     bytes - Number of bytes that leaked
+     name - Name of the object that leaked
+     scope - An identifier for the set of tests run during the browser session
+             (e.g. a directory name)
+     allowed - Boolean indicating whether the leak was permitted
+
   log
       level [CRITICAL | ERROR | WARNING |
              INFO | DEBUG] - level of the logging message
       message - Message to log
 
 Subfields for all messages:
       action - the action type of the current message
       time - the timestamp in ms since the epoch of the log message
@@ -484,16 +492,34 @@ class StructuredLogger(object):
         self._log_data("lsan_leak", data)
 
     @log_action(Int("bytes"),
                 Int("allocations"),
                 Boolean("allowed", optional=True, default=False))
     def lsan_summary(self, data):
         self._log_data("lsan_summary", data)
 
+    @log_action(Unicode("process"),
+                Int("bytes"),
+                Unicode("name"),
+                Unicode("scope", optional=True, default=None),
+                Boolean("allowed", optional=True, default=False))
+    def mozleak_object(self, data):
+        self._log_data("mozleak_object", data)
+
+    @log_action(Unicode("process"),
+                Nullable(Int, "bytes"),
+                Int("threshold"),
+                List(Unicode, "objects"),
+                Unicode("scope", optional=True, default=None),
+                Boolean("induced_crash", optional=True, default=False),
+                Boolean("ignore_missing", optional=True, default=False))
+    def mozleak_total(self, data):
+        self._log_data("mozleak_total", data)
+
     @log_action()
     def shutdown(self, data):
         """Shutdown the logger.
 
         This logs a 'shutdown' action after which any further attempts to use
         the logger will raise a :exc:`LoggerShutdownError`.
 
         This is also called implicitly from the destructor or