Bug 1023371 - Refactor mozcrash to seperate out the IO from the stack inspection, r=ted.mielczarek
authorJames Graham <james@hoppipolla.co.uk>
Fri, 15 Aug 2014 23:19:34 +0100
changeset 199882 64717933c7276fbd54d4c1273180cd3f1982103e
parent 199881 39c9d9bba87e29382bcd5f0f08447cad3116456d
child 199883 6bde3aa7e3171813bcd042c11af9d9d480bac348
push id47755
push userjames@hoppipolla.co.uk
push dateFri, 15 Aug 2014 22:20:28 +0000
treeherdermozilla-inbound@64717933c727 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersted.mielczarek
bugs1023371
milestone34.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1023371 - Refactor mozcrash to seperate out the IO from the stack inspection, r=ted.mielczarek This makes it compatible with structured logging.
testing/mozbase/mozcrash/mozcrash/mozcrash.py
testing/mozbase/mozlog/mozlog/structured/formatters/machformatter.py
testing/mozbase/mozlog/mozlog/structured/formatters/tbplformatter.py
testing/mozbase/mozlog/mozlog/structured/structuredlog.py
--- a/testing/mozbase/mozcrash/mozcrash/mozcrash.py
+++ b/testing/mozbase/mozcrash/mozcrash/mozcrash.py
@@ -9,22 +9,42 @@ import glob
 import os
 import re
 import shutil
 import subprocess
 import sys
 import tempfile
 import urllib2
 import zipfile
+from collections import namedtuple
 
 import mozfile
 import mozlog
+from mozlog.structured import structuredlog
 
 
-def check_for_crashes(dump_directory, symbols_path,
+StackInfo = namedtuple("StackInfo",
+                       ["minidump_path",
+                        "signature",
+                        "stackwalk_stdout",
+                        "stackwalk_stderr",
+                        "stackwalk_retcode",
+                        "stackwalk_errors",
+                        "extra"])
+
+
+def get_logger():
+    structured_logger = structuredlog.get_default_logger("mozcrash")
+    if structured_logger is None:
+        return mozlog.getLogger('mozcrash')
+    return structured_logger
+
+
+def check_for_crashes(dump_directory,
+                      symbols_path,
                       stackwalk_binary=None,
                       dump_save_path=None,
                       test_name=None,
                       quiet=False):
     """
     Print a stack trace for minidump files left behind by a crashing program.
 
     `dump_directory` will be searched for minidump files. Any minidump files found will
@@ -46,126 +66,230 @@ def check_for_crashes(dump_directory, sy
     If `test_name` is set it will be used as the test name in log output. If not set the
     filename of the calling function will be used.
 
     If `quiet` is set, no PROCESS-CRASH message will be printed to stdout if a
     crash is detected.
 
     Returns True if any minidumps were found, False otherwise.
     """
-    dumps = glob.glob(os.path.join(dump_directory, '*.dmp'))
-    if not dumps:
-        return False
-
-    if stackwalk_binary is None:
-        stackwalk_binary = os.environ.get('MINIDUMP_STACKWALK', None)
 
     # try to get the caller's filename if no test name is given
     if test_name is None:
         try:
             test_name = os.path.basename(sys._getframe(1).f_code.co_filename)
         except:
             test_name = "unknown"
 
-    try:
-        log = mozlog.getLogger('mozcrash')
-        remove_symbols = False
-        # If our symbols are at a remote URL, download them now
-        # We want to download URLs like http://... but not Windows paths like c:\...
-        if symbols_path and mozfile.is_url(symbols_path):
-            log.info("Downloading symbols from: %s", symbols_path)
-            remove_symbols = True
-            # Get the symbols and write them to a temporary zipfile
-            data = urllib2.urlopen(symbols_path)
-            symbols_file = tempfile.TemporaryFile()
-            symbols_file.write(data.read())
-            # extract symbols to a temporary directory (which we'll delete after
-            # processing all crashes)
-            symbols_path = tempfile.mkdtemp()
-            zfile = zipfile.ZipFile(symbols_file, 'r')
-            mozfile.extract_zip(zfile, symbols_path)
-            zfile.close()
+    crash_info = CrashInfo(dump_directory, symbols_path, dump_save_path=dump_save_path,
+                           stackwalk_binary=stackwalk_binary)
 
-        for d in dumps:
-            extra = os.path.splitext(d)[0] + '.extra'
+    if not crash_info.has_dumps:
+        return False
 
-            stackwalk_output = []
-            stackwalk_output.append("Crash dump filename: " + d)
-            top_frame = None
-            if symbols_path and stackwalk_binary and os.path.exists(stackwalk_binary):
-                # run minidump_stackwalk
-                p = subprocess.Popen([stackwalk_binary, d, symbols_path],
-                                     stdout=subprocess.PIPE,
-                                     stderr=subprocess.PIPE)
-                (out, err) = p.communicate()
-                if len(out) > 3:
-                    # minidump_stackwalk is chatty,
-                    # so ignore stderr when it succeeds.
-                    stackwalk_output.append(out)
-                    # The top frame of the crash is always the line after "Thread N (crashed)"
-                    # Examples:
-                    #  0  libc.so + 0xa888
-                    #  0  libnss3.so!nssCertificate_Destroy [certificate.c : 102 + 0x0]
-                    #  0  mozjs.dll!js::GlobalObject::getDebuggers() [GlobalObject.cpp:89df18f9b6da : 580 + 0x0]
-                    #  0  libxul.so!void js::gc::MarkInternal<JSObject>(JSTracer*, JSObject**) [Marking.cpp : 92 + 0x28]
-                    lines = out.splitlines()
-                    for i, line in enumerate(lines):
-                        if "(crashed)" in line:
-                            match = re.search(r"^ 0  (?:.*!)?(?:void )?([^\[]+)", lines[i+1])
-                            if match:
-                                top_frame = "@ %s" % match.group(1).strip()
-                            break
-                else:
-                    stackwalk_output.append("stderr from minidump_stackwalk:")
-                    stackwalk_output.append(err)
-                if p.returncode != 0:
-                    stackwalk_output.append("minidump_stackwalk exited with return code %d" % p.returncode)
-            else:
-                if not symbols_path:
-                    stackwalk_output.append("No symbols path given, can't process dump.")
-                if not stackwalk_binary:
-                    stackwalk_output.append("MINIDUMP_STACKWALK not set, can't process dump.")
-                elif stackwalk_binary and not os.path.exists(stackwalk_binary):
-                    stackwalk_output.append("MINIDUMP_STACKWALK binary not found: %s" % stackwalk_binary)
-            if not top_frame:
-                top_frame = "Unknown top frame"
-            if not quiet:
-                print "PROCESS-CRASH | %s | application crashed [%s]" % (test_name, top_frame)
-                print '\n'.join(stackwalk_output)
-            if dump_save_path is None:
-                dump_save_path = os.environ.get('MINIDUMP_SAVE_PATH', None)
-            if dump_save_path:
-                # This code did not previously create the directory,
-                # so there may be a file hanging out with its name.
-                if os.path.isfile(dump_save_path):
-                    os.unlink(dump_save_path)
-                if not os.path.isdir(dump_save_path):
-                    try:
-                        os.makedirs(dump_save_path)
-                    except OSError:
-                        pass
-
-                shutil.move(d, dump_save_path)
-                log.info("Saved minidump as %s",
-                         os.path.join(dump_save_path, os.path.basename(d)))
-
-                if os.path.isfile(extra):
-                    shutil.move(extra, dump_save_path)
-                    log.info("Saved app info as %s",
-                             os.path.join(dump_save_path, os.path.basename(extra)))
-            else:
-                mozfile.remove(d)
-                mozfile.remove(extra)
-    finally:
-        if remove_symbols:
-            mozfile.remove(symbols_path)
+    for info in crash_info:
+        if not quiet:
+            stackwalk_output = ["Crash dump filename: %s" % info.minidump_path]
+            if info.stackwalk_stderr:
+                stackwalk_output.append("stderr from minidump_stackwalk:")
+                stackwalk_output.append(info.stackwalk_stderr)
+            elif info.stackwalk_stdout is not None:
+                stackwalk_output.append(info.stackwalk_stdout)
+            if info.stackwalk_retcode is not None and info.stackwalk_retcode != 0:
+                stackwalk_output.append("minidump_stackwalk exited with return code %d" %
+                                        info.stackwalk_retcode)
+            signature = info.signature if info.signature else "unknown top frame"
+            print "PROCESS-CRASH | %s | application crashed [%s]" % (test_name,
+                                                                     signature)
+            print '\n'.join(stackwalk_output)
+            print '\n'.join(info.stackwalk_errors)
 
     return True
 
 
+def log_crashes(logger,
+                dump_directory,
+                symbols_path,
+                process=None,
+                test=None,
+                stackwalk_binary=None,
+                dump_save_path=None):
+    """Log crashes using a structured logger"""
+    for info in CrashInfo(dump_directory, symbols_path, dump_save_path=dump_save_path,
+                          stackwalk_binary=stackwalk_binary):
+        kwargs = info._asdict()
+        kwargs.pop("extra")
+        logger.crash(process=process, test=test, **kwargs)
+
+
+class CrashInfo(object):
+    """Get information about a crash based on dump files.
+
+    Typical usage is to iterate over the CrashInfo object. This returns StackInfo
+    objects, one for each crash dump file that is found in the dump_directory.
+
+    :param dump_directory: Path to search for minidump files
+    :param symbols_path: Path to a path to a directory containing symbols to use for
+                         dump processing. This can either be a path to a directory
+                         containing Breakpad-format symbols, or a URL to a zip file
+                         containing a set of symbols.
+    :param dump_save_path: Path to which to save the dump files. If this is None,
+                           the MINIDUMP_SAVE_PATH environment variable will be used.
+    :param stackwalk_binary: Path to the minidump_stackwalk binary. If this is None,
+                             the MINIDUMP_STACKWALK environment variable will be used
+                             as the path to the minidump binary."""
+
+    def __init__(self, dump_directory, symbols_path, dump_save_path=None,
+                 stackwalk_binary=None):
+        self.dump_directory = dump_directory
+        self.symbols_path = symbols_path
+        self.remove_symbols = False
+
+        if dump_save_path is None:
+            dump_save_path = os.environ.get('MINIDUMP_SAVE_PATH', None)
+        self.dump_save_path = dump_save_path
+
+        if stackwalk_binary is None:
+            stackwalk_binary = os.environ.get('MINIDUMP_STACKWALK', None)
+        self.stackwalk_binary = stackwalk_binary
+
+        self.logger = get_logger()
+        self._dump_files = None
+
+    def _get_symbols(self):
+        # This updates self.symbols_path so we only download once
+        if self.symbols_path and mozfile.is_url(self.symbols_path):
+            self.remove_symbols = True
+            self.logger.info("Downloading symbols from: %s" % self.symbols_path)
+            # Get the symbols and write them to a temporary zipfile
+            data = urllib2.urlopen(self.symbols_path)
+            with tempfile.TemporaryFile() as symbols_file:
+                symbols_file.write(data.read())
+                # extract symbols to a temporary directory (which we'll delete after
+                # processing all crashes)
+                self.symbols_path = tempfile.mkdtemp()
+                with zipfile.ZipFile(symbols_file, 'r') as zfile:
+                    mozfile.extract_zip(zfile, self.symbols_path)
+
+    @property
+    def dump_files(self):
+        """List of tuple (path_to_dump_file, path_to_extra_file) for each dump
+           file in self.dump_directory. The extra files may not exist."""
+        if self._dump_files is None:
+            self._dump_files = [(path, os.path.splitext(path)[0] + '.extra') for path in
+                                glob.glob(os.path.join(self.dump_directory, '*.dmp'))]
+        return self._dump_files
+
+    @property
+    def has_dumps(self):
+        """Boolean indicating whether any crash dump files were found in the
+        current directory"""
+        return len(self.dump_files) > 0
+
+    def __iter__(self):
+        for path, extra in self.dump_files:
+            rv = self._process_dump_file(path, extra)
+            yield rv
+
+        if self.remove_symbols:
+            mozfile.remove(self.symbols_path)
+
+    def _process_dump_file(self, path, extra):
+        """Process a single dump file using self.stackwalk_binary, and return a
+        tuple containing properties of the crash dump.
+
+        :param path: Path to the minidump file to analyse
+        :return: A StackInfo tuple with the fields::
+                   minidump_path: Path of the dump file
+                   signature: The top frame of the stack trace, or None if it
+                              could not be determined.
+                   stackwalk_stdout: String of stdout data from stackwalk
+                   stackwalk_stderr: String of stderr data from stackwalk or
+                                     None if it succeeded
+                   stackwalk_retcode: Return code from stackwalk
+                   stackwalk_errors: List of errors in human-readable form that prevented
+                                     stackwalk being launched.
+        """
+        self._get_symbols()
+
+        errors = []
+        signature = None
+        include_stderr = False
+        out = None
+        err = None
+        retcode = None
+        if (self.symbols_path and self.stackwalk_binary and
+            os.path.exists(self.stackwalk_binary)):
+            # run minidump_stackwalk
+            p = subprocess.Popen([self.stackwalk_binary, path, self.symbols_path],
+                                 stdout=subprocess.PIPE,
+                                 stderr=subprocess.PIPE)
+            (out, err) = p.communicate()
+            retcode = p.returncode
+            if len(out) > 3:
+                # minidump_stackwalk is chatty,
+                # so ignore stderr when it succeeds.
+                # The top frame of the crash is always the line after "Thread N (crashed)"
+                # Examples:
+                #  0  libc.so + 0xa888
+                #  0  libnss3.so!nssCertificate_Destroy [certificate.c : 102 + 0x0]
+                #  0  mozjs.dll!js::GlobalObject::getDebuggers() [GlobalObject.cpp:89df18f9b6da : 580 + 0x0]
+                #  0  libxul.so!void js::gc::MarkInternal<JSObject>(JSTracer*, JSObject**) [Marking.cpp : 92 + 0x28]
+                lines = out.splitlines()
+                for i, line in enumerate(lines):
+                    if "(crashed)" in line:
+                        match = re.search(r"^ 0  (?:.*!)?(?:void )?([^\[]+)", lines[i+1])
+                        if match:
+                            signature = "@ %s" % match.group(1).strip()
+                        break
+            else:
+                include_stderr = True
+        else:
+            if not self.symbols_path:
+                errors.append("No symbols path given, can't process dump.")
+            if not self.stackwalk_binary:
+                errors.append("MINIDUMP_STACKWALK not set, can't process dump.")
+            elif self.stackwalk_binary and not os.path.exists(self.stackwalk_binary):
+                errors.append("MINIDUMP_STACKWALK binary not found: %s" % self.stackwalk_binary)
+
+        if self.dump_save_path:
+            self._save_dump_file(path, extra)
+
+        if os.path.exists(path):
+            mozfile.remove(path)
+        if os.path.exists(extra):
+            mozfile.remove(extra)
+
+        return StackInfo(path,
+                         signature,
+                         out,
+                         err if include_stderr else None,
+                         retcode,
+                         errors,
+                         extra)
+
+    def _save_dump_file(self, path, extra):
+        if os.path.isfile(self.dump_save_path):
+            os.unlink(self.dump_save_path)
+        if not os.path.isdir(self.dump_save_path):
+            try:
+                os.makedirs(self.dump_save_path)
+            except OSError:
+                pass
+
+        shutil.move(path, self.dump_save_path)
+        self.logger.info("Saved minidump as %s" %
+                         os.path.join(self.dump_save_path, os.path.basename(path)))
+
+        if os.path.isfile(extra):
+            shutil.move(extra, self.dump_save_path)
+            self.logger.info("Saved app info as %s" %
+                             os.path.join(self.dump_save_path, os.path.basename(extra)))
+
+
 def check_for_java_exception(logcat, quiet=False):
     """
     Print a summary of a fatal Java exception, if present in the provided
     logcat output.
 
     Example:
     PROCESS-CRASH | java-exception | java.lang.NullPointerException at org.mozilla.gecko.GeckoApp$21.run(GeckoApp.java:1833)
 
--- a/testing/mozbase/mozlog/mozlog/structured/formatters/machformatter.py
+++ b/testing/mozbase/mozlog/mozlog/structured/formatters/machformatter.py
@@ -71,16 +71,18 @@ class MachFormatter(base.BaseFormatter):
 
             if data["action"] == "test_end":
                 if "expected" not in data and not self.has_unexpected[test]:
                     color = self.terminal.green
                 else:
                     color = self.terminal.red
             elif data["action"] in ("suite_start", "suite_end", "test_start"):
                 color = self.terminal.yellow
+            elif data["action"] == "crash":
+                color = self.terminal.red
 
             if color is not None:
                 action = color(action)
 
         return "%s %s: %s %s\n" % (time, action, thread, s)
 
     def _get_test_id(self, data):
         test_id = data.get("test")
@@ -239,16 +241,51 @@ class MachFormatter(base.BaseFormatter):
         else:
             self.summary_values["expected"] += 1
 
     def process_output(self, data):
         return '"%s" (pid:%s command:%s)' % (data["data"],
                                              data["process"],
                                              data.get("command", ""))
 
+    def crash(self, data):
+        test = self._get_test_id(data)
+
+        if data.get("stackwalk_returncode", 0) != 0 and not data.get("stackwalk_stderr"):
+            success = True
+        else:
+            success = False
+
+        rv = ["pid:%s. Test:%s. Minidump anaylsed:%s. Signature:[%s]" %
+              (data.get("pid", None), test, success, data["signature"])]
+
+        if data.get("minidump_path"):
+            rv.append("Crash dump filename: %s" % data["minidump_path"])
+
+        if data.get("stackwalk_returncode", 0) != 0:
+            rv.append("minidump_stackwalk exited with return code %d" %
+                      data["stackwalk_returncode"])
+
+        if data.get("stackwalk_stderr"):
+            rv.append("stderr from minidump_stackwalk:")
+            rv.append(data["stackwalk_stderr"])
+        elif data.get("stackwalk_stdout"):
+            rv.append(data["stackwalk_stdout"])
+
+        if data.get("stackwalk_errors"):
+            rv.extend(data.get("stackwalk_errors"))
+
+        rv = "\n".join(rv)
+        if not rv[-1] == "\n":
+            rv += "\n"
+
+        return rv
+
+
+
     def log(self, data):
         level = data.get("level").upper()
 
         if self.terminal is not None:
             if level in ("CRITICAL", "ERROR"):
                 level = self.terminal.red(level)
             elif level == "WARNING":
                 level = self.terminal.yellow(level)
--- a/testing/mozbase/mozlog/mozlog/structured/formatters/tbplformatter.py
+++ b/testing/mozbase/mozlog/mozlog/structured/formatters/tbplformatter.py
@@ -25,16 +25,43 @@ class TbplFormatter(BaseFormatter):
         if "stack" in data:
             message += "\n%s" % data["stack"]
 
         return "%s\n" % message
 
     def process_output(self, data):
         return "PROCESS | %(process)s | %(data)s\n" % data
 
+    def crash(self, data):
+        id = self.id_str(data["test"]) if "test" in data else "pid: " % data["process"]
+
+        rv = ["PROCESS-CRASH | %s | application crashed [%s]" % (id,
+                                                                 data["signature"])]
+        if data.get("minidump_path"):
+            rv.append("Crash dump filename: %s" % data["minidump_path"])
+
+        if data.get("stackwalk_stderr"):
+            rv.append("stderr from minidump_stackwalk:")
+            rv.append(data["stackwalk_stderr"])
+        elif data.get("stackwalk_stdout"):
+            rv.append(data["stackwalk_stdout"])
+
+        if data.get("stackwalk_returncode", 0) != 0:
+            rv.append("minidump_stackwalk exited with return code %d" %
+                      data["stackwalk_returncode"])
+
+        if data.get("stackwalk_errors"):
+            rv.extend(data.get("stackwalk_errors"))
+
+        rv = "\n".join(rv)
+        if not rv[-1] == "\n":
+            rv += "\n"
+
+        return rv
+
     def suite_start(self, data):
         self.suite_start_time = data["time"]
         return "SUITE-START | Running %i tests\n" % len(data["tests"])
 
     def test_start(self, data):
         self.test_start_times[self.test_id(data["test"])] = data["time"]
 
         return "TEST-START | %s\n" % self.id_str(data["test"])
--- a/testing/mozbase/mozlog/mozlog/structured/structuredlog.py
+++ b/testing/mozbase/mozlog/mozlog/structured/structuredlog.py
@@ -6,17 +6,18 @@ from __future__ import unicode_literals
 
 from multiprocessing import current_process
 from threading import current_thread, Lock
 import json
 import sys
 import time
 import traceback
 
-from logtypes import Unicode, TestId, Status, SubStatus, Dict, List, Any, log_action, convertor_registry
+from logtypes import Unicode, TestId, Status, SubStatus, Dict, List, Int, Any
+from logtypes import log_action, convertor_registry
 
 """Structured Logging for recording test results.
 
 Allowed actions, and subfields:
   suite_start
       tests  - List of test names
 
   suite_end
@@ -89,16 +90,17 @@ log_levels = dict((k.upper(), v) for v, 
 
 
 class LoggerState(object):
     def __init__(self):
         self.handlers = []
         self.running_tests = set()
         self.suite_started = False
 
+
 class StructuredLogger(object):
     _lock = Lock()
     _logger_states = {}
     """Create a structured logger with the given name
 
     :param name: The name of the logger.
     :param component: A subcomponent that the logger belongs to (typically a library name)
     """
@@ -292,16 +294,30 @@ class StructuredLogger(object):
         :param process: A unique identifier for the process producing the output
                         (typically the pid)
         :param data: The output to log
         :param command: A string representing the full command line used to start
                         the process.
         """
         self._log_data("process_output", data)
 
+    @log_action(Unicode("process", default=None),
+                Unicode("signature", default="[Unknown]"),
+                TestId("test", default=None, optional=True),
+                Unicode("minidump_path", default=None, optional=True),
+                Unicode("minidump_extra", default=None, optional=True),
+                Int("stackwalk_retcode", default=None, optional=True),
+                Unicode("stackwalk_stdout", default=None, optional=True),
+                Unicode("stackwalk_stderr", default=None, optional=True),
+                List("stackwalk_errors", Unicode, default=None))
+    def crash(self, data):
+        if data["stackwalk_errors"] is None:
+            data["stackwalk_errors"] = []
+
+        self._log_data("crash", data)
 
 def _log_func(level_name):
     @log_action(Unicode("message"),
                 Any("exc_info", default=False))
     def log(self, data):
         exc_info = data.pop("exc_info", None)
         if exc_info:
             if not isinstance(exc_info, tuple):