Bug 1415290 - Check return codes in mozcrash kill_pid. r=jmaher, a=test-only
authorGeoff Brown <gbrown@mozilla.com>
Mon, 13 Nov 2017 13:12:05 -0700
changeset 444771 1d4a6dc2023c7332de374aade71742917b40d726
parent 444770 c8042c122490e90495e8c37993406f0c63eb6735
child 444772 d7122c593efe4ed1dba8a184c2a4b45d3647a63d
push id1618
push userCallek@gmail.com
push dateThu, 11 Jan 2018 17:45:48 +0000
treeherdermozilla-release@882ca853e05a [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersjmaher, test-only
bugs1415290
milestone58.0
Bug 1415290 - Check return codes in mozcrash kill_pid. r=jmaher, a=test-only
testing/mochitest/runtests.py
testing/mozbase/mozcrash/mozcrash/mozcrash.py
--- a/testing/mochitest/runtests.py
+++ b/testing/mochitest/runtests.py
@@ -1976,21 +1976,25 @@ toolbar#nav-bar {
         killPid(processPID, self.log)
 
     def extract_child_pids(self, process_log, parent_pid=None):
         """Parses the given log file for the pids of any processes launched by
         the main process and returns them as a list.
         If parent_pid is provided, and psutil is available, returns children of
         parent_pid according to psutil.
         """
+        rv = []
         if parent_pid and HAVE_PSUTIL:
             self.log.info("Determining child pids from psutil")
-            return [p.pid for p in psutil.Process(parent_pid).children()]
-
-        rv = []
+            try:
+                rv = [p.pid for p in psutil.Process(parent_pid).children()]
+            except psutil.NoSuchProcess:
+                self.log.warning("Failed to lookup children of pid %d" % parent_pid)
+            return rv
+
         pid_re = re.compile(r'==> process \d+ launched child process (\d+)')
         with open(process_log) as fd:
             for line in fd:
                 self.log.info(line.rstrip())
                 m = pid_re.search(line)
                 if m:
                     rv.append(int(m.group(1)))
         return rv
@@ -2226,16 +2230,20 @@ toolbar#nav-bar {
                 marionette_exception = sys.exc_info()
 
             # wait until app is finished
             # XXX copy functionality from
             # https://github.com/mozilla/mozbase/blob/master/mozrunner/mozrunner/runner.py#L61
             # until bug 913970 is fixed regarding mozrunner `wait` not returning status
             # see https://bugzilla.mozilla.org/show_bug.cgi?id=913970
             status = proc.wait()
+            if status is None:
+                self.log.warning("runtests.py | Failed to get app exit code - running/crashed?")
+                # must report an integer to process_exit()
+                status = 0
             self.log.process_exit("Main app process", status)
             runner.process_handler = None
 
             # finalize output handler
             outputHandler.finish()
 
             # record post-test information
             if status:
@@ -2783,38 +2791,55 @@ toolbar#nav-bar {
         self.log.info(error_message)
         self.log.error("Force-terminating active process(es).")
 
         browser_pid = browser_pid or proc.pid
         child_pids = self.extract_child_pids(processLog, browser_pid)
         self.log.info('Found child pids: %s' % child_pids)
 
         if HAVE_PSUTIL:
-            child_procs = [psutil.Process(pid) for pid in child_pids]
+            try:
+                browser_proc = [psutil.Process(browser_pid)]
+            except:
+                self.log.info('Failed to get proc for pid %d' % browser_pid)
+                browser_proc = []
+            try:
+                child_procs = [psutil.Process(pid) for pid in child_pids]
+            except:
+                self.log.info('Failed to get child procs')
+                child_procs = []
             for pid in child_pids:
                 self.killAndGetStack(pid, utilityPath, debuggerInfo,
                                      dump_screen=not debuggerInfo)
             gone, alive = psutil.wait_procs(child_procs, timeout=30)
             for p in gone:
                 self.log.info('psutil found pid %s dead' % p.pid)
             for p in alive:
                 self.log.warning('failed to kill pid %d after 30s' %
                                  p.pid)
+            self.killAndGetStack(browser_pid, utilityPath, debuggerInfo,
+                                 dump_screen=not debuggerInfo)
+            gone, alive = psutil.wait_procs(browser_proc, timeout=30)
+            for p in gone:
+                self.log.info('psutil found pid %s dead' % p.pid)
+            for p in alive:
+                self.log.warning('failed to kill pid %d after 30s' %
+                                 p.pid)
         else:
             self.log.error("psutil not available! Will wait 30s before "
                            "attempting to kill parent process. This should "
                            "not occur in mozilla automation. See bug 1143547.")
             for pid in child_pids:
                 self.killAndGetStack(pid, utilityPath, debuggerInfo,
                                      dump_screen=not debuggerInfo)
             if child_pids:
                 time.sleep(30)
 
-        self.killAndGetStack(browser_pid, utilityPath, debuggerInfo,
-                             dump_screen=not debuggerInfo)
+            self.killAndGetStack(browser_pid, utilityPath, debuggerInfo,
+                                 dump_screen=not debuggerInfo)
 
     class OutputHandler(object):
 
         """line output handler for mozrunner"""
 
         def __init__(
                 self,
                 harness,
--- a/testing/mozbase/mozcrash/mozcrash/mozcrash.py
+++ b/testing/mozbase/mozcrash/mozcrash/mozcrash.py
@@ -468,20 +468,42 @@ if mozinfo.isWin:
 
     def kill_pid(pid):
         """
         Terminate a process with extreme prejudice.
 
         :param pid: PID of the process to terminate.
         """
         PROCESS_TERMINATE = 0x0001
+        WAIT_OBJECT_0 = 0x0
+        WAIT_FAILED = -1
+        logger = get_logger()
         handle = OpenProcess(PROCESS_TERMINATE, 0, pid)
         if handle:
-            kernel32.TerminateProcess(handle, 1)
+            if kernel32.TerminateProcess(handle, 1):
+                # TerminateProcess is async; wait up to 30 seconds for process to
+                # actually terminate, then give up so that clients are not kept
+                # waiting indefinitely for hung processes.
+                status = kernel32.WaitForSingleObject(handle, 30000)
+                if status == WAIT_FAILED:
+                    err = kernel32.GetLastError()
+                    logger.warning("kill_pid(): wait failed (%d) terminating pid %d: error %d" %
+                                   (status, pid, err))
+                elif status != WAIT_OBJECT_0:
+                    logger.warning("kill_pid(): wait failed (%d) terminating pid %d" %
+                                   (status, pid))
+            else:
+                err = kernel32.GetLastError()
+                logger.warning("kill_pid(): unable to terminate pid %d: %d" %
+                               (pid, err))
             CloseHandle(handle)
+        else:
+            err = kernel32.GetLastError()
+            logger.warning("kill_pid(): unable to get handle for pid %d: %d" %
+                           (pid, err))
 else:
     def kill_pid(pid):
         """
         Terminate a process with extreme prejudice.
 
         :param pid: PID of the process to terminate.
         """
         os.kill(pid, signal.SIGKILL)