Bug 1274584 - [mozprocess] Improve debugging messages on windows, r=jgriffin
authorAndrew Halberstadt <ahalberstadt@mozilla.com>
Mon, 30 May 2016 11:00:59 -0400
changeset 339213 fe37010e2b84f4374bed42220615f30807efb781
parent 339212 d9f96e5937d9f151fff347a60159082188d467b5
child 339214 fd75d39e98b629bfd05189d2c0b493cad17b3a2e
push id6249
push userjlund@mozilla.com
push dateMon, 01 Aug 2016 13:59:36 +0000
treeherdermozilla-beta@bad9d4f5bf7e [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersjgriffin
bugs1274584
milestone49.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1274584 - [mozprocess] Improve debugging messages on windows, r=jgriffin We can set MOZPROCESS_DEBUG to help debug windows process code. However on try it is unreadable as there are multiple things using mozprocess, and each process has multiple threads. It's impossible to tell which log message comes from where. This improves the debug logs a bit by always specifying the PID and thread name. There are a few other drive-by cleanups in this thread. The only one of note is removing a python 2.5 only code path. MozReview-Commit-ID: L3DQPW0Is5v
testing/mozbase/mozprocess/mozprocess/processhandler.py
--- a/testing/mozbase/mozprocess/mozprocess/processhandler.py
+++ b/testing/mozbase/mozprocess/mozprocess/processhandler.py
@@ -8,16 +8,17 @@ import os
 import signal
 import subprocess
 import sys
 import threading
 import time
 import traceback
 from Queue import Queue, Empty
 from datetime import datetime
+
 __all__ = ['ProcessHandlerMixin', 'ProcessHandler', 'LogOutput',
            'StoreOutput', 'StreamOutput']
 
 # Set the MOZPROCESS_DEBUG environment variable to 1 to see some debugging output
 MOZPROCESS_DEBUG = os.getenv("MOZPROCESS_DEBUG")
 
 # We dont use mozinfo because it is expensive to import, see bug 933558.
 isWin = os.name == "nt"
@@ -26,16 +27,17 @@ isPosix = os.name == "posix" # includes 
 if isWin:
     import ctypes, ctypes.wintypes, msvcrt
     from ctypes import sizeof, addressof, c_ulong, byref, WinError, c_longlong
     from . import winprocess
     from .qijo import JobObjectAssociateCompletionPortInformation,\
     JOBOBJECT_ASSOCIATE_COMPLETION_PORT, JobObjectExtendedLimitInformation,\
     JOBOBJECT_BASIC_LIMIT_INFORMATION, JOBOBJECT_EXTENDED_LIMIT_INFORMATION, IO_COUNTERS
 
+
 class ProcessHandlerMixin(object):
     """
     A class for launching and manipulating local processes.
 
     :param cmd: command to run. May be a string or a list. If specified as a list, the first element will be interpreted as the command, and all additional elements will be interpreted as arguments to that command.
     :param args: list of arguments to pass to the command (defaults to None). Must not be set when `cmd` is specified as a list.
     :param cwd: working directory for command (defaults to None).
     :param env: is the environment to use for the process (defaults to os.environ).
@@ -102,44 +104,51 @@ class ProcessHandlerMixin(object):
                                           stdin, stdout, stderr,
                                           preexec_fn, close_fds,
                                           shell, cwd, env,
                                           universal_newlines, startupinfo, creationflags)
             except OSError:
                 print >> sys.stderr, args
                 raise
 
+        def debug(self, msg):
+            if not MOZPROCESS_DEBUG:
+                return
+            thread = threading.current_thread().name
+            print("DBG::MOZPROC PID:{} ({}) | {}".format(self.pid, thread, msg))
+
         def __del__(self, _maxint=sys.maxint):
             if isWin:
                 handle = getattr(self, '_handle', None)
                 if handle:
                     if hasattr(self, '_internal_poll'):
                         self._internal_poll(_deadstate=_maxint)
                     else:
                         self.poll(_deadstate=sys.maxint)
                 if handle or self._job or self._io_port:
                     self._cleanup()
             else:
                 subprocess.Popen.__del__(self)
 
         def kill(self, sig=None):
             if isWin:
                 if not self._ignore_children and self._handle and self._job:
+                    self.debug("calling TerminateJobObject")
                     winprocess.TerminateJobObject(self._job, winprocess.ERROR_CONTROL_C_EXIT)
                     self.returncode = winprocess.GetExitCodeProcess(self._handle)
                 elif self._handle:
-                    err = None
+                    self.debug("calling TerminateProcess")
                     try:
                         winprocess.TerminateProcess(self._handle, winprocess.ERROR_CONTROL_C_EXIT)
                     except:
-                        err = "Could not terminate process"
-                    winprocess.GetExitCodeProcess(self._handle)
-                    self._cleanup()
-                    if err is not None:
-                        raise OSError(err)
+                        traceback.print_exc()
+                        raise OSError("Could not terminate process")
+                    finally:
+                        winprocess.GetExitCodeProcess(self._handle)
+                        self._cleanup()
             else:
                 def send_sig(sig):
                     if not self._ignore_children:
                         try:
                             os.killpg(self.pid, sig)
                         except BaseException as e:
                             if getattr(e, "errno", None) != 3:
                                 # Error 3 is "no such process", which is ok
@@ -359,18 +368,17 @@ falling back to not using job objects fo
                 except KeyboardInterrupt:
                     raise KeyboardInterrupt
 
             def _poll_iocompletion_port(self):
                 # Watch the IO Completion port for status
                 self._spawned_procs = {}
                 countdowntokill = 0
 
-                if MOZPROCESS_DEBUG:
-                    print "DBG::MOZPROC Self.pid value is: %s" % self.pid
+                self.debug("start polling IO completion port")
 
                 while True:
                     msgid = c_ulong(0)
                     compkey = c_ulong(0)
                     pid = c_ulong(0)
                     portstatus = winprocess.GetQueuedCompletionStatus(self._io_port,
                                                                       byref(msgid),
                                                                       byref(compkey),
@@ -385,16 +393,17 @@ falling back to not using job objects fo
                         # Windows sometimes takes a small nap between notifying the
                         # IO Completion port and actually killing the children, and we
                         # don't want to mistake that situation for the situation of an unexpected
                         # parent abort (which is what we're looking for here).
                         if diff.seconds > self.MAX_IOCOMPLETION_PORT_NOTIFICATION_DELAY:
                             print >> sys.stderr, "Parent process %s exited with children alive:" % self.pid
                             print >> sys.stderr, "PIDS: %s" %  ', '.join([str(i) for i in self._spawned_procs])
                             print >> sys.stderr, "Attempting to kill them..."
+
                             self.kill()
                             self._process_events.put({self.pid: 'FINISHED'})
 
                     if not portstatus:
                         # Check to see what happened
                         errcode = winprocess.GetLastError()
                         if errcode == winprocess.ERROR_ABANDONED_WAIT_0:
                             # Then something has killed the port, break the loop
@@ -405,112 +414,101 @@ falling back to not using job objects fo
                             # Timeouts are expected, just keep on polling
                             continue
                         else:
                             print >> sys.stderr, "Error Code %s trying to query IO Completion Port, exiting" % errcode
                             raise WinError(errcode)
                             break
 
                     if compkey.value == winprocess.COMPKEY_TERMINATE.value:
-                        if MOZPROCESS_DEBUG:
-                            print "DBG::MOZPROC compkeyterminate detected"
+                        self.debug("compkeyterminate detected")
                         # Then we're done
                         break
 
                     # Check the status of the IO Port and do things based on it
                     if compkey.value == winprocess.COMPKEY_JOBOBJECT.value:
                         if msgid.value == winprocess.JOB_OBJECT_MSG_ACTIVE_PROCESS_ZERO:
                             # No processes left, time to shut down
                             # Signal anyone waiting on us that it is safe to shut down
-                            if MOZPROCESS_DEBUG:
-                                print "DBG::MOZPROC job object msg active processes zero"
+                            self.debug("job object msg active processes zero")
                             self._process_events.put({self.pid: 'FINISHED'})
                             break
                         elif msgid.value == winprocess.JOB_OBJECT_MSG_NEW_PROCESS:
                             # New Process started
                             # Add the child proc to our list in case our parent flakes out on us
                             # without killing everything.
                             if pid.value != self.pid:
                                 self._spawned_procs[pid.value] = 1
-                                if MOZPROCESS_DEBUG:
-                                    print "DBG::MOZPROC new process detected with pid value: %s" % pid.value
+                                self.debug("new process detected with pid value: %s" % pid.value)
                         elif msgid.value == winprocess.JOB_OBJECT_MSG_EXIT_PROCESS:
-                            if MOZPROCESS_DEBUG:
-                                print "DBG::MOZPROC process id %s exited normally" % pid.value
+                            self.debug("process id %s exited normally" % pid.value)
                             # One process exited normally
                             if pid.value == self.pid and len(self._spawned_procs) > 0:
                                 # Parent process dying, start countdown timer
                                 countdowntokill = datetime.now()
                             elif pid.value in self._spawned_procs:
                                 # Child Process died remove from list
                                 del(self._spawned_procs[pid.value])
                         elif msgid.value == winprocess.JOB_OBJECT_MSG_ABNORMAL_EXIT_PROCESS:
                             # One process existed abnormally
-                            if MOZPROCESS_DEBUG:
-                                print "DBG::MOZPROC process id %s existed abnormally" % pid.value
+                            self.debug("process id %s exited abnormally" % pid.value)
                             if pid.value == self.pid and len(self._spawned_procs) > 0:
                                 # Parent process dying, start countdown timer
                                 countdowntokill = datetime.now()
                             elif pid.value in self._spawned_procs:
                                 # Child Process died remove from list
                                 del self._spawned_procs[pid.value]
                         else:
                             # We don't care about anything else
-                            if MOZPROCESS_DEBUG:
-                                print "DBG::MOZPROC We got a message %s" % msgid.value
+                            self.debug("We got a message %s" % msgid.value)
                             pass
 
             def _wait(self):
-
                 # First, check to see if the process is still running
                 if self._handle:
                     self.returncode = winprocess.GetExitCodeProcess(self._handle)
                 else:
                     # Dude, the process is like totally dead!
                     return self.returncode
 
-                # Python 2.5 uses isAlive versus is_alive use the proper one
                 threadalive = False
                 if hasattr(self, "_procmgrthread"):
-                    if hasattr(self._procmgrthread, 'is_alive'):
-                        threadalive = self._procmgrthread.is_alive()
-                    else:
-                        threadalive = self._procmgrthread.isAlive()
+                    threadalive = self._procmgrthread.is_alive()
                 if self._job and threadalive:
+                    self.debug("waiting with IO completion port")
                     # Then we are managing with IO Completion Ports
                     # wait on a signal so we know when we have seen the last
                     # process come through.
                     # We use queues to synchronize between the thread and this
                     # function because events just didn't have robust enough error
                     # handling on pre-2.7 versions
                     err = None
                     try:
                         # timeout is the max amount of time the procmgr thread will wait for
                         # child processes to shutdown before killing them with extreme prejudice.
                         item = self._process_events.get(timeout=self.MAX_IOCOMPLETION_PORT_NOTIFICATION_DELAY +
                                                                 self.MAX_PROCESS_KILL_DELAY)
                         if item[self.pid] == 'FINISHED':
+                            self.debug("received 'FINISHED' from _procmgrthread")
                             self._process_events.task_done()
                     except:
-                        err = "IO Completion Port failed to signal process shutdown"
-                    # Either way, let's try to get this code
-                    if self._handle:
-                        self.returncode = winprocess.GetExitCodeProcess(self._handle)
-                    self._cleanup()
-
-                    if err is not None:
-                        raise OSError(err)
-
+                        traceback.print_exc()
+                        raise OSError("IO Completion Port failed to signal process shutdown")
+                    finally:
+                        if self._handle:
+                            self.returncode = winprocess.GetExitCodeProcess(self._handle)
+                        self._cleanup()
 
                 else:
                     # Not managing with job objects, so all we can reasonably do
                     # is call waitforsingleobject and hope for the best
+                    self.debug("waiting without IO completion port")
 
-                    if MOZPROCESS_DEBUG and not self._ignore_children:
-                        print "DBG::MOZPROC NOT USING JOB OBJECTS!!!"
+                    if not self._ignore_children:
+                        self.debug("NOT USING JOB OBJECTS!!!")
                     # First, make sure we have not already ended
                     if self.returncode != winprocess.STILL_ACTIVE:
                         self._cleanup()
                         return self.returncode
 
                     rc = None
                     if self._handle:
                         rc = winprocess.WaitForSingleObject(self._handle, -1)