Bug 855681 - Try to get stacks from child processes that were alive after shutdown; r=ted
author: Ed Morley <emorley@mozilla.com>
Wed, 03 Apr 2013 20:45:26 +0100
changeset 127544 1c3872e0e29223389389d970b7d7d0e7a0ffb484
parent 127543 17e5b3d4837c005cbf7a19daa78ef99bb5200dea
child 127545 2233d794801321819303798a8ca0c3f0e6102b63
push id: 24507
push user: ryanvm@gmail.com
push date: Thu, 04 Apr 2013 02:14:09 +0000
treeherder: mozilla-central@445d8eecdd80 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: ted
bugs: 855681
milestone: 23.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 855681 - Try to get stacks from child processes that were alive after shutdown; r=ted
build/automation.py.in
--- a/build/automation.py.in
+++ b/build/automation.py.in
@@ -903,17 +903,17 @@ user_pref("camino.use_system_proxy_setti
       l = ctypes.c_long()
       done = time.time() + timeout
       while time.time() < done:
         if self.PeekNamedPipe(x, None, 0, None, ctypes.byref(l), None) == 0:
           err = self.GetLastError()
           if err == 38 or err == 109: # ERROR_HANDLE_EOF || ERROR_BROKEN_PIPE
             return ('', False)
           else:
-            log.error("readWithTimeout got error: %d", err)
+            self.log.error("readWithTimeout got error: %d", err)
         if l.value > 0:
           # we're assuming that the output is line-buffered,
           # which is not unreasonable
           return (f.readline(), False)
         time.sleep(0.01)
       return ('', True)
 
     def isPidAlive(self, pid):
@@ -927,17 +927,17 @@ user_pref("camino.use_system_proxy_setti
       ctypes.windll.kernel32.CloseHandle(pHandle)
       return pExitCode.value == STILL_ACTIVE
 
     def killPid(self, pid):
       PROCESS_TERMINATE = 0x0001
       pHandle = ctypes.windll.kernel32.OpenProcess(PROCESS_TERMINATE, 0, pid)
       if not pHandle:
         return
-      success = ctypes.windll.kernel32.TerminateProcess(pHandle, 1)
+      ctypes.windll.kernel32.TerminateProcess(pHandle, 1)
       ctypes.windll.kernel32.CloseHandle(pHandle)
 
   else:
 
     def readWithTimeout(self, f, timeout):
       """Try to read a line of output from the file object |f|. If no output
       is received within |timeout| seconds, return a blank line.
       Returns a tuple (line, did_timeout), where |did_timeout| is True
@@ -1009,37 +1009,37 @@ user_pref("camino.use_system_proxy_setti
           image = imgfile.read()
     except IOError, err:
         self.log.info("Failed to read image from %s", imgoutput)
 
     import base64
     encoded = base64.b64encode(image)
     self.log.info("SCREENSHOT: data:image/png;base64,%s", encoded)
 
-  def killAndGetStack(self, proc, utilityPath, debuggerInfo):
+  def killAndGetStack(self, processPID, utilityPath, debuggerInfo):
     """Kill the process, preferrably in a way that gets us a stack trace."""
     if not debuggerInfo:
       if self.haveDumpedScreen:
         self.log.info("Not taking screenshot here: see the one that was previously logged")
       else:
         self.dumpScreen(utilityPath)
 
     if self.CRASHREPORTER and not debuggerInfo:
       if self.UNIXISH:
         # ABRT will get picked up by Breakpad's signal handler
-        os.kill(proc.pid, signal.SIGABRT)
+        os.kill(processPID, signal.SIGABRT)
         return
       elif self.IS_WIN32:
         # We should have a "crashinject" program in our utility path
         crashinject = os.path.normpath(os.path.join(utilityPath, "crashinject.exe"))
-        if os.path.exists(crashinject) and subprocess.Popen([crashinject, str(proc.pid)]).wait() == 0:
+        if os.path.exists(crashinject) and subprocess.Popen([crashinject, str(processPID)]).wait() == 0:
           return
       #TODO: kill the process such that it triggers Breakpad on OS X (bug 525296)
     self.log.info("Can't trigger Breakpad, just killing process")
-    proc.kill()
+    self.killPid(processPID)
 
   def waitForFinish(self, proc, utilityPath, timeout, maxTime, startTime, debuggerInfo, symbolsPath):
     """ Look for timeout or crashes and return the status after the process terminates """
     stackFixerProcess = None
     stackFixerFunction = None
     didTimeout = False
     hitMaxTime = False
     if proc.stdout is None:
@@ -1081,20 +1081,20 @@ user_pref("camino.use_system_proxy_setti
           else:
             self.dumpScreen(utilityPath)
 
         (line, didTimeout) = self.readWithTimeout(logsource, timeout)
         if not hitMaxTime and maxTime and datetime.now() - startTime > timedelta(seconds = maxTime):
           # Kill the application, but continue reading from stack fixer so as not to deadlock on stackFixerProcess.wait().
           hitMaxTime = True
           self.log.info("TEST-UNEXPECTED-FAIL | %s | application ran for longer than allowed maximum time of %d seconds", self.lastTestSeen, int(maxTime))
-          self.killAndGetStack(proc, utilityPath, debuggerInfo)
+          self.killAndGetStack(proc.pid, utilityPath, debuggerInfo)
       if didTimeout:
         self.log.info("TEST-UNEXPECTED-FAIL | %s | application timed out after %d seconds with no output", self.lastTestSeen, int(timeout))
-        self.killAndGetStack(proc, utilityPath, debuggerInfo)
+        self.killAndGetStack(proc.pid, utilityPath, debuggerInfo)
 
     status = proc.wait()
     if status == 0:
       self.lastTestSeen = "Main app process exited normally"
     if status != 0 and not didTimeout and not hitMaxTime:
       self.log.info("TEST-UNEXPECTED-FAIL | %s | Exited with code %d during test run", self.lastTestSeen, status)
     if stackFixerProcess is not None:
       fixerStatus = stackFixerProcess.wait()
@@ -1130,17 +1130,17 @@ user_pref("camino.use_system_proxy_setti
     if testURL is not None:
       if self.IS_CAMINO:
         args.extend(("-url", testURL))
       else:
         args.append((testURL))
     args.extend(extraArgs)
     return cmd, args
 
-  def checkForZombies(self, processLog):
+  def checkForZombies(self, processLog, utilityPath, debuggerInfo):
     """ Look for hung processes """
     if not os.path.exists(processLog):
       self.log.info('Automation Error: PID log not found: %s', processLog)
       # Whilst no hung process was found, the run should still display as a failure
       return True
 
     foundZombie = False
     self.log.info('INFO | zombiecheck | Reading PID log: %s', processLog)
@@ -1154,17 +1154,17 @@ user_pref("camino.use_system_proxy_setti
         processList.append(int(m.group(1)))
     processLogFD.close()
 
     for processPID in processList:
       self.log.info("INFO | zombiecheck | Checking for orphan process with PID: %d", processPID)
       if self.isPidAlive(processPID):
         foundZombie = True
         self.log.info("TEST-UNEXPECTED-FAIL | zombiecheck | child process %d still alive after shutdown", processPID)
-        self.killPid(processPID)
+        self.killAndGetStack(processPID, utilityPath, debuggerInfo)
     return foundZombie
 
   def checkForCrashes(self, profileDir, symbolsPath):
     return mozcrash.check_for_crashes(os.path.join(profileDir, "minidumps"), symbolsPath, test_name=self.lastTestSeen)
 
   def runApp(self, testURL, env, app, profileDir, extraArgs,
              runSSLTunnel = False, utilityPath = None,
              xrePath = None, certPath = None,
@@ -1230,17 +1230,17 @@ user_pref("camino.use_system_proxy_setti
       # Allow callers to specify an onLaunch callback to be fired after the
       # app is launched.
       onLaunch()
 
     status = self.waitForFinish(proc, utilityPath, timeout, maxTime, startTime, debuggerInfo, symbolsPath)
     self.log.info("INFO | automation.py | Application ran for: %s", str(datetime.now() - startTime))
 
     # Do a final check for zombie child processes.
-    zombieProcesses = self.checkForZombies(processLog)
+    zombieProcesses = self.checkForZombies(processLog, utilityPath, debuggerInfo)
 
     crashed = self.checkForCrashes(profileDir, symbolsPath)
 
     if crashed or zombieProcesses:
       status = 1
 
     if os.path.exists(processLog):
       os.unlink(processLog)