bug 501034 - add hang detection + breakpad triggering to automation.py. r=Waldo
authorTed Mielczarek <ted.mielczarek@gmail.com>
Tue, 13 Oct 2009 16:56:24 -0400
changeset 34283 c641519baa90262ccde0dcc5b09b6535d81c524c
parent 34282 0c58b4c74e3f712a9368bea1723074ae7b640c25
child 34284 3b932018ce6a0ee85d7e6b7ff9db23b558469967
push idunknown
push userunknown
push dateunknown
reviewersWaldo
bugs501034
milestone1.9.3a1pre
bug 501034 - add hang detection + breakpad triggering to automation.py. r=Waldo
build/automation-build.mk
build/automation.py.in
build/pgo/profileserver.py.in
layout/tools/reftest/runreftest.py
testing/mochitest/runtests.py.in
--- a/build/automation-build.mk
+++ b/build/automation-build.mk
@@ -63,16 +63,22 @@ AUTOMATION_PPARGS += -DIS_TEST_BUILD=0
 endif
 
 ifeq ($(MOZ_DEBUG), 1)
 AUTOMATION_PPARGS += -DIS_DEBUG_BUILD=1
 else
 AUTOMATION_PPARGS += -DIS_DEBUG_BUILD=0
 endif
 
+ifdef MOZ_CRASHREPORTER
+AUTOMATION_PPARGS += -DCRASHREPORTER=1
+else
+AUTOMATION_PPARGS += -DCRASHREPORTER=0
+endif
+
 $(CURDIR)/automationutils.py: $(MOZILLA_DIR)/build/automationutils.py
 	$(INSTALL) $< .
 
 automation.py: $(MOZILLA_DIR)/build/automation.py.in $(MOZILLA_DIR)/build/automation-build.mk $(CURDIR)/automationutils.py
 	$(PYTHON) $(MOZILLA_DIR)/config/Preprocessor.py \
 	$(AUTOMATION_PPARGS) $(DEFINES) $(ACDEFINES) $< > $@
 
 GARBAGE += automation.py $(CURDIR)/automationutils.py
--- a/build/automation.py.in
+++ b/build/automation.py.in
@@ -38,16 +38,17 @@
 # ***** END LICENSE BLOCK *****
 
 import codecs
 from datetime import datetime
 import itertools
 import logging
 import os
 import re
+import select
 import shutil
 import signal
 import subprocess
 import sys
 import threading
 
 from automationutils import checkForCrashes
 
@@ -67,18 +68,22 @@ SCRIPT_DIR = os.path.abspath(os.path.rea
            "Process",
            "initializeProfile",
            "DIST_BIN",
            "DEFAULT_APP",
            "CERTS_SRC_DIR",
            "environment",
            "IS_TEST_BUILD",
            "IS_DEBUG_BUILD",
+           "DEFAULT_TIMEOUT",
           ]
 
+# timeout, in seconds
+DEFAULT_TIMEOUT = 60.0
+
 # These are generated in mozilla/build/Makefile.in
 #expand DIST_BIN = __XPC_BIN_PATH__
 #expand IS_WIN32 = len("__WIN32__") != 0
 #expand IS_MAC = __IS_MAC__ != 0
 #expand IS_LINUX = __IS_LINUX__ != 0
 #ifdef IS_CYGWIN
 #expand IS_CYGWIN = __IS_CYGWIN__ == 1
 #else
@@ -89,16 +94,17 @@ IS_CYGWIN = False
 #expand PERL = __PERL__
 
 UNIXISH = not IS_WIN32 and not IS_MAC
 
 #expand DEFAULT_APP = "./" + __BROWSER_PATH__
 #expand CERTS_SRC_DIR = __CERTS_SRC_DIR__
 #expand IS_TEST_BUILD = __IS_TEST_BUILD__
 #expand IS_DEBUG_BUILD = __IS_DEBUG_BUILD__
+#expand CRASHREPORTER = __CRASHREPORTER__ == 1
 
 ###########
 # LOGGING #
 ###########
 
 # We use the logging system here primarily because it'll handle multiple
 # threads, which is needed to process the output of the server and application
 # processes simultaneously.
@@ -429,25 +435,60 @@ def environment(env = None, xrePath = DI
     env["PATH"] = env["PATH"] + ";" + ldLibraryPath
 
   if crashreporter:
     env['MOZ_CRASHREPORTER_NO_REPORT'] = '1'
     env['MOZ_CRASHREPORTER'] = '1'
   env['GNOME_DISABLE_CRASH_DIALOG'] = "1"
   return env
 
+if IS_WIN32:
+  import ctypes, time, msvcrt
+  PeekNamedPipe = ctypes.windll.kernel32.PeekNamedPipe
+  GetLastError = ctypes.windll.kernel32.GetLastError
+  def readWithTimeout(f, timeout):
+    """Try to read a line of output from the file object |f|.
+    |f| must be a  pipe, like the |stdout| member of a subprocess.Popen
+    object created with stdout=PIPE. If no output
+    is received within |timeout| seconds, return a blank line.
+    Returns a tuple (line, did_timeout), where |did_timeout| is True
+    if the read timed out, and False otherwise."""
+    x = msvcrt.get_osfhandle(f.fileno())
+    l = ctypes.c_long()
+    buf = ctypes.create_string_buffer('', 1024)
+    done = time.time() + timeout
+    while time.time() < done:
+        if PeekNamedPipe(x, buf, len(buf), None, ctypes.byref(l), None) == 0:
+            err = GetLastError()
+            if err == 38 or err == 109: # ERROR_HANDLE_EOF || ERROR_BROKEN_PIPE
+                return ('', False)
+        if (l > 0 and '\n' in buf.value):
+            return (f.readline(), False)
+        time.sleep(0.1)
+    return ('', True)
+else:
+  def readWithTimeout(f, timeout):
+    """Try to read a line of output from the file object |f|. If no output
+    is received within |timeout| seconds, return a blank line.
+    Returns a tuple (line, did_timeout), where |did_timeout| is True
+    if the read timed out, and False otherwise."""
+    (r, w, e) = select.select([f], [], [], timeout)
+    if len(r) == 0:
+      return ('', True)
+    return (f.readline(), False)
 
 ###############
 # RUN THE APP #
 ###############
 
 def runApp(testURL, env, app, profileDir, extraArgs,
            runSSLTunnel = False, utilityPath = DIST_BIN,
            xrePath = DIST_BIN, certPath = CERTS_SRC_DIR,
-           debuggerInfo = None, symbolsPath = None):
+           debuggerInfo = None, symbolsPath = None,
+           timeout = DEFAULT_TIMEOUT):
   "Run the app, log the duration it took to execute, return the status code."
 
   # copy env so we don't munge the caller's environment
   env = dict(env);
   env["NO_EM_RESTART"] = "1"
 
   if IS_TEST_BUILD and runSSLTunnel:
     # create certificate database for the profile
@@ -501,41 +542,52 @@ def runApp(testURL, env, app, profileDir
   proc = Process([cmd] + args,
                  env = environment(env, xrePath = xrePath,
                                    crashreporter = not debuggerInfo),
                  stdout = outputPipe,
                  stderr = subprocess.STDOUT)
   log.info("INFO | automation.py | Application pid: %d", proc.pid)
 
   stackFixerProcess = None
+  didTimeout = False
   if outputPipe is None:
     log.info("TEST-INFO: Not logging stdout or stderr due to debugger connection")
   else:
     logsource = proc.stdout
     if IS_DEBUG_BUILD:
       stackFixerCommand = None
       if IS_MAC:
         stackFixerCommand = "fix-macosx-stack.pl"
       elif IS_LINUX:
         stackFixerCommand = "fix-linux-stack.pl"
       if stackFixerCommand is not None:
         stackFixerProcess = Process([PERL, os.path.join(utilityPath, stackFixerCommand)], stdin=logsource, stdout=subprocess.PIPE)
         logsource = stackFixerProcess.stdout
 
-    line = logsource.readline()
-    while line != "":
+    (line, didTimeout) = readWithTimeout(logsource, timeout)
+    while line != "" and not didTimeout:
       log.info(line.rstrip())
-      line = logsource.readline()
+      (line, didTimeout) = readWithTimeout(logsource, timeout)
+    if didTimeout:
+      log.info("TEST-UNEXPECTED-FAIL | automation.py | application timed out after %d seconds with no output", int(timeout))
+      if CRASHREPORTER:
+        if UNIXISH:
+          os.kill(proc.pid, signal.SIGSEGV)
+        else:
+          #TODO: kill the process such that it triggers Breakpad
+          proc.kill()
+      else:
+        proc.kill()
 
   status = proc.wait()
-  if status != 0:
+  if status != 0 and not didTimeout:
     log.info("TEST-UNEXPECTED-FAIL | automation.py | Exited with code %d during test run", status)
   if stackFixerProcess is not None:
     status = stackFixerProcess.wait()
-    if status != 0:
+    if status != 0 and not didTimeout:
       log.info("TEST-UNEXPECTED-FAIL | automation.py | Stack fixer process exited with code %d during test run", status)
   log.info("INFO | automation.py | Application ran for: %s", str(datetime.now() - startTime))
 
   if checkForCrashes(os.path.join(profileDir, "minidumps"), symbolsPath):
     status = -1
 
   if IS_TEST_BUILD and runSSLTunnel:
     ssltunnelProcess.kill()
--- a/build/pgo/profileserver.py.in
+++ b/build/pgo/profileserver.py.in
@@ -63,10 +63,13 @@ if __name__ == '__main__':
   t.start()
 
   automation.initializeProfile(PROFILE_DIRECTORY)
   browserEnv = automation.environment()
   browserEnv["XPCOM_DEBUG_BREAK"] = "warn"
 
   url = "http://localhost:%d/index.html" % PORT
   appPath = os.path.join(SCRIPT_DIR, automation.DEFAULT_APP)
-  status = automation.runApp(url, browserEnv, appPath, PROFILE_DIRECTORY, {})
+  status = automation.runApp(url, browserEnv, appPath, PROFILE_DIRECTORY, {},
+                             # the profiling HTML doesn't output anything,
+                             # so let's just ignore this for now.
+                             timeout = None)
   sys.exit(status)
--- a/layout/tools/reftest/runreftest.py
+++ b/layout/tools/reftest/runreftest.py
@@ -168,17 +168,20 @@ Are you executing $objdir/_tests/reftest
     # then again to actually run reftest
     automation.log.info("REFTEST INFO | runreftest.py | Running tests: start.\n")
     reftestlist = getFullPath(args[0])
     status = automation.runApp(None, browserEnv, options.app, profileDir,
                                ["-reftest", reftestlist],
                                utilityPath = options.utilityPath,
                                xrePath=options.xrePath,
                                debuggerInfo=debuggerInfo,
-                               symbolsPath=options.symbolsPath)
+                               symbolsPath=options.symbolsPath,
+                               # give the JS harness 30 seconds to deal
+                               # with its own timeouts
+                               timeout=options.timeout + 30.0)
     processLeakLog(leakLogFile, options.leakThreshold)
     automation.log.info("\nREFTEST INFO | runreftest.py | Running tests: end.")
   finally:
     if profileDir:
       shutil.rmtree(profileDir)
   sys.exit(status)
 
 def copyExtraFilesToProfile(options, profileDir):
--- a/testing/mochitest/runtests.py.in
+++ b/testing/mochitest/runtests.py.in
@@ -450,26 +450,31 @@ Are you executing $objdir/_tests/testing
   # We don't care to call |processLeakLog()| for this step.
   automation.log.info("\nINFO | runtests.py | Performing extension manager registration: end.")
 
   # Remove the leak detection file so it can't "leak" to the tests run.
   # The file is not there if leak logging was not enabled in the application build.
   if os.path.exists(LEAK_REPORT_FILE):
     os.remove(LEAK_REPORT_FILE)
 
-  # then again to actually run reftest
+  # then again to actually run mochitest
+  # don't use a timeout if we're not auto-running
+  #XXX: use the value from --timeout when bug 521130 lands, for now
+  # use the Mochitest JS harness's timeout + 30 seconds
+  timeout = options.autorun and 330.0 or None
   automation.log.info("INFO | runtests.py | Running tests: start.\n")
   status = automation.runApp(testURL, browserEnv, options.app,
                              PROFILE_DIRECTORY, options.browserArgs,
                              runSSLTunnel = True,
                              utilityPath = options.utilityPath,
                              xrePath = options.xrePath,
                              certPath=options.certPath,
                              debuggerInfo=debuggerInfo,
-                             symbolsPath=options.symbolsPath)
+                             symbolsPath=options.symbolsPath,
+                             timeout = timeout)
 
   # Server's no longer needed, and perhaps more importantly, anything it might
   # spew to console shouldn't disrupt the leak information table we print next.
   server.stop()
 
   processLeakLog(LEAK_REPORT_FILE, options.leakThreshold)
   automation.log.info("\nINFO | runtests.py | Running tests: end.")