Bug 1194076 - replace ffprocess.py code with psutil API calls. r=jmaher
authorJulien Pagès <j.parkouss@gmail.com>
Thu, 03 Sep 2015 09:51:44 +0200
changeset 1008 3fa2c76a3fb7
parent 1007 cbea3dea9d0c
child 1009 3b94adbd66f1
push id697
push userj.parkouss@gmail.com
push dateThu, 03 Sep 2015 10:57:20 +0000
reviewersjmaher
bugs1194076
Bug 1194076 - replace ffprocess.py code with psutil API calls. r=jmaher
talos/ffprocess.py
talos/talos_process.py
talos/ttest.py
deleted file mode 100755
--- a/talos/ffprocess.py
+++ /dev/null
@@ -1,101 +0,0 @@
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-"""Firefox process management for Talos"""
-
-import os
-import signal
-import time
-import logging
-import mozinfo
-from utils import TalosError
-
-
-def is_running(pid):
-    """returns if a pid is running"""
-    if mozinfo.isWin:
-        from ctypes import sizeof, windll, addressof
-        from ctypes.wintypes import DWORD
-
-        BIG_ARRAY = DWORD * 4096
-        processes = BIG_ARRAY()
-        needed = DWORD()
-
-        pids = []
-        result = windll.psapi.EnumProcesses(processes,
-                                            sizeof(processes),
-                                            addressof(needed))
-        if result:
-            num_results = needed.value / sizeof(DWORD)
-            for i in range(num_results):
-                pids.append(int(processes[i]))
-    else:
-        from mozprocess import pid as mozpid
-        pids = [int(i['PID']) for i in mozpid.ps()]
-
-    return bool([i for i in pids if pid == i])
-
-
-def running_processes(pids):
-    """filter pids and return only the running ones"""
-    return [pid for pid in pids if is_running(pid)]
-
-
-if mozinfo.os == 'win':
-    def terminate_process(pid, timeout):
-        from mozprocess import wpk
-        wpk.kill_pid(pid)
-else:
-    if mozinfo.os == 'linux':
-        DEFAULT_SIGNALS = ('SIGABRT', 'SIGTERM', 'SIGKILL')
-    else:
-        DEFAULT_SIGNALS = ('SIGTERM', 'SIGKILL')
-
-    def terminate_process(pid, timeout):
-        ret = ''
-        try:
-            for sig in DEFAULT_SIGNALS:
-                if is_running(pid):
-                    os.kill(pid, getattr(signal, sig))
-                    time.sleep(timeout)
-                    ret = 'killed with %s' % sig
-        except OSError, (errno, strerror):
-            print 'WARNING: failed os.kill: %s : %s' % (errno, strerror)
-        return ret
-
-
-def terminate_processes(pids, timeout):
-    results = []
-    for pid in pids[:]:
-        ret = terminate_process(pid, timeout)
-        if ret:
-            results.append("(%s): %s" % (pid, ret))
-        else:
-            # Remove PIDs which are already terminated
-            # TODO: we will never be here on windows!
-            pids.remove(pid)
-    return ",".join(results)
-
-
-def cleanup_processes(pids, timeout):
-    # kill any remaining browser processes
-    # returns string of which process_names were terminated and with
-    # what signal
-
-    logging.debug("Terminating: %s", ", ".join(str(pid) for pid in pids))
-    terminate_result = terminate_processes(pids, timeout)
-    # check if anything is left behind
-    if running_processes(pids):
-        # this is for windows machines.  when attempting to send kill
-        # messages to win processes the OS
-        # always gives the process a chance to close cleanly before
-        # terminating it, this takes longer
-        # and we need to give it a little extra time to complete
-        time.sleep(timeout)
-        process_pids = running_processes(pids)
-        if process_pids:
-            raise TalosError(
-                "failed to cleanup process with PID: %s" % process_pids)
-
-    return terminate_result
--- a/talos/talos_process.py
+++ b/talos/talos_process.py
@@ -1,20 +1,45 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this file,
 # You can obtain one at http://mozilla.org/MPL/2.0/.
 
 import time
 import logging
+import psutil
 from mozprocess import ProcessHandler
 from threading import Event
 
 from utils import TalosError
 
 
+class ProcessContext(object):
+    """
+    Store useful results of the browser execution.
+    """
+    def __init__(self):
+        self.output = None
+        self.process = None
+
+    @property
+    def pid(self):
+        return self.process and self.process.pid
+
+    def kill_process(self):
+        if self.process and self.process.is_running():
+            logging.debug("Terminating %s", self.process)
+            self.process.terminate()
+            try:
+                self.process.wait(3)
+            except psutil.TimeoutExpired:
+                self.process.kill()
+                # will raise TimeoutExpired if unable to kill
+                self.process.wait(3)
+
+
 class Reader(object):
     def __init__(self, event):
         self.output = []
         self.got_end_timestamp = False
         self.event = event
 
     def __call__(self, line):
         if line.find('__endTimestamp') != -1:
@@ -29,37 +54,42 @@ class Reader(object):
 
 def run_browser(command, timeout=None, on_started=None, **kwargs):
     """
     Run the browser using the given `command`.
 
     After the browser prints __endTimestamp, we give it 5
     seconds to quit and kill it if it's still alive at that point.
 
+    Note that this method ensure that the process is killed at
+    the end. If this is not possible, an exception will be raised.
+
     :param command: the commad (as a string list) to run the browser
     :param timeout: if specified, timeout to wait for the browser before
                     we raise a :class:`TalosError`
     :param on_started: a callback that can be used to do things just after
                        the browser has been started
     :param kwargs: additional keyword arguments for the :class:`ProcessHandler`
                    instance
 
-    Returns a tuple (pid, output) where output is a list of strings.
+    Returns a ProcessContext instance, with available output and pid used.
     """
+    context = ProcessContext()
     first_time = int(time.time()) * 1000
     wait_for_quit_timeout = 5
     event = Event()
     reader = Reader(event)
 
     kwargs['storeOutput'] = False
     kwargs['processOutputLine'] = reader
     kwargs['onFinish'] = event.set
     proc = ProcessHandler(command, **kwargs)
     proc.run()
     try:
+        context.process = psutil.Process(proc.pid)
         if on_started:
             on_started()
         # wait until we saw __endTimestamp in the proc output,
         # or the browser just terminated - or we have a timeout
         if not event.wait(timeout):
             raise TalosError("timeout")
         if reader.got_end_timestamp:
             for i in range(1, wait_for_quit_timeout):
@@ -67,21 +97,21 @@ def run_browser(command, timeout=None, o
                     break
             if proc.poll() is None:
                 logging.info(
                     "Browser shutdown timed out after {0} seconds, terminating"
                     " process.".format(wait_for_quit_timeout)
                 )
     finally:
         # this also handle KeyboardInterrupt
-        if proc.poll() is None:
-            proc.kill()
-            proc.wait(timeout=1)  # wait a bit for complete termination
+        # ensure early the process is really terminated
+        context.kill_process()
 
     reader.output.append(
         "__startBeforeLaunchTimestamp%d__endBeforeLaunchTimestamp"
         % first_time)
     reader.output.append(
         "__startAfterTerminationTimestamp%d__endAfterTerminationTimestamp"
         % (int(time.time()) * 1000))
 
     logging.info("Browser exited with error code: {0}".format(proc.returncode))
-    return proc.pid, reader.output
+    context.output = reader.output
+    return context
--- a/talos/ttest.py
+++ b/talos/ttest.py
@@ -11,85 +11,57 @@
      - collects info on any counters while test runs
      - waits for a 'dump' from the browser
 """
 
 import os
 import sys
 import platform
 import results
-import traceback
 import subprocess
 import utils
 import mozcrash
 import talosconfig
 import shutil
 import mozfile
 import logging
 
 from talos.utils import TalosError, TalosCrash, TalosRegression
-from talos import ffprocess
 from talos.talos_process import run_browser
 from talos.ffsetup import FFSetup
 from talos.cmanager import CounterManagement
 
 
 class TTest(object):
 
-    _pids = []
     if platform.system() == "Linux":
         platform_type = 'linux_'
     elif platform.system() in ("Windows", "Microsoft"):
         if '5.1' in platform.version():  # winxp
             platform_type = 'win_'
         elif '6.1' in platform.version():  # w7
             platform_type = 'w7_'
         elif '6.2' in platform.version():  # w8
             platform_type = 'w8_'
         else:
             raise TalosError('unsupported windows version')
     elif platform.system() == "Darwin":
         platform_type = 'mac_'
 
-    def cleanupAndCheckForCrashes(self, browser_config, profile_dir,
-                                  test_name):
-        """cleanup browser processes and process crashes if found"""
-
-        # cleanup processes
-        ffprocess.cleanup_processes(self._pids,
-                                    browser_config['browser_wait'])
-
+    def check_for_crashes(self, browser_config, profile_dir, test_name):
         # check for minidumps
         minidumpdir = os.path.join(profile_dir, 'minidumps')
         found = mozcrash.check_for_crashes(minidumpdir,
                                            browser_config['symbols_path'],
                                            test_name=test_name)
         mozfile.remove(minidumpdir)
 
         if found:
             raise TalosCrash("Found crashes after test run, terminating test")
 
-    def testCleanup(self, browser_config, profile_dir, test_config):
-        try:
-            if profile_dir:
-                try:
-                    self.cleanupAndCheckForCrashes(browser_config,
-                                                   profile_dir,
-                                                   test_config['name'])
-                except TalosError:
-                    # ignore this error since we have already checked for
-                    # crashes earlier
-                    pass
-
-        except TalosError, te:
-            logging.debug("cleanup error: %s", te)
-        except Exception:
-            logging.debug("unknown error during cleanup: %s"
-                          % (traceback.format_exc(),))
-
     def runTest(self, browser_config, test_config):
         """
             Runs an url based test on the browser as specified in the
             browser_config dictionary
 
         Args:
             browser_config:  Dictionary of configuration options for the
                              browser (paths, prefs, etc)
@@ -100,210 +72,200 @@ class TTest(object):
 
         logging.debug("operating with platform_type : %s", self.platform_type)
         with FFSetup(browser_config, test_config) as setup:
             return self._runTest(browser_config, test_config, setup)
 
     def _runTest(self, browser_config, test_config, setup):
         counters = test_config.get(self.platform_type + 'counters', [])
         resolution = test_config['resolution']
-        try:
+
+        # add the mainthread_io to the environment variable, as defined
+        # in test.py configs
+        here = os.path.dirname(os.path.realpath(__file__))
+        if test_config['mainthread']:
+            mainthread_io = os.path.join(here, "mainthread_io.log")
+            setup.env['MOZ_MAIN_THREAD_IO_LOG'] = mainthread_io
+
+        test_config['url'] = utils.interpolate(
+            test_config['url'],
+            profile=setup.profile_dir,
+            firefox=browser_config['browser_path']
+        )
+
+        # setup global (cross-cycle) counters:
+        # shutdown, responsiveness
+        global_counters = {}
+        if browser_config.get('xperf_path'):
+            for c in test_config.get('xperf_counters', []):
+                global_counters[c] = []
+
+        if test_config['shutdown']:
+            global_counters['shutdown'] = []
+        if test_config.get('responsiveness') and \
+                platform.system() != "Linux":
+            # ignore responsiveness tests on linux until we fix
+            # Bug 710296
+            setup.env['MOZ_INSTRUMENT_EVENT_LOOP'] = '1'
+            setup.env['MOZ_INSTRUMENT_EVENT_LOOP_THRESHOLD'] = '20'
+            setup.env['MOZ_INSTRUMENT_EVENT_LOOP_INTERVAL'] = '10'
+            global_counters['responsiveness'] = []
 
-            # add the mainthread_io to the environment variable, as defined
-            # in test.py configs
-            here = os.path.dirname(os.path.realpath(__file__))
-            if test_config['mainthread']:
-                mainthread_io = os.path.join(here, "mainthread_io.log")
-                setup.env['MOZ_MAIN_THREAD_IO_LOG'] = mainthread_io
+        # instantiate an object to hold test results
+        test_results = results.TestResults(
+            test_config,
+            global_counters
+        )
+
+        for i in range(test_config['cycles']):
+            logging.info("Running cycle %d/%d for %s test...",
+                         i+1, test_config['cycles'], test_config['name'])
+
+            # remove the browser  error file
+            mozfile.remove(browser_config['error_filename'])
 
-            test_config['url'] = utils.interpolate(
+            # reinstall any file whose stability we need to ensure across
+            # the cycles
+            if test_config.get('reinstall', ''):
+                for keep in test_config['reinstall']:
+                    origin = os.path.join(test_config['profile_path'],
+                                          keep)
+                    dest = os.path.join(setup.profile_dir, keep)
+                    logging.debug("Reinstalling %s on top of %s", origin,
+                                  dest)
+                    shutil.copy(origin, dest)
+
+            # Run the test
+            timeout = test_config.get('timeout', 7200)  # 2 hours default
+            if setup.sps_profile:
+                # When profiling, give the browser some extra time
+                # to dump the profile.
+                timeout += 5 * 60
+
+            command_args = utils.GenerateBrowserCommandLine(
+                browser_config["browser_path"],
+                browser_config["extra_args"],
+                setup.profile_dir,
                 test_config['url'],
-                profile=setup.profile_dir,
-                firefox=browser_config['browser_path']
+                profiling_info=(setup.sps_profile.profiling_info
+                                if setup.sps_profile else None)
             )
 
-            # setup global (cross-cycle) counters:
-            # shutdown, responsiveness
-            global_counters = {}
-            if browser_config.get('xperf_path'):
-                for c in test_config.get('xperf_counters', []):
-                    global_counters[c] = []
-
-            if test_config['shutdown']:
-                global_counters['shutdown'] = []
-            if test_config.get('responsiveness') and \
-                    platform.system() != "Linux":
-                # ignore responsiveness tests on linux until we fix
-                # Bug 710296
-                setup.env['MOZ_INSTRUMENT_EVENT_LOOP'] = '1'
-                setup.env['MOZ_INSTRUMENT_EVENT_LOOP_THRESHOLD'] = '20'
-                setup.env['MOZ_INSTRUMENT_EVENT_LOOP_INTERVAL'] = '10'
-                global_counters['responsiveness'] = []
-
-            # instantiate an object to hold test results
-            test_results = results.TestResults(
-                test_config,
-                global_counters
-            )
-
-            for i in range(test_config['cycles']):
-                logging.info("Running cycle %d/%d for %s test...",
-                             i+1, test_config['cycles'], test_config['name'])
+            mainthread_error_count = 0
+            if test_config['setup']:
+                # Generate bcontroller.json for xperf
+                talosconfig.generateTalosConfig(command_args,
+                                                browser_config,
+                                                test_config)
+                subprocess.call(
+                    ['python'] + test_config['setup'].split(),
+                )
 
-                # remove the browser  error file
-                mozfile.remove(browser_config['error_filename'])
-
-                # reinstall any file whose stability we need to ensure across
-                # the cycles
-                if test_config.get('reinstall', ''):
-                    for keep in test_config['reinstall']:
-                        origin = os.path.join(test_config['profile_path'],
-                                              keep)
-                        dest = os.path.join(setup.profile_dir, keep)
-                        logging.debug("Reinstalling %s on top of %s", origin,
-                                      dest)
-                        shutil.copy(origin, dest)
+            mm_httpd = None
 
-                # check to see if the previous cycle is still hanging around
-                if (i > 0) and ffprocess.running_processes(self._pids):
-                    raise TalosError("previous cycle still running")
-
-                # Run the test
-                timeout = test_config.get('timeout', 7200)  # 2 hours default
-                if setup.sps_profile:
-                    # When profiling, give the browser some extra time
-                    # to dump the profile.
-                    timeout += 5 * 60
-
-                command_args = utils.GenerateBrowserCommandLine(
-                    browser_config["browser_path"],
-                    browser_config["extra_args"],
-                    setup.profile_dir,
-                    test_config['url'],
-                    profiling_info=(setup.sps_profile.profiling_info
-                                    if setup.sps_profile else None)
+            if test_config['name'] == 'media_tests':
+                from startup_test.media import media_manager
+                mm_httpd = media_manager.run_server(
+                    os.path.dirname(os.path.realpath(__file__))
                 )
 
-                mainthread_error_count = 0
-                if test_config['setup']:
-                    # Generate bcontroller.json for xperf
-                    talosconfig.generateTalosConfig(command_args,
-                                                    browser_config,
-                                                    test_config)
-                    subprocess.call(
-                        ['python'] + test_config['setup'].split(),
-                    )
-
-                mm_httpd = None
-
-                if test_config['name'] == 'media_tests':
-                    from startup_test.media import media_manager
-                    mm_httpd = media_manager.run_server(
-                        os.path.dirname(os.path.realpath(__file__))
-                    )
-
-                counter_management = None
-                if counters:
-                    counter_management = CounterManagement(
-                        browser_config['process'],
-                        counters,
-                        resolution
-                    )
+            counter_management = None
+            if counters:
+                counter_management = CounterManagement(
+                    browser_config['process'],
+                    counters,
+                    resolution
+                )
 
-                try:
-                    pid, browser_output = run_browser(
-                        command_args,
-                        timeout=timeout,
-                        env=setup.env,
-                        # start collecting counters as soon as possible
-                        on_started=(counter_management.start
-                                    if counter_management else None),
-                    )
-                finally:
-                    if counter_management:
-                        counter_management.stop()
-                    if mm_httpd:
-                        mm_httpd.stop()
-
-                self._pids.append(pid)
+            try:
+                pcontext = run_browser(
+                    command_args,
+                    timeout=timeout,
+                    env=setup.env,
+                    # start collecting counters as soon as possible
+                    on_started=(counter_management.start
+                                if counter_management else None),
+                )
+            finally:
+                if counter_management:
+                    counter_management.stop()
+                if mm_httpd:
+                    mm_httpd.stop()
 
-                if test_config['mainthread']:
-                    rawlog = os.path.join(here, "mainthread_io.log")
-                    if os.path.exists(rawlog):
-                        processedlog = \
-                            os.path.join(here, 'mainthread_io.json')
-                        xre_path = \
-                            os.path.dirname(browser_config['browser_path'])
-                        mtio_py = os.path.join(here, 'mainthreadio.py')
-                        command = ['python', mtio_py, rawlog,
-                                   processedlog, xre_path]
-                        mtio = subprocess.Popen(command,
-                                                env=os.environ.copy(),
-                                                stdout=subprocess.PIPE)
-                        output, stderr = mtio.communicate()
-                        for line in output.split('\n'):
-                            if line.strip() == "":
-                                continue
+            if test_config['mainthread']:
+                rawlog = os.path.join(here, "mainthread_io.log")
+                if os.path.exists(rawlog):
+                    processedlog = \
+                        os.path.join(here, 'mainthread_io.json')
+                    xre_path = \
+                        os.path.dirname(browser_config['browser_path'])
+                    mtio_py = os.path.join(here, 'mainthreadio.py')
+                    command = ['python', mtio_py, rawlog,
+                               processedlog, xre_path]
+                    mtio = subprocess.Popen(command,
+                                            env=os.environ.copy(),
+                                            stdout=subprocess.PIPE)
+                    output, stderr = mtio.communicate()
+                    for line in output.split('\n'):
+                        if line.strip() == "":
+                            continue
+
+                        print line
+                        mainthread_error_count += 1
+                    mozfile.remove(rawlog)
 
-                            print line
-                            mainthread_error_count += 1
-                        mozfile.remove(rawlog)
+            if test_config['cleanup']:
+                # HACK: add the pid to support xperf where we require
+                # the pid in post processing
+                talosconfig.generateTalosConfig(command_args,
+                                                browser_config,
+                                                test_config,
+                                                pid=pcontext.pid)
+                subprocess.call(
+                    [sys.executable] + test_config['cleanup'].split()
+                )
 
-                if test_config['cleanup']:
-                    # HACK: add the pid to support xperf where we require
-                    # the pid in post processing
-                    talosconfig.generateTalosConfig(command_args,
-                                                    browser_config,
-                                                    test_config,
-                                                    pid=pid)
-                    subprocess.call(
-                        [sys.executable] + test_config['cleanup'].split()
-                    )
+            # For startup tests, we launch the browser multiple times
+            # with the same profile
+            for fname in ('sessionstore.js', '.parentlock',
+                          'sessionstore.bak'):
+                mozfile.remove(os.path.join(setup.profile_dir, fname))
 
-                # For startup tests, we launch the browser multiple times
-                # with the same profile
-                for fname in ('sessionstore.js', '.parentlock',
-                              'sessionstore.bak'):
-                    mozfile.remove(os.path.join(setup.profile_dir, fname))
-
-                # check for xperf errors
-                if os.path.exists(browser_config['error_filename']) or \
-                   mainthread_error_count > 0:
-                    raise TalosRegression(
-                        "Talos has found a regression, if you have questions"
-                        " ask for help in irc on #perf"
-                    )
+            # check for xperf errors
+            if os.path.exists(browser_config['error_filename']) or \
+               mainthread_error_count > 0:
+                raise TalosRegression(
+                    "Talos has found a regression, if you have questions"
+                    " ask for help in irc on #perf"
+                )
 
-                # add the results from the browser output
-                try:
-                    test_results.add(
-                        '\n'.join(browser_output),
-                        counter_results=(counter_management.results()
-                                         if counter_management
-                                         else None))
-                except Exception:
-                    # Log the exception, but continue. One way to get here
-                    # is if the browser hangs, and we'd still like to get
-                    # symbolicated profiles in that case.
-                    logging.exception("Unable to add results for cycle %d" % i)
-
-                if setup.sps_profile:
-                    setup.sps_profile.symbolicate(i)
+            # add the results from the browser output
+            try:
+                test_results.add(
+                    '\n'.join(pcontext.output),
+                    counter_results=(counter_management.results()
+                                     if counter_management
+                                     else None))
+            except Exception:
+                # Log the exception, but continue. One way to get here
+                # is if the browser hangs, and we'd still like to get
+                # symbolicated profiles in that case.
+                # TODO: the browser can't be hanging here anymore
+                # check if we can remove this, or if we should adjust the
+                # above comment.
+                logging.exception("Unable to add results for cycle %d" % i)
 
-                # clean up any stray browser processes
-                self.cleanupAndCheckForCrashes(browser_config,
-                                               setup.profile_dir,
-                                               test_config['name'])
+            if setup.sps_profile:
+                setup.sps_profile.symbolicate(i)
+
+            self.check_for_crashes(browser_config, setup.profile_dir,
+                                   test_config['name'])
 
-            # include global (cross-cycle) counters
-            test_results.all_counter_results.extend(
-                [{key: value} for key, value in global_counters.items()]
-            )
-            for c in test_results.all_counter_results:
-                for key, value in c.items():
-                    logging.debug("COUNTER %r: %s", key, value)
+        # include global (cross-cycle) counters
+        test_results.all_counter_results.extend(
+            [{key: value} for key, value in global_counters.items()]
+        )
+        for c in test_results.all_counter_results:
+            for key, value in c.items():
+                logging.debug("COUNTER %r: %s", key, value)
 
-            # return results
-            return test_results
-
-        except Exception:
-            self.testCleanup(browser_config, setup.profile_dir, test_config)
-            raise
+        # return results
+        return test_results