Bug 1272176 - Emit Perfherder data for system resource utilization; r?wlach draft
author     Gregory Szorc <gps@mozilla.com>
           Thu, 12 May 2016 13:55:35 -0700
changeset  366628 9f7fbd04d95690afcb6180f9ff4de9125351ce09
parent     366627 d82860087d47d10791757d56e289076800318ae1
child      520812 250e53e17dc37be11f46f2fb7559c2b4312c4e4b
push id    18026
push user  bmo:gps@mozilla.com
push date  Fri, 13 May 2016 00:07:46 +0000
reviewers  wlach
bugs       1272176
milestone  49.0a1
Bug 1272176 - Emit Perfherder data for system resource utilization; r?wlach

THIS PATCH IS NOT COMPLETE AND IS NOT READY TO LAND. Submitting to wlach
for early feedback so I know if I'm on the right track.

This commit teaches the resource monitor in mozharness to emit Perfherder
data for system metrics and step times. This will allow us to see when the
timing or resource characteristics of jobs in automation change.

The wonkiest part of this patch is likely the mechanism for defining the
Perfherder "test" names. We don't appear to have an identifier in mozharness
suitable for distinguishing between job types. For example, the
"desktop_unittest.py" script is responsible for running a few dozen jobs. So
we invent code for creating an identifier from the script config options. I
/think/ Treeherder will automatically assign the project/branch, platform,
and build type, which is why these aren't included in the identifier.

MozReview-Commit-ID: HjhtXfxOvzJ
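
For illustration only (not part of the patch): a minimal sketch of the payload
shape this change would emit, assuming a hypothetical identifier of
'plain-chunked.2.e10s'; the metric values and step names are made up.

    import json

    # Hypothetical identifier a script would assign to
    # resource_monitor_perfherder_name (see desktop_unittest.py below).
    pn = 'plain-chunked.2.e10s'

    # Subtests mirror what the patched resource monitor reports: overall CPU
    # percent, I/O counters, and per-step durations. All values are invented.
    subtests = [
        {'name': '%s.cpu_percent' % pn, 'value': 63.1},
        {'name': '%s.io_write_bytes' % pn, 'value': 337182720},
        {'name': '%s.io_read_bytes' % pn, 'value': 56782848},
        {'name': '%s.io_write_time' % pn, 'value': 2448},
        {'name': '%s.io_read_time' % pn, 'value': 102},
        {'name': '%s.step.install.time' % pn, 'value': 22.4},
        {'name': '%s.step.run-tests.time' % pn, 'value': 1393.7},
    ]

    payload = {
        'framework': {'name': 'job_resource_usage'},
        'suites': [{'subtests': subtests}],
    }

    # Treeherder/Perfherder pick this up by scanning the job log for lines
    # carrying the PERFHERDER_DATA prefix.
    print('PERFHERDER_DATA: %s' % json.dumps(payload))
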
testing/mozharness/mozharness/base/python.py
testing/mozharness/scripts/desktop_unittest.py
--- a/testing/mozharness/mozharness/base/python.py
+++ b/testing/mozharness/mozharness/base/python.py
@@ -455,16 +455,21 @@ class ResourceMonitoringMixin(object):
         super(ResourceMonitoringMixin, self).__init__(*args, **kwargs)
 
         self.register_virtualenv_module('psutil>=3.1.1', method='pip',
                                         optional=True)
         self.register_virtualenv_module('mozsystemmonitor==0.1',
                                         method='pip', optional=True)
         self._resource_monitor = None
 
+        # Name under which Perfherder resource monitor metrics are reported.
+        # A script must assign this in order for Perfherder metrics to be
+        # emitted.
+        self.resource_monitor_perfherder_name = None
+
     @PostScriptAction('create-virtualenv')
     def _start_resource_monitoring(self, action, success=None):
         self.activate_virtualenv()
 
         # Resource Monitor requires Python 2.7, however it's currently optional.
         # Remove when all machines have had their Python version updated (bug 711299).
         if sys.version_info[:2] < (2, 7):
             self.warning('Resource monitoring will not be enabled! Python 2.7+ required.')
@@ -560,16 +565,48 @@ class ResourceMonitoringMixin(object):
 
             except ValueError:
                 self.warning("Exception when formatting: %s" %
                              traceback.format_exc())
 
         cpu_percent, cpu_times, io, (swap_in, swap_out) = resources(None)
         duration = rm.end_time - rm.start_time
 
+        # Write out Perfherder data if configured.
+        if self.resource_monitor_perfherder_name:
+            pn = self.resource_monitor_perfherder_name
+            subtests = []
+
+            if cpu_percent:
+                subtests.append({
+                    'name': '%s.cpu_percent' % pn,
+                    'value': cpu_percent,
+                })
+
+            subtests.extend([
+                {'name': '%s.io_write_bytes' % pn, 'value': io.write_bytes},
+                {'name': '%s.io_read_bytes' % pn, 'value': io.read_bytes},
+                {'name': '%s.io_write_time' % pn, 'value': io.write_time},
+                {'name': '%s.io_read_time' % pn, 'value': io.read_time},
+            ])
+
+            for phase in rm.phases:
+                phase_duration = rm.phases[phase][1] - rm.phases[phase][0]
+                subtests.append({
+                    'name': '%s.step.%s.time' % (pn, phase),
+                    'value': phase_duration,
+                })
+
+            d = {
+                'framework': {'name': 'job_resource_usage'},
+                'suites': [{'subtests': subtests}],
+            }
+
+            self.info('PERFHERDER_DATA: %s' % json.dumps(d))
+
         log_usage('Total resource usage', duration, cpu_percent, cpu_times, io)
 
         # Print special messages so usage shows up in Treeherder.
         if cpu_percent:
             self._tinderbox_print('CPU usage<br/>{:,.1f}%'.format(
                                   cpu_percent))
 
         self._tinderbox_print('I/O read bytes / time<br/>{:,} / {:,}'.format(
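
As an aside (not part of the patch): a minimal sketch of how a consumer could
pull these payloads back out of a mozharness log, assuming the
'PERFHERDER_DATA: ' prefix emitted above; the log filename is hypothetical.

    import json

    def extract_perfherder_data(log_path):
        """Yield each Perfherder payload found in a mozharness log.

        Assumes payloads appear on single lines of the form
        'PERFHERDER_DATA: {...}', as emitted in the hunk above.
        """
        marker = 'PERFHERDER_DATA: '
        with open(log_path) as fh:
            for line in fh:
                if marker in line:
                    yield json.loads(line.split(marker, 1)[1])

    # Hypothetical usage against a downloaded job log.
    for payload in extract_perfherder_data('live_backing.log'):
        for subtest in payload['suites'][0]['subtests']:
            print('%s = %s' % (subtest['name'], subtest['value']))
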
--- a/testing/mozharness/scripts/desktop_unittest.py
+++ b/testing/mozharness/scripts/desktop_unittest.py
@@ -166,16 +166,41 @@ class DesktopUnittest(TestingMixin, Merc
         self.symbols_url = c.get('symbols_url')
         # this is so mozinstall in install() doesn't bug out if we don't run
         # the download_and_extract action
         self.installer_path = c.get('installer_path')
         self.binary_path = c.get('binary_path')
         self.abs_app_dir = None
         self.abs_res_dir = None
 
+        # Construct an identifier used to label Perfherder data recorded
+        # by the resource monitor. This attempts to uniquely identify this
+        # test invocation's configuration.
+        perfherder_parts = []
+        suites = (
+            'specified_mochitest_suites',
+            'specified_reftest_suites',
+            'specified_xpcshell_suites',
+            'specified_cppunittest_suites',
+            'specified_gtest_suites',
+            'specified_jittest_suites',
+            'specified_mozbase_suites',
+            'specified_mozmill_suites',
+        )
+        for s in suites:
+            if s in c:
+                perfherder_parts.extend(c[s])
+
+        if 'this_chunk' in c:
+            perfherder_parts.append(str(c['this_chunk']))
+        if c.get('e10s'):
+            perfherder_parts.append('e10s')
+
+        self.resource_monitor_perfherder_name = '.'.join(perfherder_parts)
+
     # helper methods {{{2
     def _pre_config_lock(self, rw_config):
         super(DesktopUnittest, self)._pre_config_lock(rw_config)
         c = self.config
         if not c.get('run_all_suites'):
             return  # configs are valid
         for category in SUITE_CATEGORIES:
             specific_suites = c.get('specified_%s_suites' % (category))
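
To make the identifier scheme concrete, a standalone sketch (not part of the
patch) that mirrors the construction above for a hypothetical configuration;
the suite name and chunk value are invented.

    # Hypothetical config resembling what desktop_unittest.py receives.
    # Only a few of the specified_*_suites keys are shown here.
    config = {
        'specified_mochitest_suites': ['plain-chunked'],
        'this_chunk': '2',
        'e10s': True,
    }

    parts = []
    for key in ('specified_mochitest_suites', 'specified_reftest_suites',
                'specified_xpcshell_suites'):
        if key in config:
            parts.extend(config[key])

    if 'this_chunk' in config:
        parts.append(str(config['this_chunk']))
    if config.get('e10s'):
        parts.append('e10s')

    # Prints 'plain-chunked.2.e10s', which becomes the prefix for the
    # Perfherder subtest names emitted by the resource monitor.
    print('.'.join(parts))
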