Bug 1478057 - Report the mean of all measurements per pageload, not separate values per measurement type; r=igoldan draft
author Rob Wood <rwood@mozilla.com>
Fri, 27 Jul 2018 11:10:30 -0400
changeset 823540 bcb5b801eeec0a585f49f9e4e638beecdd1ee7e2
parent 823539 de5e21062a40a8f22284f2bdac9aaa8d1cf28844
push id 117712
push user rwood@mozilla.com
push date Fri, 27 Jul 2018 15:10:54 +0000
reviewers igoldan
bugs 1478057
milestone 63.0a1
Bug 1478057 - Report the mean of all measurements per pageload, not separate values per measurement type; r=igoldan MozReview-Commit-ID: H0lYbZPFzMY
testing/raptor/raptor/output.py
testing/raptor/raptor/tests/raptor-gdocs.ini
testing/raptor/raptor/tests/raptor-tp6.ini
--- a/testing/raptor/raptor/output.py
+++ b/testing/raptor/raptor/output.py
@@ -25,30 +25,30 @@ class Output(object):
         """
         - results : list of RaptorTestResult instances
         """
         self.results = results
         self.summarized_results = {}
 
     def summarize(self):
         suites = []
-        vals = []
         test_results = {
             'framework': {
                 'name': 'raptor',
             },
             'suites': suites,
         }
 
         # check if we actually have any results
         if len(self.results) == 0:
             LOG.error("error: no raptor test results found!")
             return
 
         for test in self.results:
+            vals = []
             subtests = []
             suite = {
                 'name': test.name,
                 'type': test.type,
                 'extraOptions': test.extra_options,
                 'subtests': subtests,
                 'lowerIsBetter': test.lower_is_better,
                 'alertThreshold': float(test.alert_threshold)
@@ -66,50 +66,58 @@ class Output(object):
 
                 # {u'name': u'raptor-firefox-tp6-amazon', u'type': u'pageload', u'measurements':
                 # {u'fnbpaint': [788, 315, 334, 286, 318, 276, 296, 296, 292, 285, 268, 277, 274,
                 # 328, 295, 290, 286, 270, 279, 280, 346, 303, 308, 398, 281]}, u'browser':
                 # u'Firefox 62.0a1 20180528123052', u'lower_is_better': True, u'page':
                 # u'https://www.amazon.com/s/url=search-alias%3Daps&field-keywords=laptop',
                 # u'unit': u'ms', u'alert_threshold': 2}
 
-                for key, values in test.measurements.iteritems():
+                for measurement_name, replicates in test.measurements.iteritems():
                     new_subtest = {}
-                    new_subtest['name'] = test.name + "-" + key
-                    new_subtest['replicates'] = values
+                    new_subtest['name'] = test.name + "-" + measurement_name
+                    new_subtest['replicates'] = replicates
                     new_subtest['lowerIsBetter'] = test.lower_is_better
                     new_subtest['alertThreshold'] = float(test.alert_threshold)
                     new_subtest['value'] = 0
                     new_subtest['unit'] = test.unit
 
                     filtered_values = filter.ignore_first(new_subtest['replicates'], 1)
                     new_subtest['value'] = filter.median(filtered_values)
-                    vals.append(new_subtest['value'])
 
+                    vals.append([new_subtest['value'], new_subtest['name']])
                     subtests.append(new_subtest)
 
             elif test.type == "benchmark":
                 if 'speedometer' in test.measurements:
                     subtests, vals = self.parseSpeedometerOutput(test)
                 elif 'motionmark' in test.measurements:
                     subtests, vals = self.parseMotionmarkOutput(test)
                 elif 'sunspider' in test.measurements:
                     subtests, vals = self.parseSunspiderOutput(test)
                 elif 'webaudio' in test.measurements:
                     subtests, vals = self.parseWebaudioOutput(test)
                 suite['subtests'] = subtests
 
-                # if there is more than one subtest, calculate a summary result
-                if len(subtests) > 1:
-                    suite['value'] = self.construct_summary(vals, testname=test.name)
-
             else:
                 LOG.error("output.summarize received unsupported test results type")
                 return
 
+            # for pageload tests, if there are > 1 subtests here, that means there
+            # were multiple measurements captured in each single pageload; we want
+            # to get the mean of those values and report 1 overall 'suite' value
+            # for the page; so that each test page/URL only has 1 line output
+            # on treeherder/perfherder (all replicates available in the JSON)
+
+            # for benchmarks there is generally  more than one subtest in each cycle
+            # and a benchmark-specific formula is needed to calculate the final score
+
+            if len(subtests) > 1:
+                suite['value'] = self.construct_summary(vals, testname=test.name)
+
         self.summarized_results = test_results
 
     def parseSpeedometerOutput(self, test):
         # each benchmark 'index' becomes a subtest; each pagecycle / iteration
         # of the test has multiple values per index/subtest
 
         # this is the format we receive the results in from the benchmark
         # i.e. this is ONE pagecycle of speedometer:
@@ -396,11 +404,11 @@ class Output(object):
             return self.speedometer_score(vals)
         elif testname.startswith('raptor-stylebench'):
             return self.stylebench_score(vals)
         elif testname.startswith('raptor-sunspider'):
             return self.sunspider_score(vals)
         elif testname.startswith('raptor-webaudio'):
             return self.webaudio_score(vals)
         elif len(vals) > 1:
-            return filter.geometric_mean([i for i, j in vals])
+            return round(filter.geometric_mean([i for i, j in vals]), 2)
         else:
-            return filter.mean([i for i, j in vals])
+            return round(filter.mean([i for i, j in vals]), 2)
--- a/testing/raptor/raptor/tests/raptor-gdocs.ini
+++ b/testing/raptor/raptor/tests/raptor-gdocs.ini
@@ -15,39 +15,45 @@ unit = ms
 lower_is_better = true
 alert_threshold = 2.0
 page_timeout = 30000
 
 [raptor-google-docs-firefox]
 apps = firefox
 test_url = https://docs.google.com/document/d/1US-07msg12slQtI_xchzYxcKlTs6Fp7WqIc6W5GK5M8/edit?usp=sharing
 playback_recordings = google-docs.mp
-measure = fnbpaint
+measure = fnbpaint, hero
+hero = hero1
 
 [raptor-google-sheets-firefox]
 apps = firefox
 test_url = https://docs.google.com/spreadsheets/d/1jT9qfZFAeqNoOK97gruc34Zb7y_Q-O_drZ8kSXT-4D4/edit?usp=sharing
 playback_recordings = google-sheets.mp
-measure = fnbpaint
+measure = fnbpaint, hero
+hero = hero1
 
 [raptor-google-slides-firefox]
 apps = firefox
 test_url = https://docs.google.com/presentation/d/1Ici0ceWwpFvmIb3EmKeWSq_vAQdmmdFcWqaiLqUkJng/edit?usp=sharing
 playback_recordings = google-slides.mp
-measure = fnbpaint
+measure = fnbpaint, hero
+hero = hero1
 
 [raptor-google-docs-chrome]
 apps = chrome
 test_url = https://docs.google.com/document/d/1US-07msg12slQtI_xchzYxcKlTs6Fp7WqIc6W5GK5M8/edit?usp=sharing
 playback_recordings = google-docs.mp
-measure = fcp
+measure = fcp, hero
+hero = hero1
 
 [raptor-google-sheets-chrome]
 apps = chrome
 test_url = https://docs.google.com/spreadsheets/d/1jT9qfZFAeqNoOK97gruc34Zb7y_Q-O_drZ8kSXT-4D4/edit?usp=sharing
 playback_recordings = google-sheets.mp
-measure = fcp
+measure = fcp, hero
+hero = hero1
 
 [raptor-google-slides-chrome]
 apps = chrome
 test_url = https://docs.google.com/presentation/d/1Ici0ceWwpFvmIb3EmKeWSq_vAQdmmdFcWqaiLqUkJng/edit?usp=sharing
 playback_recordings = google-slides.mp
-measure = fcp
+measure = fcp, hero
+hero = hero1
--- a/testing/raptor/raptor/tests/raptor-tp6.ini
+++ b/testing/raptor/raptor/tests/raptor-tp6.ini
@@ -14,54 +14,62 @@ page_cycles = 25
 unit = ms
 lower_is_better = true
 alert_threshold = 2.0
 
 [raptor-tp6-amazon-firefox]
 apps = firefox
 test_url = https://www.amazon.com/s/url=search-alias%3Daps&field-keywords=laptop
 playback_recordings = amazon.mp
-measure = fnbpaint
+measure = fnbpaint, hero
+hero = hero1
 
 [raptor-tp6-facebook-firefox]
 apps = firefox
 test_url = https://www.facebook.com
 playback_recordings = facebook.mp
-measure = fnbpaint
+measure = fnbpaint, hero
+hero = hero1
 
 [raptor-tp6-google-firefox]
 apps = firefox
 # note: use the full url as the first part (without '&cad=h') redirects
 # to the url + '&cad=h'; that redirection causes measure.js content
 # to be loaded into that page also; resulting in 2 fnbpaint values etc.
 test_url = https://www.google.com/search?hl=en&q=barack+obama&cad=h
 playback_recordings = google-search.mp
-measure = fnbpaint
+measure = fnbpaint, hero
+hero = hero1
 
 [raptor-tp6-youtube-firefox]
 apps = firefox
 test_url = https://www.youtube.com
 playback_recordings = youtube.mp
-measure = fnbpaint
+measure = fnbpaint, hero
+hero = hero1
 
 [raptor-tp6-amazon-chrome]
 apps = chrome
 test_url = https://www.amazon.com/s/url=search-alias%3Daps&field-keywords=laptop
 playback_recordings = amazon.mp
-measure = fcp
+measure = fcp, hero
+hero = hero1
 
 [raptor-tp6-facebook-chrome]
 apps = chrome
 test_url = https://www.facebook.com
 playback_recordings = facebook.mp
-measure = fcp
+measure = fcp, hero
+hero = hero1
 
 [raptor-tp6-google-chrome]
 apps = chrome
 test_url = https://www.google.com/#hl=en&q=barack+obama
 playback_recordings = google-search.mp
-measure = fcp
+measure = fcp, hero
+hero = hero1
 
 [raptor-tp6-youtube-chrome]
 apps = chrome
 test_url = https://www.youtube.com
 playback_recordings = youtube.mp
-measure = fcp
+measure = fcp, hero
+hero = hero1