Backed out 5 changesets (bug 1548845) for failing new youtube playback raptor tests. CLOSED TREE
author: Mihai Alexandru Michis <malexandru@mozilla.com>
date: Fri, 17 May 2019 16:17:06 +0300
changeset: 474323 f72947acdfcd662c26a8e84efac58e703b2ce2ec
parent: 474322 2d1a7a5be46038b3b130e18f409eec5b7e6bce66
child: 474324 3866561a7bae97a6c9ec3c18174747ec36bba090
push id: 36027
push user: shindli@mozilla.com
push date: Fri, 17 May 2019 16:24:38 +0000
treeherder: mozilla-central@c94c54aff466
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
bugs: 1548845
milestone: 68.0a1
backs out: 934d2f88195de26cc114451e6511613d27f997aa
           609f489bdc8c0370d97b00dcc496454ee5d0296c
           a2544ca8c593cd50ac0753fdc2f799f39c0b057e
           152615db9db653c273b5e90487d198ae2fd788a5
           6b3a8394727fbde512c4ee39a4d8d21a17f10581
Backed out 5 changesets (bug 1548845) for failing new youtube playback raptor tests. CLOSED TREE

Backed out changeset 934d2f88195d (bug 1548845)
Backed out changeset 609f489bdc8c (bug 1548845)
Backed out changeset a2544ca8c593 (bug 1548845)
Backed out changeset 152615db9db6 (bug 1548845)
Backed out changeset 6b3a8394727f (bug 1548845)
taskcluster/ci/test/raptor.yml
taskcluster/ci/test/test-sets.yml
testing/raptor/raptor/filter.py
testing/raptor/raptor/filters.py
testing/raptor/raptor/manifest.py
testing/raptor/raptor/output.py
testing/raptor/raptor/raptor.ini
testing/raptor/raptor/tests/raptor-youtube-playback.ini
testing/raptor/test/test_manifest.py
testing/raptor/webext/raptor/manifest.json
testing/raptor/webext/raptor/runner.js
--- a/taskcluster/ci/test/raptor.yml
+++ b/taskcluster/ci/test/raptor.yml
@@ -1574,29 +1574,16 @@ raptor-wasm-godot-ion-firefox-profiling:
     run-on-projects: ['mozilla-central', 'try']
     max-run-time: 900
     tier: 2
     mozharness:
         extra-options:
             - --test=raptor-wasm-godot-ion
             - --gecko-profile
 
-raptor-youtube-playback-firefox:
-    description: "Raptor YouTube Playback on Firefox"
-    try-name: raptor-youtube-playback-firefox
-    treeherder-symbol: Rap(ytp)
-    max-run-time:
-        by-test-platform:
-            windows10-aarch64/opt: 3600
-            default: 2700
-    tier: 2
-    mozharness:
-        extra-options:
-            - --test=raptor-youtube-playback
-
 raptor-tp6-1-firefox-cold:
     description: "Raptor tp6-1 cold page-load on Firefox"
     try-name: raptor-tp6-1-firefox-cold
     treeherder-symbol: Rap(tp6-c-1)
     tier: 2
     mozharness:
         extra-options:
             - --test=raptor-tp6-cold-1
--- a/taskcluster/ci/test/test-sets.yml
+++ b/taskcluster/ci/test/test-sets.yml
@@ -95,17 +95,16 @@ raptor-firefox:
     - raptor-tp6-binast-1-firefox
     - raptor-speedometer-firefox
     - raptor-stylebench-firefox
     - raptor-motionmark-htmlsuite-firefox
     - raptor-motionmark-animometer-firefox
     - raptor-webaudio-firefox
     - raptor-sunspider-firefox
     - raptor-wasm-godot-firefox
-    - raptor-youtube-playback-firefox
     - raptor-tp6-1-firefox-cold
     - raptor-tp6-2-firefox-cold
     - raptor-tp6-3-firefox-cold
     - raptor-tp6-4-firefox-cold
 
 raptor-profiling:
     - raptor-tp6-1-firefox-profiling
     - raptor-tp6-2-firefox-profiling
rename from testing/raptor/raptor/filters.py
rename to testing/raptor/raptor/filter.py
--- a/testing/raptor/raptor/filters.py
+++ b/testing/raptor/raptor/filter.py
@@ -11,20 +11,20 @@ import math
 """
 data filters:
 takes a series of run data and applies statistical transforms to it
 
 Each filter is a simple function, but it also have attached a special
 `prepare` method that create a tuple with one instance of a
 :class:`Filter`; this allow to write stuff like::
 
-  from raptor import filters
-  filter_list = filters.ignore_first.prepare(1) + filters.median.prepare()
+  from raptor import filter
+  filters = filter.ignore_first.prepare(1) + filter.median.prepare()
 
-  for filter in filter_list:
+  for filter in filters:
       data = filter(data)
   # data is filtered
 """
 
 _FILTERS = {}
 
 
 class Filter(object):
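
The docstring above describes how the renamed filter module composes filters: each filter function carries a `prepare` helper that wraps it in a one-element tuple, so pipelines can be concatenated with `+` and then applied in order. Below is a minimal, self-contained sketch of that pattern; the decorator and class body here are illustrative only, not the module's actual implementation.

    # Minimal sketch of the prepare/compose pattern described in the docstring.
    # Names are illustrative; the real module registers filters differently.
    class Filter(object):
        def __init__(self, func, *args, **kwargs):
            self.func = func
            self.args = args
            self.kwargs = kwargs

        def __call__(self, data):
            return self.func(data, *self.args, **self.kwargs)


    def define_filter(func):
        # attach a `prepare` helper returning a one-element tuple,
        # so pipelines can be built by concatenating tuples with `+`
        func.prepare = lambda *a, **kw: (Filter(func, *a, **kw),)
        return func


    @define_filter
    def ignore_first(data, skip=1):
        return data[skip:]


    @define_filter
    def median(data):
        ordered = sorted(data)
        mid = len(ordered) // 2
        if len(ordered) % 2:
            return ordered[mid]
        return (ordered[mid - 1] + ordered[mid]) / 2.0


    pipeline = ignore_first.prepare(1) + median.prepare()
    data = [120, 30, 40, 50, 60]
    for f in pipeline:
        data = f(data)
    print(data)  # -> 45.0 (first noisy value dropped, median of the rest)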
--- a/testing/raptor/raptor/manifest.py
+++ b/testing/raptor/raptor/manifest.py
@@ -10,18 +10,16 @@ from manifestparser import TestManifest
 from mozlog import get_proxy_logger
 from utils import transform_platform
 
 here = os.path.abspath(os.path.dirname(__file__))
 raptor_ini = os.path.join(here, 'raptor.ini')
 tests_dir = os.path.join(here, 'tests')
 LOG = get_proxy_logger(component="raptor-manifest")
 
-LIVE_SITE_TIMEOUT_MULTIPLIER = 1.2
-
 required_settings = [
     'alert_threshold',
     'apps',
     'lower_is_better',
     'measure',
     'page_cycles',
     'test_url',
     'scenario_time',
@@ -161,19 +159,16 @@ def write_test_settings_json(args, test_
     if subtest_lower_is_better is None:
         # default to main test values if not set
         test_settings['raptor-options']['subtest_lower_is_better'] = (
             test_settings['raptor-options']['lower_is_better'])
     else:
         test_settings['raptor-options']['subtest_lower_is_better'] = bool_from_str(
             subtest_lower_is_better)
 
-    if test_details.get("alert_change_type", None) is not None:
-        test_settings['raptor-options']['alert_change_type'] = test_details['alert_change_type']
-
     if test_details.get("alert_threshold", None) is not None:
         test_settings['raptor-options']['alert_threshold'] = float(test_details['alert_threshold'])
 
     if test_details.get("screen_capture", None) is not None:
         test_settings['raptor-options']['screen_capture'] = test_details.get("screen_capture")
 
     # if Gecko profiling is enabled, write profiling settings for webext
     if test_details.get("gecko_profile", False):
@@ -327,20 +322,18 @@ def get_raptor_test_list(args, oskey):
         next_test['browser_cycle'] = 1
 
         if next_test.get('use_live_sites', "false") == "true":
             # when using live sites we want to turn off playback
             LOG.info("using live sites so turning playback off!")
             next_test['playback'] = None
             LOG.info("using live sites so appending '-live' to the test name")
             next_test['name'] = next_test['name'] + "-live"
-            # allow a slightly higher page timeout due to remote page loads
-            next_test['page_timeout'] = int(
-                next_test['page_timeout']) * LIVE_SITE_TIMEOUT_MULTIPLIER
-            LOG.info("using live sites so using page timeout of %dms" % next_test['page_timeout'])
+            # we also want to increase the page timeout since may be longer live
+            next_test['page_timeout'] = 180000
 
         # convert 'measure =' test INI line to list
         if next_test.get('measure') is not None:
             _measures = []
             for m in [m.strip() for m in next_test['measure'].split(',')]:
                 # build the 'measures =' list
                 _measures.append(m)
             next_test['measure'] = _measures
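
The live-sites hunk above trades the backed-out timeout scaling (page_timeout multiplied by LIVE_SITE_TIMEOUT_MULTIPLIER = 1.2) for the previous flat 180000 ms value. A small sketch of the two behaviours; the base timeout below is illustrative, only the 1.2 multiplier and 180000 ms come from the diff.

    # Backed-out behaviour: scale the test's own page timeout for live sites.
    LIVE_SITE_TIMEOUT_MULTIPLIER = 1.2
    page_timeout = 25000                    # illustrative per-test value from the INI
    scaled = int(page_timeout) * LIVE_SITE_TIMEOUT_MULTIPLIER
    print("using live sites so using page timeout of %dms" % scaled)  # -> 30000ms

    # Restored behaviour: a flat timeout for every live-site run,
    # regardless of the test's own page_timeout setting.
    page_timeout = 180000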
--- a/testing/raptor/raptor/output.py
+++ b/testing/raptor/raptor/output.py
@@ -3,17 +3,17 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 # some parts of this originally taken from /testing/talos/talos/output.py
 
 """output raptor test results"""
 from __future__ import absolute_import
 
-import filters
+import filter
 
 import json
 import os
 
 from mozlog import get_proxy_logger
 
 LOG = get_proxy_logger(component="raptor-output")
 
@@ -55,20 +55,16 @@ class Output(object):
                 'type': test.type,
                 'extraOptions': test.extra_options,
                 'subtests': subtests,
                 'lowerIsBetter': test.lower_is_better,
                 'unit': test.unit,
                 'alertThreshold': float(test.alert_threshold)
             }
 
-            # Check if optional properties have been set by the test
-            if hasattr(test, "alert_change_type"):
-                suite['alertChangeType'] = test.alert_change_type
-
             # if cold load add that info to the suite result dict; this will be used later
             # when combining the results from multiple browser cycles into one overall result
             if test.cold is True:
                 suite['cold'] = True
                 suite['browser_cycle'] = int(test.browser_cycle)
                 suite['expected_browser_cycles'] = int(test.expected_browser_cycles)
 
             suites.append(suite)
@@ -96,64 +92,62 @@ class Output(object):
                     new_subtest['alertThreshold'] = float(test.alert_threshold)
                     new_subtest['value'] = 0
                     new_subtest['unit'] = test.subtest_unit
 
                     if test.cold is False:
                         # for warm page-load, ignore first value due to 1st pageload noise
                         LOG.info("ignoring the first %s value due to initial pageload noise"
                                  % measurement_name)
-                        filtered_values = filters.ignore_first(new_subtest['replicates'], 1)
+                        filtered_values = filter.ignore_first(new_subtest['replicates'], 1)
                     else:
                         # for cold-load we want all the values
                         filtered_values = new_subtest['replicates']
 
                     # for pageload tests that measure TTFI: TTFI is not guaranteed to be available
                     # everytime; the raptor measure.js webext will substitute a '-1' value in the
                     # cases where TTFI is not available, which is acceptable; however we don't want
                     # to include those '-1' TTFI values in our final results calculations
                     if measurement_name == "ttfi":
-                        filtered_values = filters.ignore_negative(filtered_values)
+                        filtered_values = filter.ignore_negative(filtered_values)
                         # we've already removed the first pageload value; if there aren't any more
                         # valid TTFI values available for this pageload just remove it from results
                         if len(filtered_values) < 1:
                             continue
 
                     # if 'alert_on' is set for this particular measurement, then we want to set the
                     # flag in the perfherder output to turn on alerting for this subtest
                     if self.subtest_alert_on is not None:
                         if measurement_name in self.subtest_alert_on:
                             LOG.info("turning on subtest alerting for measurement type: %s"
                                      % measurement_name)
                             new_subtest['shouldAlert'] = True
 
-                    new_subtest['value'] = filters.median(filtered_values)
+                    new_subtest['value'] = filter.median(filtered_values)
 
                     vals.append([new_subtest['value'], new_subtest['name']])
                     subtests.append(new_subtest)
 
             elif test.type == "benchmark":
-                if 'assorted-dom' in test.measurements:
-                    subtests, vals = self.parseAssortedDomOutput(test)
+                if 'speedometer' in test.measurements:
+                    subtests, vals = self.parseSpeedometerOutput(test)
                 elif 'motionmark' in test.measurements:
                     subtests, vals = self.parseMotionmarkOutput(test)
-                elif 'speedometer' in test.measurements:
-                    subtests, vals = self.parseSpeedometerOutput(test)
                 elif 'sunspider' in test.measurements:
                     subtests, vals = self.parseSunspiderOutput(test)
+                elif 'webaudio' in test.measurements:
+                    subtests, vals = self.parseWebaudioOutput(test)
                 elif 'unity-webgl' in test.measurements:
                     subtests, vals = self.parseUnityWebGLOutput(test)
+                elif 'assorted-dom' in test.measurements:
+                    subtests, vals = self.parseAssortedDomOutput(test)
+                elif 'wasm-misc' in test.measurements:
+                    subtests, vals = self.parseWASMMiscOutput(test)
                 elif 'wasm-godot' in test.measurements:
                     subtests, vals = self.parseWASMGodotOutput(test)
-                elif 'wasm-misc' in test.measurements:
-                    subtests, vals = self.parseWASMMiscOutput(test)
-                elif 'webaudio' in test.measurements:
-                    subtests, vals = self.parseWebaudioOutput(test)
-                elif 'youtube-playbackperf-test' in test.measurements:
-                    subtests, vals = self.parseYoutubePlaybackPerformanceOutput(test)
                 suite['subtests'] = subtests
 
             else:
                 LOG.error("output.summarize received unsupported test results type for %s" %
                           test.name)
                 return
 
             # for benchmarks there is generally  more than one subtest in each cycle
@@ -269,17 +263,17 @@ class Output(object):
                         combined_suites[next_suite['details']['name']]['subtests'] \
                             .append(next_subtest)
 
         # now we have a single entry for each test; with all replicates from all browser cycles
         for i, name in enumerate(combined_suites):
             vals = []
             for next_sub in combined_suites[name]['subtests']:
                 # calculate sub-test results (i.e. each measurement type)
-                next_sub['value'] = filters.median(next_sub['replicates'])
+                next_sub['value'] = filter.median(next_sub['replicates'])
                 # add to vals; vals is used to calculate overall suite result i.e. the
                 # geomean of all of the subtests / measurement types
                 vals.append([next_sub['value'], next_sub['name']])
 
             # calculate overall suite result ('value') which is geomean of all measures
             if len(combined_suites[name]['subtests']) > 1:
                 combined_suites[name]['value'] = self.construct_summary(vals, testname=name)
 
@@ -401,17 +395,17 @@ class Output(object):
                                       'replicates': []}
                 _subtests[sub]['replicates'].extend([round(x, 3) for x in replicates])
 
         vals = []
         subtests = []
         names = _subtests.keys()
         names.sort(reverse=True)
         for name in names:
-            _subtests[name]['value'] = filters.median(_subtests[name]['replicates'])
+            _subtests[name]['value'] = filter.median(_subtests[name]['replicates'])
             subtests.append(_subtests[name])
             vals.append([_subtests[name]['value'], name])
 
         return subtests, vals
 
     def parseWASMMiscOutput(self, test):
         '''
           {u'wasm-misc': [
@@ -438,17 +432,17 @@ class Output(object):
                                       'replicates': []}
                 _subtests[sub]['replicates'].append(item['time'])
 
         vals = []
         subtests = []
         names = _subtests.keys()
         names.sort(reverse=True)
         for name in names:
-            _subtests[name]['value'] = filters.median(_subtests[name]['replicates'])
+            _subtests[name]['value'] = filter.median(_subtests[name]['replicates'])
             subtests.append(_subtests[name])
             vals.append([_subtests[name]['value'], name])
 
         return subtests, vals
 
     def parseWASMGodotOutput(self, test):
         '''
             {u'wasm-godot': [
@@ -477,17 +471,17 @@ class Output(object):
                                       'replicates': []}
                 _subtests[sub]['replicates'].append(item['time'])
 
         vals = []
         subtests = []
         names = _subtests.keys()
         names.sort(reverse=True)
         for name in names:
-            _subtests[name]['value'] = filters.median(_subtests[name]['replicates'])
+            _subtests[name]['value'] = filter.median(_subtests[name]['replicates'])
             subtests.append(_subtests[name])
             vals.append([_subtests[name]['value'], name])
 
         return subtests, vals
 
     def parseWebaudioOutput(self, test):
         # each benchmark 'index' becomes a subtest; each pagecycle / iteration
         # of the test has multiple values per index/subtest
@@ -524,17 +518,17 @@ class Output(object):
                                       'replicates': []}
                 _subtests[sub]['replicates'].extend([round(x, 3) for x in replicates])
 
         vals = []
         subtests = []
         names = _subtests.keys()
         names.sort(reverse=True)
         for name in names:
-            _subtests[name]['value'] = filters.median(_subtests[name]['replicates'])
+            _subtests[name]['value'] = filter.median(_subtests[name]['replicates'])
             subtests.append(_subtests[name])
             vals.append([_subtests[name]['value'], name])
 
         print subtests
         return subtests, vals
 
     def parseMotionmarkOutput(self, test):
         # for motionmark we want the frameLength:average value for each test
@@ -579,17 +573,17 @@ class Output(object):
                                       'replicates': []}
                 _subtests[sub]['replicates'].extend([replicate])
 
         vals = []
         subtests = []
         names = _subtests.keys()
         names.sort(reverse=True)
         for name in names:
-            _subtests[name]['value'] = filters.median(_subtests[name]['replicates'])
+            _subtests[name]['value'] = filter.median(_subtests[name]['replicates'])
             subtests.append(_subtests[name])
             vals.append([_subtests[name]['value'], name])
 
         return subtests, vals
 
     def parseSunspiderOutput(self, test):
         _subtests = {}
         data = test.measurements['sunspider']
@@ -606,17 +600,17 @@ class Output(object):
                 _subtests[sub]['replicates'].extend([round(x, 3) for x in replicates])
 
         subtests = []
         vals = []
 
         names = _subtests.keys()
         names.sort(reverse=True)
         for name in names:
-            _subtests[name]['value'] = filters.mean(_subtests[name]['replicates'])
+            _subtests[name]['value'] = filter.mean(_subtests[name]['replicates'])
             subtests.append(_subtests[name])
 
             vals.append([_subtests[name]['value'], name])
 
         return subtests, vals
 
     def parseUnityWebGLOutput(self, test):
         """
@@ -651,17 +645,17 @@ class Output(object):
                                       'replicates': []}
                 _subtests[sub]['replicates'].append(item['result'])
 
         vals = []
         subtests = []
         names = _subtests.keys()
         names.sort(reverse=True)
         for name in names:
-            _subtests[name]['value'] = filters.median(_subtests[name]['replicates'])
+            _subtests[name]['value'] = filter.median(_subtests[name]['replicates'])
             subtests.append(_subtests[name])
             vals.append([_subtests[name]['value'], name])
 
         return subtests, vals
 
     def parseAssortedDomOutput(self, test):
         # each benchmark 'index' becomes a subtest; each pagecycle / iteration
         # of the test has multiple values
@@ -690,90 +684,24 @@ class Output(object):
                                        'replicates': []}
                 _subtests[_sub]['replicates'].extend([_value])
 
         vals = []
         subtests = []
         names = _subtests.keys()
         names.sort(reverse=True)
         for name in names:
-            _subtests[name]['value'] = round(filters.median(_subtests[name]['replicates']), 2)
+            _subtests[name]['value'] = round(filter.median(_subtests[name]['replicates']), 2)
             subtests.append(_subtests[name])
             # only use the 'total's to compute the overall result
             if name == 'total':
                 vals.append([_subtests[name]['value'], name])
 
         return subtests, vals
 
-    def parseYoutubePlaybackPerformanceOutput(self, test):
-        """Parse the metrics for the Youtube playback performance test.
-
-        For each video measured values for dropped and decoded frames will be
-        available from the benchmark site.
-
-        {u'PlaybackPerf.VP9.2160p60@2X': {u'droppedFrames': 1, u'decodedFrames': 796}
-
-        With each page cycle / iteration of the test multiple values can be present.
-
-        Raptor will calculate the percentage of dropped frames to decoded frames.
-        All those three values will then be emitted as separate sub tests.
-        """
-        _subtests = {}
-        data = test.measurements['youtube-playbackperf-test']
-
-        def create_subtest_entry(name, value,
-                                 unit=test.subtest_unit,
-                                 lower_is_better=test.subtest_lower_is_better):
-            # build a list of subtests and append all related replicates
-            if name not in _subtests.keys():
-                # subtest not added yet, first pagecycle, so add new one
-                _subtests[name] = {
-                    'name': name,
-                    'unit': unit,
-                    'lowerIsBetter': lower_is_better,
-                    'replicates': [],
-                }
-
-            _subtests[name]['replicates'].append(value)
-
-        for pagecycle in data:
-            for _sub, _value in pagecycle[0].iteritems():
-                try:
-                    percent_dropped = float(_value['droppedFrames']) / _value['decodedFrames']
-                except ZeroDivisionError:
-                    # if no frames have been decoded the playback failed completely
-                    percent_dropped = 1
-
-                # Remove the not needed "PlaybackPerf." prefix from each test
-                _sub = _sub.split('PlaybackPerf.', 1)[-1]
-
-                # build a list of subtests and append all related replicates
-                create_subtest_entry("{}_decoded_frames".format(_sub),
-                                     _value['decodedFrames'],
-                                     lower_is_better=False,
-                                     )
-                create_subtest_entry("{}_dropped_frames".format(_sub),
-                                     _value['droppedFrames'],
-                                     )
-                create_subtest_entry("{}_%_dropped_frames".format(_sub),
-                                     percent_dropped,
-                                     )
-
-        vals = []
-        subtests = []
-        names = _subtests.keys()
-        names.sort(reverse=True)
-        for name in names:
-            _subtests[name]['value'] = round(filters.median(_subtests[name]['replicates']), 2)
-            subtests.append(_subtests[name])
-            if name.endswith("dropped_frames"):
-                vals.append([_subtests[name]['value'], name])
-
-        return subtests, vals
-
     def summarize_screenshots(self, screenshots):
         if len(screenshots) == 0:
             return
 
         self.summarized_screenshots.append("""<!DOCTYPE html>
         <head>
         <style>
             table, th, td {
@@ -893,17 +821,17 @@ class Output(object):
             LOG.info("PERFHERDER_DATA: %s" % json.dumps(next_data_set))
             LOG.info("%s results can also be found locally at: %s" % (data_type, results_path))
 
         return True
 
     @classmethod
     def v8_Metric(cls, val_list):
         results = [i for i, j in val_list]
-        score = 100 * filters.geometric_mean(results)
+        score = 100 * filter.geometric_mean(results)
         return score
 
     @classmethod
     def JS_Metric(cls, val_list):
         """v8 benchmark score"""
         results = [i for i, j in val_list]
         return sum(results)
 
@@ -916,58 +844,58 @@ class Output(object):
         results = [i for i, j in val_list]
         # speedometer has 16 tests, each of these are made of up 9 subtests
         # and a sum of the 9 values.  We receive 160 values, and want to use
         # the 16 test values, not the sub test values.
         if len(results) != 160:
             raise Exception("Speedometer has 160 subtests, found: %s instead" % len(results))
 
         results = results[9::10]
-        score = 60 * 1000 / filters.geometric_mean(results) / correctionFactor
+        score = 60 * 1000 / filter.geometric_mean(results) / correctionFactor
         return score
 
     @classmethod
     def benchmark_score(cls, val_list):
         """
         benchmark_score: ares6/jetstream self reported as 'geomean'
         """
         results = [i for i, j in val_list if j == 'geomean']
-        return filters.mean(results)
+        return filter.mean(results)
 
     @classmethod
     def webaudio_score(cls, val_list):
         """
         webaudio_score: self reported as 'Geometric Mean'
         """
         results = [i for i, j in val_list if j == 'Geometric Mean']
-        return filters.mean(results)
+        return filter.mean(results)
 
     @classmethod
     def unity_webgl_score(cls, val_list):
         """
         unity_webgl_score: self reported as 'Geometric Mean'
         """
         results = [i for i, j in val_list if j == 'Geometric Mean']
-        return filters.mean(results)
+        return filter.mean(results)
 
     @classmethod
     def wasm_misc_score(cls, val_list):
         """
         wasm_misc_score: self reported as '__total__'
         """
         results = [i for i, j in val_list if j == '__total__']
-        return filters.mean(results)
+        return filter.mean(results)
 
     @classmethod
     def wasm_godot_score(cls, val_list):
         """
         wasm_godot_score: first-interactive mean
         """
         results = [i for i, j in val_list if j == 'first-interactive']
-        return filters.mean(results)
+        return filter.mean(results)
 
     @classmethod
     def stylebench_score(cls, val_list):
         """
         stylebench_score: https://bug-172968-attachments.webkit.org/attachment.cgi?id=319888
         """
         correctionFactor = 3
         results = [i for i, j in val_list]
@@ -1003,34 +931,28 @@ class Output(object):
         #     75 entries for test before the sum.
         #
         # We receive 76 entries per test, which ads up to 380. We want to use
         # the 5 test entries, not the rest.
         if len(results) != 380:
             raise Exception("StyleBench has 380 entries, found: %s instead" % len(results))
 
         results = results[75::76]
-        score = 60 * 1000 / filters.geometric_mean(results) / correctionFactor
+        score = 60 * 1000 / filter.geometric_mean(results) / correctionFactor
         return score
 
     @classmethod
     def sunspider_score(cls, val_list):
         results = [i for i, j in val_list]
         return sum(results)
 
     @classmethod
     def assorted_dom_score(cls, val_list):
         results = [i for i, j in val_list]
-        return round(filters.geometric_mean(results), 2)
-
-    @classmethod
-    def youtube_playback_performance_score(cls, val_list):
-        """Calculate percentage of failed tests."""
-        results = [i for i, j in val_list]
-        return round(filters.mean(results), 2)
+        return round(filter.geometric_mean(results), 2)
 
     @classmethod
     def supporting_data_total(cls, val_list):
         results = [i for i, j in val_list]
         return sum(results)
 
     def construct_summary(self, vals, testname):
         if testname.startswith('raptor-v8_7'):
@@ -1050,16 +972,14 @@ class Output(object):
         elif testname.startswith('raptor-webaudio'):
             return self.webaudio_score(vals)
         elif testname.startswith('raptor-assorted-dom'):
             return self.assorted_dom_score(vals)
         elif testname.startswith('raptor-wasm-misc'):
             return self.wasm_misc_score(vals)
         elif testname.startswith('raptor-wasm-godot'):
             return self.wasm_godot_score(vals)
-        elif testname.startswith('raptor-youtube-playback'):
-            return self.youtube_playback_performance_score(vals)
         elif testname.startswith('supporting_data'):
             return self.supporting_data_total(vals)
         elif len(vals) > 1:
-            return round(filters.geometric_mean([i for i, j in vals]), 2)
+            return round(filter.geometric_mean([i for i, j in vals]), 2)
         else:
-            return round(filters.mean([i for i, j in vals]), 2)
+            return round(filter.mean([i for i, j in vals]), 2)
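
The largest removal in output.py is the YouTube playback parser and its score function. Per the backed-out docstring, the core of that parser is the ratio of dropped to decoded frames per video, emitted alongside the raw counts as three subtests. A condensed, standalone sketch of that calculation, using the sample measurement shown in the removed docstring (this is not the removed class method itself):

    # Condensed sketch of the backed-out dropped-frames calculation.
    # Sample measurement in the shape described by the removed docstring.
    pagecycle = {u'PlaybackPerf.VP9.2160p60@2X': {u'droppedFrames': 1, u'decodedFrames': 796}}

    for name, value in pagecycle.items():
        try:
            percent_dropped = float(value['droppedFrames']) / value['decodedFrames']
        except ZeroDivisionError:
            # if no frames were decoded, the playback failed completely
            percent_dropped = 1

        # strip the "PlaybackPerf." prefix, as the removed parser did
        name = name.split('PlaybackPerf.', 1)[-1]

        print("%s_decoded_frames: %s" % (name, value['decodedFrames']))
        print("%s_dropped_frames: %s" % (name, value['droppedFrames']))
        print("%s_%%_dropped_frames: %.5f" % (name, percent_dropped))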
--- a/testing/raptor/raptor/raptor.ini
+++ b/testing/raptor/raptor/raptor.ini
@@ -50,17 +50,16 @@
 # raptor benchmark tests
 [include:tests/raptor-assorted-dom.ini]
 [include:tests/raptor-motionmark-animometer.ini]
 [include:tests/raptor-motionmark-htmlsuite.ini]
 [include:tests/raptor-speedometer.ini]
 [include:tests/raptor-stylebench.ini]
 [include:tests/raptor-sunspider.ini]
 [include:tests/raptor-unity-webgl.ini]
-[include:tests/raptor-youtube-playback.ini]
 [include:tests/raptor-wasm-godot.ini]
 [include:tests/raptor-wasm-godot-baseline.ini]
 [include:tests/raptor-wasm-godot-ion.ini]
 [include:tests/raptor-wasm-godot-cranelift.ini]
 [include:tests/raptor-wasm-misc.ini]
 [include:tests/raptor-wasm-misc-baseline.ini]
 [include:tests/raptor-wasm-misc-ion.ini]
 [include:tests/raptor-wasm-misc-cranelift.ini]
deleted file mode 100644
--- a/testing/raptor/raptor/tests/raptor-youtube-playback.ini
+++ /dev/null
@@ -1,32 +0,0 @@
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-# Youtube playback performance benchmark
-#
-# Original location of source and media files:
-#   https://ytlr-cert.appspot.com/2019/main.html?test_type=playbackperf-test
-
-[DEFAULT]
-type =  benchmark
-use_live_sites = true
-gecko_profile_interval = 1
-gecko_profile_entries = 14000000
-gecko_profile_threads = MediaPlayback
-test_url = http://yttest.dev.mozaws.net/2019/main.html?test_type=playbackperf-test&raptor=true&command=run&exclude=1,2
-page_cycles = 1
-# account for a page cycle duration of at maximum 45 minutes
-page_timeout = 2700000
-alert_threshold = 2.0
-lower_is_better = true
-unit = score
-subtest_lower_is_better = true
-subtest_unit = score
-# TODO: Allow the host / port option in the manifest (Bug 1547932)
-preferences = {"network.proxy.type": 0}
-
-[raptor-youtube-playback-firefox]
-apps = firefox
-
-[raptor-youtube-playback-geckoview]
-apps = geckoview
--- a/testing/raptor/test/test_manifest.py
+++ b/testing/raptor/test/test_manifest.py
@@ -46,17 +46,16 @@ VALID_MANIFESTS = [{
     'lower_is_better': True,
     'manifest': 'valid_details_1',
     'measure': 'fnbpaint, fcb',
     'page_cycles': 25,
     'test_url': 'http://www.test-url/goes/here',
     'type': 'pageload',
     'unit': 'ms',
 
-    'alert_change_type': None,
     'alert_on': None,
     'playback': None,
 }, {
     # page load test for geckoview
     'alert_threshold': 2.0,
     'apps': 'geckoview',
     'browser_cycles': 10,
     'cold': True,
--- a/testing/raptor/webext/raptor/manifest.json
+++ b/testing/raptor/webext/raptor/manifest.json
@@ -47,28 +47,25 @@
                   "*://*.yahoo.com/*",
                   "*://*.youtube.com/*",
                   "*://*.yandex.ru/*"
                   ],
       "js": ["pageload.js"],
       "run_at": "document_end"
     },
     {
-      "matches": [
-        "*://*/Speedometer/index.html*",
-        "*://*/StyleBench/*",
-        "*://*/MotionMark/*",
-        "*://*/SunSpider/*",
-        "*://*/webaudio/*",
-        "*://*/unity-webgl/index.html*",
-        "*://*/wasm-misc/index.html*",
-        "*://*/wasm-godot/index.html*",
-        "*://*/assorted-dom/assorted/results.html*",
-        "*://*.mozaws.net/*"
-      ],
+      "matches": ["*://*/Speedometer/index.html*",
+                  "*://*/StyleBench/*",
+                  "*://*/MotionMark/*",
+                  "*://*/SunSpider/*",
+                  "*://*/webaudio/*",
+                  "*://*/unity-webgl/index.html*",
+                  "*://*/wasm-misc/index.html*",
+                  "*://*/wasm-godot/index.html*",
+                  "*://*/assorted-dom/assorted/results.html*"],
       "js": ["benchmark.js"],
       "run_at": "document_end"
     }
   ],
   "browser_action": {
     "browser_style": true,
     "default_icon": "icon.png",
     "default_title": "Raptor LOADED"
--- a/testing/raptor/webext/raptor/runner.js
+++ b/testing/raptor/webext/raptor/runner.js
@@ -63,28 +63,25 @@ var isBenchmarkPending = false;
 var pageTimeout = 10000; // default pageload timeout
 var geckoProfiling = false;
 var geckoInterval = 1;
 var geckoEntries = 1000000;
 var geckoThreads = [];
 var debugMode = 0;
 var screenCapture = false;
 
-var results = {
-  "name": "",
-  "page": "",
-  "type": "",
-  "browser_cycle": 0,
-  "expected_browser_cycles": 0,
-  "cold": false,
-  "lower_is_better": true,
-  "alert_change_type": "relative",
-  "alert_threshold": 2.0,
-  "measurements": {},
-};
+var results = {"name": "",
+               "page": "",
+               "type": "",
+               "browser_cycle": 0,
+               "expected_browser_cycles": 0,
+               "cold": false,
+               "lower_is_better": true,
+               "alert_threshold": 2.0,
+               "measurements": {}};
 
 function getTestSettings() {
   console.log("getting test settings from control server");
   return new Promise(resolve => {
     fetch(settingsURL).then(function(response) {
       response.text().then(function(text) {
         console.log(text);
         settings = JSON.parse(text)["raptor-options"];
@@ -106,28 +103,27 @@ function getTestSettings() {
 
         if (host) {
           // just replace the '<host>' keyword in the URL with actual host
           testURL = testURL.replace("<host>", host);
         }
 
         console.log(`testURL: ${testURL}`);
 
-        results.alert_change_type = settings.alert_change_type;
-        results.alert_threshold = settings.alert_threshold;
-        results.browser_cycle = browserCycle;
-        results.cold = settings.cold;
-        results.expected_browser_cycles = settings.expected_browser_cycles;
-        results.lower_is_better = settings.lower_is_better === true;
-        results.name = testName;
         results.page = testURL;
         results.type = testType;
+        results.name = testName;
+        results.browser_cycle = browserCycle;
+        results.expected_browser_cycles = settings.expected_browser_cycles;
+        results.cold = settings.cold;
         results.unit = settings.unit;
         results.subtest_unit = settings.subtest_unit;
+        results.lower_is_better = settings.lower_is_better === true;
         results.subtest_lower_is_better = settings.subtest_lower_is_better === true;
+        results.alert_threshold = settings.alert_threshold;
 
         if (settings.gecko_profile === true) {
           results.extra_options = ["gecko_profile"];
 
           geckoProfiling = true;
           geckoEntries = settings.gecko_profile_entries;
           geckoInterval = settings.gecko_profile_interval;
           geckoThreads = settings.gecko_profile_threads;