Bug 1460741 - Add 'speedometer' benchmark to raptor for firefox; r=jmaher
author Rob Wood <rwood@mozilla.com>
date Tue, 15 May 2018 14:50:48 -0400
changeset 421297 225b70969911f6a5b45bf3282c1de6860cbd43b3
parent 421296 574f89a0933433d4913f966552ed9fa9de28b017
child 421298 39a77ae8f358155181cd0e4d660b62b93150a133
push id 34091
push user btara@mozilla.com
push date Tue, 05 Jun 2018 13:52:34 +0000
treeherder mozilla-central@752465b44c79
reviewers jmaher
bugs 1460741
milestone 62.0a1
Bug 1460741 - Add 'speedometer' benchmark to raptor for firefox; r=jmaher MozReview-Commit-ID: 6eTJhUJv3y9
testing/mozharness/mozharness/mozilla/testing/raptor.py
testing/raptor/raptor/benchmark.py
testing/raptor/raptor/cmdline.py
testing/raptor/raptor/control_server.py
testing/raptor/raptor/gen_test_config.py
testing/raptor/raptor/manifest.py
testing/raptor/raptor/output.py
testing/raptor/raptor/outputhandler.py
testing/raptor/raptor/playback/mitmproxy.py
testing/raptor/raptor/raptor.ini
testing/raptor/raptor/raptor.py
testing/raptor/raptor/results.py
testing/raptor/raptor/tests/raptor-speedometer.ini
testing/raptor/requirements.txt
testing/raptor/webext/raptor/benchmark-relay.js
testing/raptor/webext/raptor/manifest.json
testing/raptor/webext/raptor/measure.js
testing/raptor/webext/raptor/runner.js
third_party/webkit/PerformanceTests/Speedometer/resources/benchmark-report.js
--- a/testing/mozharness/mozharness/mozilla/testing/raptor.py
+++ b/testing/mozharness/mozharness/mozilla/testing/raptor.py
@@ -136,22 +136,26 @@ class Raptor(TestingMixin, MercurialScri
         kw_options = {'binary': binary_path}
         # options overwritten from **kw
         if 'test' in self.config:
             kw_options['test'] = self.config['test']
         if self.config.get('branch'):
             kw_options['branchName'] = self.config['branch']
         if self.symbols_path:
             kw_options['symbolsPath'] = self.symbols_path
+        if self.config.get('obj_path', None) is not None:
+            kw_options['obj-path'] = self.config['obj_path']
         kw_options.update(kw)
         # configure profiling options
         options.extend(self.query_gecko_profile_options())
         # extra arguments
         if args is not None:
             options += args
+        if self.config.get('run_local', False):
+            options.extend(['--run-local'])
         if 'raptor_extra_options' in self.config:
             options += self.config['raptor_extra_options']
         if self.config.get('code_coverage', False):
             options.extend(['--code-coverage'])
         for key, value in kw_options.items():
             options.extend(['--%s' % key, value])
         return options
 
new file mode 100644
--- /dev/null
+++ b/testing/raptor/raptor/benchmark.py
@@ -0,0 +1,114 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import
+
+import os
+import shutil
+import socket
+
+from mozlog import get_proxy_logger
+
+from wptserve import server, handlers
+
+LOG = get_proxy_logger(component="raptor-benchmark")
+here = os.path.abspath(os.path.dirname(__file__))
+
+
+class Benchmark(object):
+    """utility class for running benchmarks in raptor"""
+
+    def __init__(self, config, test):
+        self.config = config
+        self.test = test
+
+        # bench_dir is where the benchmark source is copied to and served from
+        # when running locally it comes from obj_path via mozharness/mach
+        if self.config.get("obj_path", None) is not None:
+            self.bench_dir = self.config.get("obj_path")
+        else:
+            # in production it is ../tasks/task_N/build/tests/raptor/raptor/...
+            # 'here' is that path, we can start with that
+            self.bench_dir = here
+
+        # now add path for benchmark source; locally we put it in a raptor benchmarks
+        # folder; in production the files are automatically copied to a different dir
+        if self.config.get('run_local', False):
+            self.bench_dir = os.path.join(self.bench_dir, 'testing', 'raptor', 'benchmarks')
+        else:
+            self.bench_dir = os.path.join(self.bench_dir, 'tests', 'webkit', 'PerformanceTests')
+
+        LOG.info("bench_dir to be used for benchmark source: %s" % self.bench_dir)
+        if not os.path.exists(self.bench_dir):
+            os.makedirs(self.bench_dir)
+
+        # when running locally we need to get the benchmark source
+        if self.config.get('run_local', False):
+            self.get_webkit_source()
+
+        LOG.info("bench_dir contains:")
+        LOG.info(os.listdir(self.bench_dir))
+
+        # now have the benchmark source ready, go ahead and serve it up!
+        self.start_http_server()
+
+    def get_webkit_source(self):
+        # in production the build system auto copies webkit source into place;
+        # but when run locally we need to do this manually, so that raptor can find it
+        if 'speedometer' in self.test['name']:
+            # we only want to copy over the source for the benchmark that is about to run
+            dest = os.path.join(self.bench_dir, 'Speedometer')
+            src = os.path.join(os.environ['MOZ_DEVELOPER_REPO_DIR'], 'third_party',
+                               'webkit', 'PerformanceTests', 'Speedometer')
+        else:
+            # otherwise copy everything; ideally each benchmark should get its own case above
+            dest = self.bench_dir
+            # source for all benchmarks is repo/third_party...
+            src = os.path.join(os.environ['MOZ_DEVELOPER_REPO_DIR'], 'third_party',
+                               'webkit', 'PerformanceTests')
+
+        if os.path.exists(dest):
+            LOG.info("benchmark source already exists at: %s" % dest)
+            return
+
+        LOG.info("copying webkit benchmarks from %s to %s" % (src, dest))
+        try:
+            shutil.copytree(src, dest)
+        except Exception:
+            LOG.critical("error copying webkit benchmarks from %s to %s" % (src, dest))
+
+    def start_http_server(self):
+        self.write_server_headers()
+
+        # pick a free port
+        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        sock.bind(('', 0))
+        self.port = sock.getsockname()[1]
+        sock.close()
+        _webserver = '127.0.0.1:%d' % self.port
+
+        self.httpd = self.setup_webserver(_webserver)
+        self.httpd.start()
+
+    def write_server_headers(self):
+        # to add specific headers for serving files via wptserve, write out a headers dir file
+        # see http://wptserve.readthedocs.io/en/latest/handlers.html#file-handlers
+        LOG.info("writing wptserve headers file")
+        headers_file = os.path.join(self.bench_dir, '__dir__.headers')
+        file = open(headers_file, 'w')
+        file.write("Access-Control-Allow-Origin: *")
+        file.close()
+        LOG.info("wrote wpt headers file: %s" % headers_file)
+
+    def setup_webserver(self, webserver):
+        LOG.info("starting webserver on %r" % webserver)
+        LOG.info("serving benchmarks from here: %s" % self.bench_dir)
+        self.host, self.port = webserver.split(':')
+
+        return server.WebTestHttpd(port=int(self.port), doc_root=self.bench_dir,
+                                   routes=[("GET", "*", handlers.file_handler)])
+
+    def stop_serve(self):
+        LOG.info("TODO: stop serving benchmark source")
+        pass
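
For context, a minimal sketch of how this class is expected to be driven; the real call is added to raptor.py's run_test() later in this patch, and the config/test values and obj path below are illustrative only:

    # illustrative sketch, not part of the changeset; assumes a local run
    # with MOZ_DEVELOPER_REPO_DIR set and a hypothetical objdir path
    config = {'run_local': True, 'obj_path': '/path/to/objdir'}
    test = {'name': 'raptor-speedometer', 'type': 'benchmark'}
    benchmark = Benchmark(config, test)  # copies Speedometer source, starts wptserve
    print("benchmark source served on port %s" % benchmark.port)
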
--- a/testing/raptor/raptor/cmdline.py
+++ b/testing/raptor/raptor/cmdline.py
@@ -16,20 +16,24 @@ def create_parser(mach_interface=False):
     add_arg('-t', '--test', required=True, dest='test',
             help="name of raptor test to run")
     if not mach_interface:
         add_arg('--app', default='firefox', dest='app',
                 help="name of the application we are testing (default: firefox)",
                 choices=['firefox', 'chrome'])
         add_arg('-b', '--binary', required=True, dest='binary',
                 help="path to the browser executable that we are testing")
-        add_arg('--branchName', dest="branch_name", default='',
+        add_arg('--branchName', dest="branch_name", default=None,
                 help="Name of the branch we are testing on")
         add_arg('--symbolsPath', dest='symbols_path',
                 help="Path to the symbols for the build we are testing")
+        add_arg('--run-local', dest="run_local", default=False, action="store_true",
+                help="Flag that indicates if raptor is running locally or in production")
+        add_arg('--obj-path', dest="obj_path", default=None,
+                help="Browser build obj_path (received when running in production)")
 
     add_logging_group(parser)
     return parser
 
 
 def verify_options(parser, args):
     ctx = vars(args)
 
--- a/testing/raptor/raptor/control_server.py
+++ b/testing/raptor/raptor/control_server.py
@@ -9,17 +9,17 @@ from __future__ import absolute_import
 import BaseHTTPServer
 import json
 import os
 import socket
 import threading
 
 from mozlog import get_proxy_logger
 
-LOG = get_proxy_logger(component='control_server')
+LOG = get_proxy_logger(component='raptor-control-server')
 
 here = os.path.abspath(os.path.dirname(__file__))
 
 
 def MakeCustomHandlerClass(results_handler, shutdown_browser):
 
     class MyHandler(BaseHTTPServer.BaseHTTPRequestHandler, object):
 
--- a/testing/raptor/raptor/gen_test_config.py
+++ b/testing/raptor/raptor/gen_test_config.py
@@ -5,31 +5,32 @@ from __future__ import absolute_import
 
 import os
 
 from mozlog import get_proxy_logger
 
 
 here = os.path.abspath(os.path.dirname(__file__))
 webext_dir = os.path.join(os.path.dirname(here), 'webext', 'raptor')
-LOG = get_proxy_logger(component="gen_test_url")
+LOG = get_proxy_logger(component="raptor-gen-test-config")
 
 
-def gen_test_config(browser, test, cs_port):
-    LOG.info("writing test settings url background js, so webext can get it")
+def gen_test_config(browser, test, cs_port, b_port=0):
+    LOG.info("writing test settings into background js, so webext can get it")
 
     data = """// this file is auto-generated by raptor, do not edit directly
 function getTestConfig() {
     return {"browser": "%s",
             "cs_port": "%d",
             "test_name": "%s",
-            "test_settings_url": "http://localhost:%d/%s.json"};
+            "test_settings_url": "http://localhost:%d/%s.json",
+            "benchmark_port": "%d"};
 }
 
-""" % (browser, cs_port, test, cs_port, test)
+""" % (browser, cs_port, test, cs_port, test, b_port)
 
     webext_background_script = (os.path.join(webext_dir, "auto_gen_test_config.js"))
 
     file = open(webext_background_script, "w")
     file.write(data)
     file.close()
 
-    LOG.info("finished writing test config into webext")
+    LOG.info("finished writing test config to %s" % webext_background_script)
--- a/testing/raptor/raptor/manifest.py
+++ b/testing/raptor/raptor/manifest.py
@@ -7,17 +7,17 @@ import json
 import os
 
 from manifestparser import TestManifest
 from mozlog import get_proxy_logger
 
 here = os.path.abspath(os.path.dirname(__file__))
 raptor_ini = os.path.join(here, 'raptor.ini')
 tests_dir = os.path.join(here, 'tests')
-LOG = get_proxy_logger(component="manifest")
+LOG = get_proxy_logger(component="raptor-manifest")
 
 required_settings = ['apps', 'type', 'page_cycles', 'test_url', 'measure',
                      'unit', 'lower_is_better', 'alert_threshold']
 
 playback_settings = ['playback_binary_manifest', 'playback_binary_zip_mac',
                      'playback_pageset_manifest', 'playback_pageset_zip_mac',
                      'playback_recordings']
 
@@ -38,16 +38,19 @@ def get_browser_test_list(browser_app):
                                       **info)
 
 
 def validate_test_ini(test_details):
     # validate all required test details were found in the test INI
     valid_settings = True
 
     for setting in required_settings:
+        # measure setting not required for benchmark type tests
+        if setting == 'measure' and test_details['type'] == 'benchmark':
+            continue
         if setting not in test_details:
             valid_settings = False
             LOG.info("setting '%s' is required but not found in %s"
                      % (setting, test_details['manifest']))
 
     # if playback is specified, we need more playback settings
     if 'playback' in test_details:
         for setting in playback_settings:
@@ -76,18 +79,20 @@ def write_test_settings_json(test_detail
             test_settings['raptor-options']['measure']['fnbpaint'] = True
         if "fcp" in test_details['measure']:
             test_settings['raptor-options']['measure']['fcp'] = True
         if "hero" in test_details['measure']:
             test_settings['raptor-options']['measure']['hero'] = test_details['hero'].split()
     if test_details.get("page_timeout", None) is not None:
         test_settings['raptor-options']['page_timeout'] = int(test_details['page_timeout'])
     test_settings['raptor-options']['unit'] = test_details.get("unit", "ms")
-    test_settings['raptor-options']['lower_is_better'] = \
-        bool(test_details.get("lower_is_better", True))
+    if test_details.get("lower_is_better", "true") == "false":
+        test_settings['raptor-options']['lower_is_better'] = False
+    else:
+        test_settings['raptor-options']['lower_is_better'] = True
     if test_details.get("alert_threshold", None) is not None:
         test_settings['raptor-options']['alert_threshold'] = float(test_details['alert_threshold'])
 
     settings_file = os.path.join(tests_dir, test_details['name'] + '.json')
     try:
         with open(settings_file, 'w') as out_file:
             json.dump(test_settings, out_file, indent=4, ensure_ascii=False)
             out_file.close()
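
To show how the INI settings flow through to the webext, the settings JSON written here for the new speedometer test would look roughly like the Python dict below; this is a sketch based on the raptor-speedometer.ini added later in this patch, and the type/test_url/page_cycles keys are not visible in this hunk and are assumed:

    # approximate content of tests/raptor-speedometer.json (illustrative)
    test_settings = {
        'raptor-options': {
            'type': 'benchmark',       # assumed key, not shown in this hunk
            'test_url': 'http://localhost:<port>/Speedometer/index.html?raptor',
            'page_cycles': 5,          # assumed key, not shown in this hunk
            'page_timeout': 120000,
            'unit': 'score',
            'lower_is_better': False,
            'alert_threshold': 2.0,
        }
    }
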
--- a/testing/raptor/raptor/output.py
+++ b/testing/raptor/raptor/output.py
@@ -42,46 +42,113 @@ class Output(object):
         if len(self.results) == 0:
             LOG.error("error: no raptor test results found!")
             return
 
         for test in self.results:
             subtests = []
             suite = {
                 'name': test.name,
+                'type': test.type,
                 'extraOptions': test.extra_options,
-                'subtests': subtests
+                'subtests': subtests,
+                'lowerIsBetter': test.lower_is_better,
+                'alertThreshold': float(test.alert_threshold)
             }
 
             suites.append(suite)
 
-            # each test can report multiple measurements per pageload
-            # each measurement becomes a subtest inside the 'suite'
-            for key, values in test.measurements.iteritems():
-                new_subtest = {}
-                new_subtest['name'] = test.name + "-" + key
-                new_subtest['replicates'] = values
-                new_subtest['lower_is_better'] = test.lower_is_better
-                new_subtest['alert_threshold'] = float(test.alert_threshold)
-                new_subtest['value'] = 0
-                new_subtest['unit'] = test.unit
+            # process results for pageloader type of tests
+            if test.type == "pageload":
+                # each test can report multiple measurements per pageload
+                # each measurement becomes a subtest inside the 'suite'
+
+                # this is the format we receive the results in from the pageload test
+                # i.e. one test (subtest) in raptor-firefox-tp6:
+
+                # {u'name': u'raptor-firefox-tp6-amazon', u'type': u'pageload', u'measurements':
+                # {u'fnbpaint': [788, 315, 334, 286, 318, 276, 296, 296, 292, 285, 268, 277, 274,
+                # 328, 295, 290, 286, 270, 279, 280, 346, 303, 308, 398, 281]}, u'browser':
+                # u'Firefox 62.0a1 20180528123052', u'lower_is_better': True, u'page':
+                # u'https://www.amazon.com/s/url=search-alias%3Daps&field-keywords=laptop',
+                # u'unit': u'ms', u'alert_threshold': 2}
+
+                for key, values in test.measurements.iteritems():
+                    new_subtest = {}
+                    new_subtest['name'] = test.name + "-" + key
+                    new_subtest['replicates'] = values
+                    new_subtest['lowerIsBetter'] = test.lower_is_better
+                    new_subtest['alertThreshold'] = float(test.alert_threshold)
+                    new_subtest['value'] = 0
+                    new_subtest['unit'] = test.unit
+
+                    filtered_values = filter.ignore_first(new_subtest['replicates'], 1)
+                    new_subtest['value'] = filter.median(filtered_values)
+                    vals.append(new_subtest['value'])
+
+                    subtests.append(new_subtest)
+
+            elif test.type == "benchmark":
+                # each benchmark 'index' becomes a subtest; each pagecycle / iteration
+                # of the test has multiple values per index/subtest
+
+                # this is the format we receive the results in from the benchmark
+                # i.e. this is ONE pagecycle of speedometer:
 
-                filtered_values = filter.ignore_first(new_subtest['replicates'], 1)
-                new_subtest['value'] = filter.median(filtered_values)
-                vals.append(new_subtest['value'])
+                # {u'name': u'raptor-speedometer', u'type': u'benchmark', u'measurements':
+                # {u'speedometer': [[{u'AngularJS-TodoMVC/DeletingAllItems': [147.3000000000011,
+                # 149.95999999999913, 143.29999999999927, 150.34000000000378, 257.6999999999971],
+                # u'Inferno-TodoMVC/CompletingAllItems/Sync': [88.03999999999996,
+                # 85.60000000000036, 94.18000000000029, 95.19999999999709, 86.47999999999593],
+                # u'AngularJS-TodoMVC': [518.2400000000016, 525.8199999999997, 610.5199999999968,
+                # 532.8200000000215, 640.1800000000003], ...(repeated for each index/subtest)}]]},
+                # u'browser': u'Firefox 62.0a1 20180528123052', u'lower_is_better': False, u'page':
+                # u'http://localhost:55019/Speedometer/index.html?raptor', u'unit': u'score',
+                # u'alert_threshold': 2}
+
+                for page_cycle in test.measurements['speedometer']:
+                    page_cycle_results = page_cycle[0]
 
-                subtests.append(new_subtest)
+                    for sub, replicates in page_cycle_results.iteritems():
+                        # for each pagecycle, replicates are appended to each subtest
+                        # so if it doesn't exist the first time create the subtest entry
+                        existing = False
+                        for existing_sub in subtests:
+                            if existing_sub['name'] == sub:
+                                # pagecycle, subtest already there, so append the replicates
+                                existing_sub['replicates'].extend(replicates)
+                                # update the value now that we have more replicates
+                                existing_sub['value'] = filter.median(existing_sub['replicates'])
+                                # now need to update our vals list too since have new subtest value
+                                for existing_val in vals:
+                                    if existing_val[1] == sub:
+                                        existing_val[0] = existing_sub['value']
+                                        break
+                                existing = True
+                                break
+
+                        if not existing:
+                            # subtest not added yet, first pagecycle, so add new one
+                            new_subtest = {}
+                            new_subtest['name'] = sub
+                            new_subtest['replicates'] = replicates
+                            new_subtest['lowerIsBetter'] = test.lower_is_better
+                            new_subtest['alertThreshold'] = float(test.alert_threshold)
+                            new_subtest['value'] = filter.median(replicates)
+                            new_subtest['unit'] = test.unit
+                            subtests.append(new_subtest)
+                            vals.append([new_subtest['value'], sub])
+            else:
+                LOG.error("output.summarize received unsupported test results type")
+                return
 
         # if there is more than one subtest, calculate a summary result
         if len(subtests) > 1:
             suite['value'] = self.construct_results(vals, testname=test.name)
 
-        LOG.info("returning summarized test results:")
-        LOG.info(test_results)
-
         self.summarized_results = test_results
 
     def output(self):
         """output to file and perfherder data json """
         if self.summarized_results == {}:
             LOG.error("error: no summarized raptor results found!")
             return False
 
@@ -159,24 +226,22 @@ class Output(object):
         if len(results) != 52:
             raise Exception("StyleBench has 52 subtests, found: %s instead" % len(results))
 
         results = results[12::13]
         score = 60 * 1000 / filter.geometric_mean(results) / correctionFactor
         return score
 
     def construct_results(self, vals, testname):
-        if testname.startswith('v8_7'):
+        if testname.startswith('raptor-v8_7'):
             return self.v8_Metric(vals)
-        elif testname.startswith('kraken'):
+        elif testname.startswith('raptor-kraken'):
             return self.JS_Metric(vals)
-        elif testname.startswith('ares6'):
+        elif testname.startswith('raptor-jetstream'):
             return self.benchmark_score(vals)
-        elif testname.startswith('jetstream'):
-            return self.benchmark_score(vals)
-        elif testname.startswith('speedometer'):
+        elif testname.startswith('raptor-speedometer'):
             return self.speedometer_score(vals)
-        elif testname.startswith('stylebench'):
+        elif testname.startswith('raptor-stylebench'):
             return self.stylebench_score(vals)
         elif len(vals) > 1:
             return filter.geometric_mean([i for i, j in vals])
         else:
             return filter.mean([i for i, j in vals])
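
The speedometer_score helper called above is outside this hunk; by analogy with the stylebench_score shown earlier, a plausible sketch follows. The 160-value count, the [9::10] slice that keeps only each suite's total, and the correction factor of 3 are assumptions, not confirmed by this diff:

    # hedged sketch of speedometer_score, modeled on stylebench_score above
    def speedometer_score(self, vals):
        correctionFactor = 3
        results = [i for i, j in vals]
        # assume 16 suites x 10 values each (9 subtests plus a total);
        # keep only each suite's total before averaging
        if len(results) != 160:
            raise Exception("Speedometer has 160 subtests, found: %s instead" % len(results))
        results = results[9::10]
        return 60 * 1000 / filter.geometric_mean(results) / correctionFactor
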
--- a/testing/raptor/raptor/outputhandler.py
+++ b/testing/raptor/raptor/outputhandler.py
@@ -5,17 +5,17 @@
 # originally from talos_process.py
 from __future__ import absolute_import
 
 import json
 
 from mozlog import get_proxy_logger
 
 
-LOG = get_proxy_logger(component='raptor_process')
+LOG = get_proxy_logger(component='raptor-output-handler')
 
 
 class OutputHandler(object):
     def __init__(self):
         self.proc = None
 
     def __call__(self, line):
         if not line.strip():
--- a/testing/raptor/raptor/playback/mitmproxy.py
+++ b/testing/raptor/raptor/playback/mitmproxy.py
@@ -14,17 +14,17 @@ import time
 import mozinfo
 
 from mozlog import get_proxy_logger
 from mozprocess import ProcessHandler
 
 from .base import Playback
 
 here = os.path.dirname(os.path.realpath(__file__))
-LOG = get_proxy_logger(component='mitmproxy')
+LOG = get_proxy_logger(component='raptor-mitmproxy')
 
 mozharness_dir = os.path.join(here, '../../../mozharness')
 sys.path.insert(0, mozharness_dir)
 
 external_tools_path = os.environ.get('EXTERNALTOOLSPATH', None)
 
 if external_tools_path is not None:
     # running in production via mozharness
@@ -67,63 +67,59 @@ pref("network.proxy.ssl_port", 8080);
 class Mitmproxy(Playback):
 
     def __init__(self, config):
         self.config = config
         self.mitmproxy_proc = None
         self.recordings = config.get('playback_recordings', None)
         self.browser_path = config.get('binary', None)
 
-        # bindir is where we will download all mitmproxy required files
-        # if invoved via mach we will have received this in config; otherwise
-        # not running via mach (invoved direcdtly in testing/raptor) so figure it out
+        # raptor_dir is where we will download all mitmproxy required files
+        # when running locally it comes from obj_path via mozharness/mach
         if self.config.get("obj_path", None) is not None:
-            self.bindir = self.config.get("obj_path")
+            self.raptor_dir = self.config.get("obj_path")
         else:
-            # bit of a pain to get object dir when not running via mach - need to go from
-            # the binary folder i.e.
-            # /mozilla-unified/obj-x86_64-apple-darwin17.4.0/dist/Nightly.app/Contents/MacOS/
-            # back to:
-            # mozilla-unified/obj-x86_64-apple-darwin17.4.0/
-            # note, this may need to be updated per platform
-            self.bindir = os.path.normpath(os.path.join(self.config['binary'],
-                                                        '..', '..', '..', '..',
-                                                        '..', 'testing', 'raptor'))
+            # in production it is ../tasks/task_N/build/; that dir is not
+            # available as an env var, however MOZ_UPLOAD_DIR is set to
+            # ../tasks/task_N/build/blobber_upload_dir, so take that and go up 1 level
+            self.raptor_dir = os.path.dirname(os.path.dirname(os.environ['MOZ_UPLOAD_DIR']))
 
-        self.recordings_path = self.bindir
-        LOG.info("bindir to be used for mitmproxy downloads and exe files: %s" % self.bindir)
+        # add raptor to raptor_dir
+        self.raptor_dir = os.path.join(self.raptor_dir, "testing", "raptor")
+        self.recordings_path = self.raptor_dir
+        LOG.info("raptor_dir used for mitmproxy downloads and exe files: %s" % self.raptor_dir)
 
         # go ahead and download and setup mitmproxy
         self.download()
         # mitmproxy must be started before setup, so that the CA cert is available
         self.start()
         self.setup()
 
     def _tooltool_fetch(self, manifest):
         def outputHandler(line):
             LOG.info(line)
         command = [sys.executable, TOOLTOOL_PATH, 'fetch', '-o', '-m', manifest]
 
         proc = ProcessHandler(
             command, processOutputLine=outputHandler, storeOutput=False,
-            cwd=self.bindir)
+            cwd=self.raptor_dir)
 
         proc.run()
 
         try:
             proc.wait()
         except Exception:
             if proc.poll() is None:
                 proc.kill(signal.SIGTERM)
 
     def download(self):
         # download mitmproxy binary and pageset using tooltool
         # note: tooltool automatically unpacks the files as well
-        if not os.path.exists(self.bindir):
-            os.makedirs(self.bindir)
+        if not os.path.exists(self.raptor_dir):
+            os.makedirs(self.raptor_dir)
         LOG.info("downloading mitmproxy binary")
         _manifest = os.path.join(here, self.config['playback_binary_manifest'])
         self._tooltool_fetch(_manifest)
         LOG.info("downloading mitmproxy pageset")
         _manifest = os.path.join(here, self.config['playback_pageset_manifest'])
         self._tooltool_fetch(_manifest)
         return
 
@@ -134,17 +130,17 @@ class Mitmproxy(Playback):
         scripts_path = os.environ.get('SCRIPTSPATH')
         LOG.info('scripts_path: %s' % str(scripts_path))
         self.install_mitmproxy_cert(self.mitmproxy_proc,
                                     self.browser_path,
                                     str(scripts_path))
         return
 
     def start(self):
-        mitmdump_path = os.path.join(self.bindir, 'mitmdump')
+        mitmdump_path = os.path.join(self.raptor_dir, 'mitmdump')
         recordings_list = self.recordings.split()
         self.mitmproxy_proc = self.start_mitmproxy_playback(mitmdump_path,
                                                             self.recordings_path,
                                                             recordings_list,
                                                             self.browser_path)
         return
 
     def stop(self):
--- a/testing/raptor/raptor/raptor.ini
+++ b/testing/raptor/raptor/raptor.ini
@@ -1,2 +1,3 @@
 # raptor tests
 [include:tests/raptor-firefox-tp6.ini]
+[include:tests/raptor-speedometer.ini]
--- a/testing/raptor/raptor/raptor.py
+++ b/testing/raptor/raptor/raptor.py
@@ -21,39 +21,41 @@ webext_dir = os.path.join(os.path.dirnam
 sys.path.insert(0, here)
 
 try:
     from mozbuild.base import MozbuildObject
     build = MozbuildObject.from_environment(cwd=here)
 except ImportError:
     build = None
 
+from benchmark import Benchmark
 from cmdline import parse_args
 from control_server import RaptorControlServer
 from gen_test_config import gen_test_config
 from outputhandler import OutputHandler
 from manifest import get_raptor_test_list
 from playback import get_playback
 from results import RaptorResultsHandler
 
 
 class Raptor(object):
     """Container class for Raptor"""
 
-    def __init__(self, app, binary):
+    def __init__(self, app, binary, run_local=False, obj_path=None):
         self.config = {}
         self.config['app'] = app
         self.config['binary'] = binary
         self.config['platform'] = mozinfo.os
-
+        self.config['run_local'] = run_local
+        self.config['obj_path'] = obj_path
         self.raptor_venv = os.path.join(os.getcwd(), 'raptor-venv')
-        self.log = get_default_logger(component='raptor')
-        self.addons_installed = False
+        self.log = get_default_logger(component='raptor-main')
         self.control_server = None
         self.playback = None
+        self.benchmark = None
 
         # Create the profile
         self.profile = create_profile(self.config['app'])
 
         # Merge in base profiles
         with open(os.path.join(self.profile_data_dir, 'profiles.json'), 'r') as fh:
             base_profiles = json.load(fh)['raptor']
 
@@ -94,19 +96,30 @@ class Raptor(object):
         self.config['playback_binary_zip'] = test.get(_key, None)
         self.config['playback_pageset_manifest'] = test.get('playback_pageset_manifest', None)
         _key = 'playback_pageset_zip_%s' % self.config['platform']
         self.config['playback_pageset_zip'] = test.get(_key, None)
         self.config['playback_recordings'] = test.get('playback_recordings', None)
 
     def run_test(self, test, timeout=None):
         self.log.info("starting raptor test: %s" % test['name'])
+        self.log.info("test settings: %s" % str(test))
+        self.log.info("raptor config: %s" % str(self.config))
+
+        # benchmark-type tests require the benchmark test to be served out
+        if test.get('type') == "benchmark":
+            self.benchmark = Benchmark(self.config, test)
+            benchmark_port = int(self.benchmark.port)
+        else:
+            benchmark_port = 0
+
         gen_test_config(self.config['app'],
                         test['name'],
-                        self.control_server.port)
+                        self.control_server.port,
+                        benchmark_port)
 
         # must install raptor addon each time because we dynamically update some content
         raptor_webext = os.path.join(webext_dir, 'raptor')
         self.log.info("installing webext %s" % raptor_webext)
         self.profile.addons.install(raptor_webext)
         webext_id = self.profile.addons.addon_details(raptor_webext)['id']
 
         # some tests require tools to playback the test pages
@@ -136,16 +149,28 @@ class Raptor(object):
         self.log.info("removing webext %s" % raptor_webext)
         self.profile.addons.remove_addon(webext_id)
 
         if self.runner.is_running():
             self.log("Application timed out after {} seconds".format(timeout))
             self.runner.stop()
 
     def process_results(self):
+        # when running locally output results in build/raptor.json; when running
+        # in production output to a local.json to be turned into tc job artifact
+        if self.config.get('run_local', False):
+            if 'MOZ_DEVELOPER_REPO_DIR' in os.environ:
+                raptor_json_path = os.path.join(os.environ['MOZ_DEVELOPER_REPO_DIR'],
+                                                'testing', 'mozharness', 'build', 'raptor.json')
+            else:
+                raptor_json_path = os.path.join(here, 'raptor.json')
+        else:
+            raptor_json_path = os.path.join(os.getcwd(), 'local.json')
+
+        self.config['raptor_json_path'] = raptor_json_path
         return self.results_handler.summarize_and_output(self.config)
 
     def clean_up(self):
         self.control_server.stop()
         self.runner.stop()
         self.log.info("finished")
 
 
@@ -162,17 +187,17 @@ def main(args=sys.argv[1:]):
     if len(raptor_test_list) == 0:
         LOG.critical("abort: no tests found")
         sys.exit(1)
 
     LOG.info("raptor tests scheduled to run:")
     for next_test in raptor_test_list:
         LOG.info(next_test['name'])
 
-    raptor = Raptor(args.app, args.binary)
+    raptor = Raptor(args.app, args.binary, args.run_local, args.obj_path)
 
     raptor.start_control_server()
 
     for next_test in raptor_test_list:
         raptor.run_test(next_test)
 
     success = raptor.process_results()
     raptor.clean_up()
--- a/testing/raptor/raptor/results.py
+++ b/testing/raptor/raptor/results.py
@@ -17,17 +17,16 @@ class RaptorResultsHandler():
     """Handle Raptor test results"""
 
     def __init__(self):
         self.results = []
 
     def add(self, new_result_json):
         # add to results
         LOG.info("received results in RaptorResultsHandler.add")
-        LOG.info(new_result_json)
         new_result = RaptorTestResult(new_result_json)
         self.results.append(new_result)
 
     def summarize_and_output(self, test_config):
         # summarize the result data, write to file and output PERFHERDER_DATA
         LOG.info("summarizing raptor test results")
         output = Output(self.results)
         output.summarize()
new file mode 100644
--- /dev/null
+++ b/testing/raptor/raptor/tests/raptor-speedometer.ini
@@ -0,0 +1,15 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# speedometer benchmark for firefox and chrome
+
+[raptor-speedometer]
+apps = firefox
+type = benchmark
+test_url = http://localhost:<port>/Speedometer/index.html?raptor
+page_cycles = 5
+page_timeout = 120000
+unit = score
+lower_is_better = false
+alert_threshold = 2.0
--- a/testing/raptor/requirements.txt
+++ b/testing/raptor/requirements.txt
@@ -1,3 +1,4 @@
 mozrunner ~= 7.0
 mozprofile ~= 1.1
 manifestparser >= 1.1
+wptserve ~= 1.4.0
--- a/testing/raptor/webext/raptor/benchmark-relay.js
+++ b/testing/raptor/webext/raptor/benchmark-relay.js
@@ -1,19 +1,21 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 // receives result from benchmark and relays onto our background runner
 
 function receiveMessage(event) {
-  console.log("received message!");
-  console.log(event.origin);
-  if (event.origin == "http://localhost:8081") {
-    sendResult("speedometer", event.data);
+  console.log("raptor benchmark-relay received message");
+  console.log(event.data);
+  // raptor benchmark message data: [0] is the raptor tag, [1] is the benchmark
+  // name, and the rest is the actual benchmark results that we want to forward
+  if (event.data[0] == "raptor-benchmark") {
+    sendResult(event.data[1], event.data.slice(2));
   }
 }
 
 function sendResult(_type, _value) {
   // send result back to background runner script
   console.log("sending result back to runner: " + _type + " " + _value);
   chrome.runtime.sendMessage({"type": _type, "value": _value}, function(response) {
     console.log(response.text);
--- a/testing/raptor/webext/raptor/manifest.json
+++ b/testing/raptor/webext/raptor/manifest.json
@@ -8,23 +8,26 @@
   "name": "Raptor",
   "version": "0.1",
   "description": "Performance measurement framework prototype",
   "background": {
     "scripts": ["auto_gen_test_config.js", "runner.js"]
   },
   "content_scripts": [
     {
-      "matches": ["<all_urls>"],
+      "matches": ["*://*.amazon.com/*",
+                  "*://*.facebook.com/*",
+                  "*://*.google.com/*",
+                  "*://*.youtube.com/*"],
       "js": ["measure.js"]
     },
     {
-      "matches": ["http://*/Speedometer/index.html*"],
+      "matches": ["*://*/Speedometer/index.html*"],
       "js": ["benchmark-relay.js"]
     }
   ],
   "permissions": [
-    "http://127.0.0.1:8000/",
+    "<all_urls>",
     "tabs",
     "storage",
     "alarms"
   ]
 }
--- a/testing/raptor/webext/raptor/measure.js
+++ b/testing/raptor/webext/raptor/measure.js
@@ -35,41 +35,48 @@ function contentHandler() {
     // chrome, no promise so use callback
     chrome.storage.local.get("settings", function(item) {
       setup(item.settings);
     });
   }
 }
 
 function setup(settings) {
-  if (settings.measure !== undefined) {
-    if (settings.measure.fnbpaint !== undefined) {
-      getFNBPaint = settings.measure.fnbpaint;
-      if (getFNBPaint) {
-        console.log("will be measuring fnbpaint");
-        measureFNBPaint();
-      }
+  if (settings.type != "pageload") {
+    return;
+  }
+
+  if (settings.measure == undefined) {
+    console.log("abort: 'measure' key not found in test settings");
+    return;
+  }
+
+  if (settings.measure.fnbpaint !== undefined) {
+    getFNBPaint = settings.measure.fnbpaint;
+    if (getFNBPaint) {
+      console.log("will be measuring fnbpaint");
+      measureFNBPaint();
     }
-    if (settings.measure.fcp !== undefined) {
-      getFCP = settings.measure.fcp;
-      if (getFCP) {
-        console.log("will be measuring first-contentful-paint");
-        measureFirstContentfulPaint();
-      }
+  }
+
+  if (settings.measure.fcp !== undefined) {
+    getFCP = settings.measure.fcp;
+    if (getFCP) {
+      console.log("will be measuring first-contentful-paint");
+      measureFirstContentfulPaint();
     }
-    if (settings.measure.hero !== undefined) {
-      if (settings.measure.hero.length !== 0) {
-        getHero = true;
-        heroesToCapture = settings.measure.hero;
-        console.log("hero elements to measure: " + heroesToCapture);
-        measureHero();
-      }
+  }
+
+  if (settings.measure.hero !== undefined) {
+    if (settings.measure.hero.length !== 0) {
+      getHero = true;
+      heroesToCapture = settings.measure.hero;
+      console.log("hero elements to measure: " + heroesToCapture);
+      measureHero();
     }
-  } else {
-    console.log("abort: 'measure' key not found in test settings");
   }
 }
 
 function measureHero() {
   var obs = null;
 
   var heroElementsFound = window.document.querySelectorAll("[elementtiming]");
   console.log("found " + heroElementsFound.length + " hero elements in the page");
--- a/testing/raptor/webext/raptor/runner.js
+++ b/testing/raptor/webext/raptor/runner.js
@@ -13,28 +13,28 @@
 // 'python -m SimpleHTTPServer 8081'
 // to serve out the pages that we want to prototype with. Also
 // update the manifest content 'matches' accordingly
 
 // when the browser starts this webext runner will start automatically; we
 // want to give the browser some time (ms) to settle before starting tests
 var postStartupDelay = 30000;
 
-// have an optional delay (ms) between pageload cycles
-var pageloadDelay = 1000;
+// delay (ms) between pageload cycles
+var pageCycleDelay = 1000;
 
 var browserName;
 var ext;
 var testName = null;
 var settingsURL = null;
 var csPort = null;
+var benchmarkPort = null;
 var testType;
 var pageCycles = 0;
 var pageCycle = 0;
-var pageCycleDelay = 1000;
 var testURL;
 var testTabID = 0;
 var getHero = false;
 var getFNBPaint = false;
 var getFCP = false;
 var isHeroPending = false;
 var pendingHeroes = [];
 var settings = {};
@@ -58,16 +58,26 @@ function getTestSettings() {
       response.text().then(function(text) {
         console.log(text);
         settings = JSON.parse(text)["raptor-options"];
 
         // parse the test settings
         testType = settings.type;
         pageCycles = settings.page_cycles;
         testURL = settings.test_url;
+
+        // for pageload type tests, the testURL is fine as is - we don't have
+        // to add a port as it's accessed via proxy and the playback tool
+        // however for benchmark tests, their source is served out on a local
+        // webserver, so we need to swap in the webserver port into the testURL
+        if (testType == "benchmark") {
+          // just replace the '<port>' keyword in the URL with actual benchmarkPort
+          testURL = testURL.replace("<port>", benchmarkPort);
+        }
+
         results.page = testURL;
         results.type = testType;
         results.name = testName;
         results.unit = settings.unit;
         results.lower_is_better = settings.lower_is_better;
         results.alert_threshold = settings.alert_threshold;
 
         if (settings.page_timeout !== undefined) {
@@ -138,25 +148,25 @@ function getBrowserInfo() {
       resolve();
     }
   });
 }
 
 function testTabCreated(tab) {
   testTabID = tab.id;
   console.log("opened new empty tab " + testTabID);
-  setTimeout(nextCycle, pageloadDelay);
+  nextCycle();
 }
 
 async function testTabUpdated(tab) {
   console.log("tab " + tab.id + " reloaded");
   // wait for pageload test result from content
   await waitForResult();
   // move on to next cycle (or test complete)
-  setTimeout(nextCycle, pageloadDelay);
+  nextCycle();
 }
 
 function waitForResult() {
   console.log("awaiting results...");
   return new Promise(resolve => {
     function checkForResult() {
       if (testType == "pageload") {
         if (!isHeroPending && !isFNBPaintPending && !isFCPPending) {
@@ -200,17 +210,17 @@ function nextCycle() {
         }
         if (getFNBPaint)
           isFNBPaintPending = true;
         if (getFCP)
           isFCPPending = true;
       } else if (testType == "benchmark") {
         isBenchmarkPending = true;
       }
-      // reload the test page
+      // (re)load the test page
       ext.tabs.update(testTabID, {url: testURL}, testTabUpdated);
     }, pageCycleDelay);
   } else {
     verifyResults();
   }
 }
 
 function timeoutAlarmListener(alarm) {
@@ -351,16 +361,17 @@ function cleanUp() {
 function runner() {
   let config = getTestConfig();
   console.log("test name is: " + config.test_name);
   console.log("test settings url is: " + config.test_settings_url);
   testName = config.test_name;
   settingsURL = config.test_settings_url;
   csPort = config.cs_port;
   browserName = config.browser;
+  benchmarkPort = config.benchmark_port;
 
   getBrowserInfo().then(function() {
     getTestSettings().then(function() {
       if (testType == "benchmark") {
         // webkit benchmark type of test
         console.log("benchmark test start");
       } else if (testType == "pageload") {
         // standard pageload test
--- a/third_party/webkit/PerformanceTests/Speedometer/resources/benchmark-report.js
+++ b/third_party/webkit/PerformanceTests/Speedometer/resources/benchmark-report.js
@@ -1,12 +1,13 @@
 // This file can be customized to report results as needed.
 
 (function () {
-    if ((!window.testRunner && location.search != '?webkit' && location.hash != '#webkit') && location.search != '?gecko')
+    if ((!window.testRunner && location.search != '?webkit' && location.hash != '#webkit')
+         && location.search != '?gecko' && location.search != '?raptor')
         return;
 
     if (window.testRunner)
         testRunner.waitUntilDone();
 
     var scriptElement = document.createElement('script');
     scriptElement.src = '../resources/runner.js';
     document.head.appendChild(scriptElement);
@@ -68,28 +69,33 @@
                     addToMeasuredValue(suite.total, suiteName, 'Total');
                 }
             });
 
             var fullNames = new Array;
             for (var fullName in measuredValuesByFullName)
                 fullNames.push(fullName);
 
-            if (typeof tpRecordTime !== "undefined") {
+            if (typeof tpRecordTime !== "undefined" || location.search == '?raptor') {
                 var values = new Array;
                 for (var i = 0; i < fullNames.length; i++) {
                     values.push(measuredValuesByFullName[fullNames[i]]);
                 }
                 fullNames = new Array;
                 for (var fullName in measuredValuesByFullName) {
                     for (var count=0; count < this.iterationCount; count++) {
                         fullNames.push(fullName);
                     }
                 }
-                tpRecordTime(values.join(','), 0, fullNames.join(','));
+                if (location.search == '?raptor') {
+                    _data = ['raptor-benchmark', 'speedometer', measuredValuesByFullName];
+                    window.postMessage(_data, '*');
+                } else {
+                    tpRecordTime(values.join(','), 0, fullNames.join(','));
+                }
             } else {
                 for (var i = 0; i < fullNames.length; i++) {
                     var values = measuredValuesByFullName[fullNames[i]];
                     PerfTestRunner.reportValues(createTest(fullNames[i], values.aggregator, i + 1 == fullNames.length), values);
                 }
             }
         }
     };