Bug 1601387 - Avoid some ActiveData over-flows in 'mach test-info report'; r=jmaher
authorGeoff Brown <gbrown@mozilla.com>
Tue, 10 Dec 2019 21:41:34 +0000
changeset 506311 e9c6d571d65110ecc667039780300eb5d82a6188
parent 506310 ca57bc90a3f8580d533b2f9ee1a99e965cd5fbd4
child 506312 85c0c370ad6e9d61d2ce11415ee833d554ec039f
push id36902
push useraciure@mozilla.com
push dateWed, 11 Dec 2019 03:34:51 +0000
treeherdermozilla-central@7635669b8d72 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersjmaher
bugs1601387, 10000, 1596567
milestone73.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1601387 - Avoid some ActiveData over-flows in 'mach test-info report'; r=jmaher Adjust ActiveData queries so that no query matches more than the ActiveData maximum of 10000 records (crashtest excepted, until bug 1596567 is fixed). This provides a dramatic improvement in the quality of run counts and run times reported for wpt and reftests. Also adjusts some of the update mechanisms to handle duplicate paths better. Also adds more (verbose) logging of match counts, report run times, etc in anticipation of further refinements. Differential Revision: https://phabricator.services.mozilla.com/D56616
taskcluster/ci/source-test/file-metadata.yml
testing/testinfo.py
--- a/taskcluster/ci/source-test/file-metadata.yml
+++ b/taskcluster/ci/source-test/file-metadata.yml
@@ -108,10 +108,10 @@ test-info-all:
             - linux64-cbindgen
             - linux64-nasm
             - linux64-node
     run:
         using: run-task
         cwd: '{checkout}'
         command: >-
             source taskcluster/scripts/misc/source-test-common.sh &&
-            ./mach test-info report --show-tests --show-summary --show-activedata --output-file /builds/worker/artifacts/test-info-all-tests.json &&
+            ./mach test-info report --show-tests --show-summary --show-activedata --verbose --output-file /builds/worker/artifacts/test-info-all-tests.json &&
             ./mach test-info report --show-annotations --output-file /builds/worker/artifacts/test-info-manifest-conditions.json
--- a/testing/testinfo.py
+++ b/testing/testinfo.py
@@ -25,28 +25,32 @@ ACTIVEDATA_RECORD_LIMIT = 10000
 class TestInfo(object):
     """
     Support 'mach test-info'.
     """
     def __init__(self, verbose):
         self.verbose = verbose
         here = os.path.abspath(os.path.dirname(__file__))
         self.build_obj = MozbuildObject.from_environment(cwd=here)
+        self.total_activedata_seconds = 0
 
     def log_verbose(self, what):
         if self.verbose:
             print(what)
 
     def activedata_query(self, query):
-        self.log_verbose(datetime.datetime.now())
+        start_time = datetime.datetime.now()
+        self.log_verbose(start_time)
         self.log_verbose(json.dumps(query))
         response = requests.post("http://activedata.allizom.org/query",
                                  data=json.dumps(query),
                                  stream=True)
-        self.log_verbose(datetime.datetime.now())
+        end_time = datetime.datetime.now()
+        self.total_activedata_seconds += (end_time - start_time).total_seconds()
+        self.log_verbose(end_time)
         self.log_verbose(response)
         response.raise_for_status()
         data = response.json()["data"]
         self.log_verbose("response length: %d" % len(data))
         return data
 
 
 class TestInfoTests(TestInfo):
@@ -502,20 +506,28 @@ class TestInfoLongRunningTasks(TestInfo)
 
 class TestInfoReport(TestInfo):
     """
     Support 'mach test-info report': Report of test runs summarized by
     manifest and component.
     """
     def __init__(self, verbose):
         TestInfo.__init__(self, verbose)
+        self.total_activedata_matches = 0
 
     def add_activedata_for_suite(self, branches, days, by_component,
-                                 suite_clause, path_mod):
+                                 suite_clause, tests_clause, path_mod):
         dates_clause = {"date": "today-%dday" % days}
+        where_conditions = [
+            suite_clause,
+            {"in": {"repo.branch.name": branches.split(',')}},
+            {"gt": {"run.timestamp": dates_clause}},
+        ]
+        if tests_clause:
+            where_conditions.append(tests_clause)
         ad_query = {
             "from": "unittest",
             "limit": ACTIVEDATA_RECORD_LIMIT,
             "format": "list",
             "groupby": ["result.test"],
             "select": [
                 {
                     "name": "result.count",
@@ -538,45 +550,53 @@ class TestInfoReport(TestInfo):
                     "name": "result.skips",
                     "value": {"case": [
                         {"when": {"eq": {"result.status": "SKIP"}}, "then": 1}
                     ]},
                     "aggregate": "sum",
                     "default": 0
                 }
             ],
-            "where": {"and": [
-                suite_clause,
-                {"in": {"repo.branch.name": branches.split(',')}},
-                {"gt": {"run.timestamp": dates_clause}}
-            ]}
+            "where": {"and": where_conditions}
         }
         ad_response = self.activedata_query(ad_query)
-        if len(ad_response) == ACTIVEDATA_RECORD_LIMIT:
+        if len(ad_response) >= ACTIVEDATA_RECORD_LIMIT:
             print("warning: limit reached; some data may be missing")
+        matches = 0
         for record in ad_response:
             if 'result' in record:
                 result = record['result']
-                self.update_report(by_component, result, path_mod)
+                if self.update_report(by_component, result, path_mod):
+                    matches += 1
+        self.log_verbose("  %d results; %d matches" % (len(ad_response), matches))
+        self.total_activedata_matches += matches
 
     def update_report(self, by_component, result, path_mod):
+        def update_item(item, label, value):
+            # It is important to include any existing item value in case ActiveData
+            # returns multiple records for the same test; that can happen if the report
+            # sometimes maps more than one ActiveData record to the same path.
+            new_value = item.get(label, 0) + value
+            if type(new_value) == int:
+                item[label] = new_value
+            else:
+                item[label] = round(new_value, 2)
+
         if 'test' in result and 'tests' in by_component:
             test = result['test']
             if path_mod:
                 test = path_mod(test)
             for bc in by_component['tests']:
                 for item in by_component['tests'][bc]:
                     if test == item['test']:
-                        seconds = result.get('duration', 'unknown')
-                        if seconds != 'unknown':
-                            seconds = round(seconds, 2)
-                        item['total run time, seconds'] = seconds
-                        item['total runs'] = result.get('count', 'unknown')
-                        item['skipped runs'] = result.get('skips', 'unknown')
-                        item['failed runs'] = result.get('failures', 'unknown')
+                        seconds = round(result.get('duration', 0), 2)
+                        update_item(item, 'total run time, seconds', seconds)
+                        update_item(item, 'total runs', result.get('count', 0))
+                        update_item(item, 'skipped runs', result.get('skips', 0))
+                        update_item(item, 'failed runs', result.get('failures', 0))
                         return True
         return False
 
     def path_mod_reftest(self, path):
         # "<path1> == <path2>" -> "<path1>"
         path = path.split(' ')[0]
         # "<path>?<params>" -> "<path>"
         path = path.split('?')[0]
@@ -625,42 +645,64 @@ class TestInfoReport(TestInfo):
     def path_mod_jittest(self, path):
         # "part1\part2" -> "part1/part2"
         path = path.replace('\\', os.path.sep)
         # "<path>" -> "js/src/jit-test/tests/<path>"
         return os.path.join('js', 'src', 'jit-test', 'tests', path)
 
     def add_activedata(self, branches, days, by_component):
         suites = {
-            "reftest": self.path_mod_reftest,
-            "web-platform-tests": self.path_mod_wpt,
-            "web-platform-tests-reftests": self.path_mod_wpt,
-            "jsreftest": self.path_mod_jsreftest,
-            "crashtest": self.path_mod_crashtest,
-            "xpcshell": None,
-            "mochitest-plain": None,
-            "jittest": self.path_mod_jittest,
-            "mochitest-browser-chrome": None,
-            "mochitest-media": None,
-            "mochitest-devtools-chrome": None,
-            "marionette": self.path_mod_marionette,
-            "mochitest-chrome": None,
-            "web-platform-tests-wdspec": self.path_mod_wpt,
-            "geckoview-junit": None,
-            "cppunittest": None,
+            # List of known suites requiring special path handling and/or
+            # suites typically containing thousands of test paths.
+            # regexes have been selected by trial and error to partition data
+            # into queries returning less than ACTIVEDATA_RECORD_LIMIT records.
+            "reftest": (self.path_mod_reftest,
+                        [{"regex": {"result.test": "layout/reftests/[a-k].*"}},
+                         {"regex": {"result.test": "layout/reftests/[^a-k].*"}},
+                         {"not": {"regex": {"result.test": "layout/reftests/.*"}}}]),
+            "web-platform-tests": (self.path_mod_wpt,
+                                   [{"regex": {"result.test": "/[a-g].*"}},
+                                    {"regex": {"result.test": "/[h-p].*"}},
+                                    {"not": {"regex": {"result.test": "/[a-p].*"}}}]),
+            "web-platform-tests-reftests": (self.path_mod_wpt,
+                                            [{"regex": {"result.test": "/css/css-.*"}},
+                                             {"not": {"regex": {"result.test": "/css/css-.*"}}}]),
+            "crashtest": (self.path_mod_crashtest,
+                          [{"regex": {"result.test": ".*/tests/[a-m].*"}},
+                           {"not": {"regex": {"result.test": ".*/tests/[a-m].*"}}}]),
+            "web-platform-tests-wdspec": (self.path_mod_wpt, [None]),
+            "xpcshell": (None, [None]),
+            "mochitest-plain": (None, [None]),
+            "mochitest-browser-chrome": (None, [None]),
+            "mochitest-media": (None, [None]),
+            "mochitest-devtools-chrome": (None, [None]),
+            "marionette": (self.path_mod_marionette, [None]),
+            "mochitest-chrome": (None, [None]),
         }
+        unsupported_suites = [
+            # Usually these suites are excluded because currently the test resolver
+            # does not provide test paths for them.
+            "jsreftest",
+            "jittest",
+            "geckoview-junit",
+            "cppunittest",
+        ]
         for suite in suites:
-            print("Adding ActiveData data for %s..." % suite)
             suite_clause = {"eq": {"run.suite.name": suite}}
-            self.add_activedata_for_suite(branches, days, by_component,
-                                          suite_clause, suites[suite])
-        # remainder
-        suite_clause = {"not": {"in": {"run.suite.name": list(suites)}}}
+            path_mod = suites[suite][0]
+            test_clauses = suites[suite][1]
+            for test_clause in test_clauses:
+                self.log_verbose("Adding ActiveData data for %s / %s..." %
+                                 (suite, str(test_clause)))
+                self.add_activedata_for_suite(branches, days, by_component,
+                                              suite_clause, test_clause, path_mod)
+        # Remainder: All supported suites not handled above.
+        suite_clause = {"not": {"in": {"run.suite.name": unsupported_suites + list(suites)}}}
         self.add_activedata_for_suite(branches, days, by_component,
-                                      suite_clause, None)
+                                      suite_clause, None, None)
 
     def description(self, components, flavor, subsuite, paths,
                     show_manifests, show_tests, show_summary, show_annotations,
                     show_activedata,
                     filter_values, filter_keys,
                     branches, days):
         # provide a natural language description of the report options
         what = []
@@ -715,16 +757,18 @@ class TestInfoReport(TestInfo):
                     if not filter_keys or key in filter_keys:
                         if re.search(value, test[key]):
                             value_found = True
                             break
                 if not value_found:
                     return False
             return True
 
+        start_time = datetime.datetime.now()
+
         # Ensure useful report by default
         if not show_manifests and not show_tests and not show_summary and not show_annotations:
             show_manifests = True
             show_summary = True
 
         by_component = {}
         if components:
             components = components.split(',')
@@ -857,29 +901,41 @@ class TestInfoReport(TestInfo):
                             failed_count += 1
                         if t.get('fails-if'):
                             failed_count += 1
                         if t.get('skip-if'):
                             skipped_count += 1
                         if show_tests:
                             rkey = key if show_components else 'all'
                             if rkey in by_component['tests']:
-                                by_component['tests'][rkey].append(test_info)
+                                # Avoid duplicates: Some test paths have multiple TestResolver
+                                # entries, as when a test is included by multiple manifests.
+                                found = False
+                                for ctest in by_component['tests'][rkey]:
+                                    if ctest['test'] == test_info['test']:
+                                        found = True
+                                        break
+                                if not found:
+                                    by_component['tests'][rkey].append(test_info)
                             else:
                                 by_component['tests'][rkey] = [test_info]
             if show_tests:
                 for key in by_component['tests']:
                     by_component['tests'][key].sort(key=lambda k: k['test'])
 
         if show_activedata:
             try:
                 self.add_activedata(branches, days, by_component)
             except Exception:
                 print("Failed to retrieve some ActiveData data.")
                 traceback.print_exc()
+            self.log_verbose("%d tests updated with matching ActiveData data" %
+                             self.total_activedata_matches)
+            self.log_verbose("%d seconds waiting for ActiveData" %
+                             self.total_activedata_seconds)
 
         by_component['description'] = self.description(
             components, flavor, subsuite, paths,
             show_manifests, show_tests, show_summary, show_annotations,
             show_activedata,
             filter_values, filter_keys,
             branches, days)
 
@@ -904,8 +960,12 @@ class TestInfoReport(TestInfo):
             output_dir = os.path.dirname(output_file)
             if not os.path.isdir(output_dir):
                 os.makedirs(output_dir)
 
             with open(output_file, 'w') as f:
                 f.write(json_report)
         else:
             print(json_report)
+
+        end_time = datetime.datetime.now()
+        self.log_verbose("%d seconds total to generate report" %
+                         (end_time - start_time).total_seconds())