Bug 1359965 - Support and generate tar.gz WPT archive; r=glandium
author Gregory Szorc <gps@mozilla.com>
Mon, 08 May 2017 17:19:05 -0700
changeset 360045 a0e257e346ccf3c1db332ec5903241f4eeb9a7ee
parent 360044 ae8bce278626bc84914063f93292ac5e825eec36
child 360046 f6bd9857f97dc3162b86e01600d755e62f4b63ab
push id 43186
push user gszorc@mozilla.com
push date Tue, 23 May 2017 00:22:52 +0000
treeherder autoland@a0e257e346cc [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1359965 - Support and generate tar.gz WPT archive; r=glandium

Several years ago there was a single zip file for all test files. Clients would only extract the files they needed. Thus, zip was a reasonable archive format because it allowed direct access to members without having to decompress the entirety of the stream.

We have since split up that monolithic archive into separate, domain-specific archives, e.g. 1 archive for mochitests and one for xpcshell tests. This drastically cut down on network I/O required on testers because they only fetched archives/data that was relevant. It also enabled parallel generation of test archives, which shaved dozens of seconds off builds due to compression being a long pole.

Despite the architectural changes to test archive management, we still used zip files. This is not ideal because we no longer access specific files in test archives and thus don't care about single/partial member access performance.

This commit implements support for generating tar.gz test archives. And it switches the web-platform archive to a tar.gz file. The performance implications for archive generation are significant:

before: 48,321,250 bytes; 6.05s
after:  31,844,267 bytes; 4.57s

The size is reduced because we have a single compression context so data from 1 file can benefit compression in a subsequent file. CPU usage is reduced because the compressor has to work less with 1 context than it does with N. While I didn't measure it, decompression performance should also be improved for the same reasons. And of course network I/O will be reduced.

mozharness consumers use a generic method for handling unarchiving. This method automagically handles multiple file extensions. So as long as downstream consumers aren't hard coding ".zip" this change should "just work."

MozReview-Commit-ID: LQa5MIHLsms
--- a/python/mozbuild/mozbuild/action/test_archive.py
+++ b/python/mozbuild/mozbuild/action/test_archive.py
@@ -15,16 +15,17 @@ import itertools
 import os
 import sys
 import time
 from manifestparser import TestManifest
 from reftest import ReftestManifest
 from mozbuild.util import ensureParentDir
+from mozpack.archive import create_tar_gz_from_files
 from mozpack.copier import FileRegistry
 from mozpack.files import ExistingFile, FileFinder
 from mozpack.manifests import InstallManifest
 from mozpack.mozjar import JarWriter
 import mozpack.path as mozpath
 import buildconfig
@@ -609,32 +610,41 @@ def find_manifest_dirs(topsrcdir, manife
 def main(argv):
     parser = argparse.ArgumentParser(
         description='Produce test archives')
     parser.add_argument('archive', help='Which archive to generate')
     parser.add_argument('outputfile', help='File to write output to')
     args = parser.parse_args(argv)
-    if not args.outputfile.endswith('.zip'):
-        raise Exception('expected zip output file')
+    out_file = args.outputfile
+    if not out_file.endswith(('.tar.gz', '.zip')):
+        raise Exception('expected tar.gz or zip output file')
     file_count = 0
     t_start = time.time()
-    ensureParentDir(args.outputfile)
-    with open(args.outputfile, 'wb') as fh:
+    ensureParentDir(out_file)
+    res = find_files(args.archive)
+    with open(out_file, 'wb') as fh:
         # Experimentation revealed that level 5 is significantly faster and has
         # marginally larger sizes than higher values and is the sweet spot
         # for optimal compression. Read the detailed commit message that
         # introduced this for raw numbers.
-        with JarWriter(fileobj=fh, optimize=False, compress_level=5) as writer:
-            res = find_files(args.archive)
-            for p, f in res:
-                writer.add(p.encode('utf-8'), f.read(), mode=f.mode, skip_duplicates=True)
-                file_count += 1
+        if out_file.endswith('.tar.gz'):
+            files = dict(res)
+            create_tar_gz_from_files(fh, files, compresslevel=5)
+            file_count = len(files)
+        elif out_file.endswith('.zip'):
+            with JarWriter(fileobj=fh, optimize=False, compress_level=5) as writer:
+                for p, f in res:
+                    writer.add(p.encode('utf-8'), f.read(), mode=f.mode,
+                               skip_duplicates=True)
+                    file_count += 1
+        else:
+            raise Exception('unhandled file extension: %s' % out_file)
     duration = time.time() - t_start
     zip_size = os.path.getsize(args.outputfile)
     basename = os.path.basename(args.outputfile)
     print('Wrote %d files in %d bytes to %s in %.2fs' % (
           file_count, zip_size, basename, duration))
--- a/taskcluster/taskgraph/transforms/beetmover.py
+++ b/taskcluster/taskgraph/transforms/beetmover.py
@@ -28,17 +28,17 @@ from voluptuous import Any, Required, Op
-    "target.web-platform.tests.zip",
+    "target.web-platform.tests.tar.gz",
@@ -59,17 +59,17 @@ from voluptuous import Any, Required, Op
-    "en-US/target.web-platform.tests.zip",
+    "en-US/target.web-platform.tests.tar.gz",
@@ -79,17 +79,17 @@ from voluptuous import Any, Required, Op
-    "target.web-platform.tests.zip",
+    "target.web-platform.tests.tar.gz",
--- a/taskcluster/taskgraph/transforms/beetmover_repackage.py
+++ b/taskcluster/taskgraph/transforms/beetmover_repackage.py
@@ -30,17 +30,17 @@ logger = logging.getLogger(__name__)
-    "target.web-platform.tests.zip",
+    "target.web-platform.tests.tar.gz",
--- a/testing/testsuite-targets.mk
+++ b/testing/testsuite-targets.mk
@@ -114,56 +114,61 @@ stage-all: \
 stage-all: stage-steeplechase
 stage-all: stage-cppunittests
   common \
   cppunittest \
   mochitest \
   reftest \
   talos \
   awsy \
+  xpcshell \
+  $(NULL)
   web-platform \
-  xpcshell \
 stage-all: stage-gtest
-TEST_PKGS += gtest
+TEST_PKGS_ZIP += gtest
-PKG_ARG = --$(1) '$(PKG_BASENAME).$(1).tests.zip'
+PKG_ARG = --$(1) '$(PKG_BASENAME).$(1).tests.$(2)'
 	$(PYTHON) $(topsrcdir)/build/gen_test_packages_manifest.py \
       --jsshell $(JSSHELL_NAME) \
       --dest-file '$(MOZ_TEST_PACKAGES_FILE)' \
-      $(call PKG_ARG,common) \
-      $(foreach pkg,$(TEST_PKGS),$(call PKG_ARG,$(pkg)))
+      $(call PKG_ARG,common,zip) \
+      $(foreach pkg,$(TEST_PKGS_ZIP),$(call PKG_ARG,$(pkg),zip)) \
+      $(foreach pkg,$(TEST_PKGS_TARGZ),$(call PKG_ARG,$(pkg),tar.gz))
 	@rm -f '$(DIST)/$(PKG_PATH)$(TEST_PACKAGE)'
 define package_archive
 package-tests-$(1): stage-all package-tests-prepare-dest
 	$$(call py_action,test_archive, \
 		$(1) \
-		'$$(abspath $$(DIST))/$$(PKG_PATH)/$$(PKG_BASENAME).$(1).tests.zip')
+		'$$(abspath $$(DIST))/$$(PKG_PATH)/$$(PKG_BASENAME).$(1).tests.$(2)')
 package-tests: package-tests-$(1)
-$(foreach name,$(TEST_PKGS),$(eval $(call package_archive,$(name))))
+$(foreach name,$(TEST_PKGS_ZIP),$(eval $(call package_archive,$(name),zip)))
+$(foreach name,$(TEST_PKGS_TARGZ),$(eval $(call package_archive,$(name),tar.gz)))
 ifeq ($(MOZ_BUILD_APP),mobile/android)
 stage-all: stage-android
 stage-all: stage-instrumentation-tests
 # Prepare _tests before any of the other staging/packaging steps.
 # make-stage-dir is a prerequisite to all the stage-* targets in testsuite-targets.mk.
--- a/toolkit/mozapps/installer/package-name.mk
+++ b/toolkit/mozapps/installer/package-name.mk
 MOZHARNESS_PACKAGE = mozharness.zip
 # Test package naming
 TEST_PACKAGE = $(PKG_BASENAME).common.tests.zip
 CPP_TEST_PACKAGE = $(PKG_BASENAME).cppunittest.tests.zip
 XPC_TEST_PACKAGE = $(PKG_BASENAME).xpcshell.tests.zip
 MOCHITEST_PACKAGE = $(PKG_BASENAME).mochitest.tests.zip
 REFTEST_PACKAGE = $(PKG_BASENAME).reftest.tests.zip
-WP_TEST_PACKAGE = $(PKG_BASENAME).web-platform.tests.zip
+WP_TEST_PACKAGE = $(PKG_BASENAME).web-platform.tests.tar.gz
 TALOS_PACKAGE = $(PKG_BASENAME).talos.tests.zip
 AWSY_PACKAGE = $(PKG_BASENAME).awsy.tests.zip
 GTEST_PACKAGE = $(PKG_BASENAME).gtest.tests.zip
 ifneq (,$(wildcard $(DIST)/bin/application.ini))
 BUILDID = $(shell $(PYTHON) $(MOZILLA_DIR)/config/printconfigsetting.py $(DIST)/bin/application.ini App BuildID)
 BUILDID = $(shell $(PYTHON) $(MOZILLA_DIR)/config/printconfigsetting.py $(DIST)/bin/platform.ini Build BuildID)