Bug 1592696 - avoid re-fetching profiles when possible r=rwood
author: Tarek Ziadé <tarek@mozilla.com>
Tue, 12 Nov 2019 19:47:40 +0000
changeset 501630 8b06c36f7b6c765d4696a608ea2ad7a9c7c3c75b
parent 501629 cb7761bc98592a908b6232ecd08736004ff375b7
child 501631 de7a1a1b75f0262d707f0a6045110fc71236d9f3
push id: 36797
push user: opoprus@mozilla.com
push date: Wed, 13 Nov 2019 09:55:25 +0000
treeherder: mozilla-central@2f19e7b646e0 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: rwood
bugs: 1592696
milestone: 72.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1592696 - avoid re-fetching profiles when possible r=rwood

This patch uses a cache dir at ~/.condprof-cache so we avoid re-downloading the same file several times.

Differential Revision: https://phabricator.services.mozilla.com/D52180
testing/condprofile/condprof/client.py
testing/condprofile/condprof/metadata.py
testing/condprofile/condprof/tests/test_client.py
testing/condprofile/condprof/tests/test_runner.py
testing/condprofile/setup.py
testing/condprofile/tox.ini
--- a/testing/condprofile/condprof/client.py
+++ b/testing/condprofile/condprof/client.py
@@ -8,36 +8,44 @@ from __future__ import absolute_import
 import os
 import tarfile
 import functools
 import tempfile
 import shutil
 
 from condprof import check_install  # NOQA
 from condprof import progress
-from condprof.util import check_exists, download_file, TASK_CLUSTER, get_logger
+from condprof.util import download_file, TASK_CLUSTER, get_logger, ArchiveNotFound
 from condprof.changelog import Changelog
 
 
 ROOT_URL = "https://index.taskcluster.net"
 INDEX_PATH = "gecko.v2.try.latest.firefox.condprof-%(platform)s"
 PUBLIC_DIR = "artifacts/public/condprof"
 TC_LINK = ROOT_URL + "/v1/task/" + INDEX_PATH + "/" + PUBLIC_DIR + "/"
 ARTIFACT_NAME = "profile-%(platform)s-%(scenario)s-%(customization)s.tgz"
 CHANGELOG_LINK = (
     ROOT_URL + "/v1/task/" + INDEX_PATH + "/" + PUBLIC_DIR + "/changelog.json"
 )
 DIRECT_LINK = "https://taskcluster-artifacts.net/%(task_id)s/0/public/condprof/"
+CONDPROF_CACHE = "~/.condprof-cache"
 
 
 class ProfileNotFoundError(Exception):
     pass
 
 
-def get_profile(target_dir, platform, scenario, customization="default", task_id=None):
+def get_profile(
+    target_dir,
+    platform,
+    scenario,
+    customization="default",
+    task_id=None,
+    download_cache=True,
+):
     """Extract a conditioned profile in the target directory.
 
     If task_id is provided, will grab the profile from that task. when not
     provided (default) will grab the latest profile.
     """
     # XXX assert values
     params = {
         "platform": platform,
@@ -46,24 +54,31 @@ def get_profile(target_dir, platform, sc
         "task_id": task_id,
     }
     filename = ARTIFACT_NAME % params
     if task_id is None:
         url = TC_LINK % params + filename
     else:
         url = DIRECT_LINK % params + filename
 
-    download_dir = tempfile.mkdtemp()
+    if not download_cache:
+        download_dir = tempfile.mkdtemp()
+    else:
+        # using a cache dir in the user home dir
+        download_dir = os.path.expanduser(CONDPROF_CACHE)
+        if not os.path.exists(download_dir):
+            os.makedirs(download_dir)
+
     downloaded_archive = os.path.join(download_dir, filename)
     get_logger().msg("Getting %s" % url)
-    exists, __ = check_exists(url)
-    if exists != 200:
-        raise ProfileNotFoundError(exists)
+    try:
+        archive = download_file(url, target=downloaded_archive)
+    except ArchiveNotFound:
+        raise ProfileNotFoundError(url)
 
-    archive = download_file(url, target=downloaded_archive)
     try:
         with tarfile.open(archive, "r:gz") as tar:
             get_logger().msg("Extracting the tarball content in %s" % target_dir)
             size = len(list(tar))
             with progress.Bar(expected_size=size) as bar:
 
                 def _extract(self, *args, **kw):
                     if not TASK_CLUSTER:
@@ -71,31 +86,33 @@ def get_profile(target_dir, platform, sc
                     return self.old(*args, **kw)
 
                 tar.old = tar.extract
                 tar.extract = functools.partial(_extract, tar)
                 tar.extractall(target_dir)
     except (OSError, tarfile.ReadError) as e:
         raise ProfileNotFoundError(str(e))
     finally:
-        shutil.rmtree(download_dir)
+        if not download_cache:
+            shutil.rmtree(download_dir)
     get_logger().msg("Success, we have a profile to work with")
     return target_dir
 
 
 def read_changelog(platform):
     params = {"platform": platform}
     changelog_url = CHANGELOG_LINK % params
     get_logger().msg("Getting %s" % changelog_url)
-    exists, __ = check_exists(changelog_url)
-    if exists != 200:
-        raise ProfileNotFoundError(exists)
     download_dir = tempfile.mkdtemp()
     downloaded_changelog = os.path.join(download_dir, "changelog.json")
-    download_file(changelog_url, target=downloaded_changelog)
+    try:
+        download_file(changelog_url, target=downloaded_changelog)
+    except ArchiveNotFound:
+        shutil.rmtree(download_dir)
+        raise ProfileNotFoundError(changelog_url)
     return Changelog(download_dir)
 
 
 def main():
     # XXX demo. download an older version of a profile, given a task id
     # plat = get_current_platform()
     older_change = read_changelog("win64").history()[0]
     task_id = older_change["TASK_ID"]
--- a/testing/condprofile/condprof/metadata.py
+++ b/testing/condprofile/condprof/metadata.py
@@ -1,25 +1,25 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 """ Manages a metadata file.
 """
 import os
 import datetime
-import collections
+from collections.abc import MutableMapping
 import json
 
 from condprof.util import LOG
 
 
 METADATA_NAME = "condprofile.json"
 
 
-class Metadata(collections.MutableMapping):
+class Metadata(MutableMapping):
     """ dict-like class that holds metadata for a profile.
     """
 
     def __init__(self, profile_dir):
         self.metadata_file = os.path.join(profile_dir, METADATA_NAME)
         LOG("Reading existing metadata at %s" % self.metadata_file)
         if not os.path.exists(self.metadata_file):
             LOG("Could not find the metadata file in that profile")
new file mode 100644
--- /dev/null
+++ b/testing/condprofile/condprof/tests/test_client.py
@@ -0,0 +1,66 @@
+import unittest
+import os
+import tempfile
+import shutil
+import responses
+import re
+
+from condprof.client import get_profile
+
+PROFILE = re.compile("https://index.taskcluster.net/.*/.*tgz")
+with open(os.path.join(os.path.dirname(__file__), "profile.tgz"), "rb") as f:
+    PROFILE_DATA = f.read()
+
+
+class TestClient(unittest.TestCase):
+    def setUp(self):
+        self.target = tempfile.mkdtemp()
+        self.download_dir = os.path.expanduser("~/.condprof-cache")
+        if os.path.exists(self.download_dir):
+            shutil.rmtree(self.download_dir)
+
+        responses.add(
+            responses.GET,
+            PROFILE,
+            body=PROFILE_DATA,
+            headers={"content-length": str(len(PROFILE_DATA)), "ETag": "'12345'"},
+            status=200,
+        )
+
+        responses.add(
+            responses.HEAD,
+            PROFILE,
+            body="",
+            headers={"content-length": str(len(PROFILE_DATA)), "ETag": "'12345'"},
+            status=200,
+        )
+
+    def tearDown(self):
+        shutil.rmtree(self.target)
+        shutil.rmtree(self.download_dir)
+
+    @responses.activate
+    def test_cache(self):
+        download_dir = os.path.expanduser("~/.condprof-cache")
+        if os.path.exists(download_dir):
+            num_elmts = len(os.listdir(download_dir))
+        else:
+            num_elmts = 0
+
+        get_profile(self.target, "win64", "cold", "default")
+
+        # grabbing a profile should generate two files
+        self.assertEqual(len(os.listdir(download_dir)), num_elmts + 2)
+
+        # we do two network calls when getting a file, a HEAD and a GET
+        response_calls = len(responses.calls)
+        self.assertEqual(response_calls, 2)
+
+        # and we should reuse them without downloading the file again
+        get_profile(self.target, "win64", "cold", "default")
+
+        # grabbing a profile should not download new stuff
+        self.assertEqual(len(os.listdir(download_dir)), num_elmts + 2)
+
+        # and do a single extra HEAD call
+        self.assertEqual(len(responses.calls), response_calls + 1)
--- a/testing/condprofile/condprof/tests/test_runner.py
+++ b/testing/condprofile/condprof/tests/test_runner.py
@@ -26,21 +26,17 @@ FTP_ARCHIVE = re.compile(
 ADDON = re.compile("https://addons.mozilla.org/.*/.*xpi")
 
 
 class TestRunner(unittest.TestCase):
     def setUp(self):
         self.archive_dir = tempfile.mkdtemp()
         responses.add(responses.GET, CHANGELOG, json={"error": "not found"}, status=404)
         responses.add(
-            responses.GET,
-            FTP,
-            content_type="application/text/html",
-            body=FTP_PAGE,
-            status=200,
+            responses.GET, FTP, content_type="text/html", body=FTP_PAGE, status=200
         )
 
         responses.add(
             responses.GET,
             FTP_ARCHIVE,
             body="1",
             headers={"content-length": "1"},
             status=200,
--- a/testing/condprofile/setup.py
+++ b/testing/condprofile/setup.py
@@ -13,16 +13,16 @@ if PY3:
 else:
     entry_points = """
       [console_scripts]
       cp-client = condprof.client:main
       """
 
 setup(
     name="conditioned-profile",
-    version="0.1",
+    version="0.2",
     packages=find_packages(),
     description="Firefox Heavy Profile creator",
     include_package_data=True,
     zip_safe=False,
     install_requires=[],  # use requirements files
     entry_points=entry_points,
 )
--- a/testing/condprofile/tox.ini
+++ b/testing/condprofile/tox.ini
@@ -1,16 +1,16 @@
 [tox]
 downloadcache = {toxworkdir}/cache/
 envlist = py36,flake8
 
 [testenv]
 passenv = TRAVIS TRAVIS_JOB_ID TRAVIS_BRANCH
 deps = -rtox-requirements.txt
-       -rrequirements.txt
+       -rlocal-requirements.txt
 commands =
        pytest --random-order-bucket=global -sv --cov-report= --cov-config .coveragerc --cov condprof condprof/tests
        - coverage report -m
        - coveralls
 
 [testenv:flake8]
 commands = flake8 condprof
 deps =