Bug 1276813 - Retry mozharness failed downloads more persistently; r=armenzg
author Geoff Brown <gbrown@mozilla.com>
Tue, 18 Apr 2017 08:30:21 -0600
changeset 353634 5317782b698439e78388ae72b0420f3ab852f81a
parent 353633 7b3035c59fe900e45c74d203a6be22861c27409f
child 353635 f4c35ddb1d07eb5c8691ab06a4e8d18b16ecf211
push id 31674
push user kwierso@gmail.com
push date Tue, 18 Apr 2017 21:35:32 +0000
treeherder mozilla-central@3f9f6d6086b2
reviewers armenzg
bugs 1276813
milestone 55.0a1
Bug 1276813 - Retry mozharness failed downloads more persistently; r=armenzg

The primary change is to increase the number of times a failed download of test_packages.json or a test zip file is retried, in the hope of recovering from more transient service interruptions.
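For context, the retry pattern being tuned here looks roughly like the sketch below. This is a minimal, self-contained illustration, not mozharness's actual ScriptMixin.retry; the doubling back-off and the max_sleeptime cap are assumptions made for illustration only.

    import time

    def retry(action, attempts=5, sleeptime=30, max_sleeptime=300,
              retry_exceptions=(Exception,)):
        """Call `action` until it succeeds, retrying up to `attempts` times.

        Sleeps after each failure, starting at `sleeptime` seconds and
        doubling the delay (capped at `max_sleeptime`) before the next try.
        """
        delay = sleeptime
        for n in range(1, attempts + 1):
            try:
                return action()
            except retry_exceptions as e:
                print('retry: attempt #%d caught %s exception: %s'
                      % (n, type(e).__name__, e))
                if n == attempts:
                    raise
                time.sleep(min(delay, max_sleeptime))
                delay *= 2

Under this shape, raising attempts from 3 to 5 with a 30-second initial sleep gives transient outages considerably more time to clear before the download is declared failed.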
testing/mozharness/mozharness/base/script.py
testing/mozharness/mozharness/mozilla/proxxy.py
--- a/testing/mozharness/mozharness/base/script.py
+++ b/testing/mozharness/mozharness/base/script.py
@@ -663,16 +663,18 @@ class ScriptMixin(PlatformMixin):
             retry_exceptions=(
                 urllib2.HTTPError,
                 urllib2.URLError,
                 httplib.BadStatusLine,
                 socket.timeout,
                 socket.error,
                 FetchedIncorrectFilesize,
             ),
+            sleeptime=30,
+            attempts=5,
             error_message="Can't download from {}".format(url),
             error_level=FATAL,
         )
         compressed_file = self.retry(
             self.fetch_url_into_memory,
             kwargs={'url': url},
             **retry_args
         )
@@ -1103,17 +1105,18 @@ class ScriptMixin(PlatformMixin):
                 self.log("retry: Calling %s with args: %s, kwargs: %s, attempt #%d" %
                          (action.__name__, str(args), str(kwargs), n), level=log_level)
                 status = action(*args, **kwargs)
                 if good_statuses and status not in good_statuses:
                     retry = True
             except retry_exceptions, e:
                 retry = True
                 error_message = "%s\nCaught exception: %s" % (error_message, str(e))
-                self.log('retry: attempt #%d caught exception: %s' % (n, str(e)), level=INFO)
+                self.log('retry: attempt #%d caught %s exception: %s' %
+                         (n, type(e).__name__, str(e)), level=INFO)
 
             if not retry:
                 return status
             else:
                 if cleanup:
                     cleanup()
                 if n == attempts:
                     self.log(error_message % {'action': action, 'attempts': n}, level=error_level)
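The logging tweak in the hunk above adds the exception class name to the retry message. A standalone illustration of the same format string (the helper name here is hypothetical, not part of mozharness):

    def log_retry_attempt(n, e):
        # Same format as the patched log call: attempt number, exception class, message.
        return 'retry: attempt #%d caught %s exception: %s' % (n, type(e).__name__, str(e))

    print(log_retry_attempt(2, ValueError('bad response')))
    # -> retry: attempt #2 caught ValueError exception: bad response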
--- a/testing/mozharness/mozharness/mozilla/proxxy.py
+++ b/testing/mozharness/mozharness/mozilla/proxxy.py
@@ -122,17 +122,17 @@ class Proxxy(ScriptMixin, LogMixin):
 
         return any(r in fqdn and r in url for r in regions)
 
     def download_proxied_file(self, url, file_name, parent_dir=None,
                               create_parent_dir=True, error_level=ERROR,
                               exit_code=3):
         """
         Wrapper around BaseScript.download_file that understands proxies
-        retry dict is set to 3 attempts, sleeping time 30 seconds.
+        retry dict is set to 5 attempts, initial sleeping time 30 seconds.
 
             Args:
                 url (string): url to fetch
                 file_name (string, optional): output filename, defaults to None
                     if file_name is not defined, the output name is taken from
                     the url.
                 parent_dir (string, optional): name of the parent directory
                 create_parent_dir (bool, optional): if True, creates the parent
@@ -151,18 +151,18 @@ class Proxxy(ScriptMixin, LogMixin):
 
         for url in urls:
             self.info("trying %s" % url)
             retval = self.download_file(
                 url, file_name=file_name, parent_dir=parent_dir,
                 create_parent_dir=create_parent_dir, error_level=ERROR,
                 exit_code=exit_code,
                 retry_config=dict(
-                    attempts=3,
                     sleeptime=30,
+                    attempts=5,
                     error_level=INFO,
                 ))
             if retval:
                 return retval
 
         self.log("Failed to download from all available URLs, aborting",
                  level=error_level, exit_code=exit_code)
         return retval
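For a rough sense of the new retry budget per proxied URL: assuming the retry helper starts at the configured sleeptime and doubles the delay after each failed attempt (the "initial sleeping time" wording in the docstring suggests a growing delay; the doubling and the absence of a cap are assumptions here), the cumulative back-off would be:

    # Hypothetical back-off budget for attempts=5, sleeptime=30, assuming the
    # delay doubles after each failure and no cap is reached.
    attempts = 5
    sleeptime = 30
    delays = [sleeptime * 2 ** i for i in range(attempts - 1)]
    print(delays)       # [30, 60, 120, 240]
    print(sum(delays))  # 450 seconds of sleeping spread across the 5 attempts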