Bug 1371489 - Reuse HTTP connections after HTTP errors. r?dustin draft
authorMike Hommey <mh+mozilla@glandium.org>
Fri, 09 Jun 2017 09:47:32 +0900
changeset 594559 5d2b75bad0f91542f3971435b7777df4bb89fc47
parent 594168 a1d75baa49d3ec21256550985b79e41b68abe514
child 594560 ca5dcd39196646c7bb8e898b81ed78ea2b39b51e
push id64063
push userbmo:mh+mozilla@glandium.org
push dateThu, 15 Jun 2017 05:41:54 +0000
reviewersdustin
bugs1371489
milestone56.0a1
Bug 1371489 - Reuse HTTP connections after HTTP errors. r?dustin When a response has unconsumed content, requests closes the connection instead of releasing it to the connection pool, such that a new connection has to be created when another request to the same host is done within the same Session. When calling response.raise_for_status, the response content is left unconsumed, meaning that without manually consuming the content first, every time raise_for_status raises, a connection is closed. While here, we move the raise_for_status to the _do_request function, so that there's only one place where this is dealt with.
taskcluster/taskgraph/util/taskcluster.py
--- a/taskcluster/taskgraph/util/taskcluster.py
+++ b/taskcluster/taskgraph/util/taskcluster.py
@@ -21,17 +21,23 @@ def get_session():
                   status_forcelist=[500, 502, 503, 504])
     session.mount('http://', HTTPAdapter(max_retries=retry))
     session.mount('https://', HTTPAdapter(max_retries=retry))
     return session
 
 
 def _do_request(url):
     session = get_session()
-    return session.get(url, stream=True)
+    response = session.get(url, stream=True)
+    if response.status_code >= 400:
+        # Consume content before raise_for_status, so that the connection can be
+        # reused.
+        response.content
+    response.raise_for_status()
+    return response
 
 
 def get_artifact_url(task_id, path, use_proxy=False):
     if use_proxy:
         ARTIFACT_URL = 'http://taskcluster/queue/v1/task/{}/artifacts/{}'
     else:
         ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
     return ARTIFACT_URL.format(task_id, path)
@@ -42,36 +48,33 @@ def get_artifact(task_id, path, use_prox
     Returns the artifact with the given path for the given task id.
 
     If the path ends with ".json" or ".yml", the content is deserialized as,
     respectively, json or yaml, and the corresponding python data (usually
     dict) is returned.
     For other types of content, a file-like object is returned.
     """
     response = _do_request(get_artifact_url(task_id, path, use_proxy))
-    response.raise_for_status()
     if path.endswith('.json'):
         return response.json()
     if path.endswith('.yml'):
         return yaml.load(response.text)
     response.raw.read = functools.partial(response.raw.read,
                                           decode_content=True)
     return response.raw
 
 
 def list_artifacts(task_id, use_proxy=False):
     response = _do_request(get_artifact_url(task_id, '', use_proxy).rstrip('/'))
-    response.raise_for_status()
     return response.json()['artifacts']
 
 
 def get_index_url(index_path, use_proxy=False):
     if use_proxy:
         INDEX_URL = 'http://taskcluster/index/v1/task/{}'
     else:
         INDEX_URL = 'https://index.taskcluster.net/v1/task/{}'
     return INDEX_URL.format(index_path)
 
 
 def find_task_id(index_path, use_proxy=False):
     response = _do_request(get_index_url(index_path, use_proxy))
-    response.raise_for_status()
     return response.json()['taskId']