Bug 1533330 - Update tooltool.py with optional support of taskcluster auth tokens r=rail
author: Rok Garbas <rok@garbas.si>
date: Wed, 24 Apr 2019 13:41:46 +0000
changeset: 530691 626ad97710e34569af2ab88d9acb9e06833852e7
parent: 530690 496213852ce6d42886a992f729fb125be3fc98f2
child: 530692 aa674b410265213ab67b295f854ef353613a83e3
push id: 11265
push user: ffxbld-merge
push date: Mon, 13 May 2019 10:53:39 +0000
treeherder: mozilla-beta@77e0fe8dbdd3
reviewers: rail
bugs: 1533330
milestone: 68.0a1
first release with: nightly linux32, linux64, mac, win32, win64
last release without: nightly linux32, linux64, mac, win32, win64
Bug 1533330 - Update tooltool.py with optional support of taskcluster auth tokens r=rail

With this change, tooltool.py also accepts Taskcluster credentials, passed in JSON format via the --authentication-file option. RelengAPI tokens keep working with this patch; the Taskcluster support is purely additive. Differential Revision: https://phabricator.services.mozilla.com/D27881
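For illustration, the two --authentication-file payloads that the patched _authorize() (further down in this diff) accepts might look like the following. This is a minimal sketch: the client ID and token values are hypothetical, and the Taskcluster variant is detected simply by the file contents parsing as JSON with the keys that validate_taskcluster_credentials() checks.

import json

# Taskcluster credentials: the file must parse as JSON and carry
# 'clientId' and 'accessToken' (values here are hypothetical).
taskcluster_auth_file = json.dumps({
    'clientId': 'example/tooltool-client',
    'accessToken': 'example-access-token',
})

# RelengAPI token: any non-JSON content is sent verbatim as
# 'Authorization: Bearer <contents>'.
relengapi_auth_file = 'example-bearer-token'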
python/mozbuild/mozbuild/action/tooltool.py
testing/mozharness/external_tools/tooltool.py
--- a/python/mozbuild/mozbuild/action/tooltool.py
+++ b/python/mozbuild/mozbuild/action/tooltool.py
@@ -17,39 +17,59 @@
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 # 02110-1301, USA.
 
 # A manifest file specifies files in that directory that are stored
 # elsewhere. This file should only list files in the same directory
 # in which the manifest file resides and it should be called
 # 'manifest.tt'
 
+from __future__ import print_function
+
+import base64
+import calendar
 import hashlib
+import hmac
 import httplib
 import json
 import logging
+import math
 import optparse
 import os
+import pprint
+import re
 import shutil
 import sys
 import tarfile
 import tempfile
 import threading
 import time
 import urllib2
 import urlparse
 import zipfile
 
 from subprocess import PIPE
 from subprocess import Popen
 
 __version__ = '1'
 
+# Allowed request header characters:
+# !#$%&'()*+,-./:;<=>?@[]^_`{|}~ and space, a-z, A-Z, 0-9, \, "
+REQUEST_HEADER_ATTRIBUTE_CHARS = re.compile(
+    r"^[ a-zA-Z0-9_\!#\$%&'\(\)\*\+,\-\./\:;<\=>\?@\[\]\^`\{\|\}~]*$")
 DEFAULT_MANIFEST_NAME = 'manifest.tt'
 TOOLTOOL_PACKAGE_SUFFIX = '.TOOLTOOL-PACKAGE'
+HAWK_VER = 1
+PY3 = sys.version_info[0] == 3
+
+if PY3:
+    # TODO: py3 coverage
+    six_binary_type = bytes  # pragma: no cover
+else:
+    six_binary_type = str
 
 
 log = logging.getLogger(__name__)
 
 
 class FileRecordJSONEncoderException(Exception):
     pass
 
@@ -72,32 +92,231 @@ class BadFilenameException(ExceptionWith
 class DigestMismatchException(ExceptionWithFilename):
     pass
 
 
 class MissingFileException(ExceptionWithFilename):
     pass
 
 
+class InvalidCredentials(Exception):
+    pass
+
+
+class BadHeaderValue(Exception):
+    pass
+
+
+def parse_url(url):
+    url_parts = urlparse.urlparse(url)
+    url_dict = {
+        'scheme': url_parts.scheme,
+        'hostname': url_parts.hostname,
+        'port': url_parts.port,
+        'path': url_parts.path,
+        'resource': url_parts.path,
+        'query': url_parts.query,
+    }
+    if len(url_dict['query']) > 0:
+        url_dict['resource'] = '%s?%s' % (url_dict['resource'],  # pragma: no cover
+                                          url_dict['query'])
+
+    if url_parts.port is None:
+        if url_parts.scheme == 'http':
+            url_dict['port'] = 80
+        elif url_parts.scheme == 'https':  # pragma: no cover
+            url_dict['port'] = 443
+    return url_dict
+
+
+def utc_now(offset_in_seconds=0.0):
+    return int(math.floor(calendar.timegm(time.gmtime()) + float(offset_in_seconds)))
+
+
+def random_string(length):
+    return base64.urlsafe_b64encode(os.urandom(length))[:length]
+
+
+def prepare_header_val(val):
+    if isinstance(val, six_binary_type):
+        val = val.decode('utf-8')
+
+    if not REQUEST_HEADER_ATTRIBUTE_CHARS.match(val):
+        raise BadHeaderValue(  # pragma: no cover
+            'header value={val} contained an illegal character'.format(val=repr(val)))
+
+    return val
+
+
+def parse_content_type(content_type):  # pragma: no cover
+    if content_type:
+        return content_type.split(';')[0].strip().lower()
+    else:
+        return ''
+
+
+def calculate_payload_hash(algorithm, payload, content_type):  # pragma: no cover
+    parts = [
+        part if isinstance(part, six_binary_type) else part.encode('utf8')
+        for part in ['hawk.' + str(HAWK_VER) + '.payload\n',
+                     parse_content_type(content_type) + '\n',
+                     payload or '',
+                     '\n',
+                     ]
+    ]
+
+    p_hash = hashlib.new(algorithm)
+    p_hash.update(''.join(parts))
+
+    log.debug('calculating payload hash from:\n{parts}'.format(parts=pprint.pformat(parts)))
+
+    return base64.b64encode(p_hash.digest())
+
+
+def validate_taskcluster_credentials(credentials):
+    if not hasattr(credentials, '__getitem__'):
+        raise InvalidCredentials('credentials must be a dict-like object')  # pragma: no cover
+    try:
+        credentials['clientId']
+        credentials['accessToken']
+    except KeyError:  # pragma: no cover
+        etype, val, tb = sys.exc_info()
+        raise InvalidCredentials('{etype}: {val}'.format(etype=etype, val=val))
+
+
+def normalize_header_attr(val):
+    if isinstance(val, six_binary_type):
+        return val.decode('utf-8')
+    return val  # pragma: no cover
+
+
+def normalize_string(mac_type,
+                     timestamp,
+                     nonce,
+                     method,
+                     name,
+                     host,
+                     port,
+                     content_hash,
+                     ):
+    return '\n'.join([
+        normalize_header_attr(header)
+        # The blank lines are important. They follow what the Node Hawk lib does.
+        for header in ['hawk.' + str(HAWK_VER) + '.' + mac_type,
+                       timestamp,
+                       nonce,
+                       method or '',
+                       name or '',
+                       host,
+                       port,
+                       content_hash or '',
+                       '',  # for ext which is empty in this case
+                       '',  # Add trailing new line.
+                       ]
+    ])
+
+
+def calculate_mac(mac_type,
+                  access_token,
+                  algorithm,
+                  timestamp,
+                  nonce,
+                  method,
+                  name,
+                  host,
+                  port,
+                  content_hash,
+                  ):
+    normalized = normalize_string(mac_type,
+                                  timestamp,
+                                  nonce,
+                                  method,
+                                  name,
+                                  host,
+                                  port,
+                                  content_hash)
+    log.debug(u'normalized resource for mac calc: {norm}'.format(norm=normalized))
+    digestmod = getattr(hashlib, algorithm)
+
+    # Make sure we are about to hash binary strings.
+
+    if not isinstance(normalized, six_binary_type):
+        normalized = normalized.encode('utf8')
+
+    if not isinstance(access_token, six_binary_type):
+        access_token = access_token.encode('ascii')
+
+    result = hmac.new(access_token, normalized, digestmod)
+    return base64.b64encode(result.digest())
+
+
+def make_taskcluster_header(credentials, req):
+    validate_taskcluster_credentials(credentials)
+
+    url = req.get_full_url()
+    method = req.get_method()
+    algorithm = 'sha256'
+    timestamp = str(utc_now())
+    nonce = random_string(6)
+    url_parts = parse_url(url)
+
+    content_hash = None
+    if req.has_data():
+        content_hash = calculate_payload_hash(  # pragma: no cover
+            algorithm,
+            req.get_data(),
+            req.get_header('Content-type'),
+        )
+
+    mac = calculate_mac('header',
+                        credentials['accessToken'],
+                        algorithm,
+                        timestamp,
+                        nonce,
+                        method,
+                        url_parts['resource'],
+                        url_parts['hostname'],
+                        str(url_parts['port']),
+                        content_hash,
+                        )
+
+    header = u'Hawk mac="{}"'.format(prepare_header_val(mac))
+
+    if content_hash:  # pragma: no cover
+        header = u'{}, hash="{}"'.format(header, prepare_header_val(content_hash))
+
+    header = u'{header}, id="{id}", ts="{ts}", nonce="{nonce}"'.format(
+        header=header,
+        id=prepare_header_val(credentials['clientId']),
+        ts=prepare_header_val(timestamp),
+        nonce=prepare_header_val(nonce),
+    )
+
+    log.debug('Hawk header for URL={} method={}: {}'.format(url, method, header))
+
+    return header
+
+
 class FileRecord(object):
 
     def __init__(self, filename, size, digest, algorithm, unpack=False,
-                 version=None, visibility=None):
+                 version=None, visibility=None, setup=None):
         object.__init__(self)
         if '/' in filename or '\\' in filename:
             log.error(
                 "The filename provided contains path information and is, therefore, invalid.")
             raise BadFilenameException(filename=filename)
         self.filename = filename
         self.size = size
         self.digest = digest
         self.algorithm = algorithm
         self.unpack = unpack
         self.version = version
         self.visibility = visibility
+        self.setup = setup
 
     def __eq__(self, other):
         if self is other:
             return True
         if self.filename == other.filename and \
            self.size == other.size and \
            self.digest == other.digest and \
            self.algorithm == other.algorithm and \
@@ -179,16 +398,18 @@ class FileRecordJSONEncoder(json.JSONEnc
                 'digest': obj.digest,
             }
             if obj.unpack:
                 rv['unpack'] = True
             if obj.version:
                 rv['version'] = obj.version
             if obj.visibility is not None:
                 rv['visibility'] = obj.visibility
+            if obj.setup:
+                rv['setup'] = obj.setup
             return rv
 
     def default(self, f):
         if issubclass(type(f), list):
             record_list = []
             for i in f:
                 record_list.append(self.encode_file_record(i))
             return record_list
@@ -224,19 +445,20 @@ class FileRecordJSONDecoder(json.JSONDec
                 if req not in obj:
                     missing = True
                     break
 
             if not missing:
                 unpack = obj.get('unpack', False)
                 version = obj.get('version', None)
                 visibility = obj.get('visibility', None)
+                setup = obj.get('setup')
                 rv = FileRecord(
                     obj['filename'], obj['size'], obj['digest'], obj['algorithm'],
-                    unpack, version, visibility)
+                    unpack, version, visibility, setup)
                 log.debug("materialized %s" % rv)
                 return rv
         return obj
 
     def decode(self, s):
         decoded = json.JSONDecoder.decode(self, s)
         rv = self.process_file_records(decoded)
         return rv
@@ -329,17 +551,17 @@ def digest_file(f, a):
         data = f.read(chunk_size)
     name = repr(f.name) if hasattr(f, 'name') else 'a file'
     log.debug('hashed %s with %s to be %s', name, a, h.hexdigest())
     return h.hexdigest()
 
 
 def execute(cmd):
     """Execute CMD, logging its stdout at the info level"""
-    process = Popen(cmd, shell=True, stdout=PIPE)
+    process = Popen(cmd, shell=True, stdout=PIPE, bufsize=0)
     while True:
         line = process.stdout.readline()
         if not line:
             break
         log.info(line.replace('\n', ' '))
     return process.wait() == 0
 
 
@@ -530,17 +752,17 @@ def _cache_checksum_matches(base_file, c
         return False
 
 
 def _compute_cache_checksum(filename):
     with open(filename, "rb") as f:
         return digest_file(f, "sha256")
 
 
-def unpack_file(filename):
+def unpack_file(filename, setup=None):
     """Untar `filename`, assuming it is uncompressed or compressed with bzip2,
     xz, gzip, or unzip a zip file. The file is assumed to contain a single
     directory with a name matching the base of the given filename.
     Xz support is handled by shelling out to 'tar'."""
 
     checksum = _compute_cache_checksum(filename)
 
     if tarfile.is_tarfile(filename):
@@ -574,16 +796,19 @@ def unpack_file(filename):
         log.info('unzipping "%s"' % filename)
         z = zipfile.ZipFile(filename)
         z.extractall()
         z.close()
     else:
         log.error("Unknown archive extension for filename '%s'" % filename)
         return False
 
+    if setup and not execute(os.path.join(base_file, setup)):
+        return False
+
     with open(base_file + CHECKSUM_SUFFIX, "wb") as f:
         f.write(checksum)
 
     return True
 
 
 def fetch_files(manifest_file, base_urls, filenames=[], cache_folder=None,
                 auth_file=None, region=None):
@@ -604,16 +829,19 @@ def fetch_files(manifest_file, base_urls
     # We want to track files that fail to be fetched as well as
     # files that are fetched
     failed_files = []
     fetched_files = []
 
     # Files that we want to unpack.
     unpack_files = []
 
+    # Setup for unpacked files.
+    setup_files = {}
+
     # Lets go through the manifest and fetch the files that we want
     for f in manifest.file_records:
         # case 1: files are already present
         if f.present():
             if f.validate():
                 present_files.append(f.filename)
                 if f.unpack:
                     unpack_files.append(f.filename)
@@ -663,16 +891,23 @@ def fetch_files(manifest_file, base_urls
             temp_file_name = fetch_file(base_urls, f, auth_file=auth_file, region=region)
             if temp_file_name:
                 fetched_files.append((f, temp_file_name))
             else:
                 failed_files.append(f.filename)
         else:
             log.debug("skipping %s" % f.filename)
 
+        if f.setup:
+            if f.unpack:
+                setup_files[f.filename] = f.setup
+            else:
+                log.error("'setup' requires 'unpack' being set for %s" % f.filename)
+                failed_files.append(f.filename)
+
     # lets ensure that fetched files match what the manifest specified
     for localfile, temp_file_name in fetched_files:
         # since I downloaded to a temp file, I need to perform all validations on the temp file
         # this is why filerecord_for_validation is created
 
         filerecord_for_validation = FileRecord(
             temp_file_name, localfile.size, localfile.digest, localfile.algorithm)
 
@@ -705,17 +940,17 @@ def fetch_files(manifest_file, base_urls
                                 (localfile.filename, cache_folder), exc_info=True)
         else:
             failed_files.append(localfile.filename)
             log.error("'%s'" % filerecord_for_validation.describe())
             os.remove(temp_file_name)
 
     # Unpack files that need to be unpacked.
     for filename in unpack_files:
-        if not unpack_file(filename):
+        if not unpack_file(filename, setup_files.get(filename)):
             failed_files.append(filename)
 
     # If we failed to fetch or validate a file, we need to fail
     if len(failed_files) > 0:
         log.error("The following files failed: '%s'" %
                   "', ".join(failed_files))
         return False
     return True
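As a worked example of the new 'setup' manifest field wired up above (a hypothetical manifest entry; all values are made up): 'setup' only takes effect together with 'unpack', and the script path is joined onto the unpacked base directory before being run through execute().

import json

# Hypothetical manifest.tt entry: after 'toolchain.tar.xz' is unpacked into
# 'toolchain/', tooltool runs 'toolchain/setup.sh'; a non-zero exit fails
# the fetch.
manifest = [{
    'filename': 'toolchain.tar.xz',  # hypothetical package
    'size': 123456789,               # hypothetical size in bytes
    'algorithm': 'sha512',
    'digest': '0123abcd...',         # hypothetical digest, elided
    'unpack': True,
    'setup': 'setup.sh',
}]
print(json.dumps(manifest, indent=2))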
@@ -773,20 +1008,35 @@ def _log_api_error(e):
         json_resp = json.load(e.fp)
         log.error("%s: %s" % (json_resp['error']['name'],
                               json_resp['error']['description']))
     else:
         log.exception("Error making RelengAPI request:")
 
 
 def _authorize(req, auth_file):
-    if auth_file:
-        log.debug("using bearer token in %s" % auth_file)
-        req.add_unredirected_header('Authorization',
-                                    'Bearer %s' % (open(auth_file, "rb").read().strip()))
+    if not auth_file:
+        return
+
+    is_taskcluster_auth = False
+    with open(auth_file) as f:
+        auth_file_content = f.read().strip()
+        try:
+            auth_file_content = json.loads(auth_file_content)
+            is_taskcluster_auth = True
+        except ValueError:
+            pass
+
+    if is_taskcluster_auth:
+        taskcluster_header = make_taskcluster_header(auth_file_content, req)
+        log.debug("Using taskcluster credentials in %s" % auth_file)
+        req.add_unredirected_header('Authorization', taskcluster_header)
+    else:
+        log.debug("Using Bearer token in %s" % auth_file)
+        req.add_unredirected_header('Authorization', 'Bearer %s' % auth_file_content)
 
 
 def _send_batch(base_url, auth_file, batch, region):
     url = urlparse.urljoin(base_url, 'upload')
     if region is not None:
         url += "?region=" + region
     req = urllib2.Request(url, json.dumps(batch), {'Content-Type': 'application/json'})
     _authorize(req, auth_file)
@@ -803,17 +1053,17 @@ def _s3_upload(filename, file):
     url = urlparse.urlparse(file['put_url'])
     cls = httplib.HTTPSConnection if url.scheme == 'https' else httplib.HTTPConnection
     host, port = url.netloc.split(':') if ':' in url.netloc else (url.netloc, 443)
     port = int(port)
     conn = cls(host, port)
     try:
         req_path = "%s?%s" % (url.path, url.query) if url.query else url.path
         conn.request('PUT', req_path, open(filename, "rb"),
-                     {'Content-type': 'application/octet-stream'})
+                     {'Content-Type': 'application/octet-stream'})
         resp = conn.getresponse()
         resp_body = resp.read()
         conn.close()
         if resp.status != 200:
             raise RuntimeError("Non-200 return from AWS: %s %s\n%s" %
                                (resp.status, resp.reason, resp_body))
     except Exception:
         file['upload_exception'] = sys.exc_info()
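To make the Hawk computation above concrete, here is a minimal self-contained sketch (Python 3 syntax, hypothetical inputs) of the MAC that normalize_string() and calculate_mac() produce for a GET request with no payload:

import base64
import hashlib
import hmac

HAWK_VER = 1

def sketch_header_mac(access_token, timestamp, nonce, method, resource, host, port):
    # Mirrors normalize_string(): one field per line, with empty lines for
    # the payload hash (none for a GET), ext, and the trailing newline.
    normalized = '\n'.join([
        'hawk.%d.header' % HAWK_VER,
        timestamp,
        nonce,
        method,
        resource,
        host,
        port,
        '',  # content hash: empty, no payload
        '',  # ext: unused here
        '',  # trailing newline
    ])
    digest = hmac.new(access_token.encode('ascii'),
                      normalized.encode('utf8'),
                      hashlib.sha256).digest()
    return base64.b64encode(digest).decode('ascii')

# Hypothetical values; the real code derives these from the urllib2 request.
print(sketch_header_mac('example-access-token', '1556113306', 'aB3dEf',
                        'GET', '/tooltool/sha512/abcdef',
                        'tooltool.example.com', '443'))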
--- a/testing/mozharness/external_tools/tooltool.py
+++ b/testing/mozharness/external_tools/tooltool.py
@@ -17,39 +17,59 @@
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 # 02110-1301, USA.
 
 # A manifest file specifies files in that directory that are stored
 # elsewhere. This file should only list files in the same directory
 # in which the manifest file resides and it should be called
 # 'manifest.tt'
 
+from __future__ import print_function
+
+import base64
+import calendar
 import hashlib
+import hmac
 import httplib
 import json
 import logging
+import math
 import optparse
 import os
+import pprint
+import re
 import shutil
 import sys
 import tarfile
 import tempfile
 import threading
 import time
 import urllib2
 import urlparse
 import zipfile
 
 from subprocess import PIPE
 from subprocess import Popen
 
 __version__ = '1'
 
+# Allowed request header characters:
+# !#$%&'()*+,-./:;<=>?@[]^_`{|}~ and space, a-z, A-Z, 0-9, \, "
+REQUEST_HEADER_ATTRIBUTE_CHARS = re.compile(
+    r"^[ a-zA-Z0-9_\!#\$%&'\(\)\*\+,\-\./\:;<\=>\?@\[\]\^`\{\|\}~]*$")
 DEFAULT_MANIFEST_NAME = 'manifest.tt'
 TOOLTOOL_PACKAGE_SUFFIX = '.TOOLTOOL-PACKAGE'
+HAWK_VER = 1
+PY3 = sys.version_info[0] == 3
+
+if PY3:
+    # TODO: py3 coverage
+    six_binary_type = bytes  # pragma: no cover
+else:
+    six_binary_type = str
 
 
 log = logging.getLogger(__name__)
 
 
 class FileRecordJSONEncoderException(Exception):
     pass
 
@@ -72,16 +92,214 @@ class BadFilenameException(ExceptionWith
 class DigestMismatchException(ExceptionWithFilename):
     pass
 
 
 class MissingFileException(ExceptionWithFilename):
     pass
 
 
+class InvalidCredentials(Exception):
+    pass
+
+
+class BadHeaderValue(Exception):
+    pass
+
+
+def parse_url(url):
+    url_parts = urlparse.urlparse(url)
+    url_dict = {
+        'scheme': url_parts.scheme,
+        'hostname': url_parts.hostname,
+        'port': url_parts.port,
+        'path': url_parts.path,
+        'resource': url_parts.path,
+        'query': url_parts.query,
+    }
+    if len(url_dict['query']) > 0:
+        url_dict['resource'] = '%s?%s' % (url_dict['resource'],  # pragma: no cover
+                                          url_dict['query'])
+
+    if url_parts.port is None:
+        if url_parts.scheme == 'http':
+            url_dict['port'] = 80
+        elif url_parts.scheme == 'https':  # pragma: no cover
+            url_dict['port'] = 443
+    return url_dict
+
+
+def utc_now(offset_in_seconds=0.0):
+    return int(math.floor(calendar.timegm(time.gmtime()) + float(offset_in_seconds)))
+
+
+def random_string(length):
+    return base64.urlsafe_b64encode(os.urandom(length))[:length]
+
+
+def prepare_header_val(val):
+    if isinstance(val, six_binary_type):
+        val = val.decode('utf-8')
+
+    if not REQUEST_HEADER_ATTRIBUTE_CHARS.match(val):
+        raise BadHeaderValue(  # pragma: no cover
+            'header value={val} contained an illegal character'.format(val=repr(val)))
+
+    return val
+
+
+def parse_content_type(content_type):  # pragma: no cover
+    if content_type:
+        return content_type.split(';')[0].strip().lower()
+    else:
+        return ''
+
+
+def calculate_payload_hash(algorithm, payload, content_type):  # pragma: no cover
+    parts = [
+        part if isinstance(part, six_binary_type) else part.encode('utf8')
+        for part in ['hawk.' + str(HAWK_VER) + '.payload\n',
+                     parse_content_type(content_type) + '\n',
+                     payload or '',
+                     '\n',
+                     ]
+    ]
+
+    p_hash = hashlib.new(algorithm)
+    p_hash.update(''.join(parts))
+
+    log.debug('calculating payload hash from:\n{parts}'.format(parts=pprint.pformat(parts)))
+
+    return base64.b64encode(p_hash.digest())
+
+
+def validate_taskcluster_credentials(credentials):
+    if not hasattr(credentials, '__getitem__'):
+        raise InvalidCredentials('credentials must be a dict-like object')  # pragma: no cover
+    try:
+        credentials['clientId']
+        credentials['accessToken']
+    except KeyError:  # pragma: no cover
+        etype, val, tb = sys.exc_info()
+        raise InvalidCredentials('{etype}: {val}'.format(etype=etype, val=val))
+
+
+def normalize_header_attr(val):
+    if isinstance(val, six_binary_type):
+        return val.decode('utf-8')
+    return val  # pragma: no cover
+
+
+def normalize_string(mac_type,
+                     timestamp,
+                     nonce,
+                     method,
+                     name,
+                     host,
+                     port,
+                     content_hash,
+                     ):
+    return '\n'.join([
+        normalize_header_attr(header)
+        # The blank lines are important. They follow what the Node Hawk lib does.
+        for header in ['hawk.' + str(HAWK_VER) + '.' + mac_type,
+                       timestamp,
+                       nonce,
+                       method or '',
+                       name or '',
+                       host,
+                       port,
+                       content_hash or '',
+                       '',  # for ext which is empty in this case
+                       '',  # Add trailing new line.
+                       ]
+    ])
+
+
+def calculate_mac(mac_type,
+                  access_token,
+                  algorithm,
+                  timestamp,
+                  nonce,
+                  method,
+                  name,
+                  host,
+                  port,
+                  content_hash,
+                  ):
+    normalized = normalize_string(mac_type,
+                                  timestamp,
+                                  nonce,
+                                  method,
+                                  name,
+                                  host,
+                                  port,
+                                  content_hash)
+    log.debug(u'normalized resource for mac calc: {norm}'.format(norm=normalized))
+    digestmod = getattr(hashlib, algorithm)
+
+    # Make sure we are about to hash binary strings.
+
+    if not isinstance(normalized, six_binary_type):
+        normalized = normalized.encode('utf8')
+
+    if not isinstance(access_token, six_binary_type):
+        access_token = access_token.encode('ascii')
+
+    result = hmac.new(access_token, normalized, digestmod)
+    return base64.b64encode(result.digest())
+
+
+def make_taskcluster_header(credentials, req):
+    validate_taskcluster_credentials(credentials)
+
+    url = req.get_full_url()
+    method = req.get_method()
+    algorithm = 'sha256'
+    timestamp = str(utc_now())
+    nonce = random_string(6)
+    url_parts = parse_url(url)
+
+    content_hash = None
+    if req.has_data():
+        content_hash = calculate_payload_hash(  # pragma: no cover
+            algorithm,
+            req.get_data(),
+            req.get_header('Content-type'),
+        )
+
+    mac = calculate_mac('header',
+                        credentials['accessToken'],
+                        algorithm,
+                        timestamp,
+                        nonce,
+                        method,
+                        url_parts['resource'],
+                        url_parts['hostname'],
+                        str(url_parts['port']),
+                        content_hash,
+                        )
+
+    header = u'Hawk mac="{}"'.format(prepare_header_val(mac))
+
+    if content_hash:  # pragma: no cover
+        header = u'{}, hash="{}"'.format(header, prepare_header_val(content_hash))
+
+    header = u'{header}, id="{id}", ts="{ts}", nonce="{nonce}"'.format(
+        header=header,
+        id=prepare_header_val(credentials['clientId']),
+        ts=prepare_header_val(timestamp),
+        nonce=prepare_header_val(nonce),
+    )
+
+    log.debug('Hawk header for URL={} method={}: {}'.format(url, method, header))
+
+    return header
+
+
 class FileRecord(object):
 
     def __init__(self, filename, size, digest, algorithm, unpack=False,
                  version=None, visibility=None, setup=None):
         object.__init__(self)
         if '/' in filename or '\\' in filename:
             log.error(
                 "The filename provided contains path information and is, therefore, invalid.")
@@ -478,16 +696,18 @@ def fetch_file(base_urls, file_record, g
             req = urllib2.Request(url)
             _authorize(req, auth_file)
             f = urllib2.urlopen(req)
             log.debug("opened %s for reading" % url)
             with open(temp_path, 'wb') as out:
                 k = True
                 size = 0
                 while k:
+                    # TODO: print statistics as file transfers happen both for info and to stop
+                    # buildbot timeouts
                     indata = f.read(grabchunk)
                     out.write(indata)
                     size += len(indata)
                     if indata == '':
                         k = False
                 log.info("File %s fetched from %s as %s" %
                          (file_record.filename, base_url, temp_path))
                 fetched_path = temp_path
@@ -512,48 +732,81 @@ def fetch_file(base_urls, file_record, g
 
 def clean_path(dirname):
     """Remove a subtree if is exists. Helper for unpack_file()."""
     if os.path.exists(dirname):
         log.info('rm tree: %s' % dirname)
         shutil.rmtree(dirname)
 
 
+CHECKSUM_SUFFIX = ".checksum"
+
+
+def _cache_checksum_matches(base_file, checksum):
+    try:
+        with open(base_file + CHECKSUM_SUFFIX, "rb") as f:
+            prev_checksum = f.read().strip()
+            if prev_checksum == checksum:
+                log.info("Cache matches, avoiding extracting in '%s'" % base_file)
+                return True
+            return False
+    except IOError:
+        return False
+
+
+def _compute_cache_checksum(filename):
+    with open(filename, "rb") as f:
+        return digest_file(f, "sha256")
+
+
 def unpack_file(filename, setup=None):
     """Untar `filename`, assuming it is uncompressed or compressed with bzip2,
     xz, gzip, or unzip a zip file. The file is assumed to contain a single
     directory with a name matching the base of the given filename.
     Xz support is handled by shelling out to 'tar'."""
+
+    checksum = _compute_cache_checksum(filename)
+
     if tarfile.is_tarfile(filename):
         tar_file, zip_ext = os.path.splitext(filename)
         base_file, tar_ext = os.path.splitext(tar_file)
+        if _cache_checksum_matches(base_file, checksum):
+            return True
         clean_path(base_file)
         log.info('untarring "%s"' % filename)
         tar = tarfile.open(filename)
         tar.extractall()
         tar.close()
     elif filename.endswith('.tar.xz'):
         base_file = filename.replace('.tar.xz', '')
+        if _cache_checksum_matches(base_file, checksum):
+            return True
         clean_path(base_file)
         log.info('untarring "%s"' % filename)
         if not execute('tar -Jxf %s 2>&1' % filename):
             return False
     elif zipfile.is_zipfile(filename):
         base_file = filename.replace('.zip', '')
+        if _cache_checksum_matches(base_file, checksum):
+            return True
         clean_path(base_file)
         log.info('unzipping "%s"' % filename)
         z = zipfile.ZipFile(filename)
         z.extractall()
         z.close()
     else:
         log.error("Unknown archive extension for filename '%s'" % filename)
         return False
 
     if setup and not execute(os.path.join(base_file, setup)):
         return False
+
+    with open(base_file + CHECKSUM_SUFFIX, "wb") as f:
+        f.write(checksum)
+
     return True
 
 
 def fetch_files(manifest_file, base_urls, filenames=[], cache_folder=None,
                 auth_file=None, region=None):
     # Lets load the manifest file
     try:
         manifest = open_manifest(manifest_file)
@@ -750,20 +1003,35 @@ def _log_api_error(e):
         json_resp = json.load(e.fp)
         log.error("%s: %s" % (json_resp['error']['name'],
                               json_resp['error']['description']))
     else:
         log.exception("Error making RelengAPI request:")
 
 
 def _authorize(req, auth_file):
-    if auth_file:
-        log.debug("using bearer token in %s" % auth_file)
-        req.add_unredirected_header('Authorization',
-                                    'Bearer %s' % (open(auth_file, "rb").read().strip()))
+    if not auth_file:
+        return
+
+    is_taskcluster_auth = False
+    with open(auth_file) as f:
+        auth_file_content = f.read().strip()
+        try:
+            auth_file_content = json.loads(auth_file_content)
+            is_taskcluster_auth = True
+        except ValueError:
+            pass
+
+    if is_taskcluster_auth:
+        taskcluster_header = make_taskcluster_header(auth_file_content, req)
+        log.debug("Using taskcluster credentials in %s" % auth_file)
+        req.add_unredirected_header('Authorization', taskcluster_header)
+    else:
+        log.debug("Using Bearer token in %s" % auth_file)
+        req.add_unredirected_header('Authorization', 'Bearer %s' % auth_file_content)
 
 
 def _send_batch(base_url, auth_file, batch, region):
     url = urlparse.urljoin(base_url, 'upload')
     if region is not None:
         url += "?region=" + region
     req = urllib2.Request(url, json.dumps(batch), {'Content-Type': 'application/json'})
     _authorize(req, auth_file)
@@ -780,17 +1048,17 @@ def _s3_upload(filename, file):
     url = urlparse.urlparse(file['put_url'])
     cls = httplib.HTTPSConnection if url.scheme == 'https' else httplib.HTTPConnection
     host, port = url.netloc.split(':') if ':' in url.netloc else (url.netloc, 443)
     port = int(port)
     conn = cls(host, port)
     try:
         req_path = "%s?%s" % (url.path, url.query) if url.query else url.path
         conn.request('PUT', req_path, open(filename, "rb"),
-                     {'Content-type': 'application/octet-stream'})
+                     {'Content-Type': 'application/octet-stream'})
         resp = conn.getresponse()
         resp_body = resp.read()
         conn.close()
         if resp.status != 200:
             raise RuntimeError("Non-200 return from AWS: %s %s\n%s" %
                                (resp.status, resp.reason, resp_body))
     except Exception:
         file['upload_exception'] = sys.exc_info()
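Putting the two halves together, the detection logic in _authorize() boils down to "parses as JSON means Taskcluster, anything else means bearer token". A minimal standalone sketch (Python 3; the path in the usage comment is hypothetical):

import json

def classify_auth_file(path):
    # Mirrors _authorize(): try to parse the file as JSON; on success treat
    # it as Taskcluster credentials, otherwise as a raw RelengAPI bearer
    # token sent verbatim in the Authorization header.
    with open(path) as f:
        content = f.read().strip()
    try:
        return 'taskcluster', json.loads(content)
    except ValueError:
        return 'bearer', content

# e.g. classify_auth_file('/builds/relengapi.tok') -> ('bearer', '<token>')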