Bug 1296354 - Properly handle etags generated by multipart s3 uploads r=nthomas a=release DONTBUILD
author Rail Aliiev <rail@mozilla.com>
Wed, 24 Aug 2016 22:06:30 -0400
changeset 349922 1d42160deab394f3c81d35f2897c6d660b2d3d41
parent 349921 30b1e0880c0f629c19f19b83576e6f5bd4f72a15
child 349923 0801ef134cad49e7da4011ff04ba9f7c0738bf1a
push id 1230
push user jlund@mozilla.com
push date Mon, 31 Oct 2016 18:13:35 +0000
treeherder mozilla-release@5e06e3766db2
reviewers nthomas, release
bugs 1296354
milestone 50.0a2
Bug 1296354 - Properly handle etags generated by multipart s3 uploads r=nthomas a=release DONTBUILD MozReview-Commit-ID: 5gmXDKZwlSC
testing/mozharness/scripts/release/push-candidate-to-releases.py
--- a/testing/mozharness/scripts/release/push-candidate-to-releases.py
+++ b/testing/mozharness/scripts/release/push-candidate-to-releases.py
@@ -144,29 +144,40 @@ class ReleasePusher(BaseScript, Virtuale
         keys = [k for k in bucket.list(prefix=self._get_releases_prefix())]
         if keys:
             self.warning("Destination already exists with %s keys" % len(keys))
 
         def worker(item):
             source, destination = item
 
             def copy_key():
+                source_key = bucket.get_key(source)
                 dest_key = bucket.get_key(destination)
-                source_key = bucket.get_key(source)
+                # According to http://docs.aws.amazon.com/AmazonS3/latest/API/RESTCommonResponseHeaders.html
+                # an S3 key's MD5 is exposed as its ETag, except when the object
+                # was uploaded using the multipart method. In that case the object's
+                # ETag is constructed from an MD5 digest, a minus symbol, and the
+                # number of parts. See http://stackoverflow.com/questions/12186993/what-is-the-algorithm-to-compute-the-amazon-s3-etag-for-a-file-larger-than-5gb#answer-19896823
+                source_md5 = source_key.etag.split("-")[0]
+                if dest_key:
+                    dest_md5 = dest_key.etag.split("-")[0]
+                else:
+                    dest_md5 = None
+
                 if not dest_key:
                     self.info("Copying {} to {}".format(source, destination))
                     bucket.copy_key(destination, self.config["bucket_name"],
                                     source)
-                elif source_key.etag == dest_key.etag:
+                elif source_md5 == dest_md5:
                     self.warning(
                         "{} already exists with the same content ({}), skipping copy".format(
-                            destination, dest_key.etag))
+                            destination, dest_md5))
                 else:
                     self.fatal(
-                        "{} already exists with the different content (src: {}, dest: {}), aborting".format(
+                        "{} already exists with different content (src ETag: {}, dest ETag: {}), aborting".format(
                             destination, source_key.etag, dest_key.etag))
 
             return retry(copy_key, sleeptime=5, max_sleeptime=60,
                          retry_exceptions=(S3CopyError, S3ResponseError))
 
         def find_release_files():
             candidates_prefix = self._get_candidates_prefix()
             release_prefix = self._get_releases_prefix()
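
For reference, here is a minimal standalone sketch of the comparison the patch introduces: strip the multipart part-count suffix from each ETag before comparing MD5 values. The etag_md5 helper and the sample ETag strings below are hypothetical illustrations, not part of the mozharness script.

def etag_md5(etag):
    """Return the MD5-like portion of an S3 ETag.

    For single-part uploads the ETag is the object's MD5 hex digest; for
    multipart uploads it looks like "<digest>-<number of parts>", so only
    the part before the first "-" is kept. boto usually reports the ETag
    wrapped in double quotes; both keys carry them, so they do not affect
    an equality check.
    """
    if etag is None:
        return None
    return etag.split("-")[0]

# Hypothetical ETag values illustrating the behaviour.
source_etag = "9b2cf535f27731c974343645a3985328-4"  # multipart upload, 4 parts
dest_etag = "9b2cf535f27731c974343645a3985328"      # same digest, no suffix

print(etag_md5(source_etag) == etag_md5(dest_etag))  # True: the suffix is ignored

The patch applies the same split to source_key.etag and dest_key.etag before deciding whether to skip the copy (matching MD5s) or abort (mismatching MD5s).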
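
The unchanged context around the hunk wraps copy_key in retry so that transient S3 failures are retried with backoff. A hedged sketch of that pattern follows, assuming the retry helper comes from the redo library and the exceptions from boto.exception (the script's imports are not shown in this hunk); flaky_s3_operation is a hypothetical placeholder.

from boto.exception import S3CopyError, S3ResponseError
from redo import retry

def flaky_s3_operation():
    # Hypothetical stand-in for a call such as bucket.copy_key(...), which
    # can fail transiently with S3CopyError or S3ResponseError.
    pass

# Retry the operation, sleeping 5 seconds after the first failure and backing
# off to at most 60 seconds between attempts, but only for the listed S3
# exceptions; any other exception propagates immediately.
retry(flaky_s3_operation, sleeptime=5, max_sleeptime=60,
      retry_exceptions=(S3CopyError, S3ResponseError))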