Bug 1296354 - Properly handle etags generated by multipart s3 uploads r=nthomas a=release DONTBUILD
authorRail Aliiev <rail@mozilla.com>
Wed, 24 Aug 2016 22:06:30 -0400
changeset 340457 a37eba7b5746020ab905019788fe46c7c8548e23
parent 340456 e441403ef6a8ac09f9316184c9e86e76a4ff8a06
child 340458 7d88d38c38d14727a6c8779aa3d0ebe20f88ec66
push id: 6340
push user: raliiev@mozilla.com
push date: Thu, 25 Aug 2016 13:43:48 +0000
treeherder: mozilla-beta@a37eba7b5746 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: nthomas, release
bugs: 1296354
milestone: 49.0
Bug 1296354 - Properly handle etags generated by multipart s3 uploads r=nthomas a=release DONTBUILD MozReview-Commit-ID: 5gmXDKZwlSC
testing/mozharness/scripts/release/push-candidate-to-releases.py
--- a/testing/mozharness/scripts/release/push-candidate-to-releases.py
+++ b/testing/mozharness/scripts/release/push-candidate-to-releases.py
@@ -144,29 +144,40 @@ class ReleasePusher(BaseScript, Virtuale
         keys = [k for k in bucket.list(prefix=self._get_releases_prefix())]
         if keys:
             self.warning("Destination already exists with %s keys" % len(keys))
 
         def worker(item):
             source, destination = item
 
             def copy_key():
+                source_key = bucket.get_key(source)
                 dest_key = bucket.get_key(destination)
-                source_key = bucket.get_key(source)
+                # According to http://docs.aws.amazon.com/AmazonS3/latest/API/RESTCommonResponseHeaders.html
+                # S3 key MD5 is represented as ETag, except when objects are
+                # uploaded using the multipart method. In this case the object's ETag
+                # is constructed using its MD5, a minus symbol, and the number of
+                # parts. See http://stackoverflow.com/questions/12186993/what-is-the-algorithm-to-compute-the-amazon-s3-etag-for-a-file-larger-than-5gb#answer-19896823
+                source_md5 = source_key.etag.split("-")[0]
+                if dest_key:
+                    dest_md5 = dest_key.etag.split("-")[0]
+                else:
+                    dest_md5 = None
+
                 if not dest_key:
                     self.info("Copying {} to {}".format(source, destination))
                     bucket.copy_key(destination, self.config["bucket_name"],
                                     source)
-                elif source_key.etag == dest_key.etag:
+                elif source_md5 == dest_md5:
                     self.warning(
                         "{} already exists with the same content ({}), skipping copy".format(
-                            destination, dest_key.etag))
+                            destination, dest_md5))
                 else:
                     self.fatal(
-                        "{} already exists with the different content (src: {}, dest: {}), aborting".format(
+                        "{} already exists with the different content (src ETag: {}, dest ETag: {}), aborting".format(
                             destination, source_key.etag, dest_key.etag))
 
             return retry(copy_key, sleeptime=5, max_sleeptime=60,
                          retry_exceptions=(S3CopyError, S3ResponseError))
 
         def find_release_files():
             candidates_prefix = self._get_candidates_prefix()
             release_prefix = self._get_releases_prefix()