Bug 1419478 Enable S3 caching for binary diff patch files in partial update tasks r=rail
author Simon Fraser <sfraser@mozilla.com>
Fri, 15 Dec 2017 14:07:31 -0600
changeset 448590 7eecc63dcdecd85d0babd6f9da951eca0378d632
parent 448589 13faabcf8e965cca857e3cc8fdbea289a09c7ea0
child 448591 5bc55868583b2a7615800f713753eb00becc9fae
push id 8527
push user Callek@gmail.com
push date Thu, 11 Jan 2018 21:05:50 +0000
treeherder mozilla-beta@95342d212a7a
reviewers rail
bugs 1419478
milestone 59.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1419478 Enable S3 caching for binary diff patch files in partial update tasks r=rail MozReview-Commit-ID: 23vwFcBkqKd
taskcluster/docker/funsize-update-generator/Dockerfile
taskcluster/docker/funsize-update-generator/requirements.txt
taskcluster/docker/funsize-update-generator/runme.sh
taskcluster/docker/funsize-update-generator/scripts/mbsdiff_hook.sh
taskcluster/taskgraph/transforms/partials.py
--- a/taskcluster/docker/funsize-update-generator/Dockerfile
+++ b/taskcluster/docker/funsize-update-generator/Dockerfile
@@ -4,17 +4,17 @@ MAINTAINER Simon Fraser <sfraser@mozilla
 # Required software
 ENV DEBIAN_FRONTEND noninteractive
 # Chain apt-get commands with apt-get clean in a single docker RUN
 # to make sure that files are removed within a single docker layer
 RUN apt-get update -q && \
     apt-get install -yyq --no-install-recommends \
     python3.5 python3-setuptools python3-cryptography libgetopt-simple-perl \
     bzip2 clamav clamav-freshclam python3-requests python3-sh curl \
-    python3-dev gcc liblzma-dev xz-utils && \
+    python3-dev gcc liblzma-dev xz-utils jq && \
     apt-get clean
 RUN useradd -d /home/worker -s /bin/bash -m worker
 COPY requirements.txt /tmp/
 
 # Freshclam may be flaky, retry if it fails
 RUN for i in 1 2 3 4 5; do freshclam --verbose && break || sleep 15; done
 
 # python-pip installs a lot of dependencies increasing the size of an image
--- a/taskcluster/docker/funsize-update-generator/requirements.txt
+++ b/taskcluster/docker/funsize-update-generator/requirements.txt
@@ -1,4 +1,5 @@
 mar==2.1.2
 backports.lzma==0.0.8
 datadog==0.17.0
-redo
+redo==1.6
+awscli==1.14.10
--- a/taskcluster/docker/funsize-update-generator/runme.sh
+++ b/taskcluster/docker/funsize-update-generator/runme.sh
@@ -7,18 +7,49 @@ test "$SHA1_SIGNING_CERT"
 test "$SHA384_SIGNING_CERT"
 
 ARTIFACTS_DIR="/home/worker/artifacts"
 mkdir -p "$ARTIFACTS_DIR"
 
 curl --location --retry 10 --retry-delay 10 -o /home/worker/task.json \
     "https://queue.taskcluster.net/v1/task/$TASK_ID"
 
-# enable locale cache
-export MBSDIFF_HOOK="/home/worker/bin/mbsdiff_hook.sh -c /tmp/fs-cache"
+# auth:aws-s3:read-write:tc-gp-private-1d-us-east-1/releng/mbsdiff-cache/
+# -> bucket of tc-gp-private-1d-us-east-1, path of releng/mbsdiff-cache/
+# Trailing slash is important, due to prefix permissions in S3.
+S3_BUCKET_AND_PATH=$(jq -r '.scopes[] | select(contains ("auth:aws-s3"))' /home/worker/task.json | awk -F: '{print $4}')
+
+# Will be empty if there's no scope for AWS S3.
+if [ -n "${S3_BUCKET_AND_PATH}" ]; then
+  # Split the scope value into bucket name and path prefix; fail early if either part is missing.
+  S3_PATH=${S3_BUCKET_AND_PATH#*/}
+  AWS_BUCKET_NAME=${S3_BUCKET_AND_PATH%/${S3_PATH}*}
+  test "${S3_PATH}"
+  test "${AWS_BUCKET_NAME}"
+
+  set +x  # Don't echo these.
+  secret_url="taskcluster/auth/v1/aws/s3/read-write/${AWS_BUCKET_NAME}/${S3_PATH}"
+  AUTH=$(curl "${secret_url}")
+  AWS_ACCESS_KEY_ID=$(echo "${AUTH}" | jq -r '.credentials.accessKeyId')
+  AWS_SECRET_ACCESS_KEY=$(echo "${AUTH}" | jq -r '.credentials.secretAccessKey')
+  AWS_SESSION_TOKEN=$(echo "${AUTH}" | jq -r '.credentials.sessionToken')
+  export AWS_ACCESS_KEY_ID
+  export AWS_SECRET_ACCESS_KEY
+  export AWS_SESSION_TOKEN
+  AUTH=
+
+  if [ -n "$AWS_ACCESS_KEY_ID" ] && [ -n "$AWS_SECRET_ACCESS_KEY" ]; then
+    # Pass the full bucket/path prefix, as the script just appends local files.
+    export MBSDIFF_HOOK="/home/worker/bin/mbsdiff_hook.sh -S ${S3_BUCKET_AND_PATH}"
+  fi
+  set -x
+else
+  # enable local cache
+  export MBSDIFF_HOOK="/home/worker/bin/mbsdiff_hook.sh -c /tmp/fs-cache"
+fi
 
 if [ ! -z "$FILENAME_TEMPLATE" ]; then
     EXTRA_PARAMS="--filename-template $FILENAME_TEMPLATE $EXTRA_PARAMS"
 fi
 
 /home/worker/bin/funsize.py \
     --artifacts-dir "$ARTIFACTS_DIR" \
     --task-definition /home/worker/task.json \
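
For reference, a minimal sketch of how the S3 scope is expected to split into bucket name and path prefix, using the scope added in partials.py below and the same parameter expansions as runme.sh (resulting values shown in comments):

    # Scope value pulled out of task.json by the jq/awk pipeline above:
    S3_BUCKET_AND_PATH="tc-gp-private-1d-us-east-1/releng/mbsdiff-cache/"
    S3_PATH=${S3_BUCKET_AND_PATH#*/}                    # releng/mbsdiff-cache/
    AWS_BUCKET_NAME=${S3_BUCKET_AND_PATH%/${S3_PATH}*}  # tc-gp-private-1d-us-east-1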
--- a/taskcluster/docker/funsize-update-generator/scripts/mbsdiff_hook.sh
+++ b/taskcluster/docker/funsize-update-generator/scripts/mbsdiff_hook.sh
@@ -4,106 +4,108 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 #
 # This tool contains functions that are to be used to handle/enable funsize
 # Author: Mihai Tabara
 #
 
 HOOK=
-SERVER_URL=
+AWS_BUCKET_NAME=
 LOCAL_CACHE_DIR=
 
 getsha512(){
-    echo "$(openssl sha512 "${1}" | awk '{print $2}')"
+    openssl sha512 "${1}" | awk '{print $2}'
 }
 
 print_usage(){
-    echo "$(basename $0) -A SERVER-URL [-c LOCAL-CACHE-DIR-PATH] [-g] [-u] PATH-FROM-URL PATH-TO-URL PATH-PATCH"
+    echo "$(basename "$0") [-S S3-BUCKET-NAME] [-c LOCAL-CACHE-DIR-PATH] [-g] [-u] PATH-FROM-URL PATH-TO-URL PATH-PATCH"
     echo "Script that saves/retrieves from cache presumptive patches as args"
     echo ""
     echo "-A SERVER-URL - host where to send the files"
     echo "-c LOCAL-CACHE-DIR-PATH local path to which patches are cached"
     echo "-g pre hook - tests whether patch already in cache"
     echo "-u post hook - upload patch to cache for future use"
     echo ""
     echo "PATH-FROM-URL     : path on disk for source file"
     echo "PATH-TO-URL       : path on disk for destination file"
     echo "PATH-PATCH        : path on disk for patch between source and destination"
 }
 
 upload_patch(){
-    sha_from=`getsha512 "$1"`
-    sha_to=`getsha512 "$2"`
+    sha_from=$(getsha512 "$1")
+    sha_to=$(getsha512 "$2")
     patch_path="$3"
+    patch_filename="$(basename "$3")"
 
     # save to local cache first
     if [ -n "$LOCAL_CACHE_DIR" ]; then
         local_cmd="mkdir -p "$LOCAL_CACHE_DIR/$sha_from""
-        if `$local_cmd` >&2; then
-            cp -avf "$patch_path" "$LOCAL_CACHE_DIR/$sha_from/$sha_to"
-            echo "$patch_path saved on local cache!"
+        if $local_cmd >&2; then
+            cp -avf "${patch_path}" "$LOCAL_CACHE_DIR/$sha_from/$sha_to"
+            echo "${patch_path} saved on local cache."
         fi
     fi
-    # The remote cache implementation is not used. The code is for usage
-    # reference only.
-     return 0
 
-    # send it over to funsize
-    cmd="curl -sSw %{http_code} -o /dev/null -X POST $SERVER_URL -F sha_from="$sha_from" -F sha_to="$sha_to" -F patch_file="@$patch_path""
-    ret_code=`$cmd`
-
-    if [ $ret_code -eq 200 ]; then
-        echo "$patch_path Successful uploaded to funsize!"
-        return 0
+    if [ -n "${AWS_BUCKET_NAME}" ]; then
+        BUCKET_PATH="s3://${AWS_BUCKET_NAME}${sha_from}/${sha_to}/${patch_filename}"
+        if aws s3 cp "${patch_path}" "${BUCKET_PATH}"; then
+            echo "${patch_path} saved on s://${AWS_BUCKET_NAME}"
+            return 0
+        fi
+        echo "${patch_path} failed to be uploaded to s3://${AWS_BUCKET_NAME}"
+        return 1
     fi
-
-    echo "$patch_path Failed to be uploaded to funsize!"
-    return 1
+    return 0
 }
 
 get_patch(){
-    sha_from=`getsha512 "$1"`
-    sha_to=`getsha512 "$2"`
+    # $1 and $2 are the /path/to/filename
+    sha_from=$(getsha512 "$1")
+    sha_to=$(getsha512 "$2")
     destination_file="$3"
-    tmp_file="$destination_file.tmp"
+    s3_filename="$(basename "$3")"
 
-    # try to retrieve from local cache first
-    if [ -r "$LOCAL_CACHE_DIR/$sha_from/$sha_to" ]; then
-        cp -avf "$LOCAL_CACHE_DIR/$sha_from/$sha_to" "$destination_file"
-        echo "Successful retrieved $destination_file from local cache!"
-        return 0
-    else
-        echo "File is not in the locale cache"
-        return 1
+    # Try to retrieve from local cache first.
+    if [ -n "$LOCAL_CACHE_DIR" ]; then
+        if [ -r "$LOCAL_CACHE_DIR/$sha_from/$sha_to" ]; then
+            cp -avf "$LOCAL_CACHE_DIR/$sha_from/$sha_to" "$destination_file"
+            echo "Successful retrieved ${destination_file} from local cache."
+            return 0
+        fi
     fi
-    # The remote cache implementation is not used. The code is for usage
-    # reference only.
-
-    # if unsuccessful, try to retrieve from funsize
-    cmd="curl -LsSGw %{http_code} $SERVER_URL/$sha_from/$sha_to -o $tmp_file"
-    ret_code=`$cmd`
+    # If not in the local cache, we might find it remotely.
 
-    if [ $ret_code -eq 200 ]; then
-        mv "$tmp_file" "$destination_file"
-        echo "Successful retrieved $destination_file from funsize!"
-        return 0
+    if [ -n "${AWS_BUCKET_NAME}" ]; then
+        BUCKET_PATH="s3://${AWS_BUCKET_NAME}${sha_from}/${sha_to}/${s3_filename}"
+        if aws s3 ls "${BUCKET_PATH}"; then
+            if aws s3 cp "${BUCKET_PATH}" "${destination_file}"; then
+                echo "Successful retrieved ${destination_file} from s3://${AWS_BUCKET_NAME}"
+                return 0
+            else
+                echo "Failed to retrieve ${destination_file} from s3://${AWS_BUCKET_NAME}"
+                return 1
+            fi
+        # Not found, fall through to default error
+        fi
     fi
-
-    rm  -f "$tmp_file"
-    echo "Failed to retrieve $destination_file from funsize!"
     return 1
 }
 
 OPTIND=1
 
-while getopts ":A:c:gu" option; do
+while getopts ":S:c:gu" option; do
     case $option in
-        A)
-            SERVER_URL="$OPTARG"
+        S)
+            # This will probably be bucketname/path/prefix but we can use it either way
+            AWS_BUCKET_NAME="$OPTARG"
+            # Ensure trailing slash is there.
+            if [[ ! $AWS_BUCKET_NAME =~ .*/$ ]]; then
+              AWS_BUCKET_NAME="${AWS_BUCKET_NAME}/"
+            fi
             ;;
         c)
             LOCAL_CACHE_DIR="$OPTARG"
             ;;
         g)
             HOOK="PRE"
             ;;
         u)
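
For context, a rough sketch of how a caller would wrap mbsdiff with this hook, based only on the -g/-u usage text above (the file paths here are hypothetical; the actual funsize caller may differ):

    MBSDIFF_HOOK="/home/worker/bin/mbsdiff_hook.sh -S tc-gp-private-1d-us-east-1/releng/mbsdiff-cache/"
    old=old/target.bin; new=new/target.bin; patch=work/target.patch  # hypothetical paths
    if ! $MBSDIFF_HOOK -g "$old" "$new" "$patch"; then  # pre hook: look for a cached patch
        mbsdiff "$old" "$new" "$patch"                  # cache miss: generate the patch
        $MBSDIFF_HOOK -u "$old" "$new" "$patch"         # post hook: upload it for reuse
    fi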
--- a/taskcluster/taskgraph/transforms/partials.py
+++ b/taskcluster/taskgraph/transforms/partials.py
@@ -144,17 +144,20 @@ def make_task_description(config, jobs):
         level = config.params['level']
 
         task = {
             'label': label,
             'description': "{} Partials".format(
                 dep_job.task["metadata"]["description"]),
             'worker-type': 'aws-provisioner-v1/gecko-%s-b-linux' % level,
             'dependencies': dependencies,
-            'scopes': ['secrets:get:project/releng/gecko/build/level-%s/datadog-api-key' % level],
+            'scopes': [
+                'secrets:get:project/releng/gecko/build/level-%s/datadog-api-key' % level,
+                'auth:aws-s3:read-write:tc-gp-private-1d-us-east-1/releng/mbsdiff-cache/'
+            ],
             'attributes': attributes,
             'run-on-projects': dep_job.attributes.get('run_on_projects'),
             'treeherder': treeherder,
             'extra': extra,
             'worker': worker,
         }
 
         yield task
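
Taken together, the hook stores each patch under a key derived from the SHA-512 digests of the source and destination files, so a cached patch can be checked by hand with something like the sketch below (the file names are hypothetical; the bucket and prefix are the ones granted by the scope above):

    sha_from=$(openssl sha512 old/target.bin | awk '{print $2}')
    sha_to=$(openssl sha512 new/target.bin | awk '{print $2}')
    aws s3 ls "s3://tc-gp-private-1d-us-east-1/releng/mbsdiff-cache/${sha_from}/${sha_to}/"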