Bug 1263815 - retry xvfb startup. r=dustin
☠☠ backed out by db37e1f887d1 ☠ ☠
authorGregory Arndt <garndt@mozilla.com>
Wed, 06 Jul 2016 09:06:58 -0500
changeset 309933 e6e5569551be46e7200a5b33afdd22e48474ee34
parent 309932 0552273a68a16a14ff4f7c0b6400f6f11fd8cf6e
child 309934 5722cb5f4d22a9e64d318f7bd2e28fc99dee2140
push id31492
push usergarndt@mozilla.com
push dateThu, 18 Aug 2016 14:04:34 +0000
treeherderautoland@e6e5569551be [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersdustin
bugs1263815
milestone51.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1263815 - retry xvfb startup. r=dustin Sometimes xvfb will not start up with the current retry/delay settings. This will attempt to retry more and delay for longer to ensure xvfb has started up. Common pieces of this have been factored out into a recipe that all docker images can schare that need this functionality. MozReview-Commit-ID: 2ww0eT3cIt6
taskcluster/scripts/builder/build-l10n.sh
taskcluster/scripts/builder/build-linux.sh
taskcluster/scripts/tester/test-ubuntu1204.sh
taskcluster/scripts/tester/test-ubuntu1604.sh
testing/docker/centos6-build-upd/Dockerfile
testing/docker/centos6-build-upd/VERSION
testing/docker/centos6-build/VERSION
testing/docker/centos6-build/system-setup.sh
testing/docker/desktop-build/Dockerfile
testing/docker/desktop-test/Dockerfile
testing/docker/desktop1604-test/Dockerfile
testing/docker/recipes/xvfb.sh
--- a/taskcluster/scripts/builder/build-l10n.sh
+++ b/taskcluster/scripts/builder/build-l10n.sh
@@ -1,14 +1,16 @@
 #! /bin/bash -vex
 
 set -x -e
 
 echo "running as" $(id)
 
+. /home/worker/scripts/xvfb.sh
+
 ####
 # Taskcluster friendly wrapper for performing fx desktop l10n repacks via mozharness.
 # Based on ./build-linux.sh
 ####
 
 # Inputs, with defaults
 
 : MOZHARNESS_SCRIPT             ${MOZHARNESS_SCRIPT}
@@ -38,47 +40,24 @@ export TINDERBOX_OUTPUT=1
 export LIBRARY_PATH=$LIBRARY_PATH:$WORKSPACE/src/obj-firefox:$WORKSPACE/src/gcc/lib64
 
 # test required parameters are supplied
 if [[ -z ${MOZHARNESS_SCRIPT} ]]; then fail "MOZHARNESS_SCRIPT is not set"; fi
 if [[ -z ${MOZHARNESS_CONFIG} ]]; then fail "MOZHARNESS_CONFIG is not set"; fi
 
 cleanup() {
     local rv=$?
-    if [ -n "$xvfb_pid" ]; then
-        kill $xvfb_pid || true
-    fi
+    cleanup_xvfb
     exit $rv
 }
 trap cleanup EXIT INT
 
-# run mozharness in XVfb, if necessary; this is an array to maintain the quoting in the -s argument
+# run XVfb in the background, if necessary
 if $NEED_XVFB; then
-    # Some mozharness scripts set DISPLAY=:2
-    Xvfb :2 -screen 0 1024x768x24 &
-    export DISPLAY=:2
-    xvfb_pid=$!
-    # Only error code 255 matters, because it signifies that no
-    # display could be opened. As long as we can open the display
-    # tests should work. We'll retry a few times with a sleep before
-    # failing.
-    retry_count=0
-    max_retries=2
-    xvfb_test=0
-    until [ $retry_count -gt $max_retries ]; do
-        xvinfo || xvfb_test=$?
-        if [ $xvfb_test != 255 ]; then
-            retry_count=$(($max_retries + 1))
-        else
-            retry_count=$(($retry_count + 1))
-            echo "Failed to start Xvfb, retry: $retry_count"
-            sleep 2
-        fi
-    done
-    if [ $xvfb_test == 255 ]; then fail "xvfb did not start properly"; fi
+    start_xvfb '1024x768x24' 2
 fi
 
 # set up mozharness configuration, via command line, env, etc.
 
 # $TOOLTOOL_CACHE bypasses mozharness completely and is read by tooltool_wrapper.sh to set the
 # cache.  However, only some mozharness scripts use tooltool_wrapper.sh, so this may not be
 # entirely effective.
 export TOOLTOOL_CACHE
--- a/taskcluster/scripts/builder/build-linux.sh
+++ b/taskcluster/scripts/builder/build-linux.sh
@@ -1,14 +1,16 @@
 #! /bin/bash -vex
 
 set -x -e
 
 echo "running as" $(id)
 
+. /home/worker/scripts/xvfb.sh
+
 ####
 # Taskcluster friendly wrapper for performing fx desktop builds via mozharness.
 ####
 
 # Inputs, with defaults
 
 : MOZHARNESS_SCRIPT             ${MOZHARNESS_SCRIPT}
 : MOZHARNESS_CONFIG             ${MOZHARNESS_CONFIG}
@@ -49,47 +51,24 @@ export MOZ_AUTOMATION_UPLOAD_SYMBOLS=0
 export LIBRARY_PATH=$LIBRARY_PATH:$WORKSPACE/src/obj-firefox:$WORKSPACE/src/gcc/lib64
 
 # test required parameters are supplied
 if [[ -z ${MOZHARNESS_SCRIPT} ]]; then fail "MOZHARNESS_SCRIPT is not set"; fi
 if [[ -z ${MOZHARNESS_CONFIG} ]]; then fail "MOZHARNESS_CONFIG is not set"; fi
 
 cleanup() {
     local rv=$?
-    if [ -n "$xvfb_pid" ]; then
-        kill $xvfb_pid || true
-    fi
+    cleanup_xvfb
     exit $rv
 }
 trap cleanup EXIT INT
 
-# run mozharness in XVfb, if necessary; this is an array to maintain the quoting in the -s argument
+# run XVfb in the background, if necessary
 if $NEED_XVFB; then
-    # Some mozharness scripts set DISPLAY=:2
-    Xvfb :2 -screen 0 1024x768x24 &
-    export DISPLAY=:2
-    xvfb_pid=$!
-    # Only error code 255 matters, because it signifies that no
-    # display could be opened. As long as we can open the display
-    # tests should work. We'll retry a few times with a sleep before
-    # failing.
-    retry_count=0
-    max_retries=2
-    xvfb_test=0
-    until [ $retry_count -gt $max_retries ]; do
-        xvinfo || xvfb_test=$?
-        if [ $xvfb_test != 255 ]; then
-            retry_count=$(($max_retries + 1))
-        else
-            retry_count=$(($retry_count + 1))
-            echo "Failed to start Xvfb, retry: $retry_count"
-            sleep 2
-        fi
-    done
-    if [ $xvfb_test == 255 ]; then fail "xvfb did not start properly"; fi
+    start_xvfb '1024x768x24' 2
 fi
 
 # set up mozharness configuration, via command line, env, etc.
 
 debug_flag=""
 if [ 0$DEBUG -ne 0 ]; then
   debug_flag='--debug'
 fi
--- a/taskcluster/scripts/tester/test-ubuntu1204.sh
+++ b/taskcluster/scripts/tester/test-ubuntu1204.sh
@@ -1,14 +1,16 @@
 #! /bin/bash -xe
 
 set -x -e
 
 echo "running as" $(id)
 
+. /home/worker/scripts/xvfb.sh
+
 ####
 # Taskcluster friendly wrapper for performing fx desktop tests via mozharness.
 ####
 
 # Inputs, with defaults
 
 : MOZHARNESS_URL                ${MOZHARNESS_URL}
 : MOZHARNESS_SCRIPT             ${MOZHARNESS_SCRIPT}
@@ -38,23 +40,17 @@ if [[ -z ${MOZHARNESS_CONFIG} ]]; then f
 mkdir -p ~/artifacts/public
 
 cleanup() {
     local rv=$?
     if [[ -s /home/worker/.xsession-errors ]]; then
       # To share X issues
       cp /home/worker/.xsession-errors ~/artifacts/public/xsession-errors.log
     fi
-    # When you call this script with START_VNC or TASKCLUSTER_INTERACTIVE
-    # we make sure we do not kill xvfb so you do not lose your connection
-    xvfb_pid=`pidof Xvfb`
-    if [ -n "$xvfb_pid" ] && [ $START_VNC == false ] && [ $TASKCLUSTER_INTERACTIVE == false ] ; then
-        kill $xvfb_pid || true
-        screen -XS xvfb quit || true
-    fi
+    cleanup_xvfb
     exit $rv
 }
 trap cleanup EXIT INT
 
 # Unzip the mozharness ZIP file created by the build task
 if ! curl --fail -o mozharness.zip --retry 10 -L $MOZHARNESS_URL; then
     fail "failed to download mozharness zip"
 fi
@@ -68,39 +64,19 @@ fi
 
 # start up the pulseaudio daemon.  Note that it's important this occur
 # before the Xvfb startup for ubuntu 12.04, not for 16.04
 if $NEED_PULSEAUDIO; then
     pulseaudio --fail --daemonize --start
     pactl load-module module-null-sink
 fi
 
-# run Xvfb in the background, if necessary
+# run XVfb in the background, if necessary
 if $NEED_XVFB; then
-    screen -dmS xvfb Xvfb :0 -nolisten tcp -screen 0 1600x1200x24 \
-       > ~/artifacts/public/xvfb.log 2>&1
-    export DISPLAY=:0
-    # Only error code 255 matters, because it signifies that no
-    # display could be opened. As long as we can open the display
-    # tests should work. We'll retry a few times with a sleep before
-    # failing.
-    retry_count=0
-    max_retries=2
-    xvfb_test=0
-    until [ $retry_count -gt $max_retries ]; do
-        xvinfo || xvfb_test=$?
-        if [ $xvfb_test != 255 ]; then
-            retry_count=$(($max_retries + 1))
-        else
-            retry_count=$(($retry_count + 1))
-            echo "Failed to start Xvfb, retry: $retry_count"
-            sleep 2
-        fi
-    done
-    if [ $xvfb_test == 255 ]; then fail "xvfb did not start properly"; fi
+    start_xvfb '1600x1200x24' 0
 fi
 
 if $START_VNC; then
     x11vnc > ~/artifacts/public/x11vnc.log 2>&1 &
 fi
 
 if $NEED_WINDOW_MANAGER; then
     # This is read by xsession to select the window manager
--- a/taskcluster/scripts/tester/test-ubuntu1604.sh
+++ b/taskcluster/scripts/tester/test-ubuntu1604.sh
@@ -1,14 +1,16 @@
 #! /bin/bash -xe
 
 set -x -e
 
 echo "running as" $(id)
 
+. /home/worker/scripts/xvfb.sh
+
 ####
 # Taskcluster friendly wrapper for performing fx desktop tests via mozharness.
 ####
 
 # Inputs, with defaults
 
 : MOZHARNESS_URL                ${MOZHARNESS_URL}
 : MOZHARNESS_SCRIPT             ${MOZHARNESS_SCRIPT}
@@ -38,63 +40,36 @@ if [[ -z ${MOZHARNESS_CONFIG} ]]; then f
 mkdir -p ~/artifacts/public
 
 cleanup() {
     local rv=$?
     if [[ -s /home/worker/.xsession-errors ]]; then
       # To share X issues
       cp /home/worker/.xsession-errors ~/artifacts/public/xsession-errors.log
     fi
-    # When you call this script with START_VNC or TASKCLUSTER_INTERACTIVE
-    # we make sure we do not kill xvfb so you do not lose your connection
-    xvfb_pid=`pidof Xvfb`
-    if [ -n "$xvfb_pid" ] && [ $START_VNC == false ] && [ $TASKCLUSTER_INTERACTIVE == false ] ; then
-        kill $xvfb_pid || true
-        screen -XS xvfb quit || true
-    fi
+    cleanup_xvfb
     exit $rv
 }
 trap cleanup EXIT INT
 
 # Unzip the mozharness ZIP file created by the build task
 if ! curl --fail -o mozharness.zip --retry 10 -L $MOZHARNESS_URL; then
     fail "failed to download mozharness zip"
 fi
 rm -rf mozharness
 unzip -q mozharness.zip
 rm mozharness.zip
 
 if ! [ -d mozharness ]; then
     fail "mozharness zip did not contain mozharness/"
 fi
 
-# run Xvfb in the background, if necessary
+# run XVfb in the background, if necessary
 if $NEED_XVFB; then
-    screen -dmS xvfb Xvfb :0 -nolisten tcp -screen 0 1600x1200x24 \
-       > ~/artifacts/public/xvfb.log 2>&1
-    export DISPLAY=:0
-
-    # Only error code 255 matters, because it signifies that no
-    # display could be opened. As long as we can open the display
-    # tests should work. We'll retry a few times with a sleep before
-    # failing.
-    retry_count=0
-    max_retries=2
-    xvfb_test=0
-    until [ $retry_count -gt $max_retries ]; do
-        xvinfo || xvfb_test=$?
-        if [ $xvfb_test != 255 ]; then
-            retry_count=$(($max_retries + 1))
-        else
-            retry_count=$(($retry_count + 1))
-            echo "Failed to start Xvfb, retry: $retry_count"
-            sleep 2
-        fi
-    done
-    if [ $xvfb_test == 255 ]; then fail "xvfb did not start properly"; fi
+    start_xvfb '1600x1200x24' 0
 fi
 
 if $START_VNC; then
     x11vnc > ~/artifacts/public/x11vnc.log 2>&1 &
 fi
 
 if $NEED_WINDOW_MANAGER; then
     # This is read by xsession to select the window manager
--- a/testing/docker/centos6-build-upd/Dockerfile
+++ b/testing/docker/centos6-build-upd/Dockerfile
@@ -1,9 +1,9 @@
-FROM          taskcluster/centos6-build:0.1.6
+FROM          taskcluster/centos6-build:0.1.8
 MAINTAINER    Dustin J. Mitchell <dustin@mozilla.com>
 
 ### update to latest from upstream repositories
 # if this becomes a long list of packages, consider bumping the
 # centos6-build version
 RUN yum update -y
 
 # Set a default command useful for debugging
--- a/testing/docker/centos6-build-upd/VERSION
+++ b/testing/docker/centos6-build-upd/VERSION
@@ -1,1 +1,1 @@
-0.1.6.20160329195300
+0.1.8.20160812141800
--- a/testing/docker/centos6-build/VERSION
+++ b/testing/docker/centos6-build/VERSION
@@ -1,1 +1,1 @@
-0.1.6
+0.1.8
--- a/testing/docker/centos6-build/system-setup.sh
+++ b/testing/docker/centos6-build/system-setup.sh
@@ -256,16 +256,17 @@ install gettext-devel
 # build utilities
 install ccache
 
 # a basic node environment so that we can run TaskCluster tools
 install nodejs
 install npm
 
 # enough X to run `make check` and do a PGO build
+install screen
 install Xvfb
 install xvinfo
 
 # required for building OS X tools
 install patch
 install libuuid-devel
 install openssl-static
 install cmake
--- a/testing/docker/desktop-build/Dockerfile
+++ b/testing/docker/desktop-build/Dockerfile
@@ -1,21 +1,25 @@
 # TODO remove VOLUME below when the base image is updated next.
-FROM          taskcluster/centos6-build-upd:0.1.6.20160329195300
+FROM          taskcluster/centos6-build-upd:0.1.8.20160812141800
 MAINTAINER    Dustin J. Mitchell <dustin@mozilla.com>
 
 # TODO remove when base image is updated
 VOLUME /home/worker/workspace
 VOLUME /home/worker/tooltool-cache
 
 # Add build scripts; these are the entry points from the taskcluster worker, and
 # operate on environment variables
 ADD             bin /home/worker/bin
 RUN             chmod +x /home/worker/bin/*
 
+# Add wrapper scripts for xvfb allowing tasks to easily retry starting up xvfb
+# %include testing/docker/recipes/xvfb.sh
+ADD topsrcdir/testing/docker/recipes/xvfb.sh /home/worker/scripts/xvfb.sh
+
 # Add configuration
 COPY            dot-config                    /home/worker/.config
 
 # Generate machine uuid file
 RUN dbus-uuidgen --ensure=/var/lib/dbus/machine-id
 
 # Stubbed out credentials; mozharness looks for this file an issues a WARNING
 # if it's not found, which causes the build to fail.  Note that this needs to
--- a/testing/docker/desktop-test/Dockerfile
+++ b/testing/docker/desktop-test/Dockerfile
@@ -5,16 +5,20 @@ RUN useradd -d /home/worker -s /bin/bash
 WORKDIR /home/worker
 
 # %include testing/docker/recipes/tooltool.py
 ADD topsrcdir/testing/docker/recipes/tooltool.py /setup/tooltool.py
 
 # %include testing/docker/recipes/install-mercurial.sh
 ADD topsrcdir/testing/docker/recipes/install-mercurial.sh /tmp/install-mercurial.sh
 
+# Add wrapper scripts for xvfb allowing tasks to easily retry starting up xvfb
+# %include testing/docker/recipes/xvfb.sh
+ADD topsrcdir/testing/docker/recipes/xvfb.sh /home/worker/scripts/xvfb.sh
+
 # %include testing/docker/recipes/ubuntu1204-test-system-setup.sh
 ADD topsrcdir/testing/docker/recipes/ubuntu1204-test-system-setup.sh /setup/system-setup.sh
 RUN bash /setup/system-setup.sh
 
 # %include testing/docker/recipes/run-task
 ADD topsrcdir/testing/docker/recipes/run-task /home/worker/bin/run-task
 
 # %include taskcluster/scripts/tester/test-ubuntu1204.sh
--- a/testing/docker/desktop1604-test/Dockerfile
+++ b/testing/docker/desktop1604-test/Dockerfile
@@ -9,16 +9,20 @@ ADD topsrcdir/testing/docker/recipes/too
 
 # %include testing/docker/recipes/install-mercurial.sh
 ADD topsrcdir/testing/docker/recipes/install-mercurial.sh /setup/install-mercurial.sh
 
 # %include testing/docker/recipes/ubuntu1604-test-system-setup.sh
 ADD topsrcdir/testing/docker/recipes/ubuntu1604-test-system-setup.sh /setup/system-setup.sh
 RUN           bash /setup/system-setup.sh
 
+# Add wrapper scripts for xvfb allowing tasks to easily retry starting up xvfb
+# %include testing/docker/recipes/xvfb.sh
+ADD topsrcdir/testing/docker/recipes/xvfb.sh /home/worker/scripts/xvfb.sh
+
 # %include testing/docker/recipes/run-task
 ADD topsrcdir/testing/docker/recipes/run-task /home/worker/bin/run-task
 
 # %include taskcluster/scripts/tester/test-ubuntu1604.sh
 ADD topsrcdir/taskcluster/scripts/tester/test-ubuntu1604.sh /home/worker/bin/test-linux.sh
 
 # This will create a host mounted filesystem when the cache is stripped
 # on Try. This cancels out some of the performance losses of aufs. See
new file mode 100644
--- /dev/null
+++ b/testing/docker/recipes/xvfb.sh
@@ -0,0 +1,71 @@
+#! /bin/bash -x
+
+set -x
+
+fail() {
+    echo # make sure error message is on a new line
+    echo "[xvfb.sh:error]" "${@}"
+    exit 1
+}
+
+cleanup_xvfb() {
+    # When you call this script with START_VNC or TASKCLUSTER_INTERACTIVE
+    # we make sure we do not kill xvfb so you do not lose your connection
+    local xvfb_pid=`pidof Xvfb`
+    if [ -n "$xvfb_pid" ] && [ $START_VNC == false ] && [ $TASKCLUSTER_INTERACTIVE == false ] ; then
+        kill $xvfb_pid || true
+        screen -XS xvfb quit || true
+    fi
+}
+
+# Attempt to start xvfb in a screen session with the given resolution and display
+# number.  Up to 5 attempts will be made to start xvfb with a short delay
+# between retries
+try_xvfb() {
+    screen -dmS xvfb Xvfb :$2 -nolisten tcp -screen 0 $1 \
+       > ~/artifacts/xvfb/xvfb.log 2>&1
+    export DISPLAY=:$2
+
+    # Only error code 255 matters, because it signifies that no
+    # display could be opened. As long as we can open the display
+    # tests should work. We'll retry a few times with a sleep before
+    # failing.
+    local retry_count=0
+    local max_retries=5
+    xvfb_test=0
+    until [ $retry_count -gt $max_retries ]; do
+        xvinfo || xvfb_test=$?
+        if [ $xvfb_test != 255 ]; then
+            retry_count=$(($max_retries + 1))
+        else
+            retry_count=$(($retry_count + 1))
+            echo "Failed to start Xvfb, retry: $retry_count"
+            sleep 2
+        fi
+    done
+    if [ $xvfb_test == 255 ]; then
+        return 1
+    else
+        return 0
+    fi
+}
+
+start_xvfb() {
+    mkdir -p ~/artifacts/xvfb
+    local retry_count=0
+    local max_retries=2
+    local success=1
+    until [ $retry_count -gt $max_retries ]; do
+        try_xvfb $1 $2
+        success=$?
+        if [ $success -eq 0 ]; then
+            retry_count=$(($max_retries + 1))
+        else
+            retry_count=$(($retry_count + 1))
+            sleep 10
+        fi
+    done
+    if [ $success -eq 1 ]; then
+        fail "Could not start xvfb after ${xvfb_max_retries} attempts"
+    fi
+}