autoland: improve service restarting during deploys (bug 1452867) r=smacleod
author: byron jones <glob@mozilla.com>
Tue, 10 Apr 2018 15:33:34 +0800
changeset 5849 cf2860a4de9d
parent 5848 8318cec2b37a
child 5850 559f3ab51a46
push id: 2711
push user: bjones@mozilla.com
push date: 2018-04-16 05:39 +0000
treeherder: version-control-tools@cf2860a4de9d [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: smacleod
bugs: 1452867
autoland: improve service restarting during deploys (bug 1452867) r=smacleod

Ensure httpd and the main autoland service are always restarted during deploys, and ensure the main service completes a transplant before responding to INT/TERM signals.
ansible/roles/autoland/handlers/main.yml
ansible/roles/autoland/tasks/main.yml
autoland/autoland/autoland.py
--- a/ansible/roles/autoland/handlers/main.yml
+++ b/ansible/roles/autoland/handlers/main.yml
@@ -1,11 +1,3 @@
 ---
 - name: reload systemd
   command: systemctl daemon-reload
-
-- name: restart autoland
-  service: name=autoland
-           state=restarted
-
-- name: restart apache
-  service: name=httpd
-           state=restarted
--- a/ansible/roles/autoland/tasks/main.yml
+++ b/ansible/roles/autoland/tasks/main.yml
@@ -43,18 +43,16 @@
         mode=0644
 
 - name: install config.json
   template: src=config.json.j2
             dest=/home/autoland/version-control-tools/autoland/autoland/config.json
             owner=autoland
             group=autoland
             mode=0600
-  notify:
-    - restart autoland
 
 - name: link config.json
   file: src=/home/autoland/version-control-tools/autoland/autoland/config.json
         dest=/home/autoland/config.json
         owner=autoland
         group=autoland
         state=link
 
@@ -77,32 +75,28 @@
   copy: dest=/etc/systemd/system/autoland.service
         src=../files/autoland.service
         mode=0664
   notify:
     - reload systemd
 
 - name: enable autoland service
   service: name=autoland
-           state=started
+           state=restarted
            enabled=yes
 
 - name: setup main conf file
   copy: src=httpd.conf
         dest=/etc/httpd/conf/httpd.conf
-  notify:
-    - restart apache
 
 - name: remove conf.d
   file: path=/etc/httpd/conf.d/
         state=absent
 
 - name: install autoland site
   copy: src=../files/autoland_httpd.conf
         dest=/etc/httpd/conf/autoland.conf
-  notify:
-    - restart apache
 
-- name: start apache
+- name: start/reload apache
   service: name=httpd
-           state=started
+           state=reloaded
            enabled=yes
 
--- a/autoland/autoland/autoland.py
+++ b/autoland/autoland/autoland.py
@@ -4,16 +4,17 @@ import base64
 import config
 import datetime
 import json
 import lando
 import logging
 import mozreview
 import os
 import psycopg2
+import signal
 import sys
 import time
 import traceback
 import urlparse
 
 sys.path.insert(0, os.path.normpath(os.path.join(os.path.normpath(
                 os.path.abspath(os.path.dirname(__file__))), '..',
                                                              '..',
@@ -367,38 +368,48 @@ def main():
                         level=logging.DEBUG)
     stdout_handler = logging.StreamHandler(sys.stdout)
     logger.addHandler(stdout_handler)
 
     # boto's debug logging is rather verbose.
     logging.getLogger('botocore').setLevel(logging.INFO)
 
     logger.info('starting autoland')
+    dbconn = get_dbconn(args.dsn)
 
-    dbconn = get_dbconn(args.dsn)
+    # Set up signal handling to ensure we aren't cancelled mid-transplant.
+    global running
+    running = True
+
+    def handle_term(signal, frame):
+        logger.info('stopping autoland')
+        global running
+        running = False
+
+    signal.signal(signal.SIGTERM, handle_term)
+    signal.signal(signal.SIGINT, handle_term)
+
     last_error_msg = None
     next_mozreview_update = datetime.datetime.now()
-    while True:
+    while running:
         try:
             handle_pending_transplants(dbconn)
 
             # TODO: In normal configuration, all updates will be posted to the
             # same MozReview instance, so we don't bother tracking failure to
             # post for individual urls. In the future, we might need to
             # support this.
             if datetime.datetime.now() > next_mozreview_update:
                 ok = handle_pending_mozreview_updates(dbconn)
                 if ok:
                     next_mozreview_update += datetime.timedelta(seconds=1)
                 else:
                     next_mozreview_update += MOZREVIEW_RETRY_DELAY
 
             time.sleep(0.1)
-        except KeyboardInterrupt:
-            break
         except psycopg2.InterfaceError:
             dbconn = get_dbconn(args.dsn)
         except:
             # If things go really badly, we might see the same exception
             # thousands of times in a row. There's not really any point in
             # logging it more than once.
             error_msg = traceback.format_exc()
             if error_msg != last_error_msg: