ansible: Add a systemd unit to scrape AWS IP spaces (Bug 1435014) r=gps
author Connor Sheehan <sheehan@mozilla.com>
Tue, 06 Feb 2018 17:51:22 -0500
changeset 5802 fe83b0ac9e5b
parent 5801 0d32297095d0
child 5803 f48f74151f57
push id 2673
push user gszorc@mozilla.com
push date 2018-02-21 00:59 +0000
treeherder version-control-tools@fe83b0ac9e5b [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers gps
bugs 1435014
ansible: Add a systemd unit to scrape AWS IP spaces (Bug 1435014) r=gps

Adds a systemd service unit and associated timer unit for scheduling and executing AWS IP scraping daily at ~1 PM EST. Provides mail-unit-output service as a Wants and Before dependency, to facilitate mailing unit output after the file has been changed.

Moves the location of aws-ip-ranges from /etc/mercurial to /var/hg to align with the Mozilla IP address document location, and to transfer ownership of the file from the root user to the hg user.

Moves the in-repo aws-ip-ranges document to the docker-hg-web role files directory, and copies this file to the docker container as part of testing.

MozReview-Commit-ID: AqU6wWBEWs5
ansible/files/aws-ip-ranges.json
ansible/roles/docker-hg-web/files/aws-ip-ranges.json
ansible/roles/docker-hg-web/tasks/main.yml
ansible/roles/hg-web/files/aws-ip-scraper.service
ansible/roles/hg-web/files/aws-ip-scraper.timer
ansible/roles/hg-web/files/hgrc
ansible/roles/hg-web/tasks/main.yml
scripts/scrape-manifest-ip-ranges.py
rename from ansible/files/aws-ip-ranges.json
rename to ansible/roles/docker-hg-web/files/aws-ip-ranges.json
--- a/ansible/roles/docker-hg-web/tasks/main.yml
+++ b/ansible/roles/docker-hg-web/tasks/main.yml
@@ -11,18 +11,21 @@
 
 - name: install Docker support files
   copy: src={{ item }} dest=/{{ item }} owner=root group=root mode=0755
   with_items:
     - entrypoint.py
     - entrypoint-solo
     - set-mirror-key.py
 
-- name: Install Mozilla IP Address Test Document
-  copy: src=moz-ip-ranges.txt dest=/var/hg/moz-ip-ranges.txt owner=root group=root mode=0644
+- name: Install IP address documents for prioritizing streamed clone bundles
+  copy: src={{ item }} dest=/var/hg/{{ item }} owner=hg group=hg mode=0644
+  with_items:
+    - moz-ip-ranges.txt
+    - aws-ip-ranges.json
 
 # We have an additional virtualenv in Docker to run moz.build info
 # directly from hgweb without the secure execution environment we
 # run in production. We do this because we can't execute the secure
 # wrapper inside Docker because of limited privileges of Docker
 # containers. This is unfortunate. But it's what we have to do for
 # now.
 
new file mode 100644
--- /dev/null
+++ b/ansible/roles/hg-web/files/aws-ip-scraper.service
@@ -0,0 +1,13 @@
+[Unit]
+Description=Scrape AWS IP address ranges
+Before=mail-unit-output@%n.service
+Wants=mail-unit-output@%n.service
+After=network.target
+OnFailure=mail-unit-output@%n.service
+
+[Service]
+ExecStart=/var/hg/venv_tools_py3/bin/python -u /var/hg/version-control-tools/scripts/scrape-manifest-ip-ranges.py aws
+Type=oneshot
+User=hg
+Group=hg
+Nice=10
new file mode 100644
--- /dev/null
+++ b/ansible/roles/hg-web/files/aws-ip-scraper.timer
@@ -0,0 +1,10 @@
+[Unit]
+Description=Schedules periodic scraping for AWS IP addresses
+After=network.target remote-fs.target nss-lookup.target
+
+[Timer]
+# Once a day at 18:00 system time (about 1 PM EST, assuming the host clock is UTC -- confirm)
+OnCalendar=*-*-* 18:00:00
+
+[Install]
+WantedBy=multi-user.target
--- a/ansible/roles/hg-web/files/hgrc
+++ b/ansible/roles/hg-web/files/hgrc
@@ -70,14 +70,14 @@ httppostargs = true
 
 [obshacks]
 # Enable the user that runs hgweb and performs replication to exchange
 # obsolescence markers, even if not enabled for regular users.
 obsolescenceexchangeusers = hg
 
 [hgmo]
 mozbuildinfowrapper = /usr/bin/sudo /usr/local/bin/mozbuild-eval %repo%
-awsippath = /etc/mercurial/aws-ip-ranges.json
+awsippath = /var/hg/aws-ip-ranges.json
 mozippath = /var/hg/moz-ip-ranges.txt
 pullclonebundlesmanifest = true
 
 [mozilla]
 firefoxreleasesdb = /var/hg/fxreleases/db.db
--- a/ansible/roles/hg-web/tasks/main.yml
+++ b/ansible/roles/hg-web/tasks/main.yml
@@ -237,23 +237,16 @@
 
 - name: write bloxtool config file
   template: src=bloxtool.ini.j2
             dest=/etc/mercurial/bloxtool.ini
             mode=0400
             owner=hg
             group=hg
 
-# Needed to sniff clients as coming from AWS.
-- name: install AWS IP ranges file
-  copy: src=files/aws-ip-ranges.json
-        dest=/etc/mercurial/aws-ip-ranges.json
-        mode=0644
-        owner=root
-        group=root
 
 # We control httpd.conf for features like server-status
 - name: install httpd.conf file
   template: src=httpd.conf.j2 dest=/etc/httpd/conf/httpd.conf
 
 - name: directory for httpd logs
   file: path=/var/log/httpd/hg.mozilla.org
         state=directory
@@ -518,16 +511,41 @@
 
 - name: Mozilla IP scraper service enabled (server only)
   service: name=mozilla-ip-scraper.timer
            enabled=yes
            state=started
   when: vct_dir.stat.exists == False
 
 
+- name: systemd service files for AWS IP scraper
+  copy: src={{ item }}
+        dest=/etc/systemd/system/{{ item }}
+        owner=root
+        group=root
+        mode=0644
+  with_items:
+    - aws-ip-scraper.service
+    - aws-ip-scraper.timer
+  notify: systemd daemon reload
+  when: vct_dir.stat.exists == False
+
+- name: AWS IP scraper service enabled (server only)
+  service: name=aws-ip-scraper.timer
+           enabled=yes
+           state=started
+  when: vct_dir.stat.exists == False
+
+- name: Run AWS IP scrape to get latest (server only)
+  service: name=aws-ip-scraper.service
+           enabled=yes
+           state=started
+  when: vct_dir.stat.exists == False
+
+
 - name: copy vcsreplicator config
   template: src=vcsreplicator.ini.j2
             dest=/etc/mercurial/vcsreplicator.ini
             owner=root
             group=root
             mode=0644
 
 - name: directory for vcsreplicator logs
--- a/scripts/scrape-manifest-ip-ranges.py
+++ b/scripts/scrape-manifest-ip-ranges.py
@@ -99,17 +99,17 @@ def get_mozilla_office_ips():
 
     except VoluptuousInvalid as vi:
         sys.exit('The JSON data from bloxtool does not match the required schema.')
 
 
 def get_aws_ips():
     try:
         # Grab the new data from Amazon
-        amazon_ip_ranges_file = Path('/etc/mercurial/aws-ip-ranges.json')
+        amazon_ip_ranges_file = Path('/var/hg/aws-ip-ranges.json')
         ip_ranges_response = requests.get('https://ip-ranges.amazonaws.com/ip-ranges.json')
 
         # Ensure 200 OK response code
         if ip_ranges_response.status_code != 200:
             sys.exit('HTTP response from Amazon was not 200 OK')
 
         # Sanity check: ensure the file is an appropriate size
         if len(ip_ranges_response.content) < 88000: