woo_commenter.py
author Geoff Brown <gbrown@mozilla.com>
Mon, 04 Jun 2018 07:47:03 -0600 (2018-06-04)
changeset 353 4dc7cd6e0bf2f999fff0e30f1a2bb9e397e641f7
parent 352 e6b2ecd3bff55feb0a54cea21df53b5be4ebef1b
permissions -rwxr-xr-x
Bug 1445301 - Update locations file for sclements; r=me
#!/usr/bin/env python

# This Source Code is subject to the terms of the Mozilla Public License
# version 2.0 (the "License"). You can obtain a copy of the License at
# http://mozilla.org/MPL/2.0/.

import ConfigParser
import datetime
import json
import os
import re
import sys
import time
from operator import itemgetter
from optparse import OptionParser

import requests
import tempita
from requests.exceptions import ConnectionError, HTTPError, Timeout

from woo_client import TopBugs

# Configuration file read by main(); it must provide the [woo] local_server_url
# and [bugzilla] api_key settings.
CONF_FILE = 'woo_cron.conf'
# Path of the tempita template used to render each bug comment.
TEMPLATE_FILE = os.path.join('templates', 'bug_comment.template')

# The minimum number of failure classifications a bug must receive
# (in the specified time window) for a bug comment to be posted.
DAILY_THRESHOLD = 15
WEEKLY_THRESHOLD = 1
# Include rank for the top 50 bugs (weekly mode only)
RANK_THRESHOLD = 50
# Include call-to-action message for bugs with at least 30 failures/week
# and a more urgent message if at least 75 failures/week
PRIORITY1_THRESHOLD = 75
PRIORITY2_THRESHOLD = 30
# Recommend disabling when at least 150 failures are tracked over 21 days.
# NOTE(review): calculate_skip_date_strings() starts the window DISABLE_DAYS
# days before yesterday; if both dates are inclusive that spans
# DISABLE_DAYS + 1 days -- confirm which is intended.
DISABLE_THRESHOLD = 150
DISABLE_DAYS = 21
# Change [stockwell needswork] to [stockwell unknown] when failure rate
# drops below 20 failures/week
UNKNOWN_THRESHOLD = 20

# Whiteboard tags written by this script (see stockwell_whiteboard()).
WHITEBOARD_DISABLE_RECOMMENDED = "[stockwell disable-recommended]"
WHITEBOARD_NEEDSWORK_OWNER = "[stockwell needswork:owner]"
WHITEBOARD_UNKNOWN = "[stockwell unknown]"

# Bugzilla REST endpoint for a single bug; %s is replaced by the bug id.
BZ_API_URL = 'https://bugzilla.mozilla.org/rest/bug/%s'
# Fields requested when fetching a bug's triage information.
TRIAGE_PARAMS = {'include_fields': 'product,component,priority,whiteboard,keywords'}
# Bugzilla search for bugs whose whiteboard contains the substring
# "[stockwell needswork" (the closing bracket is deliberately omitted, so
# variants such as "[stockwell needswork:owner]" match too).
BZ_API_NEEDSWORK_URL = 'https://bugzilla.mozilla.org/rest/bug?status_whiteboard_type=substring&query_format=advanced&status_whiteboard=[stockwell needswork'
# Fields requested for each bug returned by the needswork search.
NEEDSWORK_PARAMS = {'include_fields': 'id,whiteboard'}


def calculate_date_strings(weekly_mode):
    """Compute the reporting window as a (start, end) pair of YYYY-MM-DD strings.

       The window always ends yesterday. In weekly mode it begins six days
       before that; in daily mode the start and end are the same day."""
    last_day = datetime.date.today() - datetime.timedelta(days=1)
    # Both ends of the window are inclusive, so stepping back 6 days from the
    # end gives a 7-day week, and stepping back 0 days gives a single day.
    days_back = 6 if weekly_mode else 0
    first_day = last_day - datetime.timedelta(days=days_back)
    return first_day.isoformat(), last_day.isoformat()


def calculate_skip_date_strings():
    """Compute the window used for the skip (disable-recommended) threshold
       check, as a (start, end) pair of YYYY-MM-DD strings.

       The window ends yesterday and begins DISABLE_DAYS days before that."""
    window_end = datetime.date.today() - datetime.timedelta(days=1)
    # NOTE(review): if both dates are treated as inclusive this covers
    # DISABLE_DAYS + 1 calendar days -- confirm that is intended.
    window_start = window_end - datetime.timedelta(days=DISABLE_DAYS)
    return window_start.isoformat(), window_end.isoformat()


def dict_to_sorted_list(d):
    """Convert a dict into a list of (key, value) tuples, in descending order
       of value, with ties broken by descending key.

       Uses items() rather than the Python 2-only iteritems(), so any mapping
       works, on either Python 2 or Python 3; the result is the same."""
    # itemgetter(1, 0) builds the sort key (value, key) for each pair.
    return sorted(d.items(), key=itemgetter(1, 0), reverse=True)


def submit_bug_change(bmo_session, bug_id, params):
    """Send an update for one bug to Bugzilla (a PUT of `params` as JSON).

       Connection errors, timeouts and HTTP error statuses are reported on
       stdout and otherwise ignored, so that one failing request does not
       abort the whole batch. Failures are expected now and then: a bug
       number may have been typoed, or may belong to a non-public bug that
       this script is not allowed to modify."""
    url = BZ_API_URL % bug_id
    try:
        response = bmo_session.put(url, json=params, timeout=30)
        response.raise_for_status()
    except HTTPError:
        # Raised by raise_for_status(), so `response` is always bound here.
        print("HTTPError %s: %s" % (response.status_code, response.text))
    except (ConnectionError, Timeout) as err:
        print("%s: %s" % (err.__class__.__name__, str(err)))


def get_triage_info_for_bug(bmo_session, bug_id):
    """Fetch the fields listed in TRIAGE_PARAMS for a single bug.

       Returns the bug's dict from the Bugzilla response, or None when the
       request fails, the response body is not valid JSON, or the response
       contains no bug. Failures are reported on stdout.

       Previously a failed request was reported and then followed by a
       TypeError, because the result was subscripted while still None."""
    info = None
    try:
        r = bmo_session.get(BZ_API_URL % bug_id, params=TRIAGE_PARAMS, timeout=30)
        r.raise_for_status()
        info = r.json()
    except (ConnectionError, Timeout, ValueError) as e:
        # ValueError covers a response body that cannot be decoded as JSON.
        print("%s: %s" % (e.__class__.__name__, str(e)))
    except HTTPError:
        # Raised by raise_for_status(), so `r` is always bound here.
        print("HTTPError %s: %s" % (r.status_code, r.text))
    # slow down: bmo server may refuse service if too many requests made too frequently
    time.sleep(0.5)
    bugs = info.get('bugs') if isinstance(info, dict) else None
    if not bugs:
        return None
    return bugs[0]


def get_needswork_bugs(bmo_session):
    """Fetch the bugs matched by the BZ_API_NEEDSWORK_URL search, requesting
       only the fields listed in NEEDSWORK_PARAMS.

       Returns the list of bug dicts from the response. When the request
       fails, the body is not valid JSON, or the response has no 'bugs'
       entry, the problem is reported on stdout (for request/JSON errors) and
       an empty list is returned, so callers can iterate over the result
       unconditionally. Previously a failed request ended in a TypeError
       because the result was subscripted while still None."""
    info = None
    try:
        r = bmo_session.get(BZ_API_NEEDSWORK_URL, params=NEEDSWORK_PARAMS, timeout=30)
        r.raise_for_status()
        info = r.json()
    except (ConnectionError, Timeout, ValueError) as e:
        # ValueError covers a response body that cannot be decoded as JSON.
        print("%s: %s" % (e.__class__.__name__, str(e)))
    except HTTPError:
        # Raised by raise_for_status(), so `r` is always bound here.
        print("HTTPError %s: %s" % (r.status_code, r.text))
    if not isinstance(info, dict):
        return []
    return info.get('bugs', [])


def stockwell_whiteboard(existing, new):
    """Return `existing` with every "[stockwell ...]" tag removed and `new`
       appended at the end.

       Text outside the removed tags (including any surrounding whitespace)
       is kept exactly as it was."""
    # Raw string: "\[" and "\]" are regex escapes, not string escapes. The
    # non-greedy ".*?" stops at the first "]" so each tag is removed separately.
    stripped = re.sub(r"\[stockwell.*?\]", "", existing)
    return stripped + new


def main():
    """Posts a bug comment containing stats to each bug whose total number of
       occurrences (in the chosen time window) met the appropriate threshold.

       Command-line options:
         --weekly  generate weekly summaries instead of the default daily ones
         --test    print the would-be Bugzilla updates instead of submitting them

       Besides the comment, an update may reset the bug's priority to '--'
       and/or rewrite its [stockwell ...] whiteboard tag. The whiteboard is
       only included in an update when its value actually changes, so bugs
       with nothing new to report are not touched.

       Exits with status 1 if the configuration file cannot be parsed or is
       missing a required setting."""

    print("woo_commenter start at %s" % datetime.datetime.now())

    parser = OptionParser()
    parser.add_option('--weekly', action='store_true', dest='weekly_mode', default=False,
                      help='generate weekly summaries instead of the default daily summaries')
    parser.add_option('--test', action='store_true', dest='test_mode', default=False,
                      help='output bug comments to stdout rather than submitting to Bugzilla')
    options, _ = parser.parse_args()

    try:
        cfg = ConfigParser.ConfigParser()
        cfg.read(CONF_FILE)
        local_server_url = cfg.get('woo', 'local_server_url')
        bugzilla_api_key = cfg.get('bugzilla', 'api_key')
    except ConfigParser.Error as e:
        sys.stderr.write('Error reading %s: %s\n' % (CONF_FILE, e))
        sys.exit(1)

    # For an initial trial period, only bugs in these components will be
    # marked for triage.
    with open('owner_triage_components.json') as f:
        components = json.load(f)
    print("Found %d owner triaged components" % len(components))

    start_date, end_date = calculate_date_strings(options.weekly_mode)
    skip_start_date, skip_end_date = calculate_skip_date_strings()
    threshold = WEEKLY_THRESHOLD if options.weekly_mode else DAILY_THRESHOLD

    template_defaults = {'weekly_mode': options.weekly_mode,
                         'start_date': start_date,
                         'end_date': end_date}
    tmpl = tempita.Template.from_filename(TEMPLATE_FILE, namespace=template_defaults)

    bmo_session = requests.Session()
    bmo_session.headers['User-Agent'] = 'orangefactor-commenter'
    bmo_session.headers['X-BUGZILLA-API-KEY'] = bugzilla_api_key
    # Use a custom HTTP adapter, so we can set a non-zero max_retries value.
    bmo_session.mount("https://", requests.adapters.HTTPAdapter(max_retries=3))

    # Fetch per-repository, per-platform and total failure counts for each bug.
    tb = TopBugs(local_server_url, start_date, end_date, tree='all')
    stats = tb.stats()
    bug_stats = tb.stats_by_bug()
    if options.weekly_mode:
        top = tb.top_bugs()
        top = top[:RANK_THRESHOLD]
        needswork_bugs = get_needswork_bugs(bmo_session)
    else:
        top = []
        needswork_bugs = []
    # Fetch failure counts for the skip threshold period
    skip_tb = TopBugs(local_server_url, skip_start_date, skip_end_date, tree='all')
    skip_bug_stats = skip_tb.stats_by_bug()

    testruncount = stats['testruncount']
    for bug_id, counts in bug_stats.iteritems():
        params = {}
        bug_info = None
        # `whiteboard` is the value we want the bug to end up with;
        # `current_whiteboard` is the value Bugzilla reported for it. The
        # whiteboard is only submitted when the two differ.
        whiteboard = None
        current_whiteboard = None
        priority = 0
        if skip_bug_stats[bug_id]['total'] >= DISABLE_THRESHOLD:
            bug_info = get_triage_info_for_bug(bmo_session, bug_id)
            if bug_info is None:
                # Without triage info we cannot decide anything about this
                # bug; skip it rather than abort the whole batch.
                print("Skipping bug %s: unable to retrieve triage info" % bug_id)
                continue
            whiteboard = current_whiteboard = bug_info['whiteboard']
            # do not make disable-recommended comment if disable-recommended is
            # already on whiteboard, or if it appears the bug is already fixed
            # or the test disabled, or if it is an infra issue
            if ("[stockwell fixed" not in whiteboard and
                "[stockwell disable" not in whiteboard and
                "[stockwell infra" not in whiteboard):
                priority = 3
        if priority or (counts['total'] >= threshold):
            rank = None
            top_entry = (bug_id, counts['total'])
            if top_entry in top:
                rank = top.index(top_entry) + 1
            if priority == 0:
                if options.weekly_mode and counts['total'] >= PRIORITY1_THRESHOLD:
                    priority = 1
                elif options.weekly_mode and counts['total'] >= PRIORITY2_THRESHOLD:
                    priority = 2
            text = tmpl.substitute(bug_id=bug_id,
                                   total=counts['total'],
                                   testruncount=testruncount,
                                   rank=rank,
                                   priority=priority,
                                   failure_rate=round(counts['total']/float(testruncount), 3),
                                   repositories=dict_to_sorted_list(counts['per_repository']),
                                   platforms=dict_to_sorted_list(counts['per_platform']))
            params = {'comment': {'body': text}}
            # owner-triage updates to priority and whiteboard
            if (counts['total'] >= PRIORITY2_THRESHOLD) or (not options.weekly_mode):
                if bug_info is None:
                    bug_info = get_triage_info_for_bug(bmo_session, bug_id)
                    if bug_info is None:
                        print("Skipping bug %s: unable to retrieve triage info" % bug_id)
                        continue
                whiteboard = current_whiteboard = bug_info['whiteboard']
                if (([bug_info['product'], bug_info['component']] in components) and
                    ('intermittent-failure' in bug_info['keywords']) and
                    ('[stockwell fixed' not in whiteboard) and
                    ('[stockwell disable' not in whiteboard) and
                    ('[stockwell infra' not in whiteboard)):
                    # do not update priority on bugs already awaiting triage or
                    # already P1/P2/P3 (hopefully recently triaged)
                    if bug_info['priority'] not in ['--', 'P1', 'P2', 'P3']:
                        params['priority'] = '--'
                    # remove any [stockwell xxx] from whiteboard, then add
                    # [stockwell needswork:owner], unless it is already there
                    if WHITEBOARD_NEEDSWORK_OWNER not in whiteboard:
                        whiteboard = stockwell_whiteboard(whiteboard, WHITEBOARD_NEEDSWORK_OWNER)
        # When skip-threshold exceeded, also update whiteboard. priority is
        # only ever set to 3 after bug_info was fetched above, so bug_info is
        # always available here. Start again from the bug's reported
        # whiteboard so this tag replaces any tag chosen by owner-triage.
        if priority == 3:
            whiteboard = stockwell_whiteboard(bug_info['whiteboard'],
                                              WHITEBOARD_DISABLE_RECOMMENDED)
        # Once a week, also check for [stockwell needswork] bugs that are no longer
        # failing frequently and update whiteboard to [stockwell unknown]
        if options.weekly_mode and (counts['total'] < UNKNOWN_THRESHOLD):
            for needswork_bug in needswork_bugs:
                if int(bug_id) == needswork_bug['id']:
                    current_whiteboard = needswork_bug['whiteboard']
                    whiteboard = stockwell_whiteboard(current_whiteboard, WHITEBOARD_UNKNOWN)
                    # bug ids are unique, so there is nothing more to find
                    break
        # Only send the whiteboard when it changed; re-submitting the existing
        # value would issue a pointless request for bugs with no other update.
        if whiteboard and whiteboard != current_whiteboard:
            params['whiteboard'] = whiteboard
        if params:
            if options.test_mode:
                print("\n# Bug %s: update with %s" % (bug_id, params))
            else:
                print("Submitting comment to bug %s (%d occurrences)" % (bug_id, counts['total']))
                submit_bug_change(bmo_session, bug_id, params)
                # sleep between comment submissions to avoid overwhelming servers
                time.sleep(1)

    print("woo_commenter complete at %s" % datetime.datetime.now())


# Run the commenter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()