scripts/vcs-sync/vcs_sync.py
author Mike Hommey <mh@glandium.org>
Thu, 16 Apr 2020 15:55:57 +0900
changeset 4489 187ceee0ce97ce7e1a7ee1dd68dfe58fa68f0584
parent 4488 904f897a1305a6a2dc02cfe754f006a7da37605d
permissions -rwxr-xr-x
Bug 1629115 - Properly use environment set from configuration for git-cinnabar. r=dhouse

#!/usr/bin/env python
# ***** BEGIN LICENSE BLOCK *****
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
# ***** END LICENSE BLOCK *****
"""vcs_sync.py

hg<->git conversions.  Needs to support both the monolithic beagle/gecko.git
type conversions, as well as many-to-many (l10n, build repos, etc.)
"""

from copy import deepcopy
import mmap
import os
import pprint
import re
import sys
import time

try:
    import simplejson as json
    assert json
except ImportError:
    import json

sys.path.insert(1, os.path.dirname(os.path.dirname(sys.path[0])))

import mozharness
external_tools_path = os.path.join(
    os.path.abspath(os.path.dirname(os.path.dirname(mozharness.__file__))),
    'external_tools',
)

from mozharness.base.errors import HgErrorList, GitErrorList
from mozharness.base.log import INFO, ERROR, FATAL
from mozharness.base.python import VirtualenvMixin, virtualenv_config_options
from mozharness.base.transfer import TransferMixin
from mozharness.base.vcs.vcssync import VCSSyncScript
from mozharness.mozilla.tooltool import TooltoolMixin


# HgGitScript {{{1
class HgGitScript(VirtualenvMixin, TooltoolMixin, TransferMixin, VCSSyncScript):
    """ Beagle-oriented hg->git script (lots of mozilla-central hardcodes;
        assumption that we're going to be importing lots of branches).

        Beagle is a git repo of mozilla-central, with full cvs history,
        and a number of developer-oriented repositories and branches added.

        The partner-oriented gecko.git could also be incorporated into this
        script with some changes.
        """

    mapfile_binary_search = None
    all_repos = None
    successful_repos = []
    config_options = [
        [["--no-check-incoming", ], {
            "action": "store_false",
            "dest": "check_incoming",
            "default": True,
            "help": "Don't check for incoming changesets",
        }],
        [["--max-log-sample-size", ], {
            "action": "store",
            "dest": "email_max_log_sample_size",
            "type": "int",
            "default": 102400,
            "help": "Specify the maximum number of characters from a log file to be "
                    "embedded in the email body, per embedding (not total - note we "
                    "embed two separate log samples into the email - so maximum "
                    "email body size can end up a little over 2x this amount).",
         }],
    ]

    def __init__(self, require_config_file=True):
        super(HgGitScript, self).__init__(
            config_options=virtualenv_config_options + self.config_options,
            all_actions=[
                'clobber',
                'list-repos',
                'create-virtualenv',
                'update-stage-mirror',
                'update-work-mirror',
                'create-git-notes',
                'publish-to-mapper',
                'push',
                'combine-mapfiles',
                'upload',
                'notify',
            ],
            # These default actions are the update loop that we run after the
            # initial steps (creating the work mirror with all the branches +
            # cvs history) have been run.
            default_actions=[
                'list-repos',
                'create-virtualenv',
                'update-stage-mirror',
                'update-work-mirror',
                'push',
                'combine-mapfiles',
                'upload',
                'notify',
            ],
            require_config_file=require_config_file
        )
        self.remote_targets = None
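        # remote_targets is populated lazily: either from the l10n config in
        # _query_l10n_repos(), or from self.config['remote_targets'] at push time.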

    # Helper methods {{{1
    def query_abs_dirs(self):
        """ Define paths.
            """
        if self.abs_dirs:
            return self.abs_dirs
        abs_dirs = super(HgGitScript, self).query_abs_dirs()
        abs_dirs['abs_cvs_history_dir'] = os.path.join(
            abs_dirs['abs_work_dir'], 'mozilla-cvs-history')
        abs_dirs['abs_source_dir'] = os.path.join(
            abs_dirs['abs_work_dir'], 'stage_source')
        abs_dirs['abs_repo_sync_tools_dir'] = os.path.join(
            abs_dirs['abs_work_dir'], 'repo-sync-tools')
        abs_dirs['abs_git_rewrite_dir'] = os.path.join(
            abs_dirs['abs_work_dir'], 'mc-git-rewrite')
        abs_dirs['abs_target_dir'] = os.path.join(abs_dirs['abs_work_dir'],
                                                  'target')
        if 'conversion_dir' in self.config:
            abs_dirs['abs_conversion_dir'] = os.path.join(
                abs_dirs['abs_work_dir'], 'conversion',
                self.config['conversion_dir']
            )
        self.abs_dirs = abs_dirs
        return self.abs_dirs

    def init_git_repo(self, path, additional_args=None, deny_deletes=False):
        """ Create a git repo, with retries.

            We call this with additional_args=['--bare'] to save disk +
            make things cleaner.
            """
        git = self.query_exe("git", return_type="list")
        cmd = git + ['init']
        # generally for --bare
        if additional_args:
            cmd.extend(additional_args)
        cmd.append(path)
        status = self.retry(
            self.run_command,
            args=(cmd, ),
            error_level=FATAL,
            error_message="Can't set up %s!" % path
        )
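        # Refuse non-fast-forward pushes into the new repo; optionally refuse
        # branch/tag deletion as well (used for the local test-push targets).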
        status = self.run_command(
            git + ['config', 'receive.denyNonFastForwards', 'true'],
            cwd=path
        )
        if deny_deletes:
            status = self.run_command(
                git + ['config', 'receive.denyDeletes', 'true'],
                cwd=path
            )
        return status

    def write_hggit_hgrc(self, dest):
        """ Update .hg/hgrc, if not already updated
            """
        hgrc = os.path.join(dest, '.hg', 'hgrc')
        contents = ''
        if os.path.exists(hgrc):
            contents = self.read_from_file(hgrc)
        if 'hggit=' not in contents:
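            # Enable the hg-git extension, keeping the conversion git repo in
            # the repo root (<repo>/.git, as used in update_work_mirror())
            # rather than under .hg/ (intree=1).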
            hgrc_update = """[extensions]
hggit=
[git]
intree=1
"""
            self.write_to_file(hgrc, hgrc_update, open_mode='a')

    def _process_locale(self, locale, type, config, l10n_remote_targets, name, l10n_repos):
        """ This contains the common processing that we do on both gecko_config
            and gaia_config for a given locale.
            """
        replace_dict = {'locale': locale}
        new_targets = deepcopy(config.get('targets', {}))
        for target in new_targets:
            dest = target['target_dest']
            if '%(locale)s' in dest:
                new_dest = dest % replace_dict
                target['target_dest'] = new_dest
                remote_target = l10n_remote_targets.get(new_dest)
                if remote_target is None:  # generate target if not seen before
                    possible_remote_target = l10n_remote_targets.get(dest)
                    # The unsubstituted dest may be a local or a remote target:
                    # local targets have no entry in l10n_remote_targets (None),
                    # remote targets do, so use that to tell them apart.
                    if possible_remote_target is not None:
                        remote_target = deepcopy(possible_remote_target)
                        remote_repo = remote_target.get('repo')
                        if '%(locale)s' in remote_repo:
                            remote_target['repo'] = remote_repo % replace_dict
                        l10n_remote_targets[new_dest] = remote_target
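        # e.g. "gecko_<name>_<locale>": used below as the repo name, conversion
        # dir, and mapfile prefix.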
        long_name = '%s_%s_%s' % (type, name, locale)
        repo_dict = {
            'repo': config['hg_url'] % replace_dict,
            'revision': 'default',
            'repo_name': long_name,
            'conversion_dir': long_name,
            'mapfile_name': '%s-mapfile' % long_name,
            'targets': new_targets,
            'vcs': 'hg',
            'branch_config': {
                'branches': {
                    'default': config['git_branch_name'],
                },
            },
            'tag_config': config.get('tag_config', {}),
            'mapper': config.get('mapper', {}),
            'generate_git_notes': config.get('generate_git_notes', {}),
        }
        l10n_repos.append(repo_dict)

    def _query_l10n_repos(self):
        """ Since I didn't want to have to build a huge static list of l10n
            repos, and since it would be nicest to read the list of locales
            from their SSoT files.
            """
        l10n_repos = []
        l10n_remote_targets = deepcopy(self.config['remote_targets'])
        dirs = self.query_abs_dirs()
        gecko_dict = deepcopy(self.config['l10n_config'].get('gecko_config', {}))
        for name, gecko_config in gecko_dict.items():
            file_name = self.download_file(gecko_config['locales_file_url'],
                                           parent_dir=dirs['abs_work_dir'])
            if not os.path.exists(file_name):
                self.error("Can't download locales from %s; skipping!" % gecko_config['locales_file_url'])
                continue
            contents = self.read_from_file(file_name)
            for locale in contents.splitlines():
                self._process_locale(locale, 'gecko', gecko_config, l10n_remote_targets, name, l10n_repos)

        gaia_dict = deepcopy(self.config['l10n_config'].get('gaia_config', {}))
        for name, gaia_config in gaia_dict.items():
            contents = self.retry(
                self.load_json_from_url,
                args=(gaia_config['locales_file_url'],)
            )
            if not contents:
                self.error("Can't download locales from %s; skipping!" % gaia_config['locales_file_url'])
                continue
            for locale in dict(contents).keys():
                self._process_locale(locale, 'gaia', gaia_config, l10n_remote_targets, name, l10n_repos)
        self.info("Built l10n_repos...")
        self.info(pprint.pformat(l10n_repos, indent=4))
        self.info("Remote targets...")
        self.info(pprint.pformat(l10n_remote_targets, indent=4))
        self.remote_targets = l10n_remote_targets
        return l10n_repos

    def _query_project_repos(self):
        """ Since I didn't want to have to build a huge static list of project
            branch repos.
            """
        project_repos = []
        for project in self.config.get("project_branches", []):
            repo_dict = {
                'hg_clone_option': self.config.get('hg_clone_options', {}).get(project),
                'repo': self.config['project_branch_repo_url'] % {'project': project},
                'revision': 'default',
                'repo_name': project,
                'targets': [{
                    'target_dest': 'github-project-branches',
                }],
                'vcs': 'hg',
                'branch_config': {
                    'branches': {
                        'default': project,
                    },
                },
                'tag_config': {},
            }
            project_repos.append(repo_dict)
        self.info("Built project_repos...")
        self.info(pprint.pformat(project_repos, indent=4))
        return project_repos

    def query_all_repos(self):
        """ Very simple method, but we need this concatenated list many times
            throughout the script.
            """
        if self.all_repos:
            return self.all_repos
        if self.config.get('conversion_type') == 'b2g-l10n':
            self.all_repos = self._query_l10n_repos()
        elif self.config.get('initial_repo'):
            self.all_repos = [self.config['initial_repo']] + list(self.config.get('conversion_repos', []))
        else:
            self.all_repos = list(self.config.get('conversion_repos', []))
        if self.config.get('conversion_type') == 'project-branches':
            self.all_repos += self._query_project_repos()
        return self.all_repos

    def query_all_non_failed_repos(self):
        """ Same as query_all_repos(self) but filters out repos that failed in an earlier
            action - so use this for downstream actions that require earlier actions did
            not fail for a given repo.
            """
        all_repos = self.query_all_repos()
        return [repo for repo in all_repos if repo.get('repo_name') not in self.failures]

    def _query_repo_previous_status(self, repo_name, repo_map=None):
        """ Return False if previous run was unsuccessful.
            Return None if no previous run information.
            """
        if repo_map is None:
            repo_map = self._read_repo_update_json()
        return repo_map.get('repos', {}).get(repo_name, {}).get('previous_push_successful')

    def _update_repo_previous_status(self, repo_name, successful_flag, repo_map=None, write_update=False):
        """ Set the repo_name to successful_flag (False for unsuccessful, True for successful)
            """
        if repo_map is None:
            repo_map = self._read_repo_update_json()
        repo_map.setdefault('repos', {}).setdefault(repo_name, {})['previous_push_successful'] = successful_flag
        if write_update:
            self._write_repo_update_json(repo_map)
        return repo_map

    def _update_stage_repo(self, repo_config, retry=True, clobber=False):
        """ Update a stage repo.
            See update_stage_mirror() for a description of the stage repos.
            """
        hg = self._query_hg_exe()
        dirs = self.query_abs_dirs()
        repo_name = repo_config['repo_name']
        source_dest = os.path.join(dirs['abs_source_dir'],
                                   repo_name)
        if clobber:
            self.rmtree(source_dest)
        if not os.path.exists(source_dest):
            # if hg_clone_option isn't specified for this repo, we need
            # to remove it from the argument list.  leave it in position
            # and remove (most common execution path) rather than doing
            # magic insert. (bug 1253068)
            args = hg + ['clone', '--noupdate', repo_config['repo'],
                repo_config.get('hg_clone_option'), source_dest]
            args = [x for x in args if x]
            if self.retry(
                self.run_command,
                args=[args],
                kwargs={
                    'output_timeout': 15 * 60,
                    'cwd': dirs['abs_work_dir'],
                    'error_list': HgErrorList,
                },
            ):
                if retry:
                    return self._update_stage_repo(
                        repo_config, retry=False, clobber=True)
                else:
                    # Don't leave a failed clone behind
                    self.rmtree(source_dest)
                    self._update_repo_previous_status(repo_name, successful_flag=False, write_update=True)
                    self.add_failure(
                        repo_name,
                        message="Can't clone %s!" % repo_config['repo'],
                        level=ERROR,
                    )
        elif self.config['check_incoming'] and repo_config.get("check_incoming", True):
            previous_status = self._query_repo_previous_status(repo_name)
            if previous_status is None:
                self.info("No previous status for %s; skipping incoming check!" % repo_name)
            elif previous_status is False:
                self.info("Previously unsuccessful status for %s; skipping incoming check!" % repo_name)
            else:
                # Run |hg incoming| and skip all subsequent actions if there
                # are no new changes.
                # If you want to bypass this behavior (e.g. to update branches/tags
                # on a repo without requiring a new commit), set
                # repo_config["check_incoming"] = False.
                cmd = hg + ['incoming', '-n', '-l', '1']
                status = self.retry(
                    self.run_command,
                    args=(cmd, ),
                    kwargs={
                        'output_timeout': 5 * 60,
                        'cwd': source_dest,
                        'error_list': HgErrorList,
                        'success_codes': [0, 1, 256],
                    },
                )
                if status in (1, 256):
                    self.info("No changes for %s; skipping." % repo_name)
                    # Overload self.failures to tell downstream actions to noop on
                    # this repo
                    self.failures.append(repo_name)
                    return
                elif status != 0:
                    self.add_failure(
                        repo_name,
                        message="Error getting changes for %s; skipping!" % repo_config['repo_name'],
                        level=ERROR,
                    )
                    self._update_repo_previous_status(repo_name, successful_flag=False, write_update=True)
                    return
        cmd = hg + ['pull']
        if self.retry(
            self.run_command,
            args=(cmd, ),
            kwargs={
                'output_timeout': 15 * 60,
                'cwd': source_dest,
                'error_list': HgErrorList,
            },
        ):
            if retry:
                return self._update_stage_repo(
                    repo_config, retry=False, clobber=True)
            else:
                self._update_repo_previous_status(repo_name, successful_flag=False, write_update=True)
                self.add_failure(
                    repo_name,
                    message="Can't pull %s!" % repo_config['repo'],
                    level=ERROR,
                )
        # commenting out hg verify since it takes ~5min per repo; hopefully
        # exit codes will save us
#        if self.run_command(hg + ["verify"], cwd=source_dest):
#            if retry:
#                return self._update_stage_repo(repo_config, retry=False, clobber=True)
#            else:
#                self.fatal("Can't verify %s!" % source_dest)

    def _do_push_repo(self, base_command, refs_list=None, kwargs=None):
        """ Helper method for _push_repo() since it has to be able to break
            out of the target_repo list loop, and the commands loop borks that.
            """
        commands = []
        if refs_list:
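            # Split the refspecs into batches of 10 per push command, presumably
            # to keep individual command lines and server-side pushes manageable.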
            while len(refs_list) > 10:
                commands.append(base_command + refs_list[0:10])
                refs_list = refs_list[10:]
            commands.append(base_command + refs_list)
        else:
            commands = [base_command]
        if kwargs is None:
            kwargs = {}
        for command in commands:
            # Do the push, with retry!
            if self.retry(
                self.run_command,
                args=(command, ),
                kwargs=kwargs,
            ):
                return -1

    def _push_repo(self, repo_config):
        """ Push a repo to a path ("test_push") or remote server.

            This was meant to be a cross-vcs method, but currently only
            covers git pushes.
            """
        dirs = self.query_abs_dirs()
        conversion_dir = self.query_abs_conversion_dir(repo_config)
        if not conversion_dir:
            self.fatal("No conversion_dir for %s!" % repo_config['repo_name'])
        source_dir = os.path.join(dirs['abs_source_dir'], repo_config['repo_name'])
        git = self.query_exe('git', return_type='list')
        hg = self._query_hg_exe()
        return_status = ''
        for target_config in repo_config['targets']:
            test_push = False
            remote_config = {}
            if target_config.get("test_push"):
                test_push = True
                force_push = target_config.get("force_push")
                target_name = os.path.join(
                    dirs['abs_target_dir'], target_config['target_dest'])
                target_vcs = target_config.get("vcs")
            else:
                target_name = target_config['target_dest']
                if self.remote_targets is None:
                    self.remote_targets = self.config.get('remote_targets', {})
                remote_config = self.remote_targets.get(target_name, target_config)
                force_push = remote_config.get("force_push", target_config.get("force_push"))
                target_vcs = remote_config.get("vcs", target_config.get("vcs"))
            if target_vcs == "git":
                base_command = git + ['push']
                env = {}
                if force_push:
                    base_command.append("-f")
                if test_push:
                    target_git_repo = target_name
                else:
                    target_git_repo = remote_config['repo']
                    # Allow for using a custom git ssh key.
                    env['GIT_SSH_KEY'] = remote_config['ssh_key']
                    env['GIT_SSH'] = os.path.join(external_tools_path, 'git-ssh-wrapper.sh')
                base_command.append(target_git_repo)
                # Allow for pushing a subset of repo branches to the target.
                # If we specify that subset, we can also specify different
                # names for those branches (e.g. b2g18 -> master for a
                # standalone b2g18 repo)
                # We query hg for these because the conversion dir will have
                # branches from multiple hg repos, and the regexes may match
                # too many things.
                refs_list = []
                if repo_config.get('generate_git_notes', False):
                    refs_list.append('+refs/notes/commits:refs/notes/commits')
                branch_map = self.query_branches(
                    target_config.get('branch_config', repo_config.get('branch_config', {})),
                    source_dir,
                )
                # If the target_config has a branch_config, the key is the
                # local git branch and the value is the target git branch.
                if target_config.get("branch_config"):
                    for (branch, target_branch) in branch_map.items():
                        refs_list += ['+refs/heads/%s:refs/heads/%s' % (branch, target_branch)]
                # Otherwise the key is the hg branch and the value is the git
                # branch; use the git branch for both local and target git
                # branch names.
                else:
                    for (hg_branch, git_branch) in branch_map.items():
                        refs_list += ['+refs/heads/%s:refs/heads/%s' % (git_branch, git_branch)]
                # Allow for pushing a subset of tags to the target, via name or
                # regex.  Again, query hg for this list because the conversion
                # dir will contain tags from multiple hg repos, and the regexes
                # may match too many things.
                tag_config = target_config.get('tag_config', repo_config.get('tag_config', {}))
                if tag_config.get('tags'):
                    for (tag, target_tag) in tag_config['tags'].items():
                        refs_list += ['+refs/tags/%s:refs/tags/%s' % (tag, target_tag)]
                if tag_config.get('tag_regexes'):
                    regex_list = []
                    for regex in tag_config['tag_regexes']:
                        regex_list.append(re.compile(regex))
                    tag_list = self.get_output_from_command(
                        hg + ['tags'],
                        cwd=source_dir,
                    )
                    if tag_list is not None:
                        for tag_line in tag_list.splitlines():
                            if not tag_line:
                                continue
                            tag_parts = tag_line.split()
                            if not tag_parts:
                                self.error("Bogus tag_line? %s" % str(tag_line))
                                continue
                            tag_name = tag_parts[0]
                            for regex in regex_list:
                                if tag_name != 'tip' and regex.search(tag_name) is not None:
                                    refs_list += ['+refs/tags/%s:refs/tags/%s' % (tag_name, tag_name)]
                                    continue
                error_msg = "%s: Can't push %s to %s!\n" % (repo_config['repo_name'], conversion_dir, target_git_repo)
                if self._do_push_repo(
                    base_command,
                    refs_list=refs_list,
                    kwargs={
                        'output_timeout': target_config.get("output_timeout", 90 * 60),
                        'cwd': os.path.join(conversion_dir, '.git'),
                        'error_list': GitErrorList,
                        'partial_env': env,
                    }
                ):
                    if target_config.get("test_push"):
                        error_msg += "This was a test push that failed; not proceeding any further with %s!\n" % repo_config['repo_name']
                    self.error(error_msg)
                    return_status += error_msg
                    if target_config.get("test_push"):
                        break
            else:
                # TODO write hg
                error_msg = "%s: Don't know how to deal with vcs %s!\n" % (
                    target_config['target_dest'], target_vcs)
                self.error(error_msg)
                return_status += error_msg
        return return_status

    def _query_mapped_revision(self, revision=None, mapfile=None):
        """ Use the virtualenv mapper module to search a mapfile for a
            revision.
            """
        if not callable(self.mapfile_binary_search):
            site_packages_path = self.query_python_site_packages_path()
            sys.path.append(os.path.join(site_packages_path, 'mapper'))
            try:
                from bsearch import mapfile_binary_search
                global log
                log = self.log_obj
                self.mapfile_binary_search = mapfile_binary_search
            except ImportError as e:
                self.fatal("Can't import mapfile_binary_search! %s\nDid you create-virtualenv?" % str(e))
        # I wish mapper did this for me, but ...
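        # Mapfile lines are "<git sha1> <hg sha1>"; mapfile_binary_search
        # bisects the mmap'd file to find the git sha1 for an hg revision
        # without reading the whole file into memory.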
        fd = open(mapfile, 'rb')
        m = mmap.mmap(fd.fileno(), 0, mmap.MAP_PRIVATE, mmap.PROT_READ)
        return self.mapfile_binary_search(m, revision)

    def _post_fatal(self, message=None, exit_code=None):
        """ After we call fatal(), run this method before exiting.
            """
        if 'notify' in self.actions:
            self.notify(message=message, fatal=True)
        self.copy_logs_to_upload_dir()

    def _read_repo_update_json(self):
        """ repo_update.json is a file we create with information about each
            repo we're converting: git/hg branch names, git/hg revisions,
            pull datetime/timestamp, and push datetime/timestamp.

            Since we want to be able to incrementally update portions of this
            file as we pull/push each branch, we need to be able to read the
            json into memory, so we can update the dict and re-write the json
            to disk.
            """
        repo_map = {}
        dirs = self.query_abs_dirs()
        path = os.path.join(dirs['abs_upload_dir'], 'repo_update.json')
        if os.path.exists(path):
            fh = open(path, 'r')
            repo_map = json.load(fh)
            fh.close()
        return repo_map

    def query_abs_conversion_dir(self, repo_config):
        dirs = self.query_abs_dirs()
        if repo_config.get('conversion_dir'):
            dest = os.path.join(dirs['abs_work_dir'], 'conversion',
                                repo_config['conversion_dir'])
        else:
            dest = dirs.get('abs_conversion_dir')
        return dest

    def _write_repo_update_json(self, repo_map):
        """ The write portion of _read_repo_update_json().
            """
        dirs = self.query_abs_dirs()
        contents = json.dumps(repo_map, sort_keys=True, indent=4)
        self.write_to_file(
            os.path.join(dirs['abs_upload_dir'], 'repo_update.json'),
            contents,
            create_parent_dir=True
        )

    def _query_hg_exe(self):
        """Returns the hg executable command as a list
        """
        # If "hg" is set in "exes" section of config use that.
        # If not, get path from self.query_virtualenv_path() method
        # (respects --work-dir and --venv-path and --virtualenv-path).
        exe_command = self.query_exe('hg', return_type="list", default=[os.path.join(self.query_virtualenv_path(), "bin", "hg")])

        # possible additional command line options can be specified in "hg_options" of self.config
        hg_options = self.config.get("hg_options", ())
        exe_command.extend(hg_options)
        return exe_command

    def query_branches(self, branch_config, repo_path, vcs='hg'):
        """ Given a branch_config of branches and branch_regexes, return
            a dict of existing branch names to target branch names.
            """
        branch_map = {}
        if "branches" in branch_config:
            branch_map = deepcopy(branch_config['branches'])
        if "branch_regexes" in branch_config:
            regex_list = list(branch_config['branch_regexes'])
            full_branch_list = []
            if vcs == 'hg':
                hg = self._query_hg_exe()
                # This assumes we always want closed branches as well.
                # If not we may need more options.
                output = self.get_output_from_command(
                    hg + ['branches', '-c'],
                    cwd=repo_path
                )
                if output:
                    for line in output.splitlines():
                        full_branch_list.append(line.split()[0])
            elif vcs == 'git':
                git = self.query_exe("git", return_type="list")
                output = self.get_output_from_command(
                    git + ['branch', '-l'],
                    cwd=repo_path
                )
                if output:
                    for line in output.splitlines():
                        full_branch_list.append(line.replace('*', '').split()[0])
            for regex in regex_list:
                for branch in full_branch_list:
                    m = re.search(regex, branch)
                    if m:
                        # Don't overwrite branch_map[branch] if it exists
                        branch_map.setdefault(branch, branch)
        return branch_map

    def _combine_mapfiles(self, mapfiles, combined_mapfile, cwd=None):
        """ Adapted from repo-sync-tools/combine_mapfiles

            Consolidate multiple conversion processes' mapfiles into a
            single mapfile.
            """
        self.info("Determining whether we need to combine mapfiles...")
        if cwd is None:
            cwd = self.query_abs_dirs()['abs_upload_dir']
        existing_mapfiles = []
        for f in mapfiles:
            f_path = os.path.join(cwd, f)
            if os.path.exists(f_path):
                existing_mapfiles.append(f)
            else:
                self.warning("%s doesn't exist!" % f_path)
        combined_mapfile_path = os.path.join(cwd, combined_mapfile)
        if os.path.exists(combined_mapfile_path):
            combined_timestamp = os.path.getmtime(combined_mapfile_path)
            for f in existing_mapfiles:
                f_path = os.path.join(cwd, f)
                if os.path.getmtime(f_path) > combined_timestamp:
                    # Yes, we want to combine mapfiles
                    break
            else:
                self.info("No new mapfiles to combine.")
                return
            self.move(combined_mapfile_path, "%s.old" % combined_mapfile_path)
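        # Concatenate and deduplicate all the per-repo mapfiles, sorting on the
        # second space-separated field (the hg sha1).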
        output = self.get_output_from_command(
            ['sort', '--unique', '-t', ' ',
             '--key=2'] + existing_mapfiles,
            silent=True, halt_on_failure=True,
            cwd=cwd,
        )
        self.write_to_file(combined_mapfile_path, output, verbose=False,
                           error_level=FATAL)
        self.run_command(['ln', '-sf', combined_mapfile,
                          '%s-latest' % combined_mapfile],
                         cwd=cwd)

    # Actions {{{1

    def list_repos(self):
        repos = self.query_all_repos()
        self.info(pprint.pformat(repos))

    def create_test_targets(self):
        """ This action creates local directories to do test pushes to.
            """
        dirs = self.query_abs_dirs()
        for repo_config in self.query_all_non_failed_repos():
            for target_config in repo_config['targets']:
                if not target_config.get('test_push'):
                    continue
                target_dest = os.path.join(dirs['abs_target_dir'], target_config['target_dest'])
                if not os.path.exists(target_dest):
                    self.info("Creating local target repo %s." % target_dest)
                    if target_config.get("vcs", "git") == "git":
                        self.init_git_repo(target_dest, additional_args=['--bare', '--shared=all'],
                                           deny_deletes=True)
                    else:
                        self.fatal("Don't know how to deal with vcs %s!" % target_config['vcs'])
                else:
                    self.debug("%s exists; skipping." % target_dest)

    def update_stage_mirror(self):
        """ The stage mirror is a buffer clean clone of repositories.
            The logic behind this is that we get occasional corruption from
            |hg pull|.  It's much less time-consuming to detect this in
            a clean clone, and reclone, than to detect this in a working
            conversion directory, and try to repair or reclone+reconvert.

            We pull the stage mirror into the work mirror, where the conversion
            is done.
            """
        for repo_config in self.query_all_non_failed_repos():
            self._update_stage_repo(repo_config)

    def pull_out_new_sha_lookups(self, old_file, new_file):
        """ This method will return an iterator which iterates through lines in file
            new_file that do not exist in old_file. If old_file can't be read, all
            lines in new_file are returned. It does not cause any problems if lines
            exist in old_file that do not exist in new_file. Results are sorted by
            the second field (text after first space in line).

            This is somewhat equivalent to:
               ( [ ! -f "${old_file}" ] && cat "${new_file}" || diff "${old_file}" "${new_file}" | sed -n 's/> //p' ) | sort -k2"""
        with self.opened(old_file) as (old, err):
            if err:
                self.info('Map file %s not found - probably first time this has run.' % old_file)
                old_set = frozenset()
            else:
                old_set = frozenset(old)
        with self.opened(new_file, 'rt') as (new, err):
            if err:
                self.error('Could not read contents of map file %s:\n%s' % (new_file, err.message))
                new_set = frozenset()
            else:
                new_set = frozenset(new)
        for line in sorted(new_set.difference(old_set), key=lambda line: line.partition(' ')[2]):
            yield line

    def update_work_mirror(self):
        """ Pull the latest changes into the work mirror, update the repo_map
            json, and run |hg gexport| to convert those latest changes into
            the git conversion repo.
            """
        hg = self._query_hg_exe()
        git = self.query_exe("git", return_type="list")
        dirs = self.query_abs_dirs()
        repo_map = self._read_repo_update_json()
        timestamp = int(time.time())
        datetime = time.strftime('%Y-%m-%d %H:%M %Z')
        repo_map['last_pull_timestamp'] = timestamp
        repo_map['last_pull_datetime'] = datetime
        for repo_config in self.query_all_non_failed_repos():
            repo_name = repo_config['repo_name']
            source = os.path.join(dirs['abs_source_dir'], repo_name)
            dest = self.query_abs_conversion_dir(repo_config)
            if not dest:
                self.fatal("No conversion_dir for %s!" % repo_name)
            if not os.path.exists(dest):
                self.mkdir_p(os.path.dirname(dest))
                self.run_command(hg + ['clone', '--noupdate', source, dest],
                                 error_list=HgErrorList,
                                 halt_on_failure=False)
                if os.path.exists(dest):
                    if not self.config.get('cinnabar'):
                        self.write_hggit_hgrc(dest)
                    self.init_git_repo('%s/.git' % dest, additional_args=['--bare'])
                    self.run_command(
                        git + ['--git-dir', '%s/.git' % dest, 'config', 'gc.auto', '0'],
                    )
                else:
                    self.add_failure(
                        repo_name,
                        message="Failed to clone %s!" % source,
                        level=ERROR,
                    )
                    continue
            # Build branch map.
            branch_map = self.query_branches(
                repo_config.get('branch_config', {}),
                source,
            )
            for (branch, target_branch) in branch_map.items():
                output = self.get_output_from_command(
                    hg + ['id', '-r', branch],
                    cwd=source
                )
                if output:
                    rev = output.split(' ')[0]
                else:
                    self.add_failure(
                        repo_name,
                        message="Branch %s doesn't exist in %s (%s cloned into staging directory %s)!" % (branch, repo_name, repo_config.get('repo'), source),
                        level=ERROR,
                    )
                    continue
                timestamp = int(time.time())
                datetime = time.strftime('%Y-%m-%d %H:%M %Z')
                if self.run_command(hg + ['pull', '-r', rev, source], cwd=dest,
                                    error_list=HgErrorList):
                    # We shouldn't have an issue pulling!
                    self.add_failure(
                        repo_name,
                        message="Unable to pull %s from stage_source; clobbering and skipping!" % repo_name,
                        level=ERROR,
                    )
                    self._update_repo_previous_status(repo_name, successful_flag=False, write_update=True)
                    # don't leave a dirty checkout behind, and skip remaining branches
                    self.rmtree(source)
                    break
                self.run_command(
                    hg + ['bookmark', '-f', '-r', rev, target_branch],
                    cwd=dest, error_list=HgErrorList,
                )
                # This might get a little large.
                repo_map.setdefault('repos', {}).setdefault(repo_name, {}).setdefault('branches', {})[branch] = {
                    'hg_branch': branch,
                    'hg_revision': rev,
                    'git_branch': target_branch,
                    'pull_timestamp': timestamp,
                    'pull_datetime': datetime,
                }
            if self.query_failure(repo_name):
                # We hit an error in the for loop above
                continue

            generated_mapfile = os.path.join(dest, '.hg', 'git-mapfile')
            if self.config.get('cinnabar'):
                self._update_cinnabar_mirror(repo_config, git, dest, generated_mapfile)
            else:
                self.retry(
                    self.run_command,
                    args=(hg + ['-v', 'gexport'], ),
                    kwargs={
                        'output_timeout': repo_config.get("export_timeout", 120 * 60),
                        'cwd': dest,
                        'error_list': HgErrorList,
                    },
                    error_level=FATAL,
                )

            self.copy_to_upload_dir(
                generated_mapfile,
                dest=repo_config.get('mapfile_name', self.config.get('mapfile_name', "gecko-mapfile")),
                log_level=INFO
            )
            # Record the git sha1 for each branch's own hg revision.
            for (branch, target_branch) in branch_map.items():
                branch_info = repo_map.get('repos', {}).get(repo_name, {}).get('branches', {}).get(branch)
                if not branch_info:
                    continue
                git_revision = self._query_mapped_revision(
                    revision=branch_info['hg_revision'], mapfile=generated_mapfile)
                branch_info['git_revision'] = git_revision
        self._write_repo_update_json(repo_map)

    def _update_cinnabar_mirror(self, repo_config, git, dest, generated_mapfile):
        # Do things somewhat equivalent to hg gexport with git-cinnabar, which involves:
        # - converting new hg changesets
        # - updating tags
        # - updating .hg/git-mapfile
        env = self.query_env()
        partial_env = {
            'PATH': os.pathsep.join([
                os.path.join(self.query_virtualenv_path(), 'bin'),
                env['PATH'],
            ]),
        }
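        # Put the virtualenv's bin directory first on PATH for the git commands
        # below, presumably so that the git-cinnabar helper installed in the
        # virtualenv is the one that gets used.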
        if self.retry(
            self.run_command,
            args=(git + [
                '-c', 'gc.auto=0',
                '-c', 'cinnabar.refs=bookmarks',
                '-c', 'cinnabar.check=no-version-check',
                'fetch', '--progress', 'hg::%s' % dest, 'refs/heads/*:refs/heads/*'], ),
            kwargs={
                'output_timeout': repo_config.get("export_timeout", 120 * 60),
                'cwd': dest,
                'error_list': GitErrorList,
                'partial_env': partial_env,
            },
            error_level=FATAL,
        ):
            self.fatal('Error converting from mercurial to git')

        if self.retry(
            self.run_command,
            args=(git + [
                '-c', 'gc.auto=0',
                '-c', 'cinnabar.check=no-version-check',
                'fetch', '--tags', 'hg::tags:', 'tag', '*'], ),
            kwargs={
                'output_timeout': 60,
                'cwd': dest,
                'error_list': GitErrorList,
                'partial_env': partial_env,
            },
            error_level=FATAL,
        ):
            self.fatal('Error converting tags')

        # Extra security: run git cinnabar fsck.
        if self.retry(
            self.run_command,
            args=(git + [
                '-c', 'cinnabar.check=no-version-check',
                'cinnabar', 'fsck'], ),
            kwargs={
                'output_timeout': 120 * 60,
                'cwd': dest,
                'partial_env': partial_env,
            },
            error_level=FATAL,
        ):
            self.fatal('Consistency error in the git-cinnabar repository')

        # Extra cleanup step: if there are more than 6700 loose objects or more than 50 packs,
        # repack everything.
        self.retry(
            self.run_command,
            args=(git + [
                '-c', 'gc.autoDetach=false',
                # This is only supported by git >= 2.18.0, but earlier versions will just ignore it.
                '-c', 'gc.bigPackThreshold=1g',
                'gc',
                '--auto',
                # This is only supported by git >= 2.18.0, so skip it for now; we'll use a .keep
                # file in .git/objects/pack instead. Hopefully we will have upgraded git by the
                # time new packs become large enough to be a problem.
                #'--keep-largest-pack',
            ], ),
            kwargs={
                'output_timeout': 120 * 60,
                'cwd': dest,
                'error_list': GitErrorList,
            },
            error_level=FATAL,
        )

        # Update map file
        # This relies on some git-cinnabar internals. We keep a `refs/cinnabar/map` ref
        # pointing to where `refs/cinnabar/metadata` pointed when the mapfile was last
        # updated. `refs/cinnabar/metadata^4` and thus `refs/cinnabar/map^4` point to
        # git-cinnabar's git->hg mapping metadata, which is a `git-notes` tree for all
        # the converted git commits. The diff between both an old and a new such tree
        # gives us a list of the new git commits from the update we just did.
        # We can then ask `git cinnabar git2hg` to convert them all back to mercurial
        # changeset ids.
        output = self.get_output_from_command(
            git + ['diff-tree', '-r', 'refs/cinnabar/map^4', 'refs/cinnabar/metadata^4'],
            cwd=dest,
        )
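        # Each diff-tree -r line ends with the path of a changed entry in the
        # notes tree; in git-cinnabar's git->hg notes tree that path is the git
        # commit sha1, possibly split into subdirectories, hence the '/' removal.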
        git_sha1s = [diff_line.split()[-1].replace('/', '') for diff_line in (output or '').splitlines()]
        hg_sha1s = []
        CHUNK_SIZE = 50
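        # Translate the new git sha1s back to hg sha1s in small batches to keep
        # the |git cinnabar git2hg| command lines short.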
        env = env.copy()
        env.update(partial_env)
        for offset in range(0, len(git_sha1s), CHUNK_SIZE):
            output = self.get_output_from_command(
                git + ['-c', 'cinnabar.check=no-version-check', 'cinnabar', 'git2hg'] + git_sha1s[offset:offset + CHUNK_SIZE],
                cwd=dest,
                env=env,
            )
            hg_sha1s += (output or '').split()
        if len(git_sha1s) != len(hg_sha1s) or '0000000000000000000000000000000000000000' in hg_sha1s:
            self.fatal('Error while updating git->hg mapping')

        # Nothing to update, we can return now
        if not git_sha1s:
            return

        def old_map():
            with open(generated_mapfile) as map_file:
                for line in map_file:
                    l = line.split()
                    if len(l) != 2:
                        self.fatal('Error in git-mapfile')
                    yield tuple(l)

        # Merge two iterators of (git_sha1, hg_sha1), ordered by hg_sha1.
        def merge_map(a, b):
            a = iter(a)
            b = iter(b)
            item_a = next(a, None)
            item_b = next(b, None)
            while item_a is not None or item_b is not None:
                while item_a and (item_b is None or item_a[1] < item_b[1]):
                    yield item_a
                    item_a = next(a, None)
                while item_b and (item_a is None or item_b[1] < item_a[1]):
                    yield item_b
                    item_b = next(b, None)
                if item_a is None or item_b is None:
                    continue
                if item_a[1] == item_b[1]:
                    if item_a[0] != item_b[0]:
                        self.fatal('%s already maps to %s ; cannot change that to %s'
                                   % (item_a[1], item_a[0], item_b[0]))
                    yield item_a
                    item_a = next(a, None)
                    item_b = next(b, None)

        new_map = sorted(zip(git_sha1s, hg_sha1s), key=lambda x: x[1])
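        # Both the existing mapfile (as read by old_map()) and new_map are
        # ordered by hg sha1, which merge_map() relies on for its single-pass merge.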
        with open(generated_mapfile + '.new', 'w') as new_map_file:
            for git_sha1, hg_sha1 in merge_map(old_map(), new_map):
                new_map_file.write('%s %s\n' % (git_sha1, hg_sha1))
        os.rename(generated_mapfile, generated_mapfile + '.old')
        os.rename(generated_mapfile + '.new', generated_mapfile)
        if self.retry(
            self.run_command,
            args=(git + ['update-ref', 'refs/cinnabar/map', 'refs/cinnabar/metadata'], ),
            kwargs={
                'output_timeout': 60,
                'cwd': dest,
                'error_list': GitErrorList,
            },
            error_level=FATAL,
        ):
            self.fatal('Error updating refs/cinnabar/map')

    def create_git_notes(self):
        git = self.query_exe("git", return_type="list")
        for repo_config in self.query_all_non_failed_repos():
            repo = repo_config['repo']
            if repo_config.get('generate_git_notes', False):
                dest = self.query_abs_conversion_dir(repo_config)
                # 'git-mapfile' is created by hggit plugin, containing all the mappings
                complete_mapfile = os.path.join(dest, '.hg', 'git-mapfile')
                # 'added-to-git-notes' is the set of mappings known to be recorded in the git notes
                # of the project (typically 'git-mapfile' from previous run)
                added_to_git_notes = os.path.join(dest, '.hg', 'added-to-git-notes')
                # 'delta-git-notes' is the set of new mappings found on this iteration, that
                # now need to be added to the git notes of the project (the diff between the
                # previous two files described)
                delta_git_notes = os.path.join(dest, '.hg', 'delta-git-notes')
                git_dir = os.path.join(dest, '.git')
                self.rmtree(delta_git_notes)
                git_notes_adding_successful = True
                with self.opened(delta_git_notes, open_mode='w') as (delta_out, err):
                    if err:
                        git_notes_adding_successful = False
                        self.warning("Could not write list of unprocessed git note mappings to file %s - not critical" % delta_git_notes)
                    else:
                        for sha_lookup in self.pull_out_new_sha_lookups(added_to_git_notes, complete_mapfile):
                            (git_sha, hg_sha) = sha_lookup.split()
                            # only add git note if not already there - note
                            # devs may have added their own notes, so don't
                            # replace any existing notes, just add to them
                            output = self.get_output_from_command(
                                git + ['notes', 'show', git_sha],
                                cwd=git_dir,
                                ignore_errors=True
                            )
                            git_note_text = 'Upstream source: %s/rev/%s' % (repo, hg_sha)
                            git_notes_add_return_code = 1
                            if not output or output.find(git_note_text) < 0:
                                git_notes_add_return_code = self.run_command(
                                    git + ['notes', 'append', '-m', git_note_text, git_sha],
                                    cwd=git_dir
                                )
                            # if note was successfully added, or it was already there, we can
                            # mark it as added, by putting it in the delta file...
                            if git_notes_add_return_code == 0 or output.find(git_note_text) >= 0:
                                print >>delta_out, sha_lookup,
                            else:
                                self.error("Was not able to append required git note for git commit %s ('%s')" % (git_sha, git_note_text))
                                git_notes_adding_successful = False
                if git_notes_adding_successful:
                    self.copyfile(complete_mapfile, added_to_git_notes)
            else:
                self.info("Not creating git notes for repo %s (generate_git_notes not set to True)" % repo)

    def publish_to_mapper(self):
        """ This method will attempt to create git notes for any new git<->hg mappings
            found in the generated_mapfile file and also push new mappings to mapper service."""
        for repo_config in self.query_all_non_failed_repos():
            dest = self.query_abs_conversion_dir(repo_config)
            # 'git-mapfile' is created by hggit plugin, containing all the mappings
            complete_mapfile = os.path.join(dest, '.hg', 'git-mapfile')
            # 'published-to-mapper' is all the mappings that are known to be published
            # to mapper, for this project (typically the 'git-mapfile' from the previous
            # run)
            published_to_mapper = os.path.join(dest, '.hg', 'published-to-mapper')
            # 'delta-for-mapper' is the set of mappings that need to be published to
            # mapper on this iteration, i.e. the diff between the previous two files
            # described
            delta_for_mapper = os.path.join(dest, '.hg', 'delta-for-mapper')
            self.rmtree(delta_for_mapper)
            # we only replace published_to_mapper if we successfully
            # pushed to mapper
            mapper_config = repo_config.get('mapper', {})
            if mapper_config:
                site_packages_path = self.query_python_site_packages_path()
                if site_packages_path not in sys.path:
                    sys.path.append(site_packages_path)
                try:
                    import requests
                except ImportError as e:
                    self.error("Can't import requests: %s\nDid you create-virtualenv?" % str(e))
                mapper_url = mapper_config['url']
                mapper_project = mapper_config['project']
                insert_url = "%s/%s/insert/ignoredups" % (mapper_url, mapper_project)
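                # The insert/ignoredups endpoint accepts duplicate mappings
                # silently, so re-posting already-published lines is harmless.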

                all_new_mappings = []
                all_new_mappings.extend(self.pull_out_new_sha_lookups(published_to_mapper, complete_mapfile))
                self.write_to_file(delta_for_mapper, "".join(all_new_mappings))
                # bug 1193011 says there are problems on occasion, independently
                # check calculation of additions and save off mapfiles
                dirs = self.query_abs_dirs()
                for mapfile in [delta_for_mapper, published_to_mapper, complete_mapfile]:
                    self.copyfile(src=mapfile, dest=os.path.join(dirs['abs_log_dir'],
                        os.path.basename(mapfile)))
                import subprocess   # this is debug code
                # from https://gist.github.com/edufelipe/1027906 because we're py 2.6
                def check_output(*popenargs, **kwargs):
                    r"""Run command with arguments and return its output as a byte string.
                    Backported from Python 2.7 as it's implemented as pure python on stdlib.
                    >>> check_output(['/usr/bin/python', '--version'])
                    Python 2.6.2
                    """
                    process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
                    output, unused_err = process.communicate()
                    retcode = process.poll()
                    if retcode == 1:
                        # assume bootstrap process (no file) so zero lines
                        output = "0"
                    elif retcode:
                        cmd = kwargs.get("args")
                        if cmd is None:
                            cmd = popenargs[0]
                        error = subprocess.CalledProcessError(retcode, cmd)
                        error.output = output
                        raise error
                    return output
                lines_last_time = 0
                if os.path.exists(published_to_mapper):
                    lines_last_time = int(check_output('wc -l <%s' % published_to_mapper, shell=True))
                lines_this_time = 0
                if os.path.exists(complete_mapfile):
                    lines_this_time = int(check_output('wc -l <%s' % complete_mapfile, shell=True))
                if lines_this_time - lines_last_time != len(all_new_mappings):
                    self.error("Bad calc of new mappings: last %d, now %d, diff %d, calc %d"
                               % (lines_last_time, lines_this_time, lines_this_time - lines_last_time,
                                  len(all_new_mappings)))
                # correct # of entries, but are they the correct
                # entries? None of the lines in delta_for_mapper should
                # be in published_to_mapper. grep -Ff can help verify
                retcode = subprocess.call(['grep', '-Ff',
                    delta_for_mapper, published_to_mapper])
                if retcode != 1:
                    self.error("Bad selection of new mappings, some already there")

                # create authentication headers
                content_type = 'text/plain'
                tc_client_id = os.environ.get(
                    'RELENGAPI_INSERT_HGGIT_MAPPINGS_TASKCLUSTER_CLIENT_ID')
                tc_access_token = os.environ.get(
                    'RELENGAPI_INSERT_HGGIT_MAPPINGS_TASKCLUSTER_ACCESS_TOKEN')
                relengapi_token = os.environ.get(
                    'RELENGAPI_INSERT_HGGIT_MAPPINGS_AUTH_TOKEN')

                # For taskcluster auth we import mohawk directly, since we
                # need the request content to create the Hawk header
                try:
                    import mohawk
                except ImportError as e:
                    self.fatal("Can't import mohawk: %s\nDid you create-virtualenv?" % str(e))

                # due to timeouts on the load balancer, we only push 200 lines at a time;
                # this means we should get an http response back within 30 seconds,
                # including the time it takes to insert the mappings into the database
                publish_successful = True

                for i in range(0, len(all_new_mappings), 200):
                    data = "".join(all_new_mappings[i:i+200])

                    if tc_client_id and tc_access_token:
                        headers = {
                            'Content-Type': content_type,
                            'Authentication': mohawk.Sender(
                                credentials=dict(
                                    id=tc_client_id,
                                    key=tc_access_token,
                                    algorithm='sha256',
                                ),
                                ext=dict(),
                                url=insert_url,
                                content=data,
                                content_type=content_type,
                                method='POST',
                            ).request_header,
                        }
                    elif relengapi_token:
                        headers = {
                            'Content-Type': content_type,
                            'Authentication': 'Bearer %s' % relengapi_token,
                        }
                    else:
                        self.fatal(
                            "Please provide either:\n"
                            "- RELENGAPI_INSERT_HGGIT_MAPPINGS_AUTH_TOKEN\n"
                            "- RELENGAPI_INSERT_HGGIT_MAPPINGS_TASKCLUSTER_ACCESS_TOKEN and RELENGAPI_INSERT_HGGIT_MAPPINGS_TASKCLUSTER_CLIENT_ID")

                    r = requests.post(insert_url, data=data, headers=headers)
                    if r.status_code != 200:
                        self.error("Could not publish mapfile ('%s') line range [%s, %s] to mapper (%s) - "
                                   "received http %s code"
                                   % (delta_for_mapper, i, i + 200, insert_url, r.status_code))
                        publish_successful = False
                        # we won't break out, since we may be able to publish other mappings,
                        # and duplicates are allowed, so we will push the whole lot again
                        # next time anyway
                    else:
                        self.info("Published mapfile ('%s') line range [%s, %s] to mapper (%s)"
                                  % (delta_for_mapper, i, i + 200, insert_url))

                if publish_successful:
                    # bug 1193011 says there are problems on occasion
                    # with delta uploads. Check that items are in db in
                    # an effort to find the root cause.
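                    # Python 2 has no 'nonlocal', so share this flag with the
                    # nested _check_mapping() below via a module-level global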
                    global publish_verified
                    publish_verified = True
                    try:
                        # previously we checked only the first and last mappings; no
                        # errors were reported, yet the map still drifted out of spec,
                        # so check them all.
                        # hashes_to_check = [all_new_mappings[0], all_new_mappings[-1]]
                        hashes_to_check = all_new_mappings[:]
                    except IndexError:
                        hashes_to_check = []
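                    # look each freshly published mapping back up via mapper's
                    # rev endpoint; urljoin() against .../insert/ignoredups
                    # resolves to {mapper_url}/{project}/rev/{vcs}/{sha}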
                    def _check_mapping(url, vcs, sha, expected):
                        global publish_verified
                        import urlparse
                        check_url = urlparse.urljoin(url, "../rev/%s/%s"
                                                     % (vcs, sha))
                        try:
                            r = requests.get(check_url)
                        except OSError as e:
                            # every so often, we get an uncaught
                            # exception here. (Once a month.)
                            publish_verified = False
                            self.error("Mapper network error: %s" % repr(e))
                            # r is unbound after a failed request, so bail out
                            return
                        if r.status_code != 200 or r.text != expected:
                            publish_verified = False
                            self.error("Mapper lookup failure: %s on %s\n('%s') rcvd\n('%s') expected"
                                       % (r.status_code, check_url, r.text, expected))
                    for mapping in hashes_to_check:
                        mapping = mapping.rstrip()
                        git_sha, hg_sha = mapping.split()
                        _check_mapping(insert_url, 'hg', hg_sha, mapping)
                        _check_mapping(insert_url, 'git', git_sha, mapping)

                    if publish_verified:
                        # if we get this far, we know we could successfully post to mapper, so now
                        # we can copy the mapfile over "previously generated" version
                        # so that we don't push to mapper for these commits again
                        self.copyfile(complete_mapfile, published_to_mapper)
            else:
                self.copyfile(complete_mapfile, published_to_mapper)

    def combine_mapfiles(self):
        """ This method is for any job (l10n, project-branches) that needs to combine
            mapfiles.
            """
        if not self.config.get("combined_mapfile"):
            self.info("No combined_mapfile set in config; skipping!")
            return
        dirs = self.query_abs_dirs()
        mapfiles = []
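        # b2g-l10n conversions emit one mapfile per locale repo; everything
        # else uses a single mapfile (default: 'gecko-mapfile')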
        if self.config.get('conversion_type') == 'b2g-l10n':
            for repo_config in self.query_all_non_failed_repos():
                if repo_config.get("mapfile_name"):
                    mapfiles.append(repo_config['mapfile_name'])
        else:
            mapfiles.append(self.config.get('mapfile_name', 'gecko-mapfile'))
        if self.config.get('external_mapfile_urls'):
            for url in self.config['external_mapfile_urls']:
                file_name = self.download_file(
                    url,
                    parent_dir=dirs['abs_upload_dir'],
                    error_level=FATAL,
                )
                mapfiles.append(file_name)
        if not mapfiles:
            self.info("No mapfiles to combine; skipping!")
            return
        self._combine_mapfiles(mapfiles, self.config['combined_mapfile'])

    def push(self):
        """ Push to all targets.  test_targets are local directory test repos;
            the rest are remote.  Updates the repo_map json.
            """
        self.create_test_targets()
        repo_map = self._read_repo_update_json()
        failure_msg = ""
        timestamp = int(time.time())
        datetime = time.strftime('%Y-%m-%d %H:%M %Z')
        repo_map['last_push_timestamp'] = timestamp
        repo_map['last_push_datetime'] = datetime
        for repo_config in self.query_all_non_failed_repos():
            timestamp = int(time.time())
            datetime = time.strftime('%Y-%m-%d %H:%M %Z')
            status = self._push_repo(repo_config)
            repo_name = repo_config['repo_name']
            if not status:  # good
                if repo_name not in self.successful_repos:
                    self.successful_repos.append(repo_name)
                repo_map.setdefault('repos', {}).setdefault(repo_name, {})['push_timestamp'] = timestamp
                repo_map['repos'][repo_name]['push_datetime'] = datetime
                previous_status = self._query_repo_previous_status(repo_name, repo_map=repo_map)
                if previous_status is None:
                    self.add_summary("Possibly the first successful push of %s." % repo_name)
                elif previous_status is False:
                    self.add_summary("Previously unsuccessful push of %s is now successful!" % repo_name)
                self._update_repo_previous_status(repo_name, successful_flag=True, repo_map=repo_map, write_update=True)
            else:
                self.add_failure(
                    repo_name,
                    message="Unable to push %s." % repo_name,
                    level=ERROR,
                )
                failure_msg += status + "\n"
                self._update_repo_previous_status(repo_name, successful_flag=False, repo_map=repo_map, write_update=True)
        if not failure_msg:
            repo_map['last_successful_push_timestamp'] = repo_map['last_push_timestamp']
            repo_map['last_successful_push_datetime'] = repo_map['last_push_datetime']
        self._write_repo_update_json(repo_map)
        if failure_msg:
            self.fatal("Unable to push these repos:\n%s" % failure_msg)

    def preflight_upload(self):
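        # if logs won't be copied after the run, copy them into the upload
        # dir now so the upload action still includes them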
        if not self.config.get("copy_logs_post_run", True):
            self.copy_logs_to_upload_dir()

    def upload(self):
        """ Upload the upload_dir according to the upload_config.
            """
        failure_msg = ''
        dirs = self.query_abs_dirs()
        for upload_config in self.config.get('upload_config', []):
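            # a truthy return from retry() means rsync_upload_directory still
            # failed after all attempts; record the destination for the summary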
            if self.retry(
                self.rsync_upload_directory,
                args=(
                    dirs['abs_upload_dir'],
                ),
                kwargs=upload_config,
            ):
                failure_msg += '%s:%s\n' % (upload_config['remote_host'],
                                            upload_config['remote_path'])
        if failure_msg:
            self.fatal("Unable to upload to these locations:\n%s" % failure_msg)


# __main__ {{{1
if __name__ == '__main__':
    conversion = HgGitScript()
    conversion.run()