intl/icu_sources_data.py
author Narcis Beleuzu <nbeleuzu@mozilla.com>
Wed, 24 Jul 2019 04:20:54 +0300
changeset 541521 7637d97a71442be83ea60b8036f65a09255659fd
parent 541158 5712b6093001ee06c76f0130220e89a753000b05
permissions -rw-r--r--
Backed out changeset 12d0b5cc1b57 (bug 1567301) for bc failures on browser_autocomplete_tag_star_visibility.js . a=backout

#!/usr/bin/env python
#
# Any copyright is dedicated to the Public Domain.
# http://creativecommons.org/publicdomain/zero/1.0/
#
# Generate SOURCES in sources.mozbuild files from ICU's Makefile.in
# files, and also build a standalone copy of ICU using its build
# system to generate a new copy of the in-tree ICU data file.
#
# This script expects to be run from `update-icu.sh` after the in-tree
# copy of ICU has been updated.

from __future__ import absolute_import
from __future__ import print_function

import glob
import multiprocessing
import os
import sets
import shutil
import subprocess
import sys
import tempfile

from mozpack import path as mozpath

# The following files have been determined to be dead/unused by a
# semi-automated analysis. If you need one of the files below, simply
# remove its entry from this list. However, files marked with a "Cluster"
# comment can only be removed from the list together, as they have
# (directional) dependencies on each other.
# If you want to rerun this analysis, contact :decoder.
#
# This is a built-in set rather than the long-deprecated sets.Set; the
# script only ever performs membership tests against it.
UNUSED_SOURCES = {
    'intl/icu/source/common/bytestrieiterator.cpp',
    'intl/icu/source/common/cstr.cpp',
    'intl/icu/source/common/cwchar.cpp',
    'intl/icu/source/common/icudataver.cpp',
    'intl/icu/source/common/icuplug.cpp',
    'intl/icu/source/common/pluralmap.cpp',
    'intl/icu/source/common/ucat.cpp',
    'intl/icu/source/common/ucnv2022.cpp',
    'intl/icu/source/common/ucnv_ct.cpp',
    'intl/icu/source/common/ucnvdisp.cpp',
    'intl/icu/source/common/ucnv_ext.cpp',
    'intl/icu/source/common/ucnvhz.cpp',
    'intl/icu/source/common/ucnvisci.cpp',
    'intl/icu/source/common/ucnv_lmb.cpp',
    'intl/icu/source/common/ucnvmbcs.cpp',
    'intl/icu/source/common/uidna.cpp',
    'intl/icu/source/common/unorm.cpp',
    'intl/icu/source/common/usc_impl.cpp',
    'intl/icu/source/common/ustr_wcs.cpp',
    'intl/icu/source/common/util_props.cpp',
    'intl/icu/source/i18n/anytrans.cpp',
    'intl/icu/source/i18n/brktrans.cpp',
    'intl/icu/source/i18n/casetrn.cpp',
    'intl/icu/source/i18n/cpdtrans.cpp',
    'intl/icu/source/i18n/esctrn.cpp',
    'intl/icu/source/i18n/fmtable_cnv.cpp',
    'intl/icu/source/i18n/funcrepl.cpp',
    'intl/icu/source/i18n/gender.cpp',
    'intl/icu/source/i18n/name2uni.cpp',
    'intl/icu/source/i18n/nortrans.cpp',
    'intl/icu/source/i18n/nultrans.cpp',
    'intl/icu/source/i18n/quant.cpp',
    'intl/icu/source/i18n/rbt.cpp',
    'intl/icu/source/i18n/rbt_data.cpp',
    'intl/icu/source/i18n/rbt_pars.cpp',
    'intl/icu/source/i18n/rbt_rule.cpp',
    'intl/icu/source/i18n/rbt_set.cpp',
    'intl/icu/source/i18n/regexcmp.cpp',
    'intl/icu/source/i18n/regeximp.cpp',
    'intl/icu/source/i18n/regexst.cpp',
    'intl/icu/source/i18n/regextxt.cpp',
    'intl/icu/source/i18n/rematch.cpp',
    'intl/icu/source/i18n/remtrans.cpp',
    'intl/icu/source/i18n/repattrn.cpp',
    'intl/icu/source/i18n/scientificnumberformatter.cpp',
    'intl/icu/source/i18n/strmatch.cpp',
    'intl/icu/source/i18n/strrepl.cpp',
    'intl/icu/source/i18n/titletrn.cpp',
    'intl/icu/source/i18n/tolowtrn.cpp',
    'intl/icu/source/i18n/toupptrn.cpp',
    'intl/icu/source/i18n/translit.cpp',
    'intl/icu/source/i18n/transreg.cpp',
    'intl/icu/source/i18n/tridpars.cpp',
    'intl/icu/source/i18n/udateintervalformat.cpp',
    'intl/icu/source/i18n/unesctrn.cpp',
    'intl/icu/source/i18n/uni2name.cpp',
    'intl/icu/source/i18n/uregexc.cpp',
    'intl/icu/source/i18n/uregex.cpp',
    'intl/icu/source/i18n/uregion.cpp',
    'intl/icu/source/i18n/uspoof_build.cpp',
    'intl/icu/source/i18n/uspoof_conf.cpp',
    'intl/icu/source/i18n/utrans.cpp',
    'intl/icu/source/i18n/vzone.cpp',
    'intl/icu/source/i18n/zrule.cpp',
    'intl/icu/source/i18n/ztrans.cpp',

    # Cluster
    'intl/icu/source/common/resbund_cnv.cpp',
    'intl/icu/source/common/ures_cnv.cpp',

    # Cluster
    'intl/icu/source/common/propsvec.cpp',
    'intl/icu/source/common/ucnvsel.cpp',
    'intl/icu/source/common/ucnv_set.cpp',

    # Cluster
    'intl/icu/source/common/ubiditransform.cpp',
    'intl/icu/source/common/ushape.cpp',

    # Cluster
    'intl/icu/source/i18n/csdetect.cpp',
    'intl/icu/source/i18n/csmatch.cpp',
    'intl/icu/source/i18n/csr2022.cpp',
    'intl/icu/source/i18n/csrecog.cpp',
    'intl/icu/source/i18n/csrmbcs.cpp',
    'intl/icu/source/i18n/csrsbcs.cpp',
    'intl/icu/source/i18n/csrucode.cpp',
    'intl/icu/source/i18n/csrutf8.cpp',
    'intl/icu/source/i18n/inputext.cpp',
    'intl/icu/source/i18n/ucsdet.cpp',

    # Cluster
    'intl/icu/source/i18n/alphaindex.cpp',
    'intl/icu/source/i18n/ulocdata.cpp',
}


def find_source_file(dir, filename):
    """Locate the C++ or C source file that corresponds to *filename*.

    The extension of *filename* is discarded and replaced, in turn, by
    ``.cpp`` and ``.c``; the first candidate inside *dir* that exists as a
    regular file is returned.

    Raises Exception when neither candidate exists.
    """
    stem, _ = os.path.splitext(filename)
    # Lazily built so that `.c` is only probed when `.cpp` is missing.
    candidates = (mozpath.join(dir, stem + suffix)
                  for suffix in ('.cpp', '.c'))
    for candidate in candidates:
        if os.path.isfile(candidate):
            return candidate
    raise Exception("Couldn't find source file for: %s" % filename)


def get_sources_from_makefile(makefile):
    """Return the source files behind the OBJECTS variable of *makefile*.

    The makefile is parsed with pymake.  For the first statement that
    assigns a variable literally named ``OBJECTS``, every whitespace
    separated entry of its value is resolved to a source file next to the
    makefile (see find_source_file) and the resulting paths are returned
    sorted case-insensitively.

    Returns None when the makefile contains no such assignment.
    """
    # Imported here because pymake only becomes importable once the caller
    # has extended sys.path.
    import pymake.parser
    from pymake.parserdata import SetVariable

    makefile_dir = os.path.dirname(makefile)
    for stmt in pymake.parser.parsefile(makefile):
        if not isinstance(stmt, SetVariable):
            continue
        var_name = stmt.vnameexp
        if not (var_name.is_static_string and var_name.s == 'OBJECTS'):
            continue
        found = [find_source_file(makefile_dir, entry)
                 for entry in stmt.value.split()]
        found.sort(key=lambda path: path.lower())
        return found
    return None


def list_headers(path):
    """Return the regular files directly inside *path*.

    Each entry is joined onto *path*; sub-directories and other non-file
    entries are skipped.  The result is sorted case-insensitively.
    """
    candidates = (mozpath.join(path, entry) for entry in os.listdir(path))
    headers = [candidate for candidate in candidates
               if os.path.isfile(candidate)]
    headers.sort(key=lambda header: header.lower())
    return headers


def write_sources(mozbuild, sources, headers):
    """Write a generated ``sources.mozbuild`` file to *mozbuild*.

    The file consists of a "generated, do not edit" banner, a ``SOURCES``
    list with one entry per item of *sources* and an ``EXPORTS.unicode``
    list with one entry per item of *headers*.  Every entry is emitted with
    a leading ``/``.

    The whole content is assembled before the file is opened, so a failure
    while iterating *sources* or *headers* does not leave a truncated file
    behind.
    """
    parts = ['# THIS FILE IS GENERATED BY /intl/icu_sources_data.py '
             'DO NOT EDIT\n',
             'SOURCES += [\n']
    parts.extend("   '/%s',\n" % s for s in sources)
    parts.append(']\n\n')
    parts.append('EXPORTS.unicode += [\n')
    parts.extend("   '/%s',\n" % s for s in headers)
    parts.append(']\n')
    content = ''.join(parts)

    # The file is written in binary mode so that '\n' is never translated.
    # A Python 2 `str` is already bytes and is written untouched; text
    # (Python 3 `str`, Python 2 `unicode`) has to be encoded first, as
    # writing it to a binary file raises TypeError on Python 3.
    if not isinstance(content, bytes):
        content = content.encode('utf-8')
    with open(mozbuild, 'wb') as f:
        f.write(content)


def update_sources(topsrcdir):
    """Regenerate the ``sources.mozbuild`` files for ICU common and i18n.

    For each of the two ICU components, the source list is taken from the
    component's ``Makefile.in`` (minus everything in UNUSED_SOURCES), the
    header list from the component's ``unicode`` directory, and both are
    written, relative to *topsrcdir*, to
    ``config/external/icu/<component>/sources.mozbuild``.

    As a side effect ``<topsrcdir>/build/pymake`` is appended to sys.path
    so that the makefile parser can be imported.
    """
    print('Updating ICU sources lists...')
    sys.path.append(mozpath.join(topsrcdir, 'build/pymake'))
    for component in ('common', 'i18n'):
        component_dir = mozpath.join(topsrcdir,
                                     'intl/icu/source/%s' % component)
        makefile = mozpath.join(component_dir, 'Makefile.in')
        mozbuild = mozpath.join(
            topsrcdir,
            'config/external/icu/%s/sources.mozbuild' % component)

        # Keep only the sources that are not known to be unused.
        sources = []
        for src in get_sources_from_makefile(makefile):
            relative_src = mozpath.relpath(src, topsrcdir)
            if relative_src not in UNUSED_SOURCES:
                sources.append(relative_src)

        headers = []
        for header in list_headers(mozpath.join(component_dir, 'unicode')):
            relative_header = os.path.relpath(header, topsrcdir)
            headers.append(mozpath.normsep(relative_header))

        write_sources(mozbuild, sources, headers)


def try_run(name, command, cwd=None, **kwargs):
    """Run *command*, capturing its output in a temporary log file.

    *name* is used as the prefix of the log file name, *command* is the
    argument list handed to subprocess.check_call, *cwd* is the working
    directory and any further keyword arguments are forwarded to
    subprocess.check_call as well.  stdout and stderr both go to the log.

    Returns True when the command exits successfully; the log file is then
    deleted.  Returns False when the command exits with a non-zero status
    (the log file is kept and its location printed to stderr) or when the
    command cannot be started at all, e.g. because the executable or *cwd*
    does not exist (the empty log file is deleted and the OS error is
    printed to stderr).
    """
    # delete=False: the log has to outlive the `with` block so that it can
    # be inspected after a failed run.
    log = tempfile.NamedTemporaryFile(prefix=name, delete=False)
    try:
        with log:
            subprocess.check_call(command, cwd=cwd, stdout=log,
                                  stderr=subprocess.STDOUT, **kwargs)
    except subprocess.CalledProcessError:
        print('''Error running "{}" in directory {}
    See output in {}'''.format(' '.join(command), cwd, log.name),
              file=sys.stderr)
        return False
    except OSError as e:
        # The process never started, so the log holds nothing of interest.
        os.unlink(log.name)
        print('''Error running "{}" in directory {}
    {}'''.format(' '.join(command), cwd, e),
              file=sys.stderr)
        return False
    os.unlink(log.name)
    return True


def get_data_file(data_dir):
    """Return an ``icudt*.dat`` file found in *data_dir*, or None.

    When several files match the pattern, the first one reported by
    glob.glob is returned.
    """
    matches = glob.glob(mozpath.join(data_dir, 'icudt*.dat'))
    if not matches:
        return None
    return matches[0]


def update_data_file(topsrcdir):
    """Build ICU out-of-tree and refresh the in-tree ICU data file.

    ICU is configured and built with its own build system inside a fresh
    temporary directory.  The ``icudt*.dat`` archive found under
    ``data/out`` of that directory is then copied into
    ``config/external/icu/data/`` below *topsrcdir*; the previous in-tree
    data file is deleted first when its file name differs.

    Returns True on success.  Returns False when configure or make fails,
    or when either the existing in-tree data file or the freshly built one
    cannot be found; in those cases the temporary build directory is left
    in place.
    """
    build_dir = tempfile.mkdtemp(prefix='icu-obj-')
    configure_script = mozpath.join(topsrcdir, 'intl/icu/source/configure')

    # bug 1262101 - these should be shared with the moz.build files
    cppflags = ' '.join([
        '-DU_NO_DEFAULT_INCLUDE_UTF_HEADERS=1',
        '-DUCONFIG_NO_LEGACY_CONVERSION',
        '-DUCONFIG_NO_TRANSLITERATION',
        '-DUCONFIG_NO_REGULAR_EXPRESSIONS',
        '-DUCONFIG_NO_BREAK_ITERATION',
        '-DU_CHARSET_IS_UTF8',
    ])
    configure_env = dict(os.environ)
    configure_env['CPPFLAGS'] = cppflags

    # Exclude data that we currently don't need.
    #
    # The file format for ICU's data build tool is described at
    # <https://github.com/unicode-org/icu/blob/master/docs/userguide/icu_data/buildtool.md>.
    configure_env['ICU_DATA_FILTER_FILE'] = mozpath.join(
        topsrcdir, 'intl/icu/data_filter.json')

    print('Running ICU configure...')
    configure_command = [
        'sh', configure_script,
        '--with-data-packaging=archive',
        '--enable-static',
        '--disable-shared',
        '--disable-extras',
        '--disable-icuio',
        '--disable-layout',
        '--disable-layoutex',
        '--disable-tests',
        '--disable-samples',
        '--disable-strict',
    ]
    configured = try_run('icu-configure', configure_command,
                         cwd=build_dir, env=configure_env)
    if not configured:
        return False

    print('Running ICU make...')
    make_command = [
        'make',
        '--jobs=%d' % multiprocessing.cpu_count(),
        '--output-sync',
    ]
    built = try_run('icu-make', make_command, cwd=build_dir)
    if not built:
        return False

    print('Copying ICU data file...')
    in_tree_dir = mozpath.join(topsrcdir, 'config/external/icu/data/')
    current_data = get_data_file(in_tree_dir)
    if not current_data:
        print('Error: no ICU data file in %s' % in_tree_dir,
              file=sys.stderr)
        return False

    fresh_data = get_data_file(mozpath.join(build_dir, 'data/out'))
    if not fresh_data:
        print('Error: no ICU data in ICU objdir', file=sys.stderr)
        return False

    current_name = os.path.basename(current_data)
    fresh_name = os.path.basename(fresh_data)
    if current_name != fresh_name:
        # Data file name has the major version number embedded, so a
        # differently named old file would not be overwritten by the copy.
        os.unlink(current_data)
    shutil.copy(fresh_data, in_tree_dir)

    # Cleaning up the build directory is best-effort only.
    try:
        shutil.rmtree(build_dir)
    except Exception:
        print('Warning: failed to remove %s' % build_dir, file=sys.stderr)
    return True


def main():
    """Command-line entry point.

    Expects exactly one argument, the Mozilla top source directory.
    Updates the ICU sources lists and then the ICU data file, exiting with
    status 1 on a usage error or when the data file update fails.
    """
    arguments = sys.argv[1:]
    if len(arguments) != 1:
        print('Usage: icu_sources_data.py <mozilla topsrcdir>',
              file=sys.stderr)
        sys.exit(1)

    topsrcdir = mozpath.abspath(arguments[0])
    update_sources(topsrcdir)
    if update_data_file(topsrcdir):
        return
    print('Error updating ICU data file', file=sys.stderr)
    sys.exit(1)


if __name__ == '__main__':
    main()