python/devtools/migrate-l10n/migrate/main.py
author Mozilla Releng Treescript <release+treescript@mozilla.org>
Fri, 20 May 2022 19:04:40 +0000
changeset 618368 3b0030f76f1ee69138d29f114d5221f21c55e7e5
parent 554551 994ae8e4833c90447d91f0e26a718573cff5a514
permissions -rw-r--r--
no bug - Bumping Firefox l10n changesets r=release a=l10n-bump DONTBUILD de -> 1d458b9f128b124a25679e755f61139726a77caf dsb -> d3afb988e0d6b558a8a590658da39eb15b2f0b18 et -> 77d8cd6a89d77a9356bc597e476cd2cc764cd607 hsb -> a67aba9c3b33f8bc097d742bd70ea98ca80a1335 hu -> ccf9d887f4eb7e4bac53afde57598686a1e255b9 hye -> ed3709ec02d90cca3ebf3237ea6d6411353e2559 it -> d544065a45d9b684beb7f60bbeaf30fd1a696e69 ja -> 17e2f87f08c519add504e8f4265e0f7b6524a34a ja-JP-mac -> 0e30fddf50953f46b80dbc99825270568b1a0d65 oc -> e114f06272b1c9377d19ca013aa6a51509cb719a pt-PT -> f7bf8255666f78337b22b3206516736daf222d40 zh-CN -> ff1ee767ed480c30d6aaefa00c65c8f62477cc18

from __future__ import absolute_import

import argparse
import glob
import HTMLParser
import logging
import os
import re
import sys
import urllib2


# Make the copy of compare-locales located under third_party/python
# importable. Its location is resolved relative to this script's real path
# (symlinks resolved by os.path.realpath), four directories up.
script_path = os.path.dirname(os.path.realpath(__file__))
compare_locales_path = os.path.join(
    script_path, "../../../../third_party/python/compare-locales"
)
# Prepend to sys.path so this location is searched first; the import below is
# deliberately placed after the insertion.
sys.path.insert(0, compare_locales_path)
from compare_locales import parser


# Configure the root logger: every message is printed as
# "  [LEVEL] message", and messages below INFO are discarded.
logging.basicConfig(format="  [%(levelname)s] %(message)s", level=logging.INFO)


# License header (MPL 2.0) written to newly created properties files when no
# content could be retrieved from central for that file (see
# get_properties_header).
DEFAULT_HEADER = (
    "# This Source Code Form is subject to the terms of the "
    "Mozilla Public\n# License, v. 2.0. If a copy of the MPL "
    "was not distributed with this\n# file, You can obtain "
    "one at http://mozilla.org/MPL/2.0/.\n"
)


# Base url used to retrieve the en-US properties files from mozilla-central.
# The properties file name is appended to it; the downloaded files are parsed
# for headers and localization notes.
CENTRAL_BASE_URL = (
    "https://hg.mozilla.org/"
    "mozilla-central/raw-file/tip/"
    "devtools/client/locales/en-US/"
)


# Shared HTML parser instance, used to unescape HTML entities found in the
# values read from dtd files.
HTML_PARSER = HTMLParser.HTMLParser()

# Cache of properties files retrieved over the network, keyed by properties
# file name. Values are lists of lines; an empty list is stored when the
# download failed, so a failing file is not requested again.
central_prop_cache = {}

# Cache of the parse results of the existing DTD files, keyed by dtd path.
dtd_entities_cache = {}


def get_central_prop_content(prop_filename):
    """Return the lines of a properties file fetched from mozilla-central.

    The file is downloaded from CENTRAL_BASE_URL + prop_filename. The outcome
    (including a failure) is stored in central_prop_cache, so a given file is
    requested at most once per run.

    Args:
        prop_filename: name of the properties file, appended to
            CENTRAL_BASE_URL.

    Returns:
        The list of lines of the file as returned by readlines(), or an empty
        list if the file could not be retrieved or read.
    """
    if prop_filename in central_prop_cache:
        return central_prop_cache[prop_filename]

    url = CENTRAL_BASE_URL + prop_filename
    logging.info("loading localization file from central: {%s}" % url)

    try:
        response = urllib2.urlopen(url)
        try:
            content = response.readlines()
        finally:
            # Release the connection even if reading the body fails.
            response.close()
    except Exception:
        # Best effort: any network/HTTP/read failure degrades to an empty
        # file. Unlike a bare "except:", this lets KeyboardInterrupt and
        # SystemExit propagate so the script can still be interrupted.
        logging.error("failed to load properties file from central: {%s}" % url)
        content = []

    central_prop_cache[prop_filename] = content
    return content


def get_localization_note(prop_name, prop_filename):
    """Return the current en-US localization note for a property.

    The note is the run of consecutive comment lines (lines starting with
    "#") that immediately precedes the line defining prop_name in the
    properties file retrieved from central. Any other line in between
    (including a blank line) detaches the comments from the property.

    Args:
        prop_name: key of the property to look up.
        prop_filename: name of the properties file on central.

    Returns:
        The comment lines joined with newlines, or an empty string if the
        property has no preceding comment or is not defined in the file.
    """
    prop_content = get_central_prop_content(prop_filename)

    # Compiled once: the pattern does not depend on the line being scanned.
    prop_pattern = re.compile(r"(^|\n)" + re.escape(prop_name) + r"\s*=")

    comment_buffer = []
    for line in prop_content:
        # Remove line breaks.
        line = line.strip("\n").strip("\r")

        if line.startswith("#"):
            # Comment line, add to the current comment buffer.
            comment_buffer.append(line)
        elif prop_pattern.search(line):
            # Property found, the current comment buffer is the localization
            # note.
            return "\n".join(comment_buffer)
        else:
            # No match, not a comment, reinitialize the comment buffer.
            comment_buffer = []

    # Property not found: whatever comments trail at the end of the file do
    # not belong to it, so report that there is no note.
    return ""


def get_dtd_entities(dtd_path):
    """Return the parsed content of the DTD file at dtd_path.

    The file is parsed with the compare-locales parser registered for ".dtd"
    files. Parse results are memoized in dtd_entities_cache, keyed by
    dtd_path, so each file is read and parsed only once.

    Args:
        dtd_path: path of the DTD file to parse.

    Returns:
        The value returned by the parser's parse() method (callers in this
        file unpack it as a pair: entities, name-to-index map).
    """
    try:
        # Fast path: this file has already been parsed.
        return dtd_entities_cache[dtd_path]
    except KeyError:
        pass

    dtd_parser = parser.getParser(".dtd")
    dtd_parser.readFile(dtd_path)
    parsed = dtd_parser.parse()
    dtd_entities_cache[dtd_path] = parsed
    return parsed


def get_translation_from_dtd(dtd_path, entity_name):
    """Return the value of an entity defined in a DTD file.

    Args:
        dtd_path: path of the DTD file to read.
        entity_name: name of the entity to extract.

    Returns:
        The entity value with HTML entities unescaped, encoded as UTF-8, or
        None if the DTD file does not define entity_name.
    """
    entity_list, index_by_name = get_dtd_entities(dtd_path)

    if entity_name in index_by_name:
        entity = entity_list[index_by_name[entity_name]]
        unescaped = HTML_PARSER.unescape(entity.val)
        return unescaped.encode("utf-8")

    # Translation is missing from this DTD file.
    return None


def get_properties_header(prop_filename):
    """Return the header and file-wide comments of a properties file.

    The header is made of every line of the file retrieved from central that
    comes before the first entity line or the first "LOCALIZATION NOTE (...)"
    comment.

    Args:
        prop_filename: name of the properties file on central.

    Returns:
        The header lines joined with newlines, or DEFAULT_HEADER if no
        content is available for the file.
    """
    central_lines = get_central_prop_content(prop_filename)

    # Nothing available from central: fall back to the default license header.
    if not central_lines:
        return DEFAULT_HEADER

    # Regexp matching keys, extracted from parser.py.
    entity_pattern = (
        "^(\s*)" "((?:[#!].*?\n\s*)*)" "([^#!\s\n][^=:\n]*?)\s*[:=][ \t]*"
    )
    loc_note_pattern = "^(\s*)" "\#\s*LOCALIZATION NOTE\s*\([^)]+\)"

    header_lines = []
    for raw_line in central_lines:
        # Drop the line breaks before matching and storing the line.
        text = raw_line.strip("\n").strip("\r")

        # The first entity or localization note marks the end of the header.
        if re.search(entity_pattern, text) or re.search(loc_note_pattern, text):
            break

        header_lines.append(text)

    return "\n".join(header_lines)


def create_properties_file(prop_path):
    """Create a new properties file at prop_path, containing only a header.

    The header is the one returned by get_properties_header() for the file's
    base name. An existing file at prop_path is truncated.

    Args:
        prop_path: path of the properties file to create.
    """
    logging.info("creating new *.properties file: {%s}" % prop_path)

    # Compute the header before opening the file, so that a failure here does
    # not leave an empty file behind.
    header = get_properties_header(os.path.basename(prop_path))

    # The context manager closes the handle even if write() raises.
    with open(prop_path, "w+") as prop_file:
        prop_file.write(header)


def migrate_string(dtd_path, prop_path, dtd_name, prop_name):
    """Migrate a single string from a dtd file to a properties file.

    The translation of dtd_name is read from dtd_path and appended to
    prop_path as "prop_name=translation", preceded by the en-US localization
    note when one is found. The properties file is created if missing.

    Nothing is written, and a message is logged, when:
      - the dtd file or the translation is missing;
      - the properties file can not be created;
      - the exact same entry is already in the properties file;
      - prop_name already exists in the properties file with another value.

    Args:
        dtd_path: path of the source dtd file.
        prop_path: path of the destination properties file.
        dtd_name: name of the entity in the dtd file.
        prop_name: key of the string in the properties file.
    """
    if not os.path.isfile(dtd_path):
        logging.error("dtd file can not be found at: {%s}" % dtd_path)
        return

    translation = get_translation_from_dtd(dtd_path, dtd_name)
    if not translation:
        logging.error(
            "translation could not be found for: {%s} in {%s}" % (dtd_name, dtd_path)
        )
        return

    # Create properties file if missing.
    if not os.path.isfile(prop_path):
        create_properties_file(prop_path)

    if not os.path.isfile(prop_path):
        logging.error("could not create new properties file at: {%s}" % prop_path)
        return

    prop_line = prop_name + "=" + translation + "\n"

    with open(prop_path, "r") as prop_file:
        prop_file_content = prop_file.read()

    # Skip the string if it already exists in the destination file. The match
    # is anchored at the beginning of a line: a plain substring test would
    # also match a longer key ending with prop_name (e.g. "other.label=X"
    # when looking for "label=X") and wrongly skip the migration.
    already_migrated = (
        prop_file_content.startswith(prop_line)
        or ("\n" + prop_line) in prop_file_content
    )
    if already_migrated:
        logging.warning("string already migrated, skipping: {%s}" % prop_name)
        return

    # Skip the string and log an error if an existing entry is found, but with
    # a different value.
    if re.search(r"(^|\n)" + re.escape(prop_name) + r"\s*=", prop_file_content):
        logging.error("existing string found, skipping: {%s}" % prop_name)
        return

    prop_filename = os.path.basename(prop_path)
    logging.info("migrating {%s} in {%s}" % (prop_name, prop_filename))
    with open(prop_path, "a") as prop_file:
        localization_note = get_localization_note(prop_name, prop_filename)
        if localization_note:
            prop_file.write("\n" + localization_note)
        else:
            logging.warning(
                "localization notes could not be found for: {%s}" % prop_name
            )
        prop_file.write("\n" + prop_line)


def migrate_conf(conf_path, l10n_path):
    """Apply the migration instructions of a configuration file.

    Each instruction line has the following syntax:

        ${prop_path}:${prop_name} = ${dtd_path}:${dtd_name}

    where both paths are relative to l10n_path. Lines that do not contain
    " = " are ignored. Lines that contain it but do not follow the syntax
    above are reported with an error message and skipped, so that one
    malformed line does not abort the remaining migrations.

    Args:
        conf_path: path of the configuration file to apply.
        l10n_path: base path that the paths found in the file are joined to.
    """
    with open(conf_path, "r") as conf_file:
        lines = conf_file.readlines()

    for line in lines:
        # Remove line breaks.
        line = line.strip("\n").strip("\r")

        # Skip lines that are not migration instructions.
        if " = " not in line:
            continue

        # Expected syntax: ${prop_path}:${prop_name} = ${dtd_path}:${dtd_name}.
        try:
            prop_info, dtd_info = line.split(" = ")
            prop_path, prop_name = prop_info.split(":")
            dtd_path, dtd_name = dtd_info.split(":")
        except ValueError:
            # Wrong number of " = " or ":" separators.
            logging.error("invalid configuration line, skipping: {%s}" % line)
            continue

        dtd_path = os.path.join(l10n_path, dtd_path)
        prop_path = os.path.join(l10n_path, prop_path)

        migrate_string(dtd_path, prop_path, dtd_name, prop_name)


def main():
    """Command line entry point: migrate devtools localized strings.

    Reads the path to an l10n repository and a configuration file (or a
    folder of configuration files) from the command line, then applies every
    configuration file to the devtools/client/ folder of that repository.

    Exits with status 1 if the l10n path or the configuration path is
    invalid. A missing --config option is reported by argparse.
    """
    # Read command line arguments.
    arg_parser = argparse.ArgumentParser(
        description="Migrate devtools localized strings."
    )
    arg_parser.add_argument("path", type=str, help="path to l10n repository")
    # The configuration is mandatory: without it there is nothing to migrate.
    arg_parser.add_argument(
        "-c",
        "--config",
        type=str,
        required=True,
        help="path to configuration file or folder",
    )
    args = arg_parser.parse_args()

    # Retrieve path to devtools localization files in l10n repository.
    devtools_l10n_path = os.path.join(args.path, "devtools/client/")
    if not os.path.exists(devtools_l10n_path):
        logging.error("l10n path is invalid: {%s}" % devtools_l10n_path)
        sys.exit(1)
    logging.info("l10n path is valid: {%s}" % devtools_l10n_path)

    # Retrieve configuration files to apply.
    if os.path.isdir(args.config):
        # Join with the folder so that its content is listed whether or not
        # the path ends with a separator. Sub-folders are ignored, and the
        # files are sorted to apply them in a reproducible order.
        conf_files = sorted(
            conf_file
            for conf_file in glob.glob(os.path.join(args.config, "*"))
            if os.path.isfile(conf_file)
        )
    elif os.path.isfile(args.config):
        conf_files = [args.config]
    else:
        logging.error("config path is invalid: {%s}" % args.config)
        sys.exit(1)

    # Perform migration for each configuration file.
    for conf_file in conf_files:
        logging.info("performing migration for config file: {%s}" % conf_file)
        migrate_conf(conf_file, devtools_l10n_path)


# Run the migration only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()