author Fernando Jiménez <>
Fri, 10 May 2013 16:16:56 +0200
changeset 138297 81e072111e989546362b2313e31a2495cf7f1e25
parent 126716 a874d2756f6530a8ee7c8dc6dbcb7102944d5127
permissions -rw-r--r--
Bug 861894 - Avoid apps to schedule new offline cache downloads while device free space is low. r=honzab

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import re
import codecs

class MalformedLocaleFileError(Exception):
    """Raised when a locale file cannot be decoded or parsed.

    The exception message names the offending file and, when known, the
    line that could not be handled.
    """

def parse_file(path):
    """Read the file at `path` and return its parsed content.

    The lines are obtained from read_file() and handed to parse() together
    with `path`; whatever parse() returns is returned unchanged.
    """
    file_lines = read_file(path)
    return parse(file_lines, path)

def read_file(path):
    """Return the list of lines of the UTF-8 encoded file at `path`.

    Each returned line keeps its line terminator, as produced by
    `readlines()`.

    Raises MalformedLocaleFileError when the content is not valid UTF-8.
    """
    try:
        # The context manager closes the file even when decoding fails.
        with codecs.open(path, "r", "utf-8") as locale_file:
            return locale_file.readlines()
    except UnicodeDecodeError as e:
        raise MalformedLocaleFileError(
          'Following locale file is not a valid ' +
          'UTF-8 file: %s\n%s"' % (path, str(e)))

# Matches a comment line: optional leading whitespace followed by `#`.
COMMENT = re.compile(r'\s*#')
# Matches a line made only of whitespace (at least one character).
EMPTY = re.compile(r'^\s+$')
# Matches `key = value` or `key: value`. Group 1 is the key (everything up to
# the first `=` or `:`, trailing spaces included), group 2 is the separator
# and group 3 is the rest of the line once the whitespace following the
# separator has been skipped.
KEYVALUE = re.compile(r"\s*([^=:]+)(=|:)\s*(.*)")

def parse(lines, path=None):
    """Parse the lines of a .properties file into a dictionary.

    Arguments:
      lines -- iterable of text lines, with or without their line terminator.
      path  -- optional file name, only used to build error messages.

    Returns a dict mapping each key to its value. Keys using a plural form
    suffix (`key[one]`, ...) are merged by normalize_plural() before the
    dict is returned.

    Raises MalformedLocaleFileError when a line is neither a comment, a blank
    line nor a `key=value` / `key:value` pair, when a key is empty, or when
    the input ends in the middle of a multiline value.
    """
    lines = iter(lines)
    lineNo = 0
    pairs = dict()
    for line in lines:
        # Count every physical line, including the skipped ones, so that the
        # numbers reported in error messages match the file.
        lineNo += 1
        if len(line) == 0 or COMMENT.match(line) or EMPTY.match(line):
            continue
        m = KEYVALUE.match(line)
        if not m:
            raise MalformedLocaleFileError(
                  'Following locale file is not a valid .properties file: %s\n'
                  'Line %d is incorrect:\n%s' % (path, lineNo, line))

        # All spaces are stripped. Spaces at the beginning are stripped
        # by the regular expression. We have to strip spaces at the end.
        key = m.group(1).rstrip()
        val = m.group(3).rstrip()
        # Round-trip through raw-unicode-escape to turn `\uXXXX` sequences
        # into the characters they stand for.
        val = val.encode('raw-unicode-escape').decode('raw-unicode-escape')

        # `key` can be empty when key is only made of spaces
        if not key:
            raise MalformedLocaleFileError(
                  'Following locale file is not a valid .properties file: %s\n'
                  'Key is invalid on line %d is incorrect:\n%s' %
                  (path, lineNo, line))

        # Multiline value: keep reading lines while they end with a backslash.
        # Leading spaces of every continuation line are dropped; the last
        # line (the one without a trailing backslash) is stripped on both
        # sides.
        if val.endswith("\\"):
            val = val[:-1]
            try:
                while True:
                    # strip() removes the indentation and especially the
                    # \n at EOL, which would otherwise hide the backslash.
                    line = next(lines).strip()
                    lineNo += 1
                    if not line.endswith("\\"):
                        break
                    val += line[:-1]
            except StopIteration:
                raise MalformedLocaleFileError(
                  'Following locale file is not a valid .properties file: %s\n'
                  'Unexpected EOF in multiline sequence at line %d:\n%s' %
                  (path, lineNo, line))
            val += line
        # Save this new pair
        pairs[key] = val

    normalize_plural(path, pairs)
    return pairs

# Plural forms in properties files are defined like this:
#   key = other form
#   key[one] = one form
#   key[...] = ...
# Parse them and merge each key into one object containing all forms:
#   key: {
#     other: "other form",
#     one: "one form",
#     ...: ...
#   }
PLURAL_FORM = re.compile(r'^(.*)\[(zero|one|two|few|many|other)\]$')
def normalize_plural(path, pairs):
    """Merge `key[form]` entries of `pairs` into one dict per key, in place.

    Arguments:
      path  -- file name, only used to build the error message.
      pairs -- dict of parsed key/value pairs; it is modified in place.

    After the call every `key[form]` entry has been removed and `pairs[key]`
    is a dict mapping each plural form to its value. A plain `key` entry, if
    present, becomes the "other" form unless a `key[other]` entry overrides
    it. Keys without a plural suffix are left untouched.

    Raises MalformedLocaleFileError when plural forms are given for a key
    that has neither a generic entry nor an `[other]` form.
    """
    # Iterate over a snapshot of the keys: the dict is mutated in the loop.
    for key in list(pairs.keys()):
        m = PLURAL_FORM.match(key)
        if not m:
            continue
        main_key = m.group(1)
        plural_form = m.group(2)
        # Allows not specifying a generic key (i.e a key without [form])
        if main_key not in pairs:
            # Ensure that we always have the [other] form. Check before
            # touching `pairs` so a failure leaves it unmodified.
            if main_key + "[other]" not in pairs:
                raise MalformedLocaleFileError(
                      'Following locale file is not a valid UTF-8 file: %s\n'
                      'This plural form doesn\'t have a matching `%s[other]` form:\n'
                      '%s\n'
                      'You have to defined following key:\n%s'
                      % (path, main_key, key, main_key))
            pairs[main_key] = {}
        # convert generic form into an object if it is still a string
        if not isinstance(pairs[main_key], dict):
            pairs[main_key] = {"other": pairs[main_key]}
        # then, add this new plural form
        pairs[main_key][plural_form] = pairs[key]
        del pairs[key]