Bug 1530320 - Update IANA language subtag registry data to version 2019-02-20. r=Waldo
author André Bargull <andre.bargull@gmail.com>
Wed, 27 Feb 2019 12:45:49 -0800
changeset 519562 75af9e205c2887e3dffdca703d970eec92e83d08
parent 519561 ec32c653cca2ca7eff0aac75df3a16306e37fdee
child 519563 e3d1480593cfa2a086e020cea30d2af9ac20fbe7
push id 10862
push user ffxbld-merge
push date Mon, 11 Mar 2019 13:01:11 +0000
treeherder mozilla-beta@a2e7f5c935da [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers Waldo
bugs 1530320
milestone 67.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1530320 - Update IANA language subtag registry data to version 2019-02-20. r=Waldo
js/src/builtin/intl/LangTagMappingsGenerated.js
js/src/builtin/intl/make_intl_data.py
--- a/js/src/builtin/intl/LangTagMappingsGenerated.js
+++ b/js/src/builtin/intl/LangTagMappingsGenerated.js
@@ -1,13 +1,13 @@
 // Generated by make_intl_data.py. DO NOT EDIT.
 
 /* eslint-disable complexity */
 // Mappings from complete tags to preferred values.
-// Derived from IANA Language Subtag Registry, file date 2018-03-30.
+// Derived from IANA Language Subtag Registry, file date 2019-02-20.
 // https://www.iana.org/assignments/language-subtag-registry
 function updateLangTagMappings(tag) {
     assert(IsObject(tag), "tag is an object");
     assert(!hasOwn("grandfathered", tag), "tag is not a grandfathered tag");
 
     switch (tag.language) {
       case "hy":
         // hy-arevela -> hy
@@ -308,17 +308,17 @@ function updateLangTagMappings(tag) {
             tag.region = undefined;
         }
         break;
     }
 }
 /* eslint-enable complexity */
 
 // Mappings from grandfathered tags to preferred values.
-// Derived from IANA Language Subtag Registry, file date 2018-03-30.
+// Derived from IANA Language Subtag Registry, file date 2019-02-20.
 // https://www.iana.org/assignments/language-subtag-registry
 var grandfatheredMappings = {
     "art-lojban": "jbo",
     "cel-gaulish": "cel-gaulish",
     "en-gb-oed": "en-GB-oxendict",
     "i-ami": "ami",
     "i-bnn": "bnn",
     "i-default": "i-default",
@@ -340,17 +340,17 @@ var grandfatheredMappings = {
     "zh-guoyu": "cmn",
     "zh-hakka": "hak",
     "zh-min": "zh-min",
     "zh-min-nan": "nan",
     "zh-xiang": "hsn",
 };
 
 // Mappings from language subtags to preferred values.
-// Derived from IANA Language Subtag Registry, file date 2018-03-30.
+// Derived from IANA Language Subtag Registry, file date 2019-02-20.
 // https://www.iana.org/assignments/language-subtag-registry
 var languageMappings = {
     "aam": "aas",
     "adp": "dz",
     "aue": "ktz",
     "ayx": "nun",
     "bgm": "bcg",
     "bjd": "drl",
@@ -424,32 +424,32 @@ var languageMappings = {
     "ybd": "rki",
     "yma": "lrr",
     "ymt": "mtm",
     "yos": "zom",
     "yuu": "yug",
 };
 
 // Mappings from region subtags to preferred values.
-// Derived from IANA Language Subtag Registry, file date 2018-03-30.
+// Derived from IANA Language Subtag Registry, file date 2019-02-20.
 // https://www.iana.org/assignments/language-subtag-registry
 var regionMappings = {
     "BU": "MM",
     "DD": "DE",
     "FX": "FR",
     "TP": "TL",
     "YD": "YE",
     "ZR": "CD",
 };
 
 // Mappings from extlang subtags to preferred values.
 // All current deprecated extlang subtags have the form `<prefix>-<extlang>`
 // and their preferred value is exactly equal to `<extlang>`. So each key in
 // extlangMappings acts both as the extlang subtag and its preferred value.
-// Derived from IANA Language Subtag Registry, file date 2018-03-30.
+// Derived from IANA Language Subtag Registry, file date 2019-02-20.
 // https://www.iana.org/assignments/language-subtag-registry
 var extlangMappings = {
     "aao": "ar",
     "abh": "ar",
     "abv": "ar",
     "acm": "ar",
     "acq": "ar",
     "acw": "ar",
@@ -565,16 +565,17 @@ var extlangMappings = {
     "kvk": "sgn",
     "kvr": "ms",
     "kxd": "ms",
     "lbs": "sgn",
     "lce": "ms",
     "lcf": "ms",
     "liw": "ms",
     "lls": "sgn",
+    "lsg": "sgn",
     "lsl": "sgn",
     "lso": "sgn",
     "lsp": "sgn",
     "lst": "sgn",
     "lsy": "sgn",
     "ltg": "lv",
     "lvs": "lv",
     "lws": "sgn",
@@ -618,16 +619,17 @@ var extlangMappings = {
     "pse": "ms",
     "psg": "sgn",
     "psl": "sgn",
     "pso": "sgn",
     "psp": "sgn",
     "psr": "sgn",
     "pys": "sgn",
     "rms": "sgn",
+    "rsi": "sgn",
     "rsl": "sgn",
     "rsm": "sgn",
     "sdl": "sgn",
     "sfb": "sgn",
     "sfs": "sgn",
     "sgg": "sgn",
     "sgx": "sgn",
     "shu": "ar",
@@ -665,16 +667,17 @@ var extlangMappings = {
     "vsl": "sgn",
     "vsv": "sgn",
     "wbs": "sgn",
     "wuu": "zh",
     "xki": "sgn",
     "xml": "sgn",
     "xmm": "ms",
     "xms": "sgn",
+    "yds": "sgn",
     "ygs": "sgn",
     "yhs": "sgn",
     "ysl": "sgn",
     "yue": "zh",
     "zib": "sgn",
     "zlm": "ms",
     "zmi": "ms",
     "zsl": "sgn",
--- a/js/src/builtin/intl/make_intl_data.py
+++ b/js/src/builtin/intl/make_intl_data.py
@@ -34,16 +34,17 @@
 
 from __future__ import print_function
 import os
 import re
 import io
 import sys
 import tarfile
 import tempfile
+from collections import namedtuple
 from contextlib import closing
 from functools import partial, total_ordering
 from itertools import chain, groupby, tee
 from operator import attrgetter, itemgetter
 
 if sys.version_info.major == 2:
     from itertools import ifilter as filter, ifilterfalse as filterfalse, imap as map
     from urllib2 import urlopen, Request as UrlRequest
@@ -101,21 +102,46 @@ def readRegistry(registry):
     grandfatheredMappings = {}
     redundantMappings = {}
     languageMappings = {}
     regionMappings = {}
     variantMappings = {}
     extlangMappings = {}
     extlangSubtags = []
 
+    # Set of language tags which require special handling.
+    SpecialCase = namedtuple("SpecialCase", ["Type", "Subtag", "Prefix", "Preferred_Value"])
+    knownSpecialCases = {
+        SpecialCase("variant", "arevela", "hy", None): "hy",
+        SpecialCase("variant", "arevmda", "hy", None): "hyw",
+        SpecialCase("variant", "heploc", "ja-Latn-hepburn", "alalc97"): "ja-Latn-alalc97",
+    }
+
+    # The de-facto marker for special cases is a comment of the form
+    # "Preferred tag is <preferred>", where <preferred> denotes the preferred
+    # language tag. This is not specified in RFC 5646.
+    specialCaseRE = re.compile("Preferred tag is (?P<preferred>.+)")
+
     for record in readRegistryRecord(registry):
         if "File-Date" in record:
             fileDate = record["File-Date"]
             continue
 
+        # Watch out for cases which need special processing.
+        if "Comments" in record:
+            match = specialCaseRE.match(record["Comments"])
+            if match:
+                replacement = knownSpecialCases[record["Type"],
+                                                record["Subtag"],
+                                                record["Prefix"],
+                                                record.get("Preferred-Value")]
+                if replacement != match.group("preferred"):
+                    raise Exception("Unexpected replacement value for {}".format(record))
+                record["Preferred-Value"] = replacement
+
         if record["Type"] == "grandfathered":
             # Grandfathered tags don't use standard syntax, so
             # CanonicalizeLanguageTag expects the mapping table to provide
             # the final form for all.
             # For grandfatheredMappings, keys must be in lower case; values in
             # the case used in the registry.
             tag = record["Tag"]
             if "Preferred-Value" in record:
@@ -176,19 +202,23 @@ def readRegistry(registry):
             assert False, "Unrecognized Type: {0}".format(record["Type"])
 
     # Check that mappings for language subtags and extlang subtags don't affect
     # each other.
     for extlang in extlangSubtags:
         if extlang in languageMappings:
             raise Exception("Conflict: extlang with lang mapping: " + extlang)
 
-    # Special case for heploc.
-    assert variantMappings["ja-Latn-hepburn-heploc"] == "alalc97"
-    variantMappings["ja-Latn-hepburn-heploc"] = "ja-Latn-alalc97"
+    # Check all known special cases were processed.
+    for elem in knownSpecialCases:
+        tag = "{}-{}".format(elem.Prefix, elem.Subtag)
+        assert elem.Type == "variant", "Unexpected non-variant special case"
+        assert tag in variantMappings, "{} not found in variant mappings".format(tag)
+        assert variantMappings[tag] == knownSpecialCases[elem], \
+            "{} does not map to {}".format(tag, knownSpecialCases[elem])
 
     # ValidateAndCanonicalizeLanguageTag in CommonFunctions.js expects
     # redundantMappings contains no 2*3ALPHA.
     assert all(len(lang) > 3 for lang in redundantMappings.keys())
 
     return {"fileDate": fileDate,
             "grandfatheredMappings": grandfatheredMappings,
             "redundantMappings": redundantMappings,
@@ -300,17 +330,17 @@ def writeMappingsFunction(println, varia
         cond = []
         extlangIndex = 1
         lastVariant = None
         for (kind, subtag) in splitSubtags(tag):
             if kind == Subtag.Language:
                 continue
 
             if kind == Subtag.ExtLang:
-                assert extlangIndex in [1, 2, 3],\
+                assert extlangIndex in [1, 2, 3], \
                     "Language-Tag permits no more than three extlang subtags"
                 cond.append('tag.extlang{} === "{}"'.format(extlangIndex, subtag))
                 extlangIndex += 1
             elif kind == Subtag.Script:
                 cond.append('tag.script === "{}"'.format(subtag))
             elif kind == Subtag.Region:
                 cond.append('tag.region === "{}"'.format(subtag))
             else:
@@ -392,17 +422,17 @@ def writeMappingsFunction(println, varia
         (preferred_kind, preferred_subtag) = preferred_next()
 
         # Remove any extlang subtags per RFC 5646, 4.5:
         # 'The canonical form contains no 'extlang' subtags.'
         # https://tools.ietf.org/html/rfc5646#section-4.5
         assert preferred_kind != Subtag.ExtLang
         extlangIndex = 1
         while tag_kind == Subtag.ExtLang:
-            assert extlangIndex in [1, 2, 3],\
+            assert extlangIndex in [1, 2, 3], \
                 "Language-Tag permits no more than three extlang subtags"
             println3(u"tag.extlang{} = undefined;".format(extlangIndex))
             extlangIndex += 1
             (tag_kind, tag_subtag) = tag_next()
 
         # Update the script and region subtags.
         for kind, prop_name in [(Subtag.Script, "script"), (Subtag.Region, "region")]:
             if tag_kind == kind and preferred_kind == kind:
@@ -500,17 +530,17 @@ def writeMappingsFunction(println, varia
     println(u"")
 
     # Switch on the language subtag.
     println(u"    switch (tag.language) {")
     for lang in sorted({language(tag) for tag in langTagMappings}):
         println(u'      case "{}":'.format(lang))
         isFirstLanguageTag = True
         for tag in sorted(tag for tag in langTagMappings if language(tag) == lang):
-            assert not isinstance(langTagMappings[tag], dict),\
+            assert not isinstance(langTagMappings[tag], dict), \
                 "only supports complete language tags"
             emitCompare(tag, langTagMappings[tag], isFirstLanguageTag)
             isFirstLanguageTag = False
         println(u"        break;")
     println(u"    }")
 
     println(u"}")
     println(u"/* eslint-enable complexity */")