Bug 769872 - Add self-hosted JavaScript core of Intl constructors Collator, NumberFormat, DateTimeFormat (part 4). r=jwalden
author Norbert Lindenberg <mozilladev@lindenbergsoftware.com>
Wed, 13 Feb 2013 12:20:06 -0800
changeset 121813 095782b5101358425af19282e31b09e3be1b03b0
parent 121812 38e92a7b5bf4a247e9bead46cd2f113e5ca7c5e9
child 121814 53de36ab95d126bd7c602d6912f620e262fed43f
push id 1356
push user ttaubert@mozilla.com
push date Fri, 15 Feb 2013 09:40:57 +0000
treeherder fx-team@31e89328fe12 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jwalden
bugs 769872
milestone 21.0a1
Bug 769872 - Add self-hosted JavaScript core of Intl constructors Collator, NumberFormat, DateTimeFormat (part 4). r=jwalden
js/src/Makefile.in
js/src/builtin/IntlData.js
js/src/builtin/Utilities.js
js/src/builtin/make_intl_data.py
--- a/js/src/Makefile.in
+++ b/js/src/Makefile.in
@@ -974,16 +974,17 @@ endif
 # Prepare self-hosted JS code for embedding
 export:: selfhosting
 selfhosting:: selfhosted.out.h
 
 selfhosting_srcs := \
   $(srcdir)/builtin/Utilities.js \
   $(srcdir)/builtin/Array.js \
   $(srcdir)/builtin/Intl.js \
+  $(srcdir)/builtin/IntlData.js \
   $(NULL)
 
 selfhosted_out_h_deps := \
   $(selfhosting_srcs) \
   $(srcdir)/js.msg \
   $(srcdir)/builtin/macros.py \
   $(srcdir)/builtin/js2c.py \
   $(srcdir)/builtin/embedjs.py
new file mode 100644
--- /dev/null
+++ b/js/src/builtin/IntlData.js
@@ -0,0 +1,331 @@
+// Generated by make_intl_data.py. DO NOT EDIT.
+
+// Mappings from complete tags to preferred values.
+// Derived from IANA Language Subtag Registry, file date 2013-01-25.
+// http://www.iana.org/assignments/language-subtag-registry
+var langTagMappings = {
+    "art-lojban": "jbo",
+    "cel-gaulish": "cel-gaulish",
+    "en-gb-oed": "en-GB-oed",
+    "i-ami": "ami",
+    "i-bnn": "bnn",
+    "i-default": "i-default",
+    "i-enochian": "i-enochian",
+    "i-hak": "hak",
+    "i-klingon": "tlh",
+    "i-lux": "lb",
+    "i-mingo": "i-mingo",
+    "i-navajo": "nv",
+    "i-pwn": "pwn",
+    "i-tao": "tao",
+    "i-tay": "tay",
+    "i-tsu": "tsu",
+    "ja-latn-hepburn-heploc": "ja-Latn-alalc97",
+    "no-bok": "nb",
+    "no-nyn": "nn",
+    "sgn-be-fr": "sfb",
+    "sgn-be-nl": "vgt",
+    "sgn-br": "bzs",
+    "sgn-ch-de": "sgg",
+    "sgn-co": "csn",
+    "sgn-de": "gsg",
+    "sgn-dk": "dsl",
+    "sgn-es": "ssp",
+    "sgn-fr": "fsl",
+    "sgn-gb": "bfi",
+    "sgn-gr": "gss",
+    "sgn-ie": "isg",
+    "sgn-it": "ise",
+    "sgn-jp": "jsl",
+    "sgn-mx": "mfs",
+    "sgn-ni": "ncs",
+    "sgn-nl": "dse",
+    "sgn-no": "nsl",
+    "sgn-pt": "psr",
+    "sgn-se": "swl",
+    "sgn-us": "ase",
+    "sgn-za": "sfs",
+    "zh-cmn": "cmn",
+    "zh-cmn-hans": "cmn-Hans",
+    "zh-cmn-hant": "cmn-Hant",
+    "zh-gan": "gan",
+    "zh-guoyu": "cmn",
+    "zh-hakka": "hak",
+    "zh-min": "zh-min",
+    "zh-min-nan": "nan",
+    "zh-wuu": "wuu",
+    "zh-xiang": "hsn",
+    "zh-yue": "yue",
+};
+
+// Mappings from non-extlang subtags to preferred values.
+// Derived from IANA Language Subtag Registry, file date 2013-01-25.
+// http://www.iana.org/assignments/language-subtag-registry
+var langSubtagMappings = {
+    "BU": "MM",
+    "DD": "DE",
+    "FX": "FR",
+    "TP": "TL",
+    "YD": "YE",
+    "ZR": "CD",
+    "ayx": "nun",
+    "bjd": "drl",
+    "ccq": "rki",
+    "cjr": "mom",
+    "cka": "cmr",
+    "cmk": "xch",
+    "drh": "khk",
+    "drw": "prs",
+    "gav": "dev",
+    "hrr": "jal",
+    "ibi": "opa",
+    "in": "id",
+    "iw": "he",
+    "ji": "yi",
+    "jw": "jv",
+    "kgh": "kml",
+    "lcq": "ppr",
+    "mo": "ro",
+    "mst": "mry",
+    "myt": "mry",
+    "sca": "hle",
+    "tie": "ras",
+    "tkk": "twm",
+    "tlw": "weo",
+    "tnf": "prs",
+    "ybd": "rki",
+    "yma": "lrr",
+};
+
+// Mappings from extlang subtags to preferred values.
+// Derived from IANA Language Subtag Registry, file date 2013-01-25.
+// http://www.iana.org/assignments/language-subtag-registry
+var extlangMappings = {
+    "aao": {preferred: "aao", prefix: "ar"},
+    "abh": {preferred: "abh", prefix: "ar"},
+    "abv": {preferred: "abv", prefix: "ar"},
+    "acm": {preferred: "acm", prefix: "ar"},
+    "acq": {preferred: "acq", prefix: "ar"},
+    "acw": {preferred: "acw", prefix: "ar"},
+    "acx": {preferred: "acx", prefix: "ar"},
+    "acy": {preferred: "acy", prefix: "ar"},
+    "adf": {preferred: "adf", prefix: "ar"},
+    "ads": {preferred: "ads", prefix: "sgn"},
+    "aeb": {preferred: "aeb", prefix: "ar"},
+    "aec": {preferred: "aec", prefix: "ar"},
+    "aed": {preferred: "aed", prefix: "sgn"},
+    "aen": {preferred: "aen", prefix: "sgn"},
+    "afb": {preferred: "afb", prefix: "ar"},
+    "afg": {preferred: "afg", prefix: "sgn"},
+    "ajp": {preferred: "ajp", prefix: "ar"},
+    "apc": {preferred: "apc", prefix: "ar"},
+    "apd": {preferred: "apd", prefix: "ar"},
+    "arb": {preferred: "arb", prefix: "ar"},
+    "arq": {preferred: "arq", prefix: "ar"},
+    "ars": {preferred: "ars", prefix: "ar"},
+    "ary": {preferred: "ary", prefix: "ar"},
+    "arz": {preferred: "arz", prefix: "ar"},
+    "ase": {preferred: "ase", prefix: "sgn"},
+    "asf": {preferred: "asf", prefix: "sgn"},
+    "asp": {preferred: "asp", prefix: "sgn"},
+    "asq": {preferred: "asq", prefix: "sgn"},
+    "asw": {preferred: "asw", prefix: "sgn"},
+    "auz": {preferred: "auz", prefix: "ar"},
+    "avl": {preferred: "avl", prefix: "ar"},
+    "ayh": {preferred: "ayh", prefix: "ar"},
+    "ayl": {preferred: "ayl", prefix: "ar"},
+    "ayn": {preferred: "ayn", prefix: "ar"},
+    "ayp": {preferred: "ayp", prefix: "ar"},
+    "bbz": {preferred: "bbz", prefix: "ar"},
+    "bfi": {preferred: "bfi", prefix: "sgn"},
+    "bfk": {preferred: "bfk", prefix: "sgn"},
+    "bjn": {preferred: "bjn", prefix: "ms"},
+    "bog": {preferred: "bog", prefix: "sgn"},
+    "bqn": {preferred: "bqn", prefix: "sgn"},
+    "bqy": {preferred: "bqy", prefix: "sgn"},
+    "btj": {preferred: "btj", prefix: "ms"},
+    "bve": {preferred: "bve", prefix: "ms"},
+    "bvl": {preferred: "bvl", prefix: "sgn"},
+    "bvu": {preferred: "bvu", prefix: "ms"},
+    "bzs": {preferred: "bzs", prefix: "sgn"},
+    "cdo": {preferred: "cdo", prefix: "zh"},
+    "cds": {preferred: "cds", prefix: "sgn"},
+    "cjy": {preferred: "cjy", prefix: "zh"},
+    "cmn": {preferred: "cmn", prefix: "zh"},
+    "coa": {preferred: "coa", prefix: "ms"},
+    "cpx": {preferred: "cpx", prefix: "zh"},
+    "csc": {preferred: "csc", prefix: "sgn"},
+    "csd": {preferred: "csd", prefix: "sgn"},
+    "cse": {preferred: "cse", prefix: "sgn"},
+    "csf": {preferred: "csf", prefix: "sgn"},
+    "csg": {preferred: "csg", prefix: "sgn"},
+    "csl": {preferred: "csl", prefix: "sgn"},
+    "csn": {preferred: "csn", prefix: "sgn"},
+    "csq": {preferred: "csq", prefix: "sgn"},
+    "csr": {preferred: "csr", prefix: "sgn"},
+    "czh": {preferred: "czh", prefix: "zh"},
+    "czo": {preferred: "czo", prefix: "zh"},
+    "doq": {preferred: "doq", prefix: "sgn"},
+    "dse": {preferred: "dse", prefix: "sgn"},
+    "dsl": {preferred: "dsl", prefix: "sgn"},
+    "dup": {preferred: "dup", prefix: "ms"},
+    "ecs": {preferred: "ecs", prefix: "sgn"},
+    "esl": {preferred: "esl", prefix: "sgn"},
+    "esn": {preferred: "esn", prefix: "sgn"},
+    "eso": {preferred: "eso", prefix: "sgn"},
+    "eth": {preferred: "eth", prefix: "sgn"},
+    "fcs": {preferred: "fcs", prefix: "sgn"},
+    "fse": {preferred: "fse", prefix: "sgn"},
+    "fsl": {preferred: "fsl", prefix: "sgn"},
+    "fss": {preferred: "fss", prefix: "sgn"},
+    "gan": {preferred: "gan", prefix: "zh"},
+    "gds": {preferred: "gds", prefix: "sgn"},
+    "gom": {preferred: "gom", prefix: "kok"},
+    "gse": {preferred: "gse", prefix: "sgn"},
+    "gsg": {preferred: "gsg", prefix: "sgn"},
+    "gsm": {preferred: "gsm", prefix: "sgn"},
+    "gss": {preferred: "gss", prefix: "sgn"},
+    "gus": {preferred: "gus", prefix: "sgn"},
+    "hab": {preferred: "hab", prefix: "sgn"},
+    "haf": {preferred: "haf", prefix: "sgn"},
+    "hak": {preferred: "hak", prefix: "zh"},
+    "hds": {preferred: "hds", prefix: "sgn"},
+    "hji": {preferred: "hji", prefix: "ms"},
+    "hks": {preferred: "hks", prefix: "sgn"},
+    "hos": {preferred: "hos", prefix: "sgn"},
+    "hps": {preferred: "hps", prefix: "sgn"},
+    "hsh": {preferred: "hsh", prefix: "sgn"},
+    "hsl": {preferred: "hsl", prefix: "sgn"},
+    "hsn": {preferred: "hsn", prefix: "zh"},
+    "icl": {preferred: "icl", prefix: "sgn"},
+    "ils": {preferred: "ils", prefix: "sgn"},
+    "inl": {preferred: "inl", prefix: "sgn"},
+    "ins": {preferred: "ins", prefix: "sgn"},
+    "ise": {preferred: "ise", prefix: "sgn"},
+    "isg": {preferred: "isg", prefix: "sgn"},
+    "isr": {preferred: "isr", prefix: "sgn"},
+    "jak": {preferred: "jak", prefix: "ms"},
+    "jax": {preferred: "jax", prefix: "ms"},
+    "jcs": {preferred: "jcs", prefix: "sgn"},
+    "jhs": {preferred: "jhs", prefix: "sgn"},
+    "jls": {preferred: "jls", prefix: "sgn"},
+    "jos": {preferred: "jos", prefix: "sgn"},
+    "jsl": {preferred: "jsl", prefix: "sgn"},
+    "jus": {preferred: "jus", prefix: "sgn"},
+    "kgi": {preferred: "kgi", prefix: "sgn"},
+    "knn": {preferred: "knn", prefix: "kok"},
+    "kvb": {preferred: "kvb", prefix: "ms"},
+    "kvk": {preferred: "kvk", prefix: "sgn"},
+    "kvr": {preferred: "kvr", prefix: "ms"},
+    "kxd": {preferred: "kxd", prefix: "ms"},
+    "lbs": {preferred: "lbs", prefix: "sgn"},
+    "lce": {preferred: "lce", prefix: "ms"},
+    "lcf": {preferred: "lcf", prefix: "ms"},
+    "liw": {preferred: "liw", prefix: "ms"},
+    "lls": {preferred: "lls", prefix: "sgn"},
+    "lsg": {preferred: "lsg", prefix: "sgn"},
+    "lsl": {preferred: "lsl", prefix: "sgn"},
+    "lso": {preferred: "lso", prefix: "sgn"},
+    "lsp": {preferred: "lsp", prefix: "sgn"},
+    "lst": {preferred: "lst", prefix: "sgn"},
+    "lsy": {preferred: "lsy", prefix: "sgn"},
+    "ltg": {preferred: "ltg", prefix: "lv"},
+    "lvs": {preferred: "lvs", prefix: "lv"},
+    "lzh": {preferred: "lzh", prefix: "zh"},
+    "max": {preferred: "max", prefix: "ms"},
+    "mdl": {preferred: "mdl", prefix: "sgn"},
+    "meo": {preferred: "meo", prefix: "ms"},
+    "mfa": {preferred: "mfa", prefix: "ms"},
+    "mfb": {preferred: "mfb", prefix: "ms"},
+    "mfs": {preferred: "mfs", prefix: "sgn"},
+    "min": {preferred: "min", prefix: "ms"},
+    "mnp": {preferred: "mnp", prefix: "zh"},
+    "mqg": {preferred: "mqg", prefix: "ms"},
+    "mre": {preferred: "mre", prefix: "sgn"},
+    "msd": {preferred: "msd", prefix: "sgn"},
+    "msi": {preferred: "msi", prefix: "ms"},
+    "msr": {preferred: "msr", prefix: "sgn"},
+    "mui": {preferred: "mui", prefix: "ms"},
+    "mzc": {preferred: "mzc", prefix: "sgn"},
+    "mzg": {preferred: "mzg", prefix: "sgn"},
+    "mzy": {preferred: "mzy", prefix: "sgn"},
+    "nan": {preferred: "nan", prefix: "zh"},
+    "nbs": {preferred: "nbs", prefix: "sgn"},
+    "ncs": {preferred: "ncs", prefix: "sgn"},
+    "nsi": {preferred: "nsi", prefix: "sgn"},
+    "nsl": {preferred: "nsl", prefix: "sgn"},
+    "nsp": {preferred: "nsp", prefix: "sgn"},
+    "nsr": {preferred: "nsr", prefix: "sgn"},
+    "nzs": {preferred: "nzs", prefix: "sgn"},
+    "okl": {preferred: "okl", prefix: "sgn"},
+    "orn": {preferred: "orn", prefix: "ms"},
+    "ors": {preferred: "ors", prefix: "ms"},
+    "pel": {preferred: "pel", prefix: "ms"},
+    "pga": {preferred: "pga", prefix: "ar"},
+    "pks": {preferred: "pks", prefix: "sgn"},
+    "prl": {preferred: "prl", prefix: "sgn"},
+    "prz": {preferred: "prz", prefix: "sgn"},
+    "psc": {preferred: "psc", prefix: "sgn"},
+    "psd": {preferred: "psd", prefix: "sgn"},
+    "pse": {preferred: "pse", prefix: "ms"},
+    "psg": {preferred: "psg", prefix: "sgn"},
+    "psl": {preferred: "psl", prefix: "sgn"},
+    "pso": {preferred: "pso", prefix: "sgn"},
+    "psp": {preferred: "psp", prefix: "sgn"},
+    "psr": {preferred: "psr", prefix: "sgn"},
+    "pys": {preferred: "pys", prefix: "sgn"},
+    "rms": {preferred: "rms", prefix: "sgn"},
+    "rsi": {preferred: "rsi", prefix: "sgn"},
+    "rsl": {preferred: "rsl", prefix: "sgn"},
+    "sdl": {preferred: "sdl", prefix: "sgn"},
+    "sfb": {preferred: "sfb", prefix: "sgn"},
+    "sfs": {preferred: "sfs", prefix: "sgn"},
+    "sgg": {preferred: "sgg", prefix: "sgn"},
+    "sgx": {preferred: "sgx", prefix: "sgn"},
+    "shu": {preferred: "shu", prefix: "ar"},
+    "slf": {preferred: "slf", prefix: "sgn"},
+    "sls": {preferred: "sls", prefix: "sgn"},
+    "sqk": {preferred: "sqk", prefix: "sgn"},
+    "sqs": {preferred: "sqs", prefix: "sgn"},
+    "ssh": {preferred: "ssh", prefix: "ar"},
+    "ssp": {preferred: "ssp", prefix: "sgn"},
+    "ssr": {preferred: "ssr", prefix: "sgn"},
+    "svk": {preferred: "svk", prefix: "sgn"},
+    "swc": {preferred: "swc", prefix: "sw"},
+    "swh": {preferred: "swh", prefix: "sw"},
+    "swl": {preferred: "swl", prefix: "sgn"},
+    "syy": {preferred: "syy", prefix: "sgn"},
+    "tmw": {preferred: "tmw", prefix: "ms"},
+    "tse": {preferred: "tse", prefix: "sgn"},
+    "tsm": {preferred: "tsm", prefix: "sgn"},
+    "tsq": {preferred: "tsq", prefix: "sgn"},
+    "tss": {preferred: "tss", prefix: "sgn"},
+    "tsy": {preferred: "tsy", prefix: "sgn"},
+    "tza": {preferred: "tza", prefix: "sgn"},
+    "ugn": {preferred: "ugn", prefix: "sgn"},
+    "ugy": {preferred: "ugy", prefix: "sgn"},
+    "ukl": {preferred: "ukl", prefix: "sgn"},
+    "uks": {preferred: "uks", prefix: "sgn"},
+    "urk": {preferred: "urk", prefix: "ms"},
+    "uzn": {preferred: "uzn", prefix: "uz"},
+    "uzs": {preferred: "uzs", prefix: "uz"},
+    "vgt": {preferred: "vgt", prefix: "sgn"},
+    "vkk": {preferred: "vkk", prefix: "ms"},
+    "vkt": {preferred: "vkt", prefix: "ms"},
+    "vsi": {preferred: "vsi", prefix: "sgn"},
+    "vsl": {preferred: "vsl", prefix: "sgn"},
+    "vsv": {preferred: "vsv", prefix: "sgn"},
+    "wuu": {preferred: "wuu", prefix: "zh"},
+    "xki": {preferred: "xki", prefix: "sgn"},
+    "xml": {preferred: "xml", prefix: "sgn"},
+    "xmm": {preferred: "xmm", prefix: "ms"},
+    "xms": {preferred: "xms", prefix: "sgn"},
+    "yds": {preferred: "yds", prefix: "sgn"},
+    "ysl": {preferred: "ysl", prefix: "sgn"},
+    "yue": {preferred: "yue", prefix: "zh"},
+    "zib": {preferred: "zib", prefix: "sgn"},
+    "zlm": {preferred: "zlm", prefix: "ms"},
+    "zmi": {preferred: "zmi", prefix: "ms"},
+    "zsl": {preferred: "zsl", prefix: "sgn"},
+    "zsm": {preferred: "zsm", prefix: "ms"},
+};
--- a/js/src/builtin/Utilities.js
+++ b/js/src/builtin/Utilities.js
@@ -1,16 +1,17 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-/*jshint bitwise: true, camelcase: false, curly: false, eqeqeq: true, forin: true,
-         immed: true, indent: 4, latedef: false, newcap: false, noarg: true,
-         noempty: true, nonew: true, plusplus: false, quotmark: false, regexp: true,
-         undef: true, unused: false, strict: false, trailing: true,
+/*jshint bitwise: true, camelcase: false, curly: false, eqeqeq: true,
+         es5: true, forin: true, immed: true, indent: 4, latedef: false,
+         newcap: false, noarg: true, noempty: true, nonew: true,
+         plusplus: false, quotmark: false, regexp: true, undef: true,
+         unused: false, strict: false, trailing: true,
 */
 
 /*global ToObject: false, ToInteger: false, IsCallable: false, ThrowError: false,
          AssertionFailed: false, MakeConstructible: false, DecompileArg: false,
          RuntimeDefaultLocale: false,
          callFunction: false,
          IS_UNDEFINED: false, TO_UINT32: false,
          JSMSG_NOT_FUNCTION: false, JSMSG_MISSING_FUN_ARG: false,
new file mode 100755
--- /dev/null
+++ b/js/src/builtin/make_intl_data.py
@@ -0,0 +1,199 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+""" Usage: make_intl_data.py [language-subtag-registry.txt]
+
+    This script extracts information about mappings between deprecated and
+    current BCP 47 language tags from the IANA Language Subtag Registry and
+    converts it to JavaScript object definitions in IntlData.js. The definitions
+    are used in Intl.js.
+
+    The IANA Language Subtag Registry is imported from
+    http://www.iana.org/assignments/language-subtag-registry
+    and uses the syntax specified in
+    http://tools.ietf.org/html/rfc5646#section-3
+"""
+
+def readRegistryRecord(registry):
+    """ Yields the records of the IANA Language Subtag Registry as dictionaries.
+
+        registry is an iterable of text lines. Records are separated by lines
+        consisting of "%%"; blank lines are ignored. Each "Name: value" line
+        becomes a dictionary entry; if a field name occurs more than once in
+        a record, the last value wins. Lines that start with whitespace are
+        folded continuation lines
+        (http://tools.ietf.org/html/rfc5646#section-3.1.1) and are appended to
+        the value of the preceding field, separated by a space, even if they
+        contain a colon. Unindented lines without a colon are treated as
+        continuation lines as well.
+
+        Raises ValueError for a continuation line that has no preceding field
+        in the same record.
+    """
+    record = {}
+    key = None
+    for rawLine in registry:
+        line = rawLine.strip()
+        if line == "":
+            continue
+        if line == "%%":
+            yield record
+            record = {}
+            # A continuation line must never attach to a field of the
+            # previous record.
+            key = None
+            continue
+        # Folding is signalled by leading whitespace, which strip() removed
+        # from line, so look at the raw line.
+        isFolded = rawLine[0] in " \t"
+        if ":" in line and not (isFolded and key is not None):
+            key, value = line.split(":", 1)
+            key, value = key.strip(), value.strip()
+            record[key] = value
+        elif key is not None:
+            # continuation line
+            record[key] += " " + line
+        else:
+            raise ValueError("Continuation line without preceding field: " + line)
+    if record:
+        yield record
+
+
+def readRegistry(registry):
+    """ Reads IANA Language Subtag Registry and extracts information for Intl.js.
+
+        Information extracted:
+        - langTagMappings: mappings from complete language tags to preferred
+          complete language tags
+        - langSubtagMappings: mappings from subtags to preferred subtags
+        - extlangMappings: mappings from extlang subtags to preferred subtags,
+          with prefix to be removed
+        Returns a dictionary with these three mappings under the keys
+        "langTagMappings", "langSubtagMappings", and "extlangMappings", along
+        with the registry's file date under the key "fileDate".
+
+        We also check that mappings for language subtags don't affect extlang
+        subtags and vice versa, so that CanonicalizeLanguageTag doesn't have
+        to separate them for processing. Region codes are separated by case,
+        and script codes by length, so they're unproblematic.
+
+        Raises Exception if the registry has no File-Date record, contains a
+        record of an unrecognized type, contains a subtag mapping (other than
+        heploc) with a prefix, or contains conflicting language and extlang
+        mappings.
+    """
+    langTagMappings = {}
+    langSubtagMappings = {}
+    extlangMappings = {}
+    languageSubtags = set()
+    extlangSubtags = set()
+    # Stays None if the registry has no File-Date record; checked below.
+    fileDate = None
+
+    for record in readRegistryRecord(registry):
+        if "File-Date" in record:
+            fileDate = record["File-Date"]
+            continue
+
+        recordType = record["Type"]
+        if recordType == "grandfathered":
+            # Grandfathered tags don't use standard syntax, so
+            # CanonicalizeLanguageTag expects the mapping table to provide
+            # the final form for all.
+            # For langTagMappings, keys must be in lower case; values in
+            # the case used in the registry.
+            tag = record["Tag"]
+            langTagMappings[tag.lower()] = record.get("Preferred-Value", tag)
+        elif recordType == "redundant":
+            # For langTagMappings, keys must be in lower case; values in
+            # the case used in the registry.
+            if "Preferred-Value" in record:
+                langTagMappings[record["Tag"].lower()] = record["Preferred-Value"]
+        elif recordType in ("language", "script", "region", "variant"):
+            # For langSubtagMappings, keys and values must be in the case used
+            # in the registry.
+            subtag = record["Subtag"]
+            if recordType == "language":
+                languageSubtags.add(subtag)
+            if "Preferred-Value" in record:
+                if subtag == "heploc":
+                    # The entry for heploc is unique in its complexity; handle
+                    # it as special case below.
+                    continue
+                if "Prefix" in record:
+                    # This might indicate another heploc-like complex case.
+                    raise Exception("Please evaluate: subtag mapping with prefix value.")
+                langSubtagMappings[subtag] = record["Preferred-Value"]
+        elif recordType == "extlang":
+            # For extlangMappings, keys must be in the case used in the
+            # registry; values are records with the preferred value and the
+            # prefix to be removed.
+            subtag = record["Subtag"]
+            extlangSubtags.add(subtag)
+            if "Preferred-Value" in record:
+                preferred = record["Preferred-Value"]
+                prefix = record["Prefix"]
+                extlangMappings[subtag] = {"preferred": preferred, "prefix": prefix}
+        else:
+            # No other types are allowed by
+            # http://tools.ietf.org/html/rfc5646#section-3.1.3
+            # Raise rather than assert so the check survives "python -O".
+            raise Exception("Unrecognized Type: {0}".format(recordType))
+
+    # The file date ends up in the comments of the generated file; fail with
+    # a clear message instead of an UnboundLocalError if it is missing.
+    if fileDate is None:
+        raise Exception("Registry does not contain a File-Date record.")
+
+    # Check that mappings for language subtags and extlang subtags don't affect
+    # each other.
+    for lang in languageSubtags:
+        if lang in extlangMappings and extlangMappings[lang]["preferred"] != lang:
+            raise Exception("Conflict: lang with extlang mapping: " + lang)
+    for extlang in extlangSubtags:
+        if extlang in langSubtagMappings:
+            raise Exception("Conflict: extlang with lang mapping: " + extlang)
+
+    # Special case for heploc.
+    langTagMappings["ja-latn-hepburn-heploc"] = "ja-Latn-alalc97"
+
+    return {"fileDate": fileDate,
+            "langTagMappings": langTagMappings,
+            "langSubtagMappings": langSubtagMappings,
+            "extlangMappings": extlangMappings}
+
+
+def writeMappingsVar(intlData, dict, name, description, fileDate, url):
+    """ Emits one mapping table as a JavaScript variable definition.
+
+        The output goes to the file object intlData. It consists of a blank
+        line, a three-line comment built from description, fileDate, and url,
+        and an object literal assigned to the variable called name. The
+        entries of dictionary dict are written in sorted key order; a string
+        value becomes a quoted string, any other value must provide
+        "preferred" and "prefix" entries and becomes a nested object literal.
+    """
+    emit = intlData.write
+    emit("\n")
+    emit("// {0}.\n".format(description))
+    emit("// Derived from IANA Language Subtag Registry, file date {0}.\n".format(fileDate))
+    emit("// {0}\n".format(url))
+    emit("var {0} = {{\n".format(name))
+    for key in sorted(dict):
+        entry = dict[key]
+        if not isinstance(entry, basestring):
+            # extlang-style entry: preferred value plus the prefix to remove
+            value = '{{preferred: "{0}", prefix: "{1}"}}'.format(entry["preferred"], entry["prefix"])
+        else:
+            value = '"{0}"'.format(entry)
+        emit('    "{0}": {1},\n'.format(key, value))
+    emit("};\n")
+
+
+def writeLanguageTagData(intlData, fileDate, url, langTagMappings, langSubtagMappings, extlangMappings):
+    """ Emits the three language tag mapping tables to the Intl data file.
+
+        The tables are written with writeMappingsVar in the order
+        langTagMappings, langSubtagMappings, extlangMappings; all of them
+        share the same fileDate and url in their comments.
+    """
+    tables = (
+        (langTagMappings, "langTagMappings",
+         "Mappings from complete tags to preferred values"),
+        (langSubtagMappings, "langSubtagMappings",
+         "Mappings from non-extlang subtags to preferred values"),
+        (extlangMappings, "extlangMappings",
+         "Mappings from extlang subtags to preferred values"),
+    )
+    for mappings, varName, description in tables:
+        writeMappingsVar(intlData, mappings, varName, description, fileDate, url)
+
+
+if __name__ == '__main__':
+    # Script entry point: obtains the registry (from the file named on the
+    # command line, or by downloading it and saving a local copy as
+    # language-subtag-registry.txt), extracts the mappings, and writes
+    # IntlData.js into the current directory.
+    import codecs
+    import sys
+    import urllib2
+
+    url = "http://www.iana.org/assignments/language-subtag-registry"
+    # Downloaded registry text; stays None when a local file is used.
+    text = None
+    if len(sys.argv) > 1:
+        print("Always make sure you have the newest language-subtag-registry.txt!")
+        registry = codecs.open(sys.argv[1], "r", encoding="utf-8")
+    else:
+        print("Downloading IANA Language Subtag Registry...")
+        reader = urllib2.urlopen(url)
+        try:
+            text = reader.read().decode("utf-8")
+        finally:
+            # Release the connection even if reading or decoding fails.
+            reader.close()
+        registry = codecs.open("language-subtag-registry.txt", "w+", encoding="utf-8")
+
+    try:
+        if text is not None:
+            # Save the downloaded copy, then rewind so it can be parsed.
+            registry.write(text)
+            registry.seek(0)
+
+        print("Processing IANA Language Subtag Registry...")
+        data = readRegistry(registry)
+    finally:
+        # Close the registry file even if the registry is rejected.
+        registry.close()
+    fileDate = data["fileDate"]
+    langTagMappings = data["langTagMappings"]
+    langSubtagMappings = data["langSubtagMappings"]
+    extlangMappings = data["extlangMappings"]
+
+    print("Writing Intl data...")
+    intlData = codecs.open("IntlData.js", "w", encoding="utf-8")
+    try:
+        intlData.write("// Generated by make_intl_data.py. DO NOT EDIT.\n")
+        writeLanguageTagData(intlData, fileDate, url, langTagMappings, langSubtagMappings, extlangMappings)
+    finally:
+        intlData.close()