Bug 1570370 - Part 6: Switch language tag parser from JS to C++. r=jwalden
author André Bargull <andre.bargull@gmail.com>
Fri, 11 Oct 2019 19:25:32 +0000
changeset 497319 7e272f3c9fa48620151dbdd970802be7d3c0271e
parent 497318 f22fdbd968ed04fedd47c406db2678fa5ae1205e
child 497320 e7dbd9ac5b7ddbc9fbad5386f1814fbce72be383
push id 36682
push user ncsoregi@mozilla.com
push date Sat, 12 Oct 2019 09:52:03 +0000
treeherder mozilla-central@06ea2371f897 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jwalden
bugs 1570370
milestone 71.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1570370 - Part 6: Switch language tag parser from JS to C++. r=jwalden Differential Revision: https://phabricator.services.mozilla.com/D40072
js/src/builtin/String.js
js/src/builtin/intl/CommonFunctions.js
js/src/builtin/intl/LanguageTag.cpp
js/src/builtin/intl/LanguageTag.h
js/src/builtin/intl/Locale.cpp
js/src/builtin/intl/Locale.h
js/src/vm/SelfHosting.cpp
--- a/js/src/builtin/String.js
+++ b/js/src/builtin/String.js
@@ -614,17 +614,17 @@ function String_toLocaleLowerCase() {
     // argument) first.
     var locales = arguments.length > 0 ? arguments[0] : undefined;
     var requestedLocale;
     if (locales === undefined) {
         // Steps 3, 6.
         requestedLocale = undefined;
     } else if (typeof locales === "string") {
         // Steps 3, 5.
-        requestedLocale = ValidateAndCanonicalizeLanguageTag(locales);
+        requestedLocale = intl_ValidateAndCanonicalizeLanguageTag(locales, false);
     } else {
         // Step 3.
         var requestedLocales = CanonicalizeLocaleList(locales);
 
         // Steps 4-6.
         requestedLocale = requestedLocales.length > 0 ? requestedLocales[0] : undefined;
     }
 
@@ -655,17 +655,17 @@ function String_toLocaleUpperCase() {
     // argument) first.
     var locales = arguments.length > 0 ? arguments[0] : undefined;
     var requestedLocale;
     if (locales === undefined) {
         // Steps 3, 6.
         requestedLocale = undefined;
     } else if (typeof locales === "string") {
         // Steps 3, 5.
-        requestedLocale = ValidateAndCanonicalizeLanguageTag(locales);
+        requestedLocale = intl_ValidateAndCanonicalizeLanguageTag(locales, false);
     } else {
         // Step 3.
         var requestedLocales = CanonicalizeLocaleList(locales);
 
         // Steps 4-6.
         requestedLocale = requestedLocales.length > 0 ? requestedLocales[0] : undefined;
     }
 
--- a/js/src/builtin/intl/CommonFunctions.js
+++ b/js/src/builtin/intl/CommonFunctions.js
@@ -2,21 +2,20 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 /* Portions Copyright Norbert Lindenberg 2011-2012. */
 
 #ifdef DEBUG
 #define assertIsValidAndCanonicalLanguageTag(locale, desc) \
     do { \
-        let localeObj = parseLanguageTag(locale); \
-        assert(localeObj !== null, \
+        let canonical = intl_TryValidateAndCanonicalizeLanguageTag(locale); \
+        assert(canonical !== null, \
                `${desc} is a structurally valid language tag`); \
-        CanonicalizeLanguageTagObject(localeObj); \
-        assert(StringFromLanguageTagObject(localeObj) === locale, \
+        assert(canonical === locale, \
                `${desc} is a canonicalized language tag`); \
     } while (false)
 #else
 #define assertIsValidAndCanonicalLanguageTag(locale, desc) ; // Elided assertion.
 #endif
 
 /**
  * Returns the start index of a "Unicode locale extension sequence", which the
@@ -114,1099 +113,30 @@ function getUnicodeExtensions(locale) {
 
     var start = startOfUnicodeExtensions(locale);
     assert(start >= 0, "start of Unicode extension sequence not found");
     var end = endOfUnicodeExtensions(locale, start);
 
     return Substring(locale, start, end - start);
 }
 
-// The three possible token type bits. Expressed as #defines to avoid
-// extra named lookups in the interpreter/jits.
-#define NONE  0b00
-#define ALPHA 0b01
-#define DIGIT 0b10
-
-// Constants for code units used below.
-#define HYPHEN  0x2D
-#define DIGIT_ZERO 0x30
-#define DIGIT_NINE 0x39
-#define UPPER_A 0x41
-#define UPPER_Z 0x5A
-#define LOWER_A 0x61
-#define LOWER_T 0x74
-#define LOWER_U 0x75
-#define LOWER_X 0x78
-#define LOWER_Z 0x7A
-
-// The requirement to use callFunction() for method calls makes the parser
-// harder to read. Use macros for the rescue.
-
-// Reads the next token.
-#define NEXT_TOKEN_OR_RETURN_NULL(ts)       \
-    if (!callFunction(ts.nextToken, ts))    \
-        return null;
-
-#ifdef DEBUG
-#define NEXT_TOKEN_OR_ASSERT(ts)            \
-    if (!callFunction(ts.nextToken, ts))    \
-        assert(false, "unexpected invalid subtag");
-#else
-#define NEXT_TOKEN_OR_ASSERT(ts)            \
-    callFunction(ts.nextToken, ts);
-#endif
-
-// Assigns the current subtag part transformed to lower-case to the target.
-#define SUBTAG_VAR_OR_RETURN_NULL(ts, target)                                   \
-    {                                                                           \
-        target = Substring(ts.localeLowercase, ts.tokenStart, ts.tokenLength);  \
-        NEXT_TOKEN_OR_RETURN_NULL(ts);                                          \
-    }
-
-// Assigns the current subtag part transformed to lower-case to the target.
-#define SUBTAG_VAR_OR_ASSERT(ts, target)                                        \
-    {                                                                           \
-        target = Substring(ts.localeLowercase, ts.tokenStart, ts.tokenLength);  \
-        NEXT_TOKEN_OR_ASSERT(ts)                                                \
-    }
-
-/**
- * Tokenizer for Unicode BCP 47 locale identifiers.
- */
-function BCP47TokenStream(locale) {
-    this.locale = locale;
-
-    // Locale identifiers are compared and processed case-insensitively, so
-    // technically it's not necessary to adjust case. But for easier processing,
-    // and because the canonical form for most subtags is lower case, we start
-    // with lower case for all.
-    //
-    // Note that the tokenizer function keeps using the original input string
-    // to properly detect non-ASCII characters. The lower-case string can't be
-    // used to detect those characters, because some non-ASCII characters
-    // lower-case map into ASCII characters, e.g. U+212A (KELVIN SIGN) lower-
-    // case maps to U+006B (LATIN SMALL LETTER K).
-    this.localeLowercase = callFunction(std_String_toLowerCase, locale);
-
-    // Current parse index in |locale|.
-    this.index = 0;
-
-    // The current token type, its start index, and its length.
-    this.token = NONE;
-    this.tokenStart = 0;
-    this.tokenLength = 0;
-
-    assert(std_String_fromCharCode(HYPHEN) === "-" &&
-           std_String_fromCharCode(DIGIT_ZERO) === "0" &&
-           std_String_fromCharCode(DIGIT_NINE) === "9" &&
-           std_String_fromCharCode(UPPER_A) === "A" &&
-           std_String_fromCharCode(UPPER_Z) === "Z" &&
-           std_String_fromCharCode(LOWER_A) === "a" &&
-           std_String_fromCharCode(LOWER_T) === "t" &&
-           std_String_fromCharCode(LOWER_U) === "u" &&
-           std_String_fromCharCode(LOWER_X) === "x" &&
-           std_String_fromCharCode(LOWER_Z) === "z",
-           "code unit constants should match the expected characters");
-}
-
-MakeConstructible(BCP47TokenStream, {
-    __proto__: null,
-
-    // Reads the next token, returns |false| if an illegal character was found,
-    // otherwise returns |true|.
-    //
-    // eslint-disable-next-line object-shorthand
-    nextToken: function() {
-        var type = NONE;
-        var {index, locale} = this;
-        for (var i = index; i < locale.length; i++) {
-            // UTS 35, section 3.1.
-            // alpha = [A-Z a-z] ;
-            // digit = [0-9] ;
-            var c = callFunction(std_String_charCodeAt, locale, i);
-            if ((UPPER_A <= c && c <= UPPER_Z) || (LOWER_A <= c && c <= LOWER_Z))
-                type |= ALPHA;
-            else if (DIGIT_ZERO <= c && c <= DIGIT_NINE)
-                type |= DIGIT;
-            else if (c === HYPHEN && i > index && i + 1 < locale.length)
-                break;
-            else
-                return false;
-        }
-
-        this.token = type;
-        this.tokenStart = index;
-        this.tokenLength = i - index;
-        this.index = i + 1;
-        return true;
-    },
-
-    // Returns true if the character at the requested index within the current
-    // token is a digit.
-    //
-    // eslint-disable-next-line object-shorthand
-    isDigitAt: function(index) {
-        assert(0 <= index && index < this.tokenLength,
-               "must be an index into the current token");
-        var c = callFunction(std_String_charCodeAt, this.localeLowercase, this.tokenStart + index);
-        assert(!(c <= DIGIT_NINE) || c >= DIGIT_ZERO,
-               "token-start-code-unit <= '9' implies token-start-code-unit is in '0'..'9' " +
-               "and because all digits are sorted before any letters");
-        return c <= DIGIT_NINE;
-    },
-
-    // Returns the code unit of the first character at the current token
-    // position. Always returns the lower-case form of an alphabetical
-    // character.
-    //
-    // eslint-disable-next-line object-shorthand
-    singletonKey: function() {
-        assert(this.tokenLength === 1, "token is not a singleton");
-        var c = callFunction(std_String_charCodeAt, this.localeLowercase, this.tokenStart);
-        assert((DIGIT_ZERO <= c && c <= DIGIT_NINE) || (LOWER_A <= c && c <= LOWER_Z),
-               "unexpected code unit");
-        return c;
-    },
-
-    // eslint-disable-next-line object-shorthand
-    singletonValue: function() {
-        var singletonStart = this.tokenStart;
-        var min = callFunction(this.singletonKey, this) === LOWER_X ? 1 : 2;
-
-        NEXT_TOKEN_OR_RETURN_NULL(this);
-
-        // At least one non-singleton subtag must be present.
-        if (!(min <= this.tokenLength && this.tokenLength <= 8))
-            return null;
-        do {
-            NEXT_TOKEN_OR_RETURN_NULL(this);
-        } while (min <= this.tokenLength && this.tokenLength <= 8);
-
-        return callFunction(this.singletonValueAt, this, singletonStart);
-    },
-
-    // eslint-disable-next-line object-shorthand
-    singletonValueAt: function(start) {
-        // Singletons must be followed by a non-singleton subtag, "en-a-b" is not allowed.
-        var length = this.tokenStart - 1 - start;
-        if (length <= 2)
-            return null;
-        return Substring(this.localeLowercase, start, length);
-    }
-});
-
-/* eslint-disable complexity */
-/**
- * Parser for Unicode BCP 47 locale identifiers.
- *
- * Returns null if |locale| can't be parsed as a `unicode_locale_id`. If the
- * input is a grandfathered language tag, it is directly canonicalized to its
- * modern form. The returned object has the following structure:
- *
- *   {
- *     language: `unicode_language_subtag`,
- *     script: `unicode_script_subtag` / undefined,
- *     region: `unicode_region_subtag` / undefined,
- *     variants: array of `unicode_variant_subtag`,
- *     extensions: array of `extensions`,
- *     privateuse: `pu_extensions` / undefined,
- *   }
- *
- * All locale identifier subtags are returned in their normalized case:
- *
- *   var langtag = parseLanguageTag("en-latn-us");
- *   assertEq("en", langtag.language);
- *   assertEq("Latn", langtag.script);
- *   assertEq("US", langtag.region);
- *
- * Spec: https://unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers
- */
-function parseLanguageTag(locale) {
-    assert(typeof locale === "string", "locale is a string");
-
-    // unicode_locale_id = unicode_language_id
-    //                     extensions*
-    //                     pu_extensions? ;
-    var ts = new BCP47TokenStream(locale);
-    NEXT_TOKEN_OR_RETURN_NULL(ts);
-
-    var language, script, region, privateuse;
-    var variants = [];
-    var extensions = [];
-
-    // unicode_language_id = unicode_language_subtag
-    //                       (sep unicode_script_subtag)?
-    //                       (sep unicode_region_subtag)?
-    //                       (sep unicode_variant_subtag)* ;
-    //
-    // sep                 = "-"
-    //
-    // Note: Unicode CLDR locale identifier backward compatibility extensions
-    //       removed from `unicode_language_id`.
-
-    // unicode_language_subtag = alpha{2,3} | alpha{5,8} ;
-    if (ts.token !== ALPHA || ts.tokenLength === 1 || ts.tokenLength === 4 || ts.tokenLength > 8) {
-        // Four character language subtags are not allowed in Unicode BCP 47
-        // locale identifiers. Also see the comparison to Unicode CLDR locale
-        // identifiers in <https://unicode.org/reports/tr35/#BCP_47_Conformance>.
-        return null;
-    }
-    assert((2 <= ts.tokenLength && ts.tokenLength <= 3) ||
-           (5 <= ts.tokenLength && ts.tokenLength <= 8),
-           "language subtags have 2-3 or 5-8 letters");
-
-    SUBTAG_VAR_OR_RETURN_NULL(ts, language);
-
-    // unicode_script_subtag = alpha{4} ;
-    if (ts.tokenLength === 4 && ts.token === ALPHA) {
-        SUBTAG_VAR_OR_RETURN_NULL(ts, script);
-
-        // The first character of a script code needs to be capitalized.
-        // "hans" -> "Hans"
-        script = callFunction(std_String_toUpperCase, script[0]) +
-                 Substring(script, 1, script.length - 1);
-    }
-
-    // unicode_region_subtag = (alpha{2} | digit{3}) ;
-    if ((ts.tokenLength === 2 && ts.token === ALPHA) ||
-        (ts.tokenLength === 3 && ts.token === DIGIT))
-    {
-        SUBTAG_VAR_OR_RETURN_NULL(ts, region);
-
-        // Region codes need to be in upper-case. "bu" -> "BU"
-        region = callFunction(std_String_toUpperCase, region);
-    }
-
-    // unicode_variant_subtag = (alphanum{5,8}
-    //                        | digit alphanum{3}) ;
-    //
-    // alphanum               = [0-9 A-Z a-z] ;
-    while ((5 <= ts.tokenLength && ts.tokenLength <= 8) ||
-           (ts.tokenLength === 4 && callFunction(ts.isDigitAt, ts, 0)))
-    {
-        // Locale identifiers are case insensitive (UTS 35, section 3.2).
-        // All seen variants are compared ignoring case differences by
-        // using the lower-case form. This allows to properly detect and
-        // reject variant repetitions with differing case, e.g.
-        // "en-variant-Variant".
-        var variant;
-        SUBTAG_VAR_OR_RETURN_NULL(ts, variant);
-
-        // Reject the Locale identifier if a duplicate variant was found.
-        //
-        // This linear-time verification step means the whole variant
-        // subtag checking is potentially quadratic, but we're okay doing
-        // that because language tags are unlikely to be deliberately
-        // pathological.
-        if (callFunction(ArrayIndexOf, variants, variant) !== -1)
-            return null;
-        _DefineDataProperty(variants, variants.length, variant);
-    }
-
-    // extensions = unicode_locale_extensions
-    //            | transformed_extensions
-    //            | other_extensions ;
-    //
-    // unicode_locale_extensions = sep [uU]
-    //                             ((sep keyword)+
-    //                             |(sep attribute)+ (sep keyword)*) ;
-    //
-    // transformed_extensions = sep [tT]
-    //                          ((sep tlang (sep tfield)*)
-    //                          |(sep tfield)+) ;
-    //
-    // other_extensions = [alphanum-[tTuUxX]] (sep alphanum{2,8})+ ;
-    //
-    // keyword = key (sep type)? ;
-    //
-    // key = alphanum alpha ;
-    //
-    // type = alphanum{3,8} (sep alphanum{3,8})* ;
-    //
-    // attribute = alphanum{3,8} ;
-    //
-    // tlang = unicode_language_subtag
-    //         (sep unicode_script_subtag)?
-    //         (sep unicode_region_subtag)?
-    //         (sep unicode_variant_subtag)* ;
-    //
-    // tfield = tkey tvalue;
-    //
-    // tkey = alpha digit ;
-    //
-    // tvalue = (sep alphanum{3,8})+ ;
-    var seenSingletons = [];
-    while (ts.tokenLength === 1) {
-        var singleton = callFunction(ts.singletonKey, ts);
-        if (singleton === LOWER_X)
-            break;
-
-        // Locale identifiers are case insensitive (UTS 35, section 3.2).
-        // Ensure |singletonKey()| does not return the code unit of an
-        // upper-case character, so we can properly detect and reject
-        // singletons with different case, e.g. "en-u-foo-U-foo".
-        assert(!(UPPER_A <= singleton && singleton <= UPPER_Z),
-               "unexpected upper-case code unit");
-
-        // Reject the input if a duplicate singleton was found.
-        //
-        // Similar to the variant validation step this check is O(n**2),
-        // but given that there are only 35 possible singletons the
-        // quadratic runtime is negligible.
-        if (callFunction(ArrayIndexOf, seenSingletons, singleton) !== -1)
-            return null;
-        _DefineDataProperty(seenSingletons, seenSingletons.length, singleton);
-
-        var extension;
-        if (singleton === LOWER_U) {
-            var extensionStart = ts.tokenStart;
-            NEXT_TOKEN_OR_RETURN_NULL(ts);
-
-            while (2 <= ts.tokenLength && ts.tokenLength <= 8) {
-                // `key` doesn't allow a digit as its second character.
-                if (ts.tokenLength === 2 && callFunction(ts.isDigitAt, ts, 1))
-                    return null;
-                NEXT_TOKEN_OR_RETURN_NULL(ts);
-            }
-            extension = callFunction(ts.singletonValueAt, ts, extensionStart);
-        } else if (singleton === LOWER_T) {
-            var extensionStart = ts.tokenStart;
-            NEXT_TOKEN_OR_RETURN_NULL(ts);
-
-            // `tfield` starts with `tkey`, which in turn is `alpha digit`, so
-            // an alpha-only token must be a `tlang`.
-            if (ts.token === ALPHA) {
-                // `unicode_language_subtag`
-                if (ts.tokenLength === 1 || ts.tokenLength === 4 || ts.tokenLength > 8)
-                    return null;
-                NEXT_TOKEN_OR_RETURN_NULL(ts);
-
-                // `unicode_script_subtag` (optional)
-                if (ts.tokenLength === 4 && ts.token === ALPHA) {
-                    NEXT_TOKEN_OR_RETURN_NULL(ts);
-                }
-
-                // `unicode_region_subtag` (optional)
-                if ((ts.tokenLength === 2 && ts.token === ALPHA) ||
-                    (ts.tokenLength === 3 && ts.token === DIGIT))
-                {
-                    NEXT_TOKEN_OR_RETURN_NULL(ts);
-                }
-
-                // `unicode_variant_subtag` (optional)
-                while ((5 <= ts.tokenLength && ts.tokenLength <= 8) ||
-                       (ts.tokenLength === 4 && callFunction(ts.isDigitAt, ts, 0)))
-                {
-                    NEXT_TOKEN_OR_RETURN_NULL(ts);
-                }
-            }
-
-            // Trailing `tfield` subtags.
-            while (ts.tokenLength === 2) {
-                // `tkey` is `alpha digit`.
-                if (callFunction(ts.isDigitAt, ts, 0) ||
-                    !callFunction(ts.isDigitAt, ts, 1))
-                {
-                    return null;
-                }
-                NEXT_TOKEN_OR_RETURN_NULL(ts);
-
-                // `tfield` requires at least one `tvalue`.
-                if (!(3 <= ts.tokenLength && ts.tokenLength <= 8))
-                    return null;
-                do {
-                    NEXT_TOKEN_OR_RETURN_NULL(ts);
-                } while (3 <= ts.tokenLength && ts.tokenLength <= 8);
-            }
-            extension = callFunction(ts.singletonValueAt, ts, extensionStart);
-        } else {
-            extension = callFunction(ts.singletonValue, ts);
-        }
-        if (!extension)
-            return null;
-
-        _DefineDataProperty(extensions, extensions.length, extension);
-    }
-
-    // Trailing pu_extensions component of the unicode_locale_id production.
-    //
-    // pu_extensions = sep [xX] (sep alphanum{1,8})+ ;
-    if (ts.tokenLength === 1 && callFunction(ts.singletonKey, ts) === LOWER_X) {
-        privateuse = callFunction(ts.singletonValue, ts);
-        if (!privateuse)
-            return null;
-    }
-
-    // Reject the input if it couldn't be parsed completely.
-    if (ts.token !== NONE)
-        return null;
-
-    var tagObj = {
-        language,
-        script,
-        region,
-        variants,
-        extensions,
-        privateuse,
-    };
-
-    // Handle grandfathered tags right away, so we don't need to have extra
-    // paths for grandfathered tags later on.
-    //
-    // grandfathered = "art-lojban"     ; non-redundant tags registered
-    //               / "cel-gaulish"    ; during the RFC 3066 era
-    //               / "zh-guoyu"       ; these tags match the 'langtag'
-    //               / "zh-hakka"       ; production, but their subtags
-    //               / "zh-xiang"       ; are not extended language
-    //                                  ; or variant subtags: their meaning
-    //                                  ; is defined by their registration
-    //                                  ; and all of these are deprecated
-    //                                  ; in favor of a more modern
-    //                                  ; subtag or sequence of subtags
-    if (hasOwn(ts.localeLowercase, grandfatheredMappings))
-        updateGrandfatheredMappings(tagObj);
-
-    // Return if the complete input was successfully parsed.
-    return tagObj;
-}
-
-/**
- * Return the locale and fields components of the given valid Transform
- * extension subtag.
- */
-function TransformExtensionComponents(extension) {
-    assert(typeof extension === "string", "extension is a String value");
-    assert(callFunction(std_String_startsWith, extension, "t-"),
-           "extension starts with 't-'");
-
-    var ts = new BCP47TokenStream(Substring(extension, 2, extension.length - 2));
-    NEXT_TOKEN_OR_ASSERT(ts);
-
-    // `tfield` starts with `tkey`, which in turn is `alpha digit`, so
-    // an alpha-only token must be a `tlang`.
-    var localeObj;
-    if (ts.token === ALPHA) {
-        // `unicode_language_subtag`
-        assert((2 <= ts.tokenLength && ts.tokenLength <= 3) ||
-                (5 <= ts.tokenLength && ts.tokenLength <= 8),
-                "language subtags have 2-3 or 5-8 letters");
-
-        var language;
-        SUBTAG_VAR_OR_ASSERT(ts, language);
-
-        // unicode_script_subtag = alpha{4} ;
-        var script;
-        if (ts.tokenLength === 4 && ts.token === ALPHA) {
-            SUBTAG_VAR_OR_ASSERT(ts, script);
-
-            // The first character of a script code needs to be capitalized.
-            // "hans" -> "Hans"
-            script = callFunction(std_String_toUpperCase, script[0]) +
-                     Substring(script, 1, script.length - 1);
-        }
-
-        // unicode_region_subtag = (alpha{2} | digit{3}) ;
-        var region;
-        if ((ts.tokenLength === 2 && ts.token === ALPHA) ||
-            (ts.tokenLength === 3 && ts.token === DIGIT))
-        {
-            SUBTAG_VAR_OR_ASSERT(ts, region);
-
-            // Region codes need to be in upper-case. "bu" -> "BU"
-            region = callFunction(std_String_toUpperCase, region);
-        }
-
-        // unicode_variant_subtag = (alphanum{5,8}
-        //                        | digit alphanum{3}) ;
-        //
-        // alphanum               = [0-9 A-Z a-z] ;
-        var variants = [];
-        while ((5 <= ts.tokenLength && ts.tokenLength <= 8) ||
-               (ts.tokenLength === 4 && callFunction(ts.isDigitAt, ts, 0)))
-        {
-            var variant;
-            SUBTAG_VAR_OR_ASSERT(ts, variant);
-
-            _DefineDataProperty(variants, variants.length, variant);
-        }
-
-        localeObj = {
-            language,
-            script,
-            region,
-            variants,
-            extensions: [],
-            privateuse: undefined,
-        };
-    }
-
-    // Trailing `tfield` subtags. (Any other trailing subtags are an error,
-    // because we're guaranteed to only see a valid tranform extension here.)
-    var fields = [];
-    while (ts.tokenLength === 2) {
-        // `tkey` is `alpha digit`.
-        assert(!callFunction(ts.isDigitAt, ts, 0) && callFunction(ts.isDigitAt, ts, 1),
-               "unexpected invalid tkey subtag");
-
-        var key;
-        SUBTAG_VAR_OR_ASSERT(ts, key);
-
-        // `tfield` requires at least one `tvalue`.
-        assert(3 <= ts.tokenLength && ts.tokenLength <= 8,
-               "unexpected invalid tvalue subtag");
-
-        var value;
-        SUBTAG_VAR_OR_ASSERT(ts, value);
-
-        while (3 <= ts.tokenLength && ts.tokenLength <= 8) {
-            var part;
-            SUBTAG_VAR_OR_ASSERT(ts, part);
-            value += "-" + part;
-        }
-
-        _DefineDataProperty(fields, fields.length, {key, value});
-    }
-
-    assert(ts.token === NONE,
-           "unexpected trailing characters in promised-to-be-valid transform extension");
-
-    return {locale: localeObj, fields};
-}
-/* eslint-enable complexity */
-
-#undef NONE
-#undef ALPHA
-#undef DIGIT
-
-#undef HYPHEN
-#undef DIGIT_ZERO
-#undef DIGIT_NINE
-#undef UPPER_A
-#undef UPPER_Z
-#undef LOWER_A
-#undef LOWER_T
-#undef LOWER_U
-#undef LOWER_X
-#undef LOWER_Z
-
-#undef SUBTAG_VAR_OR_ASSERT
-#undef SUBTAG_VAR_OR_RETURN_NULL
-#undef NEXT_TOKEN_OR_ASSERT
-#undef NEXT_TOKEN_OR_RETURN_NULL
-
-/**
- * Verifies that the given string is a well-formed BCP 47 language tag
- * with no duplicate variant or singleton subtags.
- *
- * Spec: ECMAScript Internationalization API Specification, 6.2.2.
- */
-function IsStructurallyValidLanguageTag(locale) {
-    return parseLanguageTag(locale) !== null;
-}
-
-/**
- * Canonicalizes the given structurally valid Unicode BCP 47 locale identifier,
- * including regularized case of subtags. For example, the language tag
- * Zh-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE, where
- *
- *     Zh             ; 2*3ALPHA
- *     -haNS          ; ["-" script]
- *     -bu            ; ["-" region]
- *     -variant2      ; *("-" variant)
- *     -Variant1
- *     -u-ca-chinese  ; *("-" extension)
- *     -t-Zh-laTN
- *     -x-PRIVATE     ; ["-" privateuse]
- *
- * becomes zh-Hans-MM-variant1-variant2-t-zh-latn-u-ca-chinese-x-private
- *
- * UTS 35 specifies two different canonicalization algorithms. There's one to
- * canonicalize BCP 47 language tags and other one to canonicalize Unicode
- * locale identifiers. The latter one wasn't present when ECMA-402 was changed
- * to use Unicode BCP 47 locale identifiers instead of BCP 47 language tags, so
- * ECMA-402 currently only uses the former to canonicalize Unicode BCP 47 locale
- * identifiers.
- *
- * Spec: ECMAScript Internationalization API Specification, 6.2.3.
- * Spec: https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers
- * Spec: https://unicode.org/reports/tr35/#BCP_47_Language_Tag_Conversion
- */
-function CanonicalizeLanguageTagObject(localeObj) {
-    assert(IsObject(localeObj), "CanonicalizeLanguageTagObject");
-
-    // Per UTS 35, 3.3.1, the very first step is to canonicalize the syntax by
-    // normalizing the case and ordering all subtags. The canonical syntax form
-    // itself is specified in UTS 35, 3.2.1.
-
-    // The parser already normalized the case for all subtags.
-
-#ifdef DEBUG
-    function IsLowerCase(s) {
-        return s === callFunction(std_String_toLowerCase, s);
-    }
-    function IsUpperCase(s) {
-        return s === callFunction(std_String_toUpperCase, s);
-    }
-    function IsTitleCase(s) {
-        assert(s.length > 0, "unexpected empy string");
-        var r = callFunction(std_String_toUpperCase, s[0]) +
-                callFunction(std_String_toLowerCase, Substring(s, 1, s.length - 1));
-        return s === r;
-    }
-#endif
-
-    // 1. Any script subtag is in title case.
-    assert(localeObj.script === undefined || IsTitleCase(localeObj.script),
-           "If present, script subtag is in title case");
-
-    // 2. Any region subtag is in uppercase.
-    assert(localeObj.region === undefined || IsUpperCase(localeObj.region),
-           "If present, region subtag is in upper case");
-
-    // 3. All other subtags are in lowercase.
-    assert(IsLowerCase(localeObj.language),
-           "language subtag is in lower case");
-    assert(callFunction(ArrayEvery, localeObj.variants, IsLowerCase),
-           "variant subtags are in lower case");
-    assert(callFunction(ArrayEvery, localeObj.extensions, IsLowerCase),
-           "extension subtags are in lower case");
-    assert(localeObj.privateuse === undefined || IsLowerCase(localeObj.privateuse),
-           "If present, privateuse subtag is in lower case");
-
-
-    // The second step in UTS 35, 3.2.1, is to order all subtags.
-
-    // 1. Any variants are in alphabetical order.
-    var variants = localeObj.variants;
-    if (variants.length > 0) {
-        callFunction(ArraySort, variants);
-    }
-
-    // 2. Any extensions are in alphabetical order by their singleton.
-    var extensions = localeObj.extensions;
-    if (extensions.length > 0) {
-        // Extension sequences are sorted by their singleton characters.
-        // "u-ca-chinese-t-zh-latn" -> "t-zh-latn-u-ca-chinese"
-        callFunction(ArraySort, extensions);
-
-        // The last three bullet points in UTS 35, 3.2.1 apply only to Unicode and Transform
-        // extensions.
-        //
-        // 3. All attributes are sorted in alphabetical order.
-        //
-        // 4. All keywords and tfields are sorted by alphabetical order of their
-        //    keys, within their respective extensions.
-        //
-        // 5. Any type or tfield value "true" is removed.
-
-        for (var i = 0; i < extensions.length; i++) {
-            var ext = extensions[i];
-            assert(IsLowerCase(ext),
-                   "extension subtags must be in lower-case");
-            assert(ext[1] === "-",
-                   "extension subtags start with a singleton");
-
-            // Canonicalize Unicode locale extension subtag if present.
-            if (ext[0] === "u") {
-                var {attributes, keywords} = UnicodeExtensionComponents(ext);
-                extensions[i] = CanonicalizeUnicodeExtension(attributes, keywords, false);
-            }
-
-            // Canonicalize Unicode BCP 47 T extension if present.
-            if (ext[0] === "t") {
-                var {locale, fields} = TransformExtensionComponents(ext);
-                extensions[i] = CanonicalizeTransformExtension(locale, fields);
-            }
-        }
-    }
-
-    // The next two steps in 3.3.1 replace deprecated language and region
-    // subtags with their preferred mappings.
-    updateLocaleIdMappings(localeObj);
-
-    // The two final steps in 3.3.1, handling irregular grandfathered and
-    // private-use only language tags, don't apply, because these two forms
-    // can't occur in Unicode BCP 47 locale identifiers.
-}
-
-/**
- * Intl.Locale proposal
- *
- * UnicodeExtensionComponents( extension )
- *
- * Returns the components of |extension| where |extension| is a "Unicode locale
- * extension sequence" (ECMA-402, 6.2.1) without the starting separator
- * character.
- */
-function UnicodeExtensionComponents(extension) {
-    assert(typeof extension === "string", "extension is a String value");
-
-    // Step 1.
-    var attributes = [];
-
-    // Step 2.
-    var keywords = [];
-
-    // Step 3.
-    var isKeyword = false;
-
-    // Step 4.
-    var size = extension.length;
-
-    // Step 5.
-    // |extension| starts with "u-" instead of "-u-" in our implementation, so
-    // we need to initialize |k| with 2 instead of 3.
-    assert(callFunction(std_String_startsWith, extension, "u-"),
-           "extension starts with 'u-'");
-    var k = 2;
-
-    // Step 6.
-    var key, value;
-    while (k < size) {
-        // Step 6.a.
-        var e = callFunction(std_String_indexOf, extension, "-", k);
-
-        // Step 6.b.
-        var len = (e < 0 ? size : e) - k;
-
-        // Step 6.c.
-        var subtag = Substring(extension, k, len);
-
-        // Steps 6.d-e.
-        if (!isKeyword) {
-            // Step 6.d.
-            // NB: Duplicates are handled elsewhere in our implementation.
-            if (len !== 2)
-                _DefineDataProperty(attributes, attributes.length, subtag);
-        } else {
-            // Steps 6.e.i-ii.
-            if (len === 2) {
-                // Step 6.e.i.1.
-                // NB: Duplicates are handled elsewhere in our implementation.
-                _DefineDataProperty(keywords, keywords.length, {key, value});
-            } else {
-                // Step 6.e.ii.1.
-                if (value !== "")
-                    value += "-";
-
-                // Step 6.e.ii.2.
-                value += subtag;
-            }
-        }
-
-        // Step 6.f.
-        if (len === 2) {
-            // Step 6.f.i.
-            isKeyword = true;
-
-            // Step 6.f.ii.
-            key = subtag;
-
-            // Step 6.f.iii.
-            value = "";
-        }
-
-        // Step 6.g.
-        k += len + 1;
-    }
-
-    // Step 7.
-    if (isKeyword) {
-        // Step 7.a.
-        // NB: Duplicates are handled elsewhere in our implementation.
-        _DefineDataProperty(keywords, keywords.length, {key, value});
-    }
-
-    // Step 8.
-    return {attributes, keywords};
-}
-
-/**
- * CanonicalizeUnicodeExtension( attributes, keywords )
- *
- * Canonical syntax per <https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers>:
- *
- * - All attributes and keywords are in lowercase.
- *   - Note: The parser already converted keywords to lowercase.
- * - All attributes are sorted in alphabetical order.
- * - All keywords are sorted by alphabetical order of their keys.
- * - Any type value "true" is removed.
- *
- * Canonical form:
- * - All keys and types use the canonical form (from the name attribute;
- *   see Section 3.6.4 U Extension Data Files).
- */
-function CanonicalizeUnicodeExtension(attributes, keywords, canonicalForm) {
-    assert(attributes.length > 0 || keywords.length > 0,
-           "unexpected empty Unicode locale extension components");
-
-    // All attributes are sorted in alphabetical order.
-    if (attributes.length > 1)
-        callFunction(ArraySort, attributes);
-
-    // All keywords are sorted by alphabetical order of keys.
-    if (keywords.length > 1) {
-        function UnicodeKeySort(left, right) {
-            var leftKey = left.key;
-            var rightKey = right.key;
-            assert(leftKey.length === 2, "left key is a Unicode key");
-            assert(rightKey.length === 2, "right key is a Unicode key");
-
-            // Compare both strings using charCodeAt(), because relational
-            // string comparison always calls into the VM, whereas charCodeAt
-            // can be inlined by Ion.
-            var diff = callFunction(std_String_charCodeAt, leftKey, 0) -
-                       callFunction(std_String_charCodeAt, rightKey, 0);
-            if (diff === 0) {
-                diff = callFunction(std_String_charCodeAt, leftKey, 1) -
-                       callFunction(std_String_charCodeAt, rightKey, 1);
-            }
-            return diff;
-        }
-
-        callFunction(ArraySort, keywords, UnicodeKeySort);
-    }
-
-    var extension = "u";
-
-    // Append all attributes.
-    for (var i = 0; i < attributes.length; i++) {
-        var attribute = attributes[i];
-        assert(attribute === callFunction(std_String_toLowerCase, attribute),
-               "Attributes are already canonicalized to lower case");
-
-        // UnicodeExtensionComponents ignores duplicate attributes.
-        if (canonicalForm && i > 0 && attributes[i - 1] === attribute) {
-            continue;
-        }
-
-        extension += "-" + attributes[i];
-    }
-
-    // Append all keywords.
-    for (var i = 0; i < keywords.length; i++) {
-        var {key, value} = keywords[i];
-        assert(key === callFunction(std_String_toLowerCase, key) &&
-               value === callFunction(std_String_toLowerCase, value),
-               "Keywords are already canonicalized to lower case");
-
-
-        // UnicodeExtensionComponents ignores duplicate keys.
-        if (canonicalForm && i > 0 && keywords[i - 1].key === key) {
-            continue;
-        }
-
-        extension += "-" + key;
-
-        if (canonicalForm &&
-            hasOwn(key, deprecatedUnicodeExtensionTypes) &&
-            hasOwn(value, deprecatedUnicodeExtensionTypes[key]))
-        {
-            value = deprecatedUnicodeExtensionTypes[key][value];
-            assert(value === callFunction(std_String_toLowerCase, value),
-                   "Preferred keyword value is already in lower case");
-        }
-
-        // Type value "true" is removed.
-        if (value !== "" && value !== "true")
-            extension += "-" + value;
-    }
-
-    return extension;
-}
-
-/**
- * CanonicalizeTransformExtension
- *
- * Canonical form per <https://unicode.org/reports/tr35/#BCP47_T_Extension>:
- *
- * - These subtags are all in lowercase (that is the canonical casing for these
- *   subtags), [...].
- *
- * And per <https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers>:
- *
- * - All keywords and tfields are sorted by alphabetical order of their keys,
- *   within their respective extensions.
- */
-function CanonicalizeTransformExtension(localeObj, fields) {
-    assert(localeObj !== undefined || fields.length > 0,
-           "unexpected empty Transform locale extension components");
-
-    if (fields.length > 0) {
-        function TransformKeySort(left, right) {
-            var leftKey = left.key;
-            var rightKey = right.key;
-            assert(leftKey.length === 2, "left key is a Transform key");
-            assert(rightKey.length === 2, "right key is a Transform key");
-
-            // Compare both strings using charCodeAt(), because relational
-            // string comparison always calls into the VM, whereas charCodeAt
-            // can be inlined by Ion.
-            var diff = callFunction(std_String_charCodeAt, leftKey, 0) -
-                       callFunction(std_String_charCodeAt, rightKey, 0);
-            if (diff === 0) {
-                diff = callFunction(std_String_charCodeAt, leftKey, 1) -
-                       callFunction(std_String_charCodeAt, rightKey, 1);
-            }
-            return diff;
-        }
-
-        callFunction(ArraySort, fields, TransformKeySort);
-    }
-
-    var extension = "t";
-
-    // Append the language subtag if present.
-    if (localeObj !== undefined) {
-        // [1] is a bit unclear whether or not the `tlang` subtag also needs
-        // to be canonicalized (and case-adjusted). For now simply append it as
-        // is and change it to all lower-case. If we switch to [2], the `tlang`
-        // subtag also needs to be canonicalized according to the same rules as
-        // `unicode_language_id` subtags are canonicalized. Also see [3].
-        //
-        // [1] https://unicode.org/reports/tr35/#Language_Tag_to_Locale_Identifier
-        // [2] https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers
-        // [3] https://github.com/tc39/ecma402/issues/330
-        var localeStr = StringFromLanguageTagObject(localeObj);
-        extension += "-" + callFunction(std_String_toLowerCase, localeStr);
-    }
-
-    // Append all fields.
-    for (var i = 0; i < fields.length; i++) {
-        // UTS 35, 3.2.1 specifies:
-        // - Any type or tfield value "true" is removed.
-        //
-        // But the `tvalue` subtag is mandatory in `tfield: tkey tvalue`, so
-        // ignore this apparently invalid part of the UTS 35 specification and
-        // simply append all `tfield` subtags.
-        var {key, value} = fields[i];
-        extension += "-" + key + "-" + value;
-    }
-
-    return extension;
-}
-
-/**
- * Canonicalizes the given structurally valid BCP 47 language tag, including
- * regularized case of subtags. For example, the language tag
- * Zh-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE, where
- *
- *     Zh             ; 2*3ALPHA
- *     -haNS          ; ["-" script]
- *     -bu            ; ["-" region]
- *     -variant2      ; *("-" variant)
- *     -Variant1
- *     -u-ca-chinese  ; *("-" extension)
- *     -t-Zh-laTN
- *     -x-PRIVATE     ; ["-" privateuse]
- *
- * becomes zh-Hans-MM-variant1-variant2-t-zh-latn-u-ca-chinese-x-private
- *
- * Spec: ECMAScript Internationalization API Specification, 6.2.3.
- */
-function CanonicalizeLanguageTag(locale) {
-    var localeObj = parseLanguageTag(locale);
-    assert(localeObj !== null, "CanonicalizeLanguageTag");
-
-    CanonicalizeLanguageTagObject(localeObj);
-
-    return StringFromLanguageTagObject(localeObj);
-}
-
-/**
- * Returns the string representation of the given language tag object.
- */
-function StringFromLanguageTagObject(localeObj) {
-    assert(IsObject(localeObj), "StringFromLanguageTagObject");
-
-    var {
-        language,
-        script,
-        region,
-        variants,
-        extensions,
-        privateuse,
-    } = localeObj;
-
-    var canonical = language;
-
-    if (script !== undefined)
-        canonical += "-" + script;
-
-    if (region !== undefined)
-        canonical += "-" + region;
-
-    if (variants.length > 0)
-        canonical += "-" + callFunction(std_Array_join, variants, "-");
-
-    if (extensions.length > 0)
-        canonical += "-" + callFunction(std_Array_join, extensions, "-");
-
-    if (privateuse !== undefined)
-        canonical += "-" + privateuse;
-
-    return canonical;
-}
-
 /**
  * Returns true if the input contains only ASCII alphabetical characters.
  */
 function IsASCIIAlphaString(s) {
     assert(typeof s === "string", "IsASCIIAlphaString");
 
     for (var i = 0; i < s.length; i++) {
         var c = callFunction(std_String_charCodeAt, s, i);
         if (!((0x41 <= c && c <= 0x5A) || (0x61 <= c && c <= 0x7A)))
             return false;
     }
     return true;
 }
 
-/**
- * Validates and canonicalizes the given language tag.
- */
-function ValidateAndCanonicalizeLanguageTag(locale) {
-    assert(typeof locale === "string", "ValidateAndCanonicalizeLanguageTag");
-
-    // Handle the common case (a standalone language) first.
-    // Only the following Unicode BCP 47 locale identifier subset is accepted:
-    //   unicode_locale_id = unicode_language_id
-    //   unicode_language_id = unicode_language_subtag
-    //   unicode_language_subtag = alpha{2,3}
-    if (locale.length === 2 || locale.length === 3) {
-        if (!IsASCIIAlphaString(locale))
-            ThrowRangeError(JSMSG_INVALID_LANGUAGE_TAG, locale);
-        assert(IsStructurallyValidLanguageTag(locale), "2*3ALPHA is a valid language tag");
-
-        // The language subtag is canonicalized to lower case.
-        locale = callFunction(std_String_toLowerCase, locale);
-
-        // updateLocaleIdMappings may modify tags containing only |language|
-        // subtags, if the language is in |complexLanguageMappings|, so we need
-        // to handle that case first.
-        if (!hasOwn(locale, complexLanguageMappings)) {
-            // Replace deprecated subtags with their preferred values.
-            locale = hasOwn(locale, languageMappings)
-                     ? languageMappings[locale]
-                     : locale;
-            assert(locale === CanonicalizeLanguageTag(locale), "expected same canonicalization");
-
-            return locale;
-        }
-    }
-
-    var localeObj = parseLanguageTag(locale);
-    if (localeObj === null)
-        ThrowRangeError(JSMSG_INVALID_LANGUAGE_TAG, locale);
-
-    CanonicalizeLanguageTagObject(localeObj)
-
-    return StringFromLanguageTagObject(localeObj);
-}
-
 // The last-ditch locale is used if none of the available locales satisfies a
 // request. "en-GB" is used based on the assumptions that English is the most
 // common second language, that both en-GB and en-US are normally available in
 // an implementation, and that en-GB is more representative of the English used
 // in other locales.
 function lastDitchLocale() {
     // Per bug 1177929, strings don't clone out of self-hosted code as atoms,
     // breaking IonBuilder::constant.  Put this in a function for now.
@@ -1242,24 +172,20 @@ var localeCache = {
  */
 function DefaultLocaleIgnoringAvailableLocales() {
     const runtimeDefaultLocale = RuntimeDefaultLocale();
     if (runtimeDefaultLocale === localeCandidateCache.runtimeDefaultLocale)
         return localeCandidateCache.candidateDefaultLocale;
 
     // If we didn't get a cache hit, compute the candidate default locale and
     // cache it.  Fall back on the last-ditch locale when necessary.
-    var candidate = parseLanguageTag(runtimeDefaultLocale);
+    var candidate = intl_TryValidateAndCanonicalizeLanguageTag(runtimeDefaultLocale);
     if (candidate === null) {
         candidate = lastDitchLocale();
     } else {
-        CanonicalizeLanguageTagObject(candidate);
-
-        candidate = StringFromLanguageTagObject(candidate);
-
         // The default locale must be in [[availableLocales]], and that list
         // must not contain any locales with Unicode extension sequences, so
         // remove any present in the candidate.
         candidate = removeUnicodeExtensions(candidate);
 
         if (hasOwn(candidate, oldStyleLanguageTagMappings))
             candidate = oldStyleLanguageTagMappings[candidate];
     }
@@ -1345,22 +271,22 @@ function addSpecialMissingLanguageTags(a
  * Spec: ECMAScript Internationalization API Specification, 9.2.1.
  */
 function CanonicalizeLocaleList(locales) {
     // Step 1.
     if (locales === undefined)
         return [];
 
     // Step 3 (and the remaining steps).
-    if (typeof locales === "string")
-        return [ValidateAndCanonicalizeLanguageTag(locales)];
-
-    var unboxedLocale = LocaleToStringOrNull(locales);
-    if (unboxedLocale !== null)
-        return [unboxedLocale];
+    var tag = intl_ValidateAndCanonicalizeLanguageTag(locales, false);
+    if (tag !== null) {
+        assert(typeof tag === "string",
+               "intl_ValidateAndCanonicalizeLanguageTag returns a string value");
+        return [tag];
+    }
 
     // Step 2.
     var seen = [];
 
     // Step 4.
     var O = ToObject(locales);
 
     // Step 5.
@@ -1376,20 +302,19 @@ function CanonicalizeLocaleList(locales)
             // Step 7.c.i.
             var kValue = O[k];
 
             // Step 7.c.ii.
             if (!(typeof kValue === "string" || IsObject(kValue)))
                 ThrowTypeError(JSMSG_INVALID_LOCALES_ELEMENT);
 
             // Steps 7.c.iii-iv.
-            var unboxedLocale = LocaleToStringOrNull(kValue);
-            var tag = unboxedLocale !== null
-                      ? unboxedLocale
-                      : ValidateAndCanonicalizeLanguageTag(ToString(kValue));
+            var tag = intl_ValidateAndCanonicalizeLanguageTag(kValue, true);
+            assert(typeof tag === "string",
+                   "ValidateAndCanonicalizeLanguageTag returns a string value");
 
             // Step 7.c.v.
             if (callFunction(ArrayIndexOf, seen, tag) === -1)
                 _DefineDataProperty(seen, seen.length, tag);
         }
 
         // Step 7.d.
         k++;
--- a/js/src/builtin/intl/LanguageTag.cpp
+++ b/js/src/builtin/intl/LanguageTag.cpp
@@ -1588,10 +1588,93 @@ bool ParseStandaloneRegionTag(HandleLine
       return false;
     }
     result.set(str->twoByteRange(nogc));
   }
   result.toUpperCase();
   return true;
 }
 
+template <typename CharT>
+static bool IsAsciiLowercaseAlpha(const mozilla::Range<const CharT>& range) {
+  // Tell the analysis the |std::all_of| function can't GC.
+  JS::AutoSuppressGCAnalysis nogc;
+
+  const CharT* ptr = range.begin().get();
+  size_t length = range.length();
+  return std::all_of(ptr, ptr + length, mozilla::IsAsciiLowercaseAlpha<CharT>);
+}
+
+static bool IsAsciiLowercaseAlpha(JSLinearString* str) {
+  JS::AutoCheckCannotGC nogc;
+  return str->hasLatin1Chars() ? IsAsciiLowercaseAlpha(str->latin1Range(nogc))
+                               : IsAsciiLowercaseAlpha(str->twoByteRange(nogc));
+}
+
+template <typename CharT>
+static bool IsAsciiAlpha(const mozilla::Range<const CharT>& range) {
+  // Tell the analysis the |std::all_of| function can't GC.
+  JS::AutoSuppressGCAnalysis nogc;
+
+  const CharT* ptr = range.begin().get();
+  size_t length = range.length();
+  return std::all_of(ptr, ptr + length, mozilla::IsAsciiAlpha<CharT>);
+}
+
+static bool IsAsciiAlpha(JSLinearString* str) {
+  JS::AutoCheckCannotGC nogc;
+  return str->hasLatin1Chars() ? IsAsciiAlpha(str->latin1Range(nogc))
+                               : IsAsciiAlpha(str->twoByteRange(nogc));
+}
+
+JS::Result<JSString*> ParseStandaloneISO639LanguageTag(JSContext* cx,
+                                                       HandleLinearString str) {
+  // ISO-639 language codes contain either two or three characters.
+  size_t length = str->length();
+  if (length != 2 && length != 3) {
+    return nullptr;
+  }
+
+  // We can directly return the input below if it's in the correct case.
+  bool isLowerCase = IsAsciiLowercaseAlpha(str);
+  if (!isLowerCase) {
+    // Must be an ASCII alpha string.
+    if (!IsAsciiAlpha(str)) {
+      return nullptr;
+    }
+  }
+
+  LanguageSubtag languageTag;
+  if (str->hasLatin1Chars()) {
+    JS::AutoCheckCannotGC nogc;
+    languageTag.set(str->latin1Range(nogc));
+  } else {
+    JS::AutoCheckCannotGC nogc;
+    languageTag.set(str->twoByteRange(nogc));
+  }
+
+  if (!isLowerCase) {
+    // The language subtag is canonicalized to lower case.
+    languageTag.toLowerCase();
+  }
+
+  // Reject the input if the canonical tag contains more than just a single
+  // language subtag.
+  if (LanguageTag::complexLanguageMapping(languageTag)) {
+    return nullptr;
+  }
+
+  // Take care to replace deprecated subtags with their preferred values.
+  JSString* result;
+  if (LanguageTag::languageMapping(languageTag) || !isLowerCase) {
+    auto range = languageTag.range();
+    result = NewStringCopyN<CanGC>(cx, range.begin().get(), range.length());
+  } else {
+    result = str;
+  }
+  if (!result) {
+    return cx->alreadyReportedOOM();
+  }
+  return result;
+}
+
 }  // namespace intl
 }  // namespace js
--- a/js/src/builtin/intl/LanguageTag.h
+++ b/js/src/builtin/intl/LanguageTag.h
@@ -702,13 +702,21 @@ MOZ_MUST_USE bool ParseStandaloneScriptT
 /**
  * Parse a string as a standalone |region| tag. If |str| is a standalone region
  * tag, store it in case-normalized form in |result| and return true. Otherwise
  * return false.
  */
 MOZ_MUST_USE bool ParseStandaloneRegionTag(JS::Handle<JSLinearString*> str,
                                            RegionSubtag& result);
 
+/**
+ * Parse a string as an ISO-639 language code. Return |nullptr| in the result if
+ * the input could not be parsed or the canonical form of the resulting language
+ * tag contains more than a single language subtag.
+ */
+JS::Result<JSString*> ParseStandaloneISO639LanguageTag(
+    JSContext* cx, JS::Handle<JSLinearString*> str);
+
 }  // namespace intl
 
 }  // namespace js
 
 #endif /* builtin_intl_LanguageTag_h */
--- a/js/src/builtin/intl/Locale.cpp
+++ b/js/src/builtin/intl/Locale.cpp
@@ -482,16 +482,39 @@ static bool ApplyUnicodeExtensionToTag(J
   // Insert the new Unicode extension string into the language tag.
   UniqueChars newExtensionChars(newExtension.extractOrCopyRawBuffer());
   if (!newExtensionChars) {
     return false;
   }
   return tag.setUnicodeExtension(std::move(newExtensionChars));
 }
 
+static JS::Result<JSString*> LanguageTagFromMaybeWrappedLocale(JSContext* cx,
+                                                               JSObject* obj) {
+  if (obj->is<LocaleObject>()) {
+    return obj->as<LocaleObject>().languageTag();
+  }
+
+  JSObject* unwrapped = CheckedUnwrapStatic(obj);
+  if (!unwrapped) {
+    ReportAccessDenied(cx);
+    return cx->alreadyReportedError();
+  }
+
+  if (!unwrapped->is<LocaleObject>()) {
+    return nullptr;
+  }
+
+  RootedString tagStr(cx, unwrapped->as<LocaleObject>().languageTag());
+  if (!cx->compartment()->wrap(cx, &tagStr)) {
+    return cx->alreadyReportedError();
+  }
+  return tagStr.get();
+}
+
 /**
  * Intl.Locale( tag[, options] )
  */
 static bool Locale(JSContext* cx, unsigned argc, Value* vp) {
   CallArgs args = CallArgsFromVp(argc, vp);
 
   // Step 1.
   if (!ThrowIfNotConstructing(cx, args, "Intl.Locale")) {
@@ -499,52 +522,37 @@ static bool Locale(JSContext* cx, unsign
   }
 
   // Steps 2-6 (Inlined 9.1.14, OrdinaryCreateFromConstructor).
   RootedObject proto(cx);
   if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_Null, &proto)) {
     return false;
   }
 
-  // Step 7.
-  if (args.length() == 0 || (!args[0].isString() && !args[0].isObject())) {
+  // Steps 7-9.
+  HandleValue tagValue = args.get(0);
+  JSString* tagStr;
+  if (tagValue.isObject()) {
+    JS_TRY_VAR_OR_RETURN_FALSE(
+        cx, tagStr,
+        LanguageTagFromMaybeWrappedLocale(cx, &tagValue.toObject()));
+    if (!tagStr) {
+      tagStr = ToString(cx, tagValue);
+      if (!tagStr) {
+        return false;
+      }
+    }
+  } else if (tagValue.isString()) {
+    tagStr = tagValue.toString();
+  } else {
     JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
                               JSMSG_INVALID_LOCALES_ELEMENT);
     return false;
   }
 
-  // Steps 8-9.
-  RootedString tagStr(cx);
-  if (args[0].isObject()) {
-    JSObject* obj = &args[0].toObject();
-    if (obj->is<LocaleObject>()) {
-      tagStr = obj->as<LocaleObject>().languageTag();
-    } else {
-      JSObject* unwrapped = CheckedUnwrapStatic(obj);
-      if (!unwrapped) {
-        ReportAccessDenied(cx);
-        return false;
-      }
-
-      if (unwrapped->is<LocaleObject>()) {
-        tagStr = unwrapped->as<LocaleObject>().languageTag();
-        if (!cx->compartment()->wrap(cx, &tagStr)) {
-          return false;
-        }
-      } else {
-        tagStr = ToString(cx, args[0]);
-        if (!tagStr) {
-          return false;
-        }
-      }
-    }
-  } else {
-    tagStr = args[0].toString();
-  }
-
   RootedLinearString tagLinearStr(cx, tagStr->ensureLinear(cx));
   if (!tagLinearStr) {
     return false;
   }
 
   // ApplyOptionsToTag, steps 2 and 9.
   LanguageTag tag(cx);
   if (!LanguageTagParser::parse(cx, tagLinearStr, tag)) {
@@ -1280,8 +1288,115 @@ JSObject* js::CreateLocalePrototype(JSCo
 
   global->setReservedSlot(LOCALE_PROTO, ObjectValue(*localeProto));
   return true;
 }
 
 bool js::AddLocaleConstructor(JSContext* cx, JS::Handle<JSObject*> intl) {
   return GlobalObject::addLocaleConstructor(cx, intl);
 }
+
+bool js::intl_ValidateAndCanonicalizeLanguageTag(JSContext* cx, unsigned argc,
+                                                 Value* vp) {
+  CallArgs args = CallArgsFromVp(argc, vp);
+  MOZ_ASSERT(args.length() == 2);
+
+  HandleValue tagValue = args[0];
+  bool applyToString = args[1].toBoolean();
+
+  if (tagValue.isObject()) {
+    JSString* tagStr;
+    JS_TRY_VAR_OR_RETURN_FALSE(
+        cx, tagStr,
+        LanguageTagFromMaybeWrappedLocale(cx, &tagValue.toObject()));
+    if (tagStr) {
+      args.rval().setString(tagStr);
+      return true;
+    }
+  }
+
+  if (!applyToString && !tagValue.isString()) {
+    args.rval().setNull();
+    return true;
+  }
+
+  JSString* tagStr = ToString(cx, tagValue);
+  if (!tagStr) {
+    return false;
+  }
+
+  RootedLinearString tagLinearStr(cx, tagStr->ensureLinear(cx));
+  if (!tagLinearStr) {
+    return false;
+  }
+
+  // Handle the common case (a standalone language) first.
+  // Only the following Unicode BCP 47 locale identifier subset is accepted:
+  //   unicode_locale_id = unicode_language_id
+  //   unicode_language_id = unicode_language_subtag
+  //   unicode_language_subtag = alpha{2,3}
+  JSString* language;
+  JS_TRY_VAR_OR_RETURN_FALSE(
+      cx, language, intl::ParseStandaloneISO639LanguageTag(cx, tagLinearStr));
+  if (language) {
+    args.rval().setString(language);
+    return true;
+  }
+
+  LanguageTag tag(cx);
+  if (!LanguageTagParser::parse(cx, tagLinearStr, tag)) {
+    return false;
+  }
+
+  if (!tag.canonicalize(cx, LanguageTag::UnicodeExtensionCanonicalForm::No)) {
+    return false;
+  }
+
+  JSStringBuilder sb(cx);
+  if (!tag.appendTo(cx, sb)) {
+    return false;
+  }
+
+  JSString* resultStr = sb.finishString();
+  if (!resultStr) {
+    return false;
+  }
+  args.rval().setString(resultStr);
+  return true;
+}
+
+bool js::intl_TryValidateAndCanonicalizeLanguageTag(JSContext* cx,
+                                                    unsigned argc, Value* vp) {
+  CallArgs args = CallArgsFromVp(argc, vp);
+  MOZ_ASSERT(args.length() == 1);
+
+  RootedLinearString linear(cx, args[0].toString()->ensureLinear(cx));
+  if (!linear) {
+    return false;
+  }
+
+  LanguageTag tag(cx);
+  bool ok;
+  JS_TRY_VAR_OR_RETURN_FALSE(cx, ok,
+                             LanguageTagParser::tryParse(cx, linear, tag));
+
+  // The caller handles invalid inputs.
+  if (!ok) {
+    args.rval().setNull();
+    return true;
+  }
+
+  if (!tag.canonicalize(cx, LanguageTag::UnicodeExtensionCanonicalForm::No)) {
+    return false;
+  }
+
+  JSStringBuilder sb(cx);
+  if (!tag.appendTo(cx, sb)) {
+    return false;
+  }
+
+  JSString* resultStr = sb.finishString();
+  if (!resultStr) {
+    return false;
+  }
+  args.rval().setString(resultStr);
+  return true;
+}
--- a/js/src/builtin/intl/Locale.h
+++ b/js/src/builtin/intl/Locale.h
@@ -44,11 +44,18 @@ class LocaleObject : public NativeObject
     return getFixedSlot(UNICODE_EXTENSION_SLOT);
   }
 };
 
 extern JSObject* CreateLocalePrototype(JSContext* cx,
                                        JS::Handle<JSObject*> Intl,
                                        JS::Handle<GlobalObject*> global);
 
+extern MOZ_MUST_USE bool intl_ValidateAndCanonicalizeLanguageTag(JSContext* cx,
+                                                                 unsigned argc,
+                                                                 Value* vp);
+
+extern MOZ_MUST_USE bool intl_TryValidateAndCanonicalizeLanguageTag(
+    JSContext* cx, unsigned argc, Value* vp);
+
 }  // namespace js
 
 #endif /* builtin_intl_Locale_h */
--- a/js/src/vm/SelfHosting.cpp
+++ b/js/src/vm/SelfHosting.cpp
@@ -2075,45 +2075,16 @@ static bool intrinsic_ToNumeric(JSContex
   MOZ_ASSERT(args.length() == 1);
   if (!ToNumeric(cx, args[0])) {
     return false;
   }
   args.rval().set(args[0]);
   return true;
 }
 
-static bool intrinsic_LocaleToStringOrNull(JSContext* cx, unsigned argc,
-                                           Value* vp) {
-  CallArgs args = CallArgsFromVp(argc, vp);
-  MOZ_ASSERT(args.length() == 1);
-
-  if (!args[0].isObject()) {
-    args.rval().setNull();
-    return true;
-  }
-
-  JSObject* unwrapped = CheckedUnwrapStatic(&args[0].toObject());
-  if (!unwrapped) {
-    ReportAccessDenied(cx);
-    return false;
-  }
-
-  if (!unwrapped->is<LocaleObject>()) {
-    args.rval().setNull();
-    return true;
-  }
-
-  RootedString str(cx, unwrapped->as<LocaleObject>().languageTag());
-  if (!cx->compartment()->wrap(cx, &str)) {
-    return false;
-  }
-  args.rval().setString(str);
-  return true;
-}
-
 // The self-hosting global isn't initialized with the normal set of builtins.
 // Instead, individual C++-implemented functions that're required by
 // self-hosted code are defined as global functions. Accessing these
 // functions via a content compartment's builtins would be unsafe, because
 // content script might have changed the builtins' prototypes' members.
 // Installing the whole set of builtins in the self-hosting compartment, OTOH,
 // would be wasteful: it increases memory usage and initialization time for
 // self-hosting compartment.
@@ -2479,16 +2450,20 @@ static const JSFunctionSpec intrinsic_fu
           intl_PluralRules_availableLocales, 0, 0),
     JS_FN("intl_GetPluralCategories", intl_GetPluralCategories, 1, 0),
     JS_FN("intl_SelectPluralRule", intl_SelectPluralRule, 2, 0),
     JS_FN("intl_RelativeTimeFormat_availableLocales",
           intl_RelativeTimeFormat_availableLocales, 0, 0),
     JS_FN("intl_FormatRelativeTime", intl_FormatRelativeTime, 4, 0),
     JS_FN("intl_toLocaleLowerCase", intl_toLocaleLowerCase, 2, 0),
     JS_FN("intl_toLocaleUpperCase", intl_toLocaleUpperCase, 2, 0),
+    JS_FN("intl_ValidateAndCanonicalizeLanguageTag",
+          intl_ValidateAndCanonicalizeLanguageTag, 2, 0),
+    JS_FN("intl_TryValidateAndCanonicalizeLanguageTag",
+          intl_TryValidateAndCanonicalizeLanguageTag, 1, 0),
 
     JS_INLINABLE_FN("GuardToCollator", intrinsic_GuardToBuiltin<CollatorObject>,
                     1, 0, IntlGuardToCollator),
     JS_INLINABLE_FN("GuardToDateTimeFormat",
                     intrinsic_GuardToBuiltin<DateTimeFormatObject>, 1, 0,
                     IntlGuardToDateTimeFormat),
     JS_INLINABLE_FN("GuardToNumberFormat",
                     intrinsic_GuardToBuiltin<NumberFormatObject>, 1, 0,
@@ -2523,18 +2498,16 @@ static const JSFunctionSpec intrinsic_fu
     JS_FN("GetNumberFormatConstructor",
           intrinsic_GetBuiltinIntlConstructor<
               GlobalObject::getOrCreateNumberFormatConstructor>,
           0, 0),
     JS_FN("RuntimeDefaultLocale", intrinsic_RuntimeDefaultLocale, 0, 0),
     JS_FN("IsRuntimeDefaultLocale", intrinsic_IsRuntimeDefaultLocale, 1, 0),
 #endif  // ENABLE_INTL_API
 
-    JS_FN("LocaleToStringOrNull", intrinsic_LocaleToStringOrNull, 1, 0),
-
     JS_FN("GetOwnPropertyDescriptorToArray", GetOwnPropertyDescriptorToArray, 2,
           0),
 
     JS_INLINABLE_FN("IsRegExpObject",
                     intrinsic_IsInstanceOfBuiltin<RegExpObject>, 1, 0,
                     IsRegExpObject),
     JS_FN("CallRegExpMethodIfWrapped",
           CallNonGenericSelfhostedMethod<Is<RegExpObject>>, 2, 0),