Bug 1522070 - Part 4: Remove support for four letter language subtags. r=jwalden
author André Bargull <andre.bargull@gmail.com>
Tue, 09 Apr 2019 09:16:34 +0000
changeset 468535 c5a97d3424310716d3a849dfb95f1ec86f7eb783
parent 468534 b215b68fbccce1f1297b79ae471341bbe1cc7164
child 468536 7b0c2144242cbaadc9ce80e0f5bfe804bf58ff6a
push id 112733
push user csabou@mozilla.com
push date Tue, 09 Apr 2019 16:30:22 +0000
treeherder mozilla-inbound@e14dba56bbfd [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jwalden
bugs 1522070
milestone 68.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1522070 - Part 4: Remove support for four letter language subtags. r=jwalden

Unicode BCP 47 locale identifiers don't support four letter language subtags.

Differential Revision: https://phabricator.services.mozilla.com/D23539
js/src/builtin/intl/CommonFunctions.js
js/src/tests/non262/Intl/four-letter-language-codes.js
--- a/js/src/builtin/intl/CommonFunctions.js
+++ b/js/src/builtin/intl/CommonFunctions.js
@@ -256,43 +256,43 @@ function parseLanguageTag(locale) {
 
     // Language-Tag = langtag           ; normal language tags
     //              / grandfathered     ; grandfathered tags
     if (!nextToken())
         return null;
 
     // All Language-Tag productions start with the ALPHA token, have at least
     // two characters, and contain less-or-equal to eight characters.
-    if (token !== ALPHA || tokenLength < 2 || tokenLength > 8)
-        return null;
 
     var language, script, region, privateuse;
     var variants = [];
     var extensions = [];
 
     // langtag = language
     //           ["-" script]
     //           ["-" region]
     //           *("-" variant)
     //           *("-" extension)
     //           ["-" privateuse]
 
     // language = 2*3ALPHA          ; shortest ISO 639 code
-    //          / 4ALPHA            ; or reserved for future use
     //          / 5*8ALPHA          ; or registered language subtag
-    if (tokenLength <= 3) {
-        language = tokenStringLower();
-        if (!nextToken())
-            return null;
-    } else {
-        assert(4 <= tokenLength && tokenLength <= 8, "reserved/registered language subtags");
-        language = tokenStringLower();
-        if (!nextToken())
-            return null;
+    if (token !== ALPHA || tokenLength === 1 || tokenLength === 4 || tokenLength > 8) {
+        // Four character language subtags are not allowed in Unicode BCP 47
+        // locale identifiers. Also see the comparison to Unicode CLDR locale
+        // identifiers in <https://unicode.org/reports/tr35/#BCP_47_Conformance>.
+        return null;
     }
+    assert((2 <= tokenLength && tokenLength <= 3) ||
+           (5 <= tokenLength && tokenLength <= 8),
+           "language subtags have 2-3 or 5-8 letters");
+
+    language = tokenStringLower();
+    if (!nextToken())
+        return null;
 
     // script = 4ALPHA              ; ISO 15924 code
     if (tokenLength === 4 && token === ALPHA) {
         script = tokenStringLower();
 
         // The first character of a script code needs to be capitalized.
         // "hans" -> "Hans"
         script = callFunction(std_String_toUpperCase, script[0]) +
--- a/js/src/tests/non262/Intl/four-letter-language-codes.js
+++ b/js/src/tests/non262/Intl/four-letter-language-codes.js
@@ -1,20 +1,22 @@
 // |reftest| skip-if(!this.hasOwnProperty("Intl"))
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-// So many non-existent four letter language codes to pick from.
-const languageTags = {
-    "Flob": "flob",
-    "ZORK": "zork",
-    "Blah-latn": "blah-Latn",
-    "QuuX-latn-us": "quux-Latn-US",
-    "SPAM-gb-x-Sausages-BACON-eggs": "spam-GB-x-sausages-bacon-eggs",
-};
+// Four letter language subtags are not allowed.
+const languageTags = [
+    "root", // Special meaning in Unicode CLDR locale identifiers.
+    "Latn", // Unicode CLDR locale identifiers can start with a script subtag.
+    "Flob", // And now some nonsense input.
+    "ZORK",
+    "Blah-latn",
+    "QuuX-latn-us",
+    "SPAM-gb-x-Sausages-BACON-eggs",
+];
 
-for (let [tag, canonical] of Object.entries(languageTags)) {
-    assertEq(Intl.getCanonicalLocales(tag)[0], canonical);
+for (let tag of languageTags) {
+    assertThrowsInstanceOf(() => Intl.getCanonicalLocales(tag), RangeError);
 }
 
 if (typeof reportCompare === "function")
     reportCompare(0, 0);