Bug 912701 - When removing Unicode extension sequences from a locale, ignore similar syntax that might be found in a privateuse component. r=abargull
author Jeff Walden <jwalden@mit.edu>
Sun, 28 Jun 2015 07:00:01 -0700
changeset 250723 88e918fce0e332290bc0777a293a76dd3ba69338
parent 250722 335268dfcd2cee23cb07f7c8497be80f3bcb0314
child 250724 f9c8d00afb56323362f3106ba7eb61e034239da4
push id unknown
push user unknown
push date unknown
reviewers abargull
bugs 912701
milestone 42.0a1
Bug 912701 - When removing Unicode extension sequences from a locale, ignore similar syntax that might be found in a privateuse component. r=abargull
js/src/builtin/Intl.js
js/src/tests/Intl/NumberFormat/remove-unicode-extensions.js
--- a/js/src/builtin/Intl.js
+++ b/js/src/builtin/Intl.js
@@ -76,33 +76,56 @@ internalIntlRegExps.currencyDigitsRE = n
  * match the extension production in RFC 5646, where the singleton component is
  * "u".
  *
  * Spec: ECMAScript Internationalization API Specification, 6.2.1.
  */
 function getUnicodeLocaleExtensionSequenceRE() {
     return internalIntlRegExps.unicodeLocaleExtensionSequenceRE ||
            (internalIntlRegExps.unicodeLocaleExtensionSequenceRE =
-            regexp_construct_no_statics("-u(-[a-z0-9]{2,8})+"));
+            regexp_construct_no_statics("-u(?:-[a-z0-9]{2,8})+"));
 }
 
 
 /**
  * Removes Unicode locale extension sequences from the given language tag.
  */
 function removeUnicodeExtensions(locale) {
-    // Don't use std_String_replace directly with a regular expression,
-    // as that would set RegExp statics.
+    // A wholly-privateuse locale has no extension sequences.
+    if (callFunction(std_String_startsWith, locale, "x-"))
+        return locale;
+
+    // Otherwise, split on "-x-" marking the start of any privateuse component.
+    // Replace Unicode locale extension sequences in the left half, and return
+    // the concatenation.
+    var pos = callFunction(std_String_indexOf, locale, "-x-");
+    if (pos < 0)
+        pos = locale.length;
+
+    var left = callFunction(std_String_substring, locale, 0, pos);
+    var right = callFunction(std_String_substring, locale, pos);
+
     var extensions;
     var unicodeLocaleExtensionSequenceRE = getUnicodeLocaleExtensionSequenceRE();
-    while ((extensions = regexp_exec_no_statics(unicodeLocaleExtensionSequenceRE, locale)) !== null) {
-        locale = callFunction(std_String_replace, locale, extensions[0], "");
+    while ((extensions = regexp_exec_no_statics(unicodeLocaleExtensionSequenceRE, left)) !== null) {
+        left = callFunction(std_String_replace, left, extensions[0], "");
         unicodeLocaleExtensionSequenceRE.lastIndex = 0;
     }
-    return locale;
+
+    var combined = left + right;
+    assert(IsStructurallyValidLanguageTag(combined), "recombination produced an invalid language tag");
+    assert(function() {
+        var uindex = callFunction(std_String_indexOf, combined, "-u-");
+        if (uindex < 0)
+            return true;
+        var xindex = callFunction(std_String_indexOf, combined, "-x-");
+        return xindex > 0 && xindex < uindex;
+    }(), "recombination failed to remove all Unicode locale extension sequences");
+
+    return combined;
 }
 
 
 /**
  * Regular expression defining BCP 47 language tags.
  *
  * Spec: RFC 5646 section 2.1.
  */
new file mode 100644
--- /dev/null
+++ b/js/src/tests/Intl/NumberFormat/remove-unicode-extensions.js
@@ -0,0 +1,24 @@
+// |reftest| skip-if(!this.hasOwnProperty("Intl"))
+
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Locale processing is supposed to internally remove any Unicode extension
+// sequences in the locale.  Test that various weird testcases invoking
+// algorithmic edge cases don't assert or throw exceptions.
+
+var weirdCases =
+  [
+   "x-u-foo",
+   "en-x-u-foo",
+   "en-a-bar-x-u-foo",
+   "en-x-u-foo-a-bar",
+   "en-a-bar-u-baz-x-u-foo",
+  ];
+
+for (var locale of weirdCases)
+  Intl.NumberFormat(locale).format(5);
+
+if (typeof reportCompare === "function")
+  reportCompare(true, true);