Bug 866473: Implement caseFirst option in Intl.Collator. r=Waldo
☠☠ backed out by 5e87b990f4b0 ☠ ☠
author André Bargull <andre.bargull@gmail.com>
Thu, 02 Mar 2017 03:01:09 -0800
changeset 374653 c92fa71c097e3edb4a307aa5f2c29d51da501c1d
parent 374652 5ae59bdcd2d4f9617d6663ab823b4a384fdd6204
child 374654 9b990c5890a81b46e4ffd8ae28c230effe9fba40
push id 10863
push user jlorenzo@mozilla.com
push date Mon, 06 Mar 2017 23:02:23 +0000
treeherder mozilla-aurora@0931190cd725 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers Waldo
bugs 866473
milestone 54.0a1
Bug 866473: Implement caseFirst option in Intl.Collator. r=Waldo
js/src/builtin/Intl.cpp
js/src/builtin/Intl.h
js/src/builtin/Intl.js
js/src/tests/Intl/Collator/caseFirst.js
js/src/vm/SelfHosting.cpp
--- a/js/src/builtin/Intl.cpp
+++ b/js/src/builtin/Intl.cpp
@@ -7,16 +7,17 @@
 /*
  * The Intl module specified by standard ECMA-402,
  * ECMAScript Internationalization API Specification.
  */
 
 #include "builtin/Intl.h"
 
 #include "mozilla/Casting.h"
+#include "mozilla/HashFunctions.h"
 #include "mozilla/PodOperations.h"
 #include "mozilla/Range.h"
 
 #include <string.h>
 
 #include "jsapi.h"
 #include "jsatom.h"
 #include "jscntxt.h"
@@ -1321,22 +1322,24 @@ NewUCollator(JSContext* cx, Handle<Colla
         uNumeric = UCOL_ON;
 
     if (!GetProperty(cx, internals, internals, cx->names().caseFirst, &value))
         return nullptr;
     if (!value.isUndefined()) {
         JSLinearString* caseFirst = value.toString()->ensureLinear(cx);
         if (!caseFirst)
             return nullptr;
-        if (StringEqualsAscii(caseFirst, "upper"))
+        if (StringEqualsAscii(caseFirst, "upper")) {
             uCaseFirst = UCOL_UPPER_FIRST;
-        else if (StringEqualsAscii(caseFirst, "lower"))
+        } else if (StringEqualsAscii(caseFirst, "lower")) {
             uCaseFirst = UCOL_LOWER_FIRST;
-        else
+        } else {
             MOZ_ASSERT(StringEqualsAscii(caseFirst, "false"));
+            uCaseFirst = UCOL_OFF;
+        }
     }
 
     UErrorCode status = U_ZERO_ERROR;
     UCollator* coll = ucol_open(icuLocale(locale.ptr()), &status);
     if (U_FAILURE(status)) {
         JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_INTERNAL_INTL_ERROR);
         return nullptr;
     }
@@ -1416,16 +1419,152 @@ js::intl_CompareStrings(JSContext* cx, u
     }
 
     // Use the UCollator to actually compare the strings.
     RootedString str1(cx, args[1].toString());
     RootedString str2(cx, args[2].toString());
     return intl_CompareStrings(cx, coll, str1, str2, args.rval());
 }
 
+js::SharedIntlData::LocaleHasher::Lookup::Lookup(JSLinearString* locale)
+  : js::SharedIntlData::LinearStringLookup(locale) // base ctor records chars, encoding and length
+{
+    if (isLatin1) // hash the characters in whichever encoding the string uses
+        hash = mozilla::HashString(latin1Chars, length);
+    else // NOTE(review): match() accepts mixed encodings, so both overloads presumably hash equal text alike - confirm
+        hash = mozilla::HashString(twoByteChars, length);
+}
+
+bool
+js::SharedIntlData::LocaleHasher::match(Locale key, const Lookup& lookup)
+{
+    if (key->length() != lookup.length) // different lengths can never match
+        return false;
+
+    if (key->hasLatin1Chars()) { // Latin-1 key against either lookup encoding
+        const Latin1Char* keyChars = key->latin1Chars(lookup.nogc);
+        if (lookup.isLatin1)
+            return EqualChars(keyChars, lookup.latin1Chars, lookup.length);
+        return EqualChars(keyChars, lookup.twoByteChars, lookup.length);
+    }
+
+    const char16_t* keyChars = key->twoByteChars(lookup.nogc); // two-byte key from here on
+    if (lookup.isLatin1)
+        return EqualChars(lookup.latin1Chars, keyChars, lookup.length); // Latin-1 operand first, as above
+    return EqualChars(keyChars, lookup.twoByteChars, lookup.length);
+}
+
+bool
+js::SharedIntlData::ensureUpperCaseFirstLocales(JSContext* cx)
+{
+    if (upperCaseFirstInitialized) // only set once the whole set has been built, see below
+        return true;
+
+    // If ensureUpperCaseFirstLocales() was called previously, but didn't
+    // complete (OOM or ICU error), clear all data and start from scratch.
+    if (upperCaseFirstLocales.initialized())
+        upperCaseFirstLocales.finish();
+    if (!upperCaseFirstLocales.init()) {
+        ReportOutOfMemory(cx);
+        return false;
+    }
+
+    UErrorCode status = U_ZERO_ERROR;
+    UEnumeration* available = ucol_openAvailableLocales(&status); // locales ICU can open a collator for
+    if (U_FAILURE(status)) {
+        JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_INTERNAL_INTL_ERROR);
+        return false;
+    }
+    ScopedICUObject<UEnumeration, uenum_close> toClose(available); // guard parameterized with uenum_close
+
+    RootedAtom locale(cx); // reused for every upper-first locale found by the loop
+    while (true) {
+        int32_t size;
+        const char* rawLocale = uenum_next(available, &size, &status);
+        if (U_FAILURE(status)) {
+            JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_INTERNAL_INTL_ERROR);
+            return false;
+        }
+
+        if (rawLocale == nullptr) // no further locales: leave the loop
+            break;
+
+        UCollator* collator = ucol_open(rawLocale, &status); // opened only to read its default attribute
+        if (U_FAILURE(status)) {
+            JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_INTERNAL_INTL_ERROR);
+            return false;
+        }
+        ScopedICUObject<UCollator, ucol_close> toCloseCollator(collator); // guard parameterized with ucol_close
+
+        UColAttributeValue caseFirst = ucol_getAttribute(collator, UCOL_CASE_FIRST, &status);
+        if (U_FAILURE(status)) {
+            JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_INTERNAL_INTL_ERROR);
+            return false;
+        }
+
+        if (caseFirst != UCOL_UPPER_FIRST) // only upper-first locales are recorded
+            continue;
+
+        MOZ_ASSERT(size >= 0);
+        locale = Atomize(cx, rawLocale, size_t(size)); // the set stores atoms (Locale is JSAtom*)
+        if (!locale)
+            return false;
+
+        LocaleHasher::Lookup lookup(locale);
+        LocaleSet::AddPtr p = upperCaseFirstLocales.lookupForAdd(lookup);
+
+        // ICU shouldn't report any duplicate locales, but if it does, just
+        // ignore the duplicated locale.
+        if (!p && !upperCaseFirstLocales.add(p, locale)) {
+            ReportOutOfMemory(cx);
+            return false;
+        }
+    }
+
+    MOZ_ASSERT(!upperCaseFirstInitialized,
+               "ensureUpperCaseFirstLocales is neither reentrant nor thread-safe");
+    upperCaseFirstInitialized = true; // later calls take the early return at the top
+
+    return true;
+}
+
+bool
+js::SharedIntlData::isUpperCaseFirst(JSContext* cx, HandleString locale, bool* isUpperFirst)
+{
+    if (!ensureUpperCaseFirstLocales(cx)) // builds the locale set on first use
+        return false;
+
+    RootedLinearString localeLinear(cx, locale->ensureLinear(cx)); // Lookup takes a JSLinearString*
+    if (!localeLinear)
+        return false;
+
+    LocaleHasher::Lookup lookup(localeLinear);
+    *isUpperFirst = upperCaseFirstLocales.has(lookup); // set membership is the answer
+
+    return true; // false is returned only on failure, never to mean "not upper-first"
+}
+
+bool
+js::intl_isUpperCaseFirst(JSContext* cx, unsigned argc, Value* vp)
+{
+    CallArgs args = CallArgsFromVp(argc, vp);
+    MOZ_ASSERT(args.length() == 1); // callers must pass exactly one argument...
+    MOZ_ASSERT(args[0].isString()); // ...and it must be the locale string
+
+    SharedIntlData& sharedIntlData = cx->runtime()->sharedIntlData.ref(); // data held on the runtime
+
+    RootedString locale(cx, args[0].toString());
+    bool isUpperFirst; // written by isUpperCaseFirst() when it succeeds
+    if (!sharedIntlData.isUpperCaseFirst(cx, locale, &isUpperFirst))
+        return false;
+
+    args.rval().setBoolean(isUpperFirst);
+    return true;
+}
+
 
 /******************** NumberFormat ********************/
 
 const ClassOps NumberFormatObject::classOps_ = {
     nullptr, /* addProperty */
     nullptr, /* delProperty */
     nullptr, /* getProperty */
     nullptr, /* setProperty */
@@ -2631,25 +2770,22 @@ HashStringIgnoreCaseASCII(const Char* s,
 {
     uint32_t hash = 0;
     for (size_t i = 0; i < length; i++)
         hash = mozilla::AddToHash(hash, ToUpperASCII(s[i]));
     return hash;
 }
 
 js::SharedIntlData::TimeZoneHasher::Lookup::Lookup(JSLinearString* timeZone)
-  : isLatin1(timeZone->hasLatin1Chars()), length(timeZone->length())
+  : js::SharedIntlData::LinearStringLookup(timeZone) // base ctor sets isLatin1, length and the char pointer
 {
-    if (isLatin1) {
-        latin1Chars = timeZone->latin1Chars(nogc);
+    if (isLatin1) // only the hash computation is left to this constructor
         hash = HashStringIgnoreCaseASCII(latin1Chars, length);
-    } else {
-        twoByteChars = timeZone->twoByteChars(nogc);
+    else
         hash = HashStringIgnoreCaseASCII(twoByteChars, length);
-    }
 }
 
 bool
 js::SharedIntlData::TimeZoneHasher::match(TimeZoneName key, const Lookup& lookup)
 {
     if (key->length() != lookup.length)
         return false;
 
@@ -2678,17 +2814,17 @@ IsLegacyICUTimeZone(const char* timeZone
 }
 
 bool
 js::SharedIntlData::ensureTimeZones(JSContext* cx)
 {
     if (timeZoneDataInitialized)
         return true;
 
-    // If initTimeZones() was called previously, but didn't complete due to
+    // If ensureTimeZones() was called previously, but didn't complete due to
     // OOM, clear all sets/maps and start from scratch.
     if (availableTimeZones.initialized())
         availableTimeZones.finish();
     if (!availableTimeZones.init()) {
         ReportOutOfMemory(cx);
         return false;
     }
 
@@ -2846,35 +2982,38 @@ js::SharedIntlData::tryCanonicalizeTimeZ
 }
 
 void
 js::SharedIntlData::destroyInstance()
 {
     availableTimeZones.finish();
     ianaZonesTreatedAsLinksByICU.finish();
     ianaLinksCanonicalizedDifferentlyByICU.finish();
+    upperCaseFirstLocales.finish(); // the caseFirst locale set is torn down with the other tables
 }
 
 void
 js::SharedIntlData::trace(JSTracer* trc)
 {
     // Atoms are always tenured.
     if (!JS::CurrentThreadIsHeapMinorCollecting()) {
         availableTimeZones.trace(trc);
         ianaZonesTreatedAsLinksByICU.trace(trc);
         ianaLinksCanonicalizedDifferentlyByICU.trace(trc);
+        upperCaseFirstLocales.trace(trc); // its keys are atoms too, so it is traced alongside the others
     }
 }
 
 size_t
 js::SharedIntlData::sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const
 {
     return availableTimeZones.sizeOfExcludingThis(mallocSizeOf) +
            ianaZonesTreatedAsLinksByICU.sizeOfExcludingThis(mallocSizeOf) +
-           ianaLinksCanonicalizedDifferentlyByICU.sizeOfExcludingThis(mallocSizeOf);
+           ianaLinksCanonicalizedDifferentlyByICU.sizeOfExcludingThis(mallocSizeOf) +
+           upperCaseFirstLocales.sizeOfExcludingThis(mallocSizeOf); // count the caseFirst locale set as well
 }
 
 bool
 js::intl_IsValidTimeZoneName(JSContext* cx, unsigned argc, Value* vp)
 {
     CallArgs args = CallArgsFromVp(argc, vp);
     MOZ_ASSERT(args.length() == 1);
     MOZ_ASSERT(args[0].isString());
--- a/js/src/builtin/Intl.h
+++ b/js/src/builtin/Intl.h
@@ -45,16 +45,38 @@ InitIntlClass(JSContext* cx, HandleObjec
 /**
  * Stores Intl data which can be shared across compartments (but not contexts).
  *
  * Used for data which is expensive when computed repeatedly or is not
  * available through ICU.
  */
 class SharedIntlData
 {
+    struct LinearStringLookup // common state for the hashers' Lookup types declared below
+    {
+        union { // only the member selected by |isLatin1| is assigned
+            const JS::Latin1Char* latin1Chars;
+            const char16_t* twoByteChars;
+        };
+        bool isLatin1;
+        size_t length; // value of string->length() at construction
+        JS::AutoCheckCannotGC nogc; // token handed to latin1Chars()/twoByteChars() below
+        HashNumber hash = 0; // left at 0 here; the derived Lookup constructors assign the real hash
+
+        explicit LinearStringLookup(JSLinearString* string)
+          : isLatin1(string->hasLatin1Chars()), length(string->length())
+        {
+            if (isLatin1) // fetch the raw characters in the string's own encoding
+                latin1Chars = string->latin1Chars(nogc);
+            else
+                twoByteChars = string->twoByteChars(nogc);
+        }
+    };
+
+  private:
     /**
      * Information tracking the set of the supported time zone names, derived
      * from the IANA time zone database <https://www.iana.org/time-zones>.
      *
      * There are two kinds of IANA time zone names: Zone and Link (denoted as
      * such in database source files). Zone names are the canonical, preferred
      * name for a time zone, e.g. Asia/Kolkata. Link names simply refer to
      * target Zone names for their meaning, e.g. Asia/Calcutta targets
@@ -74,27 +96,18 @@ class SharedIntlData
      * Also see <https://ssl.icu-project.org/trac/ticket/12044> and
      * <http://unicode.org/cldr/trac/ticket/9892>.
      */
 
     using TimeZoneName = JSAtom*;
 
     struct TimeZoneHasher
     {
-        struct Lookup
+        struct Lookup : LinearStringLookup // character data and hash slot now come from the base
         {
-            union {
-                const JS::Latin1Char* latin1Chars;
-                const char16_t* twoByteChars;
-            };
-            bool isLatin1;
-            size_t length;
-            JS::AutoCheckCannotGC nogc;
-            HashNumber hash;
-
             explicit Lookup(JSLinearString* timeZone);
         };
 
         static js::HashNumber hash(const Lookup& lookup) { return lookup.hash; }
         static bool match(TimeZoneName key, const Lookup& lookup);
     };
 
     using TimeZoneSet = js::GCHashSet<TimeZoneName,
@@ -164,16 +177,67 @@ class SharedIntlData
      * was found, |result| remains unchanged.
      *
      * This method only handles time zones which are canonicalized differently
      * by ICU when compared to IANA.
      */
     bool tryCanonicalizeTimeZoneConsistentWithIANA(JSContext* cx, JS::HandleString timeZone,
                                                    JS::MutableHandleString result);
 
+  private:
+    /**
+     * The case first parameter (BCP47 key "kf") allows switching the order of
+     * upper- and lower-case characters. ICU doesn't directly provide an API
+     * to query the default case first value of a given locale, but instead
+     * requires instantiating a collator object and then querying its case
+     * first attribute (UCOL_CASE_FIRST).
+     * To avoid instantiating an additional collator object whenever we need
+     * to retrieve the default case first value of a specific locale, we
+     * compute the default case first value for every supported locale only
+     * once and then keep a list of all locales which don't use the default
+     * case first setting.
+     * There is almost no difference between lower-case first and when case
+     * first is disabled (UCOL_LOWER_FIRST resp. UCOL_OFF), so we only need to
+     * track locales which use upper-case first as their default setting.
+     */
+
+    using Locale = JSAtom*;
+
+    struct LocaleHasher // hash policy used by LocaleSet
+    {
+        struct Lookup : LinearStringLookup
+        {
+            explicit Lookup(JSLinearString* locale); // also computes |hash| from the characters
+        };
+
+        static js::HashNumber hash(const Lookup& lookup) { return lookup.hash; } // precomputed in Lookup
+        static bool match(Locale key, const Lookup& lookup); // compares a stored atom with the lookup's chars
+    };
+
+    using LocaleSet = js::GCHashSet<Locale,
+                                    LocaleHasher,
+                                    js::SystemAllocPolicy>;
+
+    LocaleSet upperCaseFirstLocales;
+
+    bool upperCaseFirstInitialized = false;
+
+    /**
+     * Precomputes the available locales which use upper-case first sorting.
+     */
+    bool ensureUpperCaseFirstLocales(JSContext* cx);
+
+  public:
+    /**
+     * Sets |isUpperFirst| to true if |locale| sorts upper-case characters
+     * before lower-case characters.
+     */
+    bool isUpperCaseFirst(JSContext* cx, JS::HandleString locale, bool* isUpperFirst);
+
+  public:
     void destroyInstance();
 
     void trace(JSTracer* trc);
 
     size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const;
 };
 
 /*
@@ -241,16 +305,25 @@ intl_availableCollations(JSContext* cx, 
  *
  * Spec: ECMAScript Internationalization API Specification, 10.3.2.
  *
  * Usage: result = intl_CompareStrings(collator, x, y)
  */
 extern MOZ_MUST_USE bool
 intl_CompareStrings(JSContext* cx, unsigned argc, Value* vp);
 
+/**
+ * Returns true if the given locale sorts upper-case before lower-case
+ * characters.
+ *
+ * Usage: result = intl_isUpperCaseFirst(locale)
+ */
+extern MOZ_MUST_USE bool
+intl_isUpperCaseFirst(JSContext* cx, unsigned argc, Value* vp);
+
 
 /******************** NumberFormat ********************/
 
 class NumberFormatObject : public NativeObject
 {
   public:
     static const Class class_;
 
--- a/js/src/builtin/Intl.js
+++ b/js/src/builtin/Intl.js
@@ -1596,32 +1596,60 @@ var collatorInternalProperties = {
         var locales = this._availableLocales;
         if (locales)
             return locales;
 
         locales = intl_Collator_availableLocales();
         addSpecialMissingLanguageTags(locales);
         return (this._availableLocales = locales);
     },
-    relevantExtensionKeys: ["co", "kn"]
+    relevantExtensionKeys: ["co", "kn", "kf"]
 };
 
 
+/**
+ * Returns the supported caseFirst values for the given locale and usage. The
+ * first element in the returned array denotes the default value per ES2017
+ * Intl, 9.1 Internal slots of Service Constructors.
+ */
+function collatorCaseFirst(locale, usage) {
+    assert(typeof locale === "string", "locale should be string");
+    assert(usage === "sort" || usage === "search", "invalid usage option");
+
+    if (usage === "sort") { // only sort collations can default to upper-case first
+        // If |locale| is the default locale (e.g. da-DK), but only supported
+        // through a fallback (da), we need to get the actual locale before we
+        // can call intl_isUpperCaseFirst. Also see BestAvailableLocaleHelper.
+        var availableLocales = callFunction(collatorInternalProperties.availableLocales,
+                                            collatorInternalProperties);
+        var actualLocale = BestAvailableLocaleIgnoringDefault(availableLocales, locale);
+
+        if (intl_isUpperCaseFirst(actualLocale))
+            return ["upper", "false", "lower"]; // "upper" listed first makes it the default
+    }
+
+    // Default caseFirst values for all other languages and for "search" usage.
+    return ["false", "lower", "upper"];
+}
+
+
 function collatorSortLocaleData(locale) {
     return {
         co: intl_availableCollations(locale),
-        kn: ["false", "true"]
+        kn: ["false", "true"],
+        kf: collatorCaseFirst(locale, "sort"), // default is "upper" for upper-case-first locales
     };
 }
 
 
 function collatorSearchLocaleData(locale) {
     return {
         co: [null],
         kn: ["false", "true"],
+        kf: collatorCaseFirst(locale, "search"), // "search" always yields ["false", "lower", "upper"]
         // In theory the default sensitivity is locale dependent;
         // in reality the CLDR/ICU default strength is always tertiary.
         sensitivity: "variant"
     };
 }
 
 
 /**
new file mode 100644
--- /dev/null
+++ b/js/src/tests/Intl/Collator/caseFirst.js
@@ -0,0 +1,197 @@
+// |reftest| skip-if(!this.hasOwnProperty("Intl"))
+
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Locales which use caseFirst=off (resolved as "false") for the standard (sort) collation type.
+const defaultLocales = Intl.Collator.supportedLocalesOf(["en", "de", "es", "sv", "ar", "zh", "ja"]);
+
+// Locales which use caseFirst=upper for the standard (sort) collation type.
+const upperFirstLocales = Intl.Collator.supportedLocalesOf(["cu", "da", "mt"]);
+
+// Default collation for zh (pinyin) reorders "á" before "a" at secondary strength level.
+const accentReordered = ["zh"]; // left out of the "A" vs. "á" checks at secondary/tertiary strength
+
+const allLocales = [...defaultLocales, ...upperFirstLocales]; // for checks independent of the default
+
+
+// Check default "caseFirst" option is resolved correctly.
+for (let locale of defaultLocales) {
+    let col = new Intl.Collator(locale, {usage: "sort"});
+    assertEq(col.resolvedOptions().caseFirst, "false");
+}
+for (let locale of upperFirstLocales) {
+    let col = new Intl.Collator(locale, {usage: "sort"});
+    assertEq(col.resolvedOptions().caseFirst, "upper");
+}
+for (let locale of allLocales) {
+    let col = new Intl.Collator(locale, {usage: "search"});
+    assertEq(col.resolvedOptions().caseFirst, "false");
+}
+
+
+const collOptions = {usage: "sort"};
+const primary = {sensitivity: "base"};
+const secondary = {sensitivity: "accent"};
+const tertiary = {sensitivity: "variant"};
+const caseLevel = {sensitivity: "case"};
+const strengths = [primary, secondary, tertiary, caseLevel];
+
+// "A" is sorted after "a" when caseFirst=off is the default and strength is tertiary.
+for (let locale of defaultLocales) {
+    let col = new Intl.Collator(locale, Object.assign({}, collOptions, tertiary));
+
+    assertEq(col.compare("A", "a"), 1);
+    assertEq(col.compare("a", "A"), -1);
+}
+for (let locale of defaultLocales.filter(loc => !accentReordered.includes(loc))) {
+    let col = new Intl.Collator(locale, Object.assign({}, collOptions, tertiary));
+
+    assertEq(col.compare("A", "á"), -1);
+    assertEq(col.compare("á", "A"), 1);
+}
+
+// Also sorted after "a" with the sensitivity=case collator.
+for (let locale of defaultLocales) {
+    let col = new Intl.Collator(locale, Object.assign({}, collOptions, caseLevel));
+
+    assertEq(col.compare("A", "a"), 1);
+    assertEq(col.compare("a", "A"), -1);
+
+    assertEq(col.compare("A", "á"), 1);
+    assertEq(col.compare("á", "A"), -1);
+}
+
+
+// "A" is sorted before "a" when caseFirst=upper is the default and strength is tertiary.
+for (let locale of upperFirstLocales) {
+    let col = new Intl.Collator(locale, Object.assign({}, collOptions, tertiary));
+
+    assertEq(col.compare("A", "a"), -1);
+    assertEq(col.compare("a", "A"), 1);
+
+    assertEq(col.compare("A", "á"), -1);
+    assertEq(col.compare("á", "A"), 1);
+}
+
+// Also sorted before "a" with the sensitivity=case collator.
+for (let locale of upperFirstLocales) {
+    let col = new Intl.Collator(locale, Object.assign({}, collOptions, caseLevel));
+
+    assertEq(col.compare("A", "a"), -1);
+    assertEq(col.compare("a", "A"), 1);
+
+    assertEq(col.compare("A", "á"), -1);
+    assertEq(col.compare("á", "A"), 1);
+}
+
+
+// caseFirst=upper doesn't change the sort order when strength is below tertiary.
+for (let locale of allLocales) {
+    let col = new Intl.Collator(locale, Object.assign({}, collOptions, secondary));
+
+    assertEq(col.compare("A", "a"), 0);
+    assertEq(col.compare("a", "A"), 0);
+}
+for (let locale of allLocales.filter(loc => !accentReordered.includes(loc))) {
+    let col = new Intl.Collator(locale, Object.assign({}, collOptions, secondary));
+
+    assertEq(col.compare("A", "á"), -1);
+    assertEq(col.compare("á", "A"), 1);
+}
+
+for (let locale of allLocales) {
+    let col = new Intl.Collator(locale, Object.assign({}, collOptions, primary));
+
+    assertEq(col.compare("A", "a"), 0);
+    assertEq(col.compare("a", "A"), 0);
+
+    assertEq(col.compare("A", "á"), 0);
+    assertEq(col.compare("á", "A"), 0);
+}
+
+
+// caseFirst=upper doesn't change the sort order when there's a primary difference.
+for (let locale of allLocales) {
+    for (let strength of strengths) {
+        let col = new Intl.Collator(locale, Object.assign({}, collOptions, strength));
+
+        assertEq(col.compare("A", "b"), -1);
+        assertEq(col.compare("a", "B"), -1);
+    }
+}
+
+
+// caseFirst set through Unicode extension tag.
+for (let locale of allLocales) {
+    let colKfFalse = new Intl.Collator(locale + "-u-kf-false", {});
+    let colKfLower = new Intl.Collator(locale + "-u-kf-lower", {});
+    let colKfUpper = new Intl.Collator(locale + "-u-kf-upper", {});
+
+    assertEq(colKfFalse.resolvedOptions().caseFirst, "false");
+    assertEq(colKfFalse.compare("A", "a"), 1);
+    assertEq(colKfFalse.compare("a", "A"), -1);
+
+    assertEq(colKfLower.resolvedOptions().caseFirst, "lower");
+    assertEq(colKfLower.compare("A", "a"), 1);
+    assertEq(colKfLower.compare("a", "A"), -1);
+
+    assertEq(colKfUpper.resolvedOptions().caseFirst, "upper");
+    assertEq(colKfUpper.compare("A", "a"), -1);
+    assertEq(colKfUpper.compare("a", "A"), 1);
+}
+
+
+// caseFirst set through options value.
+for (let locale of allLocales) {
+    let colKfFalse = new Intl.Collator(locale, {caseFirst: "false"});
+    let colKfLower = new Intl.Collator(locale, {caseFirst: "lower"});
+    let colKfUpper = new Intl.Collator(locale, {caseFirst: "upper"});
+
+    assertEq(colKfFalse.resolvedOptions().caseFirst, "false");
+    assertEq(colKfFalse.compare("A", "a"), 1);
+    assertEq(colKfFalse.compare("a", "A"), -1);
+
+    assertEq(colKfLower.resolvedOptions().caseFirst, "lower");
+    assertEq(colKfLower.compare("A", "a"), 1);
+    assertEq(colKfLower.compare("a", "A"), -1);
+
+    assertEq(colKfUpper.resolvedOptions().caseFirst, "upper");
+    assertEq(colKfUpper.compare("A", "a"), -1);
+    assertEq(colKfUpper.compare("a", "A"), 1);
+}
+
+
+// Test Unicode extension tag and options value, the latter should win.
+for (let locale of allLocales) {
+    let colKfFalse = new Intl.Collator(locale + "-u-kf-upper", {caseFirst: "false"});
+    let colKfLower = new Intl.Collator(locale + "-u-kf-upper", {caseFirst: "lower"});
+    let colKfUpper = new Intl.Collator(locale + "-u-kf-lower", {caseFirst: "upper"});
+
+    assertEq(colKfFalse.resolvedOptions().caseFirst, "false");
+    assertEq(colKfFalse.compare("A", "a"), 1);
+    assertEq(colKfFalse.compare("a", "A"), -1);
+
+    assertEq(colKfLower.resolvedOptions().caseFirst, "lower");
+    assertEq(colKfLower.compare("A", "a"), 1);
+    assertEq(colKfLower.compare("a", "A"), -1);
+
+    assertEq(colKfUpper.resolvedOptions().caseFirst, "upper");
+    assertEq(colKfUpper.compare("A", "a"), -1);
+    assertEq(colKfUpper.compare("a", "A"), 1);
+}
+
+// Ensure languages are properly detected when additional subtags are present.
+if (Intl.Collator.supportedLocalesOf("da").length !== 0) {
+    assertEq(new Intl.Collator("da-DK", {usage: "sort"}).resolvedOptions().caseFirst, "upper");
+    assertEq(new Intl.Collator("da-Latn-DK", {usage: "sort"}).resolvedOptions().caseFirst, "upper");
+}
+if (Intl.Collator.supportedLocalesOf("mt").length !== 0) {
+    assertEq(new Intl.Collator("mt-MT", {usage: "sort"}).resolvedOptions().caseFirst, "upper");
+    assertEq(new Intl.Collator("mt-Latn-MT", {usage: "sort"}).resolvedOptions().caseFirst, "upper");
+}
+
+
+if (typeof reportCompare === "function")
+    reportCompare(0, 0, "ok");
--- a/js/src/vm/SelfHosting.cpp
+++ b/js/src/vm/SelfHosting.cpp
@@ -2612,16 +2612,17 @@ static const JSFunctionSpec intrinsic_fu
     JS_FN("intl_DateTimeFormat_availableLocales", intl_DateTimeFormat_availableLocales, 0,0),
     JS_FN("intl_defaultTimeZone", intl_defaultTimeZone, 0,0),
     JS_FN("intl_defaultTimeZoneOffset", intl_defaultTimeZoneOffset, 0,0),
     JS_FN("intl_FormatDateTime", intl_FormatDateTime, 2,0),
     JS_FN("intl_FormatNumber", intl_FormatNumber, 2,0),
     JS_FN("intl_GetCalendarInfo", intl_GetCalendarInfo, 1,0),
     JS_FN("intl_GetLocaleInfo", intl_GetLocaleInfo, 1,0),
     JS_FN("intl_ComputeDisplayNames", intl_ComputeDisplayNames, 3,0),
+    JS_FN("intl_isUpperCaseFirst", intl_isUpperCaseFirst, 1,0),
     JS_FN("intl_IsValidTimeZoneName", intl_IsValidTimeZoneName, 1,0),
     JS_FN("intl_NumberFormat", intl_NumberFormat, 2,0),
     JS_FN("intl_NumberFormat_availableLocales", intl_NumberFormat_availableLocales, 0,0),
     JS_FN("intl_numberingSystem", intl_numberingSystem, 1,0),
     JS_FN("intl_patternForSkeleton", intl_patternForSkeleton, 2,0),
     JS_FN("intl_patternForStyle", intl_patternForStyle, 3,0),
     JS_FN("intl_PluralRules_availableLocales", intl_PluralRules_availableLocales, 0,0),
     JS_FN("intl_GetPluralCategories", intl_GetPluralCategories, 2, 0),