Bug 1400006 - Extend language negotiation in LocaleService to support looking for the best likelySubtag for the locale with region stripped. r=Pike, a=lizzard FIREFOX_56b13_RELBRANCH
authorZibi Braniecki <zbraniecki@mozilla.com>
Thu, 14 Sep 2017 15:21:33 -0700
branchFIREFOX_56b13_RELBRANCH
changeset 431384 c7571bece2ba41ff6b438cb55493246047142eab
parent 431383 d333328035f52b3841301e52cc8e64a633306108
child 431385 ccc85929fce03d5ad3b7c71fce837688655c29c7
push id7783
push userryanvm@gmail.com
push dateThu, 21 Sep 2017 00:21:59 +0000
treeherdermozilla-beta@a6fb3b978941 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersPike, lizzard
bugs1400006
milestone56.0
Bug 1400006 - Extend language negotiation in LocaleService to support looking for the best likelySubtag for the locale with region stripped. r=Pike, a=lizzard Add additional logic to our language negotation to do apply likelySubtags when a direct match is not available. Currently, if the user specifies the locale with region, and we do not have a direct for that region, we pick all locales for the same language and other regions in no order. The example of where it returns suboptimal results: 1) Requested locale "en-CA" 2) Available locales ["en-ZA", "en-GB", "en-US"] 3) Negotiated locales ["en-ZA", "en-GB", "en-US"] This would not happen, if the user requested a generic "de", "en" etc.: 1) Requested locale "en" 2) Available locales ["en-ZA", "en-GB", "en-US"] 3) Negotiated locales ["en-US", "en-ZA", "en-GB"] because after not finding a direct match, we would use likelySubtags to extend "en" to "en-Latn-US" and then find the priority match in "en-US". This patch extends this logic to "en-US" or "de-LU" by adding a step which strips the region tag and then applies likelySubtag on the result. This means that in absence of direct match the following fallbacks would happen: "de-LU" -> "de-DE" "es-CL" -> "es-ES" "en-CA" -> "en-US" This does not affect languages that use multiple scripts, so ar, sr and zh are not affected. MozReview-Commit-ID: BR1WrgXSf6a
intl/locale/LocaleService.cpp
intl/locale/LocaleService.h
intl/locale/tests/unit/test_localeService_negotiateLanguages.js
--- a/intl/locale/LocaleService.cpp
+++ b/intl/locale/LocaleService.cpp
@@ -344,17 +344,17 @@ LocaleService::OnLocalesChanged()
             case LangNegStrategy::Filtering: \
               break; \
           }
 
 /**
  * This is the raw algorithm for language negotiation based roughly
  * on RFC4647 language filtering, with changes from LDML language matching.
  *
- * The exact algorithm is custom, and consist of 5 level strategy:
+ * The exact algorithm is custom, and consists of a 6 level strategy:
  *
  * 1) Attempt to find an exact match for each requested locale in available
  *    locales.
  *    Example: ['en-US'] * ['en-US'] = ['en-US']
  *
  * 2) Attempt to match a requested locale to an available locale treated
  *    as a locale range.
  *    Example: ['en-US'] * ['en'] = ['en']
@@ -367,17 +367,24 @@ LocaleService::OnLocalesChanged()
  *               ^^
  *               |-- ICU likelySubtags expands it to 'en-Latn-US'
  *
  * 4) Attempt to look up for a different variant of the same locale.
  *    Example: ['ja-JP-win'] * ['ja-JP-mac'] = ['ja-JP-mac']
  *               ^^^^^^^^^
  *               |----------- replace variant with range: 'ja-JP-*'
  *
- * 5) Attempt to look up for a different region of the same locale.
+ * 5) Attempt to look up for a maximized version of the requested locale,
+ *    stripped of the region code.
+ *    Example: ['en-CA'] * ['en-ZA', 'en-US'] = ['en-US', 'en-ZA']
+ *               ^^^^^
+ *               |----------- look for likelySubtag of 'en': 'en-Latn-US'
+ *
+ *
+ * 6) Attempt to look up for a different region of the same locale.
  *    Example: ['en-GB'] * ['en-AU'] = ['en-AU']
  *               ^^^^^
  *               |----- replace region with range: 'en-*'
  *
  * It uses one of the strategies described in LocaleService.h.
  */
 void
 LocaleService::FilterMatches(const nsTArray<nsCString>& aRequested,
@@ -453,17 +460,25 @@ LocaleService::FilterMatches(const nsTAr
     }
 
     // 4) Try to match against a variant as a range
     requestedLocale.SetVariantRange();
     if (findRangeMatches(requestedLocale)) {
       HANDLE_STRATEGY;
     }
 
-    // 5) Try to match against a region as a range
+    // 5) Try to match against the likely subtag without region
+    if (requestedLocale.AddLikelySubtagsWithoutRegion()) {
+      if (findRangeMatches(requestedLocale)) {
+        HANDLE_STRATEGY;
+      }
+    }
+
+
+    // 6) Try to match against a region as a range
     requestedLocale.SetRegionRange();
     if (findRangeMatches(requestedLocale)) {
       HANDLE_STRATEGY;
     }
   }
 }
 
 bool
@@ -816,38 +831,63 @@ void
 LocaleService::Locale::SetRegionRange()
 {
   mRegion.AssignLiteral("*");
 }
 
 bool
 LocaleService::Locale::AddLikelySubtags()
 {
+  return AddLikelySubtagsForLocale(mLocaleStr);
+}
+
+bool
+LocaleService::Locale::AddLikelySubtagsWithoutRegion()
+{
+  nsAutoCString locale(mLanguage);
+
+  if (!mScript.IsEmpty()) {
+    locale.Append("-");
+    locale.Append(mScript);
+  }
+
+  // We don't add variant here because likelySubtag doesn't care about it.
+
+  return AddLikelySubtagsForLocale(locale);
+}
+
+bool
+LocaleService::Locale::AddLikelySubtagsForLocale(const nsACString& aLocale)
+{
 #ifdef ENABLE_INTL_API
   const int32_t kLocaleMax = 160;
   char maxLocale[kLocaleMax];
+  nsAutoCString locale(aLocale);
 
   UErrorCode status = U_ZERO_ERROR;
-  uloc_addLikelySubtags(mLocaleStr.get(), maxLocale, kLocaleMax, &status);
+  uloc_addLikelySubtags(locale.get(), maxLocale, kLocaleMax, &status);
 
   if (U_FAILURE(status)) {
     return false;
   }
 
   nsDependentCString maxLocStr(maxLocale);
   Locale loc = Locale(maxLocStr, false);
 
   if (loc == *this) {
     return false;
   }
 
   mLanguage = loc.mLanguage;
   mScript = loc.mScript;
   mRegion = loc.mRegion;
-  mVariant = loc.mVariant;
+
+  // We don't update variant from likelySubtag since it's not going to
+  // provide it and we want to preserve the range
+
   return true;
 #else
   return false;
 #endif
 }
 
 NS_IMETHODIMP
 LocaleService::GetRequestedLocales(uint32_t* aCount, char*** aOutArray)
--- a/intl/locale/LocaleService.h
+++ b/intl/locale/LocaleService.h
@@ -264,17 +264,19 @@ private:
     Locale(const nsCString& aLocale, bool aRange);
 
     bool Matches(const Locale& aLocale) const;
     bool LanguageMatches(const Locale& aLocale) const;
 
     void SetVariantRange();
     void SetRegionRange();
 
-    bool AddLikelySubtags(); // returns false if nothing changed
+    // returns false if nothing changed
+    bool AddLikelySubtags();
+    bool AddLikelySubtagsWithoutRegion();
 
     const nsCString& AsString() const {
       return mLocaleStr;
     }
 
     bool operator== (const Locale& aOther) {
       const auto& cmp = nsCaseInsensitiveCStringComparator();
       return mLanguage.Equals(aOther.mLanguage, cmp) &&
@@ -284,16 +286,18 @@ private:
     }
 
   private:
     const nsCString& mLocaleStr;
     nsCString mLanguage;
     nsCString mScript;
     nsCString mRegion;
     nsCString mVariant;
+
+    bool AddLikelySubtagsForLocale(const nsACString& aLocale);
   };
 
   void FilterMatches(const nsTArray<nsCString>& aRequested,
                      const nsTArray<nsCString>& aAvailable,
                      LangNegStrategy aStrategy,
                      nsTArray<nsCString>& aRetVal);
 
   void NegotiateAppLocales(nsTArray<nsCString>& aRetVal);
--- a/intl/locale/tests/unit/test_localeService_negotiateLanguages.js
+++ b/intl/locale/tests/unit/test_localeService_negotiateLanguages.js
@@ -30,16 +30,19 @@ const data = {
       [["fr"], ["fr-CA", "fr-FR"], ["fr-FR", "fr-CA"]],
       [["az-IR"], ["az-Latn", "az-Arab"], ["az-Arab"]],
       [["sr-RU"], ["sr-Cyrl", "sr-Latn"], ["sr-Latn"]],
       [["sr"], ["sr-Latn", "sr-Cyrl"], ["sr-Cyrl"]],
       [["zh-GB"], ["zh-Hans", "zh-Hant"], ["zh-Hant"]],
       [["sr", "ru"], ["sr-Latn", "ru"], ["ru"]],
       [["sr-RU"], ["sr-Latn-RO", "sr-Cyrl"], ["sr-Latn-RO"]],
     ],
+    "should match likelySubtag region over other regions": [
+      [["en-CA"], ["en-ZA", "en-GB", "en-US"], ["en-US", "en-ZA", "en-GB"]],
+    ],
     "should match on a requested locale as a range": [
       [["en-*-US"], ["en-US"], ["en-US"]],
       [["en-Latn-US-*"], ["en-Latn-US"], ["en-Latn-US"]],
       [["en-*-US-*"], ["en-US"], ["en-US"]],
     ],
     "should match cross-region": [
       [["en"], ["en-US"], ["en-US"]],
       [["en-US"], ["en-GB"], ["en-GB"]],