Bug 844744, part 1 - Add some utils classes and static methods to aid in using ICU from gecko code. r=smontagu
author: Jonathan Watt <jwatt@jwatt.org>
date: Thu, 23 Jan 2014 15:42:54 +0000
changeset: 182529 340786b6f4dd21e3036c117e33fea3fd97925476
parent: 182528 2cd1363497cc5e8b19f1f4fcccf16ca96bd7e78a
child: 182530 2682af062a4bb866c9de8da6527e15365b86ee4d
push id: 3343
push user: ffxbld
push date: Mon, 17 Mar 2014 21:55:32 +0000
treeherder: mozilla-beta@2f7d3415f79f [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: smontagu
bugs: 844744
milestone: 29.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 844744, part 1 - Add some utils classes and static methods to aid in using ICU from gecko code. r=smontagu
intl/unicharutil/util/ICUUtils.cpp
intl/unicharutil/util/ICUUtils.h
intl/unicharutil/util/Makefile.in
intl/unicharutil/util/internal/Makefile.in
intl/unicharutil/util/moz.build
intl/unicharutil/util/objs.mozbuild
new file mode 100644
--- /dev/null
+++ b/intl/unicharutil/util/ICUUtils.cpp
@@ -0,0 +1,256 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef MOZILLA_INTERNAL_API
+#ifdef ENABLE_INTL_API
+
+#include "ICUUtils.h"
+#include "mozilla/Preferences.h"
+#include "nsIContent.h"
+#include "nsIDocument.h"
+#include "nsIToolkitChromeRegistry.h"
+#include "nsStringGlue.h"
+#include "unicode/uloc.h"
+#include "unicode/unum.h"
+
+using namespace mozilla;
+
+static bool gLocaleNumberGroupingEnabled;
+static const char LOCALE_NUMBER_GROUPING_PREF_STR[] = "dom.forms.number.grouping";
+
+// Returns the current value of the "dom.forms.number.grouping" pref.
+//
+// The first call registers gLocaleNumberGroupingEnabled as a variable cache
+// for that pref (with a default of true); every later call simply reads the
+// cached variable.
+static bool
+LocaleNumberGroupingIsEnabled()
+{
+  static bool sPrefCacheRegistered = false;
+
+  if (sPrefCacheRegistered) {
+    return gLocaleNumberGroupingEnabled;
+  }
+
+  // First use: hook our global up to the pref.
+  Preferences::AddBoolVarCache(&gLocaleNumberGroupingEnabled,
+                               LOCALE_NUMBER_GROUPING_PREF_STR,
+                               true);
+  sPrefCacheRegistered = true;
+
+  return gLocaleNumberGroupingEnabled;
+}
+
+// Hands out the next candidate BCP 47 language tag for mContent via
+// aBCP47LangTag, or the empty string once every source has been tried.
+//
+// Each stage records itself in mCurrentFallbackIndex *before* it is tried, so
+// a stage that yields nothing is never revisited and the next call resumes
+// with the following stage.
+void
+ICUUtils::LanguageTagIterForContent::GetNext(nsACString& aBCP47LangTag)
+{
+  // Stage 0: the language given by a 'lang'/'xml:lang' attribute on mContent
+  // or on any of its ancestors, if there is one.
+  if (mCurrentFallbackIndex < 0) {
+    mCurrentFallbackIndex = 0;
+    nsAutoString contentLang;
+    mContent->GetLang(contentLang);
+    if (!contentLang.IsEmpty()) {
+      aBCP47LangTag = NS_ConvertUTF16toUTF8(contentLang);
+      return;
+    }
+  }
+
+  // Stage 1: the language given by a Content-Language HTTP header or pragma
+  // directive on the owner document.
+  if (mCurrentFallbackIndex < 1) {
+    mCurrentFallbackIndex = 1;
+    nsAutoString documentLang;
+    mContent->OwnerDoc()->GetContentLanguage(documentLang);
+    if (!documentLang.IsEmpty()) {
+      aBCP47LangTag = NS_ConvertUTF16toUTF8(documentLang);
+      return;
+    }
+  }
+
+  // Stage 2: the user-agent's own locale, as selected for the "global"
+  // package in the chrome registry.
+  if (mCurrentFallbackIndex < 2) {
+    mCurrentFallbackIndex = 2;
+    nsAutoCString uaLocale;
+    nsCOMPtr<nsIToolkitChromeRegistry> chromeRegistry =
+      mozilla::services::GetToolkitChromeRegistryService();
+    if (chromeRegistry) {
+      chromeRegistry->GetSelectedLocale(NS_LITERAL_CSTRING("global"),
+                                        uaLocale);
+      if (!uaLocale.IsEmpty()) {
+        aBCP47LangTag = uaLocale;
+        return;
+      }
+    }
+  }
+
+  // TODO: Probably not worth it, but maybe have a fourth fallback to using
+  // the OS locale?
+
+  // Nothing left to try: an empty tag tells the caller we are exhausted.
+  aBCP47LangTag.Truncate();
+}
+
+/**
+ * Formats aValue as a localized decimal number.
+ *
+ * The language tags supplied by aLangTags are tried in order; the first one
+ * for which ICU can both open a number formatter and format the value wins.
+ * Digit grouping is switched on or off according to the
+ * "dom.forms.number.grouping" pref.
+ *
+ * @param aValue           The number to format.
+ * @param aLangTags        Language tag iterator; must not have been advanced
+ *                         yet (asserted).
+ * @param aLocalizedValue  Receives the formatted number on success and is
+ *                         left untouched on failure.
+ * @return true if the value was formatted, false if every language tag
+ *         failed.
+ */
+/* static */ bool
+ICUUtils::LocalizeNumber(double aValue,
+                         LanguageTagIterForContent& aLangTags,
+                         nsAString& aLocalizedValue)
+{
+  MOZ_ASSERT(aLangTags.IsAtStart(), "Don't call GetNext() before passing");
+
+  static const int32_t kBufferSize = 256;
+
+  UChar buffer[kBufferSize];
+
+  nsAutoCString langTag;
+  for (aLangTags.GetNext(langTag);
+       !langTag.IsEmpty();
+       aLangTags.GetNext(langTag)) {
+    UErrorCode status = U_ZERO_ERROR;
+    AutoCloseUNumberFormat format(unum_open(UNUM_DECIMAL, nullptr, 0,
+                                            langTag.get(), nullptr, &status));
+    if (U_FAILURE(status)) {
+      // unum_open() returns a null formatter when it fails, and
+      // unum_setAttribute() takes no status argument that would guard
+      // against that, so move on to the next language tag instead of using
+      // the formatter.
+      continue;
+    }
+    unum_setAttribute(format, UNUM_GROUPING_USED,
+                      LocaleNumberGroupingIsEnabled());
+    int32_t length = unum_formatDouble(format, aValue, buffer, kBufferSize,
+                                       nullptr, &status);
+    NS_ASSERTION(length < kBufferSize &&
+                 status != U_BUFFER_OVERFLOW_ERROR &&
+                 status != U_STRING_NOT_TERMINATED_WARNING,
+                 "Need a bigger buffer?!");
+    if (U_SUCCESS(status)) {
+      ICUUtils::AssignUCharArrayToString(buffer, length, aLocalizedValue);
+      return true;
+    }
+  }
+  return false;
+}
+
+/**
+ * Parses the localized decimal number serialized in aValue.
+ *
+ * The language tags supplied by aLangTags are tried in order. A language tag
+ * only counts as a successful parse if ICU reports success *and* the whole of
+ * aValue was consumed.
+ *
+ * @param aValue     The text to parse; an empty string yields NaN.
+ * @param aLangTags  Language tag iterator; must not have been advanced yet
+ *                   (asserted).
+ * @return The parsed number, or NaN if no language tag could parse aValue.
+ */
+/* static */ double
+ICUUtils::ParseNumber(nsAString& aValue,
+                      LanguageTagIterForContent& aLangTags)
+{
+  MOZ_ASSERT(aLangTags.IsAtStart(), "Don't call GetNext() before passing");
+
+  static_assert(sizeof(UChar) == 2 && sizeof(nsAString::char_type) == 2,
+                "Unexpected character size - the following cast is unsafe");
+
+  if (aValue.IsEmpty()) {
+    return std::numeric_limits<double>::quiet_NaN();
+  }
+
+  // Flatten the input once, up front, rather than once per language tag: it
+  // does not change while we iterate. Binding the temporary to a const
+  // reference keeps it (and the buffer it exposes) alive for the rest of the
+  // function.
+  const nsPromiseFlatString& flatValue = PromiseFlatString(aValue);
+  const UChar* text = reinterpret_cast<const UChar*>(flatValue.get());
+  const uint32_t length = aValue.Length();
+
+  nsAutoCString langTag;
+  for (aLangTags.GetNext(langTag);
+       !langTag.IsEmpty();
+       aLangTags.GetNext(langTag)) {
+    UErrorCode status = U_ZERO_ERROR;
+    AutoCloseUNumberFormat format(unum_open(UNUM_DECIMAL, nullptr, 0,
+                                            langTag.get(), nullptr, &status));
+    int32_t parsePos = 0;
+    // If unum_open() failed, |status| already holds an error, and that error
+    // is what makes the U_SUCCESS() check below reject this language tag.
+    double val = unum_parseDouble(format, text, length, &parsePos, &status);
+    if (U_SUCCESS(status) && parsePos == (int32_t)length) {
+      return val;
+    }
+  }
+  return std::numeric_limits<double>::quiet_NaN();
+}
+
+// Copies the first aLength UTF-16 code units of the ICU buffer aICUString
+// into aMozString.
+/* static */ void
+ICUUtils::AssignUCharArrayToString(UChar* aICUString,
+                                   int32_t aLength,
+                                   nsAString& aMozString)
+{
+  // ICU's UChar and Mozilla's nsAString::char_type are both 16-bit UTF-16
+  // code units, so the buffer can be reinterpreted instead of converted.
+  static_assert(sizeof(UChar) == 2 && sizeof(nsAString::char_type) == 2,
+                "Unexpected character size - the following cast is unsafe");
+
+  const nsAString::char_type* mozChars =
+    reinterpret_cast<const nsAString::char_type*>(aICUString);
+  aMozString.Assign(mozChars, aLength);
+
+  NS_ASSERTION(aLength == (int32_t)aMozString.Length(), "Conversion failed");
+}
+
+#if 0
+// Converts a BCP 47 language code into an ICU Locale built from the code's
+// language and (when available) country. Returns a bogus Locale if the code
+// cannot be converted.
+/* static */ Locale
+ICUUtils::BCP47CodeToLocale(const nsAString& aBCP47Code)
+{
+  MOZ_ASSERT(!aBCP47Code.IsEmpty(), "Don't pass an empty BCP 47 code");
+
+  // Starts out bogus, and is returned that way on any early failure below.
+  Locale result;
+  result.setToBogus();
+
+  // BCP47 codes are guaranteed to be ASCII, so lossy conversion is okay
+  NS_LossyConvertUTF16toASCII asciiCode(aBCP47Code);
+
+  UErrorCode status = U_ZERO_ERROR;
+
+  // Step 1: BCP 47 code -> ICU locale ID.
+  char localeID[256];
+  int32_t written = uloc_forLanguageTag(asciiCode.get(), localeID,
+                                        PR_ARRAY_SIZE(localeID) - 1, nullptr,
+                                        &status);
+  MOZ_ASSERT(written < int32_t(PR_ARRAY_SIZE(localeID)) - 1,
+             "Need a bigger buffer");
+  if (U_FAILURE(status) || written <= 0) {
+    return result;
+  }
+
+  // Step 2: pull the language out of the locale ID (required).
+  char lang[64];
+  written = uloc_getLanguage(localeID, lang, PR_ARRAY_SIZE(lang) - 1,
+                             &status);
+  MOZ_ASSERT(written < int32_t(PR_ARRAY_SIZE(lang)) - 1,
+             "Need a bigger buffer");
+  if (U_FAILURE(status) || written <= 0) {
+    return result;
+  }
+
+  // Step 3: pull the country out of the locale ID (optional).
+  char country[64];
+  written = uloc_getCountry(localeID, country, PR_ARRAY_SIZE(country) - 1,
+                            &status);
+  MOZ_ASSERT(written < int32_t(PR_ARRAY_SIZE(country)) - 1,
+             "Need a bigger buffer");
+  if (U_SUCCESS(status) && written > 0) {
+    result = Locale(lang, country);
+  }
+
+  // Either there was no usable country, or using it resulted in a bogus
+  // Locale, so try with only the lang.
+  if (result.isBogus()) {
+    result = Locale(lang);
+  }
+
+  return result;
+}
+
+// Copies the contents of the ICU string aICUString into aMozString.
+/* static */ void
+ICUUtils::ToMozString(UnicodeString& aICUString, nsAString& aMozString)
+{
+  // ICU's UnicodeString and Mozilla's nsAString both store UTF-16, so the
+  // ICU buffer can be reinterpreted instead of converted.
+  static_assert(sizeof(UChar) == 2 && sizeof(nsAString::char_type) == 2,
+                "Unexpected character size - the following cast is unsafe");
+
+  // The buffer is assigned without an explicit length, so it has to be the
+  // null-terminated one.
+  aMozString.Assign(reinterpret_cast<const nsAString::char_type*>(
+                      aICUString.getTerminatedBuffer()));
+
+  NS_ASSERTION((uint32_t)aICUString.length() == aMozString.Length(),
+               "Conversion failed");
+}
+
+// Sets the ICU string aICUString to the contents of aMozString.
+/* static */ void
+ICUUtils::ToICUString(nsAString& aMozString, UnicodeString& aICUString)
+{
+  // ICU's UnicodeString and Mozilla's nsAString both store UTF-16, so the
+  // Mozilla buffer can be reinterpreted instead of converted.
+  static_assert(sizeof(UChar) == 2 && sizeof(nsAString::char_type) == 2,
+                "Unexpected character size - the following cast is unsafe");
+
+  const uint32_t mozLength = aMozString.Length();
+  aICUString.setTo(
+    reinterpret_cast<const UChar*>(PromiseFlatString(aMozString).get()),
+    mozLength);
+
+  NS_ASSERTION((uint32_t)aICUString.length() == mozLength,
+               "Conversion failed");
+}
+#endif
+
+#endif /* ENABLE_INTL_API */
+#endif /* MOZILLA_INTERNAL_API */
+
new file mode 100644
--- /dev/null
+++ b/intl/unicharutil/util/ICUUtils.h
@@ -0,0 +1,109 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_ICUUtils_h__
+#define mozilla_ICUUtils_h__
+
+// We only build the ICU utils if we're building ICU:
+#ifdef ENABLE_INTL_API
+
+// The ICU utils implementation needs internal things like XPCOM strings and
+// nsGkAtom, so we only build when included into internal libs:
+#ifdef MOZILLA_INTERNAL_API
+
+#include "mozilla/Scoped.h"
+#include "nsStringGlue.h"
+#include "unicode/unum.h" // for UNumberFormat
+
+class nsIContent;
+
+// Traits that tell mozilla::Scoped how to manage a UNumberFormat handle: the
+// "empty" handle is nullptr, and a non-null handle is released by passing it
+// to unum_close().
+//
+// NOTE(review): this unnamed namespace lives in a header, so every
+// translation unit that includes it gets its own distinct
+// ScopedUNumberFormatTraits (and therefore its own AutoCloseUNumberFormat
+// type). Consider moving the traits into a named namespace.
+namespace {
+  struct ScopedUNumberFormatTraits {
+    typedef UNumberFormat* type;
+    static type empty() { return nullptr; }
+    static void release(type handle) { if (handle) unum_close(handle); }
+  };
+}
+// Owns a UNumberFormat* and closes it with unum_close() via the traits above.
+typedef mozilla::Scoped<ScopedUNumberFormatTraits> AutoCloseUNumberFormat;
+
+class ICUUtils
+{
+public:
+
+  /**
+   * This class is used to encapsulate an nsIContent object to allow lazy
+   * iteration over its primary and fallback BCP 47 language tags.
+   *
+   * The nsIContent is stored as a plain raw pointer, so presumably the caller
+   * is responsible for keeping the content alive for as long as the iterator
+   * is in use (TODO confirm).
+   */
+  class LanguageTagIterForContent {
+  public:
+    LanguageTagIterForContent(nsIContent* aContent)
+      : mContent(aContent)
+      , mCurrentFallbackIndex(-1) // -1: GetNext() has not been called yet
+    {}
+
+    /**
+     * Used to iterate over the nsIContent object's primary language tag and
+     * its fallback tags. The following sources of language tag information
+     * are tried in turn:
+     *
+     * 1) the "lang" of the nsIContent object (which is based on the 'lang'/
+     *    'xml:lang' attribute on itself or the nearest ancestor to have such
+     *    an attribute, if any);
+     * 2) the Content-Language HTTP pragma directive or HTTP header;
+     * 3) the configured language tag of the user-agent.
+     *
+     * A source that provides no language tag is skipped, so a single call may
+     * advance past more than one source.
+     *
+     * Once all fallbacks have been exhausted then this function will set
+     * aBCP47LangTag to the empty string.
+     */
+    void GetNext(nsACString& aBCP47LangTag);
+
+    bool IsAtStart() const {
+      return mCurrentFallbackIndex < 0; // true until GetNext() is first called
+    }
+
+  private:
+    nsIContent* mContent; // the content whose language tags are iterated
+    int8_t mCurrentFallbackIndex; // last source tried (0-2); -1 at the start
+  };
+
+  /**
+   * Attempts to localize aValue and return the result via the aLocalizedValue
+   * outparam. Returns true on success. Returns false on failure, in which
+   * case aLocalizedValue will be untouched.
+   *
+   * aLangTags must not have been advanced yet (this is asserted). Its
+   * language tags are tried in order until one of them succeeds.
+   */
+  static bool LocalizeNumber(double aValue,
+                             LanguageTagIterForContent& aLangTags,
+                             nsAString& aLocalizedValue);
+
+  /**
+   * Parses the localized number that is serialized in aValue using aLangTags
+   * and returns the result as a double. Returns NaN on failure.
+   *
+   * aLangTags must not have been advanced yet (this is asserted). Its
+   * language tags are tried in order, and a language tag only counts as
+   * having parsed aValue if all of aValue was consumed. An empty aValue
+   * gives NaN.
+   */
+  static double ParseNumber(nsAString& aValue,
+                            LanguageTagIterForContent& aLangTags);
+
+  static void AssignUCharArrayToString(UChar* aICUString, // UTF-16 buffer to copy from
+                                       int32_t aLength, // number of UChars to copy
+                                       nsAString& aMozString); // receives the copy
+
+#if 0
+  // Currently disabled because using C++ API doesn't play nicely with enabling
+  // system ICU.
+
+  /**
+   * Converts an IETF BCP 47 language code to an ICU Locale.
+   */
+  static Locale BCP47CodeToLocale(const nsAString& aBCP47Code);
+
+  static void ToMozString(UnicodeString& aICUString, nsAString& aMozString);
+  static void ToICUString(nsAString& aMozString, UnicodeString& aICUString);
+#endif
+};
+
+#endif /* MOZILLA_INTERNAL_API */
+#endif /* ENABLE_INTL_API */
+
+#endif /* mozilla_ICUUtils_h__ */
+
--- a/intl/unicharutil/util/Makefile.in
+++ b/intl/unicharutil/util/Makefile.in
@@ -9,13 +9,17 @@
 
 DIST_INSTALL = 1
 SDK_LIBRARY = $(LIBRARY)
 
 USE_STATIC_LIBS = 1
 
 include $(topsrcdir)/config/rules.mk
 
+ifdef ENABLE_INTL_API
+LOCAL_INCLUDES += $(MOZ_ICU_CFLAGS)
+endif
+
 ifdef _MSC_VER
 # Don't include directives about which CRT to use
 OS_COMPILE_CXXFLAGS += -Zl
 OS_COMPILE_CFLAGS += -Zl
 endif
--- a/intl/unicharutil/util/internal/Makefile.in
+++ b/intl/unicharutil/util/internal/Makefile.in
@@ -2,10 +2,14 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 # This makefile builds the version of unicharutils_s static library which uses
 # internal linkage. Components that use frozen (external) linkage should use
 # unicharutil_external_s.
 
+ifdef ENABLE_INTL_API
+LOCAL_INCLUDES += $(MOZ_ICU_CFLAGS)
+endif
+
 DIST_INSTALL = 1
 MOZILLA_INTERNAL_API = 1
--- a/intl/unicharutil/util/moz.build
+++ b/intl/unicharutil/util/moz.build
@@ -2,16 +2,17 @@
 # vim: set filetype=python:
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 DIRS += ['internal']
 
 EXPORTS += [
+    'ICUUtils.h',
     'nsBidiUtils.h',
     'nsSpecialCasingData.h',
     'nsUnicharUtils.h',
     'nsUnicodeProperties.h',
     'nsUnicodeScriptCodes.h',
 ]
 
 include('objs.mozbuild')
--- a/intl/unicharutil/util/objs.mozbuild
+++ b/intl/unicharutil/util/objs.mozbuild
@@ -1,15 +1,22 @@
 # -*- Mode: python; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40 -*-
 # vim: set filetype=python:
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
-intl_unicharutil_util_lcppsrcs = [
+intl_unicharutil_util_lcppsrcs = []
+
+if CONFIG['ENABLE_INTL_API']:
+    intl_unicharutil_util_lcppsrcs += [
+        'ICUUtils.cpp',
+    ]
+
+intl_unicharutil_util_lcppsrcs += [
     'nsBidiUtils.cpp',
     'nsSpecialCasingData.cpp',
     'nsUnicharUtils.cpp',
     'nsUnicodeProperties.cpp',
 ]
 
 intl_unicharutil_util_cppsrcs = [
     '%s/intl/unicharutil/util/%s' % (TOPSRCDIR, s) \