Bug 1719733 - Part 1: Add a MeasureUnit class to the unified Intl API. r=platform-i18n-reviewers,dminor
author André Bargull <andre.bargull@gmail.com>
Mon, 20 Sep 2021 20:04:17 +0000
changeset 592549 55a4f2cf06f49227d1bdc56b49a24172b5bc9e31
parent 592548 1e7406c23249466462c778afcd1d9e14ef4f9291
child 592550 949781998bbf4d769c296bed911d2948f381e1c9
push id 150062
push user andre.bargull@gmail.com
push date Mon, 20 Sep 2021 20:06:43 +0000
treeherder autoland@949781998bbf [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers platform-i18n-reviewers, dminor
bugs 1719733
milestone 94.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1719733 - Part 1: Add a MeasureUnit class to the unified Intl API. r=platform-i18n-reviewers,dminor

Currently only provides a single method to retrieve all available measurement units.

`MeasureUnit::GetAvailable()` returns an enumeration similar to the other Intl classes which use `intl::Enumeration`. This approach gives us a more consistent API, because it abstracts away the internal ICU implementation, which looks up the measurement units through `UResourceBundle`. But it also means the implementation is slightly more complicated due to this additional abstraction.

The default constructor was deleted because all methods are static.

Differential Revision: https://phabricator.services.mozilla.com/D125966
intl/components/gtest/TestMeasureUnit.cpp
intl/components/gtest/moz.build
intl/components/moz.build
intl/components/src/MeasureUnit.cpp
intl/components/src/MeasureUnit.h
intl/components/src/NumberFormatterSkeleton.cpp
new file mode 100644
--- /dev/null
+++ b/intl/components/gtest/TestMeasureUnit.cpp
@@ -0,0 +1,43 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "gtest/gtest.h"
+
+#include "mozilla/intl/MeasureUnit.h"
+#include "mozilla/Span.h"
+
+namespace mozilla::intl {
+
+TEST(IntlMeasureUnit, GetAvailable)
+{
+  auto units = MeasureUnit::GetAvailable();
+  ASSERT_TRUE(units.isOk());  // Creating the enumeration must succeed.
+
+  // Probe for three identifiers that must be enumerated and one that must not.
+  auto gigabyte = MakeStringSpan("gigabyte");
+  auto liter = MakeStringSpan("liter");
+  auto meter = MakeStringSpan("meter");
+  auto meters = MakeStringSpan("meters");  // Plural "meters" is invalid.
+
+  bool hasGigabyte = false;
+  bool hasLiter = false;
+  bool hasMeter = false;
+  bool hasMeters = false;
+
+  for (auto unit : units.unwrap()) {
+    ASSERT_TRUE(unit.isOk());  // Every enumerated entry must be a success.
+    auto span = unit.unwrap();
+
+    hasGigabyte |= span == gigabyte;  // Flags latch once a match is seen.
+    hasLiter |= span == liter;
+    hasMeter |= span == meter;
+    hasMeters |= span == meters;
+  }
+
+  ASSERT_TRUE(hasGigabyte);
+  ASSERT_TRUE(hasLiter);
+  ASSERT_TRUE(hasMeter);
+  ASSERT_FALSE(hasMeters);  // The plural form must not have been enumerated.
+}
+
+}  // namespace mozilla::intl
--- a/intl/components/gtest/moz.build
+++ b/intl/components/gtest/moz.build
@@ -6,16 +6,17 @@
 
 UNIFIED_SOURCES += [
     "TestCalendar.cpp",
     "TestCollator.cpp",
     "TestCurrency.cpp",
     "TestDateTimeFormat.cpp",
     "TestListFormat.cpp",
     "TestLocaleCanonicalizer.cpp",
+    "TestMeasureUnit.cpp",
     "TestNumberFormat.cpp",
     "TestPluralRules.cpp",
     "TestRelativeTimeFormat.cpp",
 ]
 
 FINAL_LIBRARY = "xul-gtest"
 
 REQUIRES_UNIFIED_BUILD = True
--- a/intl/components/moz.build
+++ b/intl/components/moz.build
@@ -8,32 +8,34 @@ EXPORTS.mozilla.intl = [
     "src/Collator.h",
     "src/Currency.h",
     "src/DateTimeFormat.h",
     "src/DateTimePatternGenerator.h",
     "src/ICU4CGlue.h",
     "src/ICUError.h",
     "src/ListFormat.h",
     "src/LocaleCanonicalizer.h",
+    "src/MeasureUnit.h",
     "src/NumberFormat.h",
     "src/NumberPart.h",
     "src/NumberRangeFormat.h",
     "src/PluralRules.h",
     "src/RelativeTimeFormat.h",
 ]
 
 UNIFIED_SOURCES += [
     "src/Calendar.cpp",
     "src/Collator.cpp",
     "src/Currency.cpp",
     "src/DateTimeFormat.cpp",
     "src/DateTimePatternGenerator.cpp",
     "src/ICU4CGlue.cpp",
     "src/ListFormat.cpp",
     "src/LocaleCanonicalizer.cpp",
+    "src/MeasureUnit.cpp",
     "src/NumberFormat.cpp",
     "src/NumberFormatFields.cpp",
     "src/NumberFormatterSkeleton.cpp",
     "src/NumberRangeFormat.cpp",
     "src/PluralRules.cpp",
     "src/RelativeTimeFormat.cpp",
 ]
 
new file mode 100644
--- /dev/null
+++ b/intl/components/src/MeasureUnit.cpp
@@ -0,0 +1,110 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/intl/MeasureUnit.h"
+
+#include "unicode/udata.h"
+#include "unicode/ures.h"
+#include "unicode/utypes.h"
+
+namespace mozilla::intl {
+
+void MeasureUnit::UResourceBundleDeleter::operator()(UResourceBundle* aPtr) {
+  ures_close(aPtr);  // Hand the resource bundle back to ICU.
+}
+
+MeasureUnit::Enumeration::Enumeration(UniqueUResourceBundle aRootLocale,
+                                      UniqueUResourceBundle aUnits)
+    : mRootLocale(std::move(aRootLocale)), mUnits(std::move(aUnits)) {
+  mUnitsSize = ures_getSize(mUnits.get());  // Cache the |mUnits| entry count.
+}
+
+MeasureUnit::Enumeration::Iterator::value_type
+MeasureUnit::Enumeration::Iterator::operator*() const {
+  // Once |mHasError| is set, every dereference yields an error result.
+  if (mHasError) {
+    return Err(InternalError{});
+  }
+
+  // Otherwise return the resource key of |mSubtype|, i.e. the unit identifier.
+  const char* unitIdentifier = ures_getKey(mSubtype.get());
+  MOZ_ASSERT(unitIdentifier);
+  return MakeStringSpan(unitIdentifier);  // A view of the key; nothing is copied here.
+}
+
+void MeasureUnit::Enumeration::Iterator::advance() {
+  // Reject any attempts to modify this iterator after an error has occurred.
+  if (mHasError) {
+    return;
+  }
+
+  while (true) {  // Ends on: unit found, ICU error, or exhaustion.
+    // Read the next measurement unit (subtype) from the current |mType| table.
+    if (mTypePos < mTypeSize) {
+      UErrorCode status = U_ZERO_ERROR;
+      UResourceBundle* rawSubtype =
+          ures_getByIndex(mType.get(), mTypePos, nullptr, &status);
+      if (U_FAILURE(status)) {
+        mHasError = true;  // Sticky: later advance() calls return immediately.
+        return;
+      }
+
+      mTypePos += 1;
+      mSubtype.reset(rawSubtype);  // Takes ownership; closes the old subtype.
+      return;
+    }
+
+    // Current type table is used up: load the next type from the "units" table.
+    if (mUnitsPos < mEnumeration.mUnitsSize) {
+      UErrorCode status = U_ZERO_ERROR;
+      UResourceBundle* rawType = ures_getByIndex(mEnumeration.mUnits.get(),
+                                                 mUnitsPos, nullptr, &status);
+      if (U_FAILURE(status)) {
+        mHasError = true;  // Positions are left untouched on failure.
+        return;
+      }
+
+      mUnitsPos += 1;
+      mType.reset(rawType);  // Takes ownership; closes the old type table.
+      mTypeSize = ures_getSize(rawType);
+      mTypePos = 0;
+      continue;  // Re-run the subtype check; an empty table is skipped.
+    }
+
+    // All measurement units have been processed. Reset the two |mType*| fields
+    // to zero to match the end-iterator state and then return.
+    MOZ_ASSERT(mUnitsPos == mEnumeration.mUnitsSize);
+    mTypePos = 0;
+    mTypeSize = 0;
+    return;
+  }
+}
+
+Result<MeasureUnit::Enumeration, ICUError>
+MeasureUnit::Enumeration::TryCreate() {
+  // Open the "unit" data tree for the root locale and fetch its "units" table,
+  // returning the converted ICU error if either step fails.
+
+  static const char packageName[] =
+      U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "unit";
+  static const char rootLocale[] = "";  // Empty name selects the root locale.
+
+  UErrorCode status = U_ZERO_ERROR;
+  UResourceBundle* rawRes = ures_open(packageName, rootLocale, &status);
+  if (U_FAILURE(status)) {
+    return Err(ToICUError(status));
+  }
+  UniqueUResourceBundle res(rawRes);  // Closed automatically on early return.
+
+  UResourceBundle* rawUnits =
+      ures_getByKey(res.get(), "units", nullptr, &status);
+  if (U_FAILURE(status)) {
+    return Err(ToICUError(status));
+  }
+  UniqueUResourceBundle units(rawUnits);
+
+  return MeasureUnit::Enumeration(std::move(res), std::move(units));
+}
+
+}  // namespace mozilla::intl
new file mode 100644
--- /dev/null
+++ b/intl/components/src/MeasureUnit.h
@@ -0,0 +1,153 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef intl_components_MeasureUnit_h_
+#define intl_components_MeasureUnit_h_
+
+#include "mozilla/Assertions.h"
+#include "mozilla/intl/ICU4CGlue.h"
+#include "mozilla/intl/ICUError.h"
+#include "mozilla/Result.h"
+#include "mozilla/Span.h"
+#include "mozilla/UniquePtr.h"
+
+#include <iterator>
+#include <stddef.h>
+#include <stdint.h>
+#include <utility>
+
+struct UResourceBundle;
+
+namespace mozilla::intl {
+
+/**
+ * This component is a Mozilla-focused API for working with measurement units in
+ * internationalization code. It is used in coordination with other operations
+ * such as number formatting.
+ */
+class MeasureUnit final {
+  class UResourceBundleDeleter {
+   public:
+    void operator()(UResourceBundle* aPtr);  // Closes |aPtr| via ures_close().
+  };
+
+  using UniqueUResourceBundle =
+      UniquePtr<UResourceBundle, UResourceBundleDeleter>;
+
+ public:
+  MeasureUnit() = delete;  // Not instantiable; the API below is static.
+
+  class Enumeration final {
+    // Resource bundle for the root locale.
+    UniqueUResourceBundle mRootLocale = nullptr;
+
+    // Resource bundle for the root locale's "units" resource table.
+    UniqueUResourceBundle mUnits = nullptr;
+
+    // The number of entries (measurement types) in the |mUnits| table.
+    int32_t mUnitsSize = 0;
+
+   public:
+    Enumeration(UniqueUResourceBundle aRootLocale,
+                UniqueUResourceBundle aUnits);
+
+    class Iterator {
+      // Iterator trait aliases. NOTE(review): these precede |public:| and are therefore private; confirm std::iterator_traits access isn't needed.
+      using iterator_category = std::input_iterator_tag;
+      using value_type = SpanResult<char>;
+      using difference_type = ptrdiff_t;
+      using pointer = value_type*;
+      using reference = value_type&;
+
+      const Enumeration& mEnumeration;
+
+      // Resource bundle to a measurement type within the "units" table.
+      //
+      // Measurement types describe various categories, like "area", "length",
+      // or "mass".
+      UniqueUResourceBundle mType = nullptr;
+
+      // Resource bundle to a specific subtype within the type table.
+      //
+      // Measurement subtypes describe concrete measure units, like "acre",
+      // "meter", or "kilogram".
+      UniqueUResourceBundle mSubtype = nullptr;
+
+      // The next position within the "units" table.
+      int32_t mUnitsPos = 0;
+
+      // The number of entries (measurement subtypes) in the |mType| table.
+      int32_t mTypeSize = 0;
+
+      // The next position within the |mType| table.
+      int32_t mTypePos = 0;
+
+      // Flag set when an ICU error has occurred. All further operations on this
+      // iterator will return an error result when this flag is set.
+      bool mHasError = false;
+
+      void advance();  // Step to the next unit, or to the end state.
+
+     public:
+      Iterator(const Enumeration& aEnumeration, int32_t aUnitsPos)
+          : mEnumeration(aEnumeration), mUnitsPos(aUnitsPos) {
+        advance();  // Load the first unit at or after |aUnitsPos|, if any.
+      }
+
+      Iterator& operator++() {
+        advance();
+        return *this;
+      }
+
+      // The post-increment operator would return an invalid iterator, so it's
+      // not implemented.
+      Iterator operator++(int) = delete;
+
+      bool operator==(const Iterator& aOther) const {
+        // It's an error to compare an iterator against an iterator from a
+        // different enumeration.
+        MOZ_ASSERT(&mEnumeration == &aOther.mEnumeration);
+
+        return mUnitsPos == aOther.mUnitsPos && mTypeSize == aOther.mTypeSize &&
+               mTypePos == aOther.mTypePos && mHasError == aOther.mHasError;
+      }
+
+      bool operator!=(const Iterator& aOther) const {
+        return !(*this == aOther);
+      }
+
+      value_type operator*() const;
+    };
+
+    friend class Iterator;
+
+    // Range accessors, usable with range-based for loops.
+
+    /**
+     * Return an iterator positioned at the first available measurement unit.
+     */
+    Iterator begin() { return Iterator(*this, 0); }
+
+    /**
+     * Return the past-the-end iterator of this enumeration.
+     */
+    Iterator end() { return Iterator(*this, mUnitsSize); }
+
+    /**
+     * Create a new measurement unit enumeration.
+     */
+    static Result<Enumeration, ICUError> TryCreate();
+  };
+
+  /**
+   * Return an enumeration over all available measurement units.
+   */
+  static Result<Enumeration, ICUError> GetAvailable() {
+    return Enumeration::TryCreate();
+  }
+};
+
+}  // namespace mozilla::intl
+
+#endif
--- a/intl/components/src/NumberFormatterSkeleton.cpp
+++ b/intl/components/src/NumberFormatterSkeleton.cpp
@@ -117,17 +117,17 @@ bool NumberFormatterSkeleton::currencyDi
       return true;
     case NumberFormatOptions::CurrencyDisplay::NarrowSymbol:
       return appendToken(u"unit-width-narrow");
   }
   MOZ_ASSERT_UNREACHABLE("unexpected currency display type");
   return false;
 }
 
-static const MeasureUnit& FindSimpleMeasureUnit(std::string_view name) {
+static const ::MeasureUnit& FindSimpleMeasureUnit(std::string_view name) {
   const auto* measureUnit = std::lower_bound(
       std::begin(simpleMeasureUnits), std::end(simpleMeasureUnits), name,
       [](const auto& measureUnit, std::string_view name) {
         return name.compare(measureUnit.name) > 0;
       });
   MOZ_ASSERT(measureUnit != std::end(simpleMeasureUnits),
              "unexpected unit identifier: unit not found");
   MOZ_ASSERT(measureUnit->name == name,
@@ -141,17 +141,17 @@ static constexpr size_t MaxUnitLength() 
     length = std::max(length, std::char_traits<char>::length(unit.name));
   }
   return length * 2 + std::char_traits<char>::length("-per-");
 }
 
 bool NumberFormatterSkeleton::unit(std::string_view unit) {
   MOZ_RELEASE_ASSERT(unit.length() <= MaxUnitLength());
 
-  auto appendUnit = [this](const MeasureUnit& unit) {
+  auto appendUnit = [this](const ::MeasureUnit& unit) {
     return append(unit.type, strlen(unit.type)) && append('-') &&
            append(unit.name, strlen(unit.name));
   };
 
   // |unit| can be a compound unit identifier, separated by "-per-".
   static constexpr char separator[] = "-per-";
   size_t separator_len = strlen(separator);
   size_t offset = unit.find(separator);