Bug 728180 - Use ICU normalization functions to implement nsUnicodeNormalizer when ENABLE_INTL_API is defined, in place of our obsolete/unmaintained normalization code. r=emk
author: Jonathan Kew <jkew@mozilla.com>
Thu, 29 Sep 2016 12:27:50 +0100
changeset 315734 01cd7a8a158d425c216cb319898832c4891ec35e
parent 315733 826cc48624a31a2755c23bfe83535311df1567bc
child 315735 b6f62be719ae36609cde88c878c35c5160ff502f
push id: 82249
push user: jkew@mozilla.com
push date: Thu, 29 Sep 2016 11:29:22 +0000
treeherder: mozilla-inbound@01cd7a8a158d [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: emk
bugs: 728180
milestone: 52.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 728180 - Use ICU normalization functions to implement nsUnicodeNormalizer when ENABLE_INTL_API is defined, in place of our obsolete/unmaintained normalization code. r=emk
intl/unicharutil/moz.build
intl/unicharutil/nsUnicodeNormalizer.h
intl/unicharutil/nsUnicodeNormalizer_ICU.cpp
--- a/intl/unicharutil/moz.build
+++ b/intl/unicharutil/moz.build
@@ -22,15 +22,23 @@ EXPORTS += [
     'nsUnicodeNormalizer.h',
 ]
 
 UNIFIED_SOURCES += [
     'nsCaseConversionImp2.cpp',
     'nsCategoryImp.cpp',
     'nsEntityConverter.cpp',
     'nsSaveAsCharset.cpp',
-    'nsUnicodeNormalizer.cpp',
 ]
 
+if CONFIG['ENABLE_INTL_API']:
+    UNIFIED_SOURCES += [
+        'nsUnicodeNormalizer_ICU.cpp',
+    ]
+else:
+    UNIFIED_SOURCES += [
+        'nsUnicodeNormalizer.cpp',
+    ]
+
 FINAL_LIBRARY = 'xul'
 
 if CONFIG['GNU_CXX']:
     CXXFLAGS += ['-Wno-error=shadow']
--- a/intl/unicharutil/nsUnicodeNormalizer.h
+++ b/intl/unicharutil/nsUnicodeNormalizer.h
@@ -20,18 +20,21 @@ public:
 
    NS_DECL_ISUPPORTS 
 
    NS_IMETHOD NormalizeUnicodeNFD( const nsAString& aSrc, nsAString& aDest) override;
    NS_IMETHOD NormalizeUnicodeNFC( const nsAString& aSrc, nsAString& aDest) override;
    NS_IMETHOD NormalizeUnicodeNFKD( const nsAString& aSrc, nsAString& aDest) override;
    NS_IMETHOD NormalizeUnicodeNFKC( const nsAString& aSrc, nsAString& aDest) override;
 
-   // low-level access to the composition data needed for HarfBuzz callbacks
+#if !ENABLE_INTL_API
+   // Low-level access to the composition data needed for HarfBuzz callbacks;
+   // only required when ICU is not available in the build.
    static bool Compose(uint32_t a, uint32_t b, uint32_t *ab);
    static bool DecomposeNonRecursively(uint32_t comp, uint32_t *c1, uint32_t *c2);
+#endif
 
 private:
    virtual ~nsUnicodeNormalizer();
 };
 
 #endif //nsUnicodeNormalizer_h__
 
new file mode 100644
--- /dev/null
+++ b/intl/unicharutil/nsUnicodeNormalizer_ICU.cpp
@@ -0,0 +1,98 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsUnicodeNormalizer.h"
+#include "ICUUtils.h"
+#include "unicode/unorm2.h"
+#include "unicode/utext.h"
+
+NS_IMPL_ISUPPORTS(nsUnicodeNormalizer, nsIUnicodeNormalizer)
+
+nsUnicodeNormalizer::nsUnicodeNormalizer()
+{
+}
+
+nsUnicodeNormalizer::~nsUnicodeNormalizer()
+{
+}
+
+// Normalize aSrc into aDest using aNorm, growing aDest if ICU needs more room.
+static nsresult
+DoNormalization(const UNormalizer2* aNorm, const nsAString& aSrc,
+                nsAString& aDest)
+{
+  UErrorCode errorCode = U_ZERO_ERROR;
+  const int32_t length = aSrc.Length();
+  const UChar* src = reinterpret_cast<const UChar*>(aSrc.BeginReading());
+  // Initial guess for a capacity that is likely to be enough for most cases.
+  int32_t capacity = length + (length >> 8) + 8;
+  int32_t len;
+  // Must be a real loop: "continue" inside do { } while (false) cannot retry.
+  do {
+    errorCode = U_ZERO_ERROR;
+    aDest.SetLength(capacity);
+    UChar* dest = reinterpret_cast<UChar*>(aDest.BeginWriting());
+    len = unorm2_normalize(aNorm, src, length, dest, capacity, &errorCode);
+    // On overflow, len is the size ICU needs; use it for the next attempt.
+    capacity = len;
+  } while (errorCode == U_BUFFER_OVERFLOW_ERROR);
+  if (U_SUCCESS(errorCode)) {
+    aDest.SetLength(len);
+  } else {
+    aDest.Truncate();
+  }
+  return ICUUtils::UErrorToNsResult(errorCode);
+}
+
+nsresult
+nsUnicodeNormalizer::NormalizeUnicodeNFD(const nsAString& aSrc,
+                                         nsAString& aDest)
+{
+  // Writes the NFD form of aSrc to aDest. unorm2_getNFDInstance hands back a
+  // static singleton that must not be deleted, so it is looked up once, on
+  // first use, and kept together with the status of that lookup; a failed
+  // lookup is reported to every caller.
+  static UErrorCode sStatus = U_ZERO_ERROR;
+  static const UNormalizer2* sNormalizer = unorm2_getNFDInstance(&sStatus);
+  return U_FAILURE(sStatus) ? ICUUtils::UErrorToNsResult(sStatus)
+                            : DoNormalization(sNormalizer, aSrc, aDest);
+}
+
+nsresult
+nsUnicodeNormalizer::NormalizeUnicodeNFC(const nsAString& aSrc,
+                                         nsAString& aDest)
+{
+  // Writes the NFC form of aSrc to aDest. The normalizer and the status of
+  // its lookup are function-local statics, so the lookup happens only once.
+  static UErrorCode sStatus = U_ZERO_ERROR;
+  static const UNormalizer2* sNormalizer = unorm2_getNFCInstance(&sStatus);
+  return U_FAILURE(sStatus) ? ICUUtils::UErrorToNsResult(sStatus)
+                            : DoNormalization(sNormalizer, aSrc, aDest);
+}
+
+nsresult
+nsUnicodeNormalizer::NormalizeUnicodeNFKD(const nsAString& aSrc,
+                                          nsAString& aDest)
+{
+  // Writes the NFKD form of aSrc to aDest. The normalizer and the status of
+  // its lookup are function-local statics, so the lookup happens only once.
+  static UErrorCode sStatus = U_ZERO_ERROR;
+  static const UNormalizer2* sNormalizer = unorm2_getNFKDInstance(&sStatus);
+  return U_FAILURE(sStatus) ? ICUUtils::UErrorToNsResult(sStatus)
+                            : DoNormalization(sNormalizer, aSrc, aDest);
+}
+
+nsresult
+nsUnicodeNormalizer::NormalizeUnicodeNFKC(const nsAString& aSrc,
+                                          nsAString& aDest)
+{
+  // Writes the NFKC form of aSrc to aDest. The normalizer and the status of
+  // its lookup are function-local statics, so the lookup happens only once.
+  static UErrorCode sStatus = U_ZERO_ERROR;
+  static const UNormalizer2* sNormalizer = unorm2_getNFKCInstance(&sStatus);
+  return U_FAILURE(sStatus) ? ICUUtils::UErrorToNsResult(sStatus)
+                            : DoNormalization(sNormalizer, aSrc, aDest);
+}