Bug 728180 - Use ICU normalization functions to implement nsUnicodeNormalizer when ENABLE_INTL_API is defined, in place of our obsolete/unmaintained normalization code. r=emk
author: Jonathan Kew <jkew@mozilla.com>
Thu, 29 Sep 2016 12:27:50 +0100
changeset 315821 01cd7a8a158d425c216cb319898832c4891ec35e
parent 315820 826cc48624a31a2755c23bfe83535311df1567bc
child 315822 b6f62be719ae36609cde88c878c35c5160ff502f
push id: 20634
push user: cbook@mozilla.com
push date: Fri, 30 Sep 2016 10:10:13 +0000
treeherder: fx-team@afe79b010d13 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: emk
bugs: 728180
milestone: 52.0a1
Bug 728180 - Use ICU normalization functions to implement nsUnicodeNormalizer when ENABLE_INTL_API is defined, in place of our obsolete/unmaintained normalization code. r=emk
intl/unicharutil/moz.build
intl/unicharutil/nsUnicodeNormalizer.h
intl/unicharutil/nsUnicodeNormalizer_ICU.cpp
--- a/intl/unicharutil/moz.build
+++ b/intl/unicharutil/moz.build
@@ -22,15 +22,23 @@ EXPORTS += [
     'nsUnicodeNormalizer.h',
 ]
 
 UNIFIED_SOURCES += [
     'nsCaseConversionImp2.cpp',
     'nsCategoryImp.cpp',
     'nsEntityConverter.cpp',
     'nsSaveAsCharset.cpp',
-    'nsUnicodeNormalizer.cpp',
 ]
 
+if CONFIG['ENABLE_INTL_API']:
+    UNIFIED_SOURCES += [
+        'nsUnicodeNormalizer_ICU.cpp',
+    ]
+else:
+    UNIFIED_SOURCES += [
+        'nsUnicodeNormalizer.cpp',
+    ]
+
 FINAL_LIBRARY = 'xul'
 
 if CONFIG['GNU_CXX']:
     CXXFLAGS += ['-Wno-error=shadow']
--- a/intl/unicharutil/nsUnicodeNormalizer.h
+++ b/intl/unicharutil/nsUnicodeNormalizer.h
@@ -20,18 +20,21 @@ public:
 
    NS_DECL_ISUPPORTS 
 
    NS_IMETHOD NormalizeUnicodeNFD( const nsAString& aSrc, nsAString& aDest) override;
    NS_IMETHOD NormalizeUnicodeNFC( const nsAString& aSrc, nsAString& aDest) override;
    NS_IMETHOD NormalizeUnicodeNFKD( const nsAString& aSrc, nsAString& aDest) override;
    NS_IMETHOD NormalizeUnicodeNFKC( const nsAString& aSrc, nsAString& aDest) override;
 
-   // low-level access to the composition data needed for HarfBuzz callbacks
+#if !ENABLE_INTL_API
+   // Low-level access to the composition data needed for HarfBuzz callbacks;
+   // only required when ICU is not available in the build.
    static bool Compose(uint32_t a, uint32_t b, uint32_t *ab);
    static bool DecomposeNonRecursively(uint32_t comp, uint32_t *c1, uint32_t *c2);
+#endif
 
 private:
    virtual ~nsUnicodeNormalizer();
 };
 
 #endif //nsUnicodeNormalizer_h__
 
new file mode 100644
--- /dev/null
+++ b/intl/unicharutil/nsUnicodeNormalizer_ICU.cpp
@@ -0,0 +1,98 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsUnicodeNormalizer.h"
+#include "ICUUtils.h"
+#include "unicode/unorm2.h"
+#include "unicode/utext.h"
+
+NS_IMPL_ISUPPORTS(nsUnicodeNormalizer, nsIUnicodeNormalizer)
+
+nsUnicodeNormalizer::nsUnicodeNormalizer()
+{ // Intentionally empty: the constructor body performs no work.
+}
+
+nsUnicodeNormalizer::~nsUnicodeNormalizer()
+{ // Intentionally empty: the destructor body performs no work.
+}
+
+static nsresult
+DoNormalization(const UNormalizer2* aNorm, const nsAString& aSrc,
+                nsAString& aDest)
+{
+  UErrorCode errorCode = U_ZERO_ERROR;
+  const int32_t length = aSrc.Length();
+  const UChar* src = reinterpret_cast<const UChar*>(aSrc.BeginReading());
+  // Initial guess for a capacity that is likely to be enough for most cases.
+  int32_t capacity = length + (length >> 8) + 8;
+  do {
+    aDest.SetLength(capacity);
+    UChar* dest = reinterpret_cast<UChar*>(aDest.BeginWriting());
+    int32_t len = unorm2_normalize(aNorm, src, aSrc.Length(), dest, capacity,
+                                   &errorCode);
+    if (U_SUCCESS(errorCode)) {
+      aDest.SetLength(len);
+      break;
+    }
+    if (errorCode == U_BUFFER_OVERFLOW_ERROR) {
+      // Buffer wasn't big enough; adjust to the reported size and try again.
+      capacity = len;
+      errorCode = U_ZERO_ERROR;
+      continue;
+    }
+  } while (false);
+  return ICUUtils::UErrorToNsResult(errorCode);
+}
+
+nsresult
+nsUnicodeNormalizer::NormalizeUnicodeNFD(const nsAString& aSrc,
+                                         nsAString& aDest)
+{
+  // Converts aSrc to Normalization Form D, writing the result to aDest.
+  // unorm2_getNFDInstance hands back a static singleton that must not be
+  // deleted, so it is fetched once (on first call) and cached in the
+  // function-local statics below.
+  static UErrorCode sStatus = U_ZERO_ERROR;
+  static const UNormalizer2* sNormalizer = unorm2_getNFDInstance(&sStatus);
+  return U_FAILURE(sStatus) ? ICUUtils::UErrorToNsResult(sStatus)
+                            : DoNormalization(sNormalizer, aSrc, aDest);
+}
+
+nsresult
+nsUnicodeNormalizer::NormalizeUnicodeNFC(const nsAString& aSrc,
+                                         nsAString& aDest)
+{
+  // Converts aSrc to Normalization Form C. The ICU normalizer is a static
+  // singleton that must not be deleted; it is fetched once and cached.
+  static UErrorCode sStatus = U_ZERO_ERROR;
+  static const UNormalizer2* sNormalizer = unorm2_getNFCInstance(&sStatus);
+  return U_FAILURE(sStatus) ? ICUUtils::UErrorToNsResult(sStatus)
+                            : DoNormalization(sNormalizer, aSrc, aDest);
+}
+
+nsresult
+nsUnicodeNormalizer::NormalizeUnicodeNFKD(const nsAString& aSrc,
+                                          nsAString& aDest)
+{
+  // Converts aSrc to Normalization Form KD. The ICU normalizer is a static
+  // singleton that must not be deleted; it is fetched once and cached.
+  static UErrorCode sStatus = U_ZERO_ERROR;
+  static const UNormalizer2* sNormalizer = unorm2_getNFKDInstance(&sStatus);
+  return U_FAILURE(sStatus) ? ICUUtils::UErrorToNsResult(sStatus)
+                            : DoNormalization(sNormalizer, aSrc, aDest);
+}
+
+nsresult
+nsUnicodeNormalizer::NormalizeUnicodeNFKC(const nsAString& aSrc,
+                                          nsAString& aDest)
+{
+  // Converts aSrc to Normalization Form KC. The ICU normalizer is a static
+  // singleton that must not be deleted; it is fetched once and cached.
+  static UErrorCode sStatus = U_ZERO_ERROR;
+  static const UNormalizer2* sNormalizer = unorm2_getNFKCInstance(&sStatus);
+  return U_FAILURE(sStatus) ? ICUUtils::UErrorToNsResult(sStatus)
+                            : DoNormalization(sNormalizer, aSrc, aDest);
+}