bug 728866 - pt 1 - expose low-level APIs for canonical composition and decomposition of a single Unicode character. r=smontagu
--- a/intl/unicharutil/public/Makefile.in
+++ b/intl/unicharutil/public/Makefile.in
@@ -43,12 +43,13 @@ VPATH = @srcdir@
include $(DEPTH)/config/autoconf.mk
MODULE = unicharutil
EXPORTS = \
nsICaseConversion.h \
nsIUGenCategory.h \
nsUnicharUtilCIID.h \
+ nsUnicodeNormalizer.h \
$(NULL)
include $(topsrcdir)/config/rules.mk
rename from intl/unicharutil/src/nsUnicodeNormalizer.h
rename to intl/unicharutil/public/nsUnicodeNormalizer.h
--- a/intl/unicharutil/src/nsUnicodeNormalizer.h
+++ b/intl/unicharutil/public/nsUnicodeNormalizer.h
@@ -52,12 +52,16 @@ public:
virtual ~nsUnicodeNormalizer();
NS_DECL_ISUPPORTS
NS_IMETHOD NormalizeUnicodeNFD( const nsAString& aSrc, nsAString& aDest);
NS_IMETHOD NormalizeUnicodeNFC( const nsAString& aSrc, nsAString& aDest);
NS_IMETHOD NormalizeUnicodeNFKD( const nsAString& aSrc, nsAString& aDest);
NS_IMETHOD NormalizeUnicodeNFKC( const nsAString& aSrc, nsAString& aDest);
+
+ // low-level access to the composition data needed for HarfBuzz callbacks
+ //
+ // Compose: asks the composition lookup whether the pair (a, b) combines
+ // into a single character. Returns true only when that lookup reports
+ // exactly NS_OK, and false for every other status. ab is handed to the
+ // lookup as its output location (presumably it receives the composed
+ // character on success -- confirm against mdn__unicode_compose).
+ static bool Compose(PRUint32 a, PRUint32 b, PRUint32 *ab);
+ // DecomposeNonRecursively: single-step (non-recursive) decomposition of
+ // comp into at most two characters. Returns false, leaving *c1 and *c2
+ // untouched, when the decomposition lookup returns 0 for comp or when the
+ // result is flagged DECOMP_COMPAT. Otherwise returns true with the first
+ // character of the sequence in *c1 and the second in *c2; *c2 is set to 0
+ // when the sequence ends after its first character.
+ static bool DecomposeNonRecursively(PRUint32 comp, PRUint32 *c1, PRUint32 *c2);
};
#endif //nsUnicodeNormalizer_h__
--- a/intl/unicharutil/src/nsUnicodeNormalizer.cpp
+++ b/intl/unicharutil/src/nsUnicodeNormalizer.cpp
@@ -708,8 +708,35 @@ nsUnicodeNormalizer::NormalizeUnicodeNFK
}
nsresult
nsUnicodeNormalizer::NormalizeUnicodeNFKC( const nsAString& aSrc, nsAString& aDest)
{
return mdn_normalize(true, true, aSrc, aDest);
}
+bool
+nsUnicodeNormalizer::Compose(PRUint32 a, PRUint32 b, PRUint32 *ab)
+{
+  // Collapse the status code from the composition helper into a plain
+  // yes/no answer for callers: only an exact NS_OK counts as success, any
+  // other status is reported as "no composition".
+  if (mdn__unicode_compose(a, b, ab) != NS_OK) {
+    return false;
+  }
+  return true;
+}
+
+bool
+nsUnicodeNormalizer::DecomposeNonRecursively(PRUint32 c, PRUint32 *c1, PRUint32 *c2)
+{
+  // mdn__unicode_decompose is deliberately avoided: it decomposes
+  // recursively and can therefore produce more than two characters, whereas
+  // the harfbuzz callback needs a single-step result that never exceeds two
+  // characters. Instead, read the raw entry from the decomposition sequence
+  // table directly.
+  const PRUint32 *entry;
+  const PRUint32 index = decompose_char(c, &entry);
+
+  // A zero index is rejected, as is anything flagged as a compatibility
+  // decomposition; the output parameters are left untouched in both cases.
+  if (index == 0) {
+    return false;
+  }
+  if (index & DECOMP_COMPAT) {
+    return false;
+  }
+
+  // END_BIT marks the last element of a sequence; it is masked off before
+  // the value is handed back to the caller.
+  const PRUint32 first = entry[0];
+  *c1 = first & ~END_BIT;
+  // If the sequence ends after one element, report 0 as the second half.
+  *c2 = (first & END_BIT) ? 0 : (entry[1] & ~END_BIT);
+  return true;
+}