bug 728866 - pt 1 - expose low-level APIs for canonical composition and decomposition of a single Unicode character. r=smontagu
author: Jonathan Kew <jfkthame@gmail.com>
Mon, 20 Feb 2012 20:39:59 +0000
changeset: 88762 8ee6310cecda70be036be4eadd436735677d0898
parent: 88761 096697e2beab9af9069f0f4c6897cefb6ace5f84
child: 88763 2c3f7f3ace8913fa91417c4dc9c462b536a031fb
push id: 975
push user: ffxbld
push date: Tue, 13 Mar 2012 21:39:16 +0000
treeherder: mozilla-aurora@99faebf9dc36 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: smontagu
bugs: 728866
milestone: 13.0a1
bug 728866 - pt 1 - expose low-level APIs for canonical composition and decomposition of a single Unicode character. r=smontagu
intl/unicharutil/public/Makefile.in
intl/unicharutil/public/nsUnicodeNormalizer.h
intl/unicharutil/src/nsUnicodeNormalizer.cpp
intl/unicharutil/src/nsUnicodeNormalizer.h
--- a/intl/unicharutil/public/Makefile.in
+++ b/intl/unicharutil/public/Makefile.in
@@ -43,12 +43,13 @@ VPATH		= @srcdir@
 include $(DEPTH)/config/autoconf.mk
 
 MODULE		= unicharutil
 
 EXPORTS		= \
 		nsICaseConversion.h \
 		nsIUGenCategory.h \
 		nsUnicharUtilCIID.h \
+		nsUnicodeNormalizer.h \
 		$(NULL)
 
 include $(topsrcdir)/config/rules.mk
 
rename from intl/unicharutil/src/nsUnicodeNormalizer.h
rename to intl/unicharutil/public/nsUnicodeNormalizer.h
--- a/intl/unicharutil/src/nsUnicodeNormalizer.h
+++ b/intl/unicharutil/public/nsUnicodeNormalizer.h
@@ -52,12 +52,16 @@ public:
    virtual ~nsUnicodeNormalizer();
 
    NS_DECL_ISUPPORTS 
 
    NS_IMETHOD NormalizeUnicodeNFD( const nsAString& aSrc, nsAString& aDest);
    NS_IMETHOD NormalizeUnicodeNFC( const nsAString& aSrc, nsAString& aDest);
    NS_IMETHOD NormalizeUnicodeNFKD( const nsAString& aSrc, nsAString& aDest);
    NS_IMETHOD NormalizeUnicodeNFKC( const nsAString& aSrc, nsAString& aDest);
+
+   // low-level access to the composition data needed for HarfBuzz callbacks
+   static bool Compose(PRUint32 a, PRUint32 b, PRUint32 *ab);
+   static bool DecomposeNonRecursively(PRUint32 comp, PRUint32 *c1, PRUint32 *c2);
 };
 
 #endif //nsUnicodeNormalizer_h__
 
--- a/intl/unicharutil/src/nsUnicodeNormalizer.cpp
+++ b/intl/unicharutil/src/nsUnicodeNormalizer.cpp
@@ -708,8 +708,35 @@ nsUnicodeNormalizer::NormalizeUnicodeNFK
 }
 
 nsresult  
 nsUnicodeNormalizer::NormalizeUnicodeNFKC( const nsAString& aSrc, nsAString& aDest)
 {
   return mdn_normalize(true, true, aSrc, aDest);
 }
 
+bool
+nsUnicodeNormalizer::Compose(PRUint32 a, PRUint32 b, PRUint32 *ab)
+{
+  return mdn__unicode_compose(a, b, ab) == NS_OK; // NS_OK -> true, any other nsresult -> false
+}
+
+bool
+nsUnicodeNormalizer::DecomposeNonRecursively(PRUint32 c, PRUint32 *c1, PRUint32 *c2)
+{
+  // mdn__unicode_decompose is unsuitable here: it decomposes recursively and
+  // may yield more than two characters, whereas the HarfBuzz callback wants a
+  // single-step decomposition of at most two characters. So we look up the
+  // table of decomposition sequences directly. Returns true with *c1/*c2 set
+  // (*c2 is 0 for a one-character sequence); on false they are left untouched.
+  const PRUint32 *seq;
+  PRUint32 seqidx = decompose_char(c, &seq);
+  if (seqidx == 0 || ((seqidx & DECOMP_COMPAT) != 0)) {
+    return false; // seqidx 0 (presumably: no entry) or entry flagged DECOMP_COMPAT
+  }
+  *c1 = *seq & ~END_BIT; // strip the END_BIT flag to leave the bare code point
+  if (*seq & END_BIT) { // flag set on the first entry: treated as one-character
+    *c2 = 0;
+  } else {
+    *c2 = *++seq & ~END_BIT; // advance to the second entry and strip its flag
+  }
+  return true;
+}