Bug 336959. Use Pango to break lines for CJK and Thai. r+sr=roc, patch by Theppitak Karoonboonyanan.
author roc+@cs.cmu.edu
Tue, 22 May 2007 21:06:20 -0700
changeset 1755 7c4ea83b9516b948f0e81ce1a3f80829e8647224
parent 1754 175daa67b4b769deb427acdc85fdebac6c596292
child 1756 3e60b9d72f4b7c6b80f7b15e12e0ef6ad34cd61c
push id unknown
push user unknown
push date unknown
bugs 336959
milestone 1.9a5pre
Bug 336959. Use Pango to break lines for CJK and Thai. r+sr=roc, patch by Theppitak Karoonboonyanan.
content/base/src/nsLineBreaker.cpp
intl/build/Makefile.in
intl/build/nsI18nModule.cpp
intl/lwbrk/src/Makefile.in
intl/lwbrk/src/nsLWBrkConstructors.h
intl/lwbrk/src/nsLWIMP.h
intl/lwbrk/src/nsPangoLineBreaker.cpp
intl/lwbrk/src/nsPangoLineBreaker.h
--- a/content/base/src/nsLineBreaker.cpp
+++ b/content/base/src/nsLineBreaker.cpp
@@ -52,20 +52,22 @@ static inline int
 IS_SPACE(PRUint8 u)
 {
   return u == 0x0020;
 }
 
 static inline int
 IS_CJK_CHAR(PRUnichar u)
 {
-  return (0x1100 <= u && u <= 0x11ff) ||
-         (0x2e80 <= u && u <= 0xd7ff) ||
-         (0xf900 <= u && u <= 0xfaff) ||
-         (0xff00 <= u && u <= 0xffef);
+  // CJK plus other scripts that require special line breaking treatment
+  return (0x0e01 <= u && u <= 0x0e5f) || // Thai
+         (0x1100 <= u && u <= 0x11ff) || // Hangul Jamo
+         (0x2e80 <= u && u <= 0xd7ff) || // several CJK blocks
+         (0xf900 <= u && u <= 0xfaff) || // CJK Compatibility Ideographs
+         (0xff00 <= u && u <= 0xffef);   // Halfwidth and Fullwidth Forms
 }
 
 nsLineBreaker::nsLineBreaker()
   : mCurrentWordContainsCJK(PR_FALSE),
     mBreakBeforeNonWhitespace(PR_FALSE)
 {
 }
 
--- a/intl/build/Makefile.in
+++ b/intl/build/Makefile.in
@@ -101,8 +101,18 @@ EXTRA_DSO_LDOPTS = \
 ifneq (,$(filter mac cocoa,$(MOZ_WIDGET_TOOLKIT)))
 EXTRA_DSO_LDOPTS += \
         $(TK_LIBS) \
         $(NULL)
 endif
 
 include $(topsrcdir)/config/rules.mk
 
+ifdef MOZ_ENABLE_PANGO
+CXXFLAGS += \
+	$(MOZ_PANGO_CFLAGS) \
+	$(NULL)
+
+EXTRA_DSO_LDOPTS += \
+	$(MOZ_PANGO_LIBS) \
+	$(NULL)
+endif
+
--- a/intl/build/nsI18nModule.cpp
+++ b/intl/build/nsI18nModule.cpp
@@ -54,18 +54,23 @@
 #include "nsLocaleConstructors.h"
 
 
 NS_GENERIC_FACTORY_CONSTRUCTOR(nsSemanticUnitScanner)
 
 static nsModuleComponentInfo components[] =
 {
  // lwbrk
+#ifdef MOZ_ENABLE_PANGO
+  { "Line Breaker", NS_LBRK_CID, 
+    NS_LBRK_CONTRACTID, nsPangoLineBreakerConstructor},
+#else
   { "Line Breaker", NS_LBRK_CID, 
     NS_LBRK_CONTRACTID, nsJISx4051LineBreakerConstructor},
+#endif
   { "Word Breaker", NS_WBRK_CID,
     NS_WBRK_CONTRACTID, nsSampleWordBreakerConstructor},
   { "Semantic Unit Scanner", NS_SEMANTICUNITSCANNER_CID,
     NS_SEMANTICUNITSCANNER_CONTRACTID, nsSemanticUnitScannerConstructor},
 
  // unicharutil
   { "Unichar Utility", NS_UNICHARUTIL_CID, 
       NS_UNICHARUTIL_CONTRACTID, nsCaseConversionImp2Constructor},
--- a/intl/lwbrk/src/Makefile.in
+++ b/intl/lwbrk/src/Makefile.in
@@ -50,14 +50,34 @@ LIBXUL_LIBRARY  = 1
 REQUIRES	= xpcom \
 		  string \
 		  unicharutil \
 		  $(NULL)
 
 CSRCS		= rulebrk.c
 
 CPPSRCS		= \
-		nsJISx4501LineBreaker.cpp \
 		nsSampleWordBreaker.cpp \
 		nsSemanticUnitScanner.cpp \
 		$(NULL)
 
+ifdef MOZ_ENABLE_PANGO
+CPPSRCS		+= \
+		nsPangoLineBreaker.cpp \
+		$(NULL)
+else
+CPPSRCS		+= \
+		nsJISx4501LineBreaker.cpp \
+		$(NULL)
+endif
+
 include $(topsrcdir)/config/rules.mk
+
+ifdef MOZ_ENABLE_PANGO
+CXXFLAGS		+= \
+			$(MOZ_PANGO_CFLAGS) \
+			$(NULL)
+
+EXTRA_DSO_LDOPTS	+= \
+			$(MOZ_PANGO_LIBS) \
+			$(NULL)
+endif
+
--- a/intl/lwbrk/src/nsLWBrkConstructors.h
+++ b/intl/lwbrk/src/nsLWBrkConstructors.h
@@ -36,17 +36,26 @@
  * ***** END LICENSE BLOCK ***** */
 
 #ifndef nsLWBrkConstructors_h__
 #define nsLWBrkConstructors_h__
 
 #include "nsLWBrkCIID.h"
 #include "nsILineBreaker.h"
 #include "nsIWordBreaker.h"
-#include "nsJISx4501LineBreaker.h"
+#ifdef MOZ_ENABLE_PANGO
+# include "nsPangoLineBreaker.h"
+#else
+# include "nsJISx4501LineBreaker.h"
+#endif
 #include "nsSampleWordBreaker.h"
 #include "nsLWBRKDll.h"
 
+#ifdef MOZ_ENABLE_PANGO
+NS_GENERIC_FACTORY_CONSTRUCTOR(nsPangoLineBreaker)
+#else
 NS_GENERIC_FACTORY_CONSTRUCTOR(nsJISx4051LineBreaker)
+#endif
+
 NS_GENERIC_FACTORY_CONSTRUCTOR(nsSampleWordBreaker)
 
 #endif
      
--- a/intl/lwbrk/src/nsLWIMP.h
+++ b/intl/lwbrk/src/nsLWIMP.h
@@ -32,15 +32,21 @@
  * and other provisions required by the GPL or the LGPL. If you do not delete
  * the provisions above, a recipient may use your version of this file under
  * the terms of any one of the MPL, the GPL or the LGPL.
  *
  * ***** END LICENSE BLOCK ***** */
 #ifndef nsLWIMP_h__
 #define nsLWIMP_h__
 
-#include "nsJISx4501LineBreaker.h"
+#ifdef MOZ_ENABLE_PANGO
+# include "nsPangoLineBreaker.h"
+# define LINEBREAKER nsPangoLineBreaker
+#else
+# include "nsJISx4501LineBreaker.h"
+# define LINEBREAKER nsJISx4501LineBreaker
+#endif
+
 #include "nsSampleWordBreaker.h"
 
-#define LINEBREAKER nsJISx4501LineBreaker
 #define WORDBREAKER nsSampleWordBreaker
 
 #endif  /* nsLWIMP_h__ */
new file mode 100644
--- /dev/null
+++ b/intl/lwbrk/src/nsPangoLineBreaker.cpp
@@ -0,0 +1,161 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is mozilla.org code.
+ *
+ * The Initial Developer of the Original Code is
+ * Theppitak Karoonboonyanan <thep@linux.thai.net>.
+ * Portions created by the Initial Developer are Copyright (C) 2007
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * - Theppitak Karoonboonyanan <thep@linux.thai.net>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either of the GNU General Public License Version 2 or later (the "GPL"),
+ * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+
+
+#include "nsPangoLineBreaker.h"
+
+#include <pango/pango.h>
+
+#include "nsLWBRKDll.h"
+#include "nsUnicharUtils.h"
+#include "nsUTF8Utils.h"
+#include "nsString.h"
+#include "nsTArray.h"
+
+
+NS_IMPL_ISUPPORTS1(nsPangoLineBreaker, nsILineBreaker)
+
+PRBool
+nsPangoLineBreaker::BreakInBetween(const PRUnichar* aText1 , PRUint32 aTextLen1,
+                                   const PRUnichar* aText2 , PRUint32 aTextLen2)
+{
+  // Returns PR_TRUE if a line may break between the end of aText1 and the
+  // start of aText2; null/empty input and any failure yield PR_FALSE.
+  if (!aText1 || !aText2 || (0 == aTextLen1) || (0 == aTextLen2) ||
+      (NS_IS_HIGH_SURROGATE(aText1[aTextLen1-1]) &&
+       NS_IS_LOW_SURROGATE(aText2[0])))  // Do not separate a surrogate pair
+    return PR_FALSE;
+
+  // Analyse both runs as one string so the boundary is seen in context.
+  nsAutoString concat(aText1, aTextLen1);
+  concat.Append(aText2, aTextLen2);
+  // PRBool result: failure must be "no break", not a (non-zero) nsresult code.
+  nsAutoTArray<PRPackedBool, 2000> breakState;
+  if (concat.Length() <= aTextLen1 ||  // short concat: index would be invalid
+      !breakState.AppendElements(concat.Length()))
+    return PR_FALSE;
+  GetJISx4051Breaks(concat.Data(), concat.Length(), breakState.Elements());
+  return breakState[aTextLen1];
+}
+
+
+PRInt32
+nsPangoLineBreaker::Next(const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos)
+{
+  // Returns the first break offset after aPos, or
+  // NS_LINEBREAKER_NEED_MORE_TEXT if none is found (or on allocation failure).
+  NS_ASSERTION(aText, "aText shouldn't be null");
+  NS_ASSERTION(aLen > aPos, "Illegal value (length > position)");
+
+  nsAutoTArray<PRPackedBool, 2000> breakState;
+  if (!breakState.AppendElements(aLen))
+    return NS_LINEBREAKER_NEED_MORE_TEXT; // an nsresult is not a valid offset
+  GetJISx4051Breaks(aText, aLen, breakState.Elements());
+
+  while (++aPos < aLen)
+    if (breakState[aPos])
+      return aPos;
+  return NS_LINEBREAKER_NEED_MORE_TEXT;
+}
+
+
+PRInt32
+nsPangoLineBreaker::Prev(const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos)
+{
+  // Returns the nearest break offset before aPos, or
+  // NS_LINEBREAKER_NEED_MORE_TEXT if none is found (or on allocation failure).
+  NS_ASSERTION(aText, "aText shouldn't be null");
+  NS_ASSERTION(aLen > aPos, "Illegal value (length > position)");
+
+  nsAutoTArray<PRPackedBool, 2000> breakState;
+  if (!breakState.AppendElements(aLen))
+    return NS_LINEBREAKER_NEED_MORE_TEXT; // an nsresult is not a valid offset
+  GetJISx4051Breaks(aText, aLen, breakState.Elements());
+
+  while (aPos > 0)
+    if (breakState[--aPos])
+      return aPos;
+  return NS_LINEBREAKER_NEED_MORE_TEXT;
+}
+
+void
+nsPangoLineBreaker::GetJISx4051Breaks(const PRUnichar* aText, PRUint32 aLen,
+                                      PRPackedBool* aBreakBefore)
+{
+  // Sets aBreakBefore[i] when Pango allows a line break before aText[i].
+  NS_ASSERTION(aText, "aText shouldn't be null");
+  NS_ASSERTION(aBreakBefore, "aBreakBefore shouldn't be null");
+  // Clear first so callers never read unset flags if we bail out early.
+  for (PRUint32 i = 0; i < aLen; ++i) aBreakBefore[i] = PR_FALSE;
+  nsAutoTArray<PangoLogAttr, 2000> attrBuffer;
+  if (!attrBuffer.AppendElements(aLen + 1))  // one attr per char, plus one
+    return;
+
+  NS_ConvertUTF16toUTF8 aUTF8(aText, aLen);
+  const gchar* p = aUTF8.Data();
+  const gchar* end = p + aUTF8.Length();
+  PRUint32     u16Offset = 0;
+
+  while (p < end && u16Offset < aLen)
+  {
+    PangoLogAttr* attr = attrBuffer.Elements();
+    pango_get_log_attrs(p, end - p, -1, pango_language_get_default(),
+                        attr, attrBuffer.Length());
+
+    while (p < end && u16Offset < aLen)
+    {
+      aBreakBefore[u16Offset] = attr->is_line_break;
+      // A high surrogate followed by a low one is a single character for
+      // Pango; never allow a break before the trailing low surrogate.
+      if (NS_IS_HIGH_SURROGATE(aText[u16Offset]) && u16Offset + 1 < aLen &&
+          NS_IS_LOW_SURROGATE(aText[u16Offset + 1]))
+        aBreakBefore[++u16Offset] = PR_FALSE;
+      ++u16Offset;
+      PRUint32 ch = UTF8CharEnumerator::NextChar(&p, end);
+      ++attr;
+      if (ch == 0) {
+        // pango_break (pango 1.16.2) only analyses text before the first
+        // NUL (but sets one extra attr), so restart the analysis after it,
+        // as gfx/thebes/src/gfxPangoFonts.cpp: SetupClusterBoundaries() does.
+        break;
+      }
+    }
+  }
+}
+
new file mode 100644
--- /dev/null
+++ b/intl/lwbrk/src/nsPangoLineBreaker.h
@@ -0,0 +1,59 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is mozilla.org code.
+ *
+ * The Initial Developer of the Original Code is
+ * Theppitak Karoonboonyanan <thep@linux.thai.net>.
+ * Portions created by the Initial Developer are Copyright (C) 2007
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * - Theppitak Karoonboonyanan <thep@linux.thai.net>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either of the GNU General Public License Version 2 or later (the "GPL"),
+ * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+#ifndef nsPangoLineBreaker_h__
+#define nsPangoLineBreaker_h__
+
+#include "nsILineBreaker.h"
+
+class nsPangoLineBreaker : public nsILineBreaker
+{
+  NS_DECL_ISUPPORTS
+  // Line breaker whose break opportunities come from Pango (nsPangoLineBreaker.cpp).
+public:
+  PRBool BreakInBetween(const PRUnichar* aText1 , PRUint32 aTextLen1,
+                        const PRUnichar* aText2 , PRUint32 aTextLen2);
+  // Next: first break offset after aPos, else NS_LINEBREAKER_NEED_MORE_TEXT.
+  PRInt32 Next(const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos);
+  // Prev: nearest break offset before aPos, else NS_LINEBREAKER_NEED_MORE_TEXT.
+  PRInt32 Prev(const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos);
+  // Writes break-before flags for aText into aBreakBefore, indexed by UTF-16 offset.
+  void GetJISx4051Breaks(const PRUnichar* aText, PRUint32 aLen,
+                         PRPackedBool* aBreakBefore);
+};
+
+#endif  /* nsPangoLineBreaker_h__ */