Bug 336959. Use Pango to break lines for CJK and Thai. r+sr=roc, patch by Theppitak Karoonboonyanan.
--- a/content/base/src/nsLineBreaker.cpp
+++ b/content/base/src/nsLineBreaker.cpp
@@ -52,20 +52,22 @@ static inline int
IS_SPACE(PRUint8 u)
{
return u == 0x0020;
}
static inline int
IS_CJK_CHAR(PRUnichar u)
{
- return (0x1100 <= u && u <= 0x11ff) ||
- (0x2e80 <= u && u <= 0xd7ff) ||
- (0xf900 <= u && u <= 0xfaff) ||
- (0xff00 <= u && u <= 0xffef);
+ // CJK plus other scripts that require special line breaking treatment
+ return (0x0e01 <= u && u <= 0x0e5f) || // Thai
+ (0x1100 <= u && u <= 0x11ff) || // Hangul Jamo
+ (0x2e80 <= u && u <= 0xd7ff) || // several CJK blocks
+ (0xf900 <= u && u <= 0xfaff) || // CJK Compatibility Ideographs
+ (0xff00 <= u && u <= 0xffef); // Halfwidth and Fullwidth Forms
}
nsLineBreaker::nsLineBreaker()
: mCurrentWordContainsCJK(PR_FALSE),
mBreakBeforeNonWhitespace(PR_FALSE)
{
}
--- a/intl/build/Makefile.in
+++ b/intl/build/Makefile.in
@@ -101,8 +101,18 @@ EXTRA_DSO_LDOPTS = \
ifneq (,$(filter mac cocoa,$(MOZ_WIDGET_TOOLKIT)))
EXTRA_DSO_LDOPTS += \
$(TK_LIBS) \
$(NULL)
endif
include $(topsrcdir)/config/rules.mk
+ifdef MOZ_ENABLE_PANGO
+CXXFLAGS += \
+ $(MOZ_PANGO_CFLAGS) \
+ $(NULL)
+
+EXTRA_DSO_LDOPTS += \
+ $(MOZ_PANGO_LIBS) \
+ $(NULL)
+endif
+
--- a/intl/build/nsI18nModule.cpp
+++ b/intl/build/nsI18nModule.cpp
@@ -54,18 +54,23 @@
#include "nsLocaleConstructors.h"
NS_GENERIC_FACTORY_CONSTRUCTOR(nsSemanticUnitScanner)
static nsModuleComponentInfo components[] =
{
// lwbrk
+#ifdef MOZ_ENABLE_PANGO
+ { "Line Breaker", NS_LBRK_CID,
+ NS_LBRK_CONTRACTID, nsPangoLineBreakerConstructor},
+#else
{ "Line Breaker", NS_LBRK_CID,
NS_LBRK_CONTRACTID, nsJISx4051LineBreakerConstructor},
+#endif
{ "Word Breaker", NS_WBRK_CID,
NS_WBRK_CONTRACTID, nsSampleWordBreakerConstructor},
{ "Semantic Unit Scanner", NS_SEMANTICUNITSCANNER_CID,
NS_SEMANTICUNITSCANNER_CONTRACTID, nsSemanticUnitScannerConstructor},
// unicharutil
{ "Unichar Utility", NS_UNICHARUTIL_CID,
NS_UNICHARUTIL_CONTRACTID, nsCaseConversionImp2Constructor},
--- a/intl/lwbrk/src/Makefile.in
+++ b/intl/lwbrk/src/Makefile.in
@@ -50,14 +50,34 @@ LIBXUL_LIBRARY = 1
REQUIRES = xpcom \
string \
unicharutil \
$(NULL)
CSRCS = rulebrk.c
CPPSRCS = \
- nsJISx4501LineBreaker.cpp \
nsSampleWordBreaker.cpp \
nsSemanticUnitScanner.cpp \
$(NULL)
+ifdef MOZ_ENABLE_PANGO
+CPPSRCS += \
+ nsPangoLineBreaker.cpp \
+ $(NULL)
+else
+CPPSRCS += \
+ nsJISx4501LineBreaker.cpp \
+ $(NULL)
+endif
+
include $(topsrcdir)/config/rules.mk
+
+ifdef MOZ_ENABLE_PANGO
+CXXFLAGS += \
+ $(MOZ_PANGO_CFLAGS) \
+ $(NULL)
+
+EXTRA_DSO_LDOPTS += \
+ $(MOZ_PANGO_LIBS) \
+ $(NULL)
+endif
+
--- a/intl/lwbrk/src/nsLWBrkConstructors.h
+++ b/intl/lwbrk/src/nsLWBrkConstructors.h
@@ -36,17 +36,26 @@
* ***** END LICENSE BLOCK ***** */
#ifndef nsLWBrkConstructors_h__
#define nsLWBrkConstructors_h__
#include "nsLWBrkCIID.h"
#include "nsILineBreaker.h"
#include "nsIWordBreaker.h"
-#include "nsJISx4501LineBreaker.h"
+#ifdef MOZ_ENABLE_PANGO
+# include "nsPangoLineBreaker.h"
+#else
+# include "nsJISx4501LineBreaker.h"
+#endif
#include "nsSampleWordBreaker.h"
#include "nsLWBRKDll.h"
+#ifdef MOZ_ENABLE_PANGO
+NS_GENERIC_FACTORY_CONSTRUCTOR(nsPangoLineBreaker)
+#else
NS_GENERIC_FACTORY_CONSTRUCTOR(nsJISx4051LineBreaker)
+#endif
+
NS_GENERIC_FACTORY_CONSTRUCTOR(nsSampleWordBreaker)
#endif
--- a/intl/lwbrk/src/nsLWIMP.h
+++ b/intl/lwbrk/src/nsLWIMP.h
@@ -32,15 +32,21 @@
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#ifndef nsLWIMP_h__
#define nsLWIMP_h__
-#include "nsJISx4501LineBreaker.h"
+#ifdef MOZ_ENABLE_PANGO
+# include "nsPangoLineBreaker.h"
+# define LINEBREAKER nsPangoLineBreaker
+#else
+# include "nsJISx4501LineBreaker.h"
+# define LINEBREAKER nsJISx4501LineBreaker
+#endif
+
#include "nsSampleWordBreaker.h"
-#define LINEBREAKER nsJISx4501LineBreaker
#define WORDBREAKER nsSampleWordBreaker
#endif /* nsLWIMP_h__ */
new file mode 100644
--- /dev/null
+++ b/intl/lwbrk/src/nsPangoLineBreaker.cpp
@@ -0,0 +1,161 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is mozilla.org code.
+ *
+ * The Initial Developer of the Original Code is
+ * Theppitak Karoonboonyanan <thep@linux.thai.net>.
+ * Portions created by the Initial Developer are Copyright (C) 2007
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * - Theppitak Karoonboonyanan <thep@linux.thai.net>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either of the GNU General Public License Version 2 or later (the "GPL"),
+ * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+
+
+#include "nsPangoLineBreaker.h"
+
+#include <pango/pango.h>
+
+#include "nsLWBRKDll.h"
+#include "nsUnicharUtils.h"
+#include "nsUTF8Utils.h"
+#include "nsString.h"
+#include "nsTArray.h"
+
+
+NS_IMPL_ISUPPORTS1(nsPangoLineBreaker, nsILineBreaker)
+
+// Reports whether a line break is allowed between aText1 and aText2.
+// Returns PR_FALSE for null/empty input, inside a surrogate pair, or on OOM.
+PRBool
+nsPangoLineBreaker::BreakInBetween(const PRUnichar* aText1 , PRUint32 aTextLen1,
+                                   const PRUnichar* aText2 , PRUint32 aTextLen2)
+{
+  if (!aText1 || !aText2 || (0 == aTextLen1) || (0 == aTextLen2) ||
+      (NS_IS_HIGH_SURROGATE(aText1[aTextLen1-1]) &&
+       NS_IS_LOW_SURROGATE(aText2[0]))) // Do not separate a surrogate pair
+    return PR_FALSE;
+
+  // Analyse both pieces as one string so the breaker sees the full context.
+  nsAutoString concat(aText1, aTextLen1);
+  concat.Append(aText2, aTextLen2);
+
+  nsAutoTArray<PRPackedBool, 2000> breakState;
+  if (!breakState.AppendElements(concat.Length()))
+    return PR_FALSE; // PRBool result: an nsresult here would read as "true"
+
+  GetJISx4051Breaks(concat.Data(), concat.Length(), breakState.Elements());
+  return breakState[aTextLen1]; // flag for the first code unit of aText2
+}
+
+
+// Returns the first break position after aPos in aText[0..aLen), or
+// NS_LINEBREAKER_NEED_MORE_TEXT when there is none or allocation fails.
+PRInt32
+nsPangoLineBreaker::Next(const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos)
+{
+  NS_ASSERTION(aText, "aText shouldn't be null");
+  NS_ASSERTION(aLen > aPos, "Illegal value (length > position)");
+
+  nsAutoTArray<PRPackedBool, 2000> breakState;
+  if (!breakState.AppendElements(aLen))
+    return NS_LINEBREAKER_NEED_MORE_TEXT; // result is a position, not nsresult
+
+  GetJISx4051Breaks(aText, aLen, breakState.Elements());
+  while (++aPos < aLen)
+    if (breakState[aPos])
+      return aPos;
+  return NS_LINEBREAKER_NEED_MORE_TEXT;
+}
+
+
+// Returns the nearest break position before aPos in aText[0..aLen), or
+// NS_LINEBREAKER_NEED_MORE_TEXT when there is none or allocation fails.
+PRInt32
+nsPangoLineBreaker::Prev(const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos)
+{
+  NS_ASSERTION(aText, "aText shouldn't be null");
+  NS_ASSERTION(aLen > aPos, "Illegal value (length > position)");
+
+  nsAutoTArray<PRPackedBool, 2000> breakState;
+  if (!breakState.AppendElements(aLen))
+    return NS_LINEBREAKER_NEED_MORE_TEXT; // result is a position, not nsresult
+
+  GetJISx4051Breaks(aText, aLen, breakState.Elements());
+  while (aPos > 0)
+    if (breakState[--aPos])
+      return aPos;
+  return NS_LINEBREAKER_NEED_MORE_TEXT;
+}
+
+// Sets aBreakBefore[i] (i < aLen) to whether Pango allows a line break before
+// UTF-16 unit i of aText. Entries stay PR_FALSE if allocation fails.
+void
+nsPangoLineBreaker::GetJISx4051Breaks(const PRUnichar* aText, PRUint32 aLen,
+                                      PRPackedBool* aBreakBefore)
+{
+  NS_ASSERTION(aText, "aText shouldn't be null");
+
+  // Clear the output first so no early exit leaves it uninitialized.
+  for (PRUint32 i = 0; i < aLen; ++i)
+    aBreakBefore[i] = PR_FALSE;
+
+  // One attr per character plus one for the end position, hence aLen + 1.
+  nsAutoTArray<PangoLogAttr, 2000> attrBuffer;
+  if (!attrBuffer.AppendElements(aLen + 1))
+    return;
+
+  NS_ConvertUTF16toUTF8 aUTF8(aText, aLen);
+  const gchar* p = aUTF8.Data();
+  const gchar* end = p + aUTF8.Length();
+  PRUint32 u16Offset = 0;
+
+  while (p < end && u16Offset < aLen)
+  {
+    PangoLogAttr* attr = attrBuffer.Elements();
+    pango_get_log_attrs(p, end - p, -1, pango_language_get_default(),
+                        attr, attrBuffer.Length());
+    while (p < end && u16Offset < aLen)
+    {
+      aBreakBefore[u16Offset] = attr->is_line_break;
+      // A surrogate pair is a single character to Pango: skip its low half.
+      if (NS_IS_HIGH_SURROGATE(aText[u16Offset]) && u16Offset + 1 < aLen &&
+          NS_IS_LOW_SURROGATE(aText[u16Offset + 1]))
+        aBreakBefore[++u16Offset] = PR_FALSE;
+      ++u16Offset;
+      PRUint32 ch = UTF8CharEnumerator::NextChar(&p, end);
+      ++attr;
+      // Pango (1.16.2) analyses only up to the first NUL, so restart after it,
+      // as gfxPangoFonts.cpp SetupClusterBoundaries() does for pango_break.
+      if (ch == 0)
+        break;
+    }
+  }
+}
+
new file mode 100644
--- /dev/null
+++ b/intl/lwbrk/src/nsPangoLineBreaker.h
@@ -0,0 +1,59 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is mozilla.org code.
+ *
+ * The Initial Developer of the Original Code is
+ * Theppitak Karoonboonyanan <thep@linux.thai.net>.
+ * Portions created by the Initial Developer are Copyright (C) 2007
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * - Theppitak Karoonboonyanan <thep@linux.thai.net>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either of the GNU General Public License Version 2 or later (the "GPL"),
+ * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+#ifndef nsPangoLineBreaker_h__
+#define nsPangoLineBreaker_h__
+
+#include "nsILineBreaker.h"
+
+class nsPangoLineBreaker : public nsILineBreaker
+{
+ NS_DECL_ISUPPORTS
+// BreakInBetween: whether a break is allowed where aText1 ends and aText2 starts.
+public:
+ PRBool BreakInBetween(const PRUnichar* aText1 , PRUint32 aTextLen1,
+ const PRUnichar* aText2 , PRUint32 aTextLen2);
+// Next: first break position after aPos, else NS_LINEBREAKER_NEED_MORE_TEXT.
+ PRInt32 Next(const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos);
+// Prev: nearest break position before aPos, else NS_LINEBREAKER_NEED_MORE_TEXT.
+ PRInt32 Prev(const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos);
+// GetJISx4051Breaks: writes break flags for aText into the aBreakBefore array.
+ void GetJISx4051Breaks(const PRUnichar* aText, PRUint32 aLen,
+ PRPackedBool* aBreakBefore);
+};
+
+#endif /* nsPangoLineBreaker_h__ */