Bug 249159 - Part 1 Add word-break interface to nsILineBreaker. r=smontagu
author Makoto Kato <m_kato@ga2.so-net.ne.jp>
Mon, 07 May 2012 12:18:23 -0700
changeset 93399 4acd1e285cc4dfbb4fd64290574ef4eb7a5d99dd
parent 93398 cab8af0ca0407acb480255690836d058306aa517
child 93400 2998b1105a1140f0ec95db658b27e68d28780c63
push id 22634
push user emorley@mozilla.com
push date Tue, 08 May 2012 09:48:43 +0000
treeherder mozilla-central@e4f9e2eab6b1 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers smontagu
bugs 249159
milestone 15.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 249159 - Part 1 Add word-break interface to nsILineBreaker. r=smontagu
content/base/public/nsLineBreaker.h
content/base/src/nsLineBreaker.cpp
intl/lwbrk/public/nsILineBreaker.h
intl/lwbrk/src/nsJISx4501LineBreaker.cpp
intl/lwbrk/src/nsJISx4501LineBreaker.h
--- a/content/base/public/nsLineBreaker.h
+++ b/content/base/public/nsLineBreaker.h
@@ -196,16 +196,22 @@ public:
    * This must be called at least once between any call to AppendText() and
    * destroying the object.
    * @param aTrailingBreak this is set to true when there is a break opportunity
    * at the end of the text. This will normally only be declared true when there
    * is breakable whitespace at the end.
    */
   nsresult Reset(bool* aTrailingBreak);
 
+  /*
+   * Set the word-break mode, as given by the "word-break" style property.
+   * @param aMode one of the nsILineBreaker::kWordBreak_* values.
+   */
+  void SetWordBreak(PRUint8 aMode) { mWordBreak = aMode; }
+
 private:
   // This is a list of text sources that make up the "current word" (i.e.,
   // run of text which does not contain any whitespace). All the mLengths
   // are are nonzero, these cannot overlap.
   struct TextItem {
     TextItem(nsILineBreakSink* aSink, PRUint32 aSinkOffset, PRUint32 aLength,
              PRUint32 aFlags)
       : mSink(aSink), mSinkOffset(aSinkOffset), mLength(aLength), mFlags(aFlags) {}
@@ -238,11 +244,13 @@ private:
   bool                        mCurrentWordContainsMixedLang;
   bool                        mCurrentWordContainsComplexChar;
 
   // True if the previous character was breakable whitespace
   bool                        mAfterBreakableSpace;
   // True if a break must be allowed at the current position because
   // a run of breakable whitespace ends here
   bool                        mBreakHere;
+  // Word-break mode (nsILineBreaker::kWordBreak_*) from the "word-break" style
+  PRUint8                     mWordBreak;
 };
 
 #endif /*NSLINEBREAKER_H_*/
--- a/content/base/src/nsLineBreaker.cpp
+++ b/content/base/src/nsLineBreaker.cpp
@@ -42,17 +42,18 @@
 #include "gfxFont.h" // for the gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_* values
 #include "nsHyphenationManager.h"
 #include "nsHyphenator.h"
 
 nsLineBreaker::nsLineBreaker()
   : mCurrentWordLangGroup(nsnull),
     mCurrentWordContainsMixedLang(false),
     mCurrentWordContainsComplexChar(false),
-    mAfterBreakableSpace(false), mBreakHere(false)
+    mAfterBreakableSpace(false), mBreakHere(false),
+    mWordBreak(nsILineBreaker::kWordBreak_Normal)
 {
 }
 
 nsLineBreaker::~nsLineBreaker()
 {
   NS_ASSERTION(mCurrentWord.Length() == 0, "Should have Reset() before destruction!");
 }
 
@@ -91,23 +92,27 @@ nsLineBreaker::FlushCurrentWord()
   PRUint32 length = mCurrentWord.Length();
   nsAutoTArray<PRUint8,4000> breakState;
   if (!breakState.AppendElements(length))
     return NS_ERROR_OUT_OF_MEMORY;
   
   nsTArray<bool> capitalizationState;
 
   if (!mCurrentWordContainsComplexChar) {
-    // Just set everything internal to "no break"!
+    // For word-break: break-all, set everything internal to "break";
+    // otherwise set everything internal to "no break".
     memset(breakState.Elements(),
-           gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE,
+           mWordBreak == nsILineBreaker::kWordBreak_BreakAll ?
+             gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL :
+             gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE,
            length*sizeof(PRUint8));
   } else {
     nsContentUtils::LineBreaker()->
-      GetJISx4051Breaks(mCurrentWord.Elements(), length, breakState.Elements());
+      GetJISx4051Breaks(mCurrentWord.Elements(), length, mWordBreak,
+                        breakState.Elements());
   }
 
   bool autoHyphenate = mCurrentWordLangGroup &&
     !mCurrentWordContainsMixedLang;
   PRUint32 i;
   for (i = 0; autoHyphenate && i < mTextItems.Length(); ++i) {
     TextItem* ti = &mTextItems[i];
     if (!(ti->mFlags & BREAK_USE_AUTO_HYPHENATION)) {
@@ -261,16 +266,17 @@ nsLineBreaker::AppendText(nsIAtom* aLang
       if (offset > wordStart && aSink) {
         if (!(aFlags & BREAK_SUPPRESS_INSIDE)) {
           if (wordHasComplexChar) {
             // Save current start-of-word state because GetJISx4051Breaks will
             // set it to false
             PRUint8 currentStart = breakState[wordStart];
             nsContentUtils::LineBreaker()->
               GetJISx4051Breaks(aText + wordStart, offset - wordStart,
+                                mWordBreak,
                                 breakState.Elements() + wordStart);
             breakState[wordStart] = currentStart;
           }
           if (hyphenator) {
             FindHyphenationPoints(hyphenator,
                                   aText + wordStart, aText + offset,
                                   breakState.Elements() + wordStart);
           }
@@ -406,32 +412,36 @@ nsLineBreaker::AppendText(nsIAtom* aLang
   bool wordHasComplexChar = false;
 
   for (;;) {
     PRUint8 ch = aText[offset];
     bool isSpace = IsSpace(ch);
     bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE);
 
     if (aSink) {
+      // Consider word-break style.  Since the break position of CJK scripts
+      // will be set by nsILineBreaker, we don't consider CJK at this point.
       breakState[offset] =
-        mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ?
+        mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ||
+        (mWordBreak == nsILineBreaker::kWordBreak_BreakAll) ?
           gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL :
           gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
     }
     mBreakHere = false;
     mAfterBreakableSpace = isBreakableSpace;
 
     if (isSpace) {
       if (offset > wordStart && wordHasComplexChar) {
         if (aSink && !(aFlags & BREAK_SUPPRESS_INSIDE)) {
           // Save current start-of-word state because GetJISx4051Breaks will
           // set it to false
           PRUint8 currentStart = breakState[wordStart];
           nsContentUtils::LineBreaker()->
             GetJISx4051Breaks(aText + wordStart, offset - wordStart,
+                              mWordBreak,
                               breakState.Elements() + wordStart);
           breakState[wordStart] = currentStart;
         }
         wordHasComplexChar = false;
       }
 
       ++offset;
       if (offset >= aLength)
--- a/intl/lwbrk/public/nsILineBreaker.h
+++ b/intl/lwbrk/public/nsILineBreaker.h
@@ -38,40 +38,49 @@
 #define nsILineBreaker_h__
 
 #include "nsISupports.h"
 
 #include "nscore.h"
 
 #define NS_LINEBREAKER_NEED_MORE_TEXT -1
 
-// {5ae68851-d9a3-49fd-9388-58586dad8044}
+// {4b0b9e04-6ffb-4647-aa5f-2fa2ebd883e8}
 #define NS_ILINEBREAKER_IID \
-{ 0x5ae68851, 0xd9a3, 0x49fd, \
-    { 0x93, 0x88, 0x58, 0x58, 0x6d, 0xad, 0x80, 0x44 } }
+{0x4b0b9e04, 0x6ffb, 0x4647, \
+    {0xaa, 0x5f, 0x2f, 0xa2, 0xeb, 0xd8, 0x83, 0xe8}}
 
 class nsILineBreaker : public nsISupports
 {
 public:
   NS_DECLARE_STATIC_IID_ACCESSOR(NS_ILINEBREAKER_IID)
+
+  enum {
+    kWordBreak_Normal   = 0, // default
+    kWordBreak_BreakAll = 1, // break all
+    kWordBreak_KeepAll  = 2  // always keep 
+  };
+
   virtual PRInt32 Next( const PRUnichar* aText, PRUint32 aLen, 
                         PRUint32 aPos) = 0;
 
   virtual PRInt32 Prev( const PRUnichar* aText, PRUint32 aLen, 
                         PRUint32 aPos) = 0;
 
   // Call this on a word with whitespace at either end. We will apply JISx4501
   // rules to find breaks inside the word. aBreakBefore is set to the break-
   // before status of each character; aBreakBefore[0] will always be false
   // because we never return a break before the first character.
   // aLength is the length of the aText array and also the length of the aBreakBefore
   // output array.
   virtual void GetJISx4051Breaks(const PRUnichar* aText, PRUint32 aLength,
+                                 PRUint8 aWordBreak,
                                  PRUint8* aBreakBefore) = 0;
   virtual void GetJISx4051Breaks(const PRUint8* aText, PRUint32 aLength,
+                                 PRUint8 aWordBreak,
                                  PRUint8* aBreakBefore) = 0;
 };
 
 NS_DEFINE_STATIC_IID_ACCESSOR(nsILineBreaker, NS_ILINEBREAKER_IID)
 
 static inline bool
 NS_IsSpace(PRUnichar u)
 {
--- a/intl/lwbrk/src/nsJISx4501LineBreaker.cpp
+++ b/intl/lwbrk/src/nsJISx4501LineBreaker.cpp
@@ -792,17 +792,18 @@ nsJISx4051LineBreaker::WordMove(const PR
     // (This is required for serializers. See Bug #344816.)
     // Also fall back to this when out of memory.
     if (aDirection < 0) {
       ret = (begin == PRInt32(aPos)) ? begin - 1 : begin;
     } else {
       ret = end;
     }
   } else {
-    GetJISx4051Breaks(aText + begin, end - begin, breakState.Elements());
+    GetJISx4051Breaks(aText + begin, end - begin, nsILineBreaker::kWordBreak_Normal,
+                      breakState.Elements());
 
     ret = aPos;
     do {
       ret += aDirection;
     } while (begin < ret && ret < end && !breakState[ret - begin]);
   }
 
   return ret;
@@ -828,16 +829,17 @@ nsJISx4051LineBreaker::Prev(const PRUnic
                "Bad position passed to nsJISx4051LineBreaker::Prev");
 
   PRInt32 prevPos = WordMove(aText, aLen, aPos, -1);
   return prevPos > 0 ? prevPos : NS_LINEBREAKER_NEED_MORE_TEXT;
 }
 
 void
 nsJISx4051LineBreaker::GetJISx4051Breaks(const PRUnichar* aChars, PRUint32 aLength,
+                                         PRUint8 aWordBreak,
                                          PRUint8* aBreakBefore)
 {
   PRUint32 cur;
   PRInt8 lastClass = CLASS_NONE;
   ContextState state(aChars, aLength);
 
   for (cur = 0; cur < aLength; ++cur, state.AdvanceIndex()) {
     PRUnichar ch = aChars[cur];
@@ -850,51 +852,59 @@ nsJISx4051LineBreaker::GetJISx4051Breaks
                               state);
     } else {
       if (ch == U_EQUAL)
         state.NotifySeenEqualsSign();
       state.NotifyNonHyphenCharacter(ch);
       cl = GetClass(ch);
     }
 
-    bool allowBreak;
+    bool allowBreak = false;
     if (cur > 0) {
       NS_ASSERTION(CLASS_COMPLEX != lastClass || CLASS_COMPLEX != cl,
                    "Loop should have prevented adjacent complex chars here");
-      if (state.UseConservativeBreaking())
-        allowBreak = GetPairConservative(lastClass, cl);
-      else
-        allowBreak = GetPair(lastClass, cl);
-    } else {
-      allowBreak = false;
+      if (aWordBreak == nsILineBreaker::kWordBreak_Normal) {
+        allowBreak = (state.UseConservativeBreaking()) ?
+          GetPairConservative(lastClass, cl) : GetPair(lastClass, cl);
+      } else if (aWordBreak == nsILineBreaker::kWordBreak_BreakAll) {
+        allowBreak = true;
+      }
     }
     aBreakBefore[cur] = allowBreak;
     if (allowBreak)
       state.NotifyBreakBefore();
     lastClass = cl;
     if (CLASS_COMPLEX == cl) {
       PRUint32 end = cur + 1;
 
       while (end < aLength && CLASS_COMPLEX == GetClass(aChars[end])) {
         ++end;
       }
 
       NS_GetComplexLineBreaks(aChars + cur, end - cur, aBreakBefore + cur);
 
+      // We have to consider word-break value again for complex characters
+      if (aWordBreak != nsILineBreaker::kWordBreak_Normal) {
+        // break-all: allow a break before every char; otherwise allow none
+        for (PRUint32 i = cur; i < end; i++)
+          aBreakBefore[i] = (aWordBreak == nsILineBreaker::kWordBreak_BreakAll);
+      }
+
       // restore breakability at chunk begin, which was always set to false
       // by the complex line breaker
       aBreakBefore[cur] = allowBreak;
 
       cur = end - 1;
     }
   }
 }
 
 void
 nsJISx4051LineBreaker::GetJISx4051Breaks(const PRUint8* aChars, PRUint32 aLength,
+                                         PRUint8 aWordBreak,
                                          PRUint8* aBreakBefore)
 {
   PRUint32 cur;
   PRInt8 lastClass = CLASS_NONE;
   ContextState state(aChars, aLength);
 
   for (cur = 0; cur < aLength; ++cur, state.AdvanceIndex()) {
     PRUnichar ch = aChars[cur];
@@ -907,23 +917,23 @@ nsJISx4051LineBreaker::GetJISx4051Breaks
                               state);
     } else {
       if (ch == U_EQUAL)
         state.NotifySeenEqualsSign();
       state.NotifyNonHyphenCharacter(ch);
       cl = GetClass(ch);
     }
 
-    bool allowBreak;
+    bool allowBreak = false;
     if (cur > 0) {
-      if (state.UseConservativeBreaking())
-        allowBreak = GetPairConservative(lastClass, cl);
-      else
-        allowBreak = GetPair(lastClass, cl);
-    } else {
-      allowBreak = false;
+      if (aWordBreak == nsILineBreaker::kWordBreak_Normal) {
+        allowBreak = (state.UseConservativeBreaking()) ?
+          GetPairConservative(lastClass, cl) : GetPair(lastClass, cl);
+      } else if (aWordBreak == nsILineBreaker::kWordBreak_BreakAll) {
+        allowBreak = true;
+      }
     }
     aBreakBefore[cur] = allowBreak;
     if (allowBreak)
       state.NotifyBreakBefore();
     lastClass = cl;
   }
 }
--- a/intl/lwbrk/src/nsJISx4501LineBreaker.h
+++ b/intl/lwbrk/src/nsJISx4501LineBreaker.h
@@ -48,18 +48,20 @@ public:
   nsJISx4051LineBreaker();
   virtual ~nsJISx4051LineBreaker();
 
   PRInt32 Next( const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos);
 
   PRInt32 Prev( const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos);
 
   virtual void GetJISx4051Breaks(const PRUnichar* aText, PRUint32 aLength,
+                                 PRUint8 aBreakMode,
                                  PRUint8* aBreakBefore);
   virtual void GetJISx4051Breaks(const PRUint8* aText, PRUint32 aLength,
+                                 PRUint8 aBreakMode,
                                  PRUint8* aBreakBefore);
 
 private:
   PRInt32 WordMove(const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos,
                    PRInt8 aDirection);
 };
 
 #endif  /* nsJISx4501LineBreaker_h__ */