Not part of the build. Change nsLineBreaker so that breaks at element boundaries are always governed by the white-space property on the element that's the nearest common ancestor of the DOM nodes on each side of the break point --- fixes bug 382289. Also, pass NBSP through to textruns so that whitespace trimming doesn't trim NBSP. Also, count trimmed spaces correctly when updating mClusterCount.
authorroc+@cs.cmu.edu
Wed, 30 May 2007 15:27:27 -0700
changeset 2013 b551b70b0c58471092ec376c6d1cf428e4847a95
parent 2012 10a447e291da9d647a69a07ffbf0078a50fa5bd9
child 2014 f2c93f2d0241f4fbac3e49e5755f786794446312
push id1
push userbsmedberg@mozilla.com
push dateThu, 20 Mar 2008 16:49:24 +0000
treeherdermozilla-central@61007906a1f8 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
bugs382289
milestone1.9a5pre
Not part of the build. Change nsLineBreaker so that breaks at element boundaries are always governed by the white-space property on the element that's the nearest common ancestor of the DOM nodes on each side of the break point --- fixes bug 382289. Also, pass NBSP through to textruns so that whitespace trimming doesn't trim NBSP. Also, count trimmed spaces correctly when updating mClusterCount.
content/base/public/nsLineBreaker.h
content/base/src/nsLineBreaker.cpp
layout/generic/nsTextFrameThebes.cpp
layout/generic/nsTextFrameUtils.cpp
layout/generic/nsTextFrameUtils.h
--- a/content/base/public/nsLineBreaker.h
+++ b/content/base/public/nsLineBreaker.h
@@ -67,87 +67,77 @@ public:
  * with each text chunk, which might happen during the corresponding AppendText
  * call, or might happen during a later AppendText call or even a Reset()
  * call.
  * 
  * The linebreak results MUST NOT depend on how the text is broken up
  * into AppendText calls.
  * 
  * The current strategy is that we break the overall text into
- * whitespace-delimited "words". Then for words that contain a CJK character,
- * we break within the word using JISx4051 rules.
- * XXX This approach is not very good and we should replace it with something
- * better, such as some variant of UAX#14.
+ * whitespace-delimited "words". Then for words that contain a "complex" 
+ * character (currently CJK or Thai), we break within the word using complex
+ * rules (JISx4051 or Pango).
  */
 class nsLineBreaker {
 public:
   nsLineBreaker();
   ~nsLineBreaker();
 
-  // We need finegrained control of the line breaking behaviour to ensure
-  // that we get tricky CSS semantics right (in particular, the way we currently
-  // interpret and implement them; there's some ambiguity in the spec). The
-  // rules for CSS 'white-space' are slightly different for breaks induced by
-  // whitespace and space induced by nonwhitespace. Breaks induced by
-  // whitespace are always controlled by the
-  // 'white-space' property of the text node containing the
-  // whitespace. Breaks induced by non-whitespace where the break is between
-  // two nodes are controled by the 'white-space' property on the nearest
-  // common ancestor node. Therefore we provide separate control over
-  // a) whether whitespace in this text induces breaks b) whether we can
-  // break between nonwhitespace inside this text and c) whether we can break
-  // between nonwhitespace between the last text and this text.
+  // Normally, break opportunities exist at the end of each run of whitespace
+  // (Unicode ZWSP (U+200B) and ASCII space (U+0020)). Break opportunities can
+  // also exist inside runs of non-whitespace, as determined by nsILineBreaker.
+  // We provide flags to control on a per-chunk basis where breaks are allowed.
+  // At any character boundary, exactly one text chunk governs whether a
+  // break is allowed at that boundary.
   //
-  // "Whitespace" below means Unicode ZWSP (U+200B) and ASCII space (U+0020). We
-  // operate on text after whitespace processing has been applied, so
+  // We operate on text after whitespace processing has been applied, so
   // other characters (e.g. tabs and newlines) may have been converted to
   // spaces.
   enum {
     /**
-     * Allow breaks where a non-whitespace character in this block of text
-     * is preceded by a whitespace character.
+     * Allow a break opportunity at the start of this chunk of text.
      */
-    BREAK_WHITESPACE_END       = 0x01,
+    BREAK_ALLOW_INITIAL = 0x01,
     /**
-     * Allow breaks between eligible nonwhitespace characters when the break
-     * is in the interior of this block of text.
+     * Allow a break opportunity in the interior of this chunk of text.
      */
-    BREAK_NONWHITESPACE_INSIDE = 0x02,
-    /**
-     * Allow break between eligible nonwhitespace characters when the break
-     * is at the beginning of this block of text.
-     */
-    BREAK_NONWHITESPACE_BEFORE = 0x04
+    BREAK_ALLOW_INSIDE = 0x02,
   };
 
   /**
-   * Feed Unicode text into the linebreaker for analysis.
-   * If aLength is zero, then we assume the string is "invisible whitespace"
-   * which can induce breaks.
+   * Append "invisible whitespace". This acts like whitespace, but there is
+   * no actual text associated with it.
+   */
+  nsresult AppendInvisibleWhitespace();
+
+  /**
+   * Feed Unicode text into the linebreaker for analysis. aLength must be
+   * nonzero.
    */
   nsresult AppendText(nsIAtom* aLangGroup, const PRUnichar* aText, PRUint32 aLength,
                       PRUint32 aFlags, nsILineBreakSink* aSink);
   /**
-   * Feed 8-bit text into the linebreaker for analysis.
-   * If aLength is zero, then we assume the string is "invisible whitespace"
-   * which can induce breaks.
+   * Feed 8-bit text into the linebreaker for analysis. aLength must be nonzero.
    */
   nsresult AppendText(nsIAtom* aLangGroup, const PRUint8* aText, PRUint32 aLength,
                       PRUint32 aFlags, nsILineBreakSink* aSink);
   /**
    * Reset all state. This means the current run has ended; any outstanding
    * calls through nsILineBreakSink are made, and all outstanding references to
    * nsILineBreakSink objects are dropped.
    * After this call, this linebreaker can be reused.
    * This must be called at least once between any call to AppendText() and
    * destroying the object.
    */
   nsresult Reset() { return FlushCurrentWord(); }
 
 private:
+  // This is a list of text sources that make up the "current word" (i.e.,
+  // run of text which does not contain any whitespace). All the mLengths
+  // are nonzero, and the items cannot overlap.
   struct TextItem {
     TextItem(nsILineBreakSink* aSink, PRUint32 aSinkOffset, PRUint32 aLength,
              PRUint32 aFlags)
       : mSink(aSink), mSinkOffset(aSinkOffset), mLength(aLength), mFlags(aFlags) {}
 
     nsILineBreakSink* mSink;
     PRUint32          mSinkOffset;
     PRUint32          mLength;
@@ -162,14 +152,13 @@ private:
   // appropriate sink(s). Then we clear the current word state.
   nsresult FlushCurrentWord();
 
   nsAutoTArray<PRUnichar,100> mCurrentWord;
   // All the items that contribute to mCurrentWord
   nsAutoTArray<TextItem,2>    mTextItems;
   PRPackedBool                mCurrentWordContainsCJK;
 
-  // When mCurrentWord is empty, this indicates whether we should allow a break
-  // before the next text if it starts with non-whitespace.
-  PRPackedBool                mBreakBeforeNonWhitespace;
+  // True if the previous character was whitespace
+  PRPackedBool                mAfterSpace;
 };
 
 #endif /*NSLINEBREAKER_H_*/
--- a/content/base/src/nsLineBreaker.cpp
+++ b/content/base/src/nsLineBreaker.cpp
@@ -60,17 +60,17 @@ IS_CJK_CHAR(PRUnichar u)
   return (0x1100 <= u && u <= 0x11ff) ||
          (0x2e80 <= u && u <= 0xd7ff) ||
          (0xf900 <= u && u <= 0xfaff) ||
          (0xff00 <= u && u <= 0xffef);
 }
 
 nsLineBreaker::nsLineBreaker()
   : mCurrentWordContainsCJK(PR_FALSE),
-    mBreakBeforeNonWhitespace(PR_FALSE)
+    mAfterSpace(PR_FALSE)
 {
 }
 
 nsLineBreaker::~nsLineBreaker()
 {
   NS_ASSERTION(mCurrentWord.Length() == 0, "Should have Reset() before destruction!");
 }
 
@@ -90,20 +90,20 @@ nsLineBreaker::FlushCurrentWord()
   }
 
   PRUint32 i;
   PRUint32 offset = 0;
   for (i = 0; i < mTextItems.Length(); ++i) {
     TextItem* ti = &mTextItems[i];
     NS_ASSERTION(ti->mLength > 0, "Zero length word contribution?");
 
-    if (!(ti->mFlags & BREAK_NONWHITESPACE_BEFORE) && ti->mSinkOffset == 0) {
+    if (!(ti->mFlags & BREAK_ALLOW_INITIAL) && ti->mSinkOffset == 0) {
       breakState[offset] = PR_FALSE;
     }
-    if (!(ti->mFlags & BREAK_NONWHITESPACE_INSIDE)) {
+    if (!(ti->mFlags & BREAK_ALLOW_INSIDE)) {
       PRUint32 exclude = ti->mSinkOffset == 0 ? 1 : 0;
       memset(breakState.Elements() + offset + exclude, PR_FALSE, ti->mLength - exclude);
     }
 
     // Don't set the break state for the first character of the word, because
     // it was already set correctly earlier and we don't know what the true
     // value should be.
     PRUint32 skipSet = i == 0 ? 1 : 0;
@@ -117,30 +117,23 @@ nsLineBreaker::FlushCurrentWord()
   mCurrentWordContainsCJK = PR_FALSE;
   return NS_OK;
 }
 
 nsresult
 nsLineBreaker::AppendText(nsIAtom* aLangGroup, const PRUnichar* aText, PRUint32 aLength,
                           PRUint32 aFlags, nsILineBreakSink* aSink)
 {
-  if (aLength == 0) {
-    // Treat as "invisible whitespace"
-    nsresult rv = FlushCurrentWord();
-    if (NS_FAILED(rv))
-      return rv;
-    mBreakBeforeNonWhitespace = (aFlags & BREAK_WHITESPACE_END) != 0;
-    return NS_OK;
-  }
+  NS_ASSERTION(aLength > 0, "Appending empty text...");
 
   PRUint32 offset = 0;
 
   // Continue the current word
   if (mCurrentWord.Length() > 0) {
-    NS_ASSERTION(!mBreakBeforeNonWhitespace, "These should not be set");
+    NS_ASSERTION(!mAfterSpace, "These should not be set");
 
     while (offset < aLength && !IS_SPACE(aText[offset])) {
       mCurrentWord.AppendElement(aText[offset]);
       if (!mCurrentWordContainsCJK && IS_CJK_CHAR(aText[offset])) {
         mCurrentWordContainsCJK = PR_TRUE;
       }
       ++offset;
     }
@@ -161,41 +154,38 @@ nsLineBreaker::AppendText(nsIAtom* aLang
   nsAutoTArray<PRPackedBool,4000> breakState;
   if (!breakState.AppendElements(aLength))
     return NS_ERROR_OUT_OF_MEMORY;
 
   PRUint32 start = offset;
   PRUint32 wordStart = offset;
   PRBool wordHasCJK = PR_FALSE;
 
-  PRBool breakNextIfNonWhitespace = mBreakBeforeNonWhitespace;
   for (;;) {
     PRUnichar ch = aText[offset];
     PRBool isSpace = IS_SPACE(ch);
 
-    breakState[offset] = breakNextIfNonWhitespace && !isSpace;
-    breakNextIfNonWhitespace = PR_FALSE;
+    breakState[offset] = mAfterSpace && !isSpace &&
+      (aFlags & (start == 0 ? BREAK_ALLOW_INITIAL : BREAK_ALLOW_INSIDE));
+    mAfterSpace = isSpace;
 
     if (isSpace) {
       if (offset > wordStart && wordHasCJK) {
-        if (aFlags & BREAK_NONWHITESPACE_INSIDE) {
+        if (aFlags & BREAK_ALLOW_INSIDE) {
           // Save current start-of-word state because GetJISx4051Breaks will
           // set it to false
           PRPackedBool currentStart = breakState[wordStart];
           nsContentUtils::LineBreaker()->
             GetJISx4051Breaks(aText + wordStart, offset - wordStart,
                               breakState.Elements() + wordStart);
           breakState[wordStart] = currentStart;
         }
         wordHasCJK = PR_FALSE;
       }
 
-      if (aFlags & BREAK_WHITESPACE_END) {
-        breakNextIfNonWhitespace = PR_TRUE;
-      }
       ++offset;
       if (offset >= aLength)
         break;
       wordStart = offset;
     } else {
       if (!wordHasCJK && IS_CJK_CHAR(ch)) {
         wordHasCJK = PR_TRUE;
       }
@@ -212,38 +202,30 @@ nsLineBreaker::AppendText(nsIAtom* aLang
         // Ensure that the break-before for this word is written out
         offset = wordStart + 1;
         break;
       }
     }
   }
 
   aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
-  mBreakBeforeNonWhitespace = breakNextIfNonWhitespace;
   return NS_OK;
 }
 
 nsresult
 nsLineBreaker::AppendText(nsIAtom* aLangGroup, const PRUint8* aText, PRUint32 aLength,
                           PRUint32 aFlags, nsILineBreakSink* aSink)
 {
-  if (aLength == 0) {
-    // Treat as "invisible whitespace"
-    nsresult rv = FlushCurrentWord();
-    if (NS_FAILED(rv))
-      return rv;
-    mBreakBeforeNonWhitespace = (aFlags & BREAK_WHITESPACE_END) != 0;
-    return NS_OK;
-  }
+  NS_ASSERTION(aLength > 0, "Appending empty text...");
 
   PRUint32 offset = 0;
 
   // Continue the current word
   if (mCurrentWord.Length() > 0) {
-    NS_ASSERTION(!mBreakBeforeNonWhitespace, "These should not be set");
+    NS_ASSERTION(!mAfterSpace, "These should not be set");
 
     while (offset < aLength && !IS_SPACE(aText[offset])) {
       mCurrentWord.AppendElement(aText[offset]);
       ++offset;
     }
 
     if (offset > 0) {
       mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
@@ -262,28 +244,27 @@ nsLineBreaker::AppendText(nsIAtom* aLang
 
   nsAutoTArray<PRPackedBool,4000> breakState;
   if (!breakState.AppendElements(aLength))
     return NS_ERROR_OUT_OF_MEMORY;
 
   PRUint32 start = offset;
   PRUint32 wordStart = offset;
 
-  PRBool breakNextIfNonWhitespace = mBreakBeforeNonWhitespace;
   for (;;) {
     PRUint8 ch = aText[offset];
     PRBool isSpace = IS_SPACE(ch);
 
-    breakState[offset] = breakNextIfNonWhitespace && !isSpace;
-    breakNextIfNonWhitespace = PR_FALSE;
+    breakState[offset] = mAfterSpace && !isSpace &&
+      (aFlags & (start == 0 ? BREAK_ALLOW_INITIAL : BREAK_ALLOW_INSIDE));
+    mAfterSpace = isSpace;
 
     if (isSpace) {
-      if (aFlags & BREAK_WHITESPACE_END) {
-        breakNextIfNonWhitespace = PR_TRUE;
-      }
+      // The current word can't have any special (CJK/Thai) characters inside it
+      // because this is 8-bit text, so just ignore it
       ++offset;
       if (offset >= aLength)
         break;
       wordStart = offset;
     } else {
       ++offset;
       if (offset >= aLength) {
         // Save this word
@@ -302,11 +283,20 @@ nsLineBreaker::AppendText(nsIAtom* aLang
         break;
       }
       // We can't break inside words in 8-bit text (no CJK characters), so
       // there is no need to do anything else to handle words
     }
   }
 
   aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
-  mBreakBeforeNonWhitespace = breakNextIfNonWhitespace;
   return NS_OK;
 }
+
+nsresult
+nsLineBreaker::AppendInvisibleWhitespace() {
+  // Flush the current word, then record that the previous character was whitespace
+  nsresult rv = FlushCurrentWord();
+  if (NS_FAILED(rv))
+    return rv;
+  mAfterSpace = PR_TRUE;
+  return NS_OK;  
+}
--- a/layout/generic/nsTextFrameThebes.cpp
+++ b/layout/generic/nsTextFrameThebes.cpp
@@ -846,18 +846,18 @@ public:
 
   // Like TextRunMappedFlow but with some differences. mStartFrame to mEndFrame
   // are a sequence of in-flow frames. There can be multiple MappedFlows per
   // content element; the frames in each MappedFlow all have the same style
   // context.
   struct MappedFlow {
     nsTextFrame* mStartFrame;
     nsTextFrame* mEndFrame;
-    // When we break between non-whitespace characters, the nearest common
-    // ancestor of the frames containing the characters is the one whose
+    // When we consider breaking between elements, the nearest common
+    // ancestor of the elements containing the characters is the one whose
     // CSS 'white-space' property governs. So this records the nearest common
     // ancestor of mStartFrame and the previous text frame, or null if there
     // was no previous text frame on this line.
     nsIFrame*    mAncestorControllingInitialBreak;
     PRInt32      mContentOffset;
     PRInt32      mContentEndOffset;
     PRUint32     mTransformedTextOffset; // Only used inside BuildTextRunForFrames
   };
@@ -1553,58 +1553,93 @@ BuildTextRunsScanner::BuildTextRunForFra
   // making up the textrun, but I don't see a way to avoid this.
   SetupBreakSinksForTextRun(textRun, PR_FALSE);
 
   // Actually wipe out the textruns associated with the mapped frames and associate
   // those frames with this text run.
   AssignTextRun(textRun);
 }
 
+// Returns PR_TRUE if aFrame's text starts with skipped (compressed) whitespace.
+// aIterator is restored to its entry offset before returning.
+static PRBool
+HasCompressedLeadingWhitespace(nsTextFrame* aFrame, PRInt32 aContentEndOffset,
+                               gfxSkipCharsIterator* aIterator)
+{
+  if (!aIterator->IsOriginalCharSkipped())
+    return PR_FALSE;
+
+  PRBool result = PR_FALSE;
+  PRInt32 savedOffset = aIterator->GetOriginalOffset();
+  PRInt32 frameContentOffset = aFrame->GetContentOffset();
+  const nsTextFragment* frag = aFrame->GetContent()->GetText();
+  // Step the iterator too, so the scan ends at the first kept character.
+  for (; !result && frameContentOffset < aContentEndOffset &&
+         aIterator->IsOriginalCharSkipped();
+       ++frameContentOffset, aIterator->AdvanceOriginal(1)) {
+    result = IsSpace(frag, frameContentOffset);
+  }
+  aIterator->SetOriginalOffset(savedOffset);
+  return result;
+}
+
 void
 BuildTextRunsScanner::SetupBreakSinksForTextRun(gfxTextRun* aTextRun, 
                                                 PRBool aIsExistingTextRun)
 {
   // textruns have uniform language
   nsIAtom* lang = mMappedFlows[0].mStartFrame->GetStyleVisibility()->mLangGroup;
+  // We keep this pointed at the skip-chars data for the current mappedFlow.
+  // This lets us cheaply check whether the flow has compressed initial
+  // whitespace...
+  gfxSkipCharsIterator iter(aTextRun->GetSkipChars());
+
   PRUint32 i;
   for (i = 0; i < mMappedFlows.Length(); ++i) {
     MappedFlow* mappedFlow = &mMappedFlows[i];
     nsAutoPtr<BreakSink>* breakSink = mBreakSinks.AppendElement(
       new BreakSink(aTextRun, mappedFlow->mTransformedTextOffset, aIsExistingTextRun));
     if (!breakSink || !*breakSink)
       return;
     PRUint32 offset = mappedFlow->mTransformedTextOffset;
 
     PRUint32 length =
       (i == mMappedFlows.Length() - 1 ? aTextRun->GetLength()
        : mMappedFlows[i + 1].mTransformedTextOffset)
       - offset;
 
-    PRUint32 flags = 0;
-    if (!mappedFlow->mAncestorControllingInitialBreak ||
-        mappedFlow->mAncestorControllingInitialBreak->GetStyleText()->WhiteSpaceCanWrap()) {
-      flags |= nsLineBreaker::BREAK_NONWHITESPACE_BEFORE;
+    nsTextFrame* startFrame = mappedFlow->mStartFrame;
+    if (HasCompressedLeadingWhitespace(startFrame, mappedFlow->mContentEndOffset, &iter)) {
+      mLineBreaker.AppendInvisibleWhitespace();
     }
-    const nsStyleText* textStyle = mappedFlow->mStartFrame->GetStyleText();
-    if (textStyle->WhiteSpaceCanWrap()) {
-      // If white-space is preserved, then the only break opportunity is at
-      // the end of whitespace runs; otherwise there is a break opportunity before
-      // and after each whitespace character
-      flags |= nsLineBreaker::BREAK_NONWHITESPACE_INSIDE |
-        nsLineBreaker::BREAK_WHITESPACE_END;
+
+    if (length > 0) {
+      PRUint32 flags = 0;
+      if (!mappedFlow->mAncestorControllingInitialBreak ||
+          mappedFlow->mAncestorControllingInitialBreak->GetStyleText()->WhiteSpaceCanWrap()) {
+        flags |= nsLineBreaker::BREAK_ALLOW_INITIAL;
+      }
+      const nsStyleText* textStyle = startFrame->GetStyleText();
+      if (textStyle->WhiteSpaceCanWrap()) {
+        // If white-space is preserved, then the only break opportunity is at
+        // the end of whitespace runs; otherwise there is a break opportunity before
+        // and after each whitespace character
+        flags |= nsLineBreaker::BREAK_ALLOW_INSIDE;
+      }
+    
+      if (aTextRun->GetFlags() & gfxFontGroup::TEXT_IS_8BIT) {
+        mLineBreaker.AppendText(lang, aTextRun->GetText8Bit() + offset,
+                                length, flags, *breakSink);
+      } else {
+        mLineBreaker.AppendText(lang, aTextRun->GetTextUnicode() + offset,
+                                length, flags, *breakSink);
+      }
     }
-    // If length is zero, the linebreaker treats the text as invisible whitespace.
-    // Thus runs of entirely-skipped whitespace can still induce breaks.
-    if (aTextRun->GetFlags() & gfxFontGroup::TEXT_IS_8BIT) {
-      mLineBreaker.AppendText(lang, aTextRun->GetText8Bit() + offset,
-                              length, flags, *breakSink);
-    } else {
-      mLineBreaker.AppendText(lang, aTextRun->GetTextUnicode() + offset,
-                              length, flags, *breakSink);
-    }
+    
+    iter.AdvanceOriginal(mappedFlow->mContentEndOffset - mappedFlow->mContentOffset);
   }
 }
 
 void
 BuildTextRunsScanner::AssignTextRun(gfxTextRun* aTextRun)
 {
   nsIContent* lastContent = nsnull;
   PRUint32 i;
@@ -1724,27 +1759,27 @@ nsTextFrame::EnsureTextRun(nsIRenderingC
     startAt = userData->mLastFlowIndex - 1;
   }
   NS_ERROR("Can't find flow containing this frame???");
   static const gfxSkipChars emptySkipChars;
   return gfxSkipCharsIterator(emptySkipChars, 0);
 }
 
 static PRUint32
-GetLengthOfTrimmedText(const nsTextFragment* aFrag,
-                       PRUint32 aStart, PRUint32 aEnd,
-                       gfxSkipCharsIterator* aIterator)
+GetEndOfTrimmedText(const nsTextFragment* aFrag,
+                    PRUint32 aStart, PRUint32 aEnd,
+                    gfxSkipCharsIterator* aIterator)
 {
   aIterator->SetSkippedOffset(aEnd);
   while (aIterator->GetSkippedOffset() > aStart) {
     aIterator->AdvanceSkipped(-1);
     if (!IsSpace(aFrag, aIterator->GetOriginalOffset()))
-      return aIterator->GetSkippedOffset() + 1 - aStart;
-  }
-  return 0;
+      return aIterator->GetSkippedOffset() + 1;
+  }
+  return aStart;
 }
 
 nsTextFrame::TrimmedOffsets
 nsTextFrame::GetTrimmedOffsets(const nsTextFragment* aFrag,
                                PRBool aTrimAfter)
 {
   NS_ASSERTION(mTextRun, "Need textrun here");
 
@@ -1872,17 +1907,18 @@ public:
    */
   PropertyProvider(gfxTextRun* aTextRun, const nsStyleText* aTextStyle,
                    const nsTextFragment* aFrag, nsTextFrame* aFrame,
                    const gfxSkipCharsIterator& aStart, PRInt32 aLength,
                    nsIFrame* aLineContainer,
                    nscoord aOffsetFromBlockOriginForTabs)
     : mTextRun(aTextRun), mFontGroup(nsnull), mTextStyle(aTextStyle), mFrag(aFrag),
       mLineContainer(aLineContainer),
-      mFrame(aFrame), mStart(aStart), mTabWidths(nsnull), mLength(aLength),
+      mFrame(aFrame), mStart(aStart), mTempIterator(aStart),
+      mTabWidths(nsnull), mLength(aLength),
       mWordSpacing(StyleToCoord(mTextStyle->mWordSpacing)),
       mLetterSpacing(StyleToCoord(mTextStyle->mLetterSpacing)),
       mJustificationSpacing(0),
       mHyphenWidth(-1),
       mOffsetFromBlockOriginForTabs(aOffsetFromBlockOriginForTabs),
       mReflowing(PR_TRUE)
   {
     NS_ASSERTION(mStart.IsInitialized(), "Start not initialized?");
@@ -1893,17 +1929,18 @@ public:
    * have other data around. Gets everything from the frame. EnsureTextRun
    * *must* be called before this!!!
    */
   PropertyProvider(nsTextFrame* aFrame, const gfxSkipCharsIterator& aStart)
     : mTextRun(aFrame->GetTextRun()), mFontGroup(nsnull),
       mTextStyle(aFrame->GetStyleText()),
       mFrag(aFrame->GetContent()->GetText()),
       mLineContainer(nsnull),
-      mFrame(aFrame), mStart(aStart), mTabWidths(nsnull),
+      mFrame(aFrame), mStart(aStart), mTempIterator(aStart),
+      mTabWidths(nsnull),
       mLength(aFrame->GetContentLength()),
       mWordSpacing(StyleToCoord(mTextStyle->mWordSpacing)),
       mLetterSpacing(StyleToCoord(mTextStyle->mLetterSpacing)),
       mJustificationSpacing(0),
       mHyphenWidth(-1),
       mOffsetFromBlockOriginForTabs(0),
       mReflowing(PR_FALSE)
   {
@@ -1940,26 +1977,29 @@ public:
     if (!mFontGroup) {
       mFontGroup = GetFontGroupForFrame(mFrame);
     }
     return mFontGroup;
   }
 
   gfxFloat* GetTabWidths(PRUint32 aTransformedStart, PRUint32 aTransformedLength);
 
+  const gfxSkipCharsIterator& GetEndHint() { return mTempIterator; }
+
 protected:
   void SetupJustificationSpacing();
   
   gfxTextRun*           mTextRun;
   gfxFontGroup*         mFontGroup;
   const nsStyleText*    mTextStyle;
   const nsTextFragment* mFrag;
   nsIFrame*             mLineContainer;
   nsTextFrame*          mFrame;
   gfxSkipCharsIterator  mStart;  // Offset in original and transformed string
+  gfxSkipCharsIterator  mTempIterator;
   nsTArray<gfxFloat>*   mTabWidths;  // widths for each transformed string character
   PRInt32               mLength; // DOM string length
   gfxFloat              mWordSpacing;     // space for each whitespace char
   gfxFloat              mLetterSpacing;   // space for each letter
   gfxFloat              mJustificationSpacing;
   gfxFloat              mHyphenWidth;
   gfxFloat              mOffsetFromBlockOriginForTabs;
   PRPackedBool          mReflowing;
@@ -4483,20 +4523,20 @@ FindStartAfterSkippingWhitespace(Propert
                                  nsIFrame::InlineIntrinsicWidthData* aData,
                                  PRBool aCollapseWhitespace,
                                  gfxSkipCharsIterator* aIterator,
                                  PRUint32 aFlowEndInTextRun)
 {
   if (aData->skipWhitespace && aCollapseWhitespace) {
     while (aIterator->GetSkippedOffset() < aFlowEndInTextRun &&
            IsSpace(aProvider->GetFragment(), aIterator->GetOriginalOffset())) {
-      aIterator->AdvanceSkipped(1);
+      aIterator->AdvanceOriginal(1);
     }
   }
-  return aIterator->GetSkippedOffset();  
+  return aIterator->GetSkippedOffset();
 }
 
 /* virtual */ 
 void nsTextFrame::MarkIntrinsicWidthsDirty()
 {
   ClearTextRun();
   nsFrame::MarkIntrinsicWidthsDirty();
 }
@@ -4510,21 +4550,21 @@ nsTextFrame::AddInlineMinWidthForFlow(ns
   PRUint32 flowEndInTextRun;
   gfxSkipCharsIterator iter =
     EnsureTextRun(aRenderingContext, nsnull, nsnull, &flowEndInTextRun);
   if (!mTextRun)
     return;
 
   // Pass null for the line container. This will disable tab spacing, but that's
   // OK since we can't really handle tabs for intrinsic sizing anyway.
-  PropertyProvider provider(mTextRun, GetStyleText(), mContent->GetText(), this,
+  const nsTextFragment* frag = mContent->GetText();
+  PropertyProvider provider(mTextRun, GetStyleText(), frag, this,
                             iter, GetInFlowContentLength(), nsnull, 0);
 
   PRBool collapseWhitespace = !provider.GetStyleText()->WhiteSpaceIsSignificant();
-  PRBool canWrap = provider.GetStyleText()->WhiteSpaceCanWrap();
   PRUint32 start =
     FindStartAfterSkippingWhitespace(&provider, aData, collapseWhitespace,
                                      &iter, flowEndInTextRun);
   if (start >= flowEndInTextRun)
     return;
 
   if (mTextRun->CanBreakLineBefore(start)) {
     aData->Break(aRenderingContext);
@@ -4538,27 +4578,20 @@ nsTextFrame::AddInlineMinWidthForFlow(ns
       continue;
 
     nscoord width =
       NSToCoordCeil(mTextRun->GetAdvanceWidth(wordStart, i - wordStart, &provider));
     aData->currentLine += width;
 
     if (collapseWhitespace) {
       nscoord trailingWhitespaceWidth;
-      PRUint32 lengthAfterTrim;
-      if (canWrap) {
-        lengthAfterTrim = GetLengthOfTrimmedText(provider.GetFragment(),
-                                                 wordStart, i, &iter);
-      } else {
-        lengthAfterTrim = i - wordStart;
-      }
-      if (lengthAfterTrim == 0) {
+      PRUint32 trimStart = GetEndOfTrimmedText(frag, wordStart, i, &iter);
+      if (trimStart == start) {
         trailingWhitespaceWidth = width;
       } else {
-        PRUint32 trimStart = wordStart + lengthAfterTrim;
         trailingWhitespaceWidth =
           NSToCoordCeil(mTextRun->GetAdvanceWidth(trimStart, i - trimStart, &provider));
       }
       aData->trailingWhitespace += trailingWhitespaceWidth;
     } else {
       aData->trailingWhitespace = 0;
     }
     if (i < flowEndInTextRun) {
@@ -4608,40 +4641,33 @@ nsTextFrame::AddInlinePrefWidthForFlow(n
     return;
 
   // Pass null for the line container. This will disable tab spacing, but that's
   // OK since we can't really handle tabs for intrinsic sizing anyway.
   PropertyProvider provider(mTextRun, GetStyleText(), mContent->GetText(), this,
                             iter, GetInFlowContentLength(), nsnull, 0);
 
   PRBool collapseWhitespace = !provider.GetStyleText()->WhiteSpaceIsSignificant();
-  PRBool canWrap = provider.GetStyleText()->WhiteSpaceCanWrap();
   PRUint32 start =
     FindStartAfterSkippingWhitespace(&provider, aData, collapseWhitespace,
                                      &iter, flowEndInTextRun);
   if (start >= flowEndInTextRun)
     return;
 
   if (collapseWhitespace) {
     // \n line breaks are not honoured, so everything would like to go
     // onto one line, so just measure it
-    PRUint32 lengthAfterTrim;
-    if (canWrap) {
-      lengthAfterTrim = GetLengthOfTrimmedText(provider.GetFragment(), start,
-                                               flowEndInTextRun, &iter);
-    } else {
-      lengthAfterTrim = flowEndInTextRun;
-    }
     aData->currentLine +=
       NSToCoordCeil(mTextRun->GetAdvanceWidth(start, flowEndInTextRun - start, &provider));
 
-    PRUint32 trimStart = start + lengthAfterTrim;
+    PRUint32 trimStart = GetEndOfTrimmedText(provider.GetFragment(), start,
+                                             flowEndInTextRun, &iter);
     nscoord trimWidth =
       NSToCoordCeil(mTextRun->GetAdvanceWidth(trimStart, flowEndInTextRun - trimStart, &provider));
-    if (lengthAfterTrim == 0) {
+    if (trimStart == start) {
       // This is *all* trimmable whitespace, so whatever trailingWhitespace
       // we saw previously is still trailing...
       aData->trailingWhitespace += trimWidth;
     } else {
       // Some non-whitespace so the old trailingWhitespace is no longer trailing
       aData->trailingWhitespace = trimWidth;
     }
   } else {
@@ -4871,21 +4897,16 @@ nsTextFrame::Reflow(nsPresContext*      
   // The metrics for the text go in here
   gfxTextRun::Metrics textMetrics;
   PRBool needTightBoundingBox = (GetStateBits() & TEXT_FIRST_LETTER) != 0;
 #ifdef MOZ_MATHML
   if (NS_REFLOW_CALC_BOUNDING_METRICS & aMetrics.mFlags) {
     needTightBoundingBox = PR_TRUE;
   }
 #endif
-  // The "end" iterator points to the first character after the string mapped
-  // by this frame. Basically, it's original-string offset is offset+charsFit
-  // after we've computed charsFit.
-  gfxSkipCharsIterator end(provider.GetStart());
-
   PRBool suppressInitialBreak = PR_FALSE;
   if (!lineLayout.LineIsBreakable()) {
     suppressInitialBreak = PR_TRUE;
   } else {
     PRBool trailingTextFrameCanWrap;
     nsIFrame* lastTextFrame = lineLayout.GetTrailingTextFrame(&trailingTextFrameCanWrap);
     if (!lastTextFrame) {
       suppressInitialBreak = PR_TRUE;
@@ -4923,16 +4944,20 @@ nsTextFrame::Reflow(nsPresContext*      
   PRUint32 transformedCharsFit =
     mTextRun->BreakAndMeasureText(transformedOffset, transformedLength,
                                   (GetStateBits() & TEXT_START_OF_LINE) != 0,
                                   availWidth,
                                   &provider, suppressInitialBreak,
                                   canTrimTrailingWhitespace ? &trimmedWidth : nsnull,
                                   &textMetrics, needTightBoundingBox,
                                   &usedHyphenation, &transformedLastBreak);
+  // The "end" iterator points to the first character after the string mapped
+  // by this frame. Basically, its original-string offset is offset+charsFit
+  // after we've computed charsFit.
+  gfxSkipCharsIterator end(provider.GetEndHint());
   end.SetSkippedOffset(transformedOffset + transformedCharsFit);
   PRInt32 charsFit = end.GetOriginalOffset() - offset;
   // That might have taken us beyond our assigned content range (because
   // we might have advanced over some skipped chars that extend outside
   // this frame), so get back in.
   PRInt32 lastBreak = -1;
   if (charsFit >= limitLength) {
     charsFit = limitLength;
@@ -5070,18 +5095,19 @@ nsTextFrame::Reflow(nsPresContext*      
     // This is corrected for in nsLineLayout::TrimWhiteSpaceIn.
     PRInt32 numJustifiableCharacters =
       provider.ComputeJustifiableCharacters(offset, charsFit);
     // Currently canTrimTrailingWhitespace is always true here
     // because of the !textStyle->WhiteSpaceIsSignificant() test,
     // but that could change...
     if (canTrimTrailingWhitespace) {
       // Count trimmed spaces and add them to the cluster count
-      PRUint32 charIndex = transformedCharsFit;
-      while (charIndex > 0 && mTextRun->GetChar(charIndex - 1) == ' ') {
+      PRUint32 charIndex = transformedOffset + transformedCharsFit;
+      while (charIndex > transformedOffset &&
+             mTextRun->GetChar(charIndex - 1) == ' ') {
         ++textMetrics.mClusterCount;
         --charIndex;
       }
     }
 
     NS_ASSERTION(numJustifiableCharacters <= textMetrics.mClusterCount,
                  "Justifiable characters combined???");
     lineLayout.SetTextJustificationWeights(numJustifiableCharacters,
--- a/layout/generic/nsTextFrameUtils.cpp
+++ b/layout/generic/nsTextFrameUtils.cpp
@@ -110,20 +110,17 @@ nsTextFrameUtils::TransformText(const PR
     // Skip discardables.
     PRUint32 i;
     for (i = 0; i < aLength; ++i) {
       PRUnichar ch = *aText++;
       if (IsDiscardable(ch, &flags)) {
         aSkipChars->SkipChar();
       } else {
         aSkipChars->KeepChar();
-        if (ch == CH_NBSP) {
-          ch = ' ';
-          flags |= TEXT_WAS_TRANSFORMED;
-        } else if (ch == '\t') {
+        if (ch == '\t') {
           flags |= TEXT_HAS_TAB;
         }
         *aOutput++ = ch;
       }
     }
     *aIncomingWhitespace = PR_FALSE;
   } else {
     PRBool inWhitespace = *aIncomingWhitespace;
@@ -148,20 +145,16 @@ nsTextFrameUtils::TransformText(const PR
         nowInWhitespace = ch == '\t';
       }
 
       if (!nowInWhitespace) {
         if (IsDiscardable(ch, &flags)) {
           aSkipChars->SkipChar();
           nowInWhitespace = inWhitespace;
         } else {
-          if (ch == CH_NBSP) {
-            ch = ' ';
-            flags |= TEXT_WAS_TRANSFORMED;
-          }
           *aOutput++ = ch;
           aSkipChars->KeepChar();
         }
       } else {
         if (inWhitespace) {
           aSkipChars->SkipChar();
         } else {
           if (ch != ' ') {
@@ -198,20 +191,17 @@ nsTextFrameUtils::TransformText(const PR
     // Skip discardables.
     PRUint32 i;
     for (i = 0; i < aLength; ++i) {
       PRUint8 ch = *aText++;
       if (IsDiscardable(ch, &flags)) {
         aSkipChars->SkipChar();
       } else {
         aSkipChars->KeepChar();
-        if (ch == CH_NBSP) {
-          ch = ' ';
-          flags |= TEXT_WAS_TRANSFORMED;
-        } else if (ch == '\t') {
+        if (ch == '\t') {
           flags |= TEXT_HAS_TAB;
         }
         *aOutput++ = ch;
       }
     }
     *aIncomingWhitespace = PR_FALSE;
   } else {
     PRBool inWhitespace = *aIncomingWhitespace;
@@ -219,20 +209,16 @@ nsTextFrameUtils::TransformText(const PR
     for (i = 0; i < aLength; ++i) {
       PRUint8 ch = *aText++;
       PRBool nowInWhitespace = ch == ' ' || ch == '\t' || ch == '\n' || ch == '\f';
       if (!nowInWhitespace) {
         if (IsDiscardable(ch, &flags)) {
           aSkipChars->SkipChar();
           nowInWhitespace = inWhitespace;
         } else {
-          if (ch == CH_NBSP) {
-            ch = ' ';
-            flags |= TEXT_WAS_TRANSFORMED;
-          }
           *aOutput++ = ch;
           aSkipChars->KeepChar();
         }
       } else {
         if (inWhitespace) {
           aSkipChars->SkipChar();
         } else {
           if (ch != ' ') {
--- a/layout/generic/nsTextFrameUtils.h
+++ b/layout/generic/nsTextFrameUtils.h
@@ -57,17 +57,18 @@ public:
     TEXT_HAS_SHY             = 0x020000,
     TEXT_WAS_TRANSFORMED     = 0x040000,
 
     // The following flags are set by nsTextFrame
 
     TEXT_IS_SIMPLE_FLOW      = 0x100000,
     TEXT_INCOMING_WHITESPACE = 0x200000,
     TEXT_TRAILING_WHITESPACE = 0x400000,
-    TEXT_IS_UNCACHED         = 0x800000
+    TEXT_COMPRESSED_LEADING_WHITESPACE = 0x800000,
+    TEXT_IS_UNCACHED         = 0x1000000
   };
 
   static PRBool
   IsPunctuationMark(PRUnichar aChar);
 
   /**
    * Returns PR_TRUE if aChars/aLength are something that make a space
    * character not be whitespace when they follow the space character.