bug 703100 - pt 3 - remove copy of original characters from gfxTextRun. r=roc
author Jonathan Kew <jfkthame@gmail.com>
Tue, 06 Dec 2011 12:39:19 +0000
changeset 85103 102dff1e0bb54afa509dccbda5700f8db3673174
parent 85102 e053ff76503dc6f96de659d3bab88de46075ea48
child 85104 433b37e097c724cd18e2e5e16b243d0c6a37cb9a
push id 805
push user akeybl@mozilla.com
push date Wed, 01 Feb 2012 18:17:35 +0000
treeherder mozilla-aurora@6fb3bf232436 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers roc
bugs 703100
milestone 12.0a1
bug 703100 - pt 3 - remove copy of original characters from gfxTextRun. r=roc
gfx/thebes/gfxFont.cpp
gfx/thebes/gfxFont.h
layout/generic/nsTextFrameThebes.cpp
layout/generic/nsTextRunTransformations.cpp
layout/generic/nsTextRunTransformations.h
--- a/gfx/thebes/gfxFont.cpp
+++ b/gfx/thebes/gfxFont.cpp
@@ -1723,22 +1723,30 @@ gfxFont::Measure(gfxTextRun *aTextRun,
 #define MAX_SHAPING_LENGTH  32760 // slightly less than 32K, trying to avoid
                                   // over-stressing platform shapers
 
 #define BACKTRACK_LIMIT  1024 // If we can't find a space or a cluster start
                               // within 1K chars, just chop arbitrarily.
                               // Limiting backtrack here avoids pathological
                               // behavior on long runs with no whitespace.
 
-// XXX should we treat NBSP or SPACE combined with other characters as a word
-// boundary? Currently this does.
 static bool
-IsBoundarySpace(PRUnichar aChar)
+IsClusterExtender(PRUint32 aUSV)
 {
-    return aChar == ' ' || aChar == 0x00A0;
+    PRUint8 category = gfxUnicodeProperties::GetGeneralCategory(aUSV);
+    return ((category >= HB_CATEGORY_COMBINING_MARK &&
+             category <= HB_CATEGORY_NON_SPACING_MARK) ||
+            (aUSV >= 0x200c && aUSV <= 0x200d) || // ZWJ, ZWNJ
+            (aUSV >= 0xff9e && aUSV <= 0xff9f));  // katakana sound marks
+}
+
+static bool
+IsBoundarySpace(PRUnichar aChar, PRUnichar aNextChar)
+{
+    return (aChar == ' ' || aChar == 0x00A0) && !IsClusterExtender(aNextChar);
 }
 
 static inline PRUint32
 HashMix(PRUint32 aHash, PRUnichar aCh)
 {
     return (aHash >> 28) ^ (aHash << 4) ^ aCh;
 }
 
@@ -1777,16 +1785,26 @@ gfxFont::GetShapedWord(gfxContext *aCont
         ok = ShapeWord(aContext, entry->mShapedWord, (const PRUnichar*)aText);
     } else {
         nsAutoString utf16;
         AppendASCIItoUTF16((const char*)aText, utf16);
         ok = ShapeWord(aContext, entry->mShapedWord, utf16.BeginReading());
     }
     NS_WARN_IF_FALSE(ok, "failed to shape word - expect garbled text");
 
+    for (PRUint32 i = 0; i < aLength; ++i) {
+        if (aText[i] == ' ') {
+            entry->mShapedWord->SetIsSpace(i);
+        } else if (i > 0 &&
+                   NS_IS_HIGH_SURROGATE(aText[i - 1]) &&
+                   NS_IS_LOW_SURROGATE(aText[i])) {
+            entry->mShapedWord->SetIsLowSurrogate(i);
+        }
+    }
+
     return entry->mShapedWord;
 }
 
 bool
 gfxFont::CacheHashEntry::KeyEquals(const KeyTypePointer aKey) const
 {
     const gfxShapedWord *sw = mShapedWord;
     if (!sw) {
@@ -1865,44 +1883,42 @@ template<typename T>
 bool
 gfxFont::SplitAndInitTextRun(gfxContext *aContext,
                              gfxTextRun *aTextRun,
                              const T *aString,
                              PRUint32 aRunStart,
                              PRUint32 aRunLength,
                              PRInt32 aRunScript)
 {
+    if (aRunLength == 0) {
+        return true;
+    }
+
     InitWordCache();
 
     // the only flags we care about for ShapedWord construction/caching
     PRUint32 flags = aTextRun->GetFlags() &
         (gfxTextRunFactory::TEXT_IS_RTL |
          gfxTextRunFactory::TEXT_DISABLE_OPTIONAL_LIGATURES);
     if (sizeof(T) == sizeof(PRUint8)) {
         flags |= gfxTextRunFactory::TEXT_IS_8BIT;
     }
 
     const T *text = aString + aRunStart;
     PRUint32 wordStart = 0;
     PRUint32 hash = 0;
     bool wordIs8Bit = true;
     PRInt32 appUnitsPerDevUnit = aTextRun->GetAppUnitsPerDevUnit();
 
+    T nextCh = text[0];
     for (PRUint32 i = 0; i <= aRunLength; ++i) {
-        T ch;
-        bool boundary, invalid;
-        if (i < aRunLength) {
-            ch = text[i];
-            boundary = IsBoundarySpace(ch);
-            invalid = !boundary && gfxFontGroup::IsInvalidChar(ch);
-        } else {
-            ch = '\n';
-            boundary = false;
-            invalid = true;
-        }
+        T ch = nextCh;
+        nextCh = (i < aRunLength - 1) ? text[i + 1] : '\n';
+        bool boundary = IsBoundarySpace(ch, nextCh);
+        bool invalid = !boundary && gfxFontGroup::IsInvalidChar(ch);
         PRUint32 length = i - wordStart;
 
         // break into separate ShapedWords when we hit an invalid char,
         // or a boundary space (always handled individually),
         // or the first non-space after a space
         bool breakHere = boundary || invalid;
 
         if (!breakHere) {
@@ -1911,20 +1927,17 @@ gfxFont::SplitAndInitTextRun(gfxContext 
                 // in 8-bit text, no clusters or surrogates to worry about
                 if (length >= gfxShapedWord::kMaxLength) {
                     breakHere = true;
                 }
             } else {
                 // try to avoid breaking before combining mark or low surrogate
                 if (length >= gfxShapedWord::kMaxLength - 15) {
                     if (!NS_IS_LOW_SURROGATE(ch)) {
-                        PRUint8 cat =
-                            gfxUnicodeProperties::GetGeneralCategory(ch);
-                        if (cat < HB_CATEGORY_COMBINING_MARK ||
-                            cat > HB_CATEGORY_NON_SPACING_MARK) {
+                        if (!IsClusterExtender(ch)) {
                             breakHere = true;
                         }
                     }
                     if (!breakHere && length >= gfxShapedWord::kMaxLength - 3) {
                         if (!NS_IS_LOW_SURROGATE(ch)) {
                             breakHere = true;
                         }
                     }
@@ -1967,17 +1980,17 @@ gfxFont::SplitAndInitTextRun(gfxContext 
             } else {
                 return false; // failed, presumably out of memory?
             }
         }
 
         if (boundary) {
             // word was terminated by a space: add that to the textrun
             if (!aTextRun->SetSpaceGlyphIfSimple(this, aContext,
-                                                 aRunStart + i))
+                                                 aRunStart + i, ch))
             {
                 static const PRUint8 space = ' ';
                 gfxShapedWord *sw =
                     GetShapedWord(aContext,
                                   &space, 1,
                                   HashMix(0, ' '), aRunScript,
                                   appUnitsPerDevUnit,
                                   flags | gfxTextRunFactory::TEXT_IS_8BIT);
@@ -1988,18 +2001,28 @@ gfxFont::SplitAndInitTextRun(gfxContext 
                 }
             }
             hash = 0;
             wordStart = i + 1;
             wordIs8Bit = true;
             continue;
         }
 
+        if (i == aRunLength) {
+            break;
+        }
+
         if (invalid) {
-            // word was terminated by an invalid char: skip it
+            // word was terminated by an invalid char: skip it,
+            // but record where TAB or NEWLINE occur
+            if (ch == '\t') {
+                aTextRun->SetIsTab(aRunStart + i);
+            } else if (ch == '\n') {
+                aTextRun->SetIsNewline(aRunStart + i);
+            }
             hash = 0;
             wordStart = i + 1;
             wordIs8Bit = true;
             continue;
         }
 
         // word was forcibly broken, so current char will begin next word
         hash = HashMix(0, ch);
@@ -3045,50 +3068,72 @@ gfxFontGroup::InitScriptRun(gfxContext *
         const gfxTextRange& range = fontRanges[r];
         PRUint32 matchedLength = range.Length();
         gfxFont *matchedFont = (range.font ? range.font.get() : nsnull);
 
         // create the glyph run for this range
         if (matchedFont) {
             aTextRun->AddGlyphRun(matchedFont, range.matchType,
                                   runStart, (matchedLength > 0));
-        } else {
-            aTextRun->AddGlyphRun(mainFont, gfxTextRange::kFontGroup,
-                                  runStart, (matchedLength > 0));
-        }
-        if (matchedFont) {
             // do glyph layout and record the resulting positioned glyphs
             if (!matchedFont->SplitAndInitTextRun(aContext, aTextRun, aString,
                                                   runStart, matchedLength,
                                                   aRunScript)) {
                 // glyph layout failed! treat as missing glyphs
                 matchedFont = nsnull;
             }
+        } else {
+            aTextRun->AddGlyphRun(mainFont, gfxTextRange::kFontGroup,
+                                  runStart, (matchedLength > 0));
         }
+
         if (!matchedFont) {
             // for PRUnichar text, we need to set cluster boundaries so that
             // surrogate pairs, combining characters, etc behave properly,
             // even if we don't have glyphs for them
             if (sizeof(T) == sizeof(PRUnichar)) {
                 gfxShapedWord::SetupClusterBoundaries(aTextRun->GetCharacterGlyphs() + runStart,
                                                       reinterpret_cast<const PRUnichar*>(aString) + runStart,
                                                       matchedLength);
             }
-            for (PRUint32 index = runStart; index < runStart + matchedLength; index++) {
-                // Record the char code so we can draw a box with the Unicode value
+
+            // various "missing" characters may need special handling,
+            // so we check for them here
+            PRUint32 runLimit = runStart + matchedLength;
+            for (PRUint32 index = runStart; index < runLimit; index++) {
                 T ch = aString[index];
-                if (sizeof(T) == sizeof(PRUnichar) &&
-                    NS_IS_HIGH_SURROGATE(ch) &&
-                    index + 1 < aScriptRunEnd &&
-                    NS_IS_LOW_SURROGATE(aString[index+1])) {
-                    aTextRun->SetMissingGlyph(index,
-                                              SURROGATE_TO_UCS4(ch,
-                                                                aString[index+1]));
-                    index++;
-                } else if (!IsInvalidChar(ch)) {
+
+                // tab and newline are not to be displayed as hexboxes,
+                // but do need to be recorded in the textrun
+                if (ch == '\n') {
+                    aTextRun->SetIsNewline(index);
+                    continue;
+                }
+                if (ch == '\t') {
+                    aTextRun->SetIsTab(index);
+                    continue;
+                }
+
+                // for 16-bit textruns only, check for surrogate pairs and
+                // special Unicode spaces; omit these checks in 8-bit runs
+                if (sizeof(T) == sizeof(PRUnichar)) {
+                    if (NS_IS_HIGH_SURROGATE(ch) &&
+                        index + 1 < aScriptRunEnd &&
+                        NS_IS_LOW_SURROGATE(aString[index + 1]))
+                    {
+                        aTextRun->SetMissingGlyph(index,
+                                                  SURROGATE_TO_UCS4(ch,
+                                                                    aString[index + 1]));
+                        index++;
+                        aTextRun->SetIsLowSurrogate(index);
+                        continue;
+                    }
+
+                    // check if this is a known Unicode whitespace character that
+                    // we can render using the space glyph with a custom width
                     gfxFloat wid = mainFont->SynthesizeSpaceWidth(ch);
                     if (wid >= 0.0) {
                         nscoord advance =
                             aTextRun->GetAppUnitsPerDevUnit() * floor(wid + 0.5);
                         gfxTextRun::CompressedGlyph g;
                         if (gfxTextRun::CompressedGlyph::IsSimpleAdvance(advance)) {
                             aTextRun->SetSimpleGlyph(index,
                                                      g.SetSimpleGlyph(advance,
@@ -3097,20 +3142,27 @@ gfxFontGroup::InitScriptRun(gfxContext *
                             gfxTextRun::DetailedGlyph detailedGlyph;
                             detailedGlyph.mGlyphID = mainFont->GetSpaceGlyph();
                             detailedGlyph.mAdvance = advance;
                             detailedGlyph.mXOffset = detailedGlyph.mYOffset = 0;
                             g.SetComplex(true, true, 1);
                             aTextRun->SetGlyphs(index,
                                                 g, &detailedGlyph);
                         }
-                    } else {
-                        aTextRun->SetMissingGlyph(index, ch);
+                        continue;
                     }
                 }
+
+                if (IsInvalidChar(ch)) {
+                    // invalid chars are left as zero-width/invisible
+                    continue;
+                }
+
+                // record char code so we can draw a box with the Unicode value
+                aTextRun->SetMissingGlyph(index, ch);
             }
         }
 
         runStart += matchedLength;
     }
 }
 
 already_AddRefed<gfxFont>
@@ -3567,21 +3619,17 @@ gfxShapedWord::SetupClusterBoundaries(Co
             ch = SURROGATE_TO_UCS4(ch, aString[i+1]);
             surrogatePair = true;
         }
 
         PRUint8 category = gfxUnicodeProperties::GetGeneralCategory(ch);
         gfxUnicodeProperties::HSType hangulType = gfxUnicodeProperties::HST_NONE;
 
         // combining marks extend the cluster
-        if ((category >= HB_CATEGORY_COMBINING_MARK &&
-             category <= HB_CATEGORY_NON_SPACING_MARK) ||
-            (ch >= 0x200c && ch <= 0x200d) || // ZWJ, ZWNJ
-            (ch >= 0xff9e && ch <= 0xff9f))   // katakana sound marks
-        {
+        if (IsClusterExtender(ch)) {
             aGlyphs[i] = extendCluster;
         } else if (category == HB_CATEGORY_OTHER_LETTER) {
             // handle special cases in Letter_Other category
 #if 0
             // Currently disabled. This would follow the UAX#29 specification
             // for extended grapheme clusters, but this is not favored by
             // Thai users, at least for editing behavior.
             // See discussion of equivalent Pango issue in bug 474068 and
@@ -3809,117 +3857,65 @@ AccountStorageForTextRun(gfxTextRun *aTe
       bytes &= ~(sizeof(gfxTextRun::CompressedGlyph) - 1);
     }
     bytes += sizeof(gfxTextRun);
     gTextRunStorage += bytes*aSign;
     gTextRunStorageHighWaterMark = NS_MAX(gTextRunStorageHighWaterMark, gTextRunStorage);
 }
 #endif
 
-static PRUint64
-GlyphStorageAllocCount(PRUint32 aLength, PRUint32 aFlags)
-{
-    // always need to allocate storage for the glyph data
-    PRUint64 allocCount = aLength;
-
-    // if the text is not persistent, we also need space for a copy
-    if (!(aFlags & gfxTextRunFactory::TEXT_IS_PERSISTENT)) {
-        // figure out number of extra CompressedGlyph elements we need to
-        // get sufficient space for the text
-        typedef gfxTextRun::CompressedGlyph CompressedGlyph;
-        if (aFlags & gfxTextRunFactory::TEXT_IS_8BIT) {
-            allocCount += (aLength + sizeof(CompressedGlyph) - 1) /
-                          sizeof(CompressedGlyph);
-        } else {
-            allocCount += (aLength * sizeof(PRUnichar) +
-                              sizeof(CompressedGlyph) - 1) /
-                          sizeof(CompressedGlyph);
-        }
-    }
-    return allocCount;
-}
-
-// Helper for textRun creation to preallocate storage for glyphs and text;
-// this function returns a pointer to the newly-allocated glyph storage,
-// AND modifies the aText parameter if TEXT_IS_PERSISTENT was not set.
-// In that case, the text is appended to the glyph storage, so a single
-// delete[] operation in the textRun destructor will free both.
+// Helper for textRun creation to preallocate storage for glyph records;
+// this function returns a pointer to the newly-allocated glyph storage.
 // Returns nsnull if allocation fails.
 gfxTextRun::CompressedGlyph *
-gfxTextRun::AllocateStorage(const void*& aText, PRUint32 aLength, PRUint32 aFlags)
+gfxTextRun::AllocateStorage(PRUint32 aLength)
 {
-    // Here, we rely on CompressedGlyph being the largest unit we care about for
-    // allocation/alignment of either glyph data or text, so we allocate an array
-    // of CompressedGlyphs, then take the last chunk of that and cast a pointer to
-    // PRUint8* or PRUnichar* for text storage.
-
-    PRUint64 allocCount = GlyphStorageAllocCount(aLength, aFlags);
-
     // allocate the storage we need, returning nsnull on failure rather than
     // throwing an exception (because web content can create huge runs)
-    CompressedGlyph *storage = new (std::nothrow) CompressedGlyph[allocCount];
+    CompressedGlyph *storage = new (std::nothrow) CompressedGlyph[aLength];
     if (!storage) {
-        NS_WARNING("failed to allocate glyph/text storage for text run!");
+        NS_WARNING("failed to allocate glyph storage for text run!");
         return nsnull;
     }
 
-    // copy the text if we need to keep a copy in the textrun
-    if (!(aFlags & gfxTextRunFactory::TEXT_IS_PERSISTENT)) {
-        if (aFlags & gfxTextRunFactory::TEXT_IS_8BIT) {
-            PRUint8 *newText = reinterpret_cast<PRUint8*>(storage + aLength);
-            memcpy(newText, aText, aLength);
-            aText = newText;
-        } else {
-            PRUnichar *newText = reinterpret_cast<PRUnichar*>(storage + aLength);
-            memcpy(newText, aText, aLength*sizeof(PRUnichar));
-            aText = newText;
-        }
-    }
-
     return storage;
 }
 
 gfxTextRun *
 gfxTextRun::Create(const gfxTextRunFactory::Parameters *aParams, const void *aText,
                    PRUint32 aLength, gfxFontGroup *aFontGroup, PRUint32 aFlags)
 {
-    CompressedGlyph *glyphStorage = AllocateStorage(aText, aLength, aFlags);
+    CompressedGlyph *glyphStorage = AllocateStorage(aLength);
     if (!glyphStorage) {
         return nsnull;
     }
 
     return new gfxTextRun(aParams, aText, aLength, aFontGroup, aFlags, glyphStorage);
 }
 
 gfxTextRun::gfxTextRun(const gfxTextRunFactory::Parameters *aParams, const void *aText,
                        PRUint32 aLength, gfxFontGroup *aFontGroup, PRUint32 aFlags,
                        CompressedGlyph *aGlyphStorage)
   : mCharacterGlyphs(aGlyphStorage),
     mUserData(aParams->mUserData),
     mFontGroup(aFontGroup),
     mAppUnitsPerDevUnit(aParams->mAppUnitsPerDevUnit),
-    mFlags(aFlags), mCharacterCount(aLength), mHashCode(0)
+    mFlags(aFlags), mCharacterCount(aLength)
 {
     NS_ASSERTION(mAppUnitsPerDevUnit != 0, "Invalid app unit scale");
     MOZ_COUNT_CTOR(gfxTextRun);
     NS_ADDREF(mFontGroup);
     if (aParams->mSkipChars) {
         mSkipChars.TakeFrom(aParams->mSkipChars);
     }
 
-    if (mFlags & gfxTextRunFactory::TEXT_IS_8BIT) {
-        mText.mSingle = static_cast<const PRUint8 *>(aText);
-    } else {
-        mText.mDouble = static_cast<const PRUnichar *>(aText);
-    }
 #ifdef DEBUG_TEXT_RUN_STORAGE_METRICS
     AccountStorageForTextRun(this, 1);
 #endif
 
-    mUserFontSetGeneration = mFontGroup->GetGeneration();
     mSkipDrawing = mFontGroup->ShouldSkipDrawing();
 }
 
 gfxTextRun::~gfxTextRun()
 {
 #ifdef DEBUG_TEXT_RUN_STORAGE_METRICS
     AccountStorageForTextRun(this, -1);
 #endif
@@ -4573,17 +4569,17 @@ gfxTextRun::BreakAndMeasureText(PRUint32
                 charAdvance += space->mBefore + space->mAfter;
             }
         } else {
             charAdvance = ComputePartialLigatureWidth(i, i + 1, aProvider);
         }
         
         advance += charAdvance;
         if (aTrimWhitespace) {
-            if (GetChar(i) == ' ') {
+            if (mCharacterGlyphs[i].CharIsSpace()) {
                 ++trimmableChars;
                 trimmableAdvance += charAdvance;
             } else {
                 trimmableAdvance = 0;
                 trimmableChars = 0;
             }
         }
     }
@@ -4862,16 +4858,23 @@ gfxTextRun::SetGlyphs(PRUint32 aIndex, C
         memcpy(details, aGlyphs, sizeof(DetailedGlyph)*glyphCount);
     }
     mCharacterGlyphs[aIndex] = aGlyph;
 }
 
 void
 gfxTextRun::SetMissingGlyph(PRUint32 aIndex, PRUint32 aChar)
 {
+    PRUint8 category = gfxUnicodeProperties::GetGeneralCategory(aChar);
+    if (category >= HB_CATEGORY_COMBINING_MARK &&
+        category <= HB_CATEGORY_NON_SPACING_MARK)
+    {
+        mCharacterGlyphs[aIndex].SetComplex(false, true, 0);
+    }
+
     DetailedGlyph *details = AllocateDetailedGlyphs(aIndex, 1);
     if (!details)
         return;
 
     details->mGlyphID = aChar;
     GlyphRun *glyphRun = &mGlyphRuns[FindFirstGlyphRunContaining(aIndex)];
     if (IsDefaultIgnorable(aChar)) {
         // Setting advance width to zero will prevent drawing the hexbox
@@ -4881,34 +4884,16 @@ gfxTextRun::SetMissingGlyph(PRUint32 aIn
                                 gfxFontMissingGlyphs::GetDesiredMinWidth(aChar));
         details->mAdvance = PRUint32(width*GetAppUnitsPerDevUnit());
     }
     details->mXOffset = 0;
     details->mYOffset = 0;
     mCharacterGlyphs[aIndex].SetMissing(1);
 }
 
-bool
-gfxTextRun::FilterIfIgnorable(PRUint32 aIndex)
-{
-    PRUint32 ch = GetChar(aIndex);
-    if (IsDefaultIgnorable(ch)) {
-        DetailedGlyph *details = AllocateDetailedGlyphs(aIndex, 1);
-        if (details) {
-            details->mGlyphID = ch;
-            details->mAdvance = 0;
-            details->mXOffset = 0;
-            details->mYOffset = 0;
-            mCharacterGlyphs[aIndex].SetMissing(1);
-            return true;
-        }
-    }
-    return false;
-}
-
 void
 gfxTextRun::CopyGlyphDataFrom(const gfxShapedWord *aShapedWord, PRUint32 aOffset)
 {
     PRUint32 wordLen = aShapedWord->Length();
     NS_ASSERTION(aOffset + wordLen <= GetLength(),
                  "word overruns end of textrun!");
 
     const CompressedGlyph *wordGlyphs = aShapedWord->GetCharacterGlyphs();
@@ -4997,61 +4982,60 @@ gfxTextRun::CopyGlyphDataFrom(gfxTextRun
         nsresult rv = AddGlyphRun(font, iter.GetGlyphRun()->mMatchType,
                                   start - aStart + aDest, false);
         if (NS_FAILED(rv))
             return;
     }
 }
 
 void
-gfxTextRun::SetSpaceGlyph(gfxFont *aFont, gfxContext *aContext, PRUint32 aCharIndex)
+gfxTextRun::SetSpaceGlyph(gfxFont *aFont, gfxContext *aContext,
+                          PRUint32 aCharIndex)
 {
-    PRUint32 spaceGlyph = aFont->GetSpaceGlyph();
-    float spaceWidth = aFont->GetMetrics().spaceWidth;
-    PRUint32 spaceWidthAppUnits = NS_lroundf(spaceWidth*mAppUnitsPerDevUnit);
-    if (!spaceGlyph ||
-        !CompressedGlyph::IsSimpleGlyphID(spaceGlyph) ||
-        !CompressedGlyph::IsSimpleAdvance(spaceWidthAppUnits)) {
-        gfxTextRunFactory::Parameters params = {
-            aContext, nsnull, nsnull, nsnull, 0, mAppUnitsPerDevUnit
-        };
-        static const PRUint8 space = ' ';
-        nsAutoPtr<gfxTextRun> textRun;
-        textRun = mFontGroup->MakeTextRun(&space, 1, &params,
-            gfxTextRunFactory::TEXT_IS_8BIT | gfxTextRunFactory::TEXT_IS_ASCII |
-            gfxTextRunFactory::TEXT_IS_PERSISTENT);
-        if (!textRun || !textRun->mCharacterGlyphs)
-            return;
-        CopyGlyphDataFrom(textRun, 0, 1, aCharIndex);
+    if (SetSpaceGlyphIfSimple(aFont, aContext, aCharIndex, ' ')) {
         return;
     }
-    AddGlyphRun(aFont, gfxTextRange::kFontGroup, aCharIndex, false);
-    CompressedGlyph g;
-    g.SetSimpleGlyph(spaceWidthAppUnits, spaceGlyph);
-    SetSimpleGlyph(aCharIndex, g);
+
+    aFont->InitWordCache();
+    static const PRUint8 space = ' ';
+    gfxShapedWord *sw = aFont->GetShapedWord(aContext,
+                                             &space, 1,
+                                             HashMix(0, ' '), 
+                                             HB_SCRIPT_LATIN,
+                                             mAppUnitsPerDevUnit,
+                                             gfxTextRunFactory::TEXT_IS_8BIT |
+                                             gfxTextRunFactory::TEXT_IS_ASCII |
+                                             gfxTextRunFactory::TEXT_IS_PERSISTENT);
+    if (sw) {
+        AddGlyphRun(aFont, gfxTextRange::kFontGroup, aCharIndex, false);
+        CopyGlyphDataFrom(sw, aCharIndex);
+    }
 }
 
 bool
 gfxTextRun::SetSpaceGlyphIfSimple(gfxFont *aFont, gfxContext *aContext,
-                                  PRUint32 aCharIndex)
+                                  PRUint32 aCharIndex, PRUnichar aSpaceChar)
 {
     PRUint32 spaceGlyph = aFont->GetSpaceGlyph();
     if (!spaceGlyph || !CompressedGlyph::IsSimpleGlyphID(spaceGlyph)) {
         return false;
     }
 
     PRUint32 spaceWidthAppUnits =
         NS_lroundf(aFont->GetMetrics().spaceWidth * mAppUnitsPerDevUnit);
     if (!CompressedGlyph::IsSimpleAdvance(spaceWidthAppUnits)) {
         return false;
     }
 
     AddGlyphRun(aFont, gfxTextRange::kFontGroup, aCharIndex, false);
     CompressedGlyph g;
     g.SetSimpleGlyph(spaceWidthAppUnits, spaceGlyph);
+    if (aSpaceChar == ' ') {
+        g.SetIsSpace();
+    }
     SetSimpleGlyph(aCharIndex, g);
     return true;
 }
 
 void
 gfxTextRun::FetchGlyphExtents(gfxContext *aRefContext)
 {
     bool needsGlyphExtents = NeedsGlyphExtents(this);
@@ -5168,18 +5152,17 @@ gfxTextRun::ClusterIterator::ClusterAdva
 
 size_t
 gfxTextRun::SizeOfExcludingThis(nsMallocSizeOfFun aMallocSizeOf)
 {
     // The second arg is how much gfxTextRun::AllocateStorage would have
     // allocated.
     size_t total =
         aMallocSizeOf(mCharacterGlyphs,
-                      sizeof(CompressedGlyph) *
-                      GlyphStorageAllocCount(mCharacterCount, mFlags));
+                      sizeof(CompressedGlyph) * mCharacterCount);
 
     if (mDetailedGlyphs) {
         total += mDetailedGlyphs->SizeOfIncludingThis(aMallocSizeOf);
     }
 
     total += mGlyphRuns.SizeOfExcludingThis(aMallocSizeOf);
 
     return total;
@@ -5196,26 +5179,17 @@ gfxTextRun::SizeOfIncludingThis(nsMalloc
 #ifdef DEBUG
 void
 gfxTextRun::Dump(FILE* aOutput) {
     if (!aOutput) {
         aOutput = stdout;
     }
 
     PRUint32 i;
-    fputc('"', aOutput);
-    for (i = 0; i < mCharacterCount; ++i) {
-        PRUnichar ch = GetChar(i);
-        if (ch >= 32 && ch < 128) {
-            fputc(ch, aOutput);
-        } else {
-            fprintf(aOutput, "\\u%4x", ch);
-        }
-    }
-    fputs("\" [", aOutput);
+    fputc('[', aOutput);
     for (i = 0; i < mGlyphRuns.Length(); ++i) {
         if (i > 0) {
             fputc(',', aOutput);
         }
         gfxFont* font = mGlyphRuns[i].mFont;
         const gfxFontStyle* style = font->GetStyle();
         NS_ConvertUTF16toUTF8 fontName(font->GetName());
         nsCAutoString lang;
--- a/gfx/thebes/gfxFont.h
+++ b/gfx/thebes/gfxFont.h
@@ -1646,48 +1646,54 @@ public:
     class CompressedGlyph {
     public:
         CompressedGlyph() { mValue = 0; }
 
         enum {
             // Indicates that a cluster and ligature group starts at this
             // character; this character has a single glyph with a reasonable
             // advance and zero offsets. A "reasonable" advance
-            // is one that fits in the available bits (currently 13) (specified
+            // is one that fits in the available bits (currently 12) (specified
             // in appunits).
             FLAG_IS_SIMPLE_GLYPH  = 0x80000000U,
 
             // Indicates whether a linebreak is allowed before this character;
             // this is a two-bit field that holds a FLAG_BREAK_TYPE_xxx value
             // indicating the kind of linebreak (if any) allowed here.
             FLAGS_CAN_BREAK_BEFORE = 0x60000000U,
 
             FLAGS_CAN_BREAK_SHIFT = 29,
             FLAG_BREAK_TYPE_NONE   = 0,
             FLAG_BREAK_TYPE_NORMAL = 1,
             FLAG_BREAK_TYPE_HYPHEN = 2,
 
+            FLAG_CHAR_IS_SPACE     = 0x10000000U,
+
             // The advance is stored in appunits
-            ADVANCE_MASK  = 0x1FFF0000U,
+            ADVANCE_MASK  = 0x0FFF0000U,
             ADVANCE_SHIFT = 16,
 
             GLYPH_MASK = 0x0000FFFFU,
 
             // Non-simple glyphs may or may not have glyph data in the
             // corresponding mDetailedGlyphs entry. They have the following
             // flag bits:
 
             // When NOT set, indicates that this character corresponds to a
             // missing glyph and should be skipped (or possibly, render the character
             // Unicode value in some special way). If there are glyphs,
             // the mGlyphID is actually the UTF16 character code. The bit is
             // inverted so we can memset the array to zero to indicate all missing.
             FLAG_NOT_MISSING              = 0x01,
             FLAG_NOT_CLUSTER_START        = 0x02,
             FLAG_NOT_LIGATURE_GROUP_START = 0x04,
+
+            FLAG_CHAR_IS_TAB              = 0x08,
+            FLAG_CHAR_IS_NEWLINE          = 0x10,
+            FLAG_CHAR_IS_LOW_SURROGATE    = 0x20,
             
             GLYPH_COUNT_MASK = 0x00FFFF00U,
             GLYPH_COUNT_SHIFT = 8
         };
 
         // "Simple glyphs" have a simple glyph ID, simple advance and their
         // x and y offsets are zero. Also the glyph extents do not overflow
         // the font-box defined by the font ascent, descent and glyph advance width.
@@ -1715,16 +1721,33 @@ public:
             return (mValue & FLAG_IS_SIMPLE_GLYPH) || !(mValue & FLAG_NOT_LIGATURE_GROUP_START);
         }
         bool IsLigatureContinuation() const {
             return (mValue & FLAG_IS_SIMPLE_GLYPH) == 0 &&
                 (mValue & (FLAG_NOT_LIGATURE_GROUP_START | FLAG_NOT_MISSING)) ==
                     (FLAG_NOT_LIGATURE_GROUP_START | FLAG_NOT_MISSING);
         }
 
+        // Return true if the original character was a normal (breakable,
+        // trimmable) space (U+0020). Not true for other characters that
+        // may happen to map to the space glyph (U+00A0).
+        bool CharIsSpace() const {
+            return (mValue & FLAG_CHAR_IS_SPACE) != 0;
+        }
+
+        bool CharIsTab() const {
+            return !IsSimpleGlyph() && (mValue & FLAG_CHAR_IS_TAB) != 0;
+        }
+        bool CharIsNewline() const {
+            return !IsSimpleGlyph() && (mValue & FLAG_CHAR_IS_NEWLINE) != 0;
+        }
+        bool CharIsLowSurrogate() const {
+            return !IsSimpleGlyph() && (mValue & FLAG_CHAR_IS_LOW_SURROGATE) != 0;
+        }
+
         void SetClusterStart(bool aIsClusterStart) {
             NS_ASSERTION(!IsSimpleGlyph(),
                          "can't call SetClusterStart on simple glyphs");
             if (aIsClusterStart) {
                 mValue &= ~FLAG_NOT_CLUSTER_START;
             } else {
                 mValue |= FLAG_NOT_CLUSTER_START;
             }
@@ -1769,16 +1792,32 @@ public:
                 (aGlyphCount << GLYPH_COUNT_SHIFT);
             return *this;
         }
         PRUint32 GetGlyphCount() const {
             NS_ASSERTION(!IsSimpleGlyph(), "Expected non-simple-glyph");
             return (mValue & GLYPH_COUNT_MASK) >> GLYPH_COUNT_SHIFT;
         }
 
+        void SetIsSpace() { // no simple/complex assertion, unlike tab/newline/surrogate
+            mValue |= FLAG_CHAR_IS_SPACE;
+        }
+        void SetIsTab() { // the record must already be a complex (non-simple) one
+            NS_ASSERTION(!IsSimpleGlyph(), "Expected non-simple-glyph");
+            mValue |= FLAG_CHAR_IS_TAB;
+        }
+        void SetIsNewline() { // the record must already be a complex (non-simple) one
+            NS_ASSERTION(!IsSimpleGlyph(), "Expected non-simple-glyph");
+            mValue |= FLAG_CHAR_IS_NEWLINE;
+        }
+        void SetIsLowSurrogate() { // the record must already be a complex (non-simple) one
+            NS_ASSERTION(!IsSimpleGlyph(), "Expected non-simple-glyph");
+            mValue |= FLAG_CHAR_IS_LOW_SURROGATE;
+        }
+
     private:
         PRUint32 mValue;
     };
 
     /**
      * When the glyphs for a character don't fit into a CompressedGlyph record
      * in SimpleGlyph format, we use an array of DetailedGlyphs instead.
      */
@@ -1859,16 +1898,25 @@ public:
         }
     }
 
     void SetGlyphs(PRUint32 aCharIndex, CompressedGlyph aGlyph,
                    const DetailedGlyph *aGlyphs);
 
     void SetMissingGlyph(PRUint32 aIndex, PRUint32 aChar, gfxFont *aFont);
 
+    void SetIsSpace(PRUint32 aIndex) { // flag the record at aIndex as a U+0020 space
+        mCharacterGlyphs[aIndex].SetIsSpace();
+    }
+
+    void SetIsLowSurrogate(PRUint32 aIndex) { // aIndex is not range-checked here
+        SetGlyphs(aIndex, CompressedGlyph().SetComplex(false, false, 0), nsnull); // complex record, glyph count 0
+        mCharacterGlyphs[aIndex].SetIsLowSurrogate(); // the flag setter asserts a non-simple record
+    }
+
     bool FilterIfIgnorable(PRUint32 aIndex);
 
     const CompressedGlyph *GetCharacterGlyphs() const {
         return &mCharacterGlyphs[0];
     }
 
     bool HasDetailedGlyphs() const {
         return mDetailedGlyphs != nsnull;
@@ -2118,16 +2166,33 @@ public:
             CompressedGlyph::FLAG_BREAK_TYPE_NORMAL;
     }
     bool CanHyphenateBefore(PRUint32 aPos) {
         NS_ASSERTION(aPos < mCharacterCount, "aPos out of range");
         return mCharacterGlyphs[aPos].CanBreakBefore() ==
             CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN;
     }
 
+    bool CharIsSpace(PRUint32 aPos) { // was the character at aPos a U+0020 space?
+        NS_ASSERTION(aPos < mCharacterCount, "aPos out of range"); // aPos is unsigned
+        return mCharacterGlyphs[aPos].CharIsSpace();
+    }
+    bool CharIsTab(PRUint32 aPos) { // was the character at aPos flagged as a tab?
+        NS_ASSERTION(aPos < mCharacterCount, "aPos out of range"); // aPos is unsigned
+        return mCharacterGlyphs[aPos].CharIsTab();
+    }
+    bool CharIsNewline(PRUint32 aPos) { // was the character at aPos flagged as a newline?
+        NS_ASSERTION(aPos < mCharacterCount, "aPos out of range"); // aPos is unsigned
+        return mCharacterGlyphs[aPos].CharIsNewline();
+    }
+    bool CharIsLowSurrogate(PRUint32 aPos) { // is aPos flagged as a low surrogate?
+        NS_ASSERTION(aPos < mCharacterCount, "aPos out of range"); // aPos is unsigned
+        return mCharacterGlyphs[aPos].CharIsLowSurrogate();
+    }
+
     PRUint32 GetLength() { return mCharacterCount; }
 
     // All PRUint32 aStart, PRUint32 aLength ranges below are restricted to
     // grapheme cluster boundaries! All offsets are in terms of the string
     // passed into MakeTextRun.
     
     // All coordinates are in layout/app units
 
@@ -2393,29 +2458,17 @@ public:
     void ClearFlagBits(PRUint32 aFlags) {
       NS_ASSERTION(!(aFlags & ~gfxTextRunFactory::SETTABLE_FLAGS),
                    "Only user flags should be mutable");
       mFlags &= ~aFlags;
     }
     const gfxSkipChars& GetSkipChars() const { return mSkipChars; }
     PRUint32 GetAppUnitsPerDevUnit() const { return mAppUnitsPerDevUnit; }
     gfxFontGroup *GetFontGroup() const { return mFontGroup; }
-    const PRUint8 *GetText8Bit() const
-    { return (mFlags & gfxTextRunFactory::TEXT_IS_8BIT) ? mText.mSingle : nsnull; }
-    const PRUnichar *GetTextUnicode() const
-    { return (mFlags & gfxTextRunFactory::TEXT_IS_8BIT) ? nsnull : mText.mDouble; }
-    const void *GetTextAt(PRUint32 aIndex) {
-        return (mFlags & gfxTextRunFactory::TEXT_IS_8BIT)
-            ? static_cast<const void *>(mText.mSingle + aIndex)
-            : static_cast<const void *>(mText.mDouble + aIndex);
-    }
-    const PRUnichar GetChar(PRUint32 i) const
-    { return (mFlags & gfxTextRunFactory::TEXT_IS_8BIT) ? mText.mSingle[i] : mText.mDouble[i]; }
-    PRUint32 GetHashCode() const { return mHashCode; }
-    void SetHashCode(PRUint32 aHash) { mHashCode = aHash; }
+
 
     // Call this, don't call "new gfxTextRun" directly. This does custom
     // allocation and initialization
     static gfxTextRun *Create(const gfxTextRunFactory::Parameters *aParams,
         const void *aText, PRUint32 aLength, gfxFontGroup *aFontGroup, PRUint32 aFlags);
 
     // The text is divided into GlyphRuns as necessary
     struct GlyphRun {
@@ -2501,25 +2554,59 @@ public:
     void SetMissingGlyph(PRUint32 aCharIndex, PRUint32 aUnicodeChar);
     void SetSpaceGlyph(gfxFont *aFont, gfxContext *aContext, PRUint32 aCharIndex);
 
     // Set the glyph data for the given character index to the font's
     // space glyph, IF this can be done as a "simple" glyph record
     // (not requiring a DetailedGlyph entry). This avoids the need to call
     // the font shaper and go through the shaped-word cache for most spaces.
     //
+    // The parameter aSpaceChar is the original character code for which
+    // this space glyph is being used; if this is U+0020, we need to record
+    // that it could be trimmed at a run edge, whereas other kinds of space
+    // (currently just U+00A0) would not be trimmable/breakable.
+    //
     // Returns true if it was able to set simple glyph data for the space;
     // if it returns false, the caller needs to fall back to some other
     // means to create the necessary (detailed) glyph data.
     bool SetSpaceGlyphIfSimple(gfxFont *aFont, gfxContext *aContext,
-                               PRUint32 aCharIndex);
-
-    // If the character at aIndex is default-ignorable, set the glyph
-    // to be invisible-missing and return TRUE, else return FALSE
-    bool FilterIfIgnorable(PRUint32 aIndex);
+                               PRUint32 aCharIndex, PRUnichar aSpaceChar);
+
+    // Record the positions of specific characters that layout may need to
+    // detect in the textrun, even though it doesn't have an explicit copy
+    // of the original text. These are recorded using flag bits in the
+    // CompressedGlyph record; if necessary, we convert "simple" glyph records
+    // to "complex" ones as the Tab and Newline flags are not present in
+    // simple CompressedGlyph records.
+    void SetIsTab(PRUint32 aIndex) {
+        CompressedGlyph *g = &mCharacterGlyphs[aIndex];
+        if (g->IsSimpleGlyph()) { // the tab flag needs a complex record: convert first
+            DetailedGlyph details; // NOTE(review): assumes SetGlyphs copies *aGlyphs - confirm
+            details.mGlyphID = g->GetSimpleGlyph();
+            details.mAdvance = g->GetSimpleAdvance();
+            details.mXOffset = details.mYOffset = 0;
+            SetGlyphs(aIndex, CompressedGlyph().SetComplex(true, true, 1), &details);
+        }
+        g->SetIsTab();
+    }
+    void SetIsNewline(PRUint32 aIndex) {
+        CompressedGlyph *g = &mCharacterGlyphs[aIndex];
+        if (g->IsSimpleGlyph()) { // the newline flag needs a complex record: convert first
+            DetailedGlyph details; // NOTE(review): assumes SetGlyphs copies *aGlyphs - confirm
+            details.mGlyphID = g->GetSimpleGlyph();
+            details.mAdvance = g->GetSimpleAdvance();
+            details.mXOffset = details.mYOffset = 0;
+            SetGlyphs(aIndex, CompressedGlyph().SetComplex(true, true, 1), &details);
+        }
+        g->SetIsNewline();
+    }
+    void SetIsLowSurrogate(PRUint32 aIndex) { // aIndex is not range-checked here
+        SetGlyphs(aIndex, CompressedGlyph().SetComplex(false, false, 0), nsnull); // complex record, glyph count 0
+        mCharacterGlyphs[aIndex].SetIsLowSurrogate(); // the flag setter asserts a non-simple record
+    }
 
     /**
      * Prefetch all the glyph extents needed to ensure that Measure calls
      * on this textrun not requesting tight boundingBoxes will succeed. Note
      * that some glyph extents might not be fetched due to OOM or other
      * errors.
      */
     void FetchGlyphExtents(gfxContext *aRefContext);
@@ -2570,19 +2657,16 @@ public:
         // when the part is at the start of the ligature, and after-spacing
         // when the part is as the end of the ligature
         gfxFloat mPartWidth;
         
         bool mClipBeforePart;
         bool mClipAfterPart;
     };
     
-    // user font set generation when text run was created
-    PRUint64 GetUserFontSetGeneration() { return mUserFontSetGeneration; }
-
     // return storage used by this run, for memory reporter;
     // nsTransformedTextRun needs to override this as it holds additional data
     virtual NS_MUST_OVERRIDE size_t
         SizeOfExcludingThis(nsMallocSizeOfFun aMallocSizeOf);
     virtual NS_MUST_OVERRIDE size_t
         SizeOfIncludingThis(nsMallocSizeOfFun aMallocSizeOf);
 
     // Get the size, if it hasn't already been gotten, marking as it goes.
@@ -2609,22 +2693,19 @@ protected:
      * been appended to this array, so it must NOT be freed separately.
      */
     gfxTextRun(const gfxTextRunFactory::Parameters *aParams, const void *aText,
                PRUint32 aLength, gfxFontGroup *aFontGroup, PRUint32 aFlags,
                CompressedGlyph *aGlyphStorage);
 
     /**
      * Helper for the Create() factory method to allocate the required
-     * glyph storage, and copy the text (modifying the aText parameter)
-     * if it is not flagged as persistent.
+     * glyph storage.
      */
-    static CompressedGlyph* AllocateStorage(const void*& aText,
-                                            PRUint32 aLength,
-                                            PRUint32 aFlags);
+    static CompressedGlyph* AllocateStorage(PRUint32 aLength);
 
 private:
     // **** general helpers **** 
 
     // Allocate aCount DetailedGlyphs for the given index
     DetailedGlyph *AllocateDetailedGlyphs(PRUint32 aCharIndex, PRUint32 aCount);
 
     // Get the total advance for a range of glyphs.
@@ -2684,34 +2765,24 @@ private:
     // it must be deleted in the destructor.
     CompressedGlyph*                mCharacterGlyphs;
 
     nsAutoPtr<DetailedGlyphStore>   mDetailedGlyphs;
 
     // XXX this should be changed to a GlyphRun plus a maybe-null GlyphRun*,
     // for smaller size especially in the super-common one-glyphrun case
     nsAutoTArray<GlyphRun,1>        mGlyphRuns;
-    // When TEXT_IS_8BIT is set, we use mSingle, otherwise we use mDouble.
-    // When TEXT_IS_PERSISTENT is set, we don't own the text, otherwise we
-    // own the text. When we own the text, it's allocated fused with the
-    // mCharacterGlyphs array, and therefore need not be explicitly deleted.
-    // This text is not null-terminated.
-    union {
-        const PRUint8   *mSingle;
-        const PRUnichar *mDouble;
-    } mText;
+
     void             *mUserData;
     gfxFontGroup     *mFontGroup; // addrefed
     gfxSkipChars      mSkipChars;
     nsExpirationState mExpirationState;
     PRUint32          mAppUnitsPerDevUnit;
     PRUint32          mFlags;
     PRUint32          mCharacterCount;
-    PRUint32          mHashCode;
-    PRUint64          mUserFontSetGeneration; // user font set generation when text run created
 
     bool              mSkipDrawing; // true if the font group we used had a user font
                                     // download that's in progress, so we should hide text
                                     // until the download completes (or timeout fires)
 };
 
 class THEBES_API gfxFontGroup : public gfxTextRunFactory {
 public:
--- a/layout/generic/nsTextFrameThebes.cpp
+++ b/layout/generic/nsTextFrameThebes.cpp
@@ -834,20 +834,27 @@ public:
   }
   void AccumulateRunInfo(nsTextFrame* aFrame);
   /**
    * @return null to indicate either textrun construction failed or
    * we constructed just a partial textrun to set up linebreaker and other
    * state for following textruns.
    */
   gfxTextRun* BuildTextRunForFrames(void* aTextBuffer);
+  bool SetupLineBreakerContext(gfxTextRun *aTextRun);
   void AssignTextRun(gfxTextRun* aTextRun);
   nsTextFrame* GetNextBreakBeforeFrame(PRUint32* aIndex);
-  void SetupBreakSinksForTextRun(gfxTextRun* aTextRun, bool aIsExistingTextRun,
-                                 bool aSuppressSink);
+  enum SetupBreakSinksFlags { // bit flags for SetupBreakSinksForTextRun's aFlags
+    SBS_DOUBLE_BYTE =      (1 << 0), // aTextPtr is PRUnichar text, else PRUint8
+    SBS_EXISTING_TEXTRUN = (1 << 1), // forwarded to the BreakSink constructor
+    SBS_SUPPRESS_SINK    = (1 << 2)  // append the text with a null break sink
+  };
+  void SetupBreakSinksForTextRun(gfxTextRun* aTextRun,
+                                 const void* aTextPtr,
+                                 PRUint32    aFlags);
   struct FindBoundaryState {
     nsIFrame*    mStopAtFrame;
     nsTextFrame* mFirstTextFrame;
     nsTextFrame* mLastTextFrame;
     bool mSeenTextRunBoundaryOnLaterLine;
     bool mSeenTextRunBoundaryOnThisLine;
     bool mSeenSpaceForLineBreakingOnThisLine;
   };
@@ -1357,19 +1364,22 @@ void BuildTextRunsScanner::FlushFrames(b
         ((mCurrentFramesAllSameTextRun->GetFlags() & nsTextFrameUtils::TEXT_INCOMING_WHITESPACE) != 0) ==
         ((mCurrentRunContextInfo & nsTextFrameUtils::INCOMING_WHITESPACE) != 0) &&
         ((mCurrentFramesAllSameTextRun->GetFlags() & gfxTextRunFactory::TEXT_INCOMING_ARABICCHAR) != 0) ==
         ((mCurrentRunContextInfo & nsTextFrameUtils::INCOMING_ARABICCHAR) != 0) &&
         IsTextRunValidForMappedFlows(mCurrentFramesAllSameTextRun)) {
       // Optimization: We do not need to (re)build the textrun.
       textRun = mCurrentFramesAllSameTextRun;
 
-      // Feed this run's text into the linebreaker to provide context. This also
-      // updates mNextRunContextInfo appropriately.
-      SetupBreakSinksForTextRun(textRun, true, false);
+      // Feed this run's text into the linebreaker to provide context.
+      if (!SetupLineBreakerContext(textRun)) {
+        return;
+      }
+ 
+      // Update mNextRunContextInfo appropriately
       mNextRunContextInfo = nsTextFrameUtils::INCOMING_NONE;
       if (textRun->GetFlags() & nsTextFrameUtils::TEXT_TRAILING_WHITESPACE) {
         mNextRunContextInfo |= nsTextFrameUtils::INCOMING_WHITESPACE;
       }
       if (textRun->GetFlags() & gfxTextRunFactory::TEXT_TRAILING_ARABICCHAR) {
         mNextRunContextInfo |= nsTextFrameUtils::INCOMING_ARABICCHAR;
       }
     } else {
@@ -1973,17 +1983,24 @@ BuildTextRunsScanner::BuildTextRunForFra
     DestroyUserData(userDataToDestroy);
     return nsnull;
   }
 
   // We have to set these up after we've created the textrun, because
   // the breaks may be stored in the textrun during this very call.
   // This is a bit annoying because it requires another loop over the frames
   // making up the textrun, but I don't see a way to avoid this.
-  SetupBreakSinksForTextRun(textRun, false, mSkipIncompleteTextRuns);
+  PRUint32 flags = 0;
+  if (mDoubleByteText) {
+    flags |= SBS_DOUBLE_BYTE;
+  }
+  if (mSkipIncompleteTextRuns) {
+    flags |= SBS_SUPPRESS_SINK;
+  }
+  SetupBreakSinksForTextRun(textRun, textPtr, flags);
 
   if (mSkipIncompleteTextRuns) {
     mSkipIncompleteTextRuns = !TextContainsLineBreakerWhiteSpace(textPtr,
         transformedLength, mDoubleByteText);
     // Arrange for this textrun to be deleted the next time the linebreaker
     // is flushed out
     mTextRunsToDelete.AppendElement(textRun);
     // Since we're doing to destroy the user data now, avoid a dangling
@@ -1997,16 +2014,140 @@ BuildTextRunsScanner::BuildTextRunForFra
   }
 
   // Actually wipe out the textruns associated with the mapped frames and associate
   // those frames with this text run.
   AssignTextRun(textRun);
   return textRun;
 }
 
+// This is a cut-down version of BuildTextRunForFrames used to set up
+// context for the line-breaker, when the textrun has already been created.
+// So it does the same walk over the mMappedFlows, but doesn't actually
+// build a new textrun. Returns false if a text buffer could not be
+// allocated, in which case nothing is passed to the line-breaker.
+bool
+BuildTextRunsScanner::SetupLineBreakerContext(gfxTextRun *aTextRun)
+{
+  AutoFallibleTArray<PRUint8,BIG_TEXT_NODE_SIZE> buffer;
+  PRUint32 bufferSize = mMaxTextLength*(mDoubleByteText ? 2 : 1); // in bytes
+  if (bufferSize < mMaxTextLength || bufferSize == PR_UINT32_MAX ||
+      !buffer.AppendElements(bufferSize)) {
+    return false;
+  }
+  void *textPtr = buffer.Elements();
+
+  gfxSkipCharsBuilder builder;
+
+  nsAutoTArray<PRInt32,50> textBreakPoints; // NOTE(review): filled below, never read here
+  TextRunUserData dummyData;
+  TextRunMappedFlow dummyMappedFlow;
+
+  TextRunUserData* userData;
+  TextRunUserData* userDataToDestroy;
+  // If the situation is particularly simple (and common) we don't need to
+  // allocate userData.
+  if (mMappedFlows.Length() == 1 && !mMappedFlows[0].mEndFrame &&
+      mMappedFlows[0].mStartFrame->GetContentOffset() == 0) {
+    userData = &dummyData;
+    userDataToDestroy = nsnull;
+    dummyData.mMappedFlows = &dummyMappedFlow;
+  } else {
+    userData = static_cast<TextRunUserData*>
+      (nsMemory::Alloc(sizeof(TextRunUserData) + mMappedFlows.Length()*sizeof(TextRunMappedFlow)));
+    userDataToDestroy = userData;
+    userData->mMappedFlows = reinterpret_cast<TextRunMappedFlow*>(userData + 1);
+  }
+  userData->mMappedFlowCount = mMappedFlows.Length();
+  userData->mLastFlowIndex = 0;
+
+  PRUint32 nextBreakIndex = 0;
+  nsTextFrame* nextBreakBeforeFrame = GetNextBreakBeforeFrame(&nextBreakIndex);
+
+  PRUint32 i;
+  const nsStyleText* textStyle = nsnull;
+  for (i = 0; i < mMappedFlows.Length(); ++i) {
+    MappedFlow* mappedFlow = &mMappedFlows[i];
+    nsTextFrame* f = mappedFlow->mStartFrame;
+
+    textStyle = f->GetStyleText();
+    nsTextFrameUtils::CompressionMode compression =
+      CSSWhitespaceToCompressionMode[textStyle->mWhiteSpace];
+
+    // Figure out what content is included in this flow.
+    nsIContent* content = f->GetContent();
+    const nsTextFragment* frag = content->GetText();
+    PRInt32 contentStart = mappedFlow->mStartFrame->GetContentOffset();
+    PRInt32 contentEnd = mappedFlow->GetContentEnd();
+    PRInt32 contentLength = contentEnd - contentStart;
+
+    TextRunMappedFlow* newFlow = &userData->mMappedFlows[i];
+    newFlow->mStartFrame = mappedFlow->mStartFrame;
+    newFlow->mDOMOffsetToBeforeTransformOffset = builder.GetCharCount() -
+      mappedFlow->mStartFrame->GetContentOffset();
+    newFlow->mContentLength = contentLength;
+
+    while (nextBreakBeforeFrame && nextBreakBeforeFrame->GetContent() == content) {
+      textBreakPoints.AppendElement(
+          nextBreakBeforeFrame->GetContentOffset() + newFlow->mDOMOffsetToBeforeTransformOffset);
+      nextBreakBeforeFrame = GetNextBreakBeforeFrame(&nextBreakIndex);
+    }
+
+    PRUint32 analysisFlags;
+    if (frag->Is2b()) {
+      NS_ASSERTION(mDoubleByteText, "Wrong buffer char size!");
+      PRUnichar* bufStart = static_cast<PRUnichar*>(textPtr);
+      PRUnichar* bufEnd = nsTextFrameUtils::TransformText(
+          frag->Get2b() + contentStart, contentLength, bufStart,
+          compression, &mNextRunContextInfo, &builder, &analysisFlags);
+      textPtr = bufEnd;
+    } else {
+      if (mDoubleByteText) {
+        // Need to expand the text. First transform it into a temporary buffer,
+        // then expand.
+        AutoFallibleTArray<PRUint8,BIG_TEXT_NODE_SIZE> tempBuf;
+        if (!tempBuf.AppendElements(contentLength)) {
+          DestroyUserData(userDataToDestroy);
+          return false;
+        }
+        PRUint8* bufStart = tempBuf.Elements();
+        PRUint8* end = nsTextFrameUtils::TransformText(
+            reinterpret_cast<const PRUint8*>(frag->Get1b()) + contentStart, contentLength,
+            bufStart, compression, &mNextRunContextInfo, &builder, &analysisFlags);
+        textPtr = ExpandBuffer(static_cast<PRUnichar*>(textPtr),
+                               tempBuf.Elements(), end - tempBuf.Elements());
+      } else {
+        PRUint8* bufStart = static_cast<PRUint8*>(textPtr);
+        PRUint8* end = nsTextFrameUtils::TransformText(
+            reinterpret_cast<const PRUint8*>(frag->Get1b()) + contentStart, contentLength,
+            bufStart, compression, &mNextRunContextInfo, &builder, &analysisFlags);
+        textPtr = end;
+      }
+    }
+  }
+
+  // Hand the re-transformed text to the line-breaker via the break sinks.
+  // Unlike BuildTextRunForFrames, aTextRun already existed before this
+  // call, so say so with SBS_EXISTING_TEXTRUN (the flag that takes the
+  // place of the old aIsExistingTextRun argument; it reaches BreakSink's
+  // constructor in SetupBreakSinksForTextRun).
+  PRUint32 flags = SBS_EXISTING_TEXTRUN;
+  if (mDoubleByteText) {
+    flags |= SBS_DOUBLE_BYTE;
+  }
+  if (mSkipIncompleteTextRuns) {
+    flags |= SBS_SUPPRESS_SINK;
+  }
+  SetupBreakSinksForTextRun(aTextRun, buffer.Elements(), flags);
+
+  DestroyUserData(userDataToDestroy);
+
+  return true;
+}
+
 static bool
 HasCompressedLeadingWhitespace(nsTextFrame* aFrame, const nsStyleText* aStyleText,
                                PRInt32 aContentEndOffset,
                                const gfxSkipCharsIterator& aIterator)
 {
   if (!aIterator.IsOriginalCharSkipped())
     return false;
 
@@ -2019,18 +2160,18 @@ HasCompressedLeadingWhitespace(nsTextFra
     ++frameContentOffset;
     iter.AdvanceOriginal(1);
   }
   return false;
 }
 
 void
 BuildTextRunsScanner::SetupBreakSinksForTextRun(gfxTextRun* aTextRun,
-                                                bool aIsExistingTextRun,
-                                                bool aSuppressSink)
+                                                const void* aTextPtr,
+                                                PRUint32    aFlags)
 {
   // textruns have uniform language
   nsIAtom* language = mMappedFlows[0].mStartFrame->GetStyleVisibility()->mLanguage;
   // We keep this pointed at the skip-chars data for the current mappedFlow.
   // This lets us cheaply check whether the flow has compressed initial
   // whitespace...
   gfxSkipCharsIterator iter(aTextRun->GetSkipChars());
 
@@ -2038,17 +2179,18 @@ BuildTextRunsScanner::SetupBreakSinksFor
   for (i = 0; i < mMappedFlows.Length(); ++i) {
     MappedFlow* mappedFlow = &mMappedFlows[i];
     PRUint32 offset = iter.GetSkippedOffset();
     gfxSkipCharsIterator iterNext = iter;
     iterNext.AdvanceOriginal(mappedFlow->GetContentEnd() -
             mappedFlow->mStartFrame->GetContentOffset());
 
     nsAutoPtr<BreakSink>* breakSink = mBreakSinks.AppendElement(
-      new BreakSink(aTextRun, mContext, offset, aIsExistingTextRun));
+      new BreakSink(aTextRun, mContext, offset,
+                    (aFlags & SBS_EXISTING_TEXTRUN) != 0));
     if (!breakSink || !*breakSink)
       return;
 
     PRUint32 length = iterNext.GetSkippedOffset() - offset;
     PRUint32 flags = 0;
     nsIFrame* initialBreakController = mappedFlow->mAncestorControllingInitialBreak;
     if (!initialBreakController) {
       initialBreakController = mLineContainer;
@@ -2072,22 +2214,25 @@ BuildTextRunsScanner::SetupBreakSinksFor
     }
 
     if (HasCompressedLeadingWhitespace(startFrame, textStyle,
                                        mappedFlow->GetContentEnd(), iter)) {
       mLineBreaker.AppendInvisibleWhitespace(flags);
     }
 
     if (length > 0) {
-      BreakSink* sink = aSuppressSink ? nsnull : (*breakSink).get();
-      if (aTextRun->GetFlags() & gfxFontGroup::TEXT_IS_8BIT) {
-        mLineBreaker.AppendText(language, aTextRun->GetText8Bit() + offset,
+      BreakSink* sink =
+        (aFlags & SBS_SUPPRESS_SINK) ? nsnull : (*breakSink).get();
+      if (aFlags & SBS_DOUBLE_BYTE) {
+        const PRUnichar* text = reinterpret_cast<const PRUnichar*>(aTextPtr);
+        mLineBreaker.AppendText(language, text + offset,
                                 length, flags, sink);
       } else {
-        mLineBreaker.AppendText(language, aTextRun->GetTextUnicode() + offset,
+        const PRUint8* text = reinterpret_cast<const PRUint8*>(aTextPtr);
+        mLineBreaker.AppendText(language, text + offset,
                                 length, flags, sink);
       }
     }
     
     iter = iterNext;
   }
 }
 
@@ -2788,17 +2933,17 @@ PropertyProvider::CalcTabWidths(PRUint32
     if (!mReflowing) {
       mTabWidths = static_cast<TabWidthStore*>
         (mFrame->Properties().Get(TabWidthProperty()));
 #ifdef DEBUG
       // If we're not reflowing, we should have already computed the
       // tab widths; check that they're available as far as the last
       // tab character present (if any)
       for (PRUint32 i = aStart + aLength; i > aStart; --i) {
-        if (mTextRun->GetChar(i - 1) == '\t') {
+        if (mTextRun->CharIsTab(i - 1)) {
           NS_ASSERTION(mTabWidths && mTabWidths->mLimit >= i,
                        "Precomputed tab widths are missing!");
           break;
         }
       }
 #endif
       return;
     }
@@ -2813,17 +2958,17 @@ PropertyProvider::CalcTabWidths(PRUint32
                  "We need precomputed tab widths, but don't have enough.");
 
     gfxFloat tabWidth = -1;
     for (PRUint32 i = tabsEnd; i < aStart + aLength; ++i) {
       Spacing spacing;
       GetSpacingInternal(i, 1, &spacing, true);
       mOffsetFromBlockOriginForTabs += spacing.mBefore;
 
-      if (mTextRun->GetChar(i) != '\t') {
+      if (!mTextRun->CharIsTab(i)) {
         if (mTextRun->IsClusterStart(i)) {
           PRUint32 clusterEnd = i + 1;
           while (clusterEnd < mTextRun->GetLength() &&
                  !mTextRun->IsClusterStart(clusterEnd)) {
             ++clusterEnd;
           }
           mOffsetFromBlockOriginForTabs +=
             mTextRun->GetAdvanceWidth(i, clusterEnd - i, nsnull);
@@ -6012,22 +6157,19 @@ IsAcceptableCaretPosition(const gfxSkipC
   if (aRespectClusters && !aTextRun->IsClusterStart(index))
     return false;
   if (index > 0) {
     // Check whether the proposed position is in between the two halves of a
     // surrogate pair; if so, this is not a valid character boundary.
     // (In the case where we are respecting clusters, we won't actually get
     // this far because the low surrogate is also marked as non-clusterStart
     // so we'll return FALSE above.)
-    // If the textrun is 8-bit it can't have any surrogates, so we only need
-    // to check the actual characters if GetTextUnicode() returns non-null.
-    const PRUnichar *txt = aTextRun->GetTextUnicode();
-    if (txt && NS_IS_LOW_SURROGATE(txt[index]) &&
-               NS_IS_HIGH_SURROGATE(txt[index-1]))
+    if (aTextRun->CharIsLowSurrogate(index)) {
       return false;
+    }
   }
   return true;
 }
 
 bool
 nsTextFrame::PeekOffsetCharacter(bool aForward, PRInt32* aOffset,
                                  bool aRespectClusters)
 {
@@ -6062,17 +6204,17 @@ nsTextFrame::PeekOffsetCharacter(bool aF
     *aOffset = 0;
   } else {
     // If we're at the end of a line, look at the next continuation
     iter.SetOriginalOffset(startOffset);
     if (startOffset <= trimmed.GetEnd() &&
         !(startOffset < trimmed.GetEnd() &&
           GetStyleText()->NewlineIsSignificant() &&
           iter.GetSkippedOffset() < mTextRun->GetLength() &&
-          mTextRun->GetChar(iter.GetSkippedOffset()) == '\n')) {
+          mTextRun->CharIsNewline(iter.GetSkippedOffset()))) {
       for (PRInt32 i = startOffset + 1; i <= trimmed.GetEnd(); ++i) {
         iter.SetOriginalOffset(i);
         if (i == trimmed.GetEnd() ||
             IsAcceptableCaretPosition(iter, aRespectClusters, mTextRun, this)) {
           *aOffset = i - mContentOffset;
           return true;
         }
       }
@@ -6478,18 +6620,18 @@ nsTextFrame::AddInlineMinWidthForFlow(ns
 
   for (PRUint32 i = start, wordStart = start; i <= flowEndInTextRun; ++i) {
     bool preformattedNewline = false;
     bool preformattedTab = false;
     if (i < flowEndInTextRun) {
       // XXXldb Shouldn't we be including the newline as part of the
       // segment that it ends rather than part of the segment that it
       // starts?
-      preformattedNewline = preformatNewlines && textRun->GetChar(i) == '\n';
-      preformattedTab = preformatTabs && textRun->GetChar(i) == '\t';
+      preformattedNewline = preformatNewlines && textRun->CharIsNewline(i);
+      preformattedTab = preformatTabs && textRun->CharIsTab(i);
       if (!textRun->CanBreakLineBefore(i) &&
           !preformattedNewline &&
           !preformattedTab &&
           (!hyphBreakBefore || !hyphBreakBefore[i - start]))
       {
         // we can't break here (and it's not the end of the flow)
         continue;
       }
@@ -6621,18 +6763,18 @@ nsTextFrame::AddInlinePrefWidthForFlow(n
   for (PRUint32 i = loopStart, lineStart = start; i <= flowEndInTextRun; ++i) {
     bool preformattedNewline = false;
     bool preformattedTab = false;
     if (i < flowEndInTextRun) {
       // XXXldb Shouldn't we be including the newline as part of the
       // segment that it ends rather than part of the segment that it
       // starts?
       NS_ASSERTION(preformatNewlines, "We can't be here unless newlines are hard breaks");
-      preformattedNewline = preformatNewlines && textRun->GetChar(i) == '\n';
-      preformattedTab = preformatTabs && textRun->GetChar(i) == '\t';
+      preformattedNewline = preformatNewlines && textRun->CharIsNewline(i);
+      preformattedTab = preformatTabs && textRun->CharIsTab(i);
       if (!preformattedNewline && !preformattedTab) {
         // we needn't break here (and it's not the end of the flow)
         continue;
       }
     }
 
     if (i > lineStart) {
       nscoord width =
--- a/layout/generic/nsTextRunTransformations.cpp
+++ b/layout/generic/nsTextRunTransformations.cpp
@@ -54,26 +54,23 @@ nsTransformedTextRun::Create(const gfxTe
                              gfxFontGroup* aFontGroup,
                              const PRUnichar* aString, PRUint32 aLength,
                              const PRUint32 aFlags, nsStyleContext** aStyles,
                              bool aOwnsFactory)
 {
   NS_ASSERTION(!(aFlags & gfxTextRunFactory::TEXT_IS_8BIT),
                "didn't expect text to be marked as 8-bit here");
 
-  // Note that AllocateStorage MAY modify the textPtr parameter,
-  // if the text is not persistent and therefore a private copy is created
-  const void *textPtr = aString;
-  CompressedGlyph *glyphStorage = AllocateStorage(textPtr, aLength, aFlags);
+  CompressedGlyph *glyphStorage = AllocateStorage(aLength);
   if (!glyphStorage) {
     return nsnull;
   }
 
   return new nsTransformedTextRun(aParams, aFactory, aFontGroup,
-                                  static_cast<const PRUnichar*>(textPtr), aLength,
+                                  aString, aLength,
                                   aFlags, aStyles, aOwnsFactory, glyphStorage);
 }
 
 void
 nsTransformedTextRun::SetCapitalization(PRUint32 aStart, PRUint32 aLength,
                                         bool* aCapitalization,
                                         gfxContext* aRefContext)
 {
@@ -266,17 +263,17 @@ nsFontVariantTextRunFactory::RebuildText
   if (!smallFont)
     return;
 
   PRUint32 flags;
   gfxTextRunFactory::Parameters innerParams =
       GetParametersForInner(aTextRun, &flags, aRefContext);
 
   PRUint32 length = aTextRun->GetLength();
-  const PRUnichar* str = aTextRun->GetTextUnicode();
+  const PRUnichar* str = aTextRun->mString.BeginReading();
   nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements();
   // Create a textrun so we can check cluster-start properties
   nsAutoPtr<gfxTextRun> inner(fontGroup->MakeTextRun(str, length, &innerParams, flags));
   if (!inner.get())
     return;
 
   nsCaseTransformTextRunFactory uppercaseFactory(nsnull, true);
 
@@ -348,17 +345,17 @@ nsFontVariantTextRunFactory::RebuildText
   }
 }
 
 void
 nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
     gfxContext* aRefContext)
 {
   PRUint32 length = aTextRun->GetLength();
-  const PRUnichar* str = aTextRun->GetTextUnicode();
+  const PRUnichar* str = aTextRun->mString.BeginReading();
   nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements();
 
   nsAutoString convertedString;
   nsAutoTArray<bool,50> charsToMergeArray;
   nsAutoTArray<nsStyleContext*,50> styleArray;
   nsAutoTArray<PRUint8,50> canBreakBeforeArray;
   PRUint32 extraCharsCount = 0;
 
--- a/layout/generic/nsTextRunTransformations.h
+++ b/layout/generic/nsTextRunTransformations.h
@@ -133,31 +133,33 @@ public:
   }
 
   // override the gfxTextRun impls to account for additional members here
   virtual NS_MUST_OVERRIDE size_t SizeOfExcludingThis(nsMallocSizeOfFun aMallocSizeOf);
   virtual NS_MUST_OVERRIDE size_t SizeOfIncludingThis(nsMallocSizeOfFun aMallocSizeOf);
 
   nsTransformingTextRunFactory       *mFactory;
   nsTArray<nsRefPtr<nsStyleContext> > mStyles;
-  nsTArray<bool>              mCapitalize;
+  nsTArray<bool>                      mCapitalize;
+  nsString                            mString;
   bool                                mOwnsFactory;
   bool                                mNeedsRebuild;
 
 private:
   nsTransformedTextRun(const gfxTextRunFactory::Parameters* aParams,
                        nsTransformingTextRunFactory* aFactory,
                        gfxFontGroup* aFontGroup,
                        const PRUnichar* aString, PRUint32 aLength,
                        const PRUint32 aFlags, nsStyleContext** aStyles,
                        bool aOwnsFactory,
                        CompressedGlyph *aGlyphStorage)
     : gfxTextRun(aParams, aString, aLength, aFontGroup, aFlags, aGlyphStorage),
-      mFactory(aFactory), mOwnsFactory(aOwnsFactory), mNeedsRebuild(true)
+      mFactory(aFactory), mString(aString, aLength),
+      mOwnsFactory(aOwnsFactory), mNeedsRebuild(true)
   {
     PRUint32 i;
     for (i = 0; i < aLength; ++i) {
       mStyles.AppendElement(aStyles[i]);
     }
-  }  
+  }
 };
 
 #endif /*NSTEXTRUNTRANSFORMATIONS_H_*/