Bug 384100. Implement word-based textrun cache. r=vlad
authorroc+@cs.cmu.edu
Tue, 12 Jun 2007 13:56:04 -0700
changeset 2317 3ab5e1ea160a9381419d9609cf3027266182891b
parent 2316 624867d7e54b5cc55795f491a94ed87fc366602a
child 2318 d7d83dd6c903f3974a2ba93167b7cc8c97d8ffd6
push idunknown
push userunknown
push dateunknown
reviewersvlad
bugs384100
milestone1.9a6pre
Bug 384100. Implement word-based textrun cache. r=vlad
gfx/thebes/public/Makefile.in
gfx/thebes/public/gfxFont.h
gfx/thebes/public/gfxTextRunWordCache.h
gfx/thebes/src/Makefile.in
gfx/thebes/src/gfxFont.cpp
gfx/thebes/src/gfxSkipChars.cpp
gfx/thebes/src/gfxTextRunWordCache.cpp
--- a/gfx/thebes/public/Makefile.in
+++ b/gfx/thebes/public/Makefile.in
@@ -20,16 +20,17 @@ EXPORTS		= 	gfxASurface.h \
 			gfxPath.h \
 			gfxPattern.h \
 			gfxPlatform.h \
 			gfxPoint.h \
 			gfxRect.h \
 			gfxSkipChars.h \
 			gfxTypes.h \
 			gfxTextRunCache.h \
+			gfxTextRunWordCache.h \
 			$(NULL)
 
 EXPORTS += gfxFontTest.h
 
 ifdef MOZ_ENABLE_GLITZ
 REQUIRES += glitz
 EXPORTS +=	gfxGlitzSurface.h
 endif
--- a/gfx/thebes/public/gfxFont.h
+++ b/gfx/thebes/public/gfxFont.h
@@ -425,16 +425,20 @@ protected:
 class THEBES_API gfxTextRunFactory {
     THEBES_INLINE_DECL_REFCOUNTING(gfxTextRunFactory)
 
 public:
     // Flags in the mask 0xFFFF0000 are reserved for textrun clients
     // Flags in the mask 0x0000F000 are reserved for per-platform fonts
     // Flags in the mask 0x00000FFF are set by the textrun creator.
     enum {
+        USER_TEXT_FLAGS     = 0xFFFF0000,
+        PLATFORM_TEXT_FLAGS = 0x0000F000,
+        TEXTRUN_TEXT_FLAGS  = 0x00000FFF,
+      
         /**
          * When set, the text string pointer used to create the text run
          * is guaranteed to be available during the lifetime of the text run.
          */
         TEXT_IS_PERSISTENT           = 0x0001,
         /**
          * When set, the text is known to be all-ASCII (< 128).
          */
@@ -784,23 +788,38 @@ public:
 
     // Utility getters
 
     PRBool IsRightToLeft() const { return (mFlags & gfxTextRunFactory::TEXT_IS_RTL) != 0; }
     gfxFloat GetDirection() const { return (mFlags & gfxTextRunFactory::TEXT_IS_RTL) ? -1.0 : 1.0; }
     void *GetUserData() const { return mUserData; }
     void SetUserData(void *aUserData) { mUserData = aUserData; }
     PRUint32 GetFlags() const { return mFlags; }
+    void SetFlagBits(PRUint32 aFlags) {
+      NS_ASSERTION(!(aFlags & ~gfxTextRunFactory::USER_TEXT_FLAGS),
+                   "Only user flags should be mutable");
+      mFlags |= aFlags;
+    }
+    void ClearFlagBits(PRUint32 aFlags) {
+      NS_ASSERTION(!(aFlags & ~gfxTextRunFactory::USER_TEXT_FLAGS),
+                   "Only user flags should be mutable");
+      mFlags &= ~aFlags;
+    }
     const gfxSkipChars& GetSkipChars() const { return mSkipChars; }
     PRUint32 GetAppUnitsPerDevUnit() const { return mAppUnitsPerDevUnit; }
     gfxFontGroup *GetFontGroup() const { return mFontGroup; }
     const PRUint8 *GetText8Bit() const
     { return (mFlags & gfxTextRunFactory::TEXT_IS_8BIT) ? mText.mSingle : nsnull; }
     const PRUnichar *GetTextUnicode() const
     { return (mFlags & gfxTextRunFactory::TEXT_IS_8BIT) ? nsnull : mText.mDouble; }
+    const void *GetTextAt(PRUint32 aIndex) {
+        return (mFlags & gfxTextRunFactory::TEXT_IS_8BIT)
+            ? NS_STATIC_CAST(const void *, mText.mSingle + aIndex)
+            : NS_STATIC_CAST(const void *, mText.mDouble + aIndex);
+    }
     const PRUnichar GetChar(PRUint32 i) const
     { return (mFlags & gfxTextRunFactory::TEXT_IS_8BIT) ? mText.mSingle[i] : mText.mDouble[i]; }
     PRUint32 GetHashCode() const { return mHashCode; }
     void SetHashCode(PRUint32 aHash) { mHashCode = aHash; }
 
     // The caller is responsible for initializing our glyphs after construction.
     // Initially all glyphs are such that GetCharacterGlyphs()[i].IsMissing() is true.
     // If aText is not persistent (aFlags & TEXT_IS_PERSISTENT), the
@@ -1015,41 +1034,49 @@ public:
     }
     /**
      * Set some detailed glyphs for a character. The data is copied from aGlyphs,
      * the caller retains ownership.
      */
     void SetDetailedGlyphs(PRUint32 aCharIndex, const DetailedGlyph *aGlyphs,
                            PRUint32 aNumGlyphs);
     void SetMissingGlyph(PRUint32 aCharIndex, PRUnichar aChar);
+    void SetSpaceGlyph(gfxFont *aFont, gfxContext *aContext, PRUint32 aCharIndex);
 
     // API for access to the raw glyph data, needed by gfxFont::Draw
     // and gfxFont::GetBoundingBox
     const CompressedGlyph *GetCharacterGlyphs() { return mCharacterGlyphs; }
     const DetailedGlyph *GetDetailedGlyphs(PRUint32 aCharIndex) {
         // Although mDetailedGlyphs should be non-NULL when ComplexCluster,
         // Missing glyphs need not have details.
         return mDetailedGlyphs ? mDetailedGlyphs[aCharIndex].get() : nsnull;
     }
     PRUint32 CountMissingGlyphs();
     const GlyphRun *GetGlyphRuns(PRUint32 *aNumGlyphRuns) {
         *aNumGlyphRuns = mGlyphRuns.Length();
         return mGlyphRuns.Elements();
     }
+    // Returns the index of the GlyphRun containing the given offset.
+    // Returns mGlyphRuns.Length() when aOffset is mCharacterCount.
+    PRUint32 FindFirstGlyphRunContaining(PRUint32 aOffset);
+    // Copy glyph data for a range of characters from aSource to this
+    // textrun. If aStealData is true then we actually steal the glyph data,
+    // setting the data in aSource to "missing". aDest should be in the last
+    // glyphrun.
+    virtual void CopyGlyphDataFrom(gfxTextRun *aSource, PRUint32 aStart,
+                                   PRUint32 aLength, PRUint32 aDest,
+                                   PRBool aStealData);
 
     nsExpirationState *GetExpirationState() { return &mExpirationState; }
 
 private:
     // **** general helpers **** 
 
     // Allocate aCount DetailedGlyphs for the given index
     DetailedGlyph *AllocateDetailedGlyphs(PRUint32 aCharIndex, PRUint32 aCount);
-    // Returns the index of the GlyphRun containing the given offset.
-    // Returns mGlyphRuns.Length() when aOffset is mCharacterCount.
-    PRUint32 FindFirstGlyphRunContaining(PRUint32 aOffset);
     // Computes the x-advance for a given cluster starting at aClusterOffset. Does
     // not include any spacing. Result is in appunits.
     PRInt32 ComputeClusterAdvance(PRUint32 aClusterOffset);
 
     //  **** ligature helpers ****
     // (Platforms do the actual ligaturization, but we need to do a bunch of stuff
     // to handle requests that begin or end inside a ligature)
 
@@ -1135,17 +1162,18 @@ public:
             mStyle.Equals(other.mStyle);
     }
 
     const gfxFontStyle *GetStyle() const { return &mStyle; }
 
     virtual gfxFontGroup *Copy(const gfxFontStyle *aStyle) = 0;
 
     /**
-     * Tabs, CRs and LFs should be zero-width and invisible.
+     * Tabs, CRs and LFs should be zero-width and invisible. They should
+     * break up shaping.
      */
     static PRBool IsInvisibleChar(PRUnichar ch) {
         return ch == '\t' || ch == '\r' || ch == '\n';
     }
 
     /**
      * Make a textrun for an empty string. This is fast; if you call it,
      * don't bother caching the result.
new file mode 100644
--- /dev/null
+++ b/gfx/thebes/public/gfxTextRunWordCache.h
@@ -0,0 +1,145 @@
+/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Mozilla Foundation code.
+ *
+ * The Initial Developer of the Original Code is Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2006
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Vladimir Vukicevic <vladimir@pobox.com>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifndef GFX_TEXT_RUN_WORD_CACHE_H
+#define GFX_TEXT_RUN_WORD_CACHE_H
+
+#include "gfxFont.h"
+
+/**
+ * Cache individual "words" (strings delimited by white-space or white-space-like
+ * characters that don't involve kerning or ligatures) in textruns.
+  */
+class THEBES_API gfxTextRunWordCache {
+public:
+    gfxTextRunWordCache() {
+        mCache.Init(100);
+    }
+    ~gfxTextRunWordCache() {
+        NS_ASSERTION(mCache.Count() == 0, "Textrun cache not empty!");
+    }
+
+    /**
+     * Create a textrun using cached words.
+     * @param aFlags the flags TEXT_IS_ASCII and TEXT_HAS_SURROGATES must be set
+     * by the caller, if applicable
+     * @param aIsInCache if true is returned, then RemoveTextRun must be called
+     * before the textrun changes or dies.
+     */
+    gfxTextRun *MakeTextRun(const PRUnichar *aText, PRUint32 aLength,
+                            gfxFontGroup *aFontGroup,
+                            const gfxFontGroup::Parameters *aParams,
+                            PRUint32 aFlags, PRBool *aIsInCache);
+    /**
+     * Create a textrun using cached words
+     * @param aFlags the flag TEXT_IS_ASCII must be set by the caller,
+     * if applicable
+     * @param aIsInCache if true is returned, then RemoveTextRun must be called
+     * before the textrun changes or dies.
+     */
+    gfxTextRun *MakeTextRun(const PRUint8 *aText, PRUint32 aLength,
+                            gfxFontGroup *aFontGroup,
+                            const gfxFontGroup::Parameters *aParams,
+                            PRUint32 aFlags, PRBool *aIsInCache);
+
+    /**
+     * Remove a textrun from the cache. This must be called before aTextRun
+     * is deleted! The text in the textrun must still be valid.
+     */
+    void RemoveTextRun(gfxTextRun *aTextRun);
+
+protected:
+    struct THEBES_API CacheHashKey {
+        void        *mFontOrGroup;
+        const void  *mString;
+        PRUint32     mLength;
+        PRUint32     mAppUnitsPerDevUnit;
+        PRUint32     mStringHash;
+        PRPackedBool mIsDoubleByteText;
+    };
+
+    class THEBES_API CacheHashEntry : public PLDHashEntryHdr {
+    public:
+        typedef const CacheHashKey &KeyType;
+        typedef const CacheHashKey *KeyTypePointer;
+
+        // When constructing a new entry in the hashtable, the caller of Put()
+        // will fill us in.
+        CacheHashEntry(KeyTypePointer aKey) : mTextRun(nsnull), mWordOffset(0),
+            mHashedByFont(PR_FALSE) { }
+        CacheHashEntry(const CacheHashEntry& toCopy) { NS_ERROR("Should not be called"); }
+        ~CacheHashEntry() { }
+
+        PRBool KeyEquals(const KeyTypePointer aKey) const;
+        static KeyTypePointer KeyToPointer(KeyType aKey) { return &aKey; }
+        static PLDHashNumber HashKey(const KeyTypePointer aKey);
+        enum { ALLOW_MEMMOVE = PR_TRUE };
+
+        gfxTextRun *mTextRun;
+        // The offset of the start of the word in the textrun. The length of
+        // the word is not stored here because we can figure it out by
+        // looking at the textrun's text.
+        PRUint32    mWordOffset:31;
+        // This is set to true when the cache entry was hashed by the first
+        // font in mTextRun's fontgroup; it's false when the cache entry
+        // was hashed by the fontgroup itself.
+        PRUint32    mHashedByFont:1;
+    };
+    
+    // Used to track words that should be copied from one textrun to
+    // another during the textrun construction process
+    struct DeferredWord {
+        gfxTextRun *mSourceTextRun;
+        PRUint32    mSourceOffset;
+        PRUint32    mDestOffset;
+        PRUint32    mLength;
+        PRUint32    mHash;
+    };
+    
+    PRBool LookupWord(gfxTextRun *aTextRun, gfxFont *aFirstFont,
+                      PRUint32 aStart, PRUint32 aEnd, PRUint32 aHash,
+                      nsTArray<DeferredWord>* aDeferredWords);
+    void FinishTextRun(gfxTextRun *aTextRun, gfxTextRun *aNewRun,
+                       const nsTArray<DeferredWord>& aDeferredWords,
+                       PRBool aSuccessful);
+    void RemoveWord(gfxTextRun *aTextRun, PRUint32 aStart,
+                    PRUint32 aEnd, PRUint32 aHash);    
+
+    nsTHashtable<CacheHashEntry> mCache;
+};
+
+#endif /* GFX_TEXT_RUN_WORD_CACHE_H */
--- a/gfx/thebes/src/Makefile.in
+++ b/gfx/thebes/src/Makefile.in
@@ -30,16 +30,17 @@ CPPSRCS	= \
 	gfxFontTest.cpp \
 	gfxMatrix.cpp \
 	gfxPath.cpp \
 	gfxPattern.cpp \
 	gfxPlatform.cpp \
 	gfxRect.cpp \
 	gfxSkipChars.cpp \
 	gfxTextRunCache.cpp \
+	gfxTextRunWordCache.cpp \
 	$(NULL)
 
 ifdef MOZ_TREE_CAIRO
 SHARED_LIBRARY_LIBS += \
 	$(DEPTH)/gfx/cairo/cairo/src/$(LIB_PREFIX)mozcairo.$(LIB_SUFFIX) \
 	$(DEPTH)/gfx/cairo/libpixman/src/$(LIB_PREFIX)mozlibpixman.$(LIB_SUFFIX) \
 	$(NULL)
 else
--- a/gfx/thebes/src/gfxFont.cpp
+++ b/gfx/thebes/src/gfxFont.cpp
@@ -526,34 +526,23 @@ gfxFontGroup::MakeEmptyTextRun(const Par
 }
 
 gfxTextRun *
 gfxFontGroup::MakeSpaceTextRun(const Parameters *aParams, PRUint32 aFlags)
 {
     aFlags |= TEXT_IS_8BIT | TEXT_IS_ASCII | TEXT_IS_PERSISTENT;
     static const PRUint8 space = ' ';
 
-    gfxFont *font = GetFontAt(0);
-    PRUint32 spaceGlyph = font->GetSpaceGlyph();
-    float spaceWidth = font->GetMetrics().spaceWidth;
-    PRUint32 spaceWidthAppUnits = NS_lroundf(spaceWidth*aParams->mAppUnitsPerDevUnit);
-    if (!spaceGlyph ||
-        !gfxTextRun::CompressedGlyph::IsSimpleGlyphID(spaceGlyph) ||
-        !gfxTextRun::CompressedGlyph::IsSimpleAdvance(spaceWidthAppUnits))
-        return MakeTextRun(&space, 1, aParams, aFlags);
-
     nsAutoPtr<gfxTextRun> textRun;
     textRun = new gfxTextRun(aParams, &space, 1, this, aFlags);
-    if (!textRun)
-        return nsnull;
-    if (NS_FAILED(textRun->AddGlyphRun(font, 0)))
+    if (!textRun || !textRun->GetCharacterGlyphs())
         return nsnull;
-    gfxTextRun::CompressedGlyph g;
-    g.SetSimpleGlyph(spaceWidthAppUnits, spaceGlyph);
-    textRun->SetCharacterGlyph(0, g);
+
+    gfxFont *font = GetFontAt(0);
+    textRun->SetSpaceGlyph(font, aParams->mContext, 0);
     return textRun.forget();
 }
 
 gfxFontStyle::gfxFontStyle(PRUint8 aStyle, PRUint16 aWeight, gfxFloat aSize,
                            const nsACString& aLangGroup,
                            float aSizeAdjust, PRPackedBool aSystemFont,
                            PRPackedBool aFamilyNameQuirks) :
     style(aStyle), systemFont(aSystemFont),
@@ -686,45 +675,17 @@ gfxTextRun::Clone(const gfxTextRunFactor
     if (!mCharacterGlyphs)
         return nsnull;
 
     nsAutoPtr<gfxTextRun> textRun;
     textRun = new gfxTextRun(aParams, aText, aLength, aFontGroup, aFlags);
     if (!textRun || !textRun->mCharacterGlyphs)
         return nsnull;
 
-    PRUint32 i;
-    for (i = 0; i < mGlyphRuns.Length(); ++i) {
-        if (NS_FAILED(textRun->AddGlyphRun(mGlyphRuns[i].mFont,
-                                           mGlyphRuns[i].mCharacterOffset)))
-            return nsnull;
-    }
-
-    for (i = 0; i < aLength; ++i) {
-        CompressedGlyph g = mCharacterGlyphs[i];
-        g.SetCanBreakBefore(PR_FALSE);
-        textRun->mCharacterGlyphs[i] = g;
-    }
-
-    if (mDetailedGlyphs) {
-        for (i = 0; i < aLength; ++i) {
-            DetailedGlyph *details = mDetailedGlyphs[i];
-            if (details) {
-                PRUint32 glyphCount = 1;
-                while (!details[glyphCount - 1].mIsLastGlyph) {
-                    ++glyphCount;
-                }
-                DetailedGlyph *dest = textRun->AllocateDetailedGlyphs(i, glyphCount);
-                if (!dest)
-                    return nsnull;
-                memcpy(dest, details, sizeof(DetailedGlyph)*glyphCount);
-            }
-        }
-    }
-
+    textRun->CopyGlyphDataFrom(this, 0, mCharacterCount, 0, PR_FALSE);
     return textRun.forget();
 }
 
 PRBool
 gfxTextRun::SetPotentialLineBreaks(PRUint32 aStart, PRUint32 aLength,
                                    PRPackedBool *aBreakBefore)
 {
     NS_ASSERTION(aStart + aLength <= mCharacterCount, "Overflow");
@@ -1485,18 +1446,34 @@ gfxTextRun::FindFirstGlyphRunContaining(
     NS_ASSERTION(mGlyphRuns[start].mCharacterOffset <= aOffset,
                  "Hmm, something went wrong, aOffset should have been found");
     return start;
 }
 
 nsresult
 gfxTextRun::AddGlyphRun(gfxFont *aFont, PRUint32 aUTF16Offset)
 {
-    NS_ASSERTION(mGlyphRuns.Length() > 0 || aUTF16Offset == 0,
+    PRUint32 numGlyphRuns = mGlyphRuns.Length();
+    if (numGlyphRuns > 0) {
+        GlyphRun *lastGlyphRun = &mGlyphRuns[numGlyphRuns - 1];
+
+        NS_ASSERTION(lastGlyphRun->mCharacterOffset <= aUTF16Offset,
+                     "Glyph runs out of order");
+
+        if (lastGlyphRun->mFont == aFont)
+            return NS_OK;
+        if (lastGlyphRun->mCharacterOffset == aUTF16Offset) {
+            lastGlyphRun->mFont = aFont;
+            return NS_OK;
+        }
+    }
+
+    NS_ASSERTION(numGlyphRuns > 0 || aUTF16Offset == 0,
                  "First run doesn't cover the first character?");
+
     GlyphRun *glyphRun = mGlyphRuns.AppendElement();
     if (!glyphRun)
         return NS_ERROR_OUT_OF_MEMORY;
     glyphRun->mFont = aFont;
     glyphRun->mCharacterOffset = aUTF16Offset;
     return NS_OK;
 }
 
@@ -1579,8 +1556,127 @@ gfxTextRun::RecordSurrogates(const PRUni
     PRUint32 i;
     gfxTextRun::CompressedGlyph g;
     for (i = 0; i < mCharacterCount; ++i) {
         if (NS_IS_LOW_SURROGATE(aString[i])) {
             SetCharacterGlyph(i, g.SetLowSurrogate());
         }
     }
 }
+
+static PRUint32
+CountDetailedGlyphs(gfxTextRun::DetailedGlyph *aGlyphs)
+{
+    PRUint32 i = 0;
+    while (!aGlyphs[i].mIsLastGlyph) {
+        ++i;
+    }
+    return i + 1;
+}
+
+static void
+ClearCharacters(gfxTextRun::CompressedGlyph *aGlyphs, PRUint32 aLength)
+{
+    gfxTextRun::CompressedGlyph g;
+    g.SetMissing();
+    PRUint32 i;
+    for (i = 0; i < aLength; ++i) {
+        aGlyphs[i] = g;
+    }
+}
+
+void
+gfxTextRun::CopyGlyphDataFrom(gfxTextRun *aSource, PRUint32 aStart,
+                              PRUint32 aLength, PRUint32 aDest,
+                              PRBool aStealData)
+{
+    NS_ASSERTION(aStart + aLength <= aSource->GetLength(),
+                 "Source substring out of range");
+    NS_ASSERTION(aDest + aLength <= GetLength(),
+                 "Destination substring out of range");
+
+    PRUint32 i;
+    for (i = 0; i < aLength; ++i) {
+        CompressedGlyph g = aSource->mCharacterGlyphs[i + aStart];
+        g.SetCanBreakBefore(PR_FALSE);
+        mCharacterGlyphs[i + aDest] = g;
+        if (aStealData) {
+            aSource->mCharacterGlyphs[i + aStart].SetMissing();
+        }
+    }
+
+    if (aSource->mDetailedGlyphs) {
+        for (i = 0; i < aLength; ++i) {
+            DetailedGlyph *details = aSource->mDetailedGlyphs[i + aStart];
+            if (details) {
+                if (aStealData) {
+                    if (!mDetailedGlyphs) {
+                        mDetailedGlyphs = new nsAutoArrayPtr<DetailedGlyph>[mCharacterCount];
+                        if (!mDetailedGlyphs) {
+                            ClearCharacters(&mCharacterGlyphs[aDest], aLength);
+                            return;
+                        }
+                    }        
+                    mDetailedGlyphs[i + aDest] = details;
+                    aSource->mDetailedGlyphs[i + aStart].forget();
+                } else {
+                    PRUint32 glyphCount = CountDetailedGlyphs(details);
+                    DetailedGlyph *dest = AllocateDetailedGlyphs(i + aDest, glyphCount);
+                    if (!dest) {
+                        ClearCharacters(&mCharacterGlyphs[aDest], aLength);
+                        return;
+                    }
+                    memcpy(dest, details, sizeof(DetailedGlyph)*glyphCount);
+                }
+            } else if (mDetailedGlyphs) {
+                mDetailedGlyphs[i + aDest] = nsnull;
+            }
+        }
+    } else if (mDetailedGlyphs) {
+        for (i = 0; i < aLength; ++i) {
+            mDetailedGlyphs[i + aDest] = nsnull;
+        }
+    }
+
+    GlyphRunIterator iter(aSource, aStart, aLength);
+    while (iter.NextRun()) {
+        gfxFont *font = iter.GetGlyphRun()->mFont;
+        PRUint32 start = iter.GetStringStart();
+        PRUint32 end = iter.GetStringEnd();
+        NS_ASSERTION(aSource->IsClusterStart(start),
+                     "Started word in the middle of a cluster...");
+        NS_ASSERTION(end == aSource->GetLength() || aSource->IsClusterStart(end),
+                     "Ended word in the middle of a cluster...");
+
+        nsresult rv = AddGlyphRun(font, start - aStart + aDest);
+        if (NS_FAILED(rv))
+            return;
+    }
+}
+
+void
+gfxTextRun::SetSpaceGlyph(gfxFont *aFont, gfxContext *aContext, PRUint32 aCharIndex)
+{
+    PRUint32 spaceGlyph = aFont->GetSpaceGlyph();
+    float spaceWidth = aFont->GetMetrics().spaceWidth;
+    PRUint32 spaceWidthAppUnits = NS_lroundf(spaceWidth*mAppUnitsPerDevUnit);
+    if (!spaceGlyph ||
+        !CompressedGlyph::IsSimpleGlyphID(spaceGlyph) ||
+        !CompressedGlyph::IsSimpleAdvance(spaceWidthAppUnits)) {
+        gfxTextRunFactory::Parameters params = {
+            aContext, nsnull, nsnull, nsnull, 0, mAppUnitsPerDevUnit
+        };
+        static const PRUint8 space = ' ';
+        nsAutoPtr<gfxTextRun> textRun;
+        textRun = mFontGroup->MakeTextRun(&space, 1, &params,
+            gfxTextRunFactory::TEXT_IS_8BIT | gfxTextRunFactory::TEXT_IS_ASCII |
+            gfxTextRunFactory::TEXT_IS_PERSISTENT);
+        if (!textRun || !textRun->mCharacterGlyphs)
+            return;
+        CopyGlyphDataFrom(textRun, 0, 1, aCharIndex, PR_TRUE);
+        return;
+    }
+
+    AddGlyphRun(aFont, aCharIndex);
+    CompressedGlyph g;
+    g.SetSimpleGlyph(spaceWidthAppUnits, spaceGlyph);
+    SetCharacterGlyph(aCharIndex, g);
+}
--- a/gfx/thebes/src/gfxSkipChars.cpp
+++ b/gfx/thebes/src/gfxSkipChars.cpp
@@ -83,23 +83,20 @@ gfxSkipChars::BuildShortcuts()
             skippedCharOffset += len;
         }
     }
 }
 
 void
 gfxSkipCharsIterator::SetOffsets(PRUint32 aOffset, PRBool aInOriginalString)
 {
+    NS_ASSERTION(aOffset <= mSkipChars->mCharCount,
+                 "Invalid offset");
+
     if (mSkipChars->mListLength == 0) {
-        // Special case: all chars kept, original and stripped strings are equal
-        if (aOffset < 0) {
-            aOffset = 0;
-        } else if (aOffset > mSkipChars->mCharCount) {
-            aOffset = mSkipChars->mCharCount;
-        }
         mOriginalStringOffset = mSkippedStringOffset = aOffset;
         return;
     }
   
     if (aOffset == 0) {
         // Start from the beginning of the string.
         mSkippedStringOffset = 0;
         mOriginalStringOffset = 0;
new file mode 100644
--- /dev/null
+++ b/gfx/thebes/src/gfxTextRunWordCache.cpp
@@ -0,0 +1,469 @@
+/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Mozilla Foundation code.
+ *
+ * The Initial Developer of the Original Code is Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2006
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Vladimir Vukicevic <vladimir@pobox.com>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#include "gfxTextRunWordCache.h"
+
+static inline PRUint32
+HashMix(PRUint32 aHash, PRUnichar aCh)
+{
+    return (aHash >> 28) ^ (aHash << 4) ^ aCh;
+}
+
+// If the substring of the textrun is rendered entirely in the first font
+// of the textrun's fontgroup, then return that font. Otherwise return the
+// fontgroup.
+static void *GetWordFontOrGroup(gfxTextRun *aTextRun, PRUint32 aOffset,
+                                PRUint32 aLength)
+{
+    PRUint32 glyphRunCount;
+    const gfxTextRun::GlyphRun *glyphRuns = aTextRun->GetGlyphRuns(&glyphRunCount);
+    PRUint32 glyphRunIndex = aTextRun->FindFirstGlyphRunContaining(aOffset);
+    gfxFontGroup *fontGroup = aTextRun->GetFontGroup();
+    gfxFont *firstFont = fontGroup->GetFontAt(0);
+    if (glyphRuns[glyphRunIndex].mFont != firstFont)
+        return fontGroup;
+
+    PRUint32 glyphRunEnd = glyphRunIndex == glyphRunCount - 1
+        ? aTextRun->GetLength() : glyphRuns[glyphRunIndex + 1].mCharacterOffset;
+    if (aOffset + aLength <= glyphRunEnd)
+        return firstFont;
+    return fontGroup;
+}
+
+#define UNICODE_NBSP 0x00A0
+
+// XXX should we treat NBSP or SPACE combined with other characters as a word
+// boundary? Currently this does.
+static PRBool
+IsBoundarySpace(PRUnichar aChar)
+{
+    return aChar == ' ' || aChar == UNICODE_NBSP;
+}
+
+static PRBool
+IsWordBoundary(PRUnichar aChar)
+{
+    return IsBoundarySpace(aChar) || gfxFontGroup::IsInvisibleChar(aChar);
+}
+
+/**
+ * Looks up a word in the cache. If the word is found in the cache
+ * (which could be from an existing textrun or an earlier word in the same
+ * textrun), we copy the glyphs from the word into the textrun, unless
+ * aDeferredWords is non-null (meaning that all words from now on must be
+ * copied later instead of now), in which case we add the word to be copied
+ * to the list.
+ * 
+ * If the word is not found in the cache then we add it to the cache with
+ * aFirstFont as the key, on the assumption that the word will be matched
+ * by aFirstFont. If this assumption is false we fix up the cache later in
+ * FinishTextRun. We make this assumption for two reasons:
+ * 1) it's usually true so it saves an extra cache lookup if we had to insert
+ * the entry later
+ * 2) we need to record words that appear in the string in some kind
+ * of hash table so we can detect and use them if they appear later in the
+ * (in general the string might be huge and contain many repeated words).
+ * We might as well use the master hash table for this.
+ * 
+ * @return true if the word was found in the cache, false otherwise.
+ */
+PRBool
+gfxTextRunWordCache::LookupWord(gfxTextRun *aTextRun, gfxFont *aFirstFont,
+                                PRUint32 aStart, PRUint32 aEnd, PRUint32 aHash,
+                                nsTArray<DeferredWord>* aDeferredWords)
+{
+    if (aEnd <= aStart)
+        return PR_TRUE;
+
+    PRUint32 length = aEnd - aStart;
+    CacheHashKey key =
+        { aFirstFont, aTextRun->GetTextAt(aStart),
+          length, aTextRun->GetAppUnitsPerDevUnit(), aHash,
+          (aTextRun->GetFlags() & gfxTextRunFactory::TEXT_IS_8BIT) == 0 };
+    CacheHashEntry *fontEntry = mCache.PutEntry(key);
+    if (!fontEntry)
+        return PR_FALSE;
+    CacheHashEntry *existingEntry = nsnull;
+
+    if (fontEntry->mTextRun) {
+        existingEntry = fontEntry;
+    } else {
+        key.mFontOrGroup = aTextRun->GetFontGroup();
+        CacheHashEntry *groupEntry = mCache.GetEntry(key);
+        if (groupEntry) {
+            existingEntry = groupEntry;
+            mCache.RawRemoveEntry(fontEntry);
+            fontEntry = nsnull;
+        }
+    }
+    // At this point, either existingEntry is non-null and points to (surprise!)
+    // an entry for a word in an existing textrun, or fontEntry points
+    // to a cache entry for this word with aFirstFont, which needs to be
+    // filled in or removed.
+
+    if (existingEntry) {
+        if (aDeferredWords) {
+            DeferredWord word = { existingEntry->mTextRun,
+                  existingEntry->mWordOffset, aStart, aEnd - aStart, aHash };
+            aDeferredWords->AppendElement(word);
+        } else {
+            aTextRun->CopyGlyphDataFrom(existingEntry->mTextRun,
+                existingEntry->mWordOffset, aEnd - aStart, aStart, PR_FALSE);
+        }
+        return PR_TRUE;
+    }
+
+    // Set up the cache entry so that if later in this textrun we hit this
+    // entry, we'll copy within our own textrun
+    fontEntry->mTextRun = aTextRun;
+    fontEntry->mWordOffset = aStart;
+    fontEntry->mHashedByFont = PR_TRUE;
+    return PR_FALSE;
+}
+
+/**
+ * Processes all deferred words. Each word is copied from the source
+ * textrun to the output textrun. (The source may be an earlier word in the
+ * output textrun.) If the word was not matched by the textrun's fontgroup's
+ * first font, then we remove the entry we optimistically added to the cache
+ * with that font in the key, and add a new entry keyed with the fontgroup
+ * instead.
+ * 
+ * @param aSuccessful if false, then we don't do any word copies and we don't
+ * add anything to the cache, but we still remove all the optimistic cache
+ * entries.
+ */
+void
+gfxTextRunWordCache::FinishTextRun(gfxTextRun *aTextRun, gfxTextRun *aNewRun,
+                                   const nsTArray<DeferredWord>& aDeferredWords,
+                                   PRBool aSuccessful)
+{
+    PRUint32 i;
+    gfxFontGroup *fontGroup = aTextRun->GetFontGroup();
+    gfxFont *font = fontGroup->GetFontAt(0);
+    // copy deferred words from various sources into destination textrun
+    for (i = 0; i < aDeferredWords.Length(); ++i) {
+        const DeferredWord *word = &aDeferredWords[i];
+        gfxTextRun *source = word->mSourceTextRun;
+        if (!source) {
+            source = aNewRun;
+            // we created a cache entry for this word based on the assumption
+            // that the word matches GetFontAt(0). If this assumption is false,
+            // we need to remove that cache entry and replace it with an entry
+            // keyed off the fontgroup.
+            if (!aSuccessful ||
+                GetWordFontOrGroup(aNewRun, word->mSourceOffset, word->mLength) != font) {
+                CacheHashKey key =
+                    { font, aTextRun->GetTextAt(word->mDestOffset),
+                      word->mLength, aTextRun->GetAppUnitsPerDevUnit(), word->mHash,
+                      (aTextRun->GetFlags() & gfxTextRunFactory::TEXT_IS_8BIT) == 0 };
+                NS_ASSERTION(mCache.GetEntry(key),
+                             "This entry should have been added previously!");
+                mCache.RemoveEntry(key);
+                
+                if (aSuccessful) {
+                    key.mFontOrGroup = fontGroup;
+                    CacheHashEntry *groupEntry = mCache.PutEntry(key);
+                    if (groupEntry) {
+                        groupEntry->mTextRun = aTextRun;
+                        groupEntry->mWordOffset = word->mDestOffset;
+                        groupEntry->mHashedByFont = PR_FALSE;
+                    }
+                }
+            }
+        }
+        if (aSuccessful) {
+            // Copy the word. If the source is aNewRun, then
+            // allow CopyGlyphDataFrom to steal the internal data of
+            // aNewRun since that's only temporary anyway.
+            aTextRun->CopyGlyphDataFrom(source,
+                word->mSourceOffset, word->mLength, word->mDestOffset,
+                source == aNewRun);
+        }
+    }
+}
+
+gfxTextRun *
+gfxTextRunWordCache::MakeTextRun(const PRUnichar *aText, PRUint32 aLength,
+                                 gfxFontGroup *aFontGroup,
+                                 const gfxFontGroup::Parameters *aParams,
+                                 PRUint32 aFlags, PRBool *aIsInCache)
+{
+    nsAutoPtr<gfxTextRun> textRun;
+    textRun = new gfxTextRun(aParams, aText, aLength, aFontGroup, aFlags);
+    if (!textRun || !textRun->GetCharacterGlyphs())
+        return nsnull;   
+
+    gfxFont *font = aFontGroup->GetFontAt(0);
+    nsresult rv = textRun->AddGlyphRun(font, 0);
+    NS_ENSURE_SUCCESS(rv, nsnull);
+
+    nsAutoTArray<PRUnichar,200> tempString;
+    nsAutoTArray<DeferredWord,50> deferredWords;
+    PRUint32 i;
+    PRUint32 wordStart = 0;
+    PRUint32 hash = 0;
+    for (i = 0; i <= aLength; ++i) {
+        PRUnichar ch = i < aLength ? aText[i] : ' ';
+        if (IsWordBoundary(ch)) {
+            PRBool hit = LookupWord(textRun, font, wordStart, i, hash,
+                                    deferredWords.Length() == 0 ? nsnull : &deferredWords);
+            if (!hit) {
+                if (tempString.Length() > 0) {
+                    tempString.AppendElement(' ');
+                }
+                PRUint32 offset = tempString.Length();
+                PRUint32 length = i - wordStart;
+                PRUnichar *chars = tempString.AppendElements(length);
+                if (!chars) {
+                    FinishTextRun(textRun, nsnull, deferredWords, PR_FALSE);
+                    return nsnull;
+                }
+                memcpy(chars, aText + wordStart, length*sizeof(PRUnichar));
+                DeferredWord word = { nsnull, offset, wordStart, length, hash };
+                deferredWords.AppendElement(word);
+            }
+            
+            if (IsBoundarySpace(ch) && i < aLength) {
+                textRun->SetSpaceGlyph(font, aParams->mContext, i);
+            } // else we should set this character to be invisible missing,
+              // but it already is because the textrun is blank!
+            hash = 0;
+            wordStart = i + 1;
+        } else {
+            hash = HashMix(hash, ch);
+        }
+    }
+
+    if (deferredWords.Length() == 0) {
+        // We got everything from the cache, so we're done. No point in calling
+        // FinishTextRun.
+        // This textrun is not referenced by the cache.
+        *aIsInCache = PR_FALSE;
+        return textRun.forget();
+    }
+    *aIsInCache = PR_TRUE;
+
+    // create textrun for unknown words
+    gfxTextRunFactory::Parameters params =
+        { aParams->mContext, nsnull, nsnull, nsnull, 0, aParams->mAppUnitsPerDevUnit };
+    nsAutoPtr<gfxTextRun> newRun;
+    newRun = aFontGroup->MakeTextRun(tempString.Elements(), tempString.Length(),
+                                     &params, aFlags | gfxTextRunFactory::TEXT_IS_PERSISTENT);
+
+    FinishTextRun(textRun, newRun, deferredWords, newRun != nsnull);
+    return textRun.forget();
+}
+
+gfxTextRun *
+gfxTextRunWordCache::MakeTextRun(const PRUint8 *aText, PRUint32 aLength,
+                                 gfxFontGroup *aFontGroup,
+                                 const gfxFontGroup::Parameters *aParams,
+                                 PRUint32 aFlags, PRBool *aIsInCache)
+{
+    aFlags |= gfxTextRunFactory::TEXT_IS_8BIT;
+    nsAutoPtr<gfxTextRun> textRun;
+    textRun = new gfxTextRun(aParams, aText, aLength, aFontGroup, aFlags);
+    if (!textRun || !textRun->GetCharacterGlyphs())
+        return nsnull;
+
+    gfxFont *font = aFontGroup->GetFontAt(0);
+    nsresult rv = textRun->AddGlyphRun(font, 0);
+    NS_ENSURE_SUCCESS(rv, nsnull);
+
+    nsAutoTArray<PRUint8,200> tempString;
+    nsAutoTArray<DeferredWord,50> deferredWords;
+    PRUint32 i;
+    PRUint32 wordStart = 0;
+    PRUint32 hash = 0;
+    for (i = 0; i <= aLength; ++i) {
+        PRUint8 ch = i < aLength ? aText[i] : ' ';
+        if (IsWordBoundary(ch)) {
+            PRBool hit = LookupWord(textRun, font, wordStart, i, hash,
+                                    deferredWords.Length() == 0 ? nsnull : &deferredWords);
+            if (!hit) {
+                if (tempString.Length() > 0) {
+                    tempString.AppendElement(' ');
+                }
+                PRUint32 offset = tempString.Length();
+                PRUint32 length = i - wordStart;
+                PRUint8 *chars = tempString.AppendElements(length);
+                if (!chars) {
+                    FinishTextRun(textRun, nsnull, deferredWords, PR_FALSE);
+                    return nsnull;
+                }
+                memcpy(chars, aText + wordStart, length*sizeof(PRUint8));
+                DeferredWord word = { nsnull, offset, wordStart, length, hash };
+                deferredWords.AppendElement(word);
+            }
+            
+            if (IsBoundarySpace(ch) && i < aLength) {
+                textRun->SetSpaceGlyph(font, aParams->mContext, i);
+            } // else we should set this character to be invisible missing,
+              // but it already is because the textrun is blank!
+            hash = 0;
+            wordStart = i + 1;
+        } else {
+            hash = HashMix(hash, ch);
+        }
+    }
+
+    if (deferredWords.Length() == 0) {
+        // We got everything from the cache, so we're done. No point in calling
+        // FinishTextRun.
+        // This textrun is not referenced by the cache.
+        *aIsInCache = PR_FALSE;
+        return textRun.forget();
+    }
+    *aIsInCache = PR_TRUE;
+
+    // create textrun for unknown words
+    gfxTextRunFactory::Parameters params =
+        { aParams->mContext, nsnull, nsnull, nsnull, 0, aParams->mAppUnitsPerDevUnit };
+    nsAutoPtr<gfxTextRun> newRun;
+    newRun = aFontGroup->MakeTextRun(tempString.Elements(), tempString.Length(),
+                                     &params, aFlags | gfxTextRunFactory::TEXT_IS_PERSISTENT);
+
+    FinishTextRun(textRun, newRun, deferredWords, newRun != nsnull);
+    return textRun.forget();
+}
+
+void
+gfxTextRunWordCache::RemoveWord(gfxTextRun *aTextRun, PRUint32 aStart,
+                                PRUint32 aEnd, PRUint32 aHash)
+{
+    if (aEnd <= aStart)
+        return;
+
+    PRUint32 length = aEnd - aStart;
+    CacheHashKey key =
+        { GetWordFontOrGroup(aTextRun, aStart, length), aTextRun->GetTextAt(aStart),
+          length, aTextRun->GetAppUnitsPerDevUnit(), aHash,
+          (aTextRun->GetFlags() & gfxTextRunFactory::TEXT_IS_8BIT) == 0 };
+    CacheHashEntry *entry = mCache.GetEntry(key);
+    if (entry && entry->mTextRun == aTextRun) {
+        // XXX would like to use RawRemoveEntry here plus some extra method
+        // that conditionally shrinks the hashtable
+        mCache.RemoveEntry(key);
+    }
+}
+
+// Remove a textrun from the cache by looking up each word and removing it
+void
+gfxTextRunWordCache::RemoveTextRun(gfxTextRun *aTextRun)
+{
+    PRUint32 i;
+    PRUint32 wordStart = 0;
+    PRUint32 hash = 0;
+    for (i = 0; i < aTextRun->GetLength(); ++i) {
+        PRUnichar ch = aTextRun->GetChar(i);
+        if (IsWordBoundary(ch)) {
+            RemoveWord(aTextRun, wordStart, i, hash);
+            hash = 0;
+            wordStart = i + 1;
+        } else {
+            hash = HashMix(hash, ch);
+        }
+    }
+    RemoveWord(aTextRun, wordStart, i, hash);
+}
+
+static PRBool
+CompareDifferentWidthStrings(const PRUint8 *aStr1, const PRUnichar *aStr2,
+                             PRUint32 aLength)
+{
+    PRUint32 i;
+    for (i = 0; i < aLength; ++i) {
+        if (aStr1[i] != aStr2[i])
+            return PR_FALSE;
+    }
+    return PR_TRUE;
+}
+
+static PRBool
+IsWordEnd(gfxTextRun *aTextRun, PRUint32 aOffset)
+{
+    PRUint32 runLength = aTextRun->GetLength();
+    if (aOffset == runLength)
+        return PR_TRUE;
+    if (aOffset > runLength)
+        return PR_FALSE;
+    return IsWordBoundary(aTextRun->GetChar(aOffset));
+}
+
+static void *
+GetFontOrGroup(gfxFontGroup *aFontGroup, PRBool aUseFont)
+{
+    return aUseFont
+        ? NS_STATIC_CAST(void *, aFontGroup->GetFontAt(0))
+        : NS_STATIC_CAST(void *, aFontGroup);
+}
+
+PRBool
+gfxTextRunWordCache::CacheHashEntry::KeyEquals(const KeyTypePointer aKey) const
+{
+    if (!mTextRun)
+        return PR_FALSE;
+
+    PRUint32 length = aKey->mLength;
+    gfxFontGroup *fontGroup = mTextRun->GetFontGroup();
+    if (!IsWordEnd(mTextRun, mWordOffset + length) ||
+        GetFontOrGroup(fontGroup, mHashedByFont) != aKey->mFontOrGroup ||
+        aKey->mAppUnitsPerDevUnit != mTextRun->GetAppUnitsPerDevUnit())
+        return PR_FALSE;
+
+    if (mTextRun->GetFlags() & gfxFontGroup::TEXT_IS_8BIT) {
+        const PRUint8 *text = mTextRun->GetText8Bit() + mWordOffset;
+        if (!aKey->mIsDoubleByteText)
+            return memcmp(text, aKey->mString, length) == 0;
+        return CompareDifferentWidthStrings(text,
+                                            NS_STATIC_CAST(const PRUnichar *, aKey->mString), length);
+    } else {
+        const PRUnichar *text = mTextRun->GetTextUnicode() + mWordOffset;
+        if (aKey->mIsDoubleByteText)
+            return memcmp(text, aKey->mString, length*sizeof(PRUnichar)) == 0;
+        return CompareDifferentWidthStrings(NS_STATIC_CAST(const PRUint8 *, aKey->mString),
+                                            text, length);
+    }
+}
+
+PLDHashNumber
+gfxTextRunWordCache::CacheHashEntry::HashKey(const KeyTypePointer aKey)
+{
+    return aKey->mStringHash + (long)aKey->mFontOrGroup + aKey->mAppUnitsPerDevUnit +
+        aKey->mIsDoubleByteText;
+}