Bug 410728. gfxTextRunWordCache should not cache words that start with a combining mark (words whose first character clusters with a preceding space). r=vlad
authorroc+@cs.cmu.edu
Mon, 28 Jan 2008 09:35:44 -0800
changeset 10831 1037ace6a1887303b50e6fc4e56d03d128ef7ed9
parent 10830 77430b307c07b90a8278f214dcddf3fd060c2fd7
child 10832 aea8adbd8f8bcfedfc81d7131e5b4d5e0a287950
push idunknown
push userunknown
push dateunknown
reviewersvlad
bugs410728
milestone1.9b3pre
Bug 410728. gfxTextRunWordCache should not cache words that start with a combining mark (words whose first character clusters with a preceding space). r=vlad
gfx/thebes/crashtests/crashtests.list
gfx/thebes/src/gfxTextRunWordCache.cpp
--- a/gfx/thebes/crashtests/crashtests.list
+++ b/gfx/thebes/crashtests/crashtests.list
@@ -19,9 +19,9 @@ skip-if(MOZ_WIDGET_TOOLKIT=="gtk2") load
 load 395335-1.xhtml
 load 398042-1.xhtml
 load 398042-2.xhtml
 load 404112-1.html
 load 404112-2.html
 load 405268-1.xhtml
 load 407761-1.html
 load 407842.html
-# load 410728-1.xml
+load 410728-1.xml
--- a/gfx/thebes/src/gfxTextRunWordCache.cpp
+++ b/gfx/thebes/src/gfxTextRunWordCache.cpp
@@ -39,17 +39,32 @@
 
 #ifdef DEBUG
 #include <stdio.h>
 #endif
 
 /**
  * Cache individual "words" (strings delimited by white-space or white-space-like
  * characters that don't involve kerning or ligatures) in textruns.
-  */
+ *  
+ * The characters treated as word boundaries are defined by IsWordBoundary
+ * below. The characters are: space, NBSP, and all the characters
+ * defined by gfxFontGroup::IsInvalidChar. The latter are all converted
+ * to invisible missing glyphs in this code. Thus, this class ensures
+ * that none of those invalid characters are ever passed to platform
+ * textrun implementations.
+ * 
+ * Some platforms support marks combining with spaces to form clusters.
+ * In such cases we treat "before the space" as a word boundary but
+ * "after the space" is not a word boundary; words with a leading space
+ * are kept out of the cache. Also, words at the start of text, which start
+ * with combining marks that would combine with a space if there was one,
+ * are also kept out of the cache.
+ */
+
 class TextRunWordCache {
 public:
     TextRunWordCache() {
         mCache.Init(100);
     }
     ~TextRunWordCache() {
         NS_WARN_IF_FALSE(mCache.Count() == 0, "Textrun cache not empty!");
     }
@@ -154,17 +169,17 @@ protected:
         PRUint32    mLength;
         PRUint32    mHash;
     };
     
     PRBool LookupWord(gfxTextRun *aTextRun, gfxFont *aFirstFont,
                       PRUint32 aStart, PRUint32 aEnd, PRUint32 aHash,
                       nsTArray<DeferredWord>* aDeferredWords);
     void FinishTextRun(gfxTextRun *aTextRun, gfxTextRun *aNewRun,
-                       gfxContext *aContext,
+                       const gfxFontGroup::Parameters *aParams,
                        const nsTArray<DeferredWord>& aDeferredWords,
                        PRBool aSuccessful);
     void RemoveWord(gfxTextRun *aTextRun, PRUint32 aStart,
                     PRUint32 aEnd, PRUint32 aHash);    
 
     nsTHashtable<CacheHashEntry> mCache;
     
 #ifdef DEBUG
@@ -302,77 +317,120 @@ TextRunWordCache::LookupWord(gfxTextRun 
  * instead.
  * 
  * @param aSuccessful if false, then we don't do any word copies and we don't
  * add anything to the cache, but we still remove all the optimistic cache
  * entries.
  */
 void
 TextRunWordCache::FinishTextRun(gfxTextRun *aTextRun, gfxTextRun *aNewRun,
-                                gfxContext *aContext,
+                                const gfxFontGroup::Parameters *aParams,
                                 const nsTArray<DeferredWord>& aDeferredWords,
                                 PRBool aSuccessful)
 {
     aTextRun->SetFlagBits(gfxTextRunWordCache::TEXT_IN_CACHE);
 
     PRUint32 i;
     gfxFontGroup *fontGroup = aTextRun->GetFontGroup();
     gfxFont *font = fontGroup->GetFontAt(0);
     // copy deferred words from various sources into destination textrun
     for (i = 0; i < aDeferredWords.Length(); ++i) {
         const DeferredWord *word = &aDeferredWords[i];
         gfxTextRun *source = word->mSourceTextRun;
         if (!source) {
             source = aNewRun;
-            // we created a cache entry for this word based on the assumption
+        }
+        // If the word starts inside a cluster we don't want this word
+        // in the cache, so we'll remove the associated cache entry
+        PRBool wordStartsInsideCluster =
+            !source->IsClusterStart(word->mSourceOffset);
+        if (source == aNewRun) {
+            // We created a cache entry for this word based on the assumption
             // that the word matches GetFontAt(0). If this assumption is false,
             // we need to remove that cache entry and replace it with an entry
             // keyed off the fontgroup.
-            if (!aSuccessful ||
-                GetWordFontOrGroup(aNewRun, word->mSourceOffset, word->mLength) != font) {
+            PRBool rekeyWithFontGroup =
+                GetWordFontOrGroup(aNewRun, word->mSourceOffset, word->mLength) != font;
+            if (!aSuccessful || wordStartsInsideCluster || rekeyWithFontGroup) {
+                // We need to remove the current placeholder cache entry
                 CacheHashKey key(aTextRun, font, word->mDestOffset, word->mLength,
                                  word->mHash);
                 NS_ASSERTION(mCache.GetEntry(key),
                              "This entry should have been added previously!");
                 mCache.RemoveEntry(key);
+#ifdef DEBUG
+                --aTextRun->mCachedWords;
+#endif
                 PR_LOG(gWordCacheLog, PR_LOG_DEBUG, ("%p(%d-%d,%d): removed using font", aTextRun, word->mDestOffset, word->mLength, word->mHash));
                 
-                if (aSuccessful) {
+                if (aSuccessful && !wordStartsInsideCluster) {
                     key.mFontOrGroup = fontGroup;
                     CacheHashEntry *groupEntry = mCache.PutEntry(key);
                     if (groupEntry) {
+#ifdef DEBUG
+                        ++aTextRun->mCachedWords;
+#endif
                         PR_LOG(gWordCacheLog, PR_LOG_DEBUG, ("%p(%d-%d,%d): added using fontgroup", aTextRun, word->mDestOffset, word->mLength, word->mHash));
                         groupEntry->mTextRun = aTextRun;
                         groupEntry->mWordOffset = word->mDestOffset;
                         groupEntry->mHashedByFont = PR_FALSE;
                         NS_ASSERTION(mCache.GetEntry(key),
                                      "We should find the thing we just added!");
                     }
                 }
             }
         }
         if (aSuccessful) {
             // Copy the word. If the source is aNewRun, then
             // allow CopyGlyphDataFrom to steal the internal data of
             // aNewRun since that's only temporary anyway.
-            aTextRun->CopyGlyphDataFrom(source,
-                word->mSourceOffset, word->mLength, word->mDestOffset,
-                source == aNewRun);
+            PRUint32 sourceOffset = word->mSourceOffset;
+            PRUint32 destOffset = word->mDestOffset;
+            PRUint32 length = word->mLength;
+            nsAutoPtr<gfxTextRun> tmpTextRun;
+            PRBool stealData = source == aNewRun;
+            if (wordStartsInsideCluster) {
+                NS_ASSERTION(sourceOffset > 0, "How can the first character be inside a cluster?");
+                if (destOffset > 0 && IsBoundarySpace(aTextRun->GetChar(destOffset - 1))) {
+                    // We should copy over data for the preceding space
+                    // as well. The glyphs have probably been attached
+                    // to that character.
+                    --sourceOffset;
+                    --destOffset;
+                    ++length;
+                } else {
+                    // URK! This case sucks! We have combining marks
+                    // at the start of the text. We had to prepend a space
+                    // just so we could detect these are combining marks
+                    // (so we can keep this "word" out of the cache).
+                    // But now the data in aNewRun is no use to us. We
+                    // need to find out what the platform would do
+                    // if the marks were at the start of the text.
+                    tmpTextRun = aNewRun->GetFontGroup()->MakeTextRun(
+                        aTextRun->GetTextUnicode(), length, aParams,
+                        aNewRun->GetFlags());
+                    source = tmpTextRun;
+                    sourceOffset = 0;
+                    stealData = PR_TRUE;
+                }
+            }
+            aTextRun->CopyGlyphDataFrom(source, sourceOffset, length,
+                destOffset, stealData);
             // Fill in additional spaces
             PRUint32 endCharIndex;
             if (i + 1 < aDeferredWords.Length()) {
                 endCharIndex = aDeferredWords[i + 1].mDestOffset;
             } else {
                 endCharIndex = aTextRun->GetLength();
             }
             PRUint32 charIndex;
             for (charIndex = word->mDestOffset + word->mLength;
                  charIndex < endCharIndex; ++charIndex) {
                 if (IsBoundarySpace(aTextRun->GetChar(charIndex))) {
-                    aTextRun->SetSpaceGlyph(font, aContext, charIndex);
+                    aTextRun->SetSpaceGlyph(font, aParams->mContext, charIndex);
                 }
             }
         }
     }
 }
 
 static gfxTextRun *
 MakeBlankTextRun(const void* aText, PRUint32 aLength,
@@ -420,19 +478,19 @@ TextRunWordCache::MakeTextRun(const PRUn
     PRUint32 wordStart = 0;
     PRUint32 hash = 0;
     for (i = 0; i <= aLength; ++i) {
         PRUnichar ch = i < aLength ? aText[i] : ' ';
         if (IsWordBoundary(ch)) {
             PRBool hit = LookupWord(textRun, font, wordStart, i, hash,
                                     deferredWords.Length() == 0 ? nsnull : &deferredWords);
             if (!hit) {
-                if (tempString.Length() > 0) {
-                    tempString.AppendElement(' ');
-                }
+                // Always put a space before the word so we can detect
+                // combining characters at the start of a word
+                tempString.AppendElement(' ');
                 PRUint32 offset = tempString.Length();
                 PRUint32 length = i - wordStart;
                 PRUnichar *chars = tempString.AppendElements(length);
                 if (!chars) {
                     FinishTextRun(textRun, nsnull, nsnull, deferredWords, PR_FALSE);
                     return nsnull;
                 }
                 memcpy(chars, aText + wordStart, length*sizeof(PRUnichar));
@@ -462,17 +520,17 @@ TextRunWordCache::MakeTextRun(const PRUn
 
     // create textrun for unknown words
     gfxTextRunFactory::Parameters params =
         { aParams->mContext, nsnull, nsnull, nsnull, 0, aParams->mAppUnitsPerDevUnit };
     nsAutoPtr<gfxTextRun> newRun;
     newRun = aFontGroup->MakeTextRun(tempString.Elements(), tempString.Length(),
                                      &params, aFlags | gfxTextRunFactory::TEXT_IS_PERSISTENT);
 
-    FinishTextRun(textRun, newRun, aParams->mContext, deferredWords, newRun != nsnull);
+    FinishTextRun(textRun, newRun, aParams, deferredWords, newRun != nsnull);
     return textRun.forget();
 }
 
 gfxTextRun *
 TextRunWordCache::MakeTextRun(const PRUint8 *aText, PRUint32 aLength,
                               gfxFontGroup *aFontGroup,
                               const gfxFontGroup::Parameters *aParams,
                               PRUint32 aFlags)
@@ -545,17 +603,17 @@ TextRunWordCache::MakeTextRun(const PRUi
 
     // create textrun for unknown words
     gfxTextRunFactory::Parameters params =
         { aParams->mContext, nsnull, nsnull, nsnull, 0, aParams->mAppUnitsPerDevUnit };
     nsAutoPtr<gfxTextRun> newRun;
     newRun = aFontGroup->MakeTextRun(tempString.Elements(), tempString.Length(),
                                      &params, aFlags | gfxTextRunFactory::TEXT_IS_PERSISTENT);
 
-    FinishTextRun(textRun, newRun, aParams->mContext, deferredWords, newRun != nsnull);
+    FinishTextRun(textRun, newRun, aParams, deferredWords, newRun != nsnull);
     return textRun.forget();
 }
 
 void
 TextRunWordCache::RemoveWord(gfxTextRun *aTextRun, PRUint32 aStart,
                              PRUint32 aEnd, PRUint32 aHash)
 {
     if (aEnd <= aStart)