fixing font selection bug dealing with zero-width glyphs and begin using ScriptGetCMap when possible. bug 376300. r=roc
authorpavlov@pavlov.net
Mon, 02 Apr 2007 23:32:23 -0700
changeset 282 7f2ab8377b5515e82a72331df7ecb407fa1f5947
parent 281 bc03f9d7140cb5287b9573182d30427057dda05b
child 283 e292f6aba8b87ca31e070a1f5bcf002fbc5ca72b
push idunknown
push userunknown
push dateunknown
reviewersroc
bugs376300
milestone1.9a4pre
fixing font selection bug dealing with zero-width glyphs and begin using ScriptGetCMap when possible. bug 376300. r=roc
gfx/thebes/src/gfxTextRunCache.cpp
gfx/thebes/src/gfxWindowsFonts.cpp
--- a/gfx/thebes/src/gfxTextRunCache.cpp
+++ b/gfx/thebes/src/gfxTextRunCache.cpp
@@ -110,17 +110,17 @@ gfxTextRunCache::Shutdown()
 {
     delete mGlobalCache;
     mGlobalCache = nsnull;
 }    
 
 static PRUint32
 ComputeFlags(PRBool aIsRTL, PRBool aEnableSpacing)
 {
-    PRUint32 flags = gfxTextRunFactory::TEXT_HAS_SURROGATES;
+    PRUint32 flags = 0;
     if (aIsRTL) {
         flags |= gfxTextRunFactory::TEXT_IS_RTL;
     }
     if (aEnableSpacing) {
         flags |= gfxTextRunFactory::TEXT_ENABLE_SPACING |
                  gfxTextRunFactory::TEXT_ABSOLUTE_SPACING |
                  gfxTextRunFactory::TEXT_ENABLE_NEGATIVE_SPACING;
     }
@@ -134,21 +134,31 @@ gfxTextRunCache::GetOrMakeTextRun(gfxCon
                                   PRBool aEnableSpacing, PRBool *aCallerOwns)
 {
     gfxSkipChars skipChars;
     // Choose pessimistic flags since we don't want to bother analyzing the string
     gfxTextRunFactory::Parameters params = {
         aContext, nsnull, nsnull, &skipChars, nsnull, 0, aAppUnitsPerDevUnit,
         ComputeFlags(aIsRTL, aEnableSpacing)
     };
+
     if (IsAscii(aString, aLength))
         params.mFlags |= gfxTextRunFactory::TEXT_IS_ASCII;
     //    else if (Is8Bit(aString, aLength))
     //        params.mFlags |= gfxTextRunFactory::TEXT_IS_8BIT;
 
+    if (!(params.mFlags & gfxTextRunFactory::TEXT_IS_ASCII)) {
+        for (PRUint32 i = 0; i < aLength; ++i) {
+            if (NS_IS_HIGH_SURROGATE(aString[i])) {
+                params.mFlags |= gfxTextRunFactory::TEXT_HAS_SURROGATES;
+                break;
+            }
+        }
+    }
+
     gfxTextRun *tr = nsnull;
     // Don't cache textruns that use spacing
     if (!gDisableCache && !aEnableSpacing) {
         // Evict first, to make sure that the textrun we return is live.
         EvictUTF16();
     
         TextRunEntry *entry;
         nsDependentSubstring keyStr(aString, aString + aLength);
--- a/gfx/thebes/src/gfxWindowsFonts.cpp
+++ b/gfx/thebes/src/gfxWindowsFonts.cpp
@@ -971,16 +971,18 @@ public:
         HRESULT rv;
 
         HDC shapeDC = nsnull;
 
         while (PR_TRUE) {
             const PRUnichar *str =
                 mAlternativeString ? mAlternativeString : mString;
             mScriptItem->a.fLogicalOrder = PR_TRUE;
+            mScriptItem->a.s.fDisplayZWG = PR_TRUE;
+
             rv = ScriptShape(shapeDC, mCurrentFont->ScriptCache(),
                              str, mLength,
                              mMaxGlyphs, &mScriptItem->a,
                              mGlyphs, mClusters,
                              mAttr, &mNumGlyphs);
 
             if (rv == E_OUTOFMEMORY) {
                 mMaxGlyphs *= 2;
@@ -1011,65 +1013,54 @@ public:
         mScriptItem->a.eScript = SCRIPT_UNDEFINED;
         // Note: If we disable the shaping by using SCRIPT_UNDEFINED and
         // the string has the surrogate pair, ScriptShape API is
         // *sometimes* crashed. Therefore, we should replace the surrogate
         // pair to U+FFFD. See bug 341500.
         GenerateAlternativeString();
     }
 
-    static PRBool IsZeroWidthUnicodeChar(PRUnichar aChar) {
-        return aChar == 0x200b; // ZWSP
+    /* this should only be called if there are no surrogates
+     * in the string */
+    PRBool IsMissingGlyphsCMap() {
+        HRESULT rv;
+        HDC cmapDC = nsnull;
+
+        while (PR_TRUE) {
+            rv = ScriptGetCMap(cmapDC, mCurrentFont->ScriptCache(),
+                               mString, mLength, 0, mGlyphs);
+
+            if (rv == E_PENDING) {
+                SelectFont();
+                cmapDC = mDC;
+                continue;
+            }
+
+            if (rv == S_OK)
+                return PR_FALSE;
+
+            PR_LOG(gFontLog, PR_LOG_WARNING, ("cmap is missing a glyph"));
+            for (PRUint32 i = 0; i < mLength; i++)
+                PR_LOG(gFontLog, PR_LOG_WARNING, (" - %d", mGlyphs[i]));
+            return PR_TRUE;
+        }
     }
 
-    /**
-     * @param aCharIndex the index in the string of the first character
-     * for the cluster this glyph belongs to
-     */
-    PRBool IsGlyphMissing(SCRIPT_FONTPROPERTIES *aSFP, PRUint32 aGlyphIndex,
-                          PRUint32 aCharIndex) {
-        WORD glyph = mGlyphs[aGlyphIndex];
-        if (glyph == aSFP->wgDefault ||
-            (glyph == aSFP->wgInvalid && glyph != aSFP->wgBlank))
+    PRBool IsGlyphMissing(SCRIPT_FONTPROPERTIES *aSFP, PRUint32 aGlyphIndex) {
+        if (mGlyphs[aGlyphIndex] == aSFP->wgDefault)
             return PR_TRUE;
-        // I'm not sure that claiming glyphs are missing if they're zero width is valid
-        // but we're seeing cases where some fonts return glyphs such as 0x03 and 0x04
-        // which are zero width and non-invalidGlyph.
-        // At any rate, only make this check for non-complex scripts.
-        if (mAttr[aGlyphIndex].fZeroWidth && !ScriptProperties()->fComplex &&
-            !IsZeroWidthUnicodeChar(mString[aCharIndex])) {
-            PR_LOG(gFontLog, PR_LOG_WARNING, ("crappy font? glyph %04x is zero-width", glyph));
-            return PR_TRUE;
-        }
         return PR_FALSE;
     }
 
-    /* 
-     * Fonts are hopelessly inconsistent about how they handle characters that
-     * they don't have glyphs for.
-     * Some will return the glyph which ScriptFontProperties() lists as
-     * wgDefault; some will return the glyph listed as wgInvalid.
-     * Some list the same glyph as both wgInvalid and wgBlank, and use this
-     * glyph for valid whitespace characters.
-     * In some cases wgBlank with zero width represents a missing glyph,
-     * but in other cases, especially in complex scripts, wgBlank with zero
-     * width represents a valid zero width character such a zero width joiner
-     * or non-joiner.
-     */
     PRBool IsMissingGlyphs() {
         SCRIPT_FONTPROPERTIES sfp;
         ScriptFontProperties(&sfp);
         PRUint32 charIndex = 0;
         for (int i = 0; i < mNumGlyphs; ++i) {
-            // advance charIndex to the first character such that glyph i
-            // is in its cluster (i.e. mClusters[charIndex + 1] > i)
-            while (charIndex + 1 < mLength && mClusters[charIndex + 1] <= i) {
-                ++charIndex;
-            }
-            if (IsGlyphMissing(&sfp, i, charIndex))
+            if (IsGlyphMissing(&sfp, i))
                 return PR_TRUE;
 #ifdef DEBUG_pavlov // excess debugging code
             PR_LOG(gFontLog, PR_LOG_DEBUG, ("%04x %04x %04x", sfp.wgBlank, sfp.wgDefault, sfp.wgInvalid));
             PR_LOG(gFontLog, PR_LOG_DEBUG, ("glyph%d - 0x%04x", i, mGlyphs[i]));
             PR_LOG(gFontLog, PR_LOG_DEBUG, ("%04x  --  %04x -- %04x", ScriptProperties()->fInvalidGlyph, mScriptItem->a.fNoGlyphIndex, mAttr[i].fZeroWidth));
 #endif
         }
         return PR_FALSE;
@@ -1168,26 +1159,26 @@ public:
                     // No glyphs for character 'index', it must be a ligature continuation
                     aRun->SetCharacterGlyph(runOffset, g.SetLigatureContinuation());
                 }
             } else {
                 // Count glyphs for this character
                 PRUint32 k = mClusters[offset];
                 PRUint32 glyphCount = mNumGlyphs - k;
                 PRUint32 nextClusterOffset;
-                PRBool missing = IsGlyphMissing(&sfp, k, offset);
+                PRBool missing = IsGlyphMissing(&sfp, k);
                 for (nextClusterOffset = offset + 1; nextClusterOffset < mLength; ++nextClusterOffset) {
                     if (mClusters[nextClusterOffset] > k) {
                         glyphCount = mClusters[nextClusterOffset] - k;
                         break;
                     }
                 }
                 PRUint32 j;
                 for (j = 1; j < glyphCount; ++j) {
-                    if (IsGlyphMissing(&sfp, k + j, offset)) {
+                    if (IsGlyphMissing(&sfp, k + j)) {
                         missing = PR_TRUE;
                     }
                 }
                 PRInt32 advance = mAdvances[k]*appUnitsPerDevUnit;
                 WORD glyph = mGlyphs[k];
                 if (missing) {
                     aRun->SetMissingGlyph(runOffset, mString[offset]);
                 } else if (glyphCount == 1 && advance >= 0 &&
@@ -1461,16 +1452,20 @@ private:
 class Uniscribe
 {
 public:
     Uniscribe(gfxContext *aContext, HDC aDC, const PRUnichar *aString, PRUint32 aLength, PRBool aIsRTL) :
         mContext(aContext), mDC(aDC), mString(aString), mLength(aLength), mIsRTL(aIsRTL),
         mIsComplex(ScriptIsComplex(aString, aLength, SIC_COMPLEX) == S_OK),
         mItems(nsnull) {
     }
+    ~Uniscribe() {
+        if (mItems)
+            free(mItems);
+    }
 
     void Init() {
         memset(&mControl, 0, sizeof(SCRIPT_CONTROL));
         memset(&mState, 0, sizeof(SCRIPT_STATE));
         // Lock the direction. Don't allow the itemizer to change directions
         // based on character type.
         mState.uBidiLevel = mIsRTL;
         mState.fOverrideDirection = PR_TRUE;
@@ -1562,25 +1557,39 @@ gfxWindowsFontGroup::InitTextRunUniscrib
                 font->UpdateCTM(aContext->CurrentMatrix());
 
                 /* set the curret font on the item */
                 item->SetCurrentFont(font);
 
                 if (PR_LOG_TEST(gFontLog, PR_LOG_DEBUG))
                     PR_LOG(gFontLog, PR_LOG_DEBUG, ("trying: %s", NS_LossyConvertUTF16toASCII(font->GetName()).get()));
 
+                PRBool cmapHasGlyphs = PR_FALSE; // false means "maybe" here
+
+                if (!giveUp && !(aRun->GetFlags() & TEXT_HAS_SURROGATES)) {
+                    if (item->IsMissingGlyphsCMap())
+                        continue;
+                    else
+                        cmapHasGlyphs = PR_TRUE;
+                }
+
                 rv = item->Shape();
 
                 if (giveUp) {
                     if (PR_LOG_TEST(gFontLog, PR_LOG_DEBUG))
                         PR_LOG(gFontLog, PR_LOG_DEBUG, ("%s - gave up", NS_LossyConvertUTF16toASCII(font->GetName()).get()));
                     goto SCRIPT_PLACE;
                 }
-                if (FAILED(rv) || item->IsMissingGlyphs())
+
+                if (FAILED(rv))
                     continue;
+
+                if (!cmapHasGlyphs && item->IsMissingGlyphs())
+                    continue;
+
             } else {
 #if 0
                 /* code to try all the fonts again without shaping on.
                    in general, if we couldn't shape we should probably just give up */
                 if (item->ShapingEnabled()) {
                     item->DisableShaping();
                     item->ResetFontIndex();
                     continue;