Bug 516709. Do better validation of cmap tables when loading fonts. r=jkew.
author John Daggett <jdaggett@mozilla.com>
Fri, 18 Sep 2009 09:45:29 +0900
changeset 32826 1ee2863e89e26fd60da6d3a4e2c4b8f78d27cf01
parent 32825 0787e89e3e0d29377633b61fab3dc53d85374925
child 32827 7263e09bf2718276a7953b583bd2ed4672c5792f
push id unknown
push user unknown
push date unknown
reviewers jkew
bugs 516709
milestone 1.9.3a1pre
Bug 516709. Do better validation of cmap tables when loading fonts. r=jkew.
gfx/thebes/public/gfxFontUtils.h
gfx/thebes/public/gfxTypes.h
gfx/thebes/public/gfxWindowsFonts.h
gfx/thebes/src/gfxFontUtils.cpp
gfx/thebes/src/gfxMacPlatformFontList.mm
gfx/thebes/src/gfxWindowsFonts.cpp
--- a/gfx/thebes/public/gfxFontUtils.h
+++ b/gfx/thebes/public/gfxFontUtils.h
@@ -57,20 +57,16 @@
 #include "nsIStreamBufferAccess.h"
 
 /* Bug 341128 - w32api defines min/max which causes problems with <bitset> */
 #ifdef __MINGW32__
 #undef min
 #undef max
 #endif
 
-#include <bitset>
-
-// code from gfxWindowsFonts.h
-
 class gfxSparseBitSet {
 private:
     enum { BLOCK_SIZE = 32 };   // ==> 256 codepoints per block
     enum { BLOCK_SIZE_BITS = BLOCK_SIZE * 8 };
     enum { BLOCK_INDEX_SHIFT = 8 };
 
     struct Block {
         Block(const Block& aBlock) { memcpy(mBits, aBlock.mBits, sizeof(mBits)); }
@@ -420,17 +416,20 @@ public:
         LANG_ID_MAC_KOREAN = 23,
         LANG_ID_MAC_POLISH = 25,
         LANG_ID_MAC_FARSI = 31,
         LANG_ID_MAC_SIMP_CHINESE = 33,
         LANG_ID_MAC_ROMANIAN = 37,
         LANG_ID_MAC_CZECH = 38,
         LANG_ID_MAC_SLOVAK = 39,
 
-        LANG_ID_MICROSOFT_EN_US = 0x0409         // with Microsoft platformID, EN US lang code
+        LANG_ID_MICROSOFT_EN_US = 0x0409,        // with Microsoft platformID, EN US lang code
+        
+        CMAP_MAX_CODEPOINT = 0x10ffff     // maximum possible Unicode codepoint 
+                                          // contained in a cmap
     };
 
     // name table has a header, followed by name records, followed by string data
     struct NameHeader {
         mozilla::AutoSwap_PRUint16    format;       // Format selector (=0).
         mozilla::AutoSwap_PRUint16    count;        // Number of name records.
         mozilla::AutoSwap_PRUint16    stringOffset; // Offset to start of string storage
                                                     // (from start of table)
--- a/gfx/thebes/public/gfxTypes.h
+++ b/gfx/thebes/public/gfxTypes.h
@@ -50,16 +50,27 @@ typedef double gfxFloat;
 # define THEBES_API
 #elif defined(IMPL_THEBES)
 # define THEBES_API NS_EXPORT
 #else
 # define THEBES_API NS_IMPORT
 #endif
 
 /**
+ * gfx errors
+ */
+
+/* nsIDeviceContext.h defines a set of printer errors  */
+#define NS_ERROR_GFX_GENERAL_BASE (50) 
+
+/* Font cmap is strangely structured - avoid this font! */
+#define NS_ERROR_GFX_CMAP_MALFORMED          \
+  NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_GFX,NS_ERROR_GFX_GENERAL_BASE+1)
+
+/**
  * Priority of a line break opportunity.
  *
  * eNoBreak       The line has no break opportunities
  * eWordWrapBreak The line has a break opportunity only within a word. With
  *                word-wrap: break-word we will break at this point only if
  *                there are no other break opportunities in the line.
  * eNormalBreak   The line has a break opportunity determined by the standard
  *                line-breaking algorithm.
--- a/gfx/thebes/public/gfxWindowsFonts.h
+++ b/gfx/thebes/public/gfxWindowsFonts.h
@@ -48,16 +48,20 @@
 #include "gfxFontUtils.h"
 #include "gfxUserFontSet.h"
 
 #include "nsDataHashtable.h"
 
 #include <usp10.h>
 #include <cairo-win32.h>
 
+// xxx - used in FontEntry.  should be trimmed, moz code doesn't use
+//       exceptions.  use gfxSparseBitSet instead?
+#include <bitset>
+
 /**
  * List of different types of fonts we support on Windows.
  * These can generally be lumped in to 3 categories where we have to
  * do special things:  Really old fonts bitmap and vector fonts (device
  * and raster), Type 1 fonts, and TrueType/OpenType fonts.
  * 
  * This list is sorted in order from least prefered to most prefered.
  * We prefer Type1 fonts over OpenType fonts to avoid falling back to
--- a/gfx/thebes/src/gfxFontUtils.cpp
+++ b/gfx/thebes/src/gfxFontUtils.cpp
@@ -241,86 +241,109 @@ gfxFontUtils::ReadCMAPTableFormat12(PRUi
         OffsetNumberGroups = 12,
         OffsetGroups = 16,
 
         SizeOfGroup = 12,
 
         GroupOffsetStartCode = 0,
         GroupOffsetEndCode = 4
     };
-    NS_ENSURE_TRUE(aLength >= 16, NS_ERROR_FAILURE);
+    NS_ENSURE_TRUE(aLength >= 16, NS_ERROR_GFX_CMAP_MALFORMED);
 
-    NS_ENSURE_TRUE(ReadShortAt(aBuf, OffsetFormat) == 12, NS_ERROR_FAILURE);
-    NS_ENSURE_TRUE(ReadShortAt(aBuf, OffsetReserved) == 0, NS_ERROR_FAILURE);
+    NS_ENSURE_TRUE(ReadShortAt(aBuf, OffsetFormat) == 12, 
+                   NS_ERROR_GFX_CMAP_MALFORMED);
+    NS_ENSURE_TRUE(ReadShortAt(aBuf, OffsetReserved) == 0, 
+                   NS_ERROR_GFX_CMAP_MALFORMED);
 
     PRUint32 tablelen = ReadLongAt(aBuf, OffsetTableLength);
-    NS_ENSURE_TRUE(tablelen <= aLength, NS_ERROR_FAILURE);
-    NS_ENSURE_TRUE(tablelen >= 16, NS_ERROR_FAILURE);
+    NS_ENSURE_TRUE(tablelen <= aLength, NS_ERROR_GFX_CMAP_MALFORMED);
+    NS_ENSURE_TRUE(tablelen >= 16, NS_ERROR_GFX_CMAP_MALFORMED);
 
-    NS_ENSURE_TRUE(ReadLongAt(aBuf, OffsetLanguage) == 0, NS_ERROR_FAILURE);
+    NS_ENSURE_TRUE(ReadLongAt(aBuf, OffsetLanguage) == 0, 
+                   NS_ERROR_GFX_CMAP_MALFORMED);
 
     const PRUint32 numGroups  = ReadLongAt(aBuf, OffsetNumberGroups);
-    NS_ENSURE_TRUE(tablelen >= 16 + (12 * numGroups), NS_ERROR_FAILURE);
+    NS_ENSURE_TRUE(tablelen >= 16 + (12 * numGroups), 
+                   NS_ERROR_GFX_CMAP_MALFORMED);
 
     const PRUint8 *groups = aBuf + OffsetGroups;
+    PRUint32 prevEndCharCode = 0;
     for (PRUint32 i = 0; i < numGroups; i++, groups += SizeOfGroup) {
         const PRUint32 startCharCode = ReadLongAt(groups, GroupOffsetStartCode);
         const PRUint32 endCharCode = ReadLongAt(groups, GroupOffsetEndCode);
+        NS_ENSURE_TRUE((prevEndCharCode < startCharCode || i == 0) &&
+                       startCharCode <= endCharCode &&
+                       endCharCode <= CMAP_MAX_CODEPOINT, 
+                       NS_ERROR_GFX_CMAP_MALFORMED);
         aCharacterMap.SetRange(startCharCode, endCharCode);
+        prevEndCharCode = endCharCode;
     }
 
     return NS_OK;
 }
 
 nsresult 
 gfxFontUtils::ReadCMAPTableFormat4(PRUint8 *aBuf, PRUint32 aLength, gfxSparseBitSet& aCharacterMap)
 {
     enum {
         OffsetFormat = 0,
         OffsetLength = 2,
         OffsetLanguage = 4,
         OffsetSegCountX2 = 6
     };
 
-    NS_ENSURE_TRUE(ReadShortAt(aBuf, OffsetFormat) == 4, NS_ERROR_FAILURE);
+    NS_ENSURE_TRUE(ReadShortAt(aBuf, OffsetFormat) == 4, 
+                   NS_ERROR_GFX_CMAP_MALFORMED);
     PRUint16 tablelen = ReadShortAt(aBuf, OffsetLength);
-    NS_ENSURE_TRUE(tablelen <= aLength, NS_ERROR_FAILURE);
-    NS_ENSURE_TRUE(tablelen > 16, NS_ERROR_FAILURE);
+    NS_ENSURE_TRUE(tablelen <= aLength, NS_ERROR_GFX_CMAP_MALFORMED);
+    NS_ENSURE_TRUE(tablelen > 16, NS_ERROR_GFX_CMAP_MALFORMED);
     
     // some buggy fonts on Mac OS report lang = English (e.g. Arial Narrow Bold, v. 1.1 (Tiger))
 #if defined(XP_WIN)
-    NS_ENSURE_TRUE(ReadShortAt(aBuf, OffsetLanguage) == 0, NS_ERROR_FAILURE);
+    NS_ENSURE_TRUE(ReadShortAt(aBuf, OffsetLanguage) == 0, 
+                   NS_ERROR_GFX_CMAP_MALFORMED);
 #endif
 
     PRUint16 segCountX2 = ReadShortAt(aBuf, OffsetSegCountX2);
-    NS_ENSURE_TRUE(tablelen >= 16 + (segCountX2 * 4), NS_ERROR_FAILURE);
+    NS_ENSURE_TRUE(tablelen >= 16 + (segCountX2 * 4), 
+                   NS_ERROR_GFX_CMAP_MALFORMED);
 
     const PRUint16 segCount = segCountX2 / 2;
 
     const PRUint16 *endCounts = reinterpret_cast<const PRUint16*>(aBuf + 14);
     const PRUint16 *startCounts = endCounts + 1 /* skip one uint16 for reservedPad */ + segCount;
     const PRUint16 *idDeltas = startCounts + segCount;
     const PRUint16 *idRangeOffsets = idDeltas + segCount;
+    PRUint16 prevEndCount = 0;
     for (PRUint16 i = 0; i < segCount; i++) {
         const PRUint16 endCount = ReadShortAt16(endCounts, i);
         const PRUint16 startCount = ReadShortAt16(startCounts, i);
         const PRUint16 idRangeOffset = ReadShortAt16(idRangeOffsets, i);
+        
+        // sanity-check range
+        NS_ENSURE_TRUE((startCount > prevEndCount || i == 0) && 
+                       startCount <= endCount,
+                       NS_ERROR_GFX_CMAP_MALFORMED);
+        prevEndCount = endCount;
+        
         if (idRangeOffset == 0) {
             aCharacterMap.SetRange(startCount, endCount);
         } else {
             // const PRUint16 idDelta = ReadShortAt16(idDeltas, i); // Unused: self-documenting.
             for (PRUint32 c = startCount; c <= endCount; ++c) {
                 if (c == 0xFFFF)
                     break;
 
                 const PRUint16 *gdata = (idRangeOffset/2 
                                          + (c - startCount)
                                          + &idRangeOffsets[i]);
 
-                NS_ENSURE_TRUE((PRUint8*)gdata > aBuf && (PRUint8*)gdata < aBuf + aLength, NS_ERROR_FAILURE);
+                NS_ENSURE_TRUE((PRUint8*)gdata > aBuf && 
+                               (PRUint8*)gdata < aBuf + aLength, 
+                               NS_ERROR_GFX_CMAP_MALFORMED);
 
                 // make sure we have a glyph
                 if (*gdata != 0) {
                     // The glyph index at this point is:
                     // glyph = (ReadShortAt16(idDeltas, i) + *gdata) % 65536;
 
                     aCharacterMap.set(c);
                 }
@@ -385,17 +408,17 @@ gfxFontUtils::ReadCMAP(PRUint8 *aBuf, PR
         const PRUint16 platformID = ReadShortAt(table, TableOffsetPlatformID);
         if (!acceptablePlatform(platformID))
             continue;
 
         const PRUint16 encodingID = ReadShortAt(table, TableOffsetEncodingID);
         const PRUint32 offset = ReadLongAt(table, TableOffsetOffset);
 
         NS_ASSERTION(offset < aBufLength, "cmap table offset is longer than table size");
-        NS_ENSURE_TRUE(offset < aBufLength, NS_ERROR_FAILURE);
+        NS_ENSURE_TRUE(offset < aBufLength, NS_ERROR_GFX_CMAP_MALFORMED);
 
         const PRUint8 *subtable = aBuf + offset;
         const PRUint16 format = ReadShortAt(subtable, SubtableOffsetFormat);
 
         if (isSymbol(platformID, encodingID)) {
             aUnicodeFont = PR_FALSE;
             aSymbolFont = PR_TRUE;
             keepFormat = format;
--- a/gfx/thebes/src/gfxMacPlatformFontList.mm
+++ b/gfx/thebes/src/gfxMacPlatformFontList.mm
@@ -190,16 +190,21 @@ MacOSFontEntry::ReadCMAP()
     nsAutoTArray<PRUint8,16384> buffer;
     if (GetFontTable(kCMAP, buffer) != NS_OK)
         return NS_ERROR_FAILURE;
     PRUint8 *cmap = buffer.Elements();
 
     PRPackedBool  unicodeFont, symbolFont; // currently ignored
     nsresult rv = gfxFontUtils::ReadCMAP(cmap, buffer.Length(),
                                          mCharacterMap, unicodeFont, symbolFont);
+                                         
+    if (NS_FAILED(rv)) {
+        mCharacterMap.reset();
+        return rv;
+    }
 
     // for complex scripts, check for the presence of mort/morx
     PRBool checkedForMorphTable = PR_FALSE, hasMorphTable = PR_FALSE;
 
     ATSFontRef fontRef = GetFontRef();
     PRUint32 s, numScripts = sizeof(gScriptsThatRequireShaping) / sizeof(ScriptRange);
 
     for (s = 0; s < numScripts; s++) {
@@ -793,18 +798,18 @@ gfxMacPlatformFontList::MakePlatformFont
                                    FONT_STYLE_ITALIC : FONT_STYLE_NORMAL, 
                                userFontData);
 
         if (!newFontEntry) {
             delete userFontData;
             return nsnull;
         }
 
-        // if we succeeded (which should always be the case), return the new font
-        if (newFontEntry->mIsValid)
+        // if succeeded and font cmap is good, return the new font
+        if (newFontEntry->mIsValid && NS_SUCCEEDED(newFontEntry->ReadCMAP()))
             return newFontEntry;
 
         // if something is funky about this font, delete immediately
 #if DEBUG
         char warnBuf[1024];
         const gfxProxyFontEntry *proxyEntry = 
             static_cast<const gfxProxyFontEntry*> (aProxyEntry);
         sprintf(warnBuf, "downloaded font not loaded properly, removed face for (%s)", 
--- a/gfx/thebes/src/gfxWindowsFonts.cpp
+++ b/gfx/thebes/src/gfxWindowsFonts.cpp
@@ -652,46 +652,61 @@ FontEntry::CreateFontEntry(const nsAStri
     LOGFONTW logFont;
     PRBool needRelease = PR_FALSE;
 
     // jtdfix - need to set charset, unicode ranges, pitch/family
 
     FontEntry *fe;
 
     fe = new FontEntry(aName, aFontType, aItalic, aWeight, aUserFontData);
+    if (!fe)
+        return nsnull;
 
     if (!aLogFont) {
         aLogFont = &logFont;
         FontEntry::FillLogFont(aLogFont, aName, aFontType, aItalic, aWeight, 0);
     }
 
     if (!hdc) {
         hdc = GetDC(nsnull);
         SetGraphicsMode(hdc, GM_ADVANCED);
         needRelease = PR_TRUE;
     }
     
     HFONT font = CreateFontIndirectW(aLogFont);
 
     if (font) {
         AutoPushPopFont fontCleanup(hdc, font);
-
-        // ReadCMAP may change the values of mUnicodeFont and mSymbolFont
-        if (NS_FAILED(::ReadCMAP(hdc, fe))) {
-            // Type1 fonts aren't necessarily Unicode but
-            // this is the best guess we can make here
-            if (fe->IsType1())
-                fe->mUnicodeFont = PR_TRUE;
-            else
-                fe->mUnicodeFont = PR_FALSE;
-
-            // For fonts where we failed to read the character map,
-            // we can take a slow path to look up glyphs character by character
-            fe->mUnknownCMAP = PR_TRUE;
-
+        nsresult rv;
+
+        rv = ::ReadCMAP(hdc, fe);
+
+        if (NS_FAILED(rv)) {
+
+            // ReadCMAP can fail but only handle failure cases when the font
+            // did *not* have a cmap that appears to be malformed.  Uniscribe
+            // can crash with corrupt cmaps.
+            if (rv == NS_ERROR_GFX_CMAP_MALFORMED) {
+                delete fe;
+                return nsnull;
+            } else {
+
+                // ReadCMAP may change the values of mUnicodeFont and mSymbolFont
+    
+                // Type1 fonts aren't necessarily Unicode but
+                // this is the best guess we can make here
+                if (fe->IsType1())
+                    fe->mUnicodeFont = PR_TRUE;
+                else
+                    fe->mUnicodeFont = PR_FALSE;
+    
+                // For fonts where we failed to read the character map,
+                // we can take a slow path to look up glyphs character by character
+                fe->mUnknownCMAP = PR_TRUE;
+            }
         } 
     }
 
     if (needRelease)
         ReleaseDC(nsnull, hdc);
 
     return fe;
 }