Bug 992100 - Mask out complex-script codepoints in fonts that lack the necessary layout tables. r=roc, a=sledru
authorJonathan Kew <jkew@mozilla.com>
Mon, 05 May 2014 19:59:55 +0100
changeset 199078 2569162305c8b36ee94c45d5b05c651351217ce4
parent 199077 aff9fd4e047374f68ea6f31a7d5d59af37aeac2b
child 199079 9b62596f615108b0042065d5dfaa9e8bcadec0cb
push id3624
push userasasaki@mozilla.com
push dateMon, 09 Jun 2014 21:49:01 +0000
treeherdermozilla-beta@b1a5da15899a [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersroc, sledru
bugs992100
milestone31.0a2
Bug 992100 - Mask out complex-script codepoints in fonts that lack the necessary layout tables. r=roc, a=sledru
gfx/thebes/gfxFT2FontList.cpp
gfx/thebes/gfxFont.cpp
gfx/thebes/gfxFont.h
gfx/thebes/gfxMacPlatformFontList.mm
gfx/thebes/gfxPlatformFontList.cpp
gfx/thebes/gfxPlatformFontList.h
layout/reftests/text/reftest.list
--- a/gfx/thebes/gfxFT2FontList.cpp
+++ b/gfx/thebes/gfxFT2FontList.cpp
@@ -457,16 +457,27 @@ FT2FontEntry::CairoFontFace()
         mFontFace = cairo_ft_font_face_create_for_ft_face(face, flags);
         FTUserFontData *userFontData = new FTUserFontData(face, face.FontData());
         cairo_font_face_set_user_data(mFontFace, &sFTUserFontDataKey,
                                       userFontData, FTFontDestroyFunc);
     }
     return mFontFace;
 }
 
+// Copied/modified from similar code in gfxMacPlatformFontList.mm:
+// Complex scripts will not render correctly unless Graphite or OT
+// layout tables are present.
+// For OpenType, we also check that the GSUB table supports the relevant
+// script tag, to avoid using things like Arial Unicode MS for Lao (it has
+// the characters, but lacks OpenType support).
+
+// TODO: consider whether we should move this to gfxFontEntry and do similar
+// cmap-masking on all platforms to avoid using fonts that won't shape
+// properly.
+
 nsresult
 FT2FontEntry::ReadCMAP(FontInfoData *aFontInfoData)
 {
     if (mCharacterMap) {
         return NS_OK;
     }
 
     nsRefPtr<gfxCharacterMap> charmap = new gfxCharacterMap();
@@ -477,16 +488,38 @@ FT2FontEntry::ReadCMAP(FontInfoData *aFo
     if (NS_SUCCEEDED(rv)) {
         bool unicodeFont;
         bool symbolFont;
         rv = gfxFontUtils::ReadCMAP(buffer.Elements(), buffer.Length(),
                                     *charmap, mUVSOffset,
                                     unicodeFont, symbolFont);
     }
 
+    if (NS_SUCCEEDED(rv) && !HasGraphiteTables()) {
+        // We assume a Graphite font knows what it's doing,
+        // and provides whatever shaping is needed for the
+        // characters it supports, so only check/clear the
+        // complex-script ranges for non-Graphite fonts
+
+        // for layout support, check for the presence of opentype layout tables
+        bool hasGSUB = HasFontTable(TRUETYPE_TAG('G','S','U','B'));
+
+        for (const ScriptRange* sr = gfxPlatformFontList::sComplexScriptRanges;
+             sr->rangeStart; sr++) {
+            // check to see if the cmap includes complex script codepoints
+            if (charmap->TestRange(sr->rangeStart, sr->rangeEnd)) {
+                // We check for GSUB here, as GPOS alone would not be ok.
+                if (hasGSUB && SupportsScriptInGSUB(sr->tags)) {
+                    continue;
+                }
+                charmap->ClearRange(sr->rangeStart, sr->rangeEnd);
+            }
+        }
+    }
+
     mHasCmapTable = NS_SUCCEEDED(rv);
     if (mHasCmapTable) {
         gfxPlatformFontList *pfl = gfxPlatformFontList::PlatformFontList();
         mCharacterMap = pfl->FindCharMap(charmap);
     } else {
         // if error occurred, initialize to null cmap
         mCharacterMap = new gfxCharacterMap();
     }
--- a/gfx/thebes/gfxFont.cpp
+++ b/gfx/thebes/gfxFont.cpp
@@ -239,16 +239,33 @@ uint16_t gfxFontEntry::GetUVSGlyph(uint3
 
     if (mUVSData) {
         return gfxFontUtils::MapUVSToGlyphFormat14(mUVSData, aCh, aVS);
     }
 
     return 0;
 }
 
+bool gfxFontEntry::SupportsScriptInGSUB(const hb_tag_t* aScriptTags)
+{
+    hb_face_t *face = GetHBFace();
+    if (!face) {
+        return false;
+    }
+
+    unsigned int index;
+    hb_tag_t     chosenScript;
+    bool found =
+        hb_ot_layout_table_choose_script(face, TRUETYPE_TAG('G','S','U','B'),
+                                         aScriptTags, &index, &chosenScript);
+    hb_face_destroy(face);
+
+    return found && chosenScript != TRUETYPE_TAG('D','F','L','T');
+}
+
 nsresult gfxFontEntry::ReadCMAP(FontInfoData *aFontInfoData)
 {
     NS_ASSERTION(false, "using default no-op implementation of ReadCMAP");
     mCharacterMap = new gfxCharacterMap();
     return NS_OK;
 }
 
 nsString
--- a/gfx/thebes/gfxFont.h
+++ b/gfx/thebes/gfxFont.h
@@ -491,16 +491,26 @@ public:
     void NotifyFontDestroyed(gfxFont* aFont);
 
     // For memory reporting
     virtual void AddSizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf,
                                         FontListSizes* aSizes) const;
     virtual void AddSizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf,
                                         FontListSizes* aSizes) const;
 
+    // Used when checking for complex script support, to mask off cmap ranges
+    struct ScriptRange {
+        uint32_t         rangeStart;
+        uint32_t         rangeEnd;
+        hb_tag_t         tags[3]; // one or two OpenType script tags to check,
+                                  // plus a NULL terminator
+    };
+
+    bool SupportsScriptInGSUB(const hb_tag_t* aScriptTags);
+
     nsString         mName;
     nsString         mFamilyName;
 
     bool             mItalic      : 1;
     bool             mFixedPitch  : 1;
     bool             mIsProxy     : 1;
     bool             mIsValid     : 1;
     bool             mIsBadUnderlineFont : 1;
--- a/gfx/thebes/gfxMacPlatformFontList.mm
+++ b/gfx/thebes/gfxMacPlatformFontList.mm
@@ -47,17 +47,16 @@
 
 #import <AppKit/AppKit.h>
 
 #include "gfxPlatformMac.h"
 #include "gfxMacPlatformFontList.h"
 #include "gfxMacFont.h"
 #include "gfxUserFontSet.h"
 #include "harfbuzz/hb.h"
-#include "harfbuzz/hb-ot.h"
 
 #include "nsServiceManagerUtils.h"
 #include "nsTArray.h"
 
 #include "nsDirectoryServiceUtils.h"
 #include "nsDirectoryServiceDefs.h"
 #include "nsISimpleEnumerator.h"
 #include "nsCharTraits.h"
@@ -152,81 +151,16 @@ static NSString* GetNSStringForString(co
 // For OpenType, we also check that the GSUB table supports the relevant
 // script tag, to avoid using things like Arial Unicode MS for Lao (it has
 // the characters, but lacks OpenType support).
 
 // TODO: consider whether we should move this to gfxFontEntry and do similar
 // cmap-masking on other platforms to avoid using fonts that won't shape
 // properly.
 
-struct ScriptRange {
-    uint32_t         rangeStart;
-    uint32_t         rangeEnd;
-    hb_tag_t         tags[3]; // one or two OpenType script tags to check,
-                              // plus a NULL terminator
-};
-
-static const ScriptRange sComplexScripts[] = {
-    // Actually, now that harfbuzz supports presentation-forms shaping for
-    // Arabic, we can render it without layout tables. So maybe we don't
-    // want to mask the basic Arabic block here?
-    // This affects the arabic-fallback-*.html reftests, which rely on
-    // loading a font that *doesn't* have any GSUB table.
-    { 0x0600, 0x06FF, { TRUETYPE_TAG('a','r','a','b'), 0, 0 } },
-    { 0x0700, 0x074F, { TRUETYPE_TAG('s','y','r','c'), 0, 0 } },
-    { 0x0750, 0x077F, { TRUETYPE_TAG('a','r','a','b'), 0, 0 } },
-    { 0x08A0, 0x08FF, { TRUETYPE_TAG('a','r','a','b'), 0, 0 } },
-    { 0x0900, 0x097F, { TRUETYPE_TAG('d','e','v','2'),
-                        TRUETYPE_TAG('d','e','v','a'), 0 } },
-    { 0x0980, 0x09FF, { TRUETYPE_TAG('b','n','g','2'),
-                        TRUETYPE_TAG('b','e','n','g'), 0 } },
-    { 0x0A00, 0x0A7F, { TRUETYPE_TAG('g','u','r','2'),
-                        TRUETYPE_TAG('g','u','r','u'), 0 } },
-    { 0x0A80, 0x0AFF, { TRUETYPE_TAG('g','j','r','2'),
-                        TRUETYPE_TAG('g','u','j','r'), 0 } },
-    { 0x0B00, 0x0B7F, { TRUETYPE_TAG('o','r','y','2'),
-                        TRUETYPE_TAG('o','r','y','a'), 0 } },
-    { 0x0B80, 0x0BFF, { TRUETYPE_TAG('t','m','l','2'),
-                        TRUETYPE_TAG('t','a','m','l'), 0 } },
-    { 0x0C00, 0x0C7F, { TRUETYPE_TAG('t','e','l','2'),
-                        TRUETYPE_TAG('t','e','l','u'), 0 } },
-    { 0x0C80, 0x0CFF, { TRUETYPE_TAG('k','n','d','2'),
-                        TRUETYPE_TAG('k','n','d','a'), 0 } },
-    { 0x0D00, 0x0D7F, { TRUETYPE_TAG('m','l','m','2'),
-                        TRUETYPE_TAG('m','l','y','m'), 0 } },
-    { 0x0D80, 0x0DFF, { TRUETYPE_TAG('s','i','n','h'), 0, 0 } },
-    { 0x0E80, 0x0EFF, { TRUETYPE_TAG('l','a','o',' '), 0, 0 } },
-    { 0x0F00, 0x0FFF, { TRUETYPE_TAG('t','i','b','t'), 0, 0 } },
-    { 0x1000, 0x109f, { TRUETYPE_TAG('m','y','m','r'),
-                        TRUETYPE_TAG('m','y','m','2'), 0 } },
-    { 0x1780, 0x17ff, { TRUETYPE_TAG('k','h','m','r'), 0, 0 } },
-    // Khmer Symbols (19e0..19ff) don't seem to need any special shaping
-    { 0xaa60, 0xaa7f, { TRUETYPE_TAG('m','y','m','r'),
-                        TRUETYPE_TAG('m','y','m','2'), 0 } },
-    // Thai seems to be "renderable" without AAT morphing tables
-};
-
-static bool
-SupportsScriptInGSUB(gfxFontEntry* aFontEntry, const hb_tag_t* aScriptTags)
-{
-    hb_face_t *face = aFontEntry->GetHBFace();
-    if (!face) {
-        return false;
-    }
-
-    unsigned int index;
-    hb_tag_t     chosenScript;
-    bool found =
-        hb_ot_layout_table_choose_script(face, TRUETYPE_TAG('G','S','U','B'),
-                                         aScriptTags, &index, &chosenScript);
-    hb_face_destroy(face);
-
-    return found && chosenScript != TRUETYPE_TAG('D','F','L','T');
-}
-
 nsresult
 MacOSFontEntry::ReadCMAP(FontInfoData *aFontInfoData)
 {
     // attempt this once, if errors occur leave a blank cmap
     if (mCharacterMap) {
         return NS_OK;
     }
 
@@ -268,38 +202,36 @@ MacOSFontEntry::ReadCMAP(FontInfoData *a
         bool hasAATLayout = HasFontTable(TRUETYPE_TAG('m','o','r','x')) ||
                             HasFontTable(TRUETYPE_TAG('m','o','r','t'));
         bool hasGSUB = HasFontTable(TRUETYPE_TAG('G','S','U','B'));
         bool hasGPOS = HasFontTable(TRUETYPE_TAG('G','P','O','S'));
         if (hasAATLayout && !(hasGSUB || hasGPOS)) {
             mRequiresAAT = true; // prefer CoreText if font has no OTL tables
         }
 
-        uint32_t numScripts = ArrayLength(sComplexScripts);
-
-        for (uint32_t s = 0; s < numScripts; s++) {
+        for (const ScriptRange* sr = gfxPlatformFontList::sComplexScriptRanges;
+             sr->rangeStart; sr++) {
             // check to see if the cmap includes complex script codepoints
-            const ScriptRange& sr = sComplexScripts[s];
-            if (charmap->TestRange(sr.rangeStart, sr.rangeEnd)) {
+            if (charmap->TestRange(sr->rangeStart, sr->rangeEnd)) {
                 if (hasAATLayout) {
                     // prefer CoreText for Apple's complex-script fonts,
                     // even if they also have some OpenType tables
                     // (e.g. Geeza Pro Bold on 10.6; see bug 614903)
                     mRequiresAAT = true;
                     // and don't mask off complex-script ranges, we assume
                     // the AAT tables will provide the necessary shaping
                     continue;
                 }
 
                 // We check for GSUB here, as GPOS alone would not be ok.
-                if (hasGSUB && SupportsScriptInGSUB(this, sr.tags)) {
+                if (hasGSUB && SupportsScriptInGSUB(sr->tags)) {
                     continue;
                 }
 
-                charmap->ClearRange(sr.rangeStart, sr.rangeEnd);
+                charmap->ClearRange(sr->rangeStart, sr->rangeEnd);
             }
         }
     }
 
     mHasCmapTable = NS_SUCCEEDED(rv);
     if (mHasCmapTable) {
         gfxPlatformFontList *pfl = gfxPlatformFontList::PlatformFontList();
         mCharacterMap = pfl->FindCharMap(charmap);
--- a/gfx/thebes/gfxPlatformFontList.cpp
+++ b/gfx/thebes/gfxPlatformFontList.cpp
@@ -37,16 +37,62 @@ using namespace mozilla;
 #define LOG_FONTINIT_ENABLED() PR_LOG_TEST( \
                                    gfxPlatform::GetLog(eGfxLog_fontinit), \
                                    PR_LOG_DEBUG)
 
 #endif // PR_LOGGING
 
 gfxPlatformFontList *gfxPlatformFontList::sPlatformFontList = nullptr;
 
+// Character ranges that require complex-script shaping support in the font,
+// and so should be masked out by ReadCMAP if the necessary layout tables
+// are not present.
+// Currently used by the Mac and FT2 implementations only, but probably should
+// be supported on Windows as well.
+const gfxFontEntry::ScriptRange gfxPlatformFontList::sComplexScriptRanges[] = {
+    // Actually, now that harfbuzz supports presentation-forms shaping for
+    // Arabic, we can render it without layout tables. So maybe we don't
+    // want to mask the basic Arabic block here?
+    // This affects the arabic-fallback-*.html reftests, which rely on
+    // loading a font that *doesn't* have any GSUB table.
+    { 0x0600, 0x06FF, { TRUETYPE_TAG('a','r','a','b'), 0, 0 } },
+    { 0x0700, 0x074F, { TRUETYPE_TAG('s','y','r','c'), 0, 0 } },
+    { 0x0750, 0x077F, { TRUETYPE_TAG('a','r','a','b'), 0, 0 } },
+    { 0x08A0, 0x08FF, { TRUETYPE_TAG('a','r','a','b'), 0, 0 } },
+    { 0x0900, 0x097F, { TRUETYPE_TAG('d','e','v','2'),
+                        TRUETYPE_TAG('d','e','v','a'), 0 } },
+    { 0x0980, 0x09FF, { TRUETYPE_TAG('b','n','g','2'),
+                        TRUETYPE_TAG('b','e','n','g'), 0 } },
+    { 0x0A00, 0x0A7F, { TRUETYPE_TAG('g','u','r','2'),
+                        TRUETYPE_TAG('g','u','r','u'), 0 } },
+    { 0x0A80, 0x0AFF, { TRUETYPE_TAG('g','j','r','2'),
+                        TRUETYPE_TAG('g','u','j','r'), 0 } },
+    { 0x0B00, 0x0B7F, { TRUETYPE_TAG('o','r','y','2'),
+                        TRUETYPE_TAG('o','r','y','a'), 0 } },
+    { 0x0B80, 0x0BFF, { TRUETYPE_TAG('t','m','l','2'),
+                        TRUETYPE_TAG('t','a','m','l'), 0 } },
+    { 0x0C00, 0x0C7F, { TRUETYPE_TAG('t','e','l','2'),
+                        TRUETYPE_TAG('t','e','l','u'), 0 } },
+    { 0x0C80, 0x0CFF, { TRUETYPE_TAG('k','n','d','2'),
+                        TRUETYPE_TAG('k','n','d','a'), 0 } },
+    { 0x0D00, 0x0D7F, { TRUETYPE_TAG('m','l','m','2'),
+                        TRUETYPE_TAG('m','l','y','m'), 0 } },
+    { 0x0D80, 0x0DFF, { TRUETYPE_TAG('s','i','n','h'), 0, 0 } },
+    { 0x0E80, 0x0EFF, { TRUETYPE_TAG('l','a','o',' '), 0, 0 } },
+    { 0x0F00, 0x0FFF, { TRUETYPE_TAG('t','i','b','t'), 0, 0 } },
+    { 0x1000, 0x109f, { TRUETYPE_TAG('m','y','m','r'),
+                        TRUETYPE_TAG('m','y','m','2'), 0 } },
+    { 0x1780, 0x17ff, { TRUETYPE_TAG('k','h','m','r'), 0, 0 } },
+    // Khmer Symbols (19e0..19ff) don't seem to need any special shaping
+    { 0xaa60, 0xaa7f, { TRUETYPE_TAG('m','y','m','r'),
+                        TRUETYPE_TAG('m','y','m','2'), 0 } },
+    // Thai seems to be "renderable" without AAT morphing tables
+    { 0, 0, { 0, 0, 0 } } // terminator
+};
+
 // prefs for the font info loader
 #define FONT_LOADER_FAMILIES_PER_SLICE_PREF "gfx.font_loader.families_per_slice"
 #define FONT_LOADER_DELAY_PREF              "gfx.font_loader.delay"
 #define FONT_LOADER_INTERVAL_PREF           "gfx.font_loader.interval"
 
 static const char* kObservedPrefs[] = {
     "font.",
     "font.name-list.",
--- a/gfx/thebes/gfxPlatformFontList.h
+++ b/gfx/thebes/gfxPlatformFontList.h
@@ -184,16 +184,18 @@ public:
     void AddUserFontSet(gfxUserFontSet *aUserFontSet) {
         mUserFontSetList.PutEntry(aUserFontSet);
     }
 
     void RemoveUserFontSet(gfxUserFontSet *aUserFontSet) {
         mUserFontSetList.RemoveEntry(aUserFontSet);
     }
 
+    static const gfxFontEntry::ScriptRange sComplexScriptRanges[];
+
 protected:
     class MemoryReporter MOZ_FINAL : public nsIMemoryReporter
     {
     public:
         NS_DECL_ISUPPORTS
         NS_DECL_NSIMEMORYREPORTER
     };
 
--- a/layout/reftests/text/reftest.list
+++ b/layout/reftests/text/reftest.list
@@ -149,22 +149,22 @@ fails-if(!winWidget&&!gtk2Widget) skip-i
 HTTP(..) != kerning-01.html kerning-01-notref.html
 # Test for bug 577380, support for AAT layout (on OS X only)
 random-if(!cocoaWidget) == 577380.html 577380-ref.html
 # Test for OpenType Arabic shaping support
 HTTP(..) == arabic-shaping-1.html arabic-shaping-1-ref.html
 # check ligature in Arial Bold on Windows, for bug 644184; may fail on other platforms depending on fonts
 random-if(!winWidget) == arial-bold-lam-alef-1.html arial-bold-lam-alef-1-ref.html
 # Fallback (presentation-forms) shaping with a font that lacks GSUB/GPOS
-# These tests are not valid on OS X because our masking of complex-script ranges
+# These tests are not valid with Mac or FT2 font backends because our masking of complex-script ranges
 # in the 'cmap' will prevent the test font (without GSUB) being used.
-skip-if(B2G) fails-if(cocoaWidget) HTTP(..) == arabic-fallback-1.html arabic-fallback-1-ref.html
-fails-if(cocoaWidget) HTTP(..) == arabic-fallback-2.html arabic-fallback-2-ref.html
-fails-if(cocoaWidget) HTTP(..) == arabic-fallback-3.html arabic-fallback-3-ref.html
-fails-if(!cocoaWidget) HTTP(..) != arabic-fallback-4.html arabic-fallback-4-notref.html
+skip-if(B2G) fails-if(cocoaWidget||Android) HTTP(..) == arabic-fallback-1.html arabic-fallback-1-ref.html
+fails-if(cocoaWidget||Android||B2G) HTTP(..) == arabic-fallback-2.html arabic-fallback-2-ref.html
+fails-if(cocoaWidget||Android||B2G) HTTP(..) == arabic-fallback-3.html arabic-fallback-3-ref.html
+fails-if(!cocoaWidget&&!Android&&!B2G) HTTP(..) != arabic-fallback-4.html arabic-fallback-4-notref.html
 == arabic-marks-1.html arabic-marks-1-ref.html
 
 == 726392-1.html 726392-1-ref.html
 == 726392-2.html 726392-2-ref.html
 skip-if(B2G) == 726392-3.html 726392-3-ref.html
 == 745555-1.html 745555-1-ref.html
 == 745555-2.html 745555-2-ref.html
 == 820255.html 820255-ref.html