Bug 1228022 - part 2 - Add support for reading Mac OS Roman encoded names from SFNTNameTables; r=jfkthame
author Haik Aftandilian <haftandilian@mozilla.com>
Fri, 09 Sep 2016 13:55:21 -0700
changeset 355532 82476bb1bf1783df9ac1ce7473d131343ef4dd24
parent 355531 ea7c4ad6897439b2d6fb5d8b6c2052c6b9e96181
child 355533 ee5d5cea6137c8ed6b4e706ac64a7c345ea1ad9b
push id 6570
push user raliiev@mozilla.com
push date Mon, 14 Nov 2016 12:26:13 +0000
treeherder mozilla-beta@f455459b2ae5 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jfkthame
bugs 1228022
milestone 51.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1228022 - part 2 - Add support for reading Mac OS Roman encoded names from SFNTNameTables; r=jfkthame When reading a U16 font name from the SFNTNameTable, a name entry with platformID == 1 (Macintosh) and platformSpecificID (aka encodingID) == 0 (Roman) is read as Mac Roman and converted to U16. This patch refactors the matchers created in CreateCanonicalU16Matchers to return the name encoding type instead of a boolean. The encoding type can then be used to call the appropriate decoding function. CreateCanonicalU16Matchers is also changed so that it doesn't enqueue unnecessary matchers on OS X. On OS X, if the nametable record's platformID field is PLATFORM_ID, IsUTF16Encoding() will always return false so matchers requiring both of those conditions will never match. There are several other platformSpecificIDs in Mac SFNTNameTables such as Japanese, Traditional Chinese, and Korean. Fonts with names in those encodings won't have their names properly decoded, but that should be OK as SFNTData::GetUniqueKey falls back to another scheme for hashing fonts if the GetU16FullName call fails. Tests on El Capitan and Sierra revealed that Macs use Microsoft/Unicode SFNTNameTable names as well as Mac/Roman. MozReview-Commit-ID: F8fyDVDwHs7
gfx/2d/SFNTNameTable.cpp
gfx/2d/SFNTNameTable.h
--- a/gfx/2d/SFNTNameTable.cpp
+++ b/gfx/2d/SFNTNameTable.cpp
@@ -5,16 +5,20 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "SFNTNameTable.h"
 
 #include "BigEndianInts.h"
 #include "Logging.h"
 #include "mozilla/Move.h"
 
+#if defined(XP_MACOSX)
+#include <CoreFoundation/CoreFoundation.h>
+#endif
+
 namespace mozilla {
 namespace gfx {
 
 static const BigEndianUint16 FORMAT_0 = 0;
 
 static const BigEndianUint16 NAME_ID_FAMILY = 1;
 static const BigEndianUint16 NAME_ID_STYLE = 2;
 static const BigEndianUint16 NAME_ID_FULL = 4;
@@ -22,16 +26,18 @@ static const BigEndianUint16 NAME_ID_FUL
 static const BigEndianUint16 PLATFORM_ID_UNICODE = 0;
 static const BigEndianUint16 PLATFORM_ID_MAC = 1;
 static const BigEndianUint16 PLATFORM_ID_MICROSOFT = 3;
 
 static const BigEndianUint16 ENCODING_ID_MICROSOFT_SYMBOL = 0;
 static const BigEndianUint16 ENCODING_ID_MICROSOFT_UNICODEBMP = 1;
 static const BigEndianUint16 ENCODING_ID_MICROSOFT_UNICODEFULL = 10;
 
+static const BigEndianUint16 ENCODING_ID_MAC_ROMAN = 0;
+
 static const BigEndianUint16 LANG_ID_MAC_ENGLISH = 0;
 
 static const BigEndianUint16 LANG_ID_MICROSOFT_EN_US = 0x0409;
 
 #pragma pack(push, 1)
 
 // Name table has a header, followed by name records, followed by string data.
 struct NameHeader
@@ -48,16 +54,25 @@ struct NameRecord
   BigEndianUint16 languageID;
   BigEndianUint16 nameID;
   BigEndianUint16 length;     // String length in bytes.
   BigEndianUint16 offset;     // String offset from start of storage in bytes.
 };
 
 #pragma pack(pop)
 
+enum ENameDecoder : int // Matcher result: how ReadU16Name treats a record.
+{
+  eNameDecoderUTF16,     // Decode via ReadU16NameFromU16Record.
+#if defined(XP_MACOSX)
+  eNameDecoderMacRoman,  // Decode via ReadU16NameFromMacRomanRecord.
+#endif
+  eNameDecoderNone       // No match; ReadU16Name moves on to the next record.
+};
+
 /* static */
 UniquePtr<SFNTNameTable>
 SFNTNameTable::Create(const uint8_t *aNameData, uint32_t aDataLength)
 {
   MOZ_ASSERT(aNameData);
 
   if (aDataLength < sizeof(NameHeader)) {
     gfxWarning() << "Name data too short to contain NameHeader.";
@@ -93,111 +108,137 @@ SFNTNameTable::SFNTNameTable(const NameH
                                                      + sizeof(NameHeader)))
   , mEndOfRecords(mFirstRecord + aNameHeader->count)
   , mStringData(aNameData + aNameHeader->stringOffset)
   , mStringDataLength(aDataLength - aNameHeader->stringOffset)
 {
   MOZ_ASSERT(reinterpret_cast<const uint8_t*>(aNameHeader) == aNameData);
 }
 
-#if defined(XP_MACOSX)
-static const BigEndianUint16 CANONICAL_LANG_ID = LANG_ID_MAC_ENGLISH;
-static const BigEndianUint16 PLATFORM_ID = PLATFORM_ID_MAC;
-#else
-static const BigEndianUint16 CANONICAL_LANG_ID = LANG_ID_MICROSOFT_EN_US;
-static const BigEndianUint16 PLATFORM_ID = PLATFORM_ID_MICROSOFT;
-#endif
-
 static bool
 IsUTF16Encoding(const NameRecord *aNameRecord)
 {
   if (aNameRecord->platformID == PLATFORM_ID_MICROSOFT &&
       (aNameRecord->encodingID == ENCODING_ID_MICROSOFT_UNICODEBMP ||
        aNameRecord->encodingID == ENCODING_ID_MICROSOFT_SYMBOL)) {
     return true;
   }
 
   if (aNameRecord->platformID == PLATFORM_ID_UNICODE) {
     return true;
   }
 
   return false;
 }
 
+#if defined(XP_MACOSX)
+// Reports whether a name record is stored in the Macintosh platform's
+// Roman encoding, i.e. both its platformID and encodingID fields match.
+static bool
+IsMacRomanEncoding(const NameRecord *aNameRecord)
+{
+  const bool macPlatform = aNameRecord->platformID == PLATFORM_ID_MAC;
+  const bool romanEncoding = aNameRecord->encodingID == ENCODING_ID_MAC_ROMAN;
+
+  return macPlatform && romanEncoding;
+}
+#endif
+
 static NameRecordMatchers*
-CreateCanonicalU16Matchers(const BigEndianUint16& aNameID)
+CreateCanonicalMatchers(const BigEndianUint16& aNameID)
 {
+  // For Windows, we return only Microsoft platform name record
+  // matchers. On Mac, we return matchers for both Microsoft platform
+  // records and Mac platform records.
   NameRecordMatchers *matchers = new NameRecordMatchers();
 
-  // First, look for the English name (this will normally succeed).
+#if defined(XP_MACOSX)
+  // First, look for the English name.
   if (!matchers->append(
     [=](const NameRecord *aNameRecord) {
-        return aNameRecord->nameID == aNameID &&
-               aNameRecord->languageID == CANONICAL_LANG_ID &&
-               aNameRecord->platformID == PLATFORM_ID &&
-               IsUTF16Encoding(aNameRecord);
+        if (aNameRecord->nameID == aNameID &&
+            aNameRecord->languageID == LANG_ID_MAC_ENGLISH &&
+            aNameRecord->platformID == PLATFORM_ID_MAC &&
+            IsMacRomanEncoding(aNameRecord)) {
+          return eNameDecoderMacRoman;
+        } else  {
+          return eNameDecoderNone;
+        }
     })) {
     MOZ_CRASH();
   }
 
   // Second, look for all languages.
   if (!matchers->append(
     [=](const NameRecord *aNameRecord) {
-        return aNameRecord->nameID == aNameID &&
-               aNameRecord->platformID == PLATFORM_ID &&
-               IsUTF16Encoding(aNameRecord);
+        if (aNameRecord->nameID == aNameID &&
+            aNameRecord->platformID == PLATFORM_ID_MAC &&
+            IsMacRomanEncoding(aNameRecord)) {
+          return eNameDecoderMacRoman;
+        } else  {
+          return eNameDecoderNone;
+        }
+    })) {
+    MOZ_CRASH();
+  }
+#endif /* defined(XP_MACOSX) */
+
+  // First, look for the English name (this will normally succeed).
+  if (!matchers->append(
+    [=](const NameRecord *aNameRecord) {
+        if (aNameRecord->nameID == aNameID &&
+            aNameRecord->languageID == LANG_ID_MICROSOFT_EN_US &&
+            aNameRecord->platformID == PLATFORM_ID_MICROSOFT &&
+            IsUTF16Encoding(aNameRecord)) {
+          return eNameDecoderUTF16;
+        } else {
+          return eNameDecoderNone;
+        }
     })) {
     MOZ_CRASH();
   }
 
-#if defined(XP_MACOSX)
-  // On Mac may be dealing with font that only has Microsoft name entries.
+  // Second, look for all languages.
   if (!matchers->append(
     [=](const NameRecord *aNameRecord) {
-        return aNameRecord->nameID == aNameID &&
-               aNameRecord->languageID == LANG_ID_MICROSOFT_EN_US &&
-               aNameRecord->platformID == PLATFORM_ID_MICROSOFT &&
-               IsUTF16Encoding(aNameRecord);
+        if (aNameRecord->nameID == aNameID &&
+            aNameRecord->platformID == PLATFORM_ID_MICROSOFT &&
+            IsUTF16Encoding(aNameRecord)) {
+          return eNameDecoderUTF16;
+        } else {
+          return eNameDecoderNone;
+        }
     })) {
     MOZ_CRASH();
   }
-  if (!matchers->append(
-    [=](const NameRecord *aNameRecord) {
-        return aNameRecord->nameID == aNameID &&
-               aNameRecord->platformID == PLATFORM_ID_MICROSOFT &&
-               IsUTF16Encoding(aNameRecord);
-    })) {
-    MOZ_CRASH();
-  }
-#endif
 
   return matchers;
 }
 
 static const NameRecordMatchers&
 FullNameMatchers()
 {
   static const NameRecordMatchers *sFullNameMatchers =
-    CreateCanonicalU16Matchers(NAME_ID_FULL);
+    CreateCanonicalMatchers(NAME_ID_FULL);
   return *sFullNameMatchers;
 }
 
 static const NameRecordMatchers&
 FamilyMatchers()
 {
   static const NameRecordMatchers *sFamilyMatchers =
-    CreateCanonicalU16Matchers(NAME_ID_FAMILY);
+    CreateCanonicalMatchers(NAME_ID_FAMILY);
   return *sFamilyMatchers;
 }
 
 static const NameRecordMatchers&
 StyleMatchers()
 {
   static const NameRecordMatchers *sStyleMatchers =
-    CreateCanonicalU16Matchers(NAME_ID_STYLE);
+    CreateCanonicalMatchers(NAME_ID_STYLE);
   return *sStyleMatchers;
 }
 
 bool
 SFNTNameTable::GetU16FullName(mozilla::u16string& aU16FullName)
 {
   if (ReadU16Name(FullNameMatchers(), aU16FullName)) {
     return true;
@@ -225,29 +266,39 @@ bool
 SFNTNameTable::ReadU16Name(const NameRecordMatchers& aMatchers,
                            mozilla::u16string& aU16Name)
 {
   MOZ_ASSERT(!aMatchers.empty());
 
   for (size_t i = 0; i < aMatchers.length(); ++i) {
     const NameRecord* record = mFirstRecord;
     while (record != mEndOfRecords) {
-      if (aMatchers[i](record)) {
-        return ReadU16NameFromRecord(record, aU16Name);
+      switch (aMatchers[i](record)) {
+        case eNameDecoderUTF16:
+          return ReadU16NameFromU16Record(record, aU16Name);
+#if defined(XP_MACOSX)
+        case eNameDecoderMacRoman:
+          return ReadU16NameFromMacRomanRecord(record, aU16Name);
+#endif
+        case eNameDecoderNone:
+          break;
+        default:
+          MOZ_CRASH("Invalid matcher encoding type");
+          break;
       }
       ++record;
     }
   }
 
   return false;
 }
 
 bool
-SFNTNameTable::ReadU16NameFromRecord(const NameRecord *aNameRecord,
-                                     mozilla::u16string& aU16Name)
+SFNTNameTable::ReadU16NameFromU16Record(const NameRecord *aNameRecord,
+                                        mozilla::u16string& aU16Name)
 {
   uint32_t offset = aNameRecord->offset;
   uint32_t length = aNameRecord->length;
   if (mStringDataLength < offset + length) {
     gfxWarning() << "Name data too short to contain name string.";
     return false;
   }
 
@@ -256,10 +307,51 @@ SFNTNameTable::ReadU16NameFromRecord(con
   UniquePtr<char16_t[]> nameData(new char16_t[actualLength]);
   NativeEndian::copyAndSwapFromBigEndian(nameData.get(), startOfName,
                                          actualLength);
 
   aU16Name.assign(nameData.get(), actualLength);
   return true;
 }
 
+#if defined(XP_MACOSX)
+// Decodes a Mac OS Roman name record into aU16Name as UTF-16; returns false
+// if the record lies outside the string data or the CFString can't be made.
+bool
+SFNTNameTable::ReadU16NameFromMacRomanRecord(const NameRecord *aNameRecord,
+                                             mozilla::u16string& aU16Name)
+{
+  uint32_t offset = aNameRecord->offset;
+  uint32_t length = aNameRecord->length;
+  if (mStringDataLength < offset + length) {
+    gfxWarning() << "Name data too short to contain name string.";
+    return false;
+  }
+  if (length > INT_MAX) {
+    gfxWarning() << "Name record too long to decode.";
+    return false;
+  }
+
+  // Pointer to the Mac Roman encoded string in the name record.
+  const uint8_t *encodedStr = mStringData + offset;
+  CFStringRef cfString =
+    CFStringCreateWithBytesNoCopy(kCFAllocatorDefault, encodedStr, length,
+                                  kCFStringEncodingMacRoman, false,
+                                  kCFAllocatorNull);
+  if (!cfString) {
+    gfxWarning() << "Failed to decode Mac Roman name record.";
+    return false;
+  }
+
+  // Length (in UTF-16 code units) of the decoded string.
+  CFIndex decodedLength = CFStringGetLength(cfString);
+  UniquePtr<UniChar[]> u16Buffer = MakeUnique<UniChar[]>(decodedLength);
+  CFStringGetCharacters(cfString, CFRangeMake(0, decodedLength),
+                        u16Buffer.get());
+  CFRelease(cfString);
+
+  aU16Name.assign(reinterpret_cast<char16_t*>(u16Buffer.get()), decodedLength);
+  return true;
+}
+#endif
+
 } // gfx
 } // mozilla
--- a/gfx/2d/SFNTNameTable.h
+++ b/gfx/2d/SFNTNameTable.h
@@ -12,18 +12,19 @@
 #include "mozilla/Vector.h"
 #include "u16string.h"
 
 namespace mozilla {
 namespace gfx {
 
 struct NameHeader;
 struct NameRecord;
+enum ENameDecoder : int;
 
-typedef Vector<function<bool(const NameRecord*)>> NameRecordMatchers;
+typedef Vector<function<ENameDecoder(const NameRecord*)>> NameRecordMatchers;
 
 class SFNTNameTable final
 {
 public:
 
   /**
    * Creates a SFNTNameTable if the header data is valid. Note that the data is
    * NOT copied, so must exist for the lifetime of the table.
@@ -33,32 +34,37 @@ public:
    * @return UniquePtr to a SFNTNameTable or nullptr if the header is invalid.
    */
   static UniquePtr<SFNTNameTable> Create(const uint8_t *aNameData,
                                          uint32_t aDataLength);
 
   /**
    * Gets the full name from the name table. If the full name string is not
    * present it will use the family space concatenated with the style.
-   * This will only read names that are already UTF16.
+   * This will only read names that are already UTF16 or Mac OS Roman.
    *
    * @param aU16FullName string to be populated with the full name.
    * @return true if the full name is successfully read.
    */
   bool GetU16FullName(mozilla::u16string& aU16FullName);
 
 private:
 
   SFNTNameTable(const NameHeader *aNameHeader, const uint8_t *aNameData,
                 uint32_t aDataLength);
 
   bool ReadU16Name(const NameRecordMatchers& aMatchers, mozilla::u16string& aU16Name);
 
-  bool ReadU16NameFromRecord(const NameRecord *aNameRecord,
-                             mozilla::u16string& aU16Name);
+  bool ReadU16NameFromU16Record(const NameRecord *aNameRecord,
+                                mozilla::u16string& aU16Name);
+
+#if defined(XP_MACOSX)
+  bool ReadU16NameFromMacRomanRecord(const NameRecord *aNameRecord,
+                                     mozilla::u16string& aU16Name);
+#endif
 
   const NameRecord *mFirstRecord;
   const NameRecord *mEndOfRecords;
   const uint8_t *mStringData;
   const uint32_t mStringDataLength;
 };
 
 } // gfx