bug 1057330 pt 1 - update genUnicodePropertyData tool to add Vertical_Orientation property. r=smontagu
author Jonathan Kew <jkew@mozilla.com>
Thu, 28 Aug 2014 16:00:02 +0100
changeset 223849 0eb32e869ee335e18100ce6c8ff00dc2650feb97
parent 223848 228f0275bfc807d291b01cc9c9f6b420a0ee547a
child 223850 e0933bde618b7674cf245648de41754989fecaf8
push id 3979
push user raliiev@mozilla.com
push date Mon, 13 Oct 2014 16:35:44 +0000
treeherder mozilla-beta@30f2cc610691 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers smontagu
bugs 1057330
milestone 34.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
bug 1057330 pt 1 - update genUnicodePropertyData tool to add Vertical_Orientation property. r=smontagu
intl/unicharutil/tools/genUnicodePropertyData.pl
--- a/intl/unicharutil/tools/genUnicodePropertyData.pl
+++ b/intl/unicharutil/tools/genUnicodePropertyData.pl
@@ -21,21 +21,28 @@
 #       - EastAsianWidth.txt
 #       - BidiMirroring.txt
 #       - HangulSyllableType.txt
 #       - ReadMe.txt (to record version/date of the UCD)
 #       - Unihan_Variants.txt (from Unihan.zip)
 #     though this may change if we find a need for additional properties.
 #
 #     The Unicode data files listed above should be together in one directory.
+#
 #     We also require the file 
 #        http://www.unicode.org/Public/security/latest/xidmodifications.txt
 #     This file should be in a sub-directory "security" immediately below the
 #        directory containing the other Unicode data files.
 #
+#     We also require the latest data file for UTR50, currently revision-12:
+#        http://www.unicode.org/Public/vertical/revision-12/VerticalOrientation-12.txt
+#     This file should be in a sub-directory "vertical" immediately below the
+#        directory containing the other Unicode data files.
+#
+#
 # (2) Run this tool using a command line of the form
 #
 #         perl genUnicodePropertyData.pl \
 #                 /path/to/harfbuzz/src  \
 #                 /path/to/UCD-directory
 #
 #     This will generate (or overwrite!) the files
 #
@@ -296,39 +303,48 @@ my %bidicategoryCode = (
   "AL"  => "13", # Right-to-Left Arabic
   "RLE" => "14", # Right-to-Left Embedding
   "RLO" => "15", # Right-to-Left Override
   "PDF" => "16", # Pop Directional Format
   "NSM" => "17", # Non-Spacing Mark
   "BN"  => "18"  # Boundary Neutral
 );
 
+my %verticalOrientationCode = (  # Vertical_Orientation value => numeric code
+  U  => 0,  # Upright: the same orientation as in the code charts
+  R  => 1,  # Rotated: 90 degrees clockwise compared to the code charts
+  Tu => 2,  # Transformed typographically, with fallback to Upright
+  Tr => 3,  # Transformed typographically, with fallback to Rotated
+);
+
 # initialize default properties
 my @script;
 my @category;
 my @combining;
 my @eaw;
 my @mirror;
 my @hangul;
 my @casemap;
 my @xidmod;
 my @numericvalue;
 my @hanVariant;
 my @bidicategory;
 my @fullWidth;
+my @verticalOrientation;
 for (my $i = 0; $i < 0x110000; ++$i) {
     $script[$i] = $scriptCode{"UNKNOWN"};
     $category[$i] = $catCode{"UNASSIGNED"};
     $combining[$i] = 0;
     $casemap[$i] = 0;
     $xidmod[$i] = $xidmodCode{"not-chars"};
     $numericvalue[$i] = -1;
     $hanVariant[$i] = 0;
     $bidicategory[$i] = $bidicategoryCode{"L"};
     $fullWidth[$i] = 0;
+    $verticalOrientation[$i] = $verticalOrientationCode{'R'}; # unlisted codepoints default to 'R'; looked up by name so it tracks the code table
 }
 
 # blocks where the default for bidi category is not L
 for my $i (0x0600..0x07BF, 0x08A0..0x08FF, 0xFB50..0xFDCF, 0xFDF0..0xFDFF, 0xFE70..0xFEFF, 0x1EE00..0x0001EEFF) {
   $bidicategory[$i] = $bidicategoryCode{"AL"};
 }
 for my $i (0x0590..0x05FF, 0x07C0..0x089F, 0xFB1D..0xFB4F, 0x00010800..0x00010FFF, 0x0001E800..0x0001EDFF, 0x0001EF00..0x0001EFFF) {
   $bidicategory[$i] = $bidicategoryCode{"R"};
@@ -623,16 +639,41 @@ while (<FH>) {
     }
     if ($2 eq "Simplified") {
       $hasSC = 1;
     }
   } 
 }
 close FH;
 
+# read VerticalOrientation-12.txt: header lines up to the one containing "Date:" are
+# appended to @versionInfo; each later "XXXX[..YYYY] ; code" record fills @verticalOrientation
+open FH, "< $ARGV[1]/vertical/VerticalOrientation-12.txt" or die "can't open UTR50 data file VerticalOrientation-12.txt\n";
+push @versionInfo, "";
+while (<FH>) {
+    chomp;
+    push @versionInfo, $_;
+    last if /Date:/;
+}
+while (<FH>) {
+    chomp;
+    s/#.*//;
+    # a record names one codepoint or one start..end range, so the "..end" part is optional (?), not repeatable
+    next unless m/([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))?\s*;\s*([^ ]+)/;
+    my ($first, $final, $vo) = ($1, $2, $3);  # copy captures before anything else can reset them
+    unless (exists $verticalOrientationCode{$vo}) {
+        warn "unknown Vertical_Orientation code $vo";
+        next;  # leave the range at its current value rather than storing undef in the table
+    }
+    my $start = hex "0x$first";
+    my $end = (defined $final) ? hex "0x$final" : $start;
+    @verticalOrientation[$start .. $end] = ($verticalOrientationCode{$vo}) x ($end - $start + 1);
+}
+close FH;
+
 my $timestamp = gmtime();
 
 open DATA_TABLES, "> nsUnicodePropertyData.cpp" or die "unable to open nsUnicodePropertyData.cpp for output";
 
 my $licenseBlock = q[
 /* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
@@ -701,28 +742,45 @@ print DATA_TABLES "};\n\n";
 
 print HEADER "#pragma pack(1)\n\n";
 
 sub sprintCharProps1
 {
   my $usv = shift;
   return sprintf("{%d,%d,%d}, ", $mirror[$usv], $hangul[$usv], $combining[$usv]);
 }
-&genTables("CharProp1", "struct nsCharProps1 {\n  unsigned char mMirrorOffsetIndex:5;\n  unsigned char mHangulType:3;\n  unsigned char mCombiningClass:8;\n};",
-           "nsCharProps1", 11, 5, \&sprintCharProps1, 1, 2, 1);
+my $type = q/
+struct nsCharProps1 {
+  unsigned char mMirrorOffsetIndex:5;
+  unsigned char mHangulType:3;
+  unsigned char mCombiningClass:8;
+};
+/;
+&genTables("CharProp1", $type, "nsCharProps1", 11, 5, \&sprintCharProps1, 1, 2, 1);
 
 sub sprintCharProps2
 {
   my $usv = shift;
-  return sprintf("{%d,%d,%d,%d,%d,%d},",
+  return sprintf("{%d,%d,%d,%d,%d,%d,%d},",
                  $script[$usv], $eaw[$usv], $category[$usv],
-                 $bidicategory[$usv], $xidmod[$usv], $numericvalue[$usv]);
+                 $bidicategory[$usv], $xidmod[$usv], $numericvalue[$usv],
+                 $verticalOrientation[$usv]);
 }
-&genTables("CharProp2", "struct nsCharProps2 {\n  unsigned char mScriptCode:8;\n  unsigned char mEAW:3;\n  unsigned char mCategory:5;\n  unsigned char mBidiCategory:5;\n  unsigned char mXidmod:4;\n  signed char mNumericValue:5;\n  unsigned char mHanVariant:2;\n};",
-           "nsCharProps2", 11, 5, \&sprintCharProps2, 16, 4, 1);
+$type = q/
+struct nsCharProps2 {
+  unsigned char mScriptCode:8;
+  unsigned char mEAW:3;
+  unsigned char mCategory:5;
+  unsigned char mBidiCategory:5;
+  unsigned char mXidmod:4;
+  signed char   mNumericValue:5;
+  unsigned char mVertOrient:2;
+};
+/;
+&genTables("CharProp2", $type, "nsCharProps2", 11, 5, \&sprintCharProps2, 16, 4, 1);
 
 print HEADER "#pragma pack()\n\n";
 
 sub sprintHanVariants
 {
   my $baseUsv = shift;
   my $varShift = 0;
   my $val = 0;