Bug 1230490 - Part 1: Remove support for separate Unicode version for case-folding from make_unicode. r=arai
author: André Bargull <andre.bargull@gmail.com>
Mon, 07 Nov 2016 14:26:43 -0800
changeset 351984 f05a94693082bc06aadaabdf87940ddad52de605
parent 351983 d6a0b28b04e4b35494d15155f79e43cdc5244835
child 351985 4a1fa3068f56a6ab2a20cefad78ed63e52226eeb
push id: 6795
push user: jlund@mozilla.com
push date: Mon, 23 Jan 2017 14:19:46 +0000
treeherder: mozilla-esr52@76101b503191 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: arai
bugs: 1230490
milestone: 52.0a1
Bug 1230490 - Part 1: Remove support for separate Unicode version for case-folding from make_unicode. r=arai
js/src/vm/make_unicode.py
--- a/js/src/vm/make_unicode.py
+++ b/js/src/vm/make_unicode.py
@@ -77,20 +77,16 @@ mpl_license = """\
 warning_message = """\
 /* Generated by make_unicode.py DO NOT MODIFY */
 """
 
 unicode_version_message = """\
 /* Unicode version: {0} */
 """
 
-casefold_version_message = """\
-/* Casefold Unicode version: {0} */
-"""
-
 def read_unicode_data(unicode_data):
     """
         If you want to understand how this wonderful file format works checkout
           Unicode Standard Annex #44 - Unicode Character Database
           http://www.unicode.org/reports/tr44/
     """
 
     reader = csv.reader(unicode_data, delimiter=';')
@@ -387,26 +383,25 @@ def process_case_folding(case_folding):
             folding_table.append(item)
         folding_index[code] = i
     return (
         folding_table, folding_index,
         non_bmp_folding_map, non_bmp_rev_folding_map,
         folding_tests
     )
 
-def make_non_bmp_file(version, casefold_version,
+def make_non_bmp_file(version,
                       non_bmp_lower_map, non_bmp_upper_map,
                       non_bmp_folding_map, non_bmp_rev_folding_map):
     file_name = 'UnicodeNonBMP.h';
     with io.open(file_name, mode='wb') as non_bmp_file:
         non_bmp_file.write(mpl_license)
         non_bmp_file.write('\n')
         non_bmp_file.write(warning_message)
         non_bmp_file.write(unicode_version_message.format(version))
-        non_bmp_file.write(casefold_version_message.format(casefold_version))
         non_bmp_file.write("""
 #ifndef vm_UnicodeNonBMP_h
 #define vm_UnicodeNonBMP_h
 
 """)
 
         make_non_bmp_convert_macro(non_bmp_file, 'LOWERCASE', non_bmp_lower_map)
         non_bmp_file.write('\n')
@@ -511,17 +506,17 @@ function test(code, ...equivs) {
 """)
         for args in folding_tests:
             test_icase.write('test(' + ','.join([hex(c) for c in args]) + ');\n')
         test_icase.write("""
 if (typeof reportCompare === "function")
     reportCompare(true, true);
 """)
 
-def make_unicode_file(version, casefold_version,
+def make_unicode_file(version,
                       table, index,
                       same_upper_table, same_upper_index,
                       folding_table, folding_index):
     index1, index2, shift = splitbins(index)
 
     # Don't forget to update CharInfo in Unicode.h if you need to change this
     assert shift == 5
 
@@ -627,17 +622,16 @@ def make_unicode_file(version, casefold_
 
         file.write('\n'.join(lines))
         file.write('\n};\n')
 
     file_name = 'Unicode.cpp'
     with io.open(file_name, 'wb') as data_file:
         data_file.write(warning_message)
         data_file.write(unicode_version_message.format(version))
-        data_file.write(casefold_version_message.format(casefold_version))
         data_file.write(public_domain)
         data_file.write('#include "vm/Unicode.h"\n\n')
         data_file.write('using namespace js;\n')
         data_file.write('using namespace js::unicode;\n')
         data_file.write(comment)
         data_file.write('const CharacterInfo unicode::js_charinfo[] = {\n')
         for d in table:
             data_file.write('    {')
@@ -744,126 +738,99 @@ def splitbins(t):
     mask = 2**shift - 1
     for i in range(len(t)):
         assert t[i] == t2[(t1[i >> shift] << shift) + (i & mask)]
     return best
 
 def update_unicode(args):
     import urllib2
 
-    def to_download_url(version):
+    version = args.version
+    if version is not None:
         baseurl = 'http://unicode.org/Public'
-        if version is 'UNIDATA':
-            return '%s/%s' % (baseurl, version)
-        return '%s/%s/ucd' % (baseurl, version)
-
-    unicode_info = {
-        'name': 'Unicode',
-        'version': args.version,
-        'url': to_download_url(args.version),
-    }
-    # TODO: Remove this dict and use a single Unicode version when bug 1230490 has relanded.
-    casefold_info = {
-        'name': 'Casefold Unicode',
-        'version': args.casefold_version,
-        'url': to_download_url(args.casefold_version),
-    }
-
-    def print_info(info):
-        if info['version'] is not None:
-            print('\t%s version: %s' % (info['name'], info['version']))
-            print('\t%s download url: %s' % (info['name'], info['url']))
+        if version == 'UNIDATA':
+            url = '%s/%s' % (baseurl, version)
         else:
-            print('\t%s uses local files.' % info['name'])
-            print('\tAlways make sure you have the newest Unicode files!')
+            url = '%s/%s/ucd' % (baseurl, version)
 
     print('Arguments:')
-    print_info(unicode_info)
-    print_info(casefold_info)
+    if version is not None:
+        print('\tVersion: %s' % version)
+        print('\tDownload url: %s' % url)
+    else:
+        print('\tUsing local files.')
+        print('\tAlways make sure you have the newest Unicode files!')
     print('')
 
-    def download_or_open(info, fname):
+    def download_or_open(fname):
         tfile_path = os.path.join(os.getcwd(), fname)
-        if info['version'] is not None:
+        if version is not None:
             print('Downloading %s...' % fname)
-            unicode_data_url = '%s/%s' % (info['url'], fname)
+            unicode_data_url = '%s/%s' % (url, fname)
             with closing(urllib2.urlopen(unicode_data_url)) as reader:
                 data = reader.read()
             tfile = io.open(tfile_path, 'w+b')
             tfile.write(data)
             tfile.flush()
             tfile.seek(0)
         else:
             if not os.path.isfile(tfile_path):
                 raise RuntimeError('File not found: %s' % tfile_path)
             tfile = io.open(tfile_path, 'rb');
         return tfile
 
     def version_from_file(f, fname):
         pat_version = re.compile(r"# %s-(?P<version>\d+\.\d+\.\d+).txt" % fname)
-        (unicode_version) = pat_version.match(f.readline()).group("version")
-        return unicode_version
+        return pat_version.match(f.readline()).group("version")
 
-    with download_or_open(unicode_info, 'UnicodeData.txt') as unicode_data, \
-         download_or_open(casefold_info, 'CaseFolding.txt') as case_folding, \
-         download_or_open(unicode_info, 'DerivedCoreProperties.txt') as derived_core_properties:
-        version = version_from_file(derived_core_properties, 'DerivedCoreProperties')
-        casefold_version = version_from_file(case_folding, 'CaseFolding')
+    with download_or_open('UnicodeData.txt') as unicode_data, \
+         download_or_open('CaseFolding.txt') as case_folding, \
+         download_or_open('DerivedCoreProperties.txt') as derived_core_properties:
+        unicode_version = version_from_file(derived_core_properties, 'DerivedCoreProperties')
 
         print('Processing...')
         (
             table, index,
             same_upper_table, same_upper_index,
             non_bmp_lower_map, non_bmp_upper_map,
             test_table, test_space_table
         ) = process_unicode_data(unicode_data, derived_core_properties)
         (
             folding_table, folding_index,
             non_bmp_folding_map, non_bmp_rev_folding_map,
             folding_tests
         ) = process_case_folding(case_folding)
 
     print('Generating...')
-    make_unicode_file(version, casefold_version,
+    make_unicode_file(unicode_version,
                       table, index,
                       same_upper_table, same_upper_index,
                       folding_table, folding_index)
-    make_non_bmp_file(version, casefold_version,
+    make_non_bmp_file(unicode_version,
                       non_bmp_lower_map, non_bmp_upper_map,
                       non_bmp_folding_map, non_bmp_rev_folding_map)
 
-    make_bmp_mapping_test(version, test_table)
-    make_non_bmp_mapping_test(version, non_bmp_upper_map, non_bmp_lower_map)
-    make_space_test(version, test_space_table)
-    make_icase_test(casefold_version, folding_tests)
+    make_bmp_mapping_test(unicode_version, test_table)
+    make_non_bmp_mapping_test(unicode_version, non_bmp_upper_map, non_bmp_lower_map)
+    make_space_test(unicode_version, test_space_table)
+    make_icase_test(unicode_version, folding_tests)
 
 if __name__ == '__main__':
     import argparse
 
     # This script must be run from js/src/vm to work correctly.
     if '/'.join(os.path.normpath(os.getcwd()).split(os.sep)[-3:]) != 'js/src/vm':
         raise RuntimeError('%s must be run from js/src/vm' % sys.argv[0])
 
-    # !!! IMPORTANT !!!
-    # We currently use two different Unicode versions (6.2 and 8.0) for
-    # separate parts of the engine. This is all just temporary until
-    # bug 1230490 has relanded. As soon as bug 1230490 has relanded, this
-    # script can be simplified by removing all logic to handle different
-    # Unicode versions.
-
     parser = argparse.ArgumentParser(description='Update Unicode data.')
 
     parser.add_argument('--version',
                         help='Optional Unicode version number. If specified, downloads the\
                               selected version from <http://unicode.org/Public>. If not specified\
                               uses the existing local files to generate the Unicode data. The\
                               number must match a published Unicode version, e.g. use\
                               "--version=8.0.0" to download Unicode 8 files. Alternatively use\
                               "--version=UNIDATA" to download the latest published version.')
-    # TODO: Remove this parameter when bug 1230490 has relanded.
-    parser.add_argument('--casefold-version',
-                        help='Unicode version number for case-folding data. Has the same meaning\
-                        as --version, except only used for case-folding data.')
 
     parser.set_defaults(func=update_unicode)
 
     args = parser.parse_args()
     args.func(args)