Bug 1624129 - Make generate_table.py python 3 compatible. r=mkmelin
author: Rob Lemley <rob@thunderbird.net>
Sat, 14 Mar 2020 11:54:13 -0400
changeset 38561 2bf83584b702a24c5c9f6dcba8f6675ae1e6337e
parent 38560 3ebe3ea9c1287a6846f09f0389f173f961ec6ab0
child 38562 6638944e18bade042c5abc847a13097c85c55c44
push id: 400
push user: clokep@gmail.com
push date: Mon, 04 May 2020 18:56:09 +0000
reviewers: mkmelin
bugs: 1624129
Bug 1624129 - Make generate_table.py python 3 compatible. r=mkmelin

This is a direct port of the existing code to Python 3. This was done in multiple steps:
- Fix the mixed tabs/spaces. Use 4 spaces for indentation per current standards.
- Run through 2to3.py.
- Fix mozlint errors.
- Verify output of current script matches the Python 3 output.

The output does not match the copy of Normalize.c in ../src as that file got caught up in the recent mass clang formatting.
mailnews/extensions/fts3/data/generate_table.py
--- a/mailnews/extensions/fts3/data/generate_table.py
+++ b/mailnews/extensions/fts3/data/generate_table.py
@@ -31,28 +31,32 @@
 # use your version of this file under the terms of the MPL, indicate your
 # decision by deleting the provisions above and replace them with the notice
 # and other provisions required by the GPL or the LGPL. If you do not delete
 # the provisions above, a recipient may use your version of this file under
 # the terms of any one of the MPL, the GPL or the LGPL.
 #
 # ***** END LICENSE BLOCK *****
 
+from __future__ import absolute_import, print_function, unicode_literals
+
 import re
 
-def printTable(f, t):
-	i = f
-	while i <= t:
-		c = array[i]
-		print "0x%04x," % c,
-		i = i + 1
-		if not i % 8:
-			print "\n\t",
 
-print '''/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+def print_table(f, t):
+    i = f
+    while i <= t:
+        c = array[i]
+        print("0x%04x," % c, end=' ')
+        i = i + 1
+        if not i % 8:
+            print("\n\t", end=' ')
+
+
+print('''/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
 /* ***** BEGIN LICENSE BLOCK *****
  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
  *
  * The contents of this file are subject to the Mozilla Public License Version
  * 1.1 (the "License"); you may not use this file except in compliance with
  * the License. You may obtain a copy of the License at
  * http://www.mozilla.org/MPL/
  *
@@ -81,31 +85,30 @@ print '''/* -*- Mode: C++; tab-width: 4;
  * decision by deleting the provisions above and replace them with the notice
  * and other provisions required by the GPL or the LGPL. If you do not delete
  * the provisions above, a recipient may use your version of this file under
  * the terms of any one of the MPL, the GPL or the LGPL.
  *
  * ***** END LICENSE BLOCK ***** */
 
 /* THIS FILE IS GENERATED BY generate_table.py.  DON'T EDIT THIS */
-'''
+''')
 
 p = re.compile('([0-9A-F]{4,5})(?:\.\.([0-9A-F]{4,5}))?[=\>]([0-9A-F]{4,5})?')
 G_FROM = 1
 G_TO = 2
 G_FIRSTVAL = 3
 
 # Array whose value at index i is the unicode value unicode character i should
 # map to.
 array = []
 # Contents of gNormalizeTable.  We insert zero entries for sub-pages where we
 # have no mappings.  We insert references to the tables where we do have
 # such tables.
-globalTable = []
-globalTable.append("0")
+globalTable = ["0"]
 # The (exclusive) upper bound of the conversion table, unicode character-wise.
 # This is 0x10000 because our generated table is only 16-bit.  This also limits
 # the values we can map to; we perform an identity mapping for target values
 # that >= maxmapping.
 maxmapping = 0x10000
 sizePerTable = 64
 
 # Map characters that the mapping tells us to obliterate to the NUKE_CHAR
@@ -127,41 +130,41 @@ NUKE_CHAR = 0x20
 # The 2000..200A covers a range of space characters and maps them down to the
 #  'normal' space character.
 
 file = open('nfkc_cf.txt')
 
 m = None
 line = "\n"
 i = 0x0
+low = high = val = 0
 while i < maxmapping and line:
+    if not m:
+        line = file.readline()
+        m = p.match(line)
         if not m:
-                line = file.readline()
-                m = p.match(line)
-                if not m:
-                        continue
-                low = int(m.group(G_FROM), 16)
-                # if G_TO is present, use it, otherwise fallback to low
-                high = m.group(G_TO) and int(m.group(G_TO), 16) or low
-                # if G_FIRSTVAL is present use it, otherwise use NUKE_CHAR
-                val = (m.group(G_FIRSTVAL) and int(m.group(G_FIRSTVAL), 16)
-                                           or NUKE_CHAR)
-		continue
+            continue
+        low = int(m.group(G_FROM), 16)
+        # if G_TO is present, use it, otherwise fallback to low
+        high = m.group(G_TO) and int(m.group(G_TO), 16) or low
+        # if G_FIRSTVAL is present use it, otherwise use NUKE_CHAR
+        val = (m.group(G_FIRSTVAL) and int(m.group(G_FIRSTVAL), 16)
+               or NUKE_CHAR)
+        continue
 
-
-        if i >= low and i <= high:
-		if val >= maxmapping:
-			array.append(i)
-		else:
-			array.append(val)
-                if i == high:
-                        m = None
-	else:
-		array.append(i)
-	i = i + 1
+    if low <= i <= high:
+        if val >= maxmapping:
+            array.append(i)
+        else:
+            array.append(val)
+        if i == high:
+            m = None
+    else:
+        array.append(i)
+    i = i + 1
 file.close()
 
 # --- load normalization / decomposition table
 # It is important that this file gets processed second because the other table
 # will tell us about mappings from uppercase U with diaeresis to lowercase u
 # with diaeresis.  We obviously don't want that clobbering our value.  (Although
 # this would work out if we propagated backwards rather than forwards...)
 #
@@ -183,82 +186,82 @@ file.close()
 #
 # These map marks to their canonical combining class which appears to be a way
 # of specifying the precedence / order in which marks should be combined.  The
 # key thing is we don't care about them.
 file = open('nfkc.txt')
 line = file.readline()
 m = p.match(line)
 while line:
-	if not m:
-		line = file.readline()
-		m = p.match(line)
-		continue
+    if not m:
+        line = file.readline()
+        m = p.match(line)
+        continue
 
-        low = int(m.group(G_FROM), 16)
-        # if G_TO is present, use it, otherwise fallback to low
-        high = m.group(G_TO) and int(m.group(G_TO), 16) or low
-        # if G_FIRSTVAL is present use it, otherwise fall back to NUKE_CHAR
-        val = m.group(G_FIRSTVAL) and int(m.group(G_FIRSTVAL), 16) or NUKE_CHAR
-        for i in range(low, high+1):
-                if i < maxmapping and val < maxmapping:
-                        array[i] = val
-	m = None
+    low = int(m.group(G_FROM), 16)
+    # if G_TO is present, use it, otherwise fallback to low
+    high = m.group(G_TO) and int(m.group(G_TO), 16) or low
+    # if G_FIRSTVAL is present use it, otherwise fall back to NUKE_CHAR
+    val = m.group(G_FIRSTVAL) and int(m.group(G_FIRSTVAL), 16) or NUKE_CHAR
+    for i in range(low, high+1):
+        if i < maxmapping and val < maxmapping:
+            array[i] = val
+    m = None
 file.close()
 
 # --- generate a normalized table to support case and accent folding
 
 i = 0
-needTerm = False;
+needTerm = False
 while i < maxmapping:
-	if not i % sizePerTable:
-		# table is empty?
-		j = i
-		while j < i + sizePerTable:
-			if array[j] != j:
-				break
-			j += 1
+    if not i % sizePerTable:
+        # table is empty?
+        j = i
+        while j < i + sizePerTable:
+            if array[j] != j:
+                break
+            j += 1
 
-		if j == i + sizePerTable:
-			if i:
-				globalTable.append("0")
-			i += sizePerTable
-			continue
+        if j == i + sizePerTable:
+            if i:
+                globalTable.append("0")
+            i += sizePerTable
+            continue
 
-		if needTerm:
-			print "};\n"
-		globalTable.append("gNormalizeTable%04x" % i)
-		print "static const unsigned short gNormalizeTable%04x[] = {\n\t" % i,
-		print "/* U+%04x */\n\t" % i,
-		needTerm = True
-        # Decomposition does not case-fold, so we want to compensate by
-        # performing a lookup here.  Because decomposition chains can be
-        # example: 01d5, a capital U with a diaeresis and a bar. yes, really.
-        # 01d5 -> 00dc -> 0055 (U) -> 0075 (u)
-        c = array[i]
-        while c != array[c]:
-                c = array[c]
-        if c >= 0x41 and c <= 0x5a:
-                raise Exception('got an uppercase character somehow: %x => %x'
-                                % (i, c))
-	print "0x%04x," % c,
-	i = i + 1
-	if not i % 8:
-		print "\n\t",
+        if needTerm:
+            print("};\n")
+        globalTable.append("gNormalizeTable%04x" % i)
+        print("static const unsigned short gNormalizeTable%04x[] = {\n\t" % i, end=' ')
+        print("/* U+%04x */\n\t" % i, end=' ')
+        needTerm = True
+    # Decomposition does not case-fold, so we want to compensate by
+    # performing a lookup here.  Because decomposition chains can be
+    # example: 01d5, a capital U with a diaeresis and a bar. yes, really.
+    # 01d5 -> 00dc -> 0055 (U) -> 0075 (u)
+    c = array[i]
+    while c != array[c]:
+        c = array[c]
+    if 0x41 <= c <= 0x5a:
+        raise Exception('got an uppercase character somehow: %x => %x'
+                        % (i, c))
+    print("0x%04x," % c, end=' ')
+    i = i + 1
+    if not i % 8:
+        print("\n\t", end=' ')
 
-print "};\n\nstatic const unsigned short* gNormalizeTable[] = {",
+print("};\n\nstatic const unsigned short* gNormalizeTable[] = {", end=' ')
 i = 0
 while i < (maxmapping / sizePerTable):
-	if not i % 4:
-		print "\n\t",
-	print globalTable[i] + ",", 
-	i += 1
+    if not i % 4:
+        print("\n\t", end=' ')
+    print(globalTable[i] + ",", end=' ')
+    i += 1
 
-print '''
+print('''
 };
 
 unsigned int normalize_character(const unsigned int c)
 {
   if (c >= ''' + ('0x%x' % (maxmapping,)) + ''' || !gNormalizeTable[c >> 6])
     return c;
   return gNormalizeTable[c >> 6][c & 0x3f];
 }
-'''
+''')