author | Arpit Bharti <arpitbharti73@gmail.com> |
Tue, 02 Jul 2019 12:28:48 +0000 | |
changeset 480918 | 822cb68b6ab75c96d7e36aa1f7fffda122d41f0c |
parent 480917 | 99f94dd8c8f1f0ca13196051cf608b420b5df731 |
child 480919 | dee2008c7a7d05bc882eb368dee6a3cc7d1f90a6 |
push id | 36228 |
push user | aciure@mozilla.com |
push date | Tue, 02 Jul 2019 21:46:04 +0000 |
treeherder | mozilla-central@da33e6261a81 [default view] [failures only] |
perfherder | [talos] [build metrics] [platform microbench] (compared to previous push) |
reviewers | leplatrem, erahm |
bugs | 1083971 |
milestone | 69.0a1 |
first release with | nightly linux32, nightly linux64, nightly mac, nightly win32, nightly win64 |
last release without | nightly linux32, nightly linux64, nightly mac, nightly win32, nightly win64 |
netwerk/dns/prepare_tlds.py | file | annotate | diff | comparison | revisions | |
xpcom/ds/tools/make_dafsa.py | file | annotate | diff | comparison | revisions |
--- a/netwerk/dns/prepare_tlds.py +++ b/netwerk/dns/prepare_tlds.py @@ -3,17 +3,17 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. import codecs import encodings.idna import imp import os import re import sys -from make_dafsa import words_to_cxx +from make_dafsa import words_to_cxx, words_to_bin """ Processes a file containing effective TLD data. See the following URL for a description of effective TLDs and of the file format that this script processes (although for the latter you're better off just reading this file's short source code). http://wiki.mozilla.org/Gecko:Effective_TLD_Service @@ -93,21 +93,22 @@ class EffectiveTLDEntry: "True if this entry represents a class of effective TLDs." return self._wild ################# # DO EVERYTHING # ################# -def main(output, effective_tld_filename): +def main(output, effective_tld_filename, output_format="cxx"): """ effective_tld_filename is the effective TLD file to parse. - A C++ array of a binary representation of a DAFSA representing the - eTLD file is then printed to output. + based on the output format, either a C++ array of a binary representation + of a DAFSA representing the eTLD file is then printed to standard output + or a binary file is written to disk. """ def typeEnum(etld): """ Maps the flags to the DAFSA's enum types. 
""" if etld.exception(): return 1 @@ -118,12 +119,31 @@ def main(output, effective_tld_filename) def dafsa_words(): """ make_dafsa expects lines of the form "<domain_name><enum_value>" """ for etld in getEffectiveTLDs(effective_tld_filename): yield "%s%d" % (etld.domain(), typeEnum(etld)) - output.write(words_to_cxx(dafsa_words())) + """ words_to_bin() returns a bytes while words_to_cxx() returns string """ + if output_format == "bin": + if sys.version_info[0] >= 3: + output = output.buffer + output.write(words_to_bin(dafsa_words())) + else: + output.write(words_to_cxx(dafsa_words())) + + if __name__ == '__main__': - main(sys.stdout, sys.argv[1]) + """ + This program can output the DAFSA in two formats: + as C++ code that will be included and compiled at build time + or as a binary file that will be published in Remote Settings. + + Flags for format options: + "cxx" -> C++ array [default] + "bin" -> Binary file + """ + + output_format = "bin" if "--bin" in sys.argv else "cxx" + main(sys.stdout, sys.argv[1], output_format=output_format)
--- a/xpcom/ds/tools/make_dafsa.py +++ b/xpcom/ds/tools/make_dafsa.py @@ -188,16 +188,17 @@ The bytes in the generated array has the 7: 0x81 <return_value> 0x81 & 0x0F -> return 1 8: 0x62 <char> label character 0x62 -> match "b" 9: 0x62 <char> label character 0x62 -> match "b" 10: 0x82 <return_value> 0x82 & 0x0F -> return 2 """ import sys +import struct class InputError(Exception): """Exception raised for errors in the input file.""" def to_dafsa(words): """Generates a DAFSA from a word list and returns the source node. @@ -377,17 +378,17 @@ def encode_links(children, offsets, curr buf.reverse() return buf def encode_prefix(label): """Encodes a node label as a list of bytes without a trailing high byte. This method encodes a node if there is exactly one child and the - child follows immidiately after so that no jump is needed. This label + child follows immediately after so that no jump is needed. This label will then be a prefix to the label in the child node. """ assert label return [ord(c) for c in reversed(label)] def encode_label(label): """Encodes a node label as a list of bytes with a trailing high byte >0x80. @@ -411,16 +412,23 @@ def encode(dafsa): output.extend(encode_links(node[1], offsets, len(output))) output.extend(encode_label(node[0])) offsets[id(node)] = len(output) output.extend(encode_links(dafsa, offsets, len(output))) output.reverse() return output +def encode_words(words): + """Generates a dafsa representation of a word list""" + dafsa = to_dafsa(words) + for fun in (reverse, join_suffixes, reverse, join_suffixes, join_labels): + dafsa = fun(dafsa) + return dafsa + def to_cxx(data, preamble=None): """Generates C++ code from a list of encoded bytes.""" text = '/* This file is generated. DO NOT EDIT!\n\n' text += 'The byte array encodes a dictionary of strings and values. See ' text += 'make_dafsa.py for documentation.' 
text += '*/\n\n' @@ -434,22 +442,27 @@ def to_cxx(data, preamble=None): text += ', '.join('0x%02x' % byte for byte in data[i:i + 12]) text += ',\n' text += '};\n' return text def words_to_cxx(words, preamble=None): """Generates C++ code from a word list""" - dafsa = to_dafsa(words) - for fun in (reverse, join_suffixes, reverse, join_suffixes, join_labels): - dafsa = fun(dafsa) + dafsa = encode_words(words) return to_cxx(encode(dafsa), preamble) +def words_to_bin(words): + """Generates bytes from a word list""" + dafsa = encode_words(words) + data = encode(dafsa) + return struct.pack('%dB' % len(data), *data) + + def parse_gperf(infile): """Parses gperf file and extract strings and return code""" lines = [line.strip() for line in infile] # Extract the preamble. first_delimeter = lines.index('%%') preamble = '\n'.join(lines[0:first_delimeter])