author Jeff Muizelaar <>
Tue, 18 Oct 2011 14:19:07 -0400
changeset 79692 344e0e0cabe451fd8d99f0b5729d8cd3b7121fa9
parent 43173 ac1ed3f6b2e71637e562866867c9ac571d2cb283
child 96742 f4157e8c410708d76703f19e4dfb61859bfe32d8
permissions -rw-r--r--
Bug 695324. Fix precedence problem found with clang warning. r=mounir '*' has higher precedence than '?' so bracket the expression so we get what we want.

# Version: MPL 1.1/GPL 2.0/LGPL 2.1
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
# The Original Code is Effective TLD conversion code.
# The Initial Developer of the Original Code is
# Jeff Walden <>.
# Portions created by the Initial Developer are Copyright (C) 2008
# the Initial Developer. All Rights Reserved.
# Contributor(s):
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
# ***** END LICENSE BLOCK *****

import codecs
import encodings.idna
import re
import sys

Processes a file containing effective TLD data.  See the following URL for a
description of effective TLDs and of the file format that this script
processes (although for the latter you're better off just reading this file's
short source code).

def getEffectiveTLDs(path):
  file =, "r", "UTF-8")
  domains = set()
  while True:
    line = file.readline()
    # line always contains a line terminator unless the file is empty
    if len(line) == 0:
      raise StopIteration
    line = line.rstrip()
    # comment, empty, or superfluous line for explicitness purposes
    if line.startswith("//") or "." not in line:
    line = re.split(r"[ \t\n]", line, 1)[0]
    entry = EffectiveTLDEntry(line)
    domain = entry.domain()
    assert domain not in domains, \
           "repeating domain %s makes no sense" % domain
    yield entry

def _normalizeHostname(domain):
  Normalizes the given domain, component by component.  ASCII components are
  lowercased, while non-ASCII components are processed using the ToASCII
  def convertLabel(label):
    if _isASCII(label):
      return label.lower()
    return encodings.idna.ToASCII(label)
  return ".".join(map(convertLabel, domain.split(".")))

def _isASCII(s):
  "True if s consists entirely of ASCII characters, false otherwise."
  for c in s:
    if ord(c) > 127:
      return False
  return True

class EffectiveTLDEntry:
  Stores an entry in an effective-TLD name file.

  _exception = False
  _wild = False

  def __init__(self, line):
    Creates a TLD entry from a line of data, which must have been stripped of
    the line ending.
    if line.startswith("!"):
      self._exception = True
      domain = line[1:]
    elif line.startswith("*."):
      self._wild = True
      domain = line[2:]
      domain = line
    self._domain = _normalizeHostname(domain)

  def domain(self):
    "The domain this represents."
    return self._domain

  def exception(self):
    "True if this entry's domain denotes does not denote an effective TLD."
    return self._exception

  def wild(self):
    "True if this entry represents a class of effective TLDs."
    return self._wild


def main():
  argv[1] is the effective TLD file to parse.
  A C++ array of { domain, exception, wild } entries representing the
  eTLD file is then printed to stdout.

  def boolStr(b):
    if b:
      return "PR_TRUE"
    return "PR_FALSE"

  print "{"
  for etld in getEffectiveTLDs(sys.argv[1]):
    exception = boolStr(etld.exception())
    wild = boolStr(etld.wild())
    print '  { "%s", %s, %s },' % (etld.domain(), exception, wild)
  print "  { nsnull, PR_FALSE, PR_FALSE }"
  print "}"

if __name__ == '__main__':