Backed out changesets d8fd745a0095 and 30b7ebdf5c99 (bug 924480) for robocop-3 failures.
[gecko.git] / netwerk / dns / prepare_tlds.py
blob782b8c80227ea08a88c85c747df61b469f9308f7
1 # This Source Code Form is subject to the terms of the Mozilla Public
2 # License, v. 2.0. If a copy of the MPL was not distributed with this
3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 import codecs
6 import encodings.idna
7 import re
8 import sys
10 """
11 Processes a file containing effective TLD data. See the following URL for a
12 description of effective TLDs and of the file format that this script
13 processes (although for the latter you're better off just reading this file's
14 short source code).
16 http://wiki.mozilla.org/Gecko:Effective_TLD_Service
17 """
def getEffectiveTLDs(path):
    """
    Generates an EffectiveTLDEntry for each rule in the effective-TLD file at
    |path|, which is decoded as UTF-8.

    Lines starting with "//" and lines containing no "." are skipped.  For
    every other line, only the text before the first space, tab or newline is
    used as the rule.  An assertion fails if two rules normalize to the same
    domain (the check is an assert, so it is not performed under -O).
    """
    domains = set()
    # The with-block closes the file when the generator finishes, is closed
    # early, or fails.
    with codecs.open(path, "r", "UTF-8") as tldFile:
        # Iteration stops at end of file and falling off the loop ends the
        # generator.  Raising StopIteration by hand inside a generator is
        # converted into a RuntimeError by PEP 479, so it must not be used.
        for line in tldFile:
            line = line.rstrip()
            # comment, empty, or superfluous line for explicitness purposes
            if line.startswith("//") or "." not in line:
                continue
            line = re.split(r"[ \t\n]", line, 1)[0]
            entry = EffectiveTLDEntry(line)
            domain = entry.domain()
            assert domain not in domains, \
                "repeating domain %s makes no sense" % domain
            domains.add(domain)
            yield entry
def _normalizeHostname(domain):
    """
    Normalizes the given domain, component by component.  ASCII components are
    lowercased, while non-ASCII components are processed using the ToASCII
    algorithm.  The components are rejoined with "." and the result never
    mixes text and byte strings.
    """
    def convertLabel(label):
        if _isASCII(label):
            return label.lower()
        # ToASCII hands back a byte string (bytes on Python 3).  Decode it so
        # every label has the same text type; otherwise the join below raises
        # TypeError on Python 3 when given a mix of str and bytes.
        return encodings.idna.ToASCII(label).decode("ascii")
    return ".".join(convertLabel(label) for label in domain.split("."))
51 def _isASCII(s):
52 "True if s consists entirely of ASCII characters, false otherwise."
53 for c in s:
54 if ord(c) > 127:
55 return False
56 return True
class EffectiveTLDEntry:
    """
    One rule read from an effective-TLD name file: a normalized domain plus
    flags recording whether the rule was written as an exception ("!" prefix)
    or as a wildcard ("*." prefix).
    """

    # Class-level defaults; __init__ overrides one of them on the instance
    # only when the corresponding prefix is present.
    _exception = False
    _wild = False

    def __init__(self, line):
        """
        Builds an entry from one line of data, which must already have had its
        line ending removed.
        """
        # Number of leading marker characters to drop before normalizing.
        prefixLength = 0
        if line.startswith("!"):
            self._exception = True
            prefixLength = 1
        elif line.startswith("*."):
            self._wild = True
            prefixLength = 2
        self._domain = _normalizeHostname(line[prefixLength:])

    def domain(self):
        "The normalized domain of this entry, without any rule prefix."
        return self._domain

    def exception(self):
        "True if this entry's domain does not denote an effective TLD."
        return self._exception

    def wild(self):
        "True if this entry stands for a whole class of effective TLDs."
        return self._wild
94 #################
95 # DO EVERYTHING #
96 #################
def main():
    """
    argv[1] is the effective TLD file to parse.
    One ETLD_ENTRY("domain", exception, wild) line per entry of the eTLD file
    is then printed to stdout, with the two flags spelled "true" or "false".
    """

    def boolStr(b):
        # Spell a Python truth value as the lowercase literal used in output.
        if b:
            return "true"
        return "false"

    for etld in getEffectiveTLDs(sys.argv[1]):
        exception = boolStr(etld.exception())
        wild = boolStr(etld.wild())
        # A single parenthesized argument prints identically whether print is
        # the Python 2 statement or the Python 3 function; the bare statement
        # form is a syntax error on Python 3.
        print('ETLD_ENTRY("%s", %s, %s)' % (etld.domain(), exception, wild))
# Run the conversion only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()