1 # This Source Code Form is subject to the terms of the Mozilla Public
2 # License, v. 2.0. If a copy of the MPL was not distributed with this
3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
11 Processes a file containing effective TLD data. See the following URL for a
12 description of effective TLDs and of the file format that this script
13 processes (although for the latter you're better off just reading this file's
16 http://wiki.mozilla.org/Gecko:Effective_TLD_Service
19 def getEffectiveTLDs(path
):
20 file = codecs
.open(path
, "r", "UTF-8")
23 line
= file.readline()
24 # line always contains a line terminator unless the file is empty
28 # comment, empty, or superfluous line for explicitness purposes
29 if line
.startswith("//") or "." not in line
:
31 line
= re
.split(r
"[ \t\n]", line
, 1)[0]
32 entry
= EffectiveTLDEntry(line
)
33 domain
= entry
.domain()
34 assert domain
not in domains
, \
35 "repeating domain %s makes no sense" % domain
39 def _normalizeHostname(domain
):
41 Normalizes the given domain, component by component. ASCII components are
42 lowercased, while non-ASCII components are processed using the ToASCII
45 def convertLabel(label
):
48 return encodings
.idna
.ToASCII(label
)
49 return ".".join(map(convertLabel
, domain
.split(".")))
52 "True if s consists entirely of ASCII characters, false otherwise."
58 class EffectiveTLDEntry
:
60 Stores an entry in an effective-TLD name file.
66 def __init__(self
, line
):
68 Creates a TLD entry from a line of data, which must have been stripped of
71 if line
.startswith("!"):
72 self
._exception
= True
74 elif line
.startswith("*."):
79 self
._domain
= _normalizeHostname(domain
)
82 "The domain this represents."
86 "True if this entry's domain does not denote an effective TLD."
87 return self
._exception
90 "True if this entry represents a class of effective TLDs."
100 argv[1] is the effective TLD file to parse.
101 One ETLD_ENTRY("domain", exception, wild) line per entry in the
102 eTLD file is then printed to stdout.
110 for etld
in getEffectiveTLDs(sys
.argv
[1]):
111 exception
= boolStr(etld
.exception())
112 wild
= boolStr(etld
.wild())
113 print 'ETLD_ENTRY("%s", %s, %s)' % (etld
.domain(), exception
, wild
)
115 if __name__
== '__main__':