Bug 1861709 replace AudioCallbackDriver::ThreadRunning() assertions that mean to...
[gecko.git] / netwerk / dns / prepare_tlds.py
blobadebcec4870539206b469db0c266fea0ccd1631b
1 # This Source Code Form is subject to the terms of the Mozilla Public
2 # License, v. 2.0. If a copy of the MPL was not distributed with this
3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 import codecs
6 import encodings.idna
7 import re
8 import sys
10 from make_dafsa import words_to_bin, words_to_cxx
12 """
13 Processes a file containing effective TLD data. See the following URL for a
14 description of effective TLDs and of the file format that this script
15 processes (although for the latter you're better off just reading this file's
16 short source code).
18 http://wiki.mozilla.org/Gecko:Effective_TLD_Service
19 """
def getEffectiveTLDs(path):
    """
    Yields an EffectiveTLDEntry for every rule found in the effective TLD
    file at path, which is read as UTF-8.

    Lines that start with "//" and lines that are blank are skipped. On any
    other line only the text before the first space, tab, or newline is used
    as the rule. An AssertionError is raised when two rules produce the same
    domain.
    """
    domains = set()
    # The context manager closes the file as soon as the generator is
    # exhausted or closed, rather than leaving it to the garbage collector.
    with codecs.open(path, "r", "UTF-8") as file:
        for line in file:
            # line always contains a line terminator unless the file is empty
            if len(line) == 0:
                # A plain return ends the generator; raising StopIteration
                # inside a generator body is turned into RuntimeError (PEP 479).
                return
            line = line.rstrip()
            # comment, empty, or superfluous line for explicitness purposes
            if line.startswith("//") or not line.strip():
                continue
            # maxsplit is passed by keyword: the positional form is deprecated.
            line = re.split(r"[ \t\n]", line, maxsplit=1)[0]
            entry = EffectiveTLDEntry(line)
            domain = entry.domain()
            assert domain not in domains, "repeating domain %s makes no sense" % domain
            domains.add(domain)
            yield entry
def _normalizeHostname(domain):
    """
    Returns domain with every dot-separated label normalized: a label made up
    only of ASCII characters is lowercased, and any other label is converted
    with the IDNA ToASCII algorithm.
    """
    normalized = []
    for label in domain.split("."):
        if _isASCII(label):
            normalized.append(label.lower())
        else:
            # ToASCII hands back bytes; decode them so every label is a str.
            normalized.append(encodings.idna.ToASCII(label).decode("utf-8"))
    return ".".join(normalized)
56 def _isASCII(s):
57 "True if s consists entirely of ASCII characters, false otherwise."
58 for c in s:
59 if ord(c) > 127:
60 return False
61 return True
class EffectiveTLDEntry:
    """
    One rule read from an effective-TLD name file: a normalized domain plus
    the exception and wildcard flags taken from the rule's prefix.
    """

    # Class-level defaults; __init__ sets the matching flag on the instance
    # only when the line carries the "!" or "*." prefix.
    _exception = False
    _wild = False

    def __init__(self, line):
        """
        Builds the entry from a single rule line, which must already have its
        line ending removed. A leading "!" marks an exception rule and a
        leading "*." marks a wildcard rule; the prefix is dropped before the
        remaining domain is normalized.
        """
        hostname = line
        if line[:1] == "!":
            self._exception = True
            hostname = line[1:]
        elif line[:2] == "*.":
            self._wild = True
            hostname = line[2:]
        self._domain = _normalizeHostname(hostname)

    def domain(self):
        "The normalized domain of this entry, without any rule prefix."
        return self._domain

    def exception(self):
        "True if this entry's domain does not denote an effective TLD."
        return self._exception

    def wild(self):
        "True if this entry stands for a whole class of effective TLDs."
        return self._wild
100 #################
101 # DO EVERYTHING #
102 #################
def main(output, effective_tld_filename, output_format="cxx"):
    """
    Parses the effective TLD file named by effective_tld_filename and writes
    the DAFSA built from it to output.

    When output_format is "bin" the result of words_to_bin() is written;
    for any other value the result of words_to_cxx() is written.
    """

    def flag_value(entry):
        """
        Maps the entry's flags to the DAFSA's enum types.
        """
        if entry.exception():
            return 1
        if entry.wild():
            return 2
        return 0

    def words():
        """
        make_dafsa expects lines of the form "<domain_name><enum_value>"
        """
        for entry in getEffectiveTLDs(effective_tld_filename):
            yield "%s%d" % (entry.domain(), flag_value(entry))

    # words_to_bin() returns bytes while words_to_cxx() returns a string, so
    # output has to accept whichever type the chosen encoder produces.
    encode = words_to_bin if output_format == "bin" else words_to_cxx
    output.write(encode(words()))
if __name__ == "__main__":
    # This program can output the DAFSA in two formats:
    # as C++ code that will be included and compiled at build time
    # or as a binary file that will be published in Remote Settings.
    #
    # Flags for format options:
    # "cxx" -> C++ array [default]
    # "bin" -> Binary file
    output_format = "bin" if "--bin" in sys.argv else "cxx"
    # Skip the flag when locating the input file so that "--bin" may be given
    # before or after the filename.
    positional = [arg for arg in sys.argv[1:] if arg != "--bin"]
    # The binary format is bytes, which the text-mode sys.stdout rejects with
    # TypeError; send it through the underlying byte stream instead.
    stream = sys.stdout.buffer if output_format == "bin" else sys.stdout
    main(stream, positional[0], output_format=output_format)