Bug 601299: Find RegExpStatics in cx->globalObject if necessary. (r=mrbkap)
[mozilla-central.git] / netwerk / dns / prepare_tlds.py
blob8414e070a1a9a0c6f8eff9f11067baf09aa14a6b
1 # ***** BEGIN LICENSE BLOCK *****
2 # Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 # The contents of this file are subject to the Mozilla Public License Version
5 # 1.1 (the "License"); you may not use this file except in compliance with
6 # the License. You may obtain a copy of the License at
7 # http://www.mozilla.org/MPL/
9 # Software distributed under the License is distributed on an "AS IS" basis,
10 # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11 # for the specific language governing rights and limitations under the
12 # License.
14 # The Original Code is Effective TLD conversion code.
16 # The Initial Developer of the Original Code is
17 # Jeff Walden <jwalden+code@mit.edu>.
18 # Portions created by the Initial Developer are Copyright (C) 2008
19 # the Initial Developer. All Rights Reserved.
21 # Contributor(s):
23 # Alternatively, the contents of this file may be used under the terms of
24 # either the GNU General Public License Version 2 or later (the "GPL"), or
25 # the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
26 # in which case the provisions of the GPL or the LGPL are applicable instead
27 # of those above. If you wish to allow use of your version of this file only
28 # under the terms of either the GPL or the LGPL, and not to allow others to
29 # use your version of this file under the terms of the MPL, indicate your
30 # decision by deleting the provisions above and replace them with the notice
31 # and other provisions required by the GPL or the LGPL. If you do not delete
32 # the provisions above, a recipient may use your version of this file under
33 # the terms of any one of the MPL, the GPL or the LGPL.
35 # ***** END LICENSE BLOCK *****
37 import codecs
38 import encodings.idna
39 import re
40 import sys
42 """
43 Processes a file containing effective TLD data. See the following URL for a
44 description of effective TLDs and of the file format that this script
45 processes (although for the latter you're better off just reading this file's
46 short source code).
48 http://wiki.mozilla.org/Gecko:Effective_TLD_Service
49 """
def getEffectiveTLDs(path):
    """
    Generates an EffectiveTLDEntry for each rule in an effective TLD file.

    path names the file to read; it is decoded as UTF-8.  Lines starting with
    "//" and lines containing no "." are skipped.  For every other line, only
    the text before the first space, tab or newline is used as the rule.

    Raises AssertionError if two rules normalize to the same domain.
    """
    domains = set()
    # The context manager closes the file when the generator is exhausted or
    # closed, so the handle is no longer leaked.
    with codecs.open(path, "r", "UTF-8") as tldFile:
        # Iterating stops at end of file, letting the generator finish by
        # falling off the end.  An explicit "raise StopIteration" inside a
        # generator is turned into a RuntimeError by PEP 479.
        for line in tldFile:
            line = line.rstrip()
            # comment, empty, or superfluous line for explicitness purposes
            if line.startswith("//") or "." not in line:
                continue
            line = re.split(r"[ \t\n]", line, maxsplit=1)[0]
            entry = EffectiveTLDEntry(line)
            domain = entry.domain()
            assert domain not in domains, \
                "repeating domain %s makes no sense" % domain
            domains.add(domain)
            yield entry
def _normalizeHostname(domain):
    """
    Normalizes the given domain, component by component.  ASCII components are
    lowercased, while non-ASCII components are processed using the ToASCII
    algorithm.

    Every component is returned as text: ToASCII produces a byte string, which
    is decoded back to text so that it can be joined with the lowercased ASCII
    components without mixing string types.
    """
    def convertLabel(label):
        if _isASCII(label):
            return label.lower()
        # ToASCII yields ASCII-only bytes, so decoding as ASCII is lossless.
        return encodings.idna.ToASCII(label).decode("ascii")
    return ".".join([convertLabel(label) for label in domain.split(".")])
def _isASCII(s):
    "Reports whether every character of s has a code point of 127 or below."
    return all(ord(ch) <= 127 for ch in s)
class EffectiveTLDEntry:
    """
    A single rule read from an effective-TLD name file.
    """

    # Class-level defaults; __init__ overrides at most one of them on the
    # instance, depending on the prefix the rule carries.
    _exception = False
    _wild = False

    def __init__(self, line):
        """
        Builds an entry from one rule line, which must already have had its
        line ending removed.  A leading "!" marks an exception rule and a
        leading "*." marks a wildcard rule; the prefix is dropped and the
        remainder is normalized and stored as the domain.
        """
        name = line
        # Checked in order, first match wins: "!" takes precedence over "*.".
        for prefix, flag in (("!", "_exception"), ("*.", "_wild")):
            if line.startswith(prefix):
                setattr(self, flag, True)
                name = line[len(prefix):]
                break
        self._domain = _normalizeHostname(name)

    def domain(self):
        "The normalized domain stored for this entry."
        return self._domain

    def exception(self):
        "True if this entry's domain does not denote an effective TLD."
        return self._exception

    def wild(self):
        "True if this entry stands for a whole class of effective TLDs."
        return self._wild
126 #################
127 # DO EVERYTHING #
128 #################
def main():
    """
    argv[1] is the effective TLD file to parse.
    A C++ array of { domain, exception, wild } entries representing the
    eTLD file is then printed to stdout.
    """

    def boolStr(b):
        # Spell a Python truth value as the matching C++ constant.
        if b:
            return "PR_TRUE"
        return "PR_FALSE"

    # Each print takes exactly one parenthesized argument, so the output is
    # unchanged where print is a statement and the code also parses where
    # print is a function.
    print("{")
    for etld in getEffectiveTLDs(sys.argv[1]):
        exception = boolStr(etld.exception())
        wild = boolStr(etld.wild())
        print(' { "%s", %s, %s },' % (etld.domain(), exception, wild))
    # Sentinel row terminating the array.
    print(" { nsnull, PR_FALSE, PR_FALSE }")
    print("}")
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()