3 """world -- Print mappings between country names and DNS country codes.
6 Email: barry@python.org
7 Version: %(__version__)s
9 This script will take a list of Internet addresses and print out where in the
10 world those addresses originate from, based on the top-level domain country
11 code found in the address. Addresses can be in any of the following forms:
13 xx -- just the country code or top-level domain identifier
14 host.domain.xx -- any Internet host or network name
15 somebody@where.xx -- an Internet email address
17 If no match is found, the address is interpreted as a regular expression and a
18 reverse lookup is attempted. This script will search the country names and
19 print a list of matching entries. You can force reverse mappings with the
20 `-r' flag (see below).
25 tz originated from Tanzania, United Republic of
26 us originated from United States
29 united matches 6 countries:
30 ae: United Arab Emirates
31 uk: United Kingdom (common practice)
32 um: United States Minor Outlying Islands
34 tz: Tanzania, United Republic of
37 Country codes are maintained by the RIPE Network Coordination Centre,
38 in coordination with the ISO 3166 Maintenance Agency at DIN Berlin. The
39 authoritative source of country code mappings is:
41 <url:ftp://ftp.ripe.net/iso3166-countrycodes.txt>
43 The latest known change to this information was:
45 Friday, 5 April 2002, 12.00 CET 2002
47 This script also knows about non-geographic top-level domains, and the
48 additional ccTLDs reserved by IANA.
50 Usage: %(PROGRAM)s [-d] [-p file] [-o] [-r] [-h] addr [addr ...]
54 Print mapping of all top-level domains.
58 Parse an iso3166-countrycodes file extracting the two letter country
59 code followed by the country name. Note that the three letter country
60 codes and numbers, which are also provided in the standard format
65 When used in conjunction with the `-p' option, output is in the form
66 of a Python dictionary, and country names are normalized
67 w.r.t. capitalization. This makes it appropriate for cutting and
68 pasting back into this file. Output is always to standard out.
72 Force reverse lookup. In this mode the address can be any Python
73 regular expression; this is matched against all country names and a
74 list of matching mappings is printed. In normal mode (i.e. without
75 this flag), reverse lookup is performed on addresses if no matching
76 country code is found.
82 __version__
= '$Revision$'
93 def usage(code
, msg
=''):
94 print __doc__
% globals()
101 def resolve(rawaddr
):
102 parts
= rawaddr
.split('.')
104 # no top level domain found, bounce it to the next step
107 if nameorgs
.has_key(addr
):
108 print rawaddr
, 'is in the', nameorgs
[addr
], 'top level domain'
110 elif countries
.has_key(addr
):
111 print rawaddr
, 'originated from', countries
[addr
]
114 # Not resolved, bounce it to the next step
121 cre
= re
.compile(regexp
, re
.IGNORECASE
)
122 for code
, country
in all
.items():
123 mo
= cre
.search(country
)
128 # not resolved, bounce it to the next step
130 if len(matches
) == 1:
132 print regexp
, "matches code `%s', %s" % (code
, all
[code
])
134 print regexp
, 'matches %d countries:' % len(matches
)
136 print " %s: %s" % (code
, all
[code
])
141 def parse(file, normalize
):
144 except IOError, (err
, msg
):
147 cre
= re
.compile('(.*?)[ \t]+([A-Z]{2})[ \t]+[A-Z]{3}[ \t]+[0-9]{3}')
151 print 'countries = {'
166 print 'Could not parse line:', line
168 country
, code
= mo
.group(1, 2)
170 words
= country
.split()
171 for i
in range(len(words
)):
174 if w
in ('AND', 'OF', 'OF)', 'name:', 'METROPOLITAN'):
176 elif w
== 'THE' and i
<> 1:
178 elif len(w
) > 3 and w
[1] == "'":
179 words
[i
] = w
[0:3].upper() + w
[3:].lower()
180 elif w
in ('(U.S.)', 'U.S.'):
182 elif w
[0] == '(' and w
<> '(local':
183 words
[i
] = '(' + w
[1:].capitalize()
184 elif w
.find('-') <> -1:
186 [s
.capitalize() for s
in w
.split('-')])
188 words
[i
] = w
.capitalize()
190 country
= ' '.join(words
)
191 print ' "%s": "%s",' % (code
, country
)
211 opts
, args
= getopt
.getopt(
214 ['parse=', 'reverse', 'outputdict', 'help', 'dump'])
215 except getopt
.error
, msg
:
218 for opt
, arg
in opts
:
219 if opt
in ('-h', '--help'):
221 elif opt
in ('-d', '--dump'):
223 elif opt
in ('-p', '--parse'):
225 elif opt
in ('-o', '--outputdict'):
227 elif opt
in ('-r', '--reverse'):
234 print 'Non-geographic domains:'
235 codes
= nameorgs
.keys()
238 print ' %4s:' % code
, nameorgs
[code
]
240 print '\nCountry coded domains:'
241 codes
= countries
.keys()
244 print ' %2s:' % code
, countries
[code
]
246 parse(parsefile
, normalize
)
249 args
= filter(None, map(resolve
, args
))
250 args
= filter(None, map(reverse
, args
))
252 print 'Where in the world is %s?' % arg
258 # New top level domains as described by ICANN
259 # http://www.icann.org/tlds/
260 "aero": "air-transport industry",
264 "coop": "cooperatives",
265 "edu": "educational",
267 "info": "unrestricted `info'",
268 "int": "international",
271 "name": "`name' (for registration by individuals)",
273 "org": "non-commercial",
274 "pro": "professionals",
275 # These additional ccTLDs are included here even though they are not part
276 # of ISO 3166. IANA has 5 reserved ccTLDs as described here:
278 # http://www.iso.org/iso/en/prods-services/iso3166ma/04background-on-iso-3166/iso3166-1-and-ccTLDs.html
280 # but I can't find an official list anywhere.
282 # Note that `uk' is the common practice country code for the United
283 # Kingdom. AFAICT, the official `gb' code is routinely ignored!
285 # <D.M.Pick@qmw.ac.uk> tells me that `uk' was long in use before ISO3166
286 # was adopted for top-level DNS zone names (although in the reverse order
287 # like uk.ac.qmw) and was carried forward (with the reversal) to avoid a
288 # large-scale renaming process as the UK switched from their old `Coloured
289 # Book' protocols over X.25 to Internet protocols over IP.
291 # See <url:ftp://ftp.ripe.net/ripe/docs/ripe-159.txt>
293 # Also, `su', while obsolete, is still in limited use.
294 "ac": "Ascension Island",
298 "uk": "United Kingdom (common practice)",
299 "su": "Soviet Union (still in limited use)",
308 "as": "American Samoa",
313 "ag": "Antigua and Barbuda",
331 "ba": "Bosnia and Herzegowina",
333 "bv": "Bouvet Island",
335 "io": "British Indian Ocean Territory",
336 "bn": "Brunei Darussalam",
338 "bf": "Burkina Faso",
344 "ky": "Cayman Islands",
345 "cf": "Central African Republic",
349 "cx": "Christmas Island",
350 "cc": "Cocos (Keeling) Islands",
354 "cd": "Congo, The Democratic Republic of the",
355 "ck": "Cook Islands",
357 "ci": "Cote D'Ivoire",
361 "cz": "Czech Republic",
365 "do": "Dominican Republic",
370 "gq": "Equatorial Guinea",
374 "fk": "Falkland Islands (Malvinas)",
375 "fo": "Faroe Islands",
379 "gf": "French Guiana",
380 "pf": "French Polynesia",
381 "tf": "French Southern Territories",
395 "gw": "Guinea-Bissau",
398 "hm": "Heard Island and Mcdonald Islands",
399 "va": "Holy See (Vatican City State)",
406 "ir": "Iran, Islamic Republic of",
417 "kp": "Korea, Democratic People's Republic of",
418 "kr": "Korea, Republic of",
421 "la": "Lao People's Democratic Republic",
426 "ly": "Libyan Arab Jamahiriya",
427 "li": "Liechtenstein",
431 "mk": "Macedonia, The Former Yugoslav Republic of",
438 "mh": "Marshall Islands",
444 "fm": "Micronesia, Federated States of",
445 "md": "Moldova, Republic of",
456 "an": "Netherlands Antilles",
457 "nc": "New Caledonia",
463 "nf": "Norfolk Island",
464 "mp": "Northern Mariana Islands",
469 "ps": "Palestinian Territory, Occupied",
471 "pg": "Papua New Guinea",
482 "ru": "Russian Federation",
484 "sh": "Saint Helena",
485 "kn": "Saint Kitts and Nevis",
487 "pm": "Saint Pierre and Miquelon",
488 "vc": "Saint Vincent and the Grenadines",
491 "st": "Sao Tome and Principe",
492 "sa": "Saudi Arabia",
495 "sl": "Sierra Leone",
499 "sb": "Solomon Islands",
501 "za": "South Africa",
502 "gs": "South Georgia and the South Sandwich Islands",
507 "sj": "Svalbard and Jan Mayen",
511 "sy": "Syrian Arab Republic",
512 "tw": "Taiwan, Province of China",
514 "tz": "Tanzania, United Republic of",
519 "tt": "Trinidad and Tobago",
522 "tm": "Turkmenistan",
523 "tc": "Turks and Caicos Islands",
527 "ae": "United Arab Emirates",
528 "gb": "United Kingdom",
529 "us": "United States",
530 "um": "United States Minor Outlying Islands",
536 "vg": "Virgin Islands, British",
537 "vi": "Virgin Islands, U.S.",
538 "wf": "Wallis and Futuna",
539 "eh": "Western Sahara",
546 all
= nameorgs
.copy()
547 all
.update(countries
)
550 if __name__
== '__main__':