fetch, unpack v4 and v6 GeoIP data
[blockfinder.git] / blockfinder
blob849f07816744b650897f5079057d50da65406ce7
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
4 # For the people of Smubworld!
5 import urllib2
6 import os
7 import time
8 import getopt
9 import sys
10 from math import floor, log
11 import sqlite3
12 import hashlib
13 from xml.dom import minidom
# Module metadata.
__program__ = 'blockfinder'
__url__ = 'http://github.com/ioerror/blockfinder/'
__author__ = 'Jacob Appelbaum <jacob@appelbaum.net>, dave b. <db@d1b.org>'
# Backward-compatible alias: the name was historically misspelled with three
# leading underscores, which left the conventional __author__ undefined.
___author__ = __author__
__copyright__ = 'Copyright (c) 2010'
__license__ = 'See LICENSE for licensing information'
__version__ = '3.1415'
21 try:
22 import GeoIP
23 import gzip
24 except ImportError:
25 GeoIP = None
27 try:
28 from future import antigravity
29 except ImportError:
30 antigravity = None
def update_progress_bar(percent_done, caption=""):
    """Write a one-line progress bar to the console.

    percent_done -- completed fraction; values outside 0.0..1.0 are clamped.
    caption      -- text printed to the right of the bar.

    The terminal width is taken from the output of ``stty size``.  When that
    yields nothing usable (for example because standard input is not a
    terminal) a width of 80 columns is assumed instead of raising.
    """
    pipe = os.popen('stty size 2>/dev/null', 'r')
    try:
        size = pipe.read().split()
    finally:
        pipe.close()
    try:
        # "stty size" prints "<rows> <columns>"; only the columns are needed.
        columns = int(size[1])
    except (IndexError, ValueError):
        columns = 80
    percent_done = min(max(percent_done, 0.0), 1.0)
    # 4 columns are taken by "[", ">", "]" and the space before the caption.
    width = max(columns - 4 - len(caption), 0)
    filled = int(percent_done * width)
    # The trailing "\x1b[G" escape returns the cursor to the start of the
    # line so that the next call overwrites this bar.
    sys.stdout.write("[%s>%s] %s\x1b[G" % (
        "=" * filled,
        "." * (width - filled),
        caption))
    sys.stdout.flush()
# XXX TODO: allow the use of a proxy
# Set up a proper Request object, set the user agent and if desired, a proxy
def fetch(url, useragent):
    """Fetch (with progress meter) and return the contents of a url.

    url       -- address to download.
    useragent -- value sent in the User-agent request header.

    Returns the complete response body as one string.

    Raises Exception when the reply carries no content-length header or
    when the number of bytes received differs from the announced length.
    """
    req = urllib2.Request(url)
    req.add_header('User-agent', useragent)
    #req.set_proxy(host, type)
    fetcher = urllib2.urlopen(req)
    try:
        length_header = fetcher.headers.get("content-length")
        if length_header is None:
            raise Exception("Missing content-length header in reply from server.")
        length = int(length_header)
        # Divide by a float so the fractional kilobytes are not truncated.
        print("Fetching  %s  kilobytes" % round(length / 1024.0, 2))
        chunks = []
        received = 0
        t_start = time.time()
        while True:
            t_delta = time.time() - t_start
            # Guard both divisions: the clock may not have advanced yet and
            # the server may announce an empty body.
            if t_delta > 0:
                rate = received / 1024.0 / t_delta
            else:
                rate = 0.0
            if length:
                fraction = float(received) / length
            else:
                fraction = 1.0
            update_progress_bar(fraction, "%.2f K/s" % rate)
            tmp = fetcher.read(1024)
            if not tmp:
                break
            # Collect chunks and join once to avoid quadratic concatenation.
            chunks.append(tmp)
            received += len(tmp)
    finally:
        fetcher.close()
    if received != length:
        raise Exception("Expected %s bytes, only received %s" % (
            length, received))
    print("")
    return "".join(chunks)
def cache_delegation(cache_dir, delegation_url, useragent):
    """Attempt to cache the contents of a delegation url in our cache dir.

    cache_dir      -- cache directory path; the file name is appended to it
                      verbatim, so it must end with a path separator.
    delegation_url -- url to download; its last path component becomes the
                      name of the cached file.
    useragent      -- User-agent header value handed to fetch().

    The cache directory is created when it does not exist yet.  Returns True
    when the file was written and False when writing it failed.  Errors from
    fetch() and unexpected errors from os.stat() propagate.
    """
    import errno
    try:
        os.stat(cache_dir)
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise
        if verbose:
            print("Initializing the cache directory...")
        os.mkdir(cache_dir)
    print("Fetching " + delegation_url)
    delegation = fetch(delegation_url, useragent)
    delegation_file = str(cache_dir) + delegation_url.split('/')[-1]
    try:
        # Binary mode: this function is also used for gzip-compressed files,
        # which must be stored byte for byte.
        with open(delegation_file, 'wb') as f:
            f.write(delegation)
        return True
    except (IOError, OSError) as e:
        print(repr(e))
        return False
def cache_is_dated(cache_dir, cached_files):
    """Tell whether any of the cached files is older than 24 hours.

    cache_dir    -- cache directory path, including the trailing separator.
    cached_files -- names of the files inside cache_dir to inspect.

    Returns True as soon as one file has a modification time more than
    86400 seconds in the past, otherwise False.  Raises OSError when the
    cache directory or one of the files cannot be stat'ed.
    """
    try:
        os.stat(cache_dir)
    except OSError:
        print("\nDid you initialize the cache directory?\n")
        raise
    return any(
        (time.time() - os.stat(cache_dir + name).st_mtime) > 86400
        for name in cached_files)
def create_sql_database(cache_dir):
    """Create a new, empty sqlite database in the cache directory.

    cache_dir -- cache directory path, including the trailing separator.

    A previous database file is deleted first.  The new database holds the
    tables asn, ipv4 and ipv6, all with the same columns.
    """
    db_path = cache_dir + "sqlitedb"
    try:
        os.remove(db_path)
    except OSError:
        # Best effort: usually there simply is no previous database.
        pass
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        for table in ("asn", "ipv4", "ipv6"):
            cursor.execute(
                "create table " + table +
                "(registry text, cc text, start text, value INTEGER,"
                " date text, status text)")
        conn.commit()
        cursor.close()
    finally:
        conn.close()
def insert_into_sql_database(delegations, cache_dir):
    """Insert delegation information into the sqlite database.

    delegations -- iterable of delegation files, each an iterable of dicts
                   keyed by registry, cc, type, start, value, date, status.
    cache_dir   -- cache directory path, including the trailing separator.

    Entries whose registry field is numeric or whose cc is "*" are skipped
    (presumably the version header and summary lines of the registry
    statistics files).  Entries with an unknown type or with missing fields
    are skipped as well instead of being written to the wrong table or
    aborting the import.
    """
    known_tables = ("ipv4", "ipv6", "asn")
    columns = ("registry", "cc", "start", "value", "date", "status")
    conn = sqlite3.connect(cache_dir + "sqlitedb")
    try:
        cursor = conn.cursor()
        for delegation in delegations:
            # A file that failed to load may be represented by None.
            for entry in delegation or ():
                registry = str(entry.get('registry', ''))
                if registry.isdigit() or str(entry.get('cc')) == "*":
                    continue
                table = entry.get('type')
                if table not in known_tables:
                    continue
                try:
                    data = [entry[column] for column in columns]
                except KeyError:
                    # Truncated line: not a complete delegation record.
                    continue
                # The table name comes from the fixed whitelist above; all
                # values are bound as parameters.
                text = ("INSERT INTO " + table +
                        " ( registry, cc, start, value, date,status)"
                        " VALUES (?,?,?,?,?,?)")
                cursor.execute(text, data)
        conn.commit()
        cursor.close()
    finally:
        conn.close()
def get_total_delegations_from_db(cache_dir):
    """Return the total number of rows in the ipv4, ipv6 and asn tables.

    cache_dir -- cache directory path, including the trailing separator.
    """
    conn = sqlite3.connect(cache_dir + "sqlitedb")
    try:
        cursor = conn.cursor()
        count = 0
        for table in ("ipv4", "ipv6", "asn"):
            cursor.execute("select count (*) from " + table)
            count += int(cursor.fetchone()[0])
        cursor.close()
        return count
    finally:
        # Close the connection as well as the cursor.
        conn.close()
def get_possible_match_entries(cc, cache_dir):
    """Get the count of 'possible' matching delegation entries.

    cc        -- country code, either as a plain string ("US") or already
                 wrapped in a one-element tuple/list (("US",)).
    cache_dir -- cache directory path, including the trailing separator.

    Returns the number of rows in the ipv4, ipv6 and asn tables whose cc
    column equals the given country code.
    """
    # sqlite3 expects a sequence of parameters; a bare string would be
    # treated as one parameter per character.
    if isinstance(cc, (tuple, list)):
        params = tuple(cc)
    else:
        params = (cc,)
    conn = sqlite3.connect(cache_dir + "sqlitedb")
    try:
        cursor = conn.cursor()
        count = 0
        for table in ("ipv4", "ipv6", "asn"):
            cursor.execute("select count (*) from " + table + " where cc=?", params)
            count += int(cursor.fetchone()[0])
        cursor.close()
        return count
    finally:
        conn.close()
def use_sql_database(request, cc, cache_dir):
    """Print the cached delegations of one kind for a country.

    request   -- table to query: "ipv4", "ipv6" or "asn".  It is spliced
                 into the SQL text, so it must never come from untrusted
                 input.
    cc        -- country code to match against the cc column.
    cache_dir -- cache directory path, including the trailing separator.

    ipv4 rows are printed as start/prefix-length (derived from the host
    count), ipv6 rows as start/value and asn rows as the plain number.  In
    verbose mode summary counts are printed as well.
    """
    # NOTE(review): the summary block at the end is assumed to be entirely
    # conditional on verbose -- confirm against the intended output.
    params = (cc,)
    conn = sqlite3.connect(cache_dir + "sqlitedb")
    try:
        cursor = conn.cursor()
        if verbose:
            print("We have %d entries in our delegation cache." % get_total_delegations_from_db(cache_dir))
        cursor.execute("select start,value from " + request + " where cc=?", params)
        for row in cursor:
            if request == "ipv4":
                print(str(row[0]) + "/" + str(calculate_ipv4_subnet(int(row[1]))))
            elif request == "ipv6":
                print(str(row[0]) + "/" + str(int(row[1])))
            else:
                print(str(int(row[0])))
        if verbose:
            print("We found %d possible entries in our delegation cache." % get_possible_match_entries(params, cache_dir))
            cursor.execute("select count(*) from " + request + " where cc=?", params)
            print("We found %d matching entries in our delegation cache." % int(cursor.fetchone()[0]))
        cursor.close()
    finally:
        # Close the connection as well as the cursor.
        conn.close()
def get_md5_from_delegation_md5_file(cache_dir, delegation_file):
    """Return the md5sum stored in the .md5 file of a delegation file.

    cache_dir       -- cache directory path, including the trailing separator.
    delegation_file -- name of the delegation file; ".md5" is appended to
                       find the checksum file.

    Returns an empty string when the checksum file cannot be read or holds
    no checksum.
    """
    try:
        with open(cache_dir + delegation_file + ".md5", "r") as f:
            content = f.read()
    except (IOError, OSError) as e:
        print(repr(e))
        return ""
    if delegation_file == "delegated-afrinic-latest":
        # For this file the checksum is taken from the start of the content,
        # up to the first whitespace.
        tokens = content.split()
    else:
        # For the other files the checksum is taken from after the "=".
        # Splitting on whitespace instead of slicing off one trailing
        # character keeps the last digit when the newline is missing.
        tokens = content.partition("=")[2].split()
    if tokens:
        return str(tokens[0])
    return ""
def verify_delegation_file(cache_dir, delegation_file):
    """Check a cached delegation file against its published md5 checksum.

    The md5 digest of the cached file is computed and compared with the
    value obtained from get_md5_from_delegation_md5_file().  Returns True
    only when both are equal and the published checksum is not empty;
    returns False otherwise, including when the file cannot be read (the
    error is printed).
    """
    actual_digest = ""
    try:
        handle = open(cache_dir + delegation_file, "rb")
        actual_digest = str(hashlib.md5(handle.read()).hexdigest())
        handle.close()
    except Exception as err:
        print(repr(err))
    expected_digest = get_md5_from_delegation_md5_file(cache_dir, delegation_file)
    return expected_digest != "" and expected_digest == actual_digest
def verify_cache(cache_dir, delegation_files):
    """Verify the checksum of every delegation file in the cache.

    Each file is checked with verify_delegation_file(); the outcome is only
    reported (printed) when the module is in verbose mode.
    """
    for name in delegation_files:
        if verbose:
            print("verifying " + name)
        checksum_ok = verify_delegation_file(cache_dir, name)
        if not verbose:
            continue
        if checksum_ok:
            print("the md5 checksum of " + name + " *matches* the provided checksum")
        else:
            print("the md5 checksum of " + name + " does *not* match the provided checksum")
def update_delegation_cache(cache_dir, delegation_urls, useragent):
    """Fetch multiple delegation urls and cache the contents.

    delegation_urls is a whitespace-separated string of urls.  For each url
    the ".md5" companion is downloaded first; the delegation file itself is
    only downloaded when the cached copy does not verify against that
    checksum.  Always returns True.
    """
    print("Updating delegation cache...")
    for delegation_url in delegation_urls.split():
        cache_delegation(cache_dir, delegation_url + ".md5", useragent)
        cached_name = delegation_url.rpartition('/')[-1]
        if not verify_delegation_file(cache_dir, cached_name):
            cache_delegation(cache_dir, delegation_url, useragent)
    return True
def unpack_geoip_cache(cache_dir, geoip_urls):
    """Unpack the fetched GeoIP files into the blockfinder cache.

    cache_dir  -- cache directory path, including the trailing separator.
    geoip_urls -- whitespace-separated string of urls; the last path
                  component of each url names a gzip file in cache_dir.

    Every archive is decompressed next to itself, with the final extension
    removed from the name.  Always returns True; errors while reading or
    writing propagate.
    """
    # This probably should unlink the gzip'ed file if we care about space...
    for url in geoip_urls.split():
        # Derive the name from this url, not from the whole url list,
        # otherwise only the last archive would ever be unpacked.
        gzip_filename = url.rpartition('/')[-1]
        gunzipped_filename = gzip_filename.rpartition('.')[0]
        if verbose:
            print("Unpacking GeoIP file " + gzip_filename + " into our cache as " + gunzipped_filename)
        gzip_file = gzip.open(cache_dir + gzip_filename, 'rb')
        try:
            gunzipped_data = gzip_file.read()
        finally:
            gzip_file.close()
        # Binary mode and a single write: the payload is binary data.
        with open(cache_dir + gunzipped_filename, 'wb') as gunzipped_file:
            gunzipped_file.write(gunzipped_data)
    return True
def update_geoip_cache(cache_dir, geoip_urls, useragent):
    """Download the country-level GeoIP files and unpack them.

    geoip_urls is a whitespace-separated string of urls; each one is stored
    in the cache with cache_delegation() and the downloaded archives are
    then handed to unpack_geoip_cache().
    """
    # NOTE(review): the unpack step is assumed to run once, after all
    # downloads have finished -- confirm against the intended layout.
    print("Updating GeoIP cache...")
    for geoip_url in geoip_urls.split():
        cache_delegation(cache_dir, geoip_url, useragent)
    unpack_geoip_cache(cache_dir, geoip_urls)
def load_delegation(delegation_file):
    """Load and parse a delegation file into a list of dicts.

    delegation_file -- path of the file to read.

    Every line that is neither blank nor a "#" comment is split on "|" and
    its fields are paired, in order, with the keys registry, cc, type,
    start, value, date and status (surplus fields are dropped, short lines
    yield fewer keys).  The line terminator is removed so it does not end
    up inside the last field.

    Returns an empty list when the file cannot be read (the error is
    printed).
    """
    keys = "registry cc type start value date status".split()
    try:
        with open(delegation_file, "r") as f:
            lines = f.readlines()
    except (IOError, OSError) as e:
        # open() raises IOError on Python 2, so catching OSError alone
        # would miss it.
        print(repr(e))
        return []
    delegations = []
    for line in lines:
        line = line.rstrip("\r\n")
        if not line.strip() or line.startswith("#"):
            continue
        delegations.append(dict(zip(keys, line.split("|"))))
    return delegations
def load_all_delegations(cache_dir, delegation_urls):
    """Load every cached delegation file into memory.

    delegation_urls is a whitespace-separated string of urls; the last path
    component of each names the cached file inside cache_dir.  Returns a
    list with one load_delegation() result per url, in url order.
    """
    loaded = []
    for delegation_url in delegation_urls.split():
        basename = delegation_url.rpartition('/')[-1]
        if verbose:
            print("Attempting to load delegation file into memory: " + basename)
        loaded.append(load_delegation(cache_dir + basename))
    return loaded
def calculate_ipv4_subnet(host_count):
    """Return the IPv4 prefix length of a block holding host_count addresses.

    host_count -- number of addresses in the block; must be at least 1.

    The result is 32 minus the base-2 logarithm of host_count, rounded
    down, e.g. 256 -> 24.  The logarithm is computed with integer shifts
    because a floating-point log can land marginally off an exact integer.

    Raises ValueError when host_count is smaller than 1.
    """
    remaining = int(host_count)
    if remaining < 1:
        raise ValueError("host_count must be a positive integer")
    bits = 0
    while remaining > 1:
        remaining >>= 1
        bits += 1
    return 32 - bits
def download_country_code_file(cache_dir, useragent):
    """Download and save the latest opencountrycode XML file.

    cache_dir -- cache directory path, including the trailing separator;
                 the data is stored there as countrycodes.xml.
    useragent -- value sent in the User-agent request header.

    Returns True when the file was written and False when writing failed
    (the error is printed).  Download errors propagate.
    """
    # Google frontend will not return content-length for some reason...
    url = "http://opencountrycodes.appspot.com/xml"
    req = urllib2.Request(url)
    # Honour the caller's user agent instead of ignoring the parameter.
    req.add_header('User-agent', useragent)
    ul = urllib2.urlopen(req)
    try:
        xml = ul.read()
    finally:
        ul.close()
    try:
        # Binary mode stores the payload exactly as received.
        with open(cache_dir + "countrycodes.xml", 'wb') as f:
            f.write(xml)
        return True
    except (IOError, OSError) as e:
        print(repr(e))
        return False
def build_country_code_dictionary(cache_dir):
    """Return a dictionary mapping country name to country code.

    The pairs are read from the "code" and "name" attributes of every
    <country> element in countrycodes.xml inside cache_dir.
    """
    document = minidom.parse(str(cache_dir) + "countrycodes.xml")
    code_name_pairs = (
        (node.attributes["code"].value, node.attributes["name"].value)
        for node in document.getElementsByTagName("country"))
    return dict((name, code) for code, name in code_name_pairs)
def build_country_code_dictionary_rev(cache_dir):
    """Return a dictionary mapping country code to country name.

    The pairs are read from the "code" and "name" attributes of every
    <country> element in countrycodes.xml inside cache_dir.
    """
    document = minidom.parse(str(cache_dir) + "countrycodes.xml")
    return dict(
        (node.attributes["code"].value, node.attributes["name"].value)
        for node in document.getElementsByTagName("country"))
def get_country_code_from_name(cache_dir, country_name):
    """Return the country code for a given country name.

    cache_dir    -- cache directory holding countrycodes.xml.
    country_name -- full country name or a leading part of it; compared
                    case-insensitively.

    An exact name match always wins.  Otherwise the alphabetically first
    country whose name starts with country_name is used, so the result no
    longer depends on dictionary ordering.  Returns None when nothing
    matches.
    """
    map_co = build_country_code_dictionary(cache_dir)
    wanted = country_name.upper()
    prefix_matches = []
    for name in sorted(map_co):
        candidate = name.upper()
        if candidate == wanted:
            return map_co[name]
        if candidate.startswith(wanted):
            prefix_matches.append(map_co[name])
    if prefix_matches:
        return prefix_matches[0]
    return None
def ip_address_to_dec(ip_addr):
    """Convert a dotted-quad IPv4 address into its integer value.

    ip_addr -- string such as "10.0.0.1"; the first four dot-separated
               fields are used.

    Returns the address as an integer, e.g. "10.0.0.1" -> 167772161.
    Raises ValueError when fewer than four fields are present or a field
    is not an integer in the range 0..255.
    """
    fields = ip_addr.split('.')
    if len(fields) < 4:
        raise ValueError("not a dotted-quad IPv4 address: %r" % (ip_addr,))
    decimal = 0
    for field in fields[:4]:
        octet = int(field)
        if not 0 <= octet <= 255:
            raise ValueError("octet out of range in %r" % (ip_addr,))
        # Shifting avoids the hex-string padding, which mishandled the
        # value 15 (a single hex digit that was not zero-padded).
        decimal = (decimal << 8) | octet
    return decimal
def geoip_lookup(cache_dir, ip_addr):
    """Look an address up in the cached GeoIP.dat database.

    Returns a (country code, country name) tuple as reported by the GeoIP
    module for ip_addr.
    """
    database = GeoIP.open(cache_dir + "GeoIP.dat", GeoIP.GEOIP_STANDARD)
    country_code = database.country_code_by_addr(ip_addr)
    country_name = database.country_name_by_addr(ip_addr)
    return country_code, country_name
def lookup_ip_address(ip_addr, cache_dir):
    """Print the country code and name for a given IPv4 address.

    ip_addr   -- dotted-quad IPv4 address.
    cache_dir -- cache directory path, including the trailing separator.

    GeoIP is consulted first when the module is available.  The ipv4 table
    of the delegation database is then searched for a block containing the
    address.  When both sources answer and disagree, a warning is printed.
    Exits with status 5 when ip_addr does not look like an IPv4 address.
    """
    print("Reverse lookup for: " + ip_addr)
    geoip_cc = None
    if GeoIP:
        geoip_cc, geoip_cc_name = geoip_lookup(cache_dir, ip_addr)
        print("GeoIP country code: " + str(geoip_cc))
        print("GeoIP country name: " + str(geoip_cc_name))
    ipv4arr = ip_addr.split('.')
    if len(ipv4arr) < 4:
        print("doesn't look like an ipv4 address..")
        sys.exit(5)
    target = ip_address_to_dec(ip_addr)
    cc_map = build_country_code_dictionary_rev(cache_dir)
    # Stays None when no delegation contains the address.
    rir_cc = None
    conn = sqlite3.connect(cache_dir + "sqlitedb")
    try:
        cursor = conn.cursor()
        # Pre-filter on the first octet (including the dot, so that "1" does
        # not also select "10." or "100.").  Blocks that start in a different
        # first octet are not considered.
        cursor.execute('select * from ipv4 WHERE start LIKE ?', (ipv4arr[0] + '.%',))
        for row in cursor:
            first = ip_address_to_dec(row[2])
            # row[3] is the number of addresses in the block, so the block
            # ends at first + row[3] - 1.
            if first <= target < first + row[3]:
                rir_cc = row[1]
                rir_cc_name = cc_map.get(rir_cc, "unknown")
                print('RIR country code: ' + rir_cc)
                print('RIR country: ' + rir_cc_name)
                break
        cursor.close()
    finally:
        conn.close()
    if rir_cc is None:
        print("No matching RIR delegation found in the cache.")
    elif GeoIP and geoip_cc != rir_cc:
        print("It appears that the RIR data conflicts with the GeoIP data")
        print("The GeoIP data is likely closer to being correct due to "
              "sub-delegation issues with LIR databases")
def usage():
    """ Print usage information to standard error. """
    sys.stderr.write("""
blockfinder [-c DIR] -i
blockfinder [options] -t COUNTRY

The first form initializes the local cache. The second form queries it.

Understood options (not all of which are implemented yet):
    -h, --help                       Show this help and exit
    -v                               Be verbose
    -c, --cachedir DIR               Set the cache directory
    -u, --useragent
    -p, --progress
    -o, --output FILE
    -4, --ipv4                       Search IPv4 allocations
    -6, --ipv6                       Search IPv6 allocation
    -a, --asn                        Search ASN allocations
    -t, --nation-state CC            Set the country to search (given as a two-letter code)
    -n, --country-name "Costa Rica"  Set country to search (full name)
    -x, --hack-the-internet          Hack the internet
    -r, --reverse-lookup             Return the country name for the specified IP
    -i                               Initialize or update the local cache

At least one of -t or -i is required, and when in -t mode, at least one of -4,
-6, and -a is required in order to do anything sensible.
""" + "\n")
def main():
    """ Where the magic starts.

    Parses the command line (sys.argv), then either updates the local cache
    (-i), performs a reverse lookup (-r) or prints the delegations of a
    country (-t / -n combined with -4, -6, -a).

    Exit codes used here: 0 after help, a cache update or a reverse lookup,
    1 when nothing was requested or no country matched, 2 for a malformed
    command line, 3 for an unhandled option, 4 for -x.
    """
    global verbose
    try:
        # Long options that take a value need a trailing "="; the name of
        # the initialize option must match the handler below.
        opts, args = getopt.getopt(sys.argv[1:],
            "xvhc:u:pso:46at:n:ir:",
            ["hack-the-internet", "verbose", "help", "cachedir=", "useragent=",
             "progress", "silent", "output=", "ipv4", "ipv6", "asn",
             "nation-state=", "country-name=", "initialize-delegations",
             "reverse-lookup="])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(2)

    verbose = False
    cache_dir = str(os.path.expanduser('~')) + "/.blockfinder/"
    delegation_urls = """
        ftp://ftp.arin.net/pub/stats/arin/delegated-arin-latest
        ftp://ftp.ripe.net/ripe/stats/delegated-ripencc-latest
        ftp://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-latest
        ftp://ftp.apnic.net/pub/stats/apnic/delegated-apnic-latest
        ftp://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-latest
    """
    geoip_country_urls = """http://geolite.maxmind.com/download/geoip/database/GeoLiteCountry/GeoIP.dat.gz
        http://geolite.maxmind.com/download/geoip/database/GeoIPv6.dat.gz"""
    delegation_files = [url.rpartition('/')[-1] for url in delegation_urls.split()]
    update_delegations = False
    requests = []
    country = ""
    # Name given with -n; resolved after all options are known because the
    # lookup depends on the final cache directory.
    country_name = None
    useragent = "Mozilla/5.0"
    ipaddress = ""

    for o, a in opts:
        if o in ("-x", "--hack-the-internet"):
            print("all your bases are belong to us!")
            sys.exit(4)
        elif o in ("-v", "--verbose"):
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-c", "--cachedir"):
            cache_dir = a
        elif o in ("-u", "--useragent"):
            useragent = a
        elif o in ("-p", "--progress", "-s", "--silent", "-o", "--output"):
            # Accepted for compatibility; not implemented yet.
            pass
        elif o in ("-4", "--ipv4"):
            requests.append("ipv4")
        elif o in ("-6", "--ipv6"):
            requests.append("ipv6")
        elif o in ("-a", "--asn"):
            requests.append("asn")
        # XXX TODO: This should be a positional argument as it's the only mandatory one...
        elif o in ("-r", "--reverse-lookup"):
            ipaddress = a
            requests.append("reverse")
        elif o in ("-t", "--nation-state"):
            country = a.upper()
            country_name = None
        elif o in ("-n", "--country-name"):
            country = ""
            country_name = a
        elif o in ("-i", "--initialize-delegations"):
            update_delegations = True
        else:
            print("Unhandled option; Sorry!")
            sys.exit(3)

    # File names are appended to cache_dir verbatim throughout the module,
    # so make sure a user-supplied directory ends with a separator.
    if not cache_dir.endswith(("/", os.sep)):
        cache_dir += "/"

    # On a first -i run the directory must exist before anything is saved.
    if update_delegations and not os.path.isdir(cache_dir):
        if verbose:
            print("Initializing the cache directory...")
        os.makedirs(cache_dir)

    # Fetch the country code list only now, so that -c and -u are honoured
    # and -h does not need network access.
    if not os.path.exists(cache_dir + "countrycodes.xml"):
        download_country_code_file(cache_dir, useragent)

    if country_name is not None:
        country = get_country_code_from_name(cache_dir, country_name)

    # Update and quit.
    if update_delegations:
        if GeoIP:
            update_geoip_cache(cache_dir, geoip_country_urls, useragent)
        update_delegation_cache(cache_dir, delegation_urls, useragent)
        if verbose:
            verify_cache(cache_dir, delegation_files)
        delegations = load_all_delegations(cache_dir, delegation_urls)
        create_sql_database(cache_dir)
        insert_into_sql_database(delegations, cache_dir)
        sys.exit(0)

    if not requests:
        print("Nothing to do. Have you requested anything?")
        print("Example usage: blockfinder -v --ipv4 -t mm")
        sys.exit(1)

    if ipaddress:
        lookup_ip_address(ipaddress, cache_dir)
        sys.exit(0)
    if not country:
        print("It appears your search did not match a country.")
        sys.exit(1)
    # Check our cache age and warn if it's aged
    if cache_is_dated(cache_dir, delegation_files) and verbose:
        print("Your delegation cache is older than 24 hours; you probably want to update it.")
    if verbose:
        print("Using country code: %s" % country)

    for request in requests:
        try:
            use_sql_database(request, country, cache_dir)
        except IOError:
            sys.exit()
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()