unpack geoip data into cache
[blockfinder.git] / blockfinder
blob5c909dab971967c8945ab00077e03d0d84f9eba0
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
4 # For the people of Smubworld!
5 import urllib2
6 import os
7 import time
8 import getopt
9 import sys
10 from math import floor, log
11 import sqlite3
12 import hashlib
13 from xml.dom import minidom
# Module metadata.
__program__ = 'blockfinder'
__url__ = 'http://github.com/ioerror/blockfinder/'
# The author string used to be bound only to a misspelled name with three
# leading underscores, so the conventional __author__ attribute did not exist.
# Bind the conventional name and keep the old spelling as an alias.
__author__ = 'Jacob Appelbaum <jacob@appelbaum.net>, dave b. <db@d1b.org>'
___author__ = __author__
__copyright__ = 'Copyright (c) 2010'
__license__ = 'See LICENSE for licensing information'
__version__ = '3.1415'
21 try:
22 import GeoIP
23 import gzip
24 except ImportError:
25 GeoIP = None
27 try:
28 from future import antigravity
29 except ImportError:
30 antigravity = None
def update_progress_bar(percent_done, caption=""):
    """ Write a progress bar to the console.

    percent_done is the completed fraction (0.0 .. 1.0); values outside that
    range are clamped so the bar never grows past the terminal width.
    caption is printed to the right of the bar.

    The terminal width is read from `stty size`.  When that yields nothing
    usable (for example when not attached to a terminal) a width of 80
    columns is assumed instead of raising. """
    columns = 80
    pipe = os.popen('stty size 2>/dev/null', 'r')
    try:
        # Unpacking raises ValueError unless stty printed exactly two numbers.
        rows, columns = map(int, pipe.read().split())
    except ValueError:
        columns = 80
    finally:
        pipe.close()
    # Never let a long caption or a tiny terminal produce a negative width.
    width = max(columns - 4 - len(caption), 0)
    fraction = min(max(percent_done, 0.0), 1.0)
    filled = int(fraction * width)
    # "\x1b[G" moves the cursor back to column 1 so the next call overwrites
    # the bar in place.
    sys.stdout.write("[%s>%s] %s\x1b[G" % (
        "=" * filled,
        "." * (width - filled),
        caption))
    sys.stdout.flush()
# XXX TODO: allow the use of a proxy.
# Set up a proper Request object, set the user agent and, if desired, a proxy.
def fetch(url, useragent):
    """ Fetch (with progress meter) and return the contents of a url.

    url is opened through urllib2 with the given useragent sent as the
    User-agent header.  The body is read in 1 KiB chunks while the progress
    bar is redrawn, and returned as a single string.

    Raises Exception when the reply has no content-length header or when the
    number of bytes received differs from the announced length. """
    req = urllib2.Request(url)
    req.add_header('User-agent', useragent)
    #req.set_proxy(host, type)
    fetcher = urllib2.urlopen(req)
    try:
        length_header = fetcher.headers.get("content-length")
        if length_header is None:
            raise Exception("Missing content-length header in reply from server.")
        length = int(length_header)
        # Divide by a float: integer division truncated the figure before it
        # was rounded.
        print("Fetching  %s  kilobytes" % round(length / 1024.0, 2))
        # Collect chunks and join once instead of growing a string in the loop.
        chunks = []
        received = 0
        t_start = time.time()
        while True:
            t_delta = time.time() - t_start
            # Guard both divisions: a zero-length body and a zero elapsed
            # time (first iteration on a coarse clock) are both possible.
            if length > 0:
                fraction = float(received) / length
            else:
                fraction = 1.0
            if t_delta > 0:
                rate = received / 1024.0 / t_delta
            else:
                rate = 0.0
            update_progress_bar(fraction, "%.2f K/s" % rate)
            tmp = fetcher.read(1024)
            if len(tmp) == 0:
                if received != length:
                    # Expected count first, received count second.
                    raise Exception("Expected %s bytes, only received %s" % (
                        length, received))
                print("")
                return b"".join(chunks)
            chunks.append(tmp)
            received += len(tmp)
    finally:
        fetcher.close()
def cache_delegation(cache_dir, delegation_url, useragent):
    """ Attempt to cache the contents of a delegation url in our cache dir.

    The cache directory is created (including missing parents) when it does
    not exist yet.  The url is downloaded with fetch() and stored in
    cache_dir under the last path component of the url; cache_dir is
    concatenated as-is, so it must end with a path separator.

    Returns True when the file was written, False when writing failed (the
    error is printed).  Errors raised by fetch() or by creating the directory
    propagate to the caller. """
    if not os.path.isdir(cache_dir):
        if verbose:
            print("Initializing the cache directory...")
        os.makedirs(cache_dir)
    print("Fetching " + delegation_url)
    delegation = fetch(delegation_url, useragent)
    delegation_file = str(cache_dir) + str(delegation_url.split('/')[-1])
    try:
        # Binary mode: this function is also used for gzip'ed GeoIP data,
        # which text mode can corrupt on some platforms.
        with open(delegation_file, 'wb') as f:
            f.write(delegation)
        return True
    except (IOError, OSError) as e:
        print(repr(e))
        return False
def cache_is_dated(cache_dir, cached_files):
    """ Tell whether any of the cached files is older than 24 hours.

    cached_files holds file names that are appended directly to cache_dir.
    Returns True as soon as one file's mtime is more than 86400 seconds in
    the past, otherwise False.  A missing cache directory prints a hint and
    re-raises the OSError; a missing file raises OSError as well. """
    try:
        os.stat(cache_dir)
    except OSError as e:
        print("\nDid you initialize the cache directory?\n")
        raise e
    one_day = 86400
    return any(
        (time.time() - os.stat(cache_dir + name).st_mtime) > one_day
        for name in cached_files)
def create_sql_database(cache_dir):
    """ Creates a new sqlite database.
    If there is a previous sqlite database it will be deleted.

    The database file is cache_dir + "sqlitedb" and receives three empty
    tables (asn, ipv4, ipv6) that share the same six columns. """
    db_path = cache_dir + "sqlitedb"
    # Remove only a file that exists, so a real failure (e.g. permissions)
    # surfaces here rather than as a confusing "table already exists" later.
    if os.path.exists(db_path):
        os.remove(db_path)
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        for table in ("asn", "ipv4", "ipv6"):
            cursor.execute("create table " + table +
                "(registry text, cc text, start text, value INTEGER, date text, status text)")
        conn.commit()
        cursor.close()
    finally:
        conn.close()
def insert_into_sql_database(delegations, cache_dir):
    """ inserts delegation information into the sqlite database

    delegations is a list with one element per delegation file; each element
    is a list of dicts keyed by registry, cc, type, start, value, date and
    status.  Entries are skipped when:
      - the registry field is numeric (the file's version header line),
      - cc is "*" (summary lines),
      - the type is not one of ipv4, ipv6 or asn,
      - any of the six stored columns is missing (blank or short lines).
    A delegation that is None or empty is ignored. """
    known_tables = ("ipv4", "ipv6", "asn")
    columns = ("registry", "cc", "start", "value", "date", "status")
    conn = sqlite3.connect(cache_dir + "sqlitedb")
    try:
        cursor = conn.cursor()
        for delegation in delegations:
            for entry in delegation or ():
                if str(entry.get('registry', '')).isdigit():
                    continue
                if str(entry.get('cc', '*')) == "*":
                    continue
                # The table name comes from a fixed whitelist, so it is safe
                # to splice into the statement; unknown types are dropped
                # rather than reusing the table of the previous entry.
                table = entry.get('type')
                if table not in known_tables:
                    continue
                if any(column not in entry for column in columns):
                    continue
                text = "INSERT INTO " + table + \
                    " ( registry, cc, start, value, date,status) VALUES (?,?,?,?,?,?)"
                cursor.execute(text, [entry[column] for column in columns])
        conn.commit()
        cursor.close()
    finally:
        conn.close()
def get_total_delegations_from_db(cache_dir):
    """ Returns the total count of the number of entries in the ipv4, ipv6 and asn table

    The database is read from cache_dir + "sqlitedb"; the connection is
    closed again before returning, also when a query fails. """
    conn = sqlite3.connect(cache_dir + "sqlitedb")
    try:
        cursor = conn.cursor()
        count = 0
        for table in ("ipv4", "ipv6", "asn"):
            cursor.execute("select count (*) from " + table)
            count += int(cursor.fetchone()[0])
        cursor.close()
        return count
    finally:
        conn.close()
def get_possible_match_entries(cc, cache_dir):
    """ Get the count of 'possible' matching delegation entries

    cc is the country code to count across the ipv4, ipv6 and asn tables.
    It may be a plain string ("US") or an already wrapped one-element
    tuple/list (("US",)).  Both are accepted because sqlite3 expects a
    sequence of bindings: a bare two-letter string would be taken as two
    bindings and be rejected. """
    if isinstance(cc, (tuple, list)):
        params = tuple(cc)
    else:
        params = (cc,)
    conn = sqlite3.connect(cache_dir + "sqlitedb")
    try:
        cursor = conn.cursor()
        count = 0
        for table in ("ipv4", "ipv6", "asn"):
            cursor.execute("select count (*) from " + table + " where cc=?", params)
            count += int(cursor.fetchone()[0])
        cursor.close()
        return count
    finally:
        conn.close()
def use_sql_database(request, cc, cache_dir):
    """ Use the sqlite database that is created after fetching delegations
    to output information for a given request

    request selects the table and must be "ipv4", "ipv6" or "asn"; anything
    else raises ValueError, because the name is spliced into the SQL text.
    cc is the country code to match.

    One line is printed per matching row: "start/prefix" for ipv4 (prefix
    derived from the host count), "start/value" for ipv6, and the start
    number for asn.  In verbose mode cache statistics are printed too. """
    if request not in ("ipv4", "ipv6", "asn"):
        raise ValueError("Unknown request type: %r" % (request,))
    params = (cc,)
    conn = sqlite3.connect(cache_dir + "sqlitedb")
    try:
        cursor = conn.cursor()
        if verbose:
            print("We have %d entries in our delegation cache." % get_total_delegations_from_db(cache_dir))
        cursor.execute("select start,value from " + request + " where cc=?", params)
        matches = 0
        for row in cursor:
            matches += 1
            if request == "ipv4":
                print(str(row[0]) + "/" + str(calculate_ipv4_subnet(int(row[1]))))
            elif request == "ipv6":
                print(str(row[0]) + "/" + str(int(row[1])))
            else:
                print(str(int(row[0])))
        if verbose:
            # The helper is handed the wrapped tuple, the form it has always
            # been called with.
            print("We found %d possible entries in our delegation cache." % get_possible_match_entries(params, cache_dir))
            # Rows were counted while printing; no second count query needed.
            print("We found %d matching entries in our delegation cache." % matches)
        cursor.close()
    finally:
        conn.close()
def get_md5_from_delegation_md5_file(cache_dir, delegation_file):
    """ Returns the md5sum from the delegation md5 file
    if it doesn't exist it returns an empty string

    Two layouts are understood:
      - "<digest>  <filename>"   (used for delegated-afrinic-latest, and as
                                  a fallback whenever no "=" is present)
      - "MD5 (<filename>) = <digest>"
    Surrounding whitespace is stripped, so the digest stays intact whether
    or not the file ends with a newline. """
    checksum = ""
    try:
        with open(cache_dir + delegation_file + ".md5", "r") as f:
            contents = f.read()
    except (IOError, OSError) as e:
        print(repr(e))
        return checksum
    if delegation_file == "delegated-afrinic-latest" or "=" not in contents:
        fields = contents.split()
        if fields:
            checksum = str(fields[0])
    else:
        checksum = str(contents.split("=", 1)[1].strip())
    return checksum
def verify_delegation_file(cache_dir, delegation_file):
    """ compares the delegation file md5sum to that of the provided md5sum
    returns True if they match otherwise returns False

    The delegation file is hashed in chunks and always closed again.  A file
    that cannot be read is reported by printing the error and counts as a
    mismatch, as does an empty or missing provided checksum. """
    checksum_of_file = ""
    try:
        with open(cache_dir + delegation_file, "rb") as f:
            digest = hashlib.md5()
            # Hash in chunks so large files are not held in memory at once.
            for chunk in iter(lambda: f.read(65536), b""):
                digest.update(chunk)
            checksum_of_file = str(digest.hexdigest())
    except (IOError, OSError) as e:
        print(repr(e))
    checksum = get_md5_from_delegation_md5_file(cache_dir, delegation_file)
    # An empty provided checksum never verifies, even against an unreadable
    # file whose computed checksum is also "".
    return checksum != "" and checksum == checksum_of_file
def verify_cache(cache_dir, delegation_files):
    """ Check the md5 checksum of every delegation file in the cache.

    Each file is always verified; the "verifying ..." line and the outcome
    line are printed only when the global verbose flag is set. """
    for name in delegation_files:
        if verbose:
            print("verifying " + name)
        matched = verify_delegation_file(cache_dir, name)
        if not verbose:
            continue
        if matched:
            print("the md5 checksum of " + name + " *matches* the provided checksum")
        else:
            print("the md5 checksum of " + name + " does *not* match the provided checksum")
def update_delegation_cache(cache_dir, delegation_urls, useragent):
    """ Refresh the cached copy of every delegation url.

    delegation_urls is one whitespace-separated string of urls.  For each
    url the ".md5" companion is fetched first; the delegation file itself is
    downloaded only when the cached copy does not verify against that
    checksum.  Always returns True. """
    print("Updating delegation cache...")
    for url in delegation_urls.split():
        cache_delegation(cache_dir, url + ".md5", useragent)
        cached_name = url.rpartition('/')[-1]
        if not verify_delegation_file(cache_dir, cached_name):
            cache_delegation(cache_dir, url, useragent)
    return True
def unpack_geoip_cache(cache_dir, geoip_url):
    """ Unpack the fetched GeoIP file into the blockfinder cache.

    The gzip'ed file is expected in cache_dir under the last path component
    of geoip_url; the unpacked data is written next to it under the same
    name minus its final extension.  Returns True on success.

    Raises ValueError when the file name has no extension to strip; errors
    from reading or writing propagate. """
    # This probably should unlink the gzip'ed file if we care about space...
    gzip_filename = geoip_url.rpartition('/')[-1]
    gunziped_filename = gzip_filename.rpartition('.')[0]
    if not gunziped_filename:
        # Without an extension the target name would be empty and the write
        # would be aimed at the cache directory itself.
        raise ValueError("Cannot derive an unpacked file name from %r" % (gzip_filename,))
    if verbose:
        print("Unpacking GeoIP file " + gzip_filename + " into our cache as " + gunziped_filename)
    gzip_file = gzip.open(cache_dir + gzip_filename, 'rb')
    try:
        gunzipped_data = gzip_file.read()
    finally:
        gzip_file.close()
    # The payload is binary: write it in binary mode and in one call instead
    # of one character at a time through writelines().
    with open(cache_dir + gunziped_filename, 'wb') as gunzipped_file:
        gunzipped_file.write(gunzipped_data)
    return True
def update_geoip_cache(cache_dir, geoip_url, useragent):
    """ Fetch country level resolution GeoIP file from a given url and cache
    the contents. Unpack it if it's compressed.

    Returns False when the download could not be stored (nothing is unpacked
    in that case), otherwise the result of unpacking. """
    print("Updating GeoIP cache...")
    # Do not try to unpack a file that was never written.
    if not cache_delegation(cache_dir, geoip_url, useragent):
        return False
    return unpack_geoip_cache(cache_dir, geoip_url)
def load_delegation(delegation_file):
    """ Load, parse and store the delegation file contents as a list.

    Every non-comment, non-blank line is split on "|" and zipped with the
    keys registry, cc, type, start, value, date and status, giving one dict
    per line.  Lines with fewer fields give dicts with fewer keys; extra
    fields are ignored.  Line endings are stripped so the last field does
    not carry a trailing newline.

    Returns the list of dicts, or an empty list (after printing the error)
    when the file cannot be opened. """
    keys = "registry cc type start value date status".split()
    try:
        f = open(delegation_file, "r")
    except (IOError, OSError) as e:
        # open() raises IOError on Python 2, which "except OSError" missed.
        print(repr(e))
        return []
    try:
        delegations = []
        for line in f:
            line = line.rstrip("\r\n")
            if not line.strip() or line.startswith("#"):
                continue
            delegations.append(dict(zip(keys, line.split("|"))))
        return delegations
    finally:
        f.close()
def load_all_delegations(cache_dir, delegation_urls):
    """ Read every cached delegation file into memory.

    delegation_urls is one whitespace-separated string of urls; the file
    name of each is its last path component, looked up inside cache_dir.
    Returns a list holding load_delegation()'s result for each url, in url
    order. """
    loaded = []
    filenames = [url.rpartition('/')[-1] for url in delegation_urls.split()]
    for filename in filenames:
        if verbose:
            print("Attempting to load delegation file into memory: " + filename)
        loaded.append(load_delegation(cache_dir + filename))
    return loaded
def calculate_ipv4_subnet(host_count):
    """ Turn a number of addresses into a CIDR prefix length.

    The result is 32 minus the position of the highest set bit of
    host_count, i.e. host_count is rounded down to a power of two.
    Raises ValueError for counts below one. """
    remaining = int(host_count)
    if remaining < 1:
        raise ValueError("math domain error")
    exponent = 0
    # Count how often the value can be halved: floor(log2(host_count)).
    while remaining > 1:
        remaining >>= 1
        exponent += 1
    return 32 - exponent
def download_country_code_file(cache_dir, useragent):
    """ Download and save the latest opencountrycode XML file

    The document is requested with the given useragent and stored as
    cache_dir + "countrycodes.xml"; the cache directory is created first
    when missing.  Returns True on success.  Download and write errors are
    printed and reported by returning False. """
    # Google frontend will not return content-length for some reason...
    # (so the progress-reporting fetch() cannot be used here).
    url = "http://opencountrycodes.appspot.com/xml"
    req = urllib2.Request(url)
    req.add_header('User-agent', useragent)
    try:
        ul = urllib2.urlopen(req)
        try:
            xml = ul.read()
        finally:
            ul.close()
        # This may run before the cache has ever been initialized.
        if not os.path.isdir(cache_dir):
            os.makedirs(cache_dir)
        with open(cache_dir + "countrycodes.xml", 'wb') as f:
            f.write(xml)
        return True
    except (IOError, OSError) as e:
        # urllib2.URLError derives from IOError, so network failures land
        # here as well.
        print(repr(e))
        return False
def build_country_code_dictionary(cache_dir):
    """ Build a country name -> country code lookup table.

    Reads cache_dir + "countrycodes.xml" and takes the "name" and "code"
    attributes of every <country> element. """
    document = minidom.parse(str(cache_dir) + "countrycodes.xml")

    def code_and_name(node):
        attrs = node.attributes
        return attrs["code"].value, attrs["name"].value

    pairs = map(code_and_name, document.getElementsByTagName("country"))
    return dict((name, code) for code, name in pairs)
def build_country_code_dictionary_rev(cache_dir):
    """ Build a country code -> country name lookup table.

    Reads cache_dir + "countrycodes.xml" and takes the "code" and "name"
    attributes of every <country> element. """
    document = minidom.parse(str(cache_dir) + "countrycodes.xml")

    def code_and_name(node):
        attrs = node.attributes
        return attrs["code"].value, attrs["name"].value

    return dict(map(code_and_name, document.getElementsByTagName("country")))
def get_country_code_from_name(cache_dir, country_name):
    """ Return the country code for a given country name.

    Matching is case-insensitive.  An exact name match wins; otherwise the
    alphabetically first country whose name starts with country_name is
    used, so the answer does not depend on dictionary ordering.  Returns
    None when nothing matches or when country_name is empty. """
    if not country_name:
        # An empty prefix would match every country.
        return None
    wanted = country_name.upper()
    map_co = build_country_code_dictionary(cache_dir)
    for name in map_co:
        if name.upper() == wanted:
            return map_co[name]
    for name in sorted(map_co):
        if name.upper().startswith(wanted):
            return map_co[name]
    return None
def ip_address_to_dec(ip_addr):
    """ Convert a dotted-quad IPv4 address string to its integer value.

    "1.15.0.0" -> 17760256.  Raises ValueError when ip_addr does not consist
    of exactly four decimal fields in the range 0..255. """
    octets = ip_addr.split('.')
    if len(octets) != 4:
        raise ValueError("not a dotted-quad IPv4 address: %r" % (ip_addr,))
    decimal = 0
    for octet in octets:
        value = int(octet)
        if not 0 <= value <= 255:
            raise ValueError("IPv4 octet out of range in %r" % (ip_addr,))
        # Shift in one byte at a time.  The hex-string approach failed to
        # zero-pad the value 15, which shifted every following octet.
        decimal = (decimal << 8) | value
    return decimal
def lookup_ip_address(ip_addr, cache_dir):
    """ Return the country code and name for a given ip address.

    The result is printed, not returned: the first ipv4 delegation whose
    range [start, start + value) contains ip_addr is reported with its
    country code and, when known, the country name.  Nothing is printed
    when no delegation matches.  Exits with status 5 when ip_addr has fewer
    than four dot-separated fields. """
    if len(ip_addr.split('.')) < 4:
        print("doesn't look like an ipv4 address..")
        sys.exit(5)
    target = ip_address_to_dec(ip_addr)
    cc_map = build_country_code_dictionary_rev(cache_dir)
    conn = sqlite3.connect(cache_dir + "sqlitedb")
    try:
        cursor = conn.cursor()
        # Scan every ipv4 row: start is stored as text, and a block can begin
        # under a different first octet than the address being looked up, so
        # filtering on the first octet can miss the containing block.
        cursor.execute('select cc, start, value from ipv4')
        for cc, start, value in cursor:
            first = ip_address_to_dec(start)
            # value is the number of addresses, so the range is half-open.
            if first <= target < first + int(value):
                print('country code: ' + cc)
                # Some registry codes may be absent from the country list.
                print('country: ' + cc_map.get(cc, 'unknown'))
                break
        cursor.close()
    finally:
        conn.close()
def usage():
    """ Print usage information.

    The help text is written to standard error. """
    text = """
blockfinder [-c DIR] -i
blockfinder [options] -t COUNTRY

The first form initializes the local cache. The second form queries it.

Understood options (not all of which are implemented yet):
    -h, --help              Show this help and exit
    -v                      Be verbose
    -c, --cachedir DIR      Set the cache directory
    -u, --useragent
    -p, --progress
    -o, --output FILE
    -4, --ipv4              Search IPv4 allocations
    -6, --ipv6              Search IPv6 allocation
    -a, --asn               Search ASN allocations
    -t, --nation-state CC   Set the country to search (given as a two-letter code)
    -n, --country-name "Costa Rica" Set country to search (full name)
    -x, --hack-the-internet Hack the internet
    -r, --reverse-lookup    Return the country name for the specified IP

At least one of -t or -i is required, and when in -t mode, at least one of -4,
-6, and -a is required in order to do anything sensible.
"""
    # The trailing "\n" stands in for the newline the print statement added.
    sys.stderr.write(text + "\n")
def main():
    """ Where the magic starts.

    Parses the command line, then does one of:
      - -i: refresh the GeoIP (if available) and delegation caches, rebuild
        the sqlite database and exit 0;
      - -r IP: reverse-lookup the country of an IPv4 address and exit 0;
      - -t CC / -n NAME with -4/-6/-a: print the matching delegations.

    Exit codes: 2 bad options, 3 unhandled option, 4 -x, 1 nothing to do or
    no matching country. """
    try:
        # Long options that take a value need a trailing "=".  getopt also
        # accepts unique prefixes, so "--initialize-delegation" still works.
        opts, args = getopt.getopt(sys.argv[1:],
            "xvhc:u:pso:46at:n:ir:",
            ["hack-the-internet", "verbose", "help", "cachedir=", "useragent=",
             "progress", "silent", "output=", "ipv4", "ipv6", "asn",
             "nation-state=", "country-name=", "initialize-delegations",
             "reverse-lookup="])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(2)

    global verbose
    verbose = False
    cache_dir = str(os.path.expanduser('~')) + "/.blockfinder/"
    update_delegations = False
    delegation_urls = """
        ftp://ftp.arin.net/pub/stats/arin/delegated-arin-latest
        ftp://ftp.ripe.net/ripe/stats/delegated-ripencc-latest
        ftp://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-latest
        ftp://ftp.apnic.net/pub/stats/apnic/delegated-apnic-latest
        ftp://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-latest
    """
    geoip_country_url = "http://geolite.maxmind.com/download/geoip/database/GeoLiteCountry/GeoIP.dat.gz"
    delegation_files = [url.rpartition('/')[-1] for url in delegation_urls.split()]
    requests = []
    country = ""
    country_name = None
    useragent = "Mozilla/5.0"
    ipaddress = ""

    for o, a in opts:
        if o in ("-x", "--hack-the-internet"):
            print("all your bases are belong to us!")
            sys.exit(4)
        elif o in ("-v", "--verbose"):
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-c", "--cachedir"):
            cache_dir = a
        elif o in ("-u", "--useragent"):
            useragent = a
        elif o in ("-p", "--progress", "-s", "--silent", "-o", "--output"):
            # Accepted for compatibility; not implemented yet.
            pass
        elif o in ("-4", "--ipv4"):
            requests.append("ipv4")
        elif o in ("-6", "--ipv6"):
            requests.append("ipv6")
        elif o in ("-a", "--asn"):
            requests.append("asn")
        elif o in ("-r", "--reverse-lookup"):
            ipaddress = a
            requests.append("reverse")
        # XXX TODO: This should be a positional argument as it's the only mandatory one...
        elif o in ("-t", "--nation-state"):
            country = a.upper()
            country_name = None
        elif o in ("-n", "--country-name"):
            # Resolved after the loop, once the final cache dir is known.
            # As before, the last of -t / -n on the command line wins.
            country_name = a
            country = ""
        elif o in ("-i", "--initialize-delegations"):
            update_delegations = True
        else:
            print("Unhandled option; Sorry!")
            sys.exit(3)

    # Every helper builds paths by plain concatenation, so make sure the
    # directory ends with a separator.
    cache_dir = os.path.join(cache_dir, "")

    # Done after option parsing so that -c and -u are honoured.
    if not os.path.exists(cache_dir + "countrycodes.xml"):
        download_country_code_file(cache_dir, useragent)

    if country_name is not None:
        country = get_country_code_from_name(cache_dir, country_name)

    # Update and quit.
    if update_delegations:
        if GeoIP:
            update_geoip_cache(cache_dir, geoip_country_url, useragent)
        update_delegation_cache(cache_dir, delegation_urls, useragent)
        if verbose:
            verify_cache(cache_dir, delegation_files)
        delegations = load_all_delegations(cache_dir, delegation_urls)
        create_sql_database(cache_dir)
        insert_into_sql_database(delegations, cache_dir)
        sys.exit(0)

    if not requests:
        print("Nothing to do. Have you requested anything?")
        print("Example usage: blockfinder -v --ipv4 -t mm")
        sys.exit(1)

    if ipaddress:
        print("Reverse ip lookup")
        lookup_ip_address(ipaddress, cache_dir)
        sys.exit(0)
    if not country:
        print("It appears your search did not match a country.")
        sys.exit(1)
    # Check our cache age and warn if it's aged
    if cache_is_dated(cache_dir, delegation_files) and verbose:
        print("Your delegation cache is older than 24 hours; you probably want to update it.")
    if verbose:
        print("Using country code: %s" % country)

    for request in requests:
        try:
            use_sql_database(request, country, cache_dir)
        except IOError:
            # e.g. the output pipe was closed early; stop quietly.
            sys.exit()


if __name__ == "__main__":
    main()