Typo
[blockfinder.git] / blockfinder
blob7ddabe40243657aa71a6249beb6f546cff61299a
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
4 # For the people of Smubworld!
5 import urllib2
6 import os
7 import time
8 import getopt
9 import sys
10 from math import ceil, log
11 import sqlite3
12 import hashlib
13 import gzip
14 from xml.dom import minidom
15 import IPy
# Package metadata.
__program__ = 'blockfinder'
__url__ = 'http://github.com/ioerror/blockfinder/'
# Conventional dunder spelling; the attribute used to carry three leading
# underscores, which hid it from anything reading the usual __author__.
__author__ = 'Jacob Appelbaum <jacob@appelbaum.net>, dave b. <db@d1b.org>'
__copyright__ = 'Copyright (c) 2010'
__license__ = 'See LICENSE for licensing information'
__version__ = '3.1415'
23 try:
24 import GeoIP
25 except ImportError:
26 GeoIP = None
28 try:
29 from future import antigravity
30 except ImportError:
31 antigravity = None
def update_progress_bar(percent_done, caption=""):
    """Write a progress bar to the console.

    percent_done -- completed fraction; values outside 0.0 - 1.0 are clamped.
    caption      -- text printed to the right of the bar.

    The terminal width is asked of ``stty size``.  When that gives nothing
    usable (output is piped, there is no controlling terminal, stty is
    missing) an 80 column display is assumed instead of failing.
    """
    try:
        pipe = os.popen('stty size', 'r')
        try:
            # stty prints "<rows> <columns>"; only the width is needed.
            columns = int(pipe.read().split()[1])
        finally:
            pipe.close()
    except (ValueError, IndexError, OSError):
        columns = 80
    # Four columns are taken by "[", ">", "]" and the space before caption.
    width = max(columns - 4 - len(caption), 0)
    filled = int(min(max(percent_done, 0.0), 1.0) * width)
    # "\x1b[G" returns the cursor to column one so the next call overdraws
    # this bar instead of scrolling.
    sys.stdout.write("[%s>%s] %s\x1b[G" % (
        "=" * filled,
        "." * (width - filled),
        caption))
    sys.stdout.flush()
43 # XXX TODO:allow the use of a proxy
44 # Set up a proper Request object, set the user agent and if desired, a proxy
def fetch(url, useragent):
    """ Fetch (with progress meter) and return the contents of a url.

    url       -- address to download.
    useragent -- value sent in the User-agent request header.

    Returns the complete response body.  Raises Exception when the server
    sends no content-length header, or when the number of bytes received
    differs from the announced length.
    """
    req = urllib2.Request(url)
    req.add_header('User-agent', useragent)
    # TODO: req.set_proxy(host, type) once proxy support is wanted.
    fetcher = urllib2.urlopen(req)
    try:
        length_header = fetcher.headers.get("content-length")
        if length_header is None:
            raise Exception("Missing content-length header in reply from server.")
        length = int(length_header)
        # Divide by a float so sizes below one kilobyte are not shown as 0.
        print("Fetching  " + str(round(length / 1024.0, 2)) + "  kilobytes")
        chunks = []
        received = 0
        t_start = time.time()
        while True:
            t_delta = time.time() - t_start
            if t_delta == 0:
                t_delta = 1
            # An empty body is complete from the start; avoid dividing by 0.
            if length:
                fraction = float(received) / length
            else:
                fraction = 1.0
            update_progress_bar(
                fraction,
                "%.2f K/s" % (received / 1024.0 / t_delta))
            tmp = fetcher.read(1024)
            if len(tmp) == 0:
                break
            # Collect the pieces and join once; repeated += is quadratic.
            chunks.append(tmp)
            received += len(tmp)
    finally:
        fetcher.close()
    if received != length:
        raise Exception("Expected %s bytes, only received %s" % (
            length, received))
    print("")
    return "".join(chunks)
def cache_delegation(cache_dir, delegation_url, useragent):
    """ Attempt to cache the contents of a delegation url in our cache dir.

    The cache directory is created when it does not exist yet.  The file is
    stored under the last path component of delegation_url.  Returns True
    when the file was written and False when writing failed; download
    errors raised by fetch() propagate to the caller.
    """
    import errno
    try:
        os.stat(cache_dir)
    except OSError as e:
        if e.errno != errno.ENOENT:
            # Bare raise keeps the original traceback.
            raise
        if verbose:
            print("Initializing the cache directory...")
        os.mkdir(cache_dir)
    print("Fetching " + delegation_url)
    delegation = fetch(delegation_url, useragent)
    delegation_file = str(cache_dir) + str(delegation_url.split('/')[-1])
    try:
        # Binary mode: several cached files are gzip or GeoIP databases and
        # must reach the disk unmodified.
        f = open(delegation_file, 'wb')
        try:
            f.write(delegation)
        finally:
            f.close()
        return True
    except Exception as e:
        print(repr(e))
        return False
def cache_is_dated(cache_dir, cached_files):
    """ Tell whether any of cached_files in cache_dir is older than a day.

    Returns True as soon as one file has an mtime more than 86400 seconds
    in the past, otherwise False.  An OSError from a missing cache
    directory is re-raised after printing a hint.
    """
    try:
        os.stat(cache_dir)
    except OSError as e:
        print("\nDid you initialize the cache directory?\n")
        raise e
    one_day = 86400
    return any(
        (time.time() - os.stat(cache_dir + name).st_mtime) > one_day
        for name in cached_files)
def create_sql_database(cache_dir):
    """ Creates a new sqlite database.

    If there is a previous sqlite database it will be deleted.  The new
    database gets the empty tables asn, ipv4, ipv6 and lir_record.
    """
    import errno
    db_path = cache_dir + "sqlitedb"
    try:
        os.remove(db_path)
    except OSError as e:
        # Only "no previous database" is expected; anything else (for
        # example a permission problem) would otherwise resurface later as
        # a confusing "table already exists" error.
        if e.errno != errno.ENOENT:
            raise
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        delegation_columns = ("(registry text, cc text, start text, "
                              "value INTEGER, date text, status text)")
        for table in ("asn", "ipv4", "ipv6"):
            cursor.execute("create table " + table + delegation_columns)
        cursor.execute("create table lir_record"
                       "(cc text, start text, value INTEGER, type INTEGER)")
        conn.commit()
        cursor.close()
    finally:
        conn.close()
def insert_into_sql_database(delegations, cache_dir):
    """ inserts delegation information into the sqlite database

    delegations -- a list with one list of record dicts per delegation file
                   (keys: registry, cc, type, start, value, date, status).
    cache_dir   -- directory prefix of the "sqlitedb" file.

    Records are skipped when they are the version header (numeric
    registry), a summary line (cc of "*"), of a type other than ipv4, ipv6
    or asn, or missing a field.  A delegation that failed to load (None)
    is ignored.
    """
    tables = ("ipv4", "ipv6", "asn")
    fields = ("registry", "cc", "start", "value", "date", "status")
    conn = sqlite3.connect(cache_dir + "sqlitedb")
    try:
        cursor = conn.cursor()
        for delegation in delegations:
            for entry in delegation or ():
                # The record type doubles as the table name, so it must come
                # from the fixed list before it is spliced into the SQL.
                table = entry.get('type')
                if table not in tables:
                    continue
                if str(entry.get('registry', '')).isdigit():
                    continue
                if str(entry.get('cc')) == "*":
                    continue
                if [name for name in fields if name not in entry]:
                    continue
                text = ("INSERT INTO " + table +
                        " ( registry, cc, start, value, date,status)"
                        " VALUES (?,?,?,?,?,?)")
                cursor.execute(text, [entry[name] for name in fields])
        conn.commit()
        cursor.close()
    finally:
        conn.close()
def get_total_delegations_from_db(cache_dir):
    """ Returns the total count of the number of entries in the ipv4, ipv6 and asn table """
    conn = sqlite3.connect(cache_dir + "sqlitedb")
    try:
        cursor = conn.cursor()
        count = 0
        for table in ("ipv4", "ipv6", "asn"):
            cursor.execute("select count (*) from " + table)
            count += int(cursor.fetchone()[0])
        cursor.close()
        return count
    finally:
        # Release the database handle even when a query fails.
        conn.close()
def get_possible_match_entries(cc, cache_dir):
    """ Get the count of 'possible' matching delegation entries

    cc may be a country code string or an already wrapped one-element
    tuple/list; both are accepted so existing callers keep working.
    Returns the number of rows for that country code summed over the ipv4,
    ipv6 and asn tables.
    """
    # sqlite3 expects a sequence of parameters.  A bare string would be
    # taken as one parameter per character and rejected.
    if isinstance(cc, (tuple, list)):
        params = tuple(cc)
    else:
        params = (cc,)
    conn = sqlite3.connect(cache_dir + "sqlitedb")
    try:
        cursor = conn.cursor()
        count = 0
        for table in ("ipv4", "ipv6", "asn"):
            cursor.execute("select count (*) from " + table + " where cc=?",
                           params)
            count += int(cursor.fetchone()[0])
        cursor.close()
        return count
    finally:
        conn.close()
def use_sql_database(request, cc, cache_dir):
    """ Use the sqlite database that is created after fetching delegations
    to output information for a given request

    request   -- "ipv4", "ipv6" or "asn"; selects the table and the output
                 format (CIDR blocks for the address families, plain
                 numbers for asn).
    cc        -- country code to look up.
    cache_dir -- directory prefix of the "sqlitedb" file.

    Raises ValueError for any other request value.
    """
    # request is spliced into the SQL as a table name, so restrict it to
    # the tables that exist.
    if request not in ("ipv4", "ipv6", "asn"):
        raise ValueError("Unknown request type: %r" % (request,))
    params = (cc,)
    conn = sqlite3.connect(cache_dir + "sqlitedb")
    try:
        cursor = conn.cursor()
        if verbose:
            print("We have %d entries in our delegation cache." %
                  get_total_delegations_from_db(cache_dir))
        cursor.execute("select start,value from " + request + " where cc=?",
                       params)
        for row in cursor:
            if request == "ipv4":
                # ipv4 rows store a host count; convert it to a prefix.
                print(str(row[0]) + "/" +
                      str(calculate_ipv4_subnet(int(row[1]))))
            elif request == "ipv6":
                print(str(row[0]) + "/" + str(int(row[1])))
            else:
                print(str(int(row[0])))
        if verbose:
            print("We found %d possible entries in our delegation cache." %
                  get_possible_match_entries(params, cache_dir))
            cursor.execute("select count(*) from " + request + " where cc=?",
                           params)
            print("We found %d matching entries in our delegation cache." %
                  int(cursor.fetchone()[0]))
        cursor.close()
    finally:
        conn.close()
def get_md5_from_delegation_md5_file(cache_dir, delegation_file):
    """ Returns the md5sum from the delegation md5 file
    if it doesn't exist it returns an empty string

    Two layouts are understood: "MD5 (name) = <digest>" and
    "<digest>  name".  The digest is returned in lower case without
    surrounding whitespace, whether or not the file ends with a newline.
    """
    try:
        f = open(cache_dir + delegation_file + ".md5", "r")
        try:
            content = f.read()
        finally:
            f.close()
    except (IOError, OSError) as e:
        print(repr(e))
        return ""
    if "=" in content:
        # "MD5 (file) = digest": the digest follows the last "=".
        checksum = content.rpartition("=")[2].strip()
    else:
        # "digest  file": the digest is the first word.
        words = content.split()
        if words:
            checksum = words[0]
        else:
            checksum = ""
    # hashlib's hexdigest() is lower case; normalise so the two compare.
    return str(checksum).lower()
def verify_delegation_file(cache_dir, delegation_file):
    """ Check a cached delegation file against its published md5 sum.

    Returns True only when a published checksum is available and equals
    the md5 of the cached file; every other outcome gives False.
    """
    actual = ""
    try:
        handle = open(cache_dir + delegation_file, "rb")
        actual = str(hashlib.md5(handle.read()).hexdigest())
        handle.close()
    except Exception as e:
        print(repr(e))
    published = get_md5_from_delegation_md5_file(cache_dir, delegation_file)
    # An empty published checksum never counts as a match.
    return published != "" and published == actual
def verify_cache(cache_dir, delegation_files):
    """ Run the checksum test on every delegation file and, in verbose
    mode, report the outcome of each one. """
    for name in delegation_files:
        if verbose:
            print("verifying " + name)
        matches = verify_delegation_file(cache_dir, name)
        if not verbose:
            continue
        if matches:
            print("the md5 checksum of " + name + " *matches* the provided checksum")
        else:
            print("the md5 checksum of " + name + " does *not* match the provided checksum")
def update_delegation_cache(cache_dir, delegation_urls, useragent):
    """ Refresh the cached delegation files.

    For each whitespace separated url the ".md5" companion is downloaded
    first; the delegation file itself is only fetched when the cached copy
    does not verify against that checksum.  Always returns True.
    """
    print("Updating delegation cache...")
    for url in delegation_urls.split():
        cache_delegation(cache_dir, url + ".md5", useragent)
        cached_name = url.rpartition('/')[-1]
        if not verify_delegation_file(cache_dir, cached_name):
            cache_delegation(cache_dir, url, useragent)
    return True
def update_lir_delegation_cache(cache_dir, delegation_urls, useragent):
    """ Download every LIR delegation url into the cache, then unpack the
    downloaded archives. """
    print("Updating LIR delegation cache...")
    lir_urls = delegation_urls.split()
    for lir_url in lir_urls:
        cache_delegation(cache_dir, lir_url, useragent)
    unpack_lir_delegation_cache(cache_dir, delegation_urls)
def _gunzip_cached_file(cache_dir, url, label):
    """ Decompress the cached gzip file named by the last component of url
    into a sibling file without the final extension. """
    gzip_filename = url.rpartition('/')[-1]
    gunzipped_filename = gzip_filename.rpartition('.')[0]
    if verbose:
        print("Unpacking " + label + " file " + gzip_filename +
              " into our cache as " + gunzipped_filename)
    gzip_file = gzip.open(cache_dir + gzip_filename, 'rb')
    try:
        # Binary mode: the GeoIP database is not text.
        gunzipped_file = open(cache_dir + gunzipped_filename, 'wb')
        try:
            # Copy in chunks; the LIR dumps are too large to hold in memory
            # comfortably.
            while True:
                chunk = gzip_file.read(65536)
                if not chunk:
                    break
                gunzipped_file.write(chunk)
        finally:
            gunzipped_file.close()
    finally:
        gzip_file.close()


def unpack_lir_delegation_cache(cache_dir, delegation_urls):
    """ Unpack the fetched LIR delegation files into the blockfinder cache.

    delegation_urls is a whitespace separated list of urls; each one must
    already be cached under its file name.
    """
    # This probably should unlink the gzip'ed file if we care about space...
    for url in delegation_urls.split():
        _gunzip_cached_file(cache_dir, url, "LIR")


def unpack_geoip_cache(cache_dir, geoip_urls):
    """ Unpack the fetched GeoIP file into the blockfinder cache.

    geoip_urls is a whitespace separated list of urls; each one must
    already be cached under its file name.  Returns True.
    """
    # This probably should unlink the gzip'ed file if we care about space...
    for url in geoip_urls.split():
        _gunzip_cached_file(cache_dir, url, "GeoIP")
    return True
def update_geoip_cache(cache_dir, geoip_urls, useragent):
    """ Download the country level GeoIP files named in geoip_urls into the
    cache and unpack the compressed downloads. """
    print("Updating GeoIP cache...")
    geoip_list = geoip_urls.split()
    for geoip_url in geoip_list:
        cache_delegation(cache_dir, geoip_url, useragent)
    unpack_geoip_cache(cache_dir, geoip_urls)
def load_delegation(delegation_file):
    """ Load, parse and store the delegation file contents as a list.

    Each non-comment, non-blank line is split on "|" and turned into a dict
    keyed by registry, cc, type, start, value, date and status (shorter
    lines simply yield fewer keys).  Line endings are removed so the last
    field does not carry a newline.  When the file cannot be opened the
    error is printed and an empty list is returned.
    """
    keys = "registry cc type start value date status".split()
    try:
        f = open(delegation_file, "r")
    except (IOError, OSError) as e:
        # open() raises IOError on Python 2, which "except OSError" missed.
        print(repr(e))
        return []
    try:
        delegations = []
        for line in f:
            line = line.rstrip("\r\n")
            if not line.strip() or line.startswith("#"):
                continue
            delegations.append(dict(zip(keys, line.split("|"))))
        return delegations
    finally:
        f.close()
def load_all_delegations(cache_dir, delegation_urls):
    """ Parse every cached delegation file named by delegation_urls and
    return the results as a list, one item per url. """
    loaded = []
    file_names = [url.rpartition('/')[-1] for url in delegation_urls.split()]
    for filename in file_names:
        if verbose:
            print("Attempting to load delegation file into memory: " + filename)
        loaded.append(load_delegation(cache_dir + filename))
    return loaded
def calculate_ipv4_subnet(host_count):
    """ Return the IPv4 prefix length of the smallest block that holds
    host_count addresses (256 -> 24, 1 -> 32).

    Raises ValueError when host_count is not positive.
    """
    if host_count <= 0:
        raise ValueError("host_count must be a positive number of addresses")
    # Count bits with integers: ceil(log(n, 2)) overshoots by one for some
    # exact powers of two because the float logarithm lands just above the
    # whole number.
    bits = 0
    while (1 << bits) < host_count:
        bits += 1
    return 32 - bits
def download_country_code_file(cache_dir, useragent):
    """ Download and save the latest opencountrycode XML file

    The request carries useragent in its User-agent header.  The document
    is stored as "countrycodes.xml" in cache_dir.  Returns True when the
    file was written, False when writing failed; download errors
    propagate to the caller.
    """
    # Google frontend will not return content-length for some reason...
    # which is why fetch() cannot be used here.
    url = "http://opencountrycodes.appspot.com/xml"
    req = urllib2.Request(url)
    req.add_header('User-agent', useragent)
    ul = urllib2.urlopen(req)
    try:
        xml = ul.read()
    finally:
        ul.close()
    try:
        # Store the bytes exactly as received.
        f = open(cache_dir + "countrycodes.xml", 'wb')
        try:
            f.write(xml)
        finally:
            f.close()
        return True
    except Exception as e:
        print(repr(e))
        return False
def build_country_code_dictionary(cache_dir):
    """ Parse countrycodes.xml in cache_dir and return a dictionary that
    maps each country name to its country code. """
    document = minidom.parse(str(cache_dir) + "countrycodes.xml")
    pairs = []
    for node in document.getElementsByTagName("country"):
        attrs = node.attributes
        code, name = attrs["code"], attrs["name"]
        pairs.append((name.value, code.value))
    return dict(pairs)
def get_name_from_country_code(cache_dir, cc_code):
    """ Return the first country name whose code equals cc_code, or None
    when no country uses that code. """
    for name, code in build_country_code_dictionary(cache_dir).items():
        if code == cc_code:
            return name
    return None
def get_country_code_from_name(cache_dir, country_name):
    """ Return the country code for a given country name.

    The comparison ignores case.  An exact name match wins; otherwise the
    alphabetically first country whose name starts with country_name is
    used.  Returns None when nothing matches.
    """
    map_co = build_country_code_dictionary(cache_dir)
    wanted = country_name.upper()
    # Exact match first, so "Niger" cannot resolve to Nigeria.
    for name in map_co:
        if name.upper() == wanted:
            return map_co[name]
    # Sorted so the prefix fallback does not depend on dictionary order.
    for name in sorted(map_co):
        if name.upper().startswith(wanted):
            return map_co[name]
    return None
def ip_address_to_dec(ip_addr):
    """ Return the first element of IPy.parseAddress(ip_addr), which the
    callers use as the numeric value of the address. """
    parsed = IPy.parseAddress(ip_addr)
    return parsed[0]
def geoip_lookup(cache_dir, ip_addr):
    """ Look ip_addr up in the cached GeoIP.dat and return a
    (country code, country name) tuple.  Anything that is not IPv4 yields
    ("N/A", "N/A") after a notice is printed. """
    if IPy.IP(ip_addr).version() == 4:
        database = GeoIP.open(cache_dir + "GeoIP.dat", GeoIP.GEOIP_STANDARD)
        code = database.country_code_by_addr(ip_addr)
        name = database.country_name_by_addr(ip_addr)
        return code, name
    # This would work with the CVS version of the GeoIP code
    # However, MaxMind hasn't done a release in a long time.
    # http://geoip.cvs.sourceforge.net/viewvc/geoip/python/test_v6.py?revision=1.1&view=markup
    # gi = GeoIP.open(cache_dir + "GeoIPv6.dat",GeoIP.GEOIP_STANDARD)
    # cc = gi.country_code_by_addr_v6(ip_addr)
    # cc_name = gi.country_name_by_addr_v6(ip_addr)
    print("IPv6 GeoIP is not currently supported by python-geoip")
    return "N/A", "N/A"
def lookup_ip_address(ip_addr, cache_dir):
    """ Return the country code and name for a given ip address. Attempts to
    use GeoIP if available.

    The results are printed rather than returned.  The RIR answer comes
    from the ipv4 table of the sqlite cache.  Exits with status 5 when
    ip_addr does not look like a dotted IPv4 address.
    """
    print("Reverse lookup for: " + ip_addr)
    geoip_cc = None
    if GeoIP:
        geoip_cc, geoip_cc_name = geoip_lookup(cache_dir, ip_addr)
        print("GeoIP country code: " + str(geoip_cc))
        print("GeoIP country name: " + str(geoip_cc_name))
    ipv4arr = ip_addr.split('.')
    if len(ipv4arr) < 4:
        print("""doesn't look like an ipv4 address..""")
        sys.exit(5)
    wanted = ip_address_to_dec(ip_addr)
    rir_cc = None
    conn = sqlite3.connect(cache_dir + "sqlitedb")
    try:
        cursor = conn.cursor()
        # Cheap pre-filter on the first octet; the range test below decides.
        cursor.execute('select * from ipv4 WHERE start LIKE ?',
                       (ipv4arr[0] + '%',))
        for row in cursor:
            first = ip_address_to_dec(row[2])
            # row[3] is the number of addresses, so the block ends one
            # before first + count.
            if first <= wanted < first + int(row[3]):
                rir_cc = row[1]
                rir_cc_name = get_name_from_country_code(cache_dir, rir_cc)
                print('RIR country code: ' + rir_cc)
                # The name is None when the code is not in the country list.
                print('RIR country: ' + (rir_cc_name or 'unknown'))
                break
        cursor.close()
    finally:
        conn.close()
    if rir_cc is None:
        print("No matching RIR delegation was found in the cache")
        return
    if GeoIP and geoip_cc != rir_cc:
        print("It appears that the RIR data conflicts with the GeoIP data")
        print("The GeoIP data is likely closer to being correct due to "
              "sub-delegation issues with LIR databases")
def return_first_ip_and_number_in_inetnum(line):
    """ Split an inetnum range such as "1.0.0.0 - 1.0.0.255" and return a
    (first address, number of addresses) tuple.

    Raises IndexError when line contains no "-".
    """
    parts = line.split("-")
    start_ip = parts[0].strip()
    end_ip = parts[1].strip()
    # Both ends belong to the range, hence the + 1 (a range whose ends are
    # equal holds one address).
    num_ips = ip_address_to_dec(end_ip) - ip_address_to_dec(start_ip) + 1
    return start_ip, num_ips
def extract_info_from_lir_file_and_insert_into_sqlite(cache_dir, filename):
    """ Read an unpacked LIR database file from the cache and store its
    address blocks in the lir_record table.

    Records are separated by empty lines.  For each record the first
    "inetnum:" (range) or "inet6num:" (prefix) line sets the block, and
    every following "country:" line inserts one row of
    (country, start address, prefix length, IP version).
    """
    block = []
    entry = False
    insert_text = """insert into lir_record (cc, start, value, type) VALUES (?,?,?,?)"""
    conn = sqlite3.connect(cache_dir + "sqlitedb")
    try:
        cursor = conn.cursor()
        lir_file = open(cache_dir + filename, "r")
        try:
            for line in lir_file:
                line = line.replace("\n", "")
                if line == "":
                    # An empty line ends the current record.
                    entry = False
                    block = []
                elif not entry and "inetnum:" in line:
                    try:
                        line = line.replace("inetnum:", "").strip()
                        start_ip, num_ips = return_first_ip_and_number_in_inetnum(line)
                        if num_ips == 0:
                            num_ips = 1
                        block = [start_ip, calculate_ipv4_subnet(num_ips)]
                        entry = True
                    except Exception as e:
                        print(e)
                elif not entry and "inet6num:" in line:
                    parts = line.replace("inet6num:", "").strip().split("/")
                    # Need both address and prefix length; a value without
                    # "/" would otherwise fail at the country line.
                    if len(parts) == 2:
                        block = parts
                        entry = True
                elif entry and "country:" in line:
                    country = line.replace("country:", "").strip()
                    # Same order as the column list in insert_text.
                    data = (country, block[0], block[1],
                            IPy.IP(block[0]).version())
                    cursor.execute(insert_text, data)
        finally:
            lir_file.close()
        conn.commit()
        cursor.close()
    finally:
        conn.close()
def usage():
    """ Print usage information to standard error. """
    sys.stderr.write("""
blockfinder [-c DIR] -i
blockfinder [options] -t COUNTRY

The first form initializes the local cache. The second form queries it.

Understood options (not all of which are implemented yet):
  -h, --help                        Show this help and exit
  -v                                Be verbose
  -c, --cachedir DIR                Set the cache directory
  -u, --useragent
  -p, --progress
  -o, --output FILE
  -4, --ipv4                        Search IPv4 allocations
  -6, --ipv6                        Search IPv6 allocations
  -a, --asn                         Search ASN allocations
  -t, --nation-state CC             Set the country to search (given as a two-letter code)
  -n, --country-name "Costa Rica"   Set country to search (full name)
  -x, --hack-the-internet           Hack the internet
  -r, --reverse-lookup IP           Return the country name for the specified IP

At least one of -t or -i is required, and when in -t mode, at least one of -4,
-6, and -a is required in order to do anything sensible.
""" + "\n")
def main():
    """ Where the magic starts.

    Parses the command line, then either refreshes the cache (-i), does a
    reverse lookup (-r) or prints the allocations of one country for every
    requested resource type.  Exit codes: 0 success, 1 nothing to do or no
    country, 2 bad command line, 3 unhandled option, 4 -x.
    """
    try:
        # A trailing "=" marks long options that take a value.  getopt also
        # accepts unique prefixes, so the older spelling
        # --initialize-delegation keeps working.
        opts, args = getopt.getopt(sys.argv[1:],
            "xvhc:u:pso:46at:n:ir:",
            ["hack-the-internet", "verbose", "help", "cachedir=",
             "useragent=", "progress", "silent", "output=", "ipv4", "ipv6",
             "asn", "nation-state=", "country-name=",
             "initialize-delegations", "reverse-lookup="])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(2)

    global verbose
    verbose = False
    cache_dir = str(os.path.expanduser('~')) + "/.blockfinder/"
    update_delegations = False
    delegation_urls = """
        ftp://ftp.arin.net/pub/stats/arin/delegated-arin-latest
        ftp://ftp.ripe.net/ripe/stats/delegated-ripencc-latest
        ftp://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-latest
        ftp://ftp.apnic.net/pub/stats/apnic/delegated-apnic-latest
        ftp://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-latest
    """
    geoip_country_urls = """http://geolite.maxmind.com/download/geoip/database/GeoLiteCountry/GeoIP.dat.gz
        http://geolite.maxmind.com/download/geoip/database/GeoIPv6.dat.gz"""
    lir_urls = """ftp://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz
        ftp://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz"""
    delegation_files = [url.rpartition('/')[-1]
                        for url in delegation_urls.split()]
    requests = []
    country = ""
    country_name = None
    useragent = "Mozilla/5.0"
    ipaddress = ""

    for o, a in opts:
        if o in ("-x", "--hack-the-internet"):
            print("all your bases are belong to us!")
            sys.exit(4)
        elif o in ("-v", "--verbose"):
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-c", "--cachedir"):
            cache_dir = a
        elif o in ("-u", "--useragent"):
            useragent = a
        elif o in ("-p", "--progress", "-s", "--silent", "-o", "--output"):
            # Accepted for compatibility; not implemented yet.
            pass
        elif o in ("-4", "--ipv4"):
            requests.append("ipv4")
        elif o in ("-6", "--ipv6"):
            requests.append("ipv6")
        elif o in ("-a", "--asn"):
            requests.append("asn")
        # XXX TODO: This should be a positional argument as it's the only mandatory one...
        elif o in ("-r", "--reverse-lookup"):
            ipaddress = a
            requests.append("reverse")
        elif o in ("-t", "--nation-state"):
            country = a.upper()
            country_name = None
        elif o in ("-n", "--country-name"):
            # Resolved after the loop, once the final cache directory is
            # known; the last of -t / -n on the command line wins.
            country_name = a
        elif o in ("-i", "--initialize-delegations"):
            update_delegations = True
        else:
            print("Unhandled option; Sorry!")
            sys.exit(3)

    # Every helper builds paths as cache_dir + name, so the directory must
    # end with a separator.
    if cache_dir and not cache_dir.endswith(("/", os.sep)):
        cache_dir += "/"

    # Done after option parsing so -c is honoured and -h needs no network.
    if not os.path.exists(cache_dir + "countrycodes.xml"):
        if cache_dir and not os.path.isdir(cache_dir):
            if verbose:
                print("Initializing the cache directory...")
            os.makedirs(cache_dir)
        download_country_code_file(cache_dir, useragent)

    if country_name is not None:
        country = get_country_code_from_name(cache_dir, country_name)

    # Update and quit.
    if update_delegations:
        if GeoIP:
            update_geoip_cache(cache_dir, geoip_country_urls, useragent)
        update_delegation_cache(cache_dir, delegation_urls, useragent)
        update_lir_delegation_cache(cache_dir, lir_urls, useragent)
        if verbose:
            verify_cache(cache_dir, delegation_files)
        delegations = load_all_delegations(cache_dir, delegation_urls)
        create_sql_database(cache_dir)
        insert_into_sql_database(delegations, cache_dir)
        sys.exit(0)

    if not requests:
        print("Nothing to do. Have you requested anything?")
        print("Example usage: blockfinder -v --ipv4 -t mm")
        sys.exit(1)

    if ipaddress:
        lookup_ip_address(ipaddress, cache_dir)
        sys.exit(0)
    if not country:
        print("It appears your search did not match a country.")
        sys.exit(1)
    # Check our cache age and warn if it's aged
    if cache_is_dated(cache_dir, delegation_files) and verbose:
        print("Your delegation cache is older than 24 hours; you probably want to update it.")
    if verbose:
        print("Using country code: %s" % country)

    for request in requests:
        try:
            use_sql_database(request, country, cache_dir)
        except IOError:
            # Typically the reader of our output went away; stop quietly.
            sys.exit()
605 if __name__ == "__main__":
606 main()