retab; please never use tabs again
[blockfinder.git] / blockfinder
blob7572c49e1f1c1074e6673bdb76f93f0657a18c5e
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
4 # For the people of Smubworld!
5 import urllib2
6 import os
7 import time
8 import getopt
9 import sys
10 from math import floor, log
11 import sqlite3
12 import hashlib
13 from xml.dom import minidom
# Module metadata.  The author attribute was previously spelled with three
# leading underscores, which left the conventional __author__ undefined.
__program__ = 'blockfinder'
__url__ = 'http://github.com/ioerror/blockfinder/'
__author__ = 'Jacob Appelbaum <jacob@appelbaum.net>, dave b. <db@d1b.org>'
__copyright__ = 'Copyright (c) 2010'
__license__ = 'See LICENSE for licensing information'
__version__ = '3.1415'
21 try:
22 import GeoIP
23 except ImportError:
24 GeoIP = None
26 try:
27 from future import antigravity
28 except ImportError:
29 antigravity = None
def update_progress_bar(percent_done, caption=""):
    """Write a progress bar to the console.

    percent_done is the completed fraction (0.0 - 1.0); caption is printed to
    the right of the bar.  The bar is sized from the terminal width reported
    by `stty size`; when that cannot be determined (for example because
    stdin is not a terminal) a width of 80 columns is assumed instead of
    failing.
    """
    pipe = os.popen('stty size', 'r')
    try:
        size = pipe.read().split()
    finally:
        pipe.close()
    try:
        # `stty size` prints "rows columns"; only the columns are needed.
        columns = int(size[1])
    except (IndexError, ValueError):
        columns = 80
    # Four characters are taken by "[", ">", "]" and the separating space.
    width = max(columns - 4 - len(caption), 0)
    filled = min(max(int(percent_done * width), 0), width)
    # "\x1b[G" moves the cursor back to column one so the next call
    # overwrites this bar instead of scrolling.
    sys.stdout.write("[%s>%s] %s\x1b[G" % (
        "=" * filled,
        "." * (width - filled),
        caption))
    sys.stdout.flush()
41 # XXX TODO:allow the use of a proxy
42 # Set up a proper Request object, set the user agent and if desired, a proxy
def fetch(url, useragent):
    """ Fetch (with progress meter) and return the contents of a url.

    url is opened with urllib2 using useragent as the User-agent header.
    The reply must carry a content-length header; the body is read in 1 KiB
    chunks while update_progress_bar() is redrawn with the current transfer
    rate.

    Returns the complete body.  Raises Exception when the content-length
    header is missing or when the number of bytes received differs from it.
    """
    req = urllib2.Request(url)
    req.add_header('User-agent', useragent)
    #req.set_proxy(host, type)
    fetcher = urllib2.urlopen(req)
    try:
        length_header = fetcher.headers.get("content-length")
        if length_header is None:
            raise Exception("Missing content-length header in reply from server.")
        length = int(length_header)
        # Divide by a float so the size is not truncated to whole kilobytes.
        print("Fetching %.2f kilobytes" % (length / 1024.0))
        chunks = []
        received = 0
        t_start = time.time()
        while True:
            t_delta = time.time() - t_start
            # The first pass can run before the clock has advanced, and an
            # empty document has length 0; avoid dividing by zero in both.
            if t_delta > 0:
                rate = received / 1024.0 / t_delta
            else:
                rate = 0.0
            if length > 0:
                fraction = float(received) / length
            else:
                fraction = 1.0
            update_progress_bar(fraction, "%.2f K/s" % rate)
            tmp = fetcher.read(1024)
            if len(tmp) == 0:
                break
            # Collect chunks and join once; repeated += copies the buffer.
            chunks.append(tmp)
            received += len(tmp)
    finally:
        fetcher.close()
    if received != length:
        raise Exception("Expected %s bytes, only received %s" % (
            length, received))
    print("")
    return b"".join(chunks)
def cache_delegation(cache_dir, delegation_url, useragent):
    """ Attempt to cache the contents of a delegation url in our cache dir.

    Creates cache_dir when it does not exist yet, downloads delegation_url
    with fetch() and stores the body in cache_dir under the last path
    component of the url.  cache_dir is concatenated with the file name as
    is, so it is expected to end with a path separator.

    Returns True when the file was written, False when writing failed (the
    error is printed).  Errors from fetch() and unexpected errors while
    inspecting cache_dir propagate.  Reads the module-level `verbose` flag.
    """
    import errno  # local import so the block does not depend on file-level imports
    try:
        os.stat(cache_dir)
    except OSError as e:
        # Only "no such file or directory" means the cache is uninitialized.
        if e.errno != errno.ENOENT:
            raise
        if verbose:
            print("Initializing the cache directory...")
        os.mkdir(cache_dir)
    print("Fetching " + delegation_url)
    delegation = fetch(delegation_url, useragent)
    delegation_file = str(cache_dir) + str(delegation_url.split('/')[-1])
    try:
        # Binary mode: the payload may be compressed data (GeoIP.dat.gz)
        # and must be stored byte for byte.
        with open(delegation_file, 'wb') as f:
            f.write(delegation)
    except Exception as e:
        print(repr(e))
        return False
    return True
def cache_is_dated(cache_dir, cached_files):
    """ Report whether any cached file was modified more than 24 hours ago.

    cache_dir must exist (a hint is printed and the OSError re-raised when
    it does not); every name in cached_files is looked up as
    cache_dir + name.  Returns True as soon as one stale file is found,
    otherwise False.
    """
    try:
        os.stat(cache_dir)
    except OSError as e:
        print("\nDid you initialize the cache directory?\n")
        raise e

    def age_in_seconds(name):
        # Seconds elapsed since the file's last modification.
        modified = os.stat(cache_dir + name).st_mtime
        return time.time() - modified

    return any(age_in_seconds(name) > 86400 for name in cached_files)
def create_sql_database(cache_dir):
    """ Creates a new sqlite database.

    The database lives at cache_dir + "sqlitedb"; any previous database
    file at that path is deleted first.  Three identically shaped tables
    (asn, ipv4, ipv6) are created with the columns
    registry, cc, start, value, date, status.
    """
    db_path = cache_dir + "sqlitedb"
    try:
        os.remove(db_path)
    except OSError:
        # Best effort: usually there simply is no previous database.
        pass
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        for table in ("asn", "ipv4", "ipv6"):
            cursor.execute(
                "create table " + table +
                "(registry text, cc text, start text, value INTEGER, "
                "date text, status text)")
        conn.commit()
        cursor.close()
    finally:
        # Closing the cursor alone leaves the connection open.
        conn.close()
def insert_into_sql_database(delegations, cache_dir):
    """ Inserts delegation information into the sqlite database.

    delegations is a list of per-registry lists of entry dicts with the keys
    registry, cc, type, start, value, date and status.  Each entry is stored
    in the table named by its type (ipv4, ipv6 or asn) of the database at
    cache_dir + "sqlitedb".

    Skipped entries: those whose registry field is all digits, those whose
    cc is "*", those with an unrecognized type, and those missing a field.
    Empty or None delegation lists are ignored.
    """
    tables = ("ipv4", "ipv6", "asn")
    columns = ("registry", "cc", "start", "value", "date", "status")
    conn = sqlite3.connect(cache_dir + "sqlitedb")
    try:
        cursor = conn.cursor()
        for delegation in delegations:
            if not delegation:
                continue
            for entry in delegation:
                table = entry.get('type')
                # The table name is concatenated into the SQL text, so only
                # the three known names are ever accepted.
                if table not in tables:
                    continue
                if str(entry.get('registry', '')).isdigit():
                    continue
                if str(entry.get('cc')) == "*":
                    continue
                try:
                    data = [entry[column] for column in columns]
                except KeyError:
                    # Truncated line: not a complete delegation record.
                    continue
                cursor.execute(
                    "INSERT INTO " + table +
                    " ( registry, cc, start, value, date,status)"
                    " VALUES (?,?,?,?,?,?)", data)
        conn.commit()
        cursor.close()
    finally:
        conn.close()
def get_total_delegations_from_db(cache_dir):
    """ Returns the total count of the number of entries in the ipv4, ipv6 and asn table.

    The counts are read from the database at cache_dir + "sqlitedb".
    """
    conn = sqlite3.connect(cache_dir + "sqlitedb")
    try:
        cursor = conn.cursor()
        count = 0
        for table in ("ipv4", "ipv6", "asn"):
            cursor.execute("select count (*) from " + table)
            count += int(cursor.fetchone()[0])
        cursor.close()
    finally:
        # Release the connection; the cursor alone does not do that.
        conn.close()
    return count
def get_possible_match_entries(cc, cache_dir):
    """ Get the count of 'possible' matching delegation entries.

    Counts the rows whose cc column equals the given country code across
    the ipv4, ipv6 and asn tables of the database at cache_dir + "sqlitedb".

    cc may be a plain country-code string or an already wrapped
    one-element tuple/list.  A bare string used to be handed to sqlite3
    unwrapped, which binds each character as a separate parameter and fails.
    """
    if isinstance(cc, (tuple, list)):
        params = tuple(cc)
    else:
        params = (cc,)
    conn = sqlite3.connect(cache_dir + "sqlitedb")
    try:
        cursor = conn.cursor()
        count = 0
        for table in ("ipv4", "ipv6", "asn"):
            cursor.execute("select count (*) from " + table + " where cc=?", params)
            count += int(cursor.fetchone()[0])
        cursor.close()
    finally:
        conn.close()
    return count
def use_sql_database(request, cc, cache_dir):
    """ Use the sqlite database that is created after fetching delegations
    to output information for a given request.

    request names the table to query: "ipv4", "ipv6" or "asn".  Every row
    whose cc column equals the given country code is printed: IPv4 rows as
    start/prefix (the prefix is derived from the address count), IPv6 rows
    as start/value, ASN rows as the number alone.  With the module-level
    `verbose` flag set, cache statistics are printed as well.

    Raises ValueError for any other request, because the table name is
    concatenated into the SQL text.
    """
    if request not in ("ipv4", "ipv6", "asn"):
        raise ValueError("Unknown request type: %r" % (request,))
    params = (cc,)
    conn = sqlite3.connect(cache_dir + "sqlitedb")
    try:
        cursor = conn.cursor()
        if verbose:
            print("We have %d entries in our delegation cache." % get_total_delegations_from_db(cache_dir))
        cursor.execute("select start,value from " + request + " where cc=?", params)
        for row in cursor:
            if request == "ipv4":
                print(str(row[0]) + "/" + str(calculate_ipv4_subnet(int(row[1]))))
            elif request == "ipv6":
                print(str(row[0]) + "/" + str(int(row[1])))
            else:
                print(str(int(row[0])))
        if verbose:
            # The helper is given the wrapped tuple, which it binds directly.
            print("We found %d possible entries in our delegation cache." % get_possible_match_entries(params, cache_dir))
            cursor.execute("select count(*) from " + request + " where cc=?", params)
            print("We found %d matching entries in our delegation cache." % int(cursor.fetchone()[0]))
        cursor.close()
    finally:
        # Always release the connection, also when a query fails.
        conn.close()
def get_md5_from_delegation_md5_file(cache_dir, delegation_file):
    """ Returns the md5sum from the delegation md5 file.

    Reads cache_dir + delegation_file + ".md5" and extracts the checksum.
    Two layouts are recognized by content rather than by file name:

        MD5 (delegated-arin-latest) = <checksum>
        <checksum>  delegated-afrinic-latest

    Surrounding whitespace is ignored, so a missing trailing newline no
    longer costs the last digit.  If the file cannot be read the error is
    printed and an empty string is returned.
    """
    try:
        with open(cache_dir + delegation_file + ".md5", "r") as f:
            content = f.read()
    except (IOError, OSError) as e:
        print(repr(e))
        return ""
    content = content.strip()
    if "=" in content:
        # "MD5 (name) = checksum": the checksum follows the equals sign.
        return str(content.rpartition("=")[2].strip())
    # "checksum  name": the checksum is the first field.
    fields = content.split()
    if fields:
        return str(fields[0])
    return ""
def verify_delegation_file(cache_dir, delegation_file):
    """ Check a cached delegation file against its published md5 checksum.

    The md5 of cache_dir + delegation_file is computed and compared with the
    value get_md5_from_delegation_md5_file() returns for the same file.
    Returns True only when both agree and the published checksum is not
    empty; a file that cannot be read is reported and counts as a mismatch.
    """
    actual_digest = ""
    try:
        handle = open(cache_dir + delegation_file, "rb")
        actual_digest = str(hashlib.md5(handle.read()).hexdigest())
        handle.close()
    except Exception as err:
        print(repr(err))
    expected_digest = get_md5_from_delegation_md5_file(cache_dir, delegation_file)
    return expected_digest != "" and expected_digest == actual_digest
def verify_cache(cache_dir, delegation_files):
    """ Run the checksum verification for every delegation file and, when
    the module-level `verbose` flag is set, print the outcome of each. """
    for name in delegation_files:
        if verbose:
            print("verifying " + name)
        checksum_ok = verify_delegation_file(cache_dir, name)
        if not verbose:
            continue
        if checksum_ok:
            print("the md5 checksum of " + name + " *matches* the provided checksum")
        else:
            print("the md5 checksum of " + name + " does *not* match the provided checksum")
def update_delegation_cache(cache_dir, delegation_urls, useragent):
    """ Refresh the cached copy of every delegation url.

    delegation_urls is a whitespace separated string of urls.  For each one
    the ".md5" companion is downloaded first; the delegation file itself is
    downloaded only when the cached copy does not verify against it.
    Always returns True.
    """
    print("Updating delegation cache...")
    for delegation_url in delegation_urls.split():
        checksum_url = delegation_url + ".md5"
        cache_delegation(cache_dir, checksum_url, useragent)
        cached_name = delegation_url.rpartition('/')[-1]
        up_to_date = verify_delegation_file(cache_dir, cached_name)
        if not up_to_date:
            cache_delegation(cache_dir, delegation_url, useragent)
    return True
def update_geoip_cache(cache_dir, geoip_url, useragent):
    """ Download the GeoIP file at geoip_url into the cache directory.

    The download is delegated to cache_delegation(), whose result is
    returned.  The file is stored exactly as downloaded.
    """
    print("Updating GeoIP cache...")
    cached_ok = cache_delegation(cache_dir, geoip_url, useragent)
    return cached_ok
def load_delegation(delegation_file):
    """ Load, parse and store the delegation file contents as a list.

    Each line that is neither blank nor a "#" comment is split on "|" and
    paired with the keys registry, cc, type, start, value, date, status
    (surplus fields are dropped, short lines yield fewer keys).  Line
    endings are removed before splitting so the last field carries no
    trailing newline.

    Returns the list of dicts.  When the file cannot be read the error is
    printed and an empty list is returned.
    """
    keys = "registry cc type start value date status".split()
    try:
        with open(delegation_file, "r") as f:
            lines = f.readlines()
    except (IOError, OSError) as e:
        # open() raises IOError on Python 2, which `except OSError` misses.
        print(repr(e))
        return []
    delegations = []
    for line in lines:
        line = line.rstrip("\r\n")
        if not line.strip() or line.startswith("#"):
            continue
        delegations.append(dict(zip(keys, line.split("|"))))
    return delegations
def load_all_delegations(cache_dir, delegation_urls):
    """ Read every cached delegation file into memory.

    delegation_urls is a whitespace separated string of urls; the file for
    each one is cache_dir plus the url's last path component.  Returns a
    list holding one load_delegation() result per url, in url order.
    """
    loaded = []
    for delegation_url in delegation_urls.split():
        basename = delegation_url.rpartition('/')[-1]
        if verbose:
            print("Attempting to load delegation file into memory: " + basename)
        loaded.append(load_delegation(cache_dir + basename))
    return loaded
def calculate_ipv4_subnet(host_count):
    """ Return the prefix length of an IPv4 block holding host_count addresses.

    The result is 32 - floor(log2(host_count)), e.g. 256 -> 24.  It is
    computed with integer shifts rather than floating point logarithms so
    rounding error cannot put a power of two in the wrong bucket.

    Raises ValueError when host_count is smaller than one.
    """
    remaining = int(host_count)
    if remaining < 1:
        raise ValueError("host_count must be at least 1, got %r" % (host_count,))
    bits = 0
    while remaining > 1:
        remaining >>= 1
        bits += 1
    return 32 - bits
def download_country_code_file(cache_dir, useragent):
    """ Download and save the latest opencountrycode XML file.

    The document is requested with useragent as the User-agent header and
    written to cache_dir + "countrycodes.xml".

    Returns True on success.  Any failure, whether while downloading or
    while writing, is printed and reported as False.
    """
    # Google frontend will not return content-length for some reason...
    # (so fetch(), which requires that header, cannot be used here).
    url = "http://opencountrycodes.appspot.com/xml"
    try:
        req = urllib2.Request(url)
        req.add_header('User-agent', useragent)
        ul = urllib2.urlopen(req)
        try:
            xml = ul.read()
        finally:
            ul.close()
        # Binary mode stores the document exactly as it was received.
        with open(cache_dir + "countrycodes.xml", 'wb') as f:
            f.write(xml)
        return True
    except Exception as e:
        print(repr(e))
        return False
def build_country_code_dictionary(cache_dir):
    """ Build a country name -> country code mapping.

    The data comes from the "code" and "name" attributes of every
    <country> element in cache_dir + "countrycodes.xml".
    """
    document = minidom.parse(str(cache_dir) + "countrycodes.xml")
    pairs = []
    for node in document.getElementsByTagName("country"):
        code_attr = node.attributes["code"]
        name_attr = node.attributes["name"]
        pairs.append((name_attr.value, code_attr.value))
    return dict(pairs)
def build_country_code_dictionary_rev(cache_dir):
    """ Build a country code -> country name mapping.

    The data comes from the "code" and "name" attributes of every
    <country> element in cache_dir + "countrycodes.xml".
    """
    document = minidom.parse(str(cache_dir) + "countrycodes.xml")
    pairs = []
    for node in document.getElementsByTagName("country"):
        code_attr = node.attributes["code"]
        name_attr = node.attributes["name"]
        pairs.append((code_attr.value, name_attr.value))
    return dict(pairs)
def get_country_code_from_name(cache_dir, country_name):
    """ Return the country code for a given country name.

    Matching is case-insensitive.  An exact name match wins; otherwise the
    alphabetically first country whose name starts with country_name is
    used, so the result does not depend on dictionary ordering (previously
    "Niger" could resolve to Nigeria).

    Returns None when country_name is empty or matches nothing.
    """
    wanted = (country_name or "").strip().upper()
    if not wanted:
        return None
    map_co = build_country_code_dictionary(cache_dir)
    names = sorted(map_co)
    for name in names:
        if name.upper() == wanted:
            return map_co[name]
    for name in names:
        if name.upper().startswith(wanted):
            return map_co[name]
    return None
def ip_address_to_dec(ip_addr):
    """ Convert a dotted-quad IPv4 address string to its integer value.

    "1.2.3.4" becomes 0x01020304.  The previous hex-string implementation
    zero-padded only octets below 15, so an octet of exactly 15 ("f")
    shifted every following digit and produced a wrong number.

    Raises ValueError when ip_addr does not consist of four numeric octets
    in the range 0-255.
    """
    octets = ip_addr.split('.')
    if len(octets) != 4:
        raise ValueError("not a dotted-quad IPv4 address: %r" % (ip_addr,))
    decimal = 0
    for octet in octets:
        value = int(octet)
        if value < 0 or value > 255:
            raise ValueError("octet out of range in IPv4 address: %r" % (ip_addr,))
        decimal = (decimal << 8) | value
    return decimal
def lookup_ip_address(ip_addr, cache_dir):
    """ Print the country code and name for a given ip address.

    Looks for the IPv4 delegation in the database at cache_dir + "sqlitedb"
    whose range [start, start + value) contains ip_addr and prints its
    country code and country name.  Nothing is printed when no delegation
    matches.  Exits with status 5 when ip_addr is not a usable IPv4 address.
    """
    ipv4arr = ip_addr.split('.')
    try:
        if len(ipv4arr) != 4:
            raise ValueError(ip_addr)
        # Parse once up front instead of once per candidate row.
        target = ip_address_to_dec(ip_addr)
        first_octet = str(int(ipv4arr[0]))
    except (ValueError, IndexError):
        print("doesn't look like an ipv4 address..")
        sys.exit(5)
    cc_map = build_country_code_dictionary_rev(cache_dir)
    conn = sqlite3.connect(cache_dir + "sqlitedb")
    try:
        cursor = conn.cursor()
        # The dot keeps a first octet of "1" from also matching "10." or "100.".
        cursor.execute('select * from ipv4 WHERE start LIKE ?', (first_octet + '.%',))
        for row in cursor:
            first = ip_address_to_dec(row[2])
            # value is a count of addresses, so the upper bound is exclusive.
            if first <= target < first + int(row[3]):
                print('country code: ' + row[1])
                # Not every delegation code need appear in the country list.
                print('country: ' + cc_map.get(row[1], 'unknown'))
                break
        cursor.close()
    finally:
        conn.close()
def usage():
    """ Print usage information to standard error. """
    sys.stderr.write("""
blockfinder [-c DIR] -i
blockfinder [options] -t COUNTRY

The first form initializes the local cache. The second form queries it.

Understood options (not all of which are implemented yet):
  -h, --help                       Show this help and exit
  -v, --verbose                    Be verbose
  -c, --cachedir DIR               Set the cache directory
  -u, --useragent AGENT
  -p, --progress
  -s, --silent
  -o, --output FILE
  -4, --ipv4                       Search IPv4 allocations
  -6, --ipv6                       Search IPv6 allocations
  -a, --asn                        Search ASN allocations
  -t, --nation-state CC            Set the country to search (given as a two-letter code)
  -n, --country-name "Costa Rica"  Set country to search (full name)
  -i, --initialize-delegations     Initialize or update the local cache
  -x, --hack-the-internet          Hack the internet
  -r, --reverse-lookup IP          Return the country name for the specified IP

At least one of -t or -i is required, and when in -t mode, at least one of -4,
-6, and -a is required in order to do anything sensible.

""")
def main():
    """ Where the magic starts.

    Parses the command line, then either initializes/updates the local
    cache (-i), performs a reverse lookup of an IP address (-r), or prints
    the requested delegations (-4/-6/-a) for a country (-t/-n).

    Exit codes: 0 success, 1 nothing to do or unknown country, 2 bad command
    line, 3 unhandled option, 4 --hack-the-internet.
    """
    try:
        # Long options that take a value need a trailing "="; the spelling
        # must match what the loop below checks for.
        opts, args = getopt.getopt(sys.argv[1:],
            "xvhc:u:pso:46at:n:ir:",
            ["hack-the-internet", "verbose", "help", "cachedir=", "useragent=",
             "progress", "silent", "output=", "ipv4", "ipv6", "asn",
             "nation-state=", "country-name=", "initialize-delegations",
             "reverse-lookup="])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(2)

    # Other functions in this module read this flag as a global.
    global verbose
    verbose = False
    cache_dir = str(os.path.expanduser('~')) + "/.blockfinder/"
    delegation_urls = """
        ftp://ftp.arin.net/pub/stats/arin/delegated-arin-latest
        ftp://ftp.ripe.net/ripe/stats/delegated-ripencc-latest
        ftp://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-latest
        ftp://ftp.apnic.net/pub/stats/apnic/delegated-apnic-latest
        ftp://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-latest
    """
    geoip_country_url = "http://geolite.maxmind.com/download/geoip/database/GeoLiteCountry/GeoIP.dat.gz"
    delegation_files = [url.rpartition('/')[-1] for url in delegation_urls.split()]
    update_delegations = False
    requests = []
    country = ""
    country_name = ""
    useragent = "Mozilla/5.0"
    ipaddress = ""

    for o, a in opts:
        if o in ("-x", "--hack-the-internet"):
            print("all your bases are belong to us!")
            sys.exit(4)
        elif o in ("-v", "--verbose"):
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-c", "--cachedir"):
            cache_dir = a
        elif o in ("-u", "--useragent"):
            useragent = a
        elif o in ("-p", "--progress", "-s", "--silent", "-o", "--output"):
            # Accepted for compatibility; not implemented yet.
            pass
        elif o in ("-4", "--ipv4"):
            requests.append("ipv4")
        elif o in ("-6", "--ipv6"):
            requests.append("ipv6")
        elif o in ("-a", "--asn"):
            requests.append("asn")
        # XXX TODO: This should be a positional argument as it's the only mandatory one...
        elif o in ("-r", "--reverse-lookup"):
            ipaddress = a
            requests.append("reverse")
        elif o in ("-t", "--nation-state"):
            country = a.upper()
            country_name = ""
        elif o in ("-n", "--country-name"):
            # Resolved after the loop so a later -c is honoured; as before,
            # the last of -t / -n wins.
            country_name = a
            country = ""
        elif o in ("-i", "--initialize-delegations"):
            update_delegations = True
        else:
            print("Unhandled option; Sorry!")
            sys.exit(3)

    # Every helper builds paths as cache_dir + name, so the directory must
    # end with a separator even when given as "-c /some/dir".
    if cache_dir and not cache_dir.endswith(("/", os.sep)):
        cache_dir += "/"

    # Update and quit.
    if update_delegations:
        if GeoIP:
            update_geoip_cache(cache_dir, geoip_country_url, useragent)
        update_delegation_cache(cache_dir, delegation_urls, useragent)
        # The cache directory exists by now, so the download can be stored.
        if not os.path.exists(cache_dir + "countrycodes.xml"):
            download_country_code_file(cache_dir, useragent)
        if verbose:
            verify_cache(cache_dir, delegation_files)
        delegations = load_all_delegations(cache_dir, delegation_urls)
        create_sql_database(cache_dir)
        insert_into_sql_database(delegations, cache_dir)
        sys.exit(0)

    if not os.path.exists(cache_dir + "countrycodes.xml"):
        download_country_code_file(cache_dir, useragent)

    if not requests:
        print("Nothing to do. Have you requested anything?")
        print("Example usage: blockfinder -v --ipv4 -t mm")
        sys.exit(1)

    if ipaddress:
        print("Reverse ip lookup")
        lookup_ip_address(ipaddress, cache_dir)
        sys.exit(0)

    if country_name:
        country = get_country_code_from_name(cache_dir, country_name)
    if not country:
        print("It appears your search did not match a country.")
        sys.exit(1)

    # Check our cache age and warn if it's aged
    if cache_is_dated(cache_dir, delegation_files) and verbose:
        print("Your delegation cache is older than 24 hours; you probably want to update it.")
    if verbose:
        print("Using country code: %s" % country)

    for request in requests:
        try:
            use_sql_database(request, country, cache_dir)
        except IOError:
            # e.g. the output pipe was closed by the reader.
            sys.exit()
496 if __name__ == "__main__":
497 main()