Merge remote branch 'symm/master'
[blockfinder.git] / blockfinder
blob612a9ac961b9ea6876f261452bfba9408b5d17c7
1 #!/usr/bin/python
2 # -*- coding: utf-8 -*-
4 # For the people of Smubworld!
5 import urllib2
6 import os
7 import time
8 import getopt
9 import sys
10 from math import floor, log
11 import sqlite3
12 import hashlib
13 from xml.dom import minidom
14 __program__ = 'blockfinder'
15 __url__ = 'http://github.com/ioerror/blockfinder/'
16 __author__ = 'Jacob Appelbaum <jacob@appelbaum.net>'
17 __copyright__ = 'Copyright (c) 2009'
18 __license__ = 'See LICENSE for licensing information'
19 __version__ = '3.141'
21 try:
22 from future import antigravity
23 except ImportError:
24 antigravity = None
def update_progress_bar(percent_done, caption=""):
    """Write a one-line progress bar to the console.

    percent_done -- completed fraction; the drawn bar is clamped to 0..100%.
    caption      -- text shown to the right of the bar.

    The terminal width is read from `stty size`. When that yields nothing
    usable (for example when the process is not attached to a terminal),
    80 columns are assumed instead of raising.
    """
    columns = 80
    # stderr is discarded so a missing terminal does not spam the console on
    # every refresh of the bar.
    pipe = os.popen('stty size 2>/dev/null', 'r')
    try:
        size = pipe.read().split()
    finally:
        pipe.close()
    if len(size) == 2:
        try:
            columns = int(size[1])
        except ValueError:
            # Unparseable output: keep the 80-column fallback.
            pass
    if columns <= 0:
        columns = 80
    width = max(columns - 4 - len(caption), 0)
    filled = min(max(int(percent_done * width), 0), width)
    # "\x1b[G" returns the cursor to the start of the line so the next call
    # overwrites this bar.
    sys.stdout.write("[%s>%s] %s\x1b[G" % (
        "=" * filled,
        "." * (width - filled),
        caption))
    sys.stdout.flush()
# XXX TODO: allow the use of a proxy.
# Set up a proper Request object, set the user agent and, if desired, a proxy.
def fetch(url,useragent):
    """ Fetch (with progress meter) and return the contents of a url.

    url       -- address to download.
    useragent -- value sent in the User-agent request header.

    Returns the downloaded data as one string. Raises Exception when the
    reply has no content-length header or when the number of bytes received
    differs from the announced length.
    """
    req = urllib2.Request(url)
    req.add_header('User-agent', useragent)
    # TODO: req.set_proxy(host, type) once proxy support is added.
    fetcher = urllib2.urlopen(req)
    try:
        length_header = fetcher.headers.get("content-length")
        if length_header is None:
            raise Exception("Missing content-length header in reply from server.")
        length = int(length_header)
        # Divide by 1024.0 so the fractional part survives the rounding.
        print("Fetching  %s  kilobytes" % round(length / 1024.0, 2))
        chunks = []
        received = 0
        t_start = time.time()
        while True:
            t_delta = time.time() - t_start
            # Guard both divisions: the first pass can see zero elapsed time
            # and an empty document announces a length of zero.
            if t_delta > 0:
                rate = received / 1024.0 / t_delta
            else:
                rate = 0.0
            if length:
                done = float(received) / length
            else:
                done = 1.0
            update_progress_bar(done, "%.2f K/s" % rate)
            tmp = fetcher.read(1024)
            if not tmp:
                break
            chunks.append(tmp)
            received += len(tmp)
    finally:
        fetcher.close()
    if received != length:
        raise Exception("Expected %s bytes, only received %s" % (
            length, received))
    print("")
    return "".join(chunks)
def cache_delegation(cache_dir, delegation_url, useragent):
    """ Attempt to cache the contents of a delegation url in our cache dir.

    cache_dir      -- cache directory path, including the trailing separator.
    delegation_url -- url to download; its last path component becomes the
                      file name inside cache_dir.
    useragent      -- passed through to fetch().

    Creates cache_dir when it does not exist. Returns True once the file is
    written and False when writing fails. Errors from fetch() and any
    os.stat failure other than "no such file" propagate to the caller.
    """
    import errno

    try:
        os.stat(cache_dir)
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise
        if verbose:
            print("Initializing the cache directory...")
        os.mkdir(cache_dir)
    print("Fetching " + delegation_url)
    delegation = fetch(delegation_url,useragent)
    delegation_file = str(cache_dir) + str(delegation_url.split('/')[-1])
    try:
        # Binary mode: the file is later md5-checked byte for byte, so no
        # newline translation may happen on the way to disk.
        with open(delegation_file, 'wb') as f:
            f.write(delegation)
        return True
    except Exception as e:
        print(repr(e))
        return False
def cache_is_dated(cache_dir, cached_files):
    """ Report whether any cached file was last modified over 24 hours ago.

    cache_dir    -- cache directory path, including the trailing separator.
    cached_files -- file names to look up inside cache_dir.

    Prints a hint and re-raises OSError when cache_dir cannot be stat'ed.
    """
    try:
        os.stat(cache_dir)
    except OSError as err:
        print("\nDid you initialize the cache directory?\n")
        raise err
    # 86400 seconds is one day.
    return any(
        time.time() - os.stat(cache_dir + name).st_mtime > 86400
        for name in cached_files)
def create_sql_database(cache_dir):
    """Create a fresh sqlite database, replacing any existing one.

    The file is cache_dir + "sqlitedb" and receives three empty tables
    (asn, ipv4, ipv6) sharing the columns registry, cc, start, value, date
    and status.
    Field reference: ftp://ftp.arin.net/pub/stats/arin/README

    Only a missing database file is ignored when removing the old one; any
    other removal error is raised.
    """
    import errno

    db_path = cache_dir + "sqlitedb"
    try:
        os.remove(db_path)
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        for table in ("asn", "ipv4", "ipv6"):
            cursor.execute(
                "create table " + table +
                "(registry text, cc text, start text, value INTEGER, "
                "date text, status text)")
        conn.commit()
        cursor.close()
    finally:
        conn.close()
def insert_into_sql_database(delegations,cache_dir):
    """ Insert delegation records into the sqlite database.

    delegations -- iterable of record lists; each record is a dict with the
                   keys registry, cc, type, start, value, date and status.
                   A None entry in place of a record list is skipped.
    cache_dir   -- cache directory path, including the trailing separator.

    Records are skipped when their type is not ipv4/ipv6/asn, when the
    registry field is numeric, when cc is "*", or when a field is missing.
    """
    columns = ("registry", "cc", "start", "value", "date", "status")
    statements = dict(
        (table, "INSERT INTO " + table +
                " ( registry, cc, start, value, date,status)"
                " VALUES (?,?,?,?,?,?)")
        for table in ("ipv4", "ipv6", "asn"))
    conn = sqlite3.connect(cache_dir +"sqlitedb")
    try:
        cursor = conn.cursor()
        for delegation in delegations:
            for entry in delegation or ():
                statement = statements.get(entry.get('type'))
                if statement is None:
                    # Unknown type: never fall back to a previous table.
                    continue
                if str(entry.get('registry', '')).isdigit():
                    continue
                if str(entry.get('cc', '*')) == "*":
                    continue
                try:
                    data = [entry[column] for column in columns]
                except KeyError:
                    # Truncated line: not a complete record.
                    continue
                cursor.execute(statement, data)
        conn.commit()
        cursor.close()
    finally:
        conn.close()
def get_total_delegations_from_db(cache_dir):
    """Return the combined row count of the ipv4, ipv6 and asn tables.

    cache_dir -- cache directory path, including the trailing separator.
    """
    conn = sqlite3.connect(cache_dir +"sqlitedb")
    try:
        cursor = conn.cursor()
        count = 0
        for table in ("ipv4", "ipv6", "asn"):
            cursor.execute("select count (*) from " + table)
            count += int(cursor.fetchone()[0])
        cursor.close()
        return count
    finally:
        # Release the database handle even when a query fails.
        conn.close()
def get_possible_match_entries(cc,cache_dir):
    """ Return how many rows across ipv4, ipv6 and asn carry country code cc.

    cc        -- country code, either as a plain string or already wrapped
                 in a one-element tuple/list.
    cache_dir -- cache directory path, including the trailing separator.
    """
    # sqlite3 expects a sequence of bindings. A bare string would be bound
    # one character at a time and rejected for a one-placeholder statement.
    if not isinstance(cc, (tuple, list)):
        cc = (cc,)
    conn = sqlite3.connect(cache_dir +"sqlitedb")
    try:
        cursor = conn.cursor()
        count = 0
        for table in ("ipv4", "ipv6", "asn"):
            cursor.execute("select count (*) from " + table + " where cc=?", cc)
            count += int(cursor.fetchone()[0])
        cursor.close()
        return count
    finally:
        conn.close()
def use_sql_database(request, cc, cache_dir):
    """Print every cached delegation of one kind for one country.

    request   -- table to query: "ipv4", "ipv6" or "asn".
    cc        -- country code to match against the cc column.
    cache_dir -- cache directory path, including the trailing separator.

    ipv4 rows are printed as start/prefix with the prefix derived from the
    stored address count, ipv6 rows as start/value, asn rows as the start
    number. In verbose mode, cache statistics are printed as well.
    Raises ValueError for any other request value.
    """
    # The table name is concatenated into the SQL text, so only the three
    # known tables are accepted.
    if request not in ("ipv4", "ipv6", "asn"):
        raise ValueError("Unknown request type: %r" % (request,))
    conn = sqlite3.connect(cache_dir +"sqlitedb")
    try:
        cursor = conn.cursor()
        if verbose:
            print("We have %d entries in our delegation cache." % get_total_delegations_from_db(cache_dir))
        cc = (cc,)
        cursor.execute("select start,value from " + request + " where cc=?", cc)
        for row in cursor:
            if request == "ipv4":
                print(str(row[0]) + "/" + str(calculate_ipv4_subnet(int(row[1]))))
            elif request == "ipv6":
                print(str(row[0]) + "/" + str(int(row[1])))
            else:
                print(str(int(row[0])))
        if verbose:
            print("We found %d possible entries in our delegation cache." % get_possible_match_entries(cc, cache_dir))
            cursor.execute("select count(*) from " + request + " where cc=?", cc)
            print("We found %d matching entries in our delegation cache." % int(cursor.fetchone()[0]))
        cursor.close()
    finally:
        conn.close()
def get_md5_from_delegation_md5_file(cache_dir, delegation_file):
    """ Return the md5sum stored in the delegation's .md5 file.

    cache_dir       -- cache directory path, including the trailing separator.
    delegation_file -- delegation file name; ".md5" is appended to it.

    Two layouts are understood:
      "MD5 (name) = <digest>"   the digest follows the equals sign
      "<digest>  name"          the digest is the first field
    Returns the digest in lower case, or "" when the file cannot be read
    (the error is printed) or holds no digest.
    """
    try:
        with open(cache_dir + delegation_file + ".md5", "r") as f:
            content = f.read()
    except Exception as e:
        print(repr(e))
        return ""
    if "=" in content:
        fields = content.split("=", 1)[1].split()
    else:
        fields = content.split()
    if not fields:
        return ""
    # Splitting on whitespace keeps the whole digest whether or not the file
    # ends with a newline.
    return str(fields[0]).lower()
def verify_delegation_file(cache_dir, delegation_file):
    """Compare a delegation file's md5sum with the published md5sum.

    cache_dir       -- cache directory path, including the trailing separator.
    delegation_file -- delegation file name inside cache_dir.

    Returns True only when a published checksum is available and equals the
    checksum of the file on disk; returns False in every other case,
    including when either file is unreadable (the error is printed).
    """
    checksum_of_file = ""
    try:
        with open(cache_dir + delegation_file, "rb") as f:
            checksum_of_file = str(hashlib.md5(f.read()).hexdigest())
    except Exception as e:
        print(repr(e))
    checksum = get_md5_from_delegation_md5_file(cache_dir,delegation_file)
    # Two empty strings mean nothing could be checked, which is not a match.
    return checksum != "" and checksum == checksum_of_file
def verify_cache(cache_dir, delegation_files):
    """ Check each delegation file against its checksum and, in verbose
    mode, print the outcome for every file. """
    for name in delegation_files:
        if verbose:
            print("verifying " + name)
        matched = verify_delegation_file(cache_dir,name)
        if not verbose:
            continue
        if matched:
            print("the md5 checksum of " + name + " *matches* the provided checksum")
        else:
            print("the md5 checksum of " + name + " does *not* match the provided checksum")
def update_delegation_cache(cache_dir, delegation_urls, useragent):
    """ Refresh the cached copy of every delegation url.

    delegation_urls is a whitespace-separated string of urls. For each one
    the ".md5" companion is downloaded first; the delegation file itself is
    downloaded only when the cached copy fails verification. Always returns
    True.
    """
    print("Updating delegation cache...")
    for url in delegation_urls.split():
        cache_delegation(cache_dir, url+".md5",useragent)
        cached_name = url.rpartition('/')[-1]
        if not verify_delegation_file(cache_dir, cached_name):
            cache_delegation(cache_dir, url,useragent)
    return True
def load_delegation(delegation_file):
    """ Load and parse a delegation file into a list of dicts.

    Each non-comment, non-blank line is split on "|" and its fields are
    paired, in order, with the keys registry, cc, type, start, value, date
    and status. Lines with fewer fields give dicts with fewer keys, and
    extra fields are dropped.

    Returns the list of records, or an empty list when the file cannot be
    read (the error is printed).
    """
    keys = "registry cc type start value date status".split()
    try:
        with open(delegation_file, "r") as f:
            delegations = []
            for line in f:
                # Drop the line ending so it does not leak into the last field.
                line = line.rstrip("\r\n")
                if not line.strip() or line.startswith("#"):
                    continue
                delegations.append(dict(zip(keys, line.split("|"))))
            return delegations
    except (IOError, OSError) as e:
        # open() raises IOError on Python 2, which "except OSError" misses.
        print(repr(e))
        return []
def load_all_delegations(cache_dir, delegation_urls):
    """ Parse the cached file of every delegation url.

    delegation_urls is a whitespace-separated string of urls; the last path
    component of each names the file inside cache_dir. Returns a list with
    one load_delegation() result per url, in url order.
    """
    loaded = []
    for url in delegation_urls.split():
        cached_name = url.rsplit('/', 1)[-1]
        if verbose:
            print("Attempting to load delegation file into memory: " + cached_name)
        loaded.append(load_delegation(cache_dir + cached_name))
    return loaded
def calculate_ipv4_subnet(host_count):
    """Return the CIDR prefix length for a block of host_count addresses.

    The result is 32 minus floor(log2(host_count)), computed with integer
    shifts so it does not depend on floating-point rounding. A count that
    is not a power of two is rounded down to one, e.g. 768 gives /23.

    Raises ValueError when host_count is below 1.
    """
    remaining = int(host_count)
    if remaining < 1:
        raise ValueError("host_count must be at least 1")
    bits = 0
    while remaining > 1:
        remaining >>= 1
        bits += 1
    return 32 - bits
def download_country_code_file(cache_dir, useragent):
    """ Download and save the latest opencountrycode XML file.

    cache_dir -- cache directory path, including the trailing separator;
                 the data is stored there as "countrycodes.xml".
    useragent -- value sent in the User-agent request header.

    Returns True once the file is written, False when writing fails (the
    error is printed). Download errors propagate to the caller.
    """
    # Google frontend will not return content-length for some reason, so
    # this does not go through fetch().
    url = "http://opencountrycodes.appspot.com/xml"
    req = urllib2.Request(url)
    req.add_header('User-agent', useragent)
    ul = urllib2.urlopen(req)
    try:
        xml = ul.read()
    finally:
        ul.close()
    try:
        # Binary mode stores the bytes exactly as they were received.
        with open(cache_dir + "countrycodes.xml", 'wb') as f:
            f.write(xml)
        return True
    except Exception as e:
        print(repr(e))
        return False
def build_country_code_dictionary(cache_dir):
    """ Map each country name in countrycodes.xml to its country code.

    Reads cache_dir + "countrycodes.xml" and uses the "name" and "code"
    attributes of every <country> element.
    """
    document = minidom.parse(str(cache_dir) + "countrycodes.xml")
    return dict(
        (node.attributes["name"].value, node.attributes["code"].value)
        for node in document.getElementsByTagName("country"))
def get_country_code_from_name(cache_dir,country_name):
    """ Look up a country code by country name, ignoring letter case.

    Returns the code of the first matching name in the dictionary built by
    build_country_code_dictionary(), or None when no name matches.
    """
    known = build_country_code_dictionary(cache_dir)
    return next(
        (code for name, code in known.items()
         if name.lower() == country_name.lower()),
        None)
def usage():
    """ Print usage information to standard error. """
    # The text starts with a newline and ends with one; one more newline is
    # appended, matching what a print statement would emit.
    sys.stderr.write("""
blockfinder [-c DIR] -i
blockfinder [options] -t COUNTRY

The first form initializes the local cache. The second form queries it.

Understood options (not all of which are implemented yet):
    -h, --help              Show this help and exit
    -v                      Be verbose
    -c, --cachedir DIR      Set the cache directory
    -u, --useragent
    -p, --progress
    -o, --output FILE
    -4, --ipv4              Search IPv4 allocations
    -6, --ipv6              Search IPv6 allocation
    -a, --asn               Search ASN allocations
    -t, --nation-state CC   Set the country to search (given as a two-letter code)
    -n, --country-name "Costa Rica" Set country to search (full name)

At least one of -t or -i is required, and when in -t mode, at least one of -4,
-6, and -a is required in order to do anything sensible.
""" + "\n")
def main():
    """ Where the magic starts.

    Parses the command line and then runs one of two modes:

    * -i: refresh the delegation cache, rebuild the sqlite database, fetch
      the country-code list if it is missing, and exit with status 0.
    * otherwise: print the delegations of every requested kind (-4, -6, -a)
      for the country chosen with -t or -n.

    Exits with status 2 on a command-line error and status 1 when nothing
    was requested or a country name is unknown.
    """
    try:
        # "country-name=" takes an argument, and the long form of -i is
        # spelled the way the handler below expects it. getopt also accepts
        # any unambiguous prefix, so "--initialize-delegation" still works.
        opts, args = getopt.getopt(sys.argv[1:],
            "vhc:u:pso:46at:n:i",
            ["verbose", "help", "cachedir=", "useragent=", "progress",
             "silent", "output=", "ipv4", "ipv6", "asn", "nation-state=",
             "country-name=", "initialize-delegations"])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(2)

    global verbose
    verbose = False
    cache_dir = str(os.path.expanduser('~')) + "/.blockfinder/"
    delegation_urls = """
        ftp://ftp.arin.net/pub/stats/arin/delegated-arin-latest
        ftp://ftp.ripe.net/ripe/stats/delegated-ripencc-latest
        ftp://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-latest
        ftp://ftp.apnic.net/pub/stats/apnic/delegated-apnic-latest
        ftp://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-latest
    """
    delegation_files = [url.rpartition('/')[-1]
                        for url in delegation_urls.split()]
    update_delegations = False
    requests = []
    country = ""
    country_name = None
    useragent = "Mozilla/5.0"

    for o, a in opts:
        if o in ("-v", "--verbose"):
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-c", "--cachedir"):
            cache_dir = a
        elif o in ("-u", "--useragent"):
            useragent = a
        elif o in ("-p", "--progress", "-s", "--silent", "-o", "--output"):
            # Accepted so existing command lines keep working; these options
            # are not implemented yet.
            pass
        elif o in ("-4", "--ipv4"):
            requests.append("ipv4")
        elif o in ("-6", "--ipv6"):
            requests.append("ipv6")
        elif o in ("-a", "--asn"):
            requests.append("asn")
        # XXX TODO: This should be a positional argument as it's the only
        # mandatory one...
        elif o in ("-t", "--nation-state"):
            country = a.upper()
            country_name = None
        elif o in ("-n", "--country-name"):
            # Resolved after the loop, once the final cache directory is
            # known; the last of -t / -n on the command line wins.
            country_name = a
        elif o in ("-i", "--initialize-delegations"):
            update_delegations = True
        else:
            assert False, "Unhandled option; Sorry!"

    # Every path below is built by plain concatenation onto cache_dir.
    if not cache_dir.endswith("/"):
        cache_dir += "/"
    country_code_file = cache_dir + "countrycodes.xml"

    # Update and quit.
    if update_delegations:
        update_delegation_cache(cache_dir,delegation_urls,useragent)
        if verbose:
            verify_cache(cache_dir, delegation_files)
        delegations = load_all_delegations(cache_dir, delegation_urls)
        create_sql_database(cache_dir)
        insert_into_sql_database(delegations, cache_dir)
        # Fetched last: the cache directory exists by now, and a download
        # failure can no longer prevent the database from being built.
        if not os.path.exists(country_code_file):
            download_country_code_file(cache_dir,useragent)
        sys.exit(0)
    if not requests:
        print("Nothing to do. Have you requested anything?")
        print("Example usage: blockfinder -v --ipv4 -t mm")
        sys.exit(1)
    if country_name is not None:
        if not os.path.exists(country_code_file):
            download_country_code_file(cache_dir,useragent)
        country = get_country_code_from_name(cache_dir, country_name)
        if country is None:
            print("Unknown country name: " + country_name)
            sys.exit(1)
    # Check our cache age and warn if it's aged
    if cache_is_dated(cache_dir, delegation_files) and verbose:
        print("Your delegation cache is older than 24 hours; you probably want to update it.")
    if verbose:
        print("Using country code %s" % country)
    for request in requests:
        use_sql_database(request, country, cache_dir)
421 if __name__ == "__main__":
422 main()