2 # -*- coding: utf-8 -*-
4 # For the people of Smubworld!
10 from math
import floor
, log
13 from xml
.dom
import minidom
14 __program__
= 'blockfinder'
15 __url__
= 'http://github.com/ioerror/blockfinder/'
16 ___author__
= 'Jacob Appelbaum <jacob@appelbaum.net>, dave b. <db@d1b.org>'
17 __copyright__
= 'Copyright (c) 2010'
18 __license__
= 'See LICENSE for licensing information'
19 __version__
= '3.1415'
28 from future
import antigravity
def update_progress_bar(percent_done, caption=""):
    """Write a progress bar to the console.

    percent_done is the fraction of the bar to fill (it is multiplied by
    the bar width); caption is extra text whose length is reserved out of
    the terminal width.
    """
    # Terminal geometry comes from stty, so this only works on a real tty.
    # 'rows' is unused in the visible lines.
    rows, columns = map(int, os.popen('stty size', 'r').read().split())
    # Reserve 4 columns for the bar frame plus room for the caption.
    width = columns - 4 - len(caption)
    # \x1b[G returns the cursor to column 0 so the bar redraws in place.
    sys.stdout.write("[%s>%s] %s\x1b[G" % (
        "=" * int(percent_done * width),
        "." * (width - int(percent_done * width)),
        # NOTE(review): the remaining arguments of this write() call are on
        # lines not visible in this chunk.
# XXX TODO: allow the use of a proxy
# Set up a proper Request object, set the user agent and if desired, a proxy
def fetch(url, useragent):
    """ Fetch (with progress meter) and return the contents of a url. """
    req = urllib2.Request(url)
    req.add_header('User-agent', useragent)
    #req.set_proxy(host, type)
    fetcher = urllib2.urlopen(req)
    # Without Content-Length the progress meter cannot be drawn, so bail out.
    length_header = fetcher.headers.get("content-length")
    if length_header == None:
        raise Exception("Missing content-length header in reply from server.")
    length = int(length_header)
    # NOTE(review): length/1024 is integer division here (Python 2 ints), so
    # the float()/round() are applied after truncation.
    print "Fetching ", str(round(float(length/1024), 2)), " kilobytes"
    # NOTE(review): the download loop (initialising 'ret' and 't_start',
    # and the update_progress_bar call these fragments belong to) is on
    # lines not visible in this chunk.
    t_delta = time.time() - t_start
    float(len(ret)) / length,
    "%.2f K/s" % (len(ret) / 1024 / t_delta) )
    tmp = fetcher.read(1024)
    # Guard against short reads: the byte count must match Content-Length.
    if len(ret) != length:
        raise Exception("Expected %s bytes, only received %s" % (
def cache_delegation(cache_dir, delegation_url, useragent):
    """ Attempt to cache the contents of a delegation url in our cache dir. """
    # NOTE(review): the cache-directory existence check around this print,
    # and the write/close of the file opened below, are on lines not visible
    # in this chunk.
    print "Initializing the cache directory..."
    print "Fetching " + delegation_url
    delegation = fetch(delegation_url, useragent)
    # The cache file is named after the last path component of the url.
    tmp = delegation_url.split('/')
    delegation_file = str(cache_dir) + str(tmp[-1])
    f = open(delegation_file, 'w')
def cache_is_dated(cache_dir, cached_files):
    """ Returns True if the mtime of any files in cache dir is > 24hrs."""
    # NOTE(review): the error handling that triggers this prompt, the loop
    # branch bodies and the returns are on lines not visible in this chunk.
    print "\nDid you initialize the cache directory?\n"
    for file in cached_files:
        fstat = os.stat(cache_dir + file)
        # 86400 seconds == 24 hours.
        if (time.time() - fstat.st_mtime) > 86400:
def create_sql_database(cache_dir):
    """ Creates a new sqlite database.
    If there is a previous sqlite database it will be deleted. """
    # NOTE(review): the guard/try around this remove and the commit/close of
    # the connection are on lines not visible in this chunk.
    os.remove(cache_dir + "sqlitedb")
    conn = sqlite3.connect(cache_dir + "sqlitedb")
    cursor = conn.cursor()
    # One table per delegation type, all sharing the same schema; 'value'
    # holds a host count for ipv4 rows and a prefix length for ipv6 rows
    # (see use_sql_database).
    cursor.execute("""create table asn(registry text, cc text, start text, value INTEGER, date text, status text)""")
    cursor.execute("""create table ipv4(registry text, cc text, start text, value INTEGER, date text, status text)""")
    cursor.execute("""create table ipv6(registry text, cc text, start text, value INTEGER, date text, status text)""")
def insert_into_sql_database(delegations, cache_dir):
    """ inserts delegation information into the sqlite database"""
    # NOTE(review): the commit/close of the connection is on lines not
    # visible in this chunk.
    conn = sqlite3.connect(cache_dir + "sqlitedb")
    cursor = conn.cursor()
    for delegation in delegations:
        for entry in delegation:
            registry = str(entry['registry'])
            # Skip summary records (numeric 'registry' field) and wildcard
            # country codes.
            if not registry.isdigit() and str(entry['cc']) != "*":
                # NOTE(review): the 'table = ...' assignments belonging to
                # the three type checks below are on lines not visible in
                # this chunk.
                if entry['type'] == "ipv6":
                if entry['type'] == "ipv4":
                if entry['type'] == "asn":
                # table is one of the three fixed names above, not user
                # input; the record values are bound as parameters.
                text = """INSERT INTO """ + table + """ ( registry, cc, start, value, date,status) VALUES (?,?,?,?,?,?)"""
                data = [entry['registry'], entry['cc'], entry['start'], entry['value'], entry['date'], entry['status'] ]
                cursor.execute(text, data)
def get_total_delegations_from_db(cache_dir):
    """ Returns the total count of the number of entries in the ipv4, ipv6 and asn table """
    conn = sqlite3.connect(cache_dir + "sqlitedb")
    cursor = conn.cursor()
    # NOTE(review): the initialisation of 'count' and the return are on
    # lines not visible in this chunk.
    table_names = ["ipv4", "ipv6", "asn"]
    for table in table_names:
        # table comes from the fixed list above, so the concatenation does
        # not take user input.
        cursor.execute("""select count (*) from """ + table)
        count += int(cursor.fetchone()[0])
def get_possible_match_entries(cc, cache_dir):
    """ Get the count of 'possible' matching delegation entries"""
    conn = sqlite3.connect(cache_dir + "sqlitedb")
    cursor = conn.cursor()
    # NOTE(review): the initialisation of 'count' and the return are on
    # lines not visible in this chunk.
    table_names = ["ipv4", "ipv6", "asn"]
    for table in table_names:
        # table comes from the fixed list above; cc is passed as the bound
        # parameter sequence for the single '?' placeholder.
        cursor.execute("""select count (*) from """ + table + """ where cc=?""", cc)
        count += int(cursor.fetchone()[0])
def use_sql_database(request, cc, cache_dir):
    """ Use the sqlite database that is created after fetching delegations
    to output information for a given request """
    conn = sqlite3.connect(cache_dir + "sqlitedb")
    cursor = conn.cursor()
    print "We have %d entries in our delegation cache." % get_total_delegations_from_db(cache_dir)
    # request names the table directly ("ipv4"/"ipv6"/"asn" per the callers
    # visible in this file); cc is bound as a parameter.
    text = """select start,value from """ + request + """ where cc=?"""
    cursor.execute(text, cc)
    # NOTE(review): the row-iteration loop header is on a line not visible
    # in this chunk.
    if request == "ipv4":
        # 'value' is a host count for ipv4; convert it to a CIDR prefix.
        print str(row[0]) + "/" + str(calculate_ipv4_subnet(int(row[1])))
    elif request == "ipv6":
        # For ipv6 the 'value' column already is the prefix length.
        print str(row[0]) + "/" + str(int(row[1]))
        # NOTE(review): the 'else' (asn) branch header for the line below is
        # on a line not visible in this chunk.
        print str(int(row[0]))
    print "We found %d possible entries in our delegation cache." % get_possible_match_entries(cc, cache_dir)
    cursor.execute("""select count(*) from """ + request + """ where cc=?""", cc)
    print "We found %d matching entries in our delegation cache." % int(cursor.fetchone()[0])
def get_md5_from_delegation_md5_file(cache_dir, delegation_file):
    """ Returns the md5sum from the delegation md5 file
    if it doesn't exist it returns an empty string"""
    # NOTE(review): the read of the file into 'checksum', the surrounding
    # error handling and the return are on lines not visible in this chunk.
    f = open(cache_dir + delegation_file + ".md5", "r")
    # afrinic's md5 file puts the digest first ("<digest>  <file>"), so the
    # digest is everything before the first space.
    if delegation_file == "delegated-afrinic-latest":
        pos = checksum.find(" ")
        checksum = str(checksum[:pos])
        # NOTE(review): the 'else' branch header for the two lines below
        # (the "... = <digest>" format used by the other registries) is on a
        # line not visible in this chunk.
        pos = checksum.find("=") + 2
        checksum = str(checksum[pos:-1])
def verify_delegation_file(cache_dir, delegation_file):
    """ compares the delegation file md5sum to that of the provided md5sum
    returns True if they match otherwise returns False """
    checksum_of_file = ""
    # NOTE(review): the error handling around this open/read and the bodies
    # of the two comparisons below (presumably the False/True returns) are
    # on lines not visible in this chunk.
    f = open(cache_dir + delegation_file, "rb")
    checksum_of_file = str(hashlib.md5(f.read()).hexdigest())
    checksum = get_md5_from_delegation_md5_file(cache_dir, delegation_file)
    if checksum != checksum_of_file:
    # The extra != "" check stops an absent .md5 file (empty checksum) from
    # counting as a match.
    if checksum == checksum_of_file and checksum != "":
def verify_cache(cache_dir, delegation_files):
    """ if in verbose mode prints the result of checking the checksum of the
    delegation files """
    # NOTE(review): the verbosity checks guarding these prints are on lines
    # not visible in this chunk.
    for file in delegation_files:
        print "verifying " + file
        if verify_delegation_file(cache_dir, file):
            print "the md5 checksum of " + file + " *matches* the provided checksum"
            print "the md5 checksum of " + file + " does *not* match the provided checksum"
def update_delegation_cache(cache_dir, delegation_urls, useragent):
    """ Fetch multiple delegation urls and cache the contents. """
    print "Updating delegation cache..."
    for url in delegation_urls.split():
        # Fetch the published .md5 first so the data file can be verified.
        cache_delegation(cache_dir, url + ".md5", useragent)
        # If the cached copy already matches the published md5, the data
        # file does not need re-fetching.
        if verify_delegation_file(cache_dir, url.rpartition('/')[-1]):
            # NOTE(review): the lines between this check and the fetch below
            # are not visible in this chunk; the fetch presumably belongs to
            # the non-matching branch.
            cache_delegation(cache_dir, url, useragent)
def unpack_geoip_cache(cache_dir, geoip_urls):
    """ Unpack the fetched GeoIP file into the blockfinder cache. """
    # This probably should unlink the gzip'ed file if we care about space...
    for url in geoip_urls.split():
        # NOTE(review): this derives the filename from geoip_urls (the whole
        # whitespace-separated string) instead of the loop variable 'url' —
        # with more than one url every iteration names the same (last) file.
        # Looks like a bug; verify before fixing.
        gzip_filename = geoip_urls.rpartition('/')[-1]
        # Drop the trailing .gz to get the output name.
        gunziped_filename = gzip_filename.rpartition('.')[0]
        print "Unpacking GeoIP file " + gzip_filename + " into our cache as " + gunziped_filename
        # NOTE(review): the close of gzip_file and any verbosity guard are
        # on lines not visible in this chunk.
        gzip_file = gzip.open(cache_dir + gzip_filename, 'rb')
        gunzipped_data = gzip_file.read()
        gunzipped_file = open(cache_dir + gunziped_filename, 'w')
        gunzipped_file.writelines(gunzipped_data)
        gunzipped_file.close()
269 def update_geoip_cache(cache_dir
, geoip_urls
, useragent
):
270 """ Fetch country level resolution GeoIP files from a given url and cache
271 the contents. Unpack it if it's compressed. """
272 print "Updating GeoIP cache..."
273 for url
in geoip_urls
.split():
274 cache_delegation(cache_dir
, url
, useragent
)
275 unpack_geoip_cache(cache_dir
, geoip_urls
)
def load_delegation(delegation_file):
    """ Load, parse and store the delegation file contents as a list. """
    # Field names applied, in order, to each '|'-separated record.
    keys = "registry cc type start value date status"
    # NOTE(review): the close of 'f' and the return are on lines not visible
    # in this chunk.
    f = open(delegation_file, "r")
    # One dict per non-comment line, zipping the key names against the
    # pipe-separated fields.
    delegations = [dict((k, v) for k, v in zip(keys.split(), line.split("|")))
                   for line in f.readlines() if not line.startswith("#")]
def load_all_delegations(cache_dir, delegation_urls):
    """ Load all delegations into memory. """
    # NOTE(review): the initialisation of 'delegations', any verbosity guard
    # around the print, and the return are on lines not visible in this
    # chunk.
    for url in delegation_urls.split():
        # Cache files are named after the last path component of each url.
        filename = url.rpartition('/')[-1]
        print "Attempting to load delegation file into memory: " + filename
        delegations.append(load_delegation(cache_dir + filename))
def calculate_ipv4_subnet(host_count):
    """ Return the CIDR prefix length for an IPv4 allocation of host_count
    addresses (e.g. 256 -> 24, 1 -> 32).

    Raises ValueError if host_count is not positive (the original raised
    ValueError from math.log for the same inputs).

    Uses integer bit_length() instead of floor(log(n, 2)): the float
    logarithm can land a hair below an exact power of two and misround,
    while bit_length() is exact for integers.
    """
    if host_count < 1:
        raise ValueError("host_count must be a positive number")
    # For n >= 1, int(n).bit_length() - 1 == floor(log2(n)) exactly.
    return 32 - (int(host_count).bit_length() - 1)
def download_country_code_file(cache_dir, useragent):
    """ Download and save the latest opencountrycode XML file """
    # Google frontend will not return content-length for some reason...
    url = "http://opencountrycodes.appspot.com/xml"
    # NOTE(review): 'useragent' is not applied to this request in the
    # visible lines — confirm whether that is intentional. The read/write
    # of the response and the close of 'f' are also on lines not visible in
    # this chunk.
    ul = urllib2.urlopen(url)
    f = open(cache_dir + "countrycodes.xml", 'w')
def build_country_code_dictionary(cache_dir):
    """ Return a dictionary mapping country name to the country code"""
    # NOTE(review): the initialisation of 'map_co' and the return are on
    # lines not visible in this chunk.
    xml_file = str(cache_dir) + "countrycodes.xml"
    clist = minidom.parse(xml_file)
    for country in clist.getElementsByTagName("country"):
        # 'code' and 'name' are attribute nodes; .value gives the string.
        code = country.attributes["code"]
        name = country.attributes["name"]
        map_co[name.value] = code.value
def build_country_code_dictionary_rev(cache_dir):
    """ Return a dictionary mapping country code to the country name"""
    # NOTE(review): the initialisation of 'map_co' and the return are on
    # lines not visible in this chunk.
    xml_file = str(cache_dir) + "countrycodes.xml"
    clist = minidom.parse(xml_file)
    for country in clist.getElementsByTagName("country"):
        # Same parse as build_country_code_dictionary, with the mapping
        # direction inverted (code -> name).
        code = country.attributes["code"]
        name = country.attributes["name"]
        map_co[code.value] = name.value
def get_country_code_from_name(cache_dir, country_name):
    """ Return the country code for a given country name. """
    map_co = build_country_code_dictionary(cache_dir)
    # Case-insensitive prefix match, so e.g. "costa" matches "Costa Rica";
    # this can therefore collect more than one code.
    cc_code = [map_co[key] for key in map_co.keys()
               if key.upper().startswith(country_name.upper())]
    # NOTE(review): the return is on a line not visible in this chunk.
def ip_address_to_dec(ip_addr):
    """Convert a dotted-quad IPv4 address string to its integer value by
    concatenating the four octets as two-digit hex."""
    ipar = ip_addr.split('.')
    # NOTE(review): the loop header over the four octets and the
    # initialisation of 'a' (per-octet hex strings) are on lines not visible
    # in this chunk, as is the return.
        a[i] = hex(int(ipar[i]))[2:]
        # Zero-pad single-digit hex octets so each contributes exactly two
        # characters to the concatenation below.
        # NOTE(review): '< 15' looks like an off-by-one — octet value 15
        # ('f') is single-digit hex but is excluded from padding, which
        # would shorten 'total' by one character. '< 16' appears intended;
        # verify before changing.
        if (int(ipar[i]) < 15):
            a[i] = """0""" + a[i]
    total = '0x' + a[0] + a[1] + a[2] + a[3]
    decimal = int(total, 16)
def geoip_lookup(cache_dir, ip_addr):
    """Resolve ip_addr to a country code and country name using the cached
    GeoIP database."""
    gi = GeoIP.open(cache_dir + "GeoIP.dat", GeoIP.GEOIP_STANDARD)
    cc = gi.country_code_by_addr(ip_addr)
    cc_name = gi.country_name_by_addr(ip_addr)
    # NOTE(review): the return is on a line not visible in this chunk; the
    # caller (lookup_ip_address) unpacks two values.
def lookup_ip_address(ip_addr, cache_dir):
    """ Return the country code and name for a given ip address. Attempts to
    use GeoIP if available."""
    print "Reverse lookup for: " + ip_addr
    # NOTE(review): several lines of this function (guards around the GeoIP
    # call, the row-iteration loop header, branch headers) are not visible
    # in this chunk; indentation below is best-effort.
    geoip_cc, geoip_cc_name = geoip_lookup(cache_dir, ip_addr)
    print "GeoIP country code: " + str(geoip_cc)
    print "GeoIP country name: " + str(geoip_cc_name)
    conn = sqlite3.connect(cache_dir + "sqlitedb")
    cursor = conn.cursor()
    ipv4arr = ip_addr.split('.')
    print """doesn't look like an ipv4 address.."""
    # Narrow candidates by first-octet prefix, then range-check each row
    # exactly below.
    cursor.execute('select * from ipv4 WHERE start LIKE ?', (ipv4arr[0]+'%',))
    cc_map = build_country_code_dictionary_rev(cache_dir)
    # Per the ipv4 schema, row[1] is cc, row[2] the block start address and
    # row[3] the 'value' (allocation size) column.
    if (ip_address_to_dec(row[2]) <= ip_address_to_dec(ip_addr) <= (ip_address_to_dec(row[2]) + row[3])):
        rir_cc_name = cc_map[row[1]]
        print 'RIR country code: ' + rir_cc
        print 'RIR country: ' + rir_cc_name
    if geoip_cc != rir_cc:
        print "It appears that the RIR data conflicts with the GeoIP data"
        print "The GeoIP data is likely closer to being correct due to " \
              "sub-delegation issues with LIR databases"
394 """ Print usage information. """
395 print >> sys
.stderr
, """
396 blockfinder [-c DIR] -i
397 blockfinder [options] -t COUNTRY
399 The first form initializes the local cache. The second form queries it.
401 Understood options (not all of which are implemented yet):
402 -h, --help Show this help and exit
404 -c, --cachedir DIR Set the cache directory
408 -4, --ipv4 Search IPv4 allocations
409 -6, --ipv6 Search IPv6 allocation
410 -a, --asn Search ASN allocations
411 -t, --nation-state CC Set the country to search (given as a two-letter code)
412 -n, --country-name "Costa Rica" Set country to search (full name)
413 -x, --hack-the-internet Hack the internet
414 -r, --reverse-lookup Return the county name for the specified IP
416 At least one of -t or -i is required, and when in -t mode, at least one of -4,
417 -6, and -a is required in order to do anything sensible.
421 """ Where the magic starts. """
423 opts
, args
= getopt
.getopt(sys
.argv
[1:],
424 "xvhc:u:pso:46at:n:ir:",
425 ["hack-the-internet", "verbose", "help", "cachedir=", "useragent=", "progress",
426 "silent", "output=", "ipv4", "ipv6", "asn", "nation-state=",
427 "country-name", "initialize-delegation","reverse-lookup"])
428 except getopt
.GetoptError
, err
:
437 cache_dir
= str(os
.path
.expanduser('~')) + "/.blockfinder/"
438 update_delegations
= False
439 delegation_urls
= """
440 ftp://ftp.arin.net/pub/stats/arin/delegated-arin-latest
441 ftp://ftp.ripe.net/ripe/stats/delegated-ripencc-latest
442 ftp://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-latest
443 ftp://ftp.apnic.net/pub/stats/apnic/delegated-apnic-latest
444 ftp://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-latest
446 geoip_country_urls
= """http://geolite.maxmind.com/download/geoip/database/GeoLiteCountry/GeoIP.dat.gz
447 http://geolite.maxmind.com/download/geoip/database/GeoIPv6.dat.gz"""
449 delegation_files
= []
450 for url
in delegation_urls
.split():
451 filename
= url
.rpartition('/')
452 delegation_files
.append(filename
[-1])
453 update_delegations
= False
456 useragent
= "Mozilla/5.0"
459 if not os
.path
.exists(cache_dir
+ "countrycodes.xml"):
460 download_country_code_file(cache_dir
,useragent
)
463 if o
in ("-x", "--hack-the-internet"):
464 print "all your bases are belong to us!"
468 elif o
in ("-h", "--help"):
471 elif o
in ("-c", "--cachedir"):
473 elif o
in ("-u", "--useragent"):
475 elif o
in ("-p", "--progress"):
477 elif o
in ("-s", "--silent"):
479 elif o
in ("-o", "--output"):
481 elif o
in ("-4", "--ipv4"):
482 requests
.append("ipv4")
483 elif o
in ("-6", "--ipv6"):
484 requests
.append("ipv6")
485 elif o
in ("-a", "--asn"):
486 requests
.append("asn")
487 # XXX TODO: This should be a positional argument as it's the only manditory one...
488 elif o
in ("-r", "--reverse-lookup"):
490 requests
.append("reverse")
491 elif o
in ("-t", "--nation-state"):
493 elif o
in ("-n", "--country-name"):
494 country
= get_country_code_from_name(cache_dir
, a
)
495 elif o
in ("-i", "--initialize-delegations"):
496 update_delegations
= True
498 print "Unhandled option; Sorry!"
502 if update_delegations
:
504 update_geoip_cache(cache_dir
,geoip_country_urls
,useragent
)
505 update_delegation_cache(cache_dir
,delegation_urls
,useragent
)
507 verify_cache(cache_dir
, delegation_files
)
508 delegations
= load_all_delegations(cache_dir
, delegation_urls
)
509 create_sql_database(cache_dir
)
510 insert_into_sql_database(delegations
, cache_dir
)
514 print "Nothing to do. Have you requested anything?"
515 print "Example usage: blockfinder -v --ipv4 -t mm"
519 lookup_ip_address(ipaddress
,cache_dir
)
522 print "It appears your search did not match a country."
524 # Check our cache age and warn if it's aged
525 if cache_is_dated(cache_dir
, delegation_files
) and verbose
:
526 print "Your delegation cache is older than 24 hours; you probably want to update it."
528 print "Using country code: %s" % country
530 for request
in requests
:
532 use_sql_database(request
, country
, cache_dir
)
533 except IOError: sys
.exit()
# Script entry point; the guarded call (presumably main()) is on a line not
# visible in this chunk.
if __name__ == "__main__":