# Source: blockfinder (git blob e498e8cf9c8f804a1bc8566d37d6627a6602448e),
# as of the merge of pull request #78 from aagbsn/fix/77_str_decode_arguments.
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
4 # For the people of Smubworld!
5 import os
6 import time
7 import optparse
8 import sys
9 import sqlite3
10 import hashlib
11 import gzip
12 import zipfile
13 import re
15 if sys.version_info[0] >= 3:
16 import configparser
17 import ipaddress as ipaddr
18 from urllib.request import (urlopen, Request)
19 from urllib.error import URLError
20 long = int
21 else:
22 import ConfigParser as configparser
23 from urllib2 import (urlopen, Request, URLError)
24 from embedded_ipaddr import ipaddr
25 ipaddr.ip_address = ipaddr.IPAddress
# True when running under native Windows; used to pick console-handling
# behavior in the download progress bar.
is_win32 = (sys.platform == "win32")

__program__ = 'blockfinder'
__url__ = 'https://github.com/ioerror/blockfinder/'
__author__ = 'Jacob Appelbaum <jacob@appelbaum.net>, David <db@d1b.org>'
__copyright__ = 'Copyright (c) 2010'
__license__ = 'See LICENSE for licensing information'
__version__ = '3.1415'

# Joke import (cf. xkcd 353); deliberately tolerated if unavailable, in
# which case antigravity is simply None.
try:
    from future import antigravity
except ImportError:
    antigravity = None
class DatabaseCache:
    """ SQLite-backed cache of IPv4/IPv6/ASN-to-country-code assignments.

    Numbers are stored as zero-padded hex strings rather than INTEGERs
    because SQLite's INTEGER type holds at most 63 unsigned bits, which
    is not enough to represent a /64 IPv6 block. """

    def __init__(self, cache_dir, verbose=False):
        # cache_dir holds both the SQLite file and the db.cfg version file.
        self.cache_dir = cache_dir
        self.verbose = verbose
        self.cursor = None
        self.conn = None
        # Bump this whenever the schema changes; connect_to_database()
        # refuses caches created with a different version.
        self.db_version = "0.0.4"
        # BUGFIX: the original passed the single pre-concatenated string
        # (self.cache_dir + "sqlitedb") to os.path.join(), i.e. plain
        # concatenation; for a cache dir without a trailing separator it
        # produced a sibling file such as "~/.blockfindersqlitedb".
        # Joining the two components yields the intended path and is
        # identical for the default cache dir, which ends in "/".
        self.db_path = os.path.join(self.cache_dir, "sqlitedb")

    def erase_database(self):
        """ Erase the database file. """
        if os.path.exists(self.db_path):
            os.remove(self.db_path)

    def connect_to_database(self):
        """ Connect to the database cache, possibly after creating it if
            it doesn't exist yet, or after making sure an existing
            database cache has the correct version. Return True if a
            connection could be established, False otherwise. """
        if not os.path.exists(self.cache_dir):
            if self.verbose:
                print("Initializing the cache directory...")
            os.mkdir(self.cache_dir)
        if os.path.exists(self.db_path):
            cache_version = self.get_db_version()
            if not cache_version:
                # Caches created before versioning was introduced.
                cache_version = "0.0.1"
            if cache_version != self.db_version:
                print(("The existing database cache uses version %s, "
                       "not the expected %s." % (cache_version,
                                                 self.db_version)))
                return False
        self.conn = sqlite3.connect(self.db_path)
        self.cursor = self.conn.cursor()
        self.create_assignments_table()
        return True

    def __get_default_config_file_obj(self):
        """ Open (creating it first if necessary) the default database
            configuration file and return the file object. """
        open_flags = 'r+'
        file_path = os.path.join(self.cache_dir, 'db.cfg')
        if not os.path.exists(file_path):
            open_flags = 'w+'
        return open(file_path, open_flags)

    def _get_db_config(self, file_obj=None):
        """ Return the database configuration object from the provided
            file_obj if provided, otherwise from the default database
            configuration file. """
        if file_obj is None:
            file_obj = self.__get_default_config_file_obj()
        # BUGFIX: SafeConfigParser and readfp() were deprecated and then
        # removed in Python 3.12; ConfigParser/read_file() are the direct
        # replacements.  Fall back to readfp() on Python 2, whose
        # ConfigParser class lacks read_file().
        config = configparser.ConfigParser()
        if hasattr(config, 'read_file'):
            config.read_file(file_obj)
        else:
            config.readfp(file_obj)
        file_obj.close()
        return config

    def set_db_version(self, file_obj=None):
        """ Set the database version string in the config file. """
        if file_obj is None:
            file_obj = self.__get_default_config_file_obj()
        config = self._get_db_config()
        if not config.has_section('db'):
            config.add_section('db')
        config.set('db', 'version', self.db_version)
        config.write(file_obj)
        file_obj.close()

    def get_db_version(self):
        """ Read and return the database version string from the config
            file, or None if no version has been recorded yet. """
        config = self._get_db_config()
        if not config.has_section('db'):
            return None
        return config.get('db', 'version')

    def commit_and_close_database(self):
        """ Commit outstanding changes and shut the database down. """
        self.conn.commit()
        self.cursor.close()
        # BUGFIX: also close the connection itself so the database file
        # handle is released (the original closed only the cursor).
        self.conn.close()

    def create_assignments_table(self):
        """ Create the assignments table that stores all assignments from
            IPv4/IPv6/ASN to country code. Blocks are stored as first hex
            of and first hex after the assignment. Numbers are stored
            as hex strings, because SQLite's INTEGER type only holds up to
            63 unsigned bits, which is not enough to store a /64 IPv6
            block. Hex strings have leading zeros, with IPv6 addresses
            being 33 hex characters long and IPv4 addresses and ASN being
            9 hex characters long. The first number after an assignment
            range is stored instead of the last number in the range to
            facilitate comparisons with neighboring ranges. """
        sql = ('CREATE TABLE IF NOT EXISTS assignments(start_hex TEXT, '
               'next_start_hex TEXT, num_type TEXT, country_code TEXT, '
               'source_type TEXT, source_name TEXT)')
        self.cursor.execute(sql)
        self.conn.commit()

    def delete_assignments(self, source_type):
        """ Delete all assignments from the database cache matching a
            given source type ("rir", "lir", etc.). """
        sql = 'DELETE FROM assignments WHERE source_type = ?'
        self.cursor.execute(sql, (source_type, ))
        self.conn.commit()

    def insert_assignment(self, start_num, end_num, num_type,
                          country_code, source_type, source_name):
        """ Insert an assignment into the database cache, without
            committing after the insertion. """
        sql = ('INSERT INTO assignments (start_hex, next_start_hex, '
               'num_type, country_code, source_type, source_name) '
               'VALUES (?, ?, ?, ?, ?, ?)')
        # Zero-padded widths keep lexicographic string comparison in SQL
        # equivalent to numeric comparison.
        if num_type == 'ipv6':
            start_hex = '%033x' % start_num
            next_start_hex = '%033x' % (end_num + 1)
        else:
            start_hex = '%09x' % start_num
            next_start_hex = '%09x' % (end_num + 1)
        self.cursor.execute(sql, (start_hex, next_start_hex, num_type,
                                  country_code, source_type, source_name))

    def commit_changes(self):
        """ Commit changes, e.g., after inserting assignments into the
            database cache. """
        self.conn.commit()

    def fetch_assignments(self, num_type, country_code):
        """ Fetch all assignments from the database cache matching the
            given number type ("asn", "ipv4", or "ipv6") and country code.
            The result is a sorted list of tuples containing (start_num,
            end_num). """
        sql = ('SELECT start_hex, next_start_hex FROM assignments '
               'WHERE num_type = ? AND country_code = ? '
               'ORDER BY start_hex')
        self.cursor.execute(sql, (num_type, country_code))
        result = []
        for row in self.cursor:
            # int() parses arbitrarily large hex strings on both Python 2
            # (auto-promoting to long) and Python 3, so the module-level
            # "long" alias is not needed here.
            result.append((int(row[0], 16), int(row[1], 16) - 1))
        return result

    def fetch_country_code(self, num_type, source_type, lookup_num):
        """ Fetch the country code from the database cache that is
            assigned to the given number (e.g., IPv4 address in decimal
            notation), number type (e.g., "ipv4"), and source type (e.g.,
            "rir").  Return None when no assignment covers the number. """
        sql = ('SELECT country_code FROM assignments WHERE num_type = ? '
               'AND source_type = ? AND start_hex <= ? '
               'AND next_start_hex > ?')
        if num_type == 'ipv6':
            lookup_hex = '%033x' % int(lookup_num)
        else:
            lookup_hex = '%09x' % int(lookup_num)
        self.cursor.execute(sql, (num_type, source_type, lookup_hex,
                                  lookup_hex))
        row = self.cursor.fetchone()
        if row:
            return row[0]

    def fetch_country_blocks_in_other_sources(self, first_country_code):
        """ Fetch all assignments matching the given country code, then look
            up to which country code(s) the same number ranges are assigned in
            other source types. Return 8-tuples containing (1) first source
            type, (2) first and (3) last number of the assignment in the first
            source type, (4) second source type, (5) first and (6) last number
            of the assignment in the second source type, (7) country code in
            the second source type, and (8) number type. """
        sql = ('SELECT first.source_type, first.start_hex, '
               'first.next_start_hex, second.source_type, '
               'second.start_hex, second.next_start_hex, '
               'second.country_code, first.num_type '
               'FROM assignments AS first '
               'JOIN assignments AS second '
               'WHERE first.country_code = ? '
               'AND first.start_hex <= second.next_start_hex '
               'AND first.next_start_hex >= second.start_hex '
               'AND first.num_type = second.num_type '
               'ORDER BY first.source_type, first.start_hex, '
               'second.source_type, second.start_hex')
        self.cursor.execute(sql, (first_country_code, ))
        result = []
        for row in self.cursor:
            result.append((str(row[0]), int(row[1], 16),
                           int(row[2], 16) - 1, str(row[3]), int(row[4], 16),
                           int(row[5], 16) - 1, str(row[6]), str(row[7])))
        return result
class DownloaderParser:
    """ Downloads MaxMind/RIR/LIR source files into the local cache
    directory and parses them into the database cache. """

    def __init__(self, cache_dir, database_cache, user_agent,
                 verbose=False):
        # cache_dir: directory that receives downloaded files.
        # database_cache: DatabaseCache instance that stores parsed
        #     assignments.
        # user_agent: User-Agent header sent with download requests.
        self.cache_dir = cache_dir
        self.database_cache = database_cache
        self.user_agent = user_agent
        self.verbose = verbose

    # MaxMind GeoIP country databases: IPv4 (zip archive of CSVs) and
    # IPv6 (gzipped CSV).
    MAXMIND_URLS = """
        http://geolite.maxmind.com/download/geoip/database/GeoIPCountryCSV.zip
        http://geolite.maxmind.com/download/geoip/database/GeoIPv6.csv.gz
    """

    # Delegation files published by the five Regional Internet Registries.
    RIR_URLS = """
        ftp://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest
        ftp://ftp.ripe.net/ripe/stats/delegated-ripencc-latest
        ftp://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-latest
        ftp://ftp.apnic.net/pub/stats/apnic/delegated-apnic-latest
        ftp://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-latest
    """

    # RIPE database dumps carrying LIR (inetnum/inet6num) assignments.
    LIR_URLS = """
        ftp://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz
        ftp://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz
    """

    # Semicolon-separated ISO 3166 country names and codes list.
    COUNTRY_CODE_URL = "http://www.iso.org/iso/home/standards/country_codes/country_names_and_code_elements_txt-temp.htm"

    def download_maxmind_files(self):
        """ Download all MaxMind GeoIP database files. """
        for maxmind_url in self.MAXMIND_URLS.split():
            self._download_to_cache_dir(maxmind_url)

    def download_rir_files(self):
        """ Download all RIR delegation files including md5 checksum. """
        for rir_url in self.RIR_URLS.split():
            rir_md5_url = rir_url + '.md5'
            self._download_to_cache_dir(rir_url)
            self._download_to_cache_dir(rir_md5_url)

    def download_lir_files(self):
        """ Download all LIR delegation urls. """
        for lir_url in self.LIR_URLS.split():
            self._download_to_cache_dir(lir_url)

    def download_country_code_file(self):
        """ Download and save the latest semicolon-separated open country
            codes file. """
        self._download_to_cache_dir(self.COUNTRY_CODE_URL)

    def _download_to_cache_dir(self, url):
        """ Fetch a resource (with progress bar) and store contents to the
            local cache directory under the file name given in the URL. """
        if not os.path.exists(self.cache_dir):
            if self.verbose:
                print("Initializing the cache directory...")
            os.mkdir(self.cache_dir)
        filename = url.split('/')[-1]
        if self.verbose:
            print(url)
        req = Request(url)
        if self.user_agent:
            req.add_header('User-Agent', self.user_agent)
        # TODO Allow use of a proxy.
        # req.set_proxy(host, type)
        try:
            fetcher = urlopen(req)
        except URLError as err:
            # Best effort: report the failure and skip this file.
            msg = "An error occurred while attempting to cache file from:"
            print(("%s\n\t%s\n\t%s" % (msg, url, str(err))))
            return
        # A Content-Length header enables the progress bar; without one,
        # expected_bytes stays -1 and the bar is skipped.
        length_header = fetcher.headers.get("Content-Length")
        expected_bytes = -1
        if length_header:
            expected_bytes = int(length_header)
            print(("Fetching %d kilobytes" %
                   round(float(expected_bytes / 1024), 2)))
        download_started = time.time()
        output_file = open(os.path.join(self.cache_dir, filename), "wb")
        received_bytes, seconds_elapsed = 0, 0
        while True:
            seconds_elapsed = time.time() - download_started
            if expected_bytes >= 0:
                self._update_progress_bar(received_bytes, expected_bytes,
                                          seconds_elapsed)
            chunk = fetcher.read(1024)
            if len(chunk) == 0:
                # EOF; warn if the payload was shorter than advertised.
                if expected_bytes >= 0 and received_bytes != expected_bytes:
                    print(("Expected %s bytes, only received %s" %
                           (expected_bytes, received_bytes)))
                print("")
                break
            received_bytes += len(chunk)
            output_file.write(chunk)
        output_file.close()

    def _update_progress_bar(self, received_bytes, expected_bytes,
                             seconds_elapsed):
        """ Write a progress bar to the console. """
        if is_win32:
            rows = 100  # use some WinCon function for these?
            columns = 80  # but not really important.
            EOL = "\r"
        else:
            # Query terminal size via stty; assumes a POSIX environment
            # with a controlling terminal.
            rows, columns = list(map(int, os.popen('stty size', 'r'
                                                   ).read().split()))
            EOL = "\x1b[G"
        if seconds_elapsed == 0:
            # Avoid division by zero in the rate calculation below.
            seconds_elapsed = 1
        percent_done = float(received_bytes) / float(expected_bytes)
        caption = "%.2f K/s" % (received_bytes / 1024 / seconds_elapsed)
        width = columns - 4 - len(caption)
        sys.stdout.write("[%s>%s] %s%s" % (
            "=" * int(percent_done * width),
            "." * (width - int(percent_done * width)), caption, EOL))
        sys.stdout.flush()

    def check_rir_file_mtimes(self):
        """ Return True if the mtime of any RIR file in our cache directory
            is > 24 hours, False otherwise. """
        if not os.path.exists(self.cache_dir):
            return False
        for rir_url in self.RIR_URLS.split():
            rir_path = os.path.join(self.cache_dir,
                                    rir_url.split('/')[-1])
            if os.path.exists(rir_path):
                rir_stat = os.stat(rir_path)
                if (time.time() - rir_stat.st_mtime) > 86400:
                    return True
        return False

    def verify_rir_files(self):
        """ Compute md5 checksums of all cached RIR files, compare them to
            the provided .md5 files, and print a warning for every
            mismatch.  (Note: returns nothing; results are only
            printed.) """
        for rir_url in self.RIR_URLS.split():
            rir_path = os.path.join(self.cache_dir,
                                    rir_url.split('/')[-1])
            rir_md5_path = os.path.join(self.cache_dir,
                                        rir_url.split('/')[-1] + '.md5')
            # Verification is best-effort: skip when either file is absent.
            if not os.path.exists(rir_md5_path) or \
                    not os.path.exists(rir_path):
                continue
            rir_md5_file = open(rir_md5_path, 'r')
            expected_checksum = rir_md5_file.read()
            rir_md5_file.close()
            if "=" in expected_checksum:
                # Format "MD5 (file) = <hex>": take the part after '='.
                expected_checksum = expected_checksum.split("=")[-1].strip()
            elif expected_checksum == "":
                if self.verbose:
                    print("No checksum... skipping verification...")
                continue
            else:
                # Otherwise scan for a bare 32-digit lowercase hex string.
                regex = re.compile("[a-f0-9]{32}")
                regres = regex.findall(expected_checksum)
                if len(regres) > 1:
                    print("Error: mutiple checksum found")
                elif len(regres) < 1:
                    print("Error: no checksum found")
                else:
                    expected_checksum = regres[0]
            computed_checksum = ""
            rir_file = open(rir_path, 'rb')
            rir_data = rir_file.read()
            rir_file.close()
            computed_checksum = str(hashlib.md5(rir_data).hexdigest())
            if expected_checksum != computed_checksum:
                print(("The computed md5 checksum of %s, %s, does *not* "
                       "match the provided checksum %s!" %
                       (rir_path, computed_checksum, expected_checksum)))

    def parse_maxmind_files(self, maxmind_urls=None):
        """ Parse locally cached MaxMind files and insert assignments to the
            local database cache, overwriting any existing MaxMind
            assignments. """
        if not maxmind_urls:
            maxmind_urls = self.MAXMIND_URLS.split()
        self.database_cache.delete_assignments('maxmind')
        for maxmind_url in maxmind_urls:
            maxmind_path = os.path.join(self.cache_dir,
                                        maxmind_url.split('/')[-1])
            if not os.path.exists(maxmind_path):
                print("Unable to find %s." % maxmind_path)
                continue
            if maxmind_path.endswith('.zip'):
                # The IPv4 database ships as a zip archive of CSV files.
                maxmind_zip_path = zipfile.ZipFile(maxmind_path)
                for contained_filename in maxmind_zip_path.namelist():
                    content = maxmind_zip_path.read(contained_filename)
                    self._parse_maxmind_content(content, 'maxmind',
                                                'maxmind')
            elif maxmind_path.endswith('.gz'):
                # The IPv6 database ships as a gzipped CSV.
                content = gzip.open(maxmind_path).read()
                self._parse_maxmind_content(content, 'maxmind', 'maxmind')
        self.database_cache.commit_changes()

    def import_maxmind_file(self, maxmind_path):
        """ Import a user-supplied MaxMind CSV file, using its path as
            both source type and source name in the database cache. """
        self.database_cache.delete_assignments(maxmind_path)
        if not os.path.exists(maxmind_path):
            print("Unable to find %s." % maxmind_path)
            return
        # NOTE(review): _parse_maxmind_content() calls .decode() on its
        # content argument, which expects bytes; this text-mode open()
        # yields str on Python 3 — verify, and consider opening in 'rb'.
        content = open(maxmind_path).read()
        self._parse_maxmind_content(content, maxmind_path, maxmind_path)
        self.database_cache.commit_changes()

    def _parse_maxmind_content(self, content, source_type, source_name):
        """ Parse the CSV content (bytes) of a MaxMind file and insert the
            assignments into the database cache without committing. """
        # Column layout of the MaxMind country CSVs.
        keys = ['start_str', 'end_str', 'start_num', 'end_num',
                'country_code', 'country_name']
        for line in content.decode('utf-8').split('\n'):
            if len(line.strip()) == 0 or line.startswith("#"):
                continue
            line = line.replace('"', '').replace(' ', '').strip()
            parts = line.split(',')
            entry = dict((k, v) for k, v in zip(keys, parts))
            start_num = int(entry['start_num'])
            end_num = int(entry['end_num'])
            country_code = str(entry['country_code'])
            start_ipaddr = ipaddr.ip_address(entry['start_str'])
            # Distinguish IPv4 from IPv6 rows by the parsed start address.
            if isinstance(start_ipaddr, ipaddr.IPv4Address):
                num_type = 'ipv4'
            else:
                num_type = 'ipv6'
            self.database_cache.insert_assignment(start_num, end_num,
                                                  num_type, country_code,
                                                  source_type, source_name)

    def parse_rir_files(self, rir_urls=None):
        """ Parse locally cached RIR files and insert assignments to the local
            database cache, overwriting any existing RIR assignments. """
        if not rir_urls:
            rir_urls = self.RIR_URLS.split()
        self.database_cache.delete_assignments('rir')
        # Field names of the pipe-separated RIR delegation file format.
        keys = "registry country_code type start value date status"
        for rir_url in rir_urls:
            rir_path = os.path.join(self.cache_dir,
                                    rir_url.split('/')[-1])
            if not os.path.exists(rir_path):
                print("Unable to find %s." % rir_path)
                continue
            for line in open(rir_path, 'r'):
                if line.startswith("#"):
                    continue
                entry = dict((k, v) for k, v in
                             zip(keys.split(), line.strip().split("|")))
                source_name = str(entry['registry'])
                country_code = str(entry['country_code'])
                # Skip header/summary lines (numeric first field) and
                # reserved ranges marked with country code "*".
                if source_name.replace(".", "", 1).isdigit() or country_code == "*":
                    continue
                num_type = entry['type']
                if num_type == 'asn':
                    start_num = end_num = int(entry['start'])
                elif num_type == 'ipv4':
                    start_num = int(ipaddr.IPv4Address(entry['start']))
                    # For IPv4, 'value' is the number of addresses.
                    end_num = start_num + int(entry['value']) - 1
                elif num_type == 'ipv6':
                    # For IPv6, 'value' is the prefix length.
                    network_str = entry['start'] + '/' + entry['value']
                    network_ipaddr = ipaddr.IPv6Network(network_str)
                    start_num = int(network_ipaddr.network_address)
                    end_num = int(network_ipaddr.broadcast_address)
                self.database_cache.insert_assignment(start_num,
                                                      end_num, num_type,
                                                      country_code, 'rir',
                                                      source_name)
        self.database_cache.commit_changes()

    def parse_lir_files(self, lir_urls=None):
        """ Parse locally cached LIR files and insert assignments to the local
            database cache, overwriting any existing LIR assignments. """
        if not lir_urls:
            lir_urls = self.LIR_URLS.split()
        self.database_cache.delete_assignments('lir')
        for lir_url in lir_urls:
            lir_path = os.path.join(self.cache_dir,
                                    lir_url.split('/')[-1])
            if not os.path.exists(lir_path):
                print("Unable to find %s." % lir_path)
                continue
            if lir_path.endswith('.gz'):
                lir_file = gzip.open(lir_path)
            else:
                lir_file = open(lir_path)
            # Parser state for one RPSL-style record at a time; a blank
            # line terminates the current record.
            start_num = 0
            end_num = 0
            country_code = ""
            entry = False
            num_type = ""
            for line in lir_file:
                line = line.decode('utf-8', 'ignore').replace("\n", "")
                if line == "":
                    entry = False
                    start_num, end_num, country_code, num_type = 0, 0, "", ""
                elif not entry and "inetnum:" in line:
                    try:
                        # IPv4 records: "inetnum: <start> - <end>".
                        line = line.replace("inetnum:", "").strip()
                        start_str = line.split("-")[0].strip()
                        end_str = line.split("-")[1].strip()
                        start_num = int(ipaddr.IPv4Address(start_str))
                        end_num = int(ipaddr.IPv4Address(end_str))
                        entry = True
                        num_type = 'ipv4'
                    except Exception as e:
                        # Malformed record: skip, optionally reporting it.
                        if self.verbose:
                            print(repr(e), line)
                elif not entry and "inet6num:" in line:
                    try:
                        # IPv6 records: "inet6num: <network>/<prefixlen>".
                        network_str = line.replace("inet6num:", "").strip()
                        network_ipaddr = ipaddr.IPv6Network(network_str)
                        start_num = int(network_ipaddr.network_address)
                        end_num = int(network_ipaddr.broadcast_address)
                        entry = True
                        num_type = 'ipv6'
                    except Exception as e:
                        if self.verbose:
                            print(repr(e), line)
                elif entry and "country:" in line:
                    # The country line completes a record; store it.
                    country_code = line.replace("country:", "").strip()
                    self.database_cache.insert_assignment(start_num,
                                                          end_num, num_type,
                                                          country_code,
                                                          'lir', 'ripencc')
        self.database_cache.commit_changes()
class Lookup:
    """ Answers lookup queries (IP address, ASN, country code/name)
    against the database cache. """

    def __init__(self, cache_dir, database_cache, verbose=False):
        self.cache_dir = cache_dir
        self.database_cache = database_cache
        self.verbose = verbose
        # Maps country name -> country code; stays None until the country
        # codes file has been downloaded and parsed.
        self.map_co = None
        self.build_country_code_dictionary()

    def build_country_code_dictionary(self):
        """ Build the dictionary mapping country name to country code
            from the locally cached ISO country codes file, if present. """
        country_code_path = os.path.join(self.cache_dir,
                                         'country_names_and_code_elements_txt-temp.htm')
        if not os.path.exists(country_code_path):
            # Without the file, name lookups stay unavailable (map_co None).
            return
        self.map_co = {}
        for line in open(country_code_path):
            # Skip the header line and anything that is not "NAME;CODE".
            if line == "" or line.startswith("Country ") or ";" not in line:
                continue
            country_name, country_code = line.strip().split(";")
            # Normalize the ALL-CAPS names from the file to Title Case.
            country_name = ' '.join([part.capitalize() for part in
                                     country_name.split(" ")])
            self.map_co[country_name] = country_code

    def knows_country_names(self):
        """ Return True once the country codes file has been parsed. """
        return self.map_co is not None

    def get_name_from_country_code(self, cc_code):
        """ Return the country name for the given code, or None if the
            code is unknown or no country names are available. """
        if not self.knows_country_names():
            return
        country_name = [(key, value) for (key, value) in
                        list(self.map_co.items()) if value == cc_code]
        if len(country_name) > 0:
            return country_name[0][0]

    def get_country_code_from_name(self, country_name):
        """ Return the country code for a given country name. """
        if not self.knows_country_names():
            return
        # Case-insensitive prefix match; the first match wins.
        cc_code = [self.map_co[key] for key in list(self.map_co.keys()) if
                   key.upper().startswith(country_name.upper())]
        if len(cc_code) > 0:
            return cc_code[0]

    def lookup_ipv6_address(self, lookup_ipaddr):
        """ Print the country code (and name, when known) assigned to the
            given IPv6 address in each available data source. """
        print("Reverse lookup for: " + str(lookup_ipaddr))
        for source_type in ['maxmind', 'rir', 'lir']:
            cc = self.database_cache.fetch_country_code('ipv6',
                                                        source_type,
                                                        int(lookup_ipaddr))
            if cc:
                print(source_type.upper(), "country code:", cc)
                cn = self.get_name_from_country_code(cc)
                if cn:
                    print(source_type.upper(), "country name:", cn)

    def lookup_ipv4_address(self, lookup_ipaddr):
        """ Print the country code (and name, when known) assigned to the
            given IPv4 address in the MaxMind, RIR, and LIR sources, and
            warn when MaxMind and RIR data disagree. """
        print("Reverse lookup for: " + str(lookup_ipaddr))
        maxmind_cc = self.database_cache.fetch_country_code('ipv4', 'maxmind',
                                                            int(lookup_ipaddr))
        if maxmind_cc:
            print('MaxMind country code:', maxmind_cc)
            maxmind_cn = self.get_name_from_country_code(maxmind_cc)
            if maxmind_cn:
                print('MaxMind country name:', maxmind_cn)
        rir_cc = self.database_cache.fetch_country_code('ipv4', 'rir',
                                                        int(lookup_ipaddr))
        if rir_cc:
            print('RIR country code:', rir_cc)
            rir_cn = self.get_name_from_country_code(rir_cc)
            if rir_cn:
                print('RIR country name:', rir_cn)
        else:
            print('Not found in RIR db')
        lir_cc = self.database_cache.fetch_country_code('ipv4', 'lir',
                                                        int(lookup_ipaddr))
        if lir_cc:
            print('LIR country code:', lir_cc)
            lir_cn = self.get_name_from_country_code(lir_cc)
            if lir_cn:
                print('LIR country name:', lir_cn)
        if maxmind_cc and maxmind_cc != rir_cc:
            print("It appears that the RIR data conflicts with MaxMind's "
                  "data. MaxMind's data is likely closer to being "
                  "correct due to sub-delegation issues with LIR databases.")

    def lookup_ip_address(self, lookup_str):
        """ Return the country code and name for a given ip address. """
        try:
            lookup_ipaddr = ipaddr.ip_address(lookup_str)
            if isinstance(lookup_ipaddr, ipaddr.IPv4Address):
                self.lookup_ipv4_address(lookup_ipaddr)
            elif isinstance(lookup_ipaddr, ipaddr.IPv6Address):
                self.lookup_ipv6_address(lookup_ipaddr)
            else:
                print(("Did not recognize '%s' as either IPv4 or IPv6 "
                       "address." % lookup_str))
        except ValueError as e:
            # ip_address() raises ValueError for malformed input.
            print("'%s' is not a valid IP address." % lookup_str)

    def asn_lookup(self, asn):
        """ Print the RIR country code/name for the given AS number. """
        asn_cc = self.database_cache.fetch_country_code('asn', 'rir', asn)
        if asn_cc:
            print("AS country code: %s" % asn_cc)
            asn_cn = self.get_name_from_country_code(asn_cc)
            if asn_cn:
                print("AS country name: %s" % asn_cn)
        else:
            print("AS%s not found!" % asn)

    def fetch_rir_blocks_by_country(self, request, country):
        """ Return a list of strings describing all assignments of the
            given number type ("ipv4", "ipv6", or "asn") to the given
            country: CIDR blocks for IP types, plain numbers for ASNs. """
        result = []
        for (start_num, end_num) in \
                self.database_cache.fetch_assignments(request, country):
            if request == "ipv4" or request == "ipv6":
                # Convert each numeric range to a minimal list of CIDRs.
                start_ipaddr = ipaddr.ip_address(start_num)
                end_ipaddr = ipaddr.ip_address(end_num)
                result += [str(x) for x in
                           ipaddr.summarize_address_range(
                               start_ipaddr, end_ipaddr)]
            else:
                result.append(str(start_num))
        return result

    def lookup_countries_in_different_source(self, first_country_code):
        """ Look up all assignments matching the given country code, then
            look up to which country code(s) the same number ranges are
            assigned in other source types. Print out the result showing
            similarities and differences. """
        print(("\nLegend:\n"
               " '<' = found assignment range with country code '%s'\n"
               " '>' = overlapping assignment range with same country code\n"
               " '*' = overlapping assignment range, first conflict\n"
               " '#' = overlapping assignment range, second conflict and "
               "beyond\n ' ' = neighboring assignment range") % (
                   first_country_code, ))
        results = self.database_cache.fetch_country_blocks_in_other_sources(
            first_country_code)
        # Track the range currently being printed so headers and blank
        # separators appear only when the source type or range changes.
        prev_first_source_type = ''
        prev_first_start_num = -1
        cur_second_country_codes = []
        for (first_source_type, first_start_num, first_end_num,
                second_source_type, second_start_num, second_end_num,
                second_country_code, num_type) in results:
            if first_source_type != prev_first_source_type:
                print("\nAssignments in '%s':" % (first_source_type, ))
                prev_first_source_type = first_source_type
            if first_start_num != prev_first_start_num:
                cur_second_country_codes = []
                print("")
                prev_first_start_num = first_start_num
            marker = ''
            if second_end_num >= first_start_num and \
                    second_start_num <= first_end_num:
                # Overlapping range: remember conflicting country codes so
                # the first conflict is marked '*' and later ones '#'.
                if first_country_code != second_country_code and \
                        second_country_code not in cur_second_country_codes:
                    cur_second_country_codes.append(second_country_code)
                if first_source_type == second_source_type:
                    marker = '<'
                elif len(cur_second_country_codes) == 0:
                    marker = '>'
                elif len(cur_second_country_codes) == 1:
                    marker = '*'
                else:
                    marker = '#'
            # Render the second range as IP address(es) or AS number(s).
            if num_type.startswith("ip") and \
                    second_start_num == second_end_num:
                second_range = "%s" % (ipaddr.ip_address(second_start_num), )
            elif num_type.startswith("ip") and \
                    second_start_num < second_end_num:
                second_range = "%s-%s" % (ipaddr.ip_address(second_start_num),
                                          ipaddr.ip_address(second_end_num))
            elif second_start_num < second_end_num:
                second_range = "AS%d-%d" % (second_start_num, second_end_num)
            else:
                second_range = "AS%d" % (second_start_num, )
            print("%1s %s %s %s" % (marker, second_country_code, second_range,
                                    second_source_type, ))
def split_callback(option, opt, value, parser):
    """ optparse callback for "VALUE[:type]" options: store VALUE under
        the option's dest and, when a non-empty type suffix is present,
        store the suffix under 'type_filter'. """
    fields = value.split(':')
    setattr(parser.values, option.dest, fields[0])
    type_suffix = fields[1] if len(fields) > 1 else ''
    if type_suffix:
        setattr(parser.values, 'type_filter', type_suffix)
def main():
    """ Command-line entry point: parse options, then dispatch to exactly
        one cache-update or lookup mode. """
    usage = ("Usage: %prog [options]\n\n"
             "Example: %prog -v -t mm")
    parser = optparse.OptionParser(usage)
    parser.add_option("-v", "--verbose", action="store_true",
                      dest="verbose", help="be verbose", default=False)
    parser.add_option("-c", "--cache-dir", action="store", dest="dir",
                      help="set cache directory [default: %default]",
                      default=str(os.path.expanduser('~')) + "/.blockfinder/")
    parser.add_option("--user-agent", action="store", dest="ua",
                      help=('provide a User-Agent which will be used when '
                            'fetching delegation files [default: "%default"]'),
                      default="Mozilla/5.0 (Windows NT 6.1; rv:17.0) Gecko/20100101 Firefox/17.0")
    parser.add_option("-x", "--hack-the-internet", action="store_true",
                      dest="hack_the_internet", help=optparse.SUPPRESS_HELP)
    # Cache modes: download and/or (re)parse data into the local cache.
    group = optparse.OptionGroup(
        parser, "Cache modes",
        "Pick at most one of these modes to initialize or update "
        "the local cache. May not be combined with lookup modes.")
    group.add_option("-m", "--init-maxmind", action="store_true",
                     dest="init_maxmind",
                     help="initialize or update MaxMind GeoIP database")
    group.add_option("-g", "--reload-maxmind", action="store_true",
                     dest="reload_maxmind",
                     help=("update cache from existing MaxMind GeoIP database"))
    group.add_option("-r", "--import-maxmind", action="store",
                     dest="import_maxmind", metavar="FILE",
                     help=("import the specified MaxMind GeoIP database file into "
                           "the database cache using its file name as source "
                           "name"))
    group.add_option("-i", "--init-rir",
                     action="store_true", dest="init_del",
                     help="initialize or update delegation information")
    group.add_option("-d", "--reload-rir", action="store_true",
                     dest="reload_del",
                     help="use existing delegation files to update the database")
    group.add_option("-l", "--init-lir", action="store_true",
                     dest="init_lir",
                     help=("initialize or update lir information; can take up to "
                           "5 minutes"))
    group.add_option("-z", "--reload-lir", action="store_true",
                     dest="reload_lir",
                     help=("use existing lir files to update the database; can "
                           "take up to 5 minutes"))
    group.add_option("-o", "--download-cc", action="store_true",
                     dest="download_cc", help="download country codes file")
    group.add_option("-e", "--erase-cache", action="store_true",
                     dest="erase_cache", help="erase the local database cache")
    parser.add_option_group(group)
    # Lookup modes: query data already in the local cache.
    group = optparse.OptionGroup(
        parser, "Lookup modes",
        "Pick at most one of these modes to look up data in the "
        "local cache. May not be combined with cache modes.")
    group.add_option("-4", "--ipv4", action="store", dest="ipv4",
                     help=("look up country code and name for the specified IPv4 "
                           "address"))
    group.add_option("-6", "--ipv6", action="store", dest="ipv6",
                     help=("look up country code and name for the specified IPv6 "
                           "address"))
    group.add_option("-a", "--asn", action="store", dest="asn",
                     help="look up country code and name for the specified ASN")
    group.add_option("-t", "--code", action="callback", dest="cc",
                     callback=split_callback, metavar="CC[:type]", type="str",
                     help=("look up all allocations (or only those for number "
                           "type 'ipv4', 'ipv6', or 'asn' if provided) in the "
                           "delegation cache for the specified two-letter country "
                           "code"))
    group.add_option("-n", "--name", action="callback", dest="cn",
                     callback=split_callback, metavar="CN[:type]", type="str",
                     help=("look up all allocations (or only those for number "
                           "type 'ipv4', 'ipv6', or 'asn' if provided) in the "
                           "delegation cache for the specified full country name"))
    group.add_option("-p", "--compare", action="store", dest="compare",
                     metavar="CC",
                     help=("compare assignments to the specified country code "
                           "with overlapping assignments in other data sources; "
                           "can take some time and produce some long output"))
    group.add_option("-w", "--what-country", action="store", dest="what_cc",
                     help=("look up country name for specified country code"))
    parser.add_option_group(group)
    # NOTE(review): this "Network modes" group is created but never
    # populated or added to the parser; it appears to be dead code.
    group = optparse.OptionGroup(parser, "Network modes")
    (options, args) = parser.parse_args()
    if options.hack_the_internet:
        print("all your bases are belong to us!")
        sys.exit(0)
    # Require that exactly one cache or lookup mode was selected.
    options_dict = vars(options)
    modes = 0
    for mode in ["init_maxmind", "reload_maxmind", "import_maxmind",
                 "init_del", "init_lir", "reload_del", "reload_lir",
                 "download_cc", "erase_cache", "ipv4", "ipv6", "asn",
                 "cc", "cn", "compare", "what_cc"]:
        if mode in options_dict and options_dict.get(mode):
            modes += 1
    if modes > 1:
        parser.error("only 1 cache or lookup mode allowed")
    elif modes == 0:
        parser.error("must provide 1 cache or lookup mode")
    database_cache = DatabaseCache(options.dir, options.verbose)
    # -e only touches the file system; no database connection needed.
    if options.erase_cache:
        database_cache.erase_database()
        sys.exit(0)
    if not database_cache.connect_to_database():
        print("Could not connect to database.")
        print("You may need to erase it using -e and then reload it "
              "using -d/-z. Exiting.")
        sys.exit(1)
    database_cache.set_db_version()
    downloader_parser = DownloaderParser(options.dir, database_cache,
                                         options.ua)
    lookup = Lookup(options.dir, database_cache)
    if options.ipv4 or options.ipv6 or options.asn or options.cc \
            or options.cn or options.compare:
        # Warn when lookups would run against stale RIR data.
        if downloader_parser.check_rir_file_mtimes():
            print("Your cached RIR files are older than 24 hours; you "
                  "probably want to update them.")
    if options.asn:
        lookup.asn_lookup(options.asn)
    elif options.ipv4:
        lookup.lookup_ip_address(options.ipv4)
    elif options.ipv6:
        lookup.lookup_ip_address(options.ipv6)
    elif options.cc or options.cn or options.what_cc:
        country = None
        if options.cc:
            country = options.cc.upper()
        elif not lookup.knows_country_names():
            print("Need to download country codes first before looking "
                  "up countries by name.")
        elif options.what_cc:
            # -w resolves a code to a name and exits immediately.
            country = options.what_cc.upper()
            country_name = lookup.get_name_from_country_code(country)
            if country_name:
                print(("Hmm...%s? That would be %s."
                       % (options.what_cc, country_name)))
                sys.exit(0)
            else:
                print(("Hmm, %s? We're not sure either. Are you sure that's "
                       "a country code?" % options.what_cc))
                sys.exit(1)
        else:
            country = lookup.get_country_code_from_name(options.cn)
            if not country:
                print("It appears your search did not match a country.")
        if country:
            # type_filter is set by split_callback when "CC:type" is used.
            types = ["ipv4", "ipv6", "asn"]
            if hasattr(options, 'type_filter') and \
                    options.type_filter.lower() in types:
                types = [options.type_filter.lower()]
            for request in types:
                print("\n".join(lookup.fetch_rir_blocks_by_country(
                    request, country)))
    elif options.compare:
        print("Comparing assignments with overlapping assignments in other "
              "data sources...")
        lookup.lookup_countries_in_different_source(options.compare)
    elif options.init_maxmind or options.reload_maxmind:
        if options.init_maxmind:
            print("Downloading Maxmind GeoIP files...")
            downloader_parser.download_maxmind_files()
        print("Importing Maxmind GeoIP files...")
        downloader_parser.parse_maxmind_files()
    elif options.import_maxmind:
        print("Importing Maxmind GeoIP files...")
        downloader_parser.import_maxmind_file(options.import_maxmind)
    elif options.init_del or options.reload_del:
        if options.init_del:
            print("Downloading RIR files...")
            downloader_parser.download_rir_files()
            print("Verifying RIR files...")
            downloader_parser.verify_rir_files()
        print("Importing RIR files...")
        downloader_parser.parse_rir_files()
    elif options.init_lir or options.reload_lir:
        if options.init_lir:
            print("Downloading LIR delegation files...")
            downloader_parser.download_lir_files()
        print("Importing LIR files...")
        downloader_parser.parse_lir_files()
    elif options.download_cc:
        print("Downloading country code file...")
        downloader_parser.download_country_code_file()
    database_cache.commit_and_close_database()
# Run the command-line interface when executed as a script.
if __name__ == "__main__":
    main()