Redirect all queries to Relay Search
[compass.git] / compass.py
blobc22416a8247a6161790827ef64c1969f18119157
1 #!/usr/bin/env python
3 # This program is free software. It comes without any warranty, to
4 # the extent permitted by applicable law. You can redistribute it
5 # and/or modify it under the terms of the Do What The Fuck You Want
6 # To Public License, Version 2, as published by Sam Hocevar. See
7 # http://sam.zoy.org/wtfpl/COPYING for more details.
# Thresholds for the "fast exit" selection. Bandwidth rates are expressed so
# that dividing by (125 * 1024) yields Mbit/s and advertised bandwidths so
# that dividing by 1024 yields KB/s (that is how the option help prints them).
FAST_EXIT_BANDWIDTH_RATE = 95 * 125 * 1024  # 95 Mbit/s
FAST_EXIT_ADVERTISED_BANDWIDTH = 5000 * 1024  # 5000 kB/s
# Ports a fast exit has to allow exiting to.
FAST_EXIT_PORTS = [80, 443, 554, 1755]
# Upper bound on fast exits counted per network (the option help calls it /24).
FAST_EXIT_MAX_PER_NETWORK = 2

# Relaxed thresholds for the "almost fast exit" selection.
ALMOST_FAST_EXIT_BANDWIDTH_RATE = 80 * 125 * 1024  # 80 Mbit/s
ALMOST_FAST_EXIT_ADVERTISED_BANDWIDTH = 2000 * 1024  # 2000 kB/s
ALMOST_FAST_EXIT_PORTS = [80, 443]
18 import json
19 import operator
20 import sys
21 import util
22 import os
23 from optparse import OptionParser, OptionGroup
24 import urllib
25 import re
26 import itertools
class BaseFilter(object):
    """Common interface of all relay filters.

    Subclasses override accept() to judge a single relay; load() applies
    that judgement to a whole collection of relays.
    """

    def accept(self, relay):
        """Return a truthy value when *relay* passes the filter.

        Raises NotImplementedError unless a subclass overrides it.
        """
        raise NotImplementedError("This isn't implemented by the subclass")

    def load(self, relays):
        """Return the relays from *relays* that accept() approves, in order."""
        return [candidate for candidate in relays if self.accept(candidate)]
class RunningFilter(BaseFilter):
    """Keep only relays whose 'running' entry is truthy."""

    def accept(self, relay):
        """Return the relay's 'running' value (KeyError if it is absent)."""
        is_running = relay['running']
        return is_running
class FamilyFilter(BaseFilter):
    """Keep only relays that are in a mutually declared family with one relay.

    The reference relay is looked up in *all_relays* either by its
    40-character fingerprint or, for relays carrying the 'Named' flag, by a
    nickname shorter than 20 characters. If no relay matches, the filter
    accepts nothing.
    """

    def __init__(self, family, all_relays):
        """Locate the reference relay and remember its family.

        family     -- fingerprint or nickname identifying the reference relay
        all_relays -- iterable of relay dicts to search
        """
        self._family_fingerprint = None
        self._family_nickname = None
        self._family_relays = []
        by_fingerprint = len(family) == 40
        by_nickname = len(family) < 20
        found_relay = None
        for relay in all_relays:
            if by_fingerprint and relay['fingerprint'] == family:
                found_relay = relay
                break
            # 'flags' may be missing from a relay's details, so do not index
            # it directly.
            if (by_nickname and 'Named' in relay.get('flags', [])
                    and relay['nickname'] == family):
                found_relay = relay
                break
        if found_relay:
            self._family_fingerprint = '$%s' % found_relay['fingerprint']
            if 'Named' in found_relay.get('flags', []):
                self._family_nickname = found_relay['nickname']
            self._family_relays = ([self._family_fingerprint] +
                                   found_relay.get('effective_family', []))

    def accept(self, relay):
        """Return True if *relay* and the reference relay list each other."""
        fingerprint = '$%s' % relay['fingerprint']
        mentions = [fingerprint] + relay.get('effective_family', [])
        # Only show families as accepted by consensus (mutually listed relays)
        listed = fingerprint in self._family_relays
        if not listed and 'Named' in relay.get('flags', []):
            listed = relay['nickname'] in self._family_relays
        mentioned = (self._family_fingerprint in mentions or
                     self._family_nickname in mentions)
        return bool(listed and mentioned)
class CountryFilter(BaseFilter):
    """Keep only relays located in one of the given countries."""

    def __init__(self, countries=None):
        """Store the lower-cased country codes; None means "no countries".

        The default is None rather than a shared mutable list.
        """
        self._countries = [code.lower() for code in (countries or [])]

    def accept(self, relay):
        """Return True if the relay's 'country' is one of the stored codes."""
        return relay.get('country', None) in self._countries
class ASFilter(BaseFilter):
    """Keep only relays hosted in one of the given autonomous systems."""

    def __init__(self, as_sets=None):
        """Store the AS identifiers; purely numeric ones get an "AS" prefix.

        The default is None rather than a shared mutable list.
        """
        self._as_sets = ["AS" + entry if entry.isdigit() else entry
                         for entry in (as_sets or [])]

    def accept(self, relay):
        """Return True if the relay's 'as_number' is one of the stored ASes."""
        return relay.get('as_number', None) in self._as_sets
class ExitFilter(BaseFilter):
    """Keep only relays with a strictly positive 'exit_probability'."""

    def accept(self, relay):
        """Return True if the relay has an exit probability above zero."""
        # A missing value counts as -1 and therefore never passes.
        exit_probability = relay.get('exit_probability', -1)
        return exit_probability > 0.0
class GuardFilter(BaseFilter):
    """Keep only relays with a strictly positive 'guard_probability'."""

    def accept(self, relay):
        """Return True if the relay has a guard probability above zero."""
        # A missing value counts as -1 and therefore never passes.
        guard_probability = relay.get('guard_probability', -1)
        return guard_probability > 0.0
class FastExitFilter(BaseFilter):
    """Keep relays that meet bandwidth thresholds and allow a set of ports.

    A relay passes when its 'bandwidth_rate' and 'advertised_bandwidth' reach
    the configured minimums and its 'exit_policy_summary' permits every
    configured port.
    """

    class Relay(object):
        """Small wrapper exposing a relay's exit probability and fingerprint."""

        def __init__(self, relay):
            self.exit = relay.get('exit_probability')
            self.fp = relay.get('fingerprint')
            self.relay = relay

    def __init__(self, bandwidth_rate=FAST_EXIT_BANDWIDTH_RATE,
                 advertised_bandwidth=FAST_EXIT_ADVERTISED_BANDWIDTH,
                 ports=FAST_EXIT_PORTS):
        """Remember the minimum bandwidths and the ports that must be allowed."""
        self.bandwidth_rate = bandwidth_rate
        self.advertised_bandwidth = advertised_bandwidth
        self.ports = ports

    @staticmethod
    def _parse_port_ranges(portlist):
        """Convert entries like '80' or '8000-8100' to inclusive (low, high) pairs."""
        ranges = []
        for entry in portlist:
            if '-' in entry:
                bounds = entry.split('-')
                ranges.append((int(bounds[0]), int(bounds[1])))
            else:
                port = int(entry)
                ranges.append((port, port))
        return ranges

    def load(self, all_relays):
        """Return the relays from *all_relays* that qualify, in input order."""
        # Invariant across relays, so build it once.
        required_ports = set(self.ports)
        matching_relays = []
        for relay in all_relays:
            # First, filter relays based on bandwidth requirements.
            if relay.get('bandwidth_rate', -1) < self.bandwidth_rate:
                continue
            if relay.get('advertised_bandwidth', -1) < self.advertised_bandwidth:
                continue
            summary = relay.get('exit_policy_summary', {})
            if 'accept' in summary:
                portlist = summary['accept']
            elif 'reject' in summary:
                portlist = summary['reject']
            else:
                # No usable policy summary: cannot be an exit for our ports.
                continue
            # Test the few required ports against the ranges instead of
            # expanding every range into a list of individual ports.
            ranges = self._parse_port_ranges(portlist)
            covered = [port for port in required_ports
                       if any(low <= port <= high for low, high in ranges)]
            # An accept list must cover all required ports ...
            if 'accept' in summary and len(covered) != len(required_ports):
                continue
            # ... and a reject list must not touch any of them.
            if 'reject' in summary and covered:
                continue
            matching_relays.append(relay)
        return matching_relays
class SameNetworkFilter(BaseFilter):
    """Limit the output of another filter to N relays per IPv4 network.

    The network of an address is the address with its last dotted component
    removed. When a network is already full, a new relay replaces the member
    with the lowest exit probability, provided that member's probability is
    lower than the new relay's.
    """

    def __init__(self, orig_filter, max_per_network=FAST_EXIT_MAX_PER_NETWORK):
        """Wrap *orig_filter*, allowing *max_per_network* relays per network."""
        self.orig_filter = orig_filter
        self.max_per_network = max_per_network

    @staticmethod
    def _exit_probability(relay):
        """Return the relay's exit probability, treating a missing one as 0."""
        return relay.get('exit_probability') or 0.0

    def load(self, all_relays):
        """Return the relays accepted by the wrapped filter, capped per network."""
        network_data = {}
        for relay in self.orig_filter.load(all_relays):
            or_addresses = relay.get("or_addresses") or []
            no_of_addresses = 0
            for address in or_addresses:
                ip, _port = address.rsplit(':', 1)
                # skip if ipv6
                if ':' in ip:
                    continue
                no_of_addresses += 1
                if no_of_addresses > 1:
                    # Both values must be passed to % as one tuple. The
                    # warning goes to stderr so it cannot mix with output
                    # printed on stdout.
                    sys.stderr.write(
                        "[WARNING] - %s has more than one IPv4 OR address - %s\n"
                        % (relay.get("fingerprint"), or_addresses))
                network = ip.rsplit('.', 1)[0]
                if network not in network_data:
                    network_data[network] = [relay]
                    continue
                members = network_data[network]
                # Honour the configured limit, not the module-wide default.
                if len(members) < self.max_per_network:
                    members.append(relay)
                    continue
                # Network is full: assume the current relay has the smallest
                # exit probability and look for an even weaker member.
                weakest_exit = self._exit_probability(relay)
                weakest_index = -1
                for index, member in enumerate(members):
                    member_exit = self._exit_probability(member)
                    if member_exit < weakest_exit:
                        weakest_exit = member_exit
                        weakest_index = index
                if weakest_index != -1:
                    del members[weakest_index]
                    members.append(relay)
        return list(itertools.chain.from_iterable(network_data.values()))
class InverseFilter(BaseFilter):
    """Keep exactly the relays that another filter would drop."""

    def __init__(self, orig_filter):
        """Remember the filter whose selection is to be inverted."""
        self.orig_filter = orig_filter

    def load(self, all_relays):
        """Return the relays of *all_relays* not selected by the wrapped filter."""
        accepted = self.orig_filter.load(all_relays)
        return [relay for relay in all_relays if relay not in accepted]
def get_network_family(relay):
    """Return the /16 network of the relay's first OR address, e.g. "1.2.0.0/16".

    Returns None when the relay has no 'or_addresses'.
    """
    or_addresses = relay.get('or_addresses', [])
    if len(or_addresses) < 1:
        return None
    # Guaranteed by Onionoo. Currently restricted to IPv4 by the network design.
    host, _port = or_addresses[0].split(':')
    # Network family is /16, so let's take the first two bytes by regex
    leading_octets = re.match(r'^([0-9]+\.[0-9]+)\.', host)
    return "%s.0.0/16" % leading_octets.group(1)
class RelayStats(object):
    """Load relay details from a JSON datafile, then filter, group and rank them.

    The filter chain and the grouping key are derived once from *options*;
    the datafile contents and the grouped relays are loaded lazily and cached.
    """

    WEIGHTS = ['consensus_weight_fraction', 'advertised_bandwidth_fraction',
               'guard_probability', 'middle_probability', 'exit_probability']

    def __init__(self, options, custom_datafile="details.json"):
        """Set up filters and grouping from *options*.

        custom_datafile -- datafile name, resolved relative to this module's
                           directory when it is not an absolute path
        """
        self._data = None
        self._datafile_name = custom_datafile
        # _create_filters may read self.data, so the two attributes above
        # have to exist before it runs.
        self._filters = self._create_filters(options)
        self._get_group = self._get_group_function(options)
        self._relays = None

    @property
    def data(self):
        """The parsed datafile, loaded on first access."""
        if self._data is None:
            path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                self._datafile_name)
            # The context manager closes the handle even if parsing fails.
            with open(path) as datafile:
                self._data = json.load(datafile)
        return self._data

    @property
    def relays(self):
        """Dict mapping each group key to the list of relays in that group."""
        # "is not None" so an empty result is cached as well.
        if self._relays is not None:
            return self._relays
        self._relays = {}
        relays = self.data['relays']
        for relay_filter in self._filters:
            relays = relay_filter.load(relays)
        for relay in relays:
            self.add_relay(relay)
        return self._relays

    def _create_filters(self, options):
        """Build the list of filters requested by *options*, in application order."""
        filters = []
        if not options.inactive:
            filters.append(RunningFilter())
        if options.family:
            filters.append(FamilyFilter(options.family, self.data['relays']))
        if options.country:
            filters.append(CountryFilter(options.country))
        if options.ases:
            filters.append(ASFilter(options.ases))
        if options.exits_only:
            filters.append(ExitFilter())
        if options.guards_only:
            filters.append(GuardFilter())
        if options.exit_filter == 'all_relays':
            pass
        elif options.exit_filter == 'fast_exits_only':
            filters.append(SameNetworkFilter(FastExitFilter()))
        elif options.exit_filter == 'almost_fast_exits_only':
            # Relays meeting the relaxed thresholds that are not part of the
            # network-limited fast-exit set.
            filters.append(FastExitFilter(ALMOST_FAST_EXIT_BANDWIDTH_RATE,
                                          ALMOST_FAST_EXIT_ADVERTISED_BANDWIDTH,
                                          ALMOST_FAST_EXIT_PORTS))
            filters.append(InverseFilter(SameNetworkFilter(FastExitFilter())))
        elif options.exit_filter == 'fast_exits_only_any_network':
            filters.append(FastExitFilter())
        return filters

    def _get_group_function(self, options):
        """Return a function mapping a relay to its (tuple) group key."""
        funcs = []
        if options.by_country:
            funcs.append(lambda relay: relay.get('country', None))
        if options.by_as:
            funcs.append(lambda relay: relay.get('as_number', None))
        if options.by_network_family:
            funcs.append(get_network_family)
        # Default on grouping by fingerprint
        if len(funcs) == 0:
            funcs.append(lambda relay: relay.get('fingerprint'))
        return lambda relay: tuple(func(relay) for func in funcs)

    def add_relay(self, relay):
        """File *relay* under its group key in the relay cache."""
        key = self._get_group(relay)
        self._relays.setdefault(key, []).append(relay)

    def print_selection(self, selection, options):
        """
        Print the selection returned by sort_and_reduce relays into a
        string for the command line version.
        """
        column_widths = [9, 10, 10, 10, 10, 21, 80 if options.links else 42,
                         7, 7, 4, 16, 11]
        headings = ["CW", "adv_bw", "P_guard", "P_middle", "P_exit", "Nickname",
                    "Link" if options.links else "Fingerprint",
                    "Exit", "Guard", "CC", "IPv4", "Autonomous System"]

        def format_row(fields):
            # Pad every field to its column width, then cut the line at
            # options.short (None leaves the line untouched).
            row = "".join(field.ljust(column_widths[i])
                          for i, field in enumerate(fields))
            return row[:options.short]

        # Print the header
        print(format_row(headings))

        for relay in selection['results']:
            print(format_row(relay.printable_fields(options.links)))

        # Print the 'excluded' set if we have it
        if selection['excluded']:
            print(format_row(selection['excluded'].printable_fields()))

        # Print the 'total' set if we have it
        if selection['total']:
            print(format_row(selection['total'].printable_fields()))

    @staticmethod
    def _add_weights(summary, relay):
        """Add the five weight values of *relay* onto the *summary* result."""
        summary.p_guard += relay.p_guard
        summary.p_exit += relay.p_exit
        summary.p_middle += relay.p_middle
        summary.adv_bw += relay.adv_bw
        summary.cw += relay.cw

    def sort_and_reduce(self, relay_set, options):
        """
        Take a set of relays (has already been grouped and
        filtered), sort it and return the ones requested
        in the 'top' option.  Add index numbers to them as well.

        Note that relay_set is sorted in place and that a negative
        options.top is replaced by the number of relays.

        Returns a hash with three values:
          *results*: A list of Result objects representing the selected
                     relays
          *excluded*: A Result object representing the stats for the
                      filtered out relays. May be None
          *total*: A Result object representing the stats for all of the
                   relays in this filterset. May be None
        """
        output_relays = list()

        relay_set.sort(key=lambda r: getattr(r, options.sort),
                       reverse=options.sort_reverse)

        if options.top < 0:
            options.top = len(relay_set)

        # Set up to handle the special lines at the bottom
        excluded_relays = util.Result(zero_probs=True)
        total_relays = util.Result(zero_probs=True)
        grouped = (options.by_country or options.by_as
                   or options.by_network_family)
        filtered = "relay groups" if grouped else "relays"

        # Add selected relays to the result set
        for i, relay in enumerate(relay_set):
            # We have no links if we're grouping
            if grouped:
                relay.link = False

            if i < options.top:
                relay.index = i + 1
                output_relays.append(relay)
            else:
                self._add_weights(excluded_relays, relay)

            self._add_weights(total_relays, relay)

        excluded_relays.nick = "(%d other %s)" % (
            len(relay_set) - options.top,
            filtered)
        total_relays.nick = "(total in selection)"

        # Only include the excluded line if some relays were cut off
        if len(relay_set) <= options.top:
            excluded_relays = None

        # Only include the total line if the selection's consensus weight
        # does not exceed 99.9
        if total_relays.cw > 99.9:
            total_relays = None

        return {
            'results': output_relays,
            'excluded': excluded_relays,
            'total': total_relays
        }

    def select_relays(self, grouped_relays, options):
        """
        Return a Pythonic representation of the relays result set. Return it
        as a list of Result objects, one per group in grouped_relays.
        """
        grouped = (options.by_country or options.by_as
                   or options.by_network_family)
        results = []
        for group in grouped_relays.values():
            # Initialize some stuff
            group_weights = dict.fromkeys(RelayStats.WEIGHTS, 0)
            relays_in_group, exits_in_group, guards_in_group = 0, 0, 0
            ases_in_group = set()
            countries_in_group = set()
            network_families_in_group = set()
            result = util.Result()
            for relay in group:
                for weight in RelayStats.WEIGHTS:
                    group_weights[weight] += relay.get(weight, 0)

                # The descriptive fields below are overwritten by every
                # relay, so the last relay of the group wins.
                result.nick = relay['nickname']
                result.fp = relay['fingerprint']
                result.link = options.links

                # 'flags' may be missing from a relay's details.
                flags = set(relay.get('flags', []))
                if 'Exit' in flags and 'BadExit' not in flags:
                    result.exit = 'Exit'
                    exits_in_group += 1
                else:
                    result.exit = '-'
                if 'Guard' in flags:
                    result.guard = 'Guard'
                    guards_in_group += 1
                else:
                    result.guard = '-'
                result.cc = relay.get('country', '??').upper()
                countries_in_group.add(result.cc)
                result.primary_ip = relay.get('or_addresses', ['??:0'])[0].split(':')[0]
                network_families_in_group.add(get_network_family(relay))
                result.as_no = relay.get('as_number', '??')
                result.as_name = relay.get('as_name', '??')
                result.as_info = "%s %s" % (result.as_no, result.as_name)
                ases_in_group.add(result.as_info)
                relays_in_group += 1

            # If we want to group by things, we need to handle some fields
            # specially
            if grouped:
                result.nick = "*"
                result.fp = "(%d relays)" % relays_in_group
                result.exit = "(%d)" % exits_in_group
                result.guard = "(%d)" % guards_in_group
                if not options.by_as and not options.ases:
                    result.as_info = "(%d)" % len(ases_in_group)
                if not options.by_country and not options.country:
                    result.cc = "(%d)" % len(countries_in_group)
                if not options.by_network_family:
                    result.primary_ip = "(%d diff. /16)" % len(network_families_in_group)
                else:
                    result.primary_ip = network_families_in_group.pop()

            # Include our weight values, scaled by 100
            result['cw'] = group_weights['consensus_weight_fraction'] * 100.0
            result['adv_bw'] = group_weights['advertised_bandwidth_fraction'] * 100.0
            result['p_guard'] = group_weights['guard_probability'] * 100.0
            result['p_middle'] = group_weights['middle_probability'] * 100.0
            result['p_exit'] = group_weights['exit_probability'] * 100.0

            results.append(result)

        return results
def create_option_parser():
    """Build and return the OptionParser describing the command line.

    Options are organised into filtering, grouping, sorting and display
    groups. Sorting is descending by default; --sort-ascending switches it.
    """
    parser = OptionParser()
    parser.add_option("-d", "--download", action="store_true",
                      help="download details.json from Onionoo service")

    group = OptionGroup(parser, "Filtering options")
    group.add_option("-i", "--inactive", action="store_true", default=False,
                     help="include relays in selection that aren't currently running")
    group.add_option("-a", "--as", dest="ases", action="append",
                     help="select only relays from autonomous system number AS",
                     metavar="AS")
    group.add_option("-c", "--country", action="append",
                     help="select only relays from country with code CC", metavar="CC")
    group.add_option("-e", "--exits-only", action="store_true",
                     help="select only relays suitable for exit position")
    group.add_option("-f", "--family", action="store", type="string", metavar="RELAY",
                     help="select family by fingerprint or nickname (for named relays)")
    group.add_option("-g", "--guards-only", action="store_true",
                     help="select only relays suitable for guard position")
    group.add_option("--exit-filter", type="choice", dest="exit_filter",
                     choices=["fast_exits_only", "almost_fast_exits_only",
                              "all_relays", "fast_exits_only_any_network"],
                     metavar="{fast_exits_only|almost_fast_exits_only|all_relays|fast_exits_only_any_network}",
                     default='all_relays')
    group.add_option("--fast-exits-only", action="store_true",
                     help="select only fast exits (%d+ Mbit/s, %d+ KB/s, %s, %d- per /24)" %
                          (FAST_EXIT_BANDWIDTH_RATE / (125 * 1024),
                           FAST_EXIT_ADVERTISED_BANDWIDTH / 1024,
                           '/'.join(map(str, FAST_EXIT_PORTS)),
                           FAST_EXIT_MAX_PER_NETWORK))
    group.add_option("--almost-fast-exits-only", action="store_true",
                     help="select only almost fast exits (%d+ Mbit/s, %d+ KB/s, %s, not in set of fast exits)" %
                          (ALMOST_FAST_EXIT_BANDWIDTH_RATE / (125 * 1024),
                           ALMOST_FAST_EXIT_ADVERTISED_BANDWIDTH / 1024,
                           '/'.join(map(str, ALMOST_FAST_EXIT_PORTS))))
    group.add_option("--fast-exits-only-any-network", action="store_true",
                     help="select only fast exits without network restriction (%d+ Mbit/s, %d+ KB/s, %s)" %
                          (FAST_EXIT_BANDWIDTH_RATE / (125 * 1024),
                           FAST_EXIT_ADVERTISED_BANDWIDTH / 1024,
                           '/'.join(map(str, FAST_EXIT_PORTS))))
    parser.add_option_group(group)

    group = OptionGroup(parser, "Grouping options")
    group.add_option("-A", "--by-as", action="store_true", default=False,
                     help="group relays by AS")
    group.add_option("-C", "--by-country", action="store_true", default=False,
                     help="group relays by country")
    group.add_option("-N", "--by-network-family", action="store_true", default=False,
                     help="group relays by network family (/16 IPv4)")
    parser.add_option_group(group)

    group = OptionGroup(parser, "Sorting options")
    group.add_option("--sort", type="choice",
                     choices=["cw", "adv_bw", "p_guard", "p_exit", "p_middle",
                              "nick", "fp"],
                     metavar="{cw|adv_bw|p_guard|p_exit|p_middle|nick|fp}",
                     default="cw",
                     help="sort by this field")
    # sort_reverse defaults to True, so --sort_reverse alone cannot change
    # the order; --sort-ascending is the switch that turns it off.
    group.add_option("--sort_reverse", action="store_true", default=True,
                     help="sort in descending order (default)")
    group.add_option("--sort-ascending", action="store_false", dest="sort_reverse",
                     help="sort in ascending order")
    parser.add_option_group(group)

    group = OptionGroup(parser, "Display options")
    group.add_option("-l", "--links", action="store_true",
                     help="display links to the Atlas service instead of fingerprints")
    group.add_option("-t", "--top", type="int", default=10, metavar="NUM",
                     help="display only the top results (default: %default; -1 for all)")

    group.add_option("-s", "--short", action="store_const", dest='short', const=70,
                     help="cut the length of the line output at 70 chars")
    group.add_option("-j", "--json", action="store_true",
                     help="output in JSON rather than human-readable format")
    group.add_option("--datafile", default="details.json",
                     help="use a custom datafile (Default: 'details.json')")
    parser.add_option_group(group)
    return parser
def download_details_file():
    """Download the relay details from Onionoo into details.json.

    The file is written next to this script. The response is read completely
    before the target file is opened, so a failed download does not truncate
    an existing details.json, and both handles are closed on error.
    """
    target = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'details.json')
    url = urllib.urlopen('https://onionoo.torproject.org/details?type=relay')
    try:
        payload = url.read()
    finally:
        url.close()
    with open(target, 'w') as details_file:
        details_file.write(payload)
def fix_exit_filter_options(options):
    """
    Translate the old-style exit filter options into
    the new format (as received on the front end).

    An explicit options.exit_filter other than "all_relays" wins and is
    returned untouched. Otherwise the single old-style flag that is set
    (fast_exits_only, almost_fast_exits_only or fast_exits_only_any_network)
    becomes the exit_filter value.

    Returns the same options object. Raises ValueError, leaving options
    unmodified, when more than one old-style flag is set.
    """
    if options.exit_filter != "all_relays":
        # We just accept this option's value
        return options

    # Each old-style flag is named exactly like the exit_filter value it
    # stands for.
    legacy_flags = ("fast_exits_only",
                    "almost_fast_exits_only",
                    "fast_exits_only_any_network")
    selected = [name for name in legacy_flags if getattr(options, name)]

    if len(selected) > 1:
        raise ValueError("Can only filter by one fast-exit option, got: %s"
                         % ", ".join(selected))
    if selected:
        options.exit_filter = selected[0]

    return options
# Command line entry point: parse and validate the options, optionally
# download the datafile, then print the selected relays.
if '__main__' == __name__:
    parser = create_option_parser()
    (options, args) = parser.parse_args()
    if len(args) > 0:
        parser.error("Did not understand positional argument(s), use options instead.")
    if options.family and not re.match(r'^[A-F0-9]{40}$', options.family) and not re.match(r'^[A-Za-z0-9]{1,19}$', options.family):
        parser.error("Not a valid fingerprint or nickname: %s" % options.family)

    try:
        options = fix_exit_filter_options(options)
    except Exception:
        # Not a bare except, so KeyboardInterrupt/SystemExit pass through.
        parser.error("Can only filter by one fast-exit option.")

    if options.download:
        download_details_file()
        print("Downloaded details.json. Re-run without --download option.")
        sys.exit()

    # Check the datafile that will actually be read, not just the default.
    datafile_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 options.datafile)
    if not os.path.exists(datafile_path):
        if options.datafile == 'details.json':
            parser.error("Did not find details.json. Re-run with --download.")
        else:
            parser.error("Did not find datafile: %s" % options.datafile)

    stats = RelayStats(options, options.datafile)
    results = stats.select_relays(stats.relays, options)

    sorted_results = stats.sort_and_reduce(results, options)

    if options.json:
        print(json.dumps(sorted_results, cls=util.ResultEncoder))
    else:
        stats.print_selection(sorted_results, options)