Improve fallback selection and output
tor.git: scripts/maint/updateFallbackDirs.py
1 #!/usr/bin/python
3 # Usage: scripts/maint/updateFallbackDirs.py > src/or/fallback_dirs.inc
4 # Needs stem available in your PYTHONPATH, or just ln -s ../stem/stem .
6 # Then read the generated list to ensure no-one slipped anything funny into
7 # their name or contactinfo
9 # Script by weasel, April 2015
10 # Portions by gsathya & karsten, 2013
11 # https://trac.torproject.org/projects/tor/attachment/ticket/8374/dir_list.2.py
12 # Modifications by teor, 2015
14 import StringIO
15 import string
16 import re
17 import datetime
18 import gzip
19 import os.path
20 import json
21 import math
22 import sys
23 import urllib
24 import urllib2
25 import hashlib
26 import dateutil.parser
27 # bson_lazy provides bson
28 #from bson import json_util
30 from stem.descriptor.remote import DescriptorDownloader
32 import logging
33 # INFO tells you why each relay was included or excluded
34 # WARN tells you about potential misconfigurations
35 logging.basicConfig(level=logging.WARNING)
37 ## Top-Level Configuration
39 # Output all candidate fallbacks, or only output selected fallbacks?
40 OUTPUT_CANDIDATES = False
42 # Perform DirPort checks over IPv4?
43 # Change this to False if IPv4 doesn't work for you, or if you don't want to
44 # download a consensus for each fallback
45 # Don't check ~1000 candidates when OUTPUT_CANDIDATES is True
46 PERFORM_IPV4_DIRPORT_CHECKS = False if OUTPUT_CANDIDATES else True
48 # Perform DirPort checks over IPv6?
49 # If you know IPv6 works for you, set this to True
50 # This will exclude IPv6 relays without an IPv6 DirPort configured
51 # So it's best left at False until #18394 is implemented
52 # Don't check ~1000 candidates when OUTPUT_CANDIDATES is True
53 PERFORM_IPV6_DIRPORT_CHECKS = False if OUTPUT_CANDIDATES else False
55 ## OnionOO Settings
57 ONIONOO = 'https://onionoo.torproject.org/'
58 #ONIONOO = 'https://onionoo.thecthulhu.com/'
60 # Don't bother going out to the Internet, just use the files available locally,
61 # even if they're very old
62 LOCAL_FILES_ONLY = False
64 ## Whitelist / Blacklist Filter Settings
66 # The whitelist contains entries that are included if all attributes match
67 # (IPv4, dirport, orport, id, and optionally IPv6 and IPv6 orport)
68 # The blacklist contains (partial) entries that are excluded if any
69 # sufficiently specific group of attributes matches:
70 # IPv4 & DirPort
71 # IPv4 & ORPort
72 # ID
73 # IPv6 & DirPort
74 # IPv6 & IPv6 ORPort
75 # If neither port is included in the blacklist, the entire IP address is
76 # blacklisted.
78 # What happens to entries in neither list?
79 # When True, they are included, when False, they are excluded
80 INCLUDE_UNLISTED_ENTRIES = True if OUTPUT_CANDIDATES else False
82 # If an entry is in both lists, what happens?
83 # When True, it is excluded, when False, it is included
84 BLACKLIST_EXCLUDES_WHITELIST_ENTRIES = True
86 WHITELIST_FILE_NAME = 'scripts/maint/fallback.whitelist'
87 BLACKLIST_FILE_NAME = 'scripts/maint/fallback.blacklist'
89 # The number of bytes we'll read from a filter file before giving up
90 MAX_LIST_FILE_SIZE = 1024 * 1024
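# Illustrative (hypothetical) entries, in the format load_relaylist() parses;
# the addresses and fingerprints below are documentation placeholders, not real relays:
#   fallback.whitelist:
#     203.0.113.1:80 orport=443 id=0123456789ABCDEF0123456789ABCDEF01234567 ipv6=[2001:db8::1]:443
#   fallback.blacklist:
#     203.0.113.2:80                                # exclude this IPv4 address and DirPort
#     id=FEDCBA9876543210FEDCBA9876543210FEDCBA98   # exclude by fingerprint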
92 ## Eligibility Settings
94 # Reduced due to a bug in tor where a relay submits a 0 DirPort when restarted
95 # This causes OnionOO to (correctly) reset its stability timer
96 # This issue will be fixed in 0.2.7.7 and 0.2.8.2
97 # Until then, the CUTOFFs below ensure a decent level of stability.
98 ADDRESS_AND_PORT_STABLE_DAYS = 7
99 # What time-weighted fraction of these flags must FallbackDirs
100 # Equal or Exceed?
101 CUTOFF_RUNNING = .95
102 CUTOFF_V2DIR = .95
103 CUTOFF_GUARD = .95
104 # What time-weighted fraction of these flags must FallbackDirs
105 # Equal or Fall Under?
106 # .00 means no bad exits
107 PERMITTED_BADEXIT = .00
109 # Clients will time out after 30 seconds trying to download a consensus
110 # So allow fallback directories half that to deliver a consensus
111 # The exact download times might change based on the network connection
112 # running this script, but only by a few seconds
113 # There is also about a second of python overhead
114 CONSENSUS_DOWNLOAD_SPEED_MAX = 15.0
115 # If the relay fails a consensus check, retry the download
116 # This avoids delisting a relay due to transient network conditions
117 CONSENSUS_DOWNLOAD_RETRY = True
119 ## List Length Limits
121 # The target for these parameters is 20% of the guards in the network
122 # This is around 200 as of October 2015
123 FALLBACK_PROPORTION_OF_GUARDS = None if OUTPUT_CANDIDATES else 0.2
125 # Limit the number of fallbacks (eliminating lowest by weight)
126 MAX_FALLBACK_COUNT = None if OUTPUT_CANDIDATES else 500
127 # Emit a C #error if the number of fallbacks is below this minimum
128 MIN_FALLBACK_COUNT = 50
130 ## Fallback Weight Settings
132 # Any fallback with the Exit flag has its weight multiplied by this fraction
133 EXIT_WEIGHT_FRACTION = 1.0
135 # If True, emit a C #error if we can't satisfy various constraints
136 # If False, emit a C comment instead
137 STRICT_FALLBACK_WEIGHTS = False
139 # Limit the proportional weight
140 # If a single fallback's weight is too high, it will see too many clients
141 # We reweight using a lower threshold to provide some leeway for:
142 # * elimination of low weight relays
143 # * consensus weight changes
144 # * fallback directory losses over time
145 # A relay weighted at 1 in 10 fallbacks will see about 10% of clients that
146 # use the fallback directories. (The 9 directory authorities see a similar
147 # proportion of clients.)
148 TARGET_MAX_WEIGHT_FRACTION = 1/10.0
149 REWEIGHTING_FUDGE_FACTOR = 0.8
150 MAX_WEIGHT_FRACTION = TARGET_MAX_WEIGHT_FRACTION * REWEIGHTING_FUDGE_FACTOR
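# For example, with the defaults above: 1/10.0 * 0.8 = 0.08, so after reweighting
# no single fallback should keep more than roughly 8% of the total fallback weight.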
151 # If a single fallback's weight is too low, it's pointless adding it.
152 # (Final weights may be slightly higher than this, due to low weight relays
153 # being excluded.)
154 # A relay weighted at 1 in 1000 fallbacks will see about 0.1% of clients.
155 MIN_WEIGHT_FRACTION = 0.0 if OUTPUT_CANDIDATES else 1/1000.0
157 ## Other Configuration Parameters
159 # older entries' weights are adjusted with ALPHA^(age in days)
160 AGE_ALPHA = 0.99
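# For example, a measurement 7 days old is weighted by 0.99^7 (about 0.93),
# and one 30 days old by 0.99^30 (about 0.74).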
162 # this factor is used to scale OnionOO entries to [0,1]
163 ONIONOO_SCALE_ONE = 999.
165 ## Parsing Functions
167 def parse_ts(t):
168 return datetime.datetime.strptime(t, "%Y-%m-%d %H:%M:%S")
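# Illustrative usage (not executed):
#   parse_ts("2015-03-30 06:00:00") == datetime.datetime(2015, 3, 30, 6, 0, 0)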
170 def remove_bad_chars(raw_string, bad_char_list):
171 # Remove each character in the bad_char_list
172 cleansed_string = raw_string
173 for c in bad_char_list:
174 cleansed_string = cleansed_string.replace(c, '')
175 return cleansed_string
177 def cleanse_unprintable(raw_string):
178 # Remove all unprintable characters
179 cleansed_string = ''
180 for c in raw_string:
181 if (c in string.ascii_letters or c in string.digits
182 or c in string.punctuation or c in string.whitespace):
183 cleansed_string += c
184 return cleansed_string
186 def cleanse_whitespace(raw_string):
187 # Replace all whitespace characters with a space
188 cleansed_string = raw_string
189 for c in string.whitespace:
190 cleansed_string = cleansed_string.replace(c, ' ')
191 return cleansed_string
193 def cleanse_c_multiline_comment(raw_string):
194 cleansed_string = raw_string
195 # Embedded newlines should be removed by tor/onionoo, but let's be paranoid
196 cleansed_string = cleanse_whitespace(cleansed_string)
197 # ContactInfo and Version can be arbitrary binary data
198 cleansed_string = cleanse_unprintable(cleansed_string)
199 # Prevent a malicious / unanticipated string from breaking out
200 # of a C-style multiline comment
201 # This removes '/*' and '*/' and '//'
202 bad_char_list = '*/'
203 # Prevent a malicious string from using C nulls
204 bad_char_list += '\0'
205 # Be safer by removing bad characters entirely
206 cleansed_string = remove_bad_chars(cleansed_string, bad_char_list)
207 # Some compilers may further process the content of comments
208 # There isn't much we can do to cover every possible case
209 # But comment-based directives are typically only advisory
210 return cleansed_string
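# Illustrative usage (not executed): all '*' and '/' characters are dropped, so
#   cleanse_c_multiline_comment('evil */ contact') == 'evil  contact'
# and the result can no longer terminate the surrounding C comment.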
212 def cleanse_c_string(raw_string):
213 cleansed_string = raw_string
214 # Embedded newlines should be removed by tor/onionoo, but let's be paranoid
215 cleansed_string = cleanse_whitespace(cleansed_string)
216 # ContactInfo and Version can be arbitrary binary data
217 cleansed_string = cleanse_unprintable(cleansed_string)
218 # Prevent a malicious address/fingerprint string from breaking out
219 # of a C-style string
220 bad_char_list = '"'
221 # Prevent a malicious string from using escapes
222 bad_char_list += '\\'
223 # Prevent a malicious string from using C nulls
224 bad_char_list += '\0'
225 # Be safer by removing bad characters entirely
226 cleansed_string = remove_bad_chars(cleansed_string, bad_char_list)
227 # Some compilers may further process the content of strings
228 # There isn't much we can do to cover every possible case
229 # But this typically only results in changes to the string data
230 return cleansed_string
232 ## OnionOO Source Functions
234 # a dictionary of source metadata for each onionoo query we've made
235 fetch_source = {}
237 # register source metadata for 'what'
238 # assumes we only retrieve one document for each 'what'
239 def register_fetch_source(what, url, relays_published, version):
240 fetch_source[what] = {}
241 fetch_source[what]['url'] = url
242 fetch_source[what]['relays_published'] = relays_published
243 fetch_source[what]['version'] = version
245 # list each registered source's 'what'
246 def fetch_source_list():
247 return sorted(fetch_source.keys())
249 # given 'what', provide a multiline C comment describing the source
250 def describe_fetch_source(what):
251 desc = '/*'
252 desc += '\n'
253 desc += 'Onionoo Source: '
254 desc += cleanse_c_multiline_comment(what)
255 desc += ' Date: '
256 desc += cleanse_c_multiline_comment(fetch_source[what]['relays_published'])
257 desc += ' Version: '
258 desc += cleanse_c_multiline_comment(fetch_source[what]['version'])
259 desc += '\n'
260 desc += 'URL: '
261 desc += cleanse_c_multiline_comment(fetch_source[what]['url'])
262 desc += '\n'
263 desc += '*/'
264 return desc
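# The comment this produces looks roughly like (values depend on the query):
#   /*
#   Onionoo Source: details Date: <relays_published> Version: <version>
#   URL: <query URL>
#   */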
266 ## File Processing Functions
268 def write_to_file(str, file_name, max_len):
269 try:
270 with open(file_name, 'w') as f:
271 f.write(str[0:max_len])
272 except EnvironmentError, error:
273 logging.warning('Writing file %s failed: %d: %s'%
274 (file_name,
275 error.errno,
276 error.strerror))
279 def read_from_file(file_name, max_len):
280 try:
281 if os.path.isfile(file_name):
282 with open(file_name, 'r') as f:
283 return f.read(max_len)
284 except EnvironmentError, error:
285 logging.info('Loading file %s failed: %d: %s'%
286 (file_name,
287 error.errno,
288 error.strerror))
290 return None
292 def load_possibly_compressed_response_json(response):
293 if response.info().get('Content-Encoding') == 'gzip':
294 buf = StringIO.StringIO( response.read() )
295 f = gzip.GzipFile(fileobj=buf)
296 return json.load(f)
297 else:
298 return json.load(response)
300 def load_json_from_file(json_file_name):
301 # An exception here may be resolved by deleting the .last_modified
302 # and .json files, and re-running the script
303 try:
304 with open(json_file_name, 'r') as f:
305 return json.load(f)
306 except EnvironmentError, error:
307 raise Exception('Reading not-modified json file %s failed: %d: %s'%
308 (json_file_name,
309 error.errno,
310 error.strerror))
313 ## OnionOO Functions
315 def datestr_to_datetime(datestr):
316 # Parse datetimes like: Fri, 02 Oct 2015 13:34:14 GMT
317 if datestr is not None:
318 dt = dateutil.parser.parse(datestr)
319 else:
320 # Never modified - use start of epoch
321 dt = datetime.datetime.utcfromtimestamp(0)
322 # strip any timezone out (in case they're supported in future)
323 dt = dt.replace(tzinfo=None)
324 return dt
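# Illustrative usage (not executed):
#   datestr_to_datetime("Fri, 02 Oct 2015 13:34:14 GMT")
#     == datetime.datetime(2015, 10, 2, 13, 34, 14)
#   datestr_to_datetime(None) == datetime.datetime(1970, 1, 1, 0, 0)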
326 def onionoo_fetch(what, **kwargs):
327 params = kwargs
328 params['type'] = 'relay'
329 #params['limit'] = 10
330 params['first_seen_days'] = '%d-'%(ADDRESS_AND_PORT_STABLE_DAYS,)
331 params['last_seen_days'] = '-7'
332 params['flag'] = 'V2Dir'
333 url = ONIONOO + what + '?' + urllib.urlencode(params)
335 # Unfortunately, the URL is too long for some OS filenames,
336 # but we still don't want to get files from different URLs mixed up
337 base_file_name = what + '-' + hashlib.sha1(url).hexdigest()
339 full_url_file_name = base_file_name + '.full_url'
340 MAX_FULL_URL_LENGTH = 1024
342 last_modified_file_name = base_file_name + '.last_modified'
343 MAX_LAST_MODIFIED_LENGTH = 64
345 json_file_name = base_file_name + '.json'
347 if LOCAL_FILES_ONLY:
348 # Read from the local file, don't write to anything
349 response_json = load_json_from_file(json_file_name)
350 else:
351 # store the full URL to a file for debugging
352 # no need to compare as long as you trust SHA-1
353 write_to_file(url, full_url_file_name, MAX_FULL_URL_LENGTH)
355 request = urllib2.Request(url)
356 request.add_header('Accept-encoding', 'gzip')
358 # load the last modified date from the file, if it exists
359 last_mod_date = read_from_file(last_modified_file_name,
360 MAX_LAST_MODIFIED_LENGTH)
361 if last_mod_date is not None:
362 request.add_header('If-modified-since', last_mod_date)
364 # Parse last modified date
365 last_mod = datestr_to_datetime(last_mod_date)
367 # Not Modified and still recent enough to be useful
368 # Onionoo / Globe used to use 6 hours, but we can afford a day
369 required_freshness = datetime.datetime.utcnow()
370 # strip any timezone out (to match dateutil.parser)
371 required_freshness = required_freshness.replace(tzinfo=None)
372 required_freshness -= datetime.timedelta(hours=24)
374 # Make the OnionOO request
375 response_code = 0
376 try:
377 response = urllib2.urlopen(request)
378 response_code = response.getcode()
379 except urllib2.HTTPError, error:
380 response_code = error.code
381 if response_code == 304: # not modified
382 pass
383 else:
384 raise Exception("Could not get " + url + ": "
385 + str(error.code) + ": " + error.reason)
387 if response_code == 200: # OK
388 last_mod = datestr_to_datetime(response.info().get('Last-Modified'))
390 # Check for freshness
391 if last_mod < required_freshness:
392 if last_mod_date is not None:
393 # This check sometimes fails transiently, retry the script if it does
394 date_message = "Outdated data: last updated " + last_mod_date
395 else:
396 date_message = "No data: never downloaded "
397 raise Exception(date_message + " from " + url)
399 # Process the data
400 if response_code == 200: # OK
402 response_json = load_possibly_compressed_response_json(response)
404 with open(json_file_name, 'w') as f:
405 # use the most compact json representation to save space
406 json.dump(response_json, f, separators=(',',':'))
408 # store the last modified date in its own file
409 if response.info().get('Last-modified') is not None:
410 write_to_file(response.info().get('Last-Modified'),
411 last_modified_file_name,
412 MAX_LAST_MODIFIED_LENGTH)
414 elif response_code == 304: # Not Modified
416 response_json = load_json_from_file(json_file_name)
418 else: # Unexpected HTTP response code not covered in the HTTPError above
419 raise Exception("Unexpected HTTP response code to " + url + ": "
420 + str(response_code))
422 register_fetch_source(what,
423 url,
424 response_json['relays_published'],
425 response_json['version'])
427 return response_json
429 def fetch(what, **kwargs):
430 #x = onionoo_fetch(what, **kwargs)
431 # don't use sort_keys, as the order of or_addresses is significant
432 #print json.dumps(x, indent=4, separators=(',', ': '))
433 #sys.exit(0)
435 return onionoo_fetch(what, **kwargs)
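# For reference, fetch('details', fields='fingerprint,nickname') builds a query
# roughly like the following (parameter order depends on urllib.urlencode):
#   https://onionoo.torproject.org/details?fields=fingerprint%2Cnickname&type=relay&first_seen_days=7-&last_seen_days=-7&flag=V2Dir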
437 ## Fallback Candidate Class
439 class Candidate(object):
440 CUTOFF_ADDRESS_AND_PORT_STABLE = (datetime.datetime.utcnow()
441 - datetime.timedelta(ADDRESS_AND_PORT_STABLE_DAYS))
443 def __init__(self, details):
444 for f in ['fingerprint', 'nickname', 'last_changed_address_or_port',
445 'consensus_weight', 'or_addresses', 'dir_address']:
446 if not f in details: raise Exception("Document has no %s field."%(f,))
448 if not 'contact' in details:
449 details['contact'] = None
450 if not 'flags' in details or details['flags'] is None:
451 details['flags'] = []
452 details['last_changed_address_or_port'] = parse_ts(
453 details['last_changed_address_or_port'])
454 self._data = details
455 self._stable_sort_or_addresses()
457 self._fpr = self._data['fingerprint']
458 self._running = self._guard = self._v2dir = 0.
459 self._split_dirport()
460 self._compute_orport()
461 if self.orport is None:
462 raise Exception("Failed to get an orport for %s."%(self._fpr,))
463 self._compute_ipv6addr()
464 if self.ipv6addr is None:
465 logging.debug("Failed to get an ipv6 address for %s."%(self._fpr,))
466 # Reduce the weight of exits to EXIT_WEIGHT_FRACTION * consensus_weight
467 if self.is_exit():
468 current_weight = self._data['consensus_weight']
469 exit_weight = current_weight * EXIT_WEIGHT_FRACTION
470 self._data['original_consensus_weight'] = current_weight
471 self._data['consensus_weight'] = exit_weight
473 def _stable_sort_or_addresses(self):
474 # replace self._data['or_addresses'] with a stable ordering,
475 # sorting the secondary addresses in string order
476 # leave the received order in self._data['or_addresses_raw']
477 self._data['or_addresses_raw'] = self._data['or_addresses']
478 or_address_primary = self._data['or_addresses'][:1]
479 # subsequent entries in the or_addresses array are in an arbitrary order
480 # so we stabilise the addresses by sorting them in string order
481 or_addresses_secondaries_stable = sorted(self._data['or_addresses'][1:])
482 or_addresses_stable = or_address_primary + or_addresses_secondaries_stable
483 self._data['or_addresses'] = or_addresses_stable
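# For example (hypothetical or_addresses), the primary address stays first and
# the secondaries are sorted as strings:
#   ['203.0.113.1:9001', '[2001:db8::1]:9001', '203.0.113.1:9030']
# becomes
#   ['203.0.113.1:9001', '203.0.113.1:9030', '[2001:db8::1]:9001']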
485 def get_fingerprint(self):
486 return self._fpr
488 # is_valid_ipv[46]_address by gsathya, karsten, 2013
489 @staticmethod
490 def is_valid_ipv4_address(address):
491 if not isinstance(address, (str, unicode)):
492 return False
494 # check if there are four period separated values
495 if address.count(".") != 3:
496 return False
498 # check that each octet is a decimal value between 0 and 255
499 for entry in address.split("."):
500 if not entry.isdigit() or int(entry) < 0 or int(entry) > 255:
501 return False
502 elif entry[0] == "0" and len(entry) > 1:
503 return False # leading zeros, for instance in "1.2.3.001"
505 return True
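# Illustrative results (not executed):
#   is_valid_ipv4_address('203.0.113.1') == True
#   is_valid_ipv4_address('1.2.3.001')   == False  (leading zero in an octet)
#   is_valid_ipv4_address('1.2.3')       == False  (only three octets)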
507 @staticmethod
508 def is_valid_ipv6_address(address):
509 if not isinstance(address, (str, unicode)):
510 return False
512 # remove brackets
513 address = address[1:-1]
515 # addresses are made up of eight colon separated groups of four hex digits
516 # with leading zeros being optional
517 # https://en.wikipedia.org/wiki/IPv6#Address_format
519 colon_count = address.count(":")
521 if colon_count > 7:
522 return False # too many groups
523 elif colon_count != 7 and not "::" in address:
524 return False # not enough groups and none are collapsed
525 elif address.count("::") > 1 or ":::" in address:
526 return False # multiple groupings of zeros can't be collapsed
528 found_ipv4_on_previous_entry = False
529 for entry in address.split(":"):
530 # If an IPv6 address has an embedded IPv4 address,
531 # it must be the last entry
532 if found_ipv4_on_previous_entry:
533 return False
534 if not re.match("^[0-9a-fA-F]{0,4}$", entry):
535 if not Candidate.is_valid_ipv4_address(entry):
536 return False
537 else:
538 found_ipv4_on_previous_entry = True
540 return True
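# Illustrative results (not executed); addresses are expected in brackets,
# as onionoo lists them in or_addresses:
#   is_valid_ipv6_address('[2001:db8::1]')    == True
#   is_valid_ipv6_address('[2001:db8::1::2]') == False  (more than one '::')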
542 def _split_dirport(self):
543 # Split the dir_address into dirip and dirport
544 (self.dirip, _dirport) = self._data['dir_address'].split(':', 2)
545 self.dirport = int(_dirport)
547 def _compute_orport(self):
548 # Choose the first ORPort that's on the same IPv4 address as the DirPort.
549 # In rare circumstances, this might not be the primary ORPort address.
550 # However, _stable_sort_or_addresses() ensures we choose the same one
551 # every time, even if onionoo changes the order of the secondaries.
552 self._split_dirport()
553 self.orport = None
554 for i in self._data['or_addresses']:
555 if i != self._data['or_addresses'][0]:
556 logging.debug('Secondary IPv4 Address Used for %s: %s'%(self._fpr, i))
557 (ipaddr, port) = i.rsplit(':', 1)
558 if (ipaddr == self.dirip) and Candidate.is_valid_ipv4_address(ipaddr):
559 self.orport = int(port)
560 return
562 def _compute_ipv6addr(self):
563 # Choose the first IPv6 address that uses the same port as the ORPort
564 # Or, choose the first IPv6 address in the list
565 # _stable_sort_or_addresses() ensures we choose the same IPv6 address
566 # every time, even if onionoo changes the order of the secondaries.
567 self.ipv6addr = None
568 self.ipv6orport = None
569 # Choose the first IPv6 address that uses the same port as the ORPort
570 for i in self._data['or_addresses']:
571 (ipaddr, port) = i.rsplit(':', 1)
572 if (int(port) == self.orport) and Candidate.is_valid_ipv6_address(ipaddr):
573 self.ipv6addr = ipaddr
574 self.ipv6orport = port
575 return
576 # Choose the first IPv6 address in the list
577 for i in self._data['or_addresses']:
578 (ipaddr, port) = i.rsplit(':', 1)
579 if Candidate.is_valid_ipv6_address(ipaddr):
580 self.ipv6addr = ipaddr
581 self.ipv6orport = port
582 return
584 @staticmethod
585 def _extract_generic_history(history, which='unknown'):
586 # given a tree like this:
588 # "1_month": {
589 # "count": 187,
590 # "factor": 0.001001001001001001,
591 # "first": "2015-02-27 06:00:00",
592 # "interval": 14400,
593 # "last": "2015-03-30 06:00:00",
594 # "values": [
595 # 999,
596 # 999 ]
598 # },
599 # "1_week": {
600 # "count": 169,
601 # "factor": 0.001001001001001001,
602 # "first": "2015-03-23 07:30:00",
603 # "interval": 3600,
604 # "last": "2015-03-30 07:30:00",
605 # "values": [ ...]
606 # },
607 # "1_year": {
608 # "count": 177,
609 # "factor": 0.001001001001001001,
610 # "first": "2014-04-11 00:00:00",
611 # "interval": 172800,
612 # "last": "2015-03-29 00:00:00",
613 # "values": [ ...]
614 # },
615 # "3_months": {
616 # "count": 185,
617 # "factor": 0.001001001001001001,
618 # "first": "2014-12-28 06:00:00",
619 # "interval": 43200,
620 # "last": "2015-03-30 06:00:00",
621 # "values": [ ...]
623 # },
624 # extract exactly one piece of data per time interval,
625 # using smaller intervals where available.
627 # returns list of (age, length, value) dictionaries.
629 generic_history = []
631 periods = history.keys()
632 periods.sort(key = lambda x: history[x]['interval'])
633 now = datetime.datetime.utcnow()
634 newest = now
635 for p in periods:
636 h = history[p]
637 interval = datetime.timedelta(seconds = h['interval'])
638 this_ts = parse_ts(h['last'])
640 if (len(h['values']) != h['count']):
641 logging.warn('Inconsistent value count in %s document for %s'
642 %(p, which))
643 for v in reversed(h['values']):
644 if (this_ts <= newest):
645 agt1 = now - this_ts
646 agt2 = interval
647 agetmp1 = (agt1.microseconds + (agt1.seconds + agt1.days * 24 * 3600)
648 * 10**6) / 10**6
649 agetmp2 = (agt2.microseconds + (agt2.seconds + agt2.days * 24 * 3600)
650 * 10**6) / 10**6
651 generic_history.append(
652 { 'age': agetmp1,
653 'length': agetmp2,
654 'value': v })
656 newest = this_ts
657 this_ts -= interval
659 if (this_ts + interval != parse_ts(h['first'])):
660 logging.warn('Inconsistent time information in %s document for %s'
661 %(p, which))
663 #print json.dumps(generic_history, sort_keys=True,
664 # indent=4, separators=(',', ': '))
665 return generic_history
667 @staticmethod
668 def _avg_generic_history(generic_history):
669 a = []
670 for i in generic_history:
671 if i['age'] > (ADDRESS_AND_PORT_STABLE_DAYS * 24 * 3600):
672 continue
673 if (i['length'] is not None
674 and i['age'] is not None
675 and i['value'] is not None):
676 w = i['length'] * math.pow(AGE_ALPHA, i['age']/(3600*24))
677 a.append( (i['value'] * w, w) )
679 sv = math.fsum(map(lambda x: x[0], a))
680 sw = math.fsum(map(lambda x: x[1], a))
682 if sw == 0.0:
683 svw = 0.0
684 else:
685 svw = sv/sw
686 return svw
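# Worked example (hypothetical): two intervals of equal length, one fully
# Running (value 999) today and one not Running (value 0) 30 days ago:
# the weights are 1.0 and 0.99^30 (about 0.74), so the average is
# 999 * 1.0 / (1.0 + 0.74), about 574, i.e. about 0.57 once the caller
# divides by ONIONOO_SCALE_ONE.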
688 def _add_generic_history(self, history):
689 periods = history.keys()
690 periods.sort(key = lambda x: history[x]['interval'] )
692 print periods
694 def add_running_history(self, history):
695 pass
697 def add_uptime(self, uptime):
698 logging.debug('Adding uptime %s.'%(self._fpr,))
700 # flags we care about: Running, V2Dir, Guard
701 if not 'flags' in uptime:
702 logging.debug('No flags in document for %s.'%(self._fpr,))
703 return
705 for f in ['Running', 'Guard', 'V2Dir']:
706 if not f in uptime['flags']:
707 logging.debug('No %s in flags for %s.'%(f, self._fpr,))
708 return
710 running = self._extract_generic_history(uptime['flags']['Running'],
711 '%s-Running'%(self._fpr))
712 guard = self._extract_generic_history(uptime['flags']['Guard'],
713 '%s-Guard'%(self._fpr))
714 v2dir = self._extract_generic_history(uptime['flags']['V2Dir'],
715 '%s-V2Dir'%(self._fpr))
716 if 'BadExit' in uptime['flags']:
717 badexit = self._extract_generic_history(uptime['flags']['BadExit'],
718 '%s-BadExit'%(self._fpr))
720 self._running = self._avg_generic_history(running) / ONIONOO_SCALE_ONE
721 self._guard = self._avg_generic_history(guard) / ONIONOO_SCALE_ONE
722 self._v2dir = self._avg_generic_history(v2dir) / ONIONOO_SCALE_ONE
723 self._badexit = None
724 if 'BadExit' in uptime['flags']:
725 self._badexit = self._avg_generic_history(badexit) / ONIONOO_SCALE_ONE
727 def is_candidate(self):
728 must_be_running_now = (PERFORM_IPV4_DIRPORT_CHECKS
729 or PERFORM_IPV6_DIRPORT_CHECKS)
730 if (must_be_running_now and not self.is_running()):
731 logging.info('%s not a candidate: not running now, unable to check ' +
732 'DirPort consensus download', self._fpr)
733 return False
734 if (self._data['last_changed_address_or_port'] >
735 self.CUTOFF_ADDRESS_AND_PORT_STABLE):
736 logging.info('%s not a candidate: changed address/port recently (%s)',
737 self._fpr, self._data['last_changed_address_or_port'])
738 return False
739 if self._running < CUTOFF_RUNNING:
740 logging.info('%s not a candidate: running avg too low (%lf)',
741 self._fpr, self._running)
742 return False
743 if self._v2dir < CUTOFF_V2DIR:
744 logging.info('%s not a candidate: v2dir avg too low (%lf)',
745 self._fpr, self._v2dir)
746 return False
747 if self._badexit is not None and self._badexit > PERMITTED_BADEXIT:
748 logging.info('%s not a candidate: badexit avg too high (%lf)',
749 self._fpr, self._badexit)
750 return False
751 # if the relay's version is not recommended (or not reported), exclude it
752 if (not self._data.has_key('recommended_version')
753 or not self._data['recommended_version']):
754 logging.info('%s not a candidate: version not recommended', self._fpr)
755 return False
756 if self._guard < CUTOFF_GUARD:
757 logging.info('%s not a candidate: guard avg too low (%lf)',
758 self._fpr, self._guard)
759 return False
760 return True
762 def is_in_whitelist(self, relaylist):
763 """ A fallback matches if each key in the whitelist line matches:
764 ipv4
765 dirport
766 orport
768 ipv6 address and port (if present)
769 If the fallback has an ipv6 key, the whitelist line must also have
770 it, and vice versa, otherwise they don't match. """
771 for entry in relaylist:
772 if entry['id'] != self._fpr:
773 # can't log here, every relay's fingerprint is compared to the entry
774 continue
775 if entry['ipv4'] != self.dirip:
776 logging.info('%s is not in the whitelist: fingerprint matches, but ' +
777 'IPv4 (%s) does not match entry IPv4 (%s)',
778 self._fpr, self.dirip, entry['ipv4'])
779 continue
780 if int(entry['dirport']) != self.dirport:
781 logging.info('%s is not in the whitelist: fingerprint matches, but ' +
782 'DirPort (%d) does not match entry DirPort (%d)',
783 self._fpr, self.dirport, int(entry['dirport']))
784 continue
785 if int(entry['orport']) != self.orport:
786 logging.info('%s is not in the whitelist: fingerprint matches, but ' +
787 'ORPort (%d) does not match entry ORPort (%d)',
788 self._fpr, self.orport, int(entry['orport']))
789 continue
790 has_ipv6 = self.ipv6addr is not None and self.ipv6orport is not None
791 if (entry.has_key('ipv6') and has_ipv6):
792 ipv6 = self.ipv6addr + ':' + self.ipv6orport
793 # if both entry and fallback have an ipv6 address, compare them
794 if entry['ipv6'] != ipv6:
795 logging.info('%s is not in the whitelist: fingerprint matches, ' +
796 'but IPv6 (%s) does not match entry IPv6 (%s)',
797 self._fpr, ipv6, entry['ipv6'])
798 continue
799 # if the fallback has an IPv6 address but the whitelist entry
800 # doesn't, or vice versa, the whitelist entry doesn't match
801 elif entry.has_key('ipv6') and not has_ipv6:
802 logging.info('%s is not in the whitelist: fingerprint matches, but ' +
803 'it has no IPv6, and entry has IPv6 (%s)', self._fpr,
804 entry['ipv6'])
805 logging.warning('%s excluded: has it lost its former IPv6 address %s?',
806 self._fpr, entry['ipv6'])
807 continue
808 elif not entry.has_key('ipv6') and has_ipv6:
809 logging.info('%s is not in the whitelist: fingerprint matches, but ' +
810 'it has IPv6 (%s), and entry has no IPv6', self._fpr,
811 ipv6)
812 logging.warning('%s excluded: has it gained an IPv6 address %s?',
813 self._fpr, ipv6)
814 continue
815 return True
816 return False
818 def is_in_blacklist(self, relaylist):
819 """ A fallback matches a blacklist line if a sufficiently specific group
820 of attributes matches:
821 ipv4 & dirport
822 ipv4 & orport
824 ipv6 & dirport
825 ipv6 & ipv6 orport
826 If the fallback and the blacklist line both have an ipv6 key,
827 their values will be compared, otherwise, they will be ignored.
828 If there is no dirport and no orport, the entry matches all relays on
829 that ip. """
830 for entry in relaylist:
831 for key in entry:
832 value = entry[key]
833 if key == 'id' and value == self._fpr:
834 logging.info('%s is in the blacklist: fingerprint matches',
835 self._fpr)
836 return True
837 if key == 'ipv4' and value == self.dirip:
838 # if the dirport is present, check it too
839 if entry.has_key('dirport'):
840 if int(entry['dirport']) == self.dirport:
841 logging.info('%s is in the blacklist: IPv4 (%s) and ' +
842 'DirPort (%d) match', self._fpr, self.dirip,
843 self.dirport)
844 return True
845 # if the orport is present, check it too
846 elif entry.has_key('orport'):
847 if int(entry['orport']) == self.orport:
848 logging.info('%s is in the blacklist: IPv4 (%s) and ' +
849 'ORPort (%d) match', self._fpr, self.dirip,
850 self.orport)
851 return True
852 else:
853 logging.info('%s is in the blacklist: IPv4 (%s) matches, and ' +
854 'entry has no DirPort or ORPort', self._fpr,
855 self.dirip)
856 return True
857 has_ipv6 = self.ipv6addr is not None and self.ipv6orport is not None
858 ipv6 = (self.ipv6addr + ':' + self.ipv6orport) if has_ipv6 else None
859 if (key == 'ipv6' and has_ipv6):
860 # if both entry and fallback have an ipv6 address, compare them,
861 # otherwise, disregard ipv6 addresses
862 if value == ipv6:
863 # if the dirport is present, check it too
864 if entry.has_key('dirport'):
865 if int(entry['dirport']) == self.dirport:
866 logging.info('%s is in the blacklist: IPv6 (%s) and ' +
867 'DirPort (%d) match', self._fpr, ipv6,
868 self.dirport)
869 return True
870 # we've already checked the ORPort, it's part of entry['ipv6']
871 else:
872 logging.info('%s is in the blacklist: IPv6 (%s) matches, and ' +
873 'entry has no DirPort', self._fpr, ipv6)
874 return True
875 elif (key == 'ipv6' or has_ipv6):
876 # only log if the fingerprint matches but the IPv6 doesn't
877 if entry.has_key('id') and entry['id'] == self._fpr:
878 logging.info('%s skipping IPv6 blacklist comparison: relay ' +
879 'has%s IPv6%s, but entry has%s IPv6%s', self._fpr,
880 '' if has_ipv6 else ' no',
881 (' (' + ipv6 + ')') if has_ipv6 else '',
882 '' if key == 'ipv6' else ' no',
883 (' (' + value + ')') if key == 'ipv6' else '')
884 logging.warning('Has %s %s IPv6 address %s?', self._fpr,
885 'gained an' if has_ipv6 else 'lost its former',
886 ipv6 if has_ipv6 else value)
887 return False
889 def is_exit(self):
890 return 'Exit' in self._data['flags']
892 def is_guard(self):
893 return 'Guard' in self._data['flags']
895 def is_running(self):
896 return 'Running' in self._data['flags']
898 def fallback_weight_fraction(self, total_weight):
899 return float(self._data['consensus_weight']) / total_weight
901 # return the original consensus weight, if it exists,
902 # or, if not, return the consensus weight
903 def original_consensus_weight(self):
904 if self._data.has_key('original_consensus_weight'):
905 return self._data['original_consensus_weight']
906 else:
907 return self._data['consensus_weight']
909 def original_fallback_weight_fraction(self, total_weight):
910 return float(self.original_consensus_weight()) / total_weight
912 @staticmethod
913 def fallback_consensus_dl_speed(dirip, dirport, nickname, max_time):
914 download_failed = False
915 downloader = DescriptorDownloader()
916 start = datetime.datetime.utcnow()
917 # some directory mirrors respond to requests in ways that hang python
918 # sockets, which is why we log this line here
919 logging.info('Initiating consensus download from %s (%s:%d).', nickname,
920 dirip, dirport)
921 # there appears to be about 1 second of overhead when comparing stem's
922 # internal trace time and the elapsed time calculated here
923 TIMEOUT_SLOP = 1.0
924 try:
925 downloader.get_consensus(endpoints = [(dirip, dirport)],
926 timeout = (max_time + TIMEOUT_SLOP),
927 validate = True,
928 retries = 0,
929 fall_back_to_authority = False).run()
930 except Exception, stem_error:
931 logging.debug('Unable to retrieve a consensus from %s: %s', nickname,
932 stem_error)
933 status = 'error: "%s"' % (stem_error)
934 level = logging.WARNING
935 download_failed = True
936 elapsed = (datetime.datetime.utcnow() - start).total_seconds()
937 if elapsed > max_time:
938 status = 'too slow'
939 level = logging.WARNING
940 download_failed = True
941 elif not download_failed:
942 status = 'ok'
943 level = logging.DEBUG
944 logging.log(level, 'Consensus download: %0.1fs %s from %s (%s:%d), ' +
945 'max download time %0.1fs.', elapsed, status, nickname,
946 dirip, dirport, max_time)
947 return download_failed
949 def fallback_consensus_dl_check(self):
950 # include the relay if we're not doing a check, or we can't check (IPv6)
951 ipv4_failed = False
952 ipv6_failed = False
953 if PERFORM_IPV4_DIRPORT_CHECKS:
954 ipv4_failed = Candidate.fallback_consensus_dl_speed(self.dirip,
955 self.dirport,
956 self._data['nickname'],
957 CONSENSUS_DOWNLOAD_SPEED_MAX)
958 if self.ipv6addr is not None and PERFORM_IPV6_DIRPORT_CHECKS:
959 # Clients assume the IPv6 DirPort is the same as the IPv4 DirPort
960 ipv6_failed = Candidate.fallback_consensus_dl_speed(self.ipv6addr,
961 self.dirport,
962 self._data['nickname'],
963 CONSENSUS_DOWNLOAD_SPEED_MAX)
964 # Now retry the relay if it took too long the first time
965 if (PERFORM_IPV4_DIRPORT_CHECKS and ipv4_failed
966 and CONSENSUS_DOWNLOAD_RETRY):
967 ipv4_failed = Candidate.fallback_consensus_dl_speed(self.dirip,
968 self.dirport,
969 self._data['nickname'],
970 CONSENSUS_DOWNLOAD_SPEED_MAX)
971 if (self.ipv6addr is not None and PERFORM_IPV6_DIRPORT_CHECKS
972 and ipv6_failed and CONSENSUS_DOWNLOAD_RETRY):
973 ipv6_failed = Candidate.fallback_consensus_dl_speed(self.ipv6addr,
974 self.dirport,
975 self._data['nickname'],
976 CONSENSUS_DOWNLOAD_SPEED_MAX)
977 return ((not ipv4_failed) and (not ipv6_failed))
979 def fallbackdir_line(self, total_weight, original_total_weight, dl_speed_ok):
980 # /*
981 # nickname
982 # flags
983 # weight / total (percentage)
984 # [original weight / original total (original percentage)]
985 # [contact]
986 # */
987 # "address:dirport orport=port id=fingerprint"
988 # "[ipv6=addr:orport]"
989 # "weight=num",
991 # Multiline C comment
992 s = '/*'
993 s += '\n'
994 s += cleanse_c_multiline_comment(self._data['nickname'])
995 s += '\n'
996 s += 'Flags: '
997 s += cleanse_c_multiline_comment(' '.join(sorted(self._data['flags'])))
998 s += '\n'
999 weight = self._data['consensus_weight']
1000 percent_weight = self.fallback_weight_fraction(total_weight)*100
1001 s += 'Fallback Weight: %d / %d (%.3f%%)'%(weight, total_weight,
1002 percent_weight)
1003 s += '\n'
1004 o_weight = self.original_consensus_weight()
1005 if o_weight != weight:
1006 o_percent_weight = self.original_fallback_weight_fraction(
1007 original_total_weight)*100
1008 s += 'Consensus Weight: %d / %d (%.3f%%)'%(o_weight,
1009 original_total_weight,
1010 o_percent_weight)
1011 s += '\n'
1012 if self._data['contact'] is not None:
1013 s += cleanse_c_multiline_comment(self._data['contact'])
1014 s += '\n'
1015 s += '*/'
1016 s += '\n'
1017 # Comment out the fallback directory entry if it's too slow
1018 # See the debug output for which address and port is failing
1019 if not dl_speed_ok:
1020 s += '/* Consensus download failed or was too slow:\n'
1021 # Multi-Line C string with trailing comma (part of a string list)
1022 # This makes it easier to diff the file, and remove IPv6 lines using grep
1023 # Integers don't need escaping
1024 s += '"%s orport=%d id=%s"'%(
1025 cleanse_c_string(self._data['dir_address']),
1026 self.orport,
1027 cleanse_c_string(self._fpr))
1028 s += '\n'
1029 if self.ipv6addr is not None:
1030 s += '" ipv6=%s:%s"'%(
1031 cleanse_c_string(self.ipv6addr), cleanse_c_string(self.ipv6orport))
1032 s += '\n'
1033 s += '" weight=%d",'%(weight)
1034 if not dl_speed_ok:
1035 s += '\n'
1036 s += '*/'
1037 return s
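# A generated entry looks roughly like this (hypothetical relay, shown here
# as a comment since the real list is built at run time):
#   /*
#   ExampleNickname
#   Flags: Fast Guard Running Stable V2Dir Valid
#   Fallback Weight: 20000 / 1000000 (2.000%)
#   contact person <tor AT example dot org>
#   */
#   "203.0.113.1:80 orport=443 id=0123456789ABCDEF0123456789ABCDEF01234567"
#   " ipv6=[2001:db8::1]:443"
#   " weight=20000",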
1039 ## Fallback Candidate List Class
1041 class CandidateList(dict):
1042 def __init__(self):
1043 pass
1045 def _add_relay(self, details):
1046 if not 'dir_address' in details: return
1047 c = Candidate(details)
1048 self[ c.get_fingerprint() ] = c
1050 def _add_uptime(self, uptime):
1051 try:
1052 fpr = uptime['fingerprint']
1053 except KeyError:
1054 raise Exception("Document has no fingerprint field.")
1056 try:
1057 c = self[fpr]
1058 except KeyError:
1059 logging.debug('Got unknown relay %s in uptime document.'%(fpr,))
1060 return
1062 c.add_uptime(uptime)
1064 def _add_details(self):
1065 logging.debug('Loading details document.')
1066 d = fetch('details',
1067 fields=('fingerprint,nickname,contact,last_changed_address_or_port,' +
1068 'consensus_weight,or_addresses,dir_address,' +
1069 'recommended_version,flags'))
1070 logging.debug('Loading details document done.')
1072 if not 'relays' in d: raise Exception("No relays found in document.")
1074 for r in d['relays']: self._add_relay(r)
1076 def _add_uptimes(self):
1077 logging.debug('Loading uptime document.')
1078 d = fetch('uptime')
1079 logging.debug('Loading uptime document done.')
1081 if not 'relays' in d: raise Exception("No relays found in document.")
1082 for r in d['relays']: self._add_uptime(r)
1084 def add_relays(self):
1085 self._add_details()
1086 self._add_uptimes()
1088 def count_guards(self):
1089 guard_count = 0
1090 for fpr in self.keys():
1091 if self[fpr].is_guard():
1092 guard_count += 1
1093 return guard_count
1095 # Find fallbacks that fit the uptime, stability, and flags criteria
1096 def compute_fallbacks(self):
1097 self.fallbacks = map(lambda x: self[x],
1098 sorted(
1099 filter(lambda x: self[x].is_candidate(),
1100 self.keys()),
1101 key=lambda x: self[x]._data['consensus_weight'],
1102 reverse=True))
1105 @staticmethod
1106 def load_relaylist(file_name):
1107 """ Read each line in the file, and parse it like a FallbackDir line:
1108 an IPv4 address and optional port:
1109 <IPv4 address>:<port>
1110 which are parsed into dictionary entries:
1111 ipv4=<IPv4 address>
1112 dirport=<port>
1113 followed by a series of key=value entries:
1114 orport=<port>
1115 id=<fingerprint>
1116 ipv6=<IPv6 address>:<IPv6 orport>
1117 each line's key/value pairs are placed in a dictionary,
1118 (of string -> string key/value pairs),
1119 and these dictionaries are placed in an array.
1120 comments start with # and are ignored """
1121 relaylist = []
1122 file_data = read_from_file(file_name, MAX_LIST_FILE_SIZE)
1123 if file_data is None:
1124 return relaylist
1125 for line in file_data.split('\n'):
1126 relay_entry = {}
1127 # ignore comments
1128 line_comment_split = line.split('#')
1129 line = line_comment_split[0]
1130 # cleanup whitespace
1131 line = cleanse_whitespace(line)
1132 line = line.strip()
1133 if len(line) == 0:
1134 continue
1135 for item in line.split(' '):
1136 item = item.strip()
1137 if len(item) == 0:
1138 continue
1139 key_value_split = item.split('=')
1140 kvl = len(key_value_split)
1141 if kvl < 1 or kvl > 2:
1142 print '#error Bad %s item: %s, format is key=value.'%(
1143 file_name, item)
1144 if kvl == 1:
1145 # assume that entries without a key are the ipv4 address,
1146 # perhaps with a dirport
1147 ipv4_maybe_dirport = key_value_split[0]
1148 ipv4_maybe_dirport_split = ipv4_maybe_dirport.split(':')
1149 dirl = len(ipv4_maybe_dirport_split)
1150 if dirl < 1 or dirl > 2:
1151 print '#error Bad %s IPv4 item: %s, format is ipv4:port.'%(
1152 file_name, item)
1153 if dirl >= 1:
1154 relay_entry['ipv4'] = ipv4_maybe_dirport_split[0]
1155 if dirl == 2:
1156 relay_entry['dirport'] = ipv4_maybe_dirport_split[1]
1157 elif kvl == 2:
1158 relay_entry[key_value_split[0]] = key_value_split[1]
1159 relaylist.append(relay_entry)
1160 return relaylist
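# For example (hypothetical line), load_relaylist() turns
#   203.0.113.1:80 orport=443 id=0123456789ABCDEF0123456789ABCDEF01234567
# into
#   {'ipv4': '203.0.113.1', 'dirport': '80', 'orport': '443',
#    'id': '0123456789ABCDEF0123456789ABCDEF01234567'}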
1162 # apply the fallback whitelist and blacklist
1163 def apply_filter_lists(self):
1164 excluded_count = 0
1165 logging.debug('Applying whitelist and blacklist.')
1166 # parse the whitelist and blacklist
1167 whitelist = self.load_relaylist(WHITELIST_FILE_NAME)
1168 blacklist = self.load_relaylist(BLACKLIST_FILE_NAME)
1169 filtered_fallbacks = []
1170 for f in self.fallbacks:
1171 in_whitelist = f.is_in_whitelist(whitelist)
1172 in_blacklist = f.is_in_blacklist(blacklist)
1173 if in_whitelist and in_blacklist:
1174 if BLACKLIST_EXCLUDES_WHITELIST_ENTRIES:
1175 # exclude
1176 excluded_count += 1
1177 logging.warning('Excluding %s: in both blacklist and whitelist.',
1178 f._fpr)
1179 else:
1180 # include
1181 filtered_fallbacks.append(f)
1182 elif in_whitelist:
1183 # include
1184 filtered_fallbacks.append(f)
1185 elif in_blacklist:
1186 # exclude
1187 excluded_count += 1
1188 logging.debug('Excluding %s: in blacklist.', f._fpr)
1189 else:
1190 if INCLUDE_UNLISTED_ENTRIES:
1191 # include
1192 filtered_fallbacks.append(f)
1193 else:
1194 # exclude
1195 excluded_count += 1
1196 logging.info('Excluding %s: in neither blacklist nor whitelist.',
1197 f._fpr)
1198 self.fallbacks = filtered_fallbacks
1199 return excluded_count
1201 @staticmethod
1202 def summarise_filters(initial_count, excluded_count):
1203 return '/* Whitelist & blacklist excluded %d of %d candidates. */'%(
1204 excluded_count, initial_count)
1206 # Remove any fallbacks in excess of MAX_FALLBACK_COUNT,
1207 # starting with the lowest-weighted fallbacks
1208 # total_weight should be recalculated after calling this
1209 def exclude_excess_fallbacks(self):
1210 if MAX_FALLBACK_COUNT is not None:
1211 self.fallbacks = self.fallbacks[:MAX_FALLBACK_COUNT]
1213 # Clamp the weight of all fallbacks to MAX_WEIGHT_FRACTION * total_weight
1214 # fallbacks are kept sorted, but since excessive weights are reduced to
1215 # the maximum acceptable weight, these relays end up with equal weights
1216 def clamp_high_weight_fallbacks(self, total_weight):
1217 if MAX_WEIGHT_FRACTION * len(self.fallbacks) < 1.0:
1218 error_str = 'Max Fallback Weight %.3f%% is unachievable'%(
1219 MAX_WEIGHT_FRACTION*100)
1220 error_str += ' with Current Fallback Count %d.'%(len(self.fallbacks))
1221 if STRICT_FALLBACK_WEIGHTS:
1222 print '#error ' + error_str
1223 else:
1224 print '/* ' + error_str + ' */'
1225 relays_clamped = 0
1226 max_acceptable_weight = total_weight * MAX_WEIGHT_FRACTION
1227 for f in self.fallbacks:
1228 frac_weight = f.fallback_weight_fraction(total_weight)
1229 if frac_weight > MAX_WEIGHT_FRACTION:
1230 relays_clamped += 1
1231 current_weight = f._data['consensus_weight']
1232 # if we already have an original weight, keep it
1233 if (not f._data.has_key('original_consensus_weight')
1234 or f._data['original_consensus_weight'] == current_weight):
1235 f._data['original_consensus_weight'] = current_weight
1236 f._data['consensus_weight'] = max_acceptable_weight
1237 return relays_clamped
1239 # Remove any fallbacks with weights lower than MIN_WEIGHT_FRACTION
1240 # total_weight should be recalculated after calling this
1241 def exclude_low_weight_fallbacks(self, total_weight):
1242 self.fallbacks = filter(
1243 lambda x:
1244 x.fallback_weight_fraction(total_weight) >= MIN_WEIGHT_FRACTION,
1245 self.fallbacks)
1247 def fallback_weight_total(self):
1248 return sum(f._data['consensus_weight'] for f in self.fallbacks)
1250 def fallback_min_weight(self):
1251 if len(self.fallbacks) > 0:
1252 return self.fallbacks[-1]
1253 else:
1254 return None
1256 def fallback_max_weight(self):
1257 if len(self.fallbacks) > 0:
1258 return self.fallbacks[0]
1259 else:
1260 return None
1262 def summarise_fallbacks(self, eligible_count, eligible_weight,
1263 relays_clamped, clamped_weight,
1264 guard_count, target_count, max_count):
1265 # Report:
1266 # the number of fallback directories (with min & max limits);
1267 # #error if below minimum count
1268 # the total weight, min & max fallback proportions
1269 # #error if outside max weight proportion
1270 # Multiline C comment with #error if things go bad
1271 s = '/*'
1272 s += '\n'
1273 s += 'Fallback Directory Summary'
1274 s += '\n'
1275 # Integers don't need escaping in C comments
1276 fallback_count = len(self.fallbacks)
1277 if FALLBACK_PROPORTION_OF_GUARDS is None:
1278 fallback_proportion = ' (none)'
1279 else:
1280 fallback_proportion = '%d (%d * %f)'%(target_count, guard_count,
1281 FALLBACK_PROPORTION_OF_GUARDS)
1282 s += 'Final Count: %d (Eligible %d, Usable %d, Target %s'%(
1283 min(max_count, fallback_count),
1284 eligible_count,
1285 fallback_count,
1286 fallback_proportion)
1287 if MAX_FALLBACK_COUNT is not None:
1288 s += ', Clamped to %d'%(MAX_FALLBACK_COUNT)
1289 s += ')\n'
1290 if fallback_count < MIN_FALLBACK_COUNT:
1291 s += '*/'
1292 s += '\n'
1293 # We must have a minimum number of fallbacks so they are always
1294 # reachable, and are in diverse locations
1295 s += '#error Fallback Count %d is too low. '%(fallback_count)
1296 s += 'Must be at least %d for diversity. '%(MIN_FALLBACK_COUNT)
1297 s += 'Try adding entries to the whitelist, '
1298 s += 'or setting INCLUDE_UNLISTED_ENTRIES = True.'
1299 s += '\n'
1300 s += '/*'
1301 s += '\n'
1302 total_weight = self.fallback_weight_total()
1303 min_fb = self.fallback_min_weight()
1304 min_weight = min_fb._data['consensus_weight']
1305 min_percent = min_fb.fallback_weight_fraction(total_weight)*100.0
1306 max_fb = self.fallback_max_weight()
1307 max_weight = max_fb._data['consensus_weight']
1308 max_frac = max_fb.fallback_weight_fraction(total_weight)
1309 max_percent = max_frac*100.0
1310 s += 'Final Weight: %d (Eligible %d)'%(total_weight, eligible_weight)
1311 s += '\n'
1312 s += 'Max Weight: %d (%.3f%%) (Clamped to %.3f%%)'%(
1313 max_weight,
1314 max_percent,
1315 TARGET_MAX_WEIGHT_FRACTION*100)
1316 s += '\n'
1317 s += 'Min Weight: %d (%.3f%%) (Clamped to %.3f%%)'%(
1318 min_weight,
1319 min_percent,
1320 MIN_WEIGHT_FRACTION*100)
1321 s += '\n'
1322 if eligible_count != fallback_count:
1323 s += 'Excluded: %d (Clamped, Below Target, or Low Weight)'%(
1324 eligible_count - fallback_count)
1325 s += '\n'
1326 if relays_clamped > 0:
1327 s += 'Clamped: %d (%.3f%%) Excess Weight, '%(
1328 clamped_weight,
1329 (100.0 * clamped_weight) / total_weight)
1330 s += '%d High Weight Fallbacks (%.1f%%)'%(
1331 relays_clamped,
1332 (100.0 * relays_clamped) / fallback_count)
1333 s += '\n'
1334 s += '*/'
1335 if max_frac > TARGET_MAX_WEIGHT_FRACTION:
1336 s += '\n'
1337 # We must restrict the maximum fallback weight, so an adversary
1338 # at or near the fallback doesn't see too many clients
1339 error_str = 'Max Fallback Weight %.3f%% is too high. '%(max_frac*100)
1340 error_str += 'Must be at most %.3f%% for client anonymity.'%(
1341 TARGET_MAX_WEIGHT_FRACTION*100)
1342 if STRICT_FALLBACK_WEIGHTS:
1343 s += '#error ' + error_str
1344 else:
1345 s += '/* ' + error_str + ' */'
1346 s += '\n'
1347 if PERFORM_IPV4_DIRPORT_CHECKS or PERFORM_IPV6_DIRPORT_CHECKS:
1348 s += '/* Checked %s%s%s DirPorts served a consensus within %.1fs. */'%(
1349 'IPv4' if PERFORM_IPV4_DIRPORT_CHECKS else '',
1350 ' and ' if (PERFORM_IPV4_DIRPORT_CHECKS
1351 and PERFORM_IPV6_DIRPORT_CHECKS) else '',
1352 'IPv6' if PERFORM_IPV6_DIRPORT_CHECKS else '',
1353 CONSENSUS_DOWNLOAD_SPEED_MAX)
1354 else:
1355 s += '/* Did not check IPv4 or IPv6 DirPort consensus downloads. */'
1356 return s
1358 ## Main Function
1360 def list_fallbacks():
1361 """ Fetches required onionoo documents and evaluates the
1362 fallback directory criteria for each of the relays """
1364 # find relays that could be fallbacks
1365 candidates = CandidateList()
1366 candidates.add_relays()
1368 # work out how many fallbacks we want
1369 guard_count = candidates.count_guards()
1370 if FALLBACK_PROPORTION_OF_GUARDS is None:
1371 target_count = guard_count
1372 else:
1373 target_count = int(guard_count * FALLBACK_PROPORTION_OF_GUARDS)
1374 # the maximum number of fallbacks is the least of:
1375 # - the target fallback count (FALLBACK_PROPORTION_OF_GUARDS * guard count)
1376 # - the maximum fallback count (MAX_FALLBACK_COUNT)
1377 if MAX_FALLBACK_COUNT is None:
1378 max_count = guard_count
1379 else:
1380 max_count = min(target_count, MAX_FALLBACK_COUNT)
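# For example (illustrative numbers): with 1000 guards and the default
# FALLBACK_PROPORTION_OF_GUARDS of 0.2, target_count is 200, and
# max_count = min(200, MAX_FALLBACK_COUNT) = min(200, 500) = 200.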
1382 candidates.compute_fallbacks()
1384 # filter with the whitelist and blacklist
1385 initial_count = len(candidates.fallbacks)
1386 excluded_count = candidates.apply_filter_lists()
1387 print candidates.summarise_filters(initial_count, excluded_count)
1388 eligible_count = len(candidates.fallbacks)
1389 eligible_weight = candidates.fallback_weight_total()
1391 # print the raw fallback list
1392 #total_weight = candidates.fallback_weight_total()
1393 #for x in candidates.fallbacks:
1394 # print x.fallbackdir_line(total_weight, total_weight)
1396 # When candidates are excluded, total_weight decreases, and
1397 # the proportional weight of other candidates increases.
1398 candidates.exclude_excess_fallbacks()
1399 total_weight = candidates.fallback_weight_total()
1401 # When candidates are reweighted, total_weight decreases, and
1402 # the proportional weight of other candidates increases.
1403 # Previously low-weight candidates might obtain sufficient proportional
1404 # weights to be included.
1405 # Save the weight at which we reweighted fallbacks for the summary.
1406 pre_clamp_total_weight = total_weight
1407 relays_clamped = candidates.clamp_high_weight_fallbacks(total_weight)
1409 # When candidates are excluded, total_weight decreases, and
1410 # the proportional weight of other candidates increases.
1411 # No new low weight candidates will be created during exclusions.
1412 # However, high weight candidates may increase over the maximum proportion.
1413 # This should not be an issue, except in pathological cases.
1414 candidates.exclude_low_weight_fallbacks(total_weight)
1415 total_weight = candidates.fallback_weight_total()
1417 # check we haven't exceeded TARGET_MAX_WEIGHT_FRACTION
1418 # since reweighting preserves the original sort order,
1419 # the maximum weights will be at the head of the list
1420 if len(candidates.fallbacks) > 0:
1421 max_weight_fb = candidates.fallback_max_weight()
1422 max_weight = max_weight_fb.fallback_weight_fraction(total_weight)
1423 if max_weight > TARGET_MAX_WEIGHT_FRACTION:
1424 error_str = 'Maximum fallback weight: %.3f%% exceeds target %.3f%%. '%(
1425 max_weight*100.0,
1426 TARGET_MAX_WEIGHT_FRACTION*100.0)
1427 error_str += 'Try decreasing REWEIGHTING_FUDGE_FACTOR.'
1428 if STRICT_FALLBACK_WEIGHTS:
1429 print '#error ' + error_str
1430 else:
1431 print '/* ' + error_str + ' */'
1433 print candidates.summarise_fallbacks(eligible_count, eligible_weight,
1434 relays_clamped,
1435 pre_clamp_total_weight - total_weight,
1436 guard_count, target_count, max_count)
1437 else:
1438 print '/* No Fallbacks met criteria */'
1440 for s in fetch_source_list():
1441 print describe_fetch_source(s)
1443 for x in candidates.fallbacks[:max_count]:
1444 dl_speed_ok = x.fallback_consensus_dl_check()
1445 print x.fallbackdir_line(total_weight, pre_clamp_total_weight, dl_speed_ok)
1446 #print json.dumps(candidates[x]._data, sort_keys=True, indent=4,
1447 # separators=(',', ': '), default=json_util.default)
1449 if __name__ == "__main__":
1450 list_fallbacks()