#!/usr/bin/env python

# Usage:
#
# Regenerate the list:
# scripts/maint/updateFallbackDirs.py > src/or/fallback_dirs.inc 2> fallback_dirs.log
#
# Check the existing list:
# scripts/maint/updateFallbackDirs.py check_existing > fallback_dirs.inc.ok 2> fallback_dirs.log
# mv fallback_dirs.inc.ok src/or/fallback_dirs.inc
#
# This script should be run from a stable, reliable network connection,
# with no other network activity (and not over tor).
# If this is not possible, please disable:
# PERFORM_IPV4_DIRPORT_CHECKS and PERFORM_IPV6_DIRPORT_CHECKS
#
# Needs dateutil, stem, and potentially other python packages.
# Optionally uses ipaddress (python 3 builtin) or py2-ipaddress (package)
# for netblock analysis.
#
# Then read the logs to make sure the fallbacks aren't dominated by a single
# netblock or port.
#
# Script by weasel, April 2015
# Portions by gsathya & karsten, 2013
# https://trac.torproject.org/projects/tor/attachment/ticket/8374/dir_list.2.py
# Modifications by teor, 2015

import StringIO
import string
import re
import datetime
import gzip
import os.path
import json
import math
import sys
import urllib
import urllib2
import hashlib
import dateutil.parser
# bson_lazy provides bson
#from bson import json_util
import copy

from stem.descriptor import DocumentHandler
from stem.descriptor.remote import get_consensus, get_server_descriptors, MAX_FINGERPRINTS

import logging
logging.root.name = ''

HAVE_IPADDRESS = False
try:
  # python 3 builtin, or install package py2-ipaddress
  # there are several ipaddress implementations for python 2
  # with slightly different semantics with str typed text
  # fortunately, all our IP addresses are in unicode
  import ipaddress
  HAVE_IPADDRESS = True
except ImportError:
  # if this happens, we avoid doing netblock analysis
  logging.warning('Unable to import ipaddress, please install py2-ipaddress.' +
                  ' A fallback list will be created, but optional netblock' +
                  ' analysis will not be performed.')

## Top-Level Configuration

# We use semantic versioning: https://semver.org
# In particular:
# * major changes include removing a mandatory field, or anything else that
#   would break an appropriately tolerant parser,
# * minor changes include adding a field,
# * patch changes include changing header comments or other unstructured
#   content
FALLBACK_FORMAT_VERSION = '2.0.0'
SECTION_SEPARATOR_BASE = '====='
SECTION_SEPARATOR_COMMENT = '/* ' + SECTION_SEPARATOR_BASE + ' */'

# Output all candidate fallbacks, or only output selected fallbacks?
OUTPUT_CANDIDATES = False

# Perform DirPort checks over IPv4?
# Change this to False if IPv4 doesn't work for you, or if you don't want to
# download a consensus for each fallback
# Don't check ~1000 candidates when OUTPUT_CANDIDATES is True
PERFORM_IPV4_DIRPORT_CHECKS = False if OUTPUT_CANDIDATES else True

# Perform DirPort checks over IPv6?
# If you know IPv6 works for you, set this to True
# This will exclude IPv6 relays without an IPv6 DirPort configured
# So it's best left at False until #18394 is implemented
# Don't check ~1000 candidates when OUTPUT_CANDIDATES is True
PERFORM_IPV6_DIRPORT_CHECKS = False if OUTPUT_CANDIDATES else False

# Must relays be running now?
MUST_BE_RUNNING_NOW = (PERFORM_IPV4_DIRPORT_CHECKS
                       or PERFORM_IPV6_DIRPORT_CHECKS)

# Clients have been using microdesc consensuses by default for a while now
DOWNLOAD_MICRODESC_CONSENSUS = True

# If a relay delivers an expired consensus that expired less than this many
# seconds ago, we still allow the relay. This should never be less than -90,
# as all directory mirrors should have downloaded a consensus 90 minutes
# before it expires. It should never be more than 24 hours, because clients
# reject consensuses that are older than REASONABLY_LIVE_TIME.
# For the consensus expiry check to be accurate, the machine running this
# script needs an accurate clock.
#
# Relays on 0.3.0 and later return a 404 when they are about to serve an
# expired consensus. This makes them fail the download check.
# We use a tolerance of 0, so that 0.2.x series relays also fail the download
# check if they serve an expired consensus.
CONSENSUS_EXPIRY_TOLERANCE = 0

# Output fallback name, flags, bandwidth, and ContactInfo in a C comment?
OUTPUT_COMMENTS = True if OUTPUT_CANDIDATES else False

# Output matching ContactInfo in fallbacks list or the blacklist?
# Useful if you're trying to contact operators
CONTACT_COUNT = True if OUTPUT_CANDIDATES else False
CONTACT_BLACKLIST_COUNT = True if OUTPUT_CANDIDATES else False

# How the list should be sorted:
# fingerprint: is useful for stable diffs of fallback lists
# measured_bandwidth: is useful when pruning the list based on bandwidth
# contact: is useful for contacting operators once the list has been pruned
OUTPUT_SORT_FIELD = 'contact' if OUTPUT_CANDIDATES else 'fingerprint'

## OnionOO Settings

ONIONOO = 'https://onionoo.torproject.org/'
#ONIONOO = 'https://onionoo.thecthulhu.com/'

# Don't bother going out to the Internet, just use the files available locally,
# even if they're very old
LOCAL_FILES_ONLY = False

## Whitelist / Blacklist Filter Settings

# The whitelist contains entries that are included if all attributes match
# (IPv4, dirport, orport, id, and optionally IPv6 and IPv6 orport)
# The blacklist contains (partial) entries that are excluded if any
# sufficiently specific group of attributes matches:
# IPv4 & DirPort
# IPv4 & ORPort
# ID
# IPv6 & DirPort
# IPv6 & IPv6 ORPort
# If neither port is included in the blacklist, the entire IP address is
# blacklisted.
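#
# For illustration only (hypothetical addresses, not real relays):
#   192.0.2.1:80           blacklists only the relay with DirPort 80 there
#   192.0.2.1 orport=9001  blacklists only the relay with that ORPort
#   192.0.2.1              blacklists every relay on that IP address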

# What happens to entries in neither list?
# When True, they are included, when False, they are excluded
INCLUDE_UNLISTED_ENTRIES = True if OUTPUT_CANDIDATES else False

# If an entry is in both lists, what happens?
# When True, it is excluded, when False, it is included
BLACKLIST_EXCLUDES_WHITELIST_ENTRIES = True

WHITELIST_FILE_NAME = 'scripts/maint/fallback.whitelist'
BLACKLIST_FILE_NAME = 'scripts/maint/fallback.blacklist'
FALLBACK_FILE_NAME = 'src/or/fallback_dirs.inc'

# The number of bytes we'll read from a filter file before giving up
MAX_LIST_FILE_SIZE = 1024 * 1024

## Eligibility Settings

# Require fallbacks to have the same address and port for a set amount of time
# We used to have this at 1 week, but that caused many fallback failures, which
# meant that we had to rebuild the list more often. We want fallbacks to be
# stable for 2 years, so we set it to a few months.
#
# If a relay changes address or port, that's it, it's not useful any more,
# because clients can't find it
ADDRESS_AND_PORT_STABLE_DAYS = 90
# We ignore relays that have been down for more than this period
MAX_DOWNTIME_DAYS = 0 if MUST_BE_RUNNING_NOW else 7
# FallbackDirs must have a time-weighted-fraction that is greater than or
# equal to:
# Mirrors that are down half the time are still useful half the time
CUTOFF_RUNNING = .50
CUTOFF_V2DIR = .50
# Guard flags are removed for some time after a relay restarts, so we ignore
# the guard flag.
CUTOFF_GUARD = .00
# FallbackDirs must have a time-weighted-fraction that is less than or equal
# to:
# .00 means no bad exits
PERMITTED_BADEXIT = .00

# older entries' weights are adjusted with ALPHA^(age in days)
AGE_ALPHA = 0.99
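# e.g. (illustrative arithmetic): an observation 30 days old is weighted by
# 0.99**30 ~= 0.74, and one 365 days old by 0.99**365 ~= 0.026, so recent
# history dominates the time-weighted fractions above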

# this factor is used to scale OnionOO entries to [0,1]
ONIONOO_SCALE_ONE = 999.

## Fallback Count Limits

# The target for these parameters is 20% of the guards in the network
# This is around 200 as of October 2015
_FB_POG = 0.2
FALLBACK_PROPORTION_OF_GUARDS = None if OUTPUT_CANDIDATES else _FB_POG

# Limit the number of fallbacks (eliminating lowest by advertised bandwidth)
MAX_FALLBACK_COUNT = None if OUTPUT_CANDIDATES else 200
# Emit a C #error if the number of fallbacks is less than expected
MIN_FALLBACK_COUNT = 0 if OUTPUT_CANDIDATES else MAX_FALLBACK_COUNT*0.5

# The maximum number of fallbacks on the same address, contact, or family
#
# With 150 fallbacks, this means each operator sees 5% of client bootstraps.
# For comparison:
#  - We try to limit guard and exit operators to 5% of the network
#  - The directory authorities used to see 11% of client bootstraps each
#
# We also don't want too much of the list to go down if a single operator
# has to move all their relays.
MAX_FALLBACKS_PER_IP = 1
MAX_FALLBACKS_PER_IPV4 = MAX_FALLBACKS_PER_IP
MAX_FALLBACKS_PER_IPV6 = MAX_FALLBACKS_PER_IP
MAX_FALLBACKS_PER_CONTACT = 7
MAX_FALLBACKS_PER_FAMILY = 7

## Fallback Bandwidth Requirements

# Any fallback with the Exit flag has its bandwidth multiplied by this fraction
# to make sure we aren't further overloading exits
# (Set to 1.0, because we asked that only lightly loaded exits opt-in,
# and the extra load really isn't that much for large relays.)
EXIT_BANDWIDTH_FRACTION = 1.0

# If a single fallback's bandwidth is too low, it's pointless adding it
# We expect fallbacks to handle an extra 10 kilobytes per second of traffic
# Make sure they can support fifty times the expected extra load
#
# We convert this to a consensus weight before applying the filter,
# because all the bandwidth amounts are specified by the relay
MIN_BANDWIDTH = 50.0 * 10.0 * 1024.0
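# i.e. 50 * 10 * 1024 = 512000 bytes/s, roughly 0.5 MByte/s of sustained
# capacity per fallback (illustrative arithmetic only)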

# Clients will time out after 30 seconds trying to download a consensus
# So allow fallback directories half that to deliver a consensus
# The exact download times might change based on the network connection
# running this script, but only by a few seconds
# There is also about a second of python overhead
CONSENSUS_DOWNLOAD_SPEED_MAX = 15.0
# If the relay fails a consensus check, retry the download
# This avoids delisting a relay due to transient network conditions
CONSENSUS_DOWNLOAD_RETRY = True

## Parsing Functions

def parse_ts(t):
  return datetime.datetime.strptime(t, "%Y-%m-%d %H:%M:%S")
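# e.g. (illustrative): parse_ts("2015-03-30 06:00:00") returns
# datetime.datetime(2015, 3, 30, 6, 0)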

def remove_bad_chars(raw_string, bad_char_list):
  # Remove each character in the bad_char_list
  cleansed_string = raw_string
  for c in bad_char_list:
    cleansed_string = cleansed_string.replace(c, '')
  return cleansed_string

def cleanse_unprintable(raw_string):
  # Remove all unprintable characters
  cleansed_string = ''
  for c in raw_string:
    if c in string.printable:
      cleansed_string += c
  return cleansed_string

def cleanse_whitespace(raw_string):
  # Replace all whitespace characters with a space
  cleansed_string = raw_string
  for c in string.whitespace:
    cleansed_string = cleansed_string.replace(c, ' ')
  return cleansed_string

def cleanse_c_multiline_comment(raw_string):
  cleansed_string = raw_string
  # Embedded newlines should be removed by tor/onionoo, but let's be paranoid
  cleansed_string = cleanse_whitespace(cleansed_string)
  # ContactInfo and Version can be arbitrary binary data
  cleansed_string = cleanse_unprintable(cleansed_string)
  # Prevent a malicious / unanticipated string from breaking out
  # of a C-style multiline comment
  # This removes '/*' and '*/' and '//'
  bad_char_list = '*/'
  # Prevent a malicious string from using C nulls
  bad_char_list += '\0'
  # Avoid confusing parsers by making sure there is only one comma per fallback
  bad_char_list += ','
  # Avoid confusing parsers by making sure there is only one equals per field
  bad_char_list += '='
  # Be safer by removing bad characters entirely
  cleansed_string = remove_bad_chars(cleansed_string, bad_char_list)
  # Some compilers may further process the content of comments
  # There isn't much we can do to cover every possible case
  # But comment-based directives are typically only advisory
  return cleansed_string
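# e.g. (illustrative): cleanse_c_multiline_comment('evil */ comment,x=y')
# returns 'evil  commentxy', with the comment terminator and the
# comma/equals separators stripped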

def cleanse_c_string(raw_string):
  cleansed_string = raw_string
  # Embedded newlines should be removed by tor/onionoo, but let's be paranoid
  cleansed_string = cleanse_whitespace(cleansed_string)
  # ContactInfo and Version can be arbitrary binary data
  cleansed_string = cleanse_unprintable(cleansed_string)
  # Prevent a malicious address/fingerprint string from breaking out
  # of a C-style string
  bad_char_list = '"'
  # Prevent a malicious string from using escapes
  bad_char_list += '\\'
  # Prevent a malicious string from using C nulls
  bad_char_list += '\0'
  # Avoid confusing parsers by making sure there is only one comma per fallback
  bad_char_list += ','
  # Avoid confusing parsers by making sure there is only one equals per field
  bad_char_list += '='
  # Be safer by removing bad characters entirely
  cleansed_string = remove_bad_chars(cleansed_string, bad_char_list)
  # Some compilers may further process the content of strings
  # There isn't much we can do to cover every possible case
  # But this typically only results in changes to the string data
  return cleansed_string

## OnionOO Source Functions

# a dictionary of source metadata for each onionoo query we've made
fetch_source = {}

# register source metadata for 'what'
# assumes we only retrieve one document for each 'what'
def register_fetch_source(what, url, relays_published, version):
  fetch_source[what] = {}
  fetch_source[what]['url'] = url
  fetch_source[what]['relays_published'] = relays_published
  fetch_source[what]['version'] = version

# list each registered source's 'what'
def fetch_source_list():
  return sorted(fetch_source.keys())

# given 'what', provide a multiline C comment describing the source
def describe_fetch_source(what):
  desc = '/*'
  desc += '\n'
  desc += 'Onionoo Source: '
  desc += cleanse_c_multiline_comment(what)
  desc += ' Date: '
  desc += cleanse_c_multiline_comment(fetch_source[what]['relays_published'])
  desc += ' Version: '
  desc += cleanse_c_multiline_comment(fetch_source[what]['version'])
  desc += '\n'
  desc += 'URL: '
  desc += cleanse_c_multiline_comment(fetch_source[what]['url'])
  desc += '\n'
  desc += '*/'
  return desc
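# e.g. (illustrative, hypothetical values) describe_fetch_source('details')
# might return:
# /*
# Onionoo Source: details Date: 2015-10-02 13:34:14 Version: 3.1
# URL: https://onionoo.torproject.org/details?type=relay&flag=V2Dir...
# */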

## File Processing Functions

def write_to_file(str, file_name, max_len):
  try:
    with open(file_name, 'w') as f:
      f.write(str[0:max_len])
  except EnvironmentError, error:
    logging.error('Writing file %s failed: %d: %s'%
                  (file_name,
                   error.errno,
                   error.strerror))

def read_from_file(file_name, max_len):
  try:
    if os.path.isfile(file_name):
      with open(file_name, 'r') as f:
        return f.read(max_len)
  except EnvironmentError, error:
    logging.info('Loading file %s failed: %d: %s'%
                 (file_name,
                  error.errno,
                  error.strerror))
  return None

def parse_fallback_file(file_name):
  file_data = read_from_file(file_name, MAX_LIST_FILE_SIZE)
  file_data = cleanse_unprintable(file_data)
  file_data = remove_bad_chars(file_data, '\n"\0')
  file_data = re.sub('/\*.*?\*/', '', file_data)
  file_data = file_data.replace(',', '\n')
  file_data = file_data.replace(' weight=10', '')
  return file_data

def load_possibly_compressed_response_json(response):
  if response.info().get('Content-Encoding') == 'gzip':
    buf = StringIO.StringIO( response.read() )
    f = gzip.GzipFile(fileobj=buf)
    return json.load(f)
  else:
    return json.load(response)

def load_json_from_file(json_file_name):
  # An exception here may be resolved by deleting the .last_modified
  # and .json files, and re-running the script
  try:
    with open(json_file_name, 'r') as f:
      return json.load(f)
  except EnvironmentError, error:
    raise Exception('Reading not-modified json file %s failed: %d: %s'%
                    (json_file_name,
                     error.errno,
                     error.strerror))

## OnionOO Functions

def datestr_to_datetime(datestr):
  # Parse datetimes like: Fri, 02 Oct 2015 13:34:14 GMT
  if datestr is not None:
    dt = dateutil.parser.parse(datestr)
  else:
    # Never modified - use start of epoch
    dt = datetime.datetime.utcfromtimestamp(0)
  # strip any timezone out (in case they're supported in future)
  dt = dt.replace(tzinfo=None)
  return dt
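# e.g. (illustrative): datestr_to_datetime('Fri, 02 Oct 2015 13:34:14 GMT')
# returns datetime.datetime(2015, 10, 2, 13, 34, 14), with the timezone
# stripped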

def onionoo_fetch(what, **kwargs):
  params = kwargs
  params['type'] = 'relay'
  #params['limit'] = 10
  params['first_seen_days'] = '%d-'%(ADDRESS_AND_PORT_STABLE_DAYS)
  params['last_seen_days'] = '-%d'%(MAX_DOWNTIME_DAYS)
  params['flag'] = 'V2Dir'
  url = ONIONOO + what + '?' + urllib.urlencode(params)
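  # e.g. (illustrative; parameter order depends on urlencode) the URL looks
  # like:
  # https://onionoo.torproject.org/details?type=relay&first_seen_days=90-&last_seen_days=-7&flag=V2Dir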

  # Unfortunately, the URL is too long for some OS filenames,
  # but we still don't want to get files from different URLs mixed up
  base_file_name = what + '-' + hashlib.sha1(url).hexdigest()

  full_url_file_name = base_file_name + '.full_url'
  MAX_FULL_URL_LENGTH = 1024

  last_modified_file_name = base_file_name + '.last_modified'
  MAX_LAST_MODIFIED_LENGTH = 64

  json_file_name = base_file_name + '.json'

  if LOCAL_FILES_ONLY:
    # Read from the local file, don't write to anything
    response_json = load_json_from_file(json_file_name)
  else:
    # store the full URL to a file for debugging
    # no need to compare as long as you trust SHA-1
    write_to_file(url, full_url_file_name, MAX_FULL_URL_LENGTH)

    request = urllib2.Request(url)
    request.add_header('Accept-encoding', 'gzip')

    # load the last modified date from the file, if it exists
    last_mod_date = read_from_file(last_modified_file_name,
                                   MAX_LAST_MODIFIED_LENGTH)
    if last_mod_date is not None:
      request.add_header('If-modified-since', last_mod_date)

    # Parse last modified date
    last_mod = datestr_to_datetime(last_mod_date)

    # Not Modified and still recent enough to be useful
    # Onionoo / Globe used to use 6 hours, but we can afford a day
    required_freshness = datetime.datetime.utcnow()
    # strip any timezone out (to match dateutil.parser)
    required_freshness = required_freshness.replace(tzinfo=None)
    required_freshness -= datetime.timedelta(hours=24)

    # Make the OnionOO request
    response_code = 0
    try:
      response = urllib2.urlopen(request)
      response_code = response.getcode()
    except urllib2.HTTPError, error:
      response_code = error.code
      if response_code == 304: # not modified
        pass
      else:
        raise Exception("Could not get " + url + ": "
                        + str(error.code) + ": " + error.reason)

    if response_code == 200: # OK
      last_mod = datestr_to_datetime(response.info().get('Last-Modified'))

    # Check for freshness
    if last_mod < required_freshness:
      if last_mod_date is not None:
        # This check sometimes fails transiently, retry the script if it does
        date_message = "Outdated data: last updated " + last_mod_date
      else:
        date_message = "No data: never downloaded "
      raise Exception(date_message + " from " + url)

    # Process the data
    if response_code == 200: # OK

      response_json = load_possibly_compressed_response_json(response)

      with open(json_file_name, 'w') as f:
        # use the most compact json representation to save space
        json.dump(response_json, f, separators=(',',':'))

      # store the last modified date in its own file
      if response.info().get('Last-Modified') is not None:
        write_to_file(response.info().get('Last-Modified'),
                      last_modified_file_name,
                      MAX_LAST_MODIFIED_LENGTH)

    elif response_code == 304: # Not Modified

      response_json = load_json_from_file(json_file_name)

    else: # Unexpected HTTP response code not covered in the HTTPError above
      raise Exception("Unexpected HTTP response code to " + url + ": "
                      + str(response_code))

  register_fetch_source(what,
                        url,
                        response_json['relays_published'],
                        response_json['version'])

  return response_json

def fetch(what, **kwargs):
  #x = onionoo_fetch(what, **kwargs)
  # don't use sort_keys, as the order of or_addresses is significant
  #print json.dumps(x, indent=4, separators=(',', ': '))
  #sys.exit(0)

  return onionoo_fetch(what, **kwargs)

## Fallback Candidate Class

class Candidate(object):
  CUTOFF_ADDRESS_AND_PORT_STABLE = (datetime.datetime.utcnow()
                            - datetime.timedelta(ADDRESS_AND_PORT_STABLE_DAYS))

  def __init__(self, details):
    for f in ['fingerprint', 'nickname', 'last_changed_address_or_port',
              'consensus_weight', 'or_addresses', 'dir_address']:
      if not f in details: raise Exception("Document has no %s field."%(f,))

    if not 'contact' in details:
      details['contact'] = None
    if not 'flags' in details or details['flags'] is None:
      details['flags'] = []
    if (not 'advertised_bandwidth' in details
        or details['advertised_bandwidth'] is None):
      # relays without advertised bandwidth have it calculated from their
      # consensus weight
      details['advertised_bandwidth'] = 0
    if (not 'effective_family' in details
        or details['effective_family'] is None):
      details['effective_family'] = []
    if not 'platform' in details:
      details['platform'] = None
    details['last_changed_address_or_port'] = parse_ts(
                                      details['last_changed_address_or_port'])
    self._data = details
    self._stable_sort_or_addresses()

    self._fpr = self._data['fingerprint']
    self._running = self._guard = self._v2dir = 0.
    self._split_dirport()
    self._compute_orport()
    if self.orport is None:
      raise Exception("Failed to get an orport for %s."%(self._fpr,))
    self._compute_ipv6addr()
    if not self.has_ipv6():
      logging.debug("Failed to get an ipv6 address for %s."%(self._fpr,))
    self._compute_version()
    self._extra_info_cache = None

  def _stable_sort_or_addresses(self):
    # replace self._data['or_addresses'] with a stable ordering,
    # sorting the secondary addresses in string order
    # leave the received order in self._data['or_addresses_raw']
    self._data['or_addresses_raw'] = self._data['or_addresses']
    or_address_primary = self._data['or_addresses'][:1]
    # subsequent entries in the or_addresses array are in an arbitrary order
    # so we stabilise the addresses by sorting them in string order
    or_addresses_secondaries_stable = sorted(self._data['or_addresses'][1:])
    or_addresses_stable = or_address_primary + or_addresses_secondaries_stable
    self._data['or_addresses'] = or_addresses_stable

  def get_fingerprint(self):
    return self._fpr

  # is_valid_ipv[46]_address by gsathya, karsten, 2013
  @staticmethod
  def is_valid_ipv4_address(address):
    if not isinstance(address, (str, unicode)):
      return False

    # check if there are four period separated values
    if address.count(".") != 3:
      return False

    # check that each value in the octet is a decimal value between 0-255
    for entry in address.split("."):
      if not entry.isdigit() or int(entry) < 0 or int(entry) > 255:
        return False
      elif entry[0] == "0" and len(entry) > 1:
        return False # leading zeros, for instance in "1.2.3.001"

    return True

  @staticmethod
  def is_valid_ipv6_address(address):
    if not isinstance(address, (str, unicode)):
      return False

    # remove brackets
    address = address[1:-1]

    # addresses are made up of eight colon separated groups of four hex digits
    # with leading zeros being optional
    # https://en.wikipedia.org/wiki/IPv6#Address_format

    colon_count = address.count(":")

    if colon_count > 7:
      return False # too many groups
    elif colon_count != 7 and not "::" in address:
      return False # not enough groups and none are collapsed
    elif address.count("::") > 1 or ":::" in address:
      return False # multiple groupings of zeros can't be collapsed

    found_ipv4_on_previous_entry = False
    for entry in address.split(":"):
      # If an IPv6 address has an embedded IPv4 address,
      # it must be the last entry
      if found_ipv4_on_previous_entry:
        return False
      if not re.match("^[0-9a-fA-F]{0,4}$", entry):
        if not Candidate.is_valid_ipv4_address(entry):
          return False
        else:
          found_ipv4_on_previous_entry = True

    return True
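  # e.g. (illustrative): '[2001:db8::1]' is accepted (collapsed zero groups),
  # while '[2001:db8]' is rejected (too few groups and none are collapsed)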

  def _split_dirport(self):
    # Split the dir_address into dirip and dirport
    (self.dirip, _dirport) = self._data['dir_address'].split(':', 2)
    self.dirport = int(_dirport)

  def _compute_orport(self):
    # Choose the first ORPort that's on the same IPv4 address as the DirPort.
    # In rare circumstances, this might not be the primary ORPort address.
    # However, _stable_sort_or_addresses() ensures we choose the same one
    # every time, even if onionoo changes the order of the secondaries.
    self._split_dirport()
    self.orport = None
    for i in self._data['or_addresses']:
      if i != self._data['or_addresses'][0]:
        logging.debug('Secondary IPv4 Address Used for %s: %s'%(self._fpr, i))
      (ipaddr, port) = i.rsplit(':', 1)
      if (ipaddr == self.dirip) and Candidate.is_valid_ipv4_address(ipaddr):
        self.orport = int(port)
        return

  def _compute_ipv6addr(self):
    # Choose the first IPv6 address that uses the same port as the ORPort
    # Or, choose the first IPv6 address in the list
    # _stable_sort_or_addresses() ensures we choose the same IPv6 address
    # every time, even if onionoo changes the order of the secondaries.
    self.ipv6addr = None
    self.ipv6orport = None
    # Choose the first IPv6 address that uses the same port as the ORPort
    # (port is a string from rsplit, so convert before comparing to the int
    # orport)
    for i in self._data['or_addresses']:
      (ipaddr, port) = i.rsplit(':', 1)
      if (int(port) == self.orport) and Candidate.is_valid_ipv6_address(ipaddr):
        self.ipv6addr = ipaddr
        self.ipv6orport = int(port)
        return
    # Choose the first IPv6 address in the list
    for i in self._data['or_addresses']:
      (ipaddr, port) = i.rsplit(':', 1)
      if Candidate.is_valid_ipv6_address(ipaddr):
        self.ipv6addr = ipaddr
        self.ipv6orport = int(port)
        return

  def _compute_version(self):
    # parse the version out of the platform string
    # The platform looks like: "Tor 0.2.7.6 on Linux"
    self._data['version'] = None
    if self._data['platform'] is None:
      return
    # be tolerant of weird whitespacing, use a whitespace split
    tokens = self._data['platform'].split()
    for token in tokens:
      vnums = token.split('.')
      # if it's at least a.b.c.d, with potentially an -alpha-dev, -alpha, -rc
      if (len(vnums) >= 4 and vnums[0].isdigit() and vnums[1].isdigit() and
          vnums[2].isdigit()):
        self._data['version'] = token
        return

  # From #20509
  # bug #20499 affects versions from 0.2.9.1-alpha-dev to 0.2.9.4-alpha-dev
  # and version 0.3.0.0-alpha-dev
  # Exhaustive lists are hard to get wrong
  STALE_CONSENSUS_VERSIONS = ['0.2.9.1-alpha-dev',
                              '0.2.9.2-alpha',
                              '0.2.9.2-alpha-dev',
                              '0.2.9.3-alpha',
                              '0.2.9.3-alpha-dev',
                              '0.2.9.4-alpha',
                              '0.2.9.4-alpha-dev',
                              '0.3.0.0-alpha-dev',
                              ]

  def is_valid_version(self):
    # call _compute_version before calling this
    # is the version of the relay a version we want as a fallback?
    # checks both recommended versions and bug #20499 / #20509
    #
    # if the relay doesn't have a recommended version field, exclude the relay
    if not self._data.has_key('recommended_version'):
      log_excluded('%s not a candidate: no recommended_version field',
                   self._fpr)
      return False
    if not self._data['recommended_version']:
      log_excluded('%s not a candidate: version not recommended', self._fpr)
      return False
    # if the relay doesn't have a version field, exclude the relay
    if not self._data.has_key('version'):
      log_excluded('%s not a candidate: no version field', self._fpr)
      return False
    if self._data['version'] in Candidate.STALE_CONSENSUS_VERSIONS:
      logging.warning('%s not a candidate: version delivers stale consensuses',
                      self._fpr)
      return False
    return True

  @staticmethod
  def _extract_generic_history(history, which='unknown'):
    # given a tree like this:
    #   "1_month": {
    #     "count": 187,
    #     "factor": 0.001001001001001001,
    #     "first": "2015-02-27 06:00:00",
    #     "interval": 14400,
    #     "last": "2015-03-30 06:00:00",
    #     "values": [
    #       999,
    #       999
    #     ]
    #   },
    #   "1_week": {
    #     "count": 169,
    #     "factor": 0.001001001001001001,
    #     "first": "2015-03-23 07:30:00",
    #     "interval": 3600,
    #     "last": "2015-03-30 07:30:00",
    #     "values": [ ...]
    #   },
    #   "1_year": {
    #     "count": 177,
    #     "factor": 0.001001001001001001,
    #     "first": "2014-04-11 00:00:00",
    #     "interval": 172800,
    #     "last": "2015-03-29 00:00:00",
    #     "values": [ ...]
    #   },
    #   "3_months": {
    #     "count": 185,
    #     "factor": 0.001001001001001001,
    #     "first": "2014-12-28 06:00:00",
    #     "interval": 43200,
    #     "last": "2015-03-30 06:00:00",
    #     "values": [ ...]
    #   },
    # extract exactly one piece of data per time interval,
    # using smaller intervals where available.
    #
    # returns list of (age, length, value) dictionaries.

    generic_history = []

    periods = history.keys()
    periods.sort(key = lambda x: history[x]['interval'])
    now = datetime.datetime.utcnow()
    newest = now
    for p in periods:
      h = history[p]
      interval = datetime.timedelta(seconds = h['interval'])
      this_ts = parse_ts(h['last'])

      if (len(h['values']) != h['count']):
        logging.warning('Inconsistent value count in %s document for %s'
                        %(p, which))
      for v in reversed(h['values']):
        if (this_ts <= newest):
          agt1 = now - this_ts
          agt2 = interval
          agetmp1 = (agt1.microseconds + (agt1.seconds + agt1.days * 24 * 3600)
                     * 10**6) / 10**6
          agetmp2 = (agt2.microseconds + (agt2.seconds + agt2.days * 24 * 3600)
                     * 10**6) / 10**6
          generic_history.append(
            { 'age': agetmp1,
              'length': agetmp2,
              'value': v
            })
          newest = this_ts
        this_ts -= interval

      if (this_ts + interval != parse_ts(h['first'])):
        logging.warning('Inconsistent time information in %s document for %s'
                        %(p, which))

    #print json.dumps(generic_history, sort_keys=True,
    #                 indent=4, separators=(',', ': '))
    return generic_history
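  # e.g. (illustrative) _extract_generic_history returns entries like:
  #   { 'age': 86400, 'length': 3600, 'value': 999 }
  # meaning a value of 999 over a 1-hour interval, observed one day ago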

  @staticmethod
  def _avg_generic_history(generic_history):
    a = []
    for i in generic_history:
      if i['age'] > (ADDRESS_AND_PORT_STABLE_DAYS * 24 * 3600):
        continue
      if (i['length'] is not None
          and i['age'] is not None
          and i['value'] is not None):
        w = i['length'] * math.pow(AGE_ALPHA, i['age']/(3600*24))
        a.append( (i['value'] * w, w) )

    sv = math.fsum(map(lambda x: x[0], a))
    sw = math.fsum(map(lambda x: x[1], a))

    if sw == 0.0:
      svw = 0.0
    else:
      svw = sv/sw
    return svw
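  # i.e. the weighted average sum(value_i * w_i) / sum(w_i), where
  # w_i = length_i * AGE_ALPHA**(age_i in days)
  # e.g. (illustrative): a 1-hour sample from 30 days ago has
  # w = 3600 * 0.99**30 ~= 2663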

  # debugging helper: prints the available history periods
  def _add_generic_history(self, history):
    periods = history['read_history'].keys()
    periods.sort(key = lambda x: history['read_history'][x]['interval'] )

    print periods

  def add_running_history(self, history):
    pass

  def add_uptime(self, uptime):
    logging.debug('Adding uptime %s.'%(self._fpr,))

    # flags we care about: Running, V2Dir, Guard
    if not 'flags' in uptime:
      logging.debug('No flags in document for %s.'%(self._fpr,))
      return

    for f in ['Running', 'Guard', 'V2Dir']:
      if not f in uptime['flags']:
        logging.debug('No %s in flags for %s.'%(f, self._fpr,))
        return

    running = self._extract_generic_history(uptime['flags']['Running'],
                                            '%s-Running'%(self._fpr))
    guard = self._extract_generic_history(uptime['flags']['Guard'],
                                          '%s-Guard'%(self._fpr))
    v2dir = self._extract_generic_history(uptime['flags']['V2Dir'],
                                          '%s-V2Dir'%(self._fpr))
    if 'BadExit' in uptime['flags']:
      badexit = self._extract_generic_history(uptime['flags']['BadExit'],
                                              '%s-BadExit'%(self._fpr))

    self._running = self._avg_generic_history(running) / ONIONOO_SCALE_ONE
    self._guard = self._avg_generic_history(guard) / ONIONOO_SCALE_ONE
    self._v2dir = self._avg_generic_history(v2dir) / ONIONOO_SCALE_ONE
    self._badexit = None
    if 'BadExit' in uptime['flags']:
      self._badexit = self._avg_generic_history(badexit) / ONIONOO_SCALE_ONE

  def is_candidate(self):
    try:
      if (MUST_BE_RUNNING_NOW and not self.is_running()):
        log_excluded('%s not a candidate: not running now, unable to check ' +
                     'DirPort consensus download', self._fpr)
        return False
      if (self._data['last_changed_address_or_port'] >
          self.CUTOFF_ADDRESS_AND_PORT_STABLE):
        log_excluded('%s not a candidate: changed address/port recently (%s)',
                     self._fpr, self._data['last_changed_address_or_port'])
        return False
      if self._running < CUTOFF_RUNNING:
        log_excluded('%s not a candidate: running avg too low (%lf)',
                     self._fpr, self._running)
        return False
      if self._v2dir < CUTOFF_V2DIR:
        log_excluded('%s not a candidate: v2dir avg too low (%lf)',
                     self._fpr, self._v2dir)
        return False
      if self._badexit is not None and self._badexit > PERMITTED_BADEXIT:
        log_excluded('%s not a candidate: badexit avg too high (%lf)',
                     self._fpr, self._badexit)
        return False
      # this function logs a message depending on which check fails
      if not self.is_valid_version():
        return False
      if self._guard < CUTOFF_GUARD:
        log_excluded('%s not a candidate: guard avg too low (%lf)',
                     self._fpr, self._guard)
        return False
      if (not self._data.has_key('consensus_weight')
          or self._data['consensus_weight'] < 1):
        log_excluded('%s not a candidate: consensus weight invalid', self._fpr)
        return False
    except BaseException as e:
      logging.warning("Exception %s when checking if fallback is a candidate",
                      str(e))
      return False
    return True

  def is_in_whitelist(self, relaylist):
    """ A fallback matches if each key in the whitelist line matches:
          ipv4
          dirport
          orport
          id
          ipv6 address and port (if present)
        If the fallback has an ipv6 key, the whitelist line must also have
        it, and vice versa, otherwise they don't match. """
    ipv6 = None
    if self.has_ipv6():
      ipv6 = '%s:%d'%(self.ipv6addr, self.ipv6orport)
    for entry in relaylist:
      if entry['id'] != self._fpr:
        # can't log here unless we match an IP and port, because every relay's
        # fingerprint is compared to every entry's fingerprint
        if entry['ipv4'] == self.dirip and int(entry['orport']) == self.orport:
          logging.warning('%s excluded: has OR %s:%d changed fingerprint to ' +
                          '%s?', entry['id'], self.dirip, self.orport,
                          self._fpr)
        if self.has_ipv6() and entry.has_key('ipv6') and entry['ipv6'] == ipv6:
          logging.warning('%s excluded: has OR %s changed fingerprint to ' +
                          '%s?', entry['id'], ipv6, self._fpr)
        continue
      if entry['ipv4'] != self.dirip:
        logging.warning('%s excluded: has it changed IPv4 from %s to %s?',
                        self._fpr, entry['ipv4'], self.dirip)
        continue
      if int(entry['dirport']) != self.dirport:
        logging.warning('%s excluded: has it changed DirPort from %s:%d to ' +
                        '%s:%d?', self._fpr, self.dirip, int(entry['dirport']),
                        self.dirip, self.dirport)
        continue
      if int(entry['orport']) != self.orport:
        logging.warning('%s excluded: has it changed ORPort from %s:%d to ' +
                        '%s:%d?', self._fpr, self.dirip, int(entry['orport']),
                        self.dirip, self.orport)
        continue
      if entry.has_key('ipv6') and self.has_ipv6():
        # if both entry and fallback have an ipv6 address, compare them
        if entry['ipv6'] != ipv6:
          logging.warning('%s excluded: has it changed IPv6 ORPort from %s ' +
                          'to %s?', self._fpr, entry['ipv6'], ipv6)
          continue
      # if the fallback has an IPv6 address but the whitelist entry
      # doesn't, or vice versa, the whitelist entry doesn't match
      elif entry.has_key('ipv6') and not self.has_ipv6():
        logging.warning('%s excluded: has it lost its former IPv6 address %s?',
                        self._fpr, entry['ipv6'])
        continue
      elif not entry.has_key('ipv6') and self.has_ipv6():
        logging.warning('%s excluded: has it gained an IPv6 address %s?',
                        self._fpr, ipv6)
        continue
      return True
    return False

  def is_in_blacklist(self, relaylist):
    """ A fallback matches a blacklist line if a sufficiently specific group
        of attributes matches:
          ipv4 & dirport
          ipv4 & orport
          id
          ipv6 & dirport
          ipv6 & ipv6 orport
        If the fallback and the blacklist line both have an ipv6 key,
        their values will be compared, otherwise, they will be ignored.
        If there is no dirport and no orport, the entry matches all relays on
        that ip. """
    for entry in relaylist:
      for key in entry:
        value = entry[key]
        if key == 'id' and value == self._fpr:
          log_excluded('%s is in the blacklist: fingerprint matches',
                       self._fpr)
          return True
        if key == 'ipv4' and value == self.dirip:
          # if the dirport is present, check it too
          if entry.has_key('dirport'):
            if int(entry['dirport']) == self.dirport:
              log_excluded('%s is in the blacklist: IPv4 (%s) and ' +
                           'DirPort (%d) match', self._fpr, self.dirip,
                           self.dirport)
              return True
          # if the orport is present, check it too
          elif entry.has_key('orport'):
            if int(entry['orport']) == self.orport:
              log_excluded('%s is in the blacklist: IPv4 (%s) and ' +
                           'ORPort (%d) match', self._fpr, self.dirip,
                           self.orport)
              return True
          else:
            log_excluded('%s is in the blacklist: IPv4 (%s) matches, and ' +
                         'entry has no DirPort or ORPort', self._fpr,
                         self.dirip)
            return True
        ipv6 = None
        if self.has_ipv6():
          ipv6 = '%s:%d'%(self.ipv6addr, self.ipv6orport)
        if (key == 'ipv6' and self.has_ipv6()):
          # if both entry and fallback have an ipv6 address, compare them,
          # otherwise, disregard ipv6 addresses
          if value == ipv6:
            # if the dirport is present, check it too
            if entry.has_key('dirport'):
              if int(entry['dirport']) == self.dirport:
                log_excluded('%s is in the blacklist: IPv6 (%s) and ' +
                             'DirPort (%d) match', self._fpr, ipv6,
                             self.dirport)
                return True
            # we've already checked the ORPort, it's part of entry['ipv6']
            else:
              log_excluded('%s is in the blacklist: IPv6 (%s) matches, and ' +
                           'entry has no DirPort', self._fpr, ipv6)
              return True
        elif (key == 'ipv6' or self.has_ipv6()):
          # only log if the fingerprint matches but the IPv6 doesn't
          if entry.has_key('id') and entry['id'] == self._fpr:
            log_excluded('%s skipping IPv6 blacklist comparison: relay ' +
                         'has%s IPv6%s, but entry has%s IPv6%s', self._fpr,
                         '' if self.has_ipv6() else ' no',
                         (' (' + ipv6 + ')') if self.has_ipv6() else '',
                         '' if key == 'ipv6' else ' no',
                         (' (' + value + ')') if key == 'ipv6' else '')
            logging.warning('Has %s %s IPv6 address %s?', self._fpr,
                            'gained an' if self.has_ipv6() else
                            'lost its former',
                            ipv6 if self.has_ipv6() else value)
    return False

  def cw_to_bw_factor(self):
    # any relays with a missing or zero consensus weight are not candidates
    # any relays with a missing advertised bandwidth have it set to zero
    return self._data['advertised_bandwidth'] / self._data['consensus_weight']

  # since advertised_bandwidth is reported by the relay, it can be gamed
  # to avoid this, use the median consensus weight to bandwidth factor to
  # estimate this relay's measured bandwidth, and make that the upper limit
  def measured_bandwidth(self, median_cw_to_bw_factor):
    cw_to_bw = median_cw_to_bw_factor
    # Reduce exit bandwidth to make sure we're not overloading them
    if self.is_exit():
      cw_to_bw *= EXIT_BANDWIDTH_FRACTION
    measured_bandwidth = self._data['consensus_weight'] * cw_to_bw
    if self._data['advertised_bandwidth'] != 0:
      # limit advertised bandwidth (if available) to measured bandwidth
      return min(measured_bandwidth, self._data['advertised_bandwidth'])
    else:
      return measured_bandwidth
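  # e.g. (illustrative): with a median factor of 500 (bytes/s per consensus
  # weight unit), a relay with consensus_weight 2000 and advertised_bandwidth
  # 800000 gets min(2000 * 500, 800000) = 800000 bytes/s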

  def set_measured_bandwidth(self, median_cw_to_bw_factor):
    self._data['measured_bandwidth'] = self.measured_bandwidth(
                                                      median_cw_to_bw_factor)

  def is_exit(self):
    return 'Exit' in self._data['flags']

  def is_guard(self):
    return 'Guard' in self._data['flags']

  def is_running(self):
    return 'Running' in self._data['flags']

  # does this fallback have an IPv6 address and orport?
  def has_ipv6(self):
    return self.ipv6addr is not None and self.ipv6orport is not None

  # strip leading and trailing brackets from an IPv6 address
  # safe to use on non-bracketed IPv6 and on IPv4 addresses
  # also convert to unicode, and make None appear as ''
  @staticmethod
  def strip_ipv6_brackets(ip):
    if ip is None:
      return unicode('')
    if len(ip) < 2:
      return unicode(ip)
    if ip[0] == '[' and ip[-1] == ']':
      return unicode(ip[1:-1])
    return unicode(ip)

  # are ip_a and ip_b in the same netblock?
  # mask_bits is the size of the netblock
  # takes both IPv4 and IPv6 addresses
  # the versions of ip_a and ip_b must be the same
  # the mask must be valid for the IP version
  @staticmethod
  def netblocks_equal(ip_a, ip_b, mask_bits):
    if ip_a is None or ip_b is None:
      return False
    ip_a = Candidate.strip_ipv6_brackets(ip_a)
    ip_b = Candidate.strip_ipv6_brackets(ip_b)
    a = ipaddress.ip_address(ip_a)
    b = ipaddress.ip_address(ip_b)
    if a.version != b.version:
      raise Exception('Mismatching IP versions in %s and %s'%(ip_a, ip_b))
    if mask_bits > a.max_prefixlen:
      logging.error('Bad IP mask %d for %s and %s'%(mask_bits, ip_a, ip_b))
      mask_bits = a.max_prefixlen
    if mask_bits < 0:
      logging.error('Bad IP mask %d for %s and %s'%(mask_bits, ip_a, ip_b))
      mask_bits = 0
    a_net = ipaddress.ip_network('%s/%d'%(ip_a, mask_bits), strict=False)
    return b in a_net
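  # e.g. (illustrative): netblocks_equal('192.0.2.1', '192.0.2.200', 24)
  # returns True, because both addresses are inside 192.0.2.0/24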

  # is this fallback's IPv4 address (dirip) in the same netblock as other's
  # IPv4 address?
  # mask_bits is the size of the netblock
  def ipv4_netblocks_equal(self, other, mask_bits):
    return Candidate.netblocks_equal(self.dirip, other.dirip, mask_bits)

  # is this fallback's IPv6 address (ipv6addr) in the same netblock as
  # other's IPv6 address?
  # Returns False if either fallback has no IPv6 address
  # mask_bits is the size of the netblock
  def ipv6_netblocks_equal(self, other, mask_bits):
    if not self.has_ipv6() or not other.has_ipv6():
      return False
    return Candidate.netblocks_equal(self.ipv6addr, other.ipv6addr, mask_bits)

  # is this fallback's IPv4 DirPort the same as other's IPv4 DirPort?
  def dirport_equal(self, other):
    return self.dirport == other.dirport

  # is this fallback's IPv4 ORPort the same as other's IPv4 ORPort?
  def ipv4_orport_equal(self, other):
    return self.orport == other.orport

  # is this fallback's IPv6 ORPort the same as other's IPv6 ORPort?
  # Returns False if either fallback has no IPv6 address
  def ipv6_orport_equal(self, other):
    if not self.has_ipv6() or not other.has_ipv6():
      return False
    return self.ipv6orport == other.ipv6orport

  # does this fallback have the same DirPort, IPv4 ORPort, or
  # IPv6 ORPort as other?
  # Ignores IPv6 ORPort if either fallback has no IPv6 address
  def port_equal(self, other):
    return (self.dirport_equal(other) or self.ipv4_orport_equal(other)
            or self.ipv6_orport_equal(other))

  # return a list containing IPv4 ORPort, DirPort, and IPv6 ORPort (if present)
  def port_list(self):
    ports = [self.dirport, self.orport]
    if self.has_ipv6() and not self.ipv6orport in ports:
      ports.append(self.ipv6orport)
    return ports

  # does this fallback share a port with other, regardless of whether the
  # port types match?
  # For example, if self's IPv4 ORPort is 80 and other's DirPort is 80,
  # return True
  def port_shared(self, other):
    for p in self.port_list():
      if p in other.port_list():
        return True
    return False

  # log how long it takes to download a consensus from dirip:dirport
  # returns True if the download failed, False if it succeeded within max_time
  @staticmethod
  def fallback_consensus_download_speed(dirip, dirport, nickname, fingerprint,
                                        max_time):
    download_failed = False
    # some directory mirrors respond to requests in ways that hang python
    # sockets, which is why we log this line here
    logging.info('Initiating %sconsensus download from %s (%s:%d) %s.',
                 'microdesc ' if DOWNLOAD_MICRODESC_CONSENSUS else '',
                 nickname, dirip, dirport, fingerprint)
    # there appears to be about 1 second of overhead when comparing stem's
    # internal trace time and the elapsed time calculated here
    TIMEOUT_SLOP = 1.0
    start = datetime.datetime.utcnow()
    try:
      consensus = get_consensus(
                    endpoints = [(dirip, dirport)],
                    timeout = (max_time + TIMEOUT_SLOP),
                    validate = True,
                    retries = 0,
                    fall_back_to_authority = False,
                    document_handler = DocumentHandler.BARE_DOCUMENT,
                    microdescriptor = DOWNLOAD_MICRODESC_CONSENSUS
                  ).run()[0]
      end = datetime.datetime.utcnow()
      time_since_expiry = (end - consensus.valid_until).total_seconds()
    except Exception, stem_error:
      end = datetime.datetime.utcnow()
      log_excluded('Unable to retrieve a consensus from %s: %s', nickname,
                   stem_error)
      status = 'error: "%s"' % (stem_error)
      level = logging.WARNING
      download_failed = True
    elapsed = (end - start).total_seconds()
    if download_failed:
      # keep the error failure status, and avoid using the variables
      pass
    elif elapsed > max_time:
      status = 'too slow'
      level = logging.WARNING
      download_failed = True
    elif (time_since_expiry > 0):
      status = 'outdated consensus, expired %ds ago'%(int(time_since_expiry))
      if time_since_expiry <= CONSENSUS_EXPIRY_TOLERANCE:
        status += ', tolerating up to %ds'%(CONSENSUS_EXPIRY_TOLERANCE)
        level = logging.INFO
      else:
        status += ', invalid'
        level = logging.WARNING
        download_failed = True
    else:
      status = 'ok'
      level = logging.DEBUG
    logging.log(level, 'Consensus download: %0.1fs %s from %s (%s:%d) %s, ' +
                'max download time %0.1fs.', elapsed, status, nickname,
                dirip, dirport, fingerprint, max_time)
    return download_failed

  # does this fallback download the consensus fast enough?
  def check_fallback_download_consensus(self):
    # include the relay if we're not doing a check, or we can't check (IPv6)
    ipv4_failed = False
    ipv6_failed = False
    if PERFORM_IPV4_DIRPORT_CHECKS:
      ipv4_failed = Candidate.fallback_consensus_download_speed(self.dirip,
                                                self.dirport,
                                                self._data['nickname'],
                                                self._fpr,
                                                CONSENSUS_DOWNLOAD_SPEED_MAX)
    if self.has_ipv6() and PERFORM_IPV6_DIRPORT_CHECKS:
      # Clients assume the IPv6 DirPort is the same as the IPv4 DirPort
      ipv6_failed = Candidate.fallback_consensus_download_speed(self.ipv6addr,
                                                self.dirport,
                                                self._data['nickname'],
                                                self._fpr,
                                                CONSENSUS_DOWNLOAD_SPEED_MAX)
    return ((not ipv4_failed) and (not ipv6_failed))

  # if this fallback has not passed a download check, try it again,
  # and record the result, available in get_fallback_download_consensus
  def try_fallback_download_consensus(self):
    if not self.get_fallback_download_consensus():
      self._data['download_check'] = self.check_fallback_download_consensus()

  # did this fallback pass the download check?
  def get_fallback_download_consensus(self):
    # if we're not performing checks, return True
    if not PERFORM_IPV4_DIRPORT_CHECKS and not PERFORM_IPV6_DIRPORT_CHECKS:
      return True
    # if we are performing checks, but haven't done one, return False
    if not self._data.has_key('download_check'):
      return False
    return self._data['download_check']

  # output an optional header comment and info for this fallback
  # try_fallback_download_consensus before calling this
  def fallbackdir_line(self, fallbacks, prefilter_fallbacks):
    s = ''
    if OUTPUT_COMMENTS:
      s += self.fallbackdir_comment(fallbacks, prefilter_fallbacks)
    # if the download speed is ok, output a C string
    # if it's not, but we OUTPUT_COMMENTS, output a commented-out C string
    if self.get_fallback_download_consensus() or OUTPUT_COMMENTS:
      s += self.fallbackdir_info(self.get_fallback_download_consensus())
    return s

  # output a header comment for this fallback
  def fallbackdir_comment(self, fallbacks, prefilter_fallbacks):
    # /*
    # nickname
    # flags
    # adjusted bandwidth, consensus weight
    # [contact]
    # [identical contact counts]
    # */
    # Multiline C comment
    s = '/*'
    s += '\n'
    s += cleanse_c_multiline_comment(self._data['nickname'])
    s += '\n'
    s += 'Flags: '
    s += cleanse_c_multiline_comment(' '.join(sorted(self._data['flags'])))
    s += '\n'
    # this is an adjusted bandwidth, see calculate_measured_bandwidth()
    bandwidth = self._data['measured_bandwidth']
    weight = self._data['consensus_weight']
    s += 'Bandwidth: %.1f MByte/s, Consensus Weight: %d'%(
        bandwidth/(1024.0*1024.0),
        weight)
    s += '\n'
    if self._data['contact'] is not None:
      s += cleanse_c_multiline_comment(self._data['contact'])
      if CONTACT_COUNT or CONTACT_BLACKLIST_COUNT:
        fallback_count = len([f for f in fallbacks
                              if f._data['contact'] == self._data['contact']])
        if fallback_count > 1:
          s += '\n'
          s += '%d identical contacts listed' % (fallback_count)
        if CONTACT_BLACKLIST_COUNT:
          prefilter_count = len([f for f in prefilter_fallbacks
                               if f._data['contact'] == self._data['contact']])
          filter_count = prefilter_count - fallback_count
          if filter_count > 0:
            if fallback_count > 1:
              s += ' '
            else:
              s += '\n'
            s += '%d blacklisted' % (filter_count)
      s += '\n'
    s += '*/'
    s += '\n'
    return s

  # output the fallback info C string for this fallback
  # this is the text that would go after FallbackDir in a torrc
  # if this relay failed the download test and we OUTPUT_COMMENTS,
  # comment-out the returned string
  def fallbackdir_info(self, dl_speed_ok):
    # "address:dirport orport=port id=fingerprint"
    # (insert additional mandatory fields here)
    # "[ipv6=addr:orport]"
    # (insert additional optional fields here)
    # /* nickname=name */
    # /* extrainfo={0,1} */
    # (insert additional comment fields here)
    # /* ===== */
    # ,

    # Do we want a C string, or a commented-out string?
    c_string = dl_speed_ok
    comment_string = not dl_speed_ok and OUTPUT_COMMENTS
    # If we don't want either kind of string, bail
    if not c_string and not comment_string:
      return ''
    s = ''
    # Comment out the fallback directory entry if it's too slow
    # See the debug output for which address and port is failing
    if comment_string:
      s += '/* Consensus download failed or was too slow:\n'
    # Multi-Line C string with trailing comma (part of a string list)
    # This makes it easier to diff the file, and remove IPv6 lines using grep
    # Integers don't need escaping
    s += '"%s orport=%d id=%s"'%(
            cleanse_c_string(self._data['dir_address']),
            self.orport,
            cleanse_c_string(self._fpr))
    s += '\n'
    # (insert additional mandatory fields here)
    if self.has_ipv6():
      s += '" ipv6=%s:%d"'%(cleanse_c_string(self.ipv6addr), self.ipv6orport)
      s += '\n'
    # (insert additional optional fields here)
    if not comment_string:
      s += '/* '
    s += 'nickname=%s'%(cleanse_c_string(self._data['nickname']))
    if not comment_string:
      s += ' */'
    s += '\n'
    # if we know that the fallback is an extrainfo cache, flag it
    # and if we don't know, assume it is not
    if not comment_string:
      s += '/* '
    s += 'extrainfo=%d'%(1 if self._extra_info_cache else 0)
    if not comment_string:
      s += ' */'
    s += '\n'
    # (insert additional comment fields here)
    # The terminator and comma must be the last line in each fallback entry
    if not comment_string:
      s += '/* '
    s += SECTION_SEPARATOR_BASE
    if not comment_string:
      s += ' */'
    s += '\n'
    s += ','
    if comment_string:
      s += '\n'
      s += '*/'
    return s
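  # e.g. (illustrative, hypothetical relay) a passing entry renders as:
  # "192.0.2.1:80 orport=443 id=0123456789ABCDEF0123456789ABCDEF01234567"
  # " ipv6=[2001:db8::1]:443"
  # /* nickname=Example */
  # /* extrainfo=0 */
  # /* ===== */
  # ,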

## Fallback Candidate List Class

class CandidateList(dict):
  def __init__(self):
    pass

  def _add_relay(self, details):
    if not 'dir_address' in details: return
    c = Candidate(details)
    self[ c.get_fingerprint() ] = c

  def _add_uptime(self, uptime):
    try:
      fpr = uptime['fingerprint']
    except KeyError:
      raise Exception("Document has no fingerprint field.")

    try:
      c = self[fpr]
    except KeyError:
      logging.debug('Got unknown relay %s in uptime document.'%(fpr,))
      return

    c.add_uptime(uptime)

  def _add_details(self):
    logging.debug('Loading details document.')
    d = fetch('details',
        fields=('fingerprint,nickname,contact,last_changed_address_or_port,' +
                'consensus_weight,advertised_bandwidth,or_addresses,' +
                'dir_address,recommended_version,flags,effective_family,' +
                'platform'))
    logging.debug('Loading details document done.')

    if not 'relays' in d: raise Exception("No relays found in document.")

    for r in d['relays']: self._add_relay(r)

  def _add_uptimes(self):
    logging.debug('Loading uptime document.')
    d = fetch('uptime')
    logging.debug('Loading uptime document done.')

    if not 'relays' in d: raise Exception("No relays found in document.")
    for r in d['relays']: self._add_uptime(r)

  def add_relays(self):
    self._add_details()
    self._add_uptimes()

  def count_guards(self):
    guard_count = 0
    for fpr in self.keys():
      if self[fpr].is_guard():
        guard_count += 1
    return guard_count

  # Find fallbacks that fit the uptime, stability, and flags criteria,
  # and make an array of them in self.fallbacks
  def compute_fallbacks(self):
    self.fallbacks = map(lambda x: self[x],
                         filter(lambda x: self[x].is_candidate(),
                                self.keys()))

  # sort fallbacks by their consensus weight to advertised bandwidth factor,
  # lowest to highest
  # used to find the median cw_to_bw_factor()
  def sort_fallbacks_by_cw_to_bw_factor(self):
    self.fallbacks.sort(key=lambda f: f.cw_to_bw_factor())

  # sort fallbacks by their measured bandwidth, highest to lowest
  # calculate_measured_bandwidth before calling this
  # this is useful for reviewing candidates in priority order
  def sort_fallbacks_by_measured_bandwidth(self):
    self.fallbacks.sort(key=lambda f: f._data['measured_bandwidth'],
                        reverse=True)

  # sort fallbacks by the data field data_field, lowest to highest
  def sort_fallbacks_by(self, data_field):
    self.fallbacks.sort(key=lambda f: f._data[data_field])
1489 @staticmethod
1490 def load_relaylist(file_obj):
1491 """ Read each line in the file, and parse it like a FallbackDir line:
1492 an IPv4 address and optional port:
1493 <IPv4 address>:<port>
1494 which are parsed into dictionary entries:
1495 ipv4=<IPv4 address>
1496 dirport=<port>
1497 followed by a series of key=value entries:
1498 orport=<port>
1499 id=<fingerprint>
1500 ipv6=<IPv6 address>:<IPv6 orport>
1501 each line's key/value pairs are placed in a dictonary,
1502 (of string -> string key/value pairs),
1503 and these dictionaries are placed in an array.
1504 comments start with # and are ignored """
1505 file_data = file_obj['data']
1506 file_name = file_obj['name']
1507 relaylist = []
1508 if file_data is None:
1509 return relaylist
1510 for line in file_data.split('\n'):
1511 relay_entry = {}
1512 # ignore comments
1513 line_comment_split = line.split('#')
1514 line = line_comment_split[0]
1515 # cleanup whitespace
1516 line = cleanse_whitespace(line)
1517 line = line.strip()
1518 if len(line) == 0:
1519 continue
1520 for item in line.split(' '):
1521 item = item.strip()
1522 if len(item) == 0:
1523 continue
1524 key_value_split = item.split('=')
1525 kvl = len(key_value_split)
1526 if kvl < 1 or kvl > 2:
1527 print '#error Bad %s item: %s, format is key=value.'%(
1528 file_name, item)
1529 if kvl == 1:
1530 # assume that entries without a key are the ipv4 address,
1531 # perhaps with a dirport
1532 ipv4_maybe_dirport = key_value_split[0]
1533 ipv4_maybe_dirport_split = ipv4_maybe_dirport.split(':')
1534 dirl = len(ipv4_maybe_dirport_split)
1535 if dirl < 1 or dirl > 2:
1536 print '#error Bad %s IPv4 item: %s, format is ipv4:port.'%(
1537 file_name, item)
1538 if dirl >= 1:
1539 relay_entry['ipv4'] = ipv4_maybe_dirport_split[0]
1540 if dirl == 2:
1541 relay_entry['dirport'] = ipv4_maybe_dirport_split[1]
1542 elif kvl == 2:
1543 relay_entry[key_value_split[0]] = key_value_split[1]
1544 relaylist.append(relay_entry)
1545 return relaylist
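# example (hypothetical values): a line such as
#   12.34.56.78:80 orport=443 id=0123456789ABCDEF ipv6=[2001:db8::1]:443
# parses into the dictionary
#   {'ipv4': '12.34.56.78', 'dirport': '80', 'orport': '443',
#    'id': '0123456789ABCDEF', 'ipv6': '[2001:db8::1]:443'}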
1547 # apply the fallback whitelist and blacklist
1548 def apply_filter_lists(self, whitelist_obj, blacklist_obj):
1549 excluded_count = 0
1550 logging.debug('Applying whitelist and blacklist.')
1551 # parse the whitelist and blacklist
1552 whitelist = self.load_relaylist(whitelist_obj)
1553 blacklist = self.load_relaylist(blacklist_obj)
1554 filtered_fallbacks = []
1555 for f in self.fallbacks:
1556 in_whitelist = f.is_in_whitelist(whitelist)
1557 in_blacklist = f.is_in_blacklist(blacklist)
1558 if in_whitelist and in_blacklist:
1559 if BLACKLIST_EXCLUDES_WHITELIST_ENTRIES:
1560 # exclude
1561 excluded_count += 1
1562 logging.warning('Excluding %s: in both blacklist and whitelist.',
1563 f._fpr)
1564 else:
1565 # include
1566 filtered_fallbacks.append(f)
1567 elif in_whitelist:
1568 # include
1569 filtered_fallbacks.append(f)
1570 elif in_blacklist:
1571 # exclude
1572 excluded_count += 1
1573 log_excluded('Excluding %s: in blacklist.', f._fpr)
1574 else:
1575 if INCLUDE_UNLISTED_ENTRIES:
1576 # include
1577 filtered_fallbacks.append(f)
1578 else:
1579 # exclude
1580 excluded_count += 1
1581 log_excluded('Excluding %s: in neither blacklist nor whitelist.',
1582 f._fpr)
1583 self.fallbacks = filtered_fallbacks
1584 return excluded_count
1586 @staticmethod
1587 def summarise_filters(initial_count, excluded_count):
1588 return '/* Whitelist & blacklist excluded %d of %d candidates. */'%(
1589 excluded_count, initial_count)
1591 # calculate each fallback's measured bandwidth based on the median
1592 # consensus weight to advertised bandwidth ratio
1593 def calculate_measured_bandwidth(self):
1594 self.sort_fallbacks_by_cw_to_bw_factor()
1595 median_fallback = self.fallback_median(True)
1596 if median_fallback is not None:
1597 median_cw_to_bw_factor = median_fallback.cw_to_bw_factor()
1598 else:
1599 # this will never be used, because there are no fallbacks
1600 median_cw_to_bw_factor = None
1601 for f in self.fallbacks:
1602 f.set_measured_bandwidth(median_cw_to_bw_factor)
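# sketch of the estimate above, assuming cw_to_bw_factor() is
# consensus_weight / advertised_bandwidth, and that set_measured_bandwidth
# (defined elsewhere in this script) caps the result at the advertised
# bandwidth, as the comment in remove_low_bandwidth_relays suggests:
#   median_factor = median(consensus_weight / advertised_bandwidth)
#   measured_bandwidth ~= min(consensus_weight / median_factor,
#                             advertised_bandwidth)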
1604 # remove relays with low measured bandwidth from the fallback list
1605 # calculate_measured_bandwidth for each relay before calling this
1606 def remove_low_bandwidth_relays(self):
1607 if MIN_BANDWIDTH is None:
1608 return
1609 above_min_bw_fallbacks = []
1610 for f in self.fallbacks:
1611 if f._data['measured_bandwidth'] >= MIN_BANDWIDTH:
1612 above_min_bw_fallbacks.append(f)
1613 else:
1614 # the bandwidth we log here is limited by the relay's consensus weight
1615 # as well as its advertised bandwidth. See set_measured_bandwidth
1616 # for details
1617 log_excluded('%s not a candidate: bandwidth %.1fMByte/s too low, ' +
1618 'must be at least %.1fMByte/s', f._fpr,
1619 f._data['measured_bandwidth']/(1024.0*1024.0),
1620 MIN_BANDWIDTH/(1024.0*1024.0))
1621 self.fallbacks = above_min_bw_fallbacks
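# example of the units in the log above: measured_bandwidth and
# MIN_BANDWIDTH appear to be in bytes per second, given the division, so
# a relay measured at 2097152 bytes/s logs as
#   2097152 / (1024.0 * 1024.0) == 2.0 MByte/s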
1623 # the minimum fallback in the list (the last element)
1624 # call a descending sort, like sort_fallbacks_by_measured_bandwidth, first
1625 def fallback_min(self):
1626 if len(self.fallbacks) > 0:
1627 return self.fallbacks[-1]
1628 else:
1629 return None
1631 # the median fallback in the list
1632 # call one of the sort_fallbacks_* functions before calling this
1633 def fallback_median(self, require_advertised_bandwidth):
1634 # use the low-median when there is an even number of fallbacks,
1635 # for consistency with the bandwidth authorities
1636 if len(self.fallbacks) > 0:
1637 median_position = (len(self.fallbacks) - 1) / 2
1638 if not require_advertised_bandwidth:
1639 return self.fallbacks[median_position]
1640 # if we need advertised_bandwidth but this relay doesn't have it,
1641 # move to a fallback with greater consensus weight until we find one
1642 while not self.fallbacks[median_position]._data['advertised_bandwidth']:
1643 median_position += 1
1644 if median_position >= len(self.fallbacks):
1645 return None
1646 return self.fallbacks[median_position]
1647 else:
1648 return None
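# example of the low-median above: with 4 fallbacks, Python 2 integer
# division gives
#   median_position = (4 - 1) / 2 == 1
# i.e. the second-lowest element, rather than interpolating between
# positions 1 and 2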
1650 # the maximum fallback in the list (the first element)
1651 # call a descending sort, like sort_fallbacks_by_measured_bandwidth, first
1652 def fallback_max(self):
1653 if len(self.fallbacks) > 0:
1654 return self.fallbacks[0]
1655 else:
1656 return None
1658 # return a new bag suitable for storing attributes
1659 @staticmethod
1660 def attribute_new():
1661 return dict()
1663 # get the count of attribute in attribute_bag
1664 # if attribute is None or the empty string, return 0
1665 @staticmethod
1666 def attribute_count(attribute, attribute_bag):
1667 if attribute is None or attribute == '':
1668 return 0
1669 if attribute not in attribute_bag:
1670 return 0
1671 return attribute_bag[attribute]
1673 # does attribute_bag contain at least max_count instances of attribute?
1674 # if so, return False
1675 # if not, return True
1676 # if attribute is None or the empty string, or max_count is invalid,
1677 # always return True
1678 @staticmethod
1679 def attribute_allow(attribute, attribute_bag, max_count=1):
1680 if attribute is None or attribute == '' or max_count <= 0:
1681 return True
1682 elif CandidateList.attribute_count(attribute, attribute_bag) >= max_count:
1683 return False
1684 else:
1685 return True
1687 # add attribute to attribute_bag, incrementing the count if it is already
1688 # present
1689 # if attribute is None or the empty string, or count is invalid,
1690 # do nothing
1691 @staticmethod
1692 def attribute_add(attribute, attribute_bag, count=1):
1693 if attribute is None or attribute == '' or count <= 0:
1694 return
1695 attribute_bag.setdefault(attribute, 0)
1696 attribute_bag[attribute] += count
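# example of the attribute bag helpers above (hypothetical key):
#   bag = CandidateList.attribute_new()                  # {}
#   CandidateList.attribute_add('203.0.113.1', bag)      # {'203.0.113.1': 1}
#   CandidateList.attribute_count('203.0.113.1', bag)    # 1
#   CandidateList.attribute_allow('203.0.113.1', bag, 1) # False: at limit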
1698 # make sure there are only MAX_FALLBACKS_PER_IP fallbacks per IPv4 address,
1699 # and per IPv6 address
1700 # there is only one IPv4 address on each fallback: the IPv4 DirPort address
1701 # (we choose the IPv4 ORPort which is on the same IPv4 as the DirPort)
1702 # there is at most one IPv6 address on each fallback: the IPv6 ORPort address
1703 # we try to match the IPv4 ORPort, but will use any IPv6 address if needed
1704 # (clients only use the IPv6 ORPort)
1705 # if there is no IPv6 address, only the IPv4 address is checked
1706 # return the number of candidates we excluded
1707 def limit_fallbacks_same_ip(self):
1708 ip_limit_fallbacks = []
1709 ip_list = CandidateList.attribute_new()
1710 for f in self.fallbacks:
1711 if (CandidateList.attribute_allow(f.dirip, ip_list,
1712 MAX_FALLBACKS_PER_IPV4)
1713 and CandidateList.attribute_allow(f.ipv6addr, ip_list,
1714 MAX_FALLBACKS_PER_IPV6)):
1715 ip_limit_fallbacks.append(f)
1716 CandidateList.attribute_add(f.dirip, ip_list)
1717 if f.has_ipv6():
1718 CandidateList.attribute_add(f.ipv6addr, ip_list)
1719 elif not CandidateList.attribute_allow(f.dirip, ip_list,
1720 MAX_FALLBACKS_PER_IPV4):
1721 log_excluded('Eliminated %s: already have %d fallback(s) on IPv4 %s'
1722 %(f._fpr, CandidateList.attribute_count(f.dirip, ip_list),
1723 f.dirip))
1724 elif (f.has_ipv6() and
1725 not CandidateList.attribute_allow(f.ipv6addr, ip_list,
1726 MAX_FALLBACKS_PER_IPV6)):
1727 log_excluded('Eliminated %s: already have %d fallback(s) on IPv6 %s'
1728 %(f._fpr, CandidateList.attribute_count(f.ipv6addr,
1729 ip_list),
1730 f.ipv6addr))
1731 original_count = len(self.fallbacks)
1732 self.fallbacks = ip_limit_fallbacks
1733 return original_count - len(self.fallbacks)
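# example (assuming MAX_FALLBACKS_PER_IPV4 == 1, configured earlier in
# this script): if two candidates share dirip '203.0.113.1', the first is
# kept and counted in ip_list; attribute_allow then returns False for the
# second, which is logged and eliminated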
1735 # make sure there are only MAX_FALLBACKS_PER_CONTACT fallbacks for each
1736 # ContactInfo
1737 # if there is no ContactInfo, allow the fallback
1738 # this check can be gamed by providing no ContactInfo, or by setting the
1739 # ContactInfo to match another fallback
1740 # However, given the likelihood that relays with the same ContactInfo will
1741 # go down at similar times, its usefulness outweighs the risk
1742 def limit_fallbacks_same_contact(self):
1743 contact_limit_fallbacks = []
1744 contact_list = CandidateList.attribute_new()
1745 for f in self.fallbacks:
1746 if CandidateList.attribute_allow(f._data['contact'], contact_list,
1747 MAX_FALLBACKS_PER_CONTACT):
1748 contact_limit_fallbacks.append(f)
1749 CandidateList.attribute_add(f._data['contact'], contact_list)
1750 else:
1751 log_excluded(
1752 'Eliminated %s: already have %d fallback(s) on ContactInfo %s'
1753 %(f._fpr, CandidateList.attribute_count(f._data['contact'],
1754 contact_list),
1755 f._data['contact']))
1756 original_count = len(self.fallbacks)
1757 self.fallbacks = contact_limit_fallbacks
1758 return original_count - len(self.fallbacks)
1760 # make sure there are only MAX_FALLBACKS_PER_FAMILY fallbacks per effective
1761 # family
1762 # if there is no family, allow the fallback
1763 # we use effective family, which ensures mutual family declarations
1764 # but the check can be gamed by not declaring a family at all
1765 # if any indirect families exist, the result depends on the order in which
1766 # fallbacks are sorted in the list
1767 def limit_fallbacks_same_family(self):
1768 family_limit_fallbacks = []
1769 fingerprint_list = CandidateList.attribute_new()
1770 for f in self.fallbacks:
1771 if CandidateList.attribute_allow(f._fpr, fingerprint_list,
1772 MAX_FALLBACKS_PER_FAMILY):
1773 family_limit_fallbacks.append(f)
1774 CandidateList.attribute_add(f._fpr, fingerprint_list)
1775 for family_fingerprint in f._data['effective_family']:
1776 CandidateList.attribute_add(family_fingerprint, fingerprint_list)
1777 else:
1778 # we already have enough fallbacks from this fallback's effective
1779 # family
1780 log_excluded(
1781 'Eliminated %s: already have %d fallback(s) in effective family'
1782 %(f._fpr, CandidateList.attribute_count(f._fpr, fingerprint_list)))
1783 original_count = len(self.fallbacks)
1784 self.fallbacks = family_limit_fallbacks
1785 return original_count - len(self.fallbacks)
1787 # try once to get the descriptors for fingerprint_list using stem
1788 # returns an empty list on exception
1789 @staticmethod
1790 def get_fallback_descriptors_once(fingerprint_list):
1791 desc_list = get_server_descriptors(fingerprints=fingerprint_list).run(suppress=True)
1792 return desc_list
1794 # try up to max_retries times to get the descriptors for fingerprint_list
1795 # using stem. Stops retrying when all descriptors have been retrieved.
1796 # returns a list containing the descriptors that were retrieved
1797 @staticmethod
1798 def get_fallback_descriptors(fingerprint_list, max_retries=5):
1799 # we can't use stem's retries=, because we want to support more than 96
1800 # descriptors
1802 # add an attempt for every MAX_FINGERPRINTS (or part thereof) in the list
1803 max_retries += (len(fingerprint_list) + MAX_FINGERPRINTS - 1) / MAX_FINGERPRINTS
1804 remaining_list = fingerprint_list
1805 desc_list = []
1806 for _ in xrange(max_retries):
1807 if len(remaining_list) == 0:
1808 break
1809 new_desc_list = CandidateList.get_fallback_descriptors_once(remaining_list[0:MAX_FINGERPRINTS])
1810 for d in new_desc_list:
1811 try:
1812 remaining_list.remove(d.fingerprint)
1813 except ValueError:
1814 # warn and ignore if a directory mirror returned an unrequested descriptor
1815 logging.warning("Directory mirror returned unwanted descriptor %s, ignoring",
1816 d.fingerprint)
1817 continue
1818 desc_list.append(d)
1819 return desc_list
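# example of the retry arithmetic above: with 200 fingerprints and
# MAX_FINGERPRINTS == 96, Python 2 integer division adds
#   (200 + 96 - 1) / 96 == 3
# attempts, one per batch of up to 96 descriptors, on top of the base
# max_retries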
1821 # find the fallbacks that cache extra-info documents
1822 # Onionoo doesn't know this, so we have to use stem
1823 def mark_extra_info_caches(self):
1824 fingerprint_list = [ f._fpr for f in self.fallbacks ]
1825 logging.info("Downloading fallback descriptors to find extra-info caches")
1826 desc_list = CandidateList.get_fallback_descriptors(fingerprint_list)
1827 for d in desc_list:
1828 self[d.fingerprint]._extra_info_cache = d.extra_info_cache
1829 missing_descriptor_list = [ f._fpr for f in self.fallbacks
1830 if f._extra_info_cache is None ]
1831 for f in missing_descriptor_list:
1832 logging.warning("No descriptor for {}. Assuming extrainfo=0.".format(f))
1834 # try a download check on each fallback candidate in order
1835 # stop after max_count successful downloads
1836 # but don't remove any candidates from the array
1837 def try_download_consensus_checks(self, max_count):
1838 dl_ok_count = 0
1839 for f in self.fallbacks:
1840 f.try_fallback_download_consensus()
1841 if f.get_fallback_download_consensus():
1842 # this fallback downloaded a consensus ok
1843 dl_ok_count += 1
1844 if dl_ok_count >= max_count:
1845 # we have enough fallbacks
1846 return
1848 # put max_count successful candidates in the fallbacks array:
1849 # - perform download checks on each fallback candidate
1850 # - retry failed candidates if CONSENSUS_DOWNLOAD_RETRY is set
1851 # - eliminate failed candidates
1852 # - if there are more than max_count candidates, eliminate lowest bandwidth
1853 # - if there are fewer than max_count candidates, leave only successful
1854 # Return the number of fallbacks that failed the consensus check
1855 def perform_download_consensus_checks(self, max_count):
1856 self.sort_fallbacks_by_measured_bandwidth()
1857 self.try_download_consensus_checks(max_count)
1858 if CONSENSUS_DOWNLOAD_RETRY:
1859 # try unsuccessful candidates again
1860 # we could end up with more than max_count successful candidates here
1861 self.try_download_consensus_checks(max_count)
1862 # now we have at least max_count successful candidates,
1863 # or we've tried them all
1864 original_count = len(self.fallbacks)
1865 self.fallbacks = filter(lambda x: x.get_fallback_download_consensus(),
1866 self.fallbacks)
1867 # some of these failed the check; others skipped it because we
1868 # already had enough successful downloads
1869 failed_count = original_count - len(self.fallbacks)
1870 self.fallbacks = self.fallbacks[:max_count]
1871 return failed_count
1873 # return a string that describes a/b as a percentage
1874 @staticmethod
1875 def describe_percentage(a, b):
1876 if b != 0:
1877 return '%d/%d = %.0f%%'%(a, b, (a*100.0)/b)
1878 else:
1879 # technically, 0/0 is undefined, but 0.0% is a sensible result
1880 return '%d/%d = %.0f%%'%(a, b, 0.0)
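# examples:
#   CandidateList.describe_percentage(25, 100) # '25/100 = 25%'
#   CandidateList.describe_percentage(0, 0)    # '0/0 = 0%'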
1882 # return a dictionary of lists of fallbacks by IPv4 netblock
1883 # the dictionary is keyed by the fingerprint of an arbitrary fallback
1884 # in each netblock
1885 # mask_bits is the size of the netblock
1886 def fallbacks_by_ipv4_netblock(self, mask_bits):
1887 netblocks = {}
1888 for f in self.fallbacks:
1889 found_netblock = False
1890 for b in netblocks.keys():
1891 # we found an existing netblock containing this fallback
1892 if f.ipv4_netblocks_equal(self[b], mask_bits):
1893 # add it to the list
1894 netblocks[b].append(f)
1895 found_netblock = True
1896 break
1897 # make a new netblock based on this fallback's fingerprint
1898 if not found_netblock:
1899 netblocks[f._fpr] = [f]
1900 return netblocks
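# sketch of a netblock comparison, assuming ipv4_netblocks_equal (and its
# IPv6 counterpart, both defined elsewhere in this script) behave roughly
# like this, using the optional ipaddress module:
#   def netblocks_equal(ip_a, ip_b, mask_bits):
#     net_a = ipaddress.ip_network(u'%s/%d'%(ip_a, mask_bits), strict=False)
#     net_b = ipaddress.ip_network(u'%s/%d'%(ip_b, mask_bits), strict=False)
#     return net_a == net_b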
1902 # return a dictionary of lists of fallbacks by IPv6 netblock
1903 # where mask_bits is the size of the netblock
1904 def fallbacks_by_ipv6_netblock(self, mask_bits):
1905 netblocks = {}
1906 for f in self.fallbacks:
1907 # skip fallbacks without IPv6 addresses
1908 if not f.has_ipv6():
1909 continue
1910 found_netblock = False
1911 for b in netblocks.keys():
1912 # we found an existing netblock containing this fallback
1913 if f.ipv6_netblocks_equal(self[b], mask_bits):
1914 # add it to the list
1915 netblocks[b].append(f)
1916 found_netblock = True
1917 break
1918 # make a new netblock based on this fallback's fingerprint
1919 if not found_netblock:
1920 netblocks[f._fpr] = [f]
1921 return netblocks
1923 # log a message about the proportion of fallbacks in each IPv4 netblock,
1924 # where mask_bits is the size of the netblock
1925 def describe_fallback_ipv4_netblock_mask(self, mask_bits):
1926 fallback_count = len(self.fallbacks)
1927 shared_netblock_fallback_count = 0
1928 most_frequent_netblock = None
1929 netblocks = self.fallbacks_by_ipv4_netblock(mask_bits)
1930 for b in netblocks.keys():
1931 if len(netblocks[b]) > 1:
1932 # how many fallbacks are in a netblock with other fallbacks?
1933 shared_netblock_fallback_count += len(netblocks[b])
1934 # what's the netblock with the most fallbacks?
1935 if (most_frequent_netblock is None
1936 or len(netblocks[b]) > len(netblocks[most_frequent_netblock])):
1937 most_frequent_netblock = b
1938 logging.debug('Fallback IPv4 addresses in the same /%d:'%(mask_bits))
1939 for f in netblocks[b]:
1940 logging.debug('%s - %s', f.dirip, f._fpr)
1941 if most_frequent_netblock is not None:
1942 logging.warning('There are %s fallbacks in the IPv4 /%d containing %s'%(
1943 CandidateList.describe_percentage(
1944 len(netblocks[most_frequent_netblock]),
1945 fallback_count),
1946 mask_bits,
1947 self[most_frequent_netblock].dirip))
1948 if shared_netblock_fallback_count > 0:
1949 logging.warning(('%s of fallbacks are in an IPv4 /%d with other ' +
1950 'fallbacks')%(CandidateList.describe_percentage(
1951 shared_netblock_fallback_count,
1952 fallback_count),
1953 mask_bits))
1955 # log a message about the proportion of fallbacks in each IPv6 netblock,
1956 # where mask_bits is the size of the netblock
1957 def describe_fallback_ipv6_netblock_mask(self, mask_bits):
1958 fallback_count = len(self.fallbacks_with_ipv6())
1959 shared_netblock_fallback_count = 0
1960 most_frequent_netblock = None
1961 netblocks = self.fallbacks_by_ipv6_netblock(mask_bits)
1962 for b in netblocks.keys():
1963 if len(netblocks[b]) > 1:
1964 # how many fallbacks are in a netblock with other fallbacks?
1965 shared_netblock_fallback_count += len(netblocks[b])
1966 # what's the netblock with the most fallbacks?
1967 if (most_frequent_netblock is None
1968 or len(netblocks[b]) > len(netblocks[most_frequent_netblock])):
1969 most_frequent_netblock = b
1970 logging.debug('Fallback IPv6 addresses in the same /%d:'%(mask_bits))
1971 for f in netblocks[b]:
1972 logging.debug('%s - %s', f.ipv6addr, f._fpr)
1973 if most_frequent_netblock is not None:
1974 logging.warning('There are %s fallbacks in the IPv6 /%d containing %s'%(
1975 CandidateList.describe_percentage(
1976 len(netblocks[most_frequent_netblock]),
1977 fallback_count),
1978 mask_bits,
1979 self[most_frequent_netblock].ipv6addr))
1980 if shared_netblock_fallback_count > 0:
1981 logging.warning(('%s of fallbacks are in an IPv6 /%d with other ' +
1982 'fallbacks')%(CandidateList.describe_percentage(
1983 shared_netblock_fallback_count,
1984 fallback_count),
1985 mask_bits))
1987 # log a message about the proportion of fallbacks in each IPv4 /8, /16,
1988 # and /24
1989 def describe_fallback_ipv4_netblocks(self):
1990 # this doesn't actually tell us anything useful
1991 #self.describe_fallback_ipv4_netblock_mask(8)
1992 self.describe_fallback_ipv4_netblock_mask(16)
1993 #self.describe_fallback_ipv4_netblock_mask(24)
1995 # log a message about the proportion of fallbacks in each IPv6 /12 (RIR),
1996 # /23 (smaller RIR blocks), /32 (LIR), /48 (Customer), and /64 (Host)
1997 # https://www.iana.org/assignments/ipv6-unicast-address-assignments/
1998 def describe_fallback_ipv6_netblocks(self):
1999 # these don't actually tell us anything useful
2000 #self.describe_fallback_ipv6_netblock_mask(12)
2001 #self.describe_fallback_ipv6_netblock_mask(23)
2002 self.describe_fallback_ipv6_netblock_mask(32)
2003 #self.describe_fallback_ipv6_netblock_mask(48)
2004 self.describe_fallback_ipv6_netblock_mask(64)
2006 # log a message about the proportion of fallbacks in each IPv4 and IPv6
2007 # netblock
2008 def describe_fallback_netblocks(self):
2009 self.describe_fallback_ipv4_netblocks()
2010 self.describe_fallback_ipv6_netblocks()
2012 # return a list of fallbacks whose IPv4 ORPort is port
2013 def fallbacks_on_ipv4_orport(self, port):
2014 return filter(lambda x: x.orport == port, self.fallbacks)
2016 # return a list of fallbacks whose IPv6 ORPort is port
2017 def fallbacks_on_ipv6_orport(self, port):
2018 return filter(lambda x: x.ipv6orport == port, self.fallbacks_with_ipv6())
2020 # return a list of fallbacks whose DirPort is port
2021 def fallbacks_on_dirport(self, port):
2022 return filter(lambda x: x.dirport == port, self.fallbacks)
2024 # log a message about the proportion of fallbacks using the given IPv4 ORPort,
2025 # and return that count
2026 def describe_fallback_ipv4_orport(self, port):
2027 port_count = len(self.fallbacks_on_ipv4_orport(port))
2028 fallback_count = len(self.fallbacks)
2029 logging.warning('%s of fallbacks are on IPv4 ORPort %d'%(
2030 CandidateList.describe_percentage(port_count,
2031 fallback_count),
2032 port))
2033 return port_count
2035 # log a message about the proportion of IPv6 fallbacks using the given IPv6 ORPort,
2036 # and return that count
2037 def describe_fallback_ipv6_orport(self, port):
2038 port_count = len(self.fallbacks_on_ipv6_orport(port))
2039 fallback_count = len(self.fallbacks_with_ipv6())
2040 logging.warning('%s of IPv6 fallbacks are on IPv6 ORPort %d'%(
2041 CandidateList.describe_percentage(port_count,
2042 fallback_count),
2043 port))
2044 return port_count
2046 # log a message about the proportion of fallbacks using the given DirPort,
2047 # and return that count
2048 def describe_fallback_dirport(self, port):
2049 port_count = len(self.fallbacks_on_dirport(port))
2050 fallback_count = len(self.fallbacks)
2051 logging.warning('%s of fallbacks are on DirPort %d'%(
2052 CandidateList.describe_percentage(port_count,
2053 fallback_count),
2054 port))
2055 return port_count
2057 # log a message about the proportion of fallbacks on each dirport,
2058 # each IPv4 orport, and each IPv6 orport
2059 def describe_fallback_ports(self):
2060 fallback_count = len(self.fallbacks)
2061 ipv4_or_count = fallback_count
2062 ipv4_or_count -= self.describe_fallback_ipv4_orport(443)
2063 ipv4_or_count -= self.describe_fallback_ipv4_orport(9001)
2064 logging.warning('%s of fallbacks are on other IPv4 ORPorts'%(
2065 CandidateList.describe_percentage(ipv4_or_count,
2066 fallback_count)))
2067 ipv6_fallback_count = len(self.fallbacks_with_ipv6())
2068 ipv6_or_count = ipv6_fallback_count
2069 ipv6_or_count -= self.describe_fallback_ipv6_orport(443)
2070 ipv6_or_count -= self.describe_fallback_ipv6_orport(9001)
2071 logging.warning('%s of IPv6 fallbacks are on other IPv6 ORPorts'%(
2072 CandidateList.describe_percentage(ipv6_or_count,
2073 ipv6_fallback_count)))
2074 dir_count = fallback_count
2075 dir_count -= self.describe_fallback_dirport(80)
2076 dir_count -= self.describe_fallback_dirport(9030)
2077 logging.warning('%s of fallbacks are on other DirPorts'%(
2078 CandidateList.describe_percentage(dir_count,
2079 fallback_count)))
2081 # return a list of fallbacks which cache extra-info documents
2082 def fallbacks_with_extra_info_cache(self):
2083 return filter(lambda x: x._extra_info_cache, self.fallbacks)
2085 # log a message about the proportion of fallbacks that cache extra-info docs
2086 def describe_fallback_extra_info_caches(self):
2087 extra_info_fallback_count = len(self.fallbacks_with_extra_info_cache())
2088 fallback_count = len(self.fallbacks)
2089 logging.warning('%s of fallbacks cache extra-info documents'%(
2090 CandidateList.describe_percentage(extra_info_fallback_count,
2091 fallback_count)))
2093 # return a list of fallbacks which have the Exit flag
2094 def fallbacks_with_exit(self):
2095 return filter(lambda x: x.is_exit(), self.fallbacks)
2097 # log a message about the proportion of fallbacks with an Exit flag
2098 def describe_fallback_exit_flag(self):
2099 exit_fallback_count = len(self.fallbacks_with_exit())
2100 fallback_count = len(self.fallbacks)
2101 logging.warning('%s of fallbacks have the Exit flag'%(
2102 CandidateList.describe_percentage(exit_fallback_count,
2103 fallback_count)))
2105 # return a list of fallbacks which have an IPv6 address
2106 def fallbacks_with_ipv6(self):
2107 return filter(lambda x: x.has_ipv6(), self.fallbacks)
2109 # log a message about the proportion of fallbacks on IPv6
2110 def describe_fallback_ip_family(self):
2111 ipv6_fallback_count = len(self.fallbacks_with_ipv6())
2112 fallback_count = len(self.fallbacks)
2113 logging.warning('%s of fallbacks are on IPv6'%(
2114 CandidateList.describe_percentage(ipv6_fallback_count,
2115 fallback_count)))
2117 def summarise_fallbacks(self, eligible_count, operator_count, failed_count,
2118 guard_count, target_count):
2119 s = ''
2120 # Report:
2121 # whether we checked consensus download times
2122 # the number of fallback directories (and limits/exclusions, if relevant)
2123 # min & max fallback bandwidths
2124 # #error if below minimum count
2125 if PERFORM_IPV4_DIRPORT_CHECKS or PERFORM_IPV6_DIRPORT_CHECKS:
2126 s += '/* Checked %s%s%s DirPorts served a consensus within %.1fs. */'%(
2127 'IPv4' if PERFORM_IPV4_DIRPORT_CHECKS else '',
2128 ' and ' if (PERFORM_IPV4_DIRPORT_CHECKS
2129 and PERFORM_IPV6_DIRPORT_CHECKS) else '',
2130 'IPv6' if PERFORM_IPV6_DIRPORT_CHECKS else '',
2131 CONSENSUS_DOWNLOAD_SPEED_MAX)
2132 else:
2133 s += '/* Did not check IPv4 or IPv6 DirPort consensus downloads. */'
2134 s += '\n'
2135 # Multiline C comment with #error if things go bad
2136 s += '/*'
2137 s += '\n'
2138 # Integers don't need escaping in C comments
2139 fallback_count = len(self.fallbacks)
2140 if FALLBACK_PROPORTION_OF_GUARDS is None:
2141 fallback_proportion = ''
2142 else:
2143 fallback_proportion = ', Target %d (%d * %.2f)'%(target_count,
2144 guard_count,
2145 FALLBACK_PROPORTION_OF_GUARDS)
2146 s += 'Final Count: %d (Eligible %d%s'%(fallback_count, eligible_count,
2147 fallback_proportion)
2148 if MAX_FALLBACK_COUNT is not None:
2149 s += ', Max %d'%(MAX_FALLBACK_COUNT)
2150 s += ')\n'
2151 if eligible_count != fallback_count:
2152 removed_count = eligible_count - fallback_count
2153 excess_to_target_or_max = (eligible_count - operator_count - failed_count
2154 - fallback_count)
2155 # some candidates 'Failed' the check; others 'Skipped' it because we
2156 # already had enough successful downloads
2157 s += ('Excluded: %d (Same Operator %d, Failed/Skipped Download %d, ' +
2158 'Excess %d)')%(removed_count, operator_count, failed_count,
2159 excess_to_target_or_max)
2160 s += '\n'
2161 min_fb = self.fallback_min()
2162 min_bw = min_fb._data['measured_bandwidth']
2163 max_fb = self.fallback_max()
2164 max_bw = max_fb._data['measured_bandwidth']
2165 s += 'Bandwidth Range: %.1f - %.1f MByte/s'%(min_bw/(1024.0*1024.0),
2166 max_bw/(1024.0*1024.0))
2167 s += '\n'
2168 s += '*/'
2169 if fallback_count < MIN_FALLBACK_COUNT:
2170 # We must have a minimum number of fallbacks so they are always
2171 # reachable, and are in diverse locations
2172 s += '\n'
2173 s += '#error Fallback Count %d is too low. '%(fallback_count)
2174 s += 'Must be at least %d for diversity. '%(MIN_FALLBACK_COUNT)
2175 s += 'Try adding entries to the whitelist, '
2176 s += 'or setting INCLUDE_UNLISTED_ENTRIES = True.'
2177 return s
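# illustration of the summary produced above (all values invented, with
# both DirPort checks enabled):
#   /* Checked IPv4 and IPv6 DirPorts served a consensus within 15.0s. */
#   /*
#   Final Count: 150 (Eligible 200, Target 160 (800 * 0.20), Max 200)
#   Excluded: 50 (Same Operator 20, Failed/Skipped Download 25, Excess 5)
#   Bandwidth Range: 1.0 - 100.0 MByte/s
#   */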
2179 def process_existing():
2180 logging.basicConfig(level=logging.INFO)
2181 logging.getLogger('stem').setLevel(logging.INFO)
2182 whitelist = {'data': parse_fallback_file(FALLBACK_FILE_NAME),
2183 'name': FALLBACK_FILE_NAME}
2184 blacklist = {'data': read_from_file(BLACKLIST_FILE_NAME, MAX_LIST_FILE_SIZE),
2185 'name': BLACKLIST_FILE_NAME}
2186 list_fallbacks(whitelist, blacklist)
2188 def process_default():
2189 logging.basicConfig(level=logging.WARNING)
2190 logging.getLogger('stem').setLevel(logging.WARNING)
2191 whitelist = {'data': read_from_file(WHITELIST_FILE_NAME, MAX_LIST_FILE_SIZE),
2192 'name': WHITELIST_FILE_NAME}
2193 blacklist = {'data': read_from_file(BLACKLIST_FILE_NAME, MAX_LIST_FILE_SIZE),
2194 'name': BLACKLIST_FILE_NAME}
2195 list_fallbacks(whitelist, blacklist)
2197 ## Main Function
2198 def main():
2199 if get_command() == 'check_existing':
2200 process_existing()
2201 else:
2202 process_default()
2204 def get_command():
2205 if len(sys.argv) == 2:
2206 return sys.argv[1]
2207 else:
2208 return None
2210 def log_excluded(msg, *args):
2211 if get_command() == 'check_existing':
2212 logging.warning(msg, *args)
2213 else:
2214 logging.info(msg, *args)
2216 def list_fallbacks(whitelist, blacklist):
2217 """ Fetches required onionoo documents and evaluates the
2218 fallback directory criteria for each of the relays """
2220 print "/* type=fallback */"
2221 print ("/* version={} */"
2222 .format(cleanse_c_multiline_comment(FALLBACK_FORMAT_VERSION)))
2223 now = datetime.datetime.utcnow()
2224 timestamp = now.strftime('%Y%m%d%H%M%S')
2225 print ("/* timestamp={} */"
2226 .format(cleanse_c_multiline_comment(timestamp)))
2227 # end the header with a separator, to make it easier for parsers
2228 print SECTION_SEPARATOR_COMMENT
2230 logging.warning('Downloading and parsing Onionoo data. ' +
2231 'This may take some time.')
2232 # find relays that could be fallbacks
2233 candidates = CandidateList()
2234 candidates.add_relays()
2236 # work out how many fallbacks we want
2237 guard_count = candidates.count_guards()
2238 if FALLBACK_PROPORTION_OF_GUARDS is None:
2239 target_count = guard_count
2240 else:
2241 target_count = int(guard_count * FALLBACK_PROPORTION_OF_GUARDS)
2242 # the maximum number of fallbacks is the least of:
2243 # - the target fallback count (FALLBACK_PROPORTION_OF_GUARDS * guard count)
2244 # - the maximum fallback count (MAX_FALLBACK_COUNT)
2245 if MAX_FALLBACK_COUNT is None:
2246 max_count = target_count
2247 else:
2248 max_count = min(target_count, MAX_FALLBACK_COUNT)
2250 candidates.compute_fallbacks()
2251 prefilter_fallbacks = copy.copy(candidates.fallbacks)
2253 # filter with the whitelist and blacklist
2254 # if a relay has changed IPv4 address or ports recently, it will be excluded
2255 # as ineligible before we call apply_filter_lists, and so there will be no
2256 # warning that the details have changed from those in the whitelist.
2257 # instead, there will be an info-level log during the eligibility check.
2258 initial_count = len(candidates.fallbacks)
2259 excluded_count = candidates.apply_filter_lists(whitelist, blacklist)
2260 print candidates.summarise_filters(initial_count, excluded_count)
2261 eligible_count = len(candidates.fallbacks)
2263 # calculate the measured bandwidth of each relay,
2264 # then remove low-bandwidth relays
2265 candidates.calculate_measured_bandwidth()
2266 candidates.remove_low_bandwidth_relays()
2268 # print the raw fallback list
2269 #for x in candidates.fallbacks:
2270 # print x.fallbackdir_line(True)
2271 # print json.dumps(candidates[x]._data, sort_keys=True, indent=4,
2272 # separators=(',', ': '), default=json_util.default)
2274 # impose mandatory conditions here, like one per contact, family, IP
2275 # in measured bandwidth order
2276 candidates.sort_fallbacks_by_measured_bandwidth()
2277 operator_count = 0
2278 # only impose these limits on the final list - operators can nominate
2279 # multiple candidate fallbacks, and then we choose the best set
2280 if not OUTPUT_CANDIDATES:
2281 operator_count += candidates.limit_fallbacks_same_ip()
2282 operator_count += candidates.limit_fallbacks_same_contact()
2283 operator_count += candidates.limit_fallbacks_same_family()
2285 # check if each candidate can serve a consensus
2286 # there's a small risk we've eliminated relays from the same operator that
2287 # can serve a consensus, in favour of one that can't
2288 # but given it takes up to 15 seconds to check each consensus download,
2289 # the risk is worth it
2290 if PERFORM_IPV4_DIRPORT_CHECKS or PERFORM_IPV6_DIRPORT_CHECKS:
2291 logging.warning('Checking consensus download speeds. ' +
2292 'This may take some time.')
2293 failed_count = candidates.perform_download_consensus_checks(max_count)
2295 # work out which fallbacks cache extra-infos
2296 candidates.mark_extra_info_caches()
2298 # analyse and log interesting diversity metrics
2299 # like netblock, ports, exit, IPv4-only
2300 # (we can't easily analyse AS, and it's hard to accurately analyse country)
2301 candidates.describe_fallback_ip_family()
2302 # if we can't import the ipaddress module, we can't do netblock analysis
2303 if HAVE_IPADDRESS:
2304 candidates.describe_fallback_netblocks()
2305 candidates.describe_fallback_ports()
2306 candidates.describe_fallback_extra_info_caches()
2307 candidates.describe_fallback_exit_flag()
2309 # output C comments summarising the fallback selection process
2310 if len(candidates.fallbacks) > 0:
2311 print candidates.summarise_fallbacks(eligible_count, operator_count,
2312 failed_count, guard_count,
2313 target_count)
2314 else:
2315 print '/* No Fallbacks met criteria */'
2317 # output C comments specifying the OnionOO data used to create the list
2318 for s in fetch_source_list():
2319 print describe_fetch_source(s)
2321 # start the list with a separator, to make it easy for parsers
2322 print SECTION_SEPARATOR_COMMENT
2324 # sort the list differently depending on why we've created it:
2325 # if we're outputting the final fallback list, sort by fingerprint
2326 # this makes diffs much more stable
2327 # otherwise, if we're trying to find a bandwidth cutoff, or we want to
2328 # contact operators in priority order, sort by bandwidth (not yet
2329 # implemented)
2330 # otherwise, if we're contacting operators, sort by contact
2331 candidates.sort_fallbacks_by(OUTPUT_SORT_FIELD)
2333 for x in candidates.fallbacks:
2334 print x.fallbackdir_line(candidates.fallbacks, prefilter_fallbacks)
2336 if __name__ == "__main__":
2337 main()