6 # scripts/maint/updateFallbackDirs.py > src/or/fallback_dirs.inc 2> fallback_dirs.log
8 # Check the existing list:
9 # scripts/maint/updateFallbackDirs.py check_existing > fallback_dirs.inc.ok 2> fallback_dirs.log
10 # mv fallback_dirs.inc.ok src/or/fallback_dirs.inc
12 # This script should be run from a stable, reliable network connection,
13 # with no other network activity (and not over tor).
14 # If this is not possible, please disable:
15 # PERFORM_IPV4_DIRPORT_CHECKS and PERFORM_IPV6_DIRPORT_CHECKS
17 # Needs dateutil, stem, and potentially other python packages.
18 # Optionally uses ipaddress (python 3 builtin) or py2-ipaddress (package)
19 # for netblock analysis.
21 # Then read the logs to make sure the fallbacks aren't dominated by a single
24 # Script by weasel, April 2015
25 # Portions by gsathya & karsten, 2013
26 # https://trac.torproject.org/projects/tor/attachment/ticket/8374/dir_list.2.py
27 # Modifications by teor, 2015
41 import dateutil
.parser
42 # bson_lazy provides bson
43 #from bson import json_util
47 from stem
.descriptor
import DocumentHandler
48 from stem
.descriptor
.remote
import get_consensus
, get_server_descriptors
, MAX_FINGERPRINTS
51 logging
.root
.name
= ''
53 HAVE_IPADDRESS
= False
55 # python 3 builtin, or install package py2-ipaddress
56 # there are several ipaddress implementations for python 2
57 # with slightly different semantics with str typed text
58 # fortunately, all our IP addresses are in unicode
62 # if this happens, we avoid doing netblock analysis
63 logging
.warning('Unable to import ipaddress, please install py2-ipaddress.' +
64 ' A fallback list will be created, but optional netblock' +
65 ' analysis will not be performed.')
67 ## Top-Level Configuration
69 # We use semantic versioning: https://semver.org
71 # * major changes include removing a mandatory field, or anything else that
72 # would break an appropriately tolerant parser,
73 # * minor changes include adding a field,
74 # * patch changes include changing header comments or other unstructured
76 FALLBACK_FORMAT_VERSION
= '2.0.0'
77 SECTION_SEPARATOR_BASE
= '====='
78 SECTION_SEPARATOR_COMMENT
= '/* ' + SECTION_SEPARATOR_BASE
+ ' */'
80 # Output all candidate fallbacks, or only output selected fallbacks?
81 OUTPUT_CANDIDATES
= False
83 # Perform DirPort checks over IPv4?
84 # Change this to False if IPv4 doesn't work for you, or if you don't want to
85 # download a consensus for each fallback
86 # Don't check ~1000 candidates when OUTPUT_CANDIDATES is True
87 PERFORM_IPV4_DIRPORT_CHECKS
= False if OUTPUT_CANDIDATES
else True
89 # Perform DirPort checks over IPv6?
90 # If you know IPv6 works for you, set this to True
91 # This will exclude IPv6 relays without an IPv6 DirPort configured
92 # So it's best left at False until #18394 is implemented
93 # Don't check ~1000 candidates when OUTPUT_CANDIDATES is True
94 PERFORM_IPV6_DIRPORT_CHECKS
= False if OUTPUT_CANDIDATES
else False
96 # Must relays be running now?
97 MUST_BE_RUNNING_NOW
= (PERFORM_IPV4_DIRPORT_CHECKS
98 or PERFORM_IPV6_DIRPORT_CHECKS
)
100 # Clients have been using microdesc consensuses by default for a while now
101 DOWNLOAD_MICRODESC_CONSENSUS
= True
103 # If a relay delivers an expired consensus, if it expired less than this many
104 # seconds ago, we still allow the relay. This should never be less than -90,
105 # as all directory mirrors should have downloaded a consensus 90 minutes
106 # before it expires. It should never be more than 24 hours, because clients
107 # reject consensuses that are older than REASONABLY_LIVE_TIME.
108 # For the consensus expiry check to be accurate, the machine running this
109 # script needs an accurate clock.
111 # Relays on 0.3.0 and later return a 404 when they are about to serve an
112 # expired consensus. This makes them fail the download check.
113 # We use a tolerance of 0, so that 0.2.x series relays also fail the download
114 # check if they serve an expired consensus.
115 CONSENSUS_EXPIRY_TOLERANCE
= 0
117 # Output fallback name, flags, bandwidth, and ContactInfo in a C comment?
118 OUTPUT_COMMENTS
= True if OUTPUT_CANDIDATES
else False
120 # Output matching ContactInfo in fallbacks list or the blacklist?
121 # Useful if you're trying to contact operators
122 CONTACT_COUNT
= True if OUTPUT_CANDIDATES
else False
123 CONTACT_BLACKLIST_COUNT
= True if OUTPUT_CANDIDATES
else False
125 # How the list should be sorted:
126 # fingerprint: is useful for stable diffs of fallback lists
127 # measured_bandwidth: is useful when pruning the list based on bandwidth
128 # contact: is useful for contacting operators once the list has been pruned
129 OUTPUT_SORT_FIELD
= 'contact' if OUTPUT_CANDIDATES
else 'fingerprint'
133 ONIONOO
= 'https://onionoo.torproject.org/'
134 #ONIONOO = 'https://onionoo.thecthulhu.com/'
136 # Don't bother going out to the Internet, just use the files available locally,
137 # even if they're very old
138 LOCAL_FILES_ONLY
= False
140 ## Whitelist / Blacklist Filter Settings
142 # The whitelist contains entries that are included if all attributes match
143 # (IPv4, dirport, orport, id, and optionally IPv6 and IPv6 orport)
144 # The blacklist contains (partial) entries that are excluded if any
145 # sufficiently specific group of attributes matches:
151 # If neither port is included in the blacklist, the entire IP address is
154 # What happens to entries in neither list?
155 # When True, they are included, when False, they are excluded
156 INCLUDE_UNLISTED_ENTRIES
= True if OUTPUT_CANDIDATES
else False
158 # If an entry is in both lists, what happens?
159 # When True, it is excluded, when False, it is included
160 BLACKLIST_EXCLUDES_WHITELIST_ENTRIES
= True
162 WHITELIST_FILE_NAME
= 'scripts/maint/fallback.whitelist'
163 BLACKLIST_FILE_NAME
= 'scripts/maint/fallback.blacklist'
164 FALLBACK_FILE_NAME
= 'src/or/fallback_dirs.inc'
166 # The number of bytes we'll read from a filter file before giving up
167 MAX_LIST_FILE_SIZE
= 1024 * 1024
169 ## Eligibility Settings
171 # Require fallbacks to have the same address and port for a set amount of time
172 # We used to have this at 1 week, but that caused many fallback failures, which
173 # meant that we had to rebuild the list more often. We want fallbacks to be
174 # stable for 2 years, so we set it to a few months.
176 # If a relay changes address or port, that's it, it's not useful any more,
177 # because clients can't find it
178 ADDRESS_AND_PORT_STABLE_DAYS
= 90
179 # We ignore relays that have been down for more than this period
180 MAX_DOWNTIME_DAYS
= 0 if MUST_BE_RUNNING_NOW
else 7
181 # FallbackDirs must have a time-weighted-fraction that is greater than or
183 # Mirrors that are down half the time are still useful half the time
186 # Guard flags are removed for some time after a relay restarts, so we ignore
189 # FallbackDirs must have a time-weighted-fraction that is less than or equal
191 # .00 means no bad exits
192 PERMITTED_BADEXIT
= .00
194 # older entries' weights are adjusted with ALPHA^(age in days)
197 # this factor is used to scale OnionOO entries to [0,1]
198 ONIONOO_SCALE_ONE
= 999.
200 ## Fallback Count Limits
202 # The target for these parameters is 20% of the guards in the network
203 # This is around 200 as of October 2015
205 FALLBACK_PROPORTION_OF_GUARDS
= None if OUTPUT_CANDIDATES
else _FB_POG
207 # Limit the number of fallbacks (eliminating lowest by advertised bandwidth)
208 MAX_FALLBACK_COUNT
= None if OUTPUT_CANDIDATES
else 200
209 # Emit a C #error if the number of fallbacks is less than expected
210 MIN_FALLBACK_COUNT
= 0 if OUTPUT_CANDIDATES
else MAX_FALLBACK_COUNT
*0.5
212 # The maximum number of fallbacks on the same address, contact, or family
214 # With 150 fallbacks, this means each operator sees 5% of client bootstraps.
216 # - We try to limit guard and exit operators to 5% of the network
217 # - The directory authorities used to see 11% of client bootstraps each
219 # We also don't want too much of the list to go down if a single operator
220 # has to move all their relays.
221 MAX_FALLBACKS_PER_IP
= 1
222 MAX_FALLBACKS_PER_IPV4
= MAX_FALLBACKS_PER_IP
223 MAX_FALLBACKS_PER_IPV6
= MAX_FALLBACKS_PER_IP
224 MAX_FALLBACKS_PER_CONTACT
= 7
225 MAX_FALLBACKS_PER_FAMILY
= 7
227 ## Fallback Bandwidth Requirements
229 # Any fallback with the Exit flag has its bandwidth multiplied by this fraction
230 # to make sure we aren't further overloading exits
231 # (Set to 1.0, because we asked that only lightly loaded exits opt-in,
232 # and the extra load really isn't that much for large relays.)
233 EXIT_BANDWIDTH_FRACTION
= 1.0
235 # If a single fallback's bandwidth is too low, it's pointless adding it
236 # We expect fallbacks to handle an extra 10 kilobytes per second of traffic
237 # Make sure they can support fifty times the expected extra load
239 # We convert this to a consensus weight before applying the filter,
240 # because all the bandwidth amounts are specified by the relay
241 MIN_BANDWIDTH
= 50.0 * 10.0 * 1024.0
243 # Clients will time out after 30 seconds trying to download a consensus
244 # So allow fallback directories half that to deliver a consensus
245 # The exact download times might change based on the network connection
246 # running this script, but only by a few seconds
247 # There is also about a second of python overhead
248 CONSENSUS_DOWNLOAD_SPEED_MAX
= 15.0
249 # If the relay fails a consensus check, retry the download
250 # This avoids delisting a relay due to transient network conditions
251 CONSENSUS_DOWNLOAD_RETRY
= True
256 return datetime
.datetime
.strptime(t
, "%Y-%m-%d %H:%M:%S")
def remove_bad_chars(raw_string, bad_char_list):
  """Return raw_string with every entry of bad_char_list stripped out.

  Entries may be single characters or multi-character substrings
  (callers pass sequences like '/*'), so each one is removed with
  str.replace rather than per-character filtering."""
  result = raw_string
  for bad in bad_char_list:
    result = result.replace(bad, '')
  return result
265 def cleanse_unprintable(raw_string
):
266 # Remove all unprintable characters
269 if c
in string
.printable
:
271 return cleansed_string
def cleanse_whitespace(raw_string):
  """Return raw_string with every whitespace character (as defined by
  string.whitespace) replaced by a single space.

  Note this maps character-for-character, so e.g. '\r\n' becomes two
  spaces, matching the original replace-per-character behaviour."""
  return ''.join(' ' if ch in string.whitespace else ch
                 for ch in raw_string)
280 def cleanse_c_multiline_comment(raw_string
):
281 cleansed_string
= raw_string
282 # Embedded newlines should be removed by tor/onionoo, but let's be paranoid
283 cleansed_string
= cleanse_whitespace(cleansed_string
)
284 # ContactInfo and Version can be arbitrary binary data
285 cleansed_string
= cleanse_unprintable(cleansed_string
)
286 # Prevent a malicious / unanticipated string from breaking out
287 # of a C-style multiline comment
288 # This removes '/*' and '*/' and '//'
290 # Prevent a malicious string from using C nulls
291 bad_char_list
+= '\0'
292 # Avoid confusing parsers by making sure there is only one comma per fallback
294 # Avoid confusing parsers by making sure there is only one equals per field
296 # Be safer by removing bad characters entirely
297 cleansed_string
= remove_bad_chars(cleansed_string
, bad_char_list
)
298 # Some compilers may further process the content of comments
299 # There isn't much we can do to cover every possible case
300 # But comment-based directives are typically only advisory
301 return cleansed_string
303 def cleanse_c_string(raw_string
):
304 cleansed_string
= raw_string
305 # Embedded newlines should be removed by tor/onionoo, but let's be paranoid
306 cleansed_string
= cleanse_whitespace(cleansed_string
)
307 # ContactInfo and Version can be arbitrary binary data
308 cleansed_string
= cleanse_unprintable(cleansed_string
)
309 # Prevent a malicious address/fingerprint string from breaking out
310 # of a C-style string
312 # Prevent a malicious string from using escapes
313 bad_char_list
+= '\\'
314 # Prevent a malicious string from using C nulls
315 bad_char_list
+= '\0'
316 # Avoid confusing parsers by making sure there is only one comma per fallback
318 # Avoid confusing parsers by making sure there is only one equals per field
320 # Be safer by removing bad characters entirely
321 cleansed_string
= remove_bad_chars(cleansed_string
, bad_char_list
)
322 # Some compilers may further process the content of strings
323 # There isn't much we can do to cover every possible case
324 # But this typically only results in changes to the string data
325 return cleansed_string
327 ## OnionOO Source Functions
329 # a dictionary of source metadata for each onionoo query we've made
332 # register source metadata for 'what'
333 # assumes we only retrieve one document for each 'what'
def register_fetch_source(what, url, relays_published, version):
  """Record source metadata for the onionoo query 'what'.

  Stores the query URL, the consensus publication time reported by
  onionoo, and the onionoo protocol version in the module-level
  fetch_source dictionary. Assumes one document per 'what'; a repeat
  registration for the same 'what' overwrites the previous entry."""
  fetch_source[what] = {
      'url': url,
      'relays_published': relays_published,
      'version': version,
  }
340 # list each registered source's 'what'
def fetch_source_list():
  """Return every registered source's 'what' key, in sorted order."""
  # iterating the dict yields its keys, same as .keys()
  return sorted(fetch_source)
344 # given 'what', provide a multiline C comment describing the source
345 def describe_fetch_source(what
):
348 desc
+= 'Onionoo Source: '
349 desc
+= cleanse_c_multiline_comment(what
)
351 desc
+= cleanse_c_multiline_comment(fetch_source
[what
]['relays_published'])
353 desc
+= cleanse_c_multiline_comment(fetch_source
[what
]['version'])
356 desc
+= cleanse_c_multiline_comment(fetch_source
[what
]['url'])
361 ## File Processing Functions
363 def write_to_file(str, file_name
, max_len
):
365 with
open(file_name
, 'w') as f
:
366 f
.write(str[0:max_len
])
367 except EnvironmentError, error
:
368 logging
.error('Writing file %s failed: %d: %s'%
374 def read_from_file(file_name
, max_len
):
376 if os
.path
.isfile(file_name
):
377 with
open(file_name
, 'r') as f
:
378 return f
.read(max_len
)
379 except EnvironmentError, error
:
380 logging
.info('Loading file %s failed: %d: %s'%
387 def parse_fallback_file(file_name
):
388 file_data
= read_from_file(file_name
, MAX_LIST_FILE_SIZE
)
389 file_data
= cleanse_unprintable(file_data
)
390 file_data
= remove_bad_chars(file_data
, '\n"\0')
391 file_data
= re
.sub('/\*.*?\*/', '', file_data
)
392 file_data
= file_data
.replace(',', '\n')
393 file_data
= file_data
.replace(' weight=10', '')
396 def load_possibly_compressed_response_json(response
):
397 if response
.info().get('Content-Encoding') == 'gzip':
398 buf
= StringIO
.StringIO( response
.read() )
399 f
= gzip
.GzipFile(fileobj
=buf
)
402 return json
.load(response
)
404 def load_json_from_file(json_file_name
):
405 # An exception here may be resolved by deleting the .last_modified
406 # and .json files, and re-running the script
408 with
open(json_file_name
, 'r') as f
:
410 except EnvironmentError, error
:
411 raise Exception('Reading not-modified json file %s failed: %d: %s'%
419 def datestr_to_datetime(datestr
):
420 # Parse datetimes like: Fri, 02 Oct 2015 13:34:14 GMT
421 if datestr
is not None:
422 dt
= dateutil
.parser
.parse(datestr
)
424 # Never modified - use start of epoch
425 dt
= datetime
.datetime
.utcfromtimestamp(0)
426 # strip any timezone out (in case they're supported in future)
427 dt
= dt
.replace(tzinfo
=None)
430 def onionoo_fetch(what
, **kwargs
):
432 params
['type'] = 'relay'
433 #params['limit'] = 10
434 params
['first_seen_days'] = '%d-'%(ADDRESS_AND_PORT_STABLE_DAYS)
435 params
['last_seen_days'] = '-%d'%(MAX_DOWNTIME_DAYS)
436 params
['flag'] = 'V2Dir'
437 url
= ONIONOO
+ what
+ '?' + urllib
.urlencode(params
)
439 # Unfortunately, the URL is too long for some OS filenames,
440 # but we still don't want to get files from different URLs mixed up
441 base_file_name
= what
+ '-' + hashlib
.sha1(url
).hexdigest()
443 full_url_file_name
= base_file_name
+ '.full_url'
444 MAX_FULL_URL_LENGTH
= 1024
446 last_modified_file_name
= base_file_name
+ '.last_modified'
447 MAX_LAST_MODIFIED_LENGTH
= 64
449 json_file_name
= base_file_name
+ '.json'
452 # Read from the local file, don't write to anything
453 response_json
= load_json_from_file(json_file_name
)
455 # store the full URL to a file for debugging
456 # no need to compare as long as you trust SHA-1
457 write_to_file(url
, full_url_file_name
, MAX_FULL_URL_LENGTH
)
459 request
= urllib2
.Request(url
)
460 request
.add_header('Accept-encoding', 'gzip')
462 # load the last modified date from the file, if it exists
463 last_mod_date
= read_from_file(last_modified_file_name
,
464 MAX_LAST_MODIFIED_LENGTH
)
465 if last_mod_date
is not None:
466 request
.add_header('If-modified-since', last_mod_date
)
468 # Parse last modified date
469 last_mod
= datestr_to_datetime(last_mod_date
)
471 # Not Modified and still recent enough to be useful
472 # Onionoo / Globe used to use 6 hours, but we can afford a day
473 required_freshness
= datetime
.datetime
.utcnow()
474 # strip any timezone out (to match dateutil.parser)
475 required_freshness
= required_freshness
.replace(tzinfo
=None)
476 required_freshness
-= datetime
.timedelta(hours
=24)
478 # Make the OnionOO request
481 response
= urllib2
.urlopen(request
)
482 response_code
= response
.getcode()
483 except urllib2
.HTTPError
, error
:
484 response_code
= error
.code
485 if response_code
== 304: # not modified
488 raise Exception("Could not get " + url
+ ": "
489 + str(error
.code
) + ": " + error
.reason
)
491 if response_code
== 200: # OK
492 last_mod
= datestr_to_datetime(response
.info().get('Last-Modified'))
494 # Check for freshness
495 if last_mod
< required_freshness
:
496 if last_mod_date
is not None:
497 # This check sometimes fails transiently, retry the script if it does
498 date_message
= "Outdated data: last updated " + last_mod_date
500 date_message
= "No data: never downloaded "
501 raise Exception(date_message
+ " from " + url
)
504 if response_code
== 200: # OK
506 response_json
= load_possibly_compressed_response_json(response
)
508 with
open(json_file_name
, 'w') as f
:
509 # use the most compact json representation to save space
510 json
.dump(response_json
, f
, separators
=(',',':'))
512 # store the last modified date in its own file
513 if response
.info().get('Last-modified') is not None:
514 write_to_file(response
.info().get('Last-Modified'),
515 last_modified_file_name
,
516 MAX_LAST_MODIFIED_LENGTH
)
518 elif response_code
== 304: # Not Modified
520 response_json
= load_json_from_file(json_file_name
)
522 else: # Unexpected HTTP response code not covered in the HTTPError above
523 raise Exception("Unexpected HTTP response code to " + url
+ ": "
524 + str(response_code
))
526 register_fetch_source(what
,
528 response_json
['relays_published'],
529 response_json
['version'])
533 def fetch(what
, **kwargs
):
534 #x = onionoo_fetch(what, **kwargs)
535 # don't use sort_keys, as the order of or_addresses is significant
536 #print json.dumps(x, indent=4, separators=(',', ': '))
539 return onionoo_fetch(what
, **kwargs
)
541 ## Fallback Candidate Class
543 class Candidate(object):
544 CUTOFF_ADDRESS_AND_PORT_STABLE
= (datetime
.datetime
.utcnow()
545 - datetime
.timedelta(ADDRESS_AND_PORT_STABLE_DAYS
))
547 def __init__(self
, details
):
548 for f
in ['fingerprint', 'nickname', 'last_changed_address_or_port',
549 'consensus_weight', 'or_addresses', 'dir_address']:
550 if not f
in details
: raise Exception("Document has no %s field."%(f
,))
552 if not 'contact' in details
:
553 details
['contact'] = None
554 if not 'flags' in details
or details
['flags'] is None:
555 details
['flags'] = []
556 if (not 'advertised_bandwidth' in details
557 or details
['advertised_bandwidth'] is None):
558 # relays without advertised bandwidth have it calculated from their
560 details
['advertised_bandwidth'] = 0
561 if (not 'effective_family' in details
562 or details
['effective_family'] is None):
563 details
['effective_family'] = []
564 if not 'platform' in details
:
565 details
['platform'] = None
566 details
['last_changed_address_or_port'] = parse_ts(
567 details
['last_changed_address_or_port'])
569 self
._stable
_sort
_or
_addresses
()
571 self
._fpr
= self
._data
['fingerprint']
572 self
._running
= self
._guard
= self
._v
2dir
= 0.
573 self
._split
_dirport
()
574 self
._compute
_orport
()
575 if self
.orport
is None:
576 raise Exception("Failed to get an orport for %s."%(self
._fpr
,))
577 self
._compute
_ipv
6addr
()
578 if not self
.has_ipv6():
579 logging
.debug("Failed to get an ipv6 address for %s."%(self
._fpr
,))
580 self
._compute
_version
()
581 self
._extra
_info
_cache
= None
583 def _stable_sort_or_addresses(self
):
584 # replace self._data['or_addresses'] with a stable ordering,
585 # sorting the secondary addresses in string order
586 # leave the received order in self._data['or_addresses_raw']
587 self
._data
['or_addresses_raw'] = self
._data
['or_addresses']
588 or_address_primary
= self
._data
['or_addresses'][:1]
589 # subsequent entries in the or_addresses array are in an arbitrary order
590 # so we stabilise the addresses by sorting them in string order
591 or_addresses_secondaries_stable
= sorted(self
._data
['or_addresses'][1:])
592 or_addresses_stable
= or_address_primary
+ or_addresses_secondaries_stable
593 self
._data
['or_addresses'] = or_addresses_stable
595 def get_fingerprint(self
):
598 # is_valid_ipv[46]_address by gsathya, karsten, 2013
600 def is_valid_ipv4_address(address
):
601 if not isinstance(address
, (str, unicode)):
604 # check if there are four period separated values
605 if address
.count(".") != 3:
608 # checks that each value in the octet are decimal values between 0-255
609 for entry
in address
.split("."):
610 if not entry
.isdigit() or int(entry
) < 0 or int(entry
) > 255:
612 elif entry
[0] == "0" and len(entry
) > 1:
613 return False # leading zeros, for instance in "1.2.3.001"
618 def is_valid_ipv6_address(address
):
619 if not isinstance(address
, (str, unicode)):
623 address
= address
[1:-1]
625 # addresses are made up of eight colon separated groups of four hex digits
626 # with leading zeros being optional
627 # https://en.wikipedia.org/wiki/IPv6#Address_format
629 colon_count
= address
.count(":")
632 return False # too many groups
633 elif colon_count
!= 7 and not "::" in address
:
634 return False # not enough groups and none are collapsed
635 elif address
.count("::") > 1 or ":::" in address
:
636 return False # multiple groupings of zeros can't be collapsed
638 found_ipv4_on_previous_entry
= False
639 for entry
in address
.split(":"):
640 # If an IPv6 address has an embedded IPv4 address,
641 # it must be the last entry
642 if found_ipv4_on_previous_entry
:
644 if not re
.match("^[0-9a-fA-f]{0,4}$", entry
):
645 if not Candidate
.is_valid_ipv4_address(entry
):
648 found_ipv4_on_previous_entry
= True
652 def _split_dirport(self
):
653 # Split the dir_address into dirip and dirport
654 (self
.dirip
, _dirport
) = self
._data
['dir_address'].split(':', 2)
655 self
.dirport
= int(_dirport
)
657 def _compute_orport(self
):
658 # Choose the first ORPort that's on the same IPv4 address as the DirPort.
659 # In rare circumstances, this might not be the primary ORPort address.
660 # However, _stable_sort_or_addresses() ensures we choose the same one
661 # every time, even if onionoo changes the order of the secondaries.
662 self
._split
_dirport
()
664 for i
in self
._data
['or_addresses']:
665 if i
!= self
._data
['or_addresses'][0]:
666 logging
.debug('Secondary IPv4 Address Used for %s: %s'%(self
._fpr
, i
))
667 (ipaddr
, port
) = i
.rsplit(':', 1)
668 if (ipaddr
== self
.dirip
) and Candidate
.is_valid_ipv4_address(ipaddr
):
669 self
.orport
= int(port
)
672 def _compute_ipv6addr(self
):
673 # Choose the first IPv6 address that uses the same port as the ORPort
674 # Or, choose the first IPv6 address in the list
675 # _stable_sort_or_addresses() ensures we choose the same IPv6 address
676 # every time, even if onionoo changes the order of the secondaries.
678 self
.ipv6orport
= None
679 # Choose the first IPv6 address that uses the same port as the ORPort
680 for i
in self
._data
['or_addresses']:
681 (ipaddr
, port
) = i
.rsplit(':', 1)
682 if (port
== self
.orport
) and Candidate
.is_valid_ipv6_address(ipaddr
):
683 self
.ipv6addr
= ipaddr
684 self
.ipv6orport
= int(port
)
686 # Choose the first IPv6 address in the list
687 for i
in self
._data
['or_addresses']:
688 (ipaddr
, port
) = i
.rsplit(':', 1)
689 if Candidate
.is_valid_ipv6_address(ipaddr
):
690 self
.ipv6addr
= ipaddr
691 self
.ipv6orport
= int(port
)
694 def _compute_version(self
):
695 # parse the version out of the platform string
696 # The platform looks like: "Tor 0.2.7.6 on Linux"
697 self
._data
['version'] = None
698 if self
._data
['platform'] is None:
700 # be tolerant of weird whitespacing, use a whitespace split
701 tokens
= self
._data
['platform'].split()
703 vnums
= token
.split('.')
704 # if it's at least a.b.c.d, with potentially an -alpha-dev, -alpha, -rc
705 if (len(vnums
) >= 4 and vnums
[0].isdigit() and vnums
[1].isdigit() and
707 self
._data
['version'] = token
711 # bug #20499 affects versions from 0.2.9.1-alpha-dev to 0.2.9.4-alpha-dev
712 # and version 0.3.0.0-alpha-dev
713 # Exhaustive lists are hard to get wrong
714 STALE_CONSENSUS_VERSIONS
= ['0.2.9.1-alpha-dev',
724 def is_valid_version(self
):
725 # call _compute_version before calling this
726 # is the version of the relay a version we want as a fallback?
727 # checks both recommended versions and bug #20499 / #20509
729 # if the relay doesn't have a recommended version field, exclude the relay
730 if not self
._data
.has_key('recommended_version'):
731 log_excluded('%s not a candidate: no recommended_version field',
734 if not self
._data
['recommended_version']:
735 log_excluded('%s not a candidate: version not recommended', self
._fpr
)
737 # if the relay doesn't have version field, exclude the relay
738 if not self
._data
.has_key('version'):
739 log_excluded('%s not a candidate: no version field', self
._fpr
)
741 if self
._data
['version'] in Candidate
.STALE_CONSENSUS_VERSIONS
:
742 logging
.warning('%s not a candidate: version delivers stale consensuses',
748 def _extract_generic_history(history
, which
='unknown'):
749 # given a tree like this:
753 # "factor": 0.001001001001001001,
754 # "first": "2015-02-27 06:00:00",
756 # "last": "2015-03-30 06:00:00",
764 # "factor": 0.001001001001001001,
765 # "first": "2015-03-23 07:30:00",
767 # "last": "2015-03-30 07:30:00",
772 # "factor": 0.001001001001001001,
773 # "first": "2014-04-11 00:00:00",
774 # "interval": 172800,
775 # "last": "2015-03-29 00:00:00",
780 # "factor": 0.001001001001001001,
781 # "first": "2014-12-28 06:00:00",
783 # "last": "2015-03-30 06:00:00",
787 # extract exactly one piece of data per time interval,
788 # using smaller intervals where available.
790 # returns list of (age, length, value) dictionaries.
794 periods
= history
.keys()
795 periods
.sort(key
= lambda x
: history
[x
]['interval'])
796 now
= datetime
.datetime
.utcnow()
800 interval
= datetime
.timedelta(seconds
= h
['interval'])
801 this_ts
= parse_ts(h
['last'])
803 if (len(h
['values']) != h
['count']):
804 logging
.warning('Inconsistent value count in %s document for %s'
806 for v
in reversed(h
['values']):
807 if (this_ts
<= newest
):
810 agetmp1
= (agt1
.microseconds
+ (agt1
.seconds
+ agt1
.days
* 24 * 3600)
812 agetmp2
= (agt2
.microseconds
+ (agt2
.seconds
+ agt2
.days
* 24 * 3600)
814 generic_history
.append(
822 if (this_ts
+ interval
!= parse_ts(h
['first'])):
823 logging
.warning('Inconsistent time information in %s document for %s'
826 #print json.dumps(generic_history, sort_keys=True,
827 # indent=4, separators=(',', ': '))
828 return generic_history
831 def _avg_generic_history(generic_history
):
833 for i
in generic_history
:
834 if i
['age'] > (ADDRESS_AND_PORT_STABLE_DAYS
* 24 * 3600):
836 if (i
['length'] is not None
837 and i
['age'] is not None
838 and i
['value'] is not None):
839 w
= i
['length'] * math
.pow(AGE_ALPHA
, i
['age']/(3600*24))
840 a
.append( (i
['value'] * w
, w
) )
842 sv
= math
.fsum(map(lambda x
: x
[0], a
))
843 sw
= math
.fsum(map(lambda x
: x
[1], a
))
851 def _add_generic_history(self
, history
):
852 periods
= r
['read_history'].keys()
853 periods
.sort(key
= lambda x
: r
['read_history'][x
]['interval'] )
857 def add_running_history(self
, history
):
860 def add_uptime(self
, uptime
):
861 logging
.debug('Adding uptime %s.'%(self
._fpr
,))
863 # flags we care about: Running, V2Dir, Guard
864 if not 'flags' in uptime
:
865 logging
.debug('No flags in document for %s.'%(self
._fpr
,))
868 for f
in ['Running', 'Guard', 'V2Dir']:
869 if not f
in uptime
['flags']:
870 logging
.debug('No %s in flags for %s.'%(f
, self
._fpr
,))
873 running
= self
._extract
_generic
_history
(uptime
['flags']['Running'],
874 '%s-Running'%(self
._fpr
))
875 guard
= self
._extract
_generic
_history
(uptime
['flags']['Guard'],
876 '%s-Guard'%(self
._fpr
))
877 v2dir
= self
._extract
_generic
_history
(uptime
['flags']['V2Dir'],
878 '%s-V2Dir'%(self
._fpr
))
879 if 'BadExit' in uptime
['flags']:
880 badexit
= self
._extract
_generic
_history
(uptime
['flags']['BadExit'],
881 '%s-BadExit'%(self
._fpr
))
883 self
._running
= self
._avg
_generic
_history
(running
) / ONIONOO_SCALE_ONE
884 self
._guard
= self
._avg
_generic
_history
(guard
) / ONIONOO_SCALE_ONE
885 self
._v
2dir
= self
._avg
_generic
_history
(v2dir
) / ONIONOO_SCALE_ONE
887 if 'BadExit' in uptime
['flags']:
888 self
._badexit
= self
._avg
_generic
_history
(badexit
) / ONIONOO_SCALE_ONE
890 def is_candidate(self
):
892 if (MUST_BE_RUNNING_NOW
and not self
.is_running()):
893 log_excluded('%s not a candidate: not running now, unable to check ' +
894 'DirPort consensus download', self
._fpr
)
896 if (self
._data
['last_changed_address_or_port'] >
897 self
.CUTOFF_ADDRESS_AND_PORT_STABLE
):
898 log_excluded('%s not a candidate: changed address/port recently (%s)',
899 self
._fpr
, self
._data
['last_changed_address_or_port'])
901 if self
._running
< CUTOFF_RUNNING
:
902 log_excluded('%s not a candidate: running avg too low (%lf)',
903 self
._fpr
, self
._running
)
905 if self
._v
2dir
< CUTOFF_V2DIR
:
906 log_excluded('%s not a candidate: v2dir avg too low (%lf)',
907 self
._fpr
, self
._v
2dir
)
909 if self
._badexit
is not None and self
._badexit
> PERMITTED_BADEXIT
:
910 log_excluded('%s not a candidate: badexit avg too high (%lf)',
911 self
._fpr
, self
._badexit
)
913 # this function logs a message depending on which check fails
914 if not self
.is_valid_version():
916 if self
._guard
< CUTOFF_GUARD
:
917 log_excluded('%s not a candidate: guard avg too low (%lf)',
918 self
._fpr
, self
._guard
)
920 if (not self
._data
.has_key('consensus_weight')
921 or self
._data
['consensus_weight'] < 1):
922 log_excluded('%s not a candidate: consensus weight invalid', self
._fpr
)
924 except BaseException
as e
:
925 logging
.warning("Exception %s when checking if fallback is a candidate",
930 def is_in_whitelist(self
, relaylist
):
931 """ A fallback matches if each key in the whitelist line matches:
936 ipv6 address and port (if present)
937 If the fallback has an ipv6 key, the whitelist line must also have
938 it, and vice versa, otherwise they don't match. """
941 ipv6
= '%s:%d'%(self
.ipv6addr
, self
.ipv6orport
)
942 for entry
in relaylist
:
943 if entry
['id'] != self
._fpr
:
944 # can't log here unless we match an IP and port, because every relay's
945 # fingerprint is compared to every entry's fingerprint
946 if entry
['ipv4'] == self
.dirip
and int(entry
['orport']) == self
.orport
:
947 logging
.warning('%s excluded: has OR %s:%d changed fingerprint to ' +
948 '%s?', entry
['id'], self
.dirip
, self
.orport
,
950 if self
.has_ipv6() and entry
.has_key('ipv6') and entry
['ipv6'] == ipv6
:
951 logging
.warning('%s excluded: has OR %s changed fingerprint to ' +
952 '%s?', entry
['id'], ipv6
, self
._fpr
)
954 if entry
['ipv4'] != self
.dirip
:
955 logging
.warning('%s excluded: has it changed IPv4 from %s to %s?',
956 self
._fpr
, entry
['ipv4'], self
.dirip
)
958 if int(entry
['dirport']) != self
.dirport
:
959 logging
.warning('%s excluded: has it changed DirPort from %s:%d to ' +
960 '%s:%d?', self
._fpr
, self
.dirip
, int(entry
['dirport']),
961 self
.dirip
, self
.dirport
)
963 if int(entry
['orport']) != self
.orport
:
964 logging
.warning('%s excluded: has it changed ORPort from %s:%d to ' +
965 '%s:%d?', self
._fpr
, self
.dirip
, int(entry
['orport']),
966 self
.dirip
, self
.orport
)
968 if entry
.has_key('ipv6') and self
.has_ipv6():
969 # if both entry and fallback have an ipv6 address, compare them
970 if entry
['ipv6'] != ipv6
:
971 logging
.warning('%s excluded: has it changed IPv6 ORPort from %s ' +
972 'to %s?', self
._fpr
, entry
['ipv6'], ipv6
)
974 # if the fallback has an IPv6 address but the whitelist entry
975 # doesn't, or vice versa, the whitelist entry doesn't match
976 elif entry
.has_key('ipv6') and not self
.has_ipv6():
977 logging
.warning('%s excluded: has it lost its former IPv6 address %s?',
978 self
._fpr
, entry
['ipv6'])
980 elif not entry
.has_key('ipv6') and self
.has_ipv6():
981 logging
.warning('%s excluded: has it gained an IPv6 address %s?',
987 def is_in_blacklist(self
, relaylist
):
988 """ A fallback matches a blacklist line if a sufficiently specific group
989 of attributes matches:
995 If the fallback and the blacklist line both have an ipv6 key,
996 their values will be compared, otherwise, they will be ignored.
997 If there is no dirport and no orport, the entry matches all relays on
999 for entry
in relaylist
:
1002 if key
== 'id' and value
== self
._fpr
:
1003 log_excluded('%s is in the blacklist: fingerprint matches',
1006 if key
== 'ipv4' and value
== self
.dirip
:
1007 # if the dirport is present, check it too
1008 if entry
.has_key('dirport'):
1009 if int(entry
['dirport']) == self
.dirport
:
1010 log_excluded('%s is in the blacklist: IPv4 (%s) and ' +
1011 'DirPort (%d) match', self
._fpr
, self
.dirip
,
1014 # if the orport is present, check it too
1015 elif entry
.has_key('orport'):
1016 if int(entry
['orport']) == self
.orport
:
1017 log_excluded('%s is in the blacklist: IPv4 (%s) and ' +
1018 'ORPort (%d) match', self
._fpr
, self
.dirip
,
1022 log_excluded('%s is in the blacklist: IPv4 (%s) matches, and ' +
1023 'entry has no DirPort or ORPort', self
._fpr
,
1028 ipv6
= '%s:%d'%(self
.ipv6addr
, self
.ipv6orport
)
1029 if (key
== 'ipv6' and self
.has_ipv6()):
1030 # if both entry and fallback have an ipv6 address, compare them,
1031 # otherwise, disregard ipv6 addresses
1033 # if the dirport is present, check it too
1034 if entry
.has_key('dirport'):
1035 if int(entry
['dirport']) == self
.dirport
:
1036 log_excluded('%s is in the blacklist: IPv6 (%s) and ' +
1037 'DirPort (%d) match', self
._fpr
, ipv6
,
1040 # we've already checked the ORPort, it's part of entry['ipv6']
1042 log_excluded('%s is in the blacklist: IPv6 (%s) matches, and' +
1043 'entry has no DirPort', self
._fpr
, ipv6
)
1045 elif (key
== 'ipv6' or self
.has_ipv6()):
1046 # only log if the fingerprint matches but the IPv6 doesn't
1047 if entry
.has_key('id') and entry
['id'] == self
._fpr
:
1048 log_excluded('%s skipping IPv6 blacklist comparison: relay ' +
1049 'has%s IPv6%s, but entry has%s IPv6%s', self
._fpr
,
1050 '' if self
.has_ipv6() else ' no',
1051 (' (' + ipv6
+ ')') if self
.has_ipv6() else '',
1052 '' if key
== 'ipv6' else ' no',
1053 (' (' + value
+ ')') if key
== 'ipv6' else '')
1054 logging
.warning('Has %s %s IPv6 address %s?', self
._fpr
,
1055 'gained an' if self
.has_ipv6() else 'lost its former',
1056 ipv6
if self
.has_ipv6() else value
)
def cw_to_bw_factor(self):
    """Return advertised bandwidth divided by consensus weight.

    Relays with a missing or zero consensus weight are never candidates,
    and a missing advertised bandwidth has already been normalised to
    zero, so this division is safe for candidate relays.
    """
    data = self._data
    return data['advertised_bandwidth'] / data['consensus_weight']
1064 # since advertised_bandwidth is reported by the relay, it can be gamed
1065 # to avoid this, use the median consensus weight to bandwidth factor to
1066 # estimate this relay's measured bandwidth, and make that the upper limit
1067 def measured_bandwidth(self
, median_cw_to_bw_factor
):
1068 cw_to_bw
= median_cw_to_bw_factor
1069 # Reduce exit bandwidth to make sure we're not overloading them
1071 cw_to_bw
*= EXIT_BANDWIDTH_FRACTION
1072 measured_bandwidth
= self
._data
['consensus_weight'] * cw_to_bw
1073 if self
._data
['advertised_bandwidth'] != 0:
1074 # limit advertised bandwidth (if available) to measured bandwidth
1075 return min(measured_bandwidth
, self
._data
['advertised_bandwidth'])
1077 return measured_bandwidth
def set_measured_bandwidth(self, median_cw_to_bw_factor):
    """Compute this relay's capped measured bandwidth and cache it in _data."""
    estimate = self.measured_bandwidth(median_cw_to_bw_factor)
    self._data['measured_bandwidth'] = estimate
1084 return 'Exit' in self
._data
['flags']
1087 return 'Guard' in self
._data
['flags']
def is_running(self):
    """Does the consensus give this relay the Running flag?"""
    flags = self._data['flags']
    return 'Running' in flags
1092 # does this fallback have an IPv6 address and orport?
1094 return self
.ipv6addr
is not None and self
.ipv6orport
is not None
1096 # strip leading and trailing brackets from an IPv6 address
1097 # safe to use on non-bracketed IPv6 and on IPv4 addresses
1098 # also convert to unicode, and make None appear as ''
1100 def strip_ipv6_brackets(ip
):
1105 if ip
[0] == '[' and ip
[-1] == ']':
1106 return unicode(ip
[1:-1])
1109 # are ip_a and ip_b in the same netblock?
1110 # mask_bits is the size of the netblock
1111 # takes both IPv4 and IPv6 addresses
1112 # the versions of ip_a and ip_b must be the same
1113 # the mask must be valid for the IP version
1115 def netblocks_equal(ip_a
, ip_b
, mask_bits
):
1116 if ip_a
is None or ip_b
is None:
1118 ip_a
= Candidate
.strip_ipv6_brackets(ip_a
)
1119 ip_b
= Candidate
.strip_ipv6_brackets(ip_b
)
1120 a
= ipaddress
.ip_address(ip_a
)
1121 b
= ipaddress
.ip_address(ip_b
)
1122 if a
.version
!= b
.version
:
1123 raise Exception('Mismatching IP versions in %s and %s'%(ip_a
, ip_b
))
1124 if mask_bits
> a
.max_prefixlen
:
1125 logging
.error('Bad IP mask %d for %s and %s'%(mask_bits
, ip_a
, ip_b
))
1126 mask_bits
= a
.max_prefixlen
1128 logging
.error('Bad IP mask %d for %s and %s'%(mask_bits
, ip_a
, ip_b
))
1130 a_net
= ipaddress
.ip_network('%s/%d'%(ip_a
, mask_bits
), strict
=False)
# is this fallback's IPv4 address (dirip) in the same netblock as other's
# IPv4 address?
# mask_bits is the size of the netblock
def ipv4_netblocks_equal(self, other, mask_bits):
    """True when self.dirip and other.dirip share a /mask_bits netblock."""
    ours = self.dirip
    theirs = other.dirip
    return Candidate.netblocks_equal(ours, theirs, mask_bits)
1139 # is this fallback's IPv6 address (ipv6addr) in the same netblock as
1140 # other's IPv6 address?
1141 # Returns False if either fallback has no IPv6 address
1142 # mask_bits is the size of the netblock
1143 def ipv6_netblocks_equal(self
, other
, mask_bits
):
1144 if not self
.has_ipv6() or not other
.has_ipv6():
1146 return Candidate
.netblocks_equal(self
.ipv6addr
, other
.ipv6addr
, mask_bits
)
# is this fallback's IPv4 DirPort the same as other's IPv4 DirPort?
def dirport_equal(self, other):
    """True when both fallbacks advertise the same IPv4 DirPort."""
    return other.dirport == self.dirport
# is this fallback's IPv4 ORPort the same as other's IPv4 ORPort?
def ipv4_orport_equal(self, other):
    """True when both fallbacks advertise the same IPv4 ORPort."""
    return other.orport == self.orport
1156 # is this fallback's IPv6 ORPort the same as other's IPv6 ORPort?
1157 # Returns False if either fallback has no IPv6 address
1158 def ipv6_orport_equal(self
, other
):
1159 if not self
.has_ipv6() or not other
.has_ipv6():
1161 return self
.ipv6orport
== other
.ipv6orport
# does this fallback have the same DirPort, IPv4 ORPort, or
# IPv6 ORPort as other?
# Ignores IPv6 ORPort if either fallback has no IPv6 address
def port_equal(self, other):
    """True when self and other share any of their advertised ports."""
    if self.dirport_equal(other):
        return True
    if self.ipv4_orport_equal(other):
        return True
    return self.ipv6_orport_equal(other)
1170 # return a list containing IPv4 ORPort, DirPort, and IPv6 ORPort (if present)
1171 def port_list(self
):
1172 ports
= [self
.dirport
, self
.orport
]
1173 if self
.has_ipv6() and not self
.ipv6orport
in ports
:
1174 ports
.append(self
.ipv6orport
)
1177 # does this fallback share a port with other, regardless of whether the
1179 # For example, if self's IPv4 ORPort is 80 and other's DirPort is 80,
1181 def port_shared(self
, other
):
1182 for p
in self
.port_list():
1183 if p
in other
.port_list():
1187 # log how long it takes to download a consensus from dirip:dirport
1188 # returns True if the download failed, False if it succeeded within max_time
1190 def fallback_consensus_download_speed(dirip
, dirport
, nickname
, fingerprint
,
1192 download_failed
= False
1193 # some directory mirrors respond to requests in ways that hang python
1194 # sockets, which is why we log this line here
1195 logging
.info('Initiating %sconsensus download from %s (%s:%d) %s.',
1196 'microdesc ' if DOWNLOAD_MICRODESC_CONSENSUS
else '',
1197 nickname
, dirip
, dirport
, fingerprint
)
1198 # there appears to be about 1 second of overhead when comparing stem's
1199 # internal trace time and the elapsed time calculated here
1201 start
= datetime
.datetime
.utcnow()
1203 consensus
= get_consensus(
1204 endpoints
= [(dirip
, dirport
)],
1205 timeout
= (max_time
+ TIMEOUT_SLOP
),
1208 fall_back_to_authority
= False,
1209 document_handler
= DocumentHandler
.BARE_DOCUMENT
,
1210 microdescriptor
= DOWNLOAD_MICRODESC_CONSENSUS
1212 end
= datetime
.datetime
.utcnow()
1213 time_since_expiry
= (end
- consensus
.valid_until
).total_seconds()
1214 except Exception, stem_error
:
1215 end
= datetime
.datetime
.utcnow()
1216 log_excluded('Unable to retrieve a consensus from %s: %s', nickname
,
1218 status
= 'error: "%s"' % (stem_error
)
1219 level
= logging
.WARNING
1220 download_failed
= True
1221 elapsed
= (end
- start
).total_seconds()
1223 # keep the error failure status, and avoid using the variables
1225 elif elapsed
> max_time
:
1227 level
= logging
.WARNING
1228 download_failed
= True
1229 elif (time_since_expiry
> 0):
1230 status
= 'outdated consensus, expired %ds ago'%(int(time_since_expiry
))
1231 if time_since_expiry
<= CONSENSUS_EXPIRY_TOLERANCE
:
1232 status
+= ', tolerating up to %ds'%(CONSENSUS_EXPIRY_TOLERANCE)
1233 level
= logging
.INFO
1235 status
+= ', invalid'
1236 level
= logging
.WARNING
1237 download_failed
= True
1240 level
= logging
.DEBUG
1241 logging
.log(level
, 'Consensus download: %0.1fs %s from %s (%s:%d) %s, ' +
1242 'max download time %0.1fs.', elapsed
, status
, nickname
,
1243 dirip
, dirport
, fingerprint
, max_time
)
1244 return download_failed
1246 # does this fallback download the consensus fast enough?
1247 def check_fallback_download_consensus(self
):
1248 # include the relay if we're not doing a check, or we can't check (IPv6)
1251 if PERFORM_IPV4_DIRPORT_CHECKS
:
1252 ipv4_failed
= Candidate
.fallback_consensus_download_speed(self
.dirip
,
1254 self
._data
['nickname'],
1256 CONSENSUS_DOWNLOAD_SPEED_MAX
)
1257 if self
.has_ipv6() and PERFORM_IPV6_DIRPORT_CHECKS
:
1258 # Clients assume the IPv6 DirPort is the same as the IPv4 DirPort
1259 ipv6_failed
= Candidate
.fallback_consensus_download_speed(self
.ipv6addr
,
1261 self
._data
['nickname'],
1263 CONSENSUS_DOWNLOAD_SPEED_MAX
)
1264 return ((not ipv4_failed
) and (not ipv6_failed
))
# if this fallback has not passed a download check, try it again,
# and record the result, available in get_fallback_download_consensus
def try_fallback_download_consensus(self):
    """Re-run the consensus download check unless it has already passed."""
    already_passed = self.get_fallback_download_consensus()
    if not already_passed:
        self._data['download_check'] = self.check_fallback_download_consensus()
1272 # did this fallback pass the download check?
1273 def get_fallback_download_consensus(self
):
1274 # if we're not performing checks, return True
1275 if not PERFORM_IPV4_DIRPORT_CHECKS
and not PERFORM_IPV6_DIRPORT_CHECKS
:
1277 # if we are performing checks, but haven't done one, return False
1278 if not self
._data
.has_key('download_check'):
1280 return self
._data
['download_check']
1282 # output an optional header comment and info for this fallback
1283 # try_fallback_download_consensus before calling this
1284 def fallbackdir_line(self
, fallbacks
, prefilter_fallbacks
):
1287 s
+= self
.fallbackdir_comment(fallbacks
, prefilter_fallbacks
)
1288 # if the download speed is ok, output a C string
1289 # if it's not, but we OUTPUT_COMMENTS, output a commented-out C string
1290 if self
.get_fallback_download_consensus() or OUTPUT_COMMENTS
:
1291 s
+= self
.fallbackdir_info(self
.get_fallback_download_consensus())
1294 # output a header comment for this fallback
1295 def fallbackdir_comment(self
, fallbacks
, prefilter_fallbacks
):
1299 # adjusted bandwidth, consensus weight
1301 # [identical contact counts]
1303 # Multiline C comment
1306 s
+= cleanse_c_multiline_comment(self
._data
['nickname'])
1309 s
+= cleanse_c_multiline_comment(' '.join(sorted(self
._data
['flags'])))
1311 # this is an adjusted bandwidth, see calculate_measured_bandwidth()
1312 bandwidth
= self
._data
['measured_bandwidth']
1313 weight
= self
._data
['consensus_weight']
1314 s
+= 'Bandwidth: %.1f MByte/s, Consensus Weight: %d'%(
1315 bandwidth
/(1024.0*1024.0),
1318 if self
._data
['contact'] is not None:
1319 s
+= cleanse_c_multiline_comment(self
._data
['contact'])
1320 if CONTACT_COUNT
or CONTACT_BLACKLIST_COUNT
:
1321 fallback_count
= len([f
for f
in fallbacks
1322 if f
._data
['contact'] == self
._data
['contact']])
1323 if fallback_count
> 1:
1325 s
+= '%d identical contacts listed' % (fallback_count
)
1326 if CONTACT_BLACKLIST_COUNT
:
1327 prefilter_count
= len([f
for f
in prefilter_fallbacks
1328 if f
._data
['contact'] == self
._data
['contact']])
1329 filter_count
= prefilter_count
- fallback_count
1330 if filter_count
> 0:
1331 if fallback_count
> 1:
1335 s
+= '%d blacklisted' % (filter_count
)
1341 # output the fallback info C string for this fallback
1342 # this is the text that would go after FallbackDir in a torrc
1343 # if this relay failed the download test and we OUTPUT_COMMENTS,
1344 # comment-out the returned string
1345 def fallbackdir_info(self
, dl_speed_ok
):
1346 # "address:dirport orport=port id=fingerprint"
1347 # (insert additional madatory fields here)
1348 # "[ipv6=addr:orport]"
1349 # (insert additional optional fields here)
1350 # /* nickname=name */
1351 # /* extrainfo={0,1} */
1352 # (insert additional comment fields here)
1356 # Do we want a C string, or a commented-out string?
1357 c_string
= dl_speed_ok
1358 comment_string
= not dl_speed_ok
and OUTPUT_COMMENTS
1359 # If we don't want either kind of string, bail
1360 if not c_string
and not comment_string
:
1363 # Comment out the fallback directory entry if it's too slow
1364 # See the debug output for which address and port is failing
1366 s
+= '/* Consensus download failed or was too slow:\n'
1367 # Multi-Line C string with trailing comma (part of a string list)
1368 # This makes it easier to diff the file, and remove IPv6 lines using grep
1369 # Integers don't need escaping
1370 s
+= '"%s orport=%d id=%s"'%(
1371 cleanse_c_string(self
._data
['dir_address']),
1373 cleanse_c_string(self
._fpr
))
1375 # (insert additional madatory fields here)
1377 s
+= '" ipv6=%s:%d"'%(cleanse_c_string(self
.ipv6addr
), self
.ipv6orport
)
1379 # (insert additional optional fields here)
1380 if not comment_string
:
1382 s
+= 'nickname=%s'%(cleanse_c_string(self
._data
['nickname']))
1383 if not comment_string
:
1386 # if we know that the fallback is an extrainfo cache, flag it
1387 # and if we don't know, assume it is not
1388 if not comment_string
:
1390 s
+= 'extrainfo=%d'%(1 if self
._extra
_info
_cache
else 0)
1391 if not comment_string
:
1394 # (insert additional comment fields here)
1395 # The terminator and comma must be the last line in each fallback entry
1396 if not comment_string
:
1398 s
+= SECTION_SEPARATOR_BASE
1399 if not comment_string
:
1408 ## Fallback Candidate List Class
1410 class CandidateList(dict):
def _add_relay(self, details):
    """Wrap a relay's details document in a Candidate and index it by
    fingerprint.

    Relays whose details document has no 'dir_address' key are skipped.
    """
    if 'dir_address' not in details:
        return
    candidate = Candidate(details)
    self[candidate.get_fingerprint()] = candidate
1419 def _add_uptime(self
, uptime
):
1421 fpr
= uptime
['fingerprint']
1423 raise Exception("Document has no fingerprint field.")
1428 logging
.debug('Got unknown relay %s in uptime document.'%(fpr
,))
1431 c
.add_uptime(uptime
)
1433 def _add_details(self
):
1434 logging
.debug('Loading details document.')
1435 d
= fetch('details',
1436 fields
=('fingerprint,nickname,contact,last_changed_address_or_port,' +
1437 'consensus_weight,advertised_bandwidth,or_addresses,' +
1438 'dir_address,recommended_version,flags,effective_family,' +
1440 logging
.debug('Loading details document done.')
1442 if not 'relays' in d
: raise Exception("No relays found in document.")
1444 for r
in d
['relays']: self
._add
_relay
(r
)
1446 def _add_uptimes(self
):
1447 logging
.debug('Loading uptime document.')
1449 logging
.debug('Loading uptime document done.')
1451 if not 'relays' in d
: raise Exception("No relays found in document.")
1452 for r
in d
['relays']: self
._add
_uptime
(r
)
1454 def add_relays(self
):
1458 def count_guards(self
):
1460 for fpr
in self
.keys():
1461 if self
[fpr
].is_guard():
1465 # Find fallbacks that fit the uptime, stability, and flags criteria,
1466 # and make an array of them in self.fallbacks
1467 def compute_fallbacks(self
):
1468 self
.fallbacks
= map(lambda x
: self
[x
],
1469 filter(lambda x
: self
[x
].is_candidate(),
# sort fallbacks by their consensus weight to advertised bandwidth factor,
# lowest to highest
# used to find the median cw_to_bw_factor()
def sort_fallbacks_by_cw_to_bw_factor(self):
    """Order self.fallbacks ascending by each fallback's cw_to_bw_factor()."""
    def factor(fallback):
        return fallback.cw_to_bw_factor()
    self.fallbacks.sort(key=factor)
1478 # sort fallbacks by their measured bandwidth, highest to lowest
1479 # calculate_measured_bandwidth before calling this
1480 # this is useful for reviewing candidates in priority order
1481 def sort_fallbacks_by_measured_bandwidth(self
):
1482 self
.fallbacks
.sort(key
=lambda f
: f
._data
['measured_bandwidth'],
# sort fallbacks by the data field data_field, lowest to highest
def sort_fallbacks_by(self, data_field):
    """Order self.fallbacks ascending by each fallback's _data[data_field]."""
    def field_value(fallback):
        return fallback._data[data_field]
    self.fallbacks.sort(key=field_value)
1490 def load_relaylist(file_obj
):
1491 """ Read each line in the file, and parse it like a FallbackDir line:
1492 an IPv4 address and optional port:
1493 <IPv4 address>:<port>
1494 which are parsed into dictionary entries:
1497 followed by a series of key=value entries:
1500 ipv6=<IPv6 address>:<IPv6 orport>
1501 each line's key/value pairs are placed in a dictonary,
1502 (of string -> string key/value pairs),
1503 and these dictionaries are placed in an array.
1504 comments start with # and are ignored """
1505 file_data
= file_obj
['data']
1506 file_name
= file_obj
['name']
1508 if file_data
is None:
1510 for line
in file_data
.split('\n'):
1513 line_comment_split
= line
.split('#')
1514 line
= line_comment_split
[0]
1515 # cleanup whitespace
1516 line
= cleanse_whitespace(line
)
1520 for item
in line
.split(' '):
1524 key_value_split
= item
.split('=')
1525 kvl
= len(key_value_split
)
1526 if kvl
< 1 or kvl
> 2:
1527 print '#error Bad %s item: %s, format is key=value.'%(
1530 # assume that entries without a key are the ipv4 address,
1531 # perhaps with a dirport
1532 ipv4_maybe_dirport
= key_value_split
[0]
1533 ipv4_maybe_dirport_split
= ipv4_maybe_dirport
.split(':')
1534 dirl
= len(ipv4_maybe_dirport_split
)
1535 if dirl
< 1 or dirl
> 2:
1536 print '#error Bad %s IPv4 item: %s, format is ipv4:port.'%(
1539 relay_entry
['ipv4'] = ipv4_maybe_dirport_split
[0]
1541 relay_entry
['dirport'] = ipv4_maybe_dirport_split
[1]
1543 relay_entry
[key_value_split
[0]] = key_value_split
[1]
1544 relaylist
.append(relay_entry
)
1547 # apply the fallback whitelist and blacklist
1548 def apply_filter_lists(self
, whitelist_obj
, blacklist_obj
):
1550 logging
.debug('Applying whitelist and blacklist.')
1551 # parse the whitelist and blacklist
1552 whitelist
= self
.load_relaylist(whitelist_obj
)
1553 blacklist
= self
.load_relaylist(blacklist_obj
)
1554 filtered_fallbacks
= []
1555 for f
in self
.fallbacks
:
1556 in_whitelist
= f
.is_in_whitelist(whitelist
)
1557 in_blacklist
= f
.is_in_blacklist(blacklist
)
1558 if in_whitelist
and in_blacklist
:
1559 if BLACKLIST_EXCLUDES_WHITELIST_ENTRIES
:
1562 logging
.warning('Excluding %s: in both blacklist and whitelist.',
1566 filtered_fallbacks
.append(f
)
1569 filtered_fallbacks
.append(f
)
1573 log_excluded('Excluding %s: in blacklist.', f
._fpr
)
1575 if INCLUDE_UNLISTED_ENTRIES
:
1577 filtered_fallbacks
.append(f
)
1581 log_excluded('Excluding %s: in neither blacklist nor whitelist.',
1583 self
.fallbacks
= filtered_fallbacks
1584 return excluded_count
def summarise_filters(initial_count, excluded_count):
    """Return a C comment summarising whitelist/blacklist filtering."""
    template = '/* Whitelist & blacklist excluded %d of %d candidates. */'
    return template % (excluded_count, initial_count)
1591 # calculate each fallback's measured bandwidth based on the median
1592 # consensus weight to advertised bandwidth ratio
1593 def calculate_measured_bandwidth(self
):
1594 self
.sort_fallbacks_by_cw_to_bw_factor()
1595 median_fallback
= self
.fallback_median(True)
1596 if median_fallback
is not None:
1597 median_cw_to_bw_factor
= median_fallback
.cw_to_bw_factor()
1599 # this will never be used, because there are no fallbacks
1600 median_cw_to_bw_factor
= None
1601 for f
in self
.fallbacks
:
1602 f
.set_measured_bandwidth(median_cw_to_bw_factor
)
1604 # remove relays with low measured bandwidth from the fallback list
1605 # calculate_measured_bandwidth for each relay before calling this
1606 def remove_low_bandwidth_relays(self
):
1607 if MIN_BANDWIDTH
is None:
1609 above_min_bw_fallbacks
= []
1610 for f
in self
.fallbacks
:
1611 if f
._data
['measured_bandwidth'] >= MIN_BANDWIDTH
:
1612 above_min_bw_fallbacks
.append(f
)
1614 # the bandwidth we log here is limited by the relay's consensus weight
1615 # as well as its adverttised bandwidth. See set_measured_bandwidth
1617 log_excluded('%s not a candidate: bandwidth %.1fMByte/s too low, ' +
1618 'must be at least %.1fMByte/s', f
._fpr
,
1619 f
._data
['measured_bandwidth']/(1024.0*1024.0),
1620 MIN_BANDWIDTH
/(1024.0*1024.0))
1621 self
.fallbacks
= above_min_bw_fallbacks
1623 # the minimum fallback in the list
1624 # call one of the sort_fallbacks_* functions before calling this
1625 def fallback_min(self
):
1626 if len(self
.fallbacks
) > 0:
1627 return self
.fallbacks
[-1]
1631 # the median fallback in the list
1632 # call one of the sort_fallbacks_* functions before calling this
1633 def fallback_median(self
, require_advertised_bandwidth
):
1634 # use the low-median when there are an evan number of fallbacks,
1635 # for consistency with the bandwidth authorities
1636 if len(self
.fallbacks
) > 0:
1637 median_position
= (len(self
.fallbacks
) - 1) / 2
1638 if not require_advertised_bandwidth
:
1639 return self
.fallbacks
[median_position
]
1640 # if we need advertised_bandwidth but this relay doesn't have it,
1641 # move to a fallback with greater consensus weight until we find one
1642 while not self
.fallbacks
[median_position
]._data
['advertised_bandwidth']:
1643 median_position
+= 1
1644 if median_position
>= len(self
.fallbacks
):
1646 return self
.fallbacks
[median_position
]
1650 # the maximum fallback in the list
1651 # call one of the sort_fallbacks_* functions before calling this
1652 def fallback_max(self
):
1653 if len(self
.fallbacks
) > 0:
1654 return self
.fallbacks
[0]
1658 # return a new bag suitable for storing attributes
1660 def attribute_new():
1663 # get the count of attribute in attribute_bag
1664 # if attribute is None or the empty string, return 0
1666 def attribute_count(attribute
, attribute_bag
):
1667 if attribute
is None or attribute
== '':
1669 if attribute
not in attribute_bag
:
1671 return attribute_bag
[attribute
]
1673 # does attribute_bag contain more than max_count instances of attribute?
1674 # if so, return False
1675 # if not, return True
1676 # if attribute is None or the empty string, or max_count is invalid,
1677 # always return True
1679 def attribute_allow(attribute
, attribute_bag
, max_count
=1):
1680 if attribute
is None or attribute
== '' or max_count
<= 0:
1682 elif CandidateList
.attribute_count(attribute
, attribute_bag
) >= max_count
:
1687 # add attribute to attribute_bag, incrementing the count if it is already
1689 # if attribute is None or the empty string, or count is invalid,
1692 def attribute_add(attribute
, attribute_bag
, count
=1):
1693 if attribute
is None or attribute
== '' or count
<= 0:
1695 attribute_bag
.setdefault(attribute
, 0)
1696 attribute_bag
[attribute
] += count
1698 # make sure there are only MAX_FALLBACKS_PER_IP fallbacks per IPv4 address,
1699 # and per IPv6 address
1700 # there is only one IPv4 address on each fallback: the IPv4 DirPort address
1701 # (we choose the IPv4 ORPort which is on the same IPv4 as the DirPort)
1702 # there is at most one IPv6 address on each fallback: the IPv6 ORPort address
1703 # we try to match the IPv4 ORPort, but will use any IPv6 address if needed
1704 # (clients only use the IPv6 ORPort)
1705 # if there is no IPv6 address, only the IPv4 address is checked
1706 # return the number of candidates we excluded
1707 def limit_fallbacks_same_ip(self
):
1708 ip_limit_fallbacks
= []
1709 ip_list
= CandidateList
.attribute_new()
1710 for f
in self
.fallbacks
:
1711 if (CandidateList
.attribute_allow(f
.dirip
, ip_list
,
1712 MAX_FALLBACKS_PER_IPV4
)
1713 and CandidateList
.attribute_allow(f
.ipv6addr
, ip_list
,
1714 MAX_FALLBACKS_PER_IPV6
)):
1715 ip_limit_fallbacks
.append(f
)
1716 CandidateList
.attribute_add(f
.dirip
, ip_list
)
1718 CandidateList
.attribute_add(f
.ipv6addr
, ip_list
)
1719 elif not CandidateList
.attribute_allow(f
.dirip
, ip_list
,
1720 MAX_FALLBACKS_PER_IPV4
):
1721 log_excluded('Eliminated %s: already have %d fallback(s) on IPv4 %s'
1722 %(f
._fpr
, CandidateList
.attribute_count(f
.dirip
, ip_list
),
1724 elif (f
.has_ipv6() and
1725 not CandidateList
.attribute_allow(f
.ipv6addr
, ip_list
,
1726 MAX_FALLBACKS_PER_IPV6
)):
1727 log_excluded('Eliminated %s: already have %d fallback(s) on IPv6 %s'
1728 %(f
._fpr
, CandidateList
.attribute_count(f
.ipv6addr
,
1731 original_count
= len(self
.fallbacks
)
1732 self
.fallbacks
= ip_limit_fallbacks
1733 return original_count
- len(self
.fallbacks
)
1735 # make sure there are only MAX_FALLBACKS_PER_CONTACT fallbacks for each
1737 # if there is no ContactInfo, allow the fallback
1738 # this check can be gamed by providing no ContactInfo, or by setting the
1739 # ContactInfo to match another fallback
1740 # However, given the likelihood that relays with the same ContactInfo will
1741 # go down at similar times, its usefulness outweighs the risk
1742 def limit_fallbacks_same_contact(self
):
1743 contact_limit_fallbacks
= []
1744 contact_list
= CandidateList
.attribute_new()
1745 for f
in self
.fallbacks
:
1746 if CandidateList
.attribute_allow(f
._data
['contact'], contact_list
,
1747 MAX_FALLBACKS_PER_CONTACT
):
1748 contact_limit_fallbacks
.append(f
)
1749 CandidateList
.attribute_add(f
._data
['contact'], contact_list
)
1752 'Eliminated %s: already have %d fallback(s) on ContactInfo %s'
1753 %(f
._fpr
, CandidateList
.attribute_count(f
._data
['contact'],
1755 f
._data
['contact']))
1756 original_count
= len(self
.fallbacks
)
1757 self
.fallbacks
= contact_limit_fallbacks
1758 return original_count
- len(self
.fallbacks
)
1760 # make sure there are only MAX_FALLBACKS_PER_FAMILY fallbacks per effective
1762 # if there is no family, allow the fallback
1763 # we use effective family, which ensures mutual family declarations
1764 # but the check can be gamed by not declaring a family at all
1765 # if any indirect families exist, the result depends on the order in which
1766 # fallbacks are sorted in the list
1767 def limit_fallbacks_same_family(self
):
1768 family_limit_fallbacks
= []
1769 fingerprint_list
= CandidateList
.attribute_new()
1770 for f
in self
.fallbacks
:
1771 if CandidateList
.attribute_allow(f
._fpr
, fingerprint_list
,
1772 MAX_FALLBACKS_PER_FAMILY
):
1773 family_limit_fallbacks
.append(f
)
1774 CandidateList
.attribute_add(f
._fpr
, fingerprint_list
)
1775 for family_fingerprint
in f
._data
['effective_family']:
1776 CandidateList
.attribute_add(family_fingerprint
, fingerprint_list
)
1778 # we already have a fallback with this fallback in its effective
1781 'Eliminated %s: already have %d fallback(s) in effective family'
1782 %(f
._fpr
, CandidateList
.attribute_count(f
._fpr
, fingerprint_list
)))
1783 original_count
= len(self
.fallbacks
)
1784 self
.fallbacks
= family_limit_fallbacks
1785 return original_count
- len(self
.fallbacks
)
1787 # try once to get the descriptors for fingerprint_list using stem
1788 # returns an empty list on exception
1790 def get_fallback_descriptors_once(fingerprint_list
):
1791 desc_list
= get_server_descriptors(fingerprints
=fingerprint_list
).run(suppress
=True)
1794 # try up to max_retries times to get the descriptors for fingerprint_list
1795 # using stem. Stops retrying when all descriptors have been retrieved.
1796 # returns a list containing the descriptors that were retrieved
1798 def get_fallback_descriptors(fingerprint_list
, max_retries
=5):
1799 # we can't use stem's retries=, because we want to support more than 96
1802 # add an attempt for every MAX_FINGERPRINTS (or part thereof) in the list
1803 max_retries
+= (len(fingerprint_list
) + MAX_FINGERPRINTS
- 1) / MAX_FINGERPRINTS
1804 remaining_list
= fingerprint_list
1806 for _
in xrange(max_retries
):
1807 if len(remaining_list
) == 0:
1809 new_desc_list
= CandidateList
.get_fallback_descriptors_once(remaining_list
[0:MAX_FINGERPRINTS
])
1810 for d
in new_desc_list
:
1812 remaining_list
.remove(d
.fingerprint
)
1814 # warn and ignore if a directory mirror returned a bad descriptor
1815 logging
.warning("Directory mirror returned unwanted descriptor %s, ignoring",
1821 # find the fallbacks that cache extra-info documents
1822 # Onionoo doesn't know this, so we have to use stem
1823 def mark_extra_info_caches(self
):
1824 fingerprint_list
= [ f
._fpr
for f
in self
.fallbacks
]
1825 logging
.info("Downloading fallback descriptors to find extra-info caches")
1826 desc_list
= CandidateList
.get_fallback_descriptors(fingerprint_list
)
1828 self
[d
.fingerprint
]._extra
_info
_cache
= d
.extra_info_cache
1829 missing_descriptor_list
= [ f
._fpr
for f
in self
.fallbacks
1830 if f
._extra
_info
_cache
is None ]
1831 for f
in missing_descriptor_list
:
1832 logging
.warning("No descriptor for {}. Assuming extrainfo=0.".format(f
))
1834 # try a download check on each fallback candidate in order
1835 # stop after max_count successful downloads
1836 # but don't remove any candidates from the array
1837 def try_download_consensus_checks(self
, max_count
):
1839 for f
in self
.fallbacks
:
1840 f
.try_fallback_download_consensus()
1841 if f
.get_fallback_download_consensus():
1842 # this fallback downloaded a consensus ok
1844 if dl_ok_count
>= max_count
:
1845 # we have enough fallbacks
# put max_count successful candidates in the fallbacks array:
# - perform download checks on each fallback candidate
# - retry failed candidates if CONSENSUS_DOWNLOAD_RETRY is set
# - eliminate failed candidates
# - if there are more than max_count candidates, eliminate lowest bandwidth
# - if there are fewer than max_count candidates, leave only successful
# Return the number of fallbacks that failed the consensus check
def perform_download_consensus_checks(self, max_count):
  self.sort_fallbacks_by_measured_bandwidth()
  self.try_download_consensus_checks(max_count)
  if CONSENSUS_DOWNLOAD_RETRY:
    # try unsuccessful candidates again
    # we could end up with more than max_count successful candidates here
    self.try_download_consensus_checks(max_count)
  # now we have at least max_count successful candidates,
  # or we've tried them all
  original_count = len(self.fallbacks)
  # keep only the candidates that served us a consensus
  # (a list comprehension, so len() and slicing below work on any python)
  self.fallbacks = [f for f in self.fallbacks
                    if f.get_fallback_download_consensus()]
  # some of these failed the check, others skipped the check,
  # if we already had enough successful downloads
  failed_count = original_count - len(self.fallbacks)
  self.fallbacks = self.fallbacks[:max_count]
  return failed_count
# return a string that describes a/b as a percentage
@staticmethod
def describe_percentage(a, b):
  # guard against b == 0: %-division would raise ZeroDivisionError
  if b != 0:
    return '%d/%d = %.0f%%'%(a, b, (a*100.0)/b)
  else:
    # technically, 0/0 is undefined, but 0.0% is a sensible result
    return '%d/%d = %.0f%%'%(a, b, 0.0)
# return a dictionary of lists of fallbacks by IPv4 netblock
# the dictionary is keyed by the fingerprint of an arbitrary fallback
# in each netblock
# mask_bits is the size of the netblock
def fallbacks_by_ipv4_netblock(self, mask_bits):
  netblocks = {}
  for f in self.fallbacks:
    found_netblock = False
    for b in netblocks.keys():
      # we found an existing netblock containing this fallback
      if f.ipv4_netblocks_equal(self[b], mask_bits):
        # add it to the list
        netblocks[b].append(f)
        found_netblock = True
        # each fallback belongs to at most one netblock list
        break
    # make a new netblock based on this fallback's fingerprint
    if not found_netblock:
      netblocks[f._fpr] = [f]
  return netblocks
# return a dictionary of lists of fallbacks by IPv6 netblock
# where mask_bits is the size of the netblock
def fallbacks_by_ipv6_netblock(self, mask_bits):
  netblocks = {}
  for f in self.fallbacks:
    # skip fallbacks without IPv6 addresses
    if not f.has_ipv6():
      continue
    found_netblock = False
    for b in netblocks.keys():
      # we found an existing netblock containing this fallback
      if f.ipv6_netblocks_equal(self[b], mask_bits):
        # add it to the list
        netblocks[b].append(f)
        found_netblock = True
        # each fallback belongs to at most one netblock list
        break
    # make a new netblock based on this fallback's fingerprint
    if not found_netblock:
      netblocks[f._fpr] = [f]
  return netblocks
# log a message about the proportion of fallbacks in each IPv4 netblock,
# where mask_bits is the size of the netblock
def describe_fallback_ipv4_netblock_mask(self, mask_bits):
  fallback_count = len(self.fallbacks)
  shared_netblock_fallback_count = 0
  most_frequent_netblock = None
  netblocks = self.fallbacks_by_ipv4_netblock(mask_bits)
  for b in netblocks.keys():
    if len(netblocks[b]) > 1:
      # how many fallbacks are in a netblock with other fallbacks?
      shared_netblock_fallback_count += len(netblocks[b])
      # what's the netblock with the most fallbacks?
      if (most_frequent_netblock is None
          or len(netblocks[b]) > len(netblocks[most_frequent_netblock])):
        most_frequent_netblock = b
      logging.debug('Fallback IPv4 addresses in the same /%d:'%(mask_bits))
      for f in netblocks[b]:
        logging.debug('%s - %s', f.dirip, f._fpr)
  if most_frequent_netblock is not None:
    logging.warning('There are %s fallbacks in the IPv4 /%d containing %s'%(
                    CandidateList.describe_percentage(
                                      len(netblocks[most_frequent_netblock]),
                                      fallback_count),
                    mask_bits,
                    self[most_frequent_netblock].dirip))
  if shared_netblock_fallback_count > 0:
    logging.warning(('%s of fallbacks are in an IPv4 /%d with other ' +
                     'fallbacks')%(CandidateList.describe_percentage(
                                            shared_netblock_fallback_count,
                                            fallback_count),
                                   mask_bits))
# log a message about the proportion of fallbacks in each IPv6 netblock,
# where mask_bits is the size of the netblock
def describe_fallback_ipv6_netblock_mask(self, mask_bits):
  # proportions are out of the IPv6-capable fallbacks only
  fallback_count = len(self.fallbacks_with_ipv6())
  shared_netblock_fallback_count = 0
  most_frequent_netblock = None
  netblocks = self.fallbacks_by_ipv6_netblock(mask_bits)
  for b in netblocks.keys():
    if len(netblocks[b]) > 1:
      # how many fallbacks are in a netblock with other fallbacks?
      shared_netblock_fallback_count += len(netblocks[b])
      # what's the netblock with the most fallbacks?
      if (most_frequent_netblock is None
          or len(netblocks[b]) > len(netblocks[most_frequent_netblock])):
        most_frequent_netblock = b
      logging.debug('Fallback IPv6 addresses in the same /%d:'%(mask_bits))
      for f in netblocks[b]:
        logging.debug('%s - %s', f.ipv6addr, f._fpr)
  if most_frequent_netblock is not None:
    logging.warning('There are %s fallbacks in the IPv6 /%d containing %s'%(
                    CandidateList.describe_percentage(
                                      len(netblocks[most_frequent_netblock]),
                                      fallback_count),
                    mask_bits,
                    self[most_frequent_netblock].ipv6addr))
  if shared_netblock_fallback_count > 0:
    logging.warning(('%s of fallbacks are in an IPv6 /%d with other ' +
                     'fallbacks')%(CandidateList.describe_percentage(
                                            shared_netblock_fallback_count,
                                            fallback_count),
                                   mask_bits))
# log a message about the proportion of fallbacks in each IPv4 /8, /16,
# and /24 netblock (only the /16 analysis is currently enabled)
def describe_fallback_ipv4_netblocks(self):
  # this doesn't actually tell us anything useful
  #self.describe_fallback_ipv4_netblock_mask(8)
  self.describe_fallback_ipv4_netblock_mask(16)
  #self.describe_fallback_ipv4_netblock_mask(24)
# log a message about the proportion of fallbacks in each IPv6 /12 (RIR),
# /23 (smaller RIR blocks), /32 (LIR), /48 (Customer), and /64 (Host)
# https://www.iana.org/assignments/ipv6-unicast-address-assignments/
# (only the /32 and /64 analyses are currently enabled)
def describe_fallback_ipv6_netblocks(self):
  # these don't actually tell us anything useful
  #self.describe_fallback_ipv6_netblock_mask(12)
  #self.describe_fallback_ipv6_netblock_mask(23)
  self.describe_fallback_ipv6_netblock_mask(32)
  #self.describe_fallback_ipv6_netblock_mask(48)
  self.describe_fallback_ipv6_netblock_mask(64)
# log a message about the proportion of fallbacks in each IPv4 and IPv6
# netblock of interest
def describe_fallback_netblocks(self):
  self.describe_fallback_ipv4_netblocks()
  self.describe_fallback_ipv6_netblocks()
# return a list of fallbacks whose IPv4 ORPort is port
def fallbacks_on_ipv4_orport(self, port):
  def uses_port(fallback):
    return fallback.orport == port
  return filter(uses_port, self.fallbacks)
# return a list of IPv6-capable fallbacks whose IPv6 ORPort is port
def fallbacks_on_ipv6_orport(self, port):
  def uses_port(fallback):
    return fallback.ipv6orport == port
  return filter(uses_port, self.fallbacks_with_ipv6())
# return a list of fallbacks whose DirPort is port
def fallbacks_on_dirport(self, port):
  def uses_port(fallback):
    return fallback.dirport == port
  return filter(uses_port, self.fallbacks)
# log a message about the proportion of fallbacks on IPv4 ORPort port
# and return that count
def describe_fallback_ipv4_orport(self, port):
  port_count = len(self.fallbacks_on_ipv4_orport(port))
  fallback_count = len(self.fallbacks)
  logging.warning('%s of fallbacks are on IPv4 ORPort %d'%(
                  CandidateList.describe_percentage(port_count,
                                                    fallback_count),
                  port))
  return port_count
# log a message about the proportion of IPv6 fallbacks on IPv6 ORPort port
# and return that count
def describe_fallback_ipv6_orport(self, port):
  port_count = len(self.fallbacks_on_ipv6_orport(port))
  # proportions are out of the IPv6-capable fallbacks only
  fallback_count = len(self.fallbacks_with_ipv6())
  logging.warning('%s of IPv6 fallbacks are on IPv6 ORPort %d'%(
                  CandidateList.describe_percentage(port_count,
                                                    fallback_count),
                  port))
  return port_count
# log a message about the proportion of fallbacks on DirPort port
# and return that count
def describe_fallback_dirport(self, port):
  port_count = len(self.fallbacks_on_dirport(port))
  fallback_count = len(self.fallbacks)
  logging.warning('%s of fallbacks are on DirPort %d'%(
                  CandidateList.describe_percentage(port_count,
                                                    fallback_count),
                  port))
  return port_count
# log a message about the proportion of fallbacks on each dirport,
# each IPv4 orport, and each IPv6 orport
def describe_fallback_ports(self):
  fallback_count = len(self.fallbacks)
  # IPv4 ORPorts: report the common ports, then the remainder
  ipv4_or_count = fallback_count
  ipv4_or_count -= self.describe_fallback_ipv4_orport(443)
  ipv4_or_count -= self.describe_fallback_ipv4_orport(9001)
  logging.warning('%s of fallbacks are on other IPv4 ORPorts'%(
                  CandidateList.describe_percentage(ipv4_or_count,
                                                    fallback_count)))
  # IPv6 ORPorts: proportions are out of the IPv6-capable fallbacks only
  ipv6_fallback_count = len(self.fallbacks_with_ipv6())
  ipv6_or_count = ipv6_fallback_count
  ipv6_or_count -= self.describe_fallback_ipv6_orport(443)
  ipv6_or_count -= self.describe_fallback_ipv6_orport(9001)
  logging.warning('%s of IPv6 fallbacks are on other IPv6 ORPorts'%(
                  CandidateList.describe_percentage(ipv6_or_count,
                                                    ipv6_fallback_count)))
  # DirPorts: report the common ports, then the remainder
  dir_count = fallback_count
  dir_count -= self.describe_fallback_dirport(80)
  dir_count -= self.describe_fallback_dirport(9030)
  logging.warning('%s of fallbacks are on other DirPorts'%(
                  CandidateList.describe_percentage(dir_count,
                                                    fallback_count)))
# return a list of fallbacks which cache extra-info documents
def fallbacks_with_extra_info_cache(self):
  def caches_extra_info(fallback):
    return fallback._extra_info_cache
  return filter(caches_extra_info, self.fallbacks)
# log a message about the proportion of fallbacks that cache extra-info docs
def describe_fallback_extra_info_caches(self):
  extra_info_fallback_count = len(self.fallbacks_with_extra_info_cache())
  fallback_count = len(self.fallbacks)
  logging.warning('%s of fallbacks cache extra-info documents'%(
                  CandidateList.describe_percentage(extra_info_fallback_count,
                                                    fallback_count)))
# return a list of fallbacks which have the Exit flag
def fallbacks_with_exit(self):
  def has_exit_flag(fallback):
    return fallback.is_exit()
  return filter(has_exit_flag, self.fallbacks)
# log a message about the proportion of fallbacks with an Exit flag
def describe_fallback_exit_flag(self):
  exit_fallback_count = len(self.fallbacks_with_exit())
  fallback_count = len(self.fallbacks)
  logging.warning('%s of fallbacks have the Exit flag'%(
                  CandidateList.describe_percentage(exit_fallback_count,
                                                    fallback_count)))
# return a list of fallbacks which have an IPv6 address
def fallbacks_with_ipv6(self):
  def supports_ipv6(fallback):
    return fallback.has_ipv6()
  return filter(supports_ipv6, self.fallbacks)
# log a message about the proportion of fallbacks on IPv6
def describe_fallback_ip_family(self):
  ipv6_fallback_count = len(self.fallbacks_with_ipv6())
  fallback_count = len(self.fallbacks)
  logging.warning('%s of fallbacks are on IPv6'%(
                  CandidateList.describe_percentage(ipv6_fallback_count,
                                                    fallback_count)))
# return a C-comment summary of the fallback selection process,
# ending with an #error directive if the list is too small to use
def summarise_fallbacks(self, eligible_count, operator_count, failed_count,
                        guard_count, target_count):
  s = ''
  # Report:
  # whether we checked consensus download times
  # the number of fallback directories (and limits/exclusions, if relevant)
  # min & max fallback bandwidths
  # #error if below minimum count
  if PERFORM_IPV4_DIRPORT_CHECKS or PERFORM_IPV6_DIRPORT_CHECKS:
    s += '/* Checked %s%s%s DirPorts served a consensus within %.1fs. */'%(
          'IPv4' if PERFORM_IPV4_DIRPORT_CHECKS else '',
          ' and ' if (PERFORM_IPV4_DIRPORT_CHECKS
                      and PERFORM_IPV6_DIRPORT_CHECKS) else '',
          'IPv6' if PERFORM_IPV6_DIRPORT_CHECKS else '',
          CONSENSUS_DOWNLOAD_SPEED_MAX)
  else:
    s += '/* Did not check IPv4 or IPv6 DirPort consensus downloads. */'
  s += '\n'
  # Multiline C comment with #error if things go bad
  s += '/*'
  s += '\n'
  # Integers don't need escaping in C comments
  fallback_count = len(self.fallbacks)
  if FALLBACK_PROPORTION_OF_GUARDS is None:
    fallback_proportion = ''
  else:
    fallback_proportion = ', Target %d (%d * %.2f)'%(target_count,
                                                     guard_count,
                                              FALLBACK_PROPORTION_OF_GUARDS)
  s += 'Final Count: %d (Eligible %d%s'%(fallback_count, eligible_count,
                                         fallback_proportion)
  if MAX_FALLBACK_COUNT is not None:
    s += ', Max %d'%(MAX_FALLBACK_COUNT)
  s += ')\n'
  if eligible_count != fallback_count:
    removed_count = eligible_count - fallback_count
    excess_to_target_or_max = (eligible_count - operator_count - failed_count
                               - fallback_count)
    # some 'Failed' failed the check, others 'Skipped' the check,
    # if we already had enough successful downloads
    s += ('Excluded: %d (Same Operator %d, Failed/Skipped Download %d, ' +
          'Excess %d)')%(removed_count, operator_count, failed_count,
                         excess_to_target_or_max)
    s += '\n'
  min_fb = self.fallback_min()
  min_bw = min_fb._data['measured_bandwidth']
  max_fb = self.fallback_max()
  max_bw = max_fb._data['measured_bandwidth']
  s += 'Bandwidth Range: %.1f - %.1f MByte/s'%(min_bw/(1024.0*1024.0),
                                               max_bw/(1024.0*1024.0))
  s += '\n'
  s += '*/'
  if fallback_count < MIN_FALLBACK_COUNT:
    # We must have a minimum number of fallbacks so they are always
    # reachable, and are in diverse locations
    s += '\n'
    s += '#error Fallback Count %d is too low. '%(fallback_count)
    s += 'Must be at least %d for diversity. '%(MIN_FALLBACK_COUNT)
    s += 'Try adding entries to the whitelist, '
    s += 'or setting INCLUDE_UNLISTED_ENTRIES = True.'
  return s
# re-check the current fallback list: parse it as the whitelist,
# and log eligibility details at INFO level for review
def process_existing():
  logging.basicConfig(level=logging.INFO)
  logging.getLogger('stem').setLevel(logging.INFO)
  whitelist = dict(data=parse_fallback_file(FALLBACK_FILE_NAME),
                   name=FALLBACK_FILE_NAME)
  blacklist = dict(data=read_from_file(BLACKLIST_FILE_NAME,
                                       MAX_LIST_FILE_SIZE),
                   name=BLACKLIST_FILE_NAME)
  list_fallbacks(whitelist, blacklist)
# build a fresh fallback list from the whitelist file,
# logging only warnings and above
def process_default():
  logging.basicConfig(level=logging.WARNING)
  logging.getLogger('stem').setLevel(logging.WARNING)
  whitelist = dict(data=read_from_file(WHITELIST_FILE_NAME,
                                       MAX_LIST_FILE_SIZE),
                   name=WHITELIST_FILE_NAME)
  blacklist = dict(data=read_from_file(BLACKLIST_FILE_NAME,
                                       MAX_LIST_FILE_SIZE),
                   name=BLACKLIST_FILE_NAME)
  list_fallbacks(whitelist, blacklist)
## Main Function

# dispatch on the optional command-line command
def main():
  if get_command() == 'check_existing':
    process_existing()
  else:
    process_default()

# return the single optional command-line argument, or None
def get_command():
  if len(sys.argv) == 2:
    return sys.argv[1]
  else:
    return None
# log relay exclusions at WARNING level when reviewing an existing list
# (check_existing), and at INFO level when building a final list
def log_excluded(msg, *args):
  if get_command() == 'check_existing':
    logging.warning(msg, *args)
  else:
    # without this else, the INFO log would fire on every call
    logging.info(msg, *args)
2216 def list_fallbacks(whitelist
, blacklist
):
2217 """ Fetches required onionoo documents and evaluates the
2218 fallback directory criteria for each of the relays """
2220 print "/* type=fallback */"
2221 print ("/* version={} */"
2222 .format(cleanse_c_multiline_comment(FALLBACK_FORMAT_VERSION
)))
2223 now
= datetime
.datetime
.utcnow()
2224 timestamp
= now
.strftime('%Y%m%d%H%M%S')
2225 print ("/* timestamp={} */"
2226 .format(cleanse_c_multiline_comment(timestamp
)))
2227 # end the header with a separator, to make it easier for parsers
2228 print SECTION_SEPARATOR_COMMENT
2230 logging
.warning('Downloading and parsing Onionoo data. ' +
2231 'This may take some time.')
2232 # find relays that could be fallbacks
2233 candidates
= CandidateList()
2234 candidates
.add_relays()
2236 # work out how many fallbacks we want
2237 guard_count
= candidates
.count_guards()
2238 if FALLBACK_PROPORTION_OF_GUARDS
is None:
2239 target_count
= guard_count
2241 target_count
= int(guard_count
* FALLBACK_PROPORTION_OF_GUARDS
)
2242 # the maximum number of fallbacks is the least of:
2243 # - the target fallback count (FALLBACK_PROPORTION_OF_GUARDS * guard count)
2244 # - the maximum fallback count (MAX_FALLBACK_COUNT)
2245 if MAX_FALLBACK_COUNT
is None:
2246 max_count
= target_count
2248 max_count
= min(target_count
, MAX_FALLBACK_COUNT
)
2250 candidates
.compute_fallbacks()
2251 prefilter_fallbacks
= copy
.copy(candidates
.fallbacks
)
2253 # filter with the whitelist and blacklist
2254 # if a relay has changed IPv4 address or ports recently, it will be excluded
2255 # as ineligible before we call apply_filter_lists, and so there will be no
2256 # warning that the details have changed from those in the whitelist.
2257 # instead, there will be an info-level log during the eligibility check.
2258 initial_count
= len(candidates
.fallbacks
)
2259 excluded_count
= candidates
.apply_filter_lists(whitelist
, blacklist
)
2260 print candidates
.summarise_filters(initial_count
, excluded_count
)
2261 eligible_count
= len(candidates
.fallbacks
)
2263 # calculate the measured bandwidth of each relay,
2264 # then remove low-bandwidth relays
2265 candidates
.calculate_measured_bandwidth()
2266 candidates
.remove_low_bandwidth_relays()
2268 # print the raw fallback list
2269 #for x in candidates.fallbacks:
2270 # print x.fallbackdir_line(True)
2271 # print json.dumps(candidates[x]._data, sort_keys=True, indent=4,
2272 # separators=(',', ': '), default=json_util.default)
2274 # impose mandatory conditions here, like one per contact, family, IP
2275 # in measured bandwidth order
2276 candidates
.sort_fallbacks_by_measured_bandwidth()
2278 # only impose these limits on the final list - operators can nominate
2279 # multiple candidate fallbacks, and then we choose the best set
2280 if not OUTPUT_CANDIDATES
:
2281 operator_count
+= candidates
.limit_fallbacks_same_ip()
2282 operator_count
+= candidates
.limit_fallbacks_same_contact()
2283 operator_count
+= candidates
.limit_fallbacks_same_family()
2285 # check if each candidate can serve a consensus
2286 # there's a small risk we've eliminated relays from the same operator that
2287 # can serve a consensus, in favour of one that can't
2288 # but given it takes up to 15 seconds to check each consensus download,
2289 # the risk is worth it
2290 if PERFORM_IPV4_DIRPORT_CHECKS
or PERFORM_IPV6_DIRPORT_CHECKS
:
2291 logging
.warning('Checking consensus download speeds. ' +
2292 'This may take some time.')
2293 failed_count
= candidates
.perform_download_consensus_checks(max_count
)
2295 # work out which fallbacks cache extra-infos
2296 candidates
.mark_extra_info_caches()
2298 # analyse and log interesting diversity metrics
2299 # like netblock, ports, exit, IPv4-only
2300 # (we can't easily analyse AS, and it's hard to accurately analyse country)
2301 candidates
.describe_fallback_ip_family()
2302 # if we can't import the ipaddress module, we can't do netblock analysis
2304 candidates
.describe_fallback_netblocks()
2305 candidates
.describe_fallback_ports()
2306 candidates
.describe_fallback_extra_info_caches()
2307 candidates
.describe_fallback_exit_flag()
2309 # output C comments summarising the fallback selection process
2310 if len(candidates
.fallbacks
) > 0:
2311 print candidates
.summarise_fallbacks(eligible_count
, operator_count
,
2312 failed_count
, guard_count
,
2315 print '/* No Fallbacks met criteria */'
2317 # output C comments specifying the OnionOO data used to create the list
2318 for s
in fetch_source_list():
2319 print describe_fetch_source(s
)
2321 # start the list with a separator, to make it easy for parsers
2322 print SECTION_SEPARATOR_COMMENT
2324 # sort the list differently depending on why we've created it:
2325 # if we're outputting the final fallback list, sort by fingerprint
2326 # this makes diffs much more stable
2327 # otherwise, if we're trying to find a bandwidth cutoff, or we want to
2328 # contact operators in priority order, sort by bandwidth (not yet
2330 # otherwise, if we're contacting operators, sort by contact
2331 candidates
.sort_fallbacks_by(OUTPUT_SORT_FIELD
)
2333 for x
in candidates
.fallbacks
:
2334 print x
.fallbackdir_line(candidates
.fallbacks
, prefilter_fallbacks
)
2336 if __name__
== "__main__":