3 # Usage: scripts/maint/updateFallbackDirs.py > src/or/fallback_dirs.inc
5 # This script should be run from a stable, reliable network connection,
6 # with no other network activity (and not over tor).
7 # If this is not possible, please disable:
8 # PERFORM_IPV4_DIRPORT_CHECKS and PERFORM_IPV6_DIRPORT_CHECKS
10 # Needs dateutil (and potentially other python packages)
11 # Needs stem available in your PYTHONPATH, or just ln -s ../stem/stem .
12 # Optionally uses ipaddress (python 3 builtin) or py2-ipaddress (package)
13 # for netblock analysis, in PYTHONPATH, or just
14 # ln -s ../py2-ipaddress-3.4.1/ipaddress.py .
16 # Then read the logs to make sure the fallbacks aren't dominated by a single
19 # Script by weasel, April 2015
20 # Portions by gsathya & karsten, 2013
21 # https://trac.torproject.org/projects/tor/attachment/ticket/8374/dir_list.2.py
22 # Modifications by teor, 2015
36 import dateutil
.parser
37 # bson_lazy provides bson
38 #from bson import json_util
41 from stem
.descriptor
.remote
import DescriptorDownloader
44 # INFO tells you why each relay was included or excluded
45 # WARN tells you about potential misconfigurations and relay detail changes
46 logging
.basicConfig(level
=logging
.WARNING
)
47 logging
.root
.name
= ''
48 # INFO tells you about each consensus download attempt
49 logging
.getLogger('stem').setLevel(logging
.WARNING
)
51 HAVE_IPADDRESS
= False
53 # python 3 builtin, or install package py2-ipaddress
54 # there are several ipaddress implementations for python 2
55 # with slightly different semantics with str typed text
56 # fortunately, all our IP addresses are in unicode
60 # if this happens, we avoid doing netblock analysis
61 logging
.warning('Unable to import ipaddress, please install py2-ipaddress.' +
62 ' A fallback list will be created, but optional netblock' +
63 ' analysis will not be performed.')
65 ## Top-Level Configuration
67 # Output all candidate fallbacks, or only output selected fallbacks?
68 OUTPUT_CANDIDATES
= False
70 # Perform DirPort checks over IPv4?
71 # Change this to False if IPv4 doesn't work for you, or if you don't want to
72 # download a consensus for each fallback
73 # Don't check ~1000 candidates when OUTPUT_CANDIDATES is True
74 PERFORM_IPV4_DIRPORT_CHECKS
= False if OUTPUT_CANDIDATES
else True
76 # Perform DirPort checks over IPv6?
77 # If you know IPv6 works for you, set this to True
78 # This will exclude IPv6 relays without an IPv6 DirPort configured
79 # So it's best left at False until #18394 is implemented
80 # Don't check ~1000 candidates when OUTPUT_CANDIDATES is True
81 PERFORM_IPV6_DIRPORT_CHECKS
= False if OUTPUT_CANDIDATES
else False
83 # Output fallback name, flags, and ContactInfo in a C comment?
84 OUTPUT_COMMENTS
= True if OUTPUT_CANDIDATES
else False
86 # Output matching ContactInfo in fallbacks list or the blacklist?
87 # Useful if you're trying to contact operators
88 CONTACT_COUNT
= True if OUTPUT_CANDIDATES
else False
89 CONTACT_BLACKLIST_COUNT
= True if OUTPUT_CANDIDATES
else False
93 ONIONOO
= 'https://onionoo.torproject.org/'
94 #ONIONOO = 'https://onionoo.thecthulhu.com/'
96 # Don't bother going out to the Internet, just use the files available locally,
97 # even if they're very old
98 LOCAL_FILES_ONLY
= False
100 ## Whitelist / Blacklist Filter Settings
102 # The whitelist contains entries that are included if all attributes match
103 # (IPv4, dirport, orport, id, and optionally IPv6 and IPv6 orport)
104 # The blacklist contains (partial) entries that are excluded if any
105 # sufficiently specific group of attributes matches:
111 # If neither port is included in the blacklist, the entire IP address is
114 # What happens to entries in neither list?
115 # When True, they are included, when False, they are excluded
116 INCLUDE_UNLISTED_ENTRIES
= True if OUTPUT_CANDIDATES
else False
118 # If an entry is in both lists, what happens?
119 # When True, it is excluded, when False, it is included
120 BLACKLIST_EXCLUDES_WHITELIST_ENTRIES
= True
122 WHITELIST_FILE_NAME
= 'scripts/maint/fallback.whitelist'
123 BLACKLIST_FILE_NAME
= 'scripts/maint/fallback.blacklist'
125 # The number of bytes we'll read from a filter file before giving up
126 MAX_LIST_FILE_SIZE
= 1024 * 1024
128 ## Eligibility Settings
130 # Reduced due to a bug in tor where a relay submits a 0 DirPort when restarted
131 # This causes OnionOO to (correctly) reset its stability timer
132 # This issue will be fixed in 0.2.7.7 and 0.2.8.2
133 # Until then, the CUTOFFs below ensure a decent level of stability.
134 ADDRESS_AND_PORT_STABLE_DAYS
= 7
135 # What time-weighted-fraction of these flags must FallbackDirs
140 # What time-weighted-fraction of these flags must FallbackDirs
141 # Equal or Fall Under?
142 # .00 means no bad exits
143 PERMITTED_BADEXIT
= .00
145 # older entries' weights are adjusted with ALPHA^(age in days)
148 # this factor is used to scale OnionOO entries to [0,1]
149 ONIONOO_SCALE_ONE
= 999.
151 ## Fallback Count Limits
153 # The target for these parameters is 20% of the guards in the network
154 # This is around 200 as of October 2015
156 FALLBACK_PROPORTION_OF_GUARDS
= None if OUTPUT_CANDIDATES
else _FB_POG
158 # We want exactly 100 fallbacks for the initial release
159 # This gives us scope to add extra fallbacks to the list as needed
160 # Limit the number of fallbacks (eliminating lowest by advertised bandwidth)
161 MAX_FALLBACK_COUNT
= None if OUTPUT_CANDIDATES
else 100
162 # Emit a C #error if the number of fallbacks is below
163 MIN_FALLBACK_COUNT
= 100
165 ## Fallback Bandwidth Requirements
167 # Any fallback with the Exit flag has its bandwidth multiplied by this fraction
168 # to make sure we aren't further overloading exits
169 # (Set to 1.0, because we asked that only lightly loaded exits opt-in,
170 # and the extra load really isn't that much for large relays.)
171 EXIT_BANDWIDTH_FRACTION
= 1.0
173 # If a single fallback's bandwidth is too low, it's pointless adding it
174 # We expect fallbacks to handle an extra 30 kilobytes per second of traffic
175 # Make sure they can support a hundred times the expected extra load
176 # (Use 102.4 to make it come out nicely in MB/s)
177 # We convert this to a consensus weight before applying the filter,
178 # because all the bandwidth amounts are specified by the relay
179 MIN_BANDWIDTH
= 102.4 * 30.0 * 1024.0
181 # Clients will time out after 30 seconds trying to download a consensus
182 # So allow fallback directories half that to deliver a consensus
183 # The exact download times might change based on the network connection
184 # running this script, but only by a few seconds
185 # There is also about a second of python overhead
186 CONSENSUS_DOWNLOAD_SPEED_MAX
= 15.0
187 # If the relay fails a consensus check, retry the download
188 # This avoids delisting a relay due to transient network conditions
189 CONSENSUS_DOWNLOAD_RETRY
= True
191 ## Fallback Weights for Client Selection
193 # All fallback weights are equal, and set to the value below
194 # Authorities are weighted 1.0 by default
195 # Clients use these weights to select fallbacks and authorities at random
196 # If there are 100 fallbacks and 9 authorities:
197 # - each fallback is chosen with probability 10.0/(10.0*100 + 1.0*9) ~= 0.99%
198 # - each authority is chosen with probability 1.0/(10.0*100 + 1.0*9) ~= 0.09%
199 # A client choosing a bootstrap directory server will choose a fallback for
200 # 10.0/(10.0*100 + 1.0*9) * 100 = 99.1% of attempts, and an authority for
201 # 1.0/(10.0*100 + 1.0*9) * 9 = 0.9% of attempts.
202 # (This disregards the bootstrap schedules, where clients start by choosing
203 # from fallbacks & authorities, then later choose from only authorities.)
204 FALLBACK_OUTPUT_WEIGHT
= 10.0
209 return datetime
.datetime
.strptime(t
, "%Y-%m-%d %H:%M:%S")
def remove_bad_chars(raw_string, bad_char_list):
  # Return raw_string with every character that appears in bad_char_list
  # stripped out.  bad_char_list is any iterable of single characters.
  result = raw_string
  for bad_char in bad_char_list:
    result = result.replace(bad_char, '')
  return result
218 def cleanse_unprintable(raw_string
):
219 # Remove all unprintable characters
222 if c
in string
.printable
:
224 return cleansed_string
def cleanse_whitespace(raw_string):
  # Map every whitespace character (tabs, newlines, carriage returns, ...)
  # to a single plain space, so the result fits on one output line.
  result = raw_string
  for ws_char in string.whitespace:
    result = result.replace(ws_char, ' ')
  return result
233 def cleanse_c_multiline_comment(raw_string
):
234 cleansed_string
= raw_string
235 # Embedded newlines should be removed by tor/onionoo, but let's be paranoid
236 cleansed_string
= cleanse_whitespace(cleansed_string
)
237 # ContactInfo and Version can be arbitrary binary data
238 cleansed_string
= cleanse_unprintable(cleansed_string
)
239 # Prevent a malicious / unanticipated string from breaking out
240 # of a C-style multiline comment
241 # This removes '/*' and '*/' and '//'
243 # Prevent a malicious string from using C nulls
244 bad_char_list
+= '\0'
245 # Be safer by removing bad characters entirely
246 cleansed_string
= remove_bad_chars(cleansed_string
, bad_char_list
)
247 # Some compilers may further process the content of comments
248 # There isn't much we can do to cover every possible case
249 # But comment-based directives are typically only advisory
250 return cleansed_string
252 def cleanse_c_string(raw_string
):
253 cleansed_string
= raw_string
254 # Embedded newlines should be removed by tor/onionoo, but let's be paranoid
255 cleansed_string
= cleanse_whitespace(cleansed_string
)
256 # ContactInfo and Version can be arbitrary binary data
257 cleansed_string
= cleanse_unprintable(cleansed_string
)
258 # Prevent a malicious address/fingerprint string from breaking out
259 # of a C-style string
261 # Prevent a malicious string from using escapes
262 bad_char_list
+= '\\'
263 # Prevent a malicious string from using C nulls
264 bad_char_list
+= '\0'
265 # Be safer by removing bad characters entirely
266 cleansed_string
= remove_bad_chars(cleansed_string
, bad_char_list
)
267 # Some compilers may further process the content of strings
268 # There isn't much we can do to cover every possible case
269 # But this typically only results in changes to the string data
270 return cleansed_string
272 ## OnionOO Source Functions
274 # a dictionary of source metadata for each onionoo query we've made
277 # register source metadata for 'what'
278 # assumes we only retrieve one document for each 'what'
def register_fetch_source(what, url, relays_published, version):
  # Record the source metadata for the single onionoo document
  # retrieved for 'what' in the module-level fetch_source dict.
  fetch_source[what] = {
    'url': url,
    'relays_published': relays_published,
    'version': version,
  }
285 # list each registered source's 'what'
def fetch_source_list():
  # The registered sources' 'what' keys, in sorted order for stable output.
  return sorted(fetch_source)
289 # given 'what', provide a multiline C comment describing the source
290 def describe_fetch_source(what
):
293 desc
+= 'Onionoo Source: '
294 desc
+= cleanse_c_multiline_comment(what
)
296 desc
+= cleanse_c_multiline_comment(fetch_source
[what
]['relays_published'])
298 desc
+= cleanse_c_multiline_comment(fetch_source
[what
]['version'])
301 desc
+= cleanse_c_multiline_comment(fetch_source
[what
]['url'])
306 ## File Processing Functions
308 def write_to_file(str, file_name
, max_len
):
310 with
open(file_name
, 'w') as f
:
311 f
.write(str[0:max_len
])
312 except EnvironmentError, error
:
313 logging
.error('Writing file %s failed: %d: %s'%
319 def read_from_file(file_name
, max_len
):
321 if os
.path
.isfile(file_name
):
322 with
open(file_name
, 'r') as f
:
323 return f
.read(max_len
)
324 except EnvironmentError, error
:
325 logging
.info('Loading file %s failed: %d: %s'%
332 def load_possibly_compressed_response_json(response
):
333 if response
.info().get('Content-Encoding') == 'gzip':
334 buf
= StringIO
.StringIO( response
.read() )
335 f
= gzip
.GzipFile(fileobj
=buf
)
338 return json
.load(response
)
340 def load_json_from_file(json_file_name
):
341 # An exception here may be resolved by deleting the .last_modified
342 # and .json files, and re-running the script
344 with
open(json_file_name
, 'r') as f
:
346 except EnvironmentError, error
:
347 raise Exception('Reading not-modified json file %s failed: %d: %s'%
355 def datestr_to_datetime(datestr
):
356 # Parse datetimes like: Fri, 02 Oct 2015 13:34:14 GMT
357 if datestr
is not None:
358 dt
= dateutil
.parser
.parse(datestr
)
360 # Never modified - use start of epoch
361 dt
= datetime
.datetime
.utcfromtimestamp(0)
362 # strip any timezone out (in case they're supported in future)
363 dt
= dt
.replace(tzinfo
=None)
366 def onionoo_fetch(what
, **kwargs
):
368 params
['type'] = 'relay'
369 #params['limit'] = 10
370 params
['first_seen_days'] = '%d-'%(ADDRESS_AND_PORT_STABLE_DAYS
,)
371 params
['last_seen_days'] = '-7'
372 params
['flag'] = 'V2Dir'
373 url
= ONIONOO
+ what
+ '?' + urllib
.urlencode(params
)
375 # Unfortunately, the URL is too long for some OS filenames,
376 # but we still don't want to get files from different URLs mixed up
377 base_file_name
= what
+ '-' + hashlib
.sha1(url
).hexdigest()
379 full_url_file_name
= base_file_name
+ '.full_url'
380 MAX_FULL_URL_LENGTH
= 1024
382 last_modified_file_name
= base_file_name
+ '.last_modified'
383 MAX_LAST_MODIFIED_LENGTH
= 64
385 json_file_name
= base_file_name
+ '.json'
388 # Read from the local file, don't write to anything
389 response_json
= load_json_from_file(json_file_name
)
391 # store the full URL to a file for debugging
392 # no need to compare as long as you trust SHA-1
393 write_to_file(url
, full_url_file_name
, MAX_FULL_URL_LENGTH
)
395 request
= urllib2
.Request(url
)
396 request
.add_header('Accept-encoding', 'gzip')
398 # load the last modified date from the file, if it exists
399 last_mod_date
= read_from_file(last_modified_file_name
,
400 MAX_LAST_MODIFIED_LENGTH
)
401 if last_mod_date
is not None:
402 request
.add_header('If-modified-since', last_mod_date
)
404 # Parse last modified date
405 last_mod
= datestr_to_datetime(last_mod_date
)
407 # Not Modified and still recent enough to be useful
408 # Onionoo / Globe used to use 6 hours, but we can afford a day
409 required_freshness
= datetime
.datetime
.utcnow()
410 # strip any timezone out (to match dateutil.parser)
411 required_freshness
= required_freshness
.replace(tzinfo
=None)
412 required_freshness
-= datetime
.timedelta(hours
=24)
414 # Make the OnionOO request
417 response
= urllib2
.urlopen(request
)
418 response_code
= response
.getcode()
419 except urllib2
.HTTPError
, error
:
420 response_code
= error
.code
421 if response_code
== 304: # not modified
424 raise Exception("Could not get " + url
+ ": "
425 + str(error
.code
) + ": " + error
.reason
)
427 if response_code
== 200: # OK
428 last_mod
= datestr_to_datetime(response
.info().get('Last-Modified'))
430 # Check for freshness
431 if last_mod
< required_freshness
:
432 if last_mod_date
is not None:
433 # This check sometimes fails transiently, retry the script if it does
434 date_message
= "Outdated data: last updated " + last_mod_date
436 date_message
= "No data: never downloaded "
437 raise Exception(date_message
+ " from " + url
)
440 if response_code
== 200: # OK
442 response_json
= load_possibly_compressed_response_json(response
)
444 with
open(json_file_name
, 'w') as f
:
445 # use the most compact json representation to save space
446 json
.dump(response_json
, f
, separators
=(',',':'))
448 # store the last modified date in its own file
449 if response
.info().get('Last-modified') is not None:
450 write_to_file(response
.info().get('Last-Modified'),
451 last_modified_file_name
,
452 MAX_LAST_MODIFIED_LENGTH
)
454 elif response_code
== 304: # Not Modified
456 response_json
= load_json_from_file(json_file_name
)
458 else: # Unexpected HTTP response code not covered in the HTTPError above
459 raise Exception("Unexpected HTTP response code to " + url
+ ": "
460 + str(response_code
))
462 register_fetch_source(what
,
464 response_json
['relays_published'],
465 response_json
['version'])
def fetch(what, **kwargs):
  # Thin wrapper around onionoo_fetch(); the commented-out lines are
  # handy when debugging the raw JSON response.
  #x = onionoo_fetch(what, **kwargs)
  # don't use sort_keys, as the order of or_addresses is significant
  #print json.dumps(x, indent=4, separators=(',', ': '))
  return onionoo_fetch(what, **kwargs)
477 ## Fallback Candidate Class
479 class Candidate(object):
480 CUTOFF_ADDRESS_AND_PORT_STABLE
= (datetime
.datetime
.utcnow()
481 - datetime
.timedelta(ADDRESS_AND_PORT_STABLE_DAYS
))
483 def __init__(self
, details
):
484 for f
in ['fingerprint', 'nickname', 'last_changed_address_or_port',
485 'consensus_weight', 'or_addresses', 'dir_address']:
486 if not f
in details
: raise Exception("Document has no %s field."%(f
,))
488 if not 'contact' in details
:
489 details
['contact'] = None
490 if not 'flags' in details
or details
['flags'] is None:
491 details
['flags'] = []
492 if (not 'advertised_bandwidth' in details
493 or details
['advertised_bandwidth'] is None):
494 # relays without advertised bandwdith have it calculated from their
496 details
['advertised_bandwidth'] = 0
497 if (not 'effective_family' in details
498 or details
['effective_family'] is None):
499 details
['effective_family'] = []
500 details
['last_changed_address_or_port'] = parse_ts(
501 details
['last_changed_address_or_port'])
503 self
._stable
_sort
_or
_addresses
()
505 self
._fpr
= self
._data
['fingerprint']
506 self
._running
= self
._guard
= self
._v
2dir
= 0.
507 self
._split
_dirport
()
508 self
._compute
_orport
()
509 if self
.orport
is None:
510 raise Exception("Failed to get an orport for %s."%(self
._fpr
,))
511 self
._compute
_ipv
6addr
()
512 if not self
.has_ipv6():
513 logging
.debug("Failed to get an ipv6 address for %s."%(self
._fpr
,))
515 def _stable_sort_or_addresses(self
):
516 # replace self._data['or_addresses'] with a stable ordering,
517 # sorting the secondary addresses in string order
518 # leave the received order in self._data['or_addresses_raw']
519 self
._data
['or_addresses_raw'] = self
._data
['or_addresses']
520 or_address_primary
= self
._data
['or_addresses'][:1]
521 # subsequent entries in the or_addresses array are in an arbitrary order
522 # so we stabilise the addresses by sorting them in string order
523 or_addresses_secondaries_stable
= sorted(self
._data
['or_addresses'][1:])
524 or_addresses_stable
= or_address_primary
+ or_addresses_secondaries_stable
525 self
._data
['or_addresses'] = or_addresses_stable
527 def get_fingerprint(self
):
530 # is_valid_ipv[46]_address by gsathya, karsten, 2013
532 def is_valid_ipv4_address(address
):
533 if not isinstance(address
, (str, unicode)):
536 # check if there are four period separated values
537 if address
.count(".") != 3:
540 # checks that each value in the octet are decimal values between 0-255
541 for entry
in address
.split("."):
542 if not entry
.isdigit() or int(entry
) < 0 or int(entry
) > 255:
544 elif entry
[0] == "0" and len(entry
) > 1:
545 return False # leading zeros, for instance in "1.2.3.001"
550 def is_valid_ipv6_address(address
):
551 if not isinstance(address
, (str, unicode)):
555 address
= address
[1:-1]
557 # addresses are made up of eight colon separated groups of four hex digits
558 # with leading zeros being optional
559 # https://en.wikipedia.org/wiki/IPv6#Address_format
561 colon_count
= address
.count(":")
564 return False # too many groups
565 elif colon_count
!= 7 and not "::" in address
:
566 return False # not enough groups and none are collapsed
567 elif address
.count("::") > 1 or ":::" in address
:
568 return False # multiple groupings of zeros can't be collapsed
570 found_ipv4_on_previous_entry
= False
571 for entry
in address
.split(":"):
572 # If an IPv6 address has an embedded IPv4 address,
573 # it must be the last entry
574 if found_ipv4_on_previous_entry
:
576 if not re
.match("^[0-9a-fA-f]{0,4}$", entry
):
577 if not Candidate
.is_valid_ipv4_address(entry
):
580 found_ipv4_on_previous_entry
= True
584 def _split_dirport(self
):
585 # Split the dir_address into dirip and dirport
586 (self
.dirip
, _dirport
) = self
._data
['dir_address'].split(':', 2)
587 self
.dirport
= int(_dirport
)
589 def _compute_orport(self
):
590 # Choose the first ORPort that's on the same IPv4 address as the DirPort.
591 # In rare circumstances, this might not be the primary ORPort address.
592 # However, _stable_sort_or_addresses() ensures we choose the same one
593 # every time, even if onionoo changes the order of the secondaries.
594 self
._split
_dirport
()
596 for i
in self
._data
['or_addresses']:
597 if i
!= self
._data
['or_addresses'][0]:
598 logging
.debug('Secondary IPv4 Address Used for %s: %s'%(self
._fpr
, i
))
599 (ipaddr
, port
) = i
.rsplit(':', 1)
600 if (ipaddr
== self
.dirip
) and Candidate
.is_valid_ipv4_address(ipaddr
):
601 self
.orport
= int(port
)
604 def _compute_ipv6addr(self
):
605 # Choose the first IPv6 address that uses the same port as the ORPort
606 # Or, choose the first IPv6 address in the list
607 # _stable_sort_or_addresses() ensures we choose the same IPv6 address
608 # every time, even if onionoo changes the order of the secondaries.
610 self
.ipv6orport
= None
611 # Choose the first IPv6 address that uses the same port as the ORPort
612 for i
in self
._data
['or_addresses']:
613 (ipaddr
, port
) = i
.rsplit(':', 1)
614 if (port
== self
.orport
) and Candidate
.is_valid_ipv6_address(ipaddr
):
615 self
.ipv6addr
= ipaddr
616 self
.ipv6orport
= int(port
)
618 # Choose the first IPv6 address in the list
619 for i
in self
._data
['or_addresses']:
620 (ipaddr
, port
) = i
.rsplit(':', 1)
621 if Candidate
.is_valid_ipv6_address(ipaddr
):
622 self
.ipv6addr
= ipaddr
623 self
.ipv6orport
= int(port
)
627 def _extract_generic_history(history
, which
='unknown'):
628 # given a tree like this:
632 # "factor": 0.001001001001001001,
633 # "first": "2015-02-27 06:00:00",
635 # "last": "2015-03-30 06:00:00",
643 # "factor": 0.001001001001001001,
644 # "first": "2015-03-23 07:30:00",
646 # "last": "2015-03-30 07:30:00",
651 # "factor": 0.001001001001001001,
652 # "first": "2014-04-11 00:00:00",
653 # "interval": 172800,
654 # "last": "2015-03-29 00:00:00",
659 # "factor": 0.001001001001001001,
660 # "first": "2014-12-28 06:00:00",
662 # "last": "2015-03-30 06:00:00",
666 # extract exactly one piece of data per time interval,
667 # using smaller intervals where available.
669 # returns list of (age, length, value) dictionaries.
673 periods
= history
.keys()
674 periods
.sort(key
= lambda x
: history
[x
]['interval'])
675 now
= datetime
.datetime
.utcnow()
679 interval
= datetime
.timedelta(seconds
= h
['interval'])
680 this_ts
= parse_ts(h
['last'])
682 if (len(h
['values']) != h
['count']):
683 logging
.warning('Inconsistent value count in %s document for %s'
685 for v
in reversed(h
['values']):
686 if (this_ts
<= newest
):
689 agetmp1
= (agt1
.microseconds
+ (agt1
.seconds
+ agt1
.days
* 24 * 3600)
691 agetmp2
= (agt2
.microseconds
+ (agt2
.seconds
+ agt2
.days
* 24 * 3600)
693 generic_history
.append(
701 if (this_ts
+ interval
!= parse_ts(h
['first'])):
702 logging
.warning('Inconsistent time information in %s document for %s'
705 #print json.dumps(generic_history, sort_keys=True,
706 # indent=4, separators=(',', ': '))
707 return generic_history
710 def _avg_generic_history(generic_history
):
712 for i
in generic_history
:
713 if i
['age'] > (ADDRESS_AND_PORT_STABLE_DAYS
* 24 * 3600):
715 if (i
['length'] is not None
716 and i
['age'] is not None
717 and i
['value'] is not None):
718 w
= i
['length'] * math
.pow(AGE_ALPHA
, i
['age']/(3600*24))
719 a
.append( (i
['value'] * w
, w
) )
721 sv
= math
.fsum(map(lambda x
: x
[0], a
))
722 sw
= math
.fsum(map(lambda x
: x
[1], a
))
730 def _add_generic_history(self
, history
):
731 periods
= r
['read_history'].keys()
732 periods
.sort(key
= lambda x
: r
['read_history'][x
]['interval'] )
736 def add_running_history(self
, history
):
739 def add_uptime(self
, uptime
):
740 logging
.debug('Adding uptime %s.'%(self
._fpr
,))
742 # flags we care about: Running, V2Dir, Guard
743 if not 'flags' in uptime
:
744 logging
.debug('No flags in document for %s.'%(self
._fpr
,))
747 for f
in ['Running', 'Guard', 'V2Dir']:
748 if not f
in uptime
['flags']:
749 logging
.debug('No %s in flags for %s.'%(f
, self
._fpr
,))
752 running
= self
._extract
_generic
_history
(uptime
['flags']['Running'],
753 '%s-Running'%(self
._fpr
))
754 guard
= self
._extract
_generic
_history
(uptime
['flags']['Guard'],
755 '%s-Guard'%(self
._fpr
))
756 v2dir
= self
._extract
_generic
_history
(uptime
['flags']['V2Dir'],
757 '%s-V2Dir'%(self
._fpr
))
758 if 'BadExit' in uptime
['flags']:
759 badexit
= self
._extract
_generic
_history
(uptime
['flags']['BadExit'],
760 '%s-BadExit'%(self
._fpr
))
762 self
._running
= self
._avg
_generic
_history
(running
) / ONIONOO_SCALE_ONE
763 self
._guard
= self
._avg
_generic
_history
(guard
) / ONIONOO_SCALE_ONE
764 self
._v
2dir
= self
._avg
_generic
_history
(v2dir
) / ONIONOO_SCALE_ONE
766 if 'BadExit' in uptime
['flags']:
767 self
._badexit
= self
._avg
_generic
_history
(badexit
) / ONIONOO_SCALE_ONE
769 def is_candidate(self
):
770 must_be_running_now
= (PERFORM_IPV4_DIRPORT_CHECKS
771 or PERFORM_IPV6_DIRPORT_CHECKS
)
772 if (must_be_running_now
and not self
.is_running()):
773 logging
.info('%s not a candidate: not running now, unable to check ' +
774 'DirPort consensus download', self
._fpr
)
776 if (self
._data
['last_changed_address_or_port'] >
777 self
.CUTOFF_ADDRESS_AND_PORT_STABLE
):
778 logging
.info('%s not a candidate: changed address/port recently (%s)',
779 self
._fpr
, self
._data
['last_changed_address_or_port'])
781 if self
._running
< CUTOFF_RUNNING
:
782 logging
.info('%s not a candidate: running avg too low (%lf)',
783 self
._fpr
, self
._running
)
785 if self
._v
2dir
< CUTOFF_V2DIR
:
786 logging
.info('%s not a candidate: v2dir avg too low (%lf)',
787 self
._fpr
, self
._v
2dir
)
789 if self
._badexit
is not None and self
._badexit
> PERMITTED_BADEXIT
:
790 logging
.info('%s not a candidate: badexit avg too high (%lf)',
791 self
._fpr
, self
._badexit
)
793 # if the relay doesn't report a version, also exclude the relay
794 if (not self
._data
.has_key('recommended_version')
795 or not self
._data
['recommended_version']):
796 logging
.info('%s not a candidate: version not recommended', self
._fpr
)
798 if self
._guard
< CUTOFF_GUARD
:
799 logging
.info('%s not a candidate: guard avg too low (%lf)',
800 self
._fpr
, self
._guard
)
802 if (not self
._data
.has_key('consensus_weight')
803 or self
._data
['consensus_weight'] < 1):
804 logging
.info('%s not a candidate: consensus weight invalid', self
._fpr
)
808 def is_in_whitelist(self
, relaylist
):
809 """ A fallback matches if each key in the whitelist line matches:
814 ipv6 address and port (if present)
815 If the fallback has an ipv6 key, the whitelist line must also have
816 it, and vice versa, otherwise they don't match. """
819 ipv6
= '%s:%d'%(self
.ipv6addr
, self
.ipv6orport
)
820 for entry
in relaylist
:
821 if entry
['id'] != self
._fpr
:
822 # can't log here unless we match an IP and port, because every relay's
823 # fingerprint is compared to every entry's fingerprint
824 if entry
['ipv4'] == self
.dirip
and int(entry
['orport']) == self
.orport
:
825 logging
.warning('%s excluded: has OR %s:%d changed fingerprint to ' +
826 '%s?', entry
['id'], self
.dirip
, self
.orport
,
828 if self
.has_ipv6() and entry
.has_key('ipv6') and entry
['ipv6'] == ipv6
:
829 logging
.warning('%s excluded: has OR %s changed fingerprint to ' +
830 '%s?', entry
['id'], ipv6
, self
._fpr
)
832 if entry
['ipv4'] != self
.dirip
:
833 logging
.warning('%s excluded: has it changed IPv4 from %s to %s?',
834 self
._fpr
, entry
['ipv4'], self
.dirip
)
836 if int(entry
['dirport']) != self
.dirport
:
837 logging
.warning('%s excluded: has it changed DirPort from %s:%d to ' +
838 '%s:%d?', self
._fpr
, self
.dirip
, int(entry
['dirport']),
839 self
.dirip
, self
.dirport
)
841 if int(entry
['orport']) != self
.orport
:
842 logging
.warning('%s excluded: has it changed ORPort from %s:%d to ' +
843 '%s:%d?', self
._fpr
, self
.dirip
, int(entry
['orport']),
844 self
.dirip
, self
.orport
)
846 if entry
.has_key('ipv6') and self
.has_ipv6():
847 # if both entry and fallback have an ipv6 address, compare them
848 if entry
['ipv6'] != ipv6
:
849 logging
.warning('%s excluded: has it changed IPv6 ORPort from %s ' +
850 'to %s?', self
._fpr
, entry
['ipv6'], ipv6
)
852 # if the fallback has an IPv6 address but the whitelist entry
853 # doesn't, or vice versa, the whitelist entry doesn't match
854 elif entry
.has_key('ipv6') and not self
.has_ipv6():
855 logging
.warning('%s excluded: has it lost its former IPv6 address %s?',
856 self
._fpr
, entry
['ipv6'])
858 elif not entry
.has_key('ipv6') and self
.has_ipv6():
859 logging
.warning('%s excluded: has it gained an IPv6 address %s?',
865 def is_in_blacklist(self
, relaylist
):
866 """ A fallback matches a blacklist line if a sufficiently specific group
867 of attributes matches:
873 If the fallback and the blacklist line both have an ipv6 key,
874 their values will be compared, otherwise, they will be ignored.
875 If there is no dirport and no orport, the entry matches all relays on
877 for entry
in relaylist
:
880 if key
== 'id' and value
== self
._fpr
:
881 logging
.info('%s is in the blacklist: fingerprint matches',
884 if key
== 'ipv4' and value
== self
.dirip
:
885 # if the dirport is present, check it too
886 if entry
.has_key('dirport'):
887 if int(entry
['dirport']) == self
.dirport
:
888 logging
.info('%s is in the blacklist: IPv4 (%s) and ' +
889 'DirPort (%d) match', self
._fpr
, self
.dirip
,
892 # if the orport is present, check it too
893 elif entry
.has_key('orport'):
894 if int(entry
['orport']) == self
.orport
:
895 logging
.info('%s is in the blacklist: IPv4 (%s) and ' +
896 'ORPort (%d) match', self
._fpr
, self
.dirip
,
900 logging
.info('%s is in the blacklist: IPv4 (%s) matches, and ' +
901 'entry has no DirPort or ORPort', self
._fpr
,
906 ipv6
= '%s:%d'%(self
.ipv6addr
, self
.ipv6orport
)
907 if (key
== 'ipv6' and self
.has_ipv6()):
908 # if both entry and fallback have an ipv6 address, compare them,
909 # otherwise, disregard ipv6 addresses
911 # if the dirport is present, check it too
912 if entry
.has_key('dirport'):
913 if int(entry
['dirport']) == self
.dirport
:
914 logging
.info('%s is in the blacklist: IPv6 (%s) and ' +
915 'DirPort (%d) match', self
._fpr
, ipv6
,
918 # we've already checked the ORPort, it's part of entry['ipv6']
920 logging
.info('%s is in the blacklist: IPv6 (%s) matches, and' +
921 'entry has no DirPort', self
._fpr
, ipv6
)
923 elif (key
== 'ipv6' or self
.has_ipv6()):
924 # only log if the fingerprint matches but the IPv6 doesn't
925 if entry
.has_key('id') and entry
['id'] == self
._fpr
:
926 logging
.info('%s skipping IPv6 blacklist comparison: relay ' +
927 'has%s IPv6%s, but entry has%s IPv6%s', self
._fpr
,
928 '' if self
.has_ipv6() else ' no',
929 (' (' + ipv6
+ ')') if self
.has_ipv6() else '',
930 '' if key
== 'ipv6' else ' no',
931 (' (' + value
+ ')') if key
== 'ipv6' else '')
932 logging
.warning('Has %s %s IPv6 address %s?', self
._fpr
,
933 'gained an' if self
.has_ipv6() else 'lost its former',
934 ipv6
if self
.has_ipv6() else value
)
937 def cw_to_bw_factor(self
):
938 # any relays with a missing or zero consensus weight are not candidates
939 # any relays with a missing advertised bandwidth have it set to zero
940 return self
._data
['advertised_bandwidth'] / self
._data
['consensus_weight']
# since advertised_bandwidth is reported by the relay, it can be gamed
# to avoid this, use the median consensus weight to bandwidth factor to
# estimate this relay's measured bandwidth, and make that the upper limit
def measured_bandwidth(self, median_cw_to_bw_factor):
    cw_to_bw = median_cw_to_bw_factor
    # Reduce exit bandwidth to make sure we're not overloading them
    # NOTE(review): restored the is_exit() guard implied by the comment
    # above; the reduction must only apply to Exit relays
    if self.is_exit():
        cw_to_bw *= EXIT_BANDWIDTH_FRACTION
    measured_bandwidth = self._data['consensus_weight'] * cw_to_bw
    if self._data['advertised_bandwidth'] != 0:
        # limit advertised bandwidth (if available) to measured bandwidth
        return min(measured_bandwidth, self._data['advertised_bandwidth'])
    return measured_bandwidth
def set_measured_bandwidth(self, median_cw_to_bw_factor):
    # Cache the bandwidth estimate in the relay's data dictionary so
    # later sorting/filtering passes can read it directly.
    estimate = self.measured_bandwidth(median_cw_to_bw_factor)
    self._data['measured_bandwidth'] = estimate
# does the consensus list the Exit flag for this relay?
# NOTE(review): the `def` line was lost in extraction; the name is
# grounded by the fallbacks_with_exit() call site
def is_exit(self):
    return 'Exit' in self._data['flags']
# does the consensus list the Guard flag for this relay?
# NOTE(review): the `def` line was lost in extraction; the name is
# grounded by the count_guards() call site
def is_guard(self):
    return 'Guard' in self._data['flags']
def is_running(self):
    # True when the consensus lists the Running flag for this relay.
    flags = self._data['flags']
    return 'Running' in flags
# does this fallback have an IPv6 address and orport?
# NOTE(review): the `def` line was lost in extraction; the name is
# grounded by the many self.has_ipv6() call sites in this file
def has_ipv6(self):
    return self.ipv6addr is not None and self.ipv6orport is not None
# strip leading and trailing brackets from an IPv6 address
# safe to use on non-bracketed IPv6 and on IPv4 addresses
# also convert to unicode, and make None appear as ''
# NOTE(review): the None/empty handling lines were lost in extraction and
# have been restored from the comment above — confirm against upstream
@staticmethod
def strip_ipv6_brackets(ip):
    # None (and the empty string) normalise to an empty unicode string
    if not ip:
        return unicode('')
    if ip[0] == '[' and ip[-1] == ']':
        return unicode(ip[1:-1])
    # not bracketed: return the address unchanged, as unicode
    return unicode(ip)
987 # are ip_a and ip_b in the same netblock?
988 # mask_bits is the size of the netblock
989 # takes both IPv4 and IPv6 addresses
990 # the versions of ip_a and ip_b must be the same
991 # the mask must be valid for the IP version
993 def netblocks_equal(ip_a
, ip_b
, mask_bits
):
994 if ip_a
is None or ip_b
is None:
996 ip_a
= Candidate
.strip_ipv6_brackets(ip_a
)
997 ip_b
= Candidate
.strip_ipv6_brackets(ip_b
)
998 a
= ipaddress
.ip_address(ip_a
)
999 b
= ipaddress
.ip_address(ip_b
)
1000 if a
.version
!= b
.version
:
1001 raise Exception('Mismatching IP versions in %s and %s'%(ip_a
, ip_b
))
1002 if mask_bits
> a
.max_prefixlen
:
1003 logging
.error('Bad IP mask %d for %s and %s'%(mask_bits
, ip_a
, ip_b
))
1004 mask_bits
= a
.max_prefixlen
1006 logging
.error('Bad IP mask %d for %s and %s'%(mask_bits
, ip_a
, ip_b
))
1008 a_net
= ipaddress
.ip_network('%s/%d'%(ip_a
, mask_bits
), strict
=False)
# is this fallback's IPv4 address (dirip) in the same netblock as other's
# IPv4 address?
# mask_bits is the size of the netblock
def ipv4_netblocks_equal(self, other, mask_bits):
    mine = self.dirip
    theirs = other.dirip
    return Candidate.netblocks_equal(mine, theirs, mask_bits)
# is this fallback's IPv6 address (ipv6addr) in the same netblock as
# other's IPv6 address?
# Returns False if either fallback has no IPv6 address
# mask_bits is the size of the netblock
def ipv6_netblocks_equal(self, other, mask_bits):
    if not self.has_ipv6() or not other.has_ipv6():
        # restored: no IPv6 on either side means "not equal", per the
        # contract in the comment above
        return False
    return Candidate.netblocks_equal(self.ipv6addr, other.ipv6addr, mask_bits)
# is this fallback's IPv4 DirPort the same as other's IPv4 DirPort?
def dirport_equal(self, other):
    return other.dirport == self.dirport
# is this fallback's IPv4 ORPort the same as other's IPv4 ORPort?
def ipv4_orport_equal(self, other):
    return other.orport == self.orport
# is this fallback's IPv6 ORPort the same as other's IPv6 ORPort?
# Returns False if either fallback has no IPv6 address
def ipv6_orport_equal(self, other):
    if not self.has_ipv6() or not other.has_ipv6():
        # restored: matches the "Returns False" contract above
        return False
    return self.ipv6orport == other.ipv6orport
# does this fallback have the same DirPort, IPv4 ORPort, or
# IPv6 ORPort as other?
# Ignores IPv6 ORPort if either fallback has no IPv6 address
def port_equal(self, other):
    if self.dirport_equal(other):
        return True
    if self.ipv4_orport_equal(other):
        return True
    return self.ipv6_orport_equal(other)
# return a list containing IPv4 ORPort, DirPort, and IPv6 ORPort (if present)
def port_list(self):
    ports = [self.dirport, self.orport]
    # only add the IPv6 ORPort when it exists and isn't a duplicate
    if self.has_ipv6() and not self.ipv6orport in ports:
        ports.append(self.ipv6orport)
    # restored: the return was lost in extraction; grounded by the
    # "return a list" contract above
    return ports
# does this fallback share a port with other, regardless of which
# port type each one is?
# For example, if self's IPv4 ORPort is 80 and other's DirPort is 80,
# they share a port
def port_shared(self, other):
    for p in self.port_list():
        if p in other.port_list():
            return True
    # restored: boolean returns were lost in extraction; grounded by the
    # yes/no question in the comment above
    return False
1065 # report how long it takes to download a consensus from dirip:dirport
1067 def fallback_consensus_download_speed(dirip
, dirport
, nickname
, max_time
):
1068 download_failed
= False
1069 downloader
= DescriptorDownloader()
1070 start
= datetime
.datetime
.utcnow()
1071 # some directory mirrors respond to requests in ways that hang python
1072 # sockets, which is why we log this line here
1073 logging
.info('Initiating consensus download from %s (%s:%d).', nickname
,
1075 # there appears to be about 1 second of overhead when comparing stem's
1076 # internal trace time and the elapsed time calculated here
1079 downloader
.get_consensus(endpoints
= [(dirip
, dirport
)],
1080 timeout
= (max_time
+ TIMEOUT_SLOP
),
1083 fall_back_to_authority
= False).run()
1084 except Exception, stem_error
:
1085 logging
.info('Unable to retrieve a consensus from %s: %s', nickname
,
1087 status
= 'error: "%s"' % (stem_error
)
1088 level
= logging
.WARNING
1089 download_failed
= True
1090 elapsed
= (datetime
.datetime
.utcnow() - start
).total_seconds()
1091 if elapsed
> max_time
:
1093 level
= logging
.WARNING
1094 download_failed
= True
1097 level
= logging
.DEBUG
1098 logging
.log(level
, 'Consensus download: %0.1fs %s from %s (%s:%d), ' +
1099 'max download time %0.1fs.', elapsed
, status
, nickname
,
1100 dirip
, dirport
, max_time
)
1101 return download_failed
# does this fallback download the consensus fast enough?
def check_fallback_download_consensus(self):
    # include the relay if we're not doing a check, or we can't check (IPv6)
    # NOTE(review): these initialisers were lost in extraction; without
    # them the final expression would raise NameError — confirm upstream
    ipv4_failed = False
    ipv6_failed = False
    if PERFORM_IPV4_DIRPORT_CHECKS:
        ipv4_failed = Candidate.fallback_consensus_download_speed(
            self.dirip,
            self.dirport,
            self._data['nickname'],
            CONSENSUS_DOWNLOAD_SPEED_MAX)
    if self.has_ipv6() and PERFORM_IPV6_DIRPORT_CHECKS:
        # Clients assume the IPv6 DirPort is the same as the IPv4 DirPort
        ipv6_failed = Candidate.fallback_consensus_download_speed(
            self.ipv6addr,
            self.dirport,
            self._data['nickname'],
            CONSENSUS_DOWNLOAD_SPEED_MAX)
    return ((not ipv4_failed) and (not ipv6_failed))
# if this fallback has not passed a download check, try it again,
# and record the result, available in get_fallback_download_consensus
def try_fallback_download_consensus(self):
    # already passed: nothing to do
    if self.get_fallback_download_consensus():
        return
    self._data['download_check'] = self.check_fallback_download_consensus()
# did this fallback pass the download check?
def get_fallback_download_consensus(self):
    # if we're not performing checks, return True
    if not PERFORM_IPV4_DIRPORT_CHECKS and not PERFORM_IPV6_DIRPORT_CHECKS:
        return True
    # if we are performing checks, but haven't done one, return False
    # (idiom: `in` rather than the deprecated dict.has_key)
    if 'download_check' not in self._data:
        return False
    return self._data['download_check']
1137 # output an optional header comment and info for this fallback
1138 # try_fallback_download_consensus before calling this
1139 def fallbackdir_line(self
, fallbacks
, prefilter_fallbacks
):
1142 s
+= self
.fallbackdir_comment(fallbacks
, prefilter_fallbacks
)
1143 # if the download speed is ok, output a C string
1144 # if it's not, but we OUTPUT_COMMENTS, output a commented-out C string
1145 if self
.get_fallback_download_consensus() or OUTPUT_COMMENTS
:
1146 s
+= self
.fallbackdir_info(self
.get_fallback_download_consensus())
1149 # output a header comment for this fallback
1150 def fallbackdir_comment(self
, fallbacks
, prefilter_fallbacks
):
1155 # [identical contact counts]
1157 # Multiline C comment
1160 s
+= cleanse_c_multiline_comment(self
._data
['nickname'])
1163 s
+= cleanse_c_multiline_comment(' '.join(sorted(self
._data
['flags'])))
1165 if self
._data
['contact'] is not None:
1166 s
+= cleanse_c_multiline_comment(self
._data
['contact'])
1167 if CONTACT_COUNT
or CONTACT_BLACKLIST_COUNT
:
1168 fallback_count
= len([f
for f
in fallbacks
1169 if f
._data
['contact'] == self
._data
['contact']])
1170 if fallback_count
> 1:
1172 s
+= '%d identical contacts listed' % (fallback_count
)
1173 if CONTACT_BLACKLIST_COUNT
:
1174 prefilter_count
= len([f
for f
in prefilter_fallbacks
1175 if f
._data
['contact'] == self
._data
['contact']])
1176 filter_count
= prefilter_count
- fallback_count
1177 if filter_count
> 0:
1178 if fallback_count
> 1:
1182 s
+= '%d blacklisted' % (filter_count
)
1187 # output the fallback info C string for this fallback
1188 # this is the text that would go after FallbackDir in a torrc
1189 # if this relay failed the download test and we OUTPUT_COMMENTS,
1190 # comment-out the returned string
1191 def fallbackdir_info(self
, dl_speed_ok
):
1192 # "address:dirport orport=port id=fingerprint"
1193 # "[ipv6=addr:orport]"
1194 # "weight=FALLBACK_OUTPUT_WEIGHT",
1196 # Do we want a C string, or a commented-out string?
1197 c_string
= dl_speed_ok
1198 comment_string
= not dl_speed_ok
and OUTPUT_COMMENTS
1199 # If we don't want either kind of string, bail
1200 if not c_string
and not comment_string
:
1203 # Comment out the fallback directory entry if it's too slow
1204 # See the debug output for which address and port is failing
1206 s
+= '/* Consensus download failed or was too slow:\n'
1207 # Multi-Line C string with trailing comma (part of a string list)
1208 # This makes it easier to diff the file, and remove IPv6 lines using grep
1209 # Integers don't need escaping
1210 s
+= '"%s orport=%d id=%s"'%(
1211 cleanse_c_string(self
._data
['dir_address']),
1213 cleanse_c_string(self
._fpr
))
1216 s
+= '" ipv6=%s:%d"'%(cleanse_c_string(self
.ipv6addr
), self
.ipv6orport
)
1218 s
+= '" weight=%d",'%(FALLBACK_OUTPUT_WEIGHT)
1224 ## Fallback Candidate List Class
1226 class CandidateList(dict):
1230 def _add_relay(self
, details
):
1231 if not 'dir_address' in details
: return
1232 c
= Candidate(details
)
1233 self
[ c
.get_fingerprint() ] = c
1235 def _add_uptime(self
, uptime
):
1237 fpr
= uptime
['fingerprint']
1239 raise Exception("Document has no fingerprint field.")
1244 logging
.debug('Got unknown relay %s in uptime document.'%(fpr
,))
1247 c
.add_uptime(uptime
)
def _add_details(self):
    # fetch the Onionoo details document and register every relay in it
    logging.debug('Loading details document.')
    d = fetch('details',
              fields=('fingerprint,nickname,contact,last_changed_address_or_port,'
                      'consensus_weight,advertised_bandwidth,or_addresses,'
                      'dir_address,recommended_version,flags,effective_family'))
    logging.debug('Loading details document done.')
    if 'relays' not in d:
        raise Exception("No relays found in document.")
    for relay in d['relays']:
        self._add_relay(relay)
1261 def _add_uptimes(self
):
1262 logging
.debug('Loading uptime document.')
1264 logging
.debug('Loading uptime document done.')
1266 if not 'relays' in d
: raise Exception("No relays found in document.")
1267 for r
in d
['relays']: self
._add
_uptime
(r
)
1269 def add_relays(self
):
def count_guards(self):
    # count the relays in this list that have the Guard flag
    # NOTE(review): the counter init/increment/return were lost in
    # extraction; reconstructed from the method name and visible loop
    guard_count = 0
    for fpr in self.keys():
        if self[fpr].is_guard():
            guard_count += 1
    return guard_count
1280 # Find fallbacks that fit the uptime, stability, and flags criteria,
1281 # and make an array of them in self.fallbacks
1282 def compute_fallbacks(self
):
1283 self
.fallbacks
= map(lambda x
: self
[x
],
1284 filter(lambda x
: self
[x
].is_candidate(),
# sort fallbacks by their consensus weight to advertised bandwidth factor
# (ascending, the list.sort default)
# used to find the median cw_to_bw_factor()
def sort_fallbacks_by_cw_to_bw_factor(self):
    def factor(fallback):
        return fallback.cw_to_bw_factor()
    self.fallbacks.sort(key=factor)
# sort fallbacks by their measured bandwidth, highest to lowest
# calculate_measured_bandwidth before calling this
# this is useful for reviewing candidates in priority order
def sort_fallbacks_by_measured_bandwidth(self):
    # restored: the reverse=True continuation was lost in extraction;
    # grounded by the "highest to lowest" contract above
    self.fallbacks.sort(key=lambda f: f._data['measured_bandwidth'],
                        reverse=True)
# sort fallbacks by their fingerprint, lowest to highest
# this is useful for stable diffs of fallback lists
def sort_fallbacks_by_fingerprint(self):
    def fingerprint(fallback):
        return fallback._fpr
    self.fallbacks.sort(key=fingerprint)
1306 def load_relaylist(file_name
):
1307 """ Read each line in the file, and parse it like a FallbackDir line:
1308 an IPv4 address and optional port:
1309 <IPv4 address>:<port>
1310 which are parsed into dictionary entries:
1313 followed by a series of key=value entries:
1316 ipv6=<IPv6 address>:<IPv6 orport>
1317 each line's key/value pairs are placed in a dictonary,
1318 (of string -> string key/value pairs),
1319 and these dictionaries are placed in an array.
1320 comments start with # and are ignored """
1322 file_data
= read_from_file(file_name
, MAX_LIST_FILE_SIZE
)
1323 if file_data
is None:
1325 for line
in file_data
.split('\n'):
1328 line_comment_split
= line
.split('#')
1329 line
= line_comment_split
[0]
1330 # cleanup whitespace
1331 line
= cleanse_whitespace(line
)
1335 for item
in line
.split(' '):
1339 key_value_split
= item
.split('=')
1340 kvl
= len(key_value_split
)
1341 if kvl
< 1 or kvl
> 2:
1342 print '#error Bad %s item: %s, format is key=value.'%(
1345 # assume that entries without a key are the ipv4 address,
1346 # perhaps with a dirport
1347 ipv4_maybe_dirport
= key_value_split
[0]
1348 ipv4_maybe_dirport_split
= ipv4_maybe_dirport
.split(':')
1349 dirl
= len(ipv4_maybe_dirport_split
)
1350 if dirl
< 1 or dirl
> 2:
1351 print '#error Bad %s IPv4 item: %s, format is ipv4:port.'%(
1354 relay_entry
['ipv4'] = ipv4_maybe_dirport_split
[0]
1356 relay_entry
['dirport'] = ipv4_maybe_dirport_split
[1]
1358 relay_entry
[key_value_split
[0]] = key_value_split
[1]
1359 relaylist
.append(relay_entry
)
1362 # apply the fallback whitelist and blacklist
1363 def apply_filter_lists(self
):
1365 logging
.debug('Applying whitelist and blacklist.')
1366 # parse the whitelist and blacklist
1367 whitelist
= self
.load_relaylist(WHITELIST_FILE_NAME
)
1368 blacklist
= self
.load_relaylist(BLACKLIST_FILE_NAME
)
1369 filtered_fallbacks
= []
1370 for f
in self
.fallbacks
:
1371 in_whitelist
= f
.is_in_whitelist(whitelist
)
1372 in_blacklist
= f
.is_in_blacklist(blacklist
)
1373 if in_whitelist
and in_blacklist
:
1374 if BLACKLIST_EXCLUDES_WHITELIST_ENTRIES
:
1377 logging
.warning('Excluding %s: in both blacklist and whitelist.',
1381 filtered_fallbacks
.append(f
)
1384 filtered_fallbacks
.append(f
)
1388 logging
.info('Excluding %s: in blacklist.', f
._fpr
)
1390 if INCLUDE_UNLISTED_ENTRIES
:
1392 filtered_fallbacks
.append(f
)
1396 logging
.info('Excluding %s: in neither blacklist nor whitelist.',
1398 self
.fallbacks
= filtered_fallbacks
1399 return excluded_count
1402 def summarise_filters(initial_count
, excluded_count
):
1403 return '/* Whitelist & blacklist excluded %d of %d candidates. */'%(
1404 excluded_count
, initial_count
)
# calculate each fallback's measured bandwidth based on the median
# consensus weight to advertised bandwidth ratio
def calculate_measured_bandwidth(self):
    self.sort_fallbacks_by_cw_to_bw_factor()
    median_fallback = self.fallback_median(True)
    if median_fallback is not None:
        median_cw_to_bw_factor = median_fallback.cw_to_bw_factor()
    else:
        # this will never be used, because there are no fallbacks
        median_cw_to_bw_factor = None
    for f in self.fallbacks:
        f.set_measured_bandwidth(median_cw_to_bw_factor)
# remove relays with low measured bandwidth from the fallback list
# calculate_measured_bandwidth for each relay before calling this
def remove_low_bandwidth_relays(self):
    # no minimum configured: keep everything
    if MIN_BANDWIDTH is None:
        return
    above_min_bw_fallbacks = []
    for f in self.fallbacks:
        if f._data['measured_bandwidth'] >= MIN_BANDWIDTH:
            above_min_bw_fallbacks.append(f)
        else:
            # the bandwidth we log here is limited by the relay's consensus
            # weight as well as its advertised bandwidth. See
            # set_measured_bandwidth
            logging.info('%s not a candidate: bandwidth %.1fMB/s too low, must ' +
                         'be at least %.1fMB/s', f._fpr,
                         f._data['measured_bandwidth']/(1024.0*1024.0),
                         MIN_BANDWIDTH/(1024.0*1024.0))
    self.fallbacks = above_min_bw_fallbacks
# the minimum fallback in the list (the last element, since the
# sort_fallbacks_* functions can leave the list in descending order)
# call one of the sort_fallbacks_* functions before calling this
def fallback_min(self):
    if len(self.fallbacks) > 0:
        return self.fallbacks[-1]
    else:
        # restored: empty-list branch was lost in extraction
        return None
# the median fallback in the list
# call one of the sort_fallbacks_* functions before calling this
def fallback_median(self, require_advertised_bandwidth):
    # use the low-median when there are an even number of fallbacks,
    # for consistency with the bandwidth authorities
    if len(self.fallbacks) > 0:
        # explicit floor division: same result as py2 int `/`, py3-safe
        median_position = (len(self.fallbacks) - 1) // 2
        if not require_advertised_bandwidth:
            return self.fallbacks[median_position]
        # if we need advertised_bandwidth but this relay doesn't have it,
        # move to a fallback with greater consensus weight until we find one
        while not self.fallbacks[median_position]._data['advertised_bandwidth']:
            median_position += 1
            if median_position >= len(self.fallbacks):
                # restored: ran off the end without finding one
                return None
        return self.fallbacks[median_position]
    else:
        # restored: empty-list branch was lost in extraction
        return None
# the maximum fallback in the list (the first element)
# call one of the sort_fallbacks_* functions before calling this
def fallback_max(self):
    if len(self.fallbacks) > 0:
        return self.fallbacks[0]
    else:
        # restored: empty-list branch was lost in extraction
        return None
1473 # does exclusion_list contain attribute?
1474 # if so, return False
1475 # if not, return True
1476 # if attribute is None or the empty string, always return True
1478 def allow(attribute
, exclusion_list
):
1479 if attribute
is None or attribute
== '':
1481 elif attribute
in exclusion_list
:
# make sure there is only one fallback per IPv4 address, and per IPv6 address
# there is only one IPv4 address on each fallback: the IPv4 DirPort address
# (we choose the IPv4 ORPort which is on the same IPv4 as the DirPort)
# there is at most one IPv6 address on each fallback: the IPv6 ORPort address
# we try to match the IPv4 ORPort, but will use any IPv6 address if needed
# (clients assume the IPv6 DirPort is the same as the IPv4 DirPort, but
# typically only use the IPv6 ORPort)
# if there is no IPv6 address, only the IPv4 address is checked
# return the number of candidates we excluded
def limit_fallbacks_same_ip(self):
    ip_limit_fallbacks = []
    # NOTE(review): ip_list init and the has_ipv6() guard were lost in
    # extraction and are reconstructed — confirm against upstream
    ip_list = []
    for f in self.fallbacks:
        if (CandidateList.allow(f.dirip, ip_list)
            and CandidateList.allow(f.ipv6addr, ip_list)):
            ip_limit_fallbacks.append(f)
            ip_list.append(f.dirip)
            if f.has_ipv6():
                ip_list.append(f.ipv6addr)
        elif not CandidateList.allow(f.dirip, ip_list):
            logging.info('Eliminated %s: already have fallback on IPv4 %s'%(
                f._fpr, f.dirip))
        elif f.has_ipv6() and not CandidateList.allow(f.ipv6addr, ip_list):
            logging.info('Eliminated %s: already have fallback on IPv6 %s'%(
                f._fpr, f.ipv6addr))
    original_count = len(self.fallbacks)
    self.fallbacks = ip_limit_fallbacks
    return original_count - len(self.fallbacks)
# make sure there is only one fallback per ContactInfo
# if there is no ContactInfo, allow the fallback
# this check can be gamed by providing no ContactInfo, or by setting the
# ContactInfo to match another fallback
# However, given the likelihood that relays with the same ContactInfo will
# go down at similar times, its usefulness outweighs the risk
def limit_fallbacks_same_contact(self):
    contact_limit_fallbacks = []
    # restored: list init and else: were lost in extraction
    contact_list = []
    for f in self.fallbacks:
        if CandidateList.allow(f._data['contact'], contact_list):
            contact_limit_fallbacks.append(f)
            contact_list.append(f._data['contact'])
        else:
            logging.info(('Eliminated %s: already have fallback on ' +
                          'ContactInfo %s')%(f._fpr, f._data['contact']))
    original_count = len(self.fallbacks)
    self.fallbacks = contact_limit_fallbacks
    return original_count - len(self.fallbacks)
1535 # make sure there is only one fallback per effective family
1536 # if there is no family, allow the fallback
1537 # this check can't be gamed, because we use effective family, which ensures
1538 # mutual family declarations
1539 # if any indirect families exist, the result depends on the order in which
1540 # fallbacks are sorted in the list
1541 def limit_fallbacks_same_family(self
):
1542 family_limit_fallbacks
= []
1543 fingerprint_list
= []
1544 for f
in self
.fallbacks
:
1545 if CandidateList
.allow(f
._fpr
, fingerprint_list
):
1546 family_limit_fallbacks
.append(f
)
1547 fingerprint_list
.append(f
._fpr
)
1548 fingerprint_list
.extend(f
._data
['effective_family'])
1550 # technically, we already have a fallback with this fallback in its
1552 logging
.info('Eliminated %s: already have fallback in effective ' +
1554 original_count
= len(self
.fallbacks
)
1555 self
.fallbacks
= family_limit_fallbacks
1556 return original_count
- len(self
.fallbacks
)
# try a download check on each fallback candidate in order
# stop after max_count successful downloads
# but don't remove any candidates from the array
def try_download_consensus_checks(self, max_count):
    # restored: counter init/increment and early return were lost in
    # extraction; grounded by the "stop after max_count" contract above
    dl_ok_count = 0
    for f in self.fallbacks:
        f.try_fallback_download_consensus()
        if f.get_fallback_download_consensus():
            # this fallback downloaded a consensus ok
            dl_ok_count += 1
            if dl_ok_count >= max_count:
                # we have enough fallbacks
                return
# put max_count successful candidates in the fallbacks array:
# - perform download checks on each fallback candidate
# - retry failed candidates if CONSENSUS_DOWNLOAD_RETRY is set
# - eliminate failed candidates
# - if there are more than max_count candidates, eliminate lowest bandwidth
# - if there are fewer than max_count candidates, leave only successful
# Return the number of fallbacks that failed the consensus check
def perform_download_consensus_checks(self, max_count):
    self.sort_fallbacks_by_measured_bandwidth()
    self.try_download_consensus_checks(max_count)
    if CONSENSUS_DOWNLOAD_RETRY:
        # try unsuccessful candidates again
        # we could end up with more than max_count successful candidates here
        self.try_download_consensus_checks(max_count)
    # now we have at least max_count successful candidates,
    # or we've tried them all
    original_count = len(self.fallbacks)
    self.fallbacks = filter(lambda x: x.get_fallback_download_consensus(),
                            self.fallbacks)
    # some of these failed the check, others skipped the check,
    # if we already had enough successful downloads
    failed_count = original_count - len(self.fallbacks)
    self.fallbacks = self.fallbacks[:max_count]
    # restored: return was lost in extraction; grounded by the "Return
    # the number of fallbacks that failed" contract above
    return failed_count
1597 # return a string that describes a/b as a percentage
1599 def describe_percentage(a
, b
):
1601 return '%d/%d = %.0f%%'%(a
, b
, (a
*100.0)/b
)
1603 # technically, 0/0 is undefined, but 0.0% is a sensible result
1604 return '%d/%d = %.0f%%'%(a
, b
, 0.0)
# return a dictionary of lists of fallbacks by IPv4 netblock
# the dictionary is keyed by the fingerprint of an arbitrary fallback
# in each netblock
# mask_bits is the size of the netblock
def fallbacks_by_ipv4_netblock(self, mask_bits):
    # restored: dict init, break, and return were lost in extraction;
    # grounded by the "return a dictionary" contract above
    netblocks = {}
    for f in self.fallbacks:
        found_netblock = False
        for b in netblocks.keys():
            # we found an existing netblock containing this fallback
            if f.ipv4_netblocks_equal(self[b], mask_bits):
                # add it to the list
                netblocks[b].append(f)
                found_netblock = True
                break
        # make a new netblock based on this fallback's fingerprint
        if not found_netblock:
            netblocks[f._fpr] = [f]
    return netblocks
# return a dictionary of lists of fallbacks by IPv6 netblock
# where mask_bits is the size of the netblock
def fallbacks_by_ipv6_netblock(self, mask_bits):
    # restored: dict init, continue, break, and return were lost in
    # extraction; grounded by the contract and comments below
    netblocks = {}
    for f in self.fallbacks:
        # skip fallbacks without IPv6 addresses
        if not f.has_ipv6():
            continue
        found_netblock = False
        for b in netblocks.keys():
            # we found an existing netblock containing this fallback
            if f.ipv6_netblocks_equal(self[b], mask_bits):
                # add it to the list
                netblocks[b].append(f)
                found_netblock = True
                break
        # make a new netblock based on this fallback's fingerprint
        if not found_netblock:
            netblocks[f._fpr] = [f]
    return netblocks
1647 # log a message about the proportion of fallbacks in each IPv4 netblock,
1648 # where mask_bits is the size of the netblock
1649 def describe_fallback_ipv4_netblock_mask(self
, mask_bits
):
1650 fallback_count
= len(self
.fallbacks
)
1651 shared_netblock_fallback_count
= 0
1652 most_frequent_netblock
= None
1653 netblocks
= self
.fallbacks_by_ipv4_netblock(mask_bits
)
1654 for b
in netblocks
.keys():
1655 if len(netblocks
[b
]) > 1:
1656 # how many fallbacks are in a netblock with other fallbacks?
1657 shared_netblock_fallback_count
+= len(netblocks
[b
])
1658 # what's the netblock with the most fallbacks?
1659 if (most_frequent_netblock
is None
1660 or len(netblocks
[b
]) > len(netblocks
[most_frequent_netblock
])):
1661 most_frequent_netblock
= b
1662 logging
.debug('Fallback IPv4 addresses in the same /%d:'%(mask_bits))
1663 for f
in netblocks
[b
]:
1664 logging
.debug('%s - %s', f
.dirip
, f
._fpr
)
1665 if most_frequent_netblock
is not None:
1666 logging
.warning('There are %s fallbacks in the IPv4 /%d containing %s'%(
1667 CandidateList
.describe_percentage(
1668 len(netblocks
[most_frequent_netblock
]),
1671 self
[most_frequent_netblock
].dirip
))
1672 if shared_netblock_fallback_count
> 0:
1673 logging
.warning(('%s of fallbacks are in an IPv4 /%d with other ' +
1674 'fallbacks')%(CandidateList
.describe_percentage(
1675 shared_netblock_fallback_count
,
1679 # log a message about the proportion of fallbacks in each IPv6 netblock,
1680 # where mask_bits is the size of the netblock
1681 def describe_fallback_ipv6_netblock_mask(self
, mask_bits
):
1682 fallback_count
= len(self
.fallbacks_with_ipv6())
1683 shared_netblock_fallback_count
= 0
1684 most_frequent_netblock
= None
1685 netblocks
= self
.fallbacks_by_ipv6_netblock(mask_bits
)
1686 for b
in netblocks
.keys():
1687 if len(netblocks
[b
]) > 1:
1688 # how many fallbacks are in a netblock with other fallbacks?
1689 shared_netblock_fallback_count
+= len(netblocks
[b
])
1690 # what's the netblock with the most fallbacks?
1691 if (most_frequent_netblock
is None
1692 or len(netblocks
[b
]) > len(netblocks
[most_frequent_netblock
])):
1693 most_frequent_netblock
= b
1694 logging
.debug('Fallback IPv6 addresses in the same /%d:'%(mask_bits))
1695 for f
in netblocks
[b
]:
1696 logging
.debug('%s - %s', f
.ipv6addr
, f
._fpr
)
1697 if most_frequent_netblock
is not None:
1698 logging
.warning('There are %s fallbacks in the IPv6 /%d containing %s'%(
1699 CandidateList
.describe_percentage(
1700 len(netblocks
[most_frequent_netblock
]),
1703 self
[most_frequent_netblock
].ipv6addr
))
1704 if shared_netblock_fallback_count
> 0:
1705 logging
.warning(('%s of fallbacks are in an IPv6 /%d with other ' +
1706 'fallbacks')%(CandidateList
.describe_percentage(
1707 shared_netblock_fallback_count
,
# log a message about the proportion of fallbacks in each IPv4 /8, /16,
# and /24 netblock
def describe_fallback_ipv4_netblocks(self):
    # this doesn't actually tell us anything useful
    #self.describe_fallback_ipv4_netblock_mask(8)
    for mask_bits in (16, 24):
        self.describe_fallback_ipv4_netblock_mask(mask_bits)
# log a message about the proportion of fallbacks in each IPv6 /12 (RIR),
# /23 (smaller RIR blocks), /32 (LIR), /48 (Customer), and /64 (Host)
# https://www.iana.org/assignments/ipv6-unicast-address-assignments/
def describe_fallback_ipv6_netblocks(self):
    # these don't actually tell us anything useful
    #self.describe_fallback_ipv6_netblock_mask(12)
    #self.describe_fallback_ipv6_netblock_mask(23)
    for mask_bits in (32, 48, 64):
        self.describe_fallback_ipv6_netblock_mask(mask_bits)
# log a message about the proportion of fallbacks in each IPv4 and IPv6
# netblock
def describe_fallback_netblocks(self):
    # delegate to the per-address-family helpers, IPv4 first
    for describe in (self.describe_fallback_ipv4_netblocks,
                     self.describe_fallback_ipv6_netblocks):
        describe()
# return a list of fallbacks which are on the IPv4 ORPort port
def fallbacks_on_ipv4_orport(self, port):
    matches_port = lambda fallback: fallback.orport == port
    return filter(matches_port, self.fallbacks)
# return a list of fallbacks which are on the IPv6 ORPort port
def fallbacks_on_ipv6_orport(self, port):
    matches_port = lambda fallback: fallback.ipv6orport == port
    return filter(matches_port, self.fallbacks_with_ipv6())
# return a list of fallbacks which are on the DirPort port
def fallbacks_on_dirport(self, port):
    matches_port = lambda fallback: fallback.dirport == port
    return filter(matches_port, self.fallbacks)
# log a message about the proportion of fallbacks on IPv4 ORPort port
# and return that count
def describe_fallback_ipv4_orport(self, port):
    port_count = len(self.fallbacks_on_ipv4_orport(port))
    fallback_count = len(self.fallbacks)
    # NOTE(review): trailing arguments and the return were lost in
    # extraction; reconstructed from the format string ('%s' then '%d')
    # and the "return that count" contract above
    logging.warning('%s of fallbacks are on IPv4 ORPort %d'%(
                    CandidateList.describe_percentage(port_count,
                                                      fallback_count),
                    port))
    return port_count
1760 # and return that count
1761 def describe_fallback_ipv6_orport(self
, port
):
1762 port_count
= len(self
.fallbacks_on_ipv6_orport(port
))
1763 fallback_count
= len(self
.fallbacks_with_ipv6())
1764 logging
.warning('%s of IPv6 fallbacks are on IPv6 ORPort %d'%(
1765 CandidateList
.describe_percentage(port_count
,
# log a message about the proportion of fallbacks on DirPort port
# and return that count
def describe_fallback_dirport(self, port):
    port_count = len(self.fallbacks_on_dirport(port))
    fallback_count = len(self.fallbacks)
    # NOTE(review): trailing arguments and the return were lost in
    # extraction; reconstructed from the format string and contract above
    logging.warning('%s of fallbacks are on DirPort %d'%(
                    CandidateList.describe_percentage(port_count,
                                                      fallback_count),
                    port))
    return port_count
# log a message about the proportion of fallbacks on each dirport,
# each IPv4 orport, and each IPv6 orport
def describe_fallback_ports(self):
    fallback_count = len(self.fallbacks)
    ipv4_or_count = fallback_count
    ipv4_or_count -= self.describe_fallback_ipv4_orport(443)
    ipv4_or_count -= self.describe_fallback_ipv4_orport(9001)
    # NOTE(review): the 'fallback_count)))' continuations were lost in
    # extraction; reconstructed from the visible IPv6 parallel below
    logging.warning('%s of fallbacks are on other IPv4 ORPorts'%(
                    CandidateList.describe_percentage(ipv4_or_count,
                                                      fallback_count)))
    ipv6_fallback_count = len(self.fallbacks_with_ipv6())
    ipv6_or_count = ipv6_fallback_count
    ipv6_or_count -= self.describe_fallback_ipv6_orport(443)
    ipv6_or_count -= self.describe_fallback_ipv6_orport(9001)
    logging.warning('%s of IPv6 fallbacks are on other IPv6 ORPorts'%(
                    CandidateList.describe_percentage(ipv6_or_count,
                                                      ipv6_fallback_count)))
    dir_count = fallback_count
    dir_count -= self.describe_fallback_dirport(80)
    dir_count -= self.describe_fallback_dirport(9030)
    logging.warning('%s of fallbacks are on other DirPorts'%(
                    CandidateList.describe_percentage(dir_count,
                                                      fallback_count)))
# return a list of fallbacks which have the Exit flag
def fallbacks_with_exit(self):
    has_exit_flag = lambda fallback: fallback.is_exit()
    return filter(has_exit_flag, self.fallbacks)
# log a message about the proportion of fallbacks with an Exit flag
def describe_fallback_exit_flag(self):
    # (renamed local: fixed 'falback' typo)
    exit_fallback_count = len(self.fallbacks_with_exit())
    fallback_count = len(self.fallbacks)
    # NOTE(review): the 'fallback_count)))' continuation was lost in
    # extraction; reconstructed from the parallel helpers in this class
    logging.warning('%s of fallbacks have the Exit flag'%(
                    CandidateList.describe_percentage(exit_fallback_count,
                                                      fallback_count)))
# return a list of fallbacks which have an IPv6 address
def fallbacks_with_ipv6(self):
    has_ipv6_address = lambda fallback: fallback.has_ipv6()
    return filter(has_ipv6_address, self.fallbacks)
# log a message about the proportion of fallbacks on IPv6
def describe_fallback_ip_family(self):
    # (renamed local: fixed 'falback' typo)
    ipv6_fallback_count = len(self.fallbacks_with_ipv6())
    fallback_count = len(self.fallbacks)
    # NOTE(review): the 'fallback_count)))' continuation was lost in
    # extraction; reconstructed from the parallel helpers in this class
    logging.warning('%s of fallbacks are on IPv6'%(
                    CandidateList.describe_percentage(ipv6_fallback_count,
                                                      fallback_count)))
  # Return a summary of the fallback selection process, formatted as C
  # comments (the output file is a C include), ending in an #error
  # directive when the final list is smaller than MIN_FALLBACK_COUNT.
  # eligible_count: relays that passed the eligibility checks
  # operator_count: relays excluded for sharing an operator (IP/contact/family)
  # failed_count:   relays that failed or skipped the consensus download checks
  # guard_count:    total guards in the consensus
  # target_count:   desired number of fallbacks
  # NOTE(review): the source paste dropped several interior lines of this
  # method; they were reconstructed to match the upstream script — confirm
  # against the original file before relying on exact output.
  def summarise_fallbacks(self, eligible_count, operator_count, failed_count,
                          guard_count, target_count):
    s = ''
    s += '/* To comment-out entries in this file, use C comments, and add *'
    s += ' to the start of each line. (stem finds fallback entries using "'
    s += ' at the start of a line.) */'
    s += '\n'
    # Report:
    # whether we checked consensus download times
    # the number of fallback directories (and limits/exclusions, if relevant)
    # min & max fallback bandwidths
    # #error if below minimum count
    if PERFORM_IPV4_DIRPORT_CHECKS or PERFORM_IPV6_DIRPORT_CHECKS:
      s += '/* Checked %s%s%s DirPorts served a consensus within %.1fs. */'%(
              'IPv4' if PERFORM_IPV4_DIRPORT_CHECKS else '',
              ' and ' if (PERFORM_IPV4_DIRPORT_CHECKS
                          and PERFORM_IPV6_DIRPORT_CHECKS) else '',
              'IPv6' if PERFORM_IPV6_DIRPORT_CHECKS else '',
              CONSENSUS_DOWNLOAD_SPEED_MAX)
    else:
      s += '/* Did not check IPv4 or IPv6 DirPort consensus downloads. */'
    s += '\n'
    # Multiline C comment with #error if things go bad
    s += '/*'
    s += '\n'
    # Integers don't need escaping in C comments
    fallback_count = len(self.fallbacks)
    if FALLBACK_PROPORTION_OF_GUARDS is None:
      fallback_proportion = ''
    else:
      fallback_proportion = ', Target %d (%d * %.2f)'%(target_count,
                                                       guard_count,
                                              FALLBACK_PROPORTION_OF_GUARDS)
    s += 'Final Count: %d (Eligible %d%s'%(fallback_count, eligible_count,
                                           fallback_proportion)
    if MAX_FALLBACK_COUNT is not None:
      s += ', Max %d'%(MAX_FALLBACK_COUNT)
    s += ')\n'
    if eligible_count != fallback_count:
      removed_count = eligible_count - fallback_count
      excess_to_target_or_max = (eligible_count - operator_count - failed_count
                                 - fallback_count)
      # some 'Failed' failed the check, others 'Skipped' the check,
      # if we already had enough successful downloads
      s += ('Excluded: %d (Same Operator %d, Failed/Skipped Download %d, ' +
            'Excess %d)')%(removed_count, operator_count, failed_count,
                           excess_to_target_or_max)
      s += '\n'
    # bandwidths are stored in bytes/s; report the range in MB/s
    min_fb = self.fallback_min()
    min_bw = min_fb._data['measured_bandwidth']
    max_fb = self.fallback_max()
    max_bw = max_fb._data['measured_bandwidth']
    s += 'Bandwidth Range: %.1f - %.1f MB/s'%(min_bw/(1024.0*1024.0),
                                              max_bw/(1024.0*1024.0))
    s += '\n'
    s += '*/'
    if fallback_count < MIN_FALLBACK_COUNT:
      # We must have a minimum number of fallbacks so they are always
      # reachable, and are in diverse locations
      s += '\n'
      s += '#error Fallback Count %d is too low. '%(fallback_count)
      s += 'Must be at least %d for diversity. '%(MIN_FALLBACK_COUNT)
      s += 'Try adding entries to the whitelist, '
      s += 'or setting INCLUDE_UNLISTED_ENTRIES = True.'
    return s
# Top-level driver: downloads Onionoo data, selects fallback directories,
# and prints the resulting C include file (plus summary comments) to stdout.
# NOTE(review): the source paste dropped several interior lines of this
# function (else: arms, operator_count initialisation, the HAVE_IPADDRESS
# guard); they were reconstructed to match the upstream script — confirm
# against the original file.
def list_fallbacks():
  """ Fetches required onionoo documents and evaluates the
      fallback directory criteria for each of the relays """

  logging.warning('Downloading and parsing Onionoo data. ' +
                  'This may take some time.')
  # find relays that could be fallbacks
  candidates = CandidateList()
  candidates.add_relays()

  # work out how many fallbacks we want
  guard_count = candidates.count_guards()
  if FALLBACK_PROPORTION_OF_GUARDS is None:
    target_count = guard_count
  else:
    target_count = int(guard_count * FALLBACK_PROPORTION_OF_GUARDS)
  # the maximum number of fallbacks is the least of:
  # - the target fallback count (FALLBACK_PROPORTION_OF_GUARDS * guard count)
  # - the maximum fallback count (MAX_FALLBACK_COUNT)
  if MAX_FALLBACK_COUNT is None:
    max_count = target_count
  else:
    max_count = min(target_count, MAX_FALLBACK_COUNT)

  candidates.compute_fallbacks()
  # keep a pre-filter snapshot so per-relay output can show what changed
  prefilter_fallbacks = copy.copy(candidates.fallbacks)

  # filter with the whitelist and blacklist
  # if a relay has changed IPv4 address or ports recently, it will be excluded
  # as ineligible before we call apply_filter_lists, and so there will be no
  # warning that the details have changed from those in the whitelist.
  # instead, there will be an info-level log during the eligibility check.
  initial_count = len(candidates.fallbacks)
  excluded_count = candidates.apply_filter_lists()
  print candidates.summarise_filters(initial_count, excluded_count)
  eligible_count = len(candidates.fallbacks)

  # calculate the measured bandwidth of each relay,
  # then remove low-bandwidth relays
  candidates.calculate_measured_bandwidth()
  candidates.remove_low_bandwidth_relays()

  # print the raw fallback list
  #for x in candidates.fallbacks:
  #  print x.fallbackdir_line(True)
  #  print json.dumps(candidates[x]._data, sort_keys=True, indent=4,
  #                   separators=(',', ': '), default=json_util.default)

  # impose mandatory conditions here, like one per contact, family, IP
  # in measured bandwidth order
  candidates.sort_fallbacks_by_measured_bandwidth()
  operator_count = 0
  # only impose these limits on the final list - operators can nominate
  # multiple candidate fallbacks, and then we choose the best set
  if not OUTPUT_CANDIDATES:
    operator_count += candidates.limit_fallbacks_same_ip()
    operator_count += candidates.limit_fallbacks_same_contact()
    operator_count += candidates.limit_fallbacks_same_family()

  # check if each candidate can serve a consensus
  # there's a small risk we've eliminated relays from the same operator that
  # can serve a consensus, in favour of one that can't
  # but given it takes up to 15 seconds to check each consensus download,
  # the risk is worth it
  if PERFORM_IPV4_DIRPORT_CHECKS or PERFORM_IPV6_DIRPORT_CHECKS:
    logging.warning('Checking consensus download speeds. ' +
                    'This may take some time.')
  failed_count = candidates.perform_download_consensus_checks(max_count)

  # analyse and log interesting diversity metrics
  # like netblock, ports, exit, IPv4-only
  # (we can't easily analyse AS, and it's hard to accurately analyse country)
  candidates.describe_fallback_ip_family()
  # if we can't import the ipaddress module, we can't do netblock analysis
  if HAVE_IPADDRESS:
    candidates.describe_fallback_netblocks()
  candidates.describe_fallback_ports()
  candidates.describe_fallback_exit_flag()

  # output C comments summarising the fallback selection process
  if len(candidates.fallbacks) > 0:
    print candidates.summarise_fallbacks(eligible_count, operator_count,
                                         failed_count, guard_count,
                                         target_count)
  else:
    print '/* No Fallbacks met criteria */'

  # output C comments specifying the OnionOO data used to create the list
  for s in fetch_source_list():
    print describe_fetch_source(s)

  # if we're outputting the final fallback list, sort by fingerprint
  # this makes diffs much more stable
  # otherwise, leave sorted by bandwidth, which allows operators to be
  # contacted in priority order
  if not OUTPUT_CANDIDATES:
    candidates.sort_fallbacks_by_fingerprint()

  for x in candidates.fallbacks:
    print x.fallbackdir_line(candidates.fallbacks, prefilter_fallbacks)
1998 if __name__
== "__main__":