1 /* Copyright (c) 2001 Matej Pfajfar.
2 * Copyright (c) 2001-2004, Roger Dingledine.
3 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
4 * Copyright (c) 2007-2019, The Tor Project, Inc. */
5 /* See LICENSE for licensing information */
10 * \brief Maintains and analyzes statistics about circuit built times, so we
11 * can tell how long we may need to wait for a fast circuit to be constructed.
13 * By keeping these statistics, a client learns when it should time out a slow
14 * circuit for being too slow, and when it should keep a circuit open in order
15 * to wait for it to complete.
17 * The information here is kept in a circuit_built_times_t structure, which is
18 * currently a singleton, but doesn't need to be. It's updated by calls to
19 * circuit_build_times_count_timeout() from circuituse.c,
20 * circuit_build_times_count_close() from circuituse.c, and
21 * circuit_build_times_add_time() from circuitbuild.c, and inspected by other
22 * calls into this module, mostly from circuitlist.c. Observations are
23 * persisted to disk via the or_state_t-related calls.
26 #define CIRCUITSTATS_PRIVATE
28 #include "core/or/or.h"
29 #include "core/or/circuitbuild.h"
30 #include "core/or/circuitstats.h"
31 #include "app/config/config.h"
32 #include "app/config/confparse.h"
33 #include "feature/control/control.h"
34 #include "lib/crypt_ops/crypto_rand.h"
35 #include "core/mainloop/mainloop.h"
36 #include "feature/nodelist/networkstatus.h"
37 #include "feature/rend/rendclient.h"
38 #include "feature/rend/rendservice.h"
39 #include "feature/relay/router.h"
40 #include "app/config/statefile.h"
41 #include "core/or/circuitlist.h"
42 #include "core/or/circuituse.h"
43 #include "lib/math/fp.h"
44 #include "lib/time/tvdiff.h"
45 #include "lib/encoding/confline.h"
46 #include "feature/dirauth/authmode.h"
48 #include "core/or/crypt_path_st.h"
49 #include "core/or/origin_circuit_st.h"
50 #include "app/config/or_state_st.h"
55 static void cbt_control_event_buildtimeout_set(
56 const circuit_build_times_t
*cbt
,
57 buildtimeout_set_event_t type
);
58 static void circuit_build_times_scale_circ_counts(circuit_build_times_t
*cbt
);
60 #define CBT_BIN_TO_MS(bin) ((bin)*CBT_BIN_WIDTH + (CBT_BIN_WIDTH/2))
62 /** Global list of circuit build times */
63 // XXXX: Add this as a member for entry_guard_t instead of global?
64 // Then we could do per-guard statistics, as guards are likely to
65 // vary in their own latency. The downside of this is that guards
66 // can change frequently, so we'd be building a lot more circuits
68 static circuit_build_times_t circ_times
;
71 /** If set, we're running the unit tests: we should avoid clobbering
72 * our state file or accessing get_options() or get_or_state() */
73 static int unit_tests
= 0;
76 #endif /* defined(TOR_UNIT_TESTS) */
78 /** Return a pointer to the data structure describing our current circuit
79 * build time history and computations. */
80 const circuit_build_times_t
*
81 get_circuit_build_times(void)
86 /** As get_circuit_build_times, but return a mutable pointer. */
87 circuit_build_times_t
*
88 get_circuit_build_times_mutable(void)
93 /** Return the time to wait before actually closing an under-construction, in
96 get_circuit_build_close_time_ms(void)
98 return circ_times
.close_ms
;
101 /** Return the time to wait before giving up on an under-construction circuit,
102 * in milliseconds. */
104 get_circuit_build_timeout_ms(void)
106 return circ_times
.timeout_ms
;
110 * This function decides if CBT learning should be disabled. It returns
111 * true if one or more of the following conditions are met:
113 * 1. If the cbtdisabled consensus parameter is set.
114 * 2. If the torrc option LearnCircuitBuildTimeout is false.
115 * 3. If we are a directory authority
116 * 4. If we fail to write circuit build time history to our state file.
117 * 5. If we are configured in Single Onion mode
120 circuit_build_times_disabled(const or_options_t
*options
)
122 return circuit_build_times_disabled_(options
, 0);
125 /** As circuit_build_times_disabled, but take options as an argument. */
127 circuit_build_times_disabled_(const or_options_t
*options
,
128 int ignore_consensus
)
133 int consensus_disabled
=
134 ignore_consensus
? 0 : networkstatus_get_param(NULL
, "cbtdisabled",
136 int config_disabled
= !options
->LearnCircuitBuildTimeout
;
137 int dirauth_disabled
= authdir_mode(options
);
138 int state_disabled
= did_last_state_file_write_fail() ? 1 : 0;
139 /* LearnCircuitBuildTimeout and Single Onion Services are
140 * incompatible in two ways:
142 * - LearnCircuitBuildTimeout results in a low CBT, which
143 * Single Onion use of one-hop intro and rendezvous circuits lowers
144 * much further, producing *far* too many timeouts.
146 * - The adaptive CBT code does not update its timeout estimate
147 * using build times for single-hop circuits.
149 * If we fix both of these issues someday, we should test
150 * these modes with LearnCircuitBuildTimeout on again. */
151 int single_onion_disabled
= rend_service_allow_non_anonymous_connection(
154 if (consensus_disabled
|| config_disabled
|| dirauth_disabled
||
155 state_disabled
|| single_onion_disabled
) {
158 "CircuitBuildTime learning is disabled. "
159 "Consensus=%d, Config=%d, AuthDir=%d, StateFile=%d",
160 consensus_disabled
, config_disabled
, dirauth_disabled
,
167 "CircuitBuildTime learning is not disabled. "
168 "Consensus=%d, Config=%d, AuthDir=%d, StateFile=%d",
169 consensus_disabled
, config_disabled
, dirauth_disabled
,
178 * Retrieve and bounds-check the cbtmaxtimeouts consensus parameter.
180 * Effect: When this many timeouts happen in the last 'cbtrecentcount'
181 * circuit attempts, the client should discard all of its history and
182 * begin learning a fresh timeout value.
185 circuit_build_times_max_timeouts(void)
187 int32_t cbt_maxtimeouts
;
189 cbt_maxtimeouts
= networkstatus_get_param(NULL
, "cbtmaxtimeouts",
190 CBT_DEFAULT_MAX_RECENT_TIMEOUT_COUNT
,
191 CBT_MIN_MAX_RECENT_TIMEOUT_COUNT
,
192 CBT_MAX_MAX_RECENT_TIMEOUT_COUNT
);
194 if (!(get_options()->LearnCircuitBuildTimeout
)) {
196 "circuit_build_times_max_timeouts() called, cbtmaxtimeouts is"
201 return cbt_maxtimeouts
;
205 * Retrieve and bounds-check the cbtnummodes consensus parameter.
207 * Effect: This value governs how many modes to use in the weighted
208 * average calculation of Pareto parameter Xm. A value of 3 introduces
209 * some bias (2-5% of CDF) under ideal conditions, but allows for better
210 * performance in the event that a client chooses guard nodes of radically
211 * different performance characteristics.
214 circuit_build_times_default_num_xm_modes(void)
216 int32_t num
= networkstatus_get_param(NULL
, "cbtnummodes",
217 CBT_DEFAULT_NUM_XM_MODES
,
218 CBT_MIN_NUM_XM_MODES
,
219 CBT_MAX_NUM_XM_MODES
);
221 if (!(get_options()->LearnCircuitBuildTimeout
)) {
223 "circuit_build_times_default_num_xm_modes() called, cbtnummodes"
232 * Retrieve and bounds-check the cbtmincircs consensus parameter.
234 * Effect: This is the minimum number of circuits to build before
235 * computing a timeout.
238 circuit_build_times_min_circs_to_observe(void)
240 int32_t num
= networkstatus_get_param(NULL
, "cbtmincircs",
241 CBT_DEFAULT_MIN_CIRCUITS_TO_OBSERVE
,
242 CBT_MIN_MIN_CIRCUITS_TO_OBSERVE
,
243 CBT_MAX_MIN_CIRCUITS_TO_OBSERVE
);
245 if (!(get_options()->LearnCircuitBuildTimeout
)) {
247 "circuit_build_times_min_circs_to_observe() called, cbtmincircs"
255 /** Return true iff <b>cbt</b> has recorded enough build times that we
256 * want to start acting on the timeout it implies. */
258 circuit_build_times_enough_to_compute(const circuit_build_times_t
*cbt
)
260 return cbt
->total_build_times
>= circuit_build_times_min_circs_to_observe();
264 * Retrieve and bounds-check the cbtquantile consensus parameter.
266 * Effect: This is the position on the quantile curve to use to set the
267 * timeout value. It is a percent (10-99).
270 circuit_build_times_quantile_cutoff(void)
272 int32_t num
= networkstatus_get_param(NULL
, "cbtquantile",
273 CBT_DEFAULT_QUANTILE_CUTOFF
,
274 CBT_MIN_QUANTILE_CUTOFF
,
275 CBT_MAX_QUANTILE_CUTOFF
);
277 if (!(get_options()->LearnCircuitBuildTimeout
)) {
279 "circuit_build_times_quantile_cutoff() called, cbtquantile"
288 * Retrieve and bounds-check the cbtclosequantile consensus parameter.
290 * Effect: This is the position on the quantile curve to use to set the
291 * timeout value to use to actually close circuits. It is a percent
295 circuit_build_times_close_quantile(void)
298 /* Cast is safe - circuit_build_times_quantile_cutoff() is capped */
299 int32_t min
= (int)tor_lround(100*circuit_build_times_quantile_cutoff());
300 param
= networkstatus_get_param(NULL
, "cbtclosequantile",
301 CBT_DEFAULT_CLOSE_QUANTILE
,
302 CBT_MIN_CLOSE_QUANTILE
,
303 CBT_MAX_CLOSE_QUANTILE
);
305 if (!(get_options()->LearnCircuitBuildTimeout
)) {
307 "circuit_build_times_close_quantile() called, cbtclosequantile"
312 log_warn(LD_DIR
, "Consensus parameter cbtclosequantile is "
313 "too small, raising to %d", min
);
316 return param
/ 100.0;
320 * Retrieve and bounds-check the cbttestfreq consensus parameter.
322 * Effect: Describes how often in seconds to build a test circuit to
323 * gather timeout values. Only applies if less than 'cbtmincircs'
324 * have been recorded.
327 circuit_build_times_test_frequency(void)
329 int32_t num
= networkstatus_get_param(NULL
, "cbttestfreq",
330 CBT_DEFAULT_TEST_FREQUENCY
,
331 CBT_MIN_TEST_FREQUENCY
,
332 CBT_MAX_TEST_FREQUENCY
);
334 if (!(get_options()->LearnCircuitBuildTimeout
)) {
336 "circuit_build_times_test_frequency() called, cbttestfreq is %d",
344 * Retrieve and bounds-check the cbtmintimeout consensus parameter.
346 * Effect: This is the minimum allowed timeout value in milliseconds.
347 * The minimum is to prevent rounding to 0 (we only check once
351 circuit_build_times_min_timeout(void)
353 int32_t num
= networkstatus_get_param(NULL
, "cbtmintimeout",
354 CBT_DEFAULT_TIMEOUT_MIN_VALUE
,
355 CBT_MIN_TIMEOUT_MIN_VALUE
,
356 CBT_MAX_TIMEOUT_MIN_VALUE
);
358 if (!(get_options()->LearnCircuitBuildTimeout
)) {
360 "circuit_build_times_min_timeout() called, cbtmintimeout is %d",
367 * Retrieve and bounds-check the cbtinitialtimeout consensus parameter.
369 * Effect: This is the timeout value to use before computing a timeout,
373 circuit_build_times_initial_timeout(void)
375 int32_t min
= circuit_build_times_min_timeout();
376 int32_t param
= networkstatus_get_param(NULL
, "cbtinitialtimeout",
377 CBT_DEFAULT_TIMEOUT_INITIAL_VALUE
,
378 CBT_MIN_TIMEOUT_INITIAL_VALUE
,
379 CBT_MAX_TIMEOUT_INITIAL_VALUE
);
381 if (!(get_options()->LearnCircuitBuildTimeout
)) {
383 "circuit_build_times_initial_timeout() called, "
384 "cbtinitialtimeout is %d",
389 log_warn(LD_DIR
, "Consensus parameter cbtinitialtimeout is too small, "
390 "raising to %d", min
);
397 * Retrieve and bounds-check the cbtrecentcount consensus parameter.
399 * Effect: This is the number of circuit build times to keep track of
400 * for deciding if we hit cbtmaxtimeouts and need to reset our state
401 * and learn a new timeout.
404 circuit_build_times_recent_circuit_count(networkstatus_t
*ns
)
407 num
= networkstatus_get_param(ns
, "cbtrecentcount",
408 CBT_DEFAULT_RECENT_CIRCUITS
,
409 CBT_MIN_RECENT_CIRCUITS
,
410 CBT_MAX_RECENT_CIRCUITS
);
412 if (!(get_options()->LearnCircuitBuildTimeout
)) {
414 "circuit_build_times_recent_circuit_count() called, "
415 "cbtrecentcount is %d",
423 * This function is called when we get a consensus update.
425 * It checks to see if we have changed any consensus parameters
426 * that require reallocation or discard of previous stats.
429 circuit_build_times_new_consensus_params(circuit_build_times_t
*cbt
,
435 * First check if we're doing adaptive timeouts at all; nothing to
436 * update if we aren't.
439 if (!circuit_build_times_disabled(get_options())) {
440 num
= circuit_build_times_recent_circuit_count(ns
);
443 if (num
!= cbt
->liveness
.num_recent_circs
) {
444 int8_t *recent_circs
;
445 if (cbt
->liveness
.num_recent_circs
> 0) {
446 log_notice(LD_CIRC
, "The Tor Directory Consensus has changed how "
447 "many circuits we must track to detect network failures "
448 "from %d to %d.", cbt
->liveness
.num_recent_circs
, num
);
450 log_notice(LD_CIRC
, "Upon receiving a consensus directory, "
451 "re-enabling circuit-based network failure detection.");
454 tor_assert(cbt
->liveness
.timeouts_after_firsthop
||
455 cbt
->liveness
.num_recent_circs
== 0);
458 * Technically this is a circular array that we are reallocating
459 * and memcopying. However, since it only consists of either 1s
460 * or 0s, and is only used in a statistical test to determine when
461 * we should discard our history after a sufficient number of 1's
462 * have been reached, it is fine if order is not preserved or
465 * cbtrecentcount should only be changing in cases of severe network
466 * distress anyway, so memory correctness here is paramount over
467 * doing acrobatics to preserve the array.
469 recent_circs
= tor_calloc(num
, sizeof(int8_t));
470 if (cbt
->liveness
.timeouts_after_firsthop
&&
471 cbt
->liveness
.num_recent_circs
> 0) {
472 memcpy(recent_circs
, cbt
->liveness
.timeouts_after_firsthop
,
473 sizeof(int8_t)*MIN(num
, cbt
->liveness
.num_recent_circs
));
476 // Adjust the index if it needs it.
477 if (num
< cbt
->liveness
.num_recent_circs
) {
478 cbt
->liveness
.after_firsthop_idx
= MIN(num
-1,
479 cbt
->liveness
.after_firsthop_idx
);
482 tor_free(cbt
->liveness
.timeouts_after_firsthop
);
483 cbt
->liveness
.timeouts_after_firsthop
= recent_circs
;
484 cbt
->liveness
.num_recent_circs
= num
;
486 /* else no change, nothing to do */
487 } else { /* num == 0 */
489 * Weird. This probably shouldn't happen, so log a warning, but try
490 * to do something sensible anyway.
494 "The cbtrecentcircs consensus parameter came back zero! "
495 "This disables adaptive timeouts since we can't keep track of "
496 "any recent circuits.");
498 circuit_build_times_free_timeouts(cbt
);
502 * Adaptive timeouts are disabled; this might be because of the
503 * LearnCircuitBuildTimes config parameter, and hence permanent, or
504 * the cbtdisabled consensus parameter, so it may be a new condition.
505 * Treat it like getting num == 0 above and free the circuit history
509 circuit_build_times_free_timeouts(cbt
);
514 * Return the initial default or configured timeout in milliseconds
517 circuit_build_times_get_initial_timeout(void)
520 const or_options_t
*options
= get_options();
523 * Check if we have LearnCircuitBuildTimeout, and if we don't,
524 * always use CircuitBuildTimeout, no questions asked.
526 if (!unit_tests
&& options
->CircuitBuildTimeout
) {
527 timeout
= options
->CircuitBuildTimeout
*1000;
528 if (!circuit_build_times_disabled(options
) &&
529 timeout
< circuit_build_times_min_timeout()) {
530 log_warn(LD_CIRC
, "Config CircuitBuildTimeout too low. Setting to %ds",
531 circuit_build_times_min_timeout()/1000);
532 timeout
= circuit_build_times_min_timeout();
535 timeout
= circuit_build_times_initial_timeout();
542 * Reset the build time state.
544 * Leave estimated parameters, timeout and network liveness intact
548 circuit_build_times_reset(circuit_build_times_t
*cbt
)
550 memset(cbt
->circuit_build_times
, 0, sizeof(cbt
->circuit_build_times
));
551 cbt
->total_build_times
= 0;
552 cbt
->build_times_idx
= 0;
553 cbt
->have_computed_timeout
= 0;
555 // Reset timeout and close counts
556 cbt
->num_circ_succeeded
= 0;
557 cbt
->num_circ_closed
= 0;
558 cbt
->num_circ_timeouts
= 0;
562 * Initialize the buildtimes structure for first use.
564 * Sets the initial timeout values based on either the config setting,
565 * the consensus param, or the default (CBT_DEFAULT_TIMEOUT_INITIAL_VALUE).
568 circuit_build_times_init(circuit_build_times_t
*cbt
)
570 memset(cbt
, 0, sizeof(*cbt
));
572 * Check if we really are using adaptive timeouts, and don't keep
573 * track of this stuff if not.
575 if (!circuit_build_times_disabled(get_options())) {
576 cbt
->liveness
.num_recent_circs
=
577 circuit_build_times_recent_circuit_count(NULL
);
578 cbt
->liveness
.timeouts_after_firsthop
=
579 tor_calloc(cbt
->liveness
.num_recent_circs
, sizeof(int8_t));
581 cbt
->liveness
.num_recent_circs
= 0;
582 cbt
->liveness
.timeouts_after_firsthop
= NULL
;
584 cbt
->close_ms
= cbt
->timeout_ms
= circuit_build_times_get_initial_timeout();
585 cbt_control_event_buildtimeout_set(cbt
, BUILDTIMEOUT_SET_EVENT_RESET
);
589 * Free the saved timeouts, if the cbtdisabled consensus parameter got turned
594 circuit_build_times_free_timeouts(circuit_build_times_t
*cbt
)
598 if (cbt
->liveness
.timeouts_after_firsthop
) {
599 tor_free(cbt
->liveness
.timeouts_after_firsthop
);
602 cbt
->liveness
.num_recent_circs
= 0;
607 * Rewind our build time history by n positions.
610 circuit_build_times_rewind_history(circuit_build_times_t
*cbt
, int n
)
614 cbt
->build_times_idx
-= n
;
615 cbt
->build_times_idx
%= CBT_NCIRCUITS_TO_OBSERVE
;
617 for (i
= 0; i
< n
; i
++) {
618 cbt
->circuit_build_times
[(i
+cbt
->build_times_idx
)
619 %CBT_NCIRCUITS_TO_OBSERVE
]=0;
622 if (cbt
->total_build_times
> n
) {
623 cbt
->total_build_times
-= n
;
625 cbt
->total_build_times
= 0;
629 "Rewound history by %d places. Current index: %d. "
630 "Total: %d", n
, cbt
->build_times_idx
, cbt
->total_build_times
);
635 * Mark this circuit as timed out, but change its purpose
636 * so that it continues to build, allowing us to measure
637 * its full build time.
640 circuit_build_times_mark_circ_as_measurement_only(origin_circuit_t
*circ
)
642 control_event_circuit_status(circ
,
644 END_CIRC_REASON_TIMEOUT
);
645 circuit_change_purpose(TO_CIRCUIT(circ
),
646 CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT
);
647 /* Record this event to check for too many timeouts
648 * in a row. This function does not record a time value yet
649 * (we do that later); it only counts the fact that we did
650 * have a timeout. We also want to avoid double-counting
651 * already "relaxed" circuits, which are counted in
652 * circuit_expire_building(). */
653 if (!circ
->relaxed_timeout
) {
654 int first_hop_succeeded
= circ
->cpath
&&
655 circ
->cpath
->state
== CPATH_STATE_OPEN
;
657 circuit_build_times_count_timeout(
658 get_circuit_build_times_mutable(),
659 first_hop_succeeded
);
664 * Perform the build time work that needs to be done when a circuit
667 * This function decides if we should record a circuit's build time
668 * in our histogram data and other statistics, and if so, records it.
669 * It also will mark circuits that have already timed out as
670 * measurement-only circuits, so they can continue to build but
673 * For this, we want to consider circuits that will eventually make
674 * it to the third hop. For circuits longer than 3 hops, we want to
675 * record their build time when they reach the third hop, but let
676 * them continue (and not count them later). For circuits that are
677 * exactly 3 hops, this will count them when they are completed. We
678 * do this so that CBT is always gathering statistics on circuits
679 * of the same length, regardless of their type.
682 circuit_build_times_handle_completed_hop(origin_circuit_t
*circ
)
687 /* If circuit build times are disabled, let circuit_expire_building()
689 if (circuit_build_times_disabled(get_options())) {
693 /* Is this a circuit for which the timeout applies in a straight-forward
694 * way? If so, handle it below. If not, just return (and let
695 * circuit_expire_building() eventually take care of it).
697 if (!circuit_timeout_want_to_count_circ(circ
)) {
701 tor_gettimeofday(&end
);
702 timediff
= tv_mdiff(&circ
->base_
.timestamp_began
, &end
);
704 /* Check if we would have timed out already. If so, change the
705 * purpose here. But don't do any timeout handling here if there
706 * are no circuits opened yet. Save it for circuit_expire_building()
707 * (to allow it to handle timeout "relaxing" over there). */
708 if (timediff
> get_circuit_build_timeout_ms() &&
709 circuit_any_opened_circuits_cached()) {
711 /* Circuits are allowed to last longer for measurement.
712 * Switch their purpose and wait. */
713 if (circ
->base_
.purpose
!= CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT
) {
715 "Deciding to timeout circuit %"PRIu32
"\n",
716 (circ
->global_identifier
));
717 circuit_build_times_mark_circ_as_measurement_only(circ
);
721 /* If the circuit is built to exactly the DEFAULT_ROUTE_LEN,
722 * add it to our buildtimes. */
723 if (circuit_get_cpath_opened_len(circ
) == DEFAULT_ROUTE_LEN
) {
724 /* If the circuit build time is much greater than we would have cut
725 * it off at, we probably had a suspend event along this codepath,
726 * and we should discard the value.
729 timediff
> 2*get_circuit_build_close_time_ms()+1000) {
730 log_notice(LD_CIRC
, "Strange value for circuit build time: %ldmsec. "
731 "Assuming clock jump. Purpose %d (%s)", timediff
,
733 circuit_purpose_to_string(circ
->base_
.purpose
));
735 /* Only count circuit times if the network is live */
736 if (circuit_build_times_network_check_live(
737 get_circuit_build_times())) {
738 circuit_build_times_add_time(get_circuit_build_times_mutable(),
739 (build_time_t
)timediff
);
740 circuit_build_times_set_timeout(get_circuit_build_times_mutable());
743 if (circ
->base_
.purpose
!= CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT
) {
744 circuit_build_times_network_circ_success(
745 get_circuit_build_times_mutable());
752 * Add a new build time value <b>time</b> to the set of build times. Time
753 * units are milliseconds.
755 * circuit_build_times <b>cbt</b> is a circular array, so loop around when
759 circuit_build_times_add_time(circuit_build_times_t
*cbt
, build_time_t btime
)
761 if (btime
<= 0 || btime
> CBT_BUILD_TIME_MAX
) {
762 log_warn(LD_BUG
, "Circuit build time is too large (%u)."
763 "This is probably a bug.", btime
);
764 tor_fragile_assert();
768 log_debug(LD_CIRC
, "Adding circuit build time %u", btime
);
770 cbt
->circuit_build_times
[cbt
->build_times_idx
] = btime
;
771 cbt
->build_times_idx
= (cbt
->build_times_idx
+ 1) % CBT_NCIRCUITS_TO_OBSERVE
;
772 if (cbt
->total_build_times
< CBT_NCIRCUITS_TO_OBSERVE
)
773 cbt
->total_build_times
++;
775 if ((cbt
->total_build_times
% CBT_SAVE_STATE_EVERY
) == 0) {
776 /* Save state every n circuit builds */
777 if (!unit_tests
&& !get_options()->AvoidDiskWrites
)
778 or_state_mark_dirty(get_or_state(), 0);
785 * Return maximum circuit build time
788 circuit_build_times_max(const circuit_build_times_t
*cbt
)
791 build_time_t max_build_time
= 0;
792 for (i
= 0; i
< CBT_NCIRCUITS_TO_OBSERVE
; i
++) {
793 if (cbt
->circuit_build_times
[i
] > max_build_time
794 && cbt
->circuit_build_times
[i
] != CBT_BUILD_ABANDONED
)
795 max_build_time
= cbt
->circuit_build_times
[i
];
797 return max_build_time
;
801 /** Return minimum circuit build time */
803 circuit_build_times_min(circuit_build_times_t
*cbt
)
806 build_time_t min_build_time
= CBT_BUILD_TIME_MAX
;
807 for (i
= 0; i
< CBT_NCIRCUITS_TO_OBSERVE
; i
++) {
808 if (cbt
->circuit_build_times
[i
] && /* 0 <-> uninitialized */
809 cbt
->circuit_build_times
[i
] < min_build_time
)
810 min_build_time
= cbt
->circuit_build_times
[i
];
812 if (min_build_time
== CBT_BUILD_TIME_MAX
) {
813 log_warn(LD_CIRC
, "No build times less than CBT_BUILD_TIME_MAX!");
815 return min_build_time
;
820 * Calculate and return a histogram for the set of build times.
822 * Returns an allocated array of histrogram bins representing
823 * the frequency of index*CBT_BIN_WIDTH millisecond
824 * build times. Also outputs the number of bins in nbins.
826 * The return value must be freed by the caller.
829 circuit_build_times_create_histogram(const circuit_build_times_t
*cbt
,
833 build_time_t max_build_time
= circuit_build_times_max(cbt
);
836 *nbins
= 1 + (max_build_time
/ CBT_BIN_WIDTH
);
837 histogram
= tor_calloc(*nbins
, sizeof(build_time_t
));
839 // calculate histogram
840 for (i
= 0; i
< CBT_NCIRCUITS_TO_OBSERVE
; i
++) {
841 if (cbt
->circuit_build_times
[i
] == 0
842 || cbt
->circuit_build_times
[i
] == CBT_BUILD_ABANDONED
)
843 continue; /* 0 <-> uninitialized */
845 c
= (cbt
->circuit_build_times
[i
] / CBT_BIN_WIDTH
);
853 * Return the Pareto start-of-curve parameter Xm.
855 * Because we are not a true Pareto curve, we compute this as the
856 * weighted average of the N most frequent build time bins. N is either
857 * 1 if we don't have enough circuit build time data collected, or
858 * determined by the consensus parameter cbtnummodes (default 3).
861 circuit_build_times_get_xm(circuit_build_times_t
*cbt
)
863 build_time_t i
, nbins
;
864 build_time_t
*nth_max_bin
;
865 int32_t bin_counts
=0;
866 build_time_t ret
= 0;
867 uint32_t *histogram
= circuit_build_times_create_histogram(cbt
, &nbins
);
869 int num_modes
= circuit_build_times_default_num_xm_modes();
871 tor_assert(nbins
> 0);
872 tor_assert(num_modes
> 0);
874 // Only use one mode if < 1000 buildtimes. Not enough data
876 if (cbt
->total_build_times
< CBT_NCIRCUITS_TO_OBSERVE
)
879 nth_max_bin
= tor_calloc(num_modes
, sizeof(build_time_t
));
881 /* Determine the N most common build times */
882 for (i
= 0; i
< nbins
; i
++) {
883 if (histogram
[i
] >= histogram
[nth_max_bin
[0]]) {
887 for (n
= 1; n
< num_modes
; n
++) {
888 if (histogram
[i
] >= histogram
[nth_max_bin
[n
]] &&
889 (!histogram
[nth_max_bin
[n
-1]]
890 || histogram
[i
] < histogram
[nth_max_bin
[n
-1]])) {
896 for (n
= 0; n
< num_modes
; n
++) {
897 bin_counts
+= histogram
[nth_max_bin
[n
]];
898 ret
+= CBT_BIN_TO_MS(nth_max_bin
[n
])*histogram
[nth_max_bin
[n
]];
899 log_info(LD_CIRC
, "Xm mode #%d: %u %u", n
, CBT_BIN_TO_MS(nth_max_bin
[n
]),
900 histogram
[nth_max_bin
[n
]]);
903 /* bin_counts can become zero if all of our last CBT_NCIRCUITS_TO_OBSERVE
904 * circuits were abandoned before they completed. This shouldn't happen,
905 * though. We should have reset/re-learned a lower timeout first. */
906 if (bin_counts
== 0) {
909 "No valid circuit build time data out of %d times, %u modes, "
910 "have_timeout=%d, %lfms", cbt
->total_build_times
, num_modes
,
911 cbt
->have_computed_timeout
, cbt
->timeout_ms
);
915 tor_assert(bin_counts
> 0);
921 tor_free(nth_max_bin
);
927 * Output a histogram of current circuit build times to
928 * the or_state_t state structure.
931 circuit_build_times_update_state(const circuit_build_times_t
*cbt
,
936 build_time_t nbins
= 0;
937 config_line_t
**next
, *line
;
939 histogram
= circuit_build_times_create_histogram(cbt
, &nbins
);
941 config_free_lines(state
->BuildtimeHistogram
);
942 next
= &state
->BuildtimeHistogram
;
945 state
->TotalBuildTimes
= cbt
->total_build_times
;
946 state
->CircuitBuildAbandonedCount
= 0;
948 for (i
= 0; i
< CBT_NCIRCUITS_TO_OBSERVE
; i
++) {
949 if (cbt
->circuit_build_times
[i
] == CBT_BUILD_ABANDONED
)
950 state
->CircuitBuildAbandonedCount
++;
953 for (i
= 0; i
< nbins
; i
++) {
954 // compress the histogram by skipping the blanks
955 if (histogram
[i
] == 0) continue;
956 *next
= line
= tor_malloc_zero(sizeof(config_line_t
));
957 line
->key
= tor_strdup("CircuitBuildTimeBin");
958 tor_asprintf(&line
->value
, "%d %d",
959 CBT_BIN_TO_MS(i
), histogram
[i
]);
960 next
= &(line
->next
);
964 if (!get_options()->AvoidDiskWrites
)
965 or_state_mark_dirty(get_or_state(), 0);
972 * Shuffle the build times array.
974 * Adapted from http://en.wikipedia.org/wiki/Fisher-Yates_shuffle
977 circuit_build_times_shuffle_and_store_array(circuit_build_times_t
*cbt
,
978 build_time_t
*raw_times
,
981 uint32_t n
= num_times
;
982 if (num_times
> CBT_NCIRCUITS_TO_OBSERVE
) {
983 log_notice(LD_CIRC
, "The number of circuit times that this Tor version "
984 "uses to calculate build times is less than the number stored "
985 "in your state file. Decreasing the circuit time history from "
986 "%lu to %d.", (unsigned long)num_times
,
987 CBT_NCIRCUITS_TO_OBSERVE
);
991 log_warn(LD_CIRC
, "For some insane reasons, you had %lu circuit build "
992 "observations in your state file. That's far too many; probably "
993 "there's a bug here.", (unsigned long)n
);
997 /* This code can only be run on a compact array */
999 int k
= crypto_rand_int(n
+ 1); /* 0 <= k <= n. */
1000 build_time_t tmp
= raw_times
[k
];
1001 raw_times
[k
] = raw_times
[n
];
1005 /* Since the times are now shuffled, take a random CBT_NCIRCUITS_TO_OBSERVE
1006 * subset (ie the first CBT_NCIRCUITS_TO_OBSERVE values) */
1007 for (n
= 0; n
< MIN(num_times
, CBT_NCIRCUITS_TO_OBSERVE
); n
++) {
1008 circuit_build_times_add_time(cbt
, raw_times
[n
]);
1013 * Load histogram from <b>state</b>, shuffling the resulting array
1014 * after we do so. Use this result to estimate parameters and
1015 * calculate the timeout.
1017 * Return -1 on error.
1020 circuit_build_times_parse_state(circuit_build_times_t
*cbt
,
1024 uint32_t loaded_cnt
= 0, N
= 0;
1025 config_line_t
*line
;
1027 build_time_t
*loaded_times
;
1029 circuit_build_times_init(cbt
);
1031 if (circuit_build_times_disabled(get_options())) {
1035 /* build_time_t 0 means uninitialized */
1036 loaded_times
= tor_calloc(state
->TotalBuildTimes
, sizeof(build_time_t
));
1038 for (line
= state
->BuildtimeHistogram
; line
; line
= line
->next
) {
1039 smartlist_t
*args
= smartlist_new();
1040 smartlist_split_string(args
, line
->value
, " ",
1041 SPLIT_SKIP_SPACE
|SPLIT_IGNORE_BLANK
, 0);
1042 if (smartlist_len(args
) < 2) {
1043 log_warn(LD_GENERAL
, "Unable to parse circuit build times: "
1044 "Too few arguments to CircuitBuildTime");
1046 SMARTLIST_FOREACH(args
, char*, cp
, tor_free(cp
));
1047 smartlist_free(args
);
1050 const char *ms_str
= smartlist_get(args
,0);
1051 const char *count_str
= smartlist_get(args
,1);
1055 ms
= (build_time_t
)tor_parse_ulong(ms_str
, 10, 0,
1056 CBT_BUILD_TIME_MAX
, &ok
, NULL
);
1058 log_warn(LD_GENERAL
, "Unable to parse circuit build times: "
1059 "Unparsable bin number");
1061 SMARTLIST_FOREACH(args
, char*, cp
, tor_free(cp
));
1062 smartlist_free(args
);
1065 count
= (uint32_t)tor_parse_ulong(count_str
, 10, 0,
1066 UINT32_MAX
, &ok
, NULL
);
1068 log_warn(LD_GENERAL
, "Unable to parse circuit build times: "
1069 "Unparsable bin count");
1071 SMARTLIST_FOREACH(args
, char*, cp
, tor_free(cp
));
1072 smartlist_free(args
);
1076 if (loaded_cnt
+count
+ (unsigned)state
->CircuitBuildAbandonedCount
1077 > (unsigned) state
->TotalBuildTimes
) {
1079 "Too many build times in state file. "
1080 "Stopping short before %d",
1082 SMARTLIST_FOREACH(args
, char*, cp
, tor_free(cp
));
1083 smartlist_free(args
);
1087 for (k
= 0; k
< count
; k
++) {
1088 loaded_times
[loaded_cnt
++] = ms
;
1091 SMARTLIST_FOREACH(args
, char*, cp
, tor_free(cp
));
1092 smartlist_free(args
);
1097 "Adding %d timeouts.", state
->CircuitBuildAbandonedCount
);
1098 for (i
=0; i
< state
->CircuitBuildAbandonedCount
; i
++) {
1099 loaded_times
[loaded_cnt
++] = CBT_BUILD_ABANDONED
;
1102 if (loaded_cnt
!= (unsigned)state
->TotalBuildTimes
) {
1104 "Corrupt state file? Build times count mismatch. "
1105 "Read %d times, but file says %d", loaded_cnt
,
1106 state
->TotalBuildTimes
);
1108 circuit_build_times_reset(cbt
);
1112 circuit_build_times_shuffle_and_store_array(cbt
, loaded_times
, loaded_cnt
);
1114 /* Verify that we didn't overwrite any indexes */
1115 for (i
=0; i
< CBT_NCIRCUITS_TO_OBSERVE
; i
++) {
1116 if (!cbt
->circuit_build_times
[i
])
1121 "Loaded %d/%d values from %d lines in circuit time histogram",
1122 tot_values
, cbt
->total_build_times
, N
);
1124 if (cbt
->total_build_times
!= tot_values
1125 || cbt
->total_build_times
> CBT_NCIRCUITS_TO_OBSERVE
) {
1127 "Corrupt state file? Shuffled build times mismatch. "
1128 "Read %d times, but file says %d", tot_values
,
1129 state
->TotalBuildTimes
);
1131 circuit_build_times_reset(cbt
);
1135 circuit_build_times_set_timeout(cbt
);
1138 tor_free(loaded_times
);
1139 return err
? -1 : 0;
1143 * Estimates the Xm and Alpha parameters using
1144 * http://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation
1146 * The notable difference is that we use mode instead of min to estimate Xm.
1147 * This is because our distribution is frechet-like. We claim this is
1148 * an acceptable approximation because we are only concerned with the
1149 * accuracy of the CDF of the tail.
1152 circuit_build_times_update_alpha(circuit_build_times_t
*cbt
)
1154 build_time_t
*x
=cbt
->circuit_build_times
;
1156 int n
=0,i
=0,abandoned_count
=0;
1157 build_time_t max_time
=0;
1159 /* http://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation */
1160 /* We sort of cheat here and make our samples slightly more pareto-like
1161 * and less frechet-like. */
1162 cbt
->Xm
= circuit_build_times_get_xm(cbt
);
1164 /* If Xm came back 0, then too many circuits were abandoned. */
1168 tor_assert(cbt
->Xm
> 0);
1170 for (i
=0; i
< CBT_NCIRCUITS_TO_OBSERVE
; i
++) {
1175 if (x
[i
] < cbt
->Xm
) {
1176 a
+= tor_mathlog(cbt
->Xm
);
1178 } else if (x
[i
] == CBT_BUILD_ABANDONED
) {
1181 a
+= tor_mathlog(x
[i
]);
1182 if (x
[i
] > max_time
)
1189 * We are erring and asserting here because this can only happen
1190 * in codepaths other than startup. The startup state parsing code
1191 * performs this same check, and resets state if it hits it. If we
1192 * hit it at runtime, something serious has gone wrong.
1194 if (n
!=cbt
->total_build_times
-abandoned_count
) {
1195 log_err(LD_CIRC
, "Discrepancy in build times count: %d vs %d", n
,
1196 cbt
->total_build_times
);
1198 tor_assert_nonfatal(n
==cbt
->total_build_times
-abandoned_count
);
1200 if (max_time
<= 0) {
1201 /* This can happen if Xm is actually the *maximum* value in the set.
1202 * It can also happen if we've abandoned every single circuit somehow.
1203 * In either case, tell the caller not to compute a new build timeout. */
1205 "Could not determine largest build time (%d). "
1206 "Xm is %dms and we've abandoned %d out of %d circuits.", max_time
,
1207 cbt
->Xm
, abandoned_count
, n
);
1211 /* This is the "Maximum Likelihood Estimator" for parameter alpha of a Pareto
1212 * Distribution. See:
1213 * https://en.wikipedia.org/wiki/Pareto_distribution#Estimation_of_parameters
1215 * The division in the estimator is done with subtraction outside the ln(),
1216 * with the sum occurring in the for loop above.
1218 * This done is to avoid the precision issues of logs of small values.
1220 a
-= n
*tor_mathlog(cbt
->Xm
);
1229 * This is the Pareto Quantile Function. It calculates the point x
1230 * in the distribution such that F(x) = quantile (ie quantile*100%
1231 * of the mass of the density function is below x on the curve).
1233 * We use it to calculate the timeout and also to generate synthetic
1234 * values of time for circuits that timeout before completion.
1236 * See http://en.wikipedia.org/wiki/Quantile_function,
1237 * http://en.wikipedia.org/wiki/Inverse_transform_sampling and
1238 * http://en.wikipedia.org/wiki/Pareto_distribution#Generating_a_
1239 * random_sample_from_Pareto_distribution
1240 * That's right. I'll cite wikipedia all day long.
1242 * Return value is in milliseconds, clamped to INT32_MAX.
1245 circuit_build_times_calculate_timeout(circuit_build_times_t
*cbt
,
1249 tor_assert(quantile
>= 0);
1250 tor_assert(1.0-quantile
> 0);
1251 tor_assert(cbt
->Xm
> 0);
1253 /* If either alpha or p are 0, we would divide by zero, yielding an
1254 * infinite (double) result; which would be clamped to INT32_MAX.
1255 * Instead, initialise ret to INT32_MAX, and skip over these
1256 * potentially illegal/trapping divides by zero.
1260 if (cbt
->alpha
> 0) {
1262 p
= pow(1.0-quantile
,1.0/cbt
->alpha
);
1268 if (ret
> INT32_MAX
) {
1271 tor_assert(ret
> 0);
1275 #ifdef TOR_UNIT_TESTS
1278 circuit_build_times_cdf(circuit_build_times_t
*cbt
, double x
)
1281 tor_assert(cbt
->Xm
> 0);
1282 ret
= 1.0-pow(cbt
->Xm
/x
,cbt
->alpha
);
1283 tor_assert(0 <= ret
&& ret
<= 1.0);
1286 #endif /* defined(TOR_UNIT_TESTS) */
1288 #ifdef TOR_UNIT_TESTS
1290 * Generate a synthetic time using our distribution parameters.
1292 * The return value will be within the [q_lo, q_hi) quantile points
1296 circuit_build_times_generate_sample(circuit_build_times_t
*cbt
,
1297 double q_lo
, double q_hi
)
1299 double randval
= crypto_rand_double();
1303 /* Generate between [q_lo, q_hi) */
1304 /*XXXX This is what nextafter is supposed to be for; we should use it on the
1305 * platforms that support it. */
1306 q_hi
-= 1.0/(INT32_MAX
);
1308 tor_assert(q_lo
>= 0);
1309 tor_assert(q_hi
< 1);
1310 tor_assert(q_lo
< q_hi
);
1312 u
= q_lo
+ (q_hi
-q_lo
)*randval
;
1314 tor_assert(0 <= u
&& u
< 1.0);
1315 /* circuit_build_times_calculate_timeout returns <= INT32_MAX */
1316 ret
= (build_time_t
)
1317 tor_lround(circuit_build_times_calculate_timeout(cbt
, u
));
1318 tor_assert(ret
> 0);
1321 #endif /* defined(TOR_UNIT_TESTS) */
1323 #ifdef TOR_UNIT_TESTS
1325 * Estimate an initial alpha parameter by solving the quantile
1326 * function with a quantile point and a specific timeout value.
1329 circuit_build_times_initial_alpha(circuit_build_times_t
*cbt
,
1330 double quantile
, double timeout_ms
)
1332 // Q(u) = Xm/((1-u)^(1/a))
1333 // Q(0.8) = Xm/((1-0.8))^(1/a)) = CircBuildTimeout
1334 // CircBuildTimeout = Xm/((1-0.8))^(1/a))
1335 // CircBuildTimeout = Xm*((1-0.8))^(-1/a))
1336 // ln(CircBuildTimeout) = ln(Xm)+ln(((1-0.8)))*(-1/a)
1337 // -ln(1-0.8)/(ln(CircBuildTimeout)-ln(Xm))=a
1338 tor_assert(quantile
>= 0);
1339 tor_assert(cbt
->Xm
> 0);
1340 cbt
->alpha
= tor_mathlog(1.0-quantile
)/
1341 (tor_mathlog(cbt
->Xm
)-tor_mathlog(timeout_ms
));
1342 tor_assert(cbt
->alpha
> 0);
1344 #endif /* defined(TOR_UNIT_TESTS) */
1347 * Returns true if we need circuits to be built
1350 circuit_build_times_needs_circuits(const circuit_build_times_t
*cbt
)
1352 /* Return true if < MIN_CIRCUITS_TO_OBSERVE */
1353 return !circuit_build_times_enough_to_compute(cbt
);
1357 * Returns true if we should build a timeout test circuit
1361 circuit_build_times_needs_circuits_now(const circuit_build_times_t
*cbt
)
1363 return circuit_build_times_needs_circuits(cbt
) &&
1364 approx_time()-cbt
->last_circ_at
> circuit_build_times_test_frequency();
1368 * How long should we be unreachable before we think we need to check if
1369 * our published IP address has changed.
1371 #define CIRCUIT_TIMEOUT_BEFORE_RECHECK_IP (60*3)
1374 * Called to indicate that the network showed some signs of liveness,
1375 * i.e. we received a cell.
1377 * This is used by circuit_build_times_network_check_live() to decide
1378 * if we should record the circuit build timeout or not.
1380 * This function is called every time we receive a cell. Avoid
1381 * syscalls, events, and other high-intensity work.
1384 circuit_build_times_network_is_live(circuit_build_times_t
*cbt
)
1386 time_t now
= approx_time();
1387 if (cbt
->liveness
.nonlive_timeouts
> 0) {
1388 time_t time_since_live
= now
- cbt
->liveness
.network_last_live
;
1390 "Tor now sees network activity. Restoring circuit build "
1391 "timeout recording. Network was down for %d seconds "
1392 "during %d circuit attempts.",
1393 (int)time_since_live
,
1394 cbt
->liveness
.nonlive_timeouts
);
1395 if (time_since_live
> CIRCUIT_TIMEOUT_BEFORE_RECHECK_IP
)
1396 reschedule_descriptor_update_check();
1398 cbt
->liveness
.network_last_live
= now
;
1399 cbt
->liveness
.nonlive_timeouts
= 0;
1401 /* Tell control.c */
1402 control_event_network_liveness_update(1);
1406 * Non-destructively scale all of our circuit success, timeout, and close
1407 * counts down by a factor of two. Scaling in this way preserves the
1408 * ratios between succeeded vs timed out vs closed circuits, so that
1409 * our statistics don't change when we scale.
1411 * This is used only in the rare event that we build more than
1412 * INT32_MAX circuits. Since the num_circ_* variables are
1413 * uint32_t, we won't even be close to overflowing them.
1416 circuit_build_times_scale_circ_counts(circuit_build_times_t
*cbt
)
1418 cbt
->num_circ_succeeded
/= 2;
1419 cbt
->num_circ_timeouts
/= 2;
1420 cbt
->num_circ_closed
/= 2;
1424 * Called to indicate that we "completed" a circuit. Because this circuit
1425 * succeeded, it doesn't count as a timeout-after-the-first-hop.
1427 * (For the purposes of the cbt code, we consider a circuit "completed" if
1428 * it has 3 hops, regardless of its final hop count. We do this because
1429 * we're trying to answer the question, "how long should a circuit take to
1430 * reach the 3-hop count".)
1432 * This is used by circuit_build_times_network_check_changed() to determine
1433 * if we had too many recent timeouts and need to reset our learned timeout
1434 * to something higher.
1437 circuit_build_times_network_circ_success(circuit_build_times_t
*cbt
)
1439 // Count circuit success
1440 cbt
->num_circ_succeeded
++;
1442 // If we're going to wrap int32, scale everything
1443 if (cbt
->num_circ_succeeded
>= INT32_MAX
) {
1444 circuit_build_times_scale_circ_counts(cbt
);
1447 /* Check for NULLness because we might not be using adaptive timeouts */
1448 if (cbt
->liveness
.timeouts_after_firsthop
&&
1449 cbt
->liveness
.num_recent_circs
> 0) {
1450 cbt
->liveness
.timeouts_after_firsthop
[cbt
->liveness
.after_firsthop_idx
]
1452 cbt
->liveness
.after_firsthop_idx
++;
1453 cbt
->liveness
.after_firsthop_idx
%= cbt
->liveness
.num_recent_circs
;
1458 * A circuit just timed out. If it failed after the first hop, record it
1459 * in our history for later deciding if the network speed has changed.
1461 * This is used by circuit_build_times_network_check_changed() to determine
1462 * if we had too many recent timeouts and need to reset our learned timeout
1463 * to something higher.
1466 circuit_build_times_network_timeout(circuit_build_times_t
*cbt
,
1469 // Count circuit timeout
1470 cbt
->num_circ_timeouts
++;
1472 // If we're going to wrap int32, scale everything
1473 if (cbt
->num_circ_timeouts
>= INT32_MAX
) {
1474 circuit_build_times_scale_circ_counts(cbt
);
1477 /* Check for NULLness because we might not be using adaptive timeouts */
1478 if (cbt
->liveness
.timeouts_after_firsthop
&&
1479 cbt
->liveness
.num_recent_circs
> 0) {
1481 cbt
->liveness
.timeouts_after_firsthop
[cbt
->liveness
.after_firsthop_idx
]
1483 cbt
->liveness
.after_firsthop_idx
++;
1484 cbt
->liveness
.after_firsthop_idx
%= cbt
->liveness
.num_recent_circs
;
1490 * A circuit was just forcibly closed. If there has been no recent network
1491 * activity at all, but this circuit was launched back when we thought the
1492 * network was live, increment the number of "nonlive" circuit timeouts.
1494 * This is used by circuit_build_times_network_check_live() to decide
1495 * if we should record the circuit build timeout or not.
1498 circuit_build_times_network_close(circuit_build_times_t
*cbt
,
1499 int did_onehop
, time_t start_time
)
1501 time_t now
= time(NULL
);
1503 // Count circuit close
1504 cbt
->num_circ_closed
++;
1506 // If we're going to wrap int32, scale everything
1507 if (cbt
->num_circ_closed
>= INT32_MAX
) {
1508 circuit_build_times_scale_circ_counts(cbt
);
1512 * Check if this is a timeout that was for a circuit that spent its
1513 * entire existence during a time where we have had no network activity.
1515 if (cbt
->liveness
.network_last_live
< start_time
) {
1517 char last_live_buf
[ISO_TIME_LEN
+1];
1518 char start_time_buf
[ISO_TIME_LEN
+1];
1519 char now_buf
[ISO_TIME_LEN
+1];
1520 format_local_iso_time(last_live_buf
, cbt
->liveness
.network_last_live
);
1521 format_local_iso_time(start_time_buf
, start_time
);
1522 format_local_iso_time(now_buf
, now
);
1524 "A circuit somehow completed a hop while the network was "
1525 "not live. The network was last live at %s, but the circuit "
1526 "launched at %s. It's now %s. This could mean your clock "
1527 "changed.", last_live_buf
, start_time_buf
, now_buf
);
1529 cbt
->liveness
.nonlive_timeouts
++;
1530 if (cbt
->liveness
.nonlive_timeouts
== 1) {
1532 "Tor has not observed any network activity for the past %d "
1533 "seconds. Disabling circuit build timeout recording.",
1534 (int)(now
- cbt
->liveness
.network_last_live
));
1536 /* Tell control.c */
1537 control_event_network_liveness_update(0);
1540 "Got non-live timeout. Current count is: %d",
1541 cbt
->liveness
.nonlive_timeouts
);
1547 * When the network is not live, we do not record circuit build times.
1549 * The network is considered not live if there has been at least one
1550 * circuit build that began and ended (had its close_ms measurement
1551 * period expire) since we last received a cell.
1553 * Also has the side effect of rewinding the circuit time history
1554 * in the case of recent liveness changes.
1557 circuit_build_times_network_check_live(const circuit_build_times_t
*cbt
)
1559 if (cbt
->liveness
.nonlive_timeouts
> 0) {
1567 * Returns true if we have seen more than MAX_RECENT_TIMEOUT_COUNT of
1568 * the past RECENT_CIRCUITS time out after the first hop. Used to detect
1569 * if the network connection has changed significantly, and if so,
1570 * resets our circuit build timeout to the default.
1572 * Also resets the entire timeout history in this case and causes us
1573 * to restart the process of building test circuits and estimating a
1577 circuit_build_times_network_check_changed(circuit_build_times_t
*cbt
)
1579 int total_build_times
= cbt
->total_build_times
;
1580 int timeout_count
=0;
1583 if (cbt
->liveness
.timeouts_after_firsthop
&&
1584 cbt
->liveness
.num_recent_circs
> 0) {
1585 /* how many of our recent circuits made it to the first hop but then
1587 for (i
= 0; i
< cbt
->liveness
.num_recent_circs
; i
++) {
1588 timeout_count
+= cbt
->liveness
.timeouts_after_firsthop
[i
];
1592 /* If 80% of our recent circuits are timing out after the first hop,
1593 * we need to re-estimate a new initial alpha and timeout. */
1594 if (timeout_count
< circuit_build_times_max_timeouts()) {
1598 circuit_build_times_reset(cbt
);
1599 if (cbt
->liveness
.timeouts_after_firsthop
&&
1600 cbt
->liveness
.num_recent_circs
> 0) {
1601 memset(cbt
->liveness
.timeouts_after_firsthop
, 0,
1602 sizeof(*cbt
->liveness
.timeouts_after_firsthop
)*
1603 cbt
->liveness
.num_recent_circs
);
1605 cbt
->liveness
.after_firsthop_idx
= 0;
1607 #define MAX_TIMEOUT ((int32_t) (INT32_MAX/2))
1608 /* Check to see if this has happened before. If so, double the timeout
1609 * to give clients on abysmally bad network connections a shot at access */
1610 if (cbt
->timeout_ms
>= circuit_build_times_get_initial_timeout()) {
1611 if (cbt
->timeout_ms
> MAX_TIMEOUT
|| cbt
->close_ms
> MAX_TIMEOUT
) {
1612 log_warn(LD_CIRC
, "Insanely large circuit build timeout value. "
1613 "(timeout = %fmsec, close = %fmsec)",
1614 cbt
->timeout_ms
, cbt
->close_ms
);
1616 cbt
->timeout_ms
*= 2;
1620 cbt
->close_ms
= cbt
->timeout_ms
1621 = circuit_build_times_get_initial_timeout();
1625 cbt_control_event_buildtimeout_set(cbt
, BUILDTIMEOUT_SET_EVENT_RESET
);
1628 "Your network connection speed appears to have changed. Resetting "
1629 "timeout to %lds after %d timeouts and %d buildtimes.",
1630 tor_lround(cbt
->timeout_ms
/1000), timeout_count
,
1637 * Count the number of timeouts in a set of cbt data.
1640 circuit_build_times_timeout_rate(const circuit_build_times_t
*cbt
)
1643 for (i
= 0; i
< CBT_NCIRCUITS_TO_OBSERVE
; i
++) {
1644 if (cbt
->circuit_build_times
[i
] >= cbt
->timeout_ms
) {
1649 if (!cbt
->total_build_times
)
1652 return ((double)timeouts
)/cbt
->total_build_times
;
1656 * Count the number of closed circuits in a set of cbt data.
1659 circuit_build_times_close_rate(const circuit_build_times_t
*cbt
)
1662 for (i
= 0; i
< CBT_NCIRCUITS_TO_OBSERVE
; i
++) {
1663 if (cbt
->circuit_build_times
[i
] == CBT_BUILD_ABANDONED
) {
1668 if (!cbt
->total_build_times
)
1671 return ((double)closed
)/cbt
->total_build_times
;
1675 * Store a timeout as a synthetic value.
1677 * Returns true if the store was successful and we should possibly
1678 * update our timeout estimate.
1681 circuit_build_times_count_close(circuit_build_times_t
*cbt
,
1685 if (circuit_build_times_disabled(get_options())) {
1686 cbt
->close_ms
= cbt
->timeout_ms
1687 = circuit_build_times_get_initial_timeout();
1691 /* Record this force-close to help determine if the network is dead */
1692 circuit_build_times_network_close(cbt
, did_onehop
, start_time
);
1694 /* Only count timeouts if network is live.. */
1695 if (!circuit_build_times_network_check_live(cbt
)) {
1699 circuit_build_times_add_time(cbt
, CBT_BUILD_ABANDONED
);
1704 * Update timeout counts to determine if we need to expire
1705 * our build time history due to excessive timeouts.
1707 * We do not record any actual time values at this stage;
1708 * we are only interested in recording the fact that a timeout
1709 * happened. We record the time values via
1710 * circuit_build_times_count_close() and circuit_build_times_add_time().
1713 circuit_build_times_count_timeout(circuit_build_times_t
*cbt
,
1716 if (circuit_build_times_disabled(get_options())) {
1717 cbt
->close_ms
= cbt
->timeout_ms
1718 = circuit_build_times_get_initial_timeout();
1722 /* Register the fact that a timeout just occurred. */
1723 circuit_build_times_network_timeout(cbt
, did_onehop
);
1725 /* If there are a ton of timeouts, we should reset
1726 * the circuit build timeout. */
1727 circuit_build_times_network_check_changed(cbt
);
1731 * Estimate a new timeout based on history and set our timeout
1732 * variable accordingly.
1735 circuit_build_times_set_timeout_worker(circuit_build_times_t
*cbt
)
1737 build_time_t max_time
;
1738 if (!circuit_build_times_enough_to_compute(cbt
))
1741 if (!circuit_build_times_update_alpha(cbt
))
1744 cbt
->timeout_ms
= circuit_build_times_calculate_timeout(cbt
,
1745 circuit_build_times_quantile_cutoff());
1747 cbt
->close_ms
= circuit_build_times_calculate_timeout(cbt
,
1748 circuit_build_times_close_quantile());
1750 max_time
= circuit_build_times_max(cbt
);
1752 if (cbt
->timeout_ms
> max_time
) {
1754 "Circuit build timeout of %dms is beyond the maximum build "
1755 "time we have ever observed. Capping it to %dms.",
1756 (int)cbt
->timeout_ms
, max_time
);
1757 cbt
->timeout_ms
= max_time
;
1760 if (max_time
< INT32_MAX
/2 && cbt
->close_ms
> 2*max_time
) {
1762 "Circuit build measurement period of %dms is more than twice "
1763 "the maximum build time we have ever observed. Capping it to "
1764 "%dms.", (int)cbt
->close_ms
, 2*max_time
);
1765 cbt
->close_ms
= 2*max_time
;
1768 /* Sometimes really fast guard nodes give us such a steep curve
1769 * that this ends up being not that much greater than timeout_ms.
1770 * Make it be at least 1 min to handle this case. */
1771 cbt
->close_ms
= MAX(cbt
->close_ms
, circuit_build_times_initial_timeout());
1773 cbt
->have_computed_timeout
= 1;
1778 * Exposed function to compute a new timeout. Dispatches events and
1779 * also filters out extremely high timeout values.
1782 circuit_build_times_set_timeout(circuit_build_times_t
*cbt
)
1784 long prev_timeout
= tor_lround(cbt
->timeout_ms
/1000);
1785 double timeout_rate
;
1788 * Just return if we aren't using adaptive timeouts
1790 if (circuit_build_times_disabled(get_options()))
1793 if (!circuit_build_times_set_timeout_worker(cbt
))
1796 if (cbt
->timeout_ms
< circuit_build_times_min_timeout()) {
1797 log_notice(LD_CIRC
, "Set buildtimeout to low value %fms. Setting to %dms",
1798 cbt
->timeout_ms
, circuit_build_times_min_timeout());
1799 cbt
->timeout_ms
= circuit_build_times_min_timeout();
1800 if (cbt
->close_ms
< cbt
->timeout_ms
) {
1801 /* This shouldn't happen because of MAX() in timeout_worker above,
1802 * but doing it just in case */
1803 cbt
->close_ms
= circuit_build_times_initial_timeout();
1807 cbt_control_event_buildtimeout_set(cbt
, BUILDTIMEOUT_SET_EVENT_COMPUTED
);
1809 timeout_rate
= circuit_build_times_timeout_rate(cbt
);
1811 if (prev_timeout
> tor_lround(cbt
->timeout_ms
/1000)) {
1813 "Based on %d circuit times, it looks like we don't need to "
1814 "wait so long for circuits to finish. We will now assume a "
1815 "circuit is too slow to use after waiting %ld seconds.",
1816 cbt
->total_build_times
,
1817 tor_lround(cbt
->timeout_ms
/1000));
1819 "Circuit timeout data: %fms, %fms, Xm: %d, a: %f, r: %f",
1820 cbt
->timeout_ms
, cbt
->close_ms
, cbt
->Xm
, cbt
->alpha
,
1822 } else if (prev_timeout
< tor_lround(cbt
->timeout_ms
/1000)) {
1824 "Based on %d circuit times, it looks like we need to wait "
1825 "longer for circuits to finish. We will now assume a "
1826 "circuit is too slow to use after waiting %ld seconds.",
1827 cbt
->total_build_times
,
1828 tor_lround(cbt
->timeout_ms
/1000));
1830 "Circuit timeout data: %fms, %fms, Xm: %d, a: %f, r: %f",
1831 cbt
->timeout_ms
, cbt
->close_ms
, cbt
->Xm
, cbt
->alpha
,
1835 "Set circuit build timeout to %lds (%fms, %fms, Xm: %d, a: %f,"
1836 " r: %f) based on %d circuit times",
1837 tor_lround(cbt
->timeout_ms
/1000),
1838 cbt
->timeout_ms
, cbt
->close_ms
, cbt
->Xm
, cbt
->alpha
, timeout_rate
,
1839 cbt
->total_build_times
);
1843 #ifdef TOR_UNIT_TESTS
1844 /** Make a note that we're running unit tests (rather than running Tor
1845 * itself), so we avoid clobbering our state file. */
1847 circuitbuild_running_unit_tests(void)
1851 #endif /* defined(TOR_UNIT_TESTS) */
1854 circuit_build_times_update_last_circ(circuit_build_times_t
*cbt
)
1856 cbt
->last_circ_at
= approx_time();
1860 cbt_control_event_buildtimeout_set(const circuit_build_times_t
*cbt
,
1861 buildtimeout_set_event_t type
)
1865 double timeout_rate
= 0.0;
1866 double close_rate
= 0.0;
1869 case BUILDTIMEOUT_SET_EVENT_RESET
:
1870 case BUILDTIMEOUT_SET_EVENT_SUSPENDED
:
1871 case BUILDTIMEOUT_SET_EVENT_DISCARD
:
1874 case BUILDTIMEOUT_SET_EVENT_COMPUTED
:
1875 case BUILDTIMEOUT_SET_EVENT_RESUME
:
1877 qnt
= circuit_build_times_quantile_cutoff();
1881 /* The timeout rate is the ratio of the timeout count over
1882 * the total number of circuits attempted. The total number of
1883 * circuits is (timeouts+succeeded), since every circuit
1884 * either succeeds, or times out. "Closed" circuits are
1885 * MEASURE_TIMEOUT circuits whose measurement period expired.
1886 * All MEASURE_TIMEOUT circuits are counted in the timeouts stat
1887 * before transitioning to MEASURE_TIMEOUT (in
1888 * circuit_build_times_mark_circ_as_measurement_only()).
1889 * MEASURE_TIMEOUT circuits that succeed are *not* counted as
1890 * "succeeded". See circuit_build_times_handle_completed_hop().
1892 * We cast the denominator
1893 * to promote it to double before the addition, to avoid int32
1895 const double total_circuits
=
1896 ((double)cbt
->num_circ_timeouts
) + cbt
->num_circ_succeeded
;
1897 if (total_circuits
>= 1.0) {
1898 timeout_rate
= cbt
->num_circ_timeouts
/ total_circuits
;
1899 close_rate
= cbt
->num_circ_closed
/ total_circuits
;
1902 tor_asprintf(&args
, "TOTAL_TIMES=%lu "
1903 "TIMEOUT_MS=%lu XM=%lu ALPHA=%f CUTOFF_QUANTILE=%f "
1904 "TIMEOUT_RATE=%f CLOSE_MS=%lu CLOSE_RATE=%f",
1905 (unsigned long)cbt
->total_build_times
,
1906 (unsigned long)cbt
->timeout_ms
,
1907 (unsigned long)cbt
->Xm
, cbt
->alpha
, qnt
,
1909 (unsigned long)cbt
->close_ms
,
1912 control_event_buildtimeout_set(type
, args
);