Bump copyright date to 2019
[tor.git] / src / core / or / circuitstats.c
blobc6ea2fff975353a46e4285ef922c839e3819046c
1 /* Copyright (c) 2001 Matej Pfajfar.
2 * Copyright (c) 2001-2004, Roger Dingledine.
3 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
4 * Copyright (c) 2007-2019, The Tor Project, Inc. */
5 /* See LICENSE for licensing information */
7 /**
8 * \file circuitstats.c
10 * \brief Maintains and analyzes statistics about circuit built times, so we
11 * can tell how long we may need to wait for a fast circuit to be constructed.
13 * By keeping these statistics, a client learns when it should time out a slow
14 * circuit for being too slow, and when it should keep a circuit open in order
15 * to wait for it to complete.
17 * The information here is kept in a circuit_built_times_t structure, which is
18 * currently a singleton, but doesn't need to be. It's updated by calls to
19 * circuit_build_times_count_timeout() from circuituse.c,
20 * circuit_build_times_count_close() from circuituse.c, and
21 * circuit_build_times_add_time() from circuitbuild.c, and inspected by other
22 * calls into this module, mostly from circuitlist.c. Observations are
23 * persisted to disk via the or_state_t-related calls.
26 #define CIRCUITSTATS_PRIVATE
28 #include "core/or/or.h"
29 #include "core/or/circuitbuild.h"
30 #include "core/or/circuitstats.h"
31 #include "app/config/config.h"
32 #include "app/config/confparse.h"
33 #include "feature/control/control.h"
34 #include "lib/crypt_ops/crypto_rand.h"
35 #include "core/mainloop/mainloop.h"
36 #include "feature/nodelist/networkstatus.h"
37 #include "feature/rend/rendclient.h"
38 #include "feature/rend/rendservice.h"
39 #include "feature/relay/router.h"
40 #include "app/config/statefile.h"
41 #include "core/or/circuitlist.h"
42 #include "core/or/circuituse.h"
43 #include "lib/math/fp.h"
44 #include "lib/time/tvdiff.h"
45 #include "lib/encoding/confline.h"
46 #include "feature/dirauth/authmode.h"
48 #include "core/or/crypt_path_st.h"
49 #include "core/or/origin_circuit_st.h"
50 #include "app/config/or_state_st.h"
52 #undef log
53 #include <math.h>
55 static void cbt_control_event_buildtimeout_set(
56 const circuit_build_times_t *cbt,
57 buildtimeout_set_event_t type);
58 static void circuit_build_times_scale_circ_counts(circuit_build_times_t *cbt);
60 #define CBT_BIN_TO_MS(bin) ((bin)*CBT_BIN_WIDTH + (CBT_BIN_WIDTH/2))
62 /** Global list of circuit build times */
63 // XXXX: Add this as a member for entry_guard_t instead of global?
64 // Then we could do per-guard statistics, as guards are likely to
65 // vary in their own latency. The downside of this is that guards
66 // can change frequently, so we'd be building a lot more circuits
67 // most likely.
68 static circuit_build_times_t circ_times;
70 #ifdef TOR_UNIT_TESTS
71 /** If set, we're running the unit tests: we should avoid clobbering
72 * our state file or accessing get_options() or get_or_state() */
73 static int unit_tests = 0;
74 #else
75 #define unit_tests 0
76 #endif /* defined(TOR_UNIT_TESTS) */
78 /** Return a pointer to the data structure describing our current circuit
79 * build time history and computations. */
80 const circuit_build_times_t *
81 get_circuit_build_times(void)
83 return &circ_times;
86 /** As get_circuit_build_times, but return a mutable pointer. */
87 circuit_build_times_t *
88 get_circuit_build_times_mutable(void)
90 return &circ_times;
93 /** Return the time to wait before actually closing an under-construction, in
94 * milliseconds. */
95 double
96 get_circuit_build_close_time_ms(void)
98 return circ_times.close_ms;
101 /** Return the time to wait before giving up on an under-construction circuit,
102 * in milliseconds. */
103 double
104 get_circuit_build_timeout_ms(void)
106 return circ_times.timeout_ms;
110 * This function decides if CBT learning should be disabled. It returns
111 * true if one or more of the following conditions are met:
113 * 1. If the cbtdisabled consensus parameter is set.
114 * 2. If the torrc option LearnCircuitBuildTimeout is false.
115 * 3. If we are a directory authority
116 * 4. If we fail to write circuit build time history to our state file.
117 * 5. If we are configured in Single Onion mode
120 circuit_build_times_disabled(const or_options_t *options)
122 return circuit_build_times_disabled_(options, 0);
125 /** As circuit_build_times_disabled, but take options as an argument. */
127 circuit_build_times_disabled_(const or_options_t *options,
128 int ignore_consensus)
130 if (unit_tests) {
131 return 0;
132 } else {
133 int consensus_disabled =
134 ignore_consensus ? 0 : networkstatus_get_param(NULL, "cbtdisabled",
135 0, 0, 1);
136 int config_disabled = !options->LearnCircuitBuildTimeout;
137 int dirauth_disabled = authdir_mode(options);
138 int state_disabled = did_last_state_file_write_fail() ? 1 : 0;
139 /* LearnCircuitBuildTimeout and Single Onion Services are
140 * incompatible in two ways:
142 * - LearnCircuitBuildTimeout results in a low CBT, which
143 * Single Onion use of one-hop intro and rendezvous circuits lowers
144 * much further, producing *far* too many timeouts.
146 * - The adaptive CBT code does not update its timeout estimate
147 * using build times for single-hop circuits.
149 * If we fix both of these issues someday, we should test
150 * these modes with LearnCircuitBuildTimeout on again. */
151 int single_onion_disabled = rend_service_allow_non_anonymous_connection(
152 options);
154 if (consensus_disabled || config_disabled || dirauth_disabled ||
155 state_disabled || single_onion_disabled) {
156 #if 0
157 log_debug(LD_CIRC,
158 "CircuitBuildTime learning is disabled. "
159 "Consensus=%d, Config=%d, AuthDir=%d, StateFile=%d",
160 consensus_disabled, config_disabled, dirauth_disabled,
161 state_disabled);
162 #endif /* 0 */
163 return 1;
164 } else {
165 #if 0
166 log_debug(LD_CIRC,
167 "CircuitBuildTime learning is not disabled. "
168 "Consensus=%d, Config=%d, AuthDir=%d, StateFile=%d",
169 consensus_disabled, config_disabled, dirauth_disabled,
170 state_disabled);
171 #endif /* 0 */
172 return 0;
178 * Retrieve and bounds-check the cbtmaxtimeouts consensus parameter.
180 * Effect: When this many timeouts happen in the last 'cbtrecentcount'
181 * circuit attempts, the client should discard all of its history and
182 * begin learning a fresh timeout value.
184 static int32_t
185 circuit_build_times_max_timeouts(void)
187 int32_t cbt_maxtimeouts;
189 cbt_maxtimeouts = networkstatus_get_param(NULL, "cbtmaxtimeouts",
190 CBT_DEFAULT_MAX_RECENT_TIMEOUT_COUNT,
191 CBT_MIN_MAX_RECENT_TIMEOUT_COUNT,
192 CBT_MAX_MAX_RECENT_TIMEOUT_COUNT);
194 if (!(get_options()->LearnCircuitBuildTimeout)) {
195 log_debug(LD_BUG,
196 "circuit_build_times_max_timeouts() called, cbtmaxtimeouts is"
197 " %d",
198 cbt_maxtimeouts);
201 return cbt_maxtimeouts;
205 * Retrieve and bounds-check the cbtnummodes consensus parameter.
207 * Effect: This value governs how many modes to use in the weighted
208 * average calculation of Pareto parameter Xm. A value of 3 introduces
209 * some bias (2-5% of CDF) under ideal conditions, but allows for better
210 * performance in the event that a client chooses guard nodes of radically
211 * different performance characteristics.
213 static int32_t
214 circuit_build_times_default_num_xm_modes(void)
216 int32_t num = networkstatus_get_param(NULL, "cbtnummodes",
217 CBT_DEFAULT_NUM_XM_MODES,
218 CBT_MIN_NUM_XM_MODES,
219 CBT_MAX_NUM_XM_MODES);
221 if (!(get_options()->LearnCircuitBuildTimeout)) {
222 log_debug(LD_BUG,
223 "circuit_build_times_default_num_xm_modes() called, cbtnummodes"
224 " is %d",
225 num);
228 return num;
232 * Retrieve and bounds-check the cbtmincircs consensus parameter.
234 * Effect: This is the minimum number of circuits to build before
235 * computing a timeout.
237 static int32_t
238 circuit_build_times_min_circs_to_observe(void)
240 int32_t num = networkstatus_get_param(NULL, "cbtmincircs",
241 CBT_DEFAULT_MIN_CIRCUITS_TO_OBSERVE,
242 CBT_MIN_MIN_CIRCUITS_TO_OBSERVE,
243 CBT_MAX_MIN_CIRCUITS_TO_OBSERVE);
245 if (!(get_options()->LearnCircuitBuildTimeout)) {
246 log_debug(LD_BUG,
247 "circuit_build_times_min_circs_to_observe() called, cbtmincircs"
248 " is %d",
249 num);
252 return num;
255 /** Return true iff <b>cbt</b> has recorded enough build times that we
256 * want to start acting on the timeout it implies. */
258 circuit_build_times_enough_to_compute(const circuit_build_times_t *cbt)
260 return cbt->total_build_times >= circuit_build_times_min_circs_to_observe();
264 * Retrieve and bounds-check the cbtquantile consensus parameter.
266 * Effect: This is the position on the quantile curve to use to set the
267 * timeout value. It is a percent (10-99).
269 double
270 circuit_build_times_quantile_cutoff(void)
272 int32_t num = networkstatus_get_param(NULL, "cbtquantile",
273 CBT_DEFAULT_QUANTILE_CUTOFF,
274 CBT_MIN_QUANTILE_CUTOFF,
275 CBT_MAX_QUANTILE_CUTOFF);
277 if (!(get_options()->LearnCircuitBuildTimeout)) {
278 log_debug(LD_BUG,
279 "circuit_build_times_quantile_cutoff() called, cbtquantile"
280 " is %d",
281 num);
284 return num/100.0;
288 * Retrieve and bounds-check the cbtclosequantile consensus parameter.
290 * Effect: This is the position on the quantile curve to use to set the
291 * timeout value to use to actually close circuits. It is a percent
292 * (0-99).
294 static double
295 circuit_build_times_close_quantile(void)
297 int32_t param;
298 /* Cast is safe - circuit_build_times_quantile_cutoff() is capped */
299 int32_t min = (int)tor_lround(100*circuit_build_times_quantile_cutoff());
300 param = networkstatus_get_param(NULL, "cbtclosequantile",
301 CBT_DEFAULT_CLOSE_QUANTILE,
302 CBT_MIN_CLOSE_QUANTILE,
303 CBT_MAX_CLOSE_QUANTILE);
305 if (!(get_options()->LearnCircuitBuildTimeout)) {
306 log_debug(LD_BUG,
307 "circuit_build_times_close_quantile() called, cbtclosequantile"
308 " is %d", param);
311 if (param < min) {
312 log_warn(LD_DIR, "Consensus parameter cbtclosequantile is "
313 "too small, raising to %d", min);
314 param = min;
316 return param / 100.0;
320 * Retrieve and bounds-check the cbttestfreq consensus parameter.
322 * Effect: Describes how often in seconds to build a test circuit to
323 * gather timeout values. Only applies if less than 'cbtmincircs'
324 * have been recorded.
326 static int32_t
327 circuit_build_times_test_frequency(void)
329 int32_t num = networkstatus_get_param(NULL, "cbttestfreq",
330 CBT_DEFAULT_TEST_FREQUENCY,
331 CBT_MIN_TEST_FREQUENCY,
332 CBT_MAX_TEST_FREQUENCY);
334 if (!(get_options()->LearnCircuitBuildTimeout)) {
335 log_debug(LD_BUG,
336 "circuit_build_times_test_frequency() called, cbttestfreq is %d",
337 num);
340 return num;
344 * Retrieve and bounds-check the cbtmintimeout consensus parameter.
346 * Effect: This is the minimum allowed timeout value in milliseconds.
347 * The minimum is to prevent rounding to 0 (we only check once
348 * per second).
350 static int32_t
351 circuit_build_times_min_timeout(void)
353 int32_t num = networkstatus_get_param(NULL, "cbtmintimeout",
354 CBT_DEFAULT_TIMEOUT_MIN_VALUE,
355 CBT_MIN_TIMEOUT_MIN_VALUE,
356 CBT_MAX_TIMEOUT_MIN_VALUE);
358 if (!(get_options()->LearnCircuitBuildTimeout)) {
359 log_debug(LD_BUG,
360 "circuit_build_times_min_timeout() called, cbtmintimeout is %d",
361 num);
363 return num;
367 * Retrieve and bounds-check the cbtinitialtimeout consensus parameter.
369 * Effect: This is the timeout value to use before computing a timeout,
370 * in milliseconds.
372 int32_t
373 circuit_build_times_initial_timeout(void)
375 int32_t min = circuit_build_times_min_timeout();
376 int32_t param = networkstatus_get_param(NULL, "cbtinitialtimeout",
377 CBT_DEFAULT_TIMEOUT_INITIAL_VALUE,
378 CBT_MIN_TIMEOUT_INITIAL_VALUE,
379 CBT_MAX_TIMEOUT_INITIAL_VALUE);
381 if (!(get_options()->LearnCircuitBuildTimeout)) {
382 log_debug(LD_BUG,
383 "circuit_build_times_initial_timeout() called, "
384 "cbtinitialtimeout is %d",
385 param);
388 if (param < min) {
389 log_warn(LD_DIR, "Consensus parameter cbtinitialtimeout is too small, "
390 "raising to %d", min);
391 param = min;
393 return param;
397 * Retrieve and bounds-check the cbtrecentcount consensus parameter.
399 * Effect: This is the number of circuit build times to keep track of
400 * for deciding if we hit cbtmaxtimeouts and need to reset our state
401 * and learn a new timeout.
403 static int32_t
404 circuit_build_times_recent_circuit_count(networkstatus_t *ns)
406 int32_t num;
407 num = networkstatus_get_param(ns, "cbtrecentcount",
408 CBT_DEFAULT_RECENT_CIRCUITS,
409 CBT_MIN_RECENT_CIRCUITS,
410 CBT_MAX_RECENT_CIRCUITS);
412 if (!(get_options()->LearnCircuitBuildTimeout)) {
413 log_debug(LD_BUG,
414 "circuit_build_times_recent_circuit_count() called, "
415 "cbtrecentcount is %d",
416 num);
419 return num;
423 * This function is called when we get a consensus update.
425 * It checks to see if we have changed any consensus parameters
426 * that require reallocation or discard of previous stats.
428 void
429 circuit_build_times_new_consensus_params(circuit_build_times_t *cbt,
430 networkstatus_t *ns)
432 int32_t num;
435 * First check if we're doing adaptive timeouts at all; nothing to
436 * update if we aren't.
439 if (!circuit_build_times_disabled(get_options())) {
440 num = circuit_build_times_recent_circuit_count(ns);
442 if (num > 0) {
443 if (num != cbt->liveness.num_recent_circs) {
444 int8_t *recent_circs;
445 if (cbt->liveness.num_recent_circs > 0) {
446 log_notice(LD_CIRC, "The Tor Directory Consensus has changed how "
447 "many circuits we must track to detect network failures "
448 "from %d to %d.", cbt->liveness.num_recent_circs, num);
449 } else {
450 log_notice(LD_CIRC, "Upon receiving a consensus directory, "
451 "re-enabling circuit-based network failure detection.");
454 tor_assert(cbt->liveness.timeouts_after_firsthop ||
455 cbt->liveness.num_recent_circs == 0);
458 * Technically this is a circular array that we are reallocating
459 * and memcopying. However, since it only consists of either 1s
460 * or 0s, and is only used in a statistical test to determine when
461 * we should discard our history after a sufficient number of 1's
462 * have been reached, it is fine if order is not preserved or
463 * elements are lost.
465 * cbtrecentcount should only be changing in cases of severe network
466 * distress anyway, so memory correctness here is paramount over
467 * doing acrobatics to preserve the array.
469 recent_circs = tor_calloc(num, sizeof(int8_t));
470 if (cbt->liveness.timeouts_after_firsthop &&
471 cbt->liveness.num_recent_circs > 0) {
472 memcpy(recent_circs, cbt->liveness.timeouts_after_firsthop,
473 sizeof(int8_t)*MIN(num, cbt->liveness.num_recent_circs));
476 // Adjust the index if it needs it.
477 if (num < cbt->liveness.num_recent_circs) {
478 cbt->liveness.after_firsthop_idx = MIN(num-1,
479 cbt->liveness.after_firsthop_idx);
482 tor_free(cbt->liveness.timeouts_after_firsthop);
483 cbt->liveness.timeouts_after_firsthop = recent_circs;
484 cbt->liveness.num_recent_circs = num;
486 /* else no change, nothing to do */
487 } else { /* num == 0 */
489 * Weird. This probably shouldn't happen, so log a warning, but try
490 * to do something sensible anyway.
493 log_warn(LD_CIRC,
494 "The cbtrecentcircs consensus parameter came back zero! "
495 "This disables adaptive timeouts since we can't keep track of "
496 "any recent circuits.");
498 circuit_build_times_free_timeouts(cbt);
500 } else {
502 * Adaptive timeouts are disabled; this might be because of the
503 * LearnCircuitBuildTimes config parameter, and hence permanent, or
504 * the cbtdisabled consensus parameter, so it may be a new condition.
505 * Treat it like getting num == 0 above and free the circuit history
506 * if we have any.
509 circuit_build_times_free_timeouts(cbt);
514 * Return the initial default or configured timeout in milliseconds
516 static double
517 circuit_build_times_get_initial_timeout(void)
519 double timeout;
520 const or_options_t *options = get_options();
523 * Check if we have LearnCircuitBuildTimeout, and if we don't,
524 * always use CircuitBuildTimeout, no questions asked.
526 if (!unit_tests && options->CircuitBuildTimeout) {
527 timeout = options->CircuitBuildTimeout*1000;
528 if (!circuit_build_times_disabled(options) &&
529 timeout < circuit_build_times_min_timeout()) {
530 log_warn(LD_CIRC, "Config CircuitBuildTimeout too low. Setting to %ds",
531 circuit_build_times_min_timeout()/1000);
532 timeout = circuit_build_times_min_timeout();
534 } else {
535 timeout = circuit_build_times_initial_timeout();
538 return timeout;
542 * Reset the build time state.
544 * Leave estimated parameters, timeout and network liveness intact
545 * for future use.
547 STATIC void
548 circuit_build_times_reset(circuit_build_times_t *cbt)
550 memset(cbt->circuit_build_times, 0, sizeof(cbt->circuit_build_times));
551 cbt->total_build_times = 0;
552 cbt->build_times_idx = 0;
553 cbt->have_computed_timeout = 0;
555 // Reset timeout and close counts
556 cbt->num_circ_succeeded = 0;
557 cbt->num_circ_closed = 0;
558 cbt->num_circ_timeouts = 0;
562 * Initialize the buildtimes structure for first use.
564 * Sets the initial timeout values based on either the config setting,
565 * the consensus param, or the default (CBT_DEFAULT_TIMEOUT_INITIAL_VALUE).
567 void
568 circuit_build_times_init(circuit_build_times_t *cbt)
570 memset(cbt, 0, sizeof(*cbt));
572 * Check if we really are using adaptive timeouts, and don't keep
573 * track of this stuff if not.
575 if (!circuit_build_times_disabled(get_options())) {
576 cbt->liveness.num_recent_circs =
577 circuit_build_times_recent_circuit_count(NULL);
578 cbt->liveness.timeouts_after_firsthop =
579 tor_calloc(cbt->liveness.num_recent_circs, sizeof(int8_t));
580 } else {
581 cbt->liveness.num_recent_circs = 0;
582 cbt->liveness.timeouts_after_firsthop = NULL;
584 cbt->close_ms = cbt->timeout_ms = circuit_build_times_get_initial_timeout();
585 cbt_control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_RESET);
589 * Free the saved timeouts, if the cbtdisabled consensus parameter got turned
590 * on or something.
593 void
594 circuit_build_times_free_timeouts(circuit_build_times_t *cbt)
596 if (!cbt) return;
598 if (cbt->liveness.timeouts_after_firsthop) {
599 tor_free(cbt->liveness.timeouts_after_firsthop);
602 cbt->liveness.num_recent_circs = 0;
605 #if 0
607 * Rewind our build time history by n positions.
609 static void
610 circuit_build_times_rewind_history(circuit_build_times_t *cbt, int n)
612 int i = 0;
614 cbt->build_times_idx -= n;
615 cbt->build_times_idx %= CBT_NCIRCUITS_TO_OBSERVE;
617 for (i = 0; i < n; i++) {
618 cbt->circuit_build_times[(i+cbt->build_times_idx)
619 %CBT_NCIRCUITS_TO_OBSERVE]=0;
622 if (cbt->total_build_times > n) {
623 cbt->total_build_times -= n;
624 } else {
625 cbt->total_build_times = 0;
628 log_info(LD_CIRC,
629 "Rewound history by %d places. Current index: %d. "
630 "Total: %d", n, cbt->build_times_idx, cbt->total_build_times);
632 #endif /* 0 */
635 * Mark this circuit as timed out, but change its purpose
636 * so that it continues to build, allowing us to measure
637 * its full build time.
639 void
640 circuit_build_times_mark_circ_as_measurement_only(origin_circuit_t *circ)
642 circuit_event_status(circ,
643 CIRC_EVENT_FAILED,
644 END_CIRC_REASON_TIMEOUT);
645 circuit_change_purpose(TO_CIRCUIT(circ),
646 CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT);
647 /* Record this event to check for too many timeouts
648 * in a row. This function does not record a time value yet
649 * (we do that later); it only counts the fact that we did
650 * have a timeout. We also want to avoid double-counting
651 * already "relaxed" circuits, which are counted in
652 * circuit_expire_building(). */
653 if (!circ->relaxed_timeout) {
654 int first_hop_succeeded = circ->cpath &&
655 circ->cpath->state == CPATH_STATE_OPEN;
657 circuit_build_times_count_timeout(
658 get_circuit_build_times_mutable(),
659 first_hop_succeeded);
664 * Perform the build time work that needs to be done when a circuit
665 * completes a hop.
667 * This function decides if we should record a circuit's build time
668 * in our histogram data and other statistics, and if so, records it.
669 * It also will mark circuits that have already timed out as
670 * measurement-only circuits, so they can continue to build but
671 * not get used.
673 * For this, we want to consider circuits that will eventually make
674 * it to the third hop. For circuits longer than 3 hops, we want to
675 * record their build time when they reach the third hop, but let
676 * them continue (and not count them later). For circuits that are
677 * exactly 3 hops, this will count them when they are completed. We
678 * do this so that CBT is always gathering statistics on circuits
679 * of the same length, regardless of their type.
681 void
682 circuit_build_times_handle_completed_hop(origin_circuit_t *circ)
684 struct timeval end;
685 long timediff;
687 /* If circuit build times are disabled, let circuit_expire_building()
688 * handle it.. */
689 if (circuit_build_times_disabled(get_options())) {
690 return;
693 /* Is this a circuit for which the timeout applies in a straight-forward
694 * way? If so, handle it below. If not, just return (and let
695 * circuit_expire_building() eventually take care of it).
697 if (!circuit_timeout_want_to_count_circ(circ)) {
698 return;
701 tor_gettimeofday(&end);
702 timediff = tv_mdiff(&circ->base_.timestamp_began, &end);
704 /* Check if we would have timed out already. If so, change the
705 * purpose here. But don't do any timeout handling here if there
706 * are no circuits opened yet. Save it for circuit_expire_building()
707 * (to allow it to handle timeout "relaxing" over there). */
708 if (timediff > get_circuit_build_timeout_ms() &&
709 circuit_any_opened_circuits_cached()) {
711 /* Circuits are allowed to last longer for measurement.
712 * Switch their purpose and wait. */
713 if (circ->base_.purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) {
714 log_info(LD_CIRC,
715 "Deciding to timeout circuit %"PRIu32"\n",
716 (circ->global_identifier));
717 circuit_build_times_mark_circ_as_measurement_only(circ);
721 /* If the circuit is built to exactly the DEFAULT_ROUTE_LEN,
722 * add it to our buildtimes. */
723 if (circuit_get_cpath_opened_len(circ) == DEFAULT_ROUTE_LEN) {
724 /* If the circuit build time is much greater than we would have cut
725 * it off at, we probably had a suspend event along this codepath,
726 * and we should discard the value.
728 if (timediff < 0 ||
729 timediff > 2*get_circuit_build_close_time_ms()+1000) {
730 log_notice(LD_CIRC, "Strange value for circuit build time: %ldmsec. "
731 "Assuming clock jump. Purpose %d (%s)", timediff,
732 circ->base_.purpose,
733 circuit_purpose_to_string(circ->base_.purpose));
734 } else {
735 /* Only count circuit times if the network is live */
736 if (circuit_build_times_network_check_live(
737 get_circuit_build_times())) {
738 circuit_build_times_add_time(get_circuit_build_times_mutable(),
739 (build_time_t)timediff);
740 circuit_build_times_set_timeout(get_circuit_build_times_mutable());
743 if (circ->base_.purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) {
744 circuit_build_times_network_circ_success(
745 get_circuit_build_times_mutable());
752 * Add a new build time value <b>time</b> to the set of build times. Time
753 * units are milliseconds.
755 * circuit_build_times <b>cbt</b> is a circular array, so loop around when
756 * array is full.
759 circuit_build_times_add_time(circuit_build_times_t *cbt, build_time_t btime)
761 if (btime <= 0 || btime > CBT_BUILD_TIME_MAX) {
762 log_warn(LD_BUG, "Circuit build time is too large (%u)."
763 "This is probably a bug.", btime);
764 tor_fragile_assert();
765 return -1;
768 log_debug(LD_CIRC, "Adding circuit build time %u", btime);
770 cbt->circuit_build_times[cbt->build_times_idx] = btime;
771 cbt->build_times_idx = (cbt->build_times_idx + 1) % CBT_NCIRCUITS_TO_OBSERVE;
772 if (cbt->total_build_times < CBT_NCIRCUITS_TO_OBSERVE)
773 cbt->total_build_times++;
775 if ((cbt->total_build_times % CBT_SAVE_STATE_EVERY) == 0) {
776 /* Save state every n circuit builds */
777 if (!unit_tests && !get_options()->AvoidDiskWrites)
778 or_state_mark_dirty(get_or_state(), 0);
781 return 0;
785 * Return maximum circuit build time
787 static build_time_t
788 circuit_build_times_max(const circuit_build_times_t *cbt)
790 int i = 0;
791 build_time_t max_build_time = 0;
792 for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
793 if (cbt->circuit_build_times[i] > max_build_time
794 && cbt->circuit_build_times[i] != CBT_BUILD_ABANDONED)
795 max_build_time = cbt->circuit_build_times[i];
797 return max_build_time;
800 #if 0
801 /** Return minimum circuit build time */
802 build_time_t
803 circuit_build_times_min(circuit_build_times_t *cbt)
805 int i = 0;
806 build_time_t min_build_time = CBT_BUILD_TIME_MAX;
807 for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
808 if (cbt->circuit_build_times[i] && /* 0 <-> uninitialized */
809 cbt->circuit_build_times[i] < min_build_time)
810 min_build_time = cbt->circuit_build_times[i];
812 if (min_build_time == CBT_BUILD_TIME_MAX) {
813 log_warn(LD_CIRC, "No build times less than CBT_BUILD_TIME_MAX!");
815 return min_build_time;
817 #endif /* 0 */
820 * Calculate and return a histogram for the set of build times.
822 * Returns an allocated array of histrogram bins representing
823 * the frequency of index*CBT_BIN_WIDTH millisecond
824 * build times. Also outputs the number of bins in nbins.
826 * The return value must be freed by the caller.
828 static uint32_t *
829 circuit_build_times_create_histogram(const circuit_build_times_t *cbt,
830 build_time_t *nbins)
832 uint32_t *histogram;
833 build_time_t max_build_time = circuit_build_times_max(cbt);
834 int i, c;
836 *nbins = 1 + (max_build_time / CBT_BIN_WIDTH);
837 histogram = tor_calloc(*nbins, sizeof(build_time_t));
839 // calculate histogram
840 for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
841 if (cbt->circuit_build_times[i] == 0
842 || cbt->circuit_build_times[i] == CBT_BUILD_ABANDONED)
843 continue; /* 0 <-> uninitialized */
845 c = (cbt->circuit_build_times[i] / CBT_BIN_WIDTH);
846 histogram[c]++;
849 return histogram;
853 * Return the Pareto start-of-curve parameter Xm.
855 * Because we are not a true Pareto curve, we compute this as the
856 * weighted average of the N most frequent build time bins. N is either
857 * 1 if we don't have enough circuit build time data collected, or
858 * determined by the consensus parameter cbtnummodes (default 3).
860 static build_time_t
861 circuit_build_times_get_xm(circuit_build_times_t *cbt)
863 build_time_t i, nbins;
864 build_time_t *nth_max_bin;
865 int32_t bin_counts=0;
866 build_time_t ret = 0;
867 uint32_t *histogram = circuit_build_times_create_histogram(cbt, &nbins);
868 int n=0;
869 int num_modes = circuit_build_times_default_num_xm_modes();
871 tor_assert(nbins > 0);
872 tor_assert(num_modes > 0);
874 // Only use one mode if < 1000 buildtimes. Not enough data
875 // for multiple.
876 if (cbt->total_build_times < CBT_NCIRCUITS_TO_OBSERVE)
877 num_modes = 1;
879 nth_max_bin = tor_calloc(num_modes, sizeof(build_time_t));
881 /* Determine the N most common build times */
882 for (i = 0; i < nbins; i++) {
883 if (histogram[i] >= histogram[nth_max_bin[0]]) {
884 nth_max_bin[0] = i;
887 for (n = 1; n < num_modes; n++) {
888 if (histogram[i] >= histogram[nth_max_bin[n]] &&
889 (!histogram[nth_max_bin[n-1]]
890 || histogram[i] < histogram[nth_max_bin[n-1]])) {
891 nth_max_bin[n] = i;
896 for (n = 0; n < num_modes; n++) {
897 bin_counts += histogram[nth_max_bin[n]];
898 ret += CBT_BIN_TO_MS(nth_max_bin[n])*histogram[nth_max_bin[n]];
899 log_info(LD_CIRC, "Xm mode #%d: %u %u", n, CBT_BIN_TO_MS(nth_max_bin[n]),
900 histogram[nth_max_bin[n]]);
903 /* bin_counts can become zero if all of our last CBT_NCIRCUITS_TO_OBSERVE
904 * circuits were abandoned before they completed. This shouldn't happen,
905 * though. We should have reset/re-learned a lower timeout first. */
906 if (bin_counts == 0) {
907 ret = 0;
908 log_warn(LD_CIRC,
909 "No valid circuit build time data out of %d times, %u modes, "
910 "have_timeout=%d, %lfms", cbt->total_build_times, num_modes,
911 cbt->have_computed_timeout, cbt->timeout_ms);
912 goto done;
915 tor_assert(bin_counts > 0);
917 ret /= bin_counts;
919 done:
920 tor_free(histogram);
921 tor_free(nth_max_bin);
923 return ret;
927 * Output a histogram of current circuit build times to
928 * the or_state_t state structure.
930 void
931 circuit_build_times_update_state(const circuit_build_times_t *cbt,
932 or_state_t *state)
934 uint32_t *histogram;
935 build_time_t i = 0;
936 build_time_t nbins = 0;
937 config_line_t **next, *line;
939 histogram = circuit_build_times_create_histogram(cbt, &nbins);
940 // write to state
941 config_free_lines(state->BuildtimeHistogram);
942 next = &state->BuildtimeHistogram;
943 *next = NULL;
945 state->TotalBuildTimes = cbt->total_build_times;
946 state->CircuitBuildAbandonedCount = 0;
948 for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
949 if (cbt->circuit_build_times[i] == CBT_BUILD_ABANDONED)
950 state->CircuitBuildAbandonedCount++;
953 for (i = 0; i < nbins; i++) {
954 // compress the histogram by skipping the blanks
955 if (histogram[i] == 0) continue;
956 *next = line = tor_malloc_zero(sizeof(config_line_t));
957 line->key = tor_strdup("CircuitBuildTimeBin");
958 tor_asprintf(&line->value, "%d %d",
959 CBT_BIN_TO_MS(i), histogram[i]);
960 next = &(line->next);
963 if (!unit_tests) {
964 if (!get_options()->AvoidDiskWrites)
965 or_state_mark_dirty(get_or_state(), 0);
968 tor_free(histogram);
972 * Shuffle the build times array.
974 * Adapted from http://en.wikipedia.org/wiki/Fisher-Yates_shuffle
976 static void
977 circuit_build_times_shuffle_and_store_array(circuit_build_times_t *cbt,
978 build_time_t *raw_times,
979 uint32_t num_times)
981 uint32_t n = num_times;
982 if (num_times > CBT_NCIRCUITS_TO_OBSERVE) {
983 log_notice(LD_CIRC, "The number of circuit times that this Tor version "
984 "uses to calculate build times is less than the number stored "
985 "in your state file. Decreasing the circuit time history from "
986 "%lu to %d.", (unsigned long)num_times,
987 CBT_NCIRCUITS_TO_OBSERVE);
990 if (n > INT_MAX-1) {
991 log_warn(LD_CIRC, "For some insane reasons, you had %lu circuit build "
992 "observations in your state file. That's far too many; probably "
993 "there's a bug here.", (unsigned long)n);
994 n = INT_MAX-1;
997 /* This code can only be run on a compact array */
998 while (n-- > 1) {
999 int k = crypto_rand_int(n + 1); /* 0 <= k <= n. */
1000 build_time_t tmp = raw_times[k];
1001 raw_times[k] = raw_times[n];
1002 raw_times[n] = tmp;
1005 /* Since the times are now shuffled, take a random CBT_NCIRCUITS_TO_OBSERVE
1006 * subset (ie the first CBT_NCIRCUITS_TO_OBSERVE values) */
1007 for (n = 0; n < MIN(num_times, CBT_NCIRCUITS_TO_OBSERVE); n++) {
1008 circuit_build_times_add_time(cbt, raw_times[n]);
1013 * Filter old synthetic timeouts that were created before the
1014 * new right-censored Pareto calculation was deployed.
1016 * Once all clients before 0.2.1.13-alpha are gone, this code
1017 * will be unused.
1019 static int
1020 circuit_build_times_filter_timeouts(circuit_build_times_t *cbt)
1022 int num_filtered=0, i=0;
1023 double timeout_rate = 0;
1024 build_time_t max_timeout = 0;
1026 timeout_rate = circuit_build_times_timeout_rate(cbt);
1027 max_timeout = (build_time_t)cbt->close_ms;
1029 for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
1030 if (cbt->circuit_build_times[i] > max_timeout) {
1031 build_time_t replaced = cbt->circuit_build_times[i];
1032 num_filtered++;
1033 cbt->circuit_build_times[i] = CBT_BUILD_ABANDONED;
1035 log_debug(LD_CIRC, "Replaced timeout %d with %d", replaced,
1036 cbt->circuit_build_times[i]);
1040 log_info(LD_CIRC,
1041 "We had %d timeouts out of %d build times, "
1042 "and filtered %d above the max of %u",
1043 (int)(cbt->total_build_times*timeout_rate),
1044 cbt->total_build_times, num_filtered, max_timeout);
1046 return num_filtered;
1050 * Load histogram from <b>state</b>, shuffling the resulting array
1051 * after we do so. Use this result to estimate parameters and
1052 * calculate the timeout.
1054 * Return -1 on error.
1057 circuit_build_times_parse_state(circuit_build_times_t *cbt,
1058 or_state_t *state)
1060 int tot_values = 0;
1061 uint32_t loaded_cnt = 0, N = 0;
1062 config_line_t *line;
1063 int i;
1064 build_time_t *loaded_times;
1065 int err = 0;
1066 circuit_build_times_init(cbt);
1068 if (circuit_build_times_disabled(get_options())) {
1069 return 0;
1072 /* build_time_t 0 means uninitialized */
1073 loaded_times = tor_calloc(state->TotalBuildTimes, sizeof(build_time_t));
1075 for (line = state->BuildtimeHistogram; line; line = line->next) {
1076 smartlist_t *args = smartlist_new();
1077 smartlist_split_string(args, line->value, " ",
1078 SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, 0);
1079 if (smartlist_len(args) < 2) {
1080 log_warn(LD_GENERAL, "Unable to parse circuit build times: "
1081 "Too few arguments to CircuitBuildTime");
1082 err = 1;
1083 SMARTLIST_FOREACH(args, char*, cp, tor_free(cp));
1084 smartlist_free(args);
1085 break;
1086 } else {
1087 const char *ms_str = smartlist_get(args,0);
1088 const char *count_str = smartlist_get(args,1);
1089 uint32_t count, k;
1090 build_time_t ms;
1091 int ok;
1092 ms = (build_time_t)tor_parse_ulong(ms_str, 10, 0,
1093 CBT_BUILD_TIME_MAX, &ok, NULL);
1094 if (!ok) {
1095 log_warn(LD_GENERAL, "Unable to parse circuit build times: "
1096 "Unparsable bin number");
1097 err = 1;
1098 SMARTLIST_FOREACH(args, char*, cp, tor_free(cp));
1099 smartlist_free(args);
1100 break;
1102 count = (uint32_t)tor_parse_ulong(count_str, 10, 0,
1103 UINT32_MAX, &ok, NULL);
1104 if (!ok) {
1105 log_warn(LD_GENERAL, "Unable to parse circuit build times: "
1106 "Unparsable bin count");
1107 err = 1;
1108 SMARTLIST_FOREACH(args, char*, cp, tor_free(cp));
1109 smartlist_free(args);
1110 break;
1113 if (loaded_cnt+count+ (unsigned)state->CircuitBuildAbandonedCount
1114 > (unsigned) state->TotalBuildTimes) {
1115 log_warn(LD_CIRC,
1116 "Too many build times in state file. "
1117 "Stopping short before %d",
1118 loaded_cnt+count);
1119 SMARTLIST_FOREACH(args, char*, cp, tor_free(cp));
1120 smartlist_free(args);
1121 break;
1124 for (k = 0; k < count; k++) {
1125 loaded_times[loaded_cnt++] = ms;
1127 N++;
1128 SMARTLIST_FOREACH(args, char*, cp, tor_free(cp));
1129 smartlist_free(args);
1133 log_info(LD_CIRC,
1134 "Adding %d timeouts.", state->CircuitBuildAbandonedCount);
1135 for (i=0; i < state->CircuitBuildAbandonedCount; i++) {
1136 loaded_times[loaded_cnt++] = CBT_BUILD_ABANDONED;
1139 if (loaded_cnt != (unsigned)state->TotalBuildTimes) {
1140 log_warn(LD_CIRC,
1141 "Corrupt state file? Build times count mismatch. "
1142 "Read %d times, but file says %d", loaded_cnt,
1143 state->TotalBuildTimes);
1144 err = 1;
1145 circuit_build_times_reset(cbt);
1146 goto done;
1149 circuit_build_times_shuffle_and_store_array(cbt, loaded_times, loaded_cnt);
1151 /* Verify that we didn't overwrite any indexes */
1152 for (i=0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
1153 if (!cbt->circuit_build_times[i])
1154 break;
1155 tot_values++;
1157 log_info(LD_CIRC,
1158 "Loaded %d/%d values from %d lines in circuit time histogram",
1159 tot_values, cbt->total_build_times, N);
1161 if (cbt->total_build_times != tot_values
1162 || cbt->total_build_times > CBT_NCIRCUITS_TO_OBSERVE) {
1163 log_warn(LD_CIRC,
1164 "Corrupt state file? Shuffled build times mismatch. "
1165 "Read %d times, but file says %d", tot_values,
1166 state->TotalBuildTimes);
1167 err = 1;
1168 circuit_build_times_reset(cbt);
1169 goto done;
1172 circuit_build_times_set_timeout(cbt);
1174 if (!state->CircuitBuildAbandonedCount && cbt->total_build_times) {
1175 circuit_build_times_filter_timeouts(cbt);
1178 done:
1179 tor_free(loaded_times);
1180 return err ? -1 : 0;
1184 * Estimates the Xm and Alpha parameters using
1185 * http://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation
1187 * The notable difference is that we use mode instead of min to estimate Xm.
1188 * This is because our distribution is frechet-like. We claim this is
1189 * an acceptable approximation because we are only concerned with the
1190 * accuracy of the CDF of the tail.
1192 STATIC int
1193 circuit_build_times_update_alpha(circuit_build_times_t *cbt)
1195 build_time_t *x=cbt->circuit_build_times;
1196 double a = 0;
1197 int n=0,i=0,abandoned_count=0;
1198 build_time_t max_time=0;
1200 /* http://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation */
1201 /* We sort of cheat here and make our samples slightly more pareto-like
1202 * and less frechet-like. */
1203 cbt->Xm = circuit_build_times_get_xm(cbt);
1205 /* If Xm came back 0, then too many circuits were abandoned. */
1206 if (cbt->Xm == 0)
1207 return 0;
1209 tor_assert(cbt->Xm > 0);
1211 for (i=0; i< CBT_NCIRCUITS_TO_OBSERVE; i++) {
1212 if (!x[i]) {
1213 continue;
1216 if (x[i] < cbt->Xm) {
1217 a += tor_mathlog(cbt->Xm);
1218 } else if (x[i] == CBT_BUILD_ABANDONED) {
1219 abandoned_count++;
1220 } else {
1221 a += tor_mathlog(x[i]);
1222 if (x[i] > max_time)
1223 max_time = x[i];
1225 n++;
1229 * We are erring and asserting here because this can only happen
1230 * in codepaths other than startup. The startup state parsing code
1231 * performs this same check, and resets state if it hits it. If we
1232 * hit it at runtime, something serious has gone wrong.
1234 if (n!=cbt->total_build_times) {
1235 log_err(LD_CIRC, "Discrepancy in build times count: %d vs %d", n,
1236 cbt->total_build_times);
1238 tor_assert(n==cbt->total_build_times);
1240 if (max_time <= 0) {
1241 /* This can happen if Xm is actually the *maximum* value in the set.
1242 * It can also happen if we've abandoned every single circuit somehow.
1243 * In either case, tell the caller not to compute a new build timeout. */
1244 log_warn(LD_BUG,
1245 "Could not determine largest build time (%d). "
1246 "Xm is %dms and we've abandoned %d out of %d circuits.", max_time,
1247 cbt->Xm, abandoned_count, n);
1248 return 0;
1251 a += abandoned_count*tor_mathlog(max_time);
1253 a -= n*tor_mathlog(cbt->Xm);
1254 // Estimator comes from Eq #4 in:
1255 // "Bayesian estimation based on trimmed samples from Pareto populations"
1256 // by Arturo J. Fernández. We are right-censored only.
1257 a = (n-abandoned_count)/a;
1259 cbt->alpha = a;
1261 return 1;
1265 * This is the Pareto Quantile Function. It calculates the point x
1266 * in the distribution such that F(x) = quantile (ie quantile*100%
1267 * of the mass of the density function is below x on the curve).
1269 * We use it to calculate the timeout and also to generate synthetic
1270 * values of time for circuits that timeout before completion.
1272 * See http://en.wikipedia.org/wiki/Quantile_function,
1273 * http://en.wikipedia.org/wiki/Inverse_transform_sampling and
1274 * http://en.wikipedia.org/wiki/Pareto_distribution#Generating_a_
1275 * random_sample_from_Pareto_distribution
1276 * That's right. I'll cite wikipedia all day long.
1278 * Return value is in milliseconds, clamped to INT32_MAX.
1280 STATIC double
1281 circuit_build_times_calculate_timeout(circuit_build_times_t *cbt,
1282 double quantile)
1284 double ret;
1285 tor_assert(quantile >= 0);
1286 tor_assert(1.0-quantile > 0);
1287 tor_assert(cbt->Xm > 0);
1289 /* If either alpha or p are 0, we would divide by zero, yielding an
1290 * infinite (double) result; which would be clamped to INT32_MAX.
1291 * Instead, initialise ret to INT32_MAX, and skip over these
1292 * potentially illegal/trapping divides by zero.
1294 ret = INT32_MAX;
1296 if (cbt->alpha > 0) {
1297 double p;
1298 p = pow(1.0-quantile,1.0/cbt->alpha);
1299 if (p > 0) {
1300 ret = cbt->Xm/p;
1304 if (ret > INT32_MAX) {
1305 ret = INT32_MAX;
1307 tor_assert(ret > 0);
1308 return ret;
1311 #ifdef TOR_UNIT_TESTS
1312 /** Pareto CDF */
1313 double
1314 circuit_build_times_cdf(circuit_build_times_t *cbt, double x)
1316 double ret;
1317 tor_assert(cbt->Xm > 0);
1318 ret = 1.0-pow(cbt->Xm/x,cbt->alpha);
1319 tor_assert(0 <= ret && ret <= 1.0);
1320 return ret;
1322 #endif /* defined(TOR_UNIT_TESTS) */
1324 #ifdef TOR_UNIT_TESTS
1326 * Generate a synthetic time using our distribution parameters.
1328 * The return value will be within the [q_lo, q_hi) quantile points
1329 * on the CDF.
1331 build_time_t
1332 circuit_build_times_generate_sample(circuit_build_times_t *cbt,
1333 double q_lo, double q_hi)
1335 double randval = crypto_rand_double();
1336 build_time_t ret;
1337 double u;
1339 /* Generate between [q_lo, q_hi) */
1340 /*XXXX This is what nextafter is supposed to be for; we should use it on the
1341 * platforms that support it. */
1342 q_hi -= 1.0/(INT32_MAX);
1344 tor_assert(q_lo >= 0);
1345 tor_assert(q_hi < 1);
1346 tor_assert(q_lo < q_hi);
1348 u = q_lo + (q_hi-q_lo)*randval;
1350 tor_assert(0 <= u && u < 1.0);
1351 /* circuit_build_times_calculate_timeout returns <= INT32_MAX */
1352 ret = (build_time_t)
1353 tor_lround(circuit_build_times_calculate_timeout(cbt, u));
1354 tor_assert(ret > 0);
1355 return ret;
1357 #endif /* defined(TOR_UNIT_TESTS) */
1359 #ifdef TOR_UNIT_TESTS
1361 * Estimate an initial alpha parameter by solving the quantile
1362 * function with a quantile point and a specific timeout value.
1364 void
1365 circuit_build_times_initial_alpha(circuit_build_times_t *cbt,
1366 double quantile, double timeout_ms)
1368 // Q(u) = Xm/((1-u)^(1/a))
1369 // Q(0.8) = Xm/((1-0.8))^(1/a)) = CircBuildTimeout
1370 // CircBuildTimeout = Xm/((1-0.8))^(1/a))
1371 // CircBuildTimeout = Xm*((1-0.8))^(-1/a))
1372 // ln(CircBuildTimeout) = ln(Xm)+ln(((1-0.8)))*(-1/a)
1373 // -ln(1-0.8)/(ln(CircBuildTimeout)-ln(Xm))=a
1374 tor_assert(quantile >= 0);
1375 tor_assert(cbt->Xm > 0);
1376 cbt->alpha = tor_mathlog(1.0-quantile)/
1377 (tor_mathlog(cbt->Xm)-tor_mathlog(timeout_ms));
1378 tor_assert(cbt->alpha > 0);
1380 #endif /* defined(TOR_UNIT_TESTS) */
1383 * Returns true if we need circuits to be built
1386 circuit_build_times_needs_circuits(const circuit_build_times_t *cbt)
1388 /* Return true if < MIN_CIRCUITS_TO_OBSERVE */
1389 return !circuit_build_times_enough_to_compute(cbt);
1393 * Returns true if we should build a timeout test circuit
1394 * right now.
1397 circuit_build_times_needs_circuits_now(const circuit_build_times_t *cbt)
1399 return circuit_build_times_needs_circuits(cbt) &&
1400 approx_time()-cbt->last_circ_at > circuit_build_times_test_frequency();
1404 * How long should we be unreachable before we think we need to check if
1405 * our published IP address has changed.
1407 #define CIRCUIT_TIMEOUT_BEFORE_RECHECK_IP (60*3)
1410 * Called to indicate that the network showed some signs of liveness,
1411 * i.e. we received a cell.
1413 * This is used by circuit_build_times_network_check_live() to decide
1414 * if we should record the circuit build timeout or not.
1416 * This function is called every time we receive a cell. Avoid
1417 * syscalls, events, and other high-intensity work.
1419 void
1420 circuit_build_times_network_is_live(circuit_build_times_t *cbt)
1422 time_t now = approx_time();
1423 if (cbt->liveness.nonlive_timeouts > 0) {
1424 time_t time_since_live = now - cbt->liveness.network_last_live;
1425 log_notice(LD_CIRC,
1426 "Tor now sees network activity. Restoring circuit build "
1427 "timeout recording. Network was down for %d seconds "
1428 "during %d circuit attempts.",
1429 (int)time_since_live,
1430 cbt->liveness.nonlive_timeouts);
1431 if (time_since_live > CIRCUIT_TIMEOUT_BEFORE_RECHECK_IP)
1432 reschedule_descriptor_update_check();
1434 cbt->liveness.network_last_live = now;
1435 cbt->liveness.nonlive_timeouts = 0;
1437 /* Tell control.c */
1438 control_event_network_liveness_update(1);
1442 * Non-destructively scale all of our circuit success, timeout, and close
1443 * counts down by a factor of two. Scaling in this way preserves the
1444 * ratios between succeeded vs timed out vs closed circuits, so that
1445 * our statistics don't change when we scale.
1447 * This is used only in the rare event that we build more than
1448 * INT32_MAX circuits. Since the num_circ_* variables are
1449 * uint32_t, we won't even be close to overflowing them.
1451 void
1452 circuit_build_times_scale_circ_counts(circuit_build_times_t *cbt)
1454 cbt->num_circ_succeeded /= 2;
1455 cbt->num_circ_timeouts /= 2;
1456 cbt->num_circ_closed /= 2;
1460 * Called to indicate that we "completed" a circuit. Because this circuit
1461 * succeeded, it doesn't count as a timeout-after-the-first-hop.
1463 * (For the purposes of the cbt code, we consider a circuit "completed" if
1464 * it has 3 hops, regardless of its final hop count. We do this because
1465 * we're trying to answer the question, "how long should a circuit take to
1466 * reach the 3-hop count".)
1468 * This is used by circuit_build_times_network_check_changed() to determine
1469 * if we had too many recent timeouts and need to reset our learned timeout
1470 * to something higher.
1472 void
1473 circuit_build_times_network_circ_success(circuit_build_times_t *cbt)
1475 // Count circuit success
1476 cbt->num_circ_succeeded++;
1478 // If we're going to wrap int32, scale everything
1479 if (cbt->num_circ_succeeded >= INT32_MAX) {
1480 circuit_build_times_scale_circ_counts(cbt);
1483 /* Check for NULLness because we might not be using adaptive timeouts */
1484 if (cbt->liveness.timeouts_after_firsthop &&
1485 cbt->liveness.num_recent_circs > 0) {
1486 cbt->liveness.timeouts_after_firsthop[cbt->liveness.after_firsthop_idx]
1487 = 0;
1488 cbt->liveness.after_firsthop_idx++;
1489 cbt->liveness.after_firsthop_idx %= cbt->liveness.num_recent_circs;
1494 * A circuit just timed out. If it failed after the first hop, record it
1495 * in our history for later deciding if the network speed has changed.
1497 * This is used by circuit_build_times_network_check_changed() to determine
1498 * if we had too many recent timeouts and need to reset our learned timeout
1499 * to something higher.
1501 static void
1502 circuit_build_times_network_timeout(circuit_build_times_t *cbt,
1503 int did_onehop)
1505 // Count circuit timeout
1506 cbt->num_circ_timeouts++;
1508 // If we're going to wrap int32, scale everything
1509 if (cbt->num_circ_timeouts >= INT32_MAX) {
1510 circuit_build_times_scale_circ_counts(cbt);
1513 /* Check for NULLness because we might not be using adaptive timeouts */
1514 if (cbt->liveness.timeouts_after_firsthop &&
1515 cbt->liveness.num_recent_circs > 0) {
1516 if (did_onehop) {
1517 cbt->liveness.timeouts_after_firsthop[cbt->liveness.after_firsthop_idx]
1518 = 1;
1519 cbt->liveness.after_firsthop_idx++;
1520 cbt->liveness.after_firsthop_idx %= cbt->liveness.num_recent_circs;
1526 * A circuit was just forcibly closed. If there has been no recent network
1527 * activity at all, but this circuit was launched back when we thought the
1528 * network was live, increment the number of "nonlive" circuit timeouts.
1530 * This is used by circuit_build_times_network_check_live() to decide
1531 * if we should record the circuit build timeout or not.
1533 static void
1534 circuit_build_times_network_close(circuit_build_times_t *cbt,
1535 int did_onehop, time_t start_time)
1537 time_t now = time(NULL);
1539 // Count circuit close
1540 cbt->num_circ_closed++;
1542 // If we're going to wrap int32, scale everything
1543 if (cbt->num_circ_closed >= INT32_MAX) {
1544 circuit_build_times_scale_circ_counts(cbt);
1548 * Check if this is a timeout that was for a circuit that spent its
1549 * entire existence during a time where we have had no network activity.
1551 if (cbt->liveness.network_last_live < start_time) {
1552 if (did_onehop) {
1553 char last_live_buf[ISO_TIME_LEN+1];
1554 char start_time_buf[ISO_TIME_LEN+1];
1555 char now_buf[ISO_TIME_LEN+1];
1556 format_local_iso_time(last_live_buf, cbt->liveness.network_last_live);
1557 format_local_iso_time(start_time_buf, start_time);
1558 format_local_iso_time(now_buf, now);
1559 log_notice(LD_CIRC,
1560 "A circuit somehow completed a hop while the network was "
1561 "not live. The network was last live at %s, but the circuit "
1562 "launched at %s. It's now %s. This could mean your clock "
1563 "changed.", last_live_buf, start_time_buf, now_buf);
1565 cbt->liveness.nonlive_timeouts++;
1566 if (cbt->liveness.nonlive_timeouts == 1) {
1567 log_notice(LD_CIRC,
1568 "Tor has not observed any network activity for the past %d "
1569 "seconds. Disabling circuit build timeout recording.",
1570 (int)(now - cbt->liveness.network_last_live));
1572 /* Tell control.c */
1573 control_event_network_liveness_update(0);
1574 } else {
1575 log_info(LD_CIRC,
1576 "Got non-live timeout. Current count is: %d",
1577 cbt->liveness.nonlive_timeouts);
1583 * When the network is not live, we do not record circuit build times.
1585 * The network is considered not live if there has been at least one
1586 * circuit build that began and ended (had its close_ms measurement
1587 * period expire) since we last received a cell.
1589 * Also has the side effect of rewinding the circuit time history
1590 * in the case of recent liveness changes.
1593 circuit_build_times_network_check_live(const circuit_build_times_t *cbt)
1595 if (cbt->liveness.nonlive_timeouts > 0) {
1596 return 0;
1599 return 1;
1603 * Returns true if we have seen more than MAX_RECENT_TIMEOUT_COUNT of
1604 * the past RECENT_CIRCUITS time out after the first hop. Used to detect
1605 * if the network connection has changed significantly, and if so,
1606 * resets our circuit build timeout to the default.
1608 * Also resets the entire timeout history in this case and causes us
1609 * to restart the process of building test circuits and estimating a
1610 * new timeout.
1612 STATIC int
1613 circuit_build_times_network_check_changed(circuit_build_times_t *cbt)
1615 int total_build_times = cbt->total_build_times;
1616 int timeout_count=0;
1617 int i;
1619 if (cbt->liveness.timeouts_after_firsthop &&
1620 cbt->liveness.num_recent_circs > 0) {
1621 /* how many of our recent circuits made it to the first hop but then
1622 * timed out? */
1623 for (i = 0; i < cbt->liveness.num_recent_circs; i++) {
1624 timeout_count += cbt->liveness.timeouts_after_firsthop[i];
1628 /* If 80% of our recent circuits are timing out after the first hop,
1629 * we need to re-estimate a new initial alpha and timeout. */
1630 if (timeout_count < circuit_build_times_max_timeouts()) {
1631 return 0;
1634 circuit_build_times_reset(cbt);
1635 if (cbt->liveness.timeouts_after_firsthop &&
1636 cbt->liveness.num_recent_circs > 0) {
1637 memset(cbt->liveness.timeouts_after_firsthop, 0,
1638 sizeof(*cbt->liveness.timeouts_after_firsthop)*
1639 cbt->liveness.num_recent_circs);
1641 cbt->liveness.after_firsthop_idx = 0;
1643 #define MAX_TIMEOUT ((int32_t) (INT32_MAX/2))
1644 /* Check to see if this has happened before. If so, double the timeout
1645 * to give clients on abysmally bad network connections a shot at access */
1646 if (cbt->timeout_ms >= circuit_build_times_get_initial_timeout()) {
1647 if (cbt->timeout_ms > MAX_TIMEOUT || cbt->close_ms > MAX_TIMEOUT) {
1648 log_warn(LD_CIRC, "Insanely large circuit build timeout value. "
1649 "(timeout = %fmsec, close = %fmsec)",
1650 cbt->timeout_ms, cbt->close_ms);
1651 } else {
1652 cbt->timeout_ms *= 2;
1653 cbt->close_ms *= 2;
1655 } else {
1656 cbt->close_ms = cbt->timeout_ms
1657 = circuit_build_times_get_initial_timeout();
1659 #undef MAX_TIMEOUT
1661 cbt_control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_RESET);
1663 log_notice(LD_CIRC,
1664 "Your network connection speed appears to have changed. Resetting "
1665 "timeout to %lds after %d timeouts and %d buildtimes.",
1666 tor_lround(cbt->timeout_ms/1000), timeout_count,
1667 total_build_times);
1669 return 1;
1673 * Count the number of timeouts in a set of cbt data.
1675 double
1676 circuit_build_times_timeout_rate(const circuit_build_times_t *cbt)
1678 int i=0,timeouts=0;
1679 for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
1680 if (cbt->circuit_build_times[i] >= cbt->timeout_ms) {
1681 timeouts++;
1685 if (!cbt->total_build_times)
1686 return 0;
1688 return ((double)timeouts)/cbt->total_build_times;
1692 * Count the number of closed circuits in a set of cbt data.
1694 double
1695 circuit_build_times_close_rate(const circuit_build_times_t *cbt)
1697 int i=0,closed=0;
1698 for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
1699 if (cbt->circuit_build_times[i] == CBT_BUILD_ABANDONED) {
1700 closed++;
1704 if (!cbt->total_build_times)
1705 return 0;
1707 return ((double)closed)/cbt->total_build_times;
1711 * Store a timeout as a synthetic value.
1713 * Returns true if the store was successful and we should possibly
1714 * update our timeout estimate.
1717 circuit_build_times_count_close(circuit_build_times_t *cbt,
1718 int did_onehop,
1719 time_t start_time)
1721 if (circuit_build_times_disabled(get_options())) {
1722 cbt->close_ms = cbt->timeout_ms
1723 = circuit_build_times_get_initial_timeout();
1724 return 0;
1727 /* Record this force-close to help determine if the network is dead */
1728 circuit_build_times_network_close(cbt, did_onehop, start_time);
1730 /* Only count timeouts if network is live.. */
1731 if (!circuit_build_times_network_check_live(cbt)) {
1732 return 0;
1735 circuit_build_times_add_time(cbt, CBT_BUILD_ABANDONED);
1736 return 1;
1740 * Update timeout counts to determine if we need to expire
1741 * our build time history due to excessive timeouts.
1743 * We do not record any actual time values at this stage;
1744 * we are only interested in recording the fact that a timeout
1745 * happened. We record the time values via
1746 * circuit_build_times_count_close() and circuit_build_times_add_time().
1748 void
1749 circuit_build_times_count_timeout(circuit_build_times_t *cbt,
1750 int did_onehop)
1752 if (circuit_build_times_disabled(get_options())) {
1753 cbt->close_ms = cbt->timeout_ms
1754 = circuit_build_times_get_initial_timeout();
1755 return;
1758 /* Register the fact that a timeout just occurred. */
1759 circuit_build_times_network_timeout(cbt, did_onehop);
1761 /* If there are a ton of timeouts, we should reset
1762 * the circuit build timeout. */
1763 circuit_build_times_network_check_changed(cbt);
1767 * Estimate a new timeout based on history and set our timeout
1768 * variable accordingly.
1770 static int
1771 circuit_build_times_set_timeout_worker(circuit_build_times_t *cbt)
1773 build_time_t max_time;
1774 if (!circuit_build_times_enough_to_compute(cbt))
1775 return 0;
1777 if (!circuit_build_times_update_alpha(cbt))
1778 return 0;
1780 cbt->timeout_ms = circuit_build_times_calculate_timeout(cbt,
1781 circuit_build_times_quantile_cutoff());
1783 cbt->close_ms = circuit_build_times_calculate_timeout(cbt,
1784 circuit_build_times_close_quantile());
1786 max_time = circuit_build_times_max(cbt);
1788 if (cbt->timeout_ms > max_time) {
1789 log_info(LD_CIRC,
1790 "Circuit build timeout of %dms is beyond the maximum build "
1791 "time we have ever observed. Capping it to %dms.",
1792 (int)cbt->timeout_ms, max_time);
1793 cbt->timeout_ms = max_time;
1796 if (max_time < INT32_MAX/2 && cbt->close_ms > 2*max_time) {
1797 log_info(LD_CIRC,
1798 "Circuit build measurement period of %dms is more than twice "
1799 "the maximum build time we have ever observed. Capping it to "
1800 "%dms.", (int)cbt->close_ms, 2*max_time);
1801 cbt->close_ms = 2*max_time;
1804 /* Sometimes really fast guard nodes give us such a steep curve
1805 * that this ends up being not that much greater than timeout_ms.
1806 * Make it be at least 1 min to handle this case. */
1807 cbt->close_ms = MAX(cbt->close_ms, circuit_build_times_initial_timeout());
1809 cbt->have_computed_timeout = 1;
1810 return 1;
1814 * Exposed function to compute a new timeout. Dispatches events and
1815 * also filters out extremely high timeout values.
1817 void
1818 circuit_build_times_set_timeout(circuit_build_times_t *cbt)
1820 long prev_timeout = tor_lround(cbt->timeout_ms/1000);
1821 double timeout_rate;
1824 * Just return if we aren't using adaptive timeouts
1826 if (circuit_build_times_disabled(get_options()))
1827 return;
1829 if (!circuit_build_times_set_timeout_worker(cbt))
1830 return;
1832 if (cbt->timeout_ms < circuit_build_times_min_timeout()) {
1833 log_info(LD_CIRC, "Set buildtimeout to low value %fms. Setting to %dms",
1834 cbt->timeout_ms, circuit_build_times_min_timeout());
1835 cbt->timeout_ms = circuit_build_times_min_timeout();
1836 if (cbt->close_ms < cbt->timeout_ms) {
1837 /* This shouldn't happen because of MAX() in timeout_worker above,
1838 * but doing it just in case */
1839 cbt->close_ms = circuit_build_times_initial_timeout();
1843 cbt_control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_COMPUTED);
1845 timeout_rate = circuit_build_times_timeout_rate(cbt);
1847 if (prev_timeout > tor_lround(cbt->timeout_ms/1000)) {
1848 log_info(LD_CIRC,
1849 "Based on %d circuit times, it looks like we don't need to "
1850 "wait so long for circuits to finish. We will now assume a "
1851 "circuit is too slow to use after waiting %ld seconds.",
1852 cbt->total_build_times,
1853 tor_lround(cbt->timeout_ms/1000));
1854 log_info(LD_CIRC,
1855 "Circuit timeout data: %fms, %fms, Xm: %d, a: %f, r: %f",
1856 cbt->timeout_ms, cbt->close_ms, cbt->Xm, cbt->alpha,
1857 timeout_rate);
1858 } else if (prev_timeout < tor_lround(cbt->timeout_ms/1000)) {
1859 log_info(LD_CIRC,
1860 "Based on %d circuit times, it looks like we need to wait "
1861 "longer for circuits to finish. We will now assume a "
1862 "circuit is too slow to use after waiting %ld seconds.",
1863 cbt->total_build_times,
1864 tor_lround(cbt->timeout_ms/1000));
1865 log_info(LD_CIRC,
1866 "Circuit timeout data: %fms, %fms, Xm: %d, a: %f, r: %f",
1867 cbt->timeout_ms, cbt->close_ms, cbt->Xm, cbt->alpha,
1868 timeout_rate);
1869 } else {
1870 log_info(LD_CIRC,
1871 "Set circuit build timeout to %lds (%fms, %fms, Xm: %d, a: %f,"
1872 " r: %f) based on %d circuit times",
1873 tor_lround(cbt->timeout_ms/1000),
1874 cbt->timeout_ms, cbt->close_ms, cbt->Xm, cbt->alpha, timeout_rate,
1875 cbt->total_build_times);
1879 #ifdef TOR_UNIT_TESTS
1880 /** Make a note that we're running unit tests (rather than running Tor
1881 * itself), so we avoid clobbering our state file. */
1882 void
1883 circuitbuild_running_unit_tests(void)
1885 unit_tests = 1;
1887 #endif /* defined(TOR_UNIT_TESTS) */
1889 void
1890 circuit_build_times_update_last_circ(circuit_build_times_t *cbt)
1892 cbt->last_circ_at = approx_time();
1895 static void
1896 cbt_control_event_buildtimeout_set(const circuit_build_times_t *cbt,
1897 buildtimeout_set_event_t type)
1899 char *args = NULL;
1900 double qnt;
1901 double timeout_rate = 0.0;
1902 double close_rate = 0.0;
1904 switch (type) {
1905 case BUILDTIMEOUT_SET_EVENT_RESET:
1906 case BUILDTIMEOUT_SET_EVENT_SUSPENDED:
1907 case BUILDTIMEOUT_SET_EVENT_DISCARD:
1908 qnt = 1.0;
1909 break;
1910 case BUILDTIMEOUT_SET_EVENT_COMPUTED:
1911 case BUILDTIMEOUT_SET_EVENT_RESUME:
1912 default:
1913 qnt = circuit_build_times_quantile_cutoff();
1914 break;
1917 /* The timeout rate is the ratio of the timeout count over
1918 * the total number of circuits attempted. The total number of
1919 * circuits is (timeouts+succeeded), since every circuit
1920 * either succeeds, or times out. "Closed" circuits are
1921 * MEASURE_TIMEOUT circuits whose measurement period expired.
1922 * All MEASURE_TIMEOUT circuits are counted in the timeouts stat
1923 * before transitioning to MEASURE_TIMEOUT (in
1924 * circuit_build_times_mark_circ_as_measurement_only()).
1925 * MEASURE_TIMEOUT circuits that succeed are *not* counted as
1926 * "succeeded". See circuit_build_times_handle_completed_hop().
1928 * We cast the denominator
1929 * to promote it to double before the addition, to avoid int32
1930 * overflow. */
1931 const double total_circuits =
1932 ((double)cbt->num_circ_timeouts) + cbt->num_circ_succeeded;
1933 if (total_circuits >= 1.0) {
1934 timeout_rate = cbt->num_circ_timeouts / total_circuits;
1935 close_rate = cbt->num_circ_closed / total_circuits;
1938 tor_asprintf(&args, "TOTAL_TIMES=%lu "
1939 "TIMEOUT_MS=%lu XM=%lu ALPHA=%f CUTOFF_QUANTILE=%f "
1940 "TIMEOUT_RATE=%f CLOSE_MS=%lu CLOSE_RATE=%f",
1941 (unsigned long)cbt->total_build_times,
1942 (unsigned long)cbt->timeout_ms,
1943 (unsigned long)cbt->Xm, cbt->alpha, qnt,
1944 timeout_rate,
1945 (unsigned long)cbt->close_ms,
1946 close_rate);
1948 control_event_buildtimeout_set(type, args);
1950 tor_free(args);