relay: Change DNS timeout label on MetricsPort
[tor.git] / src / feature / stats / rephist.c
blob5ff4ef1d2e8d09833155d332880bbfbc4b59d0d0
1 /* Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
2 * Copyright (c) 2007-2021, The Tor Project, Inc. */
3 /* See LICENSE for licensing information */
5 /**
6 * \file rephist.c
7 * \brief Basic history and performance-tracking functionality.
9 * Basic history and performance-tracking functionality to remember
10 * which servers have worked in the past, how much bandwidth we've
11 * been using, which ports we tend to want, and so on; further,
12 * exit port statistics, cell statistics, and connection statistics.
14 * The history and information tracked in this module could sensibly be
15 * divided into several categories:
17 * <ul><li>Statistics used by authorities to remember the uptime and
18 * stability information about various relays, including "uptime",
19 * "weighted fractional uptime" and "mean time between failures".
21 * <li>Predicted ports, used by clients to remember how long it's been
22 * since they opened an exit connection to each given target
23 * port. Clients use this information in order to try to keep circuits
24 * open to exit nodes that can connect to the ports that they care
25 * about. (The predicted ports mechanism also handles predicted circuit
26 * usage that _isn't_ port-specific, such as resolves, internal circuits,
27 * and so on.)
29 * <li>Public key operation counters, for tracking how many times we've
30 * done each public key operation. (This is unmaintained and we should
31 * remove it.)
33 * <li>Exit statistics by port, used by exits to keep track of the
34 * number of streams and bytes they've served at each exit port, so they
35 * can generate their exit-kibibytes-{read,written} and
36 * exit-streams-opened statistics.
38 * <li>Circuit stats, used by relay instances to track circuit
39 * queue fullness and delay over time, and generate cell-processed-cells,
40 * cell-queued-cells, cell-time-in-queue, and cell-circuits-per-decile
41 * statistics.
43 * <li>Descriptor serving statistics, used by directory caches to track
44 * how many descriptors they've served.
46 * <li>Onion handshake statistics, used by relays to count how many
47 * TAP and ntor handshakes they've handled.
49 * <li>Hidden service statistics, used by relays to count rendezvous
50 * traffic and HSDir-stored descriptors.
52 * <li>Link protocol statistics, used by relays to count how many times
53 * each link protocol has been used.
55 * </ul>
57 * The entry points for this module are scattered throughout the
58 * codebase. Sending data, receiving data, connecting to a relay,
59 * losing a connection to a relay, and so on can all trigger a change in
60 * our current stats. Relays also invoke this module in order to
61 * extract their statistics when building routerinfo and extrainfo
62 * objects in router.c.
64 * TODO: This module should be broken up.
66 * (The "rephist" name originally stood for "reputation and history". )
67 **/
69 #define REPHIST_PRIVATE
70 #include "core/or/or.h"
71 #include "app/config/config.h"
72 #include "core/or/circuitlist.h"
73 #include "core/or/connection_or.h"
74 #include "feature/dirauth/authmode.h"
75 #include "feature/nodelist/networkstatus.h"
76 #include "feature/nodelist/nodelist.h"
77 #include "feature/stats/predict_ports.h"
78 #include "feature/stats/connstats.h"
79 #include "feature/stats/rephist.h"
80 #include "lib/container/order.h"
81 #include "lib/crypt_ops/crypto_rand.h"
82 #include "lib/math/laplace.h"
84 #include "feature/nodelist/networkstatus_st.h"
85 #include "core/or/or_circuit_st.h"
87 #include <event2/dns.h>
89 #ifdef HAVE_FCNTL_H
90 #include <fcntl.h>
91 #endif
/** Running total, in bytes, of the storage rephist.c has allocated for its
 * history fields. */
uint64_t rephist_total_alloc = 0;
/** How many or_history_t objects are currently allocated. */
uint32_t rephist_total_num = 0;
/** A router whose total weighted run count (summed over all of its runs)
 * drops below this threshold may be treated as having an MTBF of 0. */
#define STABILITY_EPSILON   0.0001
/** Discount factor applied to all old intervals for MTBF purposes. It is
 * compounded once per STABILITY_INTERVAL. */
#define STABILITY_ALPHA     0.95
/** Number of seconds between successive discounts of old MTBF intervals. */
#define STABILITY_INTERVAL  (12*60*60)
/* (With this combination of ALPHA, INTERVAL, and EPSILON, an interval that
 * just ended counts twice as much as one that ended a week ago and 20X as
 * much as one that ended a month ago; routers that have had no uptime data
 * for about half a year will get forgotten.) */
111 /** History of an OR. */
112 typedef struct or_history_t {
113 /** When did we start tracking this OR? */
114 time_t since;
115 /** When did we most recently note a change to this OR? */
116 time_t changed;
118 /** The address at which we most recently connected to this OR
119 * successfully. */
120 tor_addr_t last_reached_addr;
122 /** The port at which we most recently connected to this OR successfully */
123 uint16_t last_reached_port;
125 /* === For MTBF tracking: */
126 /** Weighted sum total of all times that this router has been online.
128 unsigned long weighted_run_length;
129 /** If the router is now online (according to stability-checking rules),
130 * when did it come online? */
131 time_t start_of_run;
132 /** Sum of weights for runs in weighted_run_length. */
133 double total_run_weights;
134 /* === For fractional uptime tracking: */
135 time_t start_of_downtime;
136 unsigned long weighted_uptime;
137 unsigned long total_weighted_time;
138 } or_history_t;
141 * This structure holds accounting needed to calculate the padding overhead.
143 typedef struct padding_counts_t {
144 /** Total number of cells we have received, including padding */
145 uint64_t read_cell_count;
146 /** Total number of cells we have sent, including padding */
147 uint64_t write_cell_count;
148 /** Total number of CELL_PADDING cells we have received */
149 uint64_t read_pad_cell_count;
150 /** Total number of CELL_PADDING cells we have sent */
151 uint64_t write_pad_cell_count;
152 /** Total number of read cells on padding-enabled conns */
153 uint64_t enabled_read_cell_count;
154 /** Total number of sent cells on padding-enabled conns */
155 uint64_t enabled_write_cell_count;
156 /** Total number of read CELL_PADDING cells on padding-enabled cons */
157 uint64_t enabled_read_pad_cell_count;
158 /** Total number of sent CELL_PADDING cells on padding-enabled cons */
159 uint64_t enabled_write_pad_cell_count;
160 /** Total number of RELAY_DROP cells we have received */
161 uint64_t read_drop_cell_count;
162 /** Total number of RELAY_DROP cells we have sent */
163 uint64_t write_drop_cell_count;
164 /** The maximum number of padding timers we've seen in 24 hours */
165 uint64_t maximum_chanpad_timers;
166 /** When did we first copy padding_current into padding_published? */
167 char first_published_at[ISO_TIME_LEN+1];
168 } padding_counts_t;
170 /** Holds the current values of our padding statistics.
171 * It is not published until it is transferred to padding_published. */
172 static padding_counts_t padding_current;
174 /** Remains fixed for a 24 hour period, and then is replaced
175 * by a redacted copy of padding_current */
176 static padding_counts_t padding_published;
178 /** When did we last multiply all routers' weighted_run_length and
179 * total_run_weights by STABILITY_ALPHA? */
180 static time_t stability_last_downrated = 0;
182 /** */
183 static time_t started_tracking_stability = 0;
185 /** Map from hex OR identity digest to or_history_t. */
186 static digestmap_t *history_map = NULL;
/** Snapshot of the overload statistics.
 *
 * Every timestamp stored here has already been rounded down to the nearest
 * hour. */
typedef struct {
  /* Time of the most recent general overload. */
  time_t overload_general_time;

  /* Time of the most recent bandwidth-related overload. */
  time_t overload_ratelimits_time;
  /* Number of times we ran into our read limits. */
  uint64_t overload_read_count;
  /* Number of times we ran into our write limits. */
  uint64_t overload_write_count;

  /* Time of the most recent file descriptor exhaustion. */
  time_t overload_fd_exhausted_time;
  /* Number of file descriptor exhaustions we have experienced. */
  uint64_t overload_fd_exhausted;
} overload_stats_t;
209 /** Current state of overload stats */
210 static overload_stats_t overload_stats;
212 /** Counters to count the number of times we've reached an overload for the
213 * global connection read/write limit. Reported on the MetricsPort. */
214 static uint64_t stats_n_read_limit_reached = 0;
215 static uint64_t stats_n_write_limit_reached = 0;
217 /** Total number of times we've reached TCP port exhaustion. */
218 static uint64_t stats_n_tcp_exhaustion = 0;
220 /***** DNS statistics *****/
/** DNS statistics used for the overload signal: the data here is what we
 * consult to decide whether DNS errors warrant emitting a general overload.
 *
 * NOTE: Unlike the statistics below, this structure is _not_ kept per DNS
 * query type. Because of a libevent bug
 * (https://github.com/libevent/libevent/issues/1219), the type is not
 * propagated back up to the user on error, so we keep our own stats for the
 * overload signal. */
typedef struct {
  /** Count of every DNS request seen at an Exit, incremented right before
   * the request is initiated. Not all of them necessarily end successfully,
   * and some might even be lost by tor. */
  uint64_t stats_n_request;

  /** Time of the next general-overload assessment for DNS errors. When it is
   * reached, all stats are reset and this field moves to the following
   * assessment time. */
  time_t next_assessment_time;
} overload_dns_stats_t;

/** DNS request bookkeeping for the general overload state. */
static overload_dns_stats_t overload_dns_stats;
/** Statistics about the DNS queries seen when acting as an Exit. */
typedef struct {
  /* Counters for the DNS errors defined in RFC 1035 (codes 0 through 5). */
  uint64_t stats_n_error_none;          /* 0 */
  uint64_t stats_n_error_format;        /* 1 */
  uint64_t stats_n_error_serverfailed;  /* 2 */
  uint64_t stats_n_error_notexist;      /* 3 */
  uint64_t stats_n_error_notimpl;       /* 4 */
  uint64_t stats_n_error_refused;       /* 5 */

  /* Counters for the DNS errors that are specific to libevent. */
  uint64_t stats_n_error_truncated;     /* 65 */
  uint64_t stats_n_error_unknown;       /* 66 */
  uint64_t stats_n_error_tor_timeout;   /* 67 */
  uint64_t stats_n_error_shutdown;      /* 68 */
  uint64_t stats_n_error_cancel;        /* 69 */
  uint64_t stats_n_error_nodata;        /* 70 */

  /* Count of every DNS request seen at an Exit, incremented right before the
   * request is initiated. Not all of them necessarily end successfully, and
   * some might even be lost by tor. */
  uint64_t stats_n_request;
} dns_stats_t;
269 /* This is disabled because of the libevent bug where on error we don't get the
270 * DNS query type back. Once it is fixed, we can re-enable this. */
271 #if 0
272 /** DNS statistics store for each DNS record type for which tor supports only
273 * three at the moment: A, PTR and AAAA. */
274 static dns_stats_t dns_A_stats;
275 static dns_stats_t dns_PTR_stats;
276 static dns_stats_t dns_AAAA_stats;
277 #endif
279 /** DNS query statistics store. It covers all type of queries. */
280 static dns_stats_t dns_all_stats;
282 /** Return the point to the DNS statistics store. Ignore the type for now
283 * because of a libevent problem. */
284 static inline dns_stats_t *
285 get_dns_stats_by_type(const int type)
287 (void) type;
288 return &dns_all_stats;
#if 0
/** Map a libevent record type to the DNS statistics store kept for it.
 * Return NULL when the type is not one we handle. */
static inline dns_stats_t *
get_dns_stats_by_type(const int type)
{
  if (type == DNS_IPv4_A)
    return &dns_A_stats;
  if (type == DNS_PTR)
    return &dns_PTR_stats;
  if (type == DNS_IPv6_AAAA)
    return &dns_AAAA_stats;

  return NULL;
}
#endif
310 /** Return the DNS error count for the given libevent DNS type and error code.
311 * The possible types are: DNS_IPv4_A, DNS_PTR, DNS_IPv6_AAAA. */
312 uint64_t
313 rep_hist_get_n_dns_error(int type, uint8_t error)
315 dns_stats_t *dns_stats = get_dns_stats_by_type(type);
316 if (BUG(!dns_stats)) {
317 return 0;
320 switch (error) {
321 case DNS_ERR_NONE:
322 return dns_stats->stats_n_error_none;
323 case DNS_ERR_FORMAT:
324 return dns_stats->stats_n_error_format;
325 case DNS_ERR_SERVERFAILED:
326 return dns_stats->stats_n_error_serverfailed;
327 case DNS_ERR_NOTEXIST:
328 return dns_stats->stats_n_error_notexist;
329 case DNS_ERR_NOTIMPL:
330 return dns_stats->stats_n_error_notimpl;
331 case DNS_ERR_REFUSED:
332 return dns_stats->stats_n_error_refused;
333 case DNS_ERR_TRUNCATED:
334 return dns_stats->stats_n_error_truncated;
335 case DNS_ERR_UNKNOWN:
336 return dns_stats->stats_n_error_unknown;
337 case DNS_ERR_TIMEOUT:
338 return dns_stats->stats_n_error_tor_timeout;
339 case DNS_ERR_SHUTDOWN:
340 return dns_stats->stats_n_error_shutdown;
341 case DNS_ERR_CANCEL:
342 return dns_stats->stats_n_error_cancel;
343 case DNS_ERR_NODATA:
344 return dns_stats->stats_n_error_nodata;
345 default:
346 /* Unhandled code sent back by libevent. */
347 return 0;
351 /** Return the total number of DNS request seen for the given libevent DNS
352 * record type. Possible types are: DNS_IPv4_A, DNS_PTR, DNS_IPv6_AAAA. */
353 uint64_t
354 rep_hist_get_n_dns_request(int type)
356 dns_stats_t *dns_stats = get_dns_stats_by_type(type);
357 if (BUG(!dns_stats)) {
358 return 0;
360 return dns_stats->stats_n_request;
363 /** Note a DNS error for the given given libevent DNS record type and error
364 * code. Possible types are: DNS_IPv4_A, DNS_PTR, DNS_IPv6_AAAA.
366 * NOTE: Libevent is _not_ returning the type in case of an error and so if
367 * error is anything but DNS_ERR_NONE, the type is not usable and set to 0.
369 * See: https://gitlab.torproject.org/tpo/core/tor/-/issues/40490 */
370 void
371 rep_hist_note_dns_error(int type, uint8_t error)
373 overload_dns_stats.stats_n_request++;
375 /* Again, the libevent bug (see function comment), for an error that is
376 * anything but DNS_ERR_NONE, the type is always 0 which means that we don't
377 * have a DNS stat object for it so this code will do nothing until libevent
378 * is fixed. */
379 dns_stats_t *dns_stats = get_dns_stats_by_type(type);
380 /* Unsupported DNS query type. */
381 if (!dns_stats) {
382 return;
385 switch (error) {
386 case DNS_ERR_NONE:
387 dns_stats->stats_n_error_none++;
388 break;
389 case DNS_ERR_FORMAT:
390 dns_stats->stats_n_error_format++;
391 break;
392 case DNS_ERR_SERVERFAILED:
393 dns_stats->stats_n_error_serverfailed++;
394 break;
395 case DNS_ERR_NOTEXIST:
396 dns_stats->stats_n_error_notexist++;
397 break;
398 case DNS_ERR_NOTIMPL:
399 dns_stats->stats_n_error_notimpl++;
400 break;
401 case DNS_ERR_REFUSED:
402 dns_stats->stats_n_error_refused++;
403 break;
404 case DNS_ERR_TRUNCATED:
405 dns_stats->stats_n_error_truncated++;
406 break;
407 case DNS_ERR_UNKNOWN:
408 dns_stats->stats_n_error_unknown++;
409 break;
410 case DNS_ERR_TIMEOUT:
411 dns_stats->stats_n_error_tor_timeout++;
412 break;
413 case DNS_ERR_SHUTDOWN:
414 dns_stats->stats_n_error_shutdown++;
415 break;
416 case DNS_ERR_CANCEL:
417 dns_stats->stats_n_error_cancel++;
418 break;
419 case DNS_ERR_NODATA:
420 dns_stats->stats_n_error_nodata++;
421 break;
422 default:
423 /* Unhandled code sent back by libevent. */
424 break;
428 /** Note a DNS request for the given given libevent DNS record type. */
429 void
430 rep_hist_note_dns_request(int type)
432 dns_stats_t *dns_stats = get_dns_stats_by_type(type);
433 if (BUG(!dns_stats)) {
434 return;
436 dns_stats->stats_n_request++;
439 /***** END of DNS statistics *****/
/** Return true iff <b>overload_time</b> is later than <b>n_hours</b> hours
 * before the current approximate time. */
static bool
overload_happened_recently(time_t overload_time, int n_hours)
{
  /* The relevance window is n_hours long and ends at the present moment. */
  return overload_time > approx_time() - 3600 * n_hours;
}
/* Version number emitted at the start of every overload statistics line. */
#define OVERLOAD_STATS_VERSION 1
455 /** Return the stats_n_read_limit_reached counter. */
456 uint64_t
457 rep_hist_get_n_read_limit_reached(void)
459 return stats_n_read_limit_reached;
462 /** Return the stats_n_write_limit_reached counter. */
463 uint64_t
464 rep_hist_get_n_write_limit_reached(void)
466 return stats_n_write_limit_reached;
469 /** Returns an allocated string for server descriptor for publising information
470 * on whether we are overloaded or not. */
471 char *
472 rep_hist_get_overload_general_line(void)
474 char *result = NULL;
475 char tbuf[ISO_TIME_LEN+1];
477 /* Encode the general overload */
478 if (overload_happened_recently(overload_stats.overload_general_time, 72)) {
479 format_iso_time(tbuf, overload_stats.overload_general_time);
480 tor_asprintf(&result, "overload-general %d %s\n",
481 OVERLOAD_STATS_VERSION, tbuf);
484 return result;
487 /** Returns an allocated string for extra-info documents for publishing
488 * overload statistics. */
489 char *
490 rep_hist_get_overload_stats_lines(void)
492 char *result = NULL;
493 smartlist_t *chunks = smartlist_new();
494 char tbuf[ISO_TIME_LEN+1];
496 /* Add bandwidth-related overloads */
497 if (overload_happened_recently(overload_stats.overload_ratelimits_time,24)) {
498 const or_options_t *options = get_options();
499 format_iso_time(tbuf, overload_stats.overload_ratelimits_time);
500 smartlist_add_asprintf(chunks,
501 "overload-ratelimits %d %s %" PRIu64 " %" PRIu64
502 " %" PRIu64 " %" PRIu64 "\n",
503 OVERLOAD_STATS_VERSION, tbuf,
504 options->BandwidthRate, options->BandwidthBurst,
505 overload_stats.overload_read_count,
506 overload_stats.overload_write_count);
509 /* Finally file descriptor overloads */
510 if (overload_happened_recently(
511 overload_stats.overload_fd_exhausted_time, 72)) {
512 format_iso_time(tbuf, overload_stats.overload_fd_exhausted_time);
513 smartlist_add_asprintf(chunks, "overload-fd-exhausted %d %s\n",
514 OVERLOAD_STATS_VERSION, tbuf);
517 /* Bail early if we had nothing to write */
518 if (smartlist_len(chunks) == 0) {
519 goto done;
522 result = smartlist_join_strings(chunks, "", 0, NULL);
524 done:
525 SMARTLIST_FOREACH(chunks, char *, cp, tor_free(cp));
526 smartlist_free(chunks);
527 return result;
/** Store in `a` the current approximate time, rounded down to the start of
 * the hour it falls in. */
#define SET_TO_START_OF_HOUR(a)                        \
  STMT_BEGIN                                           \
    (a) = approx_time() - (approx_time() % 3600);      \
  STMT_END
535 /** Note down an overload event of type `overload`. */
536 void
537 rep_hist_note_overload(overload_type_t overload)
539 static time_t last_read_counted = 0;
540 static time_t last_write_counted = 0;
542 switch (overload) {
543 case OVERLOAD_GENERAL:
544 SET_TO_START_OF_HOUR(overload_stats.overload_general_time);
545 break;
546 case OVERLOAD_READ: {
547 stats_n_read_limit_reached++;
548 SET_TO_START_OF_HOUR(overload_stats.overload_ratelimits_time);
549 if (approx_time() >= last_read_counted + 60) { /* Count once a minute */
550 overload_stats.overload_read_count++;
551 last_read_counted = approx_time();
553 break;
555 case OVERLOAD_WRITE: {
556 stats_n_write_limit_reached++;
557 SET_TO_START_OF_HOUR(overload_stats.overload_ratelimits_time);
558 if (approx_time() >= last_write_counted + 60) { /* Count once a minute */
559 overload_stats.overload_write_count++;
560 last_write_counted = approx_time();
562 break;
564 case OVERLOAD_FD_EXHAUSTED:
565 SET_TO_START_OF_HOUR(overload_stats.overload_fd_exhausted_time);
566 overload_stats.overload_fd_exhausted++;
567 break;
571 /** Note down that we've reached a TCP port exhaustion. This triggers an
572 * overload general event. */
573 void
574 rep_hist_note_tcp_exhaustion(void)
576 stats_n_tcp_exhaustion++;
577 rep_hist_note_overload(OVERLOAD_GENERAL);
580 /** Return the total number of TCP exhaustion times we've reached. */
581 uint64_t
582 rep_hist_get_n_tcp_exhaustion(void)
584 return stats_n_tcp_exhaustion;
587 /** Return the or_history_t for the OR with identity digest <b>id</b>,
588 * creating it if necessary. */
589 static or_history_t *
590 get_or_history(const char* id)
592 or_history_t *hist;
594 if (tor_digest_is_zero(id))
595 return NULL;
597 hist = digestmap_get(history_map, id);
598 if (!hist) {
599 hist = tor_malloc_zero(sizeof(or_history_t));
600 rephist_total_alloc += sizeof(or_history_t);
601 rephist_total_num++;
602 hist->since = hist->changed = time(NULL);
603 tor_addr_make_unspec(&hist->last_reached_addr);
604 digestmap_set(history_map, id, hist);
606 return hist;
609 /** Helper: free storage held by a single OR history entry. */
610 static void
611 free_or_history(void *_hist)
613 or_history_t *hist = _hist;
614 rephist_total_alloc -= sizeof(or_history_t);
615 rephist_total_num--;
616 tor_free(hist);
619 /** Initialize the static data structures for tracking history. */
620 void
621 rep_hist_init(void)
623 history_map = digestmap_new();
626 /** We have just decided that this router with identity digest <b>id</b> is
627 * reachable, meaning we will give it a "Running" flag for the next while. */
628 void
629 rep_hist_note_router_reachable(const char *id, const tor_addr_t *at_addr,
630 const uint16_t at_port, time_t when)
632 or_history_t *hist = get_or_history(id);
633 int was_in_run = 1;
634 char tbuf[ISO_TIME_LEN+1];
635 int addr_changed, port_changed;
637 tor_assert(hist);
638 tor_assert((!at_addr && !at_port) || (at_addr && at_port));
640 addr_changed = at_addr && !tor_addr_is_null(&hist->last_reached_addr) &&
641 tor_addr_compare(at_addr, &hist->last_reached_addr, CMP_EXACT) != 0;
642 port_changed = at_port && hist->last_reached_port &&
643 at_port != hist->last_reached_port;
645 if (!started_tracking_stability)
646 started_tracking_stability = time(NULL);
647 if (!hist->start_of_run) {
648 hist->start_of_run = when;
649 was_in_run = 0;
651 if (hist->start_of_downtime) {
652 long down_length;
654 format_local_iso_time(tbuf, hist->start_of_downtime);
655 log_info(LD_HIST, "Router %s is now Running; it had been down since %s.",
656 hex_str(id, DIGEST_LEN), tbuf);
657 if (was_in_run)
658 log_info(LD_HIST, " (Paradoxically, it was already Running too.)");
660 down_length = when - hist->start_of_downtime;
661 hist->total_weighted_time += down_length;
662 hist->start_of_downtime = 0;
663 } else if (addr_changed || port_changed) {
664 /* If we're reachable, but the address changed, treat this as some
665 * downtime. */
666 int penalty = get_options()->TestingTorNetwork ? 240 : 3600;
667 networkstatus_t *ns;
669 if ((ns = networkstatus_get_latest_consensus())) {
670 int fresh_interval = (int)(ns->fresh_until - ns->valid_after);
671 int live_interval = (int)(ns->valid_until - ns->valid_after);
672 /* on average, a descriptor addr change takes .5 intervals to make it
673 * into a consensus, and half a liveness period to make it to
674 * clients. */
675 penalty = (int)(fresh_interval + live_interval) / 2;
677 format_local_iso_time(tbuf, hist->start_of_run);
678 log_info(LD_HIST,"Router %s still seems Running, but its address appears "
679 "to have changed since the last time it was reachable. I'm "
680 "going to treat it as having been down for %d seconds",
681 hex_str(id, DIGEST_LEN), penalty);
682 rep_hist_note_router_unreachable(id, when-penalty);
683 rep_hist_note_router_reachable(id, NULL, 0, when);
684 } else {
685 format_local_iso_time(tbuf, hist->start_of_run);
686 if (was_in_run)
687 log_debug(LD_HIST, "Router %s is still Running; it has been Running "
688 "since %s", hex_str(id, DIGEST_LEN), tbuf);
689 else
690 log_info(LD_HIST,"Router %s is now Running; it was previously untracked",
691 hex_str(id, DIGEST_LEN));
693 if (at_addr)
694 tor_addr_copy(&hist->last_reached_addr, at_addr);
695 if (at_port)
696 hist->last_reached_port = at_port;
699 /** We have just decided that this router is unreachable, meaning
700 * we are taking away its "Running" flag. */
701 void
702 rep_hist_note_router_unreachable(const char *id, time_t when)
704 or_history_t *hist = get_or_history(id);
705 char tbuf[ISO_TIME_LEN+1];
706 int was_running = 0;
707 if (!started_tracking_stability)
708 started_tracking_stability = time(NULL);
710 tor_assert(hist);
711 if (hist->start_of_run) {
712 /*XXXX We could treat failed connections differently from failed
713 * connect attempts. */
714 long run_length = when - hist->start_of_run;
715 format_local_iso_time(tbuf, hist->start_of_run);
717 hist->total_run_weights += 1.0;
718 hist->start_of_run = 0;
719 if (run_length < 0) {
720 unsigned long penalty = -run_length;
721 #define SUBTRACT_CLAMPED(var, penalty) \
722 do { (var) = (var) < (penalty) ? 0 : (var) - (penalty); } while (0)
724 SUBTRACT_CLAMPED(hist->weighted_run_length, penalty);
725 SUBTRACT_CLAMPED(hist->weighted_uptime, penalty);
726 } else {
727 hist->weighted_run_length += run_length;
728 hist->weighted_uptime += run_length;
729 hist->total_weighted_time += run_length;
731 was_running = 1;
732 log_info(LD_HIST, "Router %s is now non-Running: it had previously been "
733 "Running since %s. Its total weighted uptime is %lu/%lu.",
734 hex_str(id, DIGEST_LEN), tbuf, hist->weighted_uptime,
735 hist->total_weighted_time);
737 if (!hist->start_of_downtime) {
738 hist->start_of_downtime = when;
740 if (!was_running)
741 log_info(LD_HIST, "Router %s is now non-Running; it was previously "
742 "untracked.", hex_str(id, DIGEST_LEN));
743 } else {
744 if (!was_running) {
745 format_local_iso_time(tbuf, hist->start_of_downtime);
747 log_info(LD_HIST, "Router %s is still non-Running; it has been "
748 "non-Running since %s.", hex_str(id, DIGEST_LEN), tbuf);
753 /** Mark a router with ID <b>id</b> as non-Running, and retroactively declare
754 * that it has never been running: give it no stability and no WFU. */
755 void
756 rep_hist_make_router_pessimal(const char *id, time_t when)
758 or_history_t *hist = get_or_history(id);
759 tor_assert(hist);
761 rep_hist_note_router_unreachable(id, when);
763 hist->weighted_run_length = 0;
764 hist->weighted_uptime = 0;
767 /** Helper: Discount all old MTBF data, if it is time to do so. Return
768 * the time at which we should next discount MTBF data. */
769 time_t
770 rep_hist_downrate_old_runs(time_t now)
772 digestmap_iter_t *orhist_it;
773 const char *digest1;
774 or_history_t *hist;
775 void *hist_p;
776 double alpha = 1.0;
778 if (!history_map)
779 history_map = digestmap_new();
780 if (!stability_last_downrated)
781 stability_last_downrated = now;
782 if (stability_last_downrated + STABILITY_INTERVAL > now)
783 return stability_last_downrated + STABILITY_INTERVAL;
785 /* Okay, we should downrate the data. By how much? */
786 while (stability_last_downrated + STABILITY_INTERVAL <= now) {
787 stability_last_downrated += STABILITY_INTERVAL;
788 alpha *= STABILITY_ALPHA;
791 log_info(LD_HIST, "Discounting all old stability info by a factor of %f",
792 alpha);
794 /* Multiply every w_r_l, t_r_w pair by alpha. */
795 for (orhist_it = digestmap_iter_init(history_map);
796 !digestmap_iter_done(orhist_it);
797 orhist_it = digestmap_iter_next(history_map,orhist_it)) {
798 digestmap_iter_get(orhist_it, &digest1, &hist_p);
799 hist = hist_p;
801 hist->weighted_run_length =
802 (unsigned long)(hist->weighted_run_length * alpha);
803 hist->total_run_weights *= alpha;
805 hist->weighted_uptime = (unsigned long)(hist->weighted_uptime * alpha);
806 hist->total_weighted_time = (unsigned long)
807 (hist->total_weighted_time * alpha);
810 return stability_last_downrated + STABILITY_INTERVAL;
813 /** Helper: Return the weighted MTBF of the router with history <b>hist</b>. */
814 static double
815 get_stability(or_history_t *hist, time_t when)
817 long total = hist->weighted_run_length;
818 double total_weights = hist->total_run_weights;
820 if (hist->start_of_run) {
821 /* We're currently in a run. Let total and total_weights hold the values
822 * they would hold if the current run were to end now. */
823 total += (when-hist->start_of_run);
824 total_weights += 1.0;
826 if (total_weights < STABILITY_EPSILON) {
827 /* Round down to zero, and avoid divide-by-zero. */
828 return 0.0;
831 return total / total_weights;
834 /** Return the total amount of time we've been observing, with each run of
835 * time downrated by the appropriate factor. */
836 static long
837 get_total_weighted_time(or_history_t *hist, time_t when)
839 long total = hist->total_weighted_time;
840 if (hist->start_of_run) {
841 total += (when - hist->start_of_run);
842 } else if (hist->start_of_downtime) {
843 total += (when - hist->start_of_downtime);
845 return total;
848 /** Helper: Return the weighted percent-of-time-online of the router with
849 * history <b>hist</b>. */
850 static double
851 get_weighted_fractional_uptime(or_history_t *hist, time_t when)
853 long total = hist->total_weighted_time;
854 long up = hist->weighted_uptime;
856 if (hist->start_of_run) {
857 long run_length = (when - hist->start_of_run);
858 up += run_length;
859 total += run_length;
860 } else if (hist->start_of_downtime) {
861 total += (when - hist->start_of_downtime);
864 if (!total) {
865 /* Avoid calling anybody's uptime infinity (which should be impossible if
866 * the code is working), or NaN (which can happen for any router we haven't
867 * observed up or down yet). */
868 return 0.0;
871 return ((double) up) / total;
874 /** Return how long the router whose identity digest is <b>id</b> has
875 * been reachable. Return 0 if the router is unknown or currently deemed
876 * unreachable. */
877 long
878 rep_hist_get_uptime(const char *id, time_t when)
880 or_history_t *hist = get_or_history(id);
881 if (!hist)
882 return 0;
883 if (!hist->start_of_run || when < hist->start_of_run)
884 return 0;
885 return when - hist->start_of_run;
888 /** Return an estimated MTBF for the router whose identity digest is
889 * <b>id</b>. Return 0 if the router is unknown. */
890 double
891 rep_hist_get_stability(const char *id, time_t when)
893 or_history_t *hist = get_or_history(id);
894 if (!hist)
895 return 0.0;
897 return get_stability(hist, when);
900 /** Return an estimated percent-of-time-online for the router whose identity
901 * digest is <b>id</b>. Return 0 if the router is unknown. */
902 double
903 rep_hist_get_weighted_fractional_uptime(const char *id, time_t when)
905 or_history_t *hist = get_or_history(id);
906 if (!hist)
907 return 0.0;
909 return get_weighted_fractional_uptime(hist, when);
912 /** Return a number representing how long we've known about the router whose
913 * digest is <b>id</b>. Return 0 if the router is unknown.
915 * Be careful: this measure increases monotonically as we know the router for
916 * longer and longer, but it doesn't increase linearly.
918 long
919 rep_hist_get_weighted_time_known(const char *id, time_t when)
921 or_history_t *hist = get_or_history(id);
922 if (!hist)
923 return 0;
925 return get_total_weighted_time(hist, when);
928 /** Return true if we've been measuring MTBFs for long enough to
929 * pronounce on Stability. */
931 rep_hist_have_measured_enough_stability(void)
933 /* XXXX++ This doesn't do so well when we change our opinion
934 * as to whether we're tracking router stability. */
935 return started_tracking_stability < time(NULL) - 4*60*60;
938 /** Log all the reliability data we have remembered, with the chosen
939 * severity.
941 void
942 rep_hist_dump_stats(time_t now, int severity)
944 digestmap_iter_t *orhist_it;
945 const char *name1, *digest1;
946 char hexdigest1[HEX_DIGEST_LEN+1];
947 or_history_t *or_history;
948 void *or_history_p;
949 const node_t *node;
951 rep_history_clean(now - get_options()->RephistTrackTime);
953 tor_log(severity, LD_HIST, "--------------- Dumping history information:");
955 for (orhist_it = digestmap_iter_init(history_map);
956 !digestmap_iter_done(orhist_it);
957 orhist_it = digestmap_iter_next(history_map,orhist_it)) {
958 double s;
959 long stability;
960 digestmap_iter_get(orhist_it, &digest1, &or_history_p);
961 or_history = (or_history_t*) or_history_p;
963 if ((node = node_get_by_id(digest1)) && node_get_nickname(node))
964 name1 = node_get_nickname(node);
965 else
966 name1 = "(unknown)";
967 base16_encode(hexdigest1, sizeof(hexdigest1), digest1, DIGEST_LEN);
968 s = get_stability(or_history, now);
969 stability = (long)s;
970 tor_log(severity, LD_HIST,
971 "OR %s [%s]: wmtbf %lu:%02lu:%02lu",
972 name1, hexdigest1,
973 stability/3600, (stability/60)%60, stability%60);
977 /** Remove history info for routers/links that haven't changed since
978 * <b>before</b>.
980 void
981 rep_history_clean(time_t before)
983 int authority = authdir_mode(get_options());
984 or_history_t *or_history;
985 void *or_history_p;
986 digestmap_iter_t *orhist_it;
987 const char *d1;
989 orhist_it = digestmap_iter_init(history_map);
990 while (!digestmap_iter_done(orhist_it)) {
991 int should_remove;
992 digestmap_iter_get(orhist_it, &d1, &or_history_p);
993 or_history = or_history_p;
995 should_remove = authority ?
996 (or_history->total_run_weights < STABILITY_EPSILON &&
997 !or_history->start_of_run)
998 : (or_history->changed < before);
999 if (should_remove) {
1000 orhist_it = digestmap_iter_next_rmv(history_map, orhist_it);
1001 free_or_history(or_history);
1002 continue;
1004 orhist_it = digestmap_iter_next(history_map, orhist_it);
1008 /** Write MTBF data to disk. Return 0 on success, negative on failure.
1010 * If <b>missing_means_down</b>, then if we're about to write an entry
1011 * that is still considered up but isn't in our routerlist, consider it
1012 * to be down. */
1014 rep_hist_record_mtbf_data(time_t now, int missing_means_down)
1016 char time_buf[ISO_TIME_LEN+1];
1018 digestmap_iter_t *orhist_it;
1019 const char *digest;
1020 void *or_history_p;
1021 or_history_t *hist;
1022 open_file_t *open_file = NULL;
1023 FILE *f;
1026 char *filename = get_datadir_fname("router-stability");
1027 f = start_writing_to_stdio_file(filename, OPEN_FLAGS_REPLACE|O_TEXT, 0600,
1028 &open_file);
1029 tor_free(filename);
1030 if (!f)
1031 return -1;
1034 /* File format is:
1035 * FormatLine *KeywordLine Data
1037 * FormatLine = "format 1" NL
1038 * KeywordLine = Keyword SP Arguments NL
1039 * Data = "data" NL *RouterMTBFLine "." NL
1040 * RouterMTBFLine = Fingerprint SP WeightedRunLen SP
1041 * TotalRunWeights [SP S=StartRunTime] NL
1043 #define PUT(s) STMT_BEGIN if (fputs((s),f)<0) goto err; STMT_END
1044 #define PRINTF(args) STMT_BEGIN if (fprintf args <0) goto err; STMT_END
1046 PUT("format 2\n");
1048 format_iso_time(time_buf, time(NULL));
1049 PRINTF((f, "stored-at %s\n", time_buf));
1051 if (started_tracking_stability) {
1052 format_iso_time(time_buf, started_tracking_stability);
1053 PRINTF((f, "tracked-since %s\n", time_buf));
1055 if (stability_last_downrated) {
1056 format_iso_time(time_buf, stability_last_downrated);
1057 PRINTF((f, "last-downrated %s\n", time_buf));
1060 PUT("data\n");
1062 /* XXX Nick: now bridge auths record this for all routers too.
1063 * Should we make them record it only for bridge routers? -RD
1064 * Not for 0.2.0. -NM */
1065 for (orhist_it = digestmap_iter_init(history_map);
1066 !digestmap_iter_done(orhist_it);
1067 orhist_it = digestmap_iter_next(history_map,orhist_it)) {
1068 char dbuf[HEX_DIGEST_LEN+1];
1069 const char *t = NULL;
1070 digestmap_iter_get(orhist_it, &digest, &or_history_p);
1071 hist = (or_history_t*) or_history_p;
1073 base16_encode(dbuf, sizeof(dbuf), digest, DIGEST_LEN);
1075 if (missing_means_down && hist->start_of_run &&
1076 !connection_or_digest_is_known_relay(digest)) {
1077 /* We think this relay is running, but it's not listed in our
1078 * consensus. Somehow it fell out without telling us it went
1079 * down. Complain and also correct it. */
1080 log_info(LD_HIST,
1081 "Relay '%s' is listed as up in rephist, but it's not in "
1082 "our routerlist. Correcting.", dbuf);
1083 rep_hist_note_router_unreachable(digest, now);
1086 PRINTF((f, "R %s\n", dbuf));
1087 if (hist->start_of_run > 0) {
1088 format_iso_time(time_buf, hist->start_of_run);
1089 t = time_buf;
1091 PRINTF((f, "+MTBF %lu %.5f%s%s\n",
1092 hist->weighted_run_length, hist->total_run_weights,
1093 t ? " S=" : "", t ? t : ""));
1094 t = NULL;
1095 if (hist->start_of_downtime > 0) {
1096 format_iso_time(time_buf, hist->start_of_downtime);
1097 t = time_buf;
1099 PRINTF((f, "+WFU %lu %lu%s%s\n",
1100 hist->weighted_uptime, hist->total_weighted_time,
1101 t ? " S=" : "", t ? t : ""));
1104 PUT(".\n");
1106 #undef PUT
1107 #undef PRINTF
1109 return finish_writing_to_file(open_file);
1110 err:
1111 abort_writing_to_file(open_file);
1112 return -1;
1115 /** Helper: return the first j >= i such that !strcmpstart(sl[j], prefix) and
1116 * such that no line sl[k] with i <= k < j starts with "R ". Return -1 if no
1117 * such line exists. */
1118 static int
1119 find_next_with(smartlist_t *sl, int i, const char *prefix)
1121 for ( ; i < smartlist_len(sl); ++i) {
1122 const char *line = smartlist_get(sl, i);
1123 if (!strcmpstart(line, prefix))
1124 return i;
1125 if (!strcmpstart(line, "R "))
1126 return -1;
1128 return -1;
/** Number of timestamps that parse_possibly_bad_iso_time() has replaced
 * with 0 because their year field was below 1970. */
static int n_bogus_times = 0;

/** Parse the ISO-formatted time in <b>s</b> into *<b>time_out</b>.
 *
 * The first four characters of <b>s</b> are read as the year.  If that
 * year is below 1970, set *<b>time_out</b> to 0, count the time as bogus,
 * and return 0.  Otherwise return whatever parse_iso_time() returns. */
static int
parse_possibly_bad_iso_time(const char *s, time_t *time_out)
{
  char year_str[5];
  int year;

  /* Isolate the (at most) four leading characters that hold the year. */
  strlcpy(year_str, s, sizeof(year_str));
  year_str[4] = '\0';
  year = (int)tor_parse_long(year_str, 10, 0, INT_MAX, NULL, NULL);

  if (year >= 1970)
    return parse_iso_time(s, time_out);

  *time_out = 0;
  ++n_bogus_times;
  return 0;
}
/** We've read a time <b>t</b> from a file stored at <b>stored_at</b>, which
 * says we started measuring at <b>started_measuring</b>.  Return a new time
 * that's about as much before <b>now</b> as <b>t</b> was before
 * <b>stored_at</b>.
 *
 * Special cases: a <b>t</b> more than 365 days before
 * <b>started_measuring</b>, or later than <b>stored_at</b>, yields 0; a
 * <b>t</b> less than that far before <b>started_measuring</b> yields
 * <b>started_measuring</b>; and the shifted result is never allowed to
 * fall before <b>started_measuring</b>.
 */
static inline time_t
correct_time(time_t t, time_t now, time_t stored_at, time_t started_measuring)
{
  long elapsed;
  time_t shifted;

  if (t < started_measuring - 24*60*60*365)
    return 0;
  if (t < started_measuring)
    return started_measuring;
  if (t > stored_at)
    return 0;

  /* Keep the same distance from "now" as t had from the storage time. */
  elapsed = stored_at - t;
  shifted = (time_t)(now - elapsed);
  return (shifted < started_measuring) ? started_measuring : shifted;
}
1174 /** Load MTBF data from disk. Returns 0 on success or recoverable error, -1
1175 * on failure. */
1177 rep_hist_load_mtbf_data(time_t now)
1179 /* XXXX won't handle being called while history is already populated. */
1180 smartlist_t *lines;
1181 const char *line = NULL;
1182 int r=0, i;
1183 time_t last_downrated = 0, stored_at = 0, tracked_since = 0;
1184 time_t latest_possible_start = now;
1185 long format = -1;
1188 char *filename = get_datadir_fname("router-stability");
1189 char *d = read_file_to_str(filename, RFTS_IGNORE_MISSING, NULL);
1190 tor_free(filename);
1191 if (!d)
1192 return -1;
1193 lines = smartlist_new();
1194 smartlist_split_string(lines, d, "\n", SPLIT_SKIP_SPACE, 0);
1195 tor_free(d);
1199 const char *firstline;
1200 if (smartlist_len(lines)>4) {
1201 firstline = smartlist_get(lines, 0);
1202 if (!strcmpstart(firstline, "format "))
1203 format = tor_parse_long(firstline+strlen("format "),
1204 10, -1, LONG_MAX, NULL, NULL);
1207 if (format != 1 && format != 2) {
1208 log_warn(LD_HIST,
1209 "Unrecognized format in mtbf history file. Skipping.");
1210 goto err;
1212 for (i = 1; i < smartlist_len(lines); ++i) {
1213 line = smartlist_get(lines, i);
1214 if (!strcmp(line, "data"))
1215 break;
1216 if (!strcmpstart(line, "last-downrated ")) {
1217 if (parse_iso_time(line+strlen("last-downrated "), &last_downrated)<0)
1218 log_warn(LD_HIST,"Couldn't parse downrate time in mtbf "
1219 "history file.");
1221 if (!strcmpstart(line, "stored-at ")) {
1222 if (parse_iso_time(line+strlen("stored-at "), &stored_at)<0)
1223 log_warn(LD_HIST,"Couldn't parse stored time in mtbf "
1224 "history file.");
1226 if (!strcmpstart(line, "tracked-since ")) {
1227 if (parse_iso_time(line+strlen("tracked-since "), &tracked_since)<0)
1228 log_warn(LD_HIST,"Couldn't parse started-tracking time in mtbf "
1229 "history file.");
1232 if (last_downrated > now)
1233 last_downrated = now;
1234 if (tracked_since > now)
1235 tracked_since = now;
1237 if (!stored_at) {
1238 log_warn(LD_HIST, "No stored time recorded.");
1239 goto err;
1242 if (line && !strcmp(line, "data"))
1243 ++i;
1245 n_bogus_times = 0;
1247 for (; i < smartlist_len(lines); ++i) {
1248 char digest[DIGEST_LEN];
1249 char hexbuf[HEX_DIGEST_LEN+1];
1250 char mtbf_timebuf[ISO_TIME_LEN+1];
1251 char wfu_timebuf[ISO_TIME_LEN+1];
1252 time_t start_of_run = 0;
1253 time_t start_of_downtime = 0;
1254 int have_mtbf = 0, have_wfu = 0;
1255 long wrl = 0;
1256 double trw = 0;
1257 long wt_uptime = 0, total_wt_time = 0;
1258 int n;
1259 or_history_t *hist;
1260 line = smartlist_get(lines, i);
1261 if (!strcmp(line, "."))
1262 break;
1264 mtbf_timebuf[0] = '\0';
1265 wfu_timebuf[0] = '\0';
1267 if (format == 1) {
1268 n = tor_sscanf(line, "%40s %ld %lf S=%10s %8s",
1269 hexbuf, &wrl, &trw, mtbf_timebuf, mtbf_timebuf+11);
1270 if (n != 3 && n != 5) {
1271 log_warn(LD_HIST, "Couldn't scan line %s", escaped(line));
1272 continue;
1274 have_mtbf = 1;
1275 } else {
1276 // format == 2.
1277 int mtbf_idx, wfu_idx;
1278 if (strcmpstart(line, "R ") || strlen(line) < 2+HEX_DIGEST_LEN)
1279 continue;
1280 strlcpy(hexbuf, line+2, sizeof(hexbuf));
1281 mtbf_idx = find_next_with(lines, i+1, "+MTBF ");
1282 wfu_idx = find_next_with(lines, i+1, "+WFU ");
1283 if (mtbf_idx >= 0) {
1284 const char *mtbfline = smartlist_get(lines, mtbf_idx);
1285 n = tor_sscanf(mtbfline, "+MTBF %lu %lf S=%10s %8s",
1286 &wrl, &trw, mtbf_timebuf, mtbf_timebuf+11);
1287 if (n == 2 || n == 4) {
1288 have_mtbf = 1;
1289 } else {
1290 log_warn(LD_HIST, "Couldn't scan +MTBF line %s",
1291 escaped(mtbfline));
1294 if (wfu_idx >= 0) {
1295 const char *wfuline = smartlist_get(lines, wfu_idx);
1296 n = tor_sscanf(wfuline, "+WFU %lu %lu S=%10s %8s",
1297 &wt_uptime, &total_wt_time,
1298 wfu_timebuf, wfu_timebuf+11);
1299 if (n == 2 || n == 4) {
1300 have_wfu = 1;
1301 } else {
1302 log_warn(LD_HIST, "Couldn't scan +WFU line %s", escaped(wfuline));
1305 if (wfu_idx > i)
1306 i = wfu_idx;
1307 if (mtbf_idx > i)
1308 i = mtbf_idx;
1310 if (base16_decode(digest, DIGEST_LEN,
1311 hexbuf, HEX_DIGEST_LEN) != DIGEST_LEN) {
1312 log_warn(LD_HIST, "Couldn't hex string %s", escaped(hexbuf));
1313 continue;
1315 hist = get_or_history(digest);
1316 if (!hist)
1317 continue;
1319 if (have_mtbf) {
1320 if (mtbf_timebuf[0]) {
1321 mtbf_timebuf[10] = ' ';
1322 if (parse_possibly_bad_iso_time(mtbf_timebuf, &start_of_run)<0)
1323 log_warn(LD_HIST, "Couldn't parse time %s",
1324 escaped(mtbf_timebuf));
1326 hist->start_of_run = correct_time(start_of_run, now, stored_at,
1327 tracked_since);
1328 if (hist->start_of_run < latest_possible_start + wrl)
1329 latest_possible_start = (time_t)(hist->start_of_run - wrl);
1331 hist->weighted_run_length = wrl;
1332 hist->total_run_weights = trw;
1334 if (have_wfu) {
1335 if (wfu_timebuf[0]) {
1336 wfu_timebuf[10] = ' ';
1337 if (parse_possibly_bad_iso_time(wfu_timebuf, &start_of_downtime)<0)
1338 log_warn(LD_HIST, "Couldn't parse time %s", escaped(wfu_timebuf));
1341 hist->start_of_downtime = correct_time(start_of_downtime, now, stored_at,
1342 tracked_since);
1343 hist->weighted_uptime = wt_uptime;
1344 hist->total_weighted_time = total_wt_time;
1346 if (strcmp(line, "."))
1347 log_warn(LD_HIST, "Truncated MTBF file.");
1349 if (tracked_since < 86400*365) /* Recover from insanely early value. */
1350 tracked_since = latest_possible_start;
1352 stability_last_downrated = last_downrated;
1353 started_tracking_stability = tracked_since;
1355 goto done;
1356 err:
1357 r = -1;
1358 done:
1359 SMARTLIST_FOREACH(lines, char *, cp, tor_free(cp));
1360 smartlist_free(lines);
1361 return r;
1364 /*** Exit port statistics ***/
/* Some constants */
/** To what multiple should byte numbers be rounded up? */
#define EXIT_STATS_ROUND_UP_BYTES 1024
/** To what multiple should stream counts be rounded up? */
#define EXIT_STATS_ROUND_UP_STREAMS 4
/** Number of TCP ports; also the number of entries in each of the
 * per-port counter arrays below. */
#define EXIT_STATS_NUM_PORTS 65536
/** Top n ports that will be included in exit stats. */
#define EXIT_STATS_TOP_N_PORTS 10

/* The following data structures are arrays and no fancy smartlists or maps,
 * so that all write operations can be done in constant time.  This comes at
 * the price of some memory (1.25 MB) and linear complexity when writing
 * stats for measuring relays. */
/** Number of bytes read in current period by exit port; indexed by port
 * number.  NULL while exit stats are not being collected. */
static uint64_t *exit_bytes_read = NULL;
/** Number of bytes written in current period by exit port; indexed by port
 * number.  NULL while exit stats are not being collected. */
static uint64_t *exit_bytes_written = NULL;
/** Number of streams opened in current period by exit port; indexed by
 * port number.  NULL while exit stats are not being collected. */
static uint32_t *exit_streams = NULL;

/** Start time of exit stats or 0 if we're not collecting exit stats. */
static time_t start_of_exit_stats_interval;
1390 /** Initialize exit port stats. */
1391 void
1392 rep_hist_exit_stats_init(time_t now)
1394 start_of_exit_stats_interval = now;
1395 exit_bytes_read = tor_calloc(EXIT_STATS_NUM_PORTS, sizeof(uint64_t));
1396 exit_bytes_written = tor_calloc(EXIT_STATS_NUM_PORTS, sizeof(uint64_t));
1397 exit_streams = tor_calloc(EXIT_STATS_NUM_PORTS, sizeof(uint32_t));
1400 /** Reset counters for exit port statistics. */
1401 void
1402 rep_hist_reset_exit_stats(time_t now)
1404 start_of_exit_stats_interval = now;
1405 memset(exit_bytes_read, 0, EXIT_STATS_NUM_PORTS * sizeof(uint64_t));
1406 memset(exit_bytes_written, 0, EXIT_STATS_NUM_PORTS * sizeof(uint64_t));
1407 memset(exit_streams, 0, EXIT_STATS_NUM_PORTS * sizeof(uint32_t));
1410 /** Stop collecting exit port stats in a way that we can re-start doing
1411 * so in rep_hist_exit_stats_init(). */
1412 void
1413 rep_hist_exit_stats_term(void)
1415 start_of_exit_stats_interval = 0;
1416 tor_free(exit_bytes_read);
1417 tor_free(exit_bytes_written);
1418 tor_free(exit_streams);
/** Helper for qsort: compare the two ints pointed to by <b>x</b> and
 * <b>y</b>.  Return a negative value, zero, or a positive value according
 * to whether *x is less than, equal to, or greater than *y.
 *
 * The result is built from two comparisons rather than a subtraction, so
 * it is well-defined for every pair of ints (a subtraction can overflow),
 * and the inputs are only read through const pointers. */
static int
compare_int_(const void *x, const void *y)
{
  const int a = *(const int *)x;
  const int b = *(const int *)y;

  return (a > b) - (a < b);
}
1430 /** Return a newly allocated string containing the exit port statistics
1431 * until <b>now</b>, or NULL if we're not collecting exit stats. Caller
1432 * must ensure start_of_exit_stats_interval is in the past. */
1433 char *
1434 rep_hist_format_exit_stats(time_t now)
1436 int i, j, top_elements = 0, cur_min_idx = 0, cur_port;
1437 uint64_t top_bytes[EXIT_STATS_TOP_N_PORTS];
1438 int top_ports[EXIT_STATS_TOP_N_PORTS];
1439 uint64_t cur_bytes = 0, other_read = 0, other_written = 0,
1440 total_read = 0, total_written = 0;
1441 uint32_t total_streams = 0, other_streams = 0;
1442 smartlist_t *written_strings, *read_strings, *streams_strings;
1443 char *written_string, *read_string, *streams_string;
1444 char t[ISO_TIME_LEN+1];
1445 char *result;
1447 if (!start_of_exit_stats_interval)
1448 return NULL; /* Not initialized. */
1450 tor_assert(now >= start_of_exit_stats_interval);
1452 /* Go through all ports to find the n ports that saw most written and
1453 * read bytes.
1455 * Invariant: at the end of the loop for iteration i,
1456 * total_read is the sum of all exit_bytes_read[0..i]
1457 * total_written is the sum of all exit_bytes_written[0..i]
1458 * total_stream is the sum of all exit_streams[0..i]
1460 * top_elements = MAX(EXIT_STATS_TOP_N_PORTS,
1461 * #{j | 0 <= j <= i && volume(i) > 0})
1463 * For all 0 <= j < top_elements,
1464 * top_bytes[j] > 0
1465 * 0 <= top_ports[j] <= 65535
1466 * top_bytes[j] = volume(top_ports[j])
1468 * There is no j in 0..i and k in 0..top_elements such that:
1469 * volume(j) > top_bytes[k] AND j is not in top_ports[0..top_elements]
1471 * There is no j!=cur_min_idx in 0..top_elements such that:
1472 * top_bytes[j] < top_bytes[cur_min_idx]
1474 * where volume(x) == exit_bytes_read[x]+exit_bytes_written[x]
1476 * Worst case: O(EXIT_STATS_NUM_PORTS * EXIT_STATS_TOP_N_PORTS)
1478 for (i = 1; i < EXIT_STATS_NUM_PORTS; i++) {
1479 total_read += exit_bytes_read[i];
1480 total_written += exit_bytes_written[i];
1481 total_streams += exit_streams[i];
1482 cur_bytes = exit_bytes_read[i] + exit_bytes_written[i];
1483 if (cur_bytes == 0) {
1484 continue;
1486 if (top_elements < EXIT_STATS_TOP_N_PORTS) {
1487 top_bytes[top_elements] = cur_bytes;
1488 top_ports[top_elements++] = i;
1489 } else if (cur_bytes > top_bytes[cur_min_idx]) {
1490 top_bytes[cur_min_idx] = cur_bytes;
1491 top_ports[cur_min_idx] = i;
1492 } else {
1493 continue;
1495 cur_min_idx = 0;
1496 for (j = 1; j < top_elements; j++) {
1497 if (top_bytes[j] < top_bytes[cur_min_idx]) {
1498 cur_min_idx = j;
1503 /* Add observations of top ports to smartlists. */
1504 written_strings = smartlist_new();
1505 read_strings = smartlist_new();
1506 streams_strings = smartlist_new();
1507 other_read = total_read;
1508 other_written = total_written;
1509 other_streams = total_streams;
1510 /* Sort the ports; this puts them out of sync with top_bytes, but we
1511 * won't be using top_bytes again anyway */
1512 qsort(top_ports, top_elements, sizeof(int), compare_int_);
1513 for (j = 0; j < top_elements; j++) {
1514 cur_port = top_ports[j];
1515 if (exit_bytes_written[cur_port] > 0) {
1516 uint64_t num = round_uint64_to_next_multiple_of(
1517 exit_bytes_written[cur_port],
1518 EXIT_STATS_ROUND_UP_BYTES);
1519 num /= 1024;
1520 smartlist_add_asprintf(written_strings, "%d=%"PRIu64,
1521 cur_port, (num));
1522 other_written -= exit_bytes_written[cur_port];
1524 if (exit_bytes_read[cur_port] > 0) {
1525 uint64_t num = round_uint64_to_next_multiple_of(
1526 exit_bytes_read[cur_port],
1527 EXIT_STATS_ROUND_UP_BYTES);
1528 num /= 1024;
1529 smartlist_add_asprintf(read_strings, "%d=%"PRIu64,
1530 cur_port, (num));
1531 other_read -= exit_bytes_read[cur_port];
1533 if (exit_streams[cur_port] > 0) {
1534 uint32_t num = round_uint32_to_next_multiple_of(
1535 exit_streams[cur_port],
1536 EXIT_STATS_ROUND_UP_STREAMS);
1537 smartlist_add_asprintf(streams_strings, "%d=%u", cur_port, num);
1538 other_streams -= exit_streams[cur_port];
1542 /* Add observations of other ports in a single element. */
1543 other_written = round_uint64_to_next_multiple_of(other_written,
1544 EXIT_STATS_ROUND_UP_BYTES);
1545 other_written /= 1024;
1546 smartlist_add_asprintf(written_strings, "other=%"PRIu64,
1547 (other_written));
1548 other_read = round_uint64_to_next_multiple_of(other_read,
1549 EXIT_STATS_ROUND_UP_BYTES);
1550 other_read /= 1024;
1551 smartlist_add_asprintf(read_strings, "other=%"PRIu64,
1552 (other_read));
1553 other_streams = round_uint32_to_next_multiple_of(other_streams,
1554 EXIT_STATS_ROUND_UP_STREAMS);
1555 smartlist_add_asprintf(streams_strings, "other=%u", other_streams);
1557 /* Join all observations in single strings. */
1558 written_string = smartlist_join_strings(written_strings, ",", 0, NULL);
1559 read_string = smartlist_join_strings(read_strings, ",", 0, NULL);
1560 streams_string = smartlist_join_strings(streams_strings, ",", 0, NULL);
1561 SMARTLIST_FOREACH(written_strings, char *, cp, tor_free(cp));
1562 SMARTLIST_FOREACH(read_strings, char *, cp, tor_free(cp));
1563 SMARTLIST_FOREACH(streams_strings, char *, cp, tor_free(cp));
1564 smartlist_free(written_strings);
1565 smartlist_free(read_strings);
1566 smartlist_free(streams_strings);
1568 /* Put everything together. */
1569 format_iso_time(t, now);
1570 tor_asprintf(&result, "exit-stats-end %s (%d s)\n"
1571 "exit-kibibytes-written %s\n"
1572 "exit-kibibytes-read %s\n"
1573 "exit-streams-opened %s\n",
1574 t, (unsigned) (now - start_of_exit_stats_interval),
1575 written_string,
1576 read_string,
1577 streams_string);
1578 tor_free(written_string);
1579 tor_free(read_string);
1580 tor_free(streams_string);
1581 return result;
1584 /** If 24 hours have passed since the beginning of the current exit port
1585 * stats period, write exit stats to $DATADIR/stats/exit-stats (possibly
1586 * overwriting an existing file) and reset counters. Return when we would
1587 * next want to write exit stats or 0 if we never want to write. */
1588 time_t
1589 rep_hist_exit_stats_write(time_t now)
1591 char *str = NULL;
1593 if (!start_of_exit_stats_interval)
1594 return 0; /* Not initialized. */
1595 if (start_of_exit_stats_interval + WRITE_STATS_INTERVAL > now)
1596 goto done; /* Not ready to write. */
1598 log_info(LD_HIST, "Writing exit port statistics to disk.");
1600 /* Generate history string. */
1601 str = rep_hist_format_exit_stats(now);
1603 /* Reset counters. */
1604 rep_hist_reset_exit_stats(now);
1606 /* Try to write to disk. */
1607 if (!check_or_create_data_subdir("stats")) {
1608 write_to_data_subdir("stats", "exit-stats", str, "exit port statistics");
1611 done:
1612 tor_free(str);
1613 return start_of_exit_stats_interval + WRITE_STATS_INTERVAL;
1616 /** Note that we wrote <b>num_written</b> bytes and read <b>num_read</b>
1617 * bytes to/from an exit connection to <b>port</b>. */
1618 void
1619 rep_hist_note_exit_bytes(uint16_t port, size_t num_written,
1620 size_t num_read)
1622 if (!start_of_exit_stats_interval)
1623 return; /* Not initialized. */
1624 exit_bytes_written[port] += num_written;
1625 exit_bytes_read[port] += num_read;
1626 log_debug(LD_HIST, "Written %lu bytes and read %lu bytes to/from an "
1627 "exit connection to port %d.",
1628 (unsigned long)num_written, (unsigned long)num_read, port);
1631 /** Note that we opened an exit stream to <b>port</b>. */
1632 void
1633 rep_hist_note_exit_stream_opened(uint16_t port)
1635 if (!start_of_exit_stats_interval)
1636 return; /* Not initialized. */
1637 exit_streams[port]++;
1638 log_debug(LD_HIST, "Opened exit stream to port %d", port);
1641 /*** cell statistics ***/
/** Time at which the current buffer stats interval began, or 0 while
 * buffer statistics are not being collected. */
static time_t start_of_buffer_stats_interval;

/** Start collecting buffer stats, with the first interval beginning at
 * <b>now</b>. */
void
rep_hist_buffer_stats_init(time_t now)
{
  start_of_buffer_stats_interval = now;
}
/** Statistics from a single circuit.  Collected when the circuit closes, or
 * when we flush statistics to disk. */
typedef struct circ_buffer_stats_t {
  /** Average number of cells in the circuit's queue */
  double mean_num_cells_in_queue;
  /** Average time a cell waits in the queue. */
  double mean_time_cells_in_queue;
  /** Total number of cells sent over this circuit */
  uint32_t processed_cells;
} circ_buffer_stats_t;

/** List of circ_buffer_stats_t.  Starts out NULL and is created on demand
 * by the functions that use it. */
static smartlist_t *circuits_for_buffer_stats = NULL;
1668 /** Remember cell statistics <b>mean_num_cells_in_queue</b>,
1669 * <b>mean_time_cells_in_queue</b>, and <b>processed_cells</b> of a
1670 * circuit. */
1671 void
1672 rep_hist_add_buffer_stats(double mean_num_cells_in_queue,
1673 double mean_time_cells_in_queue, uint32_t processed_cells)
1675 circ_buffer_stats_t *stats;
1676 if (!start_of_buffer_stats_interval)
1677 return; /* Not initialized. */
1678 stats = tor_malloc_zero(sizeof(circ_buffer_stats_t));
1679 stats->mean_num_cells_in_queue = mean_num_cells_in_queue;
1680 stats->mean_time_cells_in_queue = mean_time_cells_in_queue;
1681 stats->processed_cells = processed_cells;
1682 if (!circuits_for_buffer_stats)
1683 circuits_for_buffer_stats = smartlist_new();
1684 smartlist_add(circuits_for_buffer_stats, stats);
1687 /** Remember cell statistics for circuit <b>circ</b> at time
1688 * <b>end_of_interval</b> and reset cell counters in case the circuit
1689 * remains open in the next measurement interval. */
1690 void
1691 rep_hist_buffer_stats_add_circ(circuit_t *circ, time_t end_of_interval)
1693 time_t start_of_interval;
1694 int interval_length;
1695 or_circuit_t *orcirc;
1696 double mean_num_cells_in_queue, mean_time_cells_in_queue;
1697 uint32_t processed_cells;
1698 if (CIRCUIT_IS_ORIGIN(circ))
1699 return;
1700 orcirc = TO_OR_CIRCUIT(circ);
1701 if (!orcirc->processed_cells)
1702 return;
1703 start_of_interval = (circ->timestamp_created.tv_sec >
1704 start_of_buffer_stats_interval) ?
1705 (time_t)circ->timestamp_created.tv_sec :
1706 start_of_buffer_stats_interval;
1707 interval_length = (int) (end_of_interval - start_of_interval);
1708 if (interval_length <= 0)
1709 return;
1710 processed_cells = orcirc->processed_cells;
1711 /* 1000.0 for s -> ms; 2.0 because of app-ward and exit-ward queues */
1712 mean_num_cells_in_queue = (double) orcirc->total_cell_waiting_time /
1713 (double) interval_length / 1000.0 / 2.0;
1714 mean_time_cells_in_queue =
1715 (double) orcirc->total_cell_waiting_time /
1716 (double) orcirc->processed_cells;
1717 orcirc->total_cell_waiting_time = 0;
1718 orcirc->processed_cells = 0;
1719 rep_hist_add_buffer_stats(mean_num_cells_in_queue,
1720 mean_time_cells_in_queue,
1721 processed_cells);
1724 /** Sorting helper: return -1, 1, or 0 based on comparison of two
1725 * circ_buffer_stats_t */
1726 static int
1727 buffer_stats_compare_entries_(const void **_a, const void **_b)
1729 const circ_buffer_stats_t *a = *_a, *b = *_b;
1730 if (a->processed_cells < b->processed_cells)
1731 return 1;
1732 else if (a->processed_cells > b->processed_cells)
1733 return -1;
1734 else
1735 return 0;
/** Stop collecting cell stats: discard the recorded circuit statistics and
 * set the interval start to 0, so that collection can be re-started with
 * rep_hist_buffer_stats_init(). */
void
rep_hist_buffer_stats_term(void)
{
  const time_t not_collecting = 0;

  rep_hist_reset_buffer_stats(not_collecting);
}
1746 /** Clear history of circuit statistics and set the measurement interval
1747 * start to <b>now</b>. */
1748 void
1749 rep_hist_reset_buffer_stats(time_t now)
1751 if (!circuits_for_buffer_stats)
1752 circuits_for_buffer_stats = smartlist_new();
1753 SMARTLIST_FOREACH(circuits_for_buffer_stats, circ_buffer_stats_t *,
1754 stats, tor_free(stats));
1755 smartlist_clear(circuits_for_buffer_stats);
1756 start_of_buffer_stats_interval = now;
1759 /** Return a newly allocated string containing the buffer statistics until
1760 * <b>now</b>, or NULL if we're not collecting buffer stats. Caller must
1761 * ensure start_of_buffer_stats_interval is in the past. */
1762 char *
1763 rep_hist_format_buffer_stats(time_t now)
1765 #define SHARES 10
1766 uint64_t processed_cells[SHARES];
1767 uint32_t circs_in_share[SHARES];
1768 int number_of_circuits, i;
1769 double queued_cells[SHARES], time_in_queue[SHARES];
1770 smartlist_t *processed_cells_strings, *queued_cells_strings,
1771 *time_in_queue_strings;
1772 char *processed_cells_string, *queued_cells_string,
1773 *time_in_queue_string;
1774 char t[ISO_TIME_LEN+1];
1775 char *result;
1777 if (!start_of_buffer_stats_interval)
1778 return NULL; /* Not initialized. */
1780 tor_assert(now >= start_of_buffer_stats_interval);
1782 /* Calculate deciles if we saw at least one circuit. */
1783 memset(processed_cells, 0, SHARES * sizeof(uint64_t));
1784 memset(circs_in_share, 0, SHARES * sizeof(uint32_t));
1785 memset(queued_cells, 0, SHARES * sizeof(double));
1786 memset(time_in_queue, 0, SHARES * sizeof(double));
1787 if (!circuits_for_buffer_stats)
1788 circuits_for_buffer_stats = smartlist_new();
1789 number_of_circuits = smartlist_len(circuits_for_buffer_stats);
1790 if (number_of_circuits > 0) {
1791 smartlist_sort(circuits_for_buffer_stats,
1792 buffer_stats_compare_entries_);
1793 i = 0;
1794 SMARTLIST_FOREACH_BEGIN(circuits_for_buffer_stats,
1795 circ_buffer_stats_t *, stats)
1797 int share = i++ * SHARES / number_of_circuits;
1798 processed_cells[share] += stats->processed_cells;
1799 queued_cells[share] += stats->mean_num_cells_in_queue;
1800 time_in_queue[share] += stats->mean_time_cells_in_queue;
1801 circs_in_share[share]++;
1803 SMARTLIST_FOREACH_END(stats);
1806 /* Write deciles to strings. */
1807 processed_cells_strings = smartlist_new();
1808 queued_cells_strings = smartlist_new();
1809 time_in_queue_strings = smartlist_new();
1810 for (i = 0; i < SHARES; i++) {
1811 smartlist_add_asprintf(processed_cells_strings,
1812 "%"PRIu64, !circs_in_share[i] ? 0 :
1813 (processed_cells[i] /
1814 circs_in_share[i]));
1816 for (i = 0; i < SHARES; i++) {
1817 smartlist_add_asprintf(queued_cells_strings, "%.2f",
1818 circs_in_share[i] == 0 ? 0.0 :
1819 queued_cells[i] / (double) circs_in_share[i]);
1821 for (i = 0; i < SHARES; i++) {
1822 smartlist_add_asprintf(time_in_queue_strings, "%.0f",
1823 circs_in_share[i] == 0 ? 0.0 :
1824 time_in_queue[i] / (double) circs_in_share[i]);
1827 /* Join all observations in single strings. */
1828 processed_cells_string = smartlist_join_strings(processed_cells_strings,
1829 ",", 0, NULL);
1830 queued_cells_string = smartlist_join_strings(queued_cells_strings,
1831 ",", 0, NULL);
1832 time_in_queue_string = smartlist_join_strings(time_in_queue_strings,
1833 ",", 0, NULL);
1834 SMARTLIST_FOREACH(processed_cells_strings, char *, cp, tor_free(cp));
1835 SMARTLIST_FOREACH(queued_cells_strings, char *, cp, tor_free(cp));
1836 SMARTLIST_FOREACH(time_in_queue_strings, char *, cp, tor_free(cp));
1837 smartlist_free(processed_cells_strings);
1838 smartlist_free(queued_cells_strings);
1839 smartlist_free(time_in_queue_strings);
1841 /* Put everything together. */
1842 format_iso_time(t, now);
1843 tor_asprintf(&result, "cell-stats-end %s (%d s)\n"
1844 "cell-processed-cells %s\n"
1845 "cell-queued-cells %s\n"
1846 "cell-time-in-queue %s\n"
1847 "cell-circuits-per-decile %d\n",
1848 t, (unsigned) (now - start_of_buffer_stats_interval),
1849 processed_cells_string,
1850 queued_cells_string,
1851 time_in_queue_string,
1852 CEIL_DIV(number_of_circuits, SHARES));
1853 tor_free(processed_cells_string);
1854 tor_free(queued_cells_string);
1855 tor_free(time_in_queue_string);
1856 return result;
1857 #undef SHARES
1860 /** If 24 hours have passed since the beginning of the current buffer
1861 * stats period, write buffer stats to $DATADIR/stats/buffer-stats
1862 * (possibly overwriting an existing file) and reset counters. Return
1863 * when we would next want to write buffer stats or 0 if we never want to
1864 * write. */
1865 time_t
1866 rep_hist_buffer_stats_write(time_t now)
1868 char *str = NULL;
1870 if (!start_of_buffer_stats_interval)
1871 return 0; /* Not initialized. */
1872 if (start_of_buffer_stats_interval + WRITE_STATS_INTERVAL > now)
1873 goto done; /* Not ready to write */
1875 /* Add open circuits to the history. */
1876 SMARTLIST_FOREACH_BEGIN(circuit_get_global_list(), circuit_t *, circ) {
1877 rep_hist_buffer_stats_add_circ(circ, now);
1879 SMARTLIST_FOREACH_END(circ);
1881 /* Generate history string. */
1882 str = rep_hist_format_buffer_stats(now);
1884 /* Reset both buffer history and counters of open circuits. */
1885 rep_hist_reset_buffer_stats(now);
1887 /* Try to write to disk. */
1888 if (!check_or_create_data_subdir("stats")) {
1889 write_to_data_subdir("stats", "buffer-stats", str, "buffer statistics");
1892 done:
1893 tor_free(str);
1894 return start_of_buffer_stats_interval + WRITE_STATS_INTERVAL;
1897 /*** Descriptor serving statistics ***/
/** Digestmap to track which descriptors were downloaded this stats
 * collection interval.  It maps descriptor digest to pointers to 1,
 * effectively turning this into a list.  NULL while descriptor stats are
 * not being collected. */
static digestmap_t *served_descs = NULL;

/** Number of how many descriptors were downloaded in total during this
 * interval. */
static unsigned long total_descriptor_downloads;

/** Start time of served descs stats or 0 if we're not collecting those. */
static time_t start_of_served_descs_stats_interval;
1911 /** Initialize descriptor stats. */
1912 void
1913 rep_hist_desc_stats_init(time_t now)
1915 if (served_descs) {
1916 log_warn(LD_BUG, "Called rep_hist_desc_stats_init() when desc stats were "
1917 "already initialized. This is probably harmless.");
1918 return; // Already initialized
1920 served_descs = digestmap_new();
1921 total_descriptor_downloads = 0;
1922 start_of_served_descs_stats_interval = now;
/** Discard the served descs stats gathered so far and begin a fresh,
 * empty interval starting at <b>now</b>. Implemented as a terminate
 * followed by an initialize. */
static void
rep_hist_reset_desc_stats(time_t now)
{
  rep_hist_desc_stats_term();
  rep_hist_desc_stats_init(now);
}
1933 /** Stop collecting served descs stats, so that rep_hist_desc_stats_init() is
1934 * safe to be called again. */
1935 void
1936 rep_hist_desc_stats_term(void)
1938 digestmap_free(served_descs, NULL);
1939 served_descs = NULL;
1940 start_of_served_descs_stats_interval = 0;
1941 total_descriptor_downloads = 0;
1944 /** Helper for rep_hist_desc_stats_write(). Return a newly allocated string
1945 * containing the served desc statistics until now, or NULL if we're not
1946 * collecting served desc stats. Caller must ensure that now is not before
1947 * start_of_served_descs_stats_interval. */
1948 static char *
1949 rep_hist_format_desc_stats(time_t now)
1951 char t[ISO_TIME_LEN+1];
1952 char *result;
1954 digestmap_iter_t *iter;
1955 const char *key;
1956 void *val;
1957 unsigned size;
1958 int *vals, max = 0, q3 = 0, md = 0, q1 = 0, min = 0;
1959 int n = 0;
1961 if (!start_of_served_descs_stats_interval)
1962 return NULL;
1964 size = digestmap_size(served_descs);
1965 if (size > 0) {
1966 vals = tor_calloc(size, sizeof(int));
1967 for (iter = digestmap_iter_init(served_descs);
1968 !digestmap_iter_done(iter);
1969 iter = digestmap_iter_next(served_descs, iter)) {
1970 uintptr_t count;
1971 digestmap_iter_get(iter, &key, &val);
1972 count = (uintptr_t)val;
1973 vals[n++] = (int)count;
1974 (void)key;
1976 max = find_nth_int(vals, size, size-1);
1977 q3 = find_nth_int(vals, size, (3*size-1)/4);
1978 md = find_nth_int(vals, size, (size-1)/2);
1979 q1 = find_nth_int(vals, size, (size-1)/4);
1980 min = find_nth_int(vals, size, 0);
1981 tor_free(vals);
1984 format_iso_time(t, now);
1986 tor_asprintf(&result,
1987 "served-descs-stats-end %s (%d s) total=%lu unique=%u "
1988 "max=%d q3=%d md=%d q1=%d min=%d\n",
1990 (unsigned) (now - start_of_served_descs_stats_interval),
1991 total_descriptor_downloads,
1992 size, max, q3, md, q1, min);
1994 return result;
1997 /** If WRITE_STATS_INTERVAL seconds have passed since the beginning of
1998 * the current served desc stats interval, write the stats to
1999 * $DATADIR/stats/served-desc-stats (possibly appending to an existing file)
2000 * and reset the state for the next interval. Return when we would next want
2001 * to write served desc stats or 0 if we won't want to write. */
2002 time_t
2003 rep_hist_desc_stats_write(time_t now)
2005 char *filename = NULL, *str = NULL;
2007 if (!start_of_served_descs_stats_interval)
2008 return 0; /* We're not collecting stats. */
2009 if (start_of_served_descs_stats_interval + WRITE_STATS_INTERVAL > now)
2010 return start_of_served_descs_stats_interval + WRITE_STATS_INTERVAL;
2012 str = rep_hist_format_desc_stats(now);
2013 tor_assert(str != NULL);
2015 if (check_or_create_data_subdir("stats") < 0) {
2016 goto done;
2018 filename = get_datadir_fname2("stats", "served-desc-stats");
2019 if (append_bytes_to_file(filename, str, strlen(str), 0) < 0)
2020 log_warn(LD_HIST, "Unable to write served descs statistics to disk!");
2022 rep_hist_reset_desc_stats(now);
2024 done:
2025 tor_free(filename);
2026 tor_free(str);
2027 return start_of_served_descs_stats_interval + WRITE_STATS_INTERVAL;
2030 /** Called to note that we've served a given descriptor (by
2031 * digest). Increments the count of descriptors served, and the number
2032 * of times we've served this descriptor. */
2033 void
2034 rep_hist_note_desc_served(const char * desc)
2036 void *val;
2037 uintptr_t count;
2038 if (!served_descs)
2039 return; // We're not collecting stats
2040 val = digestmap_get(served_descs, desc);
2041 count = (uintptr_t)val;
2042 if (count != INT_MAX)
2043 ++count;
2044 digestmap_set(served_descs, desc, (void*)count);
2045 total_descriptor_downloads++;
2048 /*** Connection statistics ***/
2050 /** Internal statistics to track how many requests of each type of
2051 * handshake we've received, and how many we've assigned to cpuworkers.
2052 * Useful for seeing trends in cpu load.
2054 * They are reset at every heartbeat.
2055 * @{ */
2056 STATIC int onion_handshakes_requested[MAX_ONION_HANDSHAKE_TYPE+1] = {0};
2057 STATIC int onion_handshakes_assigned[MAX_ONION_HANDSHAKE_TYPE+1] = {0};
2058 /**@}*/
2060 /** Counters keeping the same stats as above but for the entire duration of the
2061 * process (not reset). */
2062 static uint64_t stats_n_onionskin_assigned[MAX_ONION_HANDSHAKE_TYPE+1] = {0};
2063 static uint64_t stats_n_onionskin_dropped[MAX_ONION_HANDSHAKE_TYPE+1] = {0};
2065 /** A new onionskin (using the <b>type</b> handshake) has arrived. */
2066 void
2067 rep_hist_note_circuit_handshake_requested(uint16_t type)
2069 if (type <= MAX_ONION_HANDSHAKE_TYPE)
2070 onion_handshakes_requested[type]++;
2073 /** We've sent an onionskin (using the <b>type</b> handshake) to a
2074 * cpuworker. */
2075 void
2076 rep_hist_note_circuit_handshake_assigned(uint16_t type)
2078 if (type <= MAX_ONION_HANDSHAKE_TYPE) {
2079 onion_handshakes_assigned[type]++;
2080 stats_n_onionskin_assigned[type]++;
2084 /** We've just drop an onionskin (using the <b>type</b> handshake) due to being
2085 * overloaded. */
2086 void
2087 rep_hist_note_circuit_handshake_dropped(uint16_t type)
2089 if (type <= MAX_ONION_HANDSHAKE_TYPE) {
2090 stats_n_onionskin_dropped[type]++;
2094 /** Get the circuit handshake value that is requested. */
2095 MOCK_IMPL(int,
2096 rep_hist_get_circuit_handshake_requested, (uint16_t type))
2098 if (BUG(type > MAX_ONION_HANDSHAKE_TYPE)) {
2099 return 0;
2101 return onion_handshakes_requested[type];
2104 /** Get the circuit handshake value that is assigned. */
2105 MOCK_IMPL(int,
2106 rep_hist_get_circuit_handshake_assigned, (uint16_t type))
2108 if (BUG(type > MAX_ONION_HANDSHAKE_TYPE)) {
2109 return 0;
2111 return onion_handshakes_assigned[type];
2114 /** Get the total number of circuit handshake value that is assigned. */
2115 MOCK_IMPL(uint64_t,
2116 rep_hist_get_circuit_n_handshake_assigned, (uint16_t type))
2118 if (BUG(type > MAX_ONION_HANDSHAKE_TYPE)) {
2119 return 0;
2121 return stats_n_onionskin_assigned[type];
2124 /** Get the total number of circuit handshake value that is dropped. */
2125 MOCK_IMPL(uint64_t,
2126 rep_hist_get_circuit_n_handshake_dropped, (uint16_t type))
2128 if (BUG(type > MAX_ONION_HANDSHAKE_TYPE)) {
2129 return 0;
2131 return stats_n_onionskin_dropped[type];
2134 /** Log our onionskin statistics since the last time we were called. */
2135 void
2136 rep_hist_log_circuit_handshake_stats(time_t now)
2138 (void)now;
2139 log_notice(LD_HEARTBEAT, "Circuit handshake stats since last time: "
2140 "%d/%d TAP, %d/%d NTor.",
2141 onion_handshakes_assigned[ONION_HANDSHAKE_TYPE_TAP],
2142 onion_handshakes_requested[ONION_HANDSHAKE_TYPE_TAP],
2143 onion_handshakes_assigned[ONION_HANDSHAKE_TYPE_NTOR],
2144 onion_handshakes_requested[ONION_HANDSHAKE_TYPE_NTOR]);
2145 memset(onion_handshakes_assigned, 0, sizeof(onion_handshakes_assigned));
2146 memset(onion_handshakes_requested, 0, sizeof(onion_handshakes_requested));
2149 /* Hidden service statistics section */
2151 /** Start of the current hidden service stats interval or 0 if we're
2152 * not collecting hidden service statistics. */
2153 static time_t start_of_hs_v2_stats_interval;
2155 /** Our v2 statistics structure singleton. */
2156 static hs_v2_stats_t *hs_v2_stats = NULL;
2158 /** HSv2 stats */
2160 /** Allocate, initialize and return an hs_v2_stats_t structure. */
2161 static hs_v2_stats_t *
2162 hs_v2_stats_new(void)
2164 hs_v2_stats_t *new_hs_v2_stats = tor_malloc_zero(sizeof(hs_v2_stats_t));
2166 return new_hs_v2_stats;
2169 #define hs_v2_stats_free(val) \
2170 FREE_AND_NULL(hs_v2_stats_t, hs_v2_stats_free_, (val))
2172 /** Free an hs_v2_stats_t structure. */
2173 static void
2174 hs_v2_stats_free_(hs_v2_stats_t *victim_hs_v2_stats)
2176 if (!victim_hs_v2_stats) {
2177 return;
2179 tor_free(victim_hs_v2_stats);
2182 /** Clear history of hidden service statistics and set the measurement
2183 * interval start to <b>now</b>. */
2184 static void
2185 rep_hist_reset_hs_v2_stats(time_t now)
2187 if (!hs_v2_stats) {
2188 hs_v2_stats = hs_v2_stats_new();
2191 hs_v2_stats->rp_v2_relay_cells_seen = 0;
2193 start_of_hs_v2_stats_interval = now;
2196 /*** HSv3 stats ******/
2198 /** Start of the current hidden service stats interval or 0 if we're not
2199 * collecting hidden service statistics.
2201 * This is particularly important for v3 statistics since this variable
2202 * controls the start time of initial v3 stats collection. It's initialized by
2203 * rep_hist_hs_stats_init() to the next time period start (i.e. 12:00UTC), and
2204 * should_collect_v3_stats() ensures that functions that collect v3 stats do
2205 * not do so sooner than that.
2207 * Collecting stats from 12:00UTC to 12:00UTC is extremely important for v3
2208 * stats because rep_hist_hsdir_stored_maybe_new_v3_onion() uses the blinded
2209 * key of each onion service as its double-counting index. Onion services
2210 * rotate their descriptor at around 00:00UTC which means that their blinded
2211 * key also changes around that time. However the precise time that onion
2212 * services rotate their descriptors is actually when they fetch a new
2213 * 00:00UTC consensus and that happens at a random time (e.g. it can even
2214 * happen at 02:00UTC). This means that if we started keeping v3 stats at
2215 * around 00:00UTC we wouldn't be able to tell when onion services change
2216 * their blinded key and hence we would double count an unpredictable amount
2217 * of them (for example, if an onion service fetches the 00:00UTC consensus at
2218 * 01:00UTC it would upload to its old HSDir at 00:45UTC, and then to a
2219 * different HSDir at 01:50UTC).
2221 * For this reason, we start collecting statistics at 12:00UTC. This way we
2222 * know that by the time we stop collecting statistics for that time period 24
2223 * hours later, all the onion services have switched to their new blinded
2224 * key. This way we can predict much better how much double counting has been
2225 * performed.
2227 static time_t start_of_hs_v3_stats_interval;
2229 /** Our v3 statistics structure singleton. */
2230 static hs_v3_stats_t *hs_v3_stats = NULL;
2232 /** Allocate, initialize and return an hs_v3_stats_t structure. */
2233 static hs_v3_stats_t *
2234 hs_v3_stats_new(void)
2236 hs_v3_stats_t *new_hs_v3_stats = tor_malloc_zero(sizeof(hs_v3_stats_t));
2237 new_hs_v3_stats->v3_onions_seen_this_period = digest256map_new();
2239 return new_hs_v3_stats;
2242 #define hs_v3_stats_free(val) \
2243 FREE_AND_NULL(hs_v3_stats_t, hs_v3_stats_free_, (val))
2245 /** Free an hs_v3_stats_t structure. */
2246 static void
2247 hs_v3_stats_free_(hs_v3_stats_t *victim_hs_v3_stats)
2249 if (!victim_hs_v3_stats) {
2250 return;
2253 digest256map_free(victim_hs_v3_stats->v3_onions_seen_this_period, NULL);
2254 tor_free(victim_hs_v3_stats);
2257 /** Clear history of hidden service statistics and set the measurement
2258 * interval start to <b>now</b>. */
2259 static void
2260 rep_hist_reset_hs_v3_stats(time_t now)
2262 if (!hs_v3_stats) {
2263 hs_v3_stats = hs_v3_stats_new();
2266 digest256map_free(hs_v3_stats->v3_onions_seen_this_period, NULL);
2267 hs_v3_stats->v3_onions_seen_this_period = digest256map_new();
2269 hs_v3_stats->rp_v3_relay_cells_seen = 0;
2271 start_of_hs_v3_stats_interval = now;
2274 /** Return true if it's a good time to collect v3 stats.
2276 * v3 stats have a strict stats collection period (from 12:00UTC to 12:00UTC
2277 * on the real network). We don't want to collect statistics if (for example)
2278 * we just booted and it's 03:00UTC; we will wait until 12:00UTC before we
2279 * start collecting statistics to make sure that the final result represents
2280 * the whole collection period. This behavior is controlled by
2281 * rep_hist_hs_stats_init().
2283 MOCK_IMPL(STATIC bool,
2284 should_collect_v3_stats,(void))
2286 return start_of_hs_v3_stats_interval <= approx_time();
2289 /** We just received a new descriptor with <b>blinded_key</b>. See if we've
2290 * seen this blinded key before, and if not add it to the stats. */
2291 void
2292 rep_hist_hsdir_stored_maybe_new_v3_onion(const uint8_t *blinded_key)
2294 /* Return early if we don't collect HSv3 stats, or if it's not yet the time
2295 * to collect them. */
2296 if (!hs_v3_stats || !should_collect_v3_stats()) {
2297 return;
2300 bool seen_before =
2301 !!digest256map_get(hs_v3_stats->v3_onions_seen_this_period,
2302 blinded_key);
2304 log_info(LD_GENERAL, "Considering v3 descriptor with %s (%sseen before)",
2305 safe_str(hex_str((char*)blinded_key, 32)),
2306 seen_before ? "" : "not ");
2308 /* Count it if we haven't seen it before. */
2309 if (!seen_before) {
2310 digest256map_set(hs_v3_stats->v3_onions_seen_this_period,
2311 blinded_key, (void*)(uintptr_t)1);
2315 /** We saw a new HS relay cell: count it!
2316 * If <b>is_v2</b> is set then it's a v2 RP cell, otherwise it's a v3. */
2317 void
2318 rep_hist_seen_new_rp_cell(bool is_v2)
2320 log_debug(LD_GENERAL, "New RP cell (%d)", is_v2);
2322 if (is_v2 && hs_v2_stats) {
2323 hs_v2_stats->rp_v2_relay_cells_seen++;
2324 } else if (!is_v2 && hs_v3_stats && should_collect_v3_stats()) {
2325 hs_v3_stats->rp_v3_relay_cells_seen++;
2329 /** Generic HS stats code */
2331 /** Initialize v2 and v3 hidden service statistics. */
2332 void
2333 rep_hist_hs_stats_init(time_t now)
2335 if (!hs_v2_stats) {
2336 hs_v2_stats = hs_v2_stats_new();
2339 /* Start collecting v2 stats straight away */
2340 start_of_hs_v2_stats_interval = now;
2342 if (!hs_v3_stats) {
2343 hs_v3_stats = hs_v3_stats_new();
2346 /* Start collecting v3 stats at the next 12:00 UTC */
2347 start_of_hs_v3_stats_interval = hs_get_start_time_of_next_time_period(now);
/** Stop collecting hidden service stats in a way that lets us start again
 * later with rep_hist_hs_stats_init(): both the v2 and the v3 stats are
 * reset with an interval start of 0, which marks them as not being
 * collected. */
void
rep_hist_hs_stats_term(void)
{
  const time_t not_collecting = 0;

  rep_hist_reset_hs_v2_stats(not_collecting);
  rep_hist_reset_hs_v3_stats(not_collecting);
}
/*** Stats reporting code ***/

/** Number of cells to hide from the adversary by adding noise from the
 * Laplace distribution. Divided by REND_CELLS_EPSILON, this is the Laplace
 * parameter b. Must be greater than 0. */
#define REND_CELLS_DELTA_F 2048
/** Security parameter for obfuscating the number of cells, with a value in
 * ]0.0, 1.0]. Smaller values obfuscate observations more, but at the same
 * time make the statistics less usable. */
#define REND_CELLS_EPSILON 0.3
/** Cell counts are hidden from the adversary by rounding them up to the
 * next multiple of this number. */
#define REND_CELLS_BIN_SIZE 1024
/** Number of service identities to hide from the adversary by adding noise
 * from the Laplace distribution. Divided by ONIONS_SEEN_EPSILON, this is
 * the Laplace parameter b. Must be greater than 0. */
#define ONIONS_SEEN_DELTA_F 8
/** Security parameter for obfuscating the number of service identities,
 * with a value in ]0.0, 1.0]. Smaller values obfuscate observations more,
 * but at the same time make the statistics less usable. */
#define ONIONS_SEEN_EPSILON 0.3
/** Service identity counts are hidden from the adversary by rounding them
 * up to the next multiple of this number. */
#define ONIONS_SEEN_BIN_SIZE 8
2384 /** Allocate and return a string containing hidden service stats that
2385 * are meant to be placed in the extra-info descriptor.
2387 * Function works for both v2 and v3 stats depending on <b>is_v3</b>. */
2388 STATIC char *
2389 rep_hist_format_hs_stats(time_t now, bool is_v3)
2391 char t[ISO_TIME_LEN+1];
2392 char *hs_stats_string;
2393 int64_t obfuscated_onions_seen, obfuscated_cells_seen;
2395 uint64_t rp_cells_seen = is_v3 ?
2396 hs_v3_stats->rp_v3_relay_cells_seen : hs_v2_stats->rp_v2_relay_cells_seen;
2397 size_t onions_seen = is_v3 ?
2398 digest256map_size(hs_v3_stats->v3_onions_seen_this_period) : 0;
2399 time_t start_of_hs_stats_interval = is_v3 ?
2400 start_of_hs_v3_stats_interval : start_of_hs_v2_stats_interval;
2402 uint64_t rounded_cells_seen
2403 = round_uint64_to_next_multiple_of(rp_cells_seen, REND_CELLS_BIN_SIZE);
2404 rounded_cells_seen = MIN(rounded_cells_seen, INT64_MAX);
2405 obfuscated_cells_seen = add_laplace_noise((int64_t)rounded_cells_seen,
2406 crypto_rand_double(),
2407 REND_CELLS_DELTA_F, REND_CELLS_EPSILON);
2409 uint64_t rounded_onions_seen =
2410 round_uint64_to_next_multiple_of(onions_seen, ONIONS_SEEN_BIN_SIZE);
2411 rounded_onions_seen = MIN(rounded_onions_seen, INT64_MAX);
2412 obfuscated_onions_seen = add_laplace_noise((int64_t)rounded_onions_seen,
2413 crypto_rand_double(), ONIONS_SEEN_DELTA_F,
2414 ONIONS_SEEN_EPSILON);
2416 format_iso_time(t, now);
2417 tor_asprintf(&hs_stats_string, "%s %s (%u s)\n"
2418 "%s %"PRId64" delta_f=%d epsilon=%.2f bin_size=%d\n"
2419 "%s %"PRId64" delta_f=%d epsilon=%.2f bin_size=%d\n",
2420 is_v3 ? "hidserv-v3-stats-end" : "hidserv-stats-end",
2421 t, (unsigned) (now - start_of_hs_stats_interval),
2422 is_v3 ?
2423 "hidserv-rend-v3-relayed-cells" : "hidserv-rend-relayed-cells",
2424 obfuscated_cells_seen, REND_CELLS_DELTA_F,
2425 REND_CELLS_EPSILON, REND_CELLS_BIN_SIZE,
2426 is_v3 ? "hidserv-dir-v3-onions-seen" :"hidserv-dir-onions-seen",
2427 obfuscated_onions_seen, ONIONS_SEEN_DELTA_F,
2428 ONIONS_SEEN_EPSILON, ONIONS_SEEN_BIN_SIZE);
2430 return hs_stats_string;
2433 /** If 24 hours have passed since the beginning of the current HS
2434 * stats period, write buffer stats to $DATADIR/stats/hidserv-v3-stats
2435 * (possibly overwriting an existing file) and reset counters. Return
2436 * when we would next want to write buffer stats or 0 if we never want to
2437 * write. Function works for both v2 and v3 stats depending on <b>is_v3</b>.
2439 time_t
2440 rep_hist_hs_stats_write(time_t now, bool is_v3)
2442 char *str = NULL;
2444 time_t start_of_hs_stats_interval = is_v3 ?
2445 start_of_hs_v3_stats_interval : start_of_hs_v2_stats_interval;
2447 if (!start_of_hs_stats_interval) {
2448 return 0; /* Not initialized. */
2451 if (start_of_hs_stats_interval + WRITE_STATS_INTERVAL > now) {
2452 goto done; /* Not ready to write */
2455 /* Generate history string. */
2456 str = rep_hist_format_hs_stats(now, is_v3);
2458 /* Reset HS history. */
2459 if (is_v3) {
2460 rep_hist_reset_hs_v3_stats(now);
2461 } else {
2462 rep_hist_reset_hs_v2_stats(now);
2465 /* Try to write to disk. */
2466 if (!check_or_create_data_subdir("stats")) {
2467 write_to_data_subdir("stats",
2468 is_v3 ? "hidserv-v3-stats" : "hidserv-stats",
2469 str, "hidden service stats");
2472 done:
2473 tor_free(str);
2474 return start_of_hs_stats_interval + WRITE_STATS_INTERVAL;
2477 static uint64_t link_proto_count[MAX_LINK_PROTO+1][2];
2479 /** Note that we negotiated link protocol version <b>link_proto</b>, on
2480 * a connection that started here iff <b>started_here</b> is true.
2482 void
2483 rep_hist_note_negotiated_link_proto(unsigned link_proto, int started_here)
2485 started_here = !!started_here; /* force to 0 or 1 */
2486 if (link_proto > MAX_LINK_PROTO) {
2487 log_warn(LD_BUG, "Can't log link protocol %u", link_proto);
2488 return;
2491 link_proto_count[link_proto][started_here]++;
2495 * Update the maximum count of total pending channel padding timers
2496 * in this period.
2498 void
2499 rep_hist_padding_count_timers(uint64_t num_timers)
2501 if (num_timers > padding_current.maximum_chanpad_timers) {
2502 padding_current.maximum_chanpad_timers = num_timers;
2507 * Count a cell that we sent for padding overhead statistics.
2509 * RELAY_COMMAND_DROP and CELL_PADDING are accounted separately. Both should be
2510 * counted for PADDING_TYPE_TOTAL.
2512 void
2513 rep_hist_padding_count_write(padding_type_t type)
2515 switch (type) {
2516 case PADDING_TYPE_DROP:
2517 padding_current.write_drop_cell_count++;
2518 break;
2519 case PADDING_TYPE_CELL:
2520 padding_current.write_pad_cell_count++;
2521 break;
2522 case PADDING_TYPE_TOTAL:
2523 padding_current.write_cell_count++;
2524 break;
2525 case PADDING_TYPE_ENABLED_TOTAL:
2526 padding_current.enabled_write_cell_count++;
2527 break;
2528 case PADDING_TYPE_ENABLED_CELL:
2529 padding_current.enabled_write_pad_cell_count++;
2530 break;
2535 * Count a cell that we've received for padding overhead statistics.
2537 * RELAY_COMMAND_DROP and CELL_PADDING are accounted separately. Both should be
2538 * counted for PADDING_TYPE_TOTAL.
2540 void
2541 rep_hist_padding_count_read(padding_type_t type)
2543 switch (type) {
2544 case PADDING_TYPE_DROP:
2545 padding_current.read_drop_cell_count++;
2546 break;
2547 case PADDING_TYPE_CELL:
2548 padding_current.read_pad_cell_count++;
2549 break;
2550 case PADDING_TYPE_TOTAL:
2551 padding_current.read_cell_count++;
2552 break;
2553 case PADDING_TYPE_ENABLED_TOTAL:
2554 padding_current.enabled_read_cell_count++;
2555 break;
2556 case PADDING_TYPE_ENABLED_CELL:
2557 padding_current.enabled_read_pad_cell_count++;
2558 break;
2563 * Reset our current padding statistics. Called once every 24 hours.
2565 void
2566 rep_hist_reset_padding_counts(void)
2568 memset(&padding_current, 0, sizeof(padding_current));
2572 * Copy our current cell counts into a structure for listing in our
2573 * extra-info descriptor. Also perform appropriate rounding and redaction.
2575 * This function is called once every 24 hours.
2577 #define MIN_CELL_COUNTS_TO_PUBLISH 1
2578 #define ROUND_CELL_COUNTS_TO 10000
2579 void
2580 rep_hist_prep_published_padding_counts(time_t now)
2582 memcpy(&padding_published, &padding_current, sizeof(padding_published));
2584 if (padding_published.read_cell_count < MIN_CELL_COUNTS_TO_PUBLISH ||
2585 padding_published.write_cell_count < MIN_CELL_COUNTS_TO_PUBLISH) {
2586 memset(&padding_published, 0, sizeof(padding_published));
2587 return;
2590 format_iso_time(padding_published.first_published_at, now);
2591 #define ROUND_AND_SET_COUNT(x) (x) = round_uint64_to_next_multiple_of((x), \
2592 ROUND_CELL_COUNTS_TO)
2593 ROUND_AND_SET_COUNT(padding_published.read_pad_cell_count);
2594 ROUND_AND_SET_COUNT(padding_published.write_pad_cell_count);
2595 ROUND_AND_SET_COUNT(padding_published.read_drop_cell_count);
2596 ROUND_AND_SET_COUNT(padding_published.write_drop_cell_count);
2597 ROUND_AND_SET_COUNT(padding_published.write_cell_count);
2598 ROUND_AND_SET_COUNT(padding_published.read_cell_count);
2599 ROUND_AND_SET_COUNT(padding_published.enabled_read_cell_count);
2600 ROUND_AND_SET_COUNT(padding_published.enabled_read_pad_cell_count);
2601 ROUND_AND_SET_COUNT(padding_published.enabled_write_cell_count);
2602 ROUND_AND_SET_COUNT(padding_published.enabled_write_pad_cell_count);
2603 #undef ROUND_AND_SET_COUNT
2607 * Returns an allocated string for extra-info documents for publishing
2608 * padding statistics from the last 24 hour interval.
2610 char *
2611 rep_hist_get_padding_count_lines(void)
2613 char *result = NULL;
2615 if (!padding_published.read_cell_count ||
2616 !padding_published.write_cell_count) {
2617 return NULL;
2620 tor_asprintf(&result, "padding-counts %s (%d s)"
2621 " bin-size=%"PRIu64
2622 " write-drop=%"PRIu64
2623 " write-pad=%"PRIu64
2624 " write-total=%"PRIu64
2625 " read-drop=%"PRIu64
2626 " read-pad=%"PRIu64
2627 " read-total=%"PRIu64
2628 " enabled-read-pad=%"PRIu64
2629 " enabled-read-total=%"PRIu64
2630 " enabled-write-pad=%"PRIu64
2631 " enabled-write-total=%"PRIu64
2632 " max-chanpad-timers=%"PRIu64
2633 "\n",
2634 padding_published.first_published_at,
2635 REPHIST_CELL_PADDING_COUNTS_INTERVAL,
2636 (uint64_t)ROUND_CELL_COUNTS_TO,
2637 (padding_published.write_drop_cell_count),
2638 (padding_published.write_pad_cell_count),
2639 (padding_published.write_cell_count),
2640 (padding_published.read_drop_cell_count),
2641 (padding_published.read_pad_cell_count),
2642 (padding_published.read_cell_count),
2643 (padding_published.enabled_read_pad_cell_count),
2644 (padding_published.enabled_read_cell_count),
2645 (padding_published.enabled_write_pad_cell_count),
2646 (padding_published.enabled_write_cell_count),
2647 (padding_published.maximum_chanpad_timers)
2650 return result;
2653 /** Log a heartbeat message explaining how many connections of each link
2654 * protocol version we have used.
2656 void
2657 rep_hist_log_link_protocol_counts(void)
2659 smartlist_t *lines = smartlist_new();
2661 for (int i = 1; i <= MAX_LINK_PROTO; i++) {
2662 char *line = NULL;
2663 tor_asprintf(&line, "initiated %"PRIu64" and received "
2664 "%"PRIu64" v%d connections", link_proto_count[i][1],
2665 link_proto_count[i][0], i);
2666 smartlist_add(lines, line);
2669 char *log_line = smartlist_join_strings(lines, "; ", 0, NULL);
2671 log_notice(LD_HEARTBEAT, "Since startup we %s.", log_line);
2673 SMARTLIST_FOREACH(lines, char *, s, tor_free(s));
2674 smartlist_free(lines);
2675 tor_free(log_line);
2678 /** Free all storage held by the OR/link history caches, by the
2679 * bandwidth history arrays, by the port history, or by statistics . */
2680 void
2681 rep_hist_free_all(void)
2683 hs_v2_stats_free(hs_v2_stats);
2684 hs_v3_stats_free(hs_v3_stats);
2685 digestmap_free(history_map, free_or_history);
2687 tor_free(exit_bytes_read);
2688 tor_free(exit_bytes_written);
2689 tor_free(exit_streams);
2690 predicted_ports_free_all();
2691 conn_stats_free_all();
2693 if (circuits_for_buffer_stats) {
2694 SMARTLIST_FOREACH(circuits_for_buffer_stats, circ_buffer_stats_t *, s,
2695 tor_free(s));
2696 smartlist_free(circuits_for_buffer_stats);
2697 circuits_for_buffer_stats = NULL;
2699 rep_hist_desc_stats_term();
2700 total_descriptor_downloads = 0;
2702 tor_assert_nonfatal(rephist_total_alloc == 0);
2703 tor_assert_nonfatal_once(rephist_total_num == 0);
#ifdef TOR_UNIT_TESTS
/** Only exists for unit tests: return the HSv2 stats singleton (may be
 * NULL). */
const hs_v2_stats_t *
rep_hist_get_hs_v2_stats(void)
{
  const hs_v2_stats_t *stats = hs_v2_stats;
  return stats;
}

/** Only exists for unit tests: return the HSv3 stats singleton (may be
 * NULL). */
const hs_v3_stats_t *
rep_hist_get_hs_v3_stats(void)
{
  const hs_v3_stats_t *stats = hs_v3_stats;
  return stats;
}
#endif /* defined(TOR_UNIT_TESTS) */