relay: Change DNS timeout label on MetricsPort
[tor.git] / src / feature / relay / relay_metrics.c
blobfc8eb10d1b0d6f43f7ffaee4bb12eb6fcfdcaf31
1 /* Copyright (c) 2021, The Tor Project, Inc. */
2 /* See LICENSE for licensing information */
4 /**
5 * @file relay_metrics.c
6 * @brief Relay metrics exposed through the MetricsPort
7 **/
9 #define RELAY_METRICS_ENTRY_PRIVATE
11 #include "orconfig.h"
13 #include "core/or/or.h"
14 #include "core/or/relay.h"
16 #include "lib/malloc/malloc.h"
17 #include "lib/container/smartlist.h"
18 #include "lib/metrics/metrics_store.h"
19 #include "lib/log/util_bug.h"
21 #include "feature/relay/relay_metrics.h"
22 #include "feature/stats/rephist.h"
24 #include <event2/dns.h>
/** Forward declarations of the fill functions referenced by the base_metrics
 * table, listed in the same order as the table entries. Each function is
 * defined further down in this file. */
static void fill_oom_values(void);
static void fill_onionskins_values(void);
static void fill_socket_values(void);
static void fill_global_bw_limit_values(void);
static void fill_dns_query_values(void);
static void fill_dns_error_values(void);
static void fill_tcp_exhaustion_values(void);
35 /** The base metrics that is a static array of metrics added to the metrics
36 * store.
38 * The key member MUST be also the index of the entry in the array. */
39 static const relay_metrics_entry_t base_metrics[] =
42 .key = RELAY_METRICS_NUM_OOM_BYTES,
43 .type = METRICS_TYPE_COUNTER,
44 .name = METRICS_NAME(relay_load_oom_bytes_total),
45 .help = "Total number of bytes the OOM has freed by subsystem",
46 .fill_fn = fill_oom_values,
49 .key = RELAY_METRICS_NUM_ONIONSKINS,
50 .type = METRICS_TYPE_COUNTER,
51 .name = METRICS_NAME(relay_load_onionskins_total),
52 .help = "Total number of onionskins handled",
53 .fill_fn = fill_onionskins_values,
56 .key = RELAY_METRICS_NUM_SOCKETS,
57 .type = METRICS_TYPE_GAUGE,
58 .name = METRICS_NAME(relay_load_socket_total),
59 .help = "Total number of sockets",
60 .fill_fn = fill_socket_values,
63 .key = RELAY_METRICS_NUM_GLOBAL_RW_LIMIT,
64 .type = METRICS_TYPE_COUNTER,
65 .name = METRICS_NAME(relay_load_global_rate_limit_reached_total),
66 .help = "Total number of global connection bucket limit reached",
67 .fill_fn = fill_global_bw_limit_values,
70 .key = RELAY_METRICS_NUM_DNS,
71 .type = METRICS_TYPE_COUNTER,
72 .name = METRICS_NAME(relay_exit_dns_query_total),
73 .help = "Total number of DNS queries done by this relay",
74 .fill_fn = fill_dns_query_values,
77 .key = RELAY_METRICS_NUM_DNS_ERRORS,
78 .type = METRICS_TYPE_COUNTER,
79 .name = METRICS_NAME(relay_exit_dns_error_total),
80 .help = "Total number of DNS errors encountered by this relay",
81 .fill_fn = fill_dns_error_values,
84 .key = RELAY_METRICS_NUM_TCP_EXHAUSTION,
85 .type = METRICS_TYPE_COUNTER,
86 .name = METRICS_NAME(relay_load_tcp_exhaustion_total),
87 .help = "Total number of times we ran out of TCP ports",
88 .fill_fn = fill_tcp_exhaustion_values,
91 static const size_t num_base_metrics = ARRAY_LENGTH(base_metrics);
93 /** The only and single store of all the relay metrics. */
94 static metrics_store_t *the_store;
96 /** Helper function to convert an handshake type into a string. */
97 static inline const char *
98 handshake_type_to_str(const uint16_t type)
100 switch (type) {
101 case ONION_HANDSHAKE_TYPE_TAP:
102 return "tap";
103 case ONION_HANDSHAKE_TYPE_FAST:
104 return "fast";
105 case ONION_HANDSHAKE_TYPE_NTOR:
106 return "ntor";
107 default:
108 // LCOV_EXCL_START
109 tor_assert_unreached();
110 // LCOV_EXCL_STOP
114 /** Fill function for the RELAY_METRICS_NUM_DNS metrics. */
115 static void
116 fill_tcp_exhaustion_values(void)
118 metrics_store_entry_t *sentry;
119 const relay_metrics_entry_t *rentry =
120 &base_metrics[RELAY_METRICS_NUM_TCP_EXHAUSTION];
122 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
123 rentry->help);
124 metrics_store_entry_update(sentry, rep_hist_get_n_tcp_exhaustion());
/* NOTE: Disable the record type label until libevent is fixed. */
#if 0
/** One DNS record type: its printable name, used as the "record" label
 * value, and the corresponding libevent constant. */
struct dns_type {
  const char *name;
  uint8_t type;
};
/** Helper table of the DNS record types a per-record breakdown would
 * iterate over. */
static struct dns_type dns_types[] = {
  { .name = "A",    .type = DNS_IPv4_A },
  { .name = "PTR",  .type = DNS_PTR },
  { .name = "AAAA", .type = DNS_IPv6_AAAA },
};
static const size_t num_dns_types = ARRAY_LENGTH(dns_types);
#endif
142 /** Fill function for the RELAY_METRICS_NUM_DNS_ERRORS metrics. */
143 static void
144 fill_dns_error_values(void)
146 metrics_store_entry_t *sentry;
147 const relay_metrics_entry_t *rentry =
148 &base_metrics[RELAY_METRICS_NUM_DNS_ERRORS];
150 /* Helper array to map libeven DNS errors to their names and so we can
151 * iterate over this array to add all metrics. */
152 static struct dns_error {
153 const char *name;
154 uint8_t key;
155 } errors[] = {
156 { .name = "success", .key = DNS_ERR_NONE },
157 { .name = "format", .key = DNS_ERR_FORMAT },
158 { .name = "serverfailed", .key = DNS_ERR_SERVERFAILED },
159 { .name = "notexist", .key = DNS_ERR_NOTEXIST },
160 { .name = "notimpl", .key = DNS_ERR_NOTIMPL },
161 { .name = "refused", .key = DNS_ERR_REFUSED },
162 { .name = "truncated", .key = DNS_ERR_TRUNCATED },
163 { .name = "unknown", .key = DNS_ERR_UNKNOWN },
164 { .name = "tor_timeout", .key = DNS_ERR_TIMEOUT },
165 { .name = "shutdown", .key = DNS_ERR_SHUTDOWN },
166 { .name = "cancel", .key = DNS_ERR_CANCEL },
167 { .name = "nodata", .key = DNS_ERR_NODATA },
169 static const size_t num_errors = ARRAY_LENGTH(errors);
171 /* NOTE: Disable the record type label until libevent is fixed. */
172 #if 0
173 for (size_t i = 0; i < num_dns_types; i++) {
174 /* Dup the label because metrics_format_label() returns a pointer to a
175 * string on the stack and we need that label for all metrics. */
176 char *record_label =
177 tor_strdup(metrics_format_label("record", dns_types[i].name));
179 for (size_t j = 0; j < num_errors; j++) {
180 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
181 rentry->help);
182 metrics_store_entry_add_label(sentry, record_label);
183 metrics_store_entry_add_label(sentry,
184 metrics_format_label("reason", errors[j].name));
185 metrics_store_entry_update(sentry,
186 rep_hist_get_n_dns_error(dns_types[i].type, errors[j].key));
188 tor_free(record_label);
190 #endif
192 /* Put in the DNS errors, unfortunately not per-type for now. */
193 for (size_t j = 0; j < num_errors; j++) {
194 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
195 rentry->help);
196 metrics_store_entry_add_label(sentry,
197 metrics_format_label("reason", errors[j].name));
198 metrics_store_entry_update(sentry,
199 rep_hist_get_n_dns_error(0, errors[j].key));
203 /** Fill function for the RELAY_METRICS_NUM_DNS metrics. */
204 static void
205 fill_dns_query_values(void)
207 metrics_store_entry_t *sentry;
208 const relay_metrics_entry_t *rentry =
209 &base_metrics[RELAY_METRICS_NUM_DNS];
211 /* NOTE: Disable the record type label until libevent is fixed (#40490). */
212 #if 0
213 for (size_t i = 0; i < num_dns_types; i++) {
214 /* Dup the label because metrics_format_label() returns a pointer to a
215 * string on the stack and we need that label for all metrics. */
216 char *record_label =
217 tor_strdup(metrics_format_label("record", dns_types[i].name));
218 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
219 rentry->help);
220 metrics_store_entry_add_label(sentry, record_label);
221 metrics_store_entry_update(sentry,
222 rep_hist_get_n_dns_request(dns_types[i].type));
223 tor_free(record_label);
225 #endif
227 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
228 rentry->help);
229 metrics_store_entry_update(sentry, rep_hist_get_n_dns_request(0));
232 /** Fill function for the RELAY_METRICS_NUM_GLOBAL_RW_LIMIT metrics. */
233 static void
234 fill_global_bw_limit_values(void)
236 metrics_store_entry_t *sentry;
237 const relay_metrics_entry_t *rentry =
238 &base_metrics[RELAY_METRICS_NUM_GLOBAL_RW_LIMIT];
240 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
241 rentry->help);
242 metrics_store_entry_add_label(sentry,
243 metrics_format_label("side", "read"));
244 metrics_store_entry_update(sentry, rep_hist_get_n_read_limit_reached());
246 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
247 rentry->help);
248 metrics_store_entry_add_label(sentry,
249 metrics_format_label("side", "write"));
250 metrics_store_entry_update(sentry, rep_hist_get_n_write_limit_reached());
253 /** Fill function for the RELAY_METRICS_NUM_SOCKETS metrics. */
254 static void
255 fill_socket_values(void)
257 metrics_store_entry_t *sentry;
258 const relay_metrics_entry_t *rentry =
259 &base_metrics[RELAY_METRICS_NUM_SOCKETS];
261 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
262 rentry->help);
263 metrics_store_entry_add_label(sentry,
264 metrics_format_label("state", "opened"));
265 metrics_store_entry_update(sentry, get_n_open_sockets());
267 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
268 rentry->help);
269 metrics_store_entry_update(sentry, get_max_sockets());
272 /** Fill function for the RELAY_METRICS_NUM_ONIONSKINS metrics. */
273 static void
274 fill_onionskins_values(void)
276 metrics_store_entry_t *sentry;
277 const relay_metrics_entry_t *rentry =
278 &base_metrics[RELAY_METRICS_NUM_ONIONSKINS];
280 for (uint16_t t = 0; t <= MAX_ONION_HANDSHAKE_TYPE; t++) {
281 /* Dup the label because metrics_format_label() returns a pointer to a
282 * string on the stack and we need that label for all metrics. */
283 char *type_label =
284 tor_strdup(metrics_format_label("type", handshake_type_to_str(t)));
285 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
286 rentry->help);
287 metrics_store_entry_add_label(sentry, type_label);
288 metrics_store_entry_add_label(sentry,
289 metrics_format_label("action", "processed"));
290 metrics_store_entry_update(sentry,
291 rep_hist_get_circuit_n_handshake_assigned(t));
293 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
294 rentry->help);
295 metrics_store_entry_add_label(sentry, type_label);
296 metrics_store_entry_add_label(sentry,
297 metrics_format_label("action", "dropped"));
298 metrics_store_entry_update(sentry,
299 rep_hist_get_circuit_n_handshake_dropped(t));
300 tor_free(type_label);
304 /** Fill function for the RELAY_METRICS_NUM_OOM_BYTES metrics. */
305 static void
306 fill_oom_values(void)
308 metrics_store_entry_t *sentry;
309 const relay_metrics_entry_t *rentry =
310 &base_metrics[RELAY_METRICS_NUM_OOM_BYTES];
312 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
313 rentry->help);
314 metrics_store_entry_add_label(sentry,
315 metrics_format_label("subsys", "cell"));
316 metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_cell);
318 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
319 rentry->help);
320 metrics_store_entry_add_label(sentry,
321 metrics_format_label("subsys", "dns"));
322 metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_dns);
324 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
325 rentry->help);
326 metrics_store_entry_add_label(sentry,
327 metrics_format_label("subsys", "geoip"));
328 metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_geoip);
330 sentry = metrics_store_add(the_store, rentry->type, rentry->name,
331 rentry->help);
332 metrics_store_entry_add_label(sentry,
333 metrics_format_label("subsys", "hsdir"));
334 metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_hsdir);
337 /** Reset the global store and fill it with all the metrics from base_metrics
338 * and their associated values.
340 * To pull this off, every metrics has a "fill" function that is called and in
341 * charge of adding the metrics to the store, appropriate labels and finally
342 * updating the value to report. */
343 static void
344 fill_store(void)
346 /* Reset the current store, we are about to fill it with all the things. */
347 metrics_store_reset(the_store);
349 /* Call the fill function for each metrics. */
350 for (size_t i = 0; i < num_base_metrics; i++) {
351 if (BUG(!base_metrics[i].fill_fn)) {
352 continue;
354 base_metrics[i].fill_fn();
358 /** Return a list of all the relay metrics stores. This is the
359 * function attached to the .get_metrics() member of the subsys_t. */
360 const smartlist_t *
361 relay_metrics_get_stores(void)
363 /* We can't have the caller to free the returned list so keep it static,
364 * simply update it. */
365 static smartlist_t *stores_list = NULL;
367 /* We dynamically fill the store with all the metrics upon a request. The
368 * reason for this is because the exposed metrics of a relay are often
369 * internal counters in the fast path and thus we fetch the value when a
370 * metrics port request arrives instead of keeping a local metrics store of
371 * those values. */
372 fill_store();
374 if (!stores_list) {
375 stores_list = smartlist_new();
376 smartlist_add(stores_list, the_store);
379 return stores_list;
382 /** Initialize the relay metrics. */
383 void
384 relay_metrics_init(void)
386 if (BUG(the_store)) {
387 return;
389 the_store = metrics_store_new();
392 /** Free the relay metrics. */
393 void
394 relay_metrics_free(void)
396 if (!the_store) {
397 return;
399 /* NULL is set with this call. */
400 metrics_store_free(the_store);