Logs and debug info that I used for finding bug 16844
[tor.git] / src / or / geoip.c
blob120ce479ccc2dc13f4b25c2a4bc7a05be7b85c38
1 /* Copyright (c) 2007-2015, The Tor Project, Inc. */
2 /* See LICENSE for licensing information */
4 /**
5 * \file geoip.c
6 * \brief Functions related to maintaining an IP-to-country database;
7 * to summarizing client connections by country to entry guards, bridges,
8 * and directory servers; and for statistics on answering network status
9 * requests.
12 #define GEOIP_PRIVATE
13 #include "or.h"
14 #include "ht.h"
15 #include "config.h"
16 #include "control.h"
17 #include "dnsserv.h"
18 #include "geoip.h"
19 #include "routerlist.h"
21 static void clear_geoip_db(void);
22 static void init_geoip_countries(void);
/** One row of the IPv4 GeoIP table: an inclusive range of addresses
 * together with the country that range is assigned to. */
typedef struct geoip_ipv4_entry_t {
  uint32_t ip_low; /**< First address of the range, in host order. */
  uint32_t ip_high; /**< Last address of the range, in host order. */
  intptr_t country; /**< Position of the country in geoip_countries. */
} geoip_ipv4_entry_t;
/** One row of the IPv6 GeoIP table: an inclusive range of addresses
 * together with the country that range is assigned to. The bounds are
 * compared bytewise over their s6_addr arrays by the helpers in this file. */
typedef struct geoip_ipv6_entry_t {
  struct in6_addr ip_low; /**< First address of the range. */
  struct in6_addr ip_high; /**< Last address of the range. */
  intptr_t country; /**< Position of the country in geoip_countries. */
} geoip_ipv6_entry_t;
/** Per-country bookkeeping for GeoIP request history. */
typedef struct geoip_country_t {
  /** NUL-terminated country code (two characters plus terminator). */
  char countrycode[3];
  /** Counter that geoip_note_client_seen() bumps for every network status
   * request attributed to this country. */
  uint32_t n_v3_ns_requests;
} geoip_country_t;
44 /** A list of geoip_country_t */
45 static smartlist_t *geoip_countries = NULL;
46 /** A map from lowercased country codes to their position in geoip_countries.
47 * The index is encoded in the pointer, and 1 is added so that NULL can mean
48 * not found. */
49 static strmap_t *country_idxplus1_by_lc_code = NULL;
50 /** Lists of all known geoip_ipv4_entry_t and geoip_ipv6_entry_t, sorted
51 * by their respective ip_low. */
52 static smartlist_t *geoip_ipv4_entries = NULL, *geoip_ipv6_entries = NULL;
54 /** SHA1 digest of the GeoIP files to include in extra-info descriptors. */
55 static char geoip_digest[DIGEST_LEN];
56 static char geoip6_digest[DIGEST_LEN];
58 /** Return the index of the <b>country</b>'s entry in the GeoIP
59 * country list if it is a valid 2-letter country code, otherwise
60 * return -1. */
61 MOCK_IMPL(country_t,
62 geoip_get_country,(const char *country))
64 void *idxplus1_;
65 intptr_t idx;
67 idxplus1_ = strmap_get_lc(country_idxplus1_by_lc_code, country);
68 if (!idxplus1_)
69 return -1;
71 idx = ((uintptr_t)idxplus1_)-1;
72 return (country_t)idx;
75 /** Add an entry to a GeoIP table, mapping all IP addresses between <b>low</b>
76 * and <b>high</b>, inclusive, to the 2-letter country code <b>country</b>. */
77 static void
78 geoip_add_entry(const tor_addr_t *low, const tor_addr_t *high,
79 const char *country)
81 intptr_t idx;
82 void *idxplus1_;
84 if (tor_addr_family(low) != tor_addr_family(high))
85 return;
86 if (tor_addr_compare(high, low, CMP_EXACT) < 0)
87 return;
89 idxplus1_ = strmap_get_lc(country_idxplus1_by_lc_code, country);
91 if (!idxplus1_) {
92 geoip_country_t *c = tor_malloc_zero(sizeof(geoip_country_t));
93 strlcpy(c->countrycode, country, sizeof(c->countrycode));
94 tor_strlower(c->countrycode);
95 smartlist_add(geoip_countries, c);
96 idx = smartlist_len(geoip_countries) - 1;
97 strmap_set_lc(country_idxplus1_by_lc_code, country, (void*)(idx+1));
98 } else {
99 idx = ((uintptr_t)idxplus1_)-1;
102 geoip_country_t *c = smartlist_get(geoip_countries, idx);
103 tor_assert(!strcasecmp(c->countrycode, country));
106 if (tor_addr_family(low) == AF_INET) {
107 geoip_ipv4_entry_t *ent = tor_malloc_zero(sizeof(geoip_ipv4_entry_t));
108 ent->ip_low = tor_addr_to_ipv4h(low);
109 ent->ip_high = tor_addr_to_ipv4h(high);
110 ent->country = idx;
111 smartlist_add(geoip_ipv4_entries, ent);
112 } else if (tor_addr_family(low) == AF_INET6) {
113 geoip_ipv6_entry_t *ent = tor_malloc_zero(sizeof(geoip_ipv6_entry_t));
114 ent->ip_low = *tor_addr_to_in6(low);
115 ent->ip_high = *tor_addr_to_in6(high);
116 ent->country = idx;
117 smartlist_add(geoip_ipv6_entries, ent);
121 /** Add an entry to the GeoIP table indicated by <b>family</b>,
122 * parsing it from <b>line</b>. The format is as for geoip_load_file(). */
123 STATIC int
124 geoip_parse_entry(const char *line, sa_family_t family)
126 tor_addr_t low_addr, high_addr;
127 char c[3];
128 char *country = NULL;
130 if (!geoip_countries)
131 init_geoip_countries();
132 if (family == AF_INET) {
133 if (!geoip_ipv4_entries)
134 geoip_ipv4_entries = smartlist_new();
135 } else if (family == AF_INET6) {
136 if (!geoip_ipv6_entries)
137 geoip_ipv6_entries = smartlist_new();
138 } else {
139 log_warn(LD_GENERAL, "Unsupported family: %d", family);
140 return -1;
143 while (TOR_ISSPACE(*line))
144 ++line;
145 if (*line == '#')
146 return 0;
148 if (family == AF_INET) {
149 unsigned int low, high;
150 if (tor_sscanf(line,"%u,%u,%2s", &low, &high, c) == 3 ||
151 tor_sscanf(line,"\"%u\",\"%u\",\"%2s\",", &low, &high, c) == 3) {
152 tor_addr_from_ipv4h(&low_addr, low);
153 tor_addr_from_ipv4h(&high_addr, high);
154 } else
155 goto fail;
156 country = c;
157 } else { /* AF_INET6 */
158 char buf[512];
159 char *low_str, *high_str;
160 struct in6_addr low, high;
161 char *strtok_state;
162 strlcpy(buf, line, sizeof(buf));
163 low_str = tor_strtok_r(buf, ",", &strtok_state);
164 if (!low_str)
165 goto fail;
166 high_str = tor_strtok_r(NULL, ",", &strtok_state);
167 if (!high_str)
168 goto fail;
169 country = tor_strtok_r(NULL, "\n", &strtok_state);
170 if (!country)
171 goto fail;
172 if (strlen(country) != 2)
173 goto fail;
174 if (tor_inet_pton(AF_INET6, low_str, &low) <= 0)
175 goto fail;
176 tor_addr_from_in6(&low_addr, &low);
177 if (tor_inet_pton(AF_INET6, high_str, &high) <= 0)
178 goto fail;
179 tor_addr_from_in6(&high_addr, &high);
181 geoip_add_entry(&low_addr, &high_addr, country);
182 return 0;
184 fail:
185 log_warn(LD_GENERAL, "Unable to parse line from GEOIP %s file: %s",
186 family == AF_INET ? "IPv4" : "IPv6", escaped(line));
187 return -1;
190 /** Sorting helper: return -1, 1, or 0 based on comparison of two
191 * geoip_ipv4_entry_t */
192 static int
193 geoip_ipv4_compare_entries_(const void **_a, const void **_b)
195 const geoip_ipv4_entry_t *a = *_a, *b = *_b;
196 if (a->ip_low < b->ip_low)
197 return -1;
198 else if (a->ip_low > b->ip_low)
199 return 1;
200 else
201 return 0;
204 /** bsearch helper: return -1, 1, or 0 based on comparison of an IP (a pointer
205 * to a uint32_t in host order) to a geoip_ipv4_entry_t */
206 static int
207 geoip_ipv4_compare_key_to_entry_(const void *_key, const void **_member)
209 /* No alignment issue here, since _key really is a pointer to uint32_t */
210 const uint32_t addr = *(uint32_t *)_key;
211 const geoip_ipv4_entry_t *entry = *_member;
212 if (addr < entry->ip_low)
213 return -1;
214 else if (addr > entry->ip_high)
215 return 1;
216 else
217 return 0;
220 /** Sorting helper: return -1, 1, or 0 based on comparison of two
221 * geoip_ipv6_entry_t */
222 static int
223 geoip_ipv6_compare_entries_(const void **_a, const void **_b)
225 const geoip_ipv6_entry_t *a = *_a, *b = *_b;
226 return fast_memcmp(a->ip_low.s6_addr, b->ip_low.s6_addr,
227 sizeof(struct in6_addr));
230 /** bsearch helper: return -1, 1, or 0 based on comparison of an IPv6
231 * (a pointer to a in6_addr) to a geoip_ipv6_entry_t */
232 static int
233 geoip_ipv6_compare_key_to_entry_(const void *_key, const void **_member)
235 const struct in6_addr *addr = (struct in6_addr *)_key;
236 const geoip_ipv6_entry_t *entry = *_member;
238 if (fast_memcmp(addr->s6_addr, entry->ip_low.s6_addr,
239 sizeof(struct in6_addr)) < 0)
240 return -1;
241 else if (fast_memcmp(addr->s6_addr, entry->ip_high.s6_addr,
242 sizeof(struct in6_addr)) > 0)
243 return 1;
244 else
245 return 0;
248 /** Return 1 if we should collect geoip stats on bridge users, and
249 * include them in our extrainfo descriptor. Else return 0. */
251 should_record_bridge_info(const or_options_t *options)
253 return options->BridgeRelay && options->BridgeRecordUsageByCountry;
256 /** Set up a new list of geoip countries with no countries (yet) set in it,
257 * except for the unknown country.
259 static void
260 init_geoip_countries(void)
262 geoip_country_t *geoip_unresolved;
263 geoip_countries = smartlist_new();
264 /* Add a geoip_country_t for requests that could not be resolved to a
265 * country as first element (index 0) to geoip_countries. */
266 geoip_unresolved = tor_malloc_zero(sizeof(geoip_country_t));
267 strlcpy(geoip_unresolved->countrycode, "??",
268 sizeof(geoip_unresolved->countrycode));
269 smartlist_add(geoip_countries, geoip_unresolved);
270 country_idxplus1_by_lc_code = strmap_new();
271 strmap_set_lc(country_idxplus1_by_lc_code, "??", (void*)(1));
274 /** Clear appropriate GeoIP database, based on <b>family</b>, and
275 * reload it from the file <b>filename</b>. Return 0 on success, -1 on
276 * failure.
278 * Recognized line formats for IPv4 are:
279 * INTIPLOW,INTIPHIGH,CC
280 * and
281 * "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME"
282 * where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned
283 * integers, and CC is a country code.
285 * Recognized line format for IPv6 is:
286 * IPV6LOW,IPV6HIGH,CC
287 * where IPV6LOW and IPV6HIGH are IPv6 addresses and CC is a country code.
289 * It also recognizes, and skips over, blank lines and lines that start
290 * with '#' (comments).
293 geoip_load_file(sa_family_t family, const char *filename)
295 FILE *f;
296 const char *msg = "";
297 const or_options_t *options = get_options();
298 int severity = options_need_geoip_info(options, &msg) ? LOG_WARN : LOG_INFO;
299 crypto_digest_t *geoip_digest_env = NULL;
301 tor_assert(family == AF_INET || family == AF_INET6);
303 if (!(f = tor_fopen_cloexec(filename, "r"))) {
304 log_fn(severity, LD_GENERAL, "Failed to open GEOIP file %s. %s",
305 filename, msg);
306 return -1;
308 if (!geoip_countries)
309 init_geoip_countries();
311 if (family == AF_INET) {
312 if (geoip_ipv4_entries) {
313 SMARTLIST_FOREACH(geoip_ipv4_entries, geoip_ipv4_entry_t *, e,
314 tor_free(e));
315 smartlist_free(geoip_ipv4_entries);
317 geoip_ipv4_entries = smartlist_new();
318 } else { /* AF_INET6 */
319 if (geoip_ipv6_entries) {
320 SMARTLIST_FOREACH(geoip_ipv6_entries, geoip_ipv6_entry_t *, e,
321 tor_free(e));
322 smartlist_free(geoip_ipv6_entries);
324 geoip_ipv6_entries = smartlist_new();
326 geoip_digest_env = crypto_digest_new();
328 log_notice(LD_GENERAL, "Parsing GEOIP %s file %s.",
329 (family == AF_INET) ? "IPv4" : "IPv6", filename);
330 while (!feof(f)) {
331 char buf[512];
332 if (fgets(buf, (int)sizeof(buf), f) == NULL)
333 break;
334 crypto_digest_add_bytes(geoip_digest_env, buf, strlen(buf));
335 /* FFFF track full country name. */
336 geoip_parse_entry(buf, family);
338 /*XXXX abort and return -1 if no entries/illformed?*/
339 fclose(f);
341 /* Sort list and remember file digests so that we can include it in
342 * our extra-info descriptors. */
343 if (family == AF_INET) {
344 smartlist_sort(geoip_ipv4_entries, geoip_ipv4_compare_entries_);
345 /* Okay, now we need to maybe change our mind about what is in
346 * which country. We do this for IPv4 only since that's what we
347 * store in node->country. */
348 refresh_all_country_info();
349 crypto_digest_get_digest(geoip_digest_env, geoip_digest, DIGEST_LEN);
350 } else {
351 /* AF_INET6 */
352 smartlist_sort(geoip_ipv6_entries, geoip_ipv6_compare_entries_);
353 crypto_digest_get_digest(geoip_digest_env, geoip6_digest, DIGEST_LEN);
355 crypto_digest_free(geoip_digest_env);
357 return 0;
360 /** Given an IP address in host order, return a number representing the
361 * country to which that address belongs, -1 for "No geoip information
362 * available", or 0 for the 'unknown country'. The return value will always
363 * be less than geoip_get_n_countries(). To decode it, call
364 * geoip_get_country_name().
366 STATIC int
367 geoip_get_country_by_ipv4(uint32_t ipaddr)
369 geoip_ipv4_entry_t *ent;
370 if (!geoip_ipv4_entries)
371 return -1;
372 ent = smartlist_bsearch(geoip_ipv4_entries, &ipaddr,
373 geoip_ipv4_compare_key_to_entry_);
374 return ent ? (int)ent->country : 0;
377 /** Given an IPv6 address, return a number representing the country to
378 * which that address belongs, -1 for "No geoip information available", or
379 * 0 for the 'unknown country'. The return value will always be less than
380 * geoip_get_n_countries(). To decode it, call geoip_get_country_name().
382 STATIC int
383 geoip_get_country_by_ipv6(const struct in6_addr *addr)
385 geoip_ipv6_entry_t *ent;
387 if (!geoip_ipv6_entries)
388 return -1;
389 ent = smartlist_bsearch(geoip_ipv6_entries, addr,
390 geoip_ipv6_compare_key_to_entry_);
391 return ent ? (int)ent->country : 0;
394 /** Given an IP address, return a number representing the country to which
395 * that address belongs, -1 for "No geoip information available", or 0 for
396 * the 'unknown country'. The return value will always be less than
397 * geoip_get_n_countries(). To decode it, call geoip_get_country_name().
399 MOCK_IMPL(int,
400 geoip_get_country_by_addr,(const tor_addr_t *addr))
402 if (tor_addr_family(addr) == AF_INET) {
403 return geoip_get_country_by_ipv4(tor_addr_to_ipv4h(addr));
404 } else if (tor_addr_family(addr) == AF_INET6) {
405 return geoip_get_country_by_ipv6(tor_addr_to_in6(addr));
406 } else {
407 return -1;
411 /** Return the number of countries recognized by the GeoIP country list. */
412 MOCK_IMPL(int,
413 geoip_get_n_countries,(void))
415 if (!geoip_countries)
416 init_geoip_countries();
417 return (int) smartlist_len(geoip_countries);
420 /** Return the two-letter country code associated with the number <b>num</b>,
421 * or "??" for an unknown value. */
422 const char *
423 geoip_get_country_name(country_t num)
425 if (geoip_countries && num >= 0 && num < smartlist_len(geoip_countries)) {
426 geoip_country_t *c = smartlist_get(geoip_countries, num);
427 return c->countrycode;
428 } else
429 return "??";
432 /** Return true iff we have loaded a GeoIP database.*/
433 MOCK_IMPL(int,
434 geoip_is_loaded,(sa_family_t family))
436 tor_assert(family == AF_INET || family == AF_INET6);
437 if (geoip_countries == NULL)
438 return 0;
439 if (family == AF_INET)
440 return geoip_ipv4_entries != NULL;
441 else /* AF_INET6 */
442 return geoip_ipv6_entries != NULL;
445 /** Return the hex-encoded SHA1 digest of the loaded GeoIP file. The
446 * result does not need to be deallocated, but will be overwritten by the
447 * next call of hex_str(). */
448 const char *
449 geoip_db_digest(sa_family_t family)
451 tor_assert(family == AF_INET || family == AF_INET6);
452 if (family == AF_INET)
453 return hex_str(geoip_digest, DIGEST_LEN);
454 else /* AF_INET6 */
455 return hex_str(geoip6_digest, DIGEST_LEN);
458 /** Entry in a map from IP address to the last time we've seen an incoming
459 * connection from that IP address. Used by bridges only, to track which
460 * countries have them blocked. */
461 typedef struct clientmap_entry_t {
462 HT_ENTRY(clientmap_entry_t) node;
463 tor_addr_t addr;
464 /* Name of pluggable transport used by this client. NULL if no
465 pluggable transport was used. */
466 char *transport_name;
468 /** Time when we last saw this IP address, in MINUTES since the epoch.
470 * (This will run out of space around 4011 CE. If Tor is still in use around
471 * 4000 CE, please remember to add more bits to last_seen_in_minutes.) */
472 unsigned int last_seen_in_minutes:30;
473 unsigned int action:2;
474 } clientmap_entry_t;
/** Largest allowable value for last_seen_in_minutes. (It's a 30-bit field,
 * so it can hold up to (1u<<30)-1, or 0x3fffffffu.)
 */
#define MAX_LAST_SEEN_IN_MINUTES 0X3FFFFFFFu
481 /** Map from client IP address to last time seen. */
482 static HT_HEAD(clientmap, clientmap_entry_t) client_history =
483 HT_INITIALIZER();
485 /** Hashtable helper: compute a hash of a clientmap_entry_t. */
486 static INLINE unsigned
487 clientmap_entry_hash(const clientmap_entry_t *a)
489 unsigned h = (unsigned) tor_addr_hash(&a->addr);
491 if (a->transport_name)
492 h += (unsigned) siphash24g(a->transport_name, strlen(a->transport_name));
494 return h;
496 /** Hashtable helper: compare two clientmap_entry_t values for equality. */
497 static INLINE int
498 clientmap_entries_eq(const clientmap_entry_t *a, const clientmap_entry_t *b)
500 if (strcmp_opt(a->transport_name, b->transport_name))
501 return 0;
503 return !tor_addr_compare(&a->addr, &b->addr, CMP_EXACT) &&
504 a->action == b->action;
507 HT_PROTOTYPE(clientmap, clientmap_entry_t, node, clientmap_entry_hash,
508 clientmap_entries_eq);
509 HT_GENERATE2(clientmap, clientmap_entry_t, node, clientmap_entry_hash,
510 clientmap_entries_eq, 0.6, tor_reallocarray_, tor_free_)
512 /** Free all storage held by <b>ent</b>. */
513 static void
514 clientmap_entry_free(clientmap_entry_t *ent)
516 if (!ent)
517 return;
519 tor_free(ent->transport_name);
520 tor_free(ent);
523 /** Clear history of connecting clients used by entry and bridge stats. */
524 static void
525 client_history_clear(void)
527 clientmap_entry_t **ent, **next, *this;
528 for (ent = HT_START(clientmap, &client_history); ent != NULL;
529 ent = next) {
530 if ((*ent)->action == GEOIP_CLIENT_CONNECT) {
531 this = *ent;
532 next = HT_NEXT_RMV(clientmap, &client_history, ent);
533 clientmap_entry_free(this);
534 } else {
535 next = HT_NEXT(clientmap, &client_history, ent);
540 /** Note that we've seen a client connect from the IP <b>addr</b>
541 * at time <b>now</b>. Ignored by all but bridges and directories if
542 * configured accordingly. */
543 void
544 geoip_note_client_seen(geoip_client_action_t action,
545 const tor_addr_t *addr,
546 const char *transport_name,
547 time_t now)
549 const or_options_t *options = get_options();
550 clientmap_entry_t lookup, *ent;
551 memset(&lookup, 0, sizeof(clientmap_entry_t));
553 if (action == GEOIP_CLIENT_CONNECT) {
554 /* Only remember statistics as entry guard or as bridge. */
555 if (!options->EntryStatistics &&
556 (!(options->BridgeRelay && options->BridgeRecordUsageByCountry)))
557 return;
558 } else {
559 /* Only gather directory-request statistics if configured, and
560 * forcibly disable them on bridge authorities. */
561 if (!options->DirReqStatistics || options->BridgeAuthoritativeDir)
562 return;
565 log_debug(LD_GENERAL, "Seen client from '%s' with transport '%s'.",
566 safe_str_client(fmt_addr((addr))),
567 transport_name ? transport_name : "<no transport>");
569 tor_addr_copy(&lookup.addr, addr);
570 lookup.action = (int)action;
571 lookup.transport_name = (char*) transport_name;
572 ent = HT_FIND(clientmap, &client_history, &lookup);
574 if (! ent) {
575 ent = tor_malloc_zero(sizeof(clientmap_entry_t));
576 tor_addr_copy(&ent->addr, addr);
577 if (transport_name)
578 ent->transport_name = tor_strdup(transport_name);
579 ent->action = (int)action;
580 HT_INSERT(clientmap, &client_history, ent);
582 if (now / 60 <= (int)MAX_LAST_SEEN_IN_MINUTES && now >= 0)
583 ent->last_seen_in_minutes = (unsigned)(now/60);
584 else
585 ent->last_seen_in_minutes = 0;
587 if (action == GEOIP_CLIENT_NETWORKSTATUS) {
588 int country_idx = geoip_get_country_by_addr(addr);
589 if (country_idx < 0)
590 country_idx = 0; /** unresolved requests are stored at index 0. */
591 if (country_idx >= 0 && country_idx < smartlist_len(geoip_countries)) {
592 geoip_country_t *country = smartlist_get(geoip_countries, country_idx);
593 ++country->n_v3_ns_requests;
598 /** HT_FOREACH helper: remove a clientmap_entry_t from the hashtable if it's
599 * older than a certain time. */
600 static int
601 remove_old_client_helper_(struct clientmap_entry_t *ent, void *_cutoff)
603 time_t cutoff = *(time_t*)_cutoff / 60;
604 if (ent->last_seen_in_minutes < cutoff) {
605 clientmap_entry_free(ent);
606 return 1;
607 } else {
608 return 0;
612 /** Forget about all clients that haven't connected since <b>cutoff</b>. */
613 void
614 geoip_remove_old_clients(time_t cutoff)
616 clientmap_HT_FOREACH_FN(&client_history,
617 remove_old_client_helper_,
618 &cutoff);
621 /** How many responses are we giving to clients requesting v3 network
622 * statuses? */
623 static uint32_t ns_v3_responses[GEOIP_NS_RESPONSE_NUM];
625 /** Note that we've rejected a client's request for a v3 network status
626 * for reason <b>reason</b> at time <b>now</b>. */
627 void
628 geoip_note_ns_response(geoip_ns_response_t response)
630 static int arrays_initialized = 0;
631 if (!get_options()->DirReqStatistics)
632 return;
633 if (!arrays_initialized) {
634 memset(ns_v3_responses, 0, sizeof(ns_v3_responses));
635 arrays_initialized = 1;
637 tor_assert(response < GEOIP_NS_RESPONSE_NUM);
638 ns_v3_responses[response]++;
/** Do not mention any country from which fewer than this number of IPs have
 * connected. This conceivably avoids reporting information that could
 * deanonymize users, though analysis is lacking. */
#define MIN_IPS_TO_NOTE_COUNTRY 1
/** Do not report any geoip data at all if we have fewer than this number of
 * IPs to report about. */
#define MIN_IPS_TO_NOTE_ANYTHING 1
/** When reporting geoip data about countries, round up to the nearest
 * multiple of this value. */
#define IP_GRANULARITY 8
/** Helper type: used to sort per-country totals by value. */
typedef struct c_hist_t {
  char country[3]; /**< Two-letter country code. */
  unsigned total; /**< Total IP addresses seen in this country. */
} c_hist_t;

/** Sorting helper for two c_hist_t values: larger totals sort first
 * (return -1 when the first total is larger, 1 when it is smaller); equal
 * totals are ordered by strcmp() of the country codes. */
static int
c_hist_compare_(const void **_a, const void **_b)
{
  const c_hist_t *lhs = *_a;
  const c_hist_t *rhs = *_b;

  if (lhs->total != rhs->total)
    return (lhs->total > rhs->total) ? -1 : 1;

  return strcmp(lhs->country, rhs->country);
}
/** Timeout, in seconds, for directory requests that are still incomplete
 * at the end of a 24-hour period: those running for longer than this are
 * considered failed, the others still running. */
#define DIRREQ_TIMEOUT (10*60)
678 /** Entry in a map from either chan->global_identifier for direct requests
679 * or a unique circuit identifier for tunneled requests to request time,
680 * response size, and completion time of a network status request. Used to
681 * measure download times of requests to derive average client
682 * bandwidths. */
683 typedef struct dirreq_map_entry_t {
684 HT_ENTRY(dirreq_map_entry_t) node;
685 /** Unique identifier for this network status request; this is either the
686 * chan->global_identifier of the dir channel (direct request) or a new
687 * locally unique identifier of a circuit (tunneled request). This ID is
688 * only unique among other direct or tunneled requests, respectively. */
689 uint64_t dirreq_id;
690 unsigned int state:3; /**< State of this directory request. */
691 unsigned int type:1; /**< Is this a direct or a tunneled request? */
692 unsigned int completed:1; /**< Is this request complete? */
693 /** When did we receive the request and started sending the response? */
694 struct timeval request_time;
695 size_t response_size; /**< What is the size of the response in bytes? */
696 struct timeval completion_time; /**< When did the request succeed? */
697 } dirreq_map_entry_t;
699 /** Map of all directory requests asking for v2 or v3 network statuses in
700 * the current geoip-stats interval. Values are
701 * of type *<b>dirreq_map_entry_t</b>. */
702 static HT_HEAD(dirreqmap, dirreq_map_entry_t) dirreq_map =
703 HT_INITIALIZER();
705 static int
706 dirreq_map_ent_eq(const dirreq_map_entry_t *a,
707 const dirreq_map_entry_t *b)
709 return a->dirreq_id == b->dirreq_id && a->type == b->type;
712 /* DOCDOC dirreq_map_ent_hash */
713 static unsigned
714 dirreq_map_ent_hash(const dirreq_map_entry_t *entry)
716 unsigned u = (unsigned) entry->dirreq_id;
717 u += entry->type << 20;
718 return u;
721 HT_PROTOTYPE(dirreqmap, dirreq_map_entry_t, node, dirreq_map_ent_hash,
722 dirreq_map_ent_eq);
723 HT_GENERATE2(dirreqmap, dirreq_map_entry_t, node, dirreq_map_ent_hash,
724 dirreq_map_ent_eq, 0.6, tor_reallocarray_, tor_free_)
726 /** Helper: Put <b>entry</b> into map of directory requests using
727 * <b>type</b> and <b>dirreq_id</b> as key parts. If there is
728 * already an entry for that key, print out a BUG warning and return. */
729 static void
730 dirreq_map_put_(dirreq_map_entry_t *entry, dirreq_type_t type,
731 uint64_t dirreq_id)
733 dirreq_map_entry_t *old_ent;
734 tor_assert(entry->type == type);
735 tor_assert(entry->dirreq_id == dirreq_id);
737 /* XXXX we could switch this to HT_INSERT some time, since it seems that
738 * this bug doesn't happen. But since this function doesn't seem to be
739 * critical-path, it's sane to leave it alone. */
740 old_ent = HT_REPLACE(dirreqmap, &dirreq_map, entry);
741 if (old_ent && old_ent != entry) {
742 log_warn(LD_BUG, "Error when putting directory request into local "
743 "map. There was already an entry for the same identifier.");
744 return;
748 /** Helper: Look up and return an entry in the map of directory requests
749 * using <b>type</b> and <b>dirreq_id</b> as key parts. If there
750 * is no such entry, return NULL. */
751 static dirreq_map_entry_t *
752 dirreq_map_get_(dirreq_type_t type, uint64_t dirreq_id)
754 dirreq_map_entry_t lookup;
755 lookup.type = type;
756 lookup.dirreq_id = dirreq_id;
757 return HT_FIND(dirreqmap, &dirreq_map, &lookup);
760 /** Note that an either direct or tunneled (see <b>type</b>) directory
761 * request for a v3 network status with unique ID <b>dirreq_id</b> of size
762 * <b>response_size</b> has started. */
763 void
764 geoip_start_dirreq(uint64_t dirreq_id, size_t response_size,
765 dirreq_type_t type)
767 dirreq_map_entry_t *ent;
768 if (!get_options()->DirReqStatistics)
769 return;
770 ent = tor_malloc_zero(sizeof(dirreq_map_entry_t));
771 ent->dirreq_id = dirreq_id;
772 tor_gettimeofday(&ent->request_time);
773 ent->response_size = response_size;
774 ent->type = type;
775 dirreq_map_put_(ent, type, dirreq_id);
778 /** Change the state of the either direct or tunneled (see <b>type</b>)
779 * directory request with <b>dirreq_id</b> to <b>new_state</b> and
780 * possibly mark it as completed. If no entry can be found for the given
781 * key parts (e.g., if this is a directory request that we are not
782 * measuring, or one that was started in the previous measurement period),
783 * or if the state cannot be advanced to <b>new_state</b>, do nothing. */
784 void
785 geoip_change_dirreq_state(uint64_t dirreq_id, dirreq_type_t type,
786 dirreq_state_t new_state)
788 dirreq_map_entry_t *ent;
789 if (!get_options()->DirReqStatistics)
790 return;
791 ent = dirreq_map_get_(type, dirreq_id);
792 if (!ent)
793 return;
794 if (new_state == DIRREQ_IS_FOR_NETWORK_STATUS)
795 return;
796 if (new_state - 1 != ent->state)
797 return;
798 ent->state = new_state;
799 if ((type == DIRREQ_DIRECT &&
800 new_state == DIRREQ_FLUSHING_DIR_CONN_FINISHED) ||
801 (type == DIRREQ_TUNNELED &&
802 new_state == DIRREQ_CHANNEL_BUFFER_FLUSHED)) {
803 tor_gettimeofday(&ent->completion_time);
804 ent->completed = 1;
808 /** Return the bridge-ip-transports string that should be inserted in
809 * our extra-info descriptor. Return NULL if the bridge-ip-transports
810 * line should be empty. */
811 char *
812 geoip_get_transport_history(void)
814 unsigned granularity = IP_GRANULARITY;
815 /** String hash table (name of transport) -> (number of users). */
816 strmap_t *transport_counts = strmap_new();
818 /** Smartlist that contains copies of the names of the transports
819 that have been used. */
820 smartlist_t *transports_used = smartlist_new();
822 /* Special string to signify that no transport was used for this
823 connection. Pluggable transport names can't have symbols in their
824 names, so this string will never collide with a real transport. */
825 static const char* no_transport_str = "<OR>";
827 clientmap_entry_t **ent;
828 const char *transport_name = NULL;
829 smartlist_t *string_chunks = smartlist_new();
830 char *the_string = NULL;
832 /* If we haven't seen any clients yet, return NULL. */
833 if (HT_EMPTY(&client_history))
834 goto done;
836 /** We do the following steps to form the transport history string:
837 * a) Foreach client that uses a pluggable transport, we increase the
838 * times that transport was used by one. If the client did not use
839 * a transport, we increase the number of times someone connected
840 * without obfuscation.
841 * b) Foreach transport we observed, we write its transport history
842 * string and push it to string_chunks. So, for example, if we've
843 * seen 665 obfs2 clients, we write "obfs2=665".
844 * c) We concatenate string_chunks to form the final string.
847 log_debug(LD_GENERAL,"Starting iteration for transport history. %d clients.",
848 HT_SIZE(&client_history));
850 /* Loop through all clients. */
851 HT_FOREACH(ent, clientmap, &client_history) {
852 uintptr_t val;
853 void *ptr;
854 transport_name = (*ent)->transport_name;
855 if (!transport_name)
856 transport_name = no_transport_str;
858 /* Increase the count for this transport name. */
859 ptr = strmap_get(transport_counts, transport_name);
860 val = (uintptr_t)ptr;
861 val++;
862 ptr = (void*)val;
863 strmap_set(transport_counts, transport_name, ptr);
865 /* If it's the first time we see this transport, note it. */
866 if (val == 1)
867 smartlist_add(transports_used, tor_strdup(transport_name));
869 log_debug(LD_GENERAL, "Client from '%s' with transport '%s'. "
870 "I've now seen %d clients.",
871 safe_str_client(fmt_addr(&(*ent)->addr)),
872 transport_name ? transport_name : "<no transport>",
873 (int)val);
876 /* Sort the transport names (helps with unit testing). */
877 smartlist_sort_strings(transports_used);
879 /* Loop through all seen transports. */
880 SMARTLIST_FOREACH_BEGIN(transports_used, const char *, transport_name) {
881 void *transport_count_ptr = strmap_get(transport_counts, transport_name);
882 uintptr_t transport_count = (uintptr_t) transport_count_ptr;
884 log_debug(LD_GENERAL, "We got "U64_FORMAT" clients with transport '%s'.",
885 U64_PRINTF_ARG((uint64_t)transport_count), transport_name);
887 smartlist_add_asprintf(string_chunks, "%s="U64_FORMAT,
888 transport_name,
889 U64_PRINTF_ARG(round_uint64_to_next_multiple_of(
890 (uint64_t)transport_count,
891 granularity)));
892 } SMARTLIST_FOREACH_END(transport_name);
894 the_string = smartlist_join_strings(string_chunks, ",", 0, NULL);
896 log_debug(LD_GENERAL, "Final bridge-ip-transports string: '%s'", the_string);
898 done:
899 strmap_free(transport_counts, NULL);
900 SMARTLIST_FOREACH(transports_used, char *, s, tor_free(s));
901 smartlist_free(transports_used);
902 SMARTLIST_FOREACH(string_chunks, char *, s, tor_free(s));
903 smartlist_free(string_chunks);
905 return the_string;
908 /** Return a newly allocated comma-separated string containing statistics
909 * on network status downloads. The string contains the number of completed
910 * requests, timeouts, and still running requests as well as the download
911 * times by deciles and quartiles. Return NULL if we have not observed
912 * requests for long enough. */
913 static char *
914 geoip_get_dirreq_history(dirreq_type_t type)
916 char *result = NULL;
917 smartlist_t *dirreq_completed = NULL;
918 uint32_t complete = 0, timeouts = 0, running = 0;
919 int bufsize = 1024, written;
920 dirreq_map_entry_t **ptr, **next, *ent;
921 struct timeval now;
923 tor_gettimeofday(&now);
924 dirreq_completed = smartlist_new();
925 for (ptr = HT_START(dirreqmap, &dirreq_map); ptr; ptr = next) {
926 ent = *ptr;
927 if (ent->type != type) {
928 next = HT_NEXT(dirreqmap, &dirreq_map, ptr);
929 continue;
930 } else {
931 if (ent->completed) {
932 smartlist_add(dirreq_completed, ent);
933 complete++;
934 next = HT_NEXT_RMV(dirreqmap, &dirreq_map, ptr);
935 } else {
936 if (tv_mdiff(&ent->request_time, &now) / 1000 > DIRREQ_TIMEOUT)
937 timeouts++;
938 else
939 running++;
940 next = HT_NEXT_RMV(dirreqmap, &dirreq_map, ptr);
941 tor_free(ent);
945 #define DIR_REQ_GRANULARITY 4
946 complete = round_uint32_to_next_multiple_of(complete,
947 DIR_REQ_GRANULARITY);
948 timeouts = round_uint32_to_next_multiple_of(timeouts,
949 DIR_REQ_GRANULARITY);
950 running = round_uint32_to_next_multiple_of(running,
951 DIR_REQ_GRANULARITY);
952 result = tor_malloc_zero(bufsize);
953 written = tor_snprintf(result, bufsize, "complete=%u,timeout=%u,"
954 "running=%u", complete, timeouts, running);
955 if (written < 0) {
956 tor_free(result);
957 goto done;
960 #define MIN_DIR_REQ_RESPONSES 16
961 if (complete >= MIN_DIR_REQ_RESPONSES) {
962 uint32_t *dltimes;
963 /* We may have rounded 'completed' up. Here we want to use the
964 * real value. */
965 complete = smartlist_len(dirreq_completed);
966 dltimes = tor_calloc(complete, sizeof(uint32_t));
967 SMARTLIST_FOREACH_BEGIN(dirreq_completed, dirreq_map_entry_t *, ent) {
968 uint32_t bytes_per_second;
969 uint32_t time_diff = (uint32_t) tv_mdiff(&ent->request_time,
970 &ent->completion_time);
971 if (time_diff == 0)
972 time_diff = 1; /* Avoid DIV/0; "instant" answers are impossible
973 * by law of nature or something, but a milisecond
974 * is a bit greater than "instantly" */
975 bytes_per_second = (uint32_t)(1000 * ent->response_size / time_diff);
976 dltimes[ent_sl_idx] = bytes_per_second;
977 } SMARTLIST_FOREACH_END(ent);
978 median_uint32(dltimes, complete); /* sorts as a side effect. */
979 written = tor_snprintf(result + written, bufsize - written,
980 ",min=%u,d1=%u,d2=%u,q1=%u,d3=%u,d4=%u,md=%u,"
981 "d6=%u,d7=%u,q3=%u,d8=%u,d9=%u,max=%u",
982 dltimes[0],
983 dltimes[1*complete/10-1],
984 dltimes[2*complete/10-1],
985 dltimes[1*complete/4-1],
986 dltimes[3*complete/10-1],
987 dltimes[4*complete/10-1],
988 dltimes[5*complete/10-1],
989 dltimes[6*complete/10-1],
990 dltimes[7*complete/10-1],
991 dltimes[3*complete/4-1],
992 dltimes[8*complete/10-1],
993 dltimes[9*complete/10-1],
994 dltimes[complete-1]);
995 if (written<0)
996 tor_free(result);
997 tor_free(dltimes);
999 done:
1000 SMARTLIST_FOREACH(dirreq_completed, dirreq_map_entry_t *, ent,
1001 tor_free(ent));
1002 smartlist_free(dirreq_completed);
1003 return result;
1006 /** Store a newly allocated comma-separated string in
1007 * *<a>country_str</a> containing entries for all the countries from
1008 * which we've seen enough clients connect as a bridge, directory
1009 * server, or entry guard. The entry format is cc=num where num is the
1010 * number of IPs we've seen connecting from that country, and cc is a
1011 * lowercased country code. *<a>country_str</a> is set to NULL if
1012 * we're not ready to export per country data yet.
1014 * Store a newly allocated comma-separated string in <a>ipver_str</a>
1015 * containing entries for clients connecting over IPv4 and IPv6. The
1016 * format is family=num where num is the nubmer of IPs we've seen
1017 * connecting over that protocol family, and family is 'v4' or 'v6'.
1019 * Return 0 on success and -1 if we're missing geoip data. */
1021 geoip_get_client_history(geoip_client_action_t action,
1022 char **country_str, char **ipver_str)
1024 unsigned granularity = IP_GRANULARITY;
1025 smartlist_t *entries = NULL;
1026 int n_countries = geoip_get_n_countries();
1027 int i;
1028 clientmap_entry_t **ent;
1029 unsigned *counts = NULL;
1030 unsigned total = 0;
1031 unsigned ipv4_count = 0, ipv6_count = 0;
1033 if (!geoip_is_loaded(AF_INET) && !geoip_is_loaded(AF_INET6))
1034 return -1;
1036 counts = tor_calloc(n_countries, sizeof(unsigned));
1037 HT_FOREACH(ent, clientmap, &client_history) {
1038 int country;
1039 if ((*ent)->action != (int)action)
1040 continue;
1041 country = geoip_get_country_by_addr(&(*ent)->addr);
1042 if (country < 0)
1043 country = 0; /** unresolved requests are stored at index 0. */
1044 tor_assert(0 <= country && country < n_countries);
1045 ++counts[country];
1046 ++total;
1047 switch (tor_addr_family(&(*ent)->addr)) {
1048 case AF_INET:
1049 ipv4_count++;
1050 break;
1051 case AF_INET6:
1052 ipv6_count++;
1053 break;
1056 if (ipver_str) {
1057 smartlist_t *chunks = smartlist_new();
1058 smartlist_add_asprintf(chunks, "v4=%u",
1059 round_to_next_multiple_of(ipv4_count, granularity));
1060 smartlist_add_asprintf(chunks, "v6=%u",
1061 round_to_next_multiple_of(ipv6_count, granularity));
1062 *ipver_str = smartlist_join_strings(chunks, ",", 0, NULL);
1063 SMARTLIST_FOREACH(chunks, char *, c, tor_free(c));
1064 smartlist_free(chunks);
1067 /* Don't record per country data if we haven't seen enough IPs. */
1068 if (total < MIN_IPS_TO_NOTE_ANYTHING) {
1069 tor_free(counts);
1070 if (country_str)
1071 *country_str = NULL;
1072 return 0;
1075 /* Make a list of c_hist_t */
1076 entries = smartlist_new();
1077 for (i = 0; i < n_countries; ++i) {
1078 unsigned c = counts[i];
1079 const char *countrycode;
1080 c_hist_t *ent;
1081 /* Only report a country if it has a minimum number of IPs. */
1082 if (c >= MIN_IPS_TO_NOTE_COUNTRY) {
1083 c = round_to_next_multiple_of(c, granularity);
1084 countrycode = geoip_get_country_name(i);
1085 ent = tor_malloc(sizeof(c_hist_t));
1086 strlcpy(ent->country, countrycode, sizeof(ent->country));
1087 ent->total = c;
1088 smartlist_add(entries, ent);
1091 /* Sort entries. Note that we must do this _AFTER_ rounding, or else
1092 * the sort order could leak info. */
1093 smartlist_sort(entries, c_hist_compare_);
1095 if (country_str) {
1096 smartlist_t *chunks = smartlist_new();
1097 SMARTLIST_FOREACH(entries, c_hist_t *, ch, {
1098 smartlist_add_asprintf(chunks, "%s=%u", ch->country, ch->total);
1100 *country_str = smartlist_join_strings(chunks, ",", 0, NULL);
1101 SMARTLIST_FOREACH(chunks, char *, c, tor_free(c));
1102 smartlist_free(chunks);
1105 SMARTLIST_FOREACH(entries, c_hist_t *, c, tor_free(c));
1106 smartlist_free(entries);
1107 tor_free(counts);
1109 return 0;
1112 /** Return a newly allocated string holding the per-country request history
1113 * for v3 network statuses in a format suitable for an extra-info document,
1114 * or NULL on failure. */
1115 char *
1116 geoip_get_request_history(void)
1118 smartlist_t *entries, *strings;
1119 char *result;
1120 unsigned granularity = IP_GRANULARITY;
1122 if (!geoip_countries)
1123 return NULL;
1125 entries = smartlist_new();
1126 SMARTLIST_FOREACH_BEGIN(geoip_countries, geoip_country_t *, c) {
1127 uint32_t tot = 0;
1128 c_hist_t *ent;
1129 tot = c->n_v3_ns_requests;
1130 if (!tot)
1131 continue;
1132 ent = tor_malloc_zero(sizeof(c_hist_t));
1133 strlcpy(ent->country, c->countrycode, sizeof(ent->country));
1134 ent->total = round_to_next_multiple_of(tot, granularity);
1135 smartlist_add(entries, ent);
1136 } SMARTLIST_FOREACH_END(c);
1137 smartlist_sort(entries, c_hist_compare_);
1139 strings = smartlist_new();
1140 SMARTLIST_FOREACH(entries, c_hist_t *, ent, {
1141 smartlist_add_asprintf(strings, "%s=%u", ent->country, ent->total);
1143 result = smartlist_join_strings(strings, ",", 0, NULL);
1144 SMARTLIST_FOREACH(strings, char *, cp, tor_free(cp));
1145 SMARTLIST_FOREACH(entries, c_hist_t *, ent, tor_free(ent));
1146 smartlist_free(strings);
1147 smartlist_free(entries);
1148 return result;
/** When the current directory request statistics interval began; 0 means
 * directory request statistics are not being collected. */
static time_t start_of_dirreq_stats_interval = 0;
1155 /** Initialize directory request stats. */
1156 void
1157 geoip_dirreq_stats_init(time_t now)
1159 start_of_dirreq_stats_interval = now;
1162 /** Reset counters for dirreq stats. */
1163 void
1164 geoip_reset_dirreq_stats(time_t now)
1166 SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, {
1167 c->n_v3_ns_requests = 0;
1170 clientmap_entry_t **ent, **next, *this;
1171 for (ent = HT_START(clientmap, &client_history); ent != NULL;
1172 ent = next) {
1173 if ((*ent)->action == GEOIP_CLIENT_NETWORKSTATUS) {
1174 this = *ent;
1175 next = HT_NEXT_RMV(clientmap, &client_history, ent);
1176 clientmap_entry_free(this);
1177 } else {
1178 next = HT_NEXT(clientmap, &client_history, ent);
1182 memset(ns_v3_responses, 0, sizeof(ns_v3_responses));
1184 dirreq_map_entry_t **ent, **next, *this;
1185 for (ent = HT_START(dirreqmap, &dirreq_map); ent != NULL; ent = next) {
1186 this = *ent;
1187 next = HT_NEXT_RMV(dirreqmap, &dirreq_map, ent);
1188 tor_free(this);
1191 start_of_dirreq_stats_interval = now;
/** Stop collecting directory request stats.  This resets all dirreq
 * state with an interval start of 0 ("not collecting"), so that
 * geoip_dirreq_stats_init() can start a fresh interval later. */
void
geoip_dirreq_stats_term(void)
{
  const time_t not_collecting = 0;
  geoip_reset_dirreq_stats(not_collecting);
}
1202 /** Return a newly allocated string containing the dirreq statistics
1203 * until <b>now</b>, or NULL if we're not collecting dirreq stats. Caller
1204 * must ensure start_of_dirreq_stats_interval is in the past. */
1205 char *
1206 geoip_format_dirreq_stats(time_t now)
1208 char t[ISO_TIME_LEN+1];
1209 int i;
1210 char *v3_ips_string, *v3_reqs_string, *v3_direct_dl_string,
1211 *v3_tunneled_dl_string;
1212 char *result;
1214 if (!start_of_dirreq_stats_interval)
1215 return NULL; /* Not initialized. */
1217 tor_assert(now >= start_of_dirreq_stats_interval);
1219 format_iso_time(t, now);
1220 geoip_get_client_history(GEOIP_CLIENT_NETWORKSTATUS, &v3_ips_string, NULL);
1221 v3_reqs_string = geoip_get_request_history();
1223 #define RESPONSE_GRANULARITY 8
1224 for (i = 0; i < GEOIP_NS_RESPONSE_NUM; i++) {
1225 ns_v3_responses[i] = round_uint32_to_next_multiple_of(
1226 ns_v3_responses[i], RESPONSE_GRANULARITY);
1228 #undef RESPONSE_GRANULARITY
1230 v3_direct_dl_string = geoip_get_dirreq_history(DIRREQ_DIRECT);
1231 v3_tunneled_dl_string = geoip_get_dirreq_history(DIRREQ_TUNNELED);
1233 /* Put everything together into a single string. */
1234 tor_asprintf(&result, "dirreq-stats-end %s (%d s)\n"
1235 "dirreq-v3-ips %s\n"
1236 "dirreq-v3-reqs %s\n"
1237 "dirreq-v3-resp ok=%u,not-enough-sigs=%u,unavailable=%u,"
1238 "not-found=%u,not-modified=%u,busy=%u\n"
1239 "dirreq-v3-direct-dl %s\n"
1240 "dirreq-v3-tunneled-dl %s\n",
1242 (unsigned) (now - start_of_dirreq_stats_interval),
1243 v3_ips_string ? v3_ips_string : "",
1244 v3_reqs_string ? v3_reqs_string : "",
1245 ns_v3_responses[GEOIP_SUCCESS],
1246 ns_v3_responses[GEOIP_REJECT_NOT_ENOUGH_SIGS],
1247 ns_v3_responses[GEOIP_REJECT_UNAVAILABLE],
1248 ns_v3_responses[GEOIP_REJECT_NOT_FOUND],
1249 ns_v3_responses[GEOIP_REJECT_NOT_MODIFIED],
1250 ns_v3_responses[GEOIP_REJECT_BUSY],
1251 v3_direct_dl_string ? v3_direct_dl_string : "",
1252 v3_tunneled_dl_string ? v3_tunneled_dl_string : "");
1254 /* Free partial strings. */
1255 tor_free(v3_ips_string);
1256 tor_free(v3_reqs_string);
1257 tor_free(v3_direct_dl_string);
1258 tor_free(v3_tunneled_dl_string);
1260 return result;
1263 /** If 24 hours have passed since the beginning of the current dirreq
1264 * stats period, write dirreq stats to $DATADIR/stats/dirreq-stats
1265 * (possibly overwriting an existing file) and reset counters. Return
1266 * when we would next want to write dirreq stats or 0 if we never want to
1267 * write. */
1268 time_t
1269 geoip_dirreq_stats_write(time_t now)
1271 char *str = NULL;
1273 if (!start_of_dirreq_stats_interval)
1274 return 0; /* Not initialized. */
1275 if (start_of_dirreq_stats_interval + WRITE_STATS_INTERVAL > now)
1276 goto done; /* Not ready to write. */
1278 /* Discard all items in the client history that are too old. */
1279 geoip_remove_old_clients(start_of_dirreq_stats_interval);
1281 /* Generate history string .*/
1282 str = geoip_format_dirreq_stats(now);
1284 /* Write dirreq-stats string to disk. */
1285 if (!check_or_create_data_subdir("stats")) {
1286 write_to_data_subdir("stats", "dirreq-stats", str, "dirreq statistics");
1287 /* Reset measurement interval start. */
1288 geoip_reset_dirreq_stats(now);
1291 done:
1292 tor_free(str);
1293 return start_of_dirreq_stats_interval + WRITE_STATS_INTERVAL;
/** When the current bridge statistics interval began; 0 means bridge
 * statistics are not being collected. */
static time_t start_of_bridge_stats_interval = 0;
1300 /** Initialize bridge stats. */
1301 void
1302 geoip_bridge_stats_init(time_t now)
1304 start_of_bridge_stats_interval = now;
1307 /** Stop collecting bridge stats in a way that we can re-start doing so in
1308 * geoip_bridge_stats_init(). */
1309 void
1310 geoip_bridge_stats_term(void)
1312 client_history_clear();
1313 start_of_bridge_stats_interval = 0;
1316 /** Validate a bridge statistics string as it would be written to a
1317 * current extra-info descriptor. Return 1 if the string is valid and
1318 * recent enough, or 0 otherwise. */
1319 static int
1320 validate_bridge_stats(const char *stats_str, time_t now)
1322 char stats_end_str[ISO_TIME_LEN+1], stats_start_str[ISO_TIME_LEN+1],
1323 *eos;
1325 const char *BRIDGE_STATS_END = "bridge-stats-end ";
1326 const char *BRIDGE_IPS = "bridge-ips ";
1327 const char *BRIDGE_IPS_EMPTY_LINE = "bridge-ips\n";
1328 const char *BRIDGE_TRANSPORTS = "bridge-ip-transports ";
1329 const char *BRIDGE_TRANSPORTS_EMPTY_LINE = "bridge-ip-transports\n";
1330 const char *tmp;
1331 time_t stats_end_time;
1332 int seconds;
1333 tor_assert(stats_str);
1335 /* Parse timestamp and number of seconds from
1336 "bridge-stats-end YYYY-MM-DD HH:MM:SS (N s)" */
1337 tmp = find_str_at_start_of_line(stats_str, BRIDGE_STATS_END);
1338 if (!tmp)
1339 return 0;
1340 tmp += strlen(BRIDGE_STATS_END);
1342 if (strlen(tmp) < ISO_TIME_LEN + 6)
1343 return 0;
1344 strlcpy(stats_end_str, tmp, sizeof(stats_end_str));
1345 if (parse_iso_time(stats_end_str, &stats_end_time) < 0)
1346 return 0;
1347 if (stats_end_time < now - (25*60*60) ||
1348 stats_end_time > now + (1*60*60))
1349 return 0;
1350 seconds = (int)strtol(tmp + ISO_TIME_LEN + 2, &eos, 10);
1351 if (!eos || seconds < 23*60*60)
1352 return 0;
1353 format_iso_time(stats_start_str, stats_end_time - seconds);
1355 /* Parse: "bridge-ips CC=N,CC=N,..." */
1356 tmp = find_str_at_start_of_line(stats_str, BRIDGE_IPS);
1357 if (!tmp) {
1358 /* Look if there is an empty "bridge-ips" line */
1359 tmp = find_str_at_start_of_line(stats_str, BRIDGE_IPS_EMPTY_LINE);
1360 if (!tmp)
1361 return 0;
1364 /* Parse: "bridge-ip-transports PT=N,PT=N,..." */
1365 tmp = find_str_at_start_of_line(stats_str, BRIDGE_TRANSPORTS);
1366 if (!tmp) {
1367 /* Look if there is an empty "bridge-ip-transports" line */
1368 tmp = find_str_at_start_of_line(stats_str, BRIDGE_TRANSPORTS_EMPTY_LINE);
1369 if (!tmp)
1370 return 0;
1373 return 1;
/** The latest bridge statistics, already formatted for inclusion in an
 * extra-info descriptor; NULL while none are available. */
static char *bridge_stats_extrainfo = NULL;
1380 /** Return a newly allocated string holding our bridge usage stats by country
1381 * in a format suitable for inclusion in an extrainfo document. Return NULL on
1382 * failure. */
1383 char *
1384 geoip_format_bridge_stats(time_t now)
1386 char *out = NULL;
1387 char *country_data = NULL, *ipver_data = NULL, *transport_data = NULL;
1388 long duration = now - start_of_bridge_stats_interval;
1389 char written[ISO_TIME_LEN+1];
1391 if (duration < 0)
1392 return NULL;
1393 if (!start_of_bridge_stats_interval)
1394 return NULL; /* Not initialized. */
1396 format_iso_time(written, now);
1397 geoip_get_client_history(GEOIP_CLIENT_CONNECT, &country_data, &ipver_data);
1398 transport_data = geoip_get_transport_history();
1400 tor_asprintf(&out,
1401 "bridge-stats-end %s (%ld s)\n"
1402 "bridge-ips %s\n"
1403 "bridge-ip-versions %s\n"
1404 "bridge-ip-transports %s\n",
1405 written, duration,
1406 country_data ? country_data : "",
1407 ipver_data ? ipver_data : "",
1408 transport_data ? transport_data : "");
1409 tor_free(country_data);
1410 tor_free(ipver_data);
1411 tor_free(transport_data);
1413 return out;
1416 /** Return a newly allocated string holding our bridge usage stats by country
1417 * in a format suitable for the answer to a controller request. Return NULL on
1418 * failure. */
1419 static char *
1420 format_bridge_stats_controller(time_t now)
1422 char *out = NULL, *country_data = NULL, *ipver_data = NULL;
1423 char started[ISO_TIME_LEN+1];
1424 (void) now;
1426 format_iso_time(started, start_of_bridge_stats_interval);
1427 geoip_get_client_history(GEOIP_CLIENT_CONNECT, &country_data, &ipver_data);
1429 tor_asprintf(&out,
1430 "TimeStarted=\"%s\" CountrySummary=%s IPVersions=%s",
1431 started,
1432 country_data ? country_data : "",
1433 ipver_data ? ipver_data : "");
1434 tor_free(country_data);
1435 tor_free(ipver_data);
1436 return out;
1439 /** Return a newly allocated string holding our bridge usage stats by
1440 * country in a format suitable for inclusion in our heartbeat
1441 * message. Return NULL on failure. */
1442 char *
1443 format_client_stats_heartbeat(time_t now)
1445 const int n_hours = 6;
1446 char *out = NULL;
1447 int n_clients = 0;
1448 clientmap_entry_t **ent;
1449 unsigned cutoff = (unsigned)( (now-n_hours*3600)/60 );
1451 if (!start_of_bridge_stats_interval)
1452 return NULL; /* Not initialized. */
1454 /* count unique IPs */
1455 HT_FOREACH(ent, clientmap, &client_history) {
1456 /* only count directly connecting clients */
1457 if ((*ent)->action != GEOIP_CLIENT_CONNECT)
1458 continue;
1459 if ((*ent)->last_seen_in_minutes < cutoff)
1460 continue;
1461 n_clients++;
1464 tor_asprintf(&out, "Heartbeat: "
1465 "In the last %d hours, I have seen %d unique clients.",
1466 n_hours,
1467 n_clients);
1469 return out;
1472 /** Write bridge statistics to $DATADIR/stats/bridge-stats and return
1473 * when we should next try to write statistics. */
1474 time_t
1475 geoip_bridge_stats_write(time_t now)
1477 char *val = NULL;
1479 /* Check if 24 hours have passed since starting measurements. */
1480 if (now < start_of_bridge_stats_interval + WRITE_STATS_INTERVAL)
1481 return start_of_bridge_stats_interval + WRITE_STATS_INTERVAL;
1483 /* Discard all items in the client history that are too old. */
1484 geoip_remove_old_clients(start_of_bridge_stats_interval);
1486 /* Generate formatted string */
1487 val = geoip_format_bridge_stats(now);
1488 if (val == NULL)
1489 goto done;
1491 /* Update the stored value. */
1492 tor_free(bridge_stats_extrainfo);
1493 bridge_stats_extrainfo = val;
1494 start_of_bridge_stats_interval = now;
1496 /* Write it to disk. */
1497 if (!check_or_create_data_subdir("stats")) {
1498 write_to_data_subdir("stats", "bridge-stats",
1499 bridge_stats_extrainfo, "bridge statistics");
1501 /* Tell the controller, "hey, there are clients!" */
1503 char *controller_str = format_bridge_stats_controller(now);
1504 if (controller_str)
1505 control_event_clients_seen(controller_str);
1506 tor_free(controller_str);
1510 done:
1511 return start_of_bridge_stats_interval + WRITE_STATS_INTERVAL;
1514 /** Try to load the most recent bridge statistics from disk, unless we
1515 * have finished a measurement interval lately, and check whether they
1516 * are still recent enough. */
1517 static void
1518 load_bridge_stats(time_t now)
1520 char *fname, *contents;
1521 if (bridge_stats_extrainfo)
1522 return;
1524 fname = get_datadir_fname2("stats", "bridge-stats");
1525 contents = read_file_to_str(fname, RFTS_IGNORE_MISSING, NULL);
1526 if (contents && validate_bridge_stats(contents, now)) {
1527 bridge_stats_extrainfo = contents;
1528 } else {
1529 tor_free(contents);
1532 tor_free(fname);
1535 /** Return most recent bridge statistics for inclusion in extra-info
1536 * descriptors, or NULL if we don't have recent bridge statistics. */
1537 const char *
1538 geoip_get_bridge_stats_extrainfo(time_t now)
1540 load_bridge_stats(now);
1541 return bridge_stats_extrainfo;
/** Return a newly allocated string with the current bridge statistics in
 * the format sent to controller clients.  This is a public wrapper
 * around format_bridge_stats_controller(). */
char *
geoip_get_bridge_stats_controller(time_t now)
{
  char *reply = format_bridge_stats_controller(now);
  return reply;
}
/** When the current entry statistics interval began; 0 means entry
 * statistics are not being collected. */
static time_t start_of_entry_stats_interval = 0;
1556 /** Initialize entry stats. */
1557 void
1558 geoip_entry_stats_init(time_t now)
1560 start_of_entry_stats_interval = now;
1563 /** Reset counters for entry stats. */
1564 void
1565 geoip_reset_entry_stats(time_t now)
1567 client_history_clear();
1568 start_of_entry_stats_interval = now;
/** Stop collecting entry stats.  This resets the entry state with an
 * interval start of 0 ("not collecting"), so that
 * geoip_entry_stats_init() can start a fresh interval later. */
void
geoip_entry_stats_term(void)
{
  const time_t not_collecting = 0;
  geoip_reset_entry_stats(not_collecting);
}
1579 /** Return a newly allocated string containing the entry statistics
1580 * until <b>now</b>, or NULL if we're not collecting entry stats. Caller
1581 * must ensure start_of_entry_stats_interval lies in the past. */
1582 char *
1583 geoip_format_entry_stats(time_t now)
1585 char t[ISO_TIME_LEN+1];
1586 char *data = NULL;
1587 char *result;
1589 if (!start_of_entry_stats_interval)
1590 return NULL; /* Not initialized. */
1592 tor_assert(now >= start_of_entry_stats_interval);
1594 geoip_get_client_history(GEOIP_CLIENT_CONNECT, &data, NULL);
1595 format_iso_time(t, now);
1596 tor_asprintf(&result,
1597 "entry-stats-end %s (%u s)\n"
1598 "entry-ips %s\n",
1599 t, (unsigned) (now - start_of_entry_stats_interval),
1600 data ? data : "");
1601 tor_free(data);
1602 return result;
1605 /** If 24 hours have passed since the beginning of the current entry stats
1606 * period, write entry stats to $DATADIR/stats/entry-stats (possibly
1607 * overwriting an existing file) and reset counters. Return when we would
1608 * next want to write entry stats or 0 if we never want to write. */
1609 time_t
1610 geoip_entry_stats_write(time_t now)
1612 char *str = NULL;
1614 if (!start_of_entry_stats_interval)
1615 return 0; /* Not initialized. */
1616 if (start_of_entry_stats_interval + WRITE_STATS_INTERVAL > now)
1617 goto done; /* Not ready to write. */
1619 /* Discard all items in the client history that are too old. */
1620 geoip_remove_old_clients(start_of_entry_stats_interval);
1622 /* Generate history string .*/
1623 str = geoip_format_entry_stats(now);
1625 /* Write entry-stats string to disk. */
1626 if (!check_or_create_data_subdir("stats")) {
1627 write_to_data_subdir("stats", "entry-stats", str, "entry statistics");
1629 /* Reset measurement interval start. */
1630 geoip_reset_entry_stats(now);
1633 done:
1634 tor_free(str);
1635 return start_of_entry_stats_interval + WRITE_STATS_INTERVAL;
1638 /** Helper used to implement GETINFO ip-to-country/... controller command. */
1640 getinfo_helper_geoip(control_connection_t *control_conn,
1641 const char *question, char **answer,
1642 const char **errmsg)
1644 (void)control_conn;
1645 if (!strcmpstart(question, "ip-to-country/")) {
1646 int c;
1647 sa_family_t family;
1648 tor_addr_t addr;
1649 question += strlen("ip-to-country/");
1650 family = tor_addr_parse(&addr, question);
1651 if (family != AF_INET && family != AF_INET6) {
1652 *errmsg = "Invalid address family";
1653 return -1;
1655 if (!geoip_is_loaded(family)) {
1656 *errmsg = "GeoIP data not loaded";
1657 return -1;
1659 if (family == AF_INET)
1660 c = geoip_get_country_by_ipv4(tor_addr_to_ipv4h(&addr));
1661 else /* AF_INET6 */
1662 c = geoip_get_country_by_ipv6(tor_addr_to_in6(&addr));
1663 *answer = tor_strdup(geoip_get_country_name(c));
1665 return 0;
1668 /** Release all storage held by the GeoIP databases and country list. */
1669 static void
1670 clear_geoip_db(void)
1672 if (geoip_countries) {
1673 SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, tor_free(c));
1674 smartlist_free(geoip_countries);
1677 strmap_free(country_idxplus1_by_lc_code, NULL);
1678 if (geoip_ipv4_entries) {
1679 SMARTLIST_FOREACH(geoip_ipv4_entries, geoip_ipv4_entry_t *, ent,
1680 tor_free(ent));
1681 smartlist_free(geoip_ipv4_entries);
1683 if (geoip_ipv6_entries) {
1684 SMARTLIST_FOREACH(geoip_ipv6_entries, geoip_ipv6_entry_t *, ent,
1685 tor_free(ent));
1686 smartlist_free(geoip_ipv6_entries);
1688 geoip_countries = NULL;
1689 country_idxplus1_by_lc_code = NULL;
1690 geoip_ipv4_entries = NULL;
1691 geoip_ipv6_entries = NULL;
1694 /** Release all storage held in this file. */
1695 void
1696 geoip_free_all(void)
1699 clientmap_entry_t **ent, **next, *this;
1700 for (ent = HT_START(clientmap, &client_history); ent != NULL; ent = next) {
1701 this = *ent;
1702 next = HT_NEXT_RMV(clientmap, &client_history, ent);
1703 clientmap_entry_free(this);
1705 HT_CLEAR(clientmap, &client_history);
1708 dirreq_map_entry_t **ent, **next, *this;
1709 for (ent = HT_START(dirreqmap, &dirreq_map); ent != NULL; ent = next) {
1710 this = *ent;
1711 next = HT_NEXT_RMV(dirreqmap, &dirreq_map, ent);
1712 tor_free(this);
1714 HT_CLEAR(dirreqmap, &dirreq_map);
1717 clear_geoip_db();
1718 tor_free(bridge_stats_extrainfo);