minor updates on upcoming changelog
[tor.git] / src / or / geoip.c
blob3944b2cf69ce9a520f50bda8dc09cd17c5dd8a2d
/* Copyright (c) 2007-2017, The Tor Project, Inc. */
/* See LICENSE for licensing information */

/**
 * \file geoip.c
 * \brief Functions related to maintaining an IP-to-country database;
 * to summarizing client connections by country to entry guards, bridges,
 * and directory servers; and to gathering statistics on answering network
 * status requests.
 *
 * There are two main kinds of functions in this module: geoip functions,
 * which map groups of IPv4 and IPv6 addresses to country codes, and
 * statistical functions, which collect statistics about different kinds of
 * per-country usage.
 *
 * The geoip lookup tables are implemented as sorted lists of disjoint address
 * ranges, each mapping to a singleton geoip_country_t.  These country objects
 * are also indexed by their names in a hashtable.
 *
 * The tables are populated from disk at startup by the geoip_load_file()
 * function.  For more information on the file format they read, see that
 * function.  See the scripts and the README file in src/config for more
 * information about how those files are generated.
 *
 * Tor uses GeoIP information in order to implement user requests (such as
 * ExcludeNodes {cc}), and to keep track of how much usage relays are getting
 * for each country.
 */
30 #define GEOIP_PRIVATE
31 #include "or.h"
32 #include "ht.h"
33 #include "config.h"
34 #include "control.h"
35 #include "dnsserv.h"
36 #include "geoip.h"
37 #include "routerlist.h"
39 static void init_geoip_countries(void);
/** One row of the IPv4 GeoIP table: an inclusive range of addresses
 * together with the country that range is assigned to. */
typedef struct geoip_ipv4_entry_t {
  /** First address covered by the range, in host order. */
  uint32_t ip_low;
  /** Last address covered by the range, in host order. */
  uint32_t ip_high;
  /** Position of the owning country inside geoip_countries. */
  intptr_t country;
} geoip_ipv4_entry_t;
/** One row of the IPv6 GeoIP table: an inclusive range of addresses
 * together with the country that range is assigned to.  The range bounds
 * are ordered by comparing their s6_addr bytes (see the IPv6 comparison
 * helpers in this file). */
typedef struct geoip_ipv6_entry_t {
  /** First address covered by the range. */
  struct in6_addr ip_low;
  /** Last address covered by the range. */
  struct in6_addr ip_high;
  /** Position of the owning country inside geoip_countries. */
  intptr_t country;
} geoip_ipv6_entry_t;
/** Per-country bookkeeping used for GeoIP request history. */
typedef struct geoip_country_t {
  /** NUL-terminated two-letter country code ("??" for the unknown
   * country). */
  char countrycode[3];
  /** Counter bumped by geoip_note_client_seen() for each network status
   * request attributed to this country. */
  uint32_t n_v3_ns_requests;
} geoip_country_t;
61 /** A list of geoip_country_t */
62 static smartlist_t *geoip_countries = NULL;
63 /** A map from lowercased country codes to their position in geoip_countries.
64 * The index is encoded in the pointer, and 1 is added so that NULL can mean
65 * not found. */
66 static strmap_t *country_idxplus1_by_lc_code = NULL;
67 /** Lists of all known geoip_ipv4_entry_t and geoip_ipv6_entry_t, sorted
68 * by their respective ip_low. */
69 static smartlist_t *geoip_ipv4_entries = NULL, *geoip_ipv6_entries = NULL;
71 /** SHA1 digest of the GeoIP files to include in extra-info descriptors. */
72 static char geoip_digest[DIGEST_LEN];
73 static char geoip6_digest[DIGEST_LEN];
75 /** Return the index of the <b>country</b>'s entry in the GeoIP
76 * country list if it is a valid 2-letter country code, otherwise
77 * return -1. */
78 MOCK_IMPL(country_t,
79 geoip_get_country,(const char *country))
81 void *idxplus1_;
82 intptr_t idx;
84 idxplus1_ = strmap_get_lc(country_idxplus1_by_lc_code, country);
85 if (!idxplus1_)
86 return -1;
88 idx = ((uintptr_t)idxplus1_)-1;
89 return (country_t)idx;
92 /** Add an entry to a GeoIP table, mapping all IP addresses between <b>low</b>
93 * and <b>high</b>, inclusive, to the 2-letter country code <b>country</b>. */
94 static void
95 geoip_add_entry(const tor_addr_t *low, const tor_addr_t *high,
96 const char *country)
98 intptr_t idx;
99 void *idxplus1_;
101 IF_BUG_ONCE(tor_addr_family(low) != tor_addr_family(high))
102 return;
103 IF_BUG_ONCE(tor_addr_compare(high, low, CMP_EXACT) < 0)
104 return;
106 idxplus1_ = strmap_get_lc(country_idxplus1_by_lc_code, country);
108 if (!idxplus1_) {
109 geoip_country_t *c = tor_malloc_zero(sizeof(geoip_country_t));
110 strlcpy(c->countrycode, country, sizeof(c->countrycode));
111 tor_strlower(c->countrycode);
112 smartlist_add(geoip_countries, c);
113 idx = smartlist_len(geoip_countries) - 1;
114 strmap_set_lc(country_idxplus1_by_lc_code, country, (void*)(idx+1));
115 } else {
116 idx = ((uintptr_t)idxplus1_)-1;
119 geoip_country_t *c = smartlist_get(geoip_countries, idx);
120 tor_assert(!strcasecmp(c->countrycode, country));
123 if (tor_addr_family(low) == AF_INET) {
124 geoip_ipv4_entry_t *ent = tor_malloc_zero(sizeof(geoip_ipv4_entry_t));
125 ent->ip_low = tor_addr_to_ipv4h(low);
126 ent->ip_high = tor_addr_to_ipv4h(high);
127 ent->country = idx;
128 smartlist_add(geoip_ipv4_entries, ent);
129 } else if (tor_addr_family(low) == AF_INET6) {
130 geoip_ipv6_entry_t *ent = tor_malloc_zero(sizeof(geoip_ipv6_entry_t));
131 ent->ip_low = *tor_addr_to_in6_assert(low);
132 ent->ip_high = *tor_addr_to_in6_assert(high);
133 ent->country = idx;
134 smartlist_add(geoip_ipv6_entries, ent);
138 /** Add an entry to the GeoIP table indicated by <b>family</b>,
139 * parsing it from <b>line</b>. The format is as for geoip_load_file(). */
140 STATIC int
141 geoip_parse_entry(const char *line, sa_family_t family)
143 tor_addr_t low_addr, high_addr;
144 char c[3];
145 char *country = NULL;
147 if (!geoip_countries)
148 init_geoip_countries();
149 if (family == AF_INET) {
150 if (!geoip_ipv4_entries)
151 geoip_ipv4_entries = smartlist_new();
152 } else if (family == AF_INET6) {
153 if (!geoip_ipv6_entries)
154 geoip_ipv6_entries = smartlist_new();
155 } else {
156 log_warn(LD_GENERAL, "Unsupported family: %d", family);
157 return -1;
160 while (TOR_ISSPACE(*line))
161 ++line;
162 if (*line == '#')
163 return 0;
165 char buf[512];
166 if (family == AF_INET) {
167 unsigned int low, high;
168 if (tor_sscanf(line,"%u,%u,%2s", &low, &high, c) == 3 ||
169 tor_sscanf(line,"\"%u\",\"%u\",\"%2s\",", &low, &high, c) == 3) {
170 tor_addr_from_ipv4h(&low_addr, low);
171 tor_addr_from_ipv4h(&high_addr, high);
172 } else
173 goto fail;
174 country = c;
175 } else { /* AF_INET6 */
176 char *low_str, *high_str;
177 struct in6_addr low, high;
178 char *strtok_state;
179 strlcpy(buf, line, sizeof(buf));
180 low_str = tor_strtok_r(buf, ",", &strtok_state);
181 if (!low_str)
182 goto fail;
183 high_str = tor_strtok_r(NULL, ",", &strtok_state);
184 if (!high_str)
185 goto fail;
186 country = tor_strtok_r(NULL, "\n", &strtok_state);
187 if (!country)
188 goto fail;
189 if (strlen(country) != 2)
190 goto fail;
191 if (tor_inet_pton(AF_INET6, low_str, &low) <= 0)
192 goto fail;
193 tor_addr_from_in6(&low_addr, &low);
194 if (tor_inet_pton(AF_INET6, high_str, &high) <= 0)
195 goto fail;
196 tor_addr_from_in6(&high_addr, &high);
198 geoip_add_entry(&low_addr, &high_addr, country);
199 return 0;
201 fail:
202 log_warn(LD_GENERAL, "Unable to parse line from GEOIP %s file: %s",
203 family == AF_INET ? "IPv4" : "IPv6", escaped(line));
204 return -1;
207 /** Sorting helper: return -1, 1, or 0 based on comparison of two
208 * geoip_ipv4_entry_t */
209 static int
210 geoip_ipv4_compare_entries_(const void **_a, const void **_b)
212 const geoip_ipv4_entry_t *a = *_a, *b = *_b;
213 if (a->ip_low < b->ip_low)
214 return -1;
215 else if (a->ip_low > b->ip_low)
216 return 1;
217 else
218 return 0;
221 /** bsearch helper: return -1, 1, or 0 based on comparison of an IP (a pointer
222 * to a uint32_t in host order) to a geoip_ipv4_entry_t */
223 static int
224 geoip_ipv4_compare_key_to_entry_(const void *_key, const void **_member)
226 /* No alignment issue here, since _key really is a pointer to uint32_t */
227 const uint32_t addr = *(uint32_t *)_key;
228 const geoip_ipv4_entry_t *entry = *_member;
229 if (addr < entry->ip_low)
230 return -1;
231 else if (addr > entry->ip_high)
232 return 1;
233 else
234 return 0;
237 /** Sorting helper: return -1, 1, or 0 based on comparison of two
238 * geoip_ipv6_entry_t */
239 static int
240 geoip_ipv6_compare_entries_(const void **_a, const void **_b)
242 const geoip_ipv6_entry_t *a = *_a, *b = *_b;
243 return fast_memcmp(a->ip_low.s6_addr, b->ip_low.s6_addr,
244 sizeof(struct in6_addr));
247 /** bsearch helper: return -1, 1, or 0 based on comparison of an IPv6
248 * (a pointer to a in6_addr) to a geoip_ipv6_entry_t */
249 static int
250 geoip_ipv6_compare_key_to_entry_(const void *_key, const void **_member)
252 const struct in6_addr *addr = (struct in6_addr *)_key;
253 const geoip_ipv6_entry_t *entry = *_member;
255 if (fast_memcmp(addr->s6_addr, entry->ip_low.s6_addr,
256 sizeof(struct in6_addr)) < 0)
257 return -1;
258 else if (fast_memcmp(addr->s6_addr, entry->ip_high.s6_addr,
259 sizeof(struct in6_addr)) > 0)
260 return 1;
261 else
262 return 0;
265 /** Return 1 if we should collect geoip stats on bridge users, and
266 * include them in our extrainfo descriptor. Else return 0. */
268 should_record_bridge_info(const or_options_t *options)
270 return options->BridgeRelay && options->BridgeRecordUsageByCountry;
273 /** Set up a new list of geoip countries with no countries (yet) set in it,
274 * except for the unknown country.
276 static void
277 init_geoip_countries(void)
279 geoip_country_t *geoip_unresolved;
280 geoip_countries = smartlist_new();
281 /* Add a geoip_country_t for requests that could not be resolved to a
282 * country as first element (index 0) to geoip_countries. */
283 geoip_unresolved = tor_malloc_zero(sizeof(geoip_country_t));
284 strlcpy(geoip_unresolved->countrycode, "??",
285 sizeof(geoip_unresolved->countrycode));
286 smartlist_add(geoip_countries, geoip_unresolved);
287 country_idxplus1_by_lc_code = strmap_new();
288 strmap_set_lc(country_idxplus1_by_lc_code, "??", (void*)(1));
291 /** Clear appropriate GeoIP database, based on <b>family</b>, and
292 * reload it from the file <b>filename</b>. Return 0 on success, -1 on
293 * failure.
295 * Recognized line formats for IPv4 are:
296 * INTIPLOW,INTIPHIGH,CC
297 * and
298 * "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME"
299 * where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned
300 * integers, and CC is a country code.
302 * Recognized line format for IPv6 is:
303 * IPV6LOW,IPV6HIGH,CC
304 * where IPV6LOW and IPV6HIGH are IPv6 addresses and CC is a country code.
306 * It also recognizes, and skips over, blank lines and lines that start
307 * with '#' (comments).
310 geoip_load_file(sa_family_t family, const char *filename)
312 FILE *f;
313 const char *msg = "";
314 const or_options_t *options = get_options();
315 int severity = options_need_geoip_info(options, &msg) ? LOG_WARN : LOG_INFO;
316 crypto_digest_t *geoip_digest_env = NULL;
318 tor_assert(family == AF_INET || family == AF_INET6);
320 if (!(f = tor_fopen_cloexec(filename, "r"))) {
321 log_fn(severity, LD_GENERAL, "Failed to open GEOIP file %s. %s",
322 filename, msg);
323 return -1;
325 if (!geoip_countries)
326 init_geoip_countries();
328 if (family == AF_INET) {
329 if (geoip_ipv4_entries) {
330 SMARTLIST_FOREACH(geoip_ipv4_entries, geoip_ipv4_entry_t *, e,
331 tor_free(e));
332 smartlist_free(geoip_ipv4_entries);
334 geoip_ipv4_entries = smartlist_new();
335 } else { /* AF_INET6 */
336 if (geoip_ipv6_entries) {
337 SMARTLIST_FOREACH(geoip_ipv6_entries, geoip_ipv6_entry_t *, e,
338 tor_free(e));
339 smartlist_free(geoip_ipv6_entries);
341 geoip_ipv6_entries = smartlist_new();
343 geoip_digest_env = crypto_digest_new();
345 log_notice(LD_GENERAL, "Parsing GEOIP %s file %s.",
346 (family == AF_INET) ? "IPv4" : "IPv6", filename);
347 while (!feof(f)) {
348 char buf[512];
349 if (fgets(buf, (int)sizeof(buf), f) == NULL)
350 break;
351 crypto_digest_add_bytes(geoip_digest_env, buf, strlen(buf));
352 /* FFFF track full country name. */
353 geoip_parse_entry(buf, family);
355 /*XXXX abort and return -1 if no entries/illformed?*/
356 fclose(f);
358 /* Sort list and remember file digests so that we can include it in
359 * our extra-info descriptors. */
360 if (family == AF_INET) {
361 smartlist_sort(geoip_ipv4_entries, geoip_ipv4_compare_entries_);
362 /* Okay, now we need to maybe change our mind about what is in
363 * which country. We do this for IPv4 only since that's what we
364 * store in node->country. */
365 refresh_all_country_info();
366 crypto_digest_get_digest(geoip_digest_env, geoip_digest, DIGEST_LEN);
367 } else {
368 /* AF_INET6 */
369 smartlist_sort(geoip_ipv6_entries, geoip_ipv6_compare_entries_);
370 crypto_digest_get_digest(geoip_digest_env, geoip6_digest, DIGEST_LEN);
372 crypto_digest_free(geoip_digest_env);
374 return 0;
377 /** Given an IP address in host order, return a number representing the
378 * country to which that address belongs, -1 for "No geoip information
379 * available", or 0 for the 'unknown country'. The return value will always
380 * be less than geoip_get_n_countries(). To decode it, call
381 * geoip_get_country_name().
383 STATIC int
384 geoip_get_country_by_ipv4(uint32_t ipaddr)
386 geoip_ipv4_entry_t *ent;
387 if (!geoip_ipv4_entries)
388 return -1;
389 ent = smartlist_bsearch(geoip_ipv4_entries, &ipaddr,
390 geoip_ipv4_compare_key_to_entry_);
391 return ent ? (int)ent->country : 0;
394 /** Given an IPv6 address, return a number representing the country to
395 * which that address belongs, -1 for "No geoip information available", or
396 * 0 for the 'unknown country'. The return value will always be less than
397 * geoip_get_n_countries(). To decode it, call geoip_get_country_name().
399 STATIC int
400 geoip_get_country_by_ipv6(const struct in6_addr *addr)
402 geoip_ipv6_entry_t *ent;
404 if (!geoip_ipv6_entries)
405 return -1;
406 ent = smartlist_bsearch(geoip_ipv6_entries, addr,
407 geoip_ipv6_compare_key_to_entry_);
408 return ent ? (int)ent->country : 0;
411 /** Given an IP address, return a number representing the country to which
412 * that address belongs, -1 for "No geoip information available", or 0 for
413 * the 'unknown country'. The return value will always be less than
414 * geoip_get_n_countries(). To decode it, call geoip_get_country_name().
416 MOCK_IMPL(int,
417 geoip_get_country_by_addr,(const tor_addr_t *addr))
419 if (tor_addr_family(addr) == AF_INET) {
420 return geoip_get_country_by_ipv4(tor_addr_to_ipv4h(addr));
421 } else if (tor_addr_family(addr) == AF_INET6) {
422 return geoip_get_country_by_ipv6(tor_addr_to_in6(addr));
423 } else {
424 return -1;
428 /** Return the number of countries recognized by the GeoIP country list. */
429 MOCK_IMPL(int,
430 geoip_get_n_countries,(void))
432 if (!geoip_countries)
433 init_geoip_countries();
434 return (int) smartlist_len(geoip_countries);
437 /** Return the two-letter country code associated with the number <b>num</b>,
438 * or "??" for an unknown value. */
439 const char *
440 geoip_get_country_name(country_t num)
442 if (geoip_countries && num >= 0 && num < smartlist_len(geoip_countries)) {
443 geoip_country_t *c = smartlist_get(geoip_countries, num);
444 return c->countrycode;
445 } else
446 return "??";
449 /** Return true iff we have loaded a GeoIP database.*/
450 MOCK_IMPL(int,
451 geoip_is_loaded,(sa_family_t family))
453 tor_assert(family == AF_INET || family == AF_INET6);
454 if (geoip_countries == NULL)
455 return 0;
456 if (family == AF_INET)
457 return geoip_ipv4_entries != NULL;
458 else /* AF_INET6 */
459 return geoip_ipv6_entries != NULL;
462 /** Return the hex-encoded SHA1 digest of the loaded GeoIP file. The
463 * result does not need to be deallocated, but will be overwritten by the
464 * next call of hex_str(). */
465 const char *
466 geoip_db_digest(sa_family_t family)
468 tor_assert(family == AF_INET || family == AF_INET6);
469 if (family == AF_INET)
470 return hex_str(geoip_digest, DIGEST_LEN);
471 else /* AF_INET6 */
472 return hex_str(geoip6_digest, DIGEST_LEN);
475 /** Entry in a map from IP address to the last time we've seen an incoming
476 * connection from that IP address. Used by bridges only, to track which
477 * countries have them blocked. */
478 typedef struct clientmap_entry_t {
479 HT_ENTRY(clientmap_entry_t) node;
480 tor_addr_t addr;
481 /* Name of pluggable transport used by this client. NULL if no
482 pluggable transport was used. */
483 char *transport_name;
485 /** Time when we last saw this IP address, in MINUTES since the epoch.
487 * (This will run out of space around 4011 CE. If Tor is still in use around
488 * 4000 CE, please remember to add more bits to last_seen_in_minutes.) */
489 unsigned int last_seen_in_minutes:30;
490 unsigned int action:2;
491 } clientmap_entry_t;
/** Upper bound for last_seen_in_minutes.  The field is 30 bits wide, so
 * the largest value it can hold is (1u<<30)-1, i.e. 0x3fffffffu. */
#define MAX_LAST_SEEN_IN_MINUTES 0x3fffffffu
498 /** Map from client IP address to last time seen. */
499 static HT_HEAD(clientmap, clientmap_entry_t) client_history =
500 HT_INITIALIZER();
502 /** Hashtable helper: compute a hash of a clientmap_entry_t. */
503 static inline unsigned
504 clientmap_entry_hash(const clientmap_entry_t *a)
506 unsigned h = (unsigned) tor_addr_hash(&a->addr);
508 if (a->transport_name)
509 h += (unsigned) siphash24g(a->transport_name, strlen(a->transport_name));
511 return h;
513 /** Hashtable helper: compare two clientmap_entry_t values for equality. */
514 static inline int
515 clientmap_entries_eq(const clientmap_entry_t *a, const clientmap_entry_t *b)
517 if (strcmp_opt(a->transport_name, b->transport_name))
518 return 0;
520 return !tor_addr_compare(&a->addr, &b->addr, CMP_EXACT) &&
521 a->action == b->action;
524 HT_PROTOTYPE(clientmap, clientmap_entry_t, node, clientmap_entry_hash,
525 clientmap_entries_eq)
526 HT_GENERATE2(clientmap, clientmap_entry_t, node, clientmap_entry_hash,
527 clientmap_entries_eq, 0.6, tor_reallocarray_, tor_free_)
529 /** Free all storage held by <b>ent</b>. */
530 static void
531 clientmap_entry_free(clientmap_entry_t *ent)
533 if (!ent)
534 return;
536 tor_free(ent->transport_name);
537 tor_free(ent);
540 /** Clear history of connecting clients used by entry and bridge stats. */
541 static void
542 client_history_clear(void)
544 clientmap_entry_t **ent, **next, *this;
545 for (ent = HT_START(clientmap, &client_history); ent != NULL;
546 ent = next) {
547 if ((*ent)->action == GEOIP_CLIENT_CONNECT) {
548 this = *ent;
549 next = HT_NEXT_RMV(clientmap, &client_history, ent);
550 clientmap_entry_free(this);
551 } else {
552 next = HT_NEXT(clientmap, &client_history, ent);
557 /** Note that we've seen a client connect from the IP <b>addr</b>
558 * at time <b>now</b>. Ignored by all but bridges and directories if
559 * configured accordingly. */
560 void
561 geoip_note_client_seen(geoip_client_action_t action,
562 const tor_addr_t *addr,
563 const char *transport_name,
564 time_t now)
566 const or_options_t *options = get_options();
567 clientmap_entry_t lookup, *ent;
568 memset(&lookup, 0, sizeof(clientmap_entry_t));
570 if (action == GEOIP_CLIENT_CONNECT) {
571 /* Only remember statistics as entry guard or as bridge. */
572 if (!options->EntryStatistics &&
573 (!(options->BridgeRelay && options->BridgeRecordUsageByCountry)))
574 return;
575 } else {
576 /* Only gather directory-request statistics if configured, and
577 * forcibly disable them on bridge authorities. */
578 if (!options->DirReqStatistics || options->BridgeAuthoritativeDir)
579 return;
582 log_debug(LD_GENERAL, "Seen client from '%s' with transport '%s'.",
583 safe_str_client(fmt_addr((addr))),
584 transport_name ? transport_name : "<no transport>");
586 tor_addr_copy(&lookup.addr, addr);
587 lookup.action = (int)action;
588 lookup.transport_name = (char*) transport_name;
589 ent = HT_FIND(clientmap, &client_history, &lookup);
591 if (! ent) {
592 ent = tor_malloc_zero(sizeof(clientmap_entry_t));
593 tor_addr_copy(&ent->addr, addr);
594 if (transport_name)
595 ent->transport_name = tor_strdup(transport_name);
596 ent->action = (int)action;
597 HT_INSERT(clientmap, &client_history, ent);
599 if (now / 60 <= (int)MAX_LAST_SEEN_IN_MINUTES && now >= 0)
600 ent->last_seen_in_minutes = (unsigned)(now/60);
601 else
602 ent->last_seen_in_minutes = 0;
604 if (action == GEOIP_CLIENT_NETWORKSTATUS) {
605 int country_idx = geoip_get_country_by_addr(addr);
606 if (country_idx < 0)
607 country_idx = 0; /** unresolved requests are stored at index 0. */
608 if (country_idx >= 0 && country_idx < smartlist_len(geoip_countries)) {
609 geoip_country_t *country = smartlist_get(geoip_countries, country_idx);
610 ++country->n_v3_ns_requests;
615 /** HT_FOREACH helper: remove a clientmap_entry_t from the hashtable if it's
616 * older than a certain time. */
617 static int
618 remove_old_client_helper_(struct clientmap_entry_t *ent, void *_cutoff)
620 time_t cutoff = *(time_t*)_cutoff / 60;
621 if (ent->last_seen_in_minutes < cutoff) {
622 clientmap_entry_free(ent);
623 return 1;
624 } else {
625 return 0;
629 /** Forget about all clients that haven't connected since <b>cutoff</b>. */
630 void
631 geoip_remove_old_clients(time_t cutoff)
633 clientmap_HT_FOREACH_FN(&client_history,
634 remove_old_client_helper_,
635 &cutoff);
638 /** How many responses are we giving to clients requesting v3 network
639 * statuses? */
640 static uint32_t ns_v3_responses[GEOIP_NS_RESPONSE_NUM];
642 /** Note that we've rejected a client's request for a v3 network status
643 * for reason <b>reason</b> at time <b>now</b>. */
644 void
645 geoip_note_ns_response(geoip_ns_response_t response)
647 static int arrays_initialized = 0;
648 if (!get_options()->DirReqStatistics)
649 return;
650 if (!arrays_initialized) {
651 memset(ns_v3_responses, 0, sizeof(ns_v3_responses));
652 arrays_initialized = 1;
654 tor_assert(response < GEOIP_NS_RESPONSE_NUM);
655 ns_v3_responses[response]++;
/** Minimum number of connecting IPs a country needs before we mention it
 * at all.  This conceivably avoids reporting information that could
 * deanonymize users, though analysis is lacking. */
#define MIN_IPS_TO_NOTE_COUNTRY 1
/** Minimum number of IPs we must have to report about before we report
 * any geoip data at all. */
#define MIN_IPS_TO_NOTE_ANYTHING 1
/** Per-country geoip figures are rounded up to the nearest multiple of
 * this value before being reported. */
#define IP_GRANULARITY 8
/** Helper type: a per-country total, used to sort countries by value. */
typedef struct c_hist_t {
  /** Two-letter country code. */
  char country[3];
  /** Total IP addresses seen in this country. */
  unsigned total;
} c_hist_t;

/** Sorting helper for c_hist_t values: larger totals sort first, and equal
 * totals are ordered by country code.  Return -1 or 1 when the totals
 * differ, and the strcmp() result for the country codes when they are
 * equal. */
static int
c_hist_compare_(const void **_a, const void **_b)
{
  const c_hist_t *lhs = *_a;
  const c_hist_t *rhs = *_b;

  if (lhs->total != rhs->total)
    return (lhs->total > rhs->total) ? -1 : 1;

  return strcmp(lhs->country, rhs->country);
}
/** Age threshold, in seconds, for directory requests that are still
 * incomplete at the end of a 24-hour period: those running for longer than
 * this are considered failed, the others still running. */
#define DIRREQ_TIMEOUT (10*60)
695 /** Entry in a map from either chan->global_identifier for direct requests
696 * or a unique circuit identifier for tunneled requests to request time,
697 * response size, and completion time of a network status request. Used to
698 * measure download times of requests to derive average client
699 * bandwidths. */
700 typedef struct dirreq_map_entry_t {
701 HT_ENTRY(dirreq_map_entry_t) node;
702 /** Unique identifier for this network status request; this is either the
703 * chan->global_identifier of the dir channel (direct request) or a new
704 * locally unique identifier of a circuit (tunneled request). This ID is
705 * only unique among other direct or tunneled requests, respectively. */
706 uint64_t dirreq_id;
707 unsigned int state:3; /**< State of this directory request. */
708 unsigned int type:1; /**< Is this a direct or a tunneled request? */
709 unsigned int completed:1; /**< Is this request complete? */
710 /** When did we receive the request and started sending the response? */
711 struct timeval request_time;
712 size_t response_size; /**< What is the size of the response in bytes? */
713 struct timeval completion_time; /**< When did the request succeed? */
714 } dirreq_map_entry_t;
716 /** Map of all directory requests asking for v2 or v3 network statuses in
717 * the current geoip-stats interval. Values are
718 * of type *<b>dirreq_map_entry_t</b>. */
719 static HT_HEAD(dirreqmap, dirreq_map_entry_t) dirreq_map =
720 HT_INITIALIZER();
722 static int
723 dirreq_map_ent_eq(const dirreq_map_entry_t *a,
724 const dirreq_map_entry_t *b)
726 return a->dirreq_id == b->dirreq_id && a->type == b->type;
729 /* DOCDOC dirreq_map_ent_hash */
730 static unsigned
731 dirreq_map_ent_hash(const dirreq_map_entry_t *entry)
733 unsigned u = (unsigned) entry->dirreq_id;
734 u += entry->type << 20;
735 return u;
738 HT_PROTOTYPE(dirreqmap, dirreq_map_entry_t, node, dirreq_map_ent_hash,
739 dirreq_map_ent_eq)
740 HT_GENERATE2(dirreqmap, dirreq_map_entry_t, node, dirreq_map_ent_hash,
741 dirreq_map_ent_eq, 0.6, tor_reallocarray_, tor_free_)
743 /** Helper: Put <b>entry</b> into map of directory requests using
744 * <b>type</b> and <b>dirreq_id</b> as key parts. If there is
745 * already an entry for that key, print out a BUG warning and return. */
746 static void
747 dirreq_map_put_(dirreq_map_entry_t *entry, dirreq_type_t type,
748 uint64_t dirreq_id)
750 dirreq_map_entry_t *old_ent;
751 tor_assert(entry->type == type);
752 tor_assert(entry->dirreq_id == dirreq_id);
754 /* XXXX we could switch this to HT_INSERT some time, since it seems that
755 * this bug doesn't happen. But since this function doesn't seem to be
756 * critical-path, it's sane to leave it alone. */
757 old_ent = HT_REPLACE(dirreqmap, &dirreq_map, entry);
758 if (old_ent && old_ent != entry) {
759 log_warn(LD_BUG, "Error when putting directory request into local "
760 "map. There was already an entry for the same identifier.");
761 return;
765 /** Helper: Look up and return an entry in the map of directory requests
766 * using <b>type</b> and <b>dirreq_id</b> as key parts. If there
767 * is no such entry, return NULL. */
768 static dirreq_map_entry_t *
769 dirreq_map_get_(dirreq_type_t type, uint64_t dirreq_id)
771 dirreq_map_entry_t lookup;
772 lookup.type = type;
773 lookup.dirreq_id = dirreq_id;
774 return HT_FIND(dirreqmap, &dirreq_map, &lookup);
777 /** Note that an either direct or tunneled (see <b>type</b>) directory
778 * request for a v3 network status with unique ID <b>dirreq_id</b> of size
779 * <b>response_size</b> has started. */
780 void
781 geoip_start_dirreq(uint64_t dirreq_id, size_t response_size,
782 dirreq_type_t type)
784 dirreq_map_entry_t *ent;
785 if (!get_options()->DirReqStatistics)
786 return;
787 ent = tor_malloc_zero(sizeof(dirreq_map_entry_t));
788 ent->dirreq_id = dirreq_id;
789 tor_gettimeofday(&ent->request_time);
790 ent->response_size = response_size;
791 ent->type = type;
792 dirreq_map_put_(ent, type, dirreq_id);
795 /** Change the state of the either direct or tunneled (see <b>type</b>)
796 * directory request with <b>dirreq_id</b> to <b>new_state</b> and
797 * possibly mark it as completed. If no entry can be found for the given
798 * key parts (e.g., if this is a directory request that we are not
799 * measuring, or one that was started in the previous measurement period),
800 * or if the state cannot be advanced to <b>new_state</b>, do nothing. */
801 void
802 geoip_change_dirreq_state(uint64_t dirreq_id, dirreq_type_t type,
803 dirreq_state_t new_state)
805 dirreq_map_entry_t *ent;
806 if (!get_options()->DirReqStatistics)
807 return;
808 ent = dirreq_map_get_(type, dirreq_id);
809 if (!ent)
810 return;
811 if (new_state == DIRREQ_IS_FOR_NETWORK_STATUS)
812 return;
813 if (new_state - 1 != ent->state)
814 return;
815 ent->state = new_state;
816 if ((type == DIRREQ_DIRECT &&
817 new_state == DIRREQ_FLUSHING_DIR_CONN_FINISHED) ||
818 (type == DIRREQ_TUNNELED &&
819 new_state == DIRREQ_CHANNEL_BUFFER_FLUSHED)) {
820 tor_gettimeofday(&ent->completion_time);
821 ent->completed = 1;
825 /** Return the bridge-ip-transports string that should be inserted in
826 * our extra-info descriptor. Return NULL if the bridge-ip-transports
827 * line should be empty. */
828 char *
829 geoip_get_transport_history(void)
831 unsigned granularity = IP_GRANULARITY;
832 /** String hash table (name of transport) -> (number of users). */
833 strmap_t *transport_counts = strmap_new();
835 /** Smartlist that contains copies of the names of the transports
836 that have been used. */
837 smartlist_t *transports_used = smartlist_new();
839 /* Special string to signify that no transport was used for this
840 connection. Pluggable transport names can't have symbols in their
841 names, so this string will never collide with a real transport. */
842 static const char* no_transport_str = "<OR>";
844 clientmap_entry_t **ent;
845 smartlist_t *string_chunks = smartlist_new();
846 char *the_string = NULL;
848 /* If we haven't seen any clients yet, return NULL. */
849 if (HT_EMPTY(&client_history))
850 goto done;
852 /** We do the following steps to form the transport history string:
853 * a) Foreach client that uses a pluggable transport, we increase the
854 * times that transport was used by one. If the client did not use
855 * a transport, we increase the number of times someone connected
856 * without obfuscation.
857 * b) Foreach transport we observed, we write its transport history
858 * string and push it to string_chunks. So, for example, if we've
859 * seen 665 obfs2 clients, we write "obfs2=665".
860 * c) We concatenate string_chunks to form the final string.
863 log_debug(LD_GENERAL,"Starting iteration for transport history. %d clients.",
864 HT_SIZE(&client_history));
866 /* Loop through all clients. */
867 HT_FOREACH(ent, clientmap, &client_history) {
868 uintptr_t val;
869 void *ptr;
870 const char *transport_name = (*ent)->transport_name;
871 if (!transport_name)
872 transport_name = no_transport_str;
874 /* Increase the count for this transport name. */
875 ptr = strmap_get(transport_counts, transport_name);
876 val = (uintptr_t)ptr;
877 val++;
878 ptr = (void*)val;
879 strmap_set(transport_counts, transport_name, ptr);
881 /* If it's the first time we see this transport, note it. */
882 if (val == 1)
883 smartlist_add_strdup(transports_used, transport_name);
885 log_debug(LD_GENERAL, "Client from '%s' with transport '%s'. "
886 "I've now seen %d clients.",
887 safe_str_client(fmt_addr(&(*ent)->addr)),
888 transport_name ? transport_name : "<no transport>",
889 (int)val);
892 /* Sort the transport names (helps with unit testing). */
893 smartlist_sort_strings(transports_used);
895 /* Loop through all seen transports. */
896 SMARTLIST_FOREACH_BEGIN(transports_used, const char *, transport_name) {
897 void *transport_count_ptr = strmap_get(transport_counts, transport_name);
898 uintptr_t transport_count = (uintptr_t) transport_count_ptr;
900 log_debug(LD_GENERAL, "We got "U64_FORMAT" clients with transport '%s'.",
901 U64_PRINTF_ARG((uint64_t)transport_count), transport_name);
903 smartlist_add_asprintf(string_chunks, "%s="U64_FORMAT,
904 transport_name,
905 U64_PRINTF_ARG(round_uint64_to_next_multiple_of(
906 (uint64_t)transport_count,
907 granularity)));
908 } SMARTLIST_FOREACH_END(transport_name);
910 the_string = smartlist_join_strings(string_chunks, ",", 0, NULL);
912 log_debug(LD_GENERAL, "Final bridge-ip-transports string: '%s'", the_string);
914 done:
915 strmap_free(transport_counts, NULL);
916 SMARTLIST_FOREACH(transports_used, char *, s, tor_free(s));
917 smartlist_free(transports_used);
918 SMARTLIST_FOREACH(string_chunks, char *, s, tor_free(s));
919 smartlist_free(string_chunks);
921 return the_string;
924 /** Return a newly allocated comma-separated string containing statistics
925 * on network status downloads. The string contains the number of completed
926 * requests, timeouts, and still running requests as well as the download
927 * times by deciles and quartiles. Return NULL if we have not observed
928 * requests for long enough. */
929 static char *
930 geoip_get_dirreq_history(dirreq_type_t type)
932 char *result = NULL;
933 smartlist_t *dirreq_completed = NULL;
934 uint32_t complete = 0, timeouts = 0, running = 0;
935 int bufsize = 1024, written;
936 dirreq_map_entry_t **ptr, **next;
937 struct timeval now;
939 tor_gettimeofday(&now);
940 dirreq_completed = smartlist_new();
941 for (ptr = HT_START(dirreqmap, &dirreq_map); ptr; ptr = next) {
942 dirreq_map_entry_t *ent = *ptr;
943 if (ent->type != type) {
944 next = HT_NEXT(dirreqmap, &dirreq_map, ptr);
945 continue;
946 } else {
947 if (ent->completed) {
948 smartlist_add(dirreq_completed, ent);
949 complete++;
950 next = HT_NEXT_RMV(dirreqmap, &dirreq_map, ptr);
951 } else {
952 if (tv_mdiff(&ent->request_time, &now) / 1000 > DIRREQ_TIMEOUT)
953 timeouts++;
954 else
955 running++;
956 next = HT_NEXT_RMV(dirreqmap, &dirreq_map, ptr);
957 tor_free(ent);
961 #define DIR_REQ_GRANULARITY 4
962 complete = round_uint32_to_next_multiple_of(complete,
963 DIR_REQ_GRANULARITY);
964 timeouts = round_uint32_to_next_multiple_of(timeouts,
965 DIR_REQ_GRANULARITY);
966 running = round_uint32_to_next_multiple_of(running,
967 DIR_REQ_GRANULARITY);
968 result = tor_malloc_zero(bufsize);
969 written = tor_snprintf(result, bufsize, "complete=%u,timeout=%u,"
970 "running=%u", complete, timeouts, running);
971 if (written < 0) {
972 tor_free(result);
973 goto done;
976 #define MIN_DIR_REQ_RESPONSES 16
977 if (complete >= MIN_DIR_REQ_RESPONSES) {
978 uint32_t *dltimes;
979 /* We may have rounded 'completed' up. Here we want to use the
980 * real value. */
981 complete = smartlist_len(dirreq_completed);
982 dltimes = tor_calloc(complete, sizeof(uint32_t));
983 SMARTLIST_FOREACH_BEGIN(dirreq_completed, dirreq_map_entry_t *, ent) {
984 uint32_t bytes_per_second;
985 uint32_t time_diff = (uint32_t) tv_mdiff(&ent->request_time,
986 &ent->completion_time);
987 if (time_diff == 0)
988 time_diff = 1; /* Avoid DIV/0; "instant" answers are impossible
989 * by law of nature or something, but a millisecond
990 * is a bit greater than "instantly" */
991 bytes_per_second = (uint32_t)(1000 * ent->response_size / time_diff);
992 dltimes[ent_sl_idx] = bytes_per_second;
993 } SMARTLIST_FOREACH_END(ent);
994 median_uint32(dltimes, complete); /* sorts as a side effect. */
995 written = tor_snprintf(result + written, bufsize - written,
996 ",min=%u,d1=%u,d2=%u,q1=%u,d3=%u,d4=%u,md=%u,"
997 "d6=%u,d7=%u,q3=%u,d8=%u,d9=%u,max=%u",
998 dltimes[0],
999 dltimes[1*complete/10-1],
1000 dltimes[2*complete/10-1],
1001 dltimes[1*complete/4-1],
1002 dltimes[3*complete/10-1],
1003 dltimes[4*complete/10-1],
1004 dltimes[5*complete/10-1],
1005 dltimes[6*complete/10-1],
1006 dltimes[7*complete/10-1],
1007 dltimes[3*complete/4-1],
1008 dltimes[8*complete/10-1],
1009 dltimes[9*complete/10-1],
1010 dltimes[complete-1]);
1011 if (written<0)
1012 tor_free(result);
1013 tor_free(dltimes);
1015 done:
1016 SMARTLIST_FOREACH(dirreq_completed, dirreq_map_entry_t *, ent,
1017 tor_free(ent));
1018 smartlist_free(dirreq_completed);
1019 return result;
1022 /** Store a newly allocated comma-separated string in
1023 * *<a>country_str</a> containing entries for all the countries from
1024 * which we've seen enough clients connect as a bridge, directory
1025 * server, or entry guard. The entry format is cc=num where num is the
1026 * number of IPs we've seen connecting from that country, and cc is a
1027 * lowercased country code. *<a>country_str</a> is set to NULL if
1028 * we're not ready to export per country data yet.
1030 * Store a newly allocated comma-separated string in <a>ipver_str</a>
1031 * containing entries for clients connecting over IPv4 and IPv6. The
1032 * format is family=num where num is the nubmer of IPs we've seen
1033 * connecting over that protocol family, and family is 'v4' or 'v6'.
1035 * Return 0 on success and -1 if we're missing geoip data. */
1037 geoip_get_client_history(geoip_client_action_t action,
1038 char **country_str, char **ipver_str)
1040 unsigned granularity = IP_GRANULARITY;
1041 smartlist_t *entries = NULL;
1042 int n_countries = geoip_get_n_countries();
1043 int i;
1044 clientmap_entry_t **cm_ent;
1045 unsigned *counts = NULL;
1046 unsigned total = 0;
1047 unsigned ipv4_count = 0, ipv6_count = 0;
1049 if (!geoip_is_loaded(AF_INET) && !geoip_is_loaded(AF_INET6))
1050 return -1;
1052 counts = tor_calloc(n_countries, sizeof(unsigned));
1053 HT_FOREACH(cm_ent, clientmap, &client_history) {
1054 int country;
1055 if ((*cm_ent)->action != (int)action)
1056 continue;
1057 country = geoip_get_country_by_addr(&(*cm_ent)->addr);
1058 if (country < 0)
1059 country = 0; /** unresolved requests are stored at index 0. */
1060 tor_assert(0 <= country && country < n_countries);
1061 ++counts[country];
1062 ++total;
1063 switch (tor_addr_family(&(*cm_ent)->addr)) {
1064 case AF_INET:
1065 ipv4_count++;
1066 break;
1067 case AF_INET6:
1068 ipv6_count++;
1069 break;
1072 if (ipver_str) {
1073 smartlist_t *chunks = smartlist_new();
1074 smartlist_add_asprintf(chunks, "v4=%u",
1075 round_to_next_multiple_of(ipv4_count, granularity));
1076 smartlist_add_asprintf(chunks, "v6=%u",
1077 round_to_next_multiple_of(ipv6_count, granularity));
1078 *ipver_str = smartlist_join_strings(chunks, ",", 0, NULL);
1079 SMARTLIST_FOREACH(chunks, char *, c, tor_free(c));
1080 smartlist_free(chunks);
1083 /* Don't record per country data if we haven't seen enough IPs. */
1084 if (total < MIN_IPS_TO_NOTE_ANYTHING) {
1085 tor_free(counts);
1086 if (country_str)
1087 *country_str = NULL;
1088 return 0;
1091 /* Make a list of c_hist_t */
1092 entries = smartlist_new();
1093 for (i = 0; i < n_countries; ++i) {
1094 unsigned c = counts[i];
1095 const char *countrycode;
1096 c_hist_t *ent;
1097 /* Only report a country if it has a minimum number of IPs. */
1098 if (c >= MIN_IPS_TO_NOTE_COUNTRY) {
1099 c = round_to_next_multiple_of(c, granularity);
1100 countrycode = geoip_get_country_name(i);
1101 ent = tor_malloc(sizeof(c_hist_t));
1102 strlcpy(ent->country, countrycode, sizeof(ent->country));
1103 ent->total = c;
1104 smartlist_add(entries, ent);
1107 /* Sort entries. Note that we must do this _AFTER_ rounding, or else
1108 * the sort order could leak info. */
1109 smartlist_sort(entries, c_hist_compare_);
1111 if (country_str) {
1112 smartlist_t *chunks = smartlist_new();
1113 SMARTLIST_FOREACH(entries, c_hist_t *, ch, {
1114 smartlist_add_asprintf(chunks, "%s=%u", ch->country, ch->total);
1116 *country_str = smartlist_join_strings(chunks, ",", 0, NULL);
1117 SMARTLIST_FOREACH(chunks, char *, c, tor_free(c));
1118 smartlist_free(chunks);
1121 SMARTLIST_FOREACH(entries, c_hist_t *, c, tor_free(c));
1122 smartlist_free(entries);
1123 tor_free(counts);
1125 return 0;
1128 /** Return a newly allocated string holding the per-country request history
1129 * for v3 network statuses in a format suitable for an extra-info document,
1130 * or NULL on failure. */
1131 char *
1132 geoip_get_request_history(void)
1134 smartlist_t *entries, *strings;
1135 char *result;
1136 unsigned granularity = IP_GRANULARITY;
1138 if (!geoip_countries)
1139 return NULL;
1141 entries = smartlist_new();
1142 SMARTLIST_FOREACH_BEGIN(geoip_countries, geoip_country_t *, c) {
1143 uint32_t tot = 0;
1144 c_hist_t *ent;
1145 tot = c->n_v3_ns_requests;
1146 if (!tot)
1147 continue;
1148 ent = tor_malloc_zero(sizeof(c_hist_t));
1149 strlcpy(ent->country, c->countrycode, sizeof(ent->country));
1150 ent->total = round_to_next_multiple_of(tot, granularity);
1151 smartlist_add(entries, ent);
1152 } SMARTLIST_FOREACH_END(c);
1153 smartlist_sort(entries, c_hist_compare_);
1155 strings = smartlist_new();
1156 SMARTLIST_FOREACH(entries, c_hist_t *, ent, {
1157 smartlist_add_asprintf(strings, "%s=%u", ent->country, ent->total);
1159 result = smartlist_join_strings(strings, ",", 0, NULL);
1160 SMARTLIST_FOREACH(strings, char *, cp, tor_free(cp));
1161 SMARTLIST_FOREACH(entries, c_hist_t *, ent, tor_free(ent));
1162 smartlist_free(strings);
1163 smartlist_free(entries);
1164 return result;
/** Time at which the current directory request statistics interval began.
 * While this is 0, the dirreq formatting and writing functions in this file
 * report that statistics are not initialized. */
static time_t start_of_dirreq_stats_interval = 0;

/** Begin a directory request statistics interval at <b>now</b>. */
void
geoip_dirreq_stats_init(time_t now)
{
  start_of_dirreq_stats_interval = now;
}
1178 /** Reset counters for dirreq stats. */
1179 void
1180 geoip_reset_dirreq_stats(time_t now)
1182 SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, {
1183 c->n_v3_ns_requests = 0;
1186 clientmap_entry_t **ent, **next, *this;
1187 for (ent = HT_START(clientmap, &client_history); ent != NULL;
1188 ent = next) {
1189 if ((*ent)->action == GEOIP_CLIENT_NETWORKSTATUS) {
1190 this = *ent;
1191 next = HT_NEXT_RMV(clientmap, &client_history, ent);
1192 clientmap_entry_free(this);
1193 } else {
1194 next = HT_NEXT(clientmap, &client_history, ent);
1198 memset(ns_v3_responses, 0, sizeof(ns_v3_responses));
1200 dirreq_map_entry_t **ent, **next, *this;
1201 for (ent = HT_START(dirreqmap, &dirreq_map); ent != NULL; ent = next) {
1202 this = *ent;
1203 next = HT_NEXT_RMV(dirreqmap, &dirreq_map, ent);
1204 tor_free(this);
1207 start_of_dirreq_stats_interval = now;
/** Stop collecting directory request statistics.  All dirreq state is reset
 * and the interval start is set to 0, so that geoip_dirreq_stats_init() can
 * start collection again later. */
void
geoip_dirreq_stats_term(void)
{
  geoip_reset_dirreq_stats(0);
}
1218 /** Return a newly allocated string containing the dirreq statistics
1219 * until <b>now</b>, or NULL if we're not collecting dirreq stats. Caller
1220 * must ensure start_of_dirreq_stats_interval is in the past. */
1221 char *
1222 geoip_format_dirreq_stats(time_t now)
1224 char t[ISO_TIME_LEN+1];
1225 int i;
1226 char *v3_ips_string = NULL, *v3_reqs_string = NULL,
1227 *v3_direct_dl_string = NULL, *v3_tunneled_dl_string = NULL;
1228 char *result = NULL;
1230 if (!start_of_dirreq_stats_interval)
1231 return NULL; /* Not initialized. */
1233 tor_assert(now >= start_of_dirreq_stats_interval);
1235 format_iso_time(t, now);
1236 geoip_get_client_history(GEOIP_CLIENT_NETWORKSTATUS, &v3_ips_string, NULL);
1237 v3_reqs_string = geoip_get_request_history();
1239 #define RESPONSE_GRANULARITY 8
1240 for (i = 0; i < GEOIP_NS_RESPONSE_NUM; i++) {
1241 ns_v3_responses[i] = round_uint32_to_next_multiple_of(
1242 ns_v3_responses[i], RESPONSE_GRANULARITY);
1244 #undef RESPONSE_GRANULARITY
1246 v3_direct_dl_string = geoip_get_dirreq_history(DIRREQ_DIRECT);
1247 v3_tunneled_dl_string = geoip_get_dirreq_history(DIRREQ_TUNNELED);
1249 /* Put everything together into a single string. */
1250 tor_asprintf(&result, "dirreq-stats-end %s (%d s)\n"
1251 "dirreq-v3-ips %s\n"
1252 "dirreq-v3-reqs %s\n"
1253 "dirreq-v3-resp ok=%u,not-enough-sigs=%u,unavailable=%u,"
1254 "not-found=%u,not-modified=%u,busy=%u\n"
1255 "dirreq-v3-direct-dl %s\n"
1256 "dirreq-v3-tunneled-dl %s\n",
1258 (unsigned) (now - start_of_dirreq_stats_interval),
1259 v3_ips_string ? v3_ips_string : "",
1260 v3_reqs_string ? v3_reqs_string : "",
1261 ns_v3_responses[GEOIP_SUCCESS],
1262 ns_v3_responses[GEOIP_REJECT_NOT_ENOUGH_SIGS],
1263 ns_v3_responses[GEOIP_REJECT_UNAVAILABLE],
1264 ns_v3_responses[GEOIP_REJECT_NOT_FOUND],
1265 ns_v3_responses[GEOIP_REJECT_NOT_MODIFIED],
1266 ns_v3_responses[GEOIP_REJECT_BUSY],
1267 v3_direct_dl_string ? v3_direct_dl_string : "",
1268 v3_tunneled_dl_string ? v3_tunneled_dl_string : "");
1270 /* Free partial strings. */
1271 tor_free(v3_ips_string);
1272 tor_free(v3_reqs_string);
1273 tor_free(v3_direct_dl_string);
1274 tor_free(v3_tunneled_dl_string);
1276 return result;
1279 /** If 24 hours have passed since the beginning of the current dirreq
1280 * stats period, write dirreq stats to $DATADIR/stats/dirreq-stats
1281 * (possibly overwriting an existing file) and reset counters. Return
1282 * when we would next want to write dirreq stats or 0 if we never want to
1283 * write. */
1284 time_t
1285 geoip_dirreq_stats_write(time_t now)
1287 char *str = NULL;
1289 if (!start_of_dirreq_stats_interval)
1290 return 0; /* Not initialized. */
1291 if (start_of_dirreq_stats_interval + WRITE_STATS_INTERVAL > now)
1292 goto done; /* Not ready to write. */
1294 /* Discard all items in the client history that are too old. */
1295 geoip_remove_old_clients(start_of_dirreq_stats_interval);
1297 /* Generate history string .*/
1298 str = geoip_format_dirreq_stats(now);
1299 if (! str)
1300 goto done;
1302 /* Write dirreq-stats string to disk. */
1303 if (!check_or_create_data_subdir("stats")) {
1304 write_to_data_subdir("stats", "dirreq-stats", str, "dirreq statistics");
1305 /* Reset measurement interval start. */
1306 geoip_reset_dirreq_stats(now);
1309 done:
1310 tor_free(str);
1311 return start_of_dirreq_stats_interval + WRITE_STATS_INTERVAL;
/** Time at which the current bridge statistics interval began.  While this
 * is 0, geoip_format_bridge_stats() and format_client_stats_heartbeat()
 * report that statistics are not initialized. */
static time_t start_of_bridge_stats_interval = 0;

/** Begin a bridge statistics interval at <b>now</b>. */
void
geoip_bridge_stats_init(time_t now)
{
  start_of_bridge_stats_interval = now;
}
1325 /** Stop collecting bridge stats in a way that we can re-start doing so in
1326 * geoip_bridge_stats_init(). */
1327 void
1328 geoip_bridge_stats_term(void)
1330 client_history_clear();
1331 start_of_bridge_stats_interval = 0;
1334 /** Validate a bridge statistics string as it would be written to a
1335 * current extra-info descriptor. Return 1 if the string is valid and
1336 * recent enough, or 0 otherwise. */
1337 static int
1338 validate_bridge_stats(const char *stats_str, time_t now)
1340 char stats_end_str[ISO_TIME_LEN+1], stats_start_str[ISO_TIME_LEN+1],
1341 *eos;
1343 const char *BRIDGE_STATS_END = "bridge-stats-end ";
1344 const char *BRIDGE_IPS = "bridge-ips ";
1345 const char *BRIDGE_IPS_EMPTY_LINE = "bridge-ips\n";
1346 const char *BRIDGE_TRANSPORTS = "bridge-ip-transports ";
1347 const char *BRIDGE_TRANSPORTS_EMPTY_LINE = "bridge-ip-transports\n";
1348 const char *tmp;
1349 time_t stats_end_time;
1350 int seconds;
1351 tor_assert(stats_str);
1353 /* Parse timestamp and number of seconds from
1354 "bridge-stats-end YYYY-MM-DD HH:MM:SS (N s)" */
1355 tmp = find_str_at_start_of_line(stats_str, BRIDGE_STATS_END);
1356 if (!tmp)
1357 return 0;
1358 tmp += strlen(BRIDGE_STATS_END);
1360 if (strlen(tmp) < ISO_TIME_LEN + 6)
1361 return 0;
1362 strlcpy(stats_end_str, tmp, sizeof(stats_end_str));
1363 if (parse_iso_time(stats_end_str, &stats_end_time) < 0)
1364 return 0;
1365 if (stats_end_time < now - (25*60*60) ||
1366 stats_end_time > now + (1*60*60))
1367 return 0;
1368 seconds = (int)strtol(tmp + ISO_TIME_LEN + 2, &eos, 10);
1369 if (!eos || seconds < 23*60*60)
1370 return 0;
1371 format_iso_time(stats_start_str, stats_end_time - seconds);
1373 /* Parse: "bridge-ips CC=N,CC=N,..." */
1374 tmp = find_str_at_start_of_line(stats_str, BRIDGE_IPS);
1375 if (!tmp) {
1376 /* Look if there is an empty "bridge-ips" line */
1377 tmp = find_str_at_start_of_line(stats_str, BRIDGE_IPS_EMPTY_LINE);
1378 if (!tmp)
1379 return 0;
1382 /* Parse: "bridge-ip-transports PT=N,PT=N,..." */
1383 tmp = find_str_at_start_of_line(stats_str, BRIDGE_TRANSPORTS);
1384 if (!tmp) {
1385 /* Look if there is an empty "bridge-ip-transports" line */
1386 tmp = find_str_at_start_of_line(stats_str, BRIDGE_TRANSPORTS_EMPTY_LINE);
1387 if (!tmp)
1388 return 0;
1391 return 1;
1394 /** Most recent bridge statistics formatted to be written to extra-info
1395 * descriptors. */
1396 static char *bridge_stats_extrainfo = NULL;
1398 /** Return a newly allocated string holding our bridge usage stats by country
1399 * in a format suitable for inclusion in an extrainfo document. Return NULL on
1400 * failure. */
1401 char *
1402 geoip_format_bridge_stats(time_t now)
1404 char *out = NULL;
1405 char *country_data = NULL, *ipver_data = NULL, *transport_data = NULL;
1406 long duration = now - start_of_bridge_stats_interval;
1407 char written[ISO_TIME_LEN+1];
1409 if (duration < 0)
1410 return NULL;
1411 if (!start_of_bridge_stats_interval)
1412 return NULL; /* Not initialized. */
1414 format_iso_time(written, now);
1415 geoip_get_client_history(GEOIP_CLIENT_CONNECT, &country_data, &ipver_data);
1416 transport_data = geoip_get_transport_history();
1418 tor_asprintf(&out,
1419 "bridge-stats-end %s (%ld s)\n"
1420 "bridge-ips %s\n"
1421 "bridge-ip-versions %s\n"
1422 "bridge-ip-transports %s\n",
1423 written, duration,
1424 country_data ? country_data : "",
1425 ipver_data ? ipver_data : "",
1426 transport_data ? transport_data : "");
1427 tor_free(country_data);
1428 tor_free(ipver_data);
1429 tor_free(transport_data);
1431 return out;
1434 /** Return a newly allocated string holding our bridge usage stats by country
1435 * in a format suitable for the answer to a controller request. Return NULL on
1436 * failure. */
1437 static char *
1438 format_bridge_stats_controller(time_t now)
1440 char *out = NULL, *country_data = NULL, *ipver_data = NULL;
1441 char started[ISO_TIME_LEN+1];
1442 (void) now;
1444 format_iso_time(started, start_of_bridge_stats_interval);
1445 geoip_get_client_history(GEOIP_CLIENT_CONNECT, &country_data, &ipver_data);
1447 tor_asprintf(&out,
1448 "TimeStarted=\"%s\" CountrySummary=%s IPVersions=%s",
1449 started,
1450 country_data ? country_data : "",
1451 ipver_data ? ipver_data : "");
1452 tor_free(country_data);
1453 tor_free(ipver_data);
1454 return out;
1457 /** Return a newly allocated string holding our bridge usage stats by
1458 * country in a format suitable for inclusion in our heartbeat
1459 * message. Return NULL on failure. */
1460 char *
1461 format_client_stats_heartbeat(time_t now)
1463 const int n_hours = 6;
1464 char *out = NULL;
1465 int n_clients = 0;
1466 clientmap_entry_t **ent;
1467 unsigned cutoff = (unsigned)( (now-n_hours*3600)/60 );
1469 if (!start_of_bridge_stats_interval)
1470 return NULL; /* Not initialized. */
1472 /* count unique IPs */
1473 HT_FOREACH(ent, clientmap, &client_history) {
1474 /* only count directly connecting clients */
1475 if ((*ent)->action != GEOIP_CLIENT_CONNECT)
1476 continue;
1477 if ((*ent)->last_seen_in_minutes < cutoff)
1478 continue;
1479 n_clients++;
1482 tor_asprintf(&out, "Heartbeat: "
1483 "In the last %d hours, I have seen %d unique clients.",
1484 n_hours,
1485 n_clients);
1487 return out;
1490 /** Write bridge statistics to $DATADIR/stats/bridge-stats and return
1491 * when we should next try to write statistics. */
1492 time_t
1493 geoip_bridge_stats_write(time_t now)
1495 char *val = NULL;
1497 /* Check if 24 hours have passed since starting measurements. */
1498 if (now < start_of_bridge_stats_interval + WRITE_STATS_INTERVAL)
1499 return start_of_bridge_stats_interval + WRITE_STATS_INTERVAL;
1501 /* Discard all items in the client history that are too old. */
1502 geoip_remove_old_clients(start_of_bridge_stats_interval);
1504 /* Generate formatted string */
1505 val = geoip_format_bridge_stats(now);
1506 if (val == NULL)
1507 goto done;
1509 /* Update the stored value. */
1510 tor_free(bridge_stats_extrainfo);
1511 bridge_stats_extrainfo = val;
1512 start_of_bridge_stats_interval = now;
1514 /* Write it to disk. */
1515 if (!check_or_create_data_subdir("stats")) {
1516 write_to_data_subdir("stats", "bridge-stats",
1517 bridge_stats_extrainfo, "bridge statistics");
1519 /* Tell the controller, "hey, there are clients!" */
1521 char *controller_str = format_bridge_stats_controller(now);
1522 if (controller_str)
1523 control_event_clients_seen(controller_str);
1524 tor_free(controller_str);
1528 done:
1529 return start_of_bridge_stats_interval + WRITE_STATS_INTERVAL;
1532 /** Try to load the most recent bridge statistics from disk, unless we
1533 * have finished a measurement interval lately, and check whether they
1534 * are still recent enough. */
1535 static void
1536 load_bridge_stats(time_t now)
1538 char *fname, *contents;
1539 if (bridge_stats_extrainfo)
1540 return;
1542 fname = get_datadir_fname2("stats", "bridge-stats");
1543 contents = read_file_to_str(fname, RFTS_IGNORE_MISSING, NULL);
1544 if (contents && validate_bridge_stats(contents, now)) {
1545 bridge_stats_extrainfo = contents;
1546 } else {
1547 tor_free(contents);
1550 tor_free(fname);
1553 /** Return most recent bridge statistics for inclusion in extra-info
1554 * descriptors, or NULL if we don't have recent bridge statistics. */
1555 const char *
1556 geoip_get_bridge_stats_extrainfo(time_t now)
1558 load_bridge_stats(now);
1559 return bridge_stats_extrainfo;
/** Return a newly allocated string with the current bridge statistics in the
 * form produced by format_bridge_stats_controller(), for handing to
 * controller clients. */
char *
geoip_get_bridge_stats_controller(time_t now)
{
  return format_bridge_stats_controller(now);
}
/** Time at which the current entry statistics interval began.  While this
 * is 0, the entry stats formatting and writing functions in this file report
 * that statistics are not initialized. */
static time_t start_of_entry_stats_interval = 0;

/** Begin an entry statistics interval at <b>now</b>. */
void
geoip_entry_stats_init(time_t now)
{
  start_of_entry_stats_interval = now;
}
1581 /** Reset counters for entry stats. */
1582 void
1583 geoip_reset_entry_stats(time_t now)
1585 client_history_clear();
1586 start_of_entry_stats_interval = now;
/** Stop collecting entry statistics.  The entry state is reset and the
 * interval start is set to 0, so that geoip_entry_stats_init() can start
 * collection again later. */
void
geoip_entry_stats_term(void)
{
  geoip_reset_entry_stats(0);
}
1597 /** Return a newly allocated string containing the entry statistics
1598 * until <b>now</b>, or NULL if we're not collecting entry stats. Caller
1599 * must ensure start_of_entry_stats_interval lies in the past. */
1600 char *
1601 geoip_format_entry_stats(time_t now)
1603 char t[ISO_TIME_LEN+1];
1604 char *data = NULL;
1605 char *result;
1607 if (!start_of_entry_stats_interval)
1608 return NULL; /* Not initialized. */
1610 tor_assert(now >= start_of_entry_stats_interval);
1612 geoip_get_client_history(GEOIP_CLIENT_CONNECT, &data, NULL);
1613 format_iso_time(t, now);
1614 tor_asprintf(&result,
1615 "entry-stats-end %s (%u s)\n"
1616 "entry-ips %s\n",
1617 t, (unsigned) (now - start_of_entry_stats_interval),
1618 data ? data : "");
1619 tor_free(data);
1620 return result;
1623 /** If 24 hours have passed since the beginning of the current entry stats
1624 * period, write entry stats to $DATADIR/stats/entry-stats (possibly
1625 * overwriting an existing file) and reset counters. Return when we would
1626 * next want to write entry stats or 0 if we never want to write. */
1627 time_t
1628 geoip_entry_stats_write(time_t now)
1630 char *str = NULL;
1632 if (!start_of_entry_stats_interval)
1633 return 0; /* Not initialized. */
1634 if (start_of_entry_stats_interval + WRITE_STATS_INTERVAL > now)
1635 goto done; /* Not ready to write. */
1637 /* Discard all items in the client history that are too old. */
1638 geoip_remove_old_clients(start_of_entry_stats_interval);
1640 /* Generate history string .*/
1641 str = geoip_format_entry_stats(now);
1643 /* Write entry-stats string to disk. */
1644 if (!check_or_create_data_subdir("stats")) {
1645 write_to_data_subdir("stats", "entry-stats", str, "entry statistics");
1647 /* Reset measurement interval start. */
1648 geoip_reset_entry_stats(now);
1651 done:
1652 tor_free(str);
1653 return start_of_entry_stats_interval + WRITE_STATS_INTERVAL;
1656 /** Helper used to implement GETINFO ip-to-country/... controller command. */
1658 getinfo_helper_geoip(control_connection_t *control_conn,
1659 const char *question, char **answer,
1660 const char **errmsg)
1662 (void)control_conn;
1663 if (!strcmpstart(question, "ip-to-country/")) {
1664 int c;
1665 sa_family_t family;
1666 tor_addr_t addr;
1667 question += strlen("ip-to-country/");
1669 if (!strcmp(question, "ipv4-available") ||
1670 !strcmp(question, "ipv6-available")) {
1671 family = !strcmp(question, "ipv4-available") ? AF_INET : AF_INET6;
1672 const int available = geoip_is_loaded(family);
1673 tor_asprintf(answer, "%d", !! available);
1674 return 0;
1677 family = tor_addr_parse(&addr, question);
1678 if (family != AF_INET && family != AF_INET6) {
1679 *errmsg = "Invalid address family";
1680 return -1;
1682 if (!geoip_is_loaded(family)) {
1683 *errmsg = "GeoIP data not loaded";
1684 return -1;
1686 if (family == AF_INET)
1687 c = geoip_get_country_by_ipv4(tor_addr_to_ipv4h(&addr));
1688 else /* AF_INET6 */
1689 c = geoip_get_country_by_ipv6(tor_addr_to_in6(&addr));
1690 *answer = tor_strdup(geoip_get_country_name(c));
1692 return 0;
1695 /** Release all storage held by the GeoIP databases and country list. */
1696 STATIC void
1697 clear_geoip_db(void)
1699 if (geoip_countries) {
1700 SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, tor_free(c));
1701 smartlist_free(geoip_countries);
1704 strmap_free(country_idxplus1_by_lc_code, NULL);
1705 if (geoip_ipv4_entries) {
1706 SMARTLIST_FOREACH(geoip_ipv4_entries, geoip_ipv4_entry_t *, ent,
1707 tor_free(ent));
1708 smartlist_free(geoip_ipv4_entries);
1710 if (geoip_ipv6_entries) {
1711 SMARTLIST_FOREACH(geoip_ipv6_entries, geoip_ipv6_entry_t *, ent,
1712 tor_free(ent));
1713 smartlist_free(geoip_ipv6_entries);
1715 geoip_countries = NULL;
1716 country_idxplus1_by_lc_code = NULL;
1717 geoip_ipv4_entries = NULL;
1718 geoip_ipv6_entries = NULL;
1721 /** Release all storage held in this file. */
1722 void
1723 geoip_free_all(void)
1726 clientmap_entry_t **ent, **next, *this;
1727 for (ent = HT_START(clientmap, &client_history); ent != NULL; ent = next) {
1728 this = *ent;
1729 next = HT_NEXT_RMV(clientmap, &client_history, ent);
1730 clientmap_entry_free(this);
1732 HT_CLEAR(clientmap, &client_history);
1735 dirreq_map_entry_t **ent, **next, *this;
1736 for (ent = HT_START(dirreqmap, &dirreq_map); ent != NULL; ent = next) {
1737 this = *ent;
1738 next = HT_NEXT_RMV(dirreqmap, &dirreq_map, ent);
1739 tor_free(this);
1741 HT_CLEAR(dirreqmap, &dirreq_map);
1744 clear_geoip_db();
1745 tor_free(bridge_stats_extrainfo);