Remove dirreq-v2-* lines from extra-info descriptors.
[tor.git] / src / or / geoip.c
blob9ba1e31b8b774eaecf3c88c7dfb36e0afc36f8ab
1 /* Copyright (c) 2007-2013, The Tor Project, Inc. */
2 /* See LICENSE for licensing information */
/**
 * \file geoip.c
 * \brief Functions for maintaining an IP-to-country database; for
 * summarizing client connections by country to entry guards, bridges,
 * and directory servers; and for keeping statistics on answering network
 * status requests.
 */
12 #define GEOIP_PRIVATE
13 #include "or.h"
14 #include "ht.h"
15 #include "config.h"
16 #include "control.h"
17 #include "dnsserv.h"
18 #include "geoip.h"
19 #include "routerlist.h"
21 static void clear_geoip_db(void);
22 static void init_geoip_countries(void);
/** One row of the IPv4 GeoIP table: an inclusive address range together
 * with the country it belongs to. */
typedef struct geoip_ipv4_entry_t {
  uint32_t ip_low;   /**< First address of the range, in host order. */
  uint32_t ip_high;  /**< Last address of the range, in host order. */
  intptr_t country;  /**< Position of the country in geoip_countries. */
} geoip_ipv4_entry_t;
/** One row of the IPv6 GeoIP table: an inclusive address range together
 * with the country it belongs to.  The bounds are kept as raw in6_addr
 * values and are compared bytewise by the helpers in this file. */
typedef struct geoip_ipv6_entry_t {
  struct in6_addr ip_low;   /**< First address of the range. */
  struct in6_addr ip_high;  /**< Last address of the range. */
  intptr_t country;         /**< Position of the country in geoip_countries. */
} geoip_ipv6_entry_t;
/** Per-country bookkeeping for GeoIP request history. */
typedef struct geoip_country_t {
  char countrycode[3];        /**< Two-letter code plus terminating NUL. */
  uint32_t n_v3_ns_requests;  /**< Counter bumped by geoip_note_client_seen()
                               * for network-status requests. */
} geoip_country_t;
44 /** A list of geoip_country_t */
45 static smartlist_t *geoip_countries = NULL;
46 /** A map from lowercased country codes to their position in geoip_countries.
47 * The index is encoded in the pointer, and 1 is added so that NULL can mean
48 * not found. */
49 static strmap_t *country_idxplus1_by_lc_code = NULL;
50 /** Lists of all known geoip_ipv4_entry_t and geoip_ipv6_entry_t, sorted
51 * by their respective ip_low. */
52 static smartlist_t *geoip_ipv4_entries = NULL, *geoip_ipv6_entries = NULL;
54 /** SHA1 digest of the GeoIP files to include in extra-info descriptors. */
55 static char geoip_digest[DIGEST_LEN];
56 static char geoip6_digest[DIGEST_LEN];
58 /** Return the index of the <b>country</b>'s entry in the GeoIP
59 * country list if it is a valid 2-letter country code, otherwise
60 * return -1. */
61 country_t
62 geoip_get_country(const char *country)
64 void *idxplus1_;
65 intptr_t idx;
67 idxplus1_ = strmap_get_lc(country_idxplus1_by_lc_code, country);
68 if (!idxplus1_)
69 return -1;
71 idx = ((uintptr_t)idxplus1_)-1;
72 return (country_t)idx;
75 /** Add an entry to a GeoIP table, mapping all IP addresses between <b>low</b>
76 * and <b>high</b>, inclusive, to the 2-letter country code <b>country</b>. */
77 static void
78 geoip_add_entry(const tor_addr_t *low, const tor_addr_t *high,
79 const char *country)
81 intptr_t idx;
82 void *idxplus1_;
84 if (tor_addr_family(low) != tor_addr_family(high))
85 return;
86 if (tor_addr_compare(high, low, CMP_EXACT) < 0)
87 return;
89 idxplus1_ = strmap_get_lc(country_idxplus1_by_lc_code, country);
91 if (!idxplus1_) {
92 geoip_country_t *c = tor_malloc_zero(sizeof(geoip_country_t));
93 strlcpy(c->countrycode, country, sizeof(c->countrycode));
94 tor_strlower(c->countrycode);
95 smartlist_add(geoip_countries, c);
96 idx = smartlist_len(geoip_countries) - 1;
97 strmap_set_lc(country_idxplus1_by_lc_code, country, (void*)(idx+1));
98 } else {
99 idx = ((uintptr_t)idxplus1_)-1;
102 geoip_country_t *c = smartlist_get(geoip_countries, idx);
103 tor_assert(!strcasecmp(c->countrycode, country));
106 if (tor_addr_family(low) == AF_INET) {
107 geoip_ipv4_entry_t *ent = tor_malloc_zero(sizeof(geoip_ipv4_entry_t));
108 ent->ip_low = tor_addr_to_ipv4h(low);
109 ent->ip_high = tor_addr_to_ipv4h(high);
110 ent->country = idx;
111 smartlist_add(geoip_ipv4_entries, ent);
112 } else if (tor_addr_family(low) == AF_INET6) {
113 geoip_ipv6_entry_t *ent = tor_malloc_zero(sizeof(geoip_ipv6_entry_t));
114 ent->ip_low = *tor_addr_to_in6(low);
115 ent->ip_high = *tor_addr_to_in6(high);
116 ent->country = idx;
117 smartlist_add(geoip_ipv6_entries, ent);
121 /** Add an entry to the GeoIP table indicated by <b>family</b>,
122 * parsing it from <b>line</b>. The format is as for geoip_load_file(). */
123 /*private*/ int
124 geoip_parse_entry(const char *line, sa_family_t family)
126 tor_addr_t low_addr, high_addr;
127 char c[3];
128 char *country = NULL;
130 if (!geoip_countries)
131 init_geoip_countries();
132 if (family == AF_INET) {
133 if (!geoip_ipv4_entries)
134 geoip_ipv4_entries = smartlist_new();
135 } else if (family == AF_INET6) {
136 if (!geoip_ipv6_entries)
137 geoip_ipv6_entries = smartlist_new();
138 } else {
139 log_warn(LD_GENERAL, "Unsupported family: %d", family);
140 return -1;
143 while (TOR_ISSPACE(*line))
144 ++line;
145 if (*line == '#')
146 return 0;
148 if (family == AF_INET) {
149 unsigned int low, high;
150 if (tor_sscanf(line,"%u,%u,%2s", &low, &high, c) == 3 ||
151 tor_sscanf(line,"\"%u\",\"%u\",\"%2s\",", &low, &high, c) == 3) {
152 tor_addr_from_ipv4h(&low_addr, low);
153 tor_addr_from_ipv4h(&high_addr, high);
154 } else
155 goto fail;
156 country = c;
157 } else { /* AF_INET6 */
158 char buf[512];
159 char *low_str, *high_str;
160 struct in6_addr low, high;
161 char *strtok_state;
162 strlcpy(buf, line, sizeof(buf));
163 low_str = tor_strtok_r(buf, ",", &strtok_state);
164 if (!low_str)
165 goto fail;
166 high_str = tor_strtok_r(NULL, ",", &strtok_state);
167 if (!high_str)
168 goto fail;
169 country = tor_strtok_r(NULL, "\n", &strtok_state);
170 if (!country)
171 goto fail;
172 if (strlen(country) != 2)
173 goto fail;
174 if (tor_inet_pton(AF_INET6, low_str, &low) <= 0)
175 goto fail;
176 tor_addr_from_in6(&low_addr, &low);
177 if (tor_inet_pton(AF_INET6, high_str, &high) <= 0)
178 goto fail;
179 tor_addr_from_in6(&high_addr, &high);
181 geoip_add_entry(&low_addr, &high_addr, country);
182 return 0;
184 fail:
185 log_warn(LD_GENERAL, "Unable to parse line from GEOIP %s file: %s",
186 family == AF_INET ? "IPv4" : "IPv6", escaped(line));
187 return -1;
190 /** Sorting helper: return -1, 1, or 0 based on comparison of two
191 * geoip_ipv4_entry_t */
192 static int
193 geoip_ipv4_compare_entries_(const void **_a, const void **_b)
195 const geoip_ipv4_entry_t *a = *_a, *b = *_b;
196 if (a->ip_low < b->ip_low)
197 return -1;
198 else if (a->ip_low > b->ip_low)
199 return 1;
200 else
201 return 0;
204 /** bsearch helper: return -1, 1, or 0 based on comparison of an IP (a pointer
205 * to a uint32_t in host order) to a geoip_ipv4_entry_t */
206 static int
207 geoip_ipv4_compare_key_to_entry_(const void *_key, const void **_member)
209 /* No alignment issue here, since _key really is a pointer to uint32_t */
210 const uint32_t addr = *(uint32_t *)_key;
211 const geoip_ipv4_entry_t *entry = *_member;
212 if (addr < entry->ip_low)
213 return -1;
214 else if (addr > entry->ip_high)
215 return 1;
216 else
217 return 0;
220 /** Sorting helper: return -1, 1, or 0 based on comparison of two
221 * geoip_ipv6_entry_t */
222 static int
223 geoip_ipv6_compare_entries_(const void **_a, const void **_b)
225 const geoip_ipv6_entry_t *a = *_a, *b = *_b;
226 return fast_memcmp(a->ip_low.s6_addr, b->ip_low.s6_addr,
227 sizeof(struct in6_addr));
230 /** bsearch helper: return -1, 1, or 0 based on comparison of an IPv6
231 * (a pointer to a in6_addr) to a geoip_ipv6_entry_t */
232 static int
233 geoip_ipv6_compare_key_to_entry_(const void *_key, const void **_member)
235 const struct in6_addr *addr = (struct in6_addr *)_key;
236 const geoip_ipv6_entry_t *entry = *_member;
238 if (fast_memcmp(addr->s6_addr, entry->ip_low.s6_addr,
239 sizeof(struct in6_addr)) < 0)
240 return -1;
241 else if (fast_memcmp(addr->s6_addr, entry->ip_high.s6_addr,
242 sizeof(struct in6_addr)) > 0)
243 return 1;
244 else
245 return 0;
248 /** Return 1 if we should collect geoip stats on bridge users, and
249 * include them in our extrainfo descriptor. Else return 0. */
251 should_record_bridge_info(const or_options_t *options)
253 return options->BridgeRelay && options->BridgeRecordUsageByCountry;
256 /** Set up a new list of geoip countries with no countries (yet) set in it,
257 * except for the unknown country.
259 static void
260 init_geoip_countries(void)
262 geoip_country_t *geoip_unresolved;
263 geoip_countries = smartlist_new();
264 /* Add a geoip_country_t for requests that could not be resolved to a
265 * country as first element (index 0) to geoip_countries. */
266 geoip_unresolved = tor_malloc_zero(sizeof(geoip_country_t));
267 strlcpy(geoip_unresolved->countrycode, "??",
268 sizeof(geoip_unresolved->countrycode));
269 smartlist_add(geoip_countries, geoip_unresolved);
270 country_idxplus1_by_lc_code = strmap_new();
271 strmap_set_lc(country_idxplus1_by_lc_code, "??", (void*)(1));
274 /** Clear appropriate GeoIP database, based on <b>family</b>, and
275 * reload it from the file <b>filename</b>. Return 0 on success, -1 on
276 * failure.
278 * Recognized line formats for IPv4 are:
279 * INTIPLOW,INTIPHIGH,CC
280 * and
281 * "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME"
282 * where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned
283 * integers, and CC is a country code.
285 * Recognized line format for IPv6 is:
286 * IPV6LOW,IPV6HIGH,CC
287 * where IPV6LOW and IPV6HIGH are IPv6 addresses and CC is a country code.
289 * It also recognizes, and skips over, blank lines and lines that start
290 * with '#' (comments).
293 geoip_load_file(sa_family_t family, const char *filename)
295 FILE *f;
296 const char *msg = "";
297 const or_options_t *options = get_options();
298 int severity = options_need_geoip_info(options, &msg) ? LOG_WARN : LOG_INFO;
299 crypto_digest_t *geoip_digest_env = NULL;
301 tor_assert(family == AF_INET || family == AF_INET6);
303 if (!(f = tor_fopen_cloexec(filename, "r"))) {
304 log_fn(severity, LD_GENERAL, "Failed to open GEOIP file %s. %s",
305 filename, msg);
306 return -1;
308 if (!geoip_countries)
309 init_geoip_countries();
311 if (family == AF_INET) {
312 if (geoip_ipv4_entries) {
313 SMARTLIST_FOREACH(geoip_ipv4_entries, geoip_ipv4_entry_t *, e,
314 tor_free(e));
315 smartlist_free(geoip_ipv4_entries);
317 geoip_ipv4_entries = smartlist_new();
318 } else { /* AF_INET6 */
319 if (geoip_ipv6_entries) {
320 SMARTLIST_FOREACH(geoip_ipv6_entries, geoip_ipv6_entry_t *, e,
321 tor_free(e));
322 smartlist_free(geoip_ipv6_entries);
324 geoip_ipv6_entries = smartlist_new();
326 geoip_digest_env = crypto_digest_new();
328 log_notice(LD_GENERAL, "Parsing GEOIP %s file %s.",
329 (family == AF_INET) ? "IPv4" : "IPv6", filename);
330 while (!feof(f)) {
331 char buf[512];
332 if (fgets(buf, (int)sizeof(buf), f) == NULL)
333 break;
334 crypto_digest_add_bytes(geoip_digest_env, buf, strlen(buf));
335 /* FFFF track full country name. */
336 geoip_parse_entry(buf, family);
338 /*XXXX abort and return -1 if no entries/illformed?*/
339 fclose(f);
341 /* Sort list and remember file digests so that we can include it in
342 * our extra-info descriptors. */
343 if (family == AF_INET) {
344 smartlist_sort(geoip_ipv4_entries, geoip_ipv4_compare_entries_);
345 /* Okay, now we need to maybe change our mind about what is in
346 * which country. We do this for IPv4 only since that's what we
347 * store in node->country. */
348 refresh_all_country_info();
349 crypto_digest_get_digest(geoip_digest_env, geoip_digest, DIGEST_LEN);
350 } else {
351 /* AF_INET6 */
352 smartlist_sort(geoip_ipv6_entries, geoip_ipv6_compare_entries_);
353 crypto_digest_get_digest(geoip_digest_env, geoip6_digest, DIGEST_LEN);
355 crypto_digest_free(geoip_digest_env);
357 return 0;
360 /** Given an IP address in host order, return a number representing the
361 * country to which that address belongs, -1 for "No geoip information
362 * available", or 0 for the 'unknown country'. The return value will always
363 * be less than geoip_get_n_countries(). To decode it, call
364 * geoip_get_country_name().
367 geoip_get_country_by_ipv4(uint32_t ipaddr)
369 geoip_ipv4_entry_t *ent;
370 if (!geoip_ipv4_entries)
371 return -1;
372 ent = smartlist_bsearch(geoip_ipv4_entries, &ipaddr,
373 geoip_ipv4_compare_key_to_entry_);
374 return ent ? (int)ent->country : 0;
377 /** Given an IPv6 address, return a number representing the country to
378 * which that address belongs, -1 for "No geoip information available", or
379 * 0 for the 'unknown country'. The return value will always be less than
380 * geoip_get_n_countries(). To decode it, call geoip_get_country_name().
383 geoip_get_country_by_ipv6(const struct in6_addr *addr)
385 geoip_ipv6_entry_t *ent;
387 if (!geoip_ipv6_entries)
388 return -1;
389 ent = smartlist_bsearch(geoip_ipv6_entries, addr,
390 geoip_ipv6_compare_key_to_entry_);
391 return ent ? (int)ent->country : 0;
394 /** Given an IP address, return a number representing the country to which
395 * that address belongs, -1 for "No geoip information available", or 0 for
396 * the 'unknown country'. The return value will always be less than
397 * geoip_get_n_countries(). To decode it, call geoip_get_country_name().
400 geoip_get_country_by_addr(const tor_addr_t *addr)
402 if (tor_addr_family(addr) == AF_INET) {
403 return geoip_get_country_by_ipv4(tor_addr_to_ipv4h(addr));
404 } else if (tor_addr_family(addr) == AF_INET6) {
405 return geoip_get_country_by_ipv6(tor_addr_to_in6(addr));
406 } else {
407 return -1;
411 /** Return the number of countries recognized by the GeoIP country list. */
413 geoip_get_n_countries(void)
415 if (!geoip_countries)
416 init_geoip_countries();
417 return (int) smartlist_len(geoip_countries);
420 /** Return the two-letter country code associated with the number <b>num</b>,
421 * or "??" for an unknown value. */
422 const char *
423 geoip_get_country_name(country_t num)
425 if (geoip_countries && num >= 0 && num < smartlist_len(geoip_countries)) {
426 geoip_country_t *c = smartlist_get(geoip_countries, num);
427 return c->countrycode;
428 } else
429 return "??";
432 /** Return true iff we have loaded a GeoIP database.*/
434 geoip_is_loaded(sa_family_t family)
436 tor_assert(family == AF_INET || family == AF_INET6);
437 if (geoip_countries == NULL)
438 return 0;
439 if (family == AF_INET)
440 return geoip_ipv4_entries != NULL;
441 else /* AF_INET6 */
442 return geoip_ipv6_entries != NULL;
445 /** Return the hex-encoded SHA1 digest of the loaded GeoIP file. The
446 * result does not need to be deallocated, but will be overwritten by the
447 * next call of hex_str(). */
448 const char *
449 geoip_db_digest(sa_family_t family)
451 tor_assert(family == AF_INET || family == AF_INET6);
452 if (family == AF_INET)
453 return hex_str(geoip_digest, DIGEST_LEN);
454 else /* AF_INET6 */
455 return hex_str(geoip6_digest, DIGEST_LEN);
458 /** Entry in a map from IP address to the last time we've seen an incoming
459 * connection from that IP address. Used by bridges only, to track which
460 * countries have them blocked. */
461 typedef struct clientmap_entry_t {
462 HT_ENTRY(clientmap_entry_t) node;
463 tor_addr_t addr;
464 /** Time when we last saw this IP address, in MINUTES since the epoch.
466 * (This will run out of space around 4011 CE. If Tor is still in use around
467 * 4000 CE, please remember to add more bits to last_seen_in_minutes.) */
468 unsigned int last_seen_in_minutes:30;
469 unsigned int action:2;
470 } clientmap_entry_t;
/** Largest value that fits in last_seen_in_minutes.  The field is 30 bits
 * wide, so the limit is (1u<<30)-1, i.e. 0x3fffffffu. */
#define MAX_LAST_SEEN_IN_MINUTES 0x3fffffffu
477 /** Map from client IP address to last time seen. */
478 static HT_HEAD(clientmap, clientmap_entry_t) client_history =
479 HT_INITIALIZER();
481 /** Hashtable helper: compute a hash of a clientmap_entry_t. */
482 static INLINE unsigned
483 clientmap_entry_hash(const clientmap_entry_t *a)
485 return ht_improve_hash(tor_addr_hash(&a->addr));
487 /** Hashtable helper: compare two clientmap_entry_t values for equality. */
488 static INLINE int
489 clientmap_entries_eq(const clientmap_entry_t *a, const clientmap_entry_t *b)
491 return !tor_addr_compare(&a->addr, &b->addr, CMP_EXACT) &&
492 a->action == b->action;
495 HT_PROTOTYPE(clientmap, clientmap_entry_t, node, clientmap_entry_hash,
496 clientmap_entries_eq);
497 HT_GENERATE(clientmap, clientmap_entry_t, node, clientmap_entry_hash,
498 clientmap_entries_eq, 0.6, malloc, realloc, free);
500 /** Clear history of connecting clients used by entry and bridge stats. */
501 static void
502 client_history_clear(void)
504 clientmap_entry_t **ent, **next, *this;
505 for (ent = HT_START(clientmap, &client_history); ent != NULL;
506 ent = next) {
507 if ((*ent)->action == GEOIP_CLIENT_CONNECT) {
508 this = *ent;
509 next = HT_NEXT_RMV(clientmap, &client_history, ent);
510 tor_free(this);
511 } else {
512 next = HT_NEXT(clientmap, &client_history, ent);
517 /** Note that we've seen a client connect from the IP <b>addr</b>
518 * at time <b>now</b>. Ignored by all but bridges and directories if
519 * configured accordingly. */
520 void
521 geoip_note_client_seen(geoip_client_action_t action,
522 const tor_addr_t *addr, time_t now)
524 const or_options_t *options = get_options();
525 clientmap_entry_t lookup, *ent;
526 if (action == GEOIP_CLIENT_CONNECT) {
527 /* Only remember statistics as entry guard or as bridge. */
528 if (!options->EntryStatistics &&
529 (!(options->BridgeRelay && options->BridgeRecordUsageByCountry)))
530 return;
531 } else {
532 if (options->BridgeRelay || options->BridgeAuthoritativeDir ||
533 !options->DirReqStatistics)
534 return;
537 tor_addr_copy(&lookup.addr, addr);
538 lookup.action = (int)action;
539 ent = HT_FIND(clientmap, &client_history, &lookup);
540 if (! ent) {
541 ent = tor_malloc_zero(sizeof(clientmap_entry_t));
542 tor_addr_copy(&ent->addr, addr);
543 ent->action = (int)action;
544 HT_INSERT(clientmap, &client_history, ent);
546 if (now / 60 <= (int)MAX_LAST_SEEN_IN_MINUTES && now >= 0)
547 ent->last_seen_in_minutes = (unsigned)(now/60);
548 else
549 ent->last_seen_in_minutes = 0;
551 if (action == GEOIP_CLIENT_NETWORKSTATUS) {
552 int country_idx = geoip_get_country_by_addr(addr);
553 if (country_idx < 0)
554 country_idx = 0; /** unresolved requests are stored at index 0. */
555 if (country_idx >= 0 && country_idx < smartlist_len(geoip_countries)) {
556 geoip_country_t *country = smartlist_get(geoip_countries, country_idx);
557 ++country->n_v3_ns_requests;
562 /** HT_FOREACH helper: remove a clientmap_entry_t from the hashtable if it's
563 * older than a certain time. */
564 static int
565 remove_old_client_helper_(struct clientmap_entry_t *ent, void *_cutoff)
567 time_t cutoff = *(time_t*)_cutoff / 60;
568 if (ent->last_seen_in_minutes < cutoff) {
569 tor_free(ent);
570 return 1;
571 } else {
572 return 0;
576 /** Forget about all clients that haven't connected since <b>cutoff</b>. */
577 void
578 geoip_remove_old_clients(time_t cutoff)
580 clientmap_HT_FOREACH_FN(&client_history,
581 remove_old_client_helper_,
582 &cutoff);
585 /** How many responses are we giving to clients requesting v3 network
586 * statuses? */
587 static uint32_t ns_v3_responses[GEOIP_NS_RESPONSE_NUM];
589 /** Note that we've rejected a client's request for a v3 network status
590 * for reason <b>reason</b> at time <b>now</b>. */
591 void
592 geoip_note_ns_response(geoip_ns_response_t response)
594 static int arrays_initialized = 0;
595 if (!get_options()->DirReqStatistics)
596 return;
597 if (!arrays_initialized) {
598 memset(ns_v3_responses, 0, sizeof(ns_v3_responses));
599 arrays_initialized = 1;
601 tor_assert(response < GEOIP_NS_RESPONSE_NUM);
602 ns_v3_responses[response]++;
/** A country is only mentioned if at least this many IPs connected from
 * it.  This conceivably avoids reporting information that could
 * deanonymize users, though analysis is lacking. */
#define MIN_IPS_TO_NOTE_COUNTRY 1
/** No geoip data is reported at all unless we have at least this many IPs
 * to report about. */
#define MIN_IPS_TO_NOTE_ANYTHING 1
/** Per-country figures are rounded up to the nearest multiple of this
 * value before being reported. */
#define IP_GRANULARITY 8
/** Helper type: one per-country total, used when sorting totals by
 * value. */
typedef struct c_hist_t {
  char country[3];  /**< Two-letter country code plus terminating NUL. */
  unsigned total;   /**< Total IP addresses seen in this country. */
} c_hist_t;
622 /** Sorting helper: return -1, 1, or 0 based on comparison of two
623 * geoip_ipv4_entry_t. Sort in descending order of total, and then by country
624 * code. */
625 static int
626 c_hist_compare_(const void **_a, const void **_b)
628 const c_hist_t *a = *_a, *b = *_b;
629 if (a->total > b->total)
630 return -1;
631 else if (a->total < b->total)
632 return 1;
633 else
634 return strcmp(a->country, b->country);
/** Age, in seconds, beyond which a directory request that is still
 * incomplete at the end of a measurement period is counted as failed
 * rather than as still running. */
#define DIRREQ_TIMEOUT (10*60)
642 /** Entry in a map from either chan->global_identifier for direct requests
643 * or a unique circuit identifier for tunneled requests to request time,
644 * response size, and completion time of a network status request. Used to
645 * measure download times of requests to derive average client
646 * bandwidths. */
647 typedef struct dirreq_map_entry_t {
648 HT_ENTRY(dirreq_map_entry_t) node;
649 /** Unique identifier for this network status request; this is either the
650 * chan->global_identifier of the dir channel (direct request) or a new
651 * locally unique identifier of a circuit (tunneled request). This ID is
652 * only unique among other direct or tunneled requests, respectively. */
653 uint64_t dirreq_id;
654 unsigned int state:3; /**< State of this directory request. */
655 unsigned int type:1; /**< Is this a direct or a tunneled request? */
656 unsigned int completed:1; /**< Is this request complete? */
657 /** When did we receive the request and started sending the response? */
658 struct timeval request_time;
659 size_t response_size; /**< What is the size of the response in bytes? */
660 struct timeval completion_time; /**< When did the request succeed? */
661 } dirreq_map_entry_t;
663 /** Map of all directory requests asking for v2 or v3 network statuses in
664 * the current geoip-stats interval. Values are
665 * of type *<b>dirreq_map_entry_t</b>. */
666 static HT_HEAD(dirreqmap, dirreq_map_entry_t) dirreq_map =
667 HT_INITIALIZER();
669 static int
670 dirreq_map_ent_eq(const dirreq_map_entry_t *a,
671 const dirreq_map_entry_t *b)
673 return a->dirreq_id == b->dirreq_id && a->type == b->type;
676 /* DOCDOC dirreq_map_ent_hash */
677 static unsigned
678 dirreq_map_ent_hash(const dirreq_map_entry_t *entry)
680 unsigned u = (unsigned) entry->dirreq_id;
681 u += entry->type << 20;
682 return u;
685 HT_PROTOTYPE(dirreqmap, dirreq_map_entry_t, node, dirreq_map_ent_hash,
686 dirreq_map_ent_eq);
687 HT_GENERATE(dirreqmap, dirreq_map_entry_t, node, dirreq_map_ent_hash,
688 dirreq_map_ent_eq, 0.6, malloc, realloc, free);
690 /** Helper: Put <b>entry</b> into map of directory requests using
691 * <b>type</b> and <b>dirreq_id</b> as key parts. If there is
692 * already an entry for that key, print out a BUG warning and return. */
693 static void
694 dirreq_map_put_(dirreq_map_entry_t *entry, dirreq_type_t type,
695 uint64_t dirreq_id)
697 dirreq_map_entry_t *old_ent;
698 tor_assert(entry->type == type);
699 tor_assert(entry->dirreq_id == dirreq_id);
701 /* XXXX we could switch this to HT_INSERT some time, since it seems that
702 * this bug doesn't happen. But since this function doesn't seem to be
703 * critical-path, it's sane to leave it alone. */
704 old_ent = HT_REPLACE(dirreqmap, &dirreq_map, entry);
705 if (old_ent && old_ent != entry) {
706 log_warn(LD_BUG, "Error when putting directory request into local "
707 "map. There was already an entry for the same identifier.");
708 return;
712 /** Helper: Look up and return an entry in the map of directory requests
713 * using <b>type</b> and <b>dirreq_id</b> as key parts. If there
714 * is no such entry, return NULL. */
715 static dirreq_map_entry_t *
716 dirreq_map_get_(dirreq_type_t type, uint64_t dirreq_id)
718 dirreq_map_entry_t lookup;
719 lookup.type = type;
720 lookup.dirreq_id = dirreq_id;
721 return HT_FIND(dirreqmap, &dirreq_map, &lookup);
724 /** Note that an either direct or tunneled (see <b>type</b>) directory
725 * request for a v3 network status with unique ID <b>dirreq_id</b> of size
726 * <b>response_size</b> has started. */
727 void
728 geoip_start_dirreq(uint64_t dirreq_id, size_t response_size,
729 dirreq_type_t type)
731 dirreq_map_entry_t *ent;
732 if (!get_options()->DirReqStatistics)
733 return;
734 ent = tor_malloc_zero(sizeof(dirreq_map_entry_t));
735 ent->dirreq_id = dirreq_id;
736 tor_gettimeofday(&ent->request_time);
737 ent->response_size = response_size;
738 ent->type = type;
739 dirreq_map_put_(ent, type, dirreq_id);
742 /** Change the state of the either direct or tunneled (see <b>type</b>)
743 * directory request with <b>dirreq_id</b> to <b>new_state</b> and
744 * possibly mark it as completed. If no entry can be found for the given
745 * key parts (e.g., if this is a directory request that we are not
746 * measuring, or one that was started in the previous measurement period),
747 * or if the state cannot be advanced to <b>new_state</b>, do nothing. */
748 void
749 geoip_change_dirreq_state(uint64_t dirreq_id, dirreq_type_t type,
750 dirreq_state_t new_state)
752 dirreq_map_entry_t *ent;
753 if (!get_options()->DirReqStatistics)
754 return;
755 ent = dirreq_map_get_(type, dirreq_id);
756 if (!ent)
757 return;
758 if (new_state == DIRREQ_IS_FOR_NETWORK_STATUS)
759 return;
760 if (new_state - 1 != ent->state)
761 return;
762 ent->state = new_state;
763 if ((type == DIRREQ_DIRECT &&
764 new_state == DIRREQ_FLUSHING_DIR_CONN_FINISHED) ||
765 (type == DIRREQ_TUNNELED &&
766 new_state == DIRREQ_CHANNEL_BUFFER_FLUSHED)) {
767 tor_gettimeofday(&ent->completion_time);
768 ent->completed = 1;
772 /** Return a newly allocated comma-separated string containing statistics
773 * on network status downloads. The string contains the number of completed
774 * requests, timeouts, and still running requests as well as the download
775 * times by deciles and quartiles. Return NULL if we have not observed
776 * requests for long enough. */
777 static char *
778 geoip_get_dirreq_history(dirreq_type_t type)
780 char *result = NULL;
781 smartlist_t *dirreq_completed = NULL;
782 uint32_t complete = 0, timeouts = 0, running = 0;
783 int bufsize = 1024, written;
784 dirreq_map_entry_t **ptr, **next, *ent;
785 struct timeval now;
787 tor_gettimeofday(&now);
788 dirreq_completed = smartlist_new();
789 for (ptr = HT_START(dirreqmap, &dirreq_map); ptr; ptr = next) {
790 ent = *ptr;
791 if (ent->type != type) {
792 next = HT_NEXT(dirreqmap, &dirreq_map, ptr);
793 continue;
794 } else {
795 if (ent->completed) {
796 smartlist_add(dirreq_completed, ent);
797 complete++;
798 next = HT_NEXT_RMV(dirreqmap, &dirreq_map, ptr);
799 } else {
800 if (tv_mdiff(&ent->request_time, &now) / 1000 > DIRREQ_TIMEOUT)
801 timeouts++;
802 else
803 running++;
804 next = HT_NEXT_RMV(dirreqmap, &dirreq_map, ptr);
805 tor_free(ent);
809 #define DIR_REQ_GRANULARITY 4
810 complete = round_uint32_to_next_multiple_of(complete,
811 DIR_REQ_GRANULARITY);
812 timeouts = round_uint32_to_next_multiple_of(timeouts,
813 DIR_REQ_GRANULARITY);
814 running = round_uint32_to_next_multiple_of(running,
815 DIR_REQ_GRANULARITY);
816 result = tor_malloc_zero(bufsize);
817 written = tor_snprintf(result, bufsize, "complete=%u,timeout=%u,"
818 "running=%u", complete, timeouts, running);
819 if (written < 0) {
820 tor_free(result);
821 goto done;
824 #define MIN_DIR_REQ_RESPONSES 16
825 if (complete >= MIN_DIR_REQ_RESPONSES) {
826 uint32_t *dltimes;
827 /* We may have rounded 'completed' up. Here we want to use the
828 * real value. */
829 complete = smartlist_len(dirreq_completed);
830 dltimes = tor_malloc_zero(sizeof(uint32_t) * complete);
831 SMARTLIST_FOREACH_BEGIN(dirreq_completed, dirreq_map_entry_t *, ent) {
832 uint32_t bytes_per_second;
833 uint32_t time_diff = (uint32_t) tv_mdiff(&ent->request_time,
834 &ent->completion_time);
835 if (time_diff == 0)
836 time_diff = 1; /* Avoid DIV/0; "instant" answers are impossible
837 * by law of nature or something, but a milisecond
838 * is a bit greater than "instantly" */
839 bytes_per_second = (uint32_t)(1000 * ent->response_size / time_diff);
840 dltimes[ent_sl_idx] = bytes_per_second;
841 } SMARTLIST_FOREACH_END(ent);
842 median_uint32(dltimes, complete); /* sorts as a side effect. */
843 written = tor_snprintf(result + written, bufsize - written,
844 ",min=%u,d1=%u,d2=%u,q1=%u,d3=%u,d4=%u,md=%u,"
845 "d6=%u,d7=%u,q3=%u,d8=%u,d9=%u,max=%u",
846 dltimes[0],
847 dltimes[1*complete/10-1],
848 dltimes[2*complete/10-1],
849 dltimes[1*complete/4-1],
850 dltimes[3*complete/10-1],
851 dltimes[4*complete/10-1],
852 dltimes[5*complete/10-1],
853 dltimes[6*complete/10-1],
854 dltimes[7*complete/10-1],
855 dltimes[3*complete/4-1],
856 dltimes[8*complete/10-1],
857 dltimes[9*complete/10-1],
858 dltimes[complete-1]);
859 if (written<0)
860 tor_free(result);
861 tor_free(dltimes);
863 done:
864 SMARTLIST_FOREACH(dirreq_completed, dirreq_map_entry_t *, ent,
865 tor_free(ent));
866 smartlist_free(dirreq_completed);
867 return result;
870 /** Store a newly allocated comma-separated string in
871 * *<a>country_str</a> containing entries for all the countries from
872 * which we've seen enough clients connect as a bridge, directory
873 * server, or entry guard. The entry format is cc=num where num is the
874 * number of IPs we've seen connecting from that country, and cc is a
875 * lowercased country code. *<a>country_str</a> is set to NULL if
876 * we're not ready to export per country data yet.
878 * Store a newly allocated comma-separated string in <a>ipver_str</a>
879 * containing entries for clients connecting over IPv4 and IPv6. The
880 * format is family=num where num is the nubmer of IPs we've seen
881 * connecting over that protocol family, and family is 'v4' or 'v6'.
883 * Return 0 on success and -1 if we're missing geoip data. */
885 geoip_get_client_history(geoip_client_action_t action,
886 char **country_str, char **ipver_str)
888 unsigned granularity = IP_GRANULARITY;
889 smartlist_t *entries = NULL;
890 int n_countries = geoip_get_n_countries();
891 int i;
892 clientmap_entry_t **ent;
893 unsigned *counts = NULL;
894 unsigned total = 0;
895 unsigned ipv4_count = 0, ipv6_count = 0;
897 if (!geoip_is_loaded(AF_INET) && !geoip_is_loaded(AF_INET6))
898 return -1;
900 counts = tor_malloc_zero(sizeof(unsigned)*n_countries);
901 HT_FOREACH(ent, clientmap, &client_history) {
902 int country;
903 if ((*ent)->action != (int)action)
904 continue;
905 country = geoip_get_country_by_addr(&(*ent)->addr);
906 if (country < 0)
907 country = 0; /** unresolved requests are stored at index 0. */
908 tor_assert(0 <= country && country < n_countries);
909 ++counts[country];
910 ++total;
911 switch (tor_addr_family(&(*ent)->addr)) {
912 case AF_INET:
913 ipv4_count++;
914 break;
915 case AF_INET6:
916 ipv6_count++;
917 break;
920 if (ipver_str) {
921 smartlist_t *chunks = smartlist_new();
922 smartlist_add_asprintf(chunks, "v4=%u",
923 round_to_next_multiple_of(ipv4_count, granularity));
924 smartlist_add_asprintf(chunks, "v6=%u",
925 round_to_next_multiple_of(ipv6_count, granularity));
926 *ipver_str = smartlist_join_strings(chunks, ",", 0, NULL);
927 SMARTLIST_FOREACH(chunks, char *, c, tor_free(c));
928 smartlist_free(chunks);
931 /* Don't record per country data if we haven't seen enough IPs. */
932 if (total < MIN_IPS_TO_NOTE_ANYTHING) {
933 tor_free(counts);
934 if (country_str)
935 *country_str = NULL;
936 return 0;
939 /* Make a list of c_hist_t */
940 entries = smartlist_new();
941 for (i = 0; i < n_countries; ++i) {
942 unsigned c = counts[i];
943 const char *countrycode;
944 c_hist_t *ent;
945 /* Only report a country if it has a minimum number of IPs. */
946 if (c >= MIN_IPS_TO_NOTE_COUNTRY) {
947 c = round_to_next_multiple_of(c, granularity);
948 countrycode = geoip_get_country_name(i);
949 ent = tor_malloc(sizeof(c_hist_t));
950 strlcpy(ent->country, countrycode, sizeof(ent->country));
951 ent->total = c;
952 smartlist_add(entries, ent);
955 /* Sort entries. Note that we must do this _AFTER_ rounding, or else
956 * the sort order could leak info. */
957 smartlist_sort(entries, c_hist_compare_);
959 if (country_str) {
960 smartlist_t *chunks = smartlist_new();
961 SMARTLIST_FOREACH(entries, c_hist_t *, ch, {
962 smartlist_add_asprintf(chunks, "%s=%u", ch->country, ch->total);
964 *country_str = smartlist_join_strings(chunks, ",", 0, NULL);
965 SMARTLIST_FOREACH(chunks, char *, c, tor_free(c));
966 smartlist_free(chunks);
969 SMARTLIST_FOREACH(entries, c_hist_t *, c, tor_free(c));
970 smartlist_free(entries);
971 tor_free(counts);
973 return 0;
976 /** Return a newly allocated string holding the per-country request history
977 * for v3 network statuses in a format suitable for an extra-info document,
978 * or NULL on failure. */
979 char *
980 geoip_get_request_history(void)
982 smartlist_t *entries, *strings;
983 char *result;
984 unsigned granularity = IP_GRANULARITY;
986 if (!geoip_countries)
987 return NULL;
989 entries = smartlist_new();
990 SMARTLIST_FOREACH_BEGIN(geoip_countries, geoip_country_t *, c) {
991 uint32_t tot = 0;
992 c_hist_t *ent;
993 tot = c->n_v3_ns_requests;
994 if (!tot)
995 continue;
996 ent = tor_malloc_zero(sizeof(c_hist_t));
997 strlcpy(ent->country, c->countrycode, sizeof(ent->country));
998 ent->total = round_to_next_multiple_of(tot, granularity);
999 smartlist_add(entries, ent);
1000 } SMARTLIST_FOREACH_END(c);
1001 smartlist_sort(entries, c_hist_compare_);
1003 strings = smartlist_new();
1004 SMARTLIST_FOREACH(entries, c_hist_t *, ent, {
1005 smartlist_add_asprintf(strings, "%s=%u", ent->country, ent->total);
1007 result = smartlist_join_strings(strings, ",", 0, NULL);
1008 SMARTLIST_FOREACH(strings, char *, cp, tor_free(cp));
1009 SMARTLIST_FOREACH(entries, c_hist_t *, ent, tor_free(ent));
1010 smartlist_free(strings);
1011 smartlist_free(entries);
1012 return result;
1015 /** Start time of directory request stats or 0 if we're not collecting
1016 * directory request statistics. */
1017 static time_t start_of_dirreq_stats_interval;
1019 /** Initialize directory request stats. */
1020 void
1021 geoip_dirreq_stats_init(time_t now)
1023 start_of_dirreq_stats_interval = now;
1026 /** Reset counters for dirreq stats. */
1027 void
1028 geoip_reset_dirreq_stats(time_t now)
1030 SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, {
1031 c->n_v3_ns_requests = 0;
1034 clientmap_entry_t **ent, **next, *this;
1035 for (ent = HT_START(clientmap, &client_history); ent != NULL;
1036 ent = next) {
1037 if ((*ent)->action == GEOIP_CLIENT_NETWORKSTATUS) {
1038 this = *ent;
1039 next = HT_NEXT_RMV(clientmap, &client_history, ent);
1040 tor_free(this);
1041 } else {
1042 next = HT_NEXT(clientmap, &client_history, ent);
1046 memset(ns_v3_responses, 0, sizeof(ns_v3_responses));
1048 dirreq_map_entry_t **ent, **next, *this;
1049 for (ent = HT_START(dirreqmap, &dirreq_map); ent != NULL; ent = next) {
1050 this = *ent;
1051 next = HT_NEXT_RMV(dirreqmap, &dirreq_map, ent);
1052 tor_free(this);
1055 start_of_dirreq_stats_interval = now;
1058 /** Stop collecting directory request stats in a way that we can re-start
1059 * doing so in geoip_dirreq_stats_init(). */
1060 void
1061 geoip_dirreq_stats_term(void)
1063 geoip_reset_dirreq_stats(0);
1066 /** Return a newly allocated string containing the dirreq statistics
1067 * until <b>now</b>, or NULL if we're not collecting dirreq stats. Caller
1068 * must ensure start_of_dirreq_stats_interval is in the past. */
1069 char *
1070 geoip_format_dirreq_stats(time_t now)
1072 char t[ISO_TIME_LEN+1];
1073 int i;
1074 char *v3_ips_string, *v3_reqs_string, *v3_direct_dl_string,
1075 *v3_tunneled_dl_string;
1076 char *result;
1078 if (!start_of_dirreq_stats_interval)
1079 return NULL; /* Not initialized. */
1081 tor_assert(now >= start_of_dirreq_stats_interval);
1083 format_iso_time(t, now);
1084 geoip_get_client_history(GEOIP_CLIENT_NETWORKSTATUS, &v3_ips_string, NULL);
1085 v3_reqs_string = geoip_get_request_history();
1087 #define RESPONSE_GRANULARITY 8
1088 for (i = 0; i < GEOIP_NS_RESPONSE_NUM; i++) {
1089 ns_v3_responses[i] = round_uint32_to_next_multiple_of(
1090 ns_v3_responses[i], RESPONSE_GRANULARITY);
1092 #undef RESPONSE_GRANULARITY
1094 v3_direct_dl_string = geoip_get_dirreq_history(DIRREQ_DIRECT);
1095 v3_tunneled_dl_string = geoip_get_dirreq_history(DIRREQ_TUNNELED);
1097 /* Put everything together into a single string. */
1098 tor_asprintf(&result, "dirreq-stats-end %s (%d s)\n"
1099 "dirreq-v3-ips %s\n"
1100 "dirreq-v3-reqs %s\n"
1101 "dirreq-v3-resp ok=%u,not-enough-sigs=%u,unavailable=%u,"
1102 "not-found=%u,not-modified=%u,busy=%u\n"
1103 "dirreq-v3-direct-dl %s\n"
1104 "dirreq-v3-tunneled-dl %s\n",
1106 (unsigned) (now - start_of_dirreq_stats_interval),
1107 v3_ips_string ? v3_ips_string : "",
1108 v3_reqs_string ? v3_reqs_string : "",
1109 ns_v3_responses[GEOIP_SUCCESS],
1110 ns_v3_responses[GEOIP_REJECT_NOT_ENOUGH_SIGS],
1111 ns_v3_responses[GEOIP_REJECT_UNAVAILABLE],
1112 ns_v3_responses[GEOIP_REJECT_NOT_FOUND],
1113 ns_v3_responses[GEOIP_REJECT_NOT_MODIFIED],
1114 ns_v3_responses[GEOIP_REJECT_BUSY],
1115 v3_direct_dl_string ? v3_direct_dl_string : "",
1116 v3_tunneled_dl_string ? v3_tunneled_dl_string : "");
1118 /* Free partial strings. */
1119 tor_free(v3_ips_string);
1120 tor_free(v3_reqs_string);
1121 tor_free(v3_direct_dl_string);
1122 tor_free(v3_tunneled_dl_string);
1124 return result;
1127 /** If 24 hours have passed since the beginning of the current dirreq
1128 * stats period, write dirreq stats to $DATADIR/stats/dirreq-stats
1129 * (possibly overwriting an existing file) and reset counters. Return
1130 * when we would next want to write dirreq stats or 0 if we never want to
1131 * write. */
1132 time_t
1133 geoip_dirreq_stats_write(time_t now)
1135 char *statsdir = NULL, *filename = NULL, *str = NULL;
1137 if (!start_of_dirreq_stats_interval)
1138 return 0; /* Not initialized. */
1139 if (start_of_dirreq_stats_interval + WRITE_STATS_INTERVAL > now)
1140 goto done; /* Not ready to write. */
1142 /* Discard all items in the client history that are too old. */
1143 geoip_remove_old_clients(start_of_dirreq_stats_interval);
1145 /* Generate history string .*/
1146 str = geoip_format_dirreq_stats(now);
1148 /* Write dirreq-stats string to disk. */
1149 statsdir = get_datadir_fname("stats");
1150 if (check_private_dir(statsdir, CPD_CREATE, get_options()->User) < 0) {
1151 log_warn(LD_HIST, "Unable to create stats/ directory!");
1152 goto done;
1154 filename = get_datadir_fname2("stats", "dirreq-stats");
1155 if (write_str_to_file(filename, str, 0) < 0)
1156 log_warn(LD_HIST, "Unable to write dirreq statistics to disk!");
1158 /* Reset measurement interval start. */
1159 geoip_reset_dirreq_stats(now);
1161 done:
1162 tor_free(statsdir);
1163 tor_free(filename);
1164 tor_free(str);
1165 return start_of_dirreq_stats_interval + WRITE_STATS_INTERVAL;
1168 /** Start time of bridge stats or 0 if we're not collecting bridge
1169 * statistics. */
1170 static time_t start_of_bridge_stats_interval;
1172 /** Initialize bridge stats. */
1173 void
1174 geoip_bridge_stats_init(time_t now)
1176 start_of_bridge_stats_interval = now;
1179 /** Stop collecting bridge stats in a way that we can re-start doing so in
1180 * geoip_bridge_stats_init(). */
1181 void
1182 geoip_bridge_stats_term(void)
1184 client_history_clear();
1185 start_of_bridge_stats_interval = 0;
1188 /** Validate a bridge statistics string as it would be written to a
1189 * current extra-info descriptor. Return 1 if the string is valid and
1190 * recent enough, or 0 otherwise. */
1191 static int
1192 validate_bridge_stats(const char *stats_str, time_t now)
1194 char stats_end_str[ISO_TIME_LEN+1], stats_start_str[ISO_TIME_LEN+1],
1195 *eos;
1197 const char *BRIDGE_STATS_END = "bridge-stats-end ";
1198 const char *BRIDGE_IPS = "bridge-ips ";
1199 const char *BRIDGE_IPS_EMPTY_LINE = "bridge-ips\n";
1200 const char *tmp;
1201 time_t stats_end_time;
1202 int seconds;
1203 tor_assert(stats_str);
1205 /* Parse timestamp and number of seconds from
1206 "bridge-stats-end YYYY-MM-DD HH:MM:SS (N s)" */
1207 tmp = find_str_at_start_of_line(stats_str, BRIDGE_STATS_END);
1208 if (!tmp)
1209 return 0;
1210 tmp += strlen(BRIDGE_STATS_END);
1212 if (strlen(tmp) < ISO_TIME_LEN + 6)
1213 return 0;
1214 strlcpy(stats_end_str, tmp, sizeof(stats_end_str));
1215 if (parse_iso_time(stats_end_str, &stats_end_time) < 0)
1216 return 0;
1217 if (stats_end_time < now - (25*60*60) ||
1218 stats_end_time > now + (1*60*60))
1219 return 0;
1220 seconds = (int)strtol(tmp + ISO_TIME_LEN + 2, &eos, 10);
1221 if (!eos || seconds < 23*60*60)
1222 return 0;
1223 format_iso_time(stats_start_str, stats_end_time - seconds);
1225 /* Parse: "bridge-ips CC=N,CC=N,..." */
1226 tmp = find_str_at_start_of_line(stats_str, BRIDGE_IPS);
1227 if (!tmp) {
1228 /* Look if there is an empty "bridge-ips" line */
1229 tmp = find_str_at_start_of_line(stats_str, BRIDGE_IPS_EMPTY_LINE);
1230 if (!tmp)
1231 return 0;
1234 return 1;
1237 /** Most recent bridge statistics formatted to be written to extra-info
1238 * descriptors. */
1239 static char *bridge_stats_extrainfo = NULL;
1241 /** Return a newly allocated string holding our bridge usage stats by country
1242 * in a format suitable for inclusion in an extrainfo document. Return NULL on
1243 * failure. */
1244 char *
1245 geoip_format_bridge_stats(time_t now)
1247 char *out = NULL, *country_data = NULL, *ipver_data = NULL;
1248 long duration = now - start_of_bridge_stats_interval;
1249 char written[ISO_TIME_LEN+1];
1251 if (duration < 0)
1252 return NULL;
1253 if (!start_of_bridge_stats_interval)
1254 return NULL; /* Not initialized. */
1256 format_iso_time(written, now);
1257 geoip_get_client_history(GEOIP_CLIENT_CONNECT, &country_data, &ipver_data);
1259 tor_asprintf(&out,
1260 "bridge-stats-end %s (%ld s)\n"
1261 "bridge-ips %s\n"
1262 "bridge-ip-versions %s\n",
1263 written, duration,
1264 country_data ? country_data : "",
1265 ipver_data ? ipver_data : "");
1266 tor_free(country_data);
1267 tor_free(ipver_data);
1269 return out;
1272 /** Return a newly allocated string holding our bridge usage stats by country
1273 * in a format suitable for the answer to a controller request. Return NULL on
1274 * failure. */
1275 static char *
1276 format_bridge_stats_controller(time_t now)
1278 char *out = NULL, *country_data = NULL, *ipver_data = NULL;
1279 char started[ISO_TIME_LEN+1];
1280 (void) now;
1282 format_iso_time(started, start_of_bridge_stats_interval);
1283 geoip_get_client_history(GEOIP_CLIENT_CONNECT, &country_data, &ipver_data);
1285 tor_asprintf(&out,
1286 "TimeStarted=\"%s\" CountrySummary=%s IPVersions=%s",
1287 started,
1288 country_data ? country_data : "",
1289 ipver_data ? ipver_data : "");
1290 tor_free(country_data);
1291 tor_free(ipver_data);
1292 return out;
1295 /** Write bridge statistics to $DATADIR/stats/bridge-stats and return
1296 * when we should next try to write statistics. */
1297 time_t
1298 geoip_bridge_stats_write(time_t now)
1300 char *filename = NULL, *val = NULL, *statsdir = NULL;
1302 /* Check if 24 hours have passed since starting measurements. */
1303 if (now < start_of_bridge_stats_interval + WRITE_STATS_INTERVAL)
1304 return start_of_bridge_stats_interval + WRITE_STATS_INTERVAL;
1306 /* Discard all items in the client history that are too old. */
1307 geoip_remove_old_clients(start_of_bridge_stats_interval);
1309 /* Generate formatted string */
1310 val = geoip_format_bridge_stats(now);
1311 if (val == NULL)
1312 goto done;
1314 /* Update the stored value. */
1315 tor_free(bridge_stats_extrainfo);
1316 bridge_stats_extrainfo = val;
1317 start_of_bridge_stats_interval = now;
1319 /* Write it to disk. */
1320 statsdir = get_datadir_fname("stats");
1321 if (check_private_dir(statsdir, CPD_CREATE, get_options()->User) < 0)
1322 goto done;
1323 filename = get_datadir_fname2("stats", "bridge-stats");
1325 write_str_to_file(filename, bridge_stats_extrainfo, 0);
1327 /* Tell the controller, "hey, there are clients!" */
1329 char *controller_str = format_bridge_stats_controller(now);
1330 if (controller_str)
1331 control_event_clients_seen(controller_str);
1332 tor_free(controller_str);
1334 done:
1335 tor_free(filename);
1336 tor_free(statsdir);
1338 return start_of_bridge_stats_interval + WRITE_STATS_INTERVAL;
1341 /** Try to load the most recent bridge statistics from disk, unless we
1342 * have finished a measurement interval lately, and check whether they
1343 * are still recent enough. */
1344 static void
1345 load_bridge_stats(time_t now)
1347 char *fname, *contents;
1348 if (bridge_stats_extrainfo)
1349 return;
1351 fname = get_datadir_fname2("stats", "bridge-stats");
1352 contents = read_file_to_str(fname, RFTS_IGNORE_MISSING, NULL);
1353 if (contents && validate_bridge_stats(contents, now))
1354 bridge_stats_extrainfo = contents;
1356 tor_free(fname);
1359 /** Return most recent bridge statistics for inclusion in extra-info
1360 * descriptors, or NULL if we don't have recent bridge statistics. */
1361 const char *
1362 geoip_get_bridge_stats_extrainfo(time_t now)
1364 load_bridge_stats(now);
1365 return bridge_stats_extrainfo;
1368 /** Return a new string containing the recent bridge statistics to be returned
1369 * to controller clients, or NULL if we don't have any bridge statistics. */
1370 char *
1371 geoip_get_bridge_stats_controller(time_t now)
1373 return format_bridge_stats_controller(now);
1376 /** Start time of entry stats or 0 if we're not collecting entry
1377 * statistics. */
1378 static time_t start_of_entry_stats_interval;
1380 /** Initialize entry stats. */
1381 void
1382 geoip_entry_stats_init(time_t now)
1384 start_of_entry_stats_interval = now;
1387 /** Reset counters for entry stats. */
1388 void
1389 geoip_reset_entry_stats(time_t now)
1391 client_history_clear();
1392 start_of_entry_stats_interval = now;
1395 /** Stop collecting entry stats in a way that we can re-start doing so in
1396 * geoip_entry_stats_init(). */
1397 void
1398 geoip_entry_stats_term(void)
1400 geoip_reset_entry_stats(0);
1403 /** Return a newly allocated string containing the entry statistics
1404 * until <b>now</b>, or NULL if we're not collecting entry stats. Caller
1405 * must ensure start_of_entry_stats_interval lies in the past. */
1406 char *
1407 geoip_format_entry_stats(time_t now)
1409 char t[ISO_TIME_LEN+1];
1410 char *data = NULL;
1411 char *result;
1413 if (!start_of_entry_stats_interval)
1414 return NULL; /* Not initialized. */
1416 tor_assert(now >= start_of_entry_stats_interval);
1418 geoip_get_client_history(GEOIP_CLIENT_CONNECT, &data, NULL);
1419 format_iso_time(t, now);
1420 tor_asprintf(&result,
1421 "entry-stats-end %s (%u s)\n"
1422 "entry-ips %s\n",
1423 t, (unsigned) (now - start_of_entry_stats_interval),
1424 data ? data : "");
1425 tor_free(data);
1426 return result;
1429 /** If 24 hours have passed since the beginning of the current entry stats
1430 * period, write entry stats to $DATADIR/stats/entry-stats (possibly
1431 * overwriting an existing file) and reset counters. Return when we would
1432 * next want to write entry stats or 0 if we never want to write. */
1433 time_t
1434 geoip_entry_stats_write(time_t now)
1436 char *statsdir = NULL, *filename = NULL, *str = NULL;
1438 if (!start_of_entry_stats_interval)
1439 return 0; /* Not initialized. */
1440 if (start_of_entry_stats_interval + WRITE_STATS_INTERVAL > now)
1441 goto done; /* Not ready to write. */
1443 /* Discard all items in the client history that are too old. */
1444 geoip_remove_old_clients(start_of_entry_stats_interval);
1446 /* Generate history string .*/
1447 str = geoip_format_entry_stats(now);
1449 /* Write entry-stats string to disk. */
1450 statsdir = get_datadir_fname("stats");
1451 if (check_private_dir(statsdir, CPD_CREATE, get_options()->User) < 0) {
1452 log_warn(LD_HIST, "Unable to create stats/ directory!");
1453 goto done;
1455 filename = get_datadir_fname2("stats", "entry-stats");
1456 if (write_str_to_file(filename, str, 0) < 0)
1457 log_warn(LD_HIST, "Unable to write entry statistics to disk!");
1459 /* Reset measurement interval start. */
1460 geoip_reset_entry_stats(now);
1462 done:
1463 tor_free(statsdir);
1464 tor_free(filename);
1465 tor_free(str);
1466 return start_of_entry_stats_interval + WRITE_STATS_INTERVAL;
1469 /** Helper used to implement GETINFO ip-to-country/... controller command. */
1471 getinfo_helper_geoip(control_connection_t *control_conn,
1472 const char *question, char **answer,
1473 const char **errmsg)
1475 (void)control_conn;
1476 if (!strcmpstart(question, "ip-to-country/")) {
1477 int c;
1478 sa_family_t family;
1479 tor_addr_t addr;
1480 question += strlen("ip-to-country/");
1481 family = tor_addr_parse(&addr, question);
1482 if (family != AF_INET && family != AF_INET6) {
1483 *errmsg = "Invalid address family";
1484 return -1;
1486 if (!geoip_is_loaded(family)) {
1487 *errmsg = "GeoIP data not loaded";
1488 return -1;
1490 if (family == AF_INET)
1491 c = geoip_get_country_by_ipv4(tor_addr_to_ipv4h(&addr));
1492 else /* AF_INET6 */
1493 c = geoip_get_country_by_ipv6(tor_addr_to_in6(&addr));
1494 *answer = tor_strdup(geoip_get_country_name(c));
1496 return 0;
1499 /** Release all storage held by the GeoIP databases and country list. */
1500 static void
1501 clear_geoip_db(void)
1503 if (geoip_countries) {
1504 SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, tor_free(c));
1505 smartlist_free(geoip_countries);
1508 strmap_free(country_idxplus1_by_lc_code, NULL);
1509 if (geoip_ipv4_entries) {
1510 SMARTLIST_FOREACH(geoip_ipv4_entries, geoip_ipv4_entry_t *, ent,
1511 tor_free(ent));
1512 smartlist_free(geoip_ipv4_entries);
1514 if (geoip_ipv6_entries) {
1515 SMARTLIST_FOREACH(geoip_ipv6_entries, geoip_ipv6_entry_t *, ent,
1516 tor_free(ent));
1517 smartlist_free(geoip_ipv6_entries);
1519 geoip_countries = NULL;
1520 country_idxplus1_by_lc_code = NULL;
1521 geoip_ipv4_entries = NULL;
1522 geoip_ipv6_entries = NULL;
1525 /** Release all storage held in this file. */
1526 void
1527 geoip_free_all(void)
1530 clientmap_entry_t **ent, **next, *this;
1531 for (ent = HT_START(clientmap, &client_history); ent != NULL; ent = next) {
1532 this = *ent;
1533 next = HT_NEXT_RMV(clientmap, &client_history, ent);
1534 tor_free(this);
1536 HT_CLEAR(clientmap, &client_history);
1539 dirreq_map_entry_t **ent, **next, *this;
1540 for (ent = HT_START(dirreqmap, &dirreq_map); ent != NULL; ent = next) {
1541 this = *ent;
1542 next = HT_NEXT_RMV(dirreqmap, &dirreq_map, ent);
1543 tor_free(this);
1545 HT_CLEAR(dirreqmap, &dirreq_map);
1548 clear_geoip_db();