Some tweaks to statistics.
[tor/rransom.git] / src / or / geoip.c
blob7aeec8f5f1838684e8383b58952e564b77e73684
1 /* Copyright (c) 2007-2009, The Tor Project, Inc. */
2 /* See LICENSE for licensing information */
4 /**
5 * \file geoip.c
6 * \brief Functions related to maintaining an IP-to-country database and to
7 * summarizing client connections by country.
8 */
10 #define GEOIP_PRIVATE
11 #include "or.h"
12 #include "ht.h"
14 static void clear_geoip_db(void);
15 static void dump_geoip_stats(void);
16 static void dump_entry_stats(void);
/** An entry from the GeoIP file: maps an inclusive range of IPv4 addresses
 * to a country. */
typedef struct geoip_entry_t {
  uint32_t ip_low; /**< The lowest IP in the range, in host order */
  uint32_t ip_high; /**< The highest IP in the range, in host order */
  intptr_t country; /**< An index into geoip_countries */
} geoip_entry_t;
/** For how many periods should we remember per-country request history? */
#define REQUEST_HIST_LEN 1
/** How long are the periods for which we should remember request history?
 * Expressed in seconds (24 hours); it is added to time_t values. */
#define REQUEST_HIST_PERIOD (24*60*60)

/** A per-country record for GeoIP request history. */
typedef struct geoip_country_t {
  /** Two-letter country code plus terminating NUL. geoip_add_entry() stores
   * it lowercased. */
  char countrycode[3];
  /** Number of v2 network status requests seen, one slot per remembered
   * period; the last slot is the current period. */
  uint32_t n_v2_ns_requests[REQUEST_HIST_LEN];
  /** Number of v3 network status requests seen, one slot per remembered
   * period; the last slot is the current period. */
  uint32_t n_v3_ns_requests[REQUEST_HIST_LEN];
} geoip_country_t;
/** A list of geoip_country_t. geoip_load_file() puts the "??" record for
 * unresolved addresses at index 0. */
static smartlist_t *geoip_countries = NULL;
/** A map from lowercased country codes to their position in geoip_countries.
 * The index is encoded in the pointer, and 1 is added so that NULL can mean
 * not found. */
static strmap_t *country_idxplus1_by_lc_code = NULL;
/** A list of all known geoip_entry_t, sorted by ip_low (geoip_load_file()
 * sorts it after parsing). */
static smartlist_t *geoip_entries = NULL;
46 /** Return the index of the <b>country</b>'s entry in the GeoIP DB
47 * if it is a valid 2-letter country code, otherwise return -1.
49 country_t
50 geoip_get_country(const char *country)
52 void *_idxplus1;
53 intptr_t idx;
55 _idxplus1 = strmap_get_lc(country_idxplus1_by_lc_code, country);
56 if (!_idxplus1)
57 return -1;
59 idx = ((uintptr_t)_idxplus1)-1;
60 return (country_t)idx;
63 /** Add an entry to the GeoIP table, mapping all IPs between <b>low</b> and
64 * <b>high</b>, inclusive, to the 2-letter country code <b>country</b>.
66 static void
67 geoip_add_entry(uint32_t low, uint32_t high, const char *country)
69 intptr_t idx;
70 geoip_entry_t *ent;
71 void *_idxplus1;
73 if (high < low)
74 return;
76 _idxplus1 = strmap_get_lc(country_idxplus1_by_lc_code, country);
78 if (!_idxplus1) {
79 geoip_country_t *c = tor_malloc_zero(sizeof(geoip_country_t));
80 strlcpy(c->countrycode, country, sizeof(c->countrycode));
81 tor_strlower(c->countrycode);
82 smartlist_add(geoip_countries, c);
83 idx = smartlist_len(geoip_countries) - 1;
84 strmap_set_lc(country_idxplus1_by_lc_code, country, (void*)(idx+1));
85 } else {
86 idx = ((uintptr_t)_idxplus1)-1;
89 geoip_country_t *c = smartlist_get(geoip_countries, idx);
90 tor_assert(!strcasecmp(c->countrycode, country));
92 ent = tor_malloc_zero(sizeof(geoip_entry_t));
93 ent->ip_low = low;
94 ent->ip_high = high;
95 ent->country = idx;
96 smartlist_add(geoip_entries, ent);
99 /** Add an entry to the GeoIP table, parsing it from <b>line</b>. The
100 * format is as for geoip_load_file(). */
101 /*private*/ int
102 geoip_parse_entry(const char *line)
104 unsigned int low, high;
105 char b[3];
106 if (!geoip_countries) {
107 geoip_countries = smartlist_create();
108 geoip_entries = smartlist_create();
109 country_idxplus1_by_lc_code = strmap_new();
111 while (TOR_ISSPACE(*line))
112 ++line;
113 if (*line == '#')
114 return 0;
115 if (sscanf(line,"%u,%u,%2s", &low, &high, b) == 3) {
116 geoip_add_entry(low, high, b);
117 return 0;
118 } else if (sscanf(line,"\"%u\",\"%u\",\"%2s\",", &low, &high, b) == 3) {
119 geoip_add_entry(low, high, b);
120 return 0;
121 } else {
122 log_warn(LD_GENERAL, "Unable to parse line from GEOIP file: %s",
123 escaped(line));
124 return -1;
128 /** Sorting helper: return -1, 1, or 0 based on comparison of two
129 * geoip_entry_t */
130 static int
131 _geoip_compare_entries(const void **_a, const void **_b)
133 const geoip_entry_t *a = *_a, *b = *_b;
134 if (a->ip_low < b->ip_low)
135 return -1;
136 else if (a->ip_low > b->ip_low)
137 return 1;
138 else
139 return 0;
142 /** bsearch helper: return -1, 1, or 0 based on comparison of an IP (a pointer
143 * to a uint32_t in host order) to a geoip_entry_t */
144 static int
145 _geoip_compare_key_to_entry(const void *_key, const void **_member)
147 const uint32_t addr = *(uint32_t *)_key;
148 const geoip_entry_t *entry = *_member;
149 if (addr < entry->ip_low)
150 return -1;
151 else if (addr > entry->ip_high)
152 return 1;
153 else
154 return 0;
157 /** Return 1 if we should collect geoip stats on bridge users, and
158 * include them in our extrainfo descriptor. Else return 0. */
160 should_record_bridge_info(or_options_t *options)
162 return options->BridgeRelay && options->BridgeRecordUsageByCountry;
165 /** Clear the GeoIP database and reload it from the file
166 * <b>filename</b>. Return 0 on success, -1 on failure.
168 * Recognized line formats are:
169 * INTIPLOW,INTIPHIGH,CC
170 * and
171 * "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME"
172 * where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned
173 * integers, and CC is a country code.
175 * It also recognizes, and skips over, blank lines and lines that start
176 * with '#' (comments).
179 geoip_load_file(const char *filename, or_options_t *options)
181 FILE *f;
182 const char *msg = "";
183 int severity = options_need_geoip_info(options, &msg) ? LOG_WARN : LOG_INFO;
184 clear_geoip_db();
185 if (!(f = fopen(filename, "r"))) {
186 log_fn(severity, LD_GENERAL, "Failed to open GEOIP file %s. %s",
187 filename, msg);
188 return -1;
190 if (!geoip_countries) {
191 geoip_country_t *geoip_unresolved;
192 geoip_countries = smartlist_create();
193 /* Add a geoip_country_t for requests that could not be resolved to a
194 * country as first element (index 0) to geoip_countries. */
195 geoip_unresolved = tor_malloc_zero(sizeof(geoip_country_t));
196 strlcpy(geoip_unresolved->countrycode, "??",
197 sizeof(geoip_unresolved->countrycode));
198 smartlist_add(geoip_countries, geoip_unresolved);
199 country_idxplus1_by_lc_code = strmap_new();
201 if (geoip_entries) {
202 SMARTLIST_FOREACH(geoip_entries, geoip_entry_t *, e, tor_free(e));
203 smartlist_free(geoip_entries);
205 geoip_entries = smartlist_create();
206 log_notice(LD_GENERAL, "Parsing GEOIP file.");
207 while (!feof(f)) {
208 char buf[512];
209 if (fgets(buf, (int)sizeof(buf), f) == NULL)
210 break;
211 /* FFFF track full country name. */
212 geoip_parse_entry(buf);
214 /*XXXX abort and return -1 if no entries/illformed?*/
215 fclose(f);
217 smartlist_sort(geoip_entries, _geoip_compare_entries);
219 /* Okay, now we need to maybe change our mind about what is in which
220 * country. */
221 refresh_all_country_info();
223 return 0;
226 /** Given an IP address in host order, return a number representing the
227 * country to which that address belongs, or -1 for unknown. The return value
228 * will always be less than geoip_get_n_countries(). To decode it,
229 * call geoip_get_country_name().
232 geoip_get_country_by_ip(uint32_t ipaddr)
234 geoip_entry_t *ent;
235 if (!geoip_entries)
236 return -1;
237 ent = smartlist_bsearch(geoip_entries, &ipaddr, _geoip_compare_key_to_entry);
238 return ent ? (int)ent->country : -1;
241 /** Return the number of countries recognized by the GeoIP database. */
243 geoip_get_n_countries(void)
245 return (int) smartlist_len(geoip_countries);
248 /** Return the two-letter country code associated with the number <b>num</b>,
249 * or "??" for an unknown value. */
250 const char *
251 geoip_get_country_name(country_t num)
253 if (geoip_countries && num >= 0 && num < smartlist_len(geoip_countries)) {
254 geoip_country_t *c = smartlist_get(geoip_countries, num);
255 return c->countrycode;
256 } else
257 return "??";
260 /** Return true iff we have loaded a GeoIP database.*/
262 geoip_is_loaded(void)
264 return geoip_countries != NULL && geoip_entries != NULL;
/** Entry in a map from IP address to the last time we've seen an incoming
 * connection from that IP address. Used by bridges only, to track which
 * countries have them blocked.
 * NOTE(review): geoip_note_client_seen() also stores entries for directory
 * request actions, so "bridges only" looks outdated -- confirm. */
typedef struct clientmap_entry_t {
  HT_ENTRY(clientmap_entry_t) node; /**< Hashtable linkage. */
  uint32_t ipaddr; /**< Client address, in host order. */
  /** Time of the most recent sighting, stored as (time_t value)/60. */
  unsigned int last_seen_in_minutes:30;
  /** The geoip_client_action_t this entry was recorded for. */
  unsigned int action:2;
} clientmap_entry_t;

/** Presumably the bit mask matching the 2-bit <b>action</b> field above; it
 * is not used in the visible part of this file -- TODO confirm. */
#define ACTION_MASK 3

/** Map from client IP address to last time seen. */
static HT_HEAD(clientmap, clientmap_entry_t) client_history =
     HT_INITIALIZER();
/** Time at which we started tracking client IP history. */
static time_t client_history_starts = 0;

/** When did the current period of checking per-country request history
 * start? */
static time_t current_request_period_starts = 0;
/** How many older request periods are we remembering? */
static int n_old_request_periods = 0;
291 /** Hashtable helper: compute a hash of a clientmap_entry_t. */
292 static INLINE unsigned
293 clientmap_entry_hash(const clientmap_entry_t *a)
295 return ht_improve_hash((unsigned) a->ipaddr);
297 /** Hashtable helper: compare two clientmap_entry_t values for equality. */
298 static INLINE int
299 clientmap_entries_eq(const clientmap_entry_t *a, const clientmap_entry_t *b)
301 return a->ipaddr == b->ipaddr && a->action == b->action;
/* Instantiate the "clientmap" hashtable over clientmap_entry_t, linked
 * through the <b>node</b> member and using the hash and equality helpers
 * above. (0.6 is presumably the load factor expected by HT_GENERATE --
 * TODO confirm against ht.h.) */
HT_PROTOTYPE(clientmap, clientmap_entry_t, node, clientmap_entry_hash,
             clientmap_entries_eq);
HT_GENERATE(clientmap, clientmap_entry_t, node, clientmap_entry_hash,
            clientmap_entries_eq, 0.6, malloc, realloc, free);
/** How often do we update our estimate of which share of v2 and v3 directory
 * requests is sent to us? We could as well trigger updates of shares from
 * network status updates, but that means adding a lot of calls into code
 * that is independent from geoip stats (and keeping them up-to-date). We
 * are perfectly fine with an approximation of 15-minute granularity. */
#define REQUEST_SHARE_INTERVAL (15 * 60)

/** When did we last determine which share of v2 and v3 directory requests
 * is sent to us? Zero means "never". */
static time_t last_time_determined_shares = 0;

/** Sum of products of v2 shares times the number of seconds for which we
 * consider these shares as valid. */
static double v2_share_times_seconds;

/** Sum of products of v3 shares times the number of seconds for which we
 * consider these shares as valid. */
static double v3_share_times_seconds;

/** Number of seconds for which we have been determining v2 and v3 shares
 * since the sums above were last reset. */
static int share_seconds;
331 /** Try to determine which fraction of v2 and v3 directory requests aimed at
332 * caches will be sent to us at time <b>now</b> and store that value in
333 * order to take a mean value later on. */
334 static void
335 geoip_determine_shares(time_t now)
337 double v2_share = 0.0, v3_share = 0.0;
338 if (router_get_my_share_of_directory_requests(&v2_share, &v3_share) < 0)
339 return;
340 if (last_time_determined_shares) {
341 v2_share_times_seconds += v2_share *
342 ((double) (now - last_time_determined_shares));
343 v3_share_times_seconds += v3_share *
344 ((double) (now - last_time_determined_shares));
345 share_seconds += now - last_time_determined_shares;
347 last_time_determined_shares = now;
#ifdef ENABLE_DIRREQ_STATS
/** Compute the time-weighted mean fraction of v2 and v3 directory requests
 * aimed at caches that we expect to have received between the previous call
 * of this function and time <b>now</b>. Store the means in
 * *<b>v2_share_out</b> and *<b>v3_share_out</b> and reset the accumulators.
 * Return 0 on success, -1 on failure (e.g. when zero seconds have been
 * accumulated since the last call); on failure the outputs are untouched. */
static int
geoip_get_mean_shares(time_t now, double *v2_share_out,
                      double *v3_share_out)
{
  double seconds;

  /* Fold in a final sample so that the mean covers the time up to now. */
  geoip_determine_shares(now);
  if (share_seconds == 0)
    return -1;

  seconds = (double) share_seconds;
  *v2_share_out = v2_share_times_seconds / seconds;
  *v3_share_out = v3_share_times_seconds / seconds;

  v3_share_times_seconds = 0.0;
  v2_share_times_seconds = 0.0;
  share_seconds = 0;
  return 0;
}
#endif
/** Note that we've seen a client connect from the IP <b>addr</b> (host order)
 * at time <b>now</b>. Ignored by all but bridges and directories if
 * configured accordingly.
 *
 * <b>action</b> says what the client did: GEOIP_CLIENT_CONNECT for an
 * incoming connection, or one of the GEOIP_CLIENT_NETWORKSTATUS* values for
 * a network status request. Besides updating the per-IP client history,
 * this function rotates the request period when it has expired (dumping
 * statistics to disk first) and, for network status requests, bumps the
 * per-country request counter of the current period. */
void
geoip_note_client_seen(geoip_client_action_t action,
                       uint32_t addr, time_t now)
{
  or_options_t *options = get_options();
  clientmap_entry_t lookup, *ent;
  if (action == GEOIP_CLIENT_CONNECT) {
    /* Connections are counted either for entry statistics or for bridge
     * usage statistics, depending on how we were built. */
#ifdef ENABLE_ENTRY_STATS
    if (!options->EntryStatistics)
      return;
#else
    if (!(options->BridgeRelay && options->BridgeRecordUsageByCountry))
      return;
#endif
    /* Did we recently switch from bridge to relay or back? */
    if (client_history_starts > now)
      return;
  } else {
    /* Directory requests are only counted when built with dirreq stats, and
     * then only by non-bridges with DirReqStatistics enabled. */
#ifndef ENABLE_DIRREQ_STATS
    return;
#else
    if (options->BridgeRelay || options->BridgeAuthoritativeDir ||
        !options->DirReqStatistics)
      return;
#endif
  }

  /* Rotate the current request period. */
  while (current_request_period_starts + REQUEST_HIST_PERIOD < now) {
    if (!geoip_countries)
      geoip_countries = smartlist_create();
    /* Very first call: just start the period now; there is nothing to
     * rotate yet. */
    if (!current_request_period_starts) {
      current_request_period_starts = now;
      break;
    }
    /* Also discard all items in the client history that are too old.
     * (This only works here because bridge and directory stats are
     * independent. Otherwise, we'd only want to discard those items
     * with action GEOIP_CLIENT_NETWORKSTATUS{_V2}.) */
    geoip_remove_old_clients(current_request_period_starts);
    /* Before rotating, write the current stats to disk. */
    dump_geoip_stats();
    if (get_options()->EntryStatistics)
      dump_entry_stats();
    /* Now rotate request period */
    SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, {
        /* Shift every slot one position towards the front and clear the
         * last slot for the new period. */
        memmove(&c->n_v2_ns_requests[0], &c->n_v2_ns_requests[1],
                sizeof(uint32_t)*(REQUEST_HIST_LEN-1));
        memmove(&c->n_v3_ns_requests[0], &c->n_v3_ns_requests[1],
                sizeof(uint32_t)*(REQUEST_HIST_LEN-1));
        c->n_v2_ns_requests[REQUEST_HIST_LEN-1] = 0;
        c->n_v3_ns_requests[REQUEST_HIST_LEN-1] = 0;
      });
    current_request_period_starts += REQUEST_HIST_PERIOD;
    if (n_old_request_periods < REQUEST_HIST_LEN-1)
      ++n_old_request_periods;
  }

  /* Find or create the history entry for this (address, action) pair and
   * stamp it with the current time in minutes. */
  lookup.ipaddr = addr;
  lookup.action = (int)action;
  ent = HT_FIND(clientmap, &client_history, &lookup);
  if (ent) {
    ent->last_seen_in_minutes = now / 60;
  } else {
    ent = tor_malloc_zero(sizeof(clientmap_entry_t));
    ent->ipaddr = addr;
    ent->last_seen_in_minutes = now / 60;
    ent->action = (int)action;
    HT_INSERT(clientmap, &client_history, ent);
  }

  if (action == GEOIP_CLIENT_NETWORKSTATUS ||
      action == GEOIP_CLIENT_NETWORKSTATUS_V2) {
    int country_idx = geoip_get_country_by_ip(addr);
    if (country_idx < 0)
      country_idx = 0; /** unresolved requests are stored at index 0. */
    /* NOTE(review): this reads geoip_countries without a NULL check; it
     * relies on the rotation loop above (or a loaded database) having
     * created the list -- confirm that clear_geoip_db() cannot leave it
     * NULL while a request period is still running. */
    if (country_idx >= 0 && country_idx < smartlist_len(geoip_countries)) {
      geoip_country_t *country = smartlist_get(geoip_countries, country_idx);
      if (action == GEOIP_CLIENT_NETWORKSTATUS)
        ++country->n_v3_ns_requests[REQUEST_HIST_LEN-1];
      else
        ++country->n_v2_ns_requests[REQUEST_HIST_LEN-1];
    }

    /* Periodically determine share of requests that we should see */
    if (last_time_determined_shares + REQUEST_SHARE_INTERVAL < now)
      geoip_determine_shares(now);
  }

  /* First client ever recorded: this is where our history begins. */
  if (!client_history_starts) {
    client_history_starts = now;
    current_request_period_starts = now;
  }
}
470 /** HT_FOREACH helper: remove a clientmap_entry_t from the hashtable if it's
471 * older than a certain time. */
472 static int
473 _remove_old_client_helper(struct clientmap_entry_t *ent, void *_cutoff)
475 time_t cutoff = *(time_t*)_cutoff / 60;
476 if (ent->last_seen_in_minutes < cutoff) {
477 tor_free(ent);
478 return 1;
479 } else {
480 return 0;
484 /** Forget about all clients that haven't connected since <b>cutoff</b>.
485 * If <b>cutoff</b> is in the future, clients won't be added to the history
486 * until this time is reached. This is useful to prevent relays that switch
487 * to bridges from reporting unbelievable numbers of clients. */
488 void
489 geoip_remove_old_clients(time_t cutoff)
491 clientmap_HT_FOREACH_FN(&client_history,
492 _remove_old_client_helper,
493 &cutoff);
494 if (client_history_starts < cutoff)
495 client_history_starts = cutoff;
#ifdef ENABLE_DIRREQ_STATS
/** How many responses are we giving to clients requesting v2 network
 * statuses? Indexed by geoip_ns_response_t. */
static uint32_t ns_v2_responses[GEOIP_NS_RESPONSE_NUM];

/** How many responses are we giving to clients requesting v3 network
 * statuses? Indexed by geoip_ns_response_t. */
static uint32_t ns_v3_responses[GEOIP_NS_RESPONSE_NUM];
#endif
508 /** Note that we've rejected a client's request for a v2 or v3 network
509 * status, encoded in <b>action</b> for reason <b>reason</b> at time
510 * <b>now</b>. */
511 void
512 geoip_note_ns_response(geoip_client_action_t action,
513 geoip_ns_response_t response)
515 #ifdef ENABLE_DIRREQ_STATS
516 static int arrays_initialized = 0;
517 if (!get_options()->DirReqStatistics)
518 return;
519 if (!arrays_initialized) {
520 memset(ns_v2_responses, 0, sizeof(ns_v2_responses));
521 memset(ns_v3_responses, 0, sizeof(ns_v3_responses));
522 arrays_initialized = 1;
524 tor_assert(action == GEOIP_CLIENT_NETWORKSTATUS ||
525 action == GEOIP_CLIENT_NETWORKSTATUS_V2);
526 tor_assert(response < GEOIP_NS_RESPONSE_NUM);
527 if (action == GEOIP_CLIENT_NETWORKSTATUS)
528 ns_v3_responses[response]++;
529 else
530 ns_v2_responses[response]++;
531 #else
532 (void) action;
533 (void) response;
534 #endif
/** Do not mention any country from which fewer than this number of IPs have
 * connected. This conceivably avoids reporting information that could
 * deanonymize users, though analysis is lacking. */
#define MIN_IPS_TO_NOTE_COUNTRY 1
/** Do not report any geoip data at all if we have fewer than this number of
 * IPs to report about. */
#define MIN_IPS_TO_NOTE_ANYTHING 1
/** When reporting geoip data about countries, round up to the next
 * multiple of this value. (Builds with ENABLE_DIRREQ_STATS use
 * DIR_RECORD_USAGE_GRANULARITY instead.) */
#define IP_GRANULARITY 8
548 /** Return the time at which we started recording geoip data. */
549 time_t
550 geoip_get_history_start(void)
552 return client_history_starts;
/** Helper type: used to sort per-country totals by value. */
typedef struct c_hist_t {
  char country[3]; /**< Two-letter country code. */
  unsigned total; /**< Total IP addresses seen in this country. */
} c_hist_t;

/** Sorting helper: compare two c_hist_t. Entries with a larger total sort
 * first (-1); entries with equal totals are ordered by strcmp() of their
 * country codes. */
static int
_c_hist_compare(const void **_a, const void **_b)
{
  const c_hist_t *lhs = *_a;
  const c_hist_t *rhs = *_b;

  if (lhs->total != rhs->total)
    return (lhs->total > rhs->total) ? -1 : 1;
  return strcmp(lhs->country, rhs->country);
}
/** When there are incomplete directory requests at the end of a 24-hour
 * period, consider those requests running for longer than this timeout (in
 * seconds) as failed, the others as still running. */
#define DIRREQ_TIMEOUT (10*60)

/** Entry in a map from either conn->global_identifier for direct requests
 * or a unique circuit identifier for tunneled requests to request time,
 * response size, and completion time of a network status request. Used to
 * measure download times of requests to derive average client
 * bandwidths. */
typedef struct dirreq_map_entry_t {
  /** Unique identifier for this network status request; this is either the
   * conn->global_identifier of the dir conn (direct request) or a new
   * locally unique identifier of a circuit (tunneled request). This ID is
   * only unique among other direct or tunneled requests, respectively. */
  uint64_t dirreq_id;
  unsigned int state:3; /**< State of this directory request. */
  unsigned int type:1; /**< Is this a direct or a tunneled request? */
  unsigned int completed:1; /**< Is this request complete? */
  unsigned int action:2; /**< Is this a v2 or v3 request? */
  /** When did we receive the request and start sending the response? */
  struct timeval request_time;
  size_t response_size; /**< What is the size of the response in bytes? */
  struct timeval completion_time; /**< When did the request succeed? */
} dirreq_map_entry_t;

/** Map of all directory requests asking for v2 or v3 network statuses in
 * the current geoip-stats interval. Keys are strings starting with either
 * "dir" for direct requests or "tun" for tunneled requests, followed by
 * a unique uint64_t identifier represented as decimal string. Values are
 * of type *<b>dirreq_map_entry_t</b>. The map is created lazily by the
 * put/get helpers. */
static strmap_t *dirreq_map = NULL;
609 /** Helper: Put <b>entry</b> into map of directory requests using
610 * <b>tunneled</b> and <b>dirreq_id</b> as key parts. If there is
611 * already an entry for that key, print out a BUG warning and return. */
612 static void
613 _dirreq_map_put(dirreq_map_entry_t *entry, dirreq_type_t type,
614 uint64_t dirreq_id)
616 char key[3+20+1]; /* dir|tun + 18446744073709551616 + \0 */
617 dirreq_map_entry_t *ent;
618 if (!dirreq_map)
619 dirreq_map = strmap_new();
620 tor_snprintf(key, sizeof(key), "%s"U64_FORMAT,
621 type == DIRREQ_TUNNELED ? "tun" : "dir",
622 U64_PRINTF_ARG(dirreq_id));
623 ent = strmap_get(dirreq_map, key);
624 if (ent) {
625 log_warn(LD_BUG, "Error when putting directory request into local "
626 "map. There is already an entry for the same identifier.");
627 return;
629 strmap_set(dirreq_map, key, entry);
632 /** Helper: Look up and return an entry in the map of directory requests
633 * using <b>tunneled</b> and <b>dirreq_id</b> as key parts. If there
634 * is no such entry, return NULL. */
635 static dirreq_map_entry_t *
636 _dirreq_map_get(dirreq_type_t type, uint64_t dirreq_id)
638 char key[3+20+1]; /* dir|tun + 18446744073709551616 + \0 */
639 if (!dirreq_map)
640 dirreq_map = strmap_new();
641 tor_snprintf(key, sizeof(key), "%s"U64_FORMAT,
642 type == DIRREQ_TUNNELED ? "tun" : "dir",
643 U64_PRINTF_ARG(dirreq_id));
644 return strmap_get(dirreq_map, key);
647 /** Note that an either direct or tunneled (see <b>type</b>) directory
648 * request for a network status with unique ID <b>dirreq_id</b> of size
649 * <b>response_size</b> and action <b>action</b> (either v2 or v3) has
650 * started. */
651 void
652 geoip_start_dirreq(uint64_t dirreq_id, size_t response_size,
653 geoip_client_action_t action, dirreq_type_t type)
655 dirreq_map_entry_t *ent;
656 if (!get_options()->DirReqStatistics)
657 return;
658 ent = tor_malloc_zero(sizeof(dirreq_map_entry_t));
659 ent->dirreq_id = dirreq_id;
660 tor_gettimeofday(&ent->request_time);
661 ent->response_size = response_size;
662 ent->action = action;
663 ent->type = type;
664 _dirreq_map_put(ent, type, dirreq_id);
667 /** Change the state of the either direct or tunneled (see <b>type</b>)
668 * directory request with <b>dirreq_id</b> to <b>new_state</b> and
669 * possibly mark it as completed. If no entry can be found for the given
670 * key parts (e.g., if this is a directory request that we are not
671 * measuring, or one that was started in the previous measurement period),
672 * or if the state cannot be advanced to <b>new_state</b>, do nothing. */
673 void
674 geoip_change_dirreq_state(uint64_t dirreq_id, dirreq_type_t type,
675 dirreq_state_t new_state)
677 dirreq_map_entry_t *ent;
678 if (!get_options()->DirReqStatistics)
679 return;
680 ent = _dirreq_map_get(type, dirreq_id);
681 if (!ent)
682 return;
683 if (new_state == DIRREQ_IS_FOR_NETWORK_STATUS)
684 return;
685 if (new_state - 1 != ent->state)
686 return;
687 ent->state = new_state;
688 if ((type == DIRREQ_DIRECT &&
689 new_state == DIRREQ_FLUSHING_DIR_CONN_FINISHED) ||
690 (type == DIRREQ_TUNNELED &&
691 new_state == DIRREQ_OR_CONN_BUFFER_FLUSHED)) {
692 tor_gettimeofday(&ent->completion_time);
693 ent->completed = 1;
#ifdef ENABLE_DIRREQ_STATS
/** Return a newly allocated comma-separated string containing statistics
 * on network status downloads of version <b>action</b> (v2 or v3) and kind
 * <b>type</b> (direct or tunneled). The string contains the number of
 * completed requests, timeouts, and still running requests, each rounded up
 * to a multiple of DIR_REQ_GRANULARITY. If at least MIN_DIR_REQ_RESPONSES
 * (rounded) requests completed, it also contains the minimum, deciles,
 * quartiles and maximum of the observed download rates in bytes per second.
 *
 * All matching requests are removed from the request map and freed.
 * Return NULL if there is no request map, if <b>action</b> is not a network
 * status action, or if formatting fails. The caller owns the result. */
static char *
geoip_get_dirreq_history(geoip_client_action_t action,
                           dirreq_type_t type)
{
  char *result = NULL;
  smartlist_t *dirreq_times = NULL;
  uint32_t complete = 0, timeouts = 0, running = 0;
  int bufsize = 1024, written;
  struct timeval now;
  tor_gettimeofday(&now);
  if (!dirreq_map)
    return NULL;
  if (action != GEOIP_CLIENT_NETWORKSTATUS &&
      action != GEOIP_CLIENT_NETWORKSTATUS_V2)
    return NULL;
  dirreq_times = smartlist_create();
  STRMAP_FOREACH_MODIFY(dirreq_map, key, dirreq_map_entry_t *, ent) {
    if (ent->action == action && type == ent->type) {
      if (ent->completed) {
        uint32_t *bytes_per_second = tor_malloc_zero(sizeof(uint32_t));
        uint32_t time_diff = (uint32_t) tv_udiff(&ent->request_time,
                                                 &ent->completion_time);
        uint64_t rate;
        if (time_diff == 0)
          time_diff = 1; /* Avoid DIV/0; "instant" answers are impossible
                          * anyway by law of nature or something.. */
        /* Multiply in 64 bits: 1000000 * response_size overflows a 32-bit
         * size_t for responses larger than about 4 KB. */
        rate = (uint64_t)1000000 * (uint64_t)ent->response_size / time_diff;
        *bytes_per_second = rate > UINT32_MAX ? UINT32_MAX : (uint32_t)rate;
        smartlist_add(dirreq_times, bytes_per_second);
        complete++;
      } else {
        if (tv_udiff(&ent->request_time, &now) / 1000000 > DIRREQ_TIMEOUT)
          timeouts++;
        else
          running++;
      }
      tor_free(ent);
      MAP_DEL_CURRENT(key);
    }
  } STRMAP_FOREACH_END;
#define DIR_REQ_GRANULARITY 4
  complete = round_uint32_to_next_multiple_of(complete,
                                              DIR_REQ_GRANULARITY);
  timeouts = round_uint32_to_next_multiple_of(timeouts,
                                              DIR_REQ_GRANULARITY);
  running = round_uint32_to_next_multiple_of(running,
                                             DIR_REQ_GRANULARITY);
  result = tor_malloc_zero(bufsize);
  written = tor_snprintf(result, bufsize, "complete=%u,timeout=%u,"
                         "running=%u", complete, timeouts, running);
  if (written < 0) {
    tor_free(result);
    goto done;
  }
#define MIN_DIR_REQ_RESPONSES 16
  if (complete >= MIN_DIR_REQ_RESPONSES) {
    /* 'complete' may have been rounded up above. Size and index the sample
     * array by the real number of samples, so that we never read elements
     * that were not filled in. The rounded threshold guarantees at least
     * 13 real samples, which keeps every index below non-negative. */
    const int n_samples = smartlist_len(dirreq_times);
    uint32_t *dltimes = tor_malloc(sizeof(uint32_t) * n_samples);
    int i = 0;
    int appended;
    SMARTLIST_FOREACH(dirreq_times, uint32_t *, dlt, dltimes[i++] = *dlt);
    median_uint32(dltimes, n_samples); /* sort */
    appended = tor_snprintf(result + written, bufsize - written,
                            ",min=%u,d1=%u,d2=%u,q1=%u,d3=%u,d4=%u,md=%u,"
                            "d6=%u,d7=%u,q3=%u,d8=%u,d9=%u,max=%u",
                            dltimes[0],
                            dltimes[1*n_samples/10-1],
                            dltimes[2*n_samples/10-1],
                            dltimes[1*n_samples/4-1],
                            dltimes[3*n_samples/10-1],
                            dltimes[4*n_samples/10-1],
                            dltimes[5*n_samples/10-1],
                            dltimes[6*n_samples/10-1],
                            dltimes[7*n_samples/10-1],
                            dltimes[3*n_samples/4-1],
                            dltimes[8*n_samples/10-1],
                            dltimes[9*n_samples/10-1],
                            dltimes[n_samples-1]);
    tor_free(dltimes);
    if (appended < 0)
      tor_free(result); /* Also sets result to NULL. */
  }
 done:
  /* The list owns its samples; free them whether or not they were
   * reported. */
  SMARTLIST_FOREACH(dirreq_times, uint32_t *, dlt, tor_free(dlt));
  smartlist_free(dirreq_times);
  return result;
}
#endif
786 /** How long do we have to have observed per-country request history before we
787 * are willing to talk about it? */
788 #define GEOIP_MIN_OBSERVATION_TIME (12*60*60)
790 /** Return a newly allocated comma-separated string containing entries for all
791 * the countries from which we've seen enough clients connect. The entry
792 * format is cc=num where num is the number of IPs we've seen connecting from
793 * that country, and cc is a lowercased country code. Returns NULL if we don't
794 * want to export geoip data yet. */
795 char *
796 geoip_get_client_history(time_t now, geoip_client_action_t action)
798 char *result = NULL;
799 int min_observation_time = GEOIP_MIN_OBSERVATION_TIME;
800 #ifdef ENABLE_DIRREQ_STATS
801 min_observation_time = DIR_RECORD_USAGE_MIN_OBSERVATION_TIME;
802 #endif
803 if (!geoip_is_loaded())
804 return NULL;
805 if (client_history_starts < (now - min_observation_time)) {
806 char buf[32];
807 smartlist_t *chunks = NULL;
808 smartlist_t *entries = NULL;
809 int n_countries = geoip_get_n_countries();
810 int i;
811 clientmap_entry_t **ent;
812 unsigned *counts = tor_malloc_zero(sizeof(unsigned)*n_countries);
813 unsigned total = 0;
814 unsigned granularity = IP_GRANULARITY;
815 #ifdef ENABLE_DIRREQ_STATS
816 granularity = DIR_RECORD_USAGE_GRANULARITY;
817 #endif
818 HT_FOREACH(ent, clientmap, &client_history) {
819 int country;
820 if ((*ent)->action != (int)action)
821 continue;
822 country = geoip_get_country_by_ip((*ent)->ipaddr);
823 if (country < 0)
824 country = 0; /** unresolved requests are stored at index 0. */
825 tor_assert(0 <= country && country < n_countries);
826 ++counts[country];
827 ++total;
829 /* Don't record anything if we haven't seen enough IPs. */
830 if (total < MIN_IPS_TO_NOTE_ANYTHING)
831 goto done;
832 /* Make a list of c_hist_t */
833 entries = smartlist_create();
834 for (i = 0; i < n_countries; ++i) {
835 unsigned c = counts[i];
836 const char *countrycode;
837 c_hist_t *ent;
838 /* Only report a country if it has a minimum number of IPs. */
839 if (c >= MIN_IPS_TO_NOTE_COUNTRY) {
840 c = round_to_next_multiple_of(c, granularity);
841 countrycode = geoip_get_country_name(i);
842 ent = tor_malloc(sizeof(c_hist_t));
843 strlcpy(ent->country, countrycode, sizeof(ent->country));
844 ent->total = c;
845 smartlist_add(entries, ent);
848 /* Sort entries. Note that we must do this _AFTER_ rounding, or else
849 * the sort order could leak info. */
850 smartlist_sort(entries, _c_hist_compare);
852 /* Build the result. */
853 chunks = smartlist_create();
854 SMARTLIST_FOREACH(entries, c_hist_t *, ch, {
855 tor_snprintf(buf, sizeof(buf), "%s=%u", ch->country, ch->total);
856 smartlist_add(chunks, tor_strdup(buf));
858 result = smartlist_join_strings(chunks, ",", 0, NULL);
859 done:
860 tor_free(counts);
861 if (chunks) {
862 SMARTLIST_FOREACH(chunks, char *, c, tor_free(c));
863 smartlist_free(chunks);
865 if (entries) {
866 SMARTLIST_FOREACH(entries, c_hist_t *, c, tor_free(c));
867 smartlist_free(entries);
870 return result;
873 /** Return a newly allocated string holding the per-country request history
874 * for <b>action</b> in a format suitable for an extra-info document, or NULL
875 * on failure. */
876 char *
877 geoip_get_request_history(time_t now, geoip_client_action_t action)
879 smartlist_t *entries, *strings;
880 char *result;
881 unsigned granularity = IP_GRANULARITY;
882 int min_observation_time = GEOIP_MIN_OBSERVATION_TIME;
883 #ifdef ENABLE_DIRREQ_STATS
884 granularity = DIR_RECORD_USAGE_GRANULARITY;
885 min_observation_time = DIR_RECORD_USAGE_MIN_OBSERVATION_TIME;
886 #endif
888 if (client_history_starts >= (now - min_observation_time))
889 return NULL;
890 if (action != GEOIP_CLIENT_NETWORKSTATUS &&
891 action != GEOIP_CLIENT_NETWORKSTATUS_V2)
892 return NULL;
893 if (!geoip_countries)
894 return NULL;
896 entries = smartlist_create();
897 SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, {
898 uint32_t *n = (action == GEOIP_CLIENT_NETWORKSTATUS)
899 ? c->n_v3_ns_requests : c->n_v2_ns_requests;
900 uint32_t tot = 0;
901 int i;
902 c_hist_t *ent;
903 for (i=0; i < REQUEST_HIST_LEN; ++i)
904 tot += n[i];
905 if (!tot)
906 continue;
907 ent = tor_malloc_zero(sizeof(c_hist_t));
908 strlcpy(ent->country, c->countrycode, sizeof(ent->country));
909 ent->total = round_to_next_multiple_of(tot, granularity);
910 smartlist_add(entries, ent);
912 smartlist_sort(entries, _c_hist_compare);
914 strings = smartlist_create();
915 SMARTLIST_FOREACH(entries, c_hist_t *, ent, {
916 char buf[32];
917 tor_snprintf(buf, sizeof(buf), "%s=%u", ent->country, ent->total);
918 smartlist_add(strings, tor_strdup(buf));
920 result = smartlist_join_strings(strings, ",", 0, NULL);
921 SMARTLIST_FOREACH(strings, char *, cp, tor_free(cp));
922 SMARTLIST_FOREACH(entries, c_hist_t *, ent, tor_free(ent));
923 smartlist_free(strings);
924 smartlist_free(entries);
925 return result;
/** Append a snapshot of our directory-request GeoIP statistics to
 * $DATADIR/dirreq-stats.
 *
 * The body is compiled only when ENABLE_DIRREQ_STATS is defined, and nothing
 * is written unless the DirReqStatistics option is set.
 *
 * Side effects: the response counters ns_v2_responses and ns_v3_responses
 * are rounded up in place to a multiple of RESPONSE_GRANULARITY before being
 * written, and are reset to zero once both response lines have been written.
 * If the file cannot be opened or any write fails, the pending write is
 * abandoned with abort_writing_to_file(). */
static void
dump_geoip_stats(void)
{
#ifdef ENABLE_DIRREQ_STATS
  time_t now = time(NULL);
  time_t request_start;
  char *filename = get_datadir_fname("dirreq-stats");
  char *data_v2 = NULL, *data_v3 = NULL;
  char since[ISO_TIME_LEN+1], written[ISO_TIME_LEN+1];
  open_file_t *open_file = NULL;
  double v2_share = 0.0, v3_share = 0.0;
  FILE *out;
  int i;

  if (!get_options()->DirReqStatistics)
    goto done;

  /* Section 1: per-country client IP counts. */
  data_v2 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS_V2);
  data_v3 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS);
  format_iso_time(since, geoip_get_history_start());
  format_iso_time(written, now);
  out = start_writing_to_stdio_file(filename, OPEN_FLAGS_APPEND,
                                    0600, &open_file);
  if (!out)
    goto done;
  if (fprintf(out, "written %s\nstarted-at %s\nns-ips %s\nns-v2-ips %s\n",
              written, since,
              data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0)
    goto done;
  tor_free(data_v2);
  tor_free(data_v3);

  /* Section 2: per-country request counts. */
  request_start = current_request_period_starts -
    (n_old_request_periods * REQUEST_HIST_PERIOD);
  format_iso_time(since, request_start);
  data_v2 = geoip_get_request_history(now, GEOIP_CLIENT_NETWORKSTATUS_V2);
  data_v3 = geoip_get_request_history(now, GEOIP_CLIENT_NETWORKSTATUS);
  if (fprintf(out, "requests-start %s\nn-ns-reqs %s\nn-v2-ns-reqs %s\n",
              since,
              data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0)
    goto done;

  /* Section 3: response-status counters, rounded up in place so that exact
   * counts are never written. */
#define RESPONSE_GRANULARITY 8
  for (i = 0; i < GEOIP_NS_RESPONSE_NUM; i++) {
    ns_v2_responses[i] = round_uint32_to_next_multiple_of(
                               ns_v2_responses[i], RESPONSE_GRANULARITY);
    ns_v3_responses[i] = round_uint32_to_next_multiple_of(
                               ns_v3_responses[i], RESPONSE_GRANULARITY);
  }
#undef RESPONSE_GRANULARITY
  if (fprintf(out, "n-ns-resp ok=%u,not-enough-sigs=%u,unavailable=%u,"
                   "not-found=%u,not-modified=%u,busy=%u\n",
              ns_v3_responses[GEOIP_SUCCESS],
              ns_v3_responses[GEOIP_REJECT_NOT_ENOUGH_SIGS],
              ns_v3_responses[GEOIP_REJECT_UNAVAILABLE],
              ns_v3_responses[GEOIP_REJECT_NOT_FOUND],
              ns_v3_responses[GEOIP_REJECT_NOT_MODIFIED],
              ns_v3_responses[GEOIP_REJECT_BUSY]) < 0)
    goto done;
  if (fprintf(out, "n-v2-ns-resp ok=%u,unavailable=%u,"
                   "not-found=%u,not-modified=%u,busy=%u\n",
              ns_v2_responses[GEOIP_SUCCESS],
              ns_v2_responses[GEOIP_REJECT_UNAVAILABLE],
              ns_v2_responses[GEOIP_REJECT_NOT_FOUND],
              ns_v2_responses[GEOIP_REJECT_NOT_MODIFIED],
              ns_v2_responses[GEOIP_REJECT_BUSY]) < 0)
    goto done;
  /* Both response lines are out; start counting afresh. */
  memset(ns_v2_responses, 0, sizeof(ns_v2_responses));
  memset(ns_v3_responses, 0, sizeof(ns_v3_responses));

  /* Section 4: request shares.  NOTE(review): a zero return from
   * geoip_get_mean_shares() is treated as success here -- confirm against
   * its definition. */
  if (!geoip_get_mean_shares(now, &v2_share, &v3_share)) {
    if (fprintf(out, "v2-ns-share %0.2lf%%\n", v2_share*100) < 0)
      goto done;
    if (fprintf(out, "v3-ns-share %0.2lf%%\n", v3_share*100) < 0)
      goto done;
  }

  /* Section 5: direct download statistics.  The request-history strings from
   * section 2 are still held in data_v2/data_v3; release them right before
   * the pointers are reused so they are not leaked. */
  tor_free(data_v2);
  tor_free(data_v3);
  data_v2 = geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS_V2,
                                     DIRREQ_DIRECT);
  data_v3 = geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS,
                                     DIRREQ_DIRECT);
  if (fprintf(out, "ns-direct-dl %s\nns-v2-direct-dl %s\n",
              data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0)
    goto done;

  /* Section 6: tunneled download statistics. */
  tor_free(data_v2);
  tor_free(data_v3);
  data_v2 = geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS_V2,
                                     DIRREQ_TUNNELED);
  data_v3 = geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS,
                                     DIRREQ_TUNNELED);
  if (fprintf(out, "ns-tunneled-dl %s\nns-v2-tunneled-dl %s\n",
              data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0)
    goto done;

  finish_writing_to_file(open_file);
  open_file = NULL;
 done:
  /* open_file is still set only if we bailed out before finishing. */
  if (open_file)
    abort_writing_to_file(open_file);
  tor_free(filename);
  tor_free(data_v2);
  tor_free(data_v3);
#endif
}
/** Append our entry-guard GeoIP statistics to $DATADIR/entry-stats.
 *
 * The body is compiled only when ENABLE_ENTRY_STATS is defined.  One record
 * is written, holding the time of writing, the start of the client history
 * and the per-country client history for GEOIP_CLIENT_CONNECT.  If the file
 * cannot be opened or the write fails, the pending write is abandoned with
 * abort_writing_to_file(). */
static void
dump_entry_stats(void)
{
#ifdef ENABLE_ENTRY_STATS
  time_t now = time(NULL);
  char *path = get_datadir_fname("entry-stats");
  char *ips = NULL;
  char started_at[ISO_TIME_LEN+1];
  char written_at[ISO_TIME_LEN+1];
  open_file_t *handle = NULL;
  FILE *stream;

  ips = geoip_get_client_history(now, GEOIP_CLIENT_CONNECT);
  format_iso_time(started_at, geoip_get_history_start());
  format_iso_time(written_at, now);

  stream = start_writing_to_stdio_file(path, OPEN_FLAGS_APPEND,
                                       0600, &handle);
  if (stream != NULL &&
      fprintf(stream, "written %s\nstarted-at %s\nips %s\n",
              written_at, started_at, ips ? ips : "") >= 0) {
    /* Everything was written: commit the file. */
    finish_writing_to_file(handle);
  } else if (handle != NULL) {
    /* Open or write failed while a handle is outstanding: discard it. */
    abort_writing_to_file(handle);
  }

  tor_free(path);
  tor_free(ips);
#endif
}
1066 /** Helper used to implement GETINFO ip-to-country/... controller command. */
1068 getinfo_helper_geoip(control_connection_t *control_conn,
1069 const char *question, char **answer)
1071 (void)control_conn;
1072 if (geoip_is_loaded() && !strcmpstart(question, "ip-to-country/")) {
1073 int c;
1074 uint32_t ip;
1075 struct in_addr in;
1076 question += strlen("ip-to-country/");
1077 if (tor_inet_aton(question, &in) != 0) {
1078 ip = ntohl(in.s_addr);
1079 c = geoip_get_country_by_ip(ip);
1080 *answer = tor_strdup(geoip_get_country_name(c));
1083 return 0;
1086 /** Release all storage held by the GeoIP database. */
1087 static void
1088 clear_geoip_db(void)
1090 if (geoip_countries) {
1091 SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, tor_free(c));
1092 smartlist_free(geoip_countries);
1094 if (country_idxplus1_by_lc_code)
1095 strmap_free(country_idxplus1_by_lc_code, NULL);
1096 if (geoip_entries) {
1097 SMARTLIST_FOREACH(geoip_entries, geoip_entry_t *, ent, tor_free(ent));
1098 smartlist_free(geoip_entries);
1100 geoip_countries = NULL;
1101 country_idxplus1_by_lc_code = NULL;
1102 geoip_entries = NULL;
1105 /** Release all storage held in this file. */
1106 void
1107 geoip_free_all(void)
1109 clientmap_entry_t **ent, **next, *this;
1110 for (ent = HT_START(clientmap, &client_history); ent != NULL; ent = next) {
1111 this = *ent;
1112 next = HT_NEXT_RMV(clientmap, &client_history, ent);
1113 tor_free(this);
1115 HT_CLEAR(clientmap, &client_history);
1117 clear_geoip_db();