Fix a couple of smaller issues with gathering statistics.
[tor/rransom.git] / src / or / geoip.c
blob5b40c2e058b1efdf28007336f04e354dbbe856ff
1 /* Copyright (c) 2007-2009, The Tor Project, Inc. */
2 /* See LICENSE for licensing information */
4 /**
5 * \file geoip.c
6 * \brief Functions related to maintaining an IP-to-country database and to
7 * summarizing client connections by country.
8 */
10 #define GEOIP_PRIVATE
11 #include "or.h"
12 #include "ht.h"
14 static void clear_geoip_db(void);
16 /** One range from the GeoIP file: maps every address between ip_low and
 * ip_high (both inclusive) to a single country. */
17 typedef struct geoip_entry_t {
18 uint32_t ip_low; /**< The lowest IP in the range, in host order */
19 uint32_t ip_high; /**< The highest IP in the range, in host order */
20 intptr_t country; /**< Position of the country in geoip_countries */
21 } geoip_entry_t;
23 /** For how many periods should we remember per-country request history?
 * This is the length of the per-country request counter arrays. */
24 #define REQUEST_HIST_LEN 1
25 /** How long is each such period? The value is added to a time_t when a
 * period is rotated; presumably seconds, i.e. 24 hours. */
26 #define REQUEST_HIST_PERIOD (24*60*60)
28 /** A per-country record for GeoIP request history. */
29 typedef struct geoip_country_t {
30 char countrycode[3]; /**< Two-letter country code plus terminating NUL. */
31 uint32_t n_v2_ns_requests[REQUEST_HIST_LEN]; /**< v2 network status requests; the last slot is the current period. */
32 uint32_t n_v3_ns_requests[REQUEST_HIST_LEN]; /**< v3 network status requests; the last slot is the current period. */
33 } geoip_country_t;
35 /** A list of geoip_country_t. geoip_load_file() puts a "??" record for
 * unresolved addresses at index 0. */
36 static smartlist_t *geoip_countries = NULL;
37 /** A map from lowercased country codes to their position in geoip_countries.
38 * The index is encoded in the pointer, and 1 is added so that NULL can mean
39 * not found. */
40 static strmap_t *country_idxplus1_by_lc_code = NULL;
41 /** A list of all known geoip_entry_t. Entries are appended as they are
 * parsed; geoip_load_file() sorts the list by ip_low once parsing is done. */
42 static smartlist_t *geoip_entries = NULL;
44 /** Return the index of the <b>country</b>'s entry in the GeoIP DB
45 * if it is a valid 2-letter country code, otherwise return -1.
47 country_t
48 geoip_get_country(const char *country)
50 void *_idxplus1;
51 intptr_t idx;
53 _idxplus1 = strmap_get_lc(country_idxplus1_by_lc_code, country);
54 if (!_idxplus1)
55 return -1;
57 idx = ((uintptr_t)_idxplus1)-1;
58 return (country_t)idx;
61 /** Add an entry to the GeoIP table, mapping all IPs between <b>low</b> and
62 * <b>high</b>, inclusive, to the 2-letter country code <b>country</b>.
64 static void
65 geoip_add_entry(uint32_t low, uint32_t high, const char *country)
67 intptr_t idx;
68 geoip_entry_t *ent;
69 void *_idxplus1;
71 if (high < low)
72 return;
74 _idxplus1 = strmap_get_lc(country_idxplus1_by_lc_code, country);
76 if (!_idxplus1) {
77 geoip_country_t *c = tor_malloc_zero(sizeof(geoip_country_t));
78 strlcpy(c->countrycode, country, sizeof(c->countrycode));
79 tor_strlower(c->countrycode);
80 smartlist_add(geoip_countries, c);
81 idx = smartlist_len(geoip_countries) - 1;
82 strmap_set_lc(country_idxplus1_by_lc_code, country, (void*)(idx+1));
83 } else {
84 idx = ((uintptr_t)_idxplus1)-1;
87 geoip_country_t *c = smartlist_get(geoip_countries, idx);
88 tor_assert(!strcasecmp(c->countrycode, country));
90 ent = tor_malloc_zero(sizeof(geoip_entry_t));
91 ent->ip_low = low;
92 ent->ip_high = high;
93 ent->country = idx;
94 smartlist_add(geoip_entries, ent);
97 /** Add an entry to the GeoIP table, parsing it from <b>line</b>. The
98 * format is as for geoip_load_file(). */
99 /*private*/ int
100 geoip_parse_entry(const char *line)
102 unsigned int low, high;
103 char b[3];
104 if (!geoip_countries) {
105 geoip_countries = smartlist_create();
106 geoip_entries = smartlist_create();
107 country_idxplus1_by_lc_code = strmap_new();
109 while (TOR_ISSPACE(*line))
110 ++line;
111 if (*line == '#')
112 return 0;
113 if (sscanf(line,"%u,%u,%2s", &low, &high, b) == 3) {
114 geoip_add_entry(low, high, b);
115 return 0;
116 } else if (sscanf(line,"\"%u\",\"%u\",\"%2s\",", &low, &high, b) == 3) {
117 geoip_add_entry(low, high, b);
118 return 0;
119 } else {
120 log_warn(LD_GENERAL, "Unable to parse line from GEOIP file: %s",
121 escaped(line));
122 return -1;
126 /** Sorting helper: return -1, 1, or 0 based on comparison of two
127 * geoip_entry_t */
128 static int
129 _geoip_compare_entries(const void **_a, const void **_b)
131 const geoip_entry_t *a = *_a, *b = *_b;
132 if (a->ip_low < b->ip_low)
133 return -1;
134 else if (a->ip_low > b->ip_low)
135 return 1;
136 else
137 return 0;
140 /** bsearch helper: return -1, 1, or 0 based on comparison of an IP (a pointer
141 * to a uint32_t in host order) to a geoip_entry_t */
142 static int
143 _geoip_compare_key_to_entry(const void *_key, const void **_member)
145 const uint32_t addr = *(uint32_t *)_key;
146 const geoip_entry_t *entry = *_member;
147 if (addr < entry->ip_low)
148 return -1;
149 else if (addr > entry->ip_high)
150 return 1;
151 else
152 return 0;
155 /** Return 1 if we should collect geoip stats on bridge users, and
156 * include them in our extrainfo descriptor. Else return 0. */
158 should_record_bridge_info(or_options_t *options)
160 return options->BridgeRelay && options->BridgeRecordUsageByCountry;
163 /** Clear the GeoIP database and reload it from the file
164 * <b>filename</b>. Return 0 on success, -1 on failure.
166 * Recognized line formats are:
167 * INTIPLOW,INTIPHIGH,CC
168 * and
169 * "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME"
170 * where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned
171 * integers, and CC is a country code.
173 * It also recognizes, and skips over, blank lines and lines that start
174 * with '#' (comments).
177 geoip_load_file(const char *filename, or_options_t *options)
179 FILE *f;
180 const char *msg = "";
181 int severity = options_need_geoip_info(options, &msg) ? LOG_WARN : LOG_INFO;
182 clear_geoip_db();
183 if (!(f = fopen(filename, "r"))) {
184 log_fn(severity, LD_GENERAL, "Failed to open GEOIP file %s. %s",
185 filename, msg);
186 return -1;
188 if (!geoip_countries) {
189 geoip_country_t *geoip_unresolved;
190 geoip_countries = smartlist_create();
191 /* Add a geoip_country_t for requests that could not be resolved to a
192 * country as first element (index 0) to geoip_countries. */
193 geoip_unresolved = tor_malloc_zero(sizeof(geoip_country_t));
194 strlcpy(geoip_unresolved->countrycode, "??",
195 sizeof(geoip_unresolved->countrycode));
196 smartlist_add(geoip_countries, geoip_unresolved);
197 country_idxplus1_by_lc_code = strmap_new();
199 if (geoip_entries) {
200 SMARTLIST_FOREACH(geoip_entries, geoip_entry_t *, e, tor_free(e));
201 smartlist_free(geoip_entries);
203 geoip_entries = smartlist_create();
204 log_notice(LD_GENERAL, "Parsing GEOIP file.");
205 while (!feof(f)) {
206 char buf[512];
207 if (fgets(buf, (int)sizeof(buf), f) == NULL)
208 break;
209 /* FFFF track full country name. */
210 geoip_parse_entry(buf);
212 /*XXXX abort and return -1 if no entries/illformed?*/
213 fclose(f);
215 smartlist_sort(geoip_entries, _geoip_compare_entries);
217 /* Okay, now we need to maybe change our mind about what is in which
218 * country. */
219 refresh_all_country_info();
221 return 0;
224 /** Given an IP address in host order, return a number representing the
225 * country to which that address belongs, or -1 for unknown. The return value
226 * will always be less than geoip_get_n_countries(). To decode it,
227 * call geoip_get_country_name().
230 geoip_get_country_by_ip(uint32_t ipaddr)
232 geoip_entry_t *ent;
233 if (!geoip_entries)
234 return -1;
235 ent = smartlist_bsearch(geoip_entries, &ipaddr, _geoip_compare_key_to_entry);
236 return ent ? (int)ent->country : -1;
239 /** Return the number of countries recognized by the GeoIP database. */
241 geoip_get_n_countries(void)
243 return (int) smartlist_len(geoip_countries);
246 /** Return the two-letter country code associated with the number <b>num</b>,
247 * or "??" for an unknown value. */
248 const char *
249 geoip_get_country_name(country_t num)
251 if (geoip_countries && num >= 0 && num < smartlist_len(geoip_countries)) {
252 geoip_country_t *c = smartlist_get(geoip_countries, num);
253 return c->countrycode;
254 } else
255 return "??";
258 /** Return true iff we have loaded a GeoIP database.*/
260 geoip_is_loaded(void)
262 return geoip_countries != NULL && geoip_entries != NULL;
265 /** Entry in a map from (IP address, action) to the last time we've seen
266 * that address perform that action. geoip_note_client_seen() adds entries
267 * for client connections and for network status requests alike. */
268 typedef struct clientmap_entry_t {
269 HT_ENTRY(clientmap_entry_t) node; /**< Hash table linkage. */
270 uint32_t ipaddr; /**< Client address; forms the key together with action. */
271 unsigned int last_seen_in_minutes:30; /**< Time last seen, stored as now/60. */
272 unsigned int action:2; /**< The geoip_client_action_t, stored as an int. */
273 } clientmap_entry_t;
/* NOTE(review): presumably a mask for the two-bit action field of
 * clientmap_entry_t; nothing in the visible code uses it -- confirm. */
275 #define ACTION_MASK 3
277 /** Map from client IP address (and action) to last time seen. */
278 static HT_HEAD(clientmap, clientmap_entry_t) client_history =
279 HT_INITIALIZER();
280 /** Time at which we started tracking client IP history; 0 until the first
 * client has been noted. */
281 static time_t client_history_starts = 0;
283 /** When did the current period of checking per-country request history
284 * start? */
285 static time_t current_request_period_starts = 0;
286 /** How many older request periods are we remembering? */
287 static int n_old_request_periods = 0;
289 /** Hashtable helper: compute a hash of a clientmap_entry_t. */
290 static INLINE unsigned
291 clientmap_entry_hash(const clientmap_entry_t *a)
293 return ht_improve_hash((unsigned) a->ipaddr);
295 /** Hashtable helper: compare two clientmap_entry_t values for equality. */
296 static INLINE int
297 clientmap_entries_eq(const clientmap_entry_t *a, const clientmap_entry_t *b)
299 return a->ipaddr == b->ipaddr && a->action == b->action;
/* Instantiate the clientmap hash table type for clientmap_entry_t, using
 * the hash and equality helpers defined just above. */
302 HT_PROTOTYPE(clientmap, clientmap_entry_t, node, clientmap_entry_hash,
303 clientmap_entries_eq);
304 HT_GENERATE(clientmap, clientmap_entry_t, node, clientmap_entry_hash,
305 clientmap_entries_eq, 0.6, malloc, realloc, free);
307 /** How often do we update our estimate which share of v2 and v3 directory
308 * requests is sent to us? We could as well trigger updates of shares from
309 * network status updates, but that means adding a lot of calls into code
310 * that is independent from geoip stats (and keeping them up-to-date). We
311 * are perfectly fine with an approximation of 15-minute granularity. */
312 #define REQUEST_SHARE_INTERVAL (15 * 60)
314 /** When did we last determine which share of v2 and v3 directory requests
315 * is sent to us? Zero means we have not determined it yet. */
316 static time_t last_time_determined_shares = 0;
318 /** Sum of products of v2 shares times the number of seconds for which we
319 * consider these shares as valid. */
320 static double v2_share_times_seconds;
322 /** Sum of products of v3 shares times the number of seconds for which we
323 * consider these shares as valid. */
324 static double v3_share_times_seconds;
326 /** Total number of seconds covered by the two sums above; the mean share
 * is a sum divided by this value. */
327 static int share_seconds;
329 /** Try to determine which fraction of v2 and v3 directory requests aimed at
330 * caches will be sent to us at time <b>now</b> and store that value in
331 * order to take a mean value later on. */
332 static void
333 geoip_determine_shares(time_t now)
335 double v2_share = 0.0, v3_share = 0.0;
336 if (router_get_my_share_of_directory_requests(&v2_share, &v3_share) < 0)
337 return;
338 if (last_time_determined_shares) {
339 v2_share_times_seconds += v2_share *
340 ((double) (now - last_time_determined_shares));
341 v3_share_times_seconds += v3_share *
342 ((double) (now - last_time_determined_shares));
343 share_seconds += (int)(now - last_time_determined_shares);
345 last_time_determined_shares = now;
348 /** Calculate which fraction of v2 and v3 directory requests aimed at caches
349 * have been sent to us since the last call of this function up to time
350 * <b>now</b>. Set *<b>v2_share_out</b> and *<b>v3_share_out</b> to the
351 * fractions of v2 and v3 protocol shares we expect to have seen. Reset
352 * counters afterwards. Return 0 on success, -1 on failure (e.g. when zero
353 * seconds have passed since the last call).*/
354 static int
355 geoip_get_mean_shares(time_t now, double *v2_share_out,
356 double *v3_share_out)
358 geoip_determine_shares(now);
359 if (!share_seconds)
360 return -1;
361 *v2_share_out = v2_share_times_seconds / ((double) share_seconds);
362 *v3_share_out = v3_share_times_seconds / ((double) share_seconds);
363 v2_share_times_seconds = v3_share_times_seconds = 0.0;
364 share_seconds = 0;
365 return 0;
368 /* Rotate period of v2 and v3 network status requests. */
369 static void
370 rotate_request_period(void)
372 SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, {
373 #if REQUEST_HIST_LEN > 1
374 memmove(&c->n_v2_ns_requests[0], &c->n_v2_ns_requests[1],
375 sizeof(uint32_t)*(REQUEST_HIST_LEN-1));
376 memmove(&c->n_v3_ns_requests[0], &c->n_v3_ns_requests[1],
377 sizeof(uint32_t)*(REQUEST_HIST_LEN-1));
378 #endif
379 c->n_v2_ns_requests[REQUEST_HIST_LEN-1] = 0;
380 c->n_v3_ns_requests[REQUEST_HIST_LEN-1] = 0;
382 current_request_period_starts += REQUEST_HIST_PERIOD;
383 if (n_old_request_periods < REQUEST_HIST_LEN-1)
384 ++n_old_request_periods;
387 /** Note that we've seen a client connect from the IP <b>addr</b> (host order)
388 * at time <b>now</b>. Ignored by all but bridges and directories if
389 * configured accordingly. */
390 void
391 geoip_note_client_seen(geoip_client_action_t action,
392 uint32_t addr, time_t now)
394 or_options_t *options = get_options();
395 clientmap_entry_t lookup, *ent;
396 if (action == GEOIP_CLIENT_CONNECT) {
397 /* Only remember statistics as entry guard or as bridge. */
398 if (!options->EntryStatistics &&
399 (!(options->BridgeRelay && options->BridgeRecordUsageByCountry)))
400 return;
401 /* Did we recently switch from bridge to relay or back? */
402 if (client_history_starts > now)
403 return;
404 } else {
405 if (options->BridgeRelay || options->BridgeAuthoritativeDir ||
406 !options->DirReqStatistics)
407 return;
410 /* As a bridge that doesn't rotate request periods every 24 hours,
411 * possibly rotate now. */
412 if (options->BridgeRelay) {
413 while (current_request_period_starts + REQUEST_HIST_PERIOD < now) {
414 if (!geoip_countries)
415 geoip_countries = smartlist_create();
416 if (!current_request_period_starts) {
417 current_request_period_starts = now;
418 break;
420 /* Also discard all items in the client history that are too old.
421 * (This only works here because bridge and directory stats are
422 * independent. Otherwise, we'd only want to discard those items
423 * with action GEOIP_CLIENT_NETWORKSTATUS{_V2}.) */
424 geoip_remove_old_clients(current_request_period_starts);
425 /* Now rotate request period */
426 rotate_request_period();
430 lookup.ipaddr = addr;
431 lookup.action = (int)action;
432 ent = HT_FIND(clientmap, &client_history, &lookup);
433 if (ent) {
434 ent->last_seen_in_minutes = now / 60;
435 } else {
436 ent = tor_malloc_zero(sizeof(clientmap_entry_t));
437 ent->ipaddr = addr;
438 ent->last_seen_in_minutes = now / 60;
439 ent->action = (int)action;
440 HT_INSERT(clientmap, &client_history, ent);
443 if (action == GEOIP_CLIENT_NETWORKSTATUS ||
444 action == GEOIP_CLIENT_NETWORKSTATUS_V2) {
445 int country_idx = geoip_get_country_by_ip(addr);
446 if (country_idx < 0)
447 country_idx = 0; /** unresolved requests are stored at index 0. */
448 if (country_idx >= 0 && country_idx < smartlist_len(geoip_countries)) {
449 geoip_country_t *country = smartlist_get(geoip_countries, country_idx);
450 if (action == GEOIP_CLIENT_NETWORKSTATUS)
451 ++country->n_v3_ns_requests[REQUEST_HIST_LEN-1];
452 else
453 ++country->n_v2_ns_requests[REQUEST_HIST_LEN-1];
456 /* Periodically determine share of requests that we should see */
457 if (last_time_determined_shares + REQUEST_SHARE_INTERVAL < now)
458 geoip_determine_shares(now);
461 if (!client_history_starts) {
462 client_history_starts = now;
463 current_request_period_starts = now;
467 /** HT_FOREACH helper: remove a clientmap_entry_t from the hashtable if it's
468 * older than a certain time. */
469 static int
470 _remove_old_client_helper(struct clientmap_entry_t *ent, void *_cutoff)
472 time_t cutoff = *(time_t*)_cutoff / 60;
473 if (ent->last_seen_in_minutes < cutoff) {
474 tor_free(ent);
475 return 1;
476 } else {
477 return 0;
481 /** Forget about all clients that haven't connected since <b>cutoff</b>.
482 * If <b>cutoff</b> is in the future, clients won't be added to the history
483 * until this time is reached. This is useful to prevent relays that switch
484 * to bridges from reporting unbelievable numbers of clients. */
485 void
486 geoip_remove_old_clients(time_t cutoff)
488 clientmap_HT_FOREACH_FN(&client_history,
489 _remove_old_client_helper,
490 &cutoff);
491 if (client_history_starts < cutoff)
492 client_history_starts = cutoff;
495 /** How many responses are we giving to clients requesting v2 network
496 * statuses? Indexed by geoip_ns_response_t. */
497 static uint32_t ns_v2_responses[GEOIP_NS_RESPONSE_NUM];
499 /** How many responses are we giving to clients requesting v3 network
500 * statuses? Indexed by geoip_ns_response_t. */
501 static uint32_t ns_v3_responses[GEOIP_NS_RESPONSE_NUM];
503 /** Note that we've rejected a client's request for a v2 or v3 network
504 * status, encoded in <b>action</b> for reason <b>reason</b> at time
505 * <b>now</b>. */
506 void
507 geoip_note_ns_response(geoip_client_action_t action,
508 geoip_ns_response_t response)
510 static int arrays_initialized = 0;
511 if (!get_options()->DirReqStatistics)
512 return;
513 if (!arrays_initialized) {
514 memset(ns_v2_responses, 0, sizeof(ns_v2_responses));
515 memset(ns_v3_responses, 0, sizeof(ns_v3_responses));
516 arrays_initialized = 1;
518 tor_assert(action == GEOIP_CLIENT_NETWORKSTATUS ||
519 action == GEOIP_CLIENT_NETWORKSTATUS_V2);
520 tor_assert(response < GEOIP_NS_RESPONSE_NUM);
521 if (action == GEOIP_CLIENT_NETWORKSTATUS)
522 ns_v3_responses[response]++;
523 else
524 ns_v2_responses[response]++;
527 /** Do not mention any country from which fewer than this number of IPs have
528 * connected. This conceivably avoids reporting information that could
529 * deanonymize users, though analysis is lacking. */
530 #define MIN_IPS_TO_NOTE_COUNTRY 1
531 /** Do not report any geoip data at all if we have fewer than this number of
532 * IPs to report about. */
533 #define MIN_IPS_TO_NOTE_ANYTHING 1
534 /** When reporting geoip data about countries, round counts up to the next
535 * multiple of this value. Used for bridge client and request histories. */
536 #define IP_GRANULARITY 8
538 /** Return the time at which we started recording geoip data. */
539 time_t
540 geoip_get_history_start(void)
542 return client_history_starts;
545 /** Helper type: one country together with its (already rounded) count,
 * used to sort per-country totals by value. */
546 typedef struct c_hist_t {
547 char country[3]; /**< Two-letter country code. */
548 unsigned total; /**< Count reported for this country, after rounding. */
549 } c_hist_t;
551 /** Sorting helper: return -1, 1, or 0 based on comparison of two
552 * geoip_entry_t. Sort in descending order of total, and then by country
553 * code. */
554 static int
555 _c_hist_compare(const void **_a, const void **_b)
557 const c_hist_t *a = *_a, *b = *_b;
558 if (a->total > b->total)
559 return -1;
560 else if (a->total < b->total)
561 return 1;
562 else
563 return strcmp(a->country, b->country);
566 /** When there are incomplete directory requests at the end of a 24-hour
567 * period, consider those requests running for longer than this timeout as
568 * failed, the others as still running. Compared against a tv_mdiff()
 * result divided by 1000, so presumably in seconds. */
569 #define DIRREQ_TIMEOUT (10*60)
571 /** Entry in a map from either conn->global_identifier for direct requests
572 * or a unique circuit identifier for tunneled requests to request time,
573 * response size, and completion time of a network status request. Used to
574 * measure download times of requests to derive average client
575 * bandwidths. */
576 typedef struct dirreq_map_entry_t {
577 HT_ENTRY(dirreq_map_entry_t) node;
578 /** Unique identifier for this network status request; this is either the
579 * conn->global_identifier of the dir conn (direct request) or a new
580 * locally unique identifier of a circuit (tunneled request). This ID is
581 * only unique among other direct or tunneled requests, respectively. */
582 uint64_t dirreq_id;
583 unsigned int state:3; /**< State of this directory request. */
584 unsigned int type:1; /**< Is this a direct or a tunneled request? */
585 unsigned int completed:1; /**< Is this request complete? */
586 unsigned int action:2; /**< Is this a v2 or v3 request? */
587 /** When did we receive the request and start sending the response? */
588 struct timeval request_time;
589 size_t response_size; /**< What is the size of the response in bytes? */
590 struct timeval completion_time; /**< When did the request succeed? Only
 * set once completed is 1. */
591 } dirreq_map_entry_t;
593 /** Map of all directory requests asking for v2 or v3 network statuses in
594 * the current geoip-stats interval. Values are
595 * of type *<b>dirreq_map_entry_t</b>; geoip_get_dirreq_history() removes
 * the entries it reports on. */
596 static HT_HEAD(dirreqmap, dirreq_map_entry_t) dirreq_map =
597 HT_INITIALIZER();
599 static int
600 dirreq_map_ent_eq(const dirreq_map_entry_t *a,
601 const dirreq_map_entry_t *b)
603 return a->dirreq_id == b->dirreq_id && a->type == b->type;
606 static unsigned
607 dirreq_map_ent_hash(const dirreq_map_entry_t *entry)
609 unsigned u = (unsigned) entry->dirreq_id;
610 u += entry->type << 20;
611 return u;
/* Instantiate the dirreqmap hash table type for dirreq_map_entry_t, using
 * the hash and equality helpers defined just above. */
614 HT_PROTOTYPE(dirreqmap, dirreq_map_entry_t, node, dirreq_map_ent_hash,
615 dirreq_map_ent_eq);
616 HT_GENERATE(dirreqmap, dirreq_map_entry_t, node, dirreq_map_ent_hash,
617 dirreq_map_ent_eq, 0.6, malloc, realloc, free);
619 /** Helper: Put <b>entry</b> into map of directory requests using
620 * <b>tunneled</b> and <b>dirreq_id</b> as key parts. If there is
621 * already an entry for that key, print out a BUG warning and return. */
622 static void
623 _dirreq_map_put(dirreq_map_entry_t *entry, dirreq_type_t type,
624 uint64_t dirreq_id)
626 dirreq_map_entry_t *old_ent;
627 tor_assert(entry->type == type);
628 tor_assert(entry->dirreq_id == dirreq_id);
630 /* XXXX022 once we're sure the bug case never happens, we can switch
631 * to HT_INSERT */
632 old_ent = HT_REPLACE(dirreqmap, &dirreq_map, entry);
633 if (old_ent && old_ent != entry) {
634 log_warn(LD_BUG, "Error when putting directory request into local "
635 "map. There was already an entry for the same identifier.");
636 return;
640 /** Helper: Look up and return an entry in the map of directory requests
641 * using <b>tunneled</b> and <b>dirreq_id</b> as key parts. If there
642 * is no such entry, return NULL. */
643 static dirreq_map_entry_t *
644 _dirreq_map_get(dirreq_type_t type, uint64_t dirreq_id)
646 dirreq_map_entry_t lookup;
647 lookup.type = type;
648 lookup.dirreq_id = dirreq_id;
649 return HT_FIND(dirreqmap, &dirreq_map, &lookup);
652 /** Note that an either direct or tunneled (see <b>type</b>) directory
653 * request for a network status with unique ID <b>dirreq_id</b> of size
654 * <b>response_size</b> and action <b>action</b> (either v2 or v3) has
655 * started. */
656 void
657 geoip_start_dirreq(uint64_t dirreq_id, size_t response_size,
658 geoip_client_action_t action, dirreq_type_t type)
660 dirreq_map_entry_t *ent;
661 if (!get_options()->DirReqStatistics)
662 return;
663 ent = tor_malloc_zero(sizeof(dirreq_map_entry_t));
664 ent->dirreq_id = dirreq_id;
665 tor_gettimeofday(&ent->request_time);
666 ent->response_size = response_size;
667 ent->action = action;
668 ent->type = type;
669 _dirreq_map_put(ent, type, dirreq_id);
672 /** Change the state of the either direct or tunneled (see <b>type</b>)
673 * directory request with <b>dirreq_id</b> to <b>new_state</b> and
674 * possibly mark it as completed. If no entry can be found for the given
675 * key parts (e.g., if this is a directory request that we are not
676 * measuring, or one that was started in the previous measurement period),
677 * or if the state cannot be advanced to <b>new_state</b>, do nothing. */
678 void
679 geoip_change_dirreq_state(uint64_t dirreq_id, dirreq_type_t type,
680 dirreq_state_t new_state)
682 dirreq_map_entry_t *ent;
683 if (!get_options()->DirReqStatistics)
684 return;
685 ent = _dirreq_map_get(type, dirreq_id);
686 if (!ent)
687 return;
688 if (new_state == DIRREQ_IS_FOR_NETWORK_STATUS)
689 return;
690 if (new_state - 1 != ent->state)
691 return;
692 ent->state = new_state;
693 if ((type == DIRREQ_DIRECT &&
694 new_state == DIRREQ_FLUSHING_DIR_CONN_FINISHED) ||
695 (type == DIRREQ_TUNNELED &&
696 new_state == DIRREQ_OR_CONN_BUFFER_FLUSHED)) {
697 tor_gettimeofday(&ent->completion_time);
698 ent->completed = 1;
702 /** Return a newly allocated comma-separated string containing statistics
703 * on network status downloads. The string contains the number of completed
704 * requests, timeouts, and still running requests as well as the download
705 * times by deciles and quartiles. Return NULL if we have not observed
706 * requests for long enough. */
707 static char *
708 geoip_get_dirreq_history(geoip_client_action_t action,
709 dirreq_type_t type)
711 char *result = NULL;
712 smartlist_t *dirreq_completed = NULL;
713 uint32_t complete = 0, timeouts = 0, running = 0;
714 int bufsize = 1024, written;
715 dirreq_map_entry_t **ptr, **next, *ent;
716 struct timeval now;
718 tor_gettimeofday(&now);
719 if (action != GEOIP_CLIENT_NETWORKSTATUS &&
720 action != GEOIP_CLIENT_NETWORKSTATUS_V2)
721 return NULL;
722 dirreq_completed = smartlist_create();
723 for (ptr = HT_START(dirreqmap, &dirreq_map); ptr; ptr = next) {
724 ent = *ptr;
725 if (ent->action != action || ent->type != type) {
726 next = HT_NEXT(dirreqmap, &dirreq_map, ptr);
727 continue;
728 } else {
729 if (ent->completed) {
730 smartlist_add(dirreq_completed, ent);
731 complete++;
732 next = HT_NEXT_RMV(dirreqmap, &dirreq_map, ptr);
733 } else {
734 if (tv_mdiff(&ent->request_time, &now) / 1000 > DIRREQ_TIMEOUT)
735 timeouts++;
736 else
737 running++;
738 next = HT_NEXT_RMV(dirreqmap, &dirreq_map, ptr);
739 tor_free(ent);
743 #define DIR_REQ_GRANULARITY 4
744 complete = round_uint32_to_next_multiple_of(complete,
745 DIR_REQ_GRANULARITY);
746 timeouts = round_uint32_to_next_multiple_of(timeouts,
747 DIR_REQ_GRANULARITY);
748 running = round_uint32_to_next_multiple_of(running,
749 DIR_REQ_GRANULARITY);
750 result = tor_malloc_zero(bufsize);
751 written = tor_snprintf(result, bufsize, "complete=%u,timeout=%u,"
752 "running=%u", complete, timeouts, running);
753 if (written < 0) {
754 tor_free(result);
755 goto done;
758 #define MIN_DIR_REQ_RESPONSES 16
759 if (complete >= MIN_DIR_REQ_RESPONSES) {
760 uint32_t *dltimes;
761 /* We may have rounded 'completed' up. Here we want to use the
762 * real value. */
763 complete = smartlist_len(dirreq_completed);
764 dltimes = tor_malloc_zero(sizeof(uint32_t) * complete);
765 SMARTLIST_FOREACH_BEGIN(dirreq_completed, dirreq_map_entry_t *, ent) {
766 uint32_t bytes_per_second;
767 uint32_t time_diff = (uint32_t) tv_mdiff(&ent->request_time,
768 &ent->completion_time);
769 if (time_diff == 0)
770 time_diff = 1; /* Avoid DIV/0; "instant" answers are impossible
771 * by law of nature or something, but a milisecond
772 * is a bit greater than "instantly" */
773 bytes_per_second = (uint32_t)(1000 * ent->response_size / time_diff);
774 dltimes[ent_sl_idx] = bytes_per_second;
775 } SMARTLIST_FOREACH_END(ent);
776 median_uint32(dltimes, complete); /* sorts as a side effect. */
777 written = tor_snprintf(result + written, bufsize - written,
778 ",min=%u,d1=%u,d2=%u,q1=%u,d3=%u,d4=%u,md=%u,"
779 "d6=%u,d7=%u,q3=%u,d8=%u,d9=%u,max=%u",
780 dltimes[0],
781 dltimes[1*complete/10-1],
782 dltimes[2*complete/10-1],
783 dltimes[1*complete/4-1],
784 dltimes[3*complete/10-1],
785 dltimes[4*complete/10-1],
786 dltimes[5*complete/10-1],
787 dltimes[6*complete/10-1],
788 dltimes[7*complete/10-1],
789 dltimes[3*complete/4-1],
790 dltimes[8*complete/10-1],
791 dltimes[9*complete/10-1],
792 dltimes[complete-1]);
793 if (written<0)
794 tor_free(result);
795 tor_free(dltimes);
797 done:
798 SMARTLIST_FOREACH(dirreq_completed, dirreq_map_entry_t *, ent,
799 tor_free(ent));
800 smartlist_free(dirreq_completed);
801 return result;
804 /** How long do we have to have observed per-country request history before we
805 * are willing to talk about it? Subtracted from a time_t before comparing
 * with the start of the client history; presumably seconds (12 hours). */
806 #define GEOIP_MIN_OBSERVATION_TIME (12*60*60)
808 /** Helper for geoip_get_client_history_dirreq() and
809 * geoip_get_client_history_bridge(). */
810 static char *
811 geoip_get_client_history(time_t now, geoip_client_action_t action,
812 int min_observation_time, unsigned granularity)
814 char *result = NULL;
815 if (!geoip_is_loaded())
816 return NULL;
817 if (client_history_starts < (now - min_observation_time)) {
818 char buf[32];
819 smartlist_t *chunks = NULL;
820 smartlist_t *entries = NULL;
821 int n_countries = geoip_get_n_countries();
822 int i;
823 clientmap_entry_t **ent;
824 unsigned *counts = tor_malloc_zero(sizeof(unsigned)*n_countries);
825 unsigned total = 0;
826 HT_FOREACH(ent, clientmap, &client_history) {
827 int country;
828 if ((*ent)->action != (int)action)
829 continue;
830 country = geoip_get_country_by_ip((*ent)->ipaddr);
831 if (country < 0)
832 country = 0; /** unresolved requests are stored at index 0. */
833 tor_assert(0 <= country && country < n_countries);
834 ++counts[country];
835 ++total;
837 /* Don't record anything if we haven't seen enough IPs. */
838 if (total < MIN_IPS_TO_NOTE_ANYTHING)
839 goto done;
840 /* Make a list of c_hist_t */
841 entries = smartlist_create();
842 for (i = 0; i < n_countries; ++i) {
843 unsigned c = counts[i];
844 const char *countrycode;
845 c_hist_t *ent;
846 /* Only report a country if it has a minimum number of IPs. */
847 if (c >= MIN_IPS_TO_NOTE_COUNTRY) {
848 c = round_to_next_multiple_of(c, granularity);
849 countrycode = geoip_get_country_name(i);
850 ent = tor_malloc(sizeof(c_hist_t));
851 strlcpy(ent->country, countrycode, sizeof(ent->country));
852 ent->total = c;
853 smartlist_add(entries, ent);
856 /* Sort entries. Note that we must do this _AFTER_ rounding, or else
857 * the sort order could leak info. */
858 smartlist_sort(entries, _c_hist_compare);
860 /* Build the result. */
861 chunks = smartlist_create();
862 SMARTLIST_FOREACH(entries, c_hist_t *, ch, {
863 tor_snprintf(buf, sizeof(buf), "%s=%u", ch->country, ch->total);
864 smartlist_add(chunks, tor_strdup(buf));
866 result = smartlist_join_strings(chunks, ",", 0, NULL);
867 done:
868 tor_free(counts);
869 if (chunks) {
870 SMARTLIST_FOREACH(chunks, char *, c, tor_free(c));
871 smartlist_free(chunks);
873 if (entries) {
874 SMARTLIST_FOREACH(entries, c_hist_t *, c, tor_free(c));
875 smartlist_free(entries);
878 return result;
881 /** Return a newly allocated comma-separated string containing entries for
882 * all the countries from which we've seen enough clients connect as a
883 * directory. The entry format is cc=num where num is the number of IPs
884 * we've seen connecting from that country, and cc is a lowercased country
885 * code. Returns NULL if we don't want to export geoip data yet. */
886 char *
887 geoip_get_client_history_dirreq(time_t now,
888 geoip_client_action_t action)
890 return geoip_get_client_history(now, action,
891 DIR_RECORD_USAGE_MIN_OBSERVATION_TIME,
892 DIR_RECORD_USAGE_GRANULARITY);
895 /** Return a newly allocated comma-separated string containing entries for
896 * all the countries from which we've seen enough clients connect as a
897 * bridge. The entry format is cc=num where num is the number of IPs
898 * we've seen connecting from that country, and cc is a lowercased country
899 * code. Returns NULL if we don't want to export geoip data yet. */
900 char *
901 geoip_get_client_history_bridge(time_t now,
902 geoip_client_action_t action)
904 return geoip_get_client_history(now, action,
905 GEOIP_MIN_OBSERVATION_TIME,
906 IP_GRANULARITY);
909 /** Return a newly allocated string holding the per-country request history
910 * for <b>action</b> in a format suitable for an extra-info document, or NULL
911 * on failure. */
912 char *
913 geoip_get_request_history(time_t now, geoip_client_action_t action)
915 smartlist_t *entries, *strings;
916 char *result;
917 unsigned granularity = IP_GRANULARITY;
918 int min_observation_time = GEOIP_MIN_OBSERVATION_TIME;
920 if (client_history_starts >= (now - min_observation_time))
921 return NULL;
922 if (action != GEOIP_CLIENT_NETWORKSTATUS &&
923 action != GEOIP_CLIENT_NETWORKSTATUS_V2)
924 return NULL;
925 if (!geoip_countries)
926 return NULL;
928 entries = smartlist_create();
929 SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, {
930 uint32_t *n = (action == GEOIP_CLIENT_NETWORKSTATUS)
931 ? c->n_v3_ns_requests : c->n_v2_ns_requests;
932 uint32_t tot = 0;
933 int i;
934 c_hist_t *ent;
935 for (i=0; i < REQUEST_HIST_LEN; ++i)
936 tot += n[i];
937 if (!tot)
938 continue;
939 ent = tor_malloc_zero(sizeof(c_hist_t));
940 strlcpy(ent->country, c->countrycode, sizeof(ent->country));
941 ent->total = round_to_next_multiple_of(tot, granularity);
942 smartlist_add(entries, ent);
944 smartlist_sort(entries, _c_hist_compare);
946 strings = smartlist_create();
947 SMARTLIST_FOREACH(entries, c_hist_t *, ent, {
948 char buf[32];
949 tor_snprintf(buf, sizeof(buf), "%s=%u", ent->country, ent->total);
950 smartlist_add(strings, tor_strdup(buf));
952 result = smartlist_join_strings(strings, ",", 0, NULL);
953 SMARTLIST_FOREACH(strings, char *, cp, tor_free(cp));
954 SMARTLIST_FOREACH(entries, c_hist_t *, ent, tor_free(ent));
955 smartlist_free(strings);
956 smartlist_free(entries);
957 return result;
/** Time at which the current directory request statistics interval
 * began. */
static time_t start_of_dirreq_stats_interval;

/** Begin a directory request statistics interval: record <b>now</b> as its
 * start time. */
void
geoip_dirreq_stats_init(time_t now)
{
  start_of_dirreq_stats_interval = now;
}
970 /** Write dirreq statistics to $DATADIR/stats/dirreq-stats. */
971 void
972 geoip_dirreq_stats_write(time_t now)
974 char *statsdir = NULL, *filename = NULL;
975 char *data_v2 = NULL, *data_v3 = NULL;
976 char written[ISO_TIME_LEN+1];
977 open_file_t *open_file = NULL;
978 double v2_share = 0.0, v3_share = 0.0;
979 FILE *out;
980 int i;
982 if (!get_options()->DirReqStatistics)
983 goto done;
985 /* Discard all items in the client history that are too old. */
986 geoip_remove_old_clients(start_of_dirreq_stats_interval);
988 statsdir = get_datadir_fname("stats");
989 if (check_private_dir(statsdir, CPD_CREATE) < 0)
990 goto done;
991 filename = get_datadir_fname("stats"PATH_SEPARATOR"dirreq-stats");
992 data_v2 = geoip_get_client_history_dirreq(now,
993 GEOIP_CLIENT_NETWORKSTATUS_V2);
994 data_v3 = geoip_get_client_history_dirreq(now,
995 GEOIP_CLIENT_NETWORKSTATUS);
996 format_iso_time(written, now);
997 out = start_writing_to_stdio_file(filename, OPEN_FLAGS_APPEND,
998 0600, &open_file);
999 if (!out)
1000 goto done;
1001 if (fprintf(out, "dirreq-stats-end %s (%d s)\ndirreq-v3-ips %s\n"
1002 "dirreq-v2-ips %s\n", written,
1003 (unsigned) (now - start_of_dirreq_stats_interval),
1004 data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0)
1005 goto done;
1006 tor_free(data_v2);
1007 tor_free(data_v3);
1009 data_v2 = geoip_get_request_history(now, GEOIP_CLIENT_NETWORKSTATUS_V2);
1010 data_v3 = geoip_get_request_history(now, GEOIP_CLIENT_NETWORKSTATUS);
1011 if (fprintf(out, "dirreq-v3-reqs %s\ndirreq-v2-reqs %s\n",
1012 data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0)
1013 goto done;
1014 tor_free(data_v2);
1015 tor_free(data_v3);
1016 #define RESPONSE_GRANULARITY 8
1017 for (i = 0; i < GEOIP_NS_RESPONSE_NUM; i++) {
1018 ns_v2_responses[i] = round_uint32_to_next_multiple_of(
1019 ns_v2_responses[i], RESPONSE_GRANULARITY);
1020 ns_v3_responses[i] = round_uint32_to_next_multiple_of(
1021 ns_v3_responses[i], RESPONSE_GRANULARITY);
1023 #undef RESPONSE_GRANULARITY
1024 if (fprintf(out, "dirreq-v3-resp ok=%u,not-enough-sigs=%u,unavailable=%u,"
1025 "not-found=%u,not-modified=%u,busy=%u\n",
1026 ns_v3_responses[GEOIP_SUCCESS],
1027 ns_v3_responses[GEOIP_REJECT_NOT_ENOUGH_SIGS],
1028 ns_v3_responses[GEOIP_REJECT_UNAVAILABLE],
1029 ns_v3_responses[GEOIP_REJECT_NOT_FOUND],
1030 ns_v3_responses[GEOIP_REJECT_NOT_MODIFIED],
1031 ns_v3_responses[GEOIP_REJECT_BUSY]) < 0)
1032 goto done;
1033 if (fprintf(out, "dirreq-v2-resp ok=%u,unavailable=%u,"
1034 "not-found=%u,not-modified=%u,busy=%u\n",
1035 ns_v2_responses[GEOIP_SUCCESS],
1036 ns_v2_responses[GEOIP_REJECT_UNAVAILABLE],
1037 ns_v2_responses[GEOIP_REJECT_NOT_FOUND],
1038 ns_v2_responses[GEOIP_REJECT_NOT_MODIFIED],
1039 ns_v2_responses[GEOIP_REJECT_BUSY]) < 0)
1040 goto done;
1041 memset(ns_v2_responses, 0, sizeof(ns_v2_responses));
1042 memset(ns_v3_responses, 0, sizeof(ns_v3_responses));
1043 if (!geoip_get_mean_shares(now, &v2_share, &v3_share)) {
1044 if (fprintf(out, "dirreq-v2-share %0.2lf%%\n", v2_share*100) < 0)
1045 goto done;
1046 if (fprintf(out, "dirreq-v3-share %0.2lf%%\n", v3_share*100) < 0)
1047 goto done;
1050 data_v2 = geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS_V2,
1051 DIRREQ_DIRECT);
1052 data_v3 = geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS,
1053 DIRREQ_DIRECT);
1054 if (fprintf(out, "dirreq-v3-direct-dl %s\ndirreq-v2-direct-dl %s\n",
1055 data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0)
1056 goto done;
1057 tor_free(data_v2);
1058 tor_free(data_v3);
1059 data_v2 = geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS_V2,
1060 DIRREQ_TUNNELED);
1061 data_v3 = geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS,
1062 DIRREQ_TUNNELED);
1063 if (fprintf(out, "dirreq-v3-tunneled-dl %s\ndirreq-v2-tunneled-dl %s\n",
1064 data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0)
1065 goto done;
1067 finish_writing_to_file(open_file);
1068 open_file = NULL;
1070 /* Rotate request period */
1071 rotate_request_period();
1073 start_of_dirreq_stats_interval = now;
1075 done:
1076 if (open_file)
1077 abort_writing_to_file(open_file);
1078 tor_free(filename);
1079 tor_free(statsdir);
1080 tor_free(data_v2);
1081 tor_free(data_v3);
/** Time at which the current entry statistics interval began. */
static time_t start_of_entry_stats_interval;

/** Begin an entry statistics interval: record <b>now</b> as its start
 * time. */
void
geoip_entry_stats_init(time_t now)
{
  start_of_entry_stats_interval = now;
}
1094 /** Write entry statistics to $DATADIR/stats/entry-stats. */
1095 void
1096 geoip_entry_stats_write(time_t now)
1098 char *statsdir = NULL, *filename = NULL;
1099 char *data = NULL;
1100 char written[ISO_TIME_LEN+1];
1101 open_file_t *open_file = NULL;
1102 FILE *out;
1104 if (!get_options()->EntryStatistics)
1105 goto done;
1107 /* Discard all items in the client history that are too old. */
1108 geoip_remove_old_clients(start_of_entry_stats_interval);
1110 statsdir = get_datadir_fname("stats");
1111 if (check_private_dir(statsdir, CPD_CREATE) < 0)
1112 goto done;
1113 filename = get_datadir_fname("stats"PATH_SEPARATOR"entry-stats");
1114 data = geoip_get_client_history_dirreq(now, GEOIP_CLIENT_CONNECT);
1115 format_iso_time(written, now);
1116 out = start_writing_to_stdio_file(filename, OPEN_FLAGS_APPEND,
1117 0600, &open_file);
1118 if (!out)
1119 goto done;
1120 if (fprintf(out, "entry-stats-end %s (%u s)\nentry-ips %s\n",
1121 written, (unsigned) (now - start_of_entry_stats_interval),
1122 data ? data : "") < 0)
1123 goto done;
1125 start_of_entry_stats_interval = now;
1127 finish_writing_to_file(open_file);
1128 open_file = NULL;
1129 done:
1130 if (open_file)
1131 abort_writing_to_file(open_file);
1132 tor_free(filename);
1133 tor_free(statsdir);
1134 tor_free(data);
1137 /** Helper used to implement GETINFO ip-to-country/... controller command. */
1139 getinfo_helper_geoip(control_connection_t *control_conn,
1140 const char *question, char **answer)
1142 (void)control_conn;
1143 if (geoip_is_loaded() && !strcmpstart(question, "ip-to-country/")) {
1144 int c;
1145 uint32_t ip;
1146 struct in_addr in;
1147 question += strlen("ip-to-country/");
1148 if (tor_inet_aton(question, &in) != 0) {
1149 ip = ntohl(in.s_addr);
1150 c = geoip_get_country_by_ip(ip);
1151 *answer = tor_strdup(geoip_get_country_name(c));
1154 return 0;
1157 /** Release all storage held by the GeoIP database. */
1158 static void
1159 clear_geoip_db(void)
1161 if (geoip_countries) {
1162 SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, tor_free(c));
1163 smartlist_free(geoip_countries);
1165 if (country_idxplus1_by_lc_code)
1166 strmap_free(country_idxplus1_by_lc_code, NULL);
1167 if (geoip_entries) {
1168 SMARTLIST_FOREACH(geoip_entries, geoip_entry_t *, ent, tor_free(ent));
1169 smartlist_free(geoip_entries);
1171 geoip_countries = NULL;
1172 country_idxplus1_by_lc_code = NULL;
1173 geoip_entries = NULL;
1176 /** Release all storage held in this file. */
1177 void
1178 geoip_free_all(void)
1181 clientmap_entry_t **ent, **next, *this;
1182 for (ent = HT_START(clientmap, &client_history); ent != NULL; ent = next) {
1183 this = *ent;
1184 next = HT_NEXT_RMV(clientmap, &client_history, ent);
1185 tor_free(this);
1187 HT_CLEAR(clientmap, &client_history);
1190 dirreq_map_entry_t **ent, **next, *this;
1191 for (ent = HT_START(dirreqmap, &dirreq_map); ent != NULL; ent = next) {
1192 this = *ent;
1193 next = HT_NEXT_RMV(dirreqmap, &dirreq_map, ent);
1194 tor_free(this);
1196 HT_CLEAR(dirreqmap, &dirreq_map);
1199 clear_geoip_db();