1 /* Copyright (c) 2007-2009, The Tor Project, Inc. */
2 /* See LICENSE for licensing information */
6 * \brief Functions related to maintaining an IP-to-country database and to
7 * summarizing client connections by country.
14 static void clear_geoip_db(void);
15 static void dump_geoip_stats(void);
16 static void dump_entry_stats(void);
/** An entry from the GeoIP file: maps an inclusive range of IPv4 addresses
 * to a country. */
typedef struct geoip_entry_t {
  uint32_t ip_low; /**< The lowest IP in the range, in host order */
  uint32_t ip_high; /**< The highest IP in the range, in host order */
  intptr_t country; /**< An index into geoip_countries */
} geoip_entry_t;
25 /** For how many periods should we remember per-country request history? */
26 #define REQUEST_HIST_LEN 1
27 /** How long are the periods for which we should remember request history? */
28 #define REQUEST_HIST_PERIOD (24*60*60)
30 /** A per-country record for GeoIP request history. */
31 typedef struct geoip_country_t
{
33 uint32_t n_v2_ns_requests
[REQUEST_HIST_LEN
];
34 uint32_t n_v3_ns_requests
[REQUEST_HIST_LEN
];
37 /** A list of geoip_country_t */
38 static smartlist_t
*geoip_countries
= NULL
;
39 /** A map from lowercased country codes to their position in geoip_countries.
40 * The index is encoded in the pointer, and 1 is added so that NULL can mean
42 static strmap_t
*country_idxplus1_by_lc_code
= NULL
;
43 /** A list of all known geoip_entry_t, sorted by ip_low. */
44 static smartlist_t
*geoip_entries
= NULL
;
46 /** Return the index of the <b>country</b>'s entry in the GeoIP DB
47 * if it is a valid 2-letter country code, otherwise return -1.
50 geoip_get_country(const char *country
)
55 _idxplus1
= strmap_get_lc(country_idxplus1_by_lc_code
, country
);
59 idx
= ((uintptr_t)_idxplus1
)-1;
60 return (country_t
)idx
;
63 /** Add an entry to the GeoIP table, mapping all IPs between <b>low</b> and
64 * <b>high</b>, inclusive, to the 2-letter country code <b>country</b>.
67 geoip_add_entry(uint32_t low
, uint32_t high
, const char *country
)
76 _idxplus1
= strmap_get_lc(country_idxplus1_by_lc_code
, country
);
79 geoip_country_t
*c
= tor_malloc_zero(sizeof(geoip_country_t
));
80 strlcpy(c
->countrycode
, country
, sizeof(c
->countrycode
));
81 tor_strlower(c
->countrycode
);
82 smartlist_add(geoip_countries
, c
);
83 idx
= smartlist_len(geoip_countries
) - 1;
84 strmap_set_lc(country_idxplus1_by_lc_code
, country
, (void*)(idx
+1));
86 idx
= ((uintptr_t)_idxplus1
)-1;
89 geoip_country_t
*c
= smartlist_get(geoip_countries
, idx
);
90 tor_assert(!strcasecmp(c
->countrycode
, country
));
92 ent
= tor_malloc_zero(sizeof(geoip_entry_t
));
96 smartlist_add(geoip_entries
, ent
);
99 /** Add an entry to the GeoIP table, parsing it from <b>line</b>. The
100 * format is as for geoip_load_file(). */
102 geoip_parse_entry(const char *line
)
104 unsigned int low
, high
;
106 if (!geoip_countries
) {
107 geoip_countries
= smartlist_create();
108 geoip_entries
= smartlist_create();
109 country_idxplus1_by_lc_code
= strmap_new();
111 while (TOR_ISSPACE(*line
))
115 if (sscanf(line
,"%u,%u,%2s", &low
, &high
, b
) == 3) {
116 geoip_add_entry(low
, high
, b
);
118 } else if (sscanf(line
,"\"%u\",\"%u\",\"%2s\",", &low
, &high
, b
) == 3) {
119 geoip_add_entry(low
, high
, b
);
122 log_warn(LD_GENERAL
, "Unable to parse line from GEOIP file: %s",
128 /** Sorting helper: return -1, 1, or 0 based on comparison of two
131 _geoip_compare_entries(const void **_a
, const void **_b
)
133 const geoip_entry_t
*a
= *_a
, *b
= *_b
;
134 if (a
->ip_low
< b
->ip_low
)
136 else if (a
->ip_low
> b
->ip_low
)
142 /** bsearch helper: return -1, 1, or 0 based on comparison of an IP (a pointer
143 * to a uint32_t in host order) to a geoip_entry_t */
145 _geoip_compare_key_to_entry(const void *_key
, const void **_member
)
147 const uint32_t addr
= *(uint32_t *)_key
;
148 const geoip_entry_t
*entry
= *_member
;
149 if (addr
< entry
->ip_low
)
151 else if (addr
> entry
->ip_high
)
157 /** Return 1 if we should collect geoip stats on bridge users, and
158 * include them in our extrainfo descriptor. Else return 0. */
160 should_record_bridge_info(or_options_t
*options
)
162 return options
->BridgeRelay
&& options
->BridgeRecordUsageByCountry
;
165 /** Clear the GeoIP database and reload it from the file
166 * <b>filename</b>. Return 0 on success, -1 on failure.
168 * Recognized line formats are:
169 * INTIPLOW,INTIPHIGH,CC
171 * "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME"
172 * where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned
173 * integers, and CC is a country code.
175 * It also recognizes, and skips over, blank lines and lines that start
176 * with '#' (comments).
179 geoip_load_file(const char *filename
, or_options_t
*options
)
182 const char *msg
= "";
183 int severity
= options_need_geoip_info(options
, &msg
) ? LOG_WARN
: LOG_INFO
;
185 if (!(f
= fopen(filename
, "r"))) {
186 log_fn(severity
, LD_GENERAL
, "Failed to open GEOIP file %s. %s",
190 if (!geoip_countries
) {
191 geoip_country_t
*geoip_unresolved
;
192 geoip_countries
= smartlist_create();
193 /* Add a geoip_country_t for requests that could not be resolved to a
194 * country as first element (index 0) to geoip_countries. */
195 geoip_unresolved
= tor_malloc_zero(sizeof(geoip_country_t
));
196 strlcpy(geoip_unresolved
->countrycode
, "??",
197 sizeof(geoip_unresolved
->countrycode
));
198 smartlist_add(geoip_countries
, geoip_unresolved
);
199 country_idxplus1_by_lc_code
= strmap_new();
202 SMARTLIST_FOREACH(geoip_entries
, geoip_entry_t
*, e
, tor_free(e
));
203 smartlist_free(geoip_entries
);
205 geoip_entries
= smartlist_create();
206 log_notice(LD_GENERAL
, "Parsing GEOIP file.");
209 if (fgets(buf
, (int)sizeof(buf
), f
) == NULL
)
211 /* FFFF track full country name. */
212 geoip_parse_entry(buf
);
214 /*XXXX abort and return -1 if no entries/illformed?*/
217 smartlist_sort(geoip_entries
, _geoip_compare_entries
);
219 /* Okay, now we need to maybe change our mind about what is in which
221 refresh_all_country_info();
226 /** Given an IP address in host order, return a number representing the
227 * country to which that address belongs, or -1 for unknown. The return value
228 * will always be less than geoip_get_n_countries(). To decode it,
229 * call geoip_get_country_name().
232 geoip_get_country_by_ip(uint32_t ipaddr
)
237 ent
= smartlist_bsearch(geoip_entries
, &ipaddr
, _geoip_compare_key_to_entry
);
238 return ent
? (int)ent
->country
: -1;
241 /** Return the number of countries recognized by the GeoIP database. */
243 geoip_get_n_countries(void)
245 return (int) smartlist_len(geoip_countries
);
248 /** Return the two-letter country code associated with the number <b>num</b>,
249 * or "??" for an unknown value. */
251 geoip_get_country_name(country_t num
)
253 if (geoip_countries
&& num
>= 0 && num
< smartlist_len(geoip_countries
)) {
254 geoip_country_t
*c
= smartlist_get(geoip_countries
, num
);
255 return c
->countrycode
;
260 /** Return true iff we have loaded a GeoIP database.*/
262 geoip_is_loaded(void)
264 return geoip_countries
!= NULL
&& geoip_entries
!= NULL
;
267 /** Entry in a map from IP address to the last time we've seen an incoming
268 * connection from that IP address. Used by bridges only, to track which
269 * countries have them blocked. */
270 typedef struct clientmap_entry_t
{
271 HT_ENTRY(clientmap_entry_t
) node
;
273 unsigned int last_seen_in_minutes
:30;
274 unsigned int action
:2;
277 #define ACTION_MASK 3
279 /** Map from client IP address to last time seen. */
280 static HT_HEAD(clientmap
, clientmap_entry_t
) client_history
=
282 /** Time at which we started tracking client IP history. */
283 static time_t client_history_starts
= 0;
285 /** When did the current period of checking per-country request history
287 static time_t current_request_period_starts
= 0;
288 /** How many older request periods are we remembering? */
289 static int n_old_request_periods
= 0;
291 /** Hashtable helper: compute a hash of a clientmap_entry_t. */
292 static INLINE
unsigned
293 clientmap_entry_hash(const clientmap_entry_t
*a
)
295 return ht_improve_hash((unsigned) a
->ipaddr
);
297 /** Hashtable helper: compare two clientmap_entry_t values for equality. */
299 clientmap_entries_eq(const clientmap_entry_t
*a
, const clientmap_entry_t
*b
)
301 return a
->ipaddr
== b
->ipaddr
&& a
->action
== b
->action
;
304 HT_PROTOTYPE(clientmap
, clientmap_entry_t
, node
, clientmap_entry_hash
,
305 clientmap_entries_eq
);
306 HT_GENERATE(clientmap
, clientmap_entry_t
, node
, clientmap_entry_hash
,
307 clientmap_entries_eq
, 0.6, malloc
, realloc
, free
);
309 /** How often do we update our estimate which share of v2 and v3 directory
310 * requests is sent to us? We could as well trigger updates of shares from
311 * network status updates, but that means adding a lot of calls into code
312 * that is independent from geoip stats (and keeping them up-to-date). We
313 * are perfectly fine with an approximation of 15-minute granularity. */
314 #define REQUEST_SHARE_INTERVAL (15 * 60)
316 /** When did we last determine which share of v2 and v3 directory requests
318 static time_t last_time_determined_shares
= 0;
320 /** Sum of products of v2 shares times the number of seconds for which we
321 * consider these shares as valid. */
322 static double v2_share_times_seconds
;
324 /** Sum of products of v3 shares times the number of seconds for which we
325 * consider these shares as valid. */
326 static double v3_share_times_seconds
;
328 /** Number of seconds we are determining v2 and v3 shares. */
329 static int share_seconds
;
331 /** Try to determine which fraction of v2 and v3 directory requests aimed at
332 * caches will be sent to us at time <b>now</b> and store that value in
333 * order to take a mean value later on. */
335 geoip_determine_shares(time_t now
)
337 double v2_share
= 0.0, v3_share
= 0.0;
338 if (router_get_my_share_of_directory_requests(&v2_share
, &v3_share
) < 0)
340 if (last_time_determined_shares
) {
341 v2_share_times_seconds
+= v2_share
*
342 ((double) (now
- last_time_determined_shares
));
343 v3_share_times_seconds
+= v3_share
*
344 ((double) (now
- last_time_determined_shares
));
345 share_seconds
+= now
- last_time_determined_shares
;
347 last_time_determined_shares
= now
;
350 #ifdef ENABLE_GEOIP_STATS
351 /** Calculate which fraction of v2 and v3 directory requests aimed at caches
352 * have been sent to us since the last call of this function up to time
353 * <b>now</b>. Set *<b>v2_share_out</b> and *<b>v3_share_out</b> to the
354 * fractions of v2 and v3 protocol shares we expect to have seen. Reset
355 * counters afterwards. Return 0 on success, -1 on failure (e.g. when zero
356 * seconds have passed since the last call).*/
358 geoip_get_mean_shares(time_t now
, double *v2_share_out
,
359 double *v3_share_out
)
361 geoip_determine_shares(now
);
364 *v2_share_out
= v2_share_times_seconds
/ ((double) share_seconds
);
365 *v3_share_out
= v3_share_times_seconds
/ ((double) share_seconds
);
366 v2_share_times_seconds
= v3_share_times_seconds
= 0.0;
372 /** Note that we've seen a client connect from the IP <b>addr</b> (host order)
373 * at time <b>now</b>. Ignored by all but bridges and directories if
374 * configured accordingly. */
376 geoip_note_client_seen(geoip_client_action_t action
,
377 uint32_t addr
, time_t now
)
379 or_options_t
*options
= get_options();
380 clientmap_entry_t lookup
, *ent
;
381 if (action
== GEOIP_CLIENT_CONNECT
) {
382 #ifdef ENABLE_ENTRY_STATS
383 if (!options
->EntryStatistics
)
386 if (!(options
->BridgeRelay
&& options
->BridgeRecordUsageByCountry
))
389 /* Did we recently switch from bridge to relay or back? */
390 if (client_history_starts
> now
)
393 #ifndef ENABLE_GEOIP_STATS
396 if (options
->BridgeRelay
|| options
->BridgeAuthoritativeDir
)
401 /* Rotate the current request period. */
402 while (current_request_period_starts
+ REQUEST_HIST_PERIOD
< now
) {
403 if (!geoip_countries
)
404 geoip_countries
= smartlist_create();
405 if (!current_request_period_starts
) {
406 current_request_period_starts
= now
;
409 /* Also discard all items in the client history that are too old.
410 * (This only works here because bridge and directory stats are
411 * independent. Otherwise, we'd only want to discard those items
412 * with action GEOIP_CLIENT_NETWORKSTATUS{_V2}.) */
413 geoip_remove_old_clients(current_request_period_starts
);
414 /* Before rotating, write the current stats to disk. */
416 if (get_options()->EntryStatistics
)
418 /* Now rotate request period */
419 SMARTLIST_FOREACH(geoip_countries
, geoip_country_t
*, c
, {
420 memmove(&c
->n_v2_ns_requests
[0], &c
->n_v2_ns_requests
[1],
421 sizeof(uint32_t)*(REQUEST_HIST_LEN
-1));
422 memmove(&c
->n_v3_ns_requests
[0], &c
->n_v3_ns_requests
[1],
423 sizeof(uint32_t)*(REQUEST_HIST_LEN
-1));
424 c
->n_v2_ns_requests
[REQUEST_HIST_LEN
-1] = 0;
425 c
->n_v3_ns_requests
[REQUEST_HIST_LEN
-1] = 0;
427 current_request_period_starts
+= REQUEST_HIST_PERIOD
;
428 if (n_old_request_periods
< REQUEST_HIST_LEN
-1)
429 ++n_old_request_periods
;
432 lookup
.ipaddr
= addr
;
433 lookup
.action
= (int)action
;
434 ent
= HT_FIND(clientmap
, &client_history
, &lookup
);
436 ent
->last_seen_in_minutes
= now
/ 60;
438 ent
= tor_malloc_zero(sizeof(clientmap_entry_t
));
440 ent
->last_seen_in_minutes
= now
/ 60;
441 ent
->action
= (int)action
;
442 HT_INSERT(clientmap
, &client_history
, ent
);
445 if (action
== GEOIP_CLIENT_NETWORKSTATUS
||
446 action
== GEOIP_CLIENT_NETWORKSTATUS_V2
) {
447 int country_idx
= geoip_get_country_by_ip(addr
);
449 country_idx
= 0; /** unresolved requests are stored at index 0. */
450 if (country_idx
>= 0 && country_idx
< smartlist_len(geoip_countries
)) {
451 geoip_country_t
*country
= smartlist_get(geoip_countries
, country_idx
);
452 if (action
== GEOIP_CLIENT_NETWORKSTATUS
)
453 ++country
->n_v3_ns_requests
[REQUEST_HIST_LEN
-1];
455 ++country
->n_v2_ns_requests
[REQUEST_HIST_LEN
-1];
458 /* Periodically determine share of requests that we should see */
459 if (last_time_determined_shares
+ REQUEST_SHARE_INTERVAL
< now
)
460 geoip_determine_shares(now
);
463 if (!client_history_starts
) {
464 client_history_starts
= now
;
465 current_request_period_starts
= now
;
469 /** HT_FOREACH helper: remove a clientmap_entry_t from the hashtable if it's
470 * older than a certain time. */
472 _remove_old_client_helper(struct clientmap_entry_t
*ent
, void *_cutoff
)
474 time_t cutoff
= *(time_t*)_cutoff
/ 60;
475 if (ent
->last_seen_in_minutes
< cutoff
) {
483 /** Forget about all clients that haven't connected since <b>cutoff</b>.
484 * If <b>cutoff</b> is in the future, clients won't be added to the history
485 * until this time is reached. This is useful to prevent relays that switch
486 * to bridges from reporting unbelievable numbers of clients. */
488 geoip_remove_old_clients(time_t cutoff
)
490 clientmap_HT_FOREACH_FN(&client_history
,
491 _remove_old_client_helper
,
493 if (client_history_starts
< cutoff
)
494 client_history_starts
= cutoff
;
497 #ifdef ENABLE_GEOIP_STATS
498 /** How many responses are we giving to clients requesting v2 network
500 static uint32_t ns_v2_responses
[GEOIP_NS_RESPONSE_NUM
];
502 /** How many responses are we giving to clients requesting v3 network
504 static uint32_t ns_v3_responses
[GEOIP_NS_RESPONSE_NUM
];
507 /** Note that we've rejected a client's request for a v2 or v3 network
508 * status, encoded in <b>action</b> for reason <b>reason</b> at time
511 geoip_note_ns_response(geoip_client_action_t action
,
512 geoip_ns_response_t response
)
514 #ifdef ENABLE_GEOIP_STATS
515 static int arrays_initialized
= 0;
516 if (!arrays_initialized
) {
517 memset(ns_v2_responses
, 0, sizeof(ns_v2_responses
));
518 memset(ns_v3_responses
, 0, sizeof(ns_v3_responses
));
519 arrays_initialized
= 1;
521 tor_assert(action
== GEOIP_CLIENT_NETWORKSTATUS
||
522 action
== GEOIP_CLIENT_NETWORKSTATUS_V2
);
523 tor_assert(response
< GEOIP_NS_RESPONSE_NUM
);
524 if (action
== GEOIP_CLIENT_NETWORKSTATUS
)
525 ns_v3_responses
[response
]++;
527 ns_v2_responses
[response
]++;
534 /** Do not mention any country from which fewer than this number of IPs have
535 * connected. This conceivably avoids reporting information that could
536 * deanonymize users, though analysis is lacking. */
537 #define MIN_IPS_TO_NOTE_COUNTRY 1
538 /** Do not report any geoip data at all if we have fewer than this number of
539 * IPs to report about. */
540 #define MIN_IPS_TO_NOTE_ANYTHING 1
541 /** When reporting geoip data about countries, round up to the nearest
542 * multiple of this value. */
543 #define IP_GRANULARITY 8
545 /** Return the time at which we started recording geoip data. */
547 geoip_get_history_start(void)
549 return client_history_starts
;
/** Helper type: used to sort per-country totals by value. */
typedef struct c_hist_t {
  char country[3]; /**< Two-letter country code. */
  unsigned total; /**< Total IP addresses seen in this country. */
} c_hist_t;

/** Sorting helper: return -1, 1, or 0 based on comparison of two
 * c_hist_t.  Sort in descending order of total, and then by country
 * code (ascending, as ordered by strcmp()).
 * NOTE(review): the return-type line was missing from the listing;
 * "static int" is assumed -- confirm. */
static int
_c_hist_compare(const void **_a, const void **_b)
{
  const c_hist_t *a = *_a, *b = *_b;
  if (a->total > b->total)
    return -1;
  else if (a->total < b->total)
    return 1;
  else
    return strcmp(a->country, b->country);
}
573 /** When there are incomplete directory requests at the end of a 24-hour
574 * period, consider those requests running for longer than this timeout as
575 * failed, the others as still running. */
576 #define DIRREQ_TIMEOUT (10*60)
578 /** Entry in a map from either conn->global_identifier for direct requests
579 * or a unique circuit identifier for tunneled requests to request time,
580 * response size, and completion time of a network status request. Used to
581 * measure download times of requests to derive average client
583 typedef struct dirreq_map_entry_t
{
584 /** Unique identifier for this network status request; this is either the
585 * conn->global_identifier of the dir conn (direct request) or a new
586 * locally unique identifier of a circuit (tunneled request). This ID is
587 * only unique among other direct or tunneled requests, respectively. */
589 unsigned int state
:3; /**< State of this directory request. */
590 unsigned int type
:1; /**< Is this a direct or a tunneled request? */
591 unsigned int completed
:1; /**< Is this request complete? */
592 unsigned int action
:2; /**< Is this a v2 or v3 request? */
593 /** When did we receive the request and started sending the response? */
594 struct timeval request_time
;
595 size_t response_size
; /**< What is the size of the response in bytes? */
596 struct timeval completion_time
; /**< When did the request succeed? */
597 } dirreq_map_entry_t
;
599 /** Map of all directory requests asking for v2 or v3 network statuses in
600 * the current geoip-stats interval. Keys are strings starting with either
601 * "dir" for direct requests or "tun" for tunneled requests, followed by
602 * a unique uint64_t identifier represented as decimal string. Values are
603 * of type *<b>dirreq_map_entry_t</b>. */
604 static strmap_t
*dirreq_map
= NULL
;
606 /** Helper: Put <b>entry</b> into map of directory requests using
607 * <b>tunneled</b> and <b>dirreq_id</b> as key parts. If there is
608 * already an entry for that key, print out a BUG warning and return. */
610 _dirreq_map_put(dirreq_map_entry_t
*entry
, dirreq_type_t type
,
613 char key
[3+20+1]; /* dir|tun + 18446744073709551616 + \0 */
614 dirreq_map_entry_t
*ent
;
616 dirreq_map
= strmap_new();
617 tor_snprintf(key
, sizeof(key
), "%s"U64_FORMAT
,
618 type
== DIRREQ_TUNNELED
? "tun" : "dir",
619 U64_PRINTF_ARG(dirreq_id
));
620 ent
= strmap_get(dirreq_map
, key
);
622 log_warn(LD_BUG
, "Error when putting directory request into local "
623 "map. There is already an entry for the same identifier.");
626 strmap_set(dirreq_map
, key
, entry
);
629 /** Helper: Look up and return an entry in the map of directory requests
630 * using <b>tunneled</b> and <b>dirreq_id</b> as key parts. If there
631 * is no such entry, return NULL. */
632 static dirreq_map_entry_t
*
633 _dirreq_map_get(dirreq_type_t type
, uint64_t dirreq_id
)
635 char key
[3+20+1]; /* dir|tun + 18446744073709551616 + \0 */
637 dirreq_map
= strmap_new();
638 tor_snprintf(key
, sizeof(key
), "%s"U64_FORMAT
,
639 type
== DIRREQ_TUNNELED
? "tun" : "dir",
640 U64_PRINTF_ARG(dirreq_id
));
641 return strmap_get(dirreq_map
, key
);
644 /** Note that an either direct or tunneled (see <b>type</b>) directory
645 * request for a network status with unique ID <b>dirreq_id</b> of size
646 * <b>response_size</b> and action <b>action</b> (either v2 or v3) has
649 geoip_start_dirreq(uint64_t dirreq_id
, size_t response_size
,
650 geoip_client_action_t action
, dirreq_type_t type
)
652 dirreq_map_entry_t
*ent
= tor_malloc_zero(sizeof(dirreq_map_entry_t
));
653 ent
->dirreq_id
= dirreq_id
;
654 tor_gettimeofday(&ent
->request_time
);
655 ent
->response_size
= response_size
;
656 ent
->action
= action
;
658 _dirreq_map_put(ent
, type
, dirreq_id
);
661 /** Change the state of the either direct or tunneled (see <b>type</b>)
662 * directory request with <b>dirreq_id</b> to <b>new_state</b> and
663 * possibly mark it as completed. If no entry can be found for the given
664 * key parts (e.g., if this is a directory request that we are not
665 * measuring, or one that was started in the previous measurement period),
666 * or if the state cannot be advanced to <b>new_state</b>, do nothing. */
668 geoip_change_dirreq_state(uint64_t dirreq_id
, dirreq_type_t type
,
669 dirreq_state_t new_state
)
671 dirreq_map_entry_t
*ent
= _dirreq_map_get(type
, dirreq_id
);
674 if (new_state
== DIRREQ_IS_FOR_NETWORK_STATUS
)
676 if (new_state
- 1 != ent
->state
)
678 ent
->state
= new_state
;
679 if ((type
== DIRREQ_DIRECT
&&
680 new_state
== DIRREQ_FLUSHING_DIR_CONN_FINISHED
) ||
681 (type
== DIRREQ_TUNNELED
&&
682 new_state
== DIRREQ_OR_CONN_BUFFER_FLUSHED
)) {
683 tor_gettimeofday(&ent
->completion_time
);
#ifdef ENABLE_GEOIP_STATS
/** Return a newly allocated comma-separated string containing statistics
 * on network status downloads. The string contains the number of completed
 * requests, timeouts, and still running requests as well as the download
 * rates (bytes per second) by deciles and quartiles.
 *
 * Only requests whose action equals <b>action</b> and whose type equals
 * <b>type</b> are considered; every such request is removed from the map
 * of directory requests.  The three counters are rounded up to a multiple
 * of DIR_REQ_GRANULARITY before being reported.  The percentile fields are
 * appended only if the rounded number of completed requests reaches
 * MIN_DIR_REQ_RESPONSES.
 *
 * Return NULL if there is no map of directory requests, if <b>action</b>
 * is not a network status action, or if formatting fails.
 *
 * NOTE(review): several lines of this function were missing from the
 * listing (second parameter, local declarations, counter increments, the
 * array fill loop, cleanup); they are restored here from the visible uses
 * -- confirm against the original. */
char *
geoip_get_dirreq_history(geoip_client_action_t action,
                         dirreq_type_t type)
{
  char *result = NULL;
  smartlist_t *dirreq_times = NULL;
  uint32_t complete = 0, timeouts = 0, running = 0;
  uint32_t n_samples;
  int bufsize = 1024, written;
  struct timeval now;
  tor_gettimeofday(&now);
  if (!dirreq_map)
    return NULL;
  if (action != GEOIP_CLIENT_NETWORKSTATUS &&
      action != GEOIP_CLIENT_NETWORKSTATUS_V2)
    return NULL;
  dirreq_times = smartlist_create();
  STRMAP_FOREACH_MODIFY(dirreq_map, key, dirreq_map_entry_t *, ent) {
    if (ent->action == action && type == ent->type) {
      if (ent->completed) {
        uint32_t *bytes_per_second = tor_malloc_zero(sizeof(uint32_t));
        uint32_t time_diff = (uint32_t) tv_udiff(&ent->request_time,
                                                 &ent->completion_time);
        uint64_t rate;
        if (time_diff == 0)
          time_diff = 1; /* Avoid DIV/0; "instant" answers are impossible
                          * anyway by law of nature or something.. */
        /* Multiply in 64 bits: with a 32-bit size_t the product overflows
         * for any response larger than about 4 KB. */
        rate = ((uint64_t)1000000) * ent->response_size / time_diff;
        *bytes_per_second = rate > UINT32_MAX ? UINT32_MAX : (uint32_t) rate;
        smartlist_add(dirreq_times, bytes_per_second);
        complete++;
      } else {
        if (tv_udiff(&ent->request_time, &now) / 1000000 > DIRREQ_TIMEOUT)
          timeouts++;
        else
          running++;
      }
      tor_free(ent);
      MAP_DEL_CURRENT(key);
    }
  } STRMAP_FOREACH_END;
  /* Remember the exact number of samples before the counters are rounded:
   * the percentile indices below must never exceed what was collected. */
  n_samples = (uint32_t) smartlist_len(dirreq_times);
#define DIR_REQ_GRANULARITY 4
  complete = round_uint32_to_next_multiple_of(complete,
                                              DIR_REQ_GRANULARITY);
  timeouts = round_uint32_to_next_multiple_of(timeouts,
                                              DIR_REQ_GRANULARITY);
  running = round_uint32_to_next_multiple_of(running,
                                             DIR_REQ_GRANULARITY);
  result = tor_malloc_zero(bufsize);
  written = tor_snprintf(result, bufsize, "complete=%u,timeout=%u,"
                         "running=%u", complete, timeouts, running);
#define MIN_DIR_REQ_RESPONSES 16
  /* A rounded count of at least 16 means at least 13 real samples, so every
   * index of the form k*n_samples/10-1 and k*n_samples/4-1 is >= 0. */
  if (written >= 0 && complete >= MIN_DIR_REQ_RESPONSES) {
    uint32_t *dltimes = tor_malloc(sizeof(uint32_t) * n_samples);
    uint32_t i = 0;
    SMARTLIST_FOREACH(dirreq_times, uint32_t *, dlt, dltimes[i++] = *dlt);
    median_uint32(dltimes, n_samples); /* sort */
    written = tor_snprintf(result + written, bufsize - written,
                           ",min=%u,d1=%u,d2=%u,q1=%u,d3=%u,d4=%u,md=%u,"
                           "d6=%u,d7=%u,q3=%u,d8=%u,d9=%u,max=%u",
                           dltimes[0],
                           dltimes[1*n_samples/10-1],
                           dltimes[2*n_samples/10-1],
                           dltimes[1*n_samples/4-1],
                           dltimes[3*n_samples/10-1],
                           dltimes[4*n_samples/10-1],
                           dltimes[5*n_samples/10-1],
                           dltimes[6*n_samples/10-1],
                           dltimes[7*n_samples/10-1],
                           dltimes[3*n_samples/4-1],
                           dltimes[8*n_samples/10-1],
                           dltimes[9*n_samples/10-1],
                           dltimes[n_samples-1]);
    tor_free(dltimes);
  }
  if (written < 0) {
    /* Formatting failed: do not hand back a partial line. */
    tor_free(result);
    result = NULL;
  }
  /* The list owns the individual samples; release them on every path. */
  SMARTLIST_FOREACH(dirreq_times, uint32_t *, dlt, tor_free(dlt));
  smartlist_free(dirreq_times);
  return result;
}
#endif
777 /** How long do we have to have observed per-country request history before we
778 * are willing to talk about it? */
779 #define GEOIP_MIN_OBSERVATION_TIME (12*60*60)
781 /** Return a newly allocated comma-separated string containing entries for all
782 * the countries from which we've seen enough clients connect. The entry
783 * format is cc=num where num is the number of IPs we've seen connecting from
784 * that country, and cc is a lowercased country code. Returns NULL if we don't
785 * want to export geoip data yet. */
787 geoip_get_client_history(time_t now
, geoip_client_action_t action
)
790 int min_observation_time
= GEOIP_MIN_OBSERVATION_TIME
;
791 #ifdef ENABLE_GEOIP_STATS
792 min_observation_time
= DIR_RECORD_USAGE_MIN_OBSERVATION_TIME
;
794 if (!geoip_is_loaded())
796 if (client_history_starts
< (now
- min_observation_time
)) {
798 smartlist_t
*chunks
= NULL
;
799 smartlist_t
*entries
= NULL
;
800 int n_countries
= geoip_get_n_countries();
802 clientmap_entry_t
**ent
;
803 unsigned *counts
= tor_malloc_zero(sizeof(unsigned)*n_countries
);
805 unsigned granularity
= IP_GRANULARITY
;
806 #ifdef ENABLE_GEOIP_STATS
807 granularity
= DIR_RECORD_USAGE_GRANULARITY
;
809 HT_FOREACH(ent
, clientmap
, &client_history
) {
811 if ((*ent
)->action
!= (int)action
)
813 country
= geoip_get_country_by_ip((*ent
)->ipaddr
);
815 country
= 0; /** unresolved requests are stored at index 0. */
816 tor_assert(0 <= country
&& country
< n_countries
);
820 /* Don't record anything if we haven't seen enough IPs. */
821 if (total
< MIN_IPS_TO_NOTE_ANYTHING
)
823 /* Make a list of c_hist_t */
824 entries
= smartlist_create();
825 for (i
= 0; i
< n_countries
; ++i
) {
826 unsigned c
= counts
[i
];
827 const char *countrycode
;
829 /* Only report a country if it has a minimum number of IPs. */
830 if (c
>= MIN_IPS_TO_NOTE_COUNTRY
) {
831 c
= round_to_next_multiple_of(c
, granularity
);
832 countrycode
= geoip_get_country_name(i
);
833 ent
= tor_malloc(sizeof(c_hist_t
));
834 strlcpy(ent
->country
, countrycode
, sizeof(ent
->country
));
836 smartlist_add(entries
, ent
);
839 /* Sort entries. Note that we must do this _AFTER_ rounding, or else
840 * the sort order could leak info. */
841 smartlist_sort(entries
, _c_hist_compare
);
843 /* Build the result. */
844 chunks
= smartlist_create();
845 SMARTLIST_FOREACH(entries
, c_hist_t
*, ch
, {
846 tor_snprintf(buf
, sizeof(buf
), "%s=%u", ch
->country
, ch
->total
);
847 smartlist_add(chunks
, tor_strdup(buf
));
849 result
= smartlist_join_strings(chunks
, ",", 0, NULL
);
853 SMARTLIST_FOREACH(chunks
, char *, c
, tor_free(c
));
854 smartlist_free(chunks
);
857 SMARTLIST_FOREACH(entries
, c_hist_t
*, c
, tor_free(c
));
858 smartlist_free(entries
);
864 /** Return a newly allocated string holding the per-country request history
865 * for <b>action</b> in a format suitable for an extra-info document, or NULL
868 geoip_get_request_history(time_t now
, geoip_client_action_t action
)
870 smartlist_t
*entries
, *strings
;
872 unsigned granularity
= IP_GRANULARITY
;
873 int min_observation_time
= GEOIP_MIN_OBSERVATION_TIME
;
874 #ifdef ENABLE_GEOIP_STATS
875 granularity
= DIR_RECORD_USAGE_GRANULARITY
;
876 min_observation_time
= DIR_RECORD_USAGE_MIN_OBSERVATION_TIME
;
879 if (client_history_starts
>= (now
- min_observation_time
))
881 if (action
!= GEOIP_CLIENT_NETWORKSTATUS
&&
882 action
!= GEOIP_CLIENT_NETWORKSTATUS_V2
)
884 if (!geoip_countries
)
887 entries
= smartlist_create();
888 SMARTLIST_FOREACH(geoip_countries
, geoip_country_t
*, c
, {
889 uint32_t *n
= (action
== GEOIP_CLIENT_NETWORKSTATUS
)
890 ? c
->n_v3_ns_requests
: c
->n_v2_ns_requests
;
894 for (i
=0; i
< REQUEST_HIST_LEN
; ++i
)
898 ent
= tor_malloc_zero(sizeof(c_hist_t
));
899 strlcpy(ent
->country
, c
->countrycode
, sizeof(ent
->country
));
900 ent
->total
= round_to_next_multiple_of(tot
, granularity
);
901 smartlist_add(entries
, ent
);
903 smartlist_sort(entries
, _c_hist_compare
);
905 strings
= smartlist_create();
906 SMARTLIST_FOREACH(entries
, c_hist_t
*, ent
, {
908 tor_snprintf(buf
, sizeof(buf
), "%s=%u", ent
->country
, ent
->total
);
909 smartlist_add(strings
, tor_strdup(buf
));
911 result
= smartlist_join_strings(strings
, ",", 0, NULL
);
912 SMARTLIST_FOREACH(strings
, char *, cp
, tor_free(cp
));
913 SMARTLIST_FOREACH(entries
, c_hist_t
*, ent
, tor_free(ent
));
914 smartlist_free(strings
);
915 smartlist_free(entries
);
919 /** Store all our geoip statistics into $DATADIR/geoip-stats. */
921 dump_geoip_stats(void)
923 #ifdef ENABLE_GEOIP_STATS
924 time_t now
= time(NULL
);
925 time_t request_start
;
926 char *filename
= get_datadir_fname("geoip-stats");
927 char *data_v2
= NULL
, *data_v3
= NULL
;
928 char since
[ISO_TIME_LEN
+1], written
[ISO_TIME_LEN
+1];
929 open_file_t
*open_file
= NULL
;
930 double v2_share
= 0.0, v3_share
= 0.0;
934 data_v2
= geoip_get_client_history(now
, GEOIP_CLIENT_NETWORKSTATUS_V2
);
935 data_v3
= geoip_get_client_history(now
, GEOIP_CLIENT_NETWORKSTATUS
);
936 format_iso_time(since
, geoip_get_history_start());
937 format_iso_time(written
, now
);
938 out
= start_writing_to_stdio_file(filename
, OPEN_FLAGS_APPEND
,
942 if (fprintf(out
, "written %s\nstarted-at %s\nns-ips %s\nns-v2-ips %s\n",
944 data_v3
? data_v3
: "", data_v2
? data_v2
: "") < 0)
949 request_start
= current_request_period_starts
-
950 (n_old_request_periods
* REQUEST_HIST_PERIOD
);
951 format_iso_time(since
, request_start
);
952 data_v2
= geoip_get_request_history(now
, GEOIP_CLIENT_NETWORKSTATUS_V2
);
953 data_v3
= geoip_get_request_history(now
, GEOIP_CLIENT_NETWORKSTATUS
);
954 if (fprintf(out
, "requests-start %s\nn-ns-reqs %s\nn-v2-ns-reqs %s\n",
956 data_v3
? data_v3
: "", data_v2
? data_v2
: "") < 0)
958 #define RESPONSE_GRANULARITY 8
959 for (i
= 0; i
< GEOIP_NS_RESPONSE_NUM
; i
++) {
960 ns_v2_responses
[i
] = round_uint32_to_next_multiple_of(
961 ns_v2_responses
[i
], RESPONSE_GRANULARITY
);
962 ns_v3_responses
[i
] = round_uint32_to_next_multiple_of(
963 ns_v3_responses
[i
], RESPONSE_GRANULARITY
);
965 #undef RESPONSE_GRANULARITY
966 if (fprintf(out
, "n-ns-resp ok=%u,not-enough-sigs=%u,unavailable=%u,"
967 "not-found=%u,not-modified=%u,busy=%u\n",
968 ns_v3_responses
[GEOIP_SUCCESS
],
969 ns_v3_responses
[GEOIP_REJECT_NOT_ENOUGH_SIGS
],
970 ns_v3_responses
[GEOIP_REJECT_UNAVAILABLE
],
971 ns_v3_responses
[GEOIP_REJECT_NOT_FOUND
],
972 ns_v3_responses
[GEOIP_REJECT_NOT_MODIFIED
],
973 ns_v3_responses
[GEOIP_REJECT_BUSY
]) < 0)
975 if (fprintf(out
, "n-v2-ns-resp ok=%u,unavailable=%u,"
976 "not-found=%u,not-modified=%u,busy=%u\n",
977 ns_v2_responses
[GEOIP_SUCCESS
],
978 ns_v2_responses
[GEOIP_REJECT_UNAVAILABLE
],
979 ns_v2_responses
[GEOIP_REJECT_NOT_FOUND
],
980 ns_v2_responses
[GEOIP_REJECT_NOT_MODIFIED
],
981 ns_v2_responses
[GEOIP_REJECT_BUSY
]) < 0)
983 memset(ns_v2_responses
, 0, sizeof(ns_v2_responses
));
984 memset(ns_v3_responses
, 0, sizeof(ns_v3_responses
));
985 if (!geoip_get_mean_shares(now
, &v2_share
, &v3_share
)) {
986 if (fprintf(out
, "v2-ns-share %0.2lf%%\n", v2_share
*100) < 0)
988 if (fprintf(out
, "v3-ns-share %0.2lf%%\n", v3_share
*100) < 0)
992 data_v2
= geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS_V2
,
994 data_v3
= geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS
,
996 if (fprintf(out
, "ns-direct-dl %s\nns-v2-direct-dl %s\n",
997 data_v3
? data_v3
: "", data_v2
? data_v2
: "") < 0)
1001 data_v2
= geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS_V2
,
1003 data_v3
= geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS
,
1005 if (fprintf(out
, "ns-tunneled-dl %s\nns-v2-tunneled-dl %s\n",
1006 data_v3
? data_v3
: "", data_v2
? data_v2
: "") < 0)
1009 finish_writing_to_file(open_file
);
1013 abort_writing_to_file(open_file
);
1020 /** Store all our geoip statistics as entry guards into
1021 * $DATADIR/entry-stats. */
1023 dump_entry_stats(void)
1025 #ifdef ENABLE_ENTRY_STATS
1026 time_t now
= time(NULL
);
1027 char *filename
= get_datadir_fname("entry-stats");
1029 char since
[ISO_TIME_LEN
+1], written
[ISO_TIME_LEN
+1];
1030 open_file_t
*open_file
= NULL
;
1033 data
= geoip_get_client_history(now
, GEOIP_CLIENT_CONNECT
);
1034 format_iso_time(since
, geoip_get_history_start());
1035 format_iso_time(written
, now
);
1036 out
= start_writing_to_stdio_file(filename
, OPEN_FLAGS_APPEND
,
1040 if (fprintf(out
, "written %s\nstarted-at %s\nips %s\n",
1041 written
, since
, data
? data
: "") < 0)
1044 finish_writing_to_file(open_file
);
1048 abort_writing_to_file(open_file
);
1054 /** Helper used to implement GETINFO ip-to-country/... controller command. */
1056 getinfo_helper_geoip(control_connection_t
*control_conn
,
1057 const char *question
, char **answer
)
1060 if (geoip_is_loaded() && !strcmpstart(question
, "ip-to-country/")) {
1064 question
+= strlen("ip-to-country/");
1065 if (tor_inet_aton(question
, &in
) != 0) {
1066 ip
= ntohl(in
.s_addr
);
1067 c
= geoip_get_country_by_ip(ip
);
1068 *answer
= tor_strdup(geoip_get_country_name(c
));
1074 /** Release all storage held by the GeoIP database. */
1076 clear_geoip_db(void)
1078 if (geoip_countries
) {
1079 SMARTLIST_FOREACH(geoip_countries
, geoip_country_t
*, c
, tor_free(c
));
1080 smartlist_free(geoip_countries
);
1082 if (country_idxplus1_by_lc_code
)
1083 strmap_free(country_idxplus1_by_lc_code
, NULL
);
1084 if (geoip_entries
) {
1085 SMARTLIST_FOREACH(geoip_entries
, geoip_entry_t
*, ent
, tor_free(ent
));
1086 smartlist_free(geoip_entries
);
1088 geoip_countries
= NULL
;
1089 country_idxplus1_by_lc_code
= NULL
;
1090 geoip_entries
= NULL
;
1093 /** Release all storage held in this file. */
1095 geoip_free_all(void)
1097 clientmap_entry_t
**ent
, **next
, *this;
1098 for (ent
= HT_START(clientmap
, &client_history
); ent
!= NULL
; ent
= next
) {
1100 next
= HT_NEXT_RMV(clientmap
, &client_history
, ent
);
1103 HT_CLEAR(clientmap
, &client_history
);