1 /* Copyright (c) 2007-2009, The Tor Project, Inc. */
2 /* See LICENSE for licensing information */
6 * \brief Functions related to maintaining an IP-to-country database and to
7 * summarizing client connections by country.
14 static void clear_geoip_db(void);
15 static void dump_geoip_stats(void);
16 static void dump_entry_stats(void);
18 /** An entry from the GeoIP file: maps an IP range to a country. */
19 typedef struct geoip_entry_t
{
20 uint32_t ip_low
; /**< The lowest IP in the range, in host order */
21 uint32_t ip_high
; /**< The highest IP in the range, in host order */
22 intptr_t country
; /**< An index into geoip_countries */
25 /** For how many periods should we remember per-country request history? */
26 #define REQUEST_HIST_LEN 1
27 /** How long are the periods for which we should remember request history? */
28 #define REQUEST_HIST_PERIOD (24*60*60)
30 /** A per-country record for GeoIP request history. */
31 typedef struct geoip_country_t
{
33 uint32_t n_v2_ns_requests
[REQUEST_HIST_LEN
];
34 uint32_t n_v3_ns_requests
[REQUEST_HIST_LEN
];
37 /** A list of geoip_country_t */
38 static smartlist_t
*geoip_countries
= NULL
;
39 /** A map from lowercased country codes to their position in geoip_countries.
40 * The index is encoded in the pointer, and 1 is added so that NULL can mean
42 static strmap_t
*country_idxplus1_by_lc_code
= NULL
;
43 /** A list of all known geoip_entry_t, sorted by ip_low. */
44 static smartlist_t
*geoip_entries
= NULL
;
46 /** Return the index of the <b>country</b>'s entry in the GeoIP DB
47 * if it is a valid 2-letter country code, otherwise return -1.
50 geoip_get_country(const char *country
)
55 _idxplus1
= strmap_get_lc(country_idxplus1_by_lc_code
, country
);
59 idx
= ((uintptr_t)_idxplus1
)-1;
60 return (country_t
)idx
;
63 /** Add an entry to the GeoIP table, mapping all IPs between <b>low</b> and
64 * <b>high</b>, inclusive, to the 2-letter country code <b>country</b>.
67 geoip_add_entry(uint32_t low
, uint32_t high
, const char *country
)
76 _idxplus1
= strmap_get_lc(country_idxplus1_by_lc_code
, country
);
79 geoip_country_t
*c
= tor_malloc_zero(sizeof(geoip_country_t
));
80 strlcpy(c
->countrycode
, country
, sizeof(c
->countrycode
));
81 tor_strlower(c
->countrycode
);
82 smartlist_add(geoip_countries
, c
);
83 idx
= smartlist_len(geoip_countries
) - 1;
84 strmap_set_lc(country_idxplus1_by_lc_code
, country
, (void*)(idx
+1));
86 idx
= ((uintptr_t)_idxplus1
)-1;
89 geoip_country_t
*c
= smartlist_get(geoip_countries
, idx
);
90 tor_assert(!strcasecmp(c
->countrycode
, country
));
92 ent
= tor_malloc_zero(sizeof(geoip_entry_t
));
96 smartlist_add(geoip_entries
, ent
);
99 /** Add an entry to the GeoIP table, parsing it from <b>line</b>. The
100 * format is as for geoip_load_file(). */
102 geoip_parse_entry(const char *line
)
104 unsigned int low
, high
;
106 if (!geoip_countries
) {
107 geoip_countries
= smartlist_create();
108 geoip_entries
= smartlist_create();
109 country_idxplus1_by_lc_code
= strmap_new();
111 while (TOR_ISSPACE(*line
))
115 if (sscanf(line
,"%u,%u,%2s", &low
, &high
, b
) == 3) {
116 geoip_add_entry(low
, high
, b
);
118 } else if (sscanf(line
,"\"%u\",\"%u\",\"%2s\",", &low
, &high
, b
) == 3) {
119 geoip_add_entry(low
, high
, b
);
122 log_warn(LD_GENERAL
, "Unable to parse line from GEOIP file: %s",
128 /** Sorting helper: return -1, 1, or 0 based on comparison of two
131 _geoip_compare_entries(const void **_a
, const void **_b
)
133 const geoip_entry_t
*a
= *_a
, *b
= *_b
;
134 if (a
->ip_low
< b
->ip_low
)
136 else if (a
->ip_low
> b
->ip_low
)
142 /** bsearch helper: return -1, 1, or 0 based on comparison of an IP (a pointer
143 * to a uint32_t in host order) to a geoip_entry_t */
145 _geoip_compare_key_to_entry(const void *_key
, const void **_member
)
147 const uint32_t addr
= *(uint32_t *)_key
;
148 const geoip_entry_t
*entry
= *_member
;
149 if (addr
< entry
->ip_low
)
151 else if (addr
> entry
->ip_high
)
157 /** Return 1 if we should collect geoip stats on bridge users, and
158 * include them in our extrainfo descriptor. Else return 0. */
160 should_record_bridge_info(or_options_t
*options
)
162 return options
->BridgeRelay
&& options
->BridgeRecordUsageByCountry
;
165 /** Clear the GeoIP database and reload it from the file
166 * <b>filename</b>. Return 0 on success, -1 on failure.
168 * Recognized line formats are:
169 * INTIPLOW,INTIPHIGH,CC
171 * "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME"
172 * where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned
173 * integers, and CC is a country code.
175 * It also recognizes, and skips over, blank lines and lines that start
176 * with '#' (comments).
179 geoip_load_file(const char *filename
, or_options_t
*options
)
182 const char *msg
= "";
183 int severity
= options_need_geoip_info(options
, &msg
) ? LOG_WARN
: LOG_INFO
;
185 if (!(f
= fopen(filename
, "r"))) {
186 log_fn(severity
, LD_GENERAL
, "Failed to open GEOIP file %s. %s",
190 if (!geoip_countries
) {
191 geoip_country_t
*geoip_unresolved
;
192 geoip_countries
= smartlist_create();
193 /* Add a geoip_country_t for requests that could not be resolved to a
194 * country as first element (index 0) to geoip_countries. */
195 geoip_unresolved
= tor_malloc_zero(sizeof(geoip_country_t
));
196 strlcpy(geoip_unresolved
->countrycode
, "??",
197 sizeof(geoip_unresolved
->countrycode
));
198 smartlist_add(geoip_countries
, geoip_unresolved
);
199 country_idxplus1_by_lc_code
= strmap_new();
202 SMARTLIST_FOREACH(geoip_entries
, geoip_entry_t
*, e
, tor_free(e
));
203 smartlist_free(geoip_entries
);
205 geoip_entries
= smartlist_create();
206 log_notice(LD_GENERAL
, "Parsing GEOIP file.");
209 if (fgets(buf
, (int)sizeof(buf
), f
) == NULL
)
211 /* FFFF track full country name. */
212 geoip_parse_entry(buf
);
214 /*XXXX abort and return -1 if no entries/illformed?*/
217 smartlist_sort(geoip_entries
, _geoip_compare_entries
);
219 /* Okay, now we need to maybe change our mind about what is in which
221 refresh_all_country_info();
226 /** Given an IP address in host order, return a number representing the
227 * country to which that address belongs, or -1 for unknown. The return value
228 * will always be less than geoip_get_n_countries(). To decode it,
229 * call geoip_get_country_name().
232 geoip_get_country_by_ip(uint32_t ipaddr
)
237 ent
= smartlist_bsearch(geoip_entries
, &ipaddr
, _geoip_compare_key_to_entry
);
238 return ent
? (int)ent
->country
: -1;
241 /** Return the number of countries recognized by the GeoIP database. */
243 geoip_get_n_countries(void)
245 return (int) smartlist_len(geoip_countries
);
248 /** Return the two-letter country code associated with the number <b>num</b>,
249 * or "??" for an unknown value. */
251 geoip_get_country_name(country_t num
)
253 if (geoip_countries
&& num
>= 0 && num
< smartlist_len(geoip_countries
)) {
254 geoip_country_t
*c
= smartlist_get(geoip_countries
, num
);
255 return c
->countrycode
;
260 /** Return true iff we have loaded a GeoIP database.*/
262 geoip_is_loaded(void)
264 return geoip_countries
!= NULL
&& geoip_entries
!= NULL
;
267 /** Entry in a map from IP address to the last time we've seen an incoming
268 * connection from that IP address. Used by bridges only, to track which
269 * countries have them blocked. */
270 typedef struct clientmap_entry_t
{
271 HT_ENTRY(clientmap_entry_t
) node
;
273 unsigned int last_seen_in_minutes
:30;
274 unsigned int action
:2;
277 #define ACTION_MASK 3
279 /** Map from client IP address to last time seen. */
280 static HT_HEAD(clientmap
, clientmap_entry_t
) client_history
=
282 /** Time at which we started tracking client IP history. */
283 static time_t client_history_starts
= 0;
285 /** When did the current period of checking per-country request history
287 static time_t current_request_period_starts
= 0;
288 /** How many older request periods are we remembering? */
289 static int n_old_request_periods
= 0;
291 /** Hashtable helper: compute a hash of a clientmap_entry_t. */
292 static INLINE
unsigned
293 clientmap_entry_hash(const clientmap_entry_t
*a
)
295 return ht_improve_hash((unsigned) a
->ipaddr
);
297 /** Hashtable helper: compare two clientmap_entry_t values for equality. */
299 clientmap_entries_eq(const clientmap_entry_t
*a
, const clientmap_entry_t
*b
)
301 return a
->ipaddr
== b
->ipaddr
&& a
->action
== b
->action
;
304 HT_PROTOTYPE(clientmap
, clientmap_entry_t
, node
, clientmap_entry_hash
,
305 clientmap_entries_eq
);
306 HT_GENERATE(clientmap
, clientmap_entry_t
, node
, clientmap_entry_hash
,
307 clientmap_entries_eq
, 0.6, malloc
, realloc
, free
);
309 /** How often do we update our estimate of the share of v2 and v3 directory
310 * requests that is sent to us? We could just as well trigger updates of shares
311 * from network status updates, but that means adding a lot of calls into code
312 * that is independent of geoip stats (and keeping them up-to-date). We
313 * are perfectly fine with an approximation of 15-minute granularity. */
314 #define REQUEST_SHARE_INTERVAL (15 * 60)
316 /** When did we last determine which share of v2 and v3 directory requests
318 static time_t last_time_determined_shares
= 0;
320 /** Sum of products of v2 shares times the number of seconds for which we
321 * consider these shares as valid. */
322 static double v2_share_times_seconds
;
324 /** Sum of products of v3 shares times the number of seconds for which we
325 * consider these shares as valid. */
326 static double v3_share_times_seconds
;
328 /** Number of seconds over which we have been determining v2 and v3 shares. */
329 static int share_seconds
;
331 /** Try to determine which fraction of v2 and v3 directory requests aimed at
332 * caches will be sent to us at time <b>now</b> and store that value in
333 * order to take a mean value later on. */
335 geoip_determine_shares(time_t now
)
337 double v2_share
= 0.0, v3_share
= 0.0;
338 if (router_get_my_share_of_directory_requests(&v2_share
, &v3_share
) < 0)
340 if (last_time_determined_shares
) {
341 v2_share_times_seconds
+= v2_share
*
342 ((double) (now
- last_time_determined_shares
));
343 v3_share_times_seconds
+= v3_share
*
344 ((double) (now
- last_time_determined_shares
));
345 share_seconds
+= now
- last_time_determined_shares
;
347 last_time_determined_shares
= now
;
350 #ifdef ENABLE_DIRREQ_STATS
351 /** Calculate which fraction of v2 and v3 directory requests aimed at caches
352 * have been sent to us since the last call of this function up to time
353 * <b>now</b>. Set *<b>v2_share_out</b> and *<b>v3_share_out</b> to the
354 * fractions of v2 and v3 protocol shares we expect to have seen. Reset
355 * counters afterwards. Return 0 on success, -1 on failure (e.g. when zero
356 * seconds have passed since the last call).*/
358 geoip_get_mean_shares(time_t now
, double *v2_share_out
,
359 double *v3_share_out
)
361 geoip_determine_shares(now
);
364 *v2_share_out
= v2_share_times_seconds
/ ((double) share_seconds
);
365 *v3_share_out
= v3_share_times_seconds
/ ((double) share_seconds
);
366 v2_share_times_seconds
= v3_share_times_seconds
= 0.0;
372 /** Note that we've seen a client connect from the IP <b>addr</b> (host order)
373 * at time <b>now</b>. Ignored by all but bridges and directories if
374 * configured accordingly. */
376 geoip_note_client_seen(geoip_client_action_t action
,
377 uint32_t addr
, time_t now
)
379 or_options_t
*options
= get_options();
380 clientmap_entry_t lookup
, *ent
;
381 if (action
== GEOIP_CLIENT_CONNECT
) {
382 #ifdef ENABLE_ENTRY_STATS
383 if (!options
->EntryStatistics
)
386 if (!(options
->BridgeRelay
&& options
->BridgeRecordUsageByCountry
))
389 /* Did we recently switch from bridge to relay or back? */
390 if (client_history_starts
> now
)
393 #ifndef ENABLE_DIRREQ_STATS
396 if (options
->BridgeRelay
|| options
->BridgeAuthoritativeDir
||
397 !options
->DirReqStatistics
)
402 /* Rotate the current request period. */
403 while (current_request_period_starts
+ REQUEST_HIST_PERIOD
< now
) {
404 if (!geoip_countries
)
405 geoip_countries
= smartlist_create();
406 if (!current_request_period_starts
) {
407 current_request_period_starts
= now
;
410 /* Also discard all items in the client history that are too old.
411 * (This only works here because bridge and directory stats are
412 * independent. Otherwise, we'd only want to discard those items
413 * with action GEOIP_CLIENT_NETWORKSTATUS{_V2}.) */
414 geoip_remove_old_clients(current_request_period_starts
);
415 /* Before rotating, write the current stats to disk. */
417 if (get_options()->EntryStatistics
)
419 /* Now rotate request period */
420 SMARTLIST_FOREACH(geoip_countries
, geoip_country_t
*, c
, {
421 memmove(&c
->n_v2_ns_requests
[0], &c
->n_v2_ns_requests
[1],
422 sizeof(uint32_t)*(REQUEST_HIST_LEN
-1));
423 memmove(&c
->n_v3_ns_requests
[0], &c
->n_v3_ns_requests
[1],
424 sizeof(uint32_t)*(REQUEST_HIST_LEN
-1));
425 c
->n_v2_ns_requests
[REQUEST_HIST_LEN
-1] = 0;
426 c
->n_v3_ns_requests
[REQUEST_HIST_LEN
-1] = 0;
428 current_request_period_starts
+= REQUEST_HIST_PERIOD
;
429 if (n_old_request_periods
< REQUEST_HIST_LEN
-1)
430 ++n_old_request_periods
;
433 lookup
.ipaddr
= addr
;
434 lookup
.action
= (int)action
;
435 ent
= HT_FIND(clientmap
, &client_history
, &lookup
);
437 ent
->last_seen_in_minutes
= now
/ 60;
439 ent
= tor_malloc_zero(sizeof(clientmap_entry_t
));
441 ent
->last_seen_in_minutes
= now
/ 60;
442 ent
->action
= (int)action
;
443 HT_INSERT(clientmap
, &client_history
, ent
);
446 if (action
== GEOIP_CLIENT_NETWORKSTATUS
||
447 action
== GEOIP_CLIENT_NETWORKSTATUS_V2
) {
448 int country_idx
= geoip_get_country_by_ip(addr
);
450 country_idx
= 0; /** unresolved requests are stored at index 0. */
451 if (country_idx
>= 0 && country_idx
< smartlist_len(geoip_countries
)) {
452 geoip_country_t
*country
= smartlist_get(geoip_countries
, country_idx
);
453 if (action
== GEOIP_CLIENT_NETWORKSTATUS
)
454 ++country
->n_v3_ns_requests
[REQUEST_HIST_LEN
-1];
456 ++country
->n_v2_ns_requests
[REQUEST_HIST_LEN
-1];
459 /* Periodically determine share of requests that we should see */
460 if (last_time_determined_shares
+ REQUEST_SHARE_INTERVAL
< now
)
461 geoip_determine_shares(now
);
464 if (!client_history_starts
) {
465 client_history_starts
= now
;
466 current_request_period_starts
= now
;
470 /** HT_FOREACH helper: remove a clientmap_entry_t from the hashtable if it's
471 * older than a certain time. */
473 _remove_old_client_helper(struct clientmap_entry_t
*ent
, void *_cutoff
)
475 time_t cutoff
= *(time_t*)_cutoff
/ 60;
476 if (ent
->last_seen_in_minutes
< cutoff
) {
484 /** Forget about all clients that haven't connected since <b>cutoff</b>.
485 * If <b>cutoff</b> is in the future, clients won't be added to the history
486 * until this time is reached. This is useful to prevent relays that switch
487 * to bridges from reporting unbelievable numbers of clients. */
489 geoip_remove_old_clients(time_t cutoff
)
491 clientmap_HT_FOREACH_FN(&client_history
,
492 _remove_old_client_helper
,
494 if (client_history_starts
< cutoff
)
495 client_history_starts
= cutoff
;
498 #ifdef ENABLE_DIRREQ_STATS
499 /** How many responses are we giving to clients requesting v2 network
501 static uint32_t ns_v2_responses
[GEOIP_NS_RESPONSE_NUM
];
503 /** How many responses are we giving to clients requesting v3 network
505 static uint32_t ns_v3_responses
[GEOIP_NS_RESPONSE_NUM
];
508 /** Note that we've rejected a client's request for a v2 or v3 network
509 * status, encoded in <b>action</b> for reason <b>reason</b> at time
512 geoip_note_ns_response(geoip_client_action_t action
,
513 geoip_ns_response_t response
)
515 #ifdef ENABLE_DIRREQ_STATS
516 static int arrays_initialized
= 0;
517 if (!get_options()->DirReqStatistics
)
519 if (!arrays_initialized
) {
520 memset(ns_v2_responses
, 0, sizeof(ns_v2_responses
));
521 memset(ns_v3_responses
, 0, sizeof(ns_v3_responses
));
522 arrays_initialized
= 1;
524 tor_assert(action
== GEOIP_CLIENT_NETWORKSTATUS
||
525 action
== GEOIP_CLIENT_NETWORKSTATUS_V2
);
526 tor_assert(response
< GEOIP_NS_RESPONSE_NUM
);
527 if (action
== GEOIP_CLIENT_NETWORKSTATUS
)
528 ns_v3_responses
[response
]++;
530 ns_v2_responses
[response
]++;
537 /** Do not mention any country from which fewer than this number of IPs have
538 * connected. This conceivably avoids reporting information that could
539 * deanonymize users, though analysis is lacking. */
540 #define MIN_IPS_TO_NOTE_COUNTRY 1
541 /** Do not report any geoip data at all if we have fewer than this number of
542 * IPs to report about. */
543 #define MIN_IPS_TO_NOTE_ANYTHING 1
544 /** When reporting geoip data about countries, round up to the nearest
545 * multiple of this value. */
546 #define IP_GRANULARITY 8
548 /** Return the time at which we started recording geoip data. */
550 geoip_get_history_start(void)
552 return client_history_starts
;
555 /** Helper type: used to sort per-country totals by value. */
556 typedef struct c_hist_t
{
557 char country
[3]; /**< Two-letter country code. */
558 unsigned total
; /**< Total IP addresses seen in this country. */
561 /** Sorting helper: return -1, 1, or 0 based on comparison of two
562 * c_hist_t. Sort in descending order of total, and then by country
565 _c_hist_compare(const void **_a
, const void **_b
)
567 const c_hist_t
*a
= *_a
, *b
= *_b
;
568 if (a
->total
> b
->total
)
570 else if (a
->total
< b
->total
)
573 return strcmp(a
->country
, b
->country
);
576 /** When there are incomplete directory requests at the end of a 24-hour
577 * period, consider those requests running for longer than this timeout as
578 * failed, the others as still running. */
579 #define DIRREQ_TIMEOUT (10*60)
581 /** Entry in a map from either conn->global_identifier for direct requests
582 * or a unique circuit identifier for tunneled requests to request time,
583 * response size, and completion time of a network status request. Used to
584 * measure download times of requests to derive average client
586 typedef struct dirreq_map_entry_t
{
587 /** Unique identifier for this network status request; this is either the
588 * conn->global_identifier of the dir conn (direct request) or a new
589 * locally unique identifier of a circuit (tunneled request). This ID is
590 * only unique among other direct or tunneled requests, respectively. */
592 unsigned int state
:3; /**< State of this directory request. */
593 unsigned int type
:1; /**< Is this a direct or a tunneled request? */
594 unsigned int completed
:1; /**< Is this request complete? */
595 unsigned int action
:2; /**< Is this a v2 or v3 request? */
596 /** When did we receive the request and start sending the response? */
597 struct timeval request_time
;
598 size_t response_size
; /**< What is the size of the response in bytes? */
599 struct timeval completion_time
; /**< When did the request succeed? */
600 } dirreq_map_entry_t
;
602 /** Map of all directory requests asking for v2 or v3 network statuses in
603 * the current geoip-stats interval. Keys are strings starting with either
604 * "dir" for direct requests or "tun" for tunneled requests, followed by
605 * a unique uint64_t identifier represented as decimal string. Values are
606 * of type *<b>dirreq_map_entry_t</b>. */
607 static strmap_t
*dirreq_map
= NULL
;
609 /** Helper: Put <b>entry</b> into the map of directory requests using
610 * <b>type</b> and <b>dirreq_id</b> as key parts. If there is
611 * already an entry for that key, log a BUG warning and return. */
613 _dirreq_map_put(dirreq_map_entry_t
*entry
, dirreq_type_t type
,
616 char key
[3+20+1]; /* dir|tun + 18446744073709551616 + \0 */
617 dirreq_map_entry_t
*ent
;
619 dirreq_map
= strmap_new();
620 tor_snprintf(key
, sizeof(key
), "%s"U64_FORMAT
,
621 type
== DIRREQ_TUNNELED
? "tun" : "dir",
622 U64_PRINTF_ARG(dirreq_id
));
623 ent
= strmap_get(dirreq_map
, key
);
625 log_warn(LD_BUG
, "Error when putting directory request into local "
626 "map. There is already an entry for the same identifier.");
629 strmap_set(dirreq_map
, key
, entry
);
632 /** Helper: Look up and return an entry in the map of directory requests
633 * using <b>type</b> and <b>dirreq_id</b> as key parts. If there
634 * is no such entry, return NULL. */
635 static dirreq_map_entry_t
*
636 _dirreq_map_get(dirreq_type_t type
, uint64_t dirreq_id
)
638 char key
[3+20+1]; /* dir|tun + 18446744073709551616 + \0 */
640 dirreq_map
= strmap_new();
641 tor_snprintf(key
, sizeof(key
), "%s"U64_FORMAT
,
642 type
== DIRREQ_TUNNELED
? "tun" : "dir",
643 U64_PRINTF_ARG(dirreq_id
));
644 return strmap_get(dirreq_map
, key
);
647 /** Note that a directory request, either direct or tunneled (see
648 * <b>type</b>), for a network status with unique ID <b>dirreq_id</b> of size
649 * <b>response_size</b> and action <b>action</b> (either v2 or v3) has
652 geoip_start_dirreq(uint64_t dirreq_id
, size_t response_size
,
653 geoip_client_action_t action
, dirreq_type_t type
)
655 dirreq_map_entry_t
*ent
;
656 if (!get_options()->DirReqStatistics
)
658 ent
= tor_malloc_zero(sizeof(dirreq_map_entry_t
));
659 ent
->dirreq_id
= dirreq_id
;
660 tor_gettimeofday(&ent
->request_time
);
661 ent
->response_size
= response_size
;
662 ent
->action
= action
;
664 _dirreq_map_put(ent
, type
, dirreq_id
);
667 /** Change the state of the directory request (either direct or tunneled;
668 * see <b>type</b>) with <b>dirreq_id</b> to <b>new_state</b> and
669 * possibly mark it as completed. If no entry can be found for the given
670 * key parts (e.g., if this is a directory request that we are not
671 * measuring, or one that was started in the previous measurement period),
672 * or if the state cannot be advanced to <b>new_state</b>, do nothing. */
674 geoip_change_dirreq_state(uint64_t dirreq_id
, dirreq_type_t type
,
675 dirreq_state_t new_state
)
677 dirreq_map_entry_t
*ent
;
678 if (!get_options()->DirReqStatistics
)
680 ent
= _dirreq_map_get(type
, dirreq_id
);
683 if (new_state
== DIRREQ_IS_FOR_NETWORK_STATUS
)
685 if (new_state
- 1 != ent
->state
)
687 ent
->state
= new_state
;
688 if ((type
== DIRREQ_DIRECT
&&
689 new_state
== DIRREQ_FLUSHING_DIR_CONN_FINISHED
) ||
690 (type
== DIRREQ_TUNNELED
&&
691 new_state
== DIRREQ_OR_CONN_BUFFER_FLUSHED
)) {
692 tor_gettimeofday(&ent
->completion_time
);
697 #ifdef ENABLE_DIRREQ_STATS
698 /** Return a newly allocated comma-separated string containing statistics
699 * on network status downloads. The string contains the number of completed
700 * requests, timeouts, and still running requests as well as the download
701 * times by deciles and quartiles. Return NULL if we have not observed
702 * requests for long enough. */
704 geoip_get_dirreq_history(geoip_client_action_t action
,
708 smartlist_t
*dirreq_times
= NULL
;
709 uint32_t complete
= 0, timeouts
= 0, running
= 0;
710 int i
= 0, bufsize
= 1024, written
;
712 tor_gettimeofday(&now
);
715 if (action
!= GEOIP_CLIENT_NETWORKSTATUS
&&
716 action
!= GEOIP_CLIENT_NETWORKSTATUS_V2
)
718 dirreq_times
= smartlist_create();
719 STRMAP_FOREACH_MODIFY(dirreq_map
, key
, dirreq_map_entry_t
*, ent
) {
720 if (ent
->action
== action
&& type
== ent
->type
) {
721 if (ent
->completed
) {
722 uint32_t *bytes_per_second
= tor_malloc_zero(sizeof(uint32_t));
723 uint32_t time_diff
= (uint32_t) tv_udiff(&ent
->request_time
,
724 &ent
->completion_time
);
726 time_diff
= 1; /* Avoid DIV/0; "instant" answers are impossible
727 * anyway by law of nature or something.. */
728 *bytes_per_second
= 1000000 * ent
->response_size
/ time_diff
;
729 smartlist_add(dirreq_times
, bytes_per_second
);
732 if (tv_udiff(&ent
->request_time
, &now
) / 1000000 > DIRREQ_TIMEOUT
)
738 MAP_DEL_CURRENT(key
);
740 } STRMAP_FOREACH_END
;
741 #define DIR_REQ_GRANULARITY 4
742 complete
= round_uint32_to_next_multiple_of(complete
,
743 DIR_REQ_GRANULARITY
);
744 timeouts
= round_uint32_to_next_multiple_of(timeouts
,
745 DIR_REQ_GRANULARITY
);
746 running
= round_uint32_to_next_multiple_of(running
,
747 DIR_REQ_GRANULARITY
);
748 result
= tor_malloc_zero(bufsize
);
749 written
= tor_snprintf(result
, bufsize
, "complete=%u,timeout=%u,"
750 "running=%u", complete
, timeouts
, running
);
753 #define MIN_DIR_REQ_RESPONSES 16
754 if (complete
>= MIN_DIR_REQ_RESPONSES
) {
755 uint32_t *dltimes
= tor_malloc(sizeof(uint32_t) * complete
);
756 SMARTLIST_FOREACH(dirreq_times
, uint32_t *, dlt
, {
760 median_uint32(dltimes
, complete
); /* sort */
761 written
= tor_snprintf(result
+ written
, bufsize
- written
,
762 ",min=%u,d1=%u,d2=%u,q1=%u,d3=%u,d4=%u,md=%u,"
763 "d6=%u,d7=%u,q3=%u,d8=%u,d9=%u,max=%u",
765 dltimes
[1*complete
/10-1],
766 dltimes
[2*complete
/10-1],
767 dltimes
[1*complete
/4-1],
768 dltimes
[3*complete
/10-1],
769 dltimes
[4*complete
/10-1],
770 dltimes
[5*complete
/10-1],
771 dltimes
[6*complete
/10-1],
772 dltimes
[7*complete
/10-1],
773 dltimes
[3*complete
/4-1],
774 dltimes
[8*complete
/10-1],
775 dltimes
[9*complete
/10-1],
776 dltimes
[complete
-1]);
781 smartlist_free(dirreq_times
);
786 /** How long do we have to have observed per-country request history before we
787 * are willing to talk about it? */
788 #define GEOIP_MIN_OBSERVATION_TIME (12*60*60)
790 /** Return a newly allocated comma-separated string containing entries for all
791 * the countries from which we've seen enough clients connect. The entry
792 * format is cc=num where num is the number of IPs we've seen connecting from
793 * that country, and cc is a lowercased country code. Returns NULL if we don't
794 * want to export geoip data yet. */
796 geoip_get_client_history(time_t now
, geoip_client_action_t action
)
799 int min_observation_time
= GEOIP_MIN_OBSERVATION_TIME
;
800 #ifdef ENABLE_DIRREQ_STATS
801 min_observation_time
= DIR_RECORD_USAGE_MIN_OBSERVATION_TIME
;
803 if (!geoip_is_loaded())
805 if (client_history_starts
< (now
- min_observation_time
)) {
807 smartlist_t
*chunks
= NULL
;
808 smartlist_t
*entries
= NULL
;
809 int n_countries
= geoip_get_n_countries();
811 clientmap_entry_t
**ent
;
812 unsigned *counts
= tor_malloc_zero(sizeof(unsigned)*n_countries
);
814 unsigned granularity
= IP_GRANULARITY
;
815 #ifdef ENABLE_DIRREQ_STATS
816 granularity
= DIR_RECORD_USAGE_GRANULARITY
;
818 HT_FOREACH(ent
, clientmap
, &client_history
) {
820 if ((*ent
)->action
!= (int)action
)
822 country
= geoip_get_country_by_ip((*ent
)->ipaddr
);
824 country
= 0; /** unresolved requests are stored at index 0. */
825 tor_assert(0 <= country
&& country
< n_countries
);
829 /* Don't record anything if we haven't seen enough IPs. */
830 if (total
< MIN_IPS_TO_NOTE_ANYTHING
)
832 /* Make a list of c_hist_t */
833 entries
= smartlist_create();
834 for (i
= 0; i
< n_countries
; ++i
) {
835 unsigned c
= counts
[i
];
836 const char *countrycode
;
838 /* Only report a country if it has a minimum number of IPs. */
839 if (c
>= MIN_IPS_TO_NOTE_COUNTRY
) {
840 c
= round_to_next_multiple_of(c
, granularity
);
841 countrycode
= geoip_get_country_name(i
);
842 ent
= tor_malloc(sizeof(c_hist_t
));
843 strlcpy(ent
->country
, countrycode
, sizeof(ent
->country
));
845 smartlist_add(entries
, ent
);
848 /* Sort entries. Note that we must do this _AFTER_ rounding, or else
849 * the sort order could leak info. */
850 smartlist_sort(entries
, _c_hist_compare
);
852 /* Build the result. */
853 chunks
= smartlist_create();
854 SMARTLIST_FOREACH(entries
, c_hist_t
*, ch
, {
855 tor_snprintf(buf
, sizeof(buf
), "%s=%u", ch
->country
, ch
->total
);
856 smartlist_add(chunks
, tor_strdup(buf
));
858 result
= smartlist_join_strings(chunks
, ",", 0, NULL
);
862 SMARTLIST_FOREACH(chunks
, char *, c
, tor_free(c
));
863 smartlist_free(chunks
);
866 SMARTLIST_FOREACH(entries
, c_hist_t
*, c
, tor_free(c
));
867 smartlist_free(entries
);
873 /** Return a newly allocated string holding the per-country request history
874 * for <b>action</b> in a format suitable for an extra-info document, or NULL
877 geoip_get_request_history(time_t now
, geoip_client_action_t action
)
879 smartlist_t
*entries
, *strings
;
881 unsigned granularity
= IP_GRANULARITY
;
882 int min_observation_time
= GEOIP_MIN_OBSERVATION_TIME
;
883 #ifdef ENABLE_DIRREQ_STATS
884 granularity
= DIR_RECORD_USAGE_GRANULARITY
;
885 min_observation_time
= DIR_RECORD_USAGE_MIN_OBSERVATION_TIME
;
888 if (client_history_starts
>= (now
- min_observation_time
))
890 if (action
!= GEOIP_CLIENT_NETWORKSTATUS
&&
891 action
!= GEOIP_CLIENT_NETWORKSTATUS_V2
)
893 if (!geoip_countries
)
896 entries
= smartlist_create();
897 SMARTLIST_FOREACH(geoip_countries
, geoip_country_t
*, c
, {
898 uint32_t *n
= (action
== GEOIP_CLIENT_NETWORKSTATUS
)
899 ? c
->n_v3_ns_requests
: c
->n_v2_ns_requests
;
903 for (i
=0; i
< REQUEST_HIST_LEN
; ++i
)
907 ent
= tor_malloc_zero(sizeof(c_hist_t
));
908 strlcpy(ent
->country
, c
->countrycode
, sizeof(ent
->country
));
909 ent
->total
= round_to_next_multiple_of(tot
, granularity
);
910 smartlist_add(entries
, ent
);
912 smartlist_sort(entries
, _c_hist_compare
);
914 strings
= smartlist_create();
915 SMARTLIST_FOREACH(entries
, c_hist_t
*, ent
, {
917 tor_snprintf(buf
, sizeof(buf
), "%s=%u", ent
->country
, ent
->total
);
918 smartlist_add(strings
, tor_strdup(buf
));
920 result
= smartlist_join_strings(strings
, ",", 0, NULL
);
921 SMARTLIST_FOREACH(strings
, char *, cp
, tor_free(cp
));
922 SMARTLIST_FOREACH(entries
, c_hist_t
*, ent
, tor_free(ent
));
923 smartlist_free(strings
);
924 smartlist_free(entries
);
928 /** Store all our geoip statistics into $DATADIR/dirreq-stats. */
930 dump_geoip_stats(void)
932 #ifdef ENABLE_DIRREQ_STATS
933 time_t now
= time(NULL
);
934 time_t request_start
;
935 char *filename
= get_datadir_fname("dirreq-stats");
936 char *data_v2
= NULL
, *data_v3
= NULL
;
937 char since
[ISO_TIME_LEN
+1], written
[ISO_TIME_LEN
+1];
938 open_file_t
*open_file
= NULL
;
939 double v2_share
= 0.0, v3_share
= 0.0;
943 if (!get_options()->DirReqStatistics
)
946 data_v2
= geoip_get_client_history(now
, GEOIP_CLIENT_NETWORKSTATUS_V2
);
947 data_v3
= geoip_get_client_history(now
, GEOIP_CLIENT_NETWORKSTATUS
);
948 format_iso_time(since
, geoip_get_history_start());
949 format_iso_time(written
, now
);
950 out
= start_writing_to_stdio_file(filename
, OPEN_FLAGS_APPEND
,
954 if (fprintf(out
, "written %s\nstarted-at %s\nns-ips %s\nns-v2-ips %s\n",
956 data_v3
? data_v3
: "", data_v2
? data_v2
: "") < 0)
961 request_start
= current_request_period_starts
-
962 (n_old_request_periods
* REQUEST_HIST_PERIOD
);
963 format_iso_time(since
, request_start
);
964 data_v2
= geoip_get_request_history(now
, GEOIP_CLIENT_NETWORKSTATUS_V2
);
965 data_v3
= geoip_get_request_history(now
, GEOIP_CLIENT_NETWORKSTATUS
);
966 if (fprintf(out
, "requests-start %s\nn-ns-reqs %s\nn-v2-ns-reqs %s\n",
968 data_v3
? data_v3
: "", data_v2
? data_v2
: "") < 0)
970 #define RESPONSE_GRANULARITY 8
971 for (i
= 0; i
< GEOIP_NS_RESPONSE_NUM
; i
++) {
972 ns_v2_responses
[i
] = round_uint32_to_next_multiple_of(
973 ns_v2_responses
[i
], RESPONSE_GRANULARITY
);
974 ns_v3_responses
[i
] = round_uint32_to_next_multiple_of(
975 ns_v3_responses
[i
], RESPONSE_GRANULARITY
);
977 #undef RESPONSE_GRANULARITY
978 if (fprintf(out
, "n-ns-resp ok=%u,not-enough-sigs=%u,unavailable=%u,"
979 "not-found=%u,not-modified=%u,busy=%u\n",
980 ns_v3_responses
[GEOIP_SUCCESS
],
981 ns_v3_responses
[GEOIP_REJECT_NOT_ENOUGH_SIGS
],
982 ns_v3_responses
[GEOIP_REJECT_UNAVAILABLE
],
983 ns_v3_responses
[GEOIP_REJECT_NOT_FOUND
],
984 ns_v3_responses
[GEOIP_REJECT_NOT_MODIFIED
],
985 ns_v3_responses
[GEOIP_REJECT_BUSY
]) < 0)
987 if (fprintf(out
, "n-v2-ns-resp ok=%u,unavailable=%u,"
988 "not-found=%u,not-modified=%u,busy=%u\n",
989 ns_v2_responses
[GEOIP_SUCCESS
],
990 ns_v2_responses
[GEOIP_REJECT_UNAVAILABLE
],
991 ns_v2_responses
[GEOIP_REJECT_NOT_FOUND
],
992 ns_v2_responses
[GEOIP_REJECT_NOT_MODIFIED
],
993 ns_v2_responses
[GEOIP_REJECT_BUSY
]) < 0)
995 memset(ns_v2_responses
, 0, sizeof(ns_v2_responses
));
996 memset(ns_v3_responses
, 0, sizeof(ns_v3_responses
));
997 if (!geoip_get_mean_shares(now
, &v2_share
, &v3_share
)) {
998 if (fprintf(out
, "v2-ns-share %0.2lf%%\n", v2_share
*100) < 0)
1000 if (fprintf(out
, "v3-ns-share %0.2lf%%\n", v3_share
*100) < 0)
1004 data_v2
= geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS_V2
,
1006 data_v3
= geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS
,
1008 if (fprintf(out
, "ns-direct-dl %s\nns-v2-direct-dl %s\n",
1009 data_v3
? data_v3
: "", data_v2
? data_v2
: "") < 0)
1013 data_v2
= geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS_V2
,
1015 data_v3
= geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS
,
1017 if (fprintf(out
, "ns-tunneled-dl %s\nns-v2-tunneled-dl %s\n",
1018 data_v3
? data_v3
: "", data_v2
? data_v2
: "") < 0)
1021 finish_writing_to_file(open_file
);
1025 abort_writing_to_file(open_file
);
1032 /** Store all our geoip statistics as entry guards into
1033 * $DATADIR/entry-stats. */
1035 dump_entry_stats(void)
1037 #ifdef ENABLE_ENTRY_STATS
1038 time_t now
= time(NULL
);
1039 char *filename
= get_datadir_fname("entry-stats");
1041 char since
[ISO_TIME_LEN
+1], written
[ISO_TIME_LEN
+1];
1042 open_file_t
*open_file
= NULL
;
1045 data
= geoip_get_client_history(now
, GEOIP_CLIENT_CONNECT
);
1046 format_iso_time(since
, geoip_get_history_start());
1047 format_iso_time(written
, now
);
1048 out
= start_writing_to_stdio_file(filename
, OPEN_FLAGS_APPEND
,
1052 if (fprintf(out
, "written %s\nstarted-at %s\nips %s\n",
1053 written
, since
, data
? data
: "") < 0)
1056 finish_writing_to_file(open_file
);
1060 abort_writing_to_file(open_file
);
1066 /** Helper used to implement GETINFO ip-to-country/... controller command. */
1068 getinfo_helper_geoip(control_connection_t
*control_conn
,
1069 const char *question
, char **answer
)
1072 if (geoip_is_loaded() && !strcmpstart(question
, "ip-to-country/")) {
1076 question
+= strlen("ip-to-country/");
1077 if (tor_inet_aton(question
, &in
) != 0) {
1078 ip
= ntohl(in
.s_addr
);
1079 c
= geoip_get_country_by_ip(ip
);
1080 *answer
= tor_strdup(geoip_get_country_name(c
));
1086 /** Release all storage held by the GeoIP database. */
1088 clear_geoip_db(void)
1090 if (geoip_countries
) {
1091 SMARTLIST_FOREACH(geoip_countries
, geoip_country_t
*, c
, tor_free(c
));
1092 smartlist_free(geoip_countries
);
1094 if (country_idxplus1_by_lc_code
)
1095 strmap_free(country_idxplus1_by_lc_code
, NULL
);
1096 if (geoip_entries
) {
1097 SMARTLIST_FOREACH(geoip_entries
, geoip_entry_t
*, ent
, tor_free(ent
));
1098 smartlist_free(geoip_entries
);
1100 geoip_countries
= NULL
;
1101 country_idxplus1_by_lc_code
= NULL
;
1102 geoip_entries
= NULL
;
1105 /** Release all storage held in this file. */
1107 geoip_free_all(void)
1109 clientmap_entry_t
**ent
, **next
, *this;
1110 for (ent
= HT_START(clientmap
, &client_history
); ent
!= NULL
; ent
= next
) {
1112 next
= HT_NEXT_RMV(clientmap
, &client_history
, ent
);
1115 HT_CLEAR(clientmap
, &client_history
);