1 /* Copyright (c) 2007-2015, The Tor Project, Inc. */
2 /* See LICENSE for licensing information */
6 * \brief Functions related to maintaining an IP-to-country database;
7 * to summarizing client connections by country to entry guards, bridges,
8 * and directory servers; and for statistics on answering network status
19 #include "routerlist.h"
21 static void clear_geoip_db(void);
22 static void init_geoip_countries(void);
/** An entry from the GeoIP IPv4 file: maps an IPv4 range (both ends
 * inclusive) to a country. */
typedef struct geoip_ipv4_entry_t {
  uint32_t ip_low; /**< The lowest IP in the range, in host order */
  uint32_t ip_high; /**< The highest IP in the range, in host order */
  intptr_t country; /**< An index into geoip_countries */
} geoip_ipv4_entry_t;
/** An entry from the GeoIP IPv6 file: maps an IPv6 range (both ends
 * inclusive) to a country.  The bounds are stored as raw struct in6_addr
 * values and are compared bytewise through their s6_addr arrays, so "host
 * order" does not apply to them. */
typedef struct geoip_ipv6_entry_t {
  struct in6_addr ip_low; /**< The lowest IP in the range */
  struct in6_addr ip_high; /**< The highest IP in the range */
  intptr_t country; /**< An index into geoip_countries */
} geoip_ipv6_entry_t;
38 /** A per-country record for GeoIP request history. */
39 typedef struct geoip_country_t
{
41 uint32_t n_v3_ns_requests
;
44 /** A list of geoip_country_t */
45 static smartlist_t
*geoip_countries
= NULL
;
46 /** A map from lowercased country codes to their position in geoip_countries.
47 * The index is encoded in the pointer, and 1 is added so that NULL can mean
49 static strmap_t
*country_idxplus1_by_lc_code
= NULL
;
50 /** Lists of all known geoip_ipv4_entry_t and geoip_ipv6_entry_t, sorted
51 * by their respective ip_low. */
52 static smartlist_t
*geoip_ipv4_entries
= NULL
, *geoip_ipv6_entries
= NULL
;
54 /** SHA1 digest of the GeoIP files to include in extra-info descriptors. */
55 static char geoip_digest
[DIGEST_LEN
];
56 static char geoip6_digest
[DIGEST_LEN
];
58 /** Return the index of the <b>country</b>'s entry in the GeoIP
59 * country list if it is a valid 2-letter country code, otherwise
62 geoip_get_country
,(const char *country
))
67 idxplus1_
= strmap_get_lc(country_idxplus1_by_lc_code
, country
);
71 idx
= ((uintptr_t)idxplus1_
)-1;
72 return (country_t
)idx
;
75 /** Add an entry to a GeoIP table, mapping all IP addresses between <b>low</b>
76 * and <b>high</b>, inclusive, to the 2-letter country code <b>country</b>. */
78 geoip_add_entry(const tor_addr_t
*low
, const tor_addr_t
*high
,
84 if (tor_addr_family(low
) != tor_addr_family(high
))
86 if (tor_addr_compare(high
, low
, CMP_EXACT
) < 0)
89 idxplus1_
= strmap_get_lc(country_idxplus1_by_lc_code
, country
);
92 geoip_country_t
*c
= tor_malloc_zero(sizeof(geoip_country_t
));
93 strlcpy(c
->countrycode
, country
, sizeof(c
->countrycode
));
94 tor_strlower(c
->countrycode
);
95 smartlist_add(geoip_countries
, c
);
96 idx
= smartlist_len(geoip_countries
) - 1;
97 strmap_set_lc(country_idxplus1_by_lc_code
, country
, (void*)(idx
+1));
99 idx
= ((uintptr_t)idxplus1_
)-1;
102 geoip_country_t
*c
= smartlist_get(geoip_countries
, idx
);
103 tor_assert(!strcasecmp(c
->countrycode
, country
));
106 if (tor_addr_family(low
) == AF_INET
) {
107 geoip_ipv4_entry_t
*ent
= tor_malloc_zero(sizeof(geoip_ipv4_entry_t
));
108 ent
->ip_low
= tor_addr_to_ipv4h(low
);
109 ent
->ip_high
= tor_addr_to_ipv4h(high
);
111 smartlist_add(geoip_ipv4_entries
, ent
);
112 } else if (tor_addr_family(low
) == AF_INET6
) {
113 geoip_ipv6_entry_t
*ent
= tor_malloc_zero(sizeof(geoip_ipv6_entry_t
));
114 ent
->ip_low
= *tor_addr_to_in6(low
);
115 ent
->ip_high
= *tor_addr_to_in6(high
);
117 smartlist_add(geoip_ipv6_entries
, ent
);
121 /** Add an entry to the GeoIP table indicated by <b>family</b>,
122 * parsing it from <b>line</b>. The format is as for geoip_load_file(). */
124 geoip_parse_entry(const char *line
, sa_family_t family
)
126 tor_addr_t low_addr
, high_addr
;
128 char *country
= NULL
;
130 if (!geoip_countries
)
131 init_geoip_countries();
132 if (family
== AF_INET
) {
133 if (!geoip_ipv4_entries
)
134 geoip_ipv4_entries
= smartlist_new();
135 } else if (family
== AF_INET6
) {
136 if (!geoip_ipv6_entries
)
137 geoip_ipv6_entries
= smartlist_new();
139 log_warn(LD_GENERAL
, "Unsupported family: %d", family
);
143 while (TOR_ISSPACE(*line
))
148 if (family
== AF_INET
) {
149 unsigned int low
, high
;
150 if (tor_sscanf(line
,"%u,%u,%2s", &low
, &high
, c
) == 3 ||
151 tor_sscanf(line
,"\"%u\",\"%u\",\"%2s\",", &low
, &high
, c
) == 3) {
152 tor_addr_from_ipv4h(&low_addr
, low
);
153 tor_addr_from_ipv4h(&high_addr
, high
);
157 } else { /* AF_INET6 */
159 char *low_str
, *high_str
;
160 struct in6_addr low
, high
;
162 strlcpy(buf
, line
, sizeof(buf
));
163 low_str
= tor_strtok_r(buf
, ",", &strtok_state
);
166 high_str
= tor_strtok_r(NULL
, ",", &strtok_state
);
169 country
= tor_strtok_r(NULL
, "\n", &strtok_state
);
172 if (strlen(country
) != 2)
174 if (tor_inet_pton(AF_INET6
, low_str
, &low
) <= 0)
176 tor_addr_from_in6(&low_addr
, &low
);
177 if (tor_inet_pton(AF_INET6
, high_str
, &high
) <= 0)
179 tor_addr_from_in6(&high_addr
, &high
);
181 geoip_add_entry(&low_addr
, &high_addr
, country
);
185 log_warn(LD_GENERAL
, "Unable to parse line from GEOIP %s file: %s",
186 family
== AF_INET
? "IPv4" : "IPv6", escaped(line
));
190 /** Sorting helper: return -1, 1, or 0 based on comparison of two
191 * geoip_ipv4_entry_t */
193 geoip_ipv4_compare_entries_(const void **_a
, const void **_b
)
195 const geoip_ipv4_entry_t
*a
= *_a
, *b
= *_b
;
196 if (a
->ip_low
< b
->ip_low
)
198 else if (a
->ip_low
> b
->ip_low
)
204 /** bsearch helper: return -1, 1, or 0 based on comparison of an IP (a pointer
205 * to a uint32_t in host order) to a geoip_ipv4_entry_t */
207 geoip_ipv4_compare_key_to_entry_(const void *_key
, const void **_member
)
209 /* No alignment issue here, since _key really is a pointer to uint32_t */
210 const uint32_t addr
= *(uint32_t *)_key
;
211 const geoip_ipv4_entry_t
*entry
= *_member
;
212 if (addr
< entry
->ip_low
)
214 else if (addr
> entry
->ip_high
)
220 /** Sorting helper: return -1, 1, or 0 based on comparison of two
221 * geoip_ipv6_entry_t */
223 geoip_ipv6_compare_entries_(const void **_a
, const void **_b
)
225 const geoip_ipv6_entry_t
*a
= *_a
, *b
= *_b
;
226 return fast_memcmp(a
->ip_low
.s6_addr
, b
->ip_low
.s6_addr
,
227 sizeof(struct in6_addr
));
230 /** bsearch helper: return -1, 1, or 0 based on comparison of an IPv6
231 * (a pointer to a in6_addr) to a geoip_ipv6_entry_t */
233 geoip_ipv6_compare_key_to_entry_(const void *_key
, const void **_member
)
235 const struct in6_addr
*addr
= (struct in6_addr
*)_key
;
236 const geoip_ipv6_entry_t
*entry
= *_member
;
238 if (fast_memcmp(addr
->s6_addr
, entry
->ip_low
.s6_addr
,
239 sizeof(struct in6_addr
)) < 0)
241 else if (fast_memcmp(addr
->s6_addr
, entry
->ip_high
.s6_addr
,
242 sizeof(struct in6_addr
)) > 0)
248 /** Return 1 if we should collect geoip stats on bridge users, and
249 * include them in our extrainfo descriptor. Else return 0. */
251 should_record_bridge_info(const or_options_t
*options
)
253 return options
->BridgeRelay
&& options
->BridgeRecordUsageByCountry
;
256 /** Set up a new list of geoip countries with no countries (yet) set in it,
257 * except for the unknown country.
260 init_geoip_countries(void)
262 geoip_country_t
*geoip_unresolved
;
263 geoip_countries
= smartlist_new();
264 /* Add a geoip_country_t for requests that could not be resolved to a
265 * country as first element (index 0) to geoip_countries. */
266 geoip_unresolved
= tor_malloc_zero(sizeof(geoip_country_t
));
267 strlcpy(geoip_unresolved
->countrycode
, "??",
268 sizeof(geoip_unresolved
->countrycode
));
269 smartlist_add(geoip_countries
, geoip_unresolved
);
270 country_idxplus1_by_lc_code
= strmap_new();
271 strmap_set_lc(country_idxplus1_by_lc_code
, "??", (void*)(1));
274 /** Clear appropriate GeoIP database, based on <b>family</b>, and
275 * reload it from the file <b>filename</b>. Return 0 on success, -1 on
278 * Recognized line formats for IPv4 are:
279 * INTIPLOW,INTIPHIGH,CC
281 * "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME"
282 * where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned
283 * integers, and CC is a country code.
285 * Recognized line format for IPv6 is:
286 * IPV6LOW,IPV6HIGH,CC
287 * where IPV6LOW and IPV6HIGH are IPv6 addresses and CC is a country code.
289 * It also recognizes, and skips over, blank lines and lines that start
290 * with '#' (comments).
293 geoip_load_file(sa_family_t family
, const char *filename
)
296 const char *msg
= "";
297 const or_options_t
*options
= get_options();
298 int severity
= options_need_geoip_info(options
, &msg
) ? LOG_WARN
: LOG_INFO
;
299 crypto_digest_t
*geoip_digest_env
= NULL
;
301 tor_assert(family
== AF_INET
|| family
== AF_INET6
);
303 if (!(f
= tor_fopen_cloexec(filename
, "r"))) {
304 log_fn(severity
, LD_GENERAL
, "Failed to open GEOIP file %s. %s",
308 if (!geoip_countries
)
309 init_geoip_countries();
311 if (family
== AF_INET
) {
312 if (geoip_ipv4_entries
) {
313 SMARTLIST_FOREACH(geoip_ipv4_entries
, geoip_ipv4_entry_t
*, e
,
315 smartlist_free(geoip_ipv4_entries
);
317 geoip_ipv4_entries
= smartlist_new();
318 } else { /* AF_INET6 */
319 if (geoip_ipv6_entries
) {
320 SMARTLIST_FOREACH(geoip_ipv6_entries
, geoip_ipv6_entry_t
*, e
,
322 smartlist_free(geoip_ipv6_entries
);
324 geoip_ipv6_entries
= smartlist_new();
326 geoip_digest_env
= crypto_digest_new();
328 log_notice(LD_GENERAL
, "Parsing GEOIP %s file %s.",
329 (family
== AF_INET
) ? "IPv4" : "IPv6", filename
);
332 if (fgets(buf
, (int)sizeof(buf
), f
) == NULL
)
334 crypto_digest_add_bytes(geoip_digest_env
, buf
, strlen(buf
));
335 /* FFFF track full country name. */
336 geoip_parse_entry(buf
, family
);
338 /*XXXX abort and return -1 if no entries/illformed?*/
341 /* Sort list and remember file digests so that we can include it in
342 * our extra-info descriptors. */
343 if (family
== AF_INET
) {
344 smartlist_sort(geoip_ipv4_entries
, geoip_ipv4_compare_entries_
);
345 /* Okay, now we need to maybe change our mind about what is in
346 * which country. We do this for IPv4 only since that's what we
347 * store in node->country. */
348 refresh_all_country_info();
349 crypto_digest_get_digest(geoip_digest_env
, geoip_digest
, DIGEST_LEN
);
352 smartlist_sort(geoip_ipv6_entries
, geoip_ipv6_compare_entries_
);
353 crypto_digest_get_digest(geoip_digest_env
, geoip6_digest
, DIGEST_LEN
);
355 crypto_digest_free(geoip_digest_env
);
360 /** Given an IP address in host order, return a number representing the
361 * country to which that address belongs, -1 for "No geoip information
362 * available", or 0 for the 'unknown country'. The return value will always
363 * be less than geoip_get_n_countries(). To decode it, call
364 * geoip_get_country_name().
367 geoip_get_country_by_ipv4(uint32_t ipaddr
)
369 geoip_ipv4_entry_t
*ent
;
370 if (!geoip_ipv4_entries
)
372 ent
= smartlist_bsearch(geoip_ipv4_entries
, &ipaddr
,
373 geoip_ipv4_compare_key_to_entry_
);
374 return ent
? (int)ent
->country
: 0;
377 /** Given an IPv6 address, return a number representing the country to
378 * which that address belongs, -1 for "No geoip information available", or
379 * 0 for the 'unknown country'. The return value will always be less than
380 * geoip_get_n_countries(). To decode it, call geoip_get_country_name().
383 geoip_get_country_by_ipv6(const struct in6_addr
*addr
)
385 geoip_ipv6_entry_t
*ent
;
387 if (!geoip_ipv6_entries
)
389 ent
= smartlist_bsearch(geoip_ipv6_entries
, addr
,
390 geoip_ipv6_compare_key_to_entry_
);
391 return ent
? (int)ent
->country
: 0;
394 /** Given an IP address, return a number representing the country to which
395 * that address belongs, -1 for "No geoip information available", or 0 for
396 * the 'unknown country'. The return value will always be less than
397 * geoip_get_n_countries(). To decode it, call geoip_get_country_name().
400 geoip_get_country_by_addr
,(const tor_addr_t
*addr
))
402 if (tor_addr_family(addr
) == AF_INET
) {
403 return geoip_get_country_by_ipv4(tor_addr_to_ipv4h(addr
));
404 } else if (tor_addr_family(addr
) == AF_INET6
) {
405 return geoip_get_country_by_ipv6(tor_addr_to_in6(addr
));
411 /** Return the number of countries recognized by the GeoIP country list. */
413 geoip_get_n_countries
,(void))
415 if (!geoip_countries
)
416 init_geoip_countries();
417 return (int) smartlist_len(geoip_countries
);
420 /** Return the two-letter country code associated with the number <b>num</b>,
421 * or "??" for an unknown value. */
423 geoip_get_country_name(country_t num
)
425 if (geoip_countries
&& num
>= 0 && num
< smartlist_len(geoip_countries
)) {
426 geoip_country_t
*c
= smartlist_get(geoip_countries
, num
);
427 return c
->countrycode
;
432 /** Return true iff we have loaded a GeoIP database.*/
434 geoip_is_loaded
,(sa_family_t family
))
436 tor_assert(family
== AF_INET
|| family
== AF_INET6
);
437 if (geoip_countries
== NULL
)
439 if (family
== AF_INET
)
440 return geoip_ipv4_entries
!= NULL
;
442 return geoip_ipv6_entries
!= NULL
;
445 /** Return the hex-encoded SHA1 digest of the loaded GeoIP file. The
446 * result does not need to be deallocated, but will be overwritten by the
447 * next call of hex_str(). */
449 geoip_db_digest(sa_family_t family
)
451 tor_assert(family
== AF_INET
|| family
== AF_INET6
);
452 if (family
== AF_INET
)
453 return hex_str(geoip_digest
, DIGEST_LEN
);
455 return hex_str(geoip6_digest
, DIGEST_LEN
);
458 /** Entry in a map from IP address to the last time we've seen an incoming
459 * connection from that IP address. Used by bridges only, to track which
460 * countries have them blocked. */
461 typedef struct clientmap_entry_t
{
462 HT_ENTRY(clientmap_entry_t
) node
;
464 /* Name of pluggable transport used by this client. NULL if no
465 pluggable transport was used. */
466 char *transport_name
;
468 /** Time when we last saw this IP address, in MINUTES since the epoch.
470 * (This will run out of space around 4011 CE. If Tor is still in use around
471 * 4000 CE, please remember to add more bits to last_seen_in_minutes.) */
472 unsigned int last_seen_in_minutes
:30;
473 unsigned int action
:2;
/** Largest allowable value for last_seen_in_minutes.  (It's a 30-bit field,
 * so it can hold up to (1u<<30)-1, or 0x3fffffffu.)
 */
#define MAX_LAST_SEEN_IN_MINUTES 0x3fffffffu
481 /** Map from client IP address to last time seen. */
482 static HT_HEAD(clientmap
, clientmap_entry_t
) client_history
=
485 /** Hashtable helper: compute a hash of a clientmap_entry_t. */
486 static INLINE
unsigned
487 clientmap_entry_hash(const clientmap_entry_t
*a
)
489 unsigned h
= (unsigned) tor_addr_hash(&a
->addr
);
491 if (a
->transport_name
)
492 h
+= (unsigned) siphash24g(a
->transport_name
, strlen(a
->transport_name
));
496 /** Hashtable helper: compare two clientmap_entry_t values for equality. */
498 clientmap_entries_eq(const clientmap_entry_t
*a
, const clientmap_entry_t
*b
)
500 if (strcmp_opt(a
->transport_name
, b
->transport_name
))
503 return !tor_addr_compare(&a
->addr
, &b
->addr
, CMP_EXACT
) &&
504 a
->action
== b
->action
;
507 HT_PROTOTYPE(clientmap
, clientmap_entry_t
, node
, clientmap_entry_hash
,
508 clientmap_entries_eq
);
509 HT_GENERATE2(clientmap
, clientmap_entry_t
, node
, clientmap_entry_hash
,
510 clientmap_entries_eq
, 0.6, tor_reallocarray_
, tor_free_
)
512 /** Free all storage held by <b>ent</b>. */
514 clientmap_entry_free(clientmap_entry_t
*ent
)
519 tor_free(ent
->transport_name
);
523 /** Clear history of connecting clients used by entry and bridge stats. */
525 client_history_clear(void)
527 clientmap_entry_t
**ent
, **next
, *this;
528 for (ent
= HT_START(clientmap
, &client_history
); ent
!= NULL
;
530 if ((*ent
)->action
== GEOIP_CLIENT_CONNECT
) {
532 next
= HT_NEXT_RMV(clientmap
, &client_history
, ent
);
533 clientmap_entry_free(this);
535 next
= HT_NEXT(clientmap
, &client_history
, ent
);
540 /** Note that we've seen a client connect from the IP <b>addr</b>
541 * at time <b>now</b>. Ignored by all but bridges and directories if
542 * configured accordingly. */
544 geoip_note_client_seen(geoip_client_action_t action
,
545 const tor_addr_t
*addr
,
546 const char *transport_name
,
549 const or_options_t
*options
= get_options();
550 clientmap_entry_t lookup
, *ent
;
551 memset(&lookup
, 0, sizeof(clientmap_entry_t
));
553 if (action
== GEOIP_CLIENT_CONNECT
) {
554 /* Only remember statistics as entry guard or as bridge. */
555 if (!options
->EntryStatistics
&&
556 (!(options
->BridgeRelay
&& options
->BridgeRecordUsageByCountry
)))
559 /* Only gather directory-request statistics if configured, and
560 * forcibly disable them on bridge authorities. */
561 if (!options
->DirReqStatistics
|| options
->BridgeAuthoritativeDir
)
565 log_debug(LD_GENERAL
, "Seen client from '%s' with transport '%s'.",
566 safe_str_client(fmt_addr((addr
))),
567 transport_name
? transport_name
: "<no transport>");
569 tor_addr_copy(&lookup
.addr
, addr
);
570 lookup
.action
= (int)action
;
571 lookup
.transport_name
= (char*) transport_name
;
572 ent
= HT_FIND(clientmap
, &client_history
, &lookup
);
575 ent
= tor_malloc_zero(sizeof(clientmap_entry_t
));
576 tor_addr_copy(&ent
->addr
, addr
);
578 ent
->transport_name
= tor_strdup(transport_name
);
579 ent
->action
= (int)action
;
580 HT_INSERT(clientmap
, &client_history
, ent
);
582 if (now
/ 60 <= (int)MAX_LAST_SEEN_IN_MINUTES
&& now
>= 0)
583 ent
->last_seen_in_minutes
= (unsigned)(now
/60);
585 ent
->last_seen_in_minutes
= 0;
587 if (action
== GEOIP_CLIENT_NETWORKSTATUS
) {
588 int country_idx
= geoip_get_country_by_addr(addr
);
590 country_idx
= 0; /** unresolved requests are stored at index 0. */
591 if (country_idx
>= 0 && country_idx
< smartlist_len(geoip_countries
)) {
592 geoip_country_t
*country
= smartlist_get(geoip_countries
, country_idx
);
593 ++country
->n_v3_ns_requests
;
598 /** HT_FOREACH helper: remove a clientmap_entry_t from the hashtable if it's
599 * older than a certain time. */
601 remove_old_client_helper_(struct clientmap_entry_t
*ent
, void *_cutoff
)
603 time_t cutoff
= *(time_t*)_cutoff
/ 60;
604 if (ent
->last_seen_in_minutes
< cutoff
) {
605 clientmap_entry_free(ent
);
612 /** Forget about all clients that haven't connected since <b>cutoff</b>. */
614 geoip_remove_old_clients(time_t cutoff
)
616 clientmap_HT_FOREACH_FN(&client_history
,
617 remove_old_client_helper_
,
621 /** How many responses are we giving to clients requesting v3 network
623 static uint32_t ns_v3_responses
[GEOIP_NS_RESPONSE_NUM
];
625 /** Note that we've rejected a client's request for a v3 network status
626 * for reason <b>reason</b> at time <b>now</b>. */
628 geoip_note_ns_response(geoip_ns_response_t response
)
630 static int arrays_initialized
= 0;
631 if (!get_options()->DirReqStatistics
)
633 if (!arrays_initialized
) {
634 memset(ns_v3_responses
, 0, sizeof(ns_v3_responses
));
635 arrays_initialized
= 1;
637 tor_assert(response
< GEOIP_NS_RESPONSE_NUM
);
638 ns_v3_responses
[response
]++;
/** Do not mention any country from which fewer than this number of IPs have
 * connected.  This conceivably avoids reporting information that could
 * deanonymize users, though analysis is lacking. */
#define MIN_IPS_TO_NOTE_COUNTRY 1
/** Do not report any geoip data at all if we have fewer than this number of
 * IPs to report about. */
#define MIN_IPS_TO_NOTE_ANYTHING 1
/** When reporting geoip data about countries, round up to the nearest
 * multiple of this value. */
#define IP_GRANULARITY 8
/** Helper type: used to sort per-country totals by value. */
typedef struct c_hist_t {
  char country[3]; /**< Two-letter country code, NUL-terminated. */
  unsigned total; /**< Total IP addresses seen in this country. */
} c_hist_t;
658 /** Sorting helper: return -1, 1, or 0 based on comparison of two
659 * c_hist_t. Sort in descending order of total, and then by country
662 c_hist_compare_(const void **_a
, const void **_b
)
664 const c_hist_t
*a
= *_a
, *b
= *_b
;
665 if (a
->total
> b
->total
)
667 else if (a
->total
< b
->total
)
670 return strcmp(a
->country
, b
->country
);
/** When there are incomplete directory requests at the end of a 24-hour
 * period, consider those requests running for longer than this timeout
 * (in seconds) as failed, the others as still running. */
#define DIRREQ_TIMEOUT (10*60)
678 /** Entry in a map from either chan->global_identifier for direct requests
679 * or a unique circuit identifier for tunneled requests to request time,
680 * response size, and completion time of a network status request. Used to
681 * measure download times of requests to derive average client
683 typedef struct dirreq_map_entry_t
{
684 HT_ENTRY(dirreq_map_entry_t
) node
;
685 /** Unique identifier for this network status request; this is either the
686 * chan->global_identifier of the dir channel (direct request) or a new
687 * locally unique identifier of a circuit (tunneled request). This ID is
688 * only unique among other direct or tunneled requests, respectively. */
690 unsigned int state
:3; /**< State of this directory request. */
691 unsigned int type
:1; /**< Is this a direct or a tunneled request? */
692 unsigned int completed
:1; /**< Is this request complete? */
693 /** When did we receive the request and started sending the response? */
694 struct timeval request_time
;
695 size_t response_size
; /**< What is the size of the response in bytes? */
696 struct timeval completion_time
; /**< When did the request succeed? */
697 } dirreq_map_entry_t
;
699 /** Map of all directory requests asking for v2 or v3 network statuses in
700 * the current geoip-stats interval. Values are
701 * of type *<b>dirreq_map_entry_t</b>. */
702 static HT_HEAD(dirreqmap
, dirreq_map_entry_t
) dirreq_map
=
706 dirreq_map_ent_eq(const dirreq_map_entry_t
*a
,
707 const dirreq_map_entry_t
*b
)
709 return a
->dirreq_id
== b
->dirreq_id
&& a
->type
== b
->type
;
712 /* DOCDOC dirreq_map_ent_hash */
714 dirreq_map_ent_hash(const dirreq_map_entry_t
*entry
)
716 unsigned u
= (unsigned) entry
->dirreq_id
;
717 u
+= entry
->type
<< 20;
721 HT_PROTOTYPE(dirreqmap
, dirreq_map_entry_t
, node
, dirreq_map_ent_hash
,
723 HT_GENERATE2(dirreqmap
, dirreq_map_entry_t
, node
, dirreq_map_ent_hash
,
724 dirreq_map_ent_eq
, 0.6, tor_reallocarray_
, tor_free_
)
726 /** Helper: Put <b>entry</b> into map of directory requests using
727 * <b>type</b> and <b>dirreq_id</b> as key parts. If there is
728 * already an entry for that key, print out a BUG warning and return. */
730 dirreq_map_put_(dirreq_map_entry_t
*entry
, dirreq_type_t type
,
733 dirreq_map_entry_t
*old_ent
;
734 tor_assert(entry
->type
== type
);
735 tor_assert(entry
->dirreq_id
== dirreq_id
);
737 /* XXXX we could switch this to HT_INSERT some time, since it seems that
738 * this bug doesn't happen. But since this function doesn't seem to be
739 * critical-path, it's sane to leave it alone. */
740 old_ent
= HT_REPLACE(dirreqmap
, &dirreq_map
, entry
);
741 if (old_ent
&& old_ent
!= entry
) {
742 log_warn(LD_BUG
, "Error when putting directory request into local "
743 "map. There was already an entry for the same identifier.");
748 /** Helper: Look up and return an entry in the map of directory requests
749 * using <b>type</b> and <b>dirreq_id</b> as key parts. If there
750 * is no such entry, return NULL. */
751 static dirreq_map_entry_t
*
752 dirreq_map_get_(dirreq_type_t type
, uint64_t dirreq_id
)
754 dirreq_map_entry_t lookup
;
756 lookup
.dirreq_id
= dirreq_id
;
757 return HT_FIND(dirreqmap
, &dirreq_map
, &lookup
);
760 /** Note that an either direct or tunneled (see <b>type</b>) directory
761 * request for a v3 network status with unique ID <b>dirreq_id</b> of size
762 * <b>response_size</b> has started. */
764 geoip_start_dirreq(uint64_t dirreq_id
, size_t response_size
,
767 dirreq_map_entry_t
*ent
;
768 if (!get_options()->DirReqStatistics
)
770 ent
= tor_malloc_zero(sizeof(dirreq_map_entry_t
));
771 ent
->dirreq_id
= dirreq_id
;
772 tor_gettimeofday(&ent
->request_time
);
773 ent
->response_size
= response_size
;
775 dirreq_map_put_(ent
, type
, dirreq_id
);
778 /** Change the state of the either direct or tunneled (see <b>type</b>)
779 * directory request with <b>dirreq_id</b> to <b>new_state</b> and
780 * possibly mark it as completed. If no entry can be found for the given
781 * key parts (e.g., if this is a directory request that we are not
782 * measuring, or one that was started in the previous measurement period),
783 * or if the state cannot be advanced to <b>new_state</b>, do nothing. */
785 geoip_change_dirreq_state(uint64_t dirreq_id
, dirreq_type_t type
,
786 dirreq_state_t new_state
)
788 dirreq_map_entry_t
*ent
;
789 if (!get_options()->DirReqStatistics
)
791 ent
= dirreq_map_get_(type
, dirreq_id
);
794 if (new_state
== DIRREQ_IS_FOR_NETWORK_STATUS
)
796 if (new_state
- 1 != ent
->state
)
798 ent
->state
= new_state
;
799 if ((type
== DIRREQ_DIRECT
&&
800 new_state
== DIRREQ_FLUSHING_DIR_CONN_FINISHED
) ||
801 (type
== DIRREQ_TUNNELED
&&
802 new_state
== DIRREQ_CHANNEL_BUFFER_FLUSHED
)) {
803 tor_gettimeofday(&ent
->completion_time
);
808 /** Return the bridge-ip-transports string that should be inserted in
809 * our extra-info descriptor. Return NULL if the bridge-ip-transports
810 * line should be empty. */
812 geoip_get_transport_history(void)
814 unsigned granularity
= IP_GRANULARITY
;
815 /** String hash table (name of transport) -> (number of users). */
816 strmap_t
*transport_counts
= strmap_new();
818 /** Smartlist that contains copies of the names of the transports
819 that have been used. */
820 smartlist_t
*transports_used
= smartlist_new();
822 /* Special string to signify that no transport was used for this
823 connection. Pluggable transport names can't have symbols in their
824 names, so this string will never collide with a real transport. */
825 static const char* no_transport_str
= "<OR>";
827 clientmap_entry_t
**ent
;
828 const char *transport_name
= NULL
;
829 smartlist_t
*string_chunks
= smartlist_new();
830 char *the_string
= NULL
;
832 /* If we haven't seen any clients yet, return NULL. */
833 if (HT_EMPTY(&client_history
))
836 /** We do the following steps to form the transport history string:
837 * a) Foreach client that uses a pluggable transport, we increase the
838 * times that transport was used by one. If the client did not use
839 * a transport, we increase the number of times someone connected
840 * without obfuscation.
841 * b) Foreach transport we observed, we write its transport history
842 * string and push it to string_chunks. So, for example, if we've
843 * seen 665 obfs2 clients, we write "obfs2=665".
844 * c) We concatenate string_chunks to form the final string.
847 log_debug(LD_GENERAL
,"Starting iteration for transport history. %d clients.",
848 HT_SIZE(&client_history
));
850 /* Loop through all clients. */
851 HT_FOREACH(ent
, clientmap
, &client_history
) {
854 transport_name
= (*ent
)->transport_name
;
856 transport_name
= no_transport_str
;
858 /* Increase the count for this transport name. */
859 ptr
= strmap_get(transport_counts
, transport_name
);
860 val
= (uintptr_t)ptr
;
863 strmap_set(transport_counts
, transport_name
, ptr
);
865 /* If it's the first time we see this transport, note it. */
867 smartlist_add(transports_used
, tor_strdup(transport_name
));
869 log_debug(LD_GENERAL
, "Client from '%s' with transport '%s'. "
870 "I've now seen %d clients.",
871 safe_str_client(fmt_addr(&(*ent
)->addr
)),
872 transport_name
? transport_name
: "<no transport>",
876 /* Sort the transport names (helps with unit testing). */
877 smartlist_sort_strings(transports_used
);
879 /* Loop through all seen transports. */
880 SMARTLIST_FOREACH_BEGIN(transports_used
, const char *, transport_name
) {
881 void *transport_count_ptr
= strmap_get(transport_counts
, transport_name
);
882 uintptr_t transport_count
= (uintptr_t) transport_count_ptr
;
884 log_debug(LD_GENERAL
, "We got "U64_FORMAT
" clients with transport '%s'.",
885 U64_PRINTF_ARG((uint64_t)transport_count
), transport_name
);
887 smartlist_add_asprintf(string_chunks
, "%s="U64_FORMAT
,
889 U64_PRINTF_ARG(round_uint64_to_next_multiple_of(
890 (uint64_t)transport_count
,
892 } SMARTLIST_FOREACH_END(transport_name
);
894 the_string
= smartlist_join_strings(string_chunks
, ",", 0, NULL
);
896 log_debug(LD_GENERAL
, "Final bridge-ip-transports string: '%s'", the_string
);
899 strmap_free(transport_counts
, NULL
);
900 SMARTLIST_FOREACH(transports_used
, char *, s
, tor_free(s
));
901 smartlist_free(transports_used
);
902 SMARTLIST_FOREACH(string_chunks
, char *, s
, tor_free(s
));
903 smartlist_free(string_chunks
);
908 /** Return a newly allocated comma-separated string containing statistics
909 * on network status downloads. The string contains the number of completed
910 * requests, timeouts, and still running requests as well as the download
911 * times by deciles and quartiles. Return NULL if we have not observed
912 * requests for long enough. */
914 geoip_get_dirreq_history(dirreq_type_t type
)
917 smartlist_t
*dirreq_completed
= NULL
;
918 uint32_t complete
= 0, timeouts
= 0, running
= 0;
919 int bufsize
= 1024, written
;
920 dirreq_map_entry_t
**ptr
, **next
, *ent
;
923 tor_gettimeofday(&now
);
924 dirreq_completed
= smartlist_new();
925 for (ptr
= HT_START(dirreqmap
, &dirreq_map
); ptr
; ptr
= next
) {
927 if (ent
->type
!= type
) {
928 next
= HT_NEXT(dirreqmap
, &dirreq_map
, ptr
);
931 if (ent
->completed
) {
932 smartlist_add(dirreq_completed
, ent
);
934 next
= HT_NEXT_RMV(dirreqmap
, &dirreq_map
, ptr
);
936 if (tv_mdiff(&ent
->request_time
, &now
) / 1000 > DIRREQ_TIMEOUT
)
940 next
= HT_NEXT_RMV(dirreqmap
, &dirreq_map
, ptr
);
945 #define DIR_REQ_GRANULARITY 4
946 complete
= round_uint32_to_next_multiple_of(complete
,
947 DIR_REQ_GRANULARITY
);
948 timeouts
= round_uint32_to_next_multiple_of(timeouts
,
949 DIR_REQ_GRANULARITY
);
950 running
= round_uint32_to_next_multiple_of(running
,
951 DIR_REQ_GRANULARITY
);
952 result
= tor_malloc_zero(bufsize
);
953 written
= tor_snprintf(result
, bufsize
, "complete=%u,timeout=%u,"
954 "running=%u", complete
, timeouts
, running
);
960 #define MIN_DIR_REQ_RESPONSES 16
961 if (complete
>= MIN_DIR_REQ_RESPONSES
) {
963 /* We may have rounded 'completed' up. Here we want to use the
965 complete
= smartlist_len(dirreq_completed
);
966 dltimes
= tor_calloc(complete
, sizeof(uint32_t));
967 SMARTLIST_FOREACH_BEGIN(dirreq_completed
, dirreq_map_entry_t
*, ent
) {
968 uint32_t bytes_per_second
;
969 uint32_t time_diff
= (uint32_t) tv_mdiff(&ent
->request_time
,
970 &ent
->completion_time
);
972 time_diff
= 1; /* Avoid DIV/0; "instant" answers are impossible
973 * by law of nature or something, but a millisecond
974 * is a bit greater than "instantly" */
975 bytes_per_second
= (uint32_t)(1000 * ent
->response_size
/ time_diff
);
976 dltimes
[ent_sl_idx
] = bytes_per_second
;
977 } SMARTLIST_FOREACH_END(ent
);
978 median_uint32(dltimes
, complete
); /* sorts as a side effect. */
979 written
= tor_snprintf(result
+ written
, bufsize
- written
,
980 ",min=%u,d1=%u,d2=%u,q1=%u,d3=%u,d4=%u,md=%u,"
981 "d6=%u,d7=%u,q3=%u,d8=%u,d9=%u,max=%u",
983 dltimes
[1*complete
/10-1],
984 dltimes
[2*complete
/10-1],
985 dltimes
[1*complete
/4-1],
986 dltimes
[3*complete
/10-1],
987 dltimes
[4*complete
/10-1],
988 dltimes
[5*complete
/10-1],
989 dltimes
[6*complete
/10-1],
990 dltimes
[7*complete
/10-1],
991 dltimes
[3*complete
/4-1],
992 dltimes
[8*complete
/10-1],
993 dltimes
[9*complete
/10-1],
994 dltimes
[complete
-1]);
1000 SMARTLIST_FOREACH(dirreq_completed
, dirreq_map_entry_t
*, ent
,
1002 smartlist_free(dirreq_completed
);
1006 /** Store a newly allocated comma-separated string in
1007 * *<b>country_str</b> containing entries for all the countries from
1008 * which we've seen enough clients connect as a bridge, directory
1009 * server, or entry guard. The entry format is cc=num where num is the
1010 * number of IPs we've seen connecting from that country, and cc is a
1011 * lowercased country code. *<b>country_str</b> is set to NULL if
1012 * we're not ready to export per country data yet.
1014 * Store a newly allocated comma-separated string in *<b>ipver_str</b>
1015 * containing entries for clients connecting over IPv4 and IPv6. The
1016 * format is family=num where num is the number of IPs we've seen
1017 * connecting over that protocol family, and family is 'v4' or 'v6'.
1019 * Return 0 on success and -1 if we're missing geoip data. */
1021 geoip_get_client_history(geoip_client_action_t action
,
1022 char **country_str
, char **ipver_str
)
1024 unsigned granularity
= IP_GRANULARITY
;
1025 smartlist_t
*entries
= NULL
;
1026 int n_countries
= geoip_get_n_countries();
1028 clientmap_entry_t
**ent
;
1029 unsigned *counts
= NULL
;
1031 unsigned ipv4_count
= 0, ipv6_count
= 0;
1033 if (!geoip_is_loaded(AF_INET
) && !geoip_is_loaded(AF_INET6
))
1036 counts
= tor_calloc(n_countries
, sizeof(unsigned));
1037 HT_FOREACH(ent
, clientmap
, &client_history
) {
1039 if ((*ent
)->action
!= (int)action
)
1041 country
= geoip_get_country_by_addr(&(*ent
)->addr
);
1043 country
= 0; /** unresolved requests are stored at index 0. */
1044 tor_assert(0 <= country
&& country
< n_countries
);
1047 switch (tor_addr_family(&(*ent
)->addr
)) {
1057 smartlist_t
*chunks
= smartlist_new();
1058 smartlist_add_asprintf(chunks
, "v4=%u",
1059 round_to_next_multiple_of(ipv4_count
, granularity
));
1060 smartlist_add_asprintf(chunks
, "v6=%u",
1061 round_to_next_multiple_of(ipv6_count
, granularity
));
1062 *ipver_str
= smartlist_join_strings(chunks
, ",", 0, NULL
);
1063 SMARTLIST_FOREACH(chunks
, char *, c
, tor_free(c
));
1064 smartlist_free(chunks
);
1067 /* Don't record per country data if we haven't seen enough IPs. */
1068 if (total
< MIN_IPS_TO_NOTE_ANYTHING
) {
1071 *country_str
= NULL
;
1075 /* Make a list of c_hist_t */
1076 entries
= smartlist_new();
1077 for (i
= 0; i
< n_countries
; ++i
) {
1078 unsigned c
= counts
[i
];
1079 const char *countrycode
;
1081 /* Only report a country if it has a minimum number of IPs. */
1082 if (c
>= MIN_IPS_TO_NOTE_COUNTRY
) {
1083 c
= round_to_next_multiple_of(c
, granularity
);
1084 countrycode
= geoip_get_country_name(i
);
1085 ent
= tor_malloc(sizeof(c_hist_t
));
1086 strlcpy(ent
->country
, countrycode
, sizeof(ent
->country
));
1088 smartlist_add(entries
, ent
);
1091 /* Sort entries. Note that we must do this _AFTER_ rounding, or else
1092 * the sort order could leak info. */
1093 smartlist_sort(entries
, c_hist_compare_
);
1096 smartlist_t
*chunks
= smartlist_new();
1097 SMARTLIST_FOREACH(entries
, c_hist_t
*, ch
, {
1098 smartlist_add_asprintf(chunks
, "%s=%u", ch
->country
, ch
->total
);
1100 *country_str
= smartlist_join_strings(chunks
, ",", 0, NULL
);
1101 SMARTLIST_FOREACH(chunks
, char *, c
, tor_free(c
));
1102 smartlist_free(chunks
);
1105 SMARTLIST_FOREACH(entries
, c_hist_t
*, c
, tor_free(c
));
1106 smartlist_free(entries
);
1112 /** Return a newly allocated string holding the per-country request history
1113 * for v3 network statuses in a format suitable for an extra-info document,
1114 * or NULL on failure.
 *
 * Each reported country is rendered as cc=num, where cc is copied from the
 * country's countrycode and num is its n_v3_ns_requests counter passed
 * through round_to_next_multiple_of() with IP_GRANULARITY. The entries are
 * sorted with c_hist_compare_() and joined with commas; the temporary
 * lists and their elements are freed before returning. */
1116 geoip_get_request_history(void)
1118 smartlist_t
*entries
, *strings
;
1120 unsigned granularity
= IP_GRANULARITY
;
1122 if (!geoip_countries
)
1125 entries
= smartlist_new();
1126 SMARTLIST_FOREACH_BEGIN(geoip_countries
, geoip_country_t
*, c
) {
1129 tot
= c
->n_v3_ns_requests
;
1132 ent
= tor_malloc_zero(sizeof(c_hist_t
));
1133 strlcpy(ent
->country
, c
->countrycode
, sizeof(ent
->country
));
1134 ent
->total
= round_to_next_multiple_of(tot
, granularity
);
1135 smartlist_add(entries
, ent
);
1136 } SMARTLIST_FOREACH_END(c
);
1137 smartlist_sort(entries
, c_hist_compare_
);
1139 strings
= smartlist_new();
1140 SMARTLIST_FOREACH(entries
, c_hist_t
*, ent
, {
1141 smartlist_add_asprintf(strings
, "%s=%u", ent
->country
, ent
->total
);
1143 result
= smartlist_join_strings(strings
, ",", 0, NULL
);
1144 SMARTLIST_FOREACH(strings
, char *, cp
, tor_free(cp
));
1145 SMARTLIST_FOREACH(entries
, c_hist_t
*, ent
, tor_free(ent
));
1146 smartlist_free(strings
);
1147 smartlist_free(entries
);
1151 /** Start time of directory request stats or 0 if we're not collecting
1152 * directory request statistics. Assigned by geoip_dirreq_stats_init() and
 * geoip_reset_dirreq_stats(). */
1153 static time_t start_of_dirreq_stats_interval
;
1155 /** Initialize directory request stats by recording <b>now</b> as the
 * start of the current dirreq measurement interval. */
1157 geoip_dirreq_stats_init(time_t now
)
1159 start_of_dirreq_stats_interval
= now
;
1162 /** Reset counters for dirreq stats: zero every country's
 * n_v3_ns_requests, remove the GEOIP_CLIENT_NETWORKSTATUS entries from
 * client_history, clear ns_v3_responses, remove every entry from
 * dirreq_map, and record <b>now</b> as the start of the next interval. */
1164 geoip_reset_dirreq_stats(time_t now
)
1166 SMARTLIST_FOREACH(geoip_countries
, geoip_country_t
*, c
, {
1167 c
->n_v3_ns_requests
= 0;
1170 clientmap_entry_t
**ent
, **next
, *this;
1171 for (ent
= HT_START(clientmap
, &client_history
); ent
!= NULL
;
/* Only network-status entries are removed; others are stepped over. */
1173 if ((*ent
)->action
== GEOIP_CLIENT_NETWORKSTATUS
) {
1175 next
= HT_NEXT_RMV(clientmap
, &client_history
, ent
);
1176 clientmap_entry_free(this);
1178 next
= HT_NEXT(clientmap
, &client_history
, ent
);
1182 memset(ns_v3_responses
, 0, sizeof(ns_v3_responses
));
1184 dirreq_map_entry_t
**ent
, **next
, *this;
1185 for (ent
= HT_START(dirreqmap
, &dirreq_map
); ent
!= NULL
; ent
= next
) {
1187 next
= HT_NEXT_RMV(dirreqmap
, &dirreq_map
, ent
);
1191 start_of_dirreq_stats_interval
= now
;
1194 /** Stop collecting directory request stats in a way that we can re-start
1195 * doing so in geoip_dirreq_stats_init(). Implemented by calling
 * geoip_reset_dirreq_stats() with a start time of 0, the value that
 * geoip_format_dirreq_stats() and geoip_dirreq_stats_write() treat as
 * "not initialized". */
1197 geoip_dirreq_stats_term(void)
1199 geoip_reset_dirreq_stats(0);
1202 /** Return a newly allocated string containing the dirreq statistics
1203 * until <b>now</b>, or NULL if we're not collecting dirreq stats. Caller
1204 * must ensure start_of_dirreq_stats_interval is in the past.
 *
 * Side effect: every ns_v3_responses counter is overwritten in place with
 * its value passed through round_uint32_to_next_multiple_of() (granularity
 * 8) before being formatted. The intermediate per-section strings are
 * freed before returning; a NULL section is printed as an empty string. */
1206 geoip_format_dirreq_stats(time_t now
)
1208 char t
[ISO_TIME_LEN
+1];
1210 char *v3_ips_string
, *v3_reqs_string
, *v3_direct_dl_string
,
1211 *v3_tunneled_dl_string
;
1214 if (!start_of_dirreq_stats_interval
)
1215 return NULL
; /* Not initialized. */
1217 tor_assert(now
>= start_of_dirreq_stats_interval
);
1219 format_iso_time(t
, now
);
1220 geoip_get_client_history(GEOIP_CLIENT_NETWORKSTATUS
, &v3_ips_string
, NULL
);
1221 v3_reqs_string
= geoip_get_request_history();
1223 #define RESPONSE_GRANULARITY 8
1224 for (i
= 0; i
< GEOIP_NS_RESPONSE_NUM
; i
++) {
1225 ns_v3_responses
[i
] = round_uint32_to_next_multiple_of(
1226 ns_v3_responses
[i
], RESPONSE_GRANULARITY
);
1228 #undef RESPONSE_GRANULARITY
1230 v3_direct_dl_string
= geoip_get_dirreq_history(DIRREQ_DIRECT
);
1231 v3_tunneled_dl_string
= geoip_get_dirreq_history(DIRREQ_TUNNELED
);
1233 /* Put everything together into a single string. */
/* NOTE(review): the interval length is printed with %d although the
 * argument is cast to unsigned; geoip_format_entry_stats() uses %u for the
 * same quantity -- confirm which is intended. */
1234 tor_asprintf(&result
, "dirreq-stats-end %s (%d s)\n"
1235 "dirreq-v3-ips %s\n"
1236 "dirreq-v3-reqs %s\n"
1237 "dirreq-v3-resp ok=%u,not-enough-sigs=%u,unavailable=%u,"
1238 "not-found=%u,not-modified=%u,busy=%u\n"
1239 "dirreq-v3-direct-dl %s\n"
1240 "dirreq-v3-tunneled-dl %s\n",
1242 (unsigned) (now
- start_of_dirreq_stats_interval
),
1243 v3_ips_string
? v3_ips_string
: "",
1244 v3_reqs_string
? v3_reqs_string
: "",
1245 ns_v3_responses
[GEOIP_SUCCESS
],
1246 ns_v3_responses
[GEOIP_REJECT_NOT_ENOUGH_SIGS
],
1247 ns_v3_responses
[GEOIP_REJECT_UNAVAILABLE
],
1248 ns_v3_responses
[GEOIP_REJECT_NOT_FOUND
],
1249 ns_v3_responses
[GEOIP_REJECT_NOT_MODIFIED
],
1250 ns_v3_responses
[GEOIP_REJECT_BUSY
],
1251 v3_direct_dl_string
? v3_direct_dl_string
: "",
1252 v3_tunneled_dl_string
? v3_tunneled_dl_string
: "");
1254 /* Free partial strings. */
1255 tor_free(v3_ips_string
);
1256 tor_free(v3_reqs_string
);
1257 tor_free(v3_direct_dl_string
);
1258 tor_free(v3_tunneled_dl_string
);
1263 /** If 24 hours have passed since the beginning of the current dirreq
1264 * stats period, write dirreq stats to $DATADIR/stats/dirreq-stats
1265 * (possibly overwriting an existing file) and reset counters. Return
1266 * when we would next want to write dirreq stats or 0 if we never want to write. */
1269 geoip_dirreq_stats_write(time_t now
)
1273 if (!start_of_dirreq_stats_interval
)
1274 return 0; /* Not initialized. */
1275 if (start_of_dirreq_stats_interval
+ WRITE_STATS_INTERVAL
> now
)
1276 goto done
; /* Not ready to write. */
1278 /* Discard all items in the client history that are too old. */
1279 geoip_remove_old_clients(start_of_dirreq_stats_interval
);
1281 /* Generate history string. */
1282 str
= geoip_format_dirreq_stats(now
);
1284 /* Write dirreq-stats string to disk. */
/* A zero return from check_or_create_data_subdir() is treated as success. */
1285 if (!check_or_create_data_subdir("stats")) {
1286 write_to_data_subdir("stats", "dirreq-stats", str
, "dirreq statistics");
1287 /* Reset measurement interval start. */
1288 geoip_reset_dirreq_stats(now
);
/* The next write is due one WRITE_STATS_INTERVAL after the interval start,
 * which geoip_reset_dirreq_stats() sets to the time it is given. */
1293 return start_of_dirreq_stats_interval
+ WRITE_STATS_INTERVAL
;
1296 /** Start time of bridge stats or 0 if we're not collecting bridge statistics. */
1298 static time_t start_of_bridge_stats_interval
;
1300 /** Initialize bridge stats by recording <b>now</b> as the start of the
 * current bridge measurement interval. */
1302 geoip_bridge_stats_init(time_t now
)
1304 start_of_bridge_stats_interval
= now
;
1307 /** Stop collecting bridge stats in a way that we can re-start doing so in
1308 * geoip_bridge_stats_init(): call client_history_clear() and set
 * start_of_bridge_stats_interval to 0, the value the formatting functions
 * treat as "not initialized". */
1310 geoip_bridge_stats_term(void)
1312 client_history_clear();
1313 start_of_bridge_stats_interval
= 0;
1316 /** Validate a bridge statistics string as it would be written to a
1317 * current extra-info descriptor. Return 1 if the string is valid and
1318 * recent enough, or 0 otherwise.
 *
 * The "bridge-stats-end" timestamp is checked against a window running
 * from 25 hours before <b>now</b> to 1 hour after it, and the reported
 * interval length against a 23-hour threshold. The function then looks
 * for a "bridge-ips" line and a "bridge-ip-transports" line, each either
 * with content or empty. <b>stats_str</b> must not be NULL (asserted). */
1320 validate_bridge_stats(const char *stats_str
, time_t now
)
1322 char stats_end_str
[ISO_TIME_LEN
+1], stats_start_str
[ISO_TIME_LEN
+1],
1325 const char *BRIDGE_STATS_END
= "bridge-stats-end ";
1326 const char *BRIDGE_IPS
= "bridge-ips ";
1327 const char *BRIDGE_IPS_EMPTY_LINE
= "bridge-ips\n";
1328 const char *BRIDGE_TRANSPORTS
= "bridge-ip-transports ";
1329 const char *BRIDGE_TRANSPORTS_EMPTY_LINE
= "bridge-ip-transports\n";
1331 time_t stats_end_time
;
1333 tor_assert(stats_str
);
1335 /* Parse timestamp and number of seconds from
1336 "bridge-stats-end YYYY-MM-DD HH:MM:SS (N s)" */
1337 tmp
= find_str_at_start_of_line(stats_str
, BRIDGE_STATS_END
);
1340 tmp
+= strlen(BRIDGE_STATS_END
);
1342 if (strlen(tmp
) < ISO_TIME_LEN
+ 6)
1344 strlcpy(stats_end_str
, tmp
, sizeof(stats_end_str
));
1345 if (parse_iso_time(stats_end_str
, &stats_end_time
) < 0)
1347 if (stats_end_time
< now
- (25*60*60) ||
1348 stats_end_time
> now
+ (1*60*60))
/* The offset presumably skips the timestamp plus the " (" that precedes
 * the seconds count in the line format shown above. */
1350 seconds
= (int)strtol(tmp
+ ISO_TIME_LEN
+ 2, &eos
, 10);
/* NOTE(review): strtol() always stores a pointer through its endptr
 * argument, so !eos cannot become true; a parse failure would only show
 * up as eos pointing at the start of the input -- confirm intent. */
1351 if (!eos
|| seconds
< 23*60*60)
1353 format_iso_time(stats_start_str
, stats_end_time
- seconds
);
1355 /* Parse: "bridge-ips CC=N,CC=N,..." */
1356 tmp
= find_str_at_start_of_line(stats_str
, BRIDGE_IPS
);
1358 /* Look if there is an empty "bridge-ips" line */
1359 tmp
= find_str_at_start_of_line(stats_str
, BRIDGE_IPS_EMPTY_LINE
);
1364 /* Parse: "bridge-ip-transports PT=N,PT=N,..." */
1365 tmp
= find_str_at_start_of_line(stats_str
, BRIDGE_TRANSPORTS
);
1367 /* Look if there is an empty "bridge-ip-transports" line */
1368 tmp
= find_str_at_start_of_line(stats_str
, BRIDGE_TRANSPORTS_EMPTY_LINE
);
1376 /** Most recent bridge statistics formatted to be written to extra-info descriptors. */
1378 static char *bridge_stats_extrainfo
= NULL
;
1380 /** Return a newly allocated string holding our bridge usage stats by country
1381 * in a format suitable for inclusion in an extrainfo document. Return NULL on failure. */
1384 geoip_format_bridge_stats(time_t now
)
1387 char *country_data
= NULL
, *ipver_data
= NULL
, *transport_data
= NULL
;
1388 long duration
= now
- start_of_bridge_stats_interval
;
1389 char written
[ISO_TIME_LEN
+1];
1393 if (!start_of_bridge_stats_interval
)
1394 return NULL
; /* Not initialized. */
1396 format_iso_time(written
, now
);
/* Collect the per-country, per-IP-version and per-transport summaries;
 * each one is replaced by an empty string below if it is NULL. */
1397 geoip_get_client_history(GEOIP_CLIENT_CONNECT
, &country_data
, &ipver_data
);
1398 transport_data
= geoip_get_transport_history();
1401 "bridge-stats-end %s (%ld s)\n"
1403 "bridge-ip-versions %s\n"
1404 "bridge-ip-transports %s\n",
1406 country_data
? country_data
: "",
1407 ipver_data
? ipver_data
: "",
1408 transport_data
? transport_data
: "");
/* The partial strings are released once the output has been formatted. */
1409 tor_free(country_data
);
1410 tor_free(ipver_data
);
1411 tor_free(transport_data
);
1416 /** Return a newly allocated string holding our bridge usage stats by country
1417 * in a format suitable for the answer to a controller request. Return NULL on failure. */
1420 format_bridge_stats_controller(time_t now
)
1422 char *out
= NULL
, *country_data
= NULL
, *ipver_data
= NULL
;
1423 char started
[ISO_TIME_LEN
+1];
/* started holds the start of the current bridge stats interval, not
 * <b>now</b>. */
1426 format_iso_time(started
, start_of_bridge_stats_interval
);
1427 geoip_get_client_history(GEOIP_CLIENT_CONNECT
, &country_data
, &ipver_data
);
1430 "TimeStarted=\"%s\" CountrySummary=%s IPVersions=%s",
1432 country_data
? country_data
: "",
1433 ipver_data
? ipver_data
: "");
/* The summaries were only needed for formatting. */
1434 tor_free(country_data
);
1435 tor_free(ipver_data
);
1439 /** Return a newly allocated string holding our bridge usage stats by
1440 * country in a format suitable for inclusion in our heartbeat
1441 * message. Return NULL on failure.
 *
 * The message has the form "Heartbeat: In the last N hours, I have seen M
 * unique clients." The look-back window is n_hours (6), turned into a
 * cutoff that is compared with each GEOIP_CLIENT_CONNECT entry's
 * last_seen_in_minutes. NULL is returned when
 * start_of_bridge_stats_interval is 0. */
1443 format_client_stats_heartbeat(time_t now
)
1445 const int n_hours
= 6;
1448 clientmap_entry_t
**ent
;
/* Seconds are divided by 60, presumably to match the unit of
 * last_seen_in_minutes. */
1449 unsigned cutoff
= (unsigned)( (now
-n_hours
*3600)/60 );
1451 if (!start_of_bridge_stats_interval
)
1452 return NULL
; /* Not initialized. */
1454 /* count unique IPs */
1455 HT_FOREACH(ent
, clientmap
, &client_history
) {
1456 /* only count directly connecting clients */
1457 if ((*ent
)->action
!= GEOIP_CLIENT_CONNECT
)
1459 if ((*ent
)->last_seen_in_minutes
< cutoff
)
1464 tor_asprintf(&out
, "Heartbeat: "
1465 "In the last %d hours, I have seen %d unique clients.",
1472 /** Write bridge statistics to $DATADIR/stats/bridge-stats and return
1473 * when we should next try to write statistics.
 *
 * Besides writing the file, this stores the freshly formatted string in
 * bridge_stats_extrainfo (freeing the previous value), restarts the
 * measurement interval at <b>now</b>, and hands a controller-formatted
 * summary to control_event_clients_seen(). If the interval has not yet
 * lasted WRITE_STATS_INTERVAL, nothing is written and the pending due time
 * is returned. */
1475 geoip_bridge_stats_write(time_t now
)
1479 /* Check if 24 hours have passed since starting measurements. */
1480 if (now
< start_of_bridge_stats_interval
+ WRITE_STATS_INTERVAL
)
1481 return start_of_bridge_stats_interval
+ WRITE_STATS_INTERVAL
;
1483 /* Discard all items in the client history that are too old. */
1484 geoip_remove_old_clients(start_of_bridge_stats_interval
);
1486 /* Generate formatted string */
1487 val
= geoip_format_bridge_stats(now
);
1491 /* Update the stored value. */
1492 tor_free(bridge_stats_extrainfo
);
1493 bridge_stats_extrainfo
= val
;
1494 start_of_bridge_stats_interval
= now
;
1496 /* Write it to disk. */
1497 if (!check_or_create_data_subdir("stats")) {
1498 write_to_data_subdir("stats", "bridge-stats",
1499 bridge_stats_extrainfo
, "bridge statistics");
1501 /* Tell the controller, "hey, there are clients!" */
1503 char *controller_str
= format_bridge_stats_controller(now
);
1505 control_event_clients_seen(controller_str
);
1506 tor_free(controller_str
);
1511 return start_of_bridge_stats_interval
+ WRITE_STATS_INTERVAL
;
1514 /** Try to load the most recent bridge statistics from disk, unless we
1515 * have finished a measurement interval lately, and check whether they
1516 * are still recent enough.
 *
 * The file is the data-directory path built from "stats" and
 * "bridge-stats", read with RFTS_IGNORE_MISSING. Contents that pass
 * validate_bridge_stats() are stored directly in bridge_stats_extrainfo,
 * so that global takes over the buffer. */
1518 load_bridge_stats(time_t now
)
1520 char *fname
, *contents
;
1521 if (bridge_stats_extrainfo
)
1524 fname
= get_datadir_fname2("stats", "bridge-stats");
1525 contents
= read_file_to_str(fname
, RFTS_IGNORE_MISSING
, NULL
);
1526 if (contents
&& validate_bridge_stats(contents
, now
)) {
1527 bridge_stats_extrainfo
= contents
;
1535 /** Return most recent bridge statistics for inclusion in extra-info
1536 * descriptors, or NULL if we don't have recent bridge statistics.
 *
 * load_bridge_stats() is called first so the cached value can be filled
 * from disk. The returned pointer is the module-level
 * bridge_stats_extrainfo itself, not a copy. */
1538 geoip_get_bridge_stats_extrainfo(time_t now
)
1540 load_bridge_stats(now
);
1541 return bridge_stats_extrainfo
;
1544 /** Return a new string containing the recent bridge statistics to be returned
1545 * to controller clients, or NULL if we don't have any bridge statistics.
 * This is a thin wrapper that forwards <b>now</b> to
 * format_bridge_stats_controller(). */
1547 geoip_get_bridge_stats_controller(time_t now
)
1549 return format_bridge_stats_controller(now
);
1552 /** Start time of entry stats or 0 if we're not collecting entry statistics. */
1554 static time_t start_of_entry_stats_interval
;
1556 /** Initialize entry stats by recording <b>now</b> as the start of the
 * current entry measurement interval. */
1558 geoip_entry_stats_init(time_t now
)
1560 start_of_entry_stats_interval
= now
;
1563 /** Reset counters for entry stats: call client_history_clear() and record
 * <b>now</b> as the start of the next entry measurement interval. */
1565 geoip_reset_entry_stats(time_t now
)
1567 client_history_clear();
1568 start_of_entry_stats_interval
= now
;
1571 /** Stop collecting entry stats in a way that we can re-start doing so in
1572 * geoip_entry_stats_init(). Implemented by calling
 * geoip_reset_entry_stats() with a start time of 0, the value that
 * geoip_format_entry_stats() and geoip_entry_stats_write() treat as
 * "not initialized". */
1574 geoip_entry_stats_term(void)
1576 geoip_reset_entry_stats(0);
1579 /** Return a newly allocated string containing the entry statistics
1580 * until <b>now</b>, or NULL if we're not collecting entry stats. Caller
1581 * must ensure start_of_entry_stats_interval lies in the past.
 *
 * The output begins with "entry-stats-end <time> (<seconds> s)", where
 * <time> is <b>now</b> as rendered by format_iso_time() and <seconds> is
 * the time elapsed since start_of_entry_stats_interval. The precondition
 * on <b>now</b> is enforced with tor_assert(). */
1583 geoip_format_entry_stats(time_t now
)
1585 char t
[ISO_TIME_LEN
+1];
1589 if (!start_of_entry_stats_interval
)
1590 return NULL
; /* Not initialized. */
1592 tor_assert(now
>= start_of_entry_stats_interval
);
/* Only the per-country summary is requested; the per-family string is
 * declined by passing NULL. */
1594 geoip_get_client_history(GEOIP_CLIENT_CONNECT
, &data
, NULL
);
1595 format_iso_time(t
, now
);
1596 tor_asprintf(&result
,
1597 "entry-stats-end %s (%u s)\n"
1599 t
, (unsigned) (now
- start_of_entry_stats_interval
),
1605 /** If 24 hours have passed since the beginning of the current entry stats
1606 * period, write entry stats to $DATADIR/stats/entry-stats (possibly
1607 * overwriting an existing file) and reset counters. Return when we would
1608 * next want to write entry stats or 0 if we never want to write.
 *
 * "Never" corresponds to start_of_entry_stats_interval being 0; otherwise
 * the returned time is the interval start plus WRITE_STATS_INTERVAL. */
1610 geoip_entry_stats_write(time_t now
)
1614 if (!start_of_entry_stats_interval
)
1615 return 0; /* Not initialized. */
1616 if (start_of_entry_stats_interval
+ WRITE_STATS_INTERVAL
> now
)
1617 goto done
; /* Not ready to write. */
1619 /* Discard all items in the client history that are too old. */
1620 geoip_remove_old_clients(start_of_entry_stats_interval
);
1622 /* Generate history string. */
1623 str
= geoip_format_entry_stats(now
);
1625 /* Write entry-stats string to disk. */
/* A zero return from check_or_create_data_subdir() is treated as success. */
1626 if (!check_or_create_data_subdir("stats")) {
1627 write_to_data_subdir("stats", "entry-stats", str
, "entry statistics");
1629 /* Reset measurement interval start. */
1630 geoip_reset_entry_stats(now
);
1635 return start_of_entry_stats_interval
+ WRITE_STATS_INTERVAL
;
1638 /** Helper used to implement GETINFO ip-to-country/... controller command.
 *
 * For a <b>question</b> that starts with "ip-to-country/", the remainder is
 * parsed as an address with tor_addr_parse(), its country is looked up in
 * the IPv4 or IPv6 database according to the parsed family, and a
 * tor_strdup() copy of the country name is stored in *<b>answer</b>.
 * *<b>errmsg</b> is set to a static string when the family is neither
 * AF_INET nor AF_INET6, or when GeoIP data for that family is not loaded. */
1640 getinfo_helper_geoip(control_connection_t
*control_conn
,
1641 const char *question
, char **answer
,
1642 const char **errmsg
)
1645 if (!strcmpstart(question
, "ip-to-country/")) {
/* Skip the prefix so that question points at the address text. */
1649 question
+= strlen("ip-to-country/");
1650 family
= tor_addr_parse(&addr
, question
);
1651 if (family
!= AF_INET
&& family
!= AF_INET6
) {
1652 *errmsg
= "Invalid address family";
1655 if (!geoip_is_loaded(family
)) {
1656 *errmsg
= "GeoIP data not loaded";
1659 if (family
== AF_INET
)
1660 c
= geoip_get_country_by_ipv4(tor_addr_to_ipv4h(&addr
));
1662 c
= geoip_get_country_by_ipv6(tor_addr_to_in6(&addr
));
1663 *answer
= tor_strdup(geoip_get_country_name(c
));
1668 /** Release all storage held by the GeoIP databases and country list:
 * geoip_countries and its elements, country_idxplus1_by_lc_code,
 * geoip_ipv4_entries and geoip_ipv6_entries. All four globals are set to
 * NULL afterwards. */
1670 clear_geoip_db(void)
1672 if (geoip_countries
) {
1673 SMARTLIST_FOREACH(geoip_countries
, geoip_country_t
*, c
, tor_free(c
));
1674 smartlist_free(geoip_countries
);
/* No value destructor is passed: the map's values are indices encoded in
 * the pointers, not separate allocations. */
1677 strmap_free(country_idxplus1_by_lc_code
, NULL
);
1678 if (geoip_ipv4_entries
) {
1679 SMARTLIST_FOREACH(geoip_ipv4_entries
, geoip_ipv4_entry_t
*, ent
,
1681 smartlist_free(geoip_ipv4_entries
);
1683 if (geoip_ipv6_entries
) {
1684 SMARTLIST_FOREACH(geoip_ipv6_entries
, geoip_ipv6_entry_t
*, ent
,
1686 smartlist_free(geoip_ipv6_entries
);
/* Clear the globals so no pointer to freed storage remains. */
1688 geoip_countries
= NULL
;
1689 country_idxplus1_by_lc_code
= NULL
;
1690 geoip_ipv4_entries
= NULL
;
1691 geoip_ipv6_entries
= NULL
;
1694 /** Release all storage held in this file. */
1696 geoip_free_all(void)
1699 clientmap_entry_t
**ent
, **next
, *this;
1700 for (ent
= HT_START(clientmap
, &client_history
); ent
!= NULL
; ent
= next
) {
1702 next
= HT_NEXT_RMV(clientmap
, &client_history
, ent
);
1703 clientmap_entry_free(this);
1705 HT_CLEAR(clientmap
, &client_history
);
1708 dirreq_map_entry_t
**ent
, **next
, *this;
1709 for (ent
= HT_START(dirreqmap
, &dirreq_map
); ent
!= NULL
; ent
= next
) {
1711 next
= HT_NEXT_RMV(dirreqmap
, &dirreq_map
, ent
);
1714 HT_CLEAR(dirreqmap
, &dirreq_map
);
1718 tor_free(bridge_stats_extrainfo
);