/* Copyright 2003 Roger Dingledine. */
/* See LICENSE for licensing information */

/*****
 * dns.c: Resolve hostnames in separate processes.
 *****/

/* See http://elvin.dstc.com/ListArchive/elvin-dev/archive/2001/09/msg00027.html
 * for some approaches to asynchronous dns. We will want to switch once one of
 * them becomes more commonly available.
 */
17 extern or_options_t options
; /* command-line and config-file options */
/* Size in bytes, terminating NUL included, of the longest hostname we are
 * willing to resolve. */
#define MAX_ADDRESSLEN 256

/* Upper bound on the number of DNS worker processes. */
#define MAX_DNSWORKERS 50
/* Lower bound on the number of DNS worker processes. */
#define MIN_DNSWORKERS 3

/* Once more than this many workers sit idle, the surplus is shut down. */
#define MAX_IDLE_DNSWORKERS 10

/* Outcome codes for a hostname lookup: transient (retryable) failure,
 * permanent failure, and success. */
#define DNS_RESOLVE_FAILED_TRANSIENT 1
#define DNS_RESOLVE_FAILED_PERMANENT 2
#define DNS_RESOLVE_SUCCEEDED 3
/* Number of DNS workers currently alive, idle or busy.  Compared against
 * the busy count when deciding whether to spawn or cull workers. */
int num_dnsworkers = 0;
/* Number of DNS workers currently handling a request. */
int num_dnsworkers_busy = 0;
/* One node of the singly linked list of connections that are waiting for
 * the same DNS answer. */
struct pending_connection_t {
  struct connection_t *conn;          /* the waiting connection */
  struct pending_connection_t *next;  /* next waiter, or NULL at the tail */
};
45 /* A DNS request: possibly completed, possibly pending; cached_resolve
46 * structs are stored at the OR side in a splay tree, and as a linked
47 * list from oldest to newest.
49 struct cached_resolve
{
50 SPLAY_ENTRY(cached_resolve
) node
;
51 char address
[MAX_ADDRESSLEN
]; /* the hostname to be resolved */
52 uint32_t addr
; /* in host order. I know I'm horrible for assuming ipv4 */
53 char state
; /* 0 is pending; 1 means answer is valid; 2 means resolve failed */
54 #define CACHE_STATE_PENDING 0
55 #define CACHE_STATE_VALID 1
56 #define CACHE_STATE_FAILED 2
57 uint32_t expire
; /* remove items from cache after this time */
58 struct pending_connection_t
*pending_connections
;
59 struct cached_resolve
*next
;
62 static void purge_expired_resolves(uint32_t now
);
63 static int assign_to_dnsworker(connection_t
*exitconn
);
64 static void dns_purge_resolve(struct cached_resolve
*resolve
);
65 static void dns_found_answer(char *address
, uint32_t addr
, char outcome
);
66 int dnsworker_main(void *data
);
67 static int spawn_dnsworker(void);
68 static void spawn_enough_dnsworkers(void);
70 /* Splay tree of cached_resolve objects */
71 static SPLAY_HEAD(cache_tree
, cached_resolve
) cache_root
;
73 /* Function to compare hashed resolves on their addresses; used to
74 * implement splay trees. */
75 static int compare_cached_resolves(struct cached_resolve
*a
,
76 struct cached_resolve
*b
) {
77 /* make this smarter one day? */
78 return strncasecmp(a
->address
, b
->address
, MAX_ADDRESSLEN
);
/* Instantiate the splay-tree operations for cache_tree, ordered by
 * compare_cached_resolves() and linked through the 'node' member. */
SPLAY_PROTOTYPE(cache_tree, cached_resolve, node, compare_cached_resolves);
SPLAY_GENERATE(cache_tree, cached_resolve, node, compare_cached_resolves);
84 /* Initialize the DNS cache */
85 static void init_cache_tree(void) {
86 SPLAY_INIT(&cache_root
);
/* Initialize the DNS subsystem; called by the OR process.  Sets up the
 * empty cache tree, then makes sure enough DNS workers are running.
 * NOTE(review): the function name and signature were restored from
 * context -- confirm against the declaration in the project header. */
void dns_init(void) {
  init_cache_tree();
  spawn_enough_dnsworkers();
}
/* Head and tail of the age-ordered list of cached resolves: new entries
 * are appended at 'newest', expired ones are taken from 'oldest'. */
static struct cached_resolve *oldest_cached_resolve = NULL;
static struct cached_resolve *newest_cached_resolve = NULL;
98 /* Remove every cached_resolve whose 'expire' time is before 'now'
100 static void purge_expired_resolves(uint32_t now
) {
101 struct cached_resolve
*resolve
;
103 /* this is fast because the linked list
104 * oldest_cached_resolve is ordered by when they came in.
106 while(oldest_cached_resolve
&& (oldest_cached_resolve
->expire
< now
)) {
107 resolve
= oldest_cached_resolve
;
108 log(LOG_DEBUG
,"Forgetting old cached resolve (expires %lu)", (unsigned long)resolve
->expire
);
109 if(resolve
->state
== CACHE_STATE_PENDING
) {
110 log_fn(LOG_WARN
,"Expiring a dns resolve that's still pending. Forgot to cull it?");
111 /* XXX if resolve->pending_connections is used, then we're probably
112 * introducing bugs by closing resolve without notifying those streams.
115 oldest_cached_resolve
= resolve
->next
;
116 if(!oldest_cached_resolve
) /* if there are no more, */
117 newest_cached_resolve
= NULL
; /* then make sure the list's tail knows that too */
118 SPLAY_REMOVE(cache_tree
, &cache_root
, resolve
);
123 /* See if we have a cache entry for 'exitconn->address'. if so,
124 * if resolve valid, put it into exitconn->addr and return 1.
125 * If resolve failed, return -1.
127 * Else, if seen before and pending, add conn to the pending list,
130 * Else, if not seen before, add conn to pending list, hand to
131 * dns farm, and return 0.
133 int dns_resolve(connection_t
*exitconn
) {
134 struct cached_resolve
*resolve
;
135 struct cached_resolve search
;
136 struct pending_connection_t
*pending_connection
;
138 uint32_t now
= time(NULL
);
139 assert_connection_ok(exitconn
, 0);
141 /* first check if exitconn->address is an IP. If so, we already
142 * know the answer. */
143 if (tor_inet_aton(exitconn
->address
, &in
) != 0) {
144 exitconn
->addr
= ntohl(in
.s_addr
);
148 /* then take this opportunity to see if there are any expired
149 * resolves in the tree. */
150 purge_expired_resolves(now
);
152 /* now check the tree to see if 'address' is already there. */
153 strncpy(search
.address
, exitconn
->address
, MAX_ADDRESSLEN
);
154 search
.address
[MAX_ADDRESSLEN
-1] = 0;
155 resolve
= SPLAY_FIND(cache_tree
, &cache_root
, &search
);
156 if(resolve
) { /* already there */
157 switch(resolve
->state
) {
158 case CACHE_STATE_PENDING
:
159 /* add us to the pending list */
160 pending_connection
= tor_malloc(sizeof(struct pending_connection_t
));
161 pending_connection
->conn
= exitconn
;
162 pending_connection
->next
= resolve
->pending_connections
;
163 resolve
->pending_connections
= pending_connection
;
164 log_fn(LOG_DEBUG
,"Connection (fd %d) waiting for pending DNS resolve of '%s'",
165 exitconn
->s
, exitconn
->address
);
166 exitconn
->state
= EXIT_CONN_STATE_RESOLVING
;
168 case CACHE_STATE_VALID
:
169 exitconn
->addr
= resolve
->addr
;
170 log_fn(LOG_DEBUG
,"Connection (fd %d) found cached answer for '%s'",
171 exitconn
->s
, exitconn
->address
);
173 case CACHE_STATE_FAILED
:
178 /* not there, need to add it */
179 resolve
= tor_malloc_zero(sizeof(struct cached_resolve
));
180 resolve
->state
= CACHE_STATE_PENDING
;
181 resolve
->expire
= now
+ MAX_DNS_ENTRY_AGE
;
182 strncpy(resolve
->address
, exitconn
->address
, MAX_ADDRESSLEN
);
183 resolve
->address
[MAX_ADDRESSLEN
-1] = 0;
185 /* add us to the pending list */
186 pending_connection
= tor_malloc(sizeof(struct pending_connection_t
));
187 pending_connection
->conn
= exitconn
;
188 pending_connection
->next
= NULL
;
189 resolve
->pending_connections
= pending_connection
;
190 exitconn
->state
= EXIT_CONN_STATE_RESOLVING
;
192 /* add us to the linked list of resolves */
193 if (!oldest_cached_resolve
) {
194 oldest_cached_resolve
= resolve
;
196 newest_cached_resolve
->next
= resolve
;
198 newest_cached_resolve
= resolve
;
200 SPLAY_INSERT(cache_tree
, &cache_root
, resolve
);
201 return assign_to_dnsworker(exitconn
);
204 /* Find or spawn a dns worker process to handle resolving
205 * exitconn->address; tell that dns worker to begin resolving.
207 static int assign_to_dnsworker(connection_t
*exitconn
) {
208 connection_t
*dnsconn
;
211 tor_assert(exitconn
->state
== EXIT_CONN_STATE_RESOLVING
);
213 spawn_enough_dnsworkers(); /* respawn here, to be sure there are enough */
215 dnsconn
= connection_get_by_type_state(CONN_TYPE_DNSWORKER
, DNSWORKER_STATE_IDLE
);
218 log_fn(LOG_WARN
,"no idle dns workers. Failing.");
219 dns_cancel_pending_resolve(exitconn
->address
);
223 log_fn(LOG_DEBUG
, "Connection (fd %d) needs to resolve '%s'; assigning to DNSWorker (fd %d)",
224 exitconn
->s
, exitconn
->address
, dnsconn
->s
);
226 tor_free(dnsconn
->address
);
227 dnsconn
->address
= tor_strdup(exitconn
->address
);
228 dnsconn
->state
= DNSWORKER_STATE_BUSY
;
229 num_dnsworkers_busy
++;
231 len
= strlen(dnsconn
->address
);
232 connection_write_to_buf(&len
, 1, dnsconn
);
233 connection_write_to_buf(dnsconn
->address
, len
, dnsconn
);
235 // log_fn(LOG_DEBUG,"submitted '%s'", exitconn->address);
239 /* Remove 'conn' from the list of connections waiting for conn->address.
241 void connection_dns_remove(connection_t
*conn
)
243 struct pending_connection_t
*pend
, *victim
;
244 struct cached_resolve search
;
245 struct cached_resolve
*resolve
;
247 strncpy(search
.address
, conn
->address
, MAX_ADDRESSLEN
);
248 search
.address
[MAX_ADDRESSLEN
-1] = 0;
250 resolve
= SPLAY_FIND(cache_tree
, &cache_root
, &search
);
252 log_fn(LOG_WARN
,"Address '%s' is not pending. Dropping.", conn
->address
);
256 tor_assert(resolve
->pending_connections
);
257 assert_connection_ok(conn
,0);
259 pend
= resolve
->pending_connections
;
261 if(pend
->conn
== conn
) {
262 resolve
->pending_connections
= pend
->next
;
264 log_fn(LOG_DEBUG
, "First connection (fd %d) no longer waiting for resolve of '%s'",
265 conn
->s
, conn
->address
);
268 for( ; pend
->next
; pend
= pend
->next
) {
269 if(pend
->next
->conn
== conn
) {
271 pend
->next
= victim
->next
;
273 log_fn(LOG_DEBUG
, "Connection (fd %d) no longer waiting for resolve of '%s'",
274 conn
->s
, conn
->address
);
275 return; /* more are pending */
278 tor_assert(0); /* not reachable unless onlyconn not in pending list */
282 /* Log an error and abort if conn is waiting for a DNS resolve.
284 void assert_connection_edge_not_dns_pending(connection_t
*conn
) {
285 struct pending_connection_t
*pend
;
286 struct cached_resolve
*resolve
;
288 SPLAY_FOREACH(resolve
, cache_tree
, &cache_root
) {
289 for(pend
= resolve
->pending_connections
;
292 tor_assert(pend
->conn
!= conn
);
297 /* Log an error and abort if any connection waiting for a DNS resolve is
299 void assert_all_pending_dns_resolves_ok(void) {
300 struct pending_connection_t
*pend
;
301 struct cached_resolve
*resolve
;
303 SPLAY_FOREACH(resolve
, cache_tree
, &cache_root
) {
304 for(pend
= resolve
->pending_connections
;
307 assert_connection_ok(pend
->conn
, 0);
312 /* Mark all connections waiting for 'address' for close. Then cancel
313 * the resolve for 'address' itself, and remove any cached results for
314 * 'address' from the cache.
316 void dns_cancel_pending_resolve(char *address
) {
317 struct pending_connection_t
*pend
;
318 struct cached_resolve search
;
319 struct cached_resolve
*resolve
;
320 connection_t
*pendconn
;
322 strncpy(search
.address
, address
, MAX_ADDRESSLEN
);
323 search
.address
[MAX_ADDRESSLEN
-1] = 0;
325 resolve
= SPLAY_FIND(cache_tree
, &cache_root
, &search
);
327 log_fn(LOG_WARN
,"Address '%s' is not pending. Dropping.", address
);
331 tor_assert(resolve
->pending_connections
);
333 /* mark all pending connections to fail */
334 log_fn(LOG_DEBUG
, "Failing all connections waiting on DNS resolve of '%s'",
336 while(resolve
->pending_connections
) {
337 pend
= resolve
->pending_connections
;
338 /* So that mark_for_close doesn't double-remove the connection. */
339 pend
->conn
->state
= EXIT_CONN_STATE_RESOLVEFAILED
;
340 pendconn
= pend
->conn
; /* don't pass complex things to the
341 connection_mark_for_close macro */
342 connection_mark_for_close(pendconn
, END_STREAM_REASON_MISC
);
343 resolve
->pending_connections
= pend
->next
;
347 dns_purge_resolve(resolve
);
350 /* Remove 'resolve' from the cache.
352 static void dns_purge_resolve(struct cached_resolve
*resolve
) {
353 struct cached_resolve
*tmp
;
355 /* remove resolve from the linked list */
356 if(resolve
== oldest_cached_resolve
) {
357 oldest_cached_resolve
= resolve
->next
;
358 if(oldest_cached_resolve
== NULL
)
359 newest_cached_resolve
= NULL
;
361 /* FFFF make it a doubly linked list if this becomes too slow */
362 for(tmp
=oldest_cached_resolve
; tmp
&& tmp
->next
!= resolve
; tmp
=tmp
->next
) ;
363 tor_assert(tmp
); /* it's got to be in the list, or we screwed up somewhere else */
364 tmp
->next
= resolve
->next
; /* unlink it */
366 if(newest_cached_resolve
== resolve
)
367 newest_cached_resolve
= tmp
;
370 /* remove resolve from the tree */
371 SPLAY_REMOVE(cache_tree
, &cache_root
, resolve
);
376 /* Called on the OR side when a DNS worker tells us the outcome of a DNS
377 * resolve: tell all pending connections about the result of the lookup, and
378 * cache the value. ('address' is a NUL-terminated string containing the
379 * address to look up; 'addr' is an IPv4 address in host order; 'outcome' is
380 * one of DNS_RESOLVE_{FAILED_TRANSIENT|FAILED_PERMANENT|SUCCEEDED}.
382 static void dns_found_answer(char *address
, uint32_t addr
, char outcome
) {
383 struct pending_connection_t
*pend
;
384 struct cached_resolve search
;
385 struct cached_resolve
*resolve
;
386 connection_t
*pendconn
;
388 strncpy(search
.address
, address
, MAX_ADDRESSLEN
);
389 search
.address
[MAX_ADDRESSLEN
-1] = 0;
391 resolve
= SPLAY_FIND(cache_tree
, &cache_root
, &search
);
393 log_fn(LOG_INFO
,"Resolved unasked address '%s'? Dropping.", address
);
394 /* XXX Why drop? Just because we don't care now doesn't mean we shouldn't
395 * XXX cache the result for later. */
399 if (resolve
->state
!= CACHE_STATE_PENDING
) {
400 /* XXXX Maybe update addr? or check addr for consistency? Or let
401 * VALID replace FAILED? */
402 log_fn(LOG_WARN
, "Resolved '%s' which was already resolved; ignoring",
404 tor_assert(resolve
->pending_connections
== NULL
);
407 /* Removed this assertion: in fact, we'll sometimes get a double answer
408 * to the same question. This can happen when we ask one worker to resolve
409 * X.Y.Z., then we cancel the request, and then we ask another worker to
411 /* tor_assert(resolve->state == CACHE_STATE_PENDING); */
413 resolve
->addr
= ntohl(addr
);
414 if(outcome
== DNS_RESOLVE_SUCCEEDED
)
415 resolve
->state
= CACHE_STATE_VALID
;
417 resolve
->state
= CACHE_STATE_FAILED
;
419 while(resolve
->pending_connections
) {
420 pend
= resolve
->pending_connections
;
421 assert_connection_ok(pend
->conn
,time(NULL
));
422 pend
->conn
->addr
= resolve
->addr
;
425 if(resolve
->state
== CACHE_STATE_FAILED
) {
426 pendconn
= pend
->conn
; /* don't pass complex things to the
427 connection_mark_for_close macro */
428 /* prevent double-remove. */
429 pend
->conn
->state
= EXIT_CONN_STATE_RESOLVEFAILED
;
430 connection_mark_for_close(pendconn
, END_STREAM_REASON_RESOLVEFAILED
);
432 /* prevent double-remove. */
433 pend
->conn
->state
= EXIT_CONN_STATE_CONNECTING
;
434 connection_exit_connect(pend
->conn
);
436 resolve
->pending_connections
= pend
->next
;
440 if(outcome
== DNS_RESOLVE_FAILED_TRANSIENT
) { /* remove from cache */
441 dns_purge_resolve(resolve
);
/******************************************************************/

/*****
 * Connection between OR and dnsworker
 *****/
451 /* Write handler: called when we've pushed a request to a dnsworker. */
452 int connection_dns_finished_flushing(connection_t
*conn
) {
453 tor_assert(conn
&& conn
->type
== CONN_TYPE_DNSWORKER
);
454 connection_stop_writing(conn
);
458 /* Read handler: called when we get data from a dnsworker. If the
459 * connection is closed, mark the dnsworker as dead. Otherwise, see
460 * if we have a complete answer. If so, call dns_found_answer on the
461 * result. If not, wait. Returns 0. */
462 int connection_dns_process_inbuf(connection_t
*conn
) {
466 tor_assert(conn
&& conn
->type
== CONN_TYPE_DNSWORKER
);
468 if(conn
->inbuf_reached_eof
) {
469 log_fn(LOG_WARN
,"Read eof. Worker died unexpectedly.");
470 if(conn
->state
== DNSWORKER_STATE_BUSY
) {
471 dns_cancel_pending_resolve(conn
->address
);
472 num_dnsworkers_busy
--;
475 connection_mark_for_close(conn
,0);
479 tor_assert(conn
->state
== DNSWORKER_STATE_BUSY
);
480 if(buf_datalen(conn
->inbuf
) < 5) /* entire answer available? */
481 return 0; /* not yet */
482 tor_assert(buf_datalen(conn
->inbuf
) == 5);
484 connection_fetch_from_buf(&success
,1,conn
);
485 connection_fetch_from_buf((char *)&addr
,sizeof(uint32_t),conn
);
487 log_fn(LOG_DEBUG
, "DNSWorker (fd %d) returned answer for '%s'",
488 conn
->s
, conn
->address
);
490 tor_assert(success
>= DNS_RESOLVE_FAILED_TRANSIENT
);
491 tor_assert(success
<= DNS_RESOLVE_SUCCEEDED
);
492 dns_found_answer(conn
->address
, addr
, success
);
494 tor_free(conn
->address
);
495 conn
->address
= tor_strdup("<idle>");
496 conn
->state
= DNSWORKER_STATE_IDLE
;
497 num_dnsworkers_busy
--;
502 /* Implementation for DNS workers; this code runs in a separate
503 * execution context. It takes as its argument an fdarray as returned
504 * by socketpair(), and communicates via fdarray[1]. The protocol is
507 * ADDRESSLEN [1 byte]
508 * ADDRESS [ADDRESSLEN bytes]
509 * The DNS worker does the lookup, and replies:
513 * OUTCOME is one of DNS_RESOLVE_{FAILED_TRANSIENT|FAILED_PERMANENT|SUCCEEDED}.
514 * IP is in host order.
516 * The dnsworker runs indefinitely, until its connection is closed or an error
519 int dnsworker_main(void *data
) {
520 char address
[MAX_ADDRESSLEN
];
521 unsigned char address_len
;
527 tor_close_socket(fdarray
[0]); /* this is the side of the socketpair the parent uses */
528 fd
= fdarray
[1]; /* this side is ours */
530 connection_free_all(); /* so the child doesn't hold the parent's fd's open */
535 if(recv(fd
, &address_len
, 1, 0) != 1) {
536 log_fn(LOG_INFO
,"dnsworker exiting because tor process died.");
539 tor_assert(address_len
> 0);
541 if(read_all(fd
, address
, address_len
, 1) != address_len
) {
542 log_fn(LOG_ERR
,"read hostname failed. Child exiting.");
545 address
[address_len
] = 0; /* null terminate it */
547 switch (tor_lookup_hostname(address
, &ip
)) {
549 log_fn(LOG_INFO
,"Could not resolve dest addr %s (transient).",address
);
550 answer
[0] = DNS_RESOLVE_FAILED_TRANSIENT
;
553 log_fn(LOG_INFO
,"Could not resolve dest addr %s (permanent).",address
);
554 answer
[0] = DNS_RESOLVE_FAILED_PERMANENT
;
557 log_fn(LOG_INFO
,"Resolved address '%s'.",address
);
558 answer
[0] = DNS_RESOLVE_SUCCEEDED
;
561 set_uint32(answer
+1, ip
);
562 if(write_all(fd
, answer
, 5, 1) != 5) {
563 log_fn(LOG_ERR
,"writing answer failed. Child exiting.");
567 return 0; /* windows wants this function to return an int */
570 /* Launch a new DNS worker; return 0 on success, -1 on failure.
572 static int spawn_dnsworker(void) {
576 if(tor_socketpair(AF_UNIX
, SOCK_STREAM
, 0, fd
) < 0) {
577 log(LOG_ERR
, "Couldn't construct socketpair: %s",
578 tor_socket_strerror(tor_socket_errno(-1)));
582 spawn_func(dnsworker_main
, (void*)fd
);
583 log_fn(LOG_DEBUG
,"just spawned a worker.");
584 tor_close_socket(fd
[1]); /* we don't need the worker's side of the pipe */
586 conn
= connection_new(CONN_TYPE_DNSWORKER
);
588 set_socket_nonblocking(fd
[0]);
590 /* set up conn so it's got all the data we need to remember */
592 conn
->address
= tor_strdup("<unused>");
594 if(connection_add(conn
) < 0) { /* no space, forget it */
595 log_fn(LOG_WARN
,"connection_add failed. Giving up.");
596 connection_free(conn
); /* this closes fd[0] */
600 conn
->state
= DNSWORKER_STATE_IDLE
;
601 connection_start_reading(conn
);
603 return 0; /* success */
606 /* If we have too many or too few DNS workers, spawn or kill some.
608 static void spawn_enough_dnsworkers(void) {
609 int num_dnsworkers_needed
; /* aim to have 1 more than needed,
610 * but no less than min and no more than max */
611 connection_t
*dnsconn
;
613 /* XXX This may not be the best strategy. Maybe we should queue pending
614 * requests until the old ones finish or time out: otherwise, if
615 * the connection requests come fast enough, we never get any DNS done. -NM
616 * XXX But if we queue them, then the adversary can pile even more
617 * queries onto us, blocking legitimate requests for even longer.
618 * Maybe we should compromise and only kill if it's been at it for
619 * more than, e.g., 2 seconds. -RD
621 if(num_dnsworkers_busy
== MAX_DNSWORKERS
) {
622 /* We always want at least one worker idle.
623 * So find the oldest busy worker and kill it.
625 dnsconn
= connection_get_by_type_state_lastwritten(CONN_TYPE_DNSWORKER
,
626 DNSWORKER_STATE_BUSY
);
629 log_fn(LOG_WARN
, "%d DNS workers are spawned; all are busy. Killing one.",
632 connection_mark_for_close(dnsconn
,0);
633 num_dnsworkers_busy
--;
637 if(num_dnsworkers_busy
>= MIN_DNSWORKERS
)
638 num_dnsworkers_needed
= num_dnsworkers_busy
+1;
640 num_dnsworkers_needed
= MIN_DNSWORKERS
;
642 while(num_dnsworkers
< num_dnsworkers_needed
) {
643 if(spawn_dnsworker() < 0) {
644 log(LOG_WARN
,"spawn_enough_dnsworkers(): spawn failed!");
650 while(num_dnsworkers
> num_dnsworkers_busy
+MAX_IDLE_DNSWORKERS
) { /* too many idle? */
651 /* cull excess workers */
652 log_fn(LOG_WARN
,"%d of %d dnsworkers are idle. Killing one.",
653 num_dnsworkers
-num_dnsworkers_needed
, num_dnsworkers
);
654 dnsconn
= connection_get_by_type_state(CONN_TYPE_DNSWORKER
, DNSWORKER_STATE_IDLE
);
656 connection_mark_for_close(dnsconn
,0);