Detect FPU by checking CPUID features.
[dragonfly.git] / contrib / bind-9.5.2 / lib / dns / dispatch.c
blobf1557ec2cbb471bfaedfbbfd629f8a7790506c2a
1 /*
2 * Copyright (C) 2004-2009 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: dispatch.c,v 1.137.128.24 2009/01/31 00:44:43 marka Exp $ */
20 /*! \file */
22 #include <config.h>
24 #include <stdlib.h>
25 #include <sys/types.h>
26 #include <unistd.h>
27 #include <stdlib.h>
29 #include <isc/entropy.h>
30 #include <isc/mem.h>
31 #include <isc/mutex.h>
32 #include <isc/portset.h>
33 #include <isc/print.h>
34 #include <isc/random.h>
35 #include <isc/stats.h>
36 #include <isc/string.h>
37 #include <isc/task.h>
38 #include <isc/time.h>
39 #include <isc/util.h>
41 #include <dns/acl.h>
42 #include <dns/dispatch.h>
43 #include <dns/events.h>
44 #include <dns/log.h>
45 #include <dns/message.h>
46 #include <dns/portlist.h>
47 #include <dns/stats.h>
48 #include <dns/tcpmsg.h>
49 #include <dns/types.h>
51 typedef ISC_LIST(dns_dispentry_t) dns_displist_t;
53 typedef struct dispsocket dispsocket_t;
54 typedef ISC_LIST(dispsocket_t) dispsocketlist_t;
56 /* ARC4 Random generator state */
57 typedef struct arc4ctx {
58 isc_uint8_t i;
59 isc_uint8_t j;
60 isc_uint8_t s[256];
61 int count;
62 isc_entropy_t *entropy; /*%< entropy source for ARC4 */
63 isc_mutex_t *lock;
64 } arc4ctx_t;
66 typedef struct dns_qid {
67 unsigned int magic;
68 unsigned int qid_nbuckets; /*%< hash table size */
69 unsigned int qid_increment; /*%< id increment on collision */
70 isc_mutex_t lock;
71 dns_displist_t *qid_table; /*%< the table itself */
72 dispsocketlist_t *sock_table; /*%< socket table */
73 } dns_qid_t;
75 struct dns_dispatchmgr {
76 /* Unlocked. */
77 unsigned int magic;
78 isc_mem_t *mctx;
79 dns_acl_t *blackhole;
80 dns_portlist_t *portlist;
81 isc_stats_t *stats;
82 isc_entropy_t *entropy; /*%< entropy source */
84 /* Locked by "lock". */
85 isc_mutex_t lock;
86 unsigned int state;
87 ISC_LIST(dns_dispatch_t) list;
89 /* Locked by arc4_lock. */
90 isc_mutex_t arc4_lock;
91 arc4ctx_t arc4ctx; /*%< ARC4 context for QID */
93 /* locked by buffer lock */
94 dns_qid_t *qid;
95 isc_mutex_t buffer_lock;
96 unsigned int buffers; /*%< allocated buffers */
97 unsigned int buffersize; /*%< size of each buffer */
98 unsigned int maxbuffers; /*%< max buffers */
100 /* Locked internally. */
101 isc_mutex_t pool_lock;
102 isc_mempool_t *epool; /*%< memory pool for events */
103 isc_mempool_t *rpool; /*%< memory pool for replies */
104 isc_mempool_t *dpool; /*%< dispatch allocations */
105 isc_mempool_t *bpool; /*%< memory pool for buffers */
106 isc_mempool_t *spool; /*%< memory pool for dispsocs */
109 * Locked by qid->lock if qid exists; otherwise, can be used without
110 * being locked.
111 * Memory footprint considerations: this is a simple implementation of
112 * available ports, i.e., an ordered array of the actual port numbers.
113 * This will require about 256KB of memory in the worst case (128KB for
114 * each of IPv4 and IPv6). We could reduce it by representing it as a
115 * more sophisticated way such as a list (or array) of ranges that are
116 * searched to identify a specific port. Our decision here is the saved
117 * memory isn't worth the implementation complexity, considering the
118 * fact that the whole BIND9 process (which is mainly named) already
119 * requires a pretty large memory footprint. We may, however, have to
120 * revisit the decision when we want to use it as a separate module for
121 * an environment where memory requirement is severer.
123 in_port_t *v4ports; /*%< available ports for IPv4 */
124 unsigned int nv4ports; /*%< # of available ports for IPv4 */
125 in_port_t *v6ports; /*%< available ports for IPv4 */
126 unsigned int nv6ports; /*%< # of available ports for IPv4 */
/* Bit in dns_dispatchmgr.state: the manager is shutting down. */
#define MGR_SHUTTINGDOWN		0x00000001U
/* True when the MGR_SHUTTINGDOWN bit is set in (mgr)->state. */
#define MGR_IS_SHUTTINGDOWN(mgr) \
	(((mgr)->state & MGR_SHUTTINGDOWN) != 0)

/* True when the DNS_DISPATCHATTR_PRIVATE bit is set in (disp)->attributes. */
#define IS_PRIVATE(disp) \
	(((disp)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0)
134 struct dns_dispentry {
135 unsigned int magic;
136 dns_dispatch_t *disp;
137 dns_messageid_t id;
138 in_port_t port;
139 unsigned int bucket;
140 isc_sockaddr_t host;
141 isc_task_t *task;
142 isc_taskaction_t action;
143 void *arg;
144 isc_boolean_t item_out;
145 dispsocket_t *dispsocket;
146 ISC_LIST(dns_dispatchevent_t) items;
147 ISC_LINK(dns_dispentry_t) link;
/*%
 * Maximum number of dispatch sockets that can be pooled for reuse.  The
 * appropriate value may vary, but experiments have shown that a busy caching
 * server may need more than 1000 sockets open concurrently.  The maximum
 * allowable number of dispatch sockets (per manager) will be set to double
 * this value.
 */
#ifndef DNS_DISPATCH_POOLSOCKS
#define DNS_DISPATCH_POOLSOCKS			2048
#endif
/*%
 * Quota to control the number of dispatch sockets.  If a dispatch has more
 * than the quota of sockets, new queries will purge the oldest ones, so that
 * a massive number of outstanding queries won't prevent subsequent queries
 * (especially if the older ones take longer and result in a timeout).
 */
#ifndef DNS_DISPATCH_SOCKSQUOTA
#define DNS_DISPATCH_SOCKSQUOTA			3072
#endif
171 struct dispsocket {
172 unsigned int magic;
173 isc_socket_t *socket;
174 dns_dispatch_t *disp;
175 isc_sockaddr_t host;
176 in_port_t localport;
177 dns_dispentry_t *resp;
178 isc_task_t *task;
179 ISC_LINK(dispsocket_t) link;
180 unsigned int bucket;
181 ISC_LINK(dispsocket_t) blink;
#define INVALID_BUCKET		(0xffffdead)

/*%
 * Number of tasks for each dispatch that use separate sockets for different
 * transactions.  This must be a power of 2 as it will divide 32 bit numbers
 * to get a uniformly random task selection.  See get_dispsocket().
 */
#define MAX_INTERNAL_TASKS	64
193 struct dns_dispatch {
194 /* Unlocked. */
195 unsigned int magic; /*%< magic */
196 dns_dispatchmgr_t *mgr; /*%< dispatch manager */
197 int ntasks;
199 * internal task buckets. We use multiple tasks to distribute various
200 * socket events well when using separate dispatch sockets. We use the
201 * 1st task (task[0]) for internal control events.
203 isc_task_t *task[MAX_INTERNAL_TASKS];
204 isc_socket_t *socket; /*%< isc socket attached to */
205 isc_sockaddr_t local; /*%< local address */
206 in_port_t localport; /*%< local UDP port */
207 unsigned int maxrequests; /*%< max requests */
208 isc_event_t *ctlevent;
210 /*% Locked by mgr->lock. */
211 ISC_LINK(dns_dispatch_t) link;
213 /* Locked by "lock". */
214 isc_mutex_t lock; /*%< locks all below */
215 isc_sockettype_t socktype;
216 unsigned int attributes;
217 unsigned int refcount; /*%< number of users */
218 dns_dispatchevent_t *failsafe_ev; /*%< failsafe cancel event */
219 unsigned int shutting_down : 1,
220 shutdown_out : 1,
221 connected : 1,
222 tcpmsg_valid : 1,
223 recv_pending : 1; /*%< is a recv() pending? */
224 isc_result_t shutdown_why;
225 ISC_LIST(dispsocket_t) activesockets;
226 ISC_LIST(dispsocket_t) inactivesockets;
227 unsigned int nsockets;
228 unsigned int requests; /*%< how many requests we have */
229 unsigned int tcpbuffers; /*%< allocated buffers */
230 dns_tcpmsg_t tcpmsg; /*%< for tcp streams */
231 dns_qid_t *qid;
232 arc4ctx_t arc4ctx; /*%< for QID/UDP port num */
/*
 * Magic numbers and validity checks for the structures in this file.
 */
#define QID_MAGIC		ISC_MAGIC('Q', 'i', 'd', ' ')
#define VALID_QID(e)		ISC_MAGIC_VALID((e), QID_MAGIC)

#define RESPONSE_MAGIC		ISC_MAGIC('D', 'r', 's', 'p')
#define VALID_RESPONSE(e)	ISC_MAGIC_VALID((e), RESPONSE_MAGIC)

#define DISPSOCK_MAGIC		ISC_MAGIC('D', 's', 'o', 'c')
#define VALID_DISPSOCK(e)	ISC_MAGIC_VALID((e), DISPSOCK_MAGIC)

#define DISPATCH_MAGIC		ISC_MAGIC('D', 'i', 's', 'p')
#define VALID_DISPATCH(e)	ISC_MAGIC_VALID((e), DISPATCH_MAGIC)

#define DNS_DISPATCHMGR_MAGIC	ISC_MAGIC('D', 'M', 'g', 'r')
#define VALID_DISPATCHMGR(e)	ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)
/*
 * Select the qid table for a dispatch: a TCP dispatch uses its own
 * 'qid', any other socket type uses the manager's.
 *
 * The whole conditional is parenthesised so the macro behaves as a single
 * operand wherever it is used (e.g. next to '+', '==', unary '*').
 */
#define DNS_QID(disp) \
	(((disp)->socktype == isc_sockettype_tcp) ? \
	 (disp)->qid : (disp)->mgr->qid)

/*
 * Select the ARC4 context for a dispatch: a UDP dispatch uses its own
 * context, any other socket type uses the manager's.  Yields a pointer.
 */
#define DISP_ARC4CTX(disp) \
	(((disp)->socktype == isc_sockettype_udp) ? \
	 (&(disp)->arc4ctx) : (&(disp)->mgr->arc4ctx))
/*%
 * Locking a query port buffer is a bit tricky.  We access the buffer without
 * locking until qid is created.  Technically, there is a possibility of race
 * between the creation of qid and access to the port buffer; in practice,
 * however, this should be safe because qid isn't created until the first
 * dispatch is created and there should be no contending situation until then.
 *
 * Both macros are wrapped in do { } while (0) so that each expands to exactly
 * one statement and cannot capture an 'else' that follows the invocation.
 */
#define PORTBUFLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			LOCK(&((mgr)->qid->lock)); \
	} while (0)
#define PORTBUFUNLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			UNLOCK((&(mgr)->qid->lock)); \
	} while (0)
266 * Statics.
268 static dns_dispentry_t *entry_search(dns_qid_t *, isc_sockaddr_t *,
269 dns_messageid_t, in_port_t, unsigned int);
270 static isc_boolean_t destroy_disp_ok(dns_dispatch_t *);
271 static void destroy_disp(isc_task_t *task, isc_event_t *event);
272 static void destroy_dispsocket(dns_dispatch_t *, dispsocket_t **);
273 static void deactivate_dispsocket(dns_dispatch_t *, dispsocket_t *);
274 static void udp_exrecv(isc_task_t *, isc_event_t *);
275 static void udp_shrecv(isc_task_t *, isc_event_t *);
276 static void udp_recv(isc_event_t *, dns_dispatch_t *, dispsocket_t *);
277 static void tcp_recv(isc_task_t *, isc_event_t *);
278 static isc_result_t startrecv(dns_dispatch_t *, dispsocket_t *);
279 static isc_uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t,
280 in_port_t);
281 static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len);
282 static void *allocate_udp_buffer(dns_dispatch_t *disp);
283 static inline void free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev);
284 static inline dns_dispatchevent_t *allocate_event(dns_dispatch_t *disp);
285 static void do_cancel(dns_dispatch_t *disp);
286 static dns_dispentry_t *linear_first(dns_qid_t *disp);
287 static dns_dispentry_t *linear_next(dns_qid_t *disp,
288 dns_dispentry_t *resp);
289 static void dispatch_free(dns_dispatch_t **dispp);
290 static isc_result_t get_udpsocket(dns_dispatchmgr_t *mgr,
291 dns_dispatch_t *disp,
292 isc_socketmgr_t *sockmgr,
293 isc_sockaddr_t *localaddr,
294 isc_socket_t **sockp);
295 static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr,
296 isc_socketmgr_t *sockmgr,
297 isc_taskmgr_t *taskmgr,
298 isc_sockaddr_t *localaddr,
299 unsigned int maxrequests,
300 unsigned int attributes,
301 dns_dispatch_t **dispp);
302 static isc_boolean_t destroy_mgr_ok(dns_dispatchmgr_t *mgr);
303 static void destroy_mgr(dns_dispatchmgr_t **mgrp);
304 static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
305 unsigned int increment, dns_qid_t **qidp,
306 isc_boolean_t needaddrtable);
307 static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp);
308 static isc_result_t open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
309 unsigned int options, isc_socket_t **sockp);
310 static isc_boolean_t portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
311 isc_sockaddr_t *sockaddrp);
313 #define LVL(x) ISC_LOG_DEBUG(x)
315 static void
316 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
317 ISC_FORMAT_PRINTF(3, 4);
319 static void
320 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
321 char msgbuf[2048];
322 va_list ap;
324 if (! isc_log_wouldlog(dns_lctx, level))
325 return;
327 va_start(ap, fmt);
328 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
329 va_end(ap);
331 isc_log_write(dns_lctx,
332 DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
333 level, "dispatchmgr %p: %s", mgr, msgbuf);
336 static inline void
337 inc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
338 if (mgr->stats != NULL)
339 isc_stats_increment(mgr->stats, counter);
342 static void
343 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
344 ISC_FORMAT_PRINTF(3, 4);
346 static void
347 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
348 char msgbuf[2048];
349 va_list ap;
351 if (! isc_log_wouldlog(dns_lctx, level))
352 return;
354 va_start(ap, fmt);
355 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
356 va_end(ap);
358 isc_log_write(dns_lctx,
359 DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
360 level, "dispatch %p: %s", disp, msgbuf);
363 static void
364 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
365 int level, const char *fmt, ...)
366 ISC_FORMAT_PRINTF(4, 5);
368 static void
369 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
370 int level, const char *fmt, ...)
372 char msgbuf[2048];
373 char peerbuf[256];
374 va_list ap;
376 if (! isc_log_wouldlog(dns_lctx, level))
377 return;
379 va_start(ap, fmt);
380 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
381 va_end(ap);
383 if (VALID_RESPONSE(resp)) {
384 isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf));
385 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
386 DNS_LOGMODULE_DISPATCH, level,
387 "dispatch %p response %p %s: %s", disp, resp,
388 peerbuf, msgbuf);
389 } else {
390 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
391 DNS_LOGMODULE_DISPATCH, level,
392 "dispatch %p req/resp %p: %s", disp, resp,
393 msgbuf);
398 * ARC4 random number generator derived from OpenBSD.
399 * Only dispatch_arc4random() and dispatch_arc4uniformrandom() are expected
400 * to be called from general dispatch routines; the rest of them are subroutines
401 * for these two.
403 * The original copyright follows:
404 * Copyright (c) 1996, David Mazieres <dm@uun.org>
405 * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
407 * Permission to use, copy, modify, and distribute this software for any
408 * purpose with or without fee is hereby granted, provided that the above
409 * copyright notice and this permission notice appear in all copies.
411 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
412 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
413 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
414 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
415 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
416 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
417 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
419 static void
420 dispatch_arc4init(arc4ctx_t *actx, isc_entropy_t *entropy, isc_mutex_t *lock) {
421 int n;
422 for (n = 0; n < 256; n++)
423 actx->s[n] = n;
424 actx->i = 0;
425 actx->j = 0;
426 actx->count = 0;
427 actx->entropy = entropy; /* don't have to attach */
428 actx->lock = lock;
431 static void
432 dispatch_arc4addrandom(arc4ctx_t *actx, unsigned char *dat, int datlen) {
433 int n;
434 isc_uint8_t si;
436 actx->i--;
437 for (n = 0; n < 256; n++) {
438 actx->i = (actx->i + 1);
439 si = actx->s[actx->i];
440 actx->j = (actx->j + si + dat[n % datlen]);
441 actx->s[actx->i] = actx->s[actx->j];
442 actx->s[actx->j] = si;
444 actx->j = actx->i;
447 static inline isc_uint8_t
448 dispatch_arc4get8(arc4ctx_t *actx) {
449 isc_uint8_t si, sj;
451 actx->i = (actx->i + 1);
452 si = actx->s[actx->i];
453 actx->j = (actx->j + si);
454 sj = actx->s[actx->j];
455 actx->s[actx->i] = sj;
456 actx->s[actx->j] = si;
458 return (actx->s[(si + sj) & 0xff]);
461 static inline isc_uint16_t
462 dispatch_arc4get16(arc4ctx_t *actx) {
463 isc_uint16_t val;
465 val = dispatch_arc4get8(actx) << 8;
466 val |= dispatch_arc4get8(actx);
468 return (val);
471 static void
472 dispatch_arc4stir(arc4ctx_t *actx) {
473 int i;
474 union {
475 unsigned char rnd[128];
476 isc_uint32_t rnd32[32];
477 } rnd;
478 isc_result_t result;
480 if (actx->entropy != NULL) {
482 * We accept any quality of random data to avoid blocking.
484 result = isc_entropy_getdata(actx->entropy, rnd.rnd,
485 sizeof(rnd), NULL, 0);
486 RUNTIME_CHECK(result == ISC_R_SUCCESS);
487 } else {
488 for (i = 0; i < 32; i++)
489 isc_random_get(&rnd.rnd32[i]);
491 dispatch_arc4addrandom(actx, rnd.rnd, sizeof(rnd.rnd));
494 * Discard early keystream, as per recommendations in:
495 * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps
497 for (i = 0; i < 256; i++)
498 (void)dispatch_arc4get8(actx);
501 * Derived from OpenBSD's implementation. The rationale is not clear,
502 * but should be conservative enough in safety, and reasonably large
503 * for efficiency.
505 actx->count = 1600000;
508 static isc_uint16_t
509 dispatch_arc4random(arc4ctx_t *actx) {
510 isc_uint16_t result;
512 if (actx->lock != NULL)
513 LOCK(actx->lock);
515 actx->count -= sizeof(isc_uint16_t);
516 if (actx->count <= 0)
517 dispatch_arc4stir(actx);
518 result = dispatch_arc4get16(actx);
520 if (actx->lock != NULL)
521 UNLOCK(actx->lock);
523 return (result);
526 static isc_uint16_t
527 dispatch_arc4uniformrandom(arc4ctx_t *actx, isc_uint16_t upper_bound) {
528 isc_uint16_t min, r;
530 if (upper_bound < 2)
531 return (0);
534 * Ensure the range of random numbers [min, 0xffff] be a multiple of
535 * upper_bound and contain at least a half of the 16 bit range.
538 if (upper_bound > 0x8000)
539 min = 1 + ~upper_bound; /* 0x8000 - upper_bound */
540 else
541 min = (isc_uint16_t)(0x10000 % (isc_uint32_t)upper_bound);
544 * This could theoretically loop forever but each retry has
545 * p > 0.5 (worst case, usually far better) of selecting a
546 * number inside the range we need, so it should rarely need
547 * to re-roll.
549 for (;;) {
550 r = dispatch_arc4random(actx);
551 if (r >= min)
552 break;
555 return (r % upper_bound);
559 * Return a hash of the destination and message id.
561 static isc_uint32_t
562 dns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
563 in_port_t port)
565 unsigned int ret;
567 ret = isc_sockaddr_hash(dest, ISC_TRUE);
568 ret ^= (id << 16) | port;
569 ret %= qid->qid_nbuckets;
571 INSIST(ret < qid->qid_nbuckets);
573 return (ret);
577 * Find the first entry in 'qid'. Returns NULL if there are no entries.
579 static dns_dispentry_t *
580 linear_first(dns_qid_t *qid) {
581 dns_dispentry_t *ret;
582 unsigned int bucket;
584 bucket = 0;
586 while (bucket < qid->qid_nbuckets) {
587 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
588 if (ret != NULL)
589 return (ret);
590 bucket++;
593 return (NULL);
597 * Find the next entry after 'resp' in 'qid'. Return NULL if there are
598 * no more entries.
600 static dns_dispentry_t *
601 linear_next(dns_qid_t *qid, dns_dispentry_t *resp) {
602 dns_dispentry_t *ret;
603 unsigned int bucket;
605 ret = ISC_LIST_NEXT(resp, link);
606 if (ret != NULL)
607 return (ret);
609 bucket = resp->bucket;
610 bucket++;
611 while (bucket < qid->qid_nbuckets) {
612 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
613 if (ret != NULL)
614 return (ret);
615 bucket++;
618 return (NULL);
622 * The dispatch must be locked.
624 static isc_boolean_t
625 destroy_disp_ok(dns_dispatch_t *disp)
627 if (disp->refcount != 0)
628 return (ISC_FALSE);
630 if (disp->recv_pending != 0)
631 return (ISC_FALSE);
633 if (!ISC_LIST_EMPTY(disp->activesockets))
634 return (ISC_FALSE);
636 if (disp->shutting_down == 0)
637 return (ISC_FALSE);
639 return (ISC_TRUE);
643 * Called when refcount reaches 0 (and safe to destroy).
645 * The dispatcher must not be locked.
646 * The manager must be locked.
648 static void
649 destroy_disp(isc_task_t *task, isc_event_t *event) {
650 dns_dispatch_t *disp;
651 dns_dispatchmgr_t *mgr;
652 isc_boolean_t killmgr;
653 dispsocket_t *dispsocket;
654 int i;
656 INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL);
658 UNUSED(task);
660 disp = event->ev_arg;
661 mgr = disp->mgr;
663 LOCK(&mgr->lock);
664 ISC_LIST_UNLINK(mgr->list, disp, link);
666 dispatch_log(disp, LVL(90),
667 "shutting down; detaching from sock %p, task %p",
668 disp->socket, disp->task[0]); /* XXXX */
670 if (disp->socket != NULL)
671 isc_socket_detach(&disp->socket);
672 while ((dispsocket = ISC_LIST_HEAD(disp->inactivesockets)) != NULL) {
673 ISC_LIST_UNLINK(disp->inactivesockets, dispsocket, link);
674 destroy_dispsocket(disp, &dispsocket);
676 for (i = 0; i < disp->ntasks; i++)
677 isc_task_detach(&disp->task[i]);
678 isc_event_free(&event);
680 dispatch_free(&disp);
682 killmgr = destroy_mgr_ok(mgr);
683 UNLOCK(&mgr->lock);
684 if (killmgr)
685 destroy_mgr(&mgr);
689 * Find a dispsocket for socket address 'dest', and port number 'port'.
690 * Return NULL if no such entry exists.
692 static dispsocket_t *
693 socket_search(dns_qid_t *qid, isc_sockaddr_t *dest, in_port_t port,
694 unsigned int bucket)
696 dispsocket_t *dispsock;
698 REQUIRE(bucket < qid->qid_nbuckets);
700 dispsock = ISC_LIST_HEAD(qid->sock_table[bucket]);
702 while (dispsock != NULL) {
703 if (isc_sockaddr_equal(dest, &dispsock->host) &&
704 dispsock->localport == port)
705 return (dispsock);
706 dispsock = ISC_LIST_NEXT(dispsock, blink);
709 return (NULL);
713 * Make a new socket for a single dispatch with a random port number.
714 * The caller must hold the disp->lock and qid->lock.
716 static isc_result_t
717 get_dispsocket(dns_dispatch_t *disp, isc_sockaddr_t *dest,
718 isc_socketmgr_t *sockmgr, dns_qid_t *qid,
719 dispsocket_t **dispsockp, in_port_t *portp)
721 int i;
722 isc_uint32_t r;
723 dns_dispatchmgr_t *mgr = disp->mgr;
724 isc_socket_t *sock = NULL;
725 isc_result_t result = ISC_R_FAILURE;
726 in_port_t port;
727 isc_sockaddr_t localaddr;
728 unsigned int bucket = 0;
729 dispsocket_t *dispsock;
730 unsigned int nports;
731 in_port_t *ports;
733 if (isc_sockaddr_pf(&disp->local) == AF_INET) {
734 nports = disp->mgr->nv4ports;
735 ports = disp->mgr->v4ports;
736 } else {
737 nports = disp->mgr->nv6ports;
738 ports = disp->mgr->v6ports;
740 if (nports == 0)
741 return (ISC_R_ADDRNOTAVAIL);
743 dispsock = ISC_LIST_HEAD(disp->inactivesockets);
744 if (dispsock != NULL) {
745 ISC_LIST_UNLINK(disp->inactivesockets, dispsock, link);
746 sock = dispsock->socket;
747 dispsock->socket = NULL;
748 } else {
749 dispsock = isc_mempool_get(mgr->spool);
750 if (dispsock == NULL)
751 return (ISC_R_NOMEMORY);
753 disp->nsockets++;
754 dispsock->socket = NULL;
755 dispsock->disp = disp;
756 dispsock->resp = NULL;
757 isc_random_get(&r);
758 dispsock->task = NULL;
759 isc_task_attach(disp->task[r % disp->ntasks], &dispsock->task);
760 ISC_LINK_INIT(dispsock, link);
761 ISC_LINK_INIT(dispsock, blink);
762 dispsock->magic = DISPSOCK_MAGIC;
766 * Pick up a random UDP port and open a new socket with it. Avoid
767 * choosing ports that share the same destination because it will be
768 * very likely to fail in bind(2) or connect(2).
770 localaddr = disp->local;
771 for (i = 0; i < 64; i++) {
772 port = ports[dispatch_arc4uniformrandom(DISP_ARC4CTX(disp),
773 nports)];
774 isc_sockaddr_setport(&localaddr, port);
776 bucket = dns_hash(qid, dest, 0, port);
777 if (socket_search(qid, dest, port, bucket) != NULL)
778 continue;
780 result = open_socket(sockmgr, &localaddr, 0, &sock);
781 if (result == ISC_R_SUCCESS || result != ISC_R_ADDRINUSE)
782 break;
785 if (result == ISC_R_SUCCESS) {
786 dispsock->socket = sock;
787 dispsock->host = *dest;
788 dispsock->localport = port;
789 dispsock->bucket = bucket;
790 ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink);
791 *dispsockp = dispsock;
792 *portp = port;
793 } else {
795 * We could keep it in the inactive list, but since this should
796 * be an exceptional case and might be resource shortage, we'd
797 * rather destroy it.
799 if (sock != NULL)
800 isc_socket_detach(&sock);
801 destroy_dispsocket(disp, &dispsock);
804 return (result);
808 * Destroy a dedicated dispatch socket.
810 static void
811 destroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) {
812 dispsocket_t *dispsock;
813 dns_qid_t *qid;
816 * The dispatch must be locked.
819 REQUIRE(dispsockp != NULL && *dispsockp != NULL);
820 dispsock = *dispsockp;
821 REQUIRE(!ISC_LINK_LINKED(dispsock, link));
823 disp->nsockets--;
824 dispsock->magic = 0;
825 if (dispsock->socket != NULL)
826 isc_socket_detach(&dispsock->socket);
827 if (ISC_LINK_LINKED(dispsock, blink)) {
828 qid = DNS_QID(disp);
829 LOCK(&qid->lock);
830 ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
831 blink);
832 UNLOCK(&qid->lock);
834 if (dispsock->task != NULL)
835 isc_task_detach(&dispsock->task);
836 isc_mempool_put(disp->mgr->spool, dispsock);
838 *dispsockp = NULL;
842 * Deactivate a dedicated dispatch socket. Move it to the inactive list for
843 * future reuse unless the total number of sockets are exceeding the maximum.
845 static void
846 deactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) {
847 isc_result_t result;
848 dns_qid_t *qid;
851 * The dispatch must be locked.
853 ISC_LIST_UNLINK(disp->activesockets, dispsock, link);
854 if (dispsock->resp != NULL) {
855 INSIST(dispsock->resp->dispsocket == dispsock);
856 dispsock->resp->dispsocket = NULL;
859 if (disp->nsockets > DNS_DISPATCH_POOLSOCKS)
860 destroy_dispsocket(disp, &dispsock);
861 else {
862 result = isc_socket_close(dispsock->socket);
864 qid = DNS_QID(disp);
865 LOCK(&qid->lock);
866 ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
867 blink);
868 UNLOCK(&qid->lock);
870 if (result == ISC_R_SUCCESS)
871 ISC_LIST_APPEND(disp->inactivesockets, dispsock, link);
872 else {
874 * If the underlying system does not allow this
875 * optimization, destroy this temporary structure (and
876 * create a new one for a new transaction).
878 INSIST(result == ISC_R_NOTIMPLEMENTED);
879 destroy_dispsocket(disp, &dispsock);
885 * Find an entry for query ID 'id', socket address 'dest', and port number
886 * 'port'.
887 * Return NULL if no such entry exists.
889 static dns_dispentry_t *
890 entry_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
891 in_port_t port, unsigned int bucket)
893 dns_dispentry_t *res;
895 REQUIRE(bucket < qid->qid_nbuckets);
897 res = ISC_LIST_HEAD(qid->qid_table[bucket]);
899 while (res != NULL) {
900 if (res->id == id && isc_sockaddr_equal(dest, &res->host) &&
901 res->port == port) {
902 return (res);
904 res = ISC_LIST_NEXT(res, link);
907 return (NULL);
910 static void
911 free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) {
912 INSIST(buf != NULL && len != 0);
915 switch (disp->socktype) {
916 case isc_sockettype_tcp:
917 INSIST(disp->tcpbuffers > 0);
918 disp->tcpbuffers--;
919 isc_mem_put(disp->mgr->mctx, buf, len);
920 break;
921 case isc_sockettype_udp:
922 LOCK(&disp->mgr->buffer_lock);
923 INSIST(disp->mgr->buffers > 0);
924 INSIST(len == disp->mgr->buffersize);
925 disp->mgr->buffers--;
926 isc_mempool_put(disp->mgr->bpool, buf);
927 UNLOCK(&disp->mgr->buffer_lock);
928 break;
929 default:
930 INSIST(0);
931 break;
935 static void *
936 allocate_udp_buffer(dns_dispatch_t *disp) {
937 void *temp;
939 LOCK(&disp->mgr->buffer_lock);
940 temp = isc_mempool_get(disp->mgr->bpool);
942 if (temp != NULL)
943 disp->mgr->buffers++;
944 UNLOCK(&disp->mgr->buffer_lock);
946 return (temp);
949 static inline void
950 free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev) {
951 if (disp->failsafe_ev == ev) {
952 INSIST(disp->shutdown_out == 1);
953 disp->shutdown_out = 0;
955 return;
958 isc_mempool_put(disp->mgr->epool, ev);
961 static inline dns_dispatchevent_t *
962 allocate_event(dns_dispatch_t *disp) {
963 dns_dispatchevent_t *ev;
965 ev = isc_mempool_get(disp->mgr->epool);
966 if (ev == NULL)
967 return (NULL);
968 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0,
969 NULL, NULL, NULL, NULL, NULL);
971 return (ev);
974 static void
975 udp_exrecv(isc_task_t *task, isc_event_t *ev) {
976 dispsocket_t *dispsock = ev->ev_arg;
978 UNUSED(task);
980 REQUIRE(VALID_DISPSOCK(dispsock));
981 udp_recv(ev, dispsock->disp, dispsock);
984 static void
985 udp_shrecv(isc_task_t *task, isc_event_t *ev) {
986 dns_dispatch_t *disp = ev->ev_arg;
988 UNUSED(task);
990 REQUIRE(VALID_DISPATCH(disp));
991 udp_recv(ev, disp, NULL);
995 * General flow:
997 * If I/O result == CANCELED or error, free the buffer.
999 * If query, free the buffer, restart.
1001 * If response:
1002 * Allocate event, fill in details.
1003 * If cannot allocate, free buffer, restart.
1004 * find target. If not found, free buffer, restart.
1005 * if event queue is not empty, queue. else, send.
1006 * restart.
1008 static void
1009 udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) {
1010 isc_socketevent_t *ev = (isc_socketevent_t *)ev_in;
1011 dns_messageid_t id;
1012 isc_result_t dres;
1013 isc_buffer_t source;
1014 unsigned int flags;
1015 dns_dispentry_t *resp = NULL;
1016 dns_dispatchevent_t *rev;
1017 unsigned int bucket;
1018 isc_boolean_t killit;
1019 isc_boolean_t queue_response;
1020 dns_dispatchmgr_t *mgr;
1021 dns_qid_t *qid;
1022 isc_netaddr_t netaddr;
1023 int match;
1024 int result;
1025 isc_boolean_t qidlocked = ISC_FALSE;
1027 LOCK(&disp->lock);
1029 mgr = disp->mgr;
1030 qid = mgr->qid;
1032 dispatch_log(disp, LVL(90),
1033 "got packet: requests %d, buffers %d, recvs %d",
1034 disp->requests, disp->mgr->buffers, disp->recv_pending);
1036 if (dispsock == NULL && ev->ev_type == ISC_SOCKEVENT_RECVDONE) {
1038 * Unless the receive event was imported from a listening
1039 * interface, in which case the event type is
1040 * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending.
1042 INSIST(disp->recv_pending != 0);
1043 disp->recv_pending = 0;
1046 if (dispsock != NULL &&
1047 (ev->result == ISC_R_CANCELED || dispsock->resp == NULL)) {
1049 * dispsock->resp can be NULL if this transaction was canceled
1050 * just after receiving a response. Since this socket is
1051 * exclusively used and there should be at most one receive
1052 * event the canceled event should have been no effect. So
1053 * we can (and should) deactivate the socket right now.
1055 deactivate_dispsocket(disp, dispsock);
1056 dispsock = NULL;
1059 if (disp->shutting_down) {
1061 * This dispatcher is shutting down.
1063 free_buffer(disp, ev->region.base, ev->region.length);
1065 isc_event_free(&ev_in);
1066 ev = NULL;
1068 killit = destroy_disp_ok(disp);
1069 UNLOCK(&disp->lock);
1070 if (killit)
1071 isc_task_send(disp->task[0], &disp->ctlevent);
1073 return;
1076 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
1077 if (dispsock != NULL) {
1078 resp = dispsock->resp;
1079 id = resp->id;
1080 if (ev->result != ISC_R_SUCCESS) {
1082 * This is most likely a network error on a
1083 * connected socket. It makes no sense to
1084 * check the address or parse the packet, but it
1085 * will help to return the error to the caller.
1087 goto sendresponse;
1089 } else {
1090 free_buffer(disp, ev->region.base, ev->region.length);
1092 UNLOCK(&disp->lock);
1093 isc_event_free(&ev_in);
1094 return;
1096 } else if (ev->result != ISC_R_SUCCESS) {
1097 free_buffer(disp, ev->region.base, ev->region.length);
1099 if (ev->result != ISC_R_CANCELED)
1100 dispatch_log(disp, ISC_LOG_ERROR,
1101 "odd socket result in udp_recv(): %s",
1102 isc_result_totext(ev->result));
1104 UNLOCK(&disp->lock);
1105 isc_event_free(&ev_in);
1106 return;
1110 * If this is from a blackholed address, drop it.
1112 isc_netaddr_fromsockaddr(&netaddr, &ev->address);
1113 if (disp->mgr->blackhole != NULL &&
1114 dns_acl_match(&netaddr, NULL, disp->mgr->blackhole,
1115 NULL, &match, NULL) == ISC_R_SUCCESS &&
1116 match > 0)
1118 if (isc_log_wouldlog(dns_lctx, LVL(10))) {
1119 char netaddrstr[ISC_NETADDR_FORMATSIZE];
1120 isc_netaddr_format(&netaddr, netaddrstr,
1121 sizeof(netaddrstr));
1122 dispatch_log(disp, LVL(10),
1123 "blackholed packet from %s",
1124 netaddrstr);
1126 free_buffer(disp, ev->region.base, ev->region.length);
1127 goto restart;
1131 * Peek into the buffer to see what we can see.
1133 isc_buffer_init(&source, ev->region.base, ev->region.length);
1134 isc_buffer_add(&source, ev->n);
1135 dres = dns_message_peekheader(&source, &id, &flags);
1136 if (dres != ISC_R_SUCCESS) {
1137 free_buffer(disp, ev->region.base, ev->region.length);
1138 dispatch_log(disp, LVL(10), "got garbage packet");
1139 goto restart;
1142 dispatch_log(disp, LVL(92),
1143 "got valid DNS message header, /QR %c, id %u",
1144 ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1147 * Look at flags. If query, drop it. If response,
1148 * look to see where it goes.
1150 queue_response = ISC_FALSE;
1151 if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1152 /* query */
1153 free_buffer(disp, ev->region.base, ev->region.length);
1154 goto restart;
1158 * Search for the corresponding response. If we are using an exclusive
1159 * socket, we've already identified it and we can skip the search; but
1160 * the ID and the address must match the expected ones.
1162 if (resp == NULL) {
1163 bucket = dns_hash(qid, &ev->address, id, disp->localport);
1164 LOCK(&qid->lock);
1165 qidlocked = ISC_TRUE;
1166 resp = entry_search(qid, &ev->address, id, disp->localport,
1167 bucket);
1168 dispatch_log(disp, LVL(90),
1169 "search for response in bucket %d: %s",
1170 bucket, (resp == NULL ? "not found" : "found"));
1172 if (resp == NULL) {
1173 inc_stats(mgr, dns_resstatscounter_mismatch);
1174 free_buffer(disp, ev->region.base, ev->region.length);
1175 goto unlock;
1177 } else if (resp->id != id || !isc_sockaddr_equal(&ev->address,
1178 &resp->host)) {
1179 dispatch_log(disp, LVL(90),
1180 "response to an exclusive socket doesn't match");
1181 inc_stats(mgr, dns_resstatscounter_mismatch);
1182 free_buffer(disp, ev->region.base, ev->region.length);
1183 goto unlock;
1187 * Now that we have the original dispatch the query was sent
1188 * from check that the address and port the response was
1189 * sent to make sense.
1191 if (disp != resp->disp) {
1192 isc_sockaddr_t a1;
1193 isc_sockaddr_t a2;
1196 * Check that the socket types and ports match.
1198 if (disp->socktype != resp->disp->socktype ||
1199 isc_sockaddr_getport(&disp->local) !=
1200 isc_sockaddr_getport(&resp->disp->local)) {
1201 free_buffer(disp, ev->region.base, ev->region.length);
1202 goto unlock;
1206 * If both dispatches are bound to an address then fail as
1207 * the addresses can't be equal (enforced by the IP stack).
1209 * Note under Linux a packet can be sent out via IPv4 socket
1210 * and the response be received via a IPv6 socket.
1212 * Requests sent out via IPv6 should always come back in
1213 * via IPv6.
1215 if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 &&
1216 isc_sockaddr_pf(&disp->local) != PF_INET6) {
1217 free_buffer(disp, ev->region.base, ev->region.length);
1218 goto unlock;
1220 isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local));
1221 isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local));
1222 if (!isc_sockaddr_eqaddr(&a1, &resp->disp->local) &&
1223 !isc_sockaddr_eqaddr(&a2, &disp->local)) {
1224 free_buffer(disp, ev->region.base, ev->region.length);
1225 goto unlock;
1229 sendresponse:
1230 queue_response = resp->item_out;
1231 rev = allocate_event(resp->disp);
1232 if (rev == NULL) {
1233 free_buffer(disp, ev->region.base, ev->region.length);
1234 goto unlock;
1238 * At this point, rev contains the event we want to fill in, and
1239 * resp contains the information on the place to send it to.
1240 * Send the event off.
1242 isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length);
1243 isc_buffer_add(&rev->buffer, ev->n);
1244 rev->result = ev->result;
1245 rev->id = id;
1246 rev->addr = ev->address;
1247 rev->pktinfo = ev->pktinfo;
1248 rev->attributes = ev->attributes;
1249 if (queue_response) {
1250 ISC_LIST_APPEND(resp->items, rev, ev_link);
1251 } else {
1252 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL,
1253 DNS_EVENT_DISPATCH,
1254 resp->action, resp->arg, resp, NULL, NULL);
1255 request_log(disp, resp, LVL(90),
1256 "[a] Sent event %p buffer %p len %d to task %p",
1257 rev, rev->buffer.base, rev->buffer.length,
1258 resp->task);
1259 resp->item_out = ISC_TRUE;
1260 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1262 unlock:
1263 if (qidlocked)
1264 UNLOCK(&qid->lock);
1267 * Restart recv() to get the next packet.
1269 restart:
1270 result = startrecv(disp, dispsock);
1271 if (result != ISC_R_SUCCESS && dispsock != NULL) {
1273 * XXX: wired. There seems to be no recovery process other than
1274 * deactivate this socket anyway (since we cannot start
1275 * receiving, we won't be able to receive a cancel event
1276 * from the user).
1278 deactivate_dispsocket(disp, dispsock);
1280 UNLOCK(&disp->lock);
1282 isc_event_free(&ev_in);
1286 * General flow:
1288 * If I/O result == CANCELED, EOF, or error, notify everyone as the
1289 * various queues drain.
1291 * If query, restart.
1293 * If response:
1294 * Allocate event, fill in details.
1295 * If cannot allocate, restart.
1296 * find target. If not found, restart.
1297 * if event queue is not empty, queue. else, send.
1298 * restart.
1300 static void
1301 tcp_recv(isc_task_t *task, isc_event_t *ev_in) {
1302 dns_dispatch_t *disp = ev_in->ev_arg;
1303 dns_tcpmsg_t *tcpmsg = &disp->tcpmsg;
1304 dns_messageid_t id;
1305 isc_result_t dres;
1306 unsigned int flags;
1307 dns_dispentry_t *resp;
1308 dns_dispatchevent_t *rev;
1309 unsigned int bucket;
1310 isc_boolean_t killit;
1311 isc_boolean_t queue_response;
1312 dns_qid_t *qid;
1313 int level;
1314 char buf[ISC_SOCKADDR_FORMATSIZE];
1316 UNUSED(task);
1318 REQUIRE(VALID_DISPATCH(disp));
1320 qid = disp->qid;
1322 dispatch_log(disp, LVL(90),
1323 "got TCP packet: requests %d, buffers %d, recvs %d",
1324 disp->requests, disp->tcpbuffers, disp->recv_pending);
1326 LOCK(&disp->lock);
1328 INSIST(disp->recv_pending != 0);
1329 disp->recv_pending = 0;
1331 if (disp->refcount == 0) {
1333 * This dispatcher is shutting down. Force cancelation.
1335 tcpmsg->result = ISC_R_CANCELED;
1338 if (tcpmsg->result != ISC_R_SUCCESS) {
1339 switch (tcpmsg->result) {
1340 case ISC_R_CANCELED:
1341 break;
1343 case ISC_R_EOF:
1344 dispatch_log(disp, LVL(90), "shutting down on EOF");
1345 do_cancel(disp);
1346 break;
1348 case ISC_R_CONNECTIONRESET:
1349 level = ISC_LOG_INFO;
1350 goto logit;
1352 default:
1353 level = ISC_LOG_ERROR;
1354 logit:
1355 isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf));
1356 dispatch_log(disp, level, "shutting down due to TCP "
1357 "receive error: %s: %s", buf,
1358 isc_result_totext(tcpmsg->result));
1359 do_cancel(disp);
1360 break;
1364 * The event is statically allocated in the tcpmsg
1365 * structure, and destroy_disp() frees the tcpmsg, so we must
1366 * free the event *before* calling destroy_disp().
1368 isc_event_free(&ev_in);
1370 disp->shutting_down = 1;
1371 disp->shutdown_why = tcpmsg->result;
1374 * If the recv() was canceled pass the word on.
1376 killit = destroy_disp_ok(disp);
1377 UNLOCK(&disp->lock);
1378 if (killit)
1379 isc_task_send(disp->task[0], &disp->ctlevent);
1380 return;
1383 dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p",
1384 tcpmsg->result,
1385 tcpmsg->buffer.length, tcpmsg->buffer.base);
1388 * Peek into the buffer to see what we can see.
1390 dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags);
1391 if (dres != ISC_R_SUCCESS) {
1392 dispatch_log(disp, LVL(10), "got garbage packet");
1393 goto restart;
1396 dispatch_log(disp, LVL(92),
1397 "got valid DNS message header, /QR %c, id %u",
1398 ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1401 * Allocate an event to send to the query or response client, and
1402 * allocate a new buffer for our use.
1406 * Look at flags. If query, drop it. If response,
1407 * look to see where it goes.
1409 queue_response = ISC_FALSE;
1410 if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1412 * Query.
1414 goto restart;
1418 * Response.
1420 bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport);
1421 LOCK(&qid->lock);
1422 resp = entry_search(qid, &tcpmsg->address, id, disp->localport, bucket);
1423 dispatch_log(disp, LVL(90),
1424 "search for response in bucket %d: %s",
1425 bucket, (resp == NULL ? "not found" : "found"));
1427 if (resp == NULL)
1428 goto unlock;
1429 queue_response = resp->item_out;
1430 rev = allocate_event(disp);
1431 if (rev == NULL)
1432 goto unlock;
1435 * At this point, rev contains the event we want to fill in, and
1436 * resp contains the information on the place to send it to.
1437 * Send the event off.
1439 dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer);
1440 disp->tcpbuffers++;
1441 rev->result = ISC_R_SUCCESS;
1442 rev->id = id;
1443 rev->addr = tcpmsg->address;
1444 if (queue_response) {
1445 ISC_LIST_APPEND(resp->items, rev, ev_link);
1446 } else {
1447 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH,
1448 resp->action, resp->arg, resp, NULL, NULL);
1449 request_log(disp, resp, LVL(90),
1450 "[b] Sent event %p buffer %p len %d to task %p",
1451 rev, rev->buffer.base, rev->buffer.length,
1452 resp->task);
1453 resp->item_out = ISC_TRUE;
1454 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1456 unlock:
1457 UNLOCK(&qid->lock);
1460 * Restart recv() to get the next packet.
1462 restart:
1463 (void)startrecv(disp, NULL);
1465 UNLOCK(&disp->lock);
1467 isc_event_free(&ev_in);
1471 * disp must be locked.
1473 static isc_result_t
1474 startrecv(dns_dispatch_t *disp, dispsocket_t *dispsock) {
1475 isc_result_t res;
1476 isc_region_t region;
1477 isc_socket_t *socket;
1479 if (disp->shutting_down == 1)
1480 return (ISC_R_SUCCESS);
1482 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
1483 return (ISC_R_SUCCESS);
1485 if (disp->recv_pending != 0 && dispsock == NULL)
1486 return (ISC_R_SUCCESS);
1488 if (disp->mgr->buffers >= disp->mgr->maxbuffers)
1489 return (ISC_R_NOMEMORY);
1491 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
1492 dispsock == NULL)
1493 return (ISC_R_SUCCESS);
1495 if (dispsock != NULL)
1496 socket = dispsock->socket;
1497 else
1498 socket = disp->socket;
1499 INSIST(socket != NULL);
1501 switch (disp->socktype) {
1503 * UDP reads are always maximal.
1505 case isc_sockettype_udp:
1506 region.length = disp->mgr->buffersize;
1507 region.base = allocate_udp_buffer(disp);
1508 if (region.base == NULL)
1509 return (ISC_R_NOMEMORY);
1510 if (dispsock != NULL) {
1511 res = isc_socket_recv(socket, &region, 1,
1512 dispsock->task, udp_exrecv,
1513 dispsock);
1514 if (res != ISC_R_SUCCESS) {
1515 free_buffer(disp, region.base, region.length);
1516 return (res);
1518 } else {
1519 res = isc_socket_recv(socket, &region, 1,
1520 disp->task[0], udp_shrecv, disp);
1521 if (res != ISC_R_SUCCESS) {
1522 free_buffer(disp, region.base, region.length);
1523 disp->shutdown_why = res;
1524 disp->shutting_down = 1;
1525 do_cancel(disp);
1526 return (ISC_R_SUCCESS); /* recover by cancel */
1528 INSIST(disp->recv_pending == 0);
1529 disp->recv_pending = 1;
1531 break;
1533 case isc_sockettype_tcp:
1534 res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task[0],
1535 tcp_recv, disp);
1536 if (res != ISC_R_SUCCESS) {
1537 disp->shutdown_why = res;
1538 disp->shutting_down = 1;
1539 do_cancel(disp);
1540 return (ISC_R_SUCCESS); /* recover by cancel */
1542 INSIST(disp->recv_pending == 0);
1543 disp->recv_pending = 1;
1544 break;
1545 default:
1546 INSIST(0);
1547 break;
1550 return (ISC_R_SUCCESS);
1554 * Mgr must be locked when calling this function.
1556 static isc_boolean_t
1557 destroy_mgr_ok(dns_dispatchmgr_t *mgr) {
1558 mgr_log(mgr, LVL(90),
1559 "destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, "
1560 "epool=%d, rpool=%d, dpool=%d",
1561 MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list),
1562 isc_mempool_getallocated(mgr->epool),
1563 isc_mempool_getallocated(mgr->rpool),
1564 isc_mempool_getallocated(mgr->dpool));
1565 if (!MGR_IS_SHUTTINGDOWN(mgr))
1566 return (ISC_FALSE);
1567 if (!ISC_LIST_EMPTY(mgr->list))
1568 return (ISC_FALSE);
1569 if (isc_mempool_getallocated(mgr->epool) != 0)
1570 return (ISC_FALSE);
1571 if (isc_mempool_getallocated(mgr->rpool) != 0)
1572 return (ISC_FALSE);
1573 if (isc_mempool_getallocated(mgr->dpool) != 0)
1574 return (ISC_FALSE);
1576 return (ISC_TRUE);
1580 * Mgr must be unlocked when calling this function.
1582 static void
1583 destroy_mgr(dns_dispatchmgr_t **mgrp) {
1584 isc_mem_t *mctx;
1585 dns_dispatchmgr_t *mgr;
1587 mgr = *mgrp;
1588 *mgrp = NULL;
1590 mctx = mgr->mctx;
1592 mgr->magic = 0;
1593 mgr->mctx = NULL;
1594 DESTROYLOCK(&mgr->lock);
1595 mgr->state = 0;
1597 DESTROYLOCK(&mgr->arc4_lock);
1599 isc_mempool_destroy(&mgr->epool);
1600 isc_mempool_destroy(&mgr->rpool);
1601 isc_mempool_destroy(&mgr->dpool);
1602 isc_mempool_destroy(&mgr->bpool);
1603 isc_mempool_destroy(&mgr->spool);
1605 DESTROYLOCK(&mgr->pool_lock);
1607 if (mgr->entropy != NULL)
1608 isc_entropy_detach(&mgr->entropy);
1609 if (mgr->qid != NULL)
1610 qid_destroy(mctx, &mgr->qid);
1612 DESTROYLOCK(&mgr->buffer_lock);
1614 if (mgr->blackhole != NULL)
1615 dns_acl_detach(&mgr->blackhole);
1617 if (mgr->stats != NULL)
1618 isc_stats_detach(&mgr->stats);
1620 if (mgr->v4ports != NULL) {
1621 isc_mem_put(mctx, mgr->v4ports,
1622 mgr->nv4ports * sizeof(in_port_t));
1624 if (mgr->v6ports != NULL) {
1625 isc_mem_put(mctx, mgr->v6ports,
1626 mgr->nv6ports * sizeof(in_port_t));
1628 isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1629 isc_mem_detach(&mctx);
1632 static isc_result_t
1633 open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
1634 unsigned int options, isc_socket_t **sockp)
1636 isc_socket_t *sock;
1637 isc_result_t result;
1639 sock = *sockp;
1640 if (sock == NULL) {
1641 result = isc_socket_create(mgr, isc_sockaddr_pf(local),
1642 isc_sockettype_udp, &sock);
1643 if (result != ISC_R_SUCCESS)
1644 return (result);
1645 isc_socket_setname(sock, "dispatcher", NULL);
1646 } else {
1647 result = isc_socket_open(sock);
1648 if (result != ISC_R_SUCCESS)
1649 return (result);
1652 #ifndef ISC_ALLOW_MAPPED
1653 isc_socket_ipv6only(sock, ISC_TRUE);
1654 #endif
1655 result = isc_socket_bind(sock, local, options);
1656 if (result != ISC_R_SUCCESS) {
1657 if (*sockp == NULL)
1658 isc_socket_detach(&sock);
1659 else
1660 isc_socket_close(sock);
1661 return (result);
1664 *sockp = sock;
1665 return (ISC_R_SUCCESS);
1669 * Create a temporary port list to set the initial default set of dispatch
1670 * ports: [1024, 65535]. This is almost meaningless as the application will
1671 * normally set the ports explicitly, but is provided to fill some minor corner
1672 * cases.
1674 static isc_result_t
1675 create_default_portset(isc_mem_t *mctx, isc_portset_t **portsetp) {
1676 isc_result_t result;
1678 result = isc_portset_create(mctx, portsetp);
1679 if (result != ISC_R_SUCCESS)
1680 return (result);
1681 isc_portset_addrange(*portsetp, 1024, 65535);
1683 return (ISC_R_SUCCESS);
/*
 * Public functions.
 */
1690 isc_result_t
1691 dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy,
1692 dns_dispatchmgr_t **mgrp)
1694 dns_dispatchmgr_t *mgr;
1695 isc_result_t result;
1696 isc_portset_t *v4portset = NULL;
1697 isc_portset_t *v6portset = NULL;
1699 REQUIRE(mctx != NULL);
1700 REQUIRE(mgrp != NULL && *mgrp == NULL);
1702 mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t));
1703 if (mgr == NULL)
1704 return (ISC_R_NOMEMORY);
1706 mgr->mctx = NULL;
1707 isc_mem_attach(mctx, &mgr->mctx);
1709 mgr->blackhole = NULL;
1710 mgr->stats = NULL;
1712 result = isc_mutex_init(&mgr->lock);
1713 if (result != ISC_R_SUCCESS)
1714 goto deallocate;
1716 result = isc_mutex_init(&mgr->arc4_lock);
1717 if (result != ISC_R_SUCCESS)
1718 goto kill_lock;
1720 result = isc_mutex_init(&mgr->buffer_lock);
1721 if (result != ISC_R_SUCCESS)
1722 goto kill_arc4_lock;
1724 result = isc_mutex_init(&mgr->pool_lock);
1725 if (result != ISC_R_SUCCESS)
1726 goto kill_buffer_lock;
1728 mgr->epool = NULL;
1729 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatchevent_t),
1730 &mgr->epool) != ISC_R_SUCCESS) {
1731 result = ISC_R_NOMEMORY;
1732 goto kill_pool_lock;
1735 mgr->rpool = NULL;
1736 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispentry_t),
1737 &mgr->rpool) != ISC_R_SUCCESS) {
1738 result = ISC_R_NOMEMORY;
1739 goto kill_epool;
1742 mgr->dpool = NULL;
1743 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatch_t),
1744 &mgr->dpool) != ISC_R_SUCCESS) {
1745 result = ISC_R_NOMEMORY;
1746 goto kill_rpool;
1749 isc_mempool_setname(mgr->epool, "dispmgr_epool");
1750 isc_mempool_setfreemax(mgr->epool, 1024);
1751 isc_mempool_associatelock(mgr->epool, &mgr->pool_lock);
1753 isc_mempool_setname(mgr->rpool, "dispmgr_rpool");
1754 isc_mempool_setfreemax(mgr->rpool, 1024);
1755 isc_mempool_associatelock(mgr->rpool, &mgr->pool_lock);
1757 isc_mempool_setname(mgr->dpool, "dispmgr_dpool");
1758 isc_mempool_setfreemax(mgr->dpool, 1024);
1759 isc_mempool_associatelock(mgr->dpool, &mgr->pool_lock);
1761 mgr->buffers = 0;
1762 mgr->buffersize = 0;
1763 mgr->maxbuffers = 0;
1764 mgr->bpool = NULL;
1765 mgr->spool = NULL;
1766 mgr->entropy = NULL;
1767 mgr->qid = NULL;
1768 mgr->state = 0;
1769 ISC_LIST_INIT(mgr->list);
1770 mgr->v4ports = NULL;
1771 mgr->v6ports = NULL;
1772 mgr->nv4ports = 0;
1773 mgr->nv6ports = 0;
1774 mgr->magic = DNS_DISPATCHMGR_MAGIC;
1776 result = create_default_portset(mctx, &v4portset);
1777 if (result == ISC_R_SUCCESS) {
1778 result = create_default_portset(mctx, &v6portset);
1779 if (result == ISC_R_SUCCESS) {
1780 result = dns_dispatchmgr_setavailports(mgr,
1781 v4portset,
1782 v6portset);
1785 if (v4portset != NULL)
1786 isc_portset_destroy(mctx, &v4portset);
1787 if (v6portset != NULL)
1788 isc_portset_destroy(mctx, &v6portset);
1789 if (result != ISC_R_SUCCESS)
1790 goto kill_dpool;
1792 if (entropy != NULL)
1793 isc_entropy_attach(entropy, &mgr->entropy);
1795 dispatch_arc4init(&mgr->arc4ctx, mgr->entropy, &mgr->arc4_lock);
1797 *mgrp = mgr;
1798 return (ISC_R_SUCCESS);
1800 kill_dpool:
1801 isc_mempool_destroy(&mgr->dpool);
1802 kill_rpool:
1803 isc_mempool_destroy(&mgr->rpool);
1804 kill_epool:
1805 isc_mempool_destroy(&mgr->epool);
1806 kill_pool_lock:
1807 DESTROYLOCK(&mgr->pool_lock);
1808 kill_buffer_lock:
1809 DESTROYLOCK(&mgr->buffer_lock);
1810 kill_arc4_lock:
1811 DESTROYLOCK(&mgr->arc4_lock);
1812 kill_lock:
1813 DESTROYLOCK(&mgr->lock);
1814 deallocate:
1815 isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1816 isc_mem_detach(&mctx);
1818 return (result);
1821 void
1822 dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) {
1823 REQUIRE(VALID_DISPATCHMGR(mgr));
1824 if (mgr->blackhole != NULL)
1825 dns_acl_detach(&mgr->blackhole);
1826 dns_acl_attach(blackhole, &mgr->blackhole);
1829 dns_acl_t *
1830 dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) {
1831 REQUIRE(VALID_DISPATCHMGR(mgr));
1832 return (mgr->blackhole);
1835 void
1836 dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr,
1837 dns_portlist_t *portlist)
1839 REQUIRE(VALID_DISPATCHMGR(mgr));
1840 UNUSED(portlist);
1842 /* This function is deprecated: use dns_dispatchmgr_setavailports(). */
1843 return;
1846 dns_portlist_t *
1847 dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) {
1848 REQUIRE(VALID_DISPATCHMGR(mgr));
1849 return (NULL); /* this function is deprecated */
1852 isc_result_t
1853 dns_dispatchmgr_setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset,
1854 isc_portset_t *v6portset)
1856 in_port_t *v4ports, *v6ports, p;
1857 unsigned int nv4ports, nv6ports, i4, i6;
1859 REQUIRE(VALID_DISPATCHMGR(mgr));
1861 nv4ports = isc_portset_nports(v4portset);
1862 nv6ports = isc_portset_nports(v6portset);
1864 v4ports = NULL;
1865 if (nv4ports != 0) {
1866 v4ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv4ports);
1867 if (v4ports == NULL)
1868 return (ISC_R_NOMEMORY);
1870 v6ports = NULL;
1871 if (nv6ports != 0) {
1872 v6ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv6ports);
1873 if (v6ports == NULL) {
1874 if (v4ports != NULL) {
1875 isc_mem_put(mgr->mctx, v4ports,
1876 sizeof(in_port_t) *
1877 isc_portset_nports(v4portset));
1879 return (ISC_R_NOMEMORY);
1883 p = 0;
1884 i4 = 0;
1885 i6 = 0;
1886 do {
1887 if (isc_portset_isset(v4portset, p)) {
1888 INSIST(i4 < nv4ports);
1889 v4ports[i4++] = p;
1891 if (isc_portset_isset(v6portset, p)) {
1892 INSIST(i6 < nv6ports);
1893 v6ports[i6++] = p;
1895 } while (p++ < 65535);
1896 INSIST(i4 == nv4ports && i6 == nv6ports);
1898 PORTBUFLOCK(mgr);
1899 if (mgr->v4ports != NULL) {
1900 isc_mem_put(mgr->mctx, mgr->v4ports,
1901 mgr->nv4ports * sizeof(in_port_t));
1903 mgr->v4ports = v4ports;
1904 mgr->nv4ports = nv4ports;
1906 if (mgr->v6ports != NULL) {
1907 isc_mem_put(mgr->mctx, mgr->v6ports,
1908 mgr->nv6ports * sizeof(in_port_t));
1910 mgr->v6ports = v6ports;
1911 mgr->nv6ports = nv6ports;
1912 PORTBUFUNLOCK(mgr);
1914 return (ISC_R_SUCCESS);
1917 static isc_result_t
1918 dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr,
1919 unsigned int buffersize, unsigned int maxbuffers,
1920 unsigned int maxrequests, unsigned int buckets,
1921 unsigned int increment)
1923 isc_result_t result;
1925 REQUIRE(VALID_DISPATCHMGR(mgr));
1926 REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
1927 REQUIRE(maxbuffers > 0);
1928 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
1929 REQUIRE(increment > buckets);
1932 * Keep some number of items around. This should be a config
1933 * option. For now, keep 8, but later keep at least two even
1934 * if the caller wants less. This allows us to ensure certain
1935 * things, like an event can be "freed" and the next allocation
1936 * will always succeed.
1938 * Note that if limits are placed on anything here, we use one
1939 * event internally, so the actual limit should be "wanted + 1."
1941 * XXXMLG
1944 if (maxbuffers < 8)
1945 maxbuffers = 8;
1947 LOCK(&mgr->buffer_lock);
1949 /* Create or adjust buffer pool */
1950 if (mgr->bpool != NULL) {
1951 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
1952 mgr->maxbuffers = maxbuffers;
1953 } else {
1954 result = isc_mempool_create(mgr->mctx, buffersize, &mgr->bpool);
1955 if (result != ISC_R_SUCCESS) {
1956 UNLOCK(&mgr->buffer_lock);
1957 return (result);
1959 isc_mempool_setname(mgr->bpool, "dispmgr_bpool");
1960 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
1961 isc_mempool_associatelock(mgr->bpool, &mgr->pool_lock);
1964 /* Create or adjust socket pool */
1965 if (mgr->spool != NULL) {
1966 isc_mempool_setmaxalloc(mgr->spool, DNS_DISPATCH_POOLSOCKS * 2);
1967 UNLOCK(&mgr->buffer_lock);
1968 return (ISC_R_SUCCESS);
1970 result = isc_mempool_create(mgr->mctx, sizeof(dispsocket_t),
1971 &mgr->spool);
1972 if (result != ISC_R_SUCCESS) {
1973 UNLOCK(&mgr->buffer_lock);
1974 goto cleanup;
1976 isc_mempool_setname(mgr->spool, "dispmgr_spool");
1977 isc_mempool_setmaxalloc(mgr->spool, maxrequests);
1978 isc_mempool_associatelock(mgr->spool, &mgr->pool_lock);
1980 result = qid_allocate(mgr, buckets, increment, &mgr->qid, ISC_TRUE);
1981 if (result != ISC_R_SUCCESS)
1982 goto cleanup;
1984 mgr->buffersize = buffersize;
1985 mgr->maxbuffers = maxbuffers;
1986 UNLOCK(&mgr->buffer_lock);
1987 return (ISC_R_SUCCESS);
1989 cleanup:
1990 isc_mempool_destroy(&mgr->bpool);
1991 if (mgr->spool != NULL)
1992 isc_mempool_destroy(&mgr->spool);
1993 UNLOCK(&mgr->buffer_lock);
1994 return (result);
1997 void
1998 dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) {
1999 dns_dispatchmgr_t *mgr;
2000 isc_boolean_t killit;
2002 REQUIRE(mgrp != NULL);
2003 REQUIRE(VALID_DISPATCHMGR(*mgrp));
2005 mgr = *mgrp;
2006 *mgrp = NULL;
2008 LOCK(&mgr->lock);
2009 mgr->state |= MGR_SHUTTINGDOWN;
2011 killit = destroy_mgr_ok(mgr);
2012 UNLOCK(&mgr->lock);
2014 mgr_log(mgr, LVL(90), "destroy: killit=%d", killit);
2016 if (killit)
2017 destroy_mgr(&mgr);
2020 void
2021 dns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) {
2022 REQUIRE(VALID_DISPATCHMGR(mgr));
2023 REQUIRE(ISC_LIST_EMPTY(mgr->list));
2024 REQUIRE(mgr->stats == NULL);
2026 isc_stats_attach(stats, &mgr->stats);
/*
 * bsearch() comparison callback for in_port_t values: returns -1, 0 or 1
 * when the key is less than, equal to, or greater than the table entry.
 */
static int
port_cmp(const void *key, const void *ent) {
	const in_port_t wanted = *(const in_port_t *)key;
	const in_port_t entry = *(const in_port_t *)ent;

	return ((wanted > entry) - (wanted < entry));
}
2042 static isc_boolean_t
2043 portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2044 isc_sockaddr_t *sockaddrp)
2046 isc_sockaddr_t sockaddr;
2047 isc_result_t result;
2048 in_port_t *ports, port;
2049 unsigned int nports;
2050 isc_boolean_t available = ISC_FALSE;
2052 REQUIRE(sock != NULL || sockaddrp != NULL);
2054 PORTBUFLOCK(mgr);
2055 if (sock != NULL) {
2056 sockaddrp = &sockaddr;
2057 result = isc_socket_getsockname(sock, sockaddrp);
2058 if (result != ISC_R_SUCCESS)
2059 goto unlock;
2062 if (isc_sockaddr_pf(sockaddrp) == AF_INET) {
2063 ports = mgr->v4ports;
2064 nports = mgr->nv4ports;
2065 } else {
2066 ports = mgr->v6ports;
2067 nports = mgr->nv6ports;
2069 if (ports == NULL)
2070 goto unlock;
2072 port = isc_sockaddr_getport(sockaddrp);
2073 if (bsearch(&port, ports, nports, sizeof(in_port_t), port_cmp) != NULL)
2074 available = ISC_TRUE;
2076 unlock:
2077 PORTBUFUNLOCK(mgr);
2078 return (available);
/* True when _a1 and _a2 agree on every bit selected by _mask. */
#define ATTRMATCH(_a1, _a2, _mask) ((((_a1) ^ (_a2)) & (_mask)) == 0)
2083 static isc_boolean_t
2084 local_addr_match(dns_dispatch_t *disp, isc_sockaddr_t *addr) {
2085 isc_sockaddr_t sockaddr;
2086 isc_result_t result;
2088 REQUIRE(disp->socket != NULL);
2090 if (addr == NULL)
2091 return (ISC_TRUE);
2094 * Don't match wildcard ports unless the port is available in the
2095 * current configuration.
2097 if (isc_sockaddr_getport(addr) == 0 &&
2098 isc_sockaddr_getport(&disp->local) == 0 &&
2099 !portavailable(disp->mgr, disp->socket, NULL)) {
2100 return (ISC_FALSE);
2104 * Check if we match the binding <address,port>.
2105 * Wildcard ports match/fail here.
2107 if (isc_sockaddr_equal(&disp->local, addr))
2108 return (ISC_TRUE);
2109 if (isc_sockaddr_getport(addr) == 0)
2110 return (ISC_FALSE);
2113 * Check if we match a bound wildcard port <address,port>.
2115 if (!isc_sockaddr_eqaddr(&disp->local, addr))
2116 return (ISC_FALSE);
2117 result = isc_socket_getsockname(disp->socket, &sockaddr);
2118 if (result != ISC_R_SUCCESS)
2119 return (ISC_FALSE);
2121 return (isc_sockaddr_equal(&sockaddr, addr));
2125 * Requires mgr be locked.
2127 * No dispatcher can be locked by this thread when calling this function.
2130 * NOTE:
2131 * If a matching dispatcher is found, it is locked after this function
2132 * returns, and must be unlocked by the caller.
2134 static isc_result_t
2135 dispatch_find(dns_dispatchmgr_t *mgr, isc_sockaddr_t *local,
2136 unsigned int attributes, unsigned int mask,
2137 dns_dispatch_t **dispp)
2139 dns_dispatch_t *disp;
2140 isc_result_t result;
2143 * Make certain that we will not match a private or exclusive dispatch.
2145 attributes &= ~(DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2146 mask |= (DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2148 disp = ISC_LIST_HEAD(mgr->list);
2149 while (disp != NULL) {
2150 LOCK(&disp->lock);
2151 if ((disp->shutting_down == 0)
2152 && ATTRMATCH(disp->attributes, attributes, mask)
2153 && local_addr_match(disp, local))
2154 break;
2155 UNLOCK(&disp->lock);
2156 disp = ISC_LIST_NEXT(disp, link);
2159 if (disp == NULL) {
2160 result = ISC_R_NOTFOUND;
2161 goto out;
2164 *dispp = disp;
2165 result = ISC_R_SUCCESS;
2166 out:
2168 return (result);
2171 static isc_result_t
2172 qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
2173 unsigned int increment, dns_qid_t **qidp,
2174 isc_boolean_t needsocktable)
2176 dns_qid_t *qid;
2177 unsigned int i;
2178 isc_result_t result;
2180 REQUIRE(VALID_DISPATCHMGR(mgr));
2181 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
2182 REQUIRE(increment > buckets);
2183 REQUIRE(qidp != NULL && *qidp == NULL);
2185 qid = isc_mem_get(mgr->mctx, sizeof(*qid));
2186 if (qid == NULL)
2187 return (ISC_R_NOMEMORY);
2189 qid->qid_table = isc_mem_get(mgr->mctx,
2190 buckets * sizeof(dns_displist_t));
2191 if (qid->qid_table == NULL) {
2192 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2193 return (ISC_R_NOMEMORY);
2196 qid->sock_table = NULL;
2197 if (needsocktable) {
2198 qid->sock_table = isc_mem_get(mgr->mctx, buckets *
2199 sizeof(dispsocketlist_t));
2200 if (qid->sock_table == NULL) {
2201 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2202 isc_mem_put(mgr->mctx, qid->qid_table,
2203 buckets * sizeof(dns_displist_t));
2204 return (ISC_R_NOMEMORY);
2208 result = isc_mutex_init(&qid->lock);
2209 if (result != ISC_R_SUCCESS) {
2210 if (qid->sock_table != NULL) {
2211 isc_mem_put(mgr->mctx, qid->sock_table,
2212 buckets * sizeof(dispsocketlist_t));
2214 isc_mem_put(mgr->mctx, qid->qid_table,
2215 buckets * sizeof(dns_displist_t));
2216 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2217 return (result);
2220 for (i = 0; i < buckets; i++) {
2221 ISC_LIST_INIT(qid->qid_table[i]);
2222 if (qid->sock_table != NULL)
2223 ISC_LIST_INIT(qid->sock_table[i]);
2226 qid->qid_nbuckets = buckets;
2227 qid->qid_increment = increment;
2228 qid->magic = QID_MAGIC;
2229 *qidp = qid;
2230 return (ISC_R_SUCCESS);
2233 static void
2234 qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) {
2235 dns_qid_t *qid;
2237 REQUIRE(qidp != NULL);
2238 qid = *qidp;
2240 REQUIRE(VALID_QID(qid));
2242 *qidp = NULL;
2243 qid->magic = 0;
2244 isc_mem_put(mctx, qid->qid_table,
2245 qid->qid_nbuckets * sizeof(dns_displist_t));
2246 if (qid->sock_table != NULL) {
2247 isc_mem_put(mctx, qid->sock_table,
2248 qid->qid_nbuckets * sizeof(dispsocketlist_t));
2250 DESTROYLOCK(&qid->lock);
2251 isc_mem_put(mctx, qid, sizeof(*qid));
2255 * Allocate and set important limits.
2257 static isc_result_t
2258 dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests,
2259 dns_dispatch_t **dispp)
2261 dns_dispatch_t *disp;
2262 isc_result_t result;
2264 REQUIRE(VALID_DISPATCHMGR(mgr));
2265 REQUIRE(dispp != NULL && *dispp == NULL);
2268 * Set up the dispatcher, mostly. Don't bother setting some of
2269 * the options that are controlled by tcp vs. udp, etc.
2272 disp = isc_mempool_get(mgr->dpool);
2273 if (disp == NULL)
2274 return (ISC_R_NOMEMORY);
2276 disp->magic = 0;
2277 disp->mgr = mgr;
2278 disp->maxrequests = maxrequests;
2279 disp->attributes = 0;
2280 ISC_LINK_INIT(disp, link);
2281 disp->refcount = 1;
2282 disp->recv_pending = 0;
2283 memset(&disp->local, 0, sizeof(disp->local));
2284 disp->localport = 0;
2285 disp->shutting_down = 0;
2286 disp->shutdown_out = 0;
2287 disp->connected = 0;
2288 disp->tcpmsg_valid = 0;
2289 disp->shutdown_why = ISC_R_UNEXPECTED;
2290 disp->requests = 0;
2291 disp->tcpbuffers = 0;
2292 disp->qid = NULL;
2293 ISC_LIST_INIT(disp->activesockets);
2294 ISC_LIST_INIT(disp->inactivesockets);
2295 disp->nsockets = 0;
2296 dispatch_arc4init(&disp->arc4ctx, mgr->entropy, NULL);
2298 result = isc_mutex_init(&disp->lock);
2299 if (result != ISC_R_SUCCESS)
2300 goto deallocate;
2302 disp->failsafe_ev = allocate_event(disp);
2303 if (disp->failsafe_ev == NULL) {
2304 result = ISC_R_NOMEMORY;
2305 goto kill_lock;
2308 disp->magic = DISPATCH_MAGIC;
2310 *dispp = disp;
2311 return (ISC_R_SUCCESS);
2314 * error returns
2316 kill_lock:
2317 DESTROYLOCK(&disp->lock);
2318 deallocate:
2319 isc_mempool_put(mgr->dpool, disp);
2321 return (result);
2326 * MUST be unlocked, and not used by anything.
2328 static void
2329 dispatch_free(dns_dispatch_t **dispp)
2331 dns_dispatch_t *disp;
2332 dns_dispatchmgr_t *mgr;
2334 REQUIRE(VALID_DISPATCH(*dispp));
2335 disp = *dispp;
2336 *dispp = NULL;
2338 mgr = disp->mgr;
2339 REQUIRE(VALID_DISPATCHMGR(mgr));
2341 if (disp->tcpmsg_valid) {
2342 dns_tcpmsg_invalidate(&disp->tcpmsg);
2343 disp->tcpmsg_valid = 0;
2346 INSIST(disp->tcpbuffers == 0);
2347 INSIST(disp->requests == 0);
2348 INSIST(disp->recv_pending == 0);
2349 INSIST(ISC_LIST_EMPTY(disp->activesockets));
2350 INSIST(ISC_LIST_EMPTY(disp->inactivesockets));
2352 isc_mempool_put(mgr->epool, disp->failsafe_ev);
2353 disp->failsafe_ev = NULL;
2355 if (disp->qid != NULL)
2356 qid_destroy(mgr->mctx, &disp->qid);
2357 disp->mgr = NULL;
2358 DESTROYLOCK(&disp->lock);
2359 disp->magic = 0;
2360 isc_mempool_put(mgr->dpool, disp);
2363 isc_result_t
2364 dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2365 isc_taskmgr_t *taskmgr, unsigned int buffersize,
2366 unsigned int maxbuffers, unsigned int maxrequests,
2367 unsigned int buckets, unsigned int increment,
2368 unsigned int attributes, dns_dispatch_t **dispp)
2370 isc_result_t result;
2371 dns_dispatch_t *disp;
2373 UNUSED(maxbuffers);
2374 UNUSED(buffersize);
2376 REQUIRE(VALID_DISPATCHMGR(mgr));
2377 REQUIRE(isc_socket_gettype(sock) == isc_sockettype_tcp);
2378 REQUIRE((attributes & DNS_DISPATCHATTR_TCP) != 0);
2379 REQUIRE((attributes & DNS_DISPATCHATTR_UDP) == 0);
2381 attributes |= DNS_DISPATCHATTR_PRIVATE; /* XXXMLG */
2383 LOCK(&mgr->lock);
2386 * dispatch_allocate() checks mgr for us.
2387 * qid_allocate() checks buckets and increment for us.
2389 disp = NULL;
2390 result = dispatch_allocate(mgr, maxrequests, &disp);
2391 if (result != ISC_R_SUCCESS) {
2392 UNLOCK(&mgr->lock);
2393 return (result);
2396 result = qid_allocate(mgr, buckets, increment, &disp->qid, ISC_FALSE);
2397 if (result != ISC_R_SUCCESS)
2398 goto deallocate_dispatch;
2400 disp->socktype = isc_sockettype_tcp;
2401 disp->socket = NULL;
2402 isc_socket_attach(sock, &disp->socket);
2404 disp->ntasks = 1;
2405 disp->task[0] = NULL;
2406 result = isc_task_create(taskmgr, 0, &disp->task[0]);
2407 if (result != ISC_R_SUCCESS)
2408 goto kill_socket;
2410 disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
2411 DNS_EVENT_DISPATCHCONTROL,
2412 destroy_disp, disp,
2413 sizeof(isc_event_t));
2414 if (disp->ctlevent == NULL) {
2415 result = ISC_R_NOMEMORY;
2416 goto kill_task;
2419 isc_task_setname(disp->task[0], "tcpdispatch", disp);
2421 dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg);
2422 disp->tcpmsg_valid = 1;
2424 disp->attributes = attributes;
2427 * Append it to the dispatcher list.
2429 ISC_LIST_APPEND(mgr->list, disp, link);
2430 UNLOCK(&mgr->lock);
2432 mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp);
2433 dispatch_log(disp, LVL(90), "created task %p", disp->task[0]);
2435 *dispp = disp;
2437 return (ISC_R_SUCCESS);
2440 * Error returns.
2442 kill_task:
2443 isc_task_detach(&disp->task[0]);
2444 kill_socket:
2445 isc_socket_detach(&disp->socket);
2446 deallocate_dispatch:
2447 dispatch_free(&disp);
2449 UNLOCK(&mgr->lock);
2451 return (result);
2454 isc_result_t
2455 dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2456 isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
2457 unsigned int buffersize,
2458 unsigned int maxbuffers, unsigned int maxrequests,
2459 unsigned int buckets, unsigned int increment,
2460 unsigned int attributes, unsigned int mask,
2461 dns_dispatch_t **dispp)
2463 isc_result_t result;
2464 dns_dispatch_t *disp = NULL;
2466 REQUIRE(VALID_DISPATCHMGR(mgr));
2467 REQUIRE(sockmgr != NULL);
2468 REQUIRE(localaddr != NULL);
2469 REQUIRE(taskmgr != NULL);
2470 REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2471 REQUIRE(maxbuffers > 0);
2472 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
2473 REQUIRE(increment > buckets);
2474 REQUIRE(dispp != NULL && *dispp == NULL);
2475 REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0);
2477 result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers,
2478 maxrequests, buckets, increment);
2479 if (result != ISC_R_SUCCESS)
2480 return (result);
2482 LOCK(&mgr->lock);
2484 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
2485 REQUIRE(isc_sockaddr_getport(localaddr) == 0);
2486 goto createudp;
2490 * See if we have a dispatcher that matches.
2492 result = dispatch_find(mgr, localaddr, attributes, mask, &disp);
2493 if (result == ISC_R_SUCCESS) {
2494 disp->refcount++;
2496 if (disp->maxrequests < maxrequests)
2497 disp->maxrequests = maxrequests;
2499 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0 &&
2500 (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
2502 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
2503 if (disp->recv_pending != 0)
2504 isc_socket_cancel(disp->socket, disp->task[0],
2505 ISC_SOCKCANCEL_RECV);
2508 UNLOCK(&disp->lock);
2509 UNLOCK(&mgr->lock);
2511 *dispp = disp;
2513 return (ISC_R_SUCCESS);
2516 createudp:
2518 * Nope, create one.
2520 result = dispatch_createudp(mgr, sockmgr, taskmgr, localaddr,
2521 maxrequests, attributes, &disp);
2522 if (result != ISC_R_SUCCESS) {
2523 UNLOCK(&mgr->lock);
2524 return (result);
2527 UNLOCK(&mgr->lock);
2528 *dispp = disp;
2529 return (ISC_R_SUCCESS);
2533 * mgr should be locked.
2536 #ifndef DNS_DISPATCH_HELD
2537 #define DNS_DISPATCH_HELD 20U
2538 #endif
2540 static isc_result_t
2541 get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp,
2542 isc_socketmgr_t *sockmgr, isc_sockaddr_t *localaddr,
2543 isc_socket_t **sockp)
2545 unsigned int i, j;
2546 isc_socket_t *held[DNS_DISPATCH_HELD];
2547 isc_sockaddr_t localaddr_bound;
2548 isc_socket_t *sock = NULL;
2549 isc_result_t result = ISC_R_SUCCESS;
2550 isc_boolean_t anyport;
2552 INSIST(sockp != NULL && *sockp == NULL);
2554 localaddr_bound = *localaddr;
2555 anyport = ISC_TF(isc_sockaddr_getport(localaddr) == 0);
2557 if (anyport) {
2558 unsigned int nports;
2559 in_port_t *ports;
2562 * If no port is specified, we first try to pick up a random
2563 * port by ourselves.
2565 if (isc_sockaddr_pf(&disp->local) == AF_INET) {
2566 nports = disp->mgr->nv4ports;
2567 ports = disp->mgr->v4ports;
2568 } else {
2569 nports = disp->mgr->nv6ports;
2570 ports = disp->mgr->v6ports;
2572 if (nports == 0)
2573 return (ISC_R_ADDRNOTAVAIL);
2575 for (i = 0; i < 1024; i++) {
2576 in_port_t prt;
2578 prt = ports[dispatch_arc4uniformrandom(
2579 DISP_ARC4CTX(disp),
2580 nports)];
2581 isc_sockaddr_setport(&localaddr_bound, prt);
2582 result = open_socket(sockmgr, &localaddr_bound,
2583 0, &sock);
2584 if (result == ISC_R_SUCCESS ||
2585 result != ISC_R_ADDRINUSE) {
2586 disp->localport = prt;
2587 *sockp = sock;
2588 return (result);
2593 * If this fails 1024 times, we then ask the kernel for
2594 * choosing one.
2596 } else {
2597 /* Allow to reuse address for non-random ports. */
2598 result = open_socket(sockmgr, localaddr,
2599 ISC_SOCKET_REUSEADDRESS, &sock);
2601 if (result == ISC_R_SUCCESS)
2602 *sockp = sock;
2604 return (result);
2607 memset(held, 0, sizeof(held));
2608 i = 0;
2610 for (j = 0; j < 0xffffU; j++) {
2611 result = open_socket(sockmgr, localaddr, 0, &sock);
2612 if (result != ISC_R_SUCCESS)
2613 goto end;
2614 else if (!anyport)
2615 break;
2616 else if (portavailable(mgr, sock, NULL))
2617 break;
2618 if (held[i] != NULL)
2619 isc_socket_detach(&held[i]);
2620 held[i++] = sock;
2621 sock = NULL;
2622 if (i == DNS_DISPATCH_HELD)
2623 i = 0;
2625 if (j == 0xffffU) {
2626 mgr_log(mgr, ISC_LOG_ERROR,
2627 "avoid-v%s-udp-ports: unable to allocate "
2628 "an available port",
2629 isc_sockaddr_pf(localaddr) == AF_INET ? "4" : "6");
2630 result = ISC_R_FAILURE;
2631 goto end;
2633 *sockp = sock;
2635 end:
2636 for (i = 0; i < DNS_DISPATCH_HELD; i++) {
2637 if (held[i] != NULL)
2638 isc_socket_detach(&held[i]);
2641 return (result);
2644 static isc_result_t
2645 dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2646 isc_taskmgr_t *taskmgr,
2647 isc_sockaddr_t *localaddr,
2648 unsigned int maxrequests,
2649 unsigned int attributes,
2650 dns_dispatch_t **dispp)
2652 isc_result_t result;
2653 dns_dispatch_t *disp;
2654 isc_socket_t *sock = NULL;
2655 int i = 0;
2658 * dispatch_allocate() checks mgr for us.
2660 disp = NULL;
2661 result = dispatch_allocate(mgr, maxrequests, &disp);
2662 if (result != ISC_R_SUCCESS)
2663 return (result);
2665 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0) {
2666 result = get_udpsocket(mgr, disp, sockmgr, localaddr, &sock);
2667 if (result != ISC_R_SUCCESS)
2668 goto deallocate_dispatch;
2669 } else {
2670 isc_sockaddr_t sa_any;
2673 * For dispatches using exclusive sockets with a specific
2674 * source address, we only check if the specified address is
2675 * available on the system. Query sockets will be created later
2676 * on demand.
2678 isc_sockaddr_anyofpf(&sa_any, isc_sockaddr_pf(localaddr));
2679 if (!isc_sockaddr_eqaddr(&sa_any, localaddr)) {
2680 result = open_socket(sockmgr, localaddr, 0, &sock);
2681 if (sock != NULL)
2682 isc_socket_detach(&sock);
2683 if (result != ISC_R_SUCCESS)
2684 goto deallocate_dispatch;
2687 disp->socktype = isc_sockettype_udp;
2688 disp->socket = sock;
2689 disp->local = *localaddr;
2691 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
2692 disp->ntasks = MAX_INTERNAL_TASKS;
2693 else
2694 disp->ntasks = 1;
2695 for (i = 0; i < disp->ntasks; i++) {
2696 disp->task[i] = NULL;
2697 result = isc_task_create(taskmgr, 0, &disp->task[i]);
2698 if (result != ISC_R_SUCCESS) {
2699 while (--i >= 0)
2700 isc_task_destroy(&disp->task[i]);
2701 goto kill_socket;
2703 isc_task_setname(disp->task[i], "udpdispatch", disp);
2706 disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
2707 DNS_EVENT_DISPATCHCONTROL,
2708 destroy_disp, disp,
2709 sizeof(isc_event_t));
2710 if (disp->ctlevent == NULL) {
2711 result = ISC_R_NOMEMORY;
2712 goto kill_task;
2715 attributes &= ~DNS_DISPATCHATTR_TCP;
2716 attributes |= DNS_DISPATCHATTR_UDP;
2717 disp->attributes = attributes;
2720 * Append it to the dispatcher list.
2722 ISC_LIST_APPEND(mgr->list, disp, link);
2724 mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp);
2725 dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); /* XXX */
2726 if (disp->socket != NULL)
2727 dispatch_log(disp, LVL(90), "created socket %p", disp->socket);
2729 *dispp = disp;
2730 return (result);
2733 * Error returns.
2735 kill_task:
2736 for (i = 0; i < disp->ntasks; i++)
2737 isc_task_detach(&disp->task[i]);
2738 kill_socket:
2739 if (disp->socket != NULL)
2740 isc_socket_detach(&disp->socket);
2741 deallocate_dispatch:
2742 dispatch_free(&disp);
2744 return (result);
2747 void
2748 dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) {
2749 REQUIRE(VALID_DISPATCH(disp));
2750 REQUIRE(dispp != NULL && *dispp == NULL);
2752 LOCK(&disp->lock);
2753 disp->refcount++;
2754 UNLOCK(&disp->lock);
2756 *dispp = disp;
2760 * It is important to lock the manager while we are deleting the dispatch,
2761 * since dns_dispatch_getudp will call dispatch_find, which returns to
2762 * the caller a dispatch but does not attach to it until later. _getudp
2763 * locks the manager, however, so locking it here will keep us from attaching
2764 * to a dispatcher that is in the process of going away.
2766 void
2767 dns_dispatch_detach(dns_dispatch_t **dispp) {
2768 dns_dispatch_t *disp;
2769 dispsocket_t *dispsock;
2770 isc_boolean_t killit;
2772 REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp));
2774 disp = *dispp;
2775 *dispp = NULL;
2777 LOCK(&disp->lock);
2779 INSIST(disp->refcount > 0);
2780 disp->refcount--;
2781 killit = ISC_FALSE;
2782 if (disp->refcount == 0) {
2783 if (disp->recv_pending > 0)
2784 isc_socket_cancel(disp->socket, disp->task[0],
2785 ISC_SOCKCANCEL_RECV);
2786 for (dispsock = ISC_LIST_HEAD(disp->activesockets);
2787 dispsock != NULL;
2788 dispsock = ISC_LIST_NEXT(dispsock, link)) {
2789 isc_socket_cancel(dispsock->socket, dispsock->task,
2790 ISC_SOCKCANCEL_RECV);
2792 disp->shutting_down = 1;
2795 dispatch_log(disp, LVL(90), "detach: refcount %d", disp->refcount);
2797 killit = destroy_disp_ok(disp);
2798 UNLOCK(&disp->lock);
2799 if (killit)
2800 isc_task_send(disp->task[0], &disp->ctlevent);
2803 isc_result_t
2804 dns_dispatch_addresponse2(dns_dispatch_t *disp, isc_sockaddr_t *dest,
2805 isc_task_t *task, isc_taskaction_t action, void *arg,
2806 dns_messageid_t *idp, dns_dispentry_t **resp,
2807 isc_socketmgr_t *sockmgr)
2809 dns_dispentry_t *res;
2810 unsigned int bucket;
2811 in_port_t localport = 0;
2812 dns_messageid_t id;
2813 int i;
2814 isc_boolean_t ok;
2815 dns_qid_t *qid;
2816 dispsocket_t *dispsocket = NULL;
2817 isc_result_t result;
2819 REQUIRE(VALID_DISPATCH(disp));
2820 REQUIRE(task != NULL);
2821 REQUIRE(dest != NULL);
2822 REQUIRE(resp != NULL && *resp == NULL);
2823 REQUIRE(idp != NULL);
2824 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
2825 REQUIRE(sockmgr != NULL);
2827 LOCK(&disp->lock);
2829 if (disp->shutting_down == 1) {
2830 UNLOCK(&disp->lock);
2831 return (ISC_R_SHUTTINGDOWN);
2834 if (disp->requests >= disp->maxrequests) {
2835 UNLOCK(&disp->lock);
2836 return (ISC_R_QUOTA);
2839 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
2840 disp->nsockets > DNS_DISPATCH_SOCKSQUOTA) {
2841 dispsocket_t *oldestsocket;
2842 dns_dispentry_t *oldestresp;
2843 dns_dispatchevent_t *rev;
2846 * Kill oldest outstanding query if the number of sockets
2847 * exceeds the quota to keep the room for new queries.
2849 oldestsocket = ISC_LIST_HEAD(disp->activesockets);
2850 oldestresp = oldestsocket->resp;
2851 if (oldestresp != NULL && !oldestresp->item_out) {
2852 rev = allocate_event(oldestresp->disp);
2853 if (rev != NULL) {
2854 rev->buffer.base = NULL;
2855 rev->result = ISC_R_CANCELED;
2856 rev->id = oldestresp->id;
2857 ISC_EVENT_INIT(rev, sizeof(*rev), 0,
2858 NULL, DNS_EVENT_DISPATCH,
2859 oldestresp->action,
2860 oldestresp->arg, oldestresp,
2861 NULL, NULL);
2862 oldestresp->item_out = ISC_TRUE;
2863 isc_task_send(oldestresp->task,
2864 ISC_EVENT_PTR(&rev));
2865 inc_stats(disp->mgr,
2866 dns_resstatscounter_dispabort);
2871 * Move this entry to the tail so that it won't (easily) be
2872 * examined before actually being canceled.
2874 ISC_LIST_UNLINK(disp->activesockets, oldestsocket, link);
2875 ISC_LIST_APPEND(disp->activesockets, oldestsocket, link);
2878 qid = DNS_QID(disp);
2879 LOCK(&qid->lock);
2881 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
2883 * Get a separate UDP socket with a random port number.
2885 result = get_dispsocket(disp, dest, sockmgr, qid, &dispsocket,
2886 &localport);
2887 if (result != ISC_R_SUCCESS) {
2888 UNLOCK(&qid->lock);
2889 UNLOCK(&disp->lock);
2890 inc_stats(disp->mgr, dns_resstatscounter_dispsockfail);
2891 return (result);
2893 } else {
2894 localport = disp->localport;
2898 * Try somewhat hard to find an unique ID.
2900 id = (dns_messageid_t)dispatch_arc4random(DISP_ARC4CTX(disp));
2901 bucket = dns_hash(qid, dest, id, localport);
2902 ok = ISC_FALSE;
2903 for (i = 0; i < 64; i++) {
2904 if (entry_search(qid, dest, id, localport, bucket) == NULL) {
2905 ok = ISC_TRUE;
2906 break;
2908 id += qid->qid_increment;
2909 id &= 0x0000ffff;
2910 bucket = dns_hash(qid, dest, id, localport);
2913 if (!ok) {
2914 UNLOCK(&qid->lock);
2915 UNLOCK(&disp->lock);
2916 return (ISC_R_NOMORE);
2919 res = isc_mempool_get(disp->mgr->rpool);
2920 if (res == NULL) {
2921 UNLOCK(&qid->lock);
2922 UNLOCK(&disp->lock);
2923 if (dispsocket != NULL)
2924 destroy_dispsocket(disp, &dispsocket);
2925 return (ISC_R_NOMEMORY);
2928 disp->refcount++;
2929 disp->requests++;
2930 res->task = NULL;
2931 isc_task_attach(task, &res->task);
2932 res->disp = disp;
2933 res->id = id;
2934 res->port = localport;
2935 res->bucket = bucket;
2936 res->host = *dest;
2937 res->action = action;
2938 res->arg = arg;
2939 res->dispsocket = dispsocket;
2940 if (dispsocket != NULL)
2941 dispsocket->resp = res;
2942 res->item_out = ISC_FALSE;
2943 ISC_LIST_INIT(res->items);
2944 ISC_LINK_INIT(res, link);
2945 res->magic = RESPONSE_MAGIC;
2946 ISC_LIST_APPEND(qid->qid_table[bucket], res, link);
2947 UNLOCK(&qid->lock);
2949 request_log(disp, res, LVL(90),
2950 "attached to task %p", res->task);
2952 if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) ||
2953 ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0)) {
2954 result = startrecv(disp, dispsocket);
2955 if (result != ISC_R_SUCCESS) {
2956 LOCK(&qid->lock);
2957 ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
2958 UNLOCK(&qid->lock);
2960 if (dispsocket != NULL)
2961 destroy_dispsocket(disp, &dispsocket);
2963 disp->refcount--;
2964 disp->requests--;
2966 UNLOCK(&disp->lock);
2967 isc_task_detach(&res->task);
2968 isc_mempool_put(disp->mgr->rpool, res);
2969 return (result);
2973 if (dispsocket != NULL)
2974 ISC_LIST_APPEND(disp->activesockets, dispsocket, link);
2976 UNLOCK(&disp->lock);
2978 *idp = id;
2979 *resp = res;
2981 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
2982 INSIST(res->dispsocket != NULL);
2984 return (ISC_R_SUCCESS);
2987 isc_result_t
2988 dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest,
2989 isc_task_t *task, isc_taskaction_t action, void *arg,
2990 dns_messageid_t *idp, dns_dispentry_t **resp)
2992 REQUIRE(VALID_DISPATCH(disp));
2993 REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
2995 return (dns_dispatch_addresponse2(disp, dest, task, action, arg,
2996 idp, resp, NULL));
2999 void
3000 dns_dispatch_starttcp(dns_dispatch_t *disp) {
3002 REQUIRE(VALID_DISPATCH(disp));
3004 dispatch_log(disp, LVL(90), "starttcp %p", disp->task[0]);
3006 LOCK(&disp->lock);
3007 disp->attributes |= DNS_DISPATCHATTR_CONNECTED;
3008 (void)startrecv(disp, NULL);
3009 UNLOCK(&disp->lock);
3012 void
3013 dns_dispatch_removeresponse(dns_dispentry_t **resp,
3014 dns_dispatchevent_t **sockevent)
3016 dns_dispatchmgr_t *mgr;
3017 dns_dispatch_t *disp;
3018 dns_dispentry_t *res;
3019 dispsocket_t *dispsock;
3020 dns_dispatchevent_t *ev;
3021 unsigned int bucket;
3022 isc_boolean_t killit;
3023 unsigned int n;
3024 isc_eventlist_t events;
3025 dns_qid_t *qid;
3027 REQUIRE(resp != NULL);
3028 REQUIRE(VALID_RESPONSE(*resp));
3030 res = *resp;
3031 *resp = NULL;
3033 disp = res->disp;
3034 REQUIRE(VALID_DISPATCH(disp));
3035 mgr = disp->mgr;
3036 REQUIRE(VALID_DISPATCHMGR(mgr));
3038 qid = DNS_QID(disp);
3040 if (sockevent != NULL) {
3041 REQUIRE(*sockevent != NULL);
3042 ev = *sockevent;
3043 *sockevent = NULL;
3044 } else {
3045 ev = NULL;
3048 LOCK(&disp->lock);
3050 INSIST(disp->requests > 0);
3051 disp->requests--;
3052 INSIST(disp->refcount > 0);
3053 disp->refcount--;
3054 killit = ISC_FALSE;
3055 if (disp->refcount == 0) {
3056 if (disp->recv_pending > 0)
3057 isc_socket_cancel(disp->socket, disp->task[0],
3058 ISC_SOCKCANCEL_RECV);
3059 for (dispsock = ISC_LIST_HEAD(disp->activesockets);
3060 dispsock != NULL;
3061 dispsock = ISC_LIST_NEXT(dispsock, link)) {
3062 isc_socket_cancel(dispsock->socket, dispsock->task,
3063 ISC_SOCKCANCEL_RECV);
3065 disp->shutting_down = 1;
3068 bucket = res->bucket;
3070 LOCK(&qid->lock);
3071 ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3072 UNLOCK(&qid->lock);
3074 if (ev == NULL && res->item_out) {
3076 * We've posted our event, but the caller hasn't gotten it
3077 * yet. Take it back.
3079 ISC_LIST_INIT(events);
3080 n = isc_task_unsend(res->task, res, DNS_EVENT_DISPATCH,
3081 NULL, &events);
3083 * We had better have gotten it back.
3085 INSIST(n == 1);
3086 ev = (dns_dispatchevent_t *)ISC_LIST_HEAD(events);
3089 if (ev != NULL) {
3090 REQUIRE(res->item_out == ISC_TRUE);
3091 res->item_out = ISC_FALSE;
3092 if (ev->buffer.base != NULL)
3093 free_buffer(disp, ev->buffer.base, ev->buffer.length);
3094 free_event(disp, ev);
3097 request_log(disp, res, LVL(90), "detaching from task %p", res->task);
3098 isc_task_detach(&res->task);
3100 if (res->dispsocket != NULL) {
3101 isc_socket_cancel(res->dispsocket->socket,
3102 res->dispsocket->task, ISC_SOCKCANCEL_RECV);
3103 res->dispsocket->resp = NULL;
3107 * Free any buffered requests as well
3109 ev = ISC_LIST_HEAD(res->items);
3110 while (ev != NULL) {
3111 ISC_LIST_UNLINK(res->items, ev, ev_link);
3112 if (ev->buffer.base != NULL)
3113 free_buffer(disp, ev->buffer.base, ev->buffer.length);
3114 free_event(disp, ev);
3115 ev = ISC_LIST_HEAD(res->items);
3117 res->magic = 0;
3118 isc_mempool_put(disp->mgr->rpool, res);
3119 if (disp->shutting_down == 1)
3120 do_cancel(disp);
3121 else
3122 (void)startrecv(disp, NULL);
3124 killit = destroy_disp_ok(disp);
3125 UNLOCK(&disp->lock);
3126 if (killit)
3127 isc_task_send(disp->task[0], &disp->ctlevent);
3130 static void
3131 do_cancel(dns_dispatch_t *disp) {
3132 dns_dispatchevent_t *ev;
3133 dns_dispentry_t *resp;
3134 dns_qid_t *qid;
3136 if (disp->shutdown_out == 1)
3137 return;
3139 qid = DNS_QID(disp);
3142 * Search for the first response handler without packets outstanding
3143 * unless a specific hander is given.
3145 LOCK(&qid->lock);
3146 for (resp = linear_first(qid);
3147 resp != NULL && resp->item_out;
3148 /* Empty. */)
3149 resp = linear_next(qid, resp);
3152 * No one to send the cancel event to, so nothing to do.
3154 if (resp == NULL)
3155 goto unlock;
3158 * Send the shutdown failsafe event to this resp.
3160 ev = disp->failsafe_ev;
3161 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH,
3162 resp->action, resp->arg, resp, NULL, NULL);
3163 ev->result = disp->shutdown_why;
3164 ev->buffer.base = NULL;
3165 ev->buffer.length = 0;
3166 disp->shutdown_out = 1;
3167 request_log(disp, resp, LVL(10),
3168 "cancel: failsafe event %p -> task %p",
3169 ev, resp->task);
3170 resp->item_out = ISC_TRUE;
3171 isc_task_send(resp->task, ISC_EVENT_PTR(&ev));
3172 unlock:
3173 UNLOCK(&qid->lock);
3176 isc_socket_t *
3177 dns_dispatch_getsocket(dns_dispatch_t *disp) {
3178 REQUIRE(VALID_DISPATCH(disp));
3180 return (disp->socket);
3183 isc_socket_t *
3184 dns_dispatch_getentrysocket(dns_dispentry_t *resp) {
3185 REQUIRE(VALID_RESPONSE(resp));
3187 if (resp->dispsocket != NULL)
3188 return (resp->dispsocket->socket);
3189 else
3190 return (NULL);
3193 isc_result_t
3194 dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) {
3196 REQUIRE(VALID_DISPATCH(disp));
3197 REQUIRE(addrp != NULL);
3199 if (disp->socktype == isc_sockettype_udp) {
3200 *addrp = disp->local;
3201 return (ISC_R_SUCCESS);
3203 return (ISC_R_NOTIMPLEMENTED);
3206 void
3207 dns_dispatch_cancel(dns_dispatch_t *disp) {
3208 REQUIRE(VALID_DISPATCH(disp));
3210 LOCK(&disp->lock);
3212 if (disp->shutting_down == 1) {
3213 UNLOCK(&disp->lock);
3214 return;
3217 disp->shutdown_why = ISC_R_CANCELED;
3218 disp->shutting_down = 1;
3219 do_cancel(disp);
3221 UNLOCK(&disp->lock);
3223 return;
3226 unsigned int
3227 dns_dispatch_getattributes(dns_dispatch_t *disp) {
3228 REQUIRE(VALID_DISPATCH(disp));
3231 * We don't bother locking disp here; it's the caller's responsibility
3232 * to use only non volatile flags.
3234 return (disp->attributes);
3237 void
3238 dns_dispatch_changeattributes(dns_dispatch_t *disp,
3239 unsigned int attributes, unsigned int mask)
3241 REQUIRE(VALID_DISPATCH(disp));
3242 /* Exclusive attribute can only be set on creation */
3243 REQUIRE((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3244 /* Also, a dispatch with randomport specified cannot start listening */
3245 REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0 ||
3246 (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0);
3248 /* XXXMLG
3249 * Should check for valid attributes here!
3252 LOCK(&disp->lock);
3254 if ((mask & DNS_DISPATCHATTR_NOLISTEN) != 0) {
3255 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 &&
3256 (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) {
3257 disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN;
3258 (void)startrecv(disp, NULL);
3259 } else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN)
3260 == 0 &&
3261 (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) {
3262 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
3263 if (disp->recv_pending != 0)
3264 isc_socket_cancel(disp->socket, disp->task[0],
3265 ISC_SOCKCANCEL_RECV);
3269 disp->attributes &= ~mask;
3270 disp->attributes |= (attributes & mask);
3271 UNLOCK(&disp->lock);
3274 void
3275 dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) {
3276 void *buf;
3277 isc_socketevent_t *sevent, *newsevent;
3279 REQUIRE(VALID_DISPATCH(disp));
3280 REQUIRE((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0);
3281 REQUIRE(event != NULL);
3283 sevent = (isc_socketevent_t *)event;
3285 INSIST(sevent->n <= disp->mgr->buffersize);
3286 newsevent = (isc_socketevent_t *)
3287 isc_event_allocate(disp->mgr->mctx, NULL,
3288 DNS_EVENT_IMPORTRECVDONE, udp_shrecv,
3289 disp, sizeof(isc_socketevent_t));
3290 if (newsevent == NULL)
3291 return;
3293 buf = allocate_udp_buffer(disp);
3294 if (buf == NULL) {
3295 isc_event_free(ISC_EVENT_PTR(&newsevent));
3296 return;
3298 memcpy(buf, sevent->region.base, sevent->n);
3299 newsevent->region.base = buf;
3300 newsevent->region.length = disp->mgr->buffersize;
3301 newsevent->n = sevent->n;
3302 newsevent->result = sevent->result;
3303 newsevent->address = sevent->address;
3304 newsevent->timestamp = sevent->timestamp;
3305 newsevent->pktinfo = sevent->pktinfo;
3306 newsevent->attributes = sevent->attributes;
3308 isc_task_send(disp->task[0], ISC_EVENT_PTR(&newsevent));
#if 0
/*
 * Debugging aid (compiled out): print every dispatch on the manager's
 * list together with its formatted local address.
 */
void
dns_dispatchmgr_dump(dns_dispatchmgr_t *mgr) {
	dns_dispatch_t *disp;
	char foo[1024];

	for (disp = ISC_LIST_HEAD(mgr->list);
	     disp != NULL;
	     disp = ISC_LIST_NEXT(disp, link))
	{
		isc_sockaddr_format(&disp->local, foo, sizeof(foo));
		printf("\tdispatch %p, addr %s\n", disp, foo);
	}
}
#endif