1 /* Copyright (C) 2016-2017 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <http://www.gnu.org/licenses/>. */
19 * Copyright (c) 1985, 1989, 1993
20 * The Regents of the University of California. All rights reserved.
22 * Redistribution and use in source and binary forms, with or without
23 * modification, are permitted provided that the following conditions
25 * 1. Redistributions of source code must retain the above copyright
26 * notice, this list of conditions and the following disclaimer.
27 * 2. Redistributions in binary form must reproduce the above copyright
28 * notice, this list of conditions and the following disclaimer in the
29 * documentation and/or other materials provided with the distribution.
30 * 4. Neither the name of the University nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
34 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48 * Portions Copyright (c) 1993 by Digital Equipment Corporation.
50 * Permission to use, copy, modify, and distribute this software for any
51 * purpose with or without fee is hereby granted, provided that the above
52 * copyright notice and this permission notice appear in all copies, and that
53 * the name of Digital Equipment Corporation not be used in advertising or
54 * publicity pertaining to distribution of the document or software without
55 * specific, written prior permission.
57 * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
58 * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
59 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
60 * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
61 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
62 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
63 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
68 * Portions Copyright (c) 1996-1999 by Internet Software Consortium.
70 * Permission to use, copy, modify, and distribute this software for any
71 * purpose with or without fee is hereby granted, provided that the above
72 * copyright notice and this permission notice appear in all copies.
74 * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
75 * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
76 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
77 * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
78 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
79 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
80 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
85 * Send query to name server and wait for reply.
89 #include <sys/types.h>
90 #include <sys/param.h>
92 #include <sys/socket.h>
96 #include <netinet/in.h>
97 #include <arpa/nameser.h>
98 #include <arpa/inet.h>
99 #include <sys/ioctl.h>
104 #include <resolv/resolv-internal.h>
105 #include <resolv/resolv_context.h>
110 #include <kernel-features.h>
111 #include <libc-diag.h>
112 #include <hp-timing.h>
/* Size used for malloc'ed answer buffers: the larger of PACKETSZ and
   65536, so that any length carried in a 16-bit prefix fits.  */
#if PACKETSZ > 65536
#define MAXPACKET       PACKETSZ
#else
#define MAXPACKET       65536
#endif
/* From ev_streams.c.  */

/* Fill *VEC so that it describes the CNT bytes starting at BUF.
   The structure is first poisoned with 0xf5 bytes so that any field
   this function does not assign holds an obviously bogus value.  */
static inline void
__attribute ((always_inline))
evConsIovec(void *buf, size_t cnt, struct iovec *vec) {
	memset(vec, 0xf5, sizeof (*vec));
	vec->iov_base = buf;
	vec->iov_len = cnt;
}
/* From ev_timers.c.  */

/* Number of nanoseconds in one second.  */
#define BILLION 1000000000

/* Store the time SEC seconds + NSEC nanoseconds into *RES.  The
   values are copied verbatim; no normalization is performed.  */
static void
evConsTime(struct timespec *res, time_t sec, long nsec) {
	res->tv_sec = sec;
	res->tv_nsec = nsec;
}
141 evAddTime(struct timespec
*res
, const struct timespec
*addend1
,
142 const struct timespec
*addend2
) {
143 res
->tv_sec
= addend1
->tv_sec
+ addend2
->tv_sec
;
144 res
->tv_nsec
= addend1
->tv_nsec
+ addend2
->tv_nsec
;
145 if (res
->tv_nsec
>= BILLION
) {
147 res
->tv_nsec
-= BILLION
;
152 evSubTime(struct timespec
*res
, const struct timespec
*minuend
,
153 const struct timespec
*subtrahend
) {
154 res
->tv_sec
= minuend
->tv_sec
- subtrahend
->tv_sec
;
155 if (minuend
->tv_nsec
>= subtrahend
->tv_nsec
)
156 res
->tv_nsec
= minuend
->tv_nsec
- subtrahend
->tv_nsec
;
158 res
->tv_nsec
= (BILLION
159 - subtrahend
->tv_nsec
+ minuend
->tv_nsec
);
/* Three-way comparison of two times.  Returns -1 if A is earlier than
   B, 1 if A is later than B, and 0 if they are equal.  Seconds are
   compared first; nanoseconds only break ties.  The fields are
   compared directly rather than subtracted into a long, so no
   overflow or narrowing of time_t can occur.  */
static int
evCmpTime(struct timespec a, struct timespec b) {
	if (a.tv_sec != b.tv_sec)
		return (a.tv_sec < b.tv_sec ? (-1) : (1));
	if (a.tv_nsec != b.tv_nsec)
		return (a.tv_nsec < b.tv_nsec ? (-1) : (1));
	return (0);
}
/* Store the current time, as reported by gettimeofday, into *RES.
   If gettimeofday fails, *RES is set to zero seconds and zero
   nanoseconds instead.  */
static void
evNowTime(struct timespec *res) {
	struct timeval now;

	if (gettimeofday(&now, NULL) < 0)
		evConsTime(res, 0, 0);
	else
		TIMEVAL_TO_TIMESPEC (&now, res);
}
184 #define EXT(res) ((res)->_u._ext)
188 static struct sockaddr
*get_nsaddr (res_state
, unsigned int);
189 static int send_vc(res_state
, const u_char
*, int,
191 u_char
**, int *, int *, int, u_char
**,
192 u_char
**, int *, int *, int *);
193 static int send_dg(res_state
, const u_char
*, int,
195 u_char
**, int *, int *, int,
196 int *, int *, u_char
**,
197 u_char
**, int *, int *, int *);
198 static int sock_eq(struct sockaddr_in6
*, struct sockaddr_in6
*);
/* int
 * res_isourserver(ina)
 *	looks up "ina" in _res.ns_addr_list[]
 * returns:
 *	0  : not found
 *	>0 : found
 * author:
 *	paul vixie, 29may94
 */
/* res_ourserver_p is the variant that takes the resolver state
   explicitly.  It returns 1 if INP matches one of the STATP->nscount
   configured name server addresses (as returned by get_nsaddr), and 0
   otherwise.  A configured address matches when family and port are
   equal and the configured IP address is either the wildcard address
   or equal to the address in INP.  INP may actually point to a
   struct sockaddr_in; the family field decides how it is read.  */
int
res_ourserver_p(const res_state statp, const struct sockaddr_in6 *inp)
{
	int ns;

	if (inp->sin6_family == AF_INET) {
	    struct sockaddr_in *in4p = (struct sockaddr_in *) inp;
	    in_port_t port = in4p->sin_port;
	    in_addr_t addr = in4p->sin_addr.s_addr;

	    for (ns = 0; ns < statp->nscount; ns++) {
		const struct sockaddr_in *srv =
		  (struct sockaddr_in *) get_nsaddr (statp, ns);

		if ((srv->sin_family == AF_INET) &&
		    (srv->sin_port == port) &&
		    (srv->sin_addr.s_addr == INADDR_ANY ||
		     srv->sin_addr.s_addr == addr))
		    return (1);
	    }
	} else if (inp->sin6_family == AF_INET6) {
	    for (ns = 0; ns < statp->nscount; ns++) {
		const struct sockaddr_in6 *srv
		  = (struct sockaddr_in6 *) get_nsaddr (statp, ns);
		/* The negated conjunction of the two memcmp results is
		   true when at least one of them is zero, i.e. when
		   the server address is the wildcard or equals INP.  */
		if ((srv->sin6_family == AF_INET6) &&
		    (srv->sin6_port == inp->sin6_port) &&
		    !(memcmp(&srv->sin6_addr, &in6addr_any,
			     sizeof (struct in6_addr)) &&
		      memcmp(&srv->sin6_addr, &inp->sin6_addr,
			     sizeof (struct in6_addr))))
		    return (1);
	    }
	}
	return (0);
}
/* Check INP against the name servers of the global resolver state
   _res.  The pointer is passed on to res_ourserver_p reinterpreted as
   a struct sockaddr_in6 pointer; the result of res_ourserver_p is
   returned unchanged.  */
int
res_isourserver (const struct sockaddr_in *inp)
{
  return res_ourserver_p (&_res, (const struct sockaddr_in6 *) inp);
}
254 * res_nameinquery(name, type, class, buf, eom)
255 * look for (name,type,class) in the query section of packet (buf,eom)
257 * buf + HFIXEDSZ <= eom
263 * paul vixie, 29may94
266 res_nameinquery(const char *name
, int type
, int class,
267 const u_char
*buf
, const u_char
*eom
)
269 const u_char
*cp
= buf
+ HFIXEDSZ
;
270 int qdcount
= ntohs(((HEADER
*)buf
)->qdcount
);
272 while (qdcount
-- > 0) {
273 char tname
[MAXDNAME
+1];
274 int n
, ttype
, tclass
;
276 n
= dn_expand(buf
, eom
, cp
, tname
, sizeof tname
);
280 if (cp
+ 2 * INT16SZ
> eom
)
283 NS_GET16(tclass
, cp
);
284 if (ttype
== type
&& tclass
== class &&
285 ns_samename(tname
, name
) == 1)
290 libresolv_hidden_def (res_nameinquery
)
/* Returns a shift value for the name server index.  Used to implement
   RES_ROTATE.  The result is always less than STATP->nscount, and is
   zero when there is at most one name server or RES_ROTATE is not
   set.  */
static unsigned int
nameserver_offset (struct __res_state *statp)
{
  /* If we only have one name server or rotation is disabled, return
     offset 0 (no rotation).  */
  unsigned int nscount = statp->nscount;
  if (nscount <= 1 || !(statp->options & RES_ROTATE))
    return 0;

  /* Global offset.  The lowest bit indicates whether the offset has
     been initialized with a random value.  Use relaxed MO to access
     global_offset because all we need is a sequence of roughly
     sequential value.  */
  static unsigned int global_offset;
  unsigned int offset = atomic_fetch_add_relaxed (&global_offset, 2);
  if ((offset & 1) == 0)
    {
      /* Initialization is required.  */
#if HP_TIMING_AVAIL
      uint64_t ticks;
      HP_TIMING_NOW (ticks);
      offset = ticks;
#else
      struct timeval tv;
      __gettimeofday (&tv, NULL);
      offset = ((tv.tv_sec << 8) ^ tv.tv_usec);
#endif
      /* The lowest bit is the most random.  Preserve it.  */
      offset <<= 1;

      /* Store the new starting value.  atomic_fetch_add_relaxed
	 returns the old value, so emulate that by storing the new
	 (incremented) value.  Concurrent initialization with
	 different random values is harmless.  */
      atomic_store_relaxed (&global_offset, (offset | 1) + 2);
    }

  /* Remove the initialization bit.  */
  offset >>= 1;

  /* Avoid the division in the most common cases.  */
  switch (nscount)
    {
    case 2:
      return offset & 1;
    case 3:
      return offset % 3;
    case 4:
      return offset & 3;
    default:
      return offset % nscount;
    }
}
349 * res_queriesmatch(buf1, eom1, buf2, eom2)
350 * is there a 1:1 mapping of (name,type,class)
351 * in (buf1,eom1) and (buf2,eom2)?
354 * 0 : not a 1:1 mapping
355 * >0 : is a 1:1 mapping
357 * paul vixie, 29may94
360 res_queriesmatch(const u_char
*buf1
, const u_char
*eom1
,
361 const u_char
*buf2
, const u_char
*eom2
)
363 if (buf1
+ HFIXEDSZ
> eom1
|| buf2
+ HFIXEDSZ
> eom2
)
367 * Only header section present in replies to
368 * dynamic update packets.
370 if ((((HEADER
*)buf1
)->opcode
== ns_o_update
) &&
371 (((HEADER
*)buf2
)->opcode
== ns_o_update
))
374 /* Note that we initially do not convert QDCOUNT to the host byte
375 order. We can compare it with the second buffer's QDCOUNT
376 value without doing this. */
377 int qdcount
= ((HEADER
*)buf1
)->qdcount
;
378 if (qdcount
!= ((HEADER
*)buf2
)->qdcount
)
381 qdcount
= htons (qdcount
);
382 const u_char
*cp
= buf1
+ HFIXEDSZ
;
384 while (qdcount
-- > 0) {
385 char tname
[MAXDNAME
+1];
386 int n
, ttype
, tclass
;
388 n
= dn_expand(buf1
, eom1
, cp
, tname
, sizeof tname
);
392 if (cp
+ 2 * INT16SZ
> eom1
)
395 NS_GET16(tclass
, cp
);
396 if (!res_nameinquery(tname
, ttype
, tclass
, buf2
, eom2
))
401 libresolv_hidden_def (res_queriesmatch
)
404 __res_context_send (struct resolv_context
*ctx
,
405 const unsigned char *buf
, int buflen
,
406 const unsigned char *buf2
, int buflen2
,
407 unsigned char *ans
, int anssiz
,
408 unsigned char **ansp
, unsigned char **ansp2
,
409 int *nansp2
, int *resplen2
, int *ansp2_malloced
)
411 struct __res_state
*statp
= ctx
->resp
;
412 int gotsomewhere
, terrno
, try, v_circuit
, resplen
, n
;
414 if (statp
->nscount
== 0) {
419 if (anssiz
< (buf2
== NULL
? 1 : 2) * HFIXEDSZ
) {
420 __set_errno (EINVAL
);
424 v_circuit
= ((statp
->options
& RES_USEVC
)
426 || buflen2
> PACKETSZ
);
431 * If the ns_addr_list in the resolver context has changed, then
432 * invalidate our cached copy and the associated timing data.
434 if (EXT(statp
).nscount
!= 0) {
437 if (EXT(statp
).nscount
!= statp
->nscount
)
440 for (unsigned int ns
= 0; ns
< statp
->nscount
; ns
++) {
441 if (statp
->nsaddr_list
[ns
].sin_family
!= 0
442 && !sock_eq((struct sockaddr_in6
*)
443 &statp
->nsaddr_list
[ns
],
444 EXT(statp
).nsaddrs
[ns
]))
451 __res_iclose(statp
, false);
452 EXT(statp
).nscount
= 0;
457 * Maybe initialize our private copy of the ns_addr_list.
459 if (EXT(statp
).nscount
== 0) {
460 for (unsigned int ns
= 0; ns
< statp
->nscount
; ns
++) {
461 EXT(statp
).nssocks
[ns
] = -1;
462 if (statp
->nsaddr_list
[ns
].sin_family
== 0)
464 if (EXT(statp
).nsaddrs
[ns
] == NULL
)
465 EXT(statp
).nsaddrs
[ns
] =
466 malloc(sizeof (struct sockaddr_in6
));
467 if (EXT(statp
).nsaddrs
[ns
] != NULL
)
468 memset (mempcpy(EXT(statp
).nsaddrs
[ns
],
469 &statp
->nsaddr_list
[ns
],
470 sizeof (struct sockaddr_in
)),
472 sizeof (struct sockaddr_in6
)
473 - sizeof (struct sockaddr_in
));
475 EXT(statp
).nscount
= statp
->nscount
;
478 /* Name server index offset. Used to implement
480 unsigned int ns_offset
= nameserver_offset (statp
);
483 * Send request, RETRY times, or until successful.
485 for (try = 0; try < statp
->retry
; try++) {
486 for (unsigned ns_shift
= 0; ns_shift
< statp
->nscount
; ns_shift
++)
488 /* The actual name server index. This implements
490 unsigned int ns
= ns_shift
+ ns_offset
;
491 if (ns
>= statp
->nscount
)
492 ns
-= statp
->nscount
;
495 if (__glibc_unlikely (v_circuit
)) {
496 /* Use VC; at most one attempt per server. */
498 n
= send_vc(statp
, buf
, buflen
, buf2
, buflen2
,
499 &ans
, &anssiz
, &terrno
,
500 ns
, ansp
, ansp2
, nansp2
, resplen2
,
504 if (n
== 0 && (buf2
== NULL
|| *resplen2
== 0))
508 n
= send_dg(statp
, buf
, buflen
, buf2
, buflen2
,
509 &ans
, &anssiz
, &terrno
,
510 ns
, &v_circuit
, &gotsomewhere
, ansp
,
511 ansp2
, nansp2
, resplen2
, ansp2_malloced
);
514 if (n
== 0 && (buf2
== NULL
|| *resplen2
== 0))
517 // XXX Check whether both requests failed or
518 // XXX whether one has been answered successfully
525 * If we have temporarily opened a virtual circuit,
526 * or if we haven't been asked to keep a socket open,
529 if ((v_circuit
&& (statp
->options
& RES_USEVC
) == 0) ||
530 (statp
->options
& RES_STAYOPEN
) == 0) {
531 __res_iclose(statp
, false);
537 __res_iclose(statp
, false);
540 __set_errno (ECONNREFUSED
); /* no nameservers found */
542 __set_errno (ETIMEDOUT
); /* no answer obtained */
544 __set_errno (terrno
);
548 /* Common part of res_nsend and res_send. */
550 context_send_common (struct resolv_context
*ctx
,
551 const unsigned char *buf
, int buflen
,
552 unsigned char *ans
, int anssiz
)
556 RES_SET_H_ERRNO (&_res
, NETDB_INTERNAL
);
559 int result
= __res_context_send (ctx
, buf
, buflen
, NULL
, 0, ans
, anssiz
,
560 NULL
, NULL
, NULL
, NULL
, NULL
);
561 __resolv_context_put (ctx
);
/* Send the query BUF/BUFLEN using the resolver state STATP and store
   the answer in ANS/ANSSIZ.  The context is obtained with
   __resolv_context_get_override; the return value is that of
   context_send_common.  */
int
res_nsend (res_state statp, const unsigned char *buf, int buflen,
	   unsigned char *ans, int anssiz)
{
  return context_send_common
    (__resolv_context_get_override (statp), buf, buflen, ans, anssiz);
}
/* Send the query BUF/BUFLEN and store the answer in ANS/ANSSIZ, using
   the context returned by __resolv_context_get.  The return value is
   that of context_send_common.  */
int
res_send (const unsigned char *buf, int buflen, unsigned char *ans, int anssiz)
{
  return context_send_common
    (__resolv_context_get (), buf, buflen, ans, anssiz);
}
582 static struct sockaddr
*
583 get_nsaddr (res_state statp
, unsigned int n
)
585 assert (n
< statp
->nscount
);
587 if (statp
->nsaddr_list
[n
].sin_family
== 0 && EXT(statp
).nsaddrs
[n
] != NULL
)
588 /* EXT(statp).nsaddrs[n] holds an address that is larger than
589 struct sockaddr, and user code did not update
590 statp->nsaddr_list[n]. */
591 return (struct sockaddr
*) EXT(statp
).nsaddrs
[n
];
593 /* User code updated statp->nsaddr_list[n], or statp->nsaddr_list[n]
594 has the same content as EXT(statp).nsaddrs[n]. */
595 return (struct sockaddr
*) (void *) &statp
->nsaddr_list
[n
];
/* Close the resolver structure, assign zero to *RESPLEN2 if RESPLEN2
   is not NULL, and return zero.  The zero return value lets callers
   write "return close_and_return_error (...)" to report a recoverable
   error.  */
static int
__attribute__ ((warn_unused_result))
close_and_return_error (res_state statp, int *resplen2)
{
  __res_iclose(statp, false);
  if (resplen2 != NULL)
    *resplen2 = 0;
  return 0;
}
610 /* The send_vc function is responsible for sending a DNS query over TCP
611 to the nameserver numbered NS from the res_state STATP i.e.
612 EXT(statp).nssocks[ns]. The function supports sending both IPv4 and
613 IPv6 queries at the same serially on the same socket.
615 Please note that for TCP there is no way to disable sending both
616 queries, unlike UDP, which honours RES_SNGLKUP and RES_SNGLKUPREOP
617 and sends the queries serially and waits for the result after each
618 sent query. This implementation should be corrected to honour these
621 Please also note that for TCP we send both queries over the same
622 socket one after another. This technically violates best practice
623 since the server is allowed to read the first query, respond, and
624 then close the socket (to service another client). If the server
625 does this, then the remaining second query in the socket data buffer
626 will cause the server to send the client an RST which will arrive
627 asynchronously and the client's OS will likely tear down the socket
628 receive buffer resulting in a potentially short read and lost
629 response data. This will force the client to retry the query again,
630 and this process may repeat until all servers and connection resets
631 are exhausted and then the query will fail. It's not known if this
632 happens with any frequency in real DNS server implementations. This
633 implementation should be corrected to use two sockets by default for
636 The query stored in BUF of BUFLEN length is sent first followed by
637 the query stored in BUF2 of BUFLEN2 length. Queries are sent
638 serially on the same socket.
640 Answers to the query are stored firstly in *ANSP up to a max of
641 *ANSSIZP bytes. If more than *ANSSIZP bytes are needed and ANSCP
642 is non-NULL (to indicate that modifying the answer buffer is allowed)
643 then malloc is used to allocate a new response buffer and ANSCP and
644 ANSP will both point to the new buffer. If more than *ANSSIZP bytes
645 are needed but ANSCP is NULL, then as much of the response as
646 possible is read into the buffer, but the results will be truncated.
647 When truncation happens because of a small answer buffer the DNS
648 packets header field TC will bet set to 1, indicating a truncated
649 message and the rest of the socket data will be read and discarded.
651 Answers to the query are stored secondly in *ANSP2 up to a max of
652 *ANSSIZP2 bytes, with the actual response length stored in
653 *RESPLEN2. If more than *ANSSIZP bytes are needed and ANSP2
654 is non-NULL (required for a second query) then malloc is used to
655 allocate a new response buffer, *ANSSIZP2 is set to the new buffer
656 size and *ANSP2_MALLOCED is set to 1.
658 The ANSP2_MALLOCED argument will eventually be removed as the
659 change in buffer pointer can be used to detect the buffer has
660 changed and that the caller should use free on the new buffer.
662 Note that the answers may arrive in any order from the server and
663 therefore the first and second answer buffers may not correspond to
664 the first and second queries.
666 It is not supported to call this function with a non-NULL ANSP2
667 but a NULL ANSCP. Put another way, you can call send_vc with a
668 single unmodifiable buffer or two modifiable buffers, but no other
669 combination is supported.
671 It is the caller's responsibility to free the malloc allocated
672 buffers by detecting that the pointers have changed from their
673 original values i.e. *ANSCP or *ANSP2 has changed.
675 If errors are encountered then *TERRNO is set to an appropriate
676 errno value and a zero result is returned for a recoverable error,
677 and a less-than zero result is returned for a non-recoverable error.
679 If no errors are encountered then *TERRNO is left unmodified and
680 a the length of the first response in bytes is returned. */
682 send_vc(res_state statp
,
683 const u_char
*buf
, int buflen
, const u_char
*buf2
, int buflen2
,
684 u_char
**ansp
, int *anssizp
,
685 int *terrno
, int ns
, u_char
**anscp
, u_char
**ansp2
, int *anssizp2
,
686 int *resplen2
, int *ansp2_malloced
)
688 const HEADER
*hp
= (HEADER
*) buf
;
689 const HEADER
*hp2
= (HEADER
*) buf2
;
690 HEADER
*anhp
= (HEADER
*) *ansp
;
691 struct sockaddr
*nsap
= get_nsaddr (statp
, ns
);
692 int truncating
, connreset
, n
;
693 /* On some architectures compiler might emit a warning indicating
694 'resplen' may be used uninitialized. However if buf2 == NULL
695 then this code won't be executed; if buf2 != NULL, then first
696 time round the loop recvresp1 and recvresp2 will be 0 so this
697 code won't be executed but "thisresplenp = &resplen;" followed
698 by "*thisresplenp = rlen;" will be executed so that subsequent
699 times round the loop resplen has been initialized. So this is
702 DIAG_PUSH_NEEDS_COMMENT
;
703 DIAG_IGNORE_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
705 DIAG_POP_NEEDS_COMMENT
;
715 /* Are we still talking to whom we want to talk to? */
716 if (statp
->_vcsock
>= 0 && (statp
->_flags
& RES_F_VC
) != 0) {
717 struct sockaddr_in6 peer
;
718 socklen_t size
= sizeof peer
;
720 if (getpeername(statp
->_vcsock
,
721 (struct sockaddr
*)&peer
, &size
) < 0 ||
722 !sock_eq(&peer
, (struct sockaddr_in6
*) nsap
)) {
723 __res_iclose(statp
, false);
724 statp
->_flags
&= ~RES_F_VC
;
728 if (statp
->_vcsock
< 0 || (statp
->_flags
& RES_F_VC
) == 0) {
729 if (statp
->_vcsock
>= 0)
730 __res_iclose(statp
, false);
732 statp
->_vcsock
= socket
733 (nsap
->sa_family
, SOCK_STREAM
| SOCK_CLOEXEC
, 0);
734 if (statp
->_vcsock
< 0) {
736 if (resplen2
!= NULL
)
741 if (connect(statp
->_vcsock
, nsap
,
742 nsap
->sa_family
== AF_INET
743 ? sizeof (struct sockaddr_in
)
744 : sizeof (struct sockaddr_in6
)) < 0) {
746 return close_and_return_error (statp
, resplen2
);
748 statp
->_flags
|= RES_F_VC
;
752 * Send length & message
754 len
= htons ((u_short
) buflen
);
755 evConsIovec(&len
, INT16SZ
, &iov
[0]);
756 evConsIovec((void*)buf
, buflen
, &iov
[1]);
758 ssize_t explen
= INT16SZ
+ buflen
;
760 len2
= htons ((u_short
) buflen2
);
761 evConsIovec(&len2
, INT16SZ
, &iov
[2]);
762 evConsIovec((void*)buf2
, buflen2
, &iov
[3]);
764 explen
+= INT16SZ
+ buflen2
;
766 if (TEMP_FAILURE_RETRY (writev(statp
->_vcsock
, iov
, niov
)) != explen
) {
768 return close_and_return_error (statp
, resplen2
);
771 * Receive length & response
774 /* Skip the second response if there is no second query.
775 To do that we mark the second response as received. */
776 int recvresp2
= buf2
== NULL
;
779 cp
= (u_char
*)&rlen16
;
780 len
= sizeof(rlen16
);
781 while ((n
= TEMP_FAILURE_RETRY (read(statp
->_vcsock
, cp
,
790 * A long running process might get its TCP
791 * connection reset if the remote server was
792 * restarted. Requery the server instead of
793 * trying a new one. When there is only one
794 * server, this means that a query might work
795 * instead of failing. We only allow one reset
796 * per query to prevent looping.
798 if (*terrno
== ECONNRESET
&& !connreset
)
800 __res_iclose (statp
, false);
804 return close_and_return_error (statp
, resplen2
);
806 int rlen
= ntohs (rlen16
);
811 if ((recvresp1
| recvresp2
) == 0 || buf2
== NULL
) {
812 /* We have not received any responses
813 yet or we only have one response to
815 thisanssizp
= anssizp
;
816 thisansp
= anscp
?: ansp
;
817 assert (anscp
!= NULL
|| ansp2
== NULL
);
818 thisresplenp
= &resplen
;
820 thisanssizp
= anssizp2
;
822 thisresplenp
= resplen2
;
824 anhp
= (HEADER
*) *thisansp
;
826 *thisresplenp
= rlen
;
827 /* Is the answer buffer too small? */
828 if (*thisanssizp
< rlen
) {
829 /* If the current buffer is not the the static
830 user-supplied buffer then we can reallocate
832 if (thisansp
!= NULL
&& thisansp
!= ansp
) {
833 /* Always allocate MAXPACKET, callers expect
834 this specific size. */
835 u_char
*newp
= malloc (MAXPACKET
);
839 return close_and_return_error (statp
, resplen2
);
841 *thisanssizp
= MAXPACKET
;
843 if (thisansp
== ansp2
)
845 anhp
= (HEADER
*) newp
;
846 /* A uint16_t can't be larger than MAXPACKET
847 thus it's safe to allocate MAXPACKET but
848 read RLEN bytes instead. */
857 if (__glibc_unlikely (len
< HFIXEDSZ
)) {
859 * Undersized message.
862 return close_and_return_error (statp
, resplen2
);
866 while (len
!= 0 && (n
= read(statp
->_vcsock
, (char *)cp
, (int)len
)) > 0){
870 if (__glibc_unlikely (n
<= 0)) {
872 return close_and_return_error (statp
, resplen2
);
874 if (__glibc_unlikely (truncating
)) {
876 * Flush rest of answer so connection stays in synch.
879 len
= rlen
- *thisanssizp
;
883 n
= read(statp
->_vcsock
, junk
,
884 (len
> sizeof junk
) ? sizeof junk
: len
);
892 * If the calling application has bailed out of
893 * a previous call and failed to arrange to have
894 * the circuit closed or the server has got
895 * itself confused, then drop the packet and
896 * wait for the correct one.
898 if ((recvresp1
|| hp
->id
!= anhp
->id
)
899 && (recvresp2
|| hp2
->id
!= anhp
->id
))
902 /* Mark which reply we received. */
903 if (recvresp1
== 0 && hp
->id
== anhp
->id
)
907 /* Repeat waiting if we have a second answer to arrive. */
908 if ((recvresp1
& recvresp2
) == 0)
912 * All is well, or the error is fatal. Signal that the
913 * next nameserver ought not be tried.
919 reopen (res_state statp
, int *terrno
, int ns
)
921 if (EXT(statp
).nssocks
[ns
] == -1) {
922 struct sockaddr
*nsap
= get_nsaddr (statp
, ns
);
925 /* only try IPv6 if IPv6 NS and if not failed before */
926 if (nsap
->sa_family
== AF_INET6
&& !statp
->ipv6_unavail
) {
927 EXT(statp
).nssocks
[ns
] = socket
929 SOCK_DGRAM
| SOCK_NONBLOCK
| SOCK_CLOEXEC
, 0);
930 if (EXT(statp
).nssocks
[ns
] < 0)
931 statp
->ipv6_unavail
= errno
== EAFNOSUPPORT
;
932 slen
= sizeof (struct sockaddr_in6
);
933 } else if (nsap
->sa_family
== AF_INET
) {
934 EXT(statp
).nssocks
[ns
] = socket
936 SOCK_DGRAM
| SOCK_NONBLOCK
| SOCK_CLOEXEC
, 0);
937 slen
= sizeof (struct sockaddr_in
);
939 if (EXT(statp
).nssocks
[ns
] < 0) {
945 * On a 4.3BSD+ machine (client and server,
946 * actually), sending to a nameserver datagram
947 * port with no nameserver will cause an
948 * ICMP port unreachable message to be returned.
949 * If our datagram socket is "connected" to the
950 * server, we get an ECONNREFUSED error on the next
951 * socket operation, and select returns if the
952 * error message is received. We can thus detect
953 * the absence of a nameserver without timing out.
955 /* With GCC 5.3 when compiling with -Os the compiler
956 emits a warning that slen may be used uninitialized,
957 but that is never true. Both slen and
958 EXT(statp).nssocks[ns] are initialized together or
959 the function return -1 before control flow reaches
960 the call to connect with slen. */
961 DIAG_PUSH_NEEDS_COMMENT
;
962 DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
963 if (connect(EXT(statp
).nssocks
[ns
], nsap
, slen
) < 0) {
964 DIAG_POP_NEEDS_COMMENT
;
965 __res_iclose(statp
, false);
/* The send_dg function is responsible for sending a DNS query over UDP
   to the nameserver numbered NS from the res_state STATP i.e.
   EXT(statp).nssocks[ns].  The function supports IPv4 and IPv6 queries
   along with the ability to send the query in parallel for both stacks
   (default) or serially (RES_SNGLKUP).  It also supports serial lookup
   with a close and reopen of the socket used to talk to the server
   (RES_SNGLKUPREOP) to work around broken name servers.

   The query stored in BUF of BUFLEN length is sent first followed by
   the query stored in BUF2 of BUFLEN2 length.  Queries are sent
   in parallel (default) or serially (RES_SNGLKUP or RES_SNGLKUPREOP).

   Answers to the query are stored firstly in *ANSP up to a max of
   *ANSSIZP bytes.  If more than *ANSSIZP bytes are needed and ANSCP
   is non-NULL (to indicate that modifying the answer buffer is allowed)
   then malloc is used to allocate a new response buffer and ANSCP and
   ANSP will both point to the new buffer.  If more than *ANSSIZP bytes
   are needed but ANSCP is NULL, then as much of the response as
   possible is read into the buffer, but the results will be truncated.
   When truncation happens because of a small answer buffer the DNS
   packet's header field TC will be set to 1, indicating a truncated
   message, while the rest of the UDP packet is discarded.

   Answers to the query are stored secondly in *ANSP2 up to a max of
   *ANSSIZP2 bytes, with the actual response length stored in
   *RESPLEN2.  If more than *ANSSIZP bytes are needed and ANSP2
   is non-NULL (required for a second query) then malloc is used to
   allocate a new response buffer, *ANSSIZP2 is set to the new buffer
   size and *ANSP2_MALLOCED is set to 1.

   The ANSP2_MALLOCED argument will eventually be removed as the
   change in buffer pointer can be used to detect the buffer has
   changed and that the caller should use free on the new buffer.

   Note that the answers may arrive in any order from the server and
   therefore the first and second answer buffers may not correspond to
   the first and second queries.

   It is not supported to call this function with a non-NULL ANSP2
   but a NULL ANSCP.  Put another way, you can call send_dg with a
   single unmodifiable buffer or two modifiable buffers, but no other
   combination is supported.

   It is the caller's responsibility to free the malloc allocated
   buffers by detecting that the pointers have changed from their
   original values i.e. *ANSCP or *ANSP2 has changed.

   If an answer is truncated because of UDP datagram DNS limits then
   *V_CIRCUIT is set to 1 and the return value non-zero to indicate to
   the caller to retry with TCP.  The value *GOTSOMEWHERE is set to 1
   if any progress was made reading a response from the nameserver and
   is used by the caller to distinguish between ECONNREFUSED and
   ETIMEDOUT (the latter if *GOTSOMEWHERE is 1).

   If errors are encountered then *TERRNO is set to an appropriate
   errno value and a zero result is returned for a recoverable error,
   and a less-than zero result is returned for a non-recoverable error.

   If no errors are encountered then *TERRNO is left unmodified and
   the length of the first response in bytes is returned.  */
/* send_dg: send the query in BUF and, when BUF2 is not NULL, a second
   query in BUF2 over the datagram socket EXT(statp).nssocks[ns], then
   poll that socket and validate the replies that arrive.
   The first reply is described by ANSCP/ANSP and *ANSSIZP, the second
   by *ANSSIZP2 and *RESPLEN2.  *ANSP2_MALLOCED is set to 1 when the
   buffer selected for the second reply is the one that gets replaced
   by a MAXPACKET allocation.  Error exits go through
   close_and_return_error (statp, resplen2).
   NOTE(review): many lines of this function are not visible in this
   excerpt; the comments added below describe only the visible ones.  */
1034 send_dg(res_state statp
,
1035 const u_char
*buf
, int buflen
, const u_char
*buf2
, int buflen2
,
1036 u_char
**ansp
, int *anssizp
,
1037 int *terrno
, int ns
, int *v_circuit
, int *gotsomewhere
, u_char
**anscp
,
1038 u_char
**ansp2
, int *anssizp2
, int *resplen2
, int *ansp2_malloced
)
/* Both query buffers are viewed as DNS headers so that the id of an
   incoming reply can be compared with hp->id and hp2->id below.  */
1040 const HEADER
*hp
= (HEADER
*) buf
;
1041 const HEADER
*hp2
= (HEADER
*) buf2
;
1042 struct timespec now
, timeout
, finish
;
1043 struct pollfd pfd
[1];
1045 struct sockaddr_in6 from
;
1050 * Compute time for the total operation.
/* The base timeout is statp->retrans shifted left by the server index
   ns; it is then divided by statp->nscount.
   NOTE(review): any condition guarding the division is not visible
   in this excerpt.  */
1052 int seconds
= (statp
->retrans
<< ns
);
1054 seconds
/= statp
->nscount
;
/* RES_SNGLKUPREOP also enables single-request mode: single_request is
   the OR of both option bits.  */
1057 bool single_request_reopen
= (statp
->options
& RES_SNGLKUPREOP
) != 0;
1058 bool single_request
= (((statp
->options
& RES_SNGLKUP
) != 0)
1059 | single_request_reopen
);
/* Remember the incoming *gotsomewhere so that it can be restored when
   the request is retried in one of the single-request modes.  */
1060 int save_gotsomewhere
= *gotsomewhere
;
1064 retval
= reopen (statp
, terrno
, ns
);
1067 if (resplen2
!= NULL
)
/* timeout holds the computed number of seconds; finish is derived
   from now and timeout (presumably now + timeout) and is the deadline
   compared against now further down.  */
1073 evConsTime(&timeout
, seconds
, 0);
1074 evAddTime(&finish
, &now
, &timeout
);
1075 int need_recompute
= 0;
1078 /* Skip the second response if there is no second query.
1079 To do that we mark the second response as received. */
1080 int recvresp2
= buf2
== NULL
;
/* Begin by waiting for the socket of server ns to become writable.  */
1081 pfd
[0].fd
= EXT(statp
).nssocks
[ns
];
1082 pfd
[0].events
= POLLOUT
;
/* When a recomputation was requested, refresh the remaining timeout
   and give up with an error once the deadline in finish is reached.  */
1084 if (need_recompute
) {
1087 if (evCmpTime(finish
, now
) <= 0) {
1089 return close_and_return_error (statp
, resplen2
);
1091 evSubTime(&timeout
, &finish
, &now
);
1094 /* Convert struct timespec in milliseconds. */
1095 ptimeout
= timeout
.tv_sec
* 1000 + timeout
.tv_nsec
/ 1000000;
/* Poll without blocking first; only when nothing is ready is the
   socket polled again with ptimeout as the timeout.  */
1099 n
= __poll (pfd
, 1, 0);
1100 if (__glibc_unlikely (n
== 0)) {
1101 n
= __poll (pfd
, 1, ptimeout
);
/* One answer has already been received: switch to single-request
   mode, or to single-request-reopen mode when single-request mode was
   already active, restoring *gotsomewhere in both cases.
   NOTE(review): the condition that leads into this path is not visible
   in this excerpt.  */
1105 if (resplen
> 1 && (recvresp1
|| (buf2
!= NULL
&& recvresp2
)))
1107 /* There are quite a few broken name servers out
1108 there which don't handle two outstanding
1109 requests from the same source. There are also
1110 broken firewall settings. If we time out after
1111 having received one answer switch to the mode
1112 where we send the second request only once we
1113 have received the first answer. */
1114 if (!single_request
)
1116 statp
->options
|= RES_SNGLKUP
;
1117 single_request
= true;
1118 *gotsomewhere
= save_gotsomewhere
;
1121 else if (!single_request_reopen
)
1123 statp
->options
|= RES_SNGLKUPREOP
;
1124 single_request_reopen
= true;
1125 *gotsomewhere
= save_gotsomewhere
;
1126 __res_iclose (statp
, false);
1135 if (resplen2
!= NULL
)
1141 goto recompute_resend
;
/* The socket is writable: send the outstanding query or queries.  */
1146 if (pfd
[0].revents
& POLLOUT
) {
1147 #ifndef __ASSUME_SENDMMSG
1148 static int have_sendmmsg
;
1150 # define have_sendmmsg 1
/* When nothing has been written yet and a second query exists, both
   queries are handed to a single __sendmmsg call.
   NOTE(review): the end of this condition is cut off in this
   excerpt.  */
1152 if (have_sendmmsg
>= 0 && nwritten
== 0 && buf2
!= NULL
1155 struct iovec iov
[2];
1156 struct mmsghdr reqs
[2];
/* reqs[0] carries buf/buflen and reqs[1] carries buf2/buflen2, each
   through its own single-element iovec; no destination address and no
   control data are set.  */
1157 reqs
[0].msg_hdr
.msg_name
= NULL
;
1158 reqs
[0].msg_hdr
.msg_namelen
= 0;
1159 reqs
[0].msg_hdr
.msg_iov
= &iov
[0];
1160 reqs
[0].msg_hdr
.msg_iovlen
= 1;
1161 iov
[0].iov_base
= (void *) buf
;
1162 iov
[0].iov_len
= buflen
;
1163 reqs
[0].msg_hdr
.msg_control
= NULL
;
1164 reqs
[0].msg_hdr
.msg_controllen
= 0;
1166 reqs
[1].msg_hdr
.msg_name
= NULL
;
1167 reqs
[1].msg_hdr
.msg_namelen
= 0;
1168 reqs
[1].msg_hdr
.msg_iov
= &iov
[1];
1169 reqs
[1].msg_hdr
.msg_iovlen
= 1;
1170 iov
[1].iov_base
= (void *) buf2
;
1171 iov
[1].iov_len
= buflen2
;
1172 reqs
[1].msg_hdr
.msg_control
= NULL
;
1173 reqs
[1].msg_hdr
.msg_controllen
= 0;
1175 int ndg
= __sendmmsg (pfd
[0].fd
, reqs
, 2, MSG_NOSIGNAL
);
/* Two datagrams accepted: check that neither was sent short, then
   wait for input.  EINTR and EAGAIN lead to a resend.  */
1176 if (__glibc_likely (ndg
== 2))
1178 if (reqs
[0].msg_len
!= buflen
1179 || reqs
[1].msg_len
!= buflen2
)
1182 pfd
[0].events
= POLLIN
;
1185 else if (ndg
== 1 && reqs
[0].msg_len
== buflen
)
1187 else if (ndg
< 0 && (errno
== EINTR
|| errno
== EAGAIN
))
1188 goto recompute_resend
;
1191 #ifndef __ASSUME_SENDMMSG
1192 if (__glibc_unlikely (have_sendmmsg
== 0))
1194 if (ndg
< 0 && errno
== ENOSYS
)
1204 return close_and_return_error (statp
, resplen2
);
1210 #ifndef __ASSUME_SENDMMSG
/* Plain send path.  The length check below expects buflen2 once
   nwritten is non-zero and buflen otherwise; a short or failed send is
   retried on EINTR/EAGAIN and is an error in every other case.  */
1214 sr
= send (pfd
[0].fd
, buf2
, buflen2
, MSG_NOSIGNAL
);
1216 sr
= send (pfd
[0].fd
, buf
, buflen
, MSG_NOSIGNAL
);
1218 if (sr
!= (nwritten
!= 0 ? buflen2
: buflen
)) {
1219 if (errno
== EINTR
|| errno
== EAGAIN
)
1220 goto recompute_resend
;
1221 return close_and_return_error (statp
, resplen2
);
/* Choose what to poll for next: input only when something was already
   written, there is no second query, or single-request mode is active;
   the other assignment below selects input and output together.  */
1224 if (nwritten
!= 0 || buf2
== NULL
|| single_request
)
1225 pfd
[0].events
= POLLIN
;
1227 pfd
[0].events
= POLLIN
| POLLOUT
;
/* The socket is readable: receive one reply and validate it.  */
1231 } else if (pfd
[0].revents
& POLLIN
) {
/* Select the destination: the first answer buffer while no reply has
   arrived or when there is only one query, otherwise the variables of
   the second answer.  */
1236 if ((recvresp1
| recvresp2
) == 0 || buf2
== NULL
) {
1237 /* We have not received any responses
1238 yet or we only have one response to
1240 thisanssizp
= anssizp
;
1241 thisansp
= anscp
?: ansp
;
1242 assert (anscp
!= NULL
|| ansp2
== NULL
);
1243 thisresplenp
= &resplen
;
1245 thisanssizp
= anssizp2
;
1247 thisresplenp
= resplen2
;
1250 if (*thisanssizp
< MAXPACKET
1251 /* If the current buffer is not the static
1252 user-supplied buffer then we can reallocate
1254 && (thisansp
!= NULL
&& thisansp
!= ansp
)
1256 /* Is the size too small? */
1257 && (ioctl (pfd
[0].fd
, FIONREAD
, thisresplenp
) < 0
1258 || *thisanssizp
< *thisresplenp
)
1261 /* Always allocate MAXPACKET, callers expect
1262 this specific size. */
/* NOTE(review): the check of newp and the store of newp through
   thisansp are not visible in this excerpt.  */
1263 u_char
*newp
= malloc (MAXPACKET
);
1265 *thisanssizp
= MAXPACKET
;
/* Report to the caller when the buffer selected here is ansp2.  */
1267 if (thisansp
== ansp2
)
1268 *ansp2_malloced
= 1;
1271 /* We could end up with truncation if anscp was NULL
1272 (not allowed to change caller's buffer) and the
1273 response buffer size is too small. This isn't a
1274 reliable way to detect truncation because the ioctl
1275 may be an inaccurate report of the UDP message size.
1276 Therefore we use this only to issue debug output.
1277 To do truncation accurately with UDP we need
1278 MSG_TRUNC which is only available on Linux. We
1279 can abstract out the Linux-specific feature in the
1280 future to detect truncation. */
1281 HEADER
*anhp
= (HEADER
*) *thisansp
;
/* Receive one datagram into the selected buffer; the sender address
   is stored in from and the result in *thisresplenp.  */
1282 socklen_t fromlen
= sizeof(struct sockaddr_in6
);
1283 assert (sizeof(from
) <= fromlen
);
1284 *thisresplenp
= recvfrom(pfd
[0].fd
, (char*)*thisansp
,
1286 (struct sockaddr
*)&from
, &fromlen
);
1287 if (__glibc_unlikely (*thisresplenp
<= 0)) {
1288 if (errno
== EINTR
|| errno
== EAGAIN
) {
1292 return close_and_return_error (statp
, resplen2
);
/* A reply shorter than HFIXEDSZ is treated as an error.  */
1295 if (__glibc_unlikely (*thisresplenp
< HFIXEDSZ
)) {
1297 * Undersized message.
1300 return close_and_return_error (statp
, resplen2
);
/* The reply id must match a query whose answer has not been received
   yet; this condition is true when it matches neither.  */
1302 if ((recvresp1
|| hp
->id
!= anhp
->id
)
1303 && (recvresp2
|| hp2
->id
!= anhp
->id
)) {
1305 * response from old query, ignore it.
1306 * XXX - potential security hazard could
/* Unless RES_INSECURE1 is set, the sender address is checked with
   res_ourserver_p.  */
1311 if (!(statp
->options
& RES_INSECURE1
) &&
1312 !res_ourserver_p(statp
, &from
)) {
1314 * response from wrong server? ignore it.
1315 * XXX - potential security hazard could
/* Unless RES_INSECURE2 is set, the reply is compared with the queries
   that are still outstanding by means of res_queriesmatch.  */
1320 if (!(statp
->options
& RES_INSECURE2
)
1321 && (recvresp1
|| !res_queriesmatch(buf
, buf
+ buflen
,
1325 && (recvresp2
|| !res_queriesmatch(buf2
, buf2
+ buflen2
,
1330 * response contains wrong query? ignore it.
1331 * XXX - potential security hazard could
/* Replies carrying SERVFAIL, NOTIMP or REFUSED get separate handling.
   NOTE(review): most of that handling is cut off in this excerpt.  */
1336 if (anhp
->rcode
== SERVFAIL
||
1337 anhp
->rcode
== NOTIMP
||
1338 anhp
->rcode
== REFUSED
) {
1340 if (recvresp1
|| (buf2
!= NULL
&& recvresp2
)) {
1346 /* No data from the first reply. */
1348 /* We are waiting for a possible second reply. */
1349 if (hp
->id
== anhp
->id
)
1357 /* don't retry if called from dig */
1359 return close_and_return_error (statp
, resplen2
);
1360 __res_iclose(statp
, false);
/* A NOERROR reply with no answer records, neither aa nor ra set, and
   no additional records.  */
1362 if (anhp
->rcode
== NOERROR
&& anhp
->ancount
== 0
1363 && anhp
->aa
== 0 && anhp
->ra
== 0 && anhp
->arcount
== 0) {
/* The reply has the tc bit set and RES_IGNTC is not set: the socket
   is closed here.  */
1366 if (!(statp
->options
& RES_IGNTC
) && anhp
->tc
) {
1368 * To get the rest of answer,
1369 * use TCP with same server.
1372 __res_iclose(statp
, false);
1373 // XXX if we have received one reply we could
1374 // XXX use it and not repeat it over TCP...
1375 if (resplen2
!= NULL
)
1379 /* Mark which reply we received. */
1380 if (recvresp1
== 0 && hp
->id
== anhp
->id
)
1384 /* Repeat waiting if we have a second answer to arrive. */
1385 if ((recvresp1
& recvresp2
) == 0) {
/* In single-request mode the socket is polled for writability again;
   in reopen mode the socket is closed and reopened first and pfd is
   pointed at the new descriptor.  */
1386 if (single_request
) {
1387 pfd
[0].events
= POLLOUT
;
1388 if (single_request_reopen
) {
1389 __res_iclose (statp
, false);
1390 retval
= reopen (statp
, terrno
, ns
);
1393 if (resplen2
!= NULL
)
1397 pfd
[0].fd
= EXT(statp
).nssocks
[ns
];
1402 /* All is well. We have received both responses (if
1403 two responses were requested). */
1405 } else if (pfd
[0].revents
& (POLLERR
| POLLHUP
| POLLNVAL
))
1406 /* Something went wrong. We can stop trying. */
1407 return close_and_return_error (statp
, resplen2
);
1409 /* poll should not have returned > 0 in this case. */
1415 sock_eq(struct sockaddr_in6
*a1
, struct sockaddr_in6
*a2
) {
1416 if (a1
->sin6_family
== a2
->sin6_family
) {
1417 if (a1
->sin6_family
== AF_INET
)
1418 return ((((struct sockaddr_in
*)a1
)->sin_port
==
1419 ((struct sockaddr_in
*)a2
)->sin_port
) &&
1420 (((struct sockaddr_in
*)a1
)->sin_addr
.s_addr
==
1421 ((struct sockaddr_in
*)a2
)->sin_addr
.s_addr
));
1423 return ((a1
->sin6_port
== a2
->sin6_port
) &&
1424 !memcmp(&a1
->sin6_addr
, &a2
->sin6_addr
,
1425 sizeof (struct in6_addr
)));
1427 if (a1
->sin6_family
== AF_INET
) {
1428 struct sockaddr_in6
*sap
= a1
;
1431 } /* assumes that AF_INET and AF_INET6 are the only possibilities */
1432 return ((a1
->sin6_port
== ((struct sockaddr_in
*)a2
)->sin_port
) &&
1433 IN6_IS_ADDR_V4MAPPED(&a1
->sin6_addr
) &&
1434 (a1
->sin6_addr
.s6_addr32
[3] ==
1435 ((struct sockaddr_in
*)a2
)->sin_addr
.s_addr
));