1 /* Copyright (C) 2016-2018 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <http://www.gnu.org/licenses/>. */
19 * Copyright (c) 1985, 1989, 1993
20 * The Regents of the University of California. All rights reserved.
22 * Redistribution and use in source and binary forms, with or without
23 * modification, are permitted provided that the following conditions
25 * 1. Redistributions of source code must retain the above copyright
26 * notice, this list of conditions and the following disclaimer.
27 * 2. Redistributions in binary form must reproduce the above copyright
28 * notice, this list of conditions and the following disclaimer in the
29 * documentation and/or other materials provided with the distribution.
30 * 4. Neither the name of the University nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
34 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48 * Portions Copyright (c) 1993 by Digital Equipment Corporation.
50 * Permission to use, copy, modify, and distribute this software for any
51 * purpose with or without fee is hereby granted, provided that the above
52 * copyright notice and this permission notice appear in all copies, and that
53 * the name of Digital Equipment Corporation not be used in advertising or
54 * publicity pertaining to distribution of the document or software without
55 * specific, written prior permission.
57 * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
58 * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
59 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
60 * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
61 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
62 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
63 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
68 * Portions Copyright (c) 1996-1999 by Internet Software Consortium.
70 * Permission to use, copy, modify, and distribute this software for any
71 * purpose with or without fee is hereby granted, provided that the above
72 * copyright notice and this permission notice appear in all copies.
74 * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
75 * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
76 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
77 * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
78 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
79 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
80 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
85 * Send query to name server and wait for reply.
89 #include <sys/types.h>
90 #include <sys/param.h>
92 #include <sys/socket.h>
96 #include <netinet/in.h>
97 #include <arpa/nameser.h>
98 #include <arpa/inet.h>
99 #include <sys/ioctl.h>
104 #include <resolv/resolv-internal.h>
105 #include <resolv/resolv_context.h>
110 #include <kernel-features.h>
111 #include <libc-diag.h>
112 #include <hp-timing.h>
115 #define MAXPACKET PACKETSZ
117 #define MAXPACKET 65536
120 /* From ev_streams.c. */
123 __attribute ((always_inline
))
124 evConsIovec(void *buf
, size_t cnt
, struct iovec
*vec
) {
125 memset(vec
, 0xf5, sizeof (*vec
));
130 /* From ev_timers.c. */
132 #define BILLION 1000000000
135 evConsTime(struct timespec
*res
, time_t sec
, long nsec
) {
141 evAddTime(struct timespec
*res
, const struct timespec
*addend1
,
142 const struct timespec
*addend2
) {
143 res
->tv_sec
= addend1
->tv_sec
+ addend2
->tv_sec
;
144 res
->tv_nsec
= addend1
->tv_nsec
+ addend2
->tv_nsec
;
145 if (res
->tv_nsec
>= BILLION
) {
147 res
->tv_nsec
-= BILLION
;
152 evSubTime(struct timespec
*res
, const struct timespec
*minuend
,
153 const struct timespec
*subtrahend
) {
154 res
->tv_sec
= minuend
->tv_sec
- subtrahend
->tv_sec
;
155 if (minuend
->tv_nsec
>= subtrahend
->tv_nsec
)
156 res
->tv_nsec
= minuend
->tv_nsec
- subtrahend
->tv_nsec
;
158 res
->tv_nsec
= (BILLION
159 - subtrahend
->tv_nsec
+ minuend
->tv_nsec
);
165 evCmpTime(struct timespec a
, struct timespec b
) {
166 long x
= a
.tv_sec
- b
.tv_sec
;
169 x
= a
.tv_nsec
- b
.tv_nsec
;
170 return (x
< 0L ? (-1) : x
> 0L ? (1) : (0));
174 evNowTime(struct timespec
*res
) {
177 if (gettimeofday(&now
, NULL
) < 0)
178 evConsTime(res
, 0, 0);
180 TIMEVAL_TO_TIMESPEC (&now
, res
);
184 #define EXT(res) ((res)->_u._ext)
188 static struct sockaddr
*get_nsaddr (res_state
, unsigned int);
189 static int send_vc(res_state
, const u_char
*, int,
191 u_char
**, int *, int *, int, u_char
**,
192 u_char
**, int *, int *, int *);
193 static int send_dg(res_state
, const u_char
*, int,
195 u_char
**, int *, int *, int,
196 int *, int *, u_char
**,
197 u_char
**, int *, int *, int *);
198 static int sock_eq(struct sockaddr_in6
*, struct sockaddr_in6
*);
203 * res_isourserver(ina)
204 * looks up "ina" in _res.ns_addr_list[]
209 * paul vixie, 29may94
212 res_ourserver_p(const res_state statp
, const struct sockaddr_in6
*inp
)
216 if (inp
->sin6_family
== AF_INET
) {
217 struct sockaddr_in
*in4p
= (struct sockaddr_in
*) inp
;
218 in_port_t port
= in4p
->sin_port
;
219 in_addr_t addr
= in4p
->sin_addr
.s_addr
;
221 for (ns
= 0; ns
< statp
->nscount
; ns
++) {
222 const struct sockaddr_in
*srv
=
223 (struct sockaddr_in
*) get_nsaddr (statp
, ns
);
225 if ((srv
->sin_family
== AF_INET
) &&
226 (srv
->sin_port
== port
) &&
227 (srv
->sin_addr
.s_addr
== INADDR_ANY
||
228 srv
->sin_addr
.s_addr
== addr
))
231 } else if (inp
->sin6_family
== AF_INET6
) {
232 for (ns
= 0; ns
< statp
->nscount
; ns
++) {
233 const struct sockaddr_in6
*srv
234 = (struct sockaddr_in6
*) get_nsaddr (statp
, ns
);
235 if ((srv
->sin6_family
== AF_INET6
) &&
236 (srv
->sin6_port
== inp
->sin6_port
) &&
237 !(memcmp(&srv
->sin6_addr
, &in6addr_any
,
238 sizeof (struct in6_addr
)) &&
239 memcmp(&srv
->sin6_addr
, &inp
->sin6_addr
,
240 sizeof (struct in6_addr
))))
248 res_isourserver (const struct sockaddr_in
*inp
)
250 return res_ourserver_p (&_res
, (const struct sockaddr_in6
*) inp
);
254 * res_nameinquery(name, type, class, buf, eom)
255 * look for (name,type,class) in the query section of packet (buf,eom)
257 * buf + HFIXEDSZ <= eom
263 * paul vixie, 29may94
266 res_nameinquery(const char *name
, int type
, int class,
267 const u_char
*buf
, const u_char
*eom
)
269 const u_char
*cp
= buf
+ HFIXEDSZ
;
270 int qdcount
= ntohs(((HEADER
*)buf
)->qdcount
);
272 while (qdcount
-- > 0) {
273 char tname
[MAXDNAME
+1];
274 int n
, ttype
, tclass
;
276 n
= dn_expand(buf
, eom
, cp
, tname
, sizeof tname
);
280 if (cp
+ 2 * INT16SZ
> eom
)
283 NS_GET16(tclass
, cp
);
284 if (ttype
== type
&& tclass
== class &&
285 ns_samename(tname
, name
) == 1)
290 libresolv_hidden_def (res_nameinquery
)
292 /* Returns a shift value for the name server index. Used to implement
295 nameserver_offset (struct __res_state
*statp
)
297 /* If we only have one name server or rotation is disabled, return
298 offset 0 (no rotation). */
299 unsigned int nscount
= statp
->nscount
;
300 if (nscount
<= 1 || !(statp
->options
& RES_ROTATE
))
303 /* Global offset. The lowest bit indicates whether the offset has
304 been initialized with a random value. Use relaxed MO to access
305 global_offset because all we need is a sequence of roughly
307 static unsigned int global_offset
;
308 unsigned int offset
= atomic_fetch_add_relaxed (&global_offset
, 2);
309 if ((offset
& 1) == 0)
311 /* Initialization is required. */
314 HP_TIMING_NOW (ticks
);
318 __gettimeofday (&tv
, NULL
);
319 offset
= ((tv
.tv_sec
<< 8) ^ tv
.tv_usec
);
321 /* The lowest bit is the most random. Preserve it. */
324 /* Store the new starting value. atomic_fetch_add_relaxed
325 returns the old value, so emulate that by storing the new
326 (incremented) value. Concurrent initialization with
327 different random values is harmless. */
328 atomic_store_relaxed (&global_offset
, (offset
| 1) + 2);
331 /* Remove the initialization bit. */
334 /* Avoid the division in the most common cases. */
344 return offset
% nscount
;
349 * res_queriesmatch(buf1, eom1, buf2, eom2)
350 * is there a 1:1 mapping of (name,type,class)
351 * in (buf1,eom1) and (buf2,eom2)?
354 * 0 : not a 1:1 mapping
355 * >0 : is a 1:1 mapping
357 * paul vixie, 29may94
360 res_queriesmatch(const u_char
*buf1
, const u_char
*eom1
,
361 const u_char
*buf2
, const u_char
*eom2
)
363 if (buf1
+ HFIXEDSZ
> eom1
|| buf2
+ HFIXEDSZ
> eom2
)
367 * Only header section present in replies to
368 * dynamic update packets.
370 if ((((HEADER
*)buf1
)->opcode
== ns_o_update
) &&
371 (((HEADER
*)buf2
)->opcode
== ns_o_update
))
374 /* Note that we initially do not convert QDCOUNT to the host byte
375 order. We can compare it with the second buffer's QDCOUNT
376 value without doing this. */
377 int qdcount
= ((HEADER
*)buf1
)->qdcount
;
378 if (qdcount
!= ((HEADER
*)buf2
)->qdcount
)
381 qdcount
= htons (qdcount
);
382 const u_char
*cp
= buf1
+ HFIXEDSZ
;
384 while (qdcount
-- > 0) {
385 char tname
[MAXDNAME
+1];
386 int n
, ttype
, tclass
;
388 n
= dn_expand(buf1
, eom1
, cp
, tname
, sizeof tname
);
392 if (cp
+ 2 * INT16SZ
> eom1
)
395 NS_GET16(tclass
, cp
);
396 if (!res_nameinquery(tname
, ttype
, tclass
, buf2
, eom2
))
401 libresolv_hidden_def (res_queriesmatch
)
404 __res_context_send (struct resolv_context
*ctx
,
405 const unsigned char *buf
, int buflen
,
406 const unsigned char *buf2
, int buflen2
,
407 unsigned char *ans
, int anssiz
,
408 unsigned char **ansp
, unsigned char **ansp2
,
409 int *nansp2
, int *resplen2
, int *ansp2_malloced
)
411 struct __res_state
*statp
= ctx
->resp
;
412 int gotsomewhere
, terrno
, try, v_circuit
, resplen
, n
;
414 if (statp
->nscount
== 0) {
419 if (anssiz
< (buf2
== NULL
? 1 : 2) * HFIXEDSZ
) {
420 __set_errno (EINVAL
);
424 v_circuit
= ((statp
->options
& RES_USEVC
)
426 || buflen2
> PACKETSZ
);
431 * If the ns_addr_list in the resolver context has changed, then
432 * invalidate our cached copy and the associated timing data.
434 if (EXT(statp
).nscount
!= 0) {
437 if (EXT(statp
).nscount
!= statp
->nscount
)
440 for (unsigned int ns
= 0; ns
< statp
->nscount
; ns
++) {
441 if (statp
->nsaddr_list
[ns
].sin_family
!= 0
442 && !sock_eq((struct sockaddr_in6
*)
443 &statp
->nsaddr_list
[ns
],
444 EXT(statp
).nsaddrs
[ns
]))
451 __res_iclose(statp
, false);
452 EXT(statp
).nscount
= 0;
457 * Maybe initialize our private copy of the ns_addr_list.
459 if (EXT(statp
).nscount
== 0) {
460 for (unsigned int ns
= 0; ns
< statp
->nscount
; ns
++) {
461 EXT(statp
).nssocks
[ns
] = -1;
462 if (statp
->nsaddr_list
[ns
].sin_family
== 0)
464 if (EXT(statp
).nsaddrs
[ns
] == NULL
)
465 EXT(statp
).nsaddrs
[ns
] =
466 malloc(sizeof (struct sockaddr_in6
));
467 if (EXT(statp
).nsaddrs
[ns
] != NULL
)
468 memset (mempcpy(EXT(statp
).nsaddrs
[ns
],
469 &statp
->nsaddr_list
[ns
],
470 sizeof (struct sockaddr_in
)),
472 sizeof (struct sockaddr_in6
)
473 - sizeof (struct sockaddr_in
));
476 __set_errno (ENOMEM
);
480 EXT(statp
).nscount
= statp
->nscount
;
483 /* Name server index offset. Used to implement
485 unsigned int ns_offset
= nameserver_offset (statp
);
488 * Send request, RETRY times, or until successful.
490 for (try = 0; try < statp
->retry
; try++) {
491 for (unsigned ns_shift
= 0; ns_shift
< statp
->nscount
; ns_shift
++)
493 /* The actual name server index. This implements
495 unsigned int ns
= ns_shift
+ ns_offset
;
496 if (ns
>= statp
->nscount
)
497 ns
-= statp
->nscount
;
500 if (__glibc_unlikely (v_circuit
)) {
501 /* Use VC; at most one attempt per server. */
503 n
= send_vc(statp
, buf
, buflen
, buf2
, buflen2
,
504 &ans
, &anssiz
, &terrno
,
505 ns
, ansp
, ansp2
, nansp2
, resplen2
,
509 if (n
== 0 && (buf2
== NULL
|| *resplen2
== 0))
513 n
= send_dg(statp
, buf
, buflen
, buf2
, buflen2
,
514 &ans
, &anssiz
, &terrno
,
515 ns
, &v_circuit
, &gotsomewhere
, ansp
,
516 ansp2
, nansp2
, resplen2
, ansp2_malloced
);
519 if (n
== 0 && (buf2
== NULL
|| *resplen2
== 0))
522 // XXX Check whether both requests failed or
523 // XXX whether one has been answered successfully
530 * If we have temporarily opened a virtual circuit,
531 * or if we haven't been asked to keep a socket open,
534 if ((v_circuit
&& (statp
->options
& RES_USEVC
) == 0) ||
535 (statp
->options
& RES_STAYOPEN
) == 0) {
536 __res_iclose(statp
, false);
542 __res_iclose(statp
, false);
545 __set_errno (ECONNREFUSED
); /* no nameservers found */
547 __set_errno (ETIMEDOUT
); /* no answer obtained */
549 __set_errno (terrno
);
553 /* Common part of res_nsend and res_send. */
555 context_send_common (struct resolv_context
*ctx
,
556 const unsigned char *buf
, int buflen
,
557 unsigned char *ans
, int anssiz
)
561 RES_SET_H_ERRNO (&_res
, NETDB_INTERNAL
);
564 int result
= __res_context_send (ctx
, buf
, buflen
, NULL
, 0, ans
, anssiz
,
565 NULL
, NULL
, NULL
, NULL
, NULL
);
566 __resolv_context_put (ctx
);
571 res_nsend (res_state statp
, const unsigned char *buf
, int buflen
,
572 unsigned char *ans
, int anssiz
)
574 return context_send_common
575 (__resolv_context_get_override (statp
), buf
, buflen
, ans
, anssiz
);
579 res_send (const unsigned char *buf
, int buflen
, unsigned char *ans
, int anssiz
)
581 return context_send_common
582 (__resolv_context_get (), buf
, buflen
, ans
, anssiz
);
587 static struct sockaddr
*
588 get_nsaddr (res_state statp
, unsigned int n
)
590 assert (n
< statp
->nscount
);
592 if (statp
->nsaddr_list
[n
].sin_family
== 0 && EXT(statp
).nsaddrs
[n
] != NULL
)
593 /* EXT(statp).nsaddrs[n] holds an address that is larger than
594 struct sockaddr, and user code did not update
595 statp->nsaddr_list[n]. */
596 return (struct sockaddr
*) EXT(statp
).nsaddrs
[n
];
598 /* User code updated statp->nsaddr_list[n], or statp->nsaddr_list[n]
599 has the same content as EXT(statp).nsaddrs[n]. */
600 return (struct sockaddr
*) (void *) &statp
->nsaddr_list
[n
];
603 /* Close the resolver structure, assign zero to *RESPLEN2 if RESPLEN2
604 is not NULL, and return zero. */
606 __attribute__ ((warn_unused_result
))
607 close_and_return_error (res_state statp
, int *resplen2
)
609 __res_iclose(statp
, false);
610 if (resplen2
!= NULL
)
615 /* The send_vc function is responsible for sending a DNS query over TCP
616 to the nameserver numbered NS from the res_state STATP i.e.
617 EXT(statp).nssocks[ns]. The function supports sending both IPv4 and
618 IPv6 queries serially on the same socket.
620 Please note that for TCP there is no way to disable sending both
621 queries, unlike UDP, which honours RES_SNGLKUP and RES_SNGLKUPREOP
622 and sends the queries serially and waits for the result after each
623 sent query. This implementation should be corrected to honour these
626 Please also note that for TCP we send both queries over the same
627 socket one after another. This technically violates best practice
628 since the server is allowed to read the first query, respond, and
629 then close the socket (to service another client). If the server
630 does this, then the remaining second query in the socket data buffer
631 will cause the server to send the client an RST which will arrive
632 asynchronously and the client's OS will likely tear down the socket
633 receive buffer resulting in a potentially short read and lost
634 response data. This will force the client to retry the query again,
635 and this process may repeat until all servers and connection resets
636 are exhausted and then the query will fail. It's not known if this
637 happens with any frequency in real DNS server implementations. This
638 implementation should be corrected to use two sockets by default for
641 The query stored in BUF of BUFLEN length is sent first followed by
642 the query stored in BUF2 of BUFLEN2 length. Queries are sent
643 serially on the same socket.
645 Answers to the query are stored firstly in *ANSP up to a max of
646 *ANSSIZP bytes. If more than *ANSSIZP bytes are needed and ANSCP
647 is non-NULL (to indicate that modifying the answer buffer is allowed)
648 then malloc is used to allocate a new response buffer and ANSCP and
649 ANSP will both point to the new buffer. If more than *ANSSIZP bytes
650 are needed but ANSCP is NULL, then as much of the response as
651 possible is read into the buffer, but the results will be truncated.
652 When truncation happens because of a small answer buffer the DNS
653 packet's header field TC will be set to 1, indicating a truncated
654 message and the rest of the socket data will be read and discarded.
656 Answers to the query are stored secondly in *ANSP2 up to a max of
657 *ANSSIZP2 bytes, with the actual response length stored in
658 *RESPLEN2. If more than *ANSSIZP2 bytes are needed and ANSP2
659 is non-NULL (required for a second query) then malloc is used to
660 allocate a new response buffer, *ANSSIZP2 is set to the new buffer
661 size and *ANSP2_MALLOCED is set to 1.
663 The ANSP2_MALLOCED argument will eventually be removed as the
664 change in buffer pointer can be used to detect the buffer has
665 changed and that the caller should use free on the new buffer.
667 Note that the answers may arrive in any order from the server and
668 therefore the first and second answer buffers may not correspond to
669 the first and second queries.
671 It is not supported to call this function with a non-NULL ANSP2
672 but a NULL ANSCP. Put another way, you can call send_vc with a
673 single unmodifiable buffer or two modifiable buffers, but no other
674 combination is supported.
676 It is the caller's responsibility to free the malloc allocated
677 buffers by detecting that the pointers have changed from their
678 original values i.e. *ANSCP or *ANSP2 has changed.
680 If errors are encountered then *TERRNO is set to an appropriate
681 errno value and a zero result is returned for a recoverable error,
682 and a less-than zero result is returned for a non-recoverable error.
684 If no errors are encountered then *TERRNO is left unmodified and
685 the length of the first response in bytes is returned. */
687 send_vc(res_state statp
,
688 const u_char
*buf
, int buflen
, const u_char
*buf2
, int buflen2
,
689 u_char
**ansp
, int *anssizp
,
690 int *terrno
, int ns
, u_char
**anscp
, u_char
**ansp2
, int *anssizp2
,
691 int *resplen2
, int *ansp2_malloced
)
693 const HEADER
*hp
= (HEADER
*) buf
;
694 const HEADER
*hp2
= (HEADER
*) buf2
;
695 HEADER
*anhp
= (HEADER
*) *ansp
;
696 struct sockaddr
*nsap
= get_nsaddr (statp
, ns
);
697 int truncating
, connreset
, n
;
698 /* On some architectures compiler might emit a warning indicating
699 'resplen' may be used uninitialized. However if buf2 == NULL
700 then this code won't be executed; if buf2 != NULL, then first
701 time round the loop recvresp1 and recvresp2 will be 0 so this
702 code won't be executed but "thisresplenp = &resplen;" followed
703 by "*thisresplenp = rlen;" will be executed so that subsequent
704 times round the loop resplen has been initialized. So this is
707 DIAG_PUSH_NEEDS_COMMENT
;
708 DIAG_IGNORE_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
710 DIAG_POP_NEEDS_COMMENT
;
720 /* Are we still talking to whom we want to talk to? */
721 if (statp
->_vcsock
>= 0 && (statp
->_flags
& RES_F_VC
) != 0) {
722 struct sockaddr_in6 peer
;
723 socklen_t size
= sizeof peer
;
725 if (getpeername(statp
->_vcsock
,
726 (struct sockaddr
*)&peer
, &size
) < 0 ||
727 !sock_eq(&peer
, (struct sockaddr_in6
*) nsap
)) {
728 __res_iclose(statp
, false);
729 statp
->_flags
&= ~RES_F_VC
;
733 if (statp
->_vcsock
< 0 || (statp
->_flags
& RES_F_VC
) == 0) {
734 if (statp
->_vcsock
>= 0)
735 __res_iclose(statp
, false);
737 statp
->_vcsock
= socket
738 (nsap
->sa_family
, SOCK_STREAM
| SOCK_CLOEXEC
, 0);
739 if (statp
->_vcsock
< 0) {
741 if (resplen2
!= NULL
)
746 if (connect(statp
->_vcsock
, nsap
,
747 nsap
->sa_family
== AF_INET
748 ? sizeof (struct sockaddr_in
)
749 : sizeof (struct sockaddr_in6
)) < 0) {
751 return close_and_return_error (statp
, resplen2
);
753 statp
->_flags
|= RES_F_VC
;
757 * Send length & message
759 len
= htons ((u_short
) buflen
);
760 evConsIovec(&len
, INT16SZ
, &iov
[0]);
761 evConsIovec((void*)buf
, buflen
, &iov
[1]);
763 ssize_t explen
= INT16SZ
+ buflen
;
765 len2
= htons ((u_short
) buflen2
);
766 evConsIovec(&len2
, INT16SZ
, &iov
[2]);
767 evConsIovec((void*)buf2
, buflen2
, &iov
[3]);
769 explen
+= INT16SZ
+ buflen2
;
771 if (TEMP_FAILURE_RETRY (writev(statp
->_vcsock
, iov
, niov
)) != explen
) {
773 return close_and_return_error (statp
, resplen2
);
776 * Receive length & response
779 /* Skip the second response if there is no second query.
780 To do that we mark the second response as received. */
781 int recvresp2
= buf2
== NULL
;
784 cp
= (u_char
*)&rlen16
;
785 len
= sizeof(rlen16
);
786 while ((n
= TEMP_FAILURE_RETRY (read(statp
->_vcsock
, cp
,
795 * A long running process might get its TCP
796 * connection reset if the remote server was
797 * restarted. Requery the server instead of
798 * trying a new one. When there is only one
799 * server, this means that a query might work
800 * instead of failing. We only allow one reset
801 * per query to prevent looping.
803 if (*terrno
== ECONNRESET
&& !connreset
)
805 __res_iclose (statp
, false);
809 return close_and_return_error (statp
, resplen2
);
811 int rlen
= ntohs (rlen16
);
816 if ((recvresp1
| recvresp2
) == 0 || buf2
== NULL
) {
817 /* We have not received any responses
818 yet or we only have one response to
820 thisanssizp
= anssizp
;
821 thisansp
= anscp
?: ansp
;
822 assert (anscp
!= NULL
|| ansp2
== NULL
);
823 thisresplenp
= &resplen
;
825 thisanssizp
= anssizp2
;
827 thisresplenp
= resplen2
;
829 anhp
= (HEADER
*) *thisansp
;
831 *thisresplenp
= rlen
;
832 /* Is the answer buffer too small? */
833 if (*thisanssizp
< rlen
) {
834 /* If the current buffer is not the static
835 user-supplied buffer then we can reallocate
837 if (thisansp
!= NULL
&& thisansp
!= ansp
) {
838 /* Always allocate MAXPACKET, callers expect
839 this specific size. */
840 u_char
*newp
= malloc (MAXPACKET
);
844 return close_and_return_error (statp
, resplen2
);
846 *thisanssizp
= MAXPACKET
;
848 if (thisansp
== ansp2
)
850 anhp
= (HEADER
*) newp
;
851 /* A uint16_t can't be larger than MAXPACKET
852 thus it's safe to allocate MAXPACKET but
853 read RLEN bytes instead. */
862 if (__glibc_unlikely (len
< HFIXEDSZ
)) {
864 * Undersized message.
867 return close_and_return_error (statp
, resplen2
);
871 while (len
!= 0 && (n
= read(statp
->_vcsock
, (char *)cp
, (int)len
)) > 0){
875 if (__glibc_unlikely (n
<= 0)) {
877 return close_and_return_error (statp
, resplen2
);
879 if (__glibc_unlikely (truncating
)) {
881 * Flush rest of answer so connection stays in synch.
884 len
= rlen
- *thisanssizp
;
888 n
= read(statp
->_vcsock
, junk
,
889 (len
> sizeof junk
) ? sizeof junk
: len
);
897 * If the calling application has bailed out of
898 * a previous call and failed to arrange to have
899 * the circuit closed or the server has got
900 * itself confused, then drop the packet and
901 * wait for the correct one.
903 if ((recvresp1
|| hp
->id
!= anhp
->id
)
904 && (recvresp2
|| hp2
->id
!= anhp
->id
))
907 /* Mark which reply we received. */
908 if (recvresp1
== 0 && hp
->id
== anhp
->id
)
912 /* Repeat waiting if we have a second answer to arrive. */
913 if ((recvresp1
& recvresp2
) == 0)
917 * All is well, or the error is fatal. Signal that the
918 * next nameserver ought not be tried.
924 reopen (res_state statp
, int *terrno
, int ns
)
926 if (EXT(statp
).nssocks
[ns
] == -1) {
927 struct sockaddr
*nsap
= get_nsaddr (statp
, ns
);
930 /* only try IPv6 if IPv6 NS and if not failed before */
931 if (nsap
->sa_family
== AF_INET6
&& !statp
->ipv6_unavail
) {
932 EXT(statp
).nssocks
[ns
] = socket
934 SOCK_DGRAM
| SOCK_NONBLOCK
| SOCK_CLOEXEC
, 0);
935 if (EXT(statp
).nssocks
[ns
] < 0)
936 statp
->ipv6_unavail
= errno
== EAFNOSUPPORT
;
937 slen
= sizeof (struct sockaddr_in6
);
938 } else if (nsap
->sa_family
== AF_INET
) {
939 EXT(statp
).nssocks
[ns
] = socket
941 SOCK_DGRAM
| SOCK_NONBLOCK
| SOCK_CLOEXEC
, 0);
942 slen
= sizeof (struct sockaddr_in
);
944 if (EXT(statp
).nssocks
[ns
] < 0) {
950 * On a 4.3BSD+ machine (client and server,
951 * actually), sending to a nameserver datagram
952 * port with no nameserver will cause an
953 * ICMP port unreachable message to be returned.
954 * If our datagram socket is "connected" to the
955 * server, we get an ECONNREFUSED error on the next
956 * socket operation, and select returns if the
957 * error message is received. We can thus detect
958 * the absence of a nameserver without timing out.
960 /* With GCC 5.3 when compiling with -Os the compiler
961 emits a warning that slen may be used uninitialized,
962 but that is never true. Both slen and
963 EXT(statp).nssocks[ns] are initialized together or
964 the function return -1 before control flow reaches
965 the call to connect with slen. */
966 DIAG_PUSH_NEEDS_COMMENT
;
967 DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
968 if (connect(EXT(statp
).nssocks
[ns
], nsap
, slen
) < 0) {
969 DIAG_POP_NEEDS_COMMENT
;
970 __res_iclose(statp
, false);
978 /* The send_dg function is responsible for sending a DNS query over UDP
979 to the nameserver numbered NS from the res_state STATP i.e.
980 EXT(statp).nssocks[ns]. The function supports IPv4 and IPv6 queries
981 along with the ability to send the query in parallel for both stacks
982 (default) or serially (RES_SNGLKUP). It also supports serial lookup
983 with a close and reopen of the socket used to talk to the server
984 (RES_SNGLKUPREOP) to work around broken name servers.
986 The query stored in BUF of BUFLEN length is sent first followed by
987 the query stored in BUF2 of BUFLEN2 length. Queries are sent
988 in parallel (default) or serially (RES_SNGLKUP or RES_SNGLKUPREOP).
990 Answers to the query are stored firstly in *ANSP up to a max of
991 *ANSSIZP bytes. If more than *ANSSIZP bytes are needed and ANSCP
992 is non-NULL (to indicate that modifying the answer buffer is allowed)
993 then malloc is used to allocate a new response buffer and ANSCP and
994 ANSP will both point to the new buffer. If more than *ANSSIZP bytes
995 are needed but ANSCP is NULL, then as much of the response as
996 possible is read into the buffer, but the results will be truncated.
997 When truncation happens because of a small answer buffer the DNS
998 packet's header field TC will be set to 1, indicating a truncated
999 message, while the rest of the UDP packet is discarded.
1001 Answers to the query are stored secondly in *ANSP2 up to a max of
1002 *ANSSIZP2 bytes, with the actual response length stored in
1003 *RESPLEN2. If more than *ANSSIZP2 bytes are needed and ANSP2
1004 is non-NULL (required for a second query) then malloc is used to
1005 allocate a new response buffer, *ANSSIZP2 is set to the new buffer
1006 size and *ANSP2_MALLOCED is set to 1.
1008 The ANSP2_MALLOCED argument will eventually be removed as the
1009 change in buffer pointer can be used to detect the buffer has
1010 changed and that the caller should use free on the new buffer.
1012 Note that the answers may arrive in any order from the server and
1013 therefore the first and second answer buffers may not correspond to
1014 the first and second queries.
1016 It is not supported to call this function with a non-NULL ANSP2
1017 but a NULL ANSCP. Put another way, you can call send_dg with a
1018 single unmodifiable buffer or two modifiable buffers, but no other
1019 combination is supported.
1021 It is the caller's responsibility to free the malloc allocated
1022 buffers by detecting that the pointers have changed from their
1023 original values i.e. *ANSCP or *ANSP2 has changed.
1025 If an answer is truncated because of UDP datagram DNS limits then
1026 *V_CIRCUIT is set to 1 and the return value non-zero to indicate to
1027 the caller to retry with TCP. The value *GOTSOMEWHERE is set to 1
1028 if any progress was made reading a response from the nameserver and
1029 is used by the caller to distinguish between ECONNREFUSED and
1030 ETIMEDOUT (the latter if *GOTSOMEWHERE is 1).
1032 If errors are encountered then *TERRNO is set to an appropriate
1033 errno value and a zero result is returned for a recoverable error,
1034 and a less-than zero result is returned for a non-recoverable error.
1036 If no errors are encountered then *TERRNO is left unmodified and
1037 the length of the first response in bytes is returned. */
1039 send_dg(res_state statp
,
1040 const u_char
*buf
, int buflen
, const u_char
*buf2
, int buflen2
,
1041 u_char
**ansp
, int *anssizp
,
1042 int *terrno
, int ns
, int *v_circuit
, int *gotsomewhere
, u_char
**anscp
,
1043 u_char
**ansp2
, int *anssizp2
, int *resplen2
, int *ansp2_malloced
)
1045 const HEADER
*hp
= (HEADER
*) buf
;
1046 const HEADER
*hp2
= (HEADER
*) buf2
;
1047 struct timespec now
, timeout
, finish
;
1048 struct pollfd pfd
[1];
1050 struct sockaddr_in6 from
;
1055 * Compute time for the total operation.
1057 int seconds
= (statp
->retrans
<< ns
);
1059 seconds
/= statp
->nscount
;
1062 bool single_request_reopen
= (statp
->options
& RES_SNGLKUPREOP
) != 0;
1063 bool single_request
= (((statp
->options
& RES_SNGLKUP
) != 0)
1064 | single_request_reopen
);
1065 int save_gotsomewhere
= *gotsomewhere
;
1069 retval
= reopen (statp
, terrno
, ns
);
1072 if (resplen2
!= NULL
)
1078 evConsTime(&timeout
, seconds
, 0);
1079 evAddTime(&finish
, &now
, &timeout
);
1080 int need_recompute
= 0;
1083 /* Skip the second response if there is no second query.
1084 To do that we mark the second response as received. */
1085 int recvresp2
= buf2
== NULL
;
1086 pfd
[0].fd
= EXT(statp
).nssocks
[ns
];
1087 pfd
[0].events
= POLLOUT
;
1089 if (need_recompute
) {
1092 if (evCmpTime(finish
, now
) <= 0) {
1094 return close_and_return_error (statp
, resplen2
);
1096 evSubTime(&timeout
, &finish
, &now
);
1099 /* Convert struct timespec in milliseconds. */
1100 ptimeout
= timeout
.tv_sec
* 1000 + timeout
.tv_nsec
/ 1000000;
1104 n
= __poll (pfd
, 1, 0);
1105 if (__glibc_unlikely (n
== 0)) {
1106 n
= __poll (pfd
, 1, ptimeout
);
1110 if (resplen
> 1 && (recvresp1
|| (buf2
!= NULL
&& recvresp2
)))
1112 /* There are quite a few broken name servers out
1113 there which don't handle two outstanding
1114 requests from the same source. There are also
1115 broken firewall settings. If we time out after
1116 having received one answer switch to the mode
1117 where we send the second request only once we
1118 have received the first answer. */
1119 if (!single_request
)
1121 statp
->options
|= RES_SNGLKUP
;
1122 single_request
= true;
1123 *gotsomewhere
= save_gotsomewhere
;
1126 else if (!single_request_reopen
)
1128 statp
->options
|= RES_SNGLKUPREOP
;
1129 single_request_reopen
= true;
1130 *gotsomewhere
= save_gotsomewhere
;
1131 __res_iclose (statp
, false);
1140 if (resplen2
!= NULL
)
1146 goto recompute_resend
;
1151 if (pfd
[0].revents
& POLLOUT
) {
1152 #ifndef __ASSUME_SENDMMSG
1153 static int have_sendmmsg
;
1155 # define have_sendmmsg 1
1157 if (have_sendmmsg
>= 0 && nwritten
== 0 && buf2
!= NULL
1161 { .iov_base
= (void *) buf
, .iov_len
= buflen
};
1163 { .iov_base
= (void *) buf2
, .iov_len
= buflen2
};
1164 struct mmsghdr reqs
[2] =
1182 int ndg
= __sendmmsg (pfd
[0].fd
, reqs
, 2, MSG_NOSIGNAL
);
1183 if (__glibc_likely (ndg
== 2))
1185 if (reqs
[0].msg_len
!= buflen
1186 || reqs
[1].msg_len
!= buflen2
)
1189 pfd
[0].events
= POLLIN
;
1192 else if (ndg
== 1 && reqs
[0].msg_len
== buflen
)
1194 else if (ndg
< 0 && (errno
== EINTR
|| errno
== EAGAIN
))
1195 goto recompute_resend
;
1198 #ifndef __ASSUME_SENDMMSG
1199 if (__glibc_unlikely (have_sendmmsg
== 0))
1201 if (ndg
< 0 && errno
== ENOSYS
)
1211 return close_and_return_error (statp
, resplen2
);
1217 #ifndef __ASSUME_SENDMMSG
1221 sr
= send (pfd
[0].fd
, buf2
, buflen2
, MSG_NOSIGNAL
);
1223 sr
= send (pfd
[0].fd
, buf
, buflen
, MSG_NOSIGNAL
);
1225 if (sr
!= (nwritten
!= 0 ? buflen2
: buflen
)) {
1226 if (errno
== EINTR
|| errno
== EAGAIN
)
1227 goto recompute_resend
;
1228 return close_and_return_error (statp
, resplen2
);
1231 if (nwritten
!= 0 || buf2
== NULL
|| single_request
)
1232 pfd
[0].events
= POLLIN
;
1234 pfd
[0].events
= POLLIN
| POLLOUT
;
1238 } else if (pfd
[0].revents
& POLLIN
) {
1243 if ((recvresp1
| recvresp2
) == 0 || buf2
== NULL
) {
1244 /* We have not received any responses
1245 yet or we only have one response to
1247 thisanssizp
= anssizp
;
1248 thisansp
= anscp
?: ansp
;
1249 assert (anscp
!= NULL
|| ansp2
== NULL
);
1250 thisresplenp
= &resplen
;
1252 thisanssizp
= anssizp2
;
1254 thisresplenp
= resplen2
;
1257 if (*thisanssizp
< MAXPACKET
1258 /* If the current buffer is not the static
1259 user-supplied buffer then we can reallocate
1261 && (thisansp
!= NULL
&& thisansp
!= ansp
)
1263 /* Is the size too small? */
1264 && (ioctl (pfd
[0].fd
, FIONREAD
, thisresplenp
) < 0
1265 || *thisanssizp
< *thisresplenp
)
1268 /* Always allocate MAXPACKET, callers expect
1269 this specific size. */
1270 u_char
*newp
= malloc (MAXPACKET
);
1272 *thisanssizp
= MAXPACKET
;
1274 if (thisansp
== ansp2
)
1275 *ansp2_malloced
= 1;
1278 /* We could end up with truncation if anscp was NULL
1279 (not allowed to change caller's buffer) and the
1280 response buffer size is too small. This isn't a
1281 reliable way to detect truncation because the ioctl
1282 may be an inaccurate report of the UDP message size.
1283 Therefore we use this only to issue debug output.
1284 To do truncation accurately with UDP we need
1285 MSG_TRUNC which is only available on Linux. We
1286 can abstract out the Linux-specific feature in the
1287 future to detect truncation. */
1288 HEADER
*anhp
= (HEADER
*) *thisansp
;
1289 socklen_t fromlen
= sizeof(struct sockaddr_in6
);
1290 assert (sizeof(from
) <= fromlen
);
1291 *thisresplenp
= recvfrom(pfd
[0].fd
, (char*)*thisansp
,
1293 (struct sockaddr
*)&from
, &fromlen
);
1294 if (__glibc_unlikely (*thisresplenp
<= 0)) {
1295 if (errno
== EINTR
|| errno
== EAGAIN
) {
1299 return close_and_return_error (statp
, resplen2
);
1302 if (__glibc_unlikely (*thisresplenp
< HFIXEDSZ
)) {
1304 * Undersized message.
1307 return close_and_return_error (statp
, resplen2
);
1309 if ((recvresp1
|| hp
->id
!= anhp
->id
)
1310 && (recvresp2
|| hp2
->id
!= anhp
->id
)) {
1312 * response from old query, ignore it.
1313 * XXX - potential security hazard could
1318 if (!(statp
->options
& RES_INSECURE1
) &&
1319 !res_ourserver_p(statp
, &from
)) {
1321 * response from wrong server? ignore it.
1322 * XXX - potential security hazard could
1327 if (!(statp
->options
& RES_INSECURE2
)
1328 && (recvresp1
|| !res_queriesmatch(buf
, buf
+ buflen
,
1332 && (recvresp2
|| !res_queriesmatch(buf2
, buf2
+ buflen2
,
1337 * response contains wrong query? ignore it.
1338 * XXX - potential security hazard could
1343 if (anhp
->rcode
== SERVFAIL
||
1344 anhp
->rcode
== NOTIMP
||
1345 anhp
->rcode
== REFUSED
) {
1347 if (recvresp1
|| (buf2
!= NULL
&& recvresp2
)) {
1353 /* No data from the first reply. */
1355 /* We are waiting for a possible second reply. */
1356 if (hp
->id
== anhp
->id
)
1364 /* don't retry if called from dig */
1366 return close_and_return_error (statp
, resplen2
);
1367 __res_iclose(statp
, false);
1369 if (anhp
->rcode
== NOERROR
&& anhp
->ancount
== 0
1370 && anhp
->aa
== 0 && anhp
->ra
== 0 && anhp
->arcount
== 0) {
1373 if (!(statp
->options
& RES_IGNTC
) && anhp
->tc
) {
1375 * To get the rest of answer,
1376 * use TCP with same server.
1379 __res_iclose(statp
, false);
1380 // XXX if we have received one reply we could
1381 // XXX use it and not repeat it over TCP...
1382 if (resplen2
!= NULL
)
1386 /* Mark which reply we received. */
1387 if (recvresp1
== 0 && hp
->id
== anhp
->id
)
1391 /* Repeat waiting if we have a second answer to arrive. */
1392 if ((recvresp1
& recvresp2
) == 0) {
1393 if (single_request
) {
1394 pfd
[0].events
= POLLOUT
;
1395 if (single_request_reopen
) {
1396 __res_iclose (statp
, false);
1397 retval
= reopen (statp
, terrno
, ns
);
1400 if (resplen2
!= NULL
)
1404 pfd
[0].fd
= EXT(statp
).nssocks
[ns
];
1409 /* All is well. We have received both responses (if
1410 two responses were requested). */
1412 } else if (pfd
[0].revents
& (POLLERR
| POLLHUP
| POLLNVAL
))
1413 /* Something went wrong. We can stop trying. */
1414 return close_and_return_error (statp
, resplen2
);
1416 /* poll should not have returned > 0 in this case. */
1422 sock_eq(struct sockaddr_in6
*a1
, struct sockaddr_in6
*a2
) {
1423 if (a1
->sin6_family
== a2
->sin6_family
) {
1424 if (a1
->sin6_family
== AF_INET
)
1425 return ((((struct sockaddr_in
*)a1
)->sin_port
==
1426 ((struct sockaddr_in
*)a2
)->sin_port
) &&
1427 (((struct sockaddr_in
*)a1
)->sin_addr
.s_addr
==
1428 ((struct sockaddr_in
*)a2
)->sin_addr
.s_addr
));
1430 return ((a1
->sin6_port
== a2
->sin6_port
) &&
1431 !memcmp(&a1
->sin6_addr
, &a2
->sin6_addr
,
1432 sizeof (struct in6_addr
)));
1434 if (a1
->sin6_family
== AF_INET
) {
1435 struct sockaddr_in6
*sap
= a1
;
1438 } /* assumes that AF_INET and AF_INET6 are the only possibilities */
1439 return ((a1
->sin6_port
== ((struct sockaddr_in
*)a2
)->sin_port
) &&
1440 IN6_IS_ADDR_V4MAPPED(&a1
->sin6_addr
) &&
1441 (a1
->sin6_addr
.s6_addr32
[3] ==
1442 ((struct sockaddr_in
*)a2
)->sin_addr
.s_addr
));