1 /* Copyright (C) 2016-2019 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <https://www.gnu.org/licenses/>. */
19 * Copyright (c) 1985, 1989, 1993
20 * The Regents of the University of California. All rights reserved.
22 * Redistribution and use in source and binary forms, with or without
23 * modification, are permitted provided that the following conditions
25 * 1. Redistributions of source code must retain the above copyright
26 * notice, this list of conditions and the following disclaimer.
27 * 2. Redistributions in binary form must reproduce the above copyright
28 * notice, this list of conditions and the following disclaimer in the
29 * documentation and/or other materials provided with the distribution.
30 * 4. Neither the name of the University nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
34 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48 * Portions Copyright (c) 1993 by Digital Equipment Corporation.
50 * Permission to use, copy, modify, and distribute this software for any
51 * purpose with or without fee is hereby granted, provided that the above
52 * copyright notice and this permission notice appear in all copies, and that
53 * the name of Digital Equipment Corporation not be used in advertising or
54 * publicity pertaining to distribution of the document or software without
55 * specific, written prior permission.
57 * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
58 * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
59 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
60 * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
61 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
62 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
63 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
68 * Portions Copyright (c) 1996-1999 by Internet Software Consortium.
70 * Permission to use, copy, modify, and distribute this software for any
71 * purpose with or without fee is hereby granted, provided that the above
72 * copyright notice and this permission notice appear in all copies.
74 * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
75 * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
76 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
77 * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
78 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
79 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
80 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
85 * Send query to name server and wait for reply.
89 #include <sys/types.h>
90 #include <sys/param.h>
92 #include <sys/socket.h>
96 #include <netinet/in.h>
97 #include <arpa/nameser.h>
98 #include <arpa/inet.h>
99 #include <sys/ioctl.h>
104 #include <resolv/resolv-internal.h>
105 #include <resolv/resolv_context.h>
110 #include <kernel-features.h>
111 #include <libc-diag.h>
112 #include <random-bits.h>
/* Size of a malloc'ed answer buffer: the larger of PACKETSZ and 64 KiB.
   Exactly one of the two definitions must be active; defining both
   unconditionally is a macro redefinition.  */
#if PACKETSZ > 65536
#define MAXPACKET       PACKETSZ
#else
#define MAXPACKET       65536
#endif
120 /* From ev_streams.c. */
/* Initialize *VEC so that it describes the CNT bytes starting at BUF.
   The structure is first filled with the poison byte 0xf5 so that any
   member not explicitly set below is recognizably invalid.  */
static inline void
__attribute ((always_inline))
evConsIovec(void *buf, size_t cnt, struct iovec *vec) {
	memset(vec, 0xf5, sizeof (*vec));
	vec->iov_base = buf;
	vec->iov_len = cnt;
}
130 /* From ev_timers.c. */
132 #define BILLION 1000000000
/* Store SEC seconds and NSEC nanoseconds in *RES.  */
static inline void
evConsTime(struct timespec *res, time_t sec, long nsec) {
	res->tv_sec = sec;
	res->tv_nsec = nsec;
}
/* Set *RES to *ADDEND1 + *ADDEND2.  Both inputs are expected to be
   normalized (tv_nsec below one billion), so at most one carry into
   the seconds field is needed.  */
static inline void
evAddTime(struct timespec *res, const struct timespec *addend1,
	  const struct timespec *addend2) {
	res->tv_sec = addend1->tv_sec + addend2->tv_sec;
	res->tv_nsec = addend1->tv_nsec + addend2->tv_nsec;
	if (res->tv_nsec >= BILLION) {
		/* Carry the overflowing second.  */
		res->tv_sec++;
		res->tv_nsec -= BILLION;
	}
}
/* Set *RES to *MINUEND - *SUBTRAHEND.  Both inputs are expected to be
   normalized; when the nanosecond field of the subtrahend is larger,
   one second is borrowed from the seconds field.  */
static inline void
evSubTime(struct timespec *res, const struct timespec *minuend,
	  const struct timespec *subtrahend) {
	res->tv_sec = minuend->tv_sec - subtrahend->tv_sec;
	if (minuend->tv_nsec >= subtrahend->tv_nsec)
		res->tv_nsec = minuend->tv_nsec - subtrahend->tv_nsec;
	else {
		/* Borrow one second.  */
		res->tv_nsec = (BILLION
				- subtrahend->tv_nsec + minuend->tv_nsec);
		res->tv_sec--;
	}
}
/* Compare two time values.  Returns -1 if A is earlier than B, 1 if A
   is later than B, and 0 if they are equal.  The nanosecond fields are
   consulted only when the second fields are equal.  The fields are
   compared directly instead of subtracting them, so no difference has
   to fit into a long.  */
static int
evCmpTime(struct timespec a, struct timespec b) {
	if (a.tv_sec != b.tv_sec)
		return (a.tv_sec < b.tv_sec ? (-1) : (1));
	if (a.tv_nsec != b.tv_nsec)
		return (a.tv_nsec < b.tv_nsec ? (-1) : (1));
	return (0);
}
174 evNowTime(struct timespec
*res
) {
175 __clock_gettime(CLOCK_REALTIME
, res
);
179 #define EXT(res) ((res)->_u._ext)
183 static struct sockaddr
*get_nsaddr (res_state
, unsigned int);
184 static int send_vc(res_state
, const u_char
*, int,
186 u_char
**, int *, int *, int, u_char
**,
187 u_char
**, int *, int *, int *);
188 static int send_dg(res_state
, const u_char
*, int,
190 u_char
**, int *, int *, int,
191 int *, int *, u_char
**,
192 u_char
**, int *, int *, int *);
193 static int sock_eq(struct sockaddr_in6
*, struct sockaddr_in6
*);
198 * res_isourserver(ina)
199 * looks up "ina" in _res.ns_addr_list[]
204 * paul vixie, 29may94
207 res_ourserver_p(const res_state statp
, const struct sockaddr_in6
*inp
)
211 if (inp
->sin6_family
== AF_INET
) {
212 struct sockaddr_in
*in4p
= (struct sockaddr_in
*) inp
;
213 in_port_t port
= in4p
->sin_port
;
214 in_addr_t addr
= in4p
->sin_addr
.s_addr
;
216 for (ns
= 0; ns
< statp
->nscount
; ns
++) {
217 const struct sockaddr_in
*srv
=
218 (struct sockaddr_in
*) get_nsaddr (statp
, ns
);
220 if ((srv
->sin_family
== AF_INET
) &&
221 (srv
->sin_port
== port
) &&
222 (srv
->sin_addr
.s_addr
== INADDR_ANY
||
223 srv
->sin_addr
.s_addr
== addr
))
226 } else if (inp
->sin6_family
== AF_INET6
) {
227 for (ns
= 0; ns
< statp
->nscount
; ns
++) {
228 const struct sockaddr_in6
*srv
229 = (struct sockaddr_in6
*) get_nsaddr (statp
, ns
);
230 if ((srv
->sin6_family
== AF_INET6
) &&
231 (srv
->sin6_port
== inp
->sin6_port
) &&
232 !(memcmp(&srv
->sin6_addr
, &in6addr_any
,
233 sizeof (struct in6_addr
)) &&
234 memcmp(&srv
->sin6_addr
, &inp
->sin6_addr
,
235 sizeof (struct in6_addr
))))
243 res_isourserver (const struct sockaddr_in
*inp
)
245 return res_ourserver_p (&_res
, (const struct sockaddr_in6
*) inp
);
249 * res_nameinquery(name, type, class, buf, eom)
250 * look for (name,type,class) in the query section of packet (buf,eom)
252 * buf + HFIXEDSZ <= eom
258 * paul vixie, 29may94
261 res_nameinquery(const char *name
, int type
, int class,
262 const u_char
*buf
, const u_char
*eom
)
264 const u_char
*cp
= buf
+ HFIXEDSZ
;
265 int qdcount
= ntohs(((HEADER
*)buf
)->qdcount
);
267 while (qdcount
-- > 0) {
268 char tname
[MAXDNAME
+1];
269 int n
, ttype
, tclass
;
271 n
= dn_expand(buf
, eom
, cp
, tname
, sizeof tname
);
275 if (cp
+ 2 * INT16SZ
> eom
)
278 NS_GET16(tclass
, cp
);
279 if (ttype
== type
&& tclass
== class &&
280 ns_samename(tname
, name
) == 1)
285 libresolv_hidden_def (res_nameinquery
)
287 /* Returns a shift value for the name server index. Used to implement
290 nameserver_offset (struct __res_state
*statp
)
292 /* If we only have one name server or rotation is disabled, return
293 offset 0 (no rotation). */
294 unsigned int nscount
= statp
->nscount
;
295 if (nscount
<= 1 || !(statp
->options
& RES_ROTATE
))
298 /* Global offset. The lowest bit indicates whether the offset has
299 been initialized with a random value. Use relaxed MO to access
300 global_offset because all we need is a sequence of roughly
302 static unsigned int global_offset
;
303 unsigned int offset
= atomic_fetch_add_relaxed (&global_offset
, 2);
304 if ((offset
& 1) == 0)
306 /* Initialization is required. */
307 offset
= random_bits ();
308 /* The lowest bit is the most random. Preserve it. */
311 /* Store the new starting value. atomic_fetch_add_relaxed
312 returns the old value, so emulate that by storing the new
313 (incremented) value. Concurrent initialization with
314 different random values is harmless. */
315 atomic_store_relaxed (&global_offset
, (offset
| 1) + 2);
318 /* Remove the initialization bit. */
321 /* Avoid the division in the most common cases. */
331 return offset
% nscount
;
335 /* Clear the AD bit unless the trust-ad option was specified in the
336 resolver configuration. */
338 mask_ad_bit (struct resolv_context
*ctx
, void *buf
)
340 if (!(ctx
->resp
->options
& RES_TRUSTAD
))
341 ((HEADER
*) buf
)->ad
= 0;
345 * res_queriesmatch(buf1, eom1, buf2, eom2)
346 * is there a 1:1 mapping of (name,type,class)
347 * in (buf1,eom1) and (buf2,eom2)?
350 * 0 : not a 1:1 mapping
351 * >0 : is a 1:1 mapping
353 * paul vixie, 29may94
356 res_queriesmatch(const u_char
*buf1
, const u_char
*eom1
,
357 const u_char
*buf2
, const u_char
*eom2
)
359 if (buf1
+ HFIXEDSZ
> eom1
|| buf2
+ HFIXEDSZ
> eom2
)
363 * Only header section present in replies to
364 * dynamic update packets.
366 if ((((HEADER
*)buf1
)->opcode
== ns_o_update
) &&
367 (((HEADER
*)buf2
)->opcode
== ns_o_update
))
370 /* Note that we initially do not convert QDCOUNT to the host byte
371 order. We can compare it with the second buffer's QDCOUNT
372 value without doing this. */
373 int qdcount
= ((HEADER
*)buf1
)->qdcount
;
374 if (qdcount
!= ((HEADER
*)buf2
)->qdcount
)
377 qdcount
= htons (qdcount
);
378 const u_char
*cp
= buf1
+ HFIXEDSZ
;
380 while (qdcount
-- > 0) {
381 char tname
[MAXDNAME
+1];
382 int n
, ttype
, tclass
;
384 n
= dn_expand(buf1
, eom1
, cp
, tname
, sizeof tname
);
388 if (cp
+ 2 * INT16SZ
> eom1
)
391 NS_GET16(tclass
, cp
);
392 if (!res_nameinquery(tname
, ttype
, tclass
, buf2
, eom2
))
397 libresolv_hidden_def (res_queriesmatch
)
400 __res_context_send (struct resolv_context
*ctx
,
401 const unsigned char *buf
, int buflen
,
402 const unsigned char *buf2
, int buflen2
,
403 unsigned char *ans
, int anssiz
,
404 unsigned char **ansp
, unsigned char **ansp2
,
405 int *nansp2
, int *resplen2
, int *ansp2_malloced
)
407 struct __res_state
*statp
= ctx
->resp
;
408 int gotsomewhere
, terrno
, try, v_circuit
, resplen
;
409 /* On some architectures send_vc is inlined and the compiler might emit
410 a warning indicating 'resplen' may be used uninitialized. Note that
411 the warning belongs to resplen in send_vc which is used as return
412 value! There the maybe-uninitialized warning is already ignored as
413 it is a false-positive - see comment in send_vc.
414 Here the variable n is set to the return value of send_vc.
416 DIAG_PUSH_NEEDS_COMMENT
;
417 DIAG_IGNORE_NEEDS_COMMENT (9, "-Wmaybe-uninitialized");
419 DIAG_POP_NEEDS_COMMENT
;
421 if (statp
->nscount
== 0) {
426 if (anssiz
< (buf2
== NULL
? 1 : 2) * HFIXEDSZ
) {
427 __set_errno (EINVAL
);
431 v_circuit
= ((statp
->options
& RES_USEVC
)
433 || buflen2
> PACKETSZ
);
438 * If the ns_addr_list in the resolver context has changed, then
439 * invalidate our cached copy and the associated timing data.
441 if (EXT(statp
).nscount
!= 0) {
444 if (EXT(statp
).nscount
!= statp
->nscount
)
447 for (unsigned int ns
= 0; ns
< statp
->nscount
; ns
++) {
448 if (statp
->nsaddr_list
[ns
].sin_family
!= 0
449 && !sock_eq((struct sockaddr_in6
*)
450 &statp
->nsaddr_list
[ns
],
451 EXT(statp
).nsaddrs
[ns
]))
458 __res_iclose(statp
, false);
459 EXT(statp
).nscount
= 0;
464 * Maybe initialize our private copy of the ns_addr_list.
466 if (EXT(statp
).nscount
== 0) {
467 for (unsigned int ns
= 0; ns
< statp
->nscount
; ns
++) {
468 EXT(statp
).nssocks
[ns
] = -1;
469 if (statp
->nsaddr_list
[ns
].sin_family
== 0)
471 if (EXT(statp
).nsaddrs
[ns
] == NULL
)
472 EXT(statp
).nsaddrs
[ns
] =
473 malloc(sizeof (struct sockaddr_in6
));
474 if (EXT(statp
).nsaddrs
[ns
] != NULL
)
475 memset (mempcpy(EXT(statp
).nsaddrs
[ns
],
476 &statp
->nsaddr_list
[ns
],
477 sizeof (struct sockaddr_in
)),
479 sizeof (struct sockaddr_in6
)
480 - sizeof (struct sockaddr_in
));
484 EXT(statp
).nscount
= statp
->nscount
;
487 /* Name server index offset. Used to implement
489 unsigned int ns_offset
= nameserver_offset (statp
);
492 * Send request, RETRY times, or until successful.
494 for (try = 0; try < statp
->retry
; try++) {
495 for (unsigned ns_shift
= 0; ns_shift
< statp
->nscount
; ns_shift
++)
497 /* The actual name server index. This implements
499 unsigned int ns
= ns_shift
+ ns_offset
;
500 if (ns
>= statp
->nscount
)
501 ns
-= statp
->nscount
;
504 if (__glibc_unlikely (v_circuit
)) {
505 /* Use VC; at most one attempt per server. */
507 n
= send_vc(statp
, buf
, buflen
, buf2
, buflen2
,
508 &ans
, &anssiz
, &terrno
,
509 ns
, ansp
, ansp2
, nansp2
, resplen2
,
513 /* See comment at the declaration of n. */
514 DIAG_PUSH_NEEDS_COMMENT
;
515 DIAG_IGNORE_NEEDS_COMMENT (9, "-Wmaybe-uninitialized");
516 if (n
== 0 && (buf2
== NULL
|| *resplen2
== 0))
518 DIAG_POP_NEEDS_COMMENT
;
521 n
= send_dg(statp
, buf
, buflen
, buf2
, buflen2
,
522 &ans
, &anssiz
, &terrno
,
523 ns
, &v_circuit
, &gotsomewhere
, ansp
,
524 ansp2
, nansp2
, resplen2
, ansp2_malloced
);
527 if (n
== 0 && (buf2
== NULL
|| *resplen2
== 0))
530 // XXX Check whether both requests failed or
531 // XXX whether one has been answered successfully
537 /* Mask the AD bit in both responses unless it is
539 if (resplen
> HFIXEDSZ
)
542 mask_ad_bit (ctx
, *ansp
);
544 mask_ad_bit (ctx
, ans
);
546 if (resplen2
!= NULL
&& *resplen2
> HFIXEDSZ
)
547 mask_ad_bit (ctx
, *ansp2
);
550 * If we have temporarily opened a virtual circuit,
551 * or if we haven't been asked to keep a socket open,
554 if ((v_circuit
&& (statp
->options
& RES_USEVC
) == 0) ||
555 (statp
->options
& RES_STAYOPEN
) == 0) {
556 __res_iclose(statp
, false);
562 __res_iclose(statp
, false);
565 __set_errno (ECONNREFUSED
); /* no nameservers found */
567 __set_errno (ETIMEDOUT
); /* no answer obtained */
569 __set_errno (terrno
);
573 /* Common part of res_nsend and res_send. */
575 context_send_common (struct resolv_context
*ctx
,
576 const unsigned char *buf
, int buflen
,
577 unsigned char *ans
, int anssiz
)
581 RES_SET_H_ERRNO (&_res
, NETDB_INTERNAL
);
584 int result
= __res_context_send (ctx
, buf
, buflen
, NULL
, 0, ans
, anssiz
,
585 NULL
, NULL
, NULL
, NULL
, NULL
);
586 __resolv_context_put (ctx
);
591 res_nsend (res_state statp
, const unsigned char *buf
, int buflen
,
592 unsigned char *ans
, int anssiz
)
594 return context_send_common
595 (__resolv_context_get_override (statp
), buf
, buflen
, ans
, anssiz
);
/* Send the query in BUF (BUFLEN bytes) using the resolver context
   obtained from __resolv_context_get and store the answer in ANS
   (ANSSIZ bytes).  Returns the result of context_send_common.  */
int
res_send (const unsigned char *buf, int buflen, unsigned char *ans, int anssiz)
{
  return context_send_common
    (__resolv_context_get (), buf, buflen, ans, anssiz);
}
607 static struct sockaddr
*
608 get_nsaddr (res_state statp
, unsigned int n
)
610 assert (n
< statp
->nscount
);
612 if (statp
->nsaddr_list
[n
].sin_family
== 0 && EXT(statp
).nsaddrs
[n
] != NULL
)
613 /* EXT(statp).nsaddrs[n] holds an address that is larger than
614 struct sockaddr, and user code did not update
615 statp->nsaddr_list[n]. */
616 return (struct sockaddr
*) EXT(statp
).nsaddrs
[n
];
618 /* User code updated statp->nsaddr_list[n], or statp->nsaddr_list[n]
619 has the same content as EXT(statp).nsaddrs[n]. */
620 return (struct sockaddr
*) (void *) &statp
->nsaddr_list
[n
];
623 /* Close the resolver structure, assign zero to *RESPLEN2 if RESPLEN2
624 is not NULL, and return zero. */
626 __attribute__ ((warn_unused_result
))
627 close_and_return_error (res_state statp
, int *resplen2
)
629 __res_iclose(statp
, false);
630 if (resplen2
!= NULL
)
635 /* The send_vc function is responsible for sending a DNS query over TCP
636 to the nameserver numbered NS from the res_state STATP i.e.
637 EXT(statp).nssocks[ns]. The function supports sending both IPv4 and
638 IPv6 queries at the same serially on the same socket.
640 Please note that for TCP there is no way to disable sending both
641 queries, unlike UDP, which honours RES_SNGLKUP and RES_SNGLKUPREOP
642 and sends the queries serially and waits for the result after each
643 sent query. This implementation should be corrected to honour these
646 Please also note that for TCP we send both queries over the same
647 socket one after another. This technically violates best practice
648 since the server is allowed to read the first query, respond, and
649 then close the socket (to service another client). If the server
650 does this, then the remaining second query in the socket data buffer
651 will cause the server to send the client an RST which will arrive
652 asynchronously and the client's OS will likely tear down the socket
653 receive buffer resulting in a potentially short read and lost
654 response data. This will force the client to retry the query again,
655 and this process may repeat until all servers and connection resets
656 are exhausted and then the query will fail. It's not known if this
657 happens with any frequency in real DNS server implementations. This
658 implementation should be corrected to use two sockets by default for
661 The query stored in BUF of BUFLEN length is sent first followed by
662 the query stored in BUF2 of BUFLEN2 length. Queries are sent
663 serially on the same socket.
665 Answers to the query are stored firstly in *ANSP up to a max of
666 *ANSSIZP bytes. If more than *ANSSIZP bytes are needed and ANSCP
667 is non-NULL (to indicate that modifying the answer buffer is allowed)
668 then malloc is used to allocate a new response buffer and ANSCP and
669 ANSP will both point to the new buffer. If more than *ANSSIZP bytes
670 are needed but ANSCP is NULL, then as much of the response as
671 possible is read into the buffer, but the results will be truncated.
672 When truncation happens because of a small answer buffer the DNS
673 packets header field TC will bet set to 1, indicating a truncated
674 message and the rest of the socket data will be read and discarded.
676 Answers to the query are stored secondly in *ANSP2 up to a max of
677 *ANSSIZP2 bytes, with the actual response length stored in
678 *RESPLEN2. If more than *ANSSIZP bytes are needed and ANSP2
679 is non-NULL (required for a second query) then malloc is used to
680 allocate a new response buffer, *ANSSIZP2 is set to the new buffer
681 size and *ANSP2_MALLOCED is set to 1.
683 The ANSP2_MALLOCED argument will eventually be removed as the
684 change in buffer pointer can be used to detect the buffer has
685 changed and that the caller should use free on the new buffer.
687 Note that the answers may arrive in any order from the server and
688 therefore the first and second answer buffers may not correspond to
689 the first and second queries.
691 It is not supported to call this function with a non-NULL ANSP2
692 but a NULL ANSCP. Put another way, you can call send_vc with a
693 single unmodifiable buffer or two modifiable buffers, but no other
694 combination is supported.
696 It is the caller's responsibility to free the malloc allocated
697 buffers by detecting that the pointers have changed from their
698 original values i.e. *ANSCP or *ANSP2 has changed.
700 If errors are encountered then *TERRNO is set to an appropriate
701 errno value and a zero result is returned for a recoverable error,
702 and a less-than zero result is returned for a non-recoverable error.
704 If no errors are encountered then *TERRNO is left unmodified and
705 a the length of the first response in bytes is returned. */
707 send_vc(res_state statp
,
708 const u_char
*buf
, int buflen
, const u_char
*buf2
, int buflen2
,
709 u_char
**ansp
, int *anssizp
,
710 int *terrno
, int ns
, u_char
**anscp
, u_char
**ansp2
, int *anssizp2
,
711 int *resplen2
, int *ansp2_malloced
)
713 const HEADER
*hp
= (HEADER
*) buf
;
714 const HEADER
*hp2
= (HEADER
*) buf2
;
715 HEADER
*anhp
= (HEADER
*) *ansp
;
716 struct sockaddr
*nsap
= get_nsaddr (statp
, ns
);
717 int truncating
, connreset
, n
;
718 /* On some architectures compiler might emit a warning indicating
719 'resplen' may be used uninitialized. However if buf2 == NULL
720 then this code won't be executed; if buf2 != NULL, then first
721 time round the loop recvresp1 and recvresp2 will be 0 so this
722 code won't be executed but "thisresplenp = &resplen;" followed
723 by "*thisresplenp = rlen;" will be executed so that subsequent
724 times round the loop resplen has been initialized. So this is
727 DIAG_PUSH_NEEDS_COMMENT
;
728 DIAG_IGNORE_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
730 DIAG_POP_NEEDS_COMMENT
;
740 /* Are we still talking to whom we want to talk to? */
741 if (statp
->_vcsock
>= 0 && (statp
->_flags
& RES_F_VC
) != 0) {
742 struct sockaddr_in6 peer
;
743 socklen_t size
= sizeof peer
;
745 if (getpeername(statp
->_vcsock
,
746 (struct sockaddr
*)&peer
, &size
) < 0 ||
747 !sock_eq(&peer
, (struct sockaddr_in6
*) nsap
)) {
748 __res_iclose(statp
, false);
749 statp
->_flags
&= ~RES_F_VC
;
753 if (statp
->_vcsock
< 0 || (statp
->_flags
& RES_F_VC
) == 0) {
754 if (statp
->_vcsock
>= 0)
755 __res_iclose(statp
, false);
757 statp
->_vcsock
= socket
758 (nsap
->sa_family
, SOCK_STREAM
| SOCK_CLOEXEC
, 0);
759 if (statp
->_vcsock
< 0) {
761 if (resplen2
!= NULL
)
766 if (connect(statp
->_vcsock
, nsap
,
767 nsap
->sa_family
== AF_INET
768 ? sizeof (struct sockaddr_in
)
769 : sizeof (struct sockaddr_in6
)) < 0) {
771 return close_and_return_error (statp
, resplen2
);
773 statp
->_flags
|= RES_F_VC
;
777 * Send length & message
779 len
= htons ((u_short
) buflen
);
780 evConsIovec(&len
, INT16SZ
, &iov
[0]);
781 evConsIovec((void*)buf
, buflen
, &iov
[1]);
783 ssize_t explen
= INT16SZ
+ buflen
;
785 len2
= htons ((u_short
) buflen2
);
786 evConsIovec(&len2
, INT16SZ
, &iov
[2]);
787 evConsIovec((void*)buf2
, buflen2
, &iov
[3]);
789 explen
+= INT16SZ
+ buflen2
;
791 if (TEMP_FAILURE_RETRY (writev(statp
->_vcsock
, iov
, niov
)) != explen
) {
793 return close_and_return_error (statp
, resplen2
);
796 * Receive length & response
799 /* Skip the second response if there is no second query.
800 To do that we mark the second response as received. */
801 int recvresp2
= buf2
== NULL
;
804 cp
= (u_char
*)&rlen16
;
805 len
= sizeof(rlen16
);
806 while ((n
= TEMP_FAILURE_RETRY (read(statp
->_vcsock
, cp
,
815 * A long running process might get its TCP
816 * connection reset if the remote server was
817 * restarted. Requery the server instead of
818 * trying a new one. When there is only one
819 * server, this means that a query might work
820 * instead of failing. We only allow one reset
821 * per query to prevent looping.
823 if (*terrno
== ECONNRESET
&& !connreset
)
825 __res_iclose (statp
, false);
829 return close_and_return_error (statp
, resplen2
);
831 int rlen
= ntohs (rlen16
);
836 if ((recvresp1
| recvresp2
) == 0 || buf2
== NULL
) {
837 /* We have not received any responses
838 yet or we only have one response to
840 thisanssizp
= anssizp
;
841 thisansp
= anscp
?: ansp
;
842 assert (anscp
!= NULL
|| ansp2
== NULL
);
843 thisresplenp
= &resplen
;
845 thisanssizp
= anssizp2
;
847 thisresplenp
= resplen2
;
849 anhp
= (HEADER
*) *thisansp
;
851 *thisresplenp
= rlen
;
852 /* Is the answer buffer too small? */
853 if (*thisanssizp
< rlen
) {
854 /* If the current buffer is not the the static
855 user-supplied buffer then we can reallocate
857 if (thisansp
!= NULL
&& thisansp
!= ansp
) {
858 /* Always allocate MAXPACKET, callers expect
859 this specific size. */
860 u_char
*newp
= malloc (MAXPACKET
);
864 return close_and_return_error (statp
, resplen2
);
866 *thisanssizp
= MAXPACKET
;
868 if (thisansp
== ansp2
)
870 anhp
= (HEADER
*) newp
;
871 /* A uint16_t can't be larger than MAXPACKET
872 thus it's safe to allocate MAXPACKET but
873 read RLEN bytes instead. */
882 if (__glibc_unlikely (len
< HFIXEDSZ
)) {
884 * Undersized message.
887 return close_and_return_error (statp
, resplen2
);
891 while (len
!= 0 && (n
= read(statp
->_vcsock
, (char *)cp
, (int)len
)) > 0){
895 if (__glibc_unlikely (n
<= 0)) {
897 return close_and_return_error (statp
, resplen2
);
899 if (__glibc_unlikely (truncating
)) {
901 * Flush rest of answer so connection stays in synch.
904 len
= rlen
- *thisanssizp
;
908 n
= read(statp
->_vcsock
, junk
,
909 (len
> sizeof junk
) ? sizeof junk
: len
);
917 * If the calling application has bailed out of
918 * a previous call and failed to arrange to have
919 * the circuit closed or the server has got
920 * itself confused, then drop the packet and
921 * wait for the correct one.
923 if ((recvresp1
|| hp
->id
!= anhp
->id
)
924 && (recvresp2
|| hp2
->id
!= anhp
->id
))
927 /* Mark which reply we received. */
928 if (recvresp1
== 0 && hp
->id
== anhp
->id
)
932 /* Repeat waiting if we have a second answer to arrive. */
933 if ((recvresp1
& recvresp2
) == 0)
937 * All is well, or the error is fatal. Signal that the
938 * next nameserver ought not be tried.
944 reopen (res_state statp
, int *terrno
, int ns
)
946 if (EXT(statp
).nssocks
[ns
] == -1) {
947 struct sockaddr
*nsap
= get_nsaddr (statp
, ns
);
950 /* only try IPv6 if IPv6 NS and if not failed before */
951 if (nsap
->sa_family
== AF_INET6
&& !statp
->ipv6_unavail
) {
952 EXT(statp
).nssocks
[ns
] = socket
954 SOCK_DGRAM
| SOCK_NONBLOCK
| SOCK_CLOEXEC
, 0);
955 if (EXT(statp
).nssocks
[ns
] < 0)
956 statp
->ipv6_unavail
= errno
== EAFNOSUPPORT
;
957 slen
= sizeof (struct sockaddr_in6
);
958 } else if (nsap
->sa_family
== AF_INET
) {
959 EXT(statp
).nssocks
[ns
] = socket
961 SOCK_DGRAM
| SOCK_NONBLOCK
| SOCK_CLOEXEC
, 0);
962 slen
= sizeof (struct sockaddr_in
);
964 if (EXT(statp
).nssocks
[ns
] < 0) {
969 /* Enable full ICMP error reporting for this
971 if (__res_enable_icmp (nsap
->sa_family
,
972 EXT (statp
).nssocks
[ns
]) < 0)
974 int saved_errno
= errno
;
975 __res_iclose (statp
, false);
976 __set_errno (saved_errno
);
977 *terrno
= saved_errno
;
982 * On a 4.3BSD+ machine (client and server,
983 * actually), sending to a nameserver datagram
984 * port with no nameserver will cause an
985 * ICMP port unreachable message to be returned.
986 * If our datagram socket is "connected" to the
987 * server, we get an ECONNREFUSED error on the next
988 * socket operation, and select returns if the
989 * error message is received. We can thus detect
990 * the absence of a nameserver without timing out.
992 /* With GCC 5.3 when compiling with -Os the compiler
993 emits a warning that slen may be used uninitialized,
994 but that is never true. Both slen and
995 EXT(statp).nssocks[ns] are initialized together or
996 the function return -1 before control flow reaches
997 the call to connect with slen. */
998 DIAG_PUSH_NEEDS_COMMENT
;
999 DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
1000 if (connect(EXT(statp
).nssocks
[ns
], nsap
, slen
) < 0) {
1001 DIAG_POP_NEEDS_COMMENT
;
1002 __res_iclose(statp
, false);
/* The send_dg function is responsible for sending a DNS query over UDP
   to the nameserver numbered NS from the res_state STATP i.e.
   EXT(statp).nssocks[ns].  The function supports IPv4 and IPv6 queries
   along with the ability to send the query in parallel for both stacks
   (default) or serially (RES_SNGLKUP).  It also supports serial lookup
   with a close and reopen of the socket used to talk to the server
   (RES_SNGLKUPREOP) to work around broken name servers.

   The query stored in BUF of BUFLEN length is sent first followed by
   the query stored in BUF2 of BUFLEN2 length.  Queries are sent
   in parallel (default) or serially (RES_SNGLKUP or RES_SNGLKUPREOP).

   Answers to the query are stored firstly in *ANSP up to a max of
   *ANSSIZP bytes.  If more than *ANSSIZP bytes are needed and ANSCP
   is non-NULL (to indicate that modifying the answer buffer is allowed)
   then malloc is used to allocate a new response buffer and ANSCP and
   ANSP will both point to the new buffer.  If more than *ANSSIZP bytes
   are needed but ANSCP is NULL, then as much of the response as
   possible is read into the buffer, but the results will be truncated.
   When truncation happens because of a small answer buffer the DNS
   packets header field TC will be set to 1, indicating a truncated
   message, while the rest of the UDP packet is discarded.

   Answers to the query are stored secondly in *ANSP2 up to a max of
   *ANSSIZP2 bytes, with the actual response length stored in
   *RESPLEN2.  If more than *ANSSIZP2 bytes are needed and ANSP2
   is non-NULL (required for a second query) then malloc is used to
   allocate a new response buffer, *ANSSIZP2 is set to the new buffer
   size and *ANSP2_MALLOCED is set to 1.

   The ANSP2_MALLOCED argument will eventually be removed as the
   change in buffer pointer can be used to detect the buffer has
   changed and that the caller should use free on the new buffer.

   Note that the answers may arrive in any order from the server and
   therefore the first and second answer buffers may not correspond to
   the first and second queries.

   It is not supported to call this function with a non-NULL ANSP2
   but a NULL ANSCP.  Put another way, you can call send_dg with a
   single unmodifiable buffer or two modifiable buffers, but no other
   combination is supported.

   It is the caller's responsibility to free the malloc allocated
   buffers by detecting that the pointers have changed from their
   original values i.e. *ANSCP or *ANSP2 has changed.

   If an answer is truncated because of UDP datagram DNS limits then
   *V_CIRCUIT is set to 1 and the return value non-zero to indicate to
   the caller to retry with TCP.  The value *GOTSOMEWHERE is set to 1
   if any progress was made reading a response from the nameserver and
   is used by the caller to distinguish between ECONNREFUSED and
   ETIMEDOUT (the latter if *GOTSOMEWHERE is 1).

   If errors are encountered then *TERRNO is set to an appropriate
   errno value and a zero result is returned for a recoverable error,
   and a less-than zero result is returned for a non-recoverable error.

   If no errors are encountered then *TERRNO is left unmodified and
   the length of the first response in bytes is returned.  */
/* Summary of how the visible body uses its arguments:
   BUF/BUFLEN is the first query and BUF2/BUFLEN2 the optional second
   one (BUF2 == NULL means there is no second query).  The first reply
   is stored through ANSCP if it is non-null and otherwise through
   ANSP, with *ANSSIZP as its buffer size; the second reply uses
   *ANSSIZP2 and *RESPLEN2.  *ANSP2_MALLOCED is set to 1 when a
   MAXPACKET-sized buffer is malloc'd for the second answer.  NS
   indexes EXT(statp).nssocks.
   NOTE(review): this listing appears to be missing lines (return
   type, braces and several statements), so the comments added below
   describe only what is visible.  */
1071 send_dg(res_state statp
,
1072 const u_char
*buf
, int buflen
, const u_char
*buf2
, int buflen2
,
1073 u_char
**ansp
, int *anssizp
,
1074 int *terrno
, int ns
, int *v_circuit
, int *gotsomewhere
, u_char
**anscp
,
1075 u_char
**ansp2
, int *anssizp2
, int *resplen2
, int *ansp2_malloced
)
/* View both outgoing queries as DNS headers; their ids are compared
   with the id of each incoming reply further down.  */
1077 const HEADER
*hp
= (HEADER
*) buf
;
1078 const HEADER
*hp2
= (HEADER
*) buf2
;
1079 struct timespec now
, timeout
, finish
;
1080 struct pollfd pfd
[1];
1082 struct sockaddr_in6 from
;
1087 * Compute time for the total operation.
/* The timeout in seconds starts as the retrans value shifted left by
   the server index NS and is then divided by nscount.  */
1089 int seconds
= (statp
->retrans
<< ns
);
1091 seconds
/= statp
->nscount
;
/* Cache the single-request option bits.  single_request is also true
   whenever single_request_reopen is.  */
1094 bool single_request_reopen
= (statp
->options
& RES_SNGLKUPREOP
) != 0;
1095 bool single_request
= (((statp
->options
& RES_SNGLKUP
) != 0)
1096 | single_request_reopen
);
/* Remember the incoming *GOTSOMEWHERE so it can be restored when the
   code below switches to one of the single-request modes.  */
1097 int save_gotsomewhere
= *gotsomewhere
;
1101 retval
= reopen (statp
, terrno
, ns
);
1104 if (resplen2
!= NULL
)
/* finish = now + timeout, i.e. the deadline that the remaining poll
   time is recomputed against below.  */
1110 evConsTime(&timeout
, seconds
, 0);
1111 evAddTime(&finish
, &now
, &timeout
);
1112 int need_recompute
= 0;
1115 /* Skip the second response if there is no second query.
1116 To do that we mark the second response as received. */
1117 int recvresp2
= buf2
== NULL
;
/* Poll the socket of nameserver NS, initially waiting for it to
   become writable.  */
1118 pfd
[0].fd
= EXT(statp
).nssocks
[ns
];
1119 pfd
[0].events
= POLLOUT
;
/* When need_recompute is set the remaining time is refreshed; if the
   deadline is not after NOW the attempt is abandoned.  */
1121 if (need_recompute
) {
1124 if (evCmpTime(finish
, now
) <= 0) {
1126 return close_and_return_error (statp
, resplen2
);
1128 evSubTime(&timeout
, &finish
, &now
);
1131 /* Convert struct timespec in milliseconds. */
1132 ptimeout
= timeout
.tv_sec
* 1000 + timeout
.tv_nsec
/ 1000000;
/* Try a zero-timeout poll first; only if nothing is ready, wait for
   up to ptimeout milliseconds.  */
1136 n
= __poll (pfd
, 1, 0);
1137 if (__glibc_unlikely (n
== 0)) {
1138 n
= __poll (pfd
, 1, ptimeout
);
1142 if (resplen
> 1 && (recvresp1
|| (buf2
!= NULL
&& recvresp2
)))
1144 /* There are quite a few broken name servers out
1145 there which don't handle two outstanding
1146 requests from the same source. There are also
1147 broken firewall settings. If we time out after
1148 having received one answer switch to the mode
1149 where we send the second request only once we
1150 have received the first answer. */
1151 if (!single_request
)
1153 statp
->options
|= RES_SNGLKUP
;
1154 single_request
= true;
1155 *gotsomewhere
= save_gotsomewhere
;
1158 else if (!single_request_reopen
)
1160 statp
->options
|= RES_SNGLKUPREOP
;
1161 single_request_reopen
= true;
1162 *gotsomewhere
= save_gotsomewhere
;
1163 __res_iclose (statp
, false);
1172 if (resplen2
!= NULL
)
1178 goto recompute_resend
;
/* The socket is writable: send whatever query is still outstanding.  */
1183 if (pfd
[0].revents
& POLLOUT
) {
1184 #ifndef __ASSUME_SENDMMSG
1185 static int have_sendmmsg
;
1187 # define have_sendmmsg 1
/* With two queries and nothing written yet, try to hand both
   datagrams to the kernel with a single sendmmsg call.  */
1189 if (have_sendmmsg
>= 0 && nwritten
== 0 && buf2
!= NULL
1193 { .iov_base
= (void *) buf
, .iov_len
= buflen
};
1195 { .iov_base
= (void *) buf2
, .iov_len
= buflen2
};
1196 struct mmsghdr reqs
[2] =
1214 int ndg
= __sendmmsg (pfd
[0].fd
, reqs
, 2, MSG_NOSIGNAL
);
/* ndg == 2 means both datagrams were accepted; each reported length
   is still checked against the corresponding query length.  */
1215 if (__glibc_likely (ndg
== 2))
1217 if (reqs
[0].msg_len
!= buflen
1218 || reqs
[1].msg_len
!= buflen2
)
1221 pfd
[0].events
= POLLIN
;
1224 else if (ndg
== 1 && reqs
[0].msg_len
== buflen
)
1226 else if (ndg
< 0 && (errno
== EINTR
|| errno
== EAGAIN
))
1227 goto recompute_resend
;
1230 #ifndef __ASSUME_SENDMMSG
1231 if (__glibc_unlikely (have_sendmmsg
== 0))
1233 if (ndg
< 0 && errno
== ENOSYS
)
1243 return close_and_return_error (statp
, resplen2
);
1249 #ifndef __ASSUME_SENDMMSG
/* Send a single query with send.  The result must equal the length
   of the query just sent (BUFLEN2 once nwritten is nonzero, BUFLEN
   otherwise); EINTR and EAGAIN lead to a retry, anything else is an
   error.  */
1253 sr
= send (pfd
[0].fd
, buf2
, buflen2
, MSG_NOSIGNAL
);
1255 sr
= send (pfd
[0].fd
, buf
, buflen
, MSG_NOSIGNAL
);
1257 if (sr
!= (nwritten
!= 0 ? buflen2
: buflen
)) {
1258 if (errno
== EINTR
|| errno
== EAGAIN
)
1259 goto recompute_resend
;
1260 return close_and_return_error (statp
, resplen2
);
/* Choose what to poll for next: replies only, or replies together
   with writability.  */
1263 if (nwritten
!= 0 || buf2
== NULL
|| single_request
)
1264 pfd
[0].events
= POLLIN
;
1266 pfd
[0].events
= POLLIN
| POLLOUT
;
/* The socket is readable: select the answer buffer for this reply,
   receive the datagram and validate it.  */
1270 } else if (pfd
[0].revents
& POLLIN
) {
1275 if ((recvresp1
| recvresp2
) == 0 || buf2
== NULL
) {
1276 /* We have not received any responses
1277 yet or we only have one response to
1279 thisanssizp
= anssizp
;
1280 thisansp
= anscp
?: ansp
;
1281 assert (anscp
!= NULL
|| ansp2
== NULL
);
1282 thisresplenp
= &resplen
;
1284 thisanssizp
= anssizp2
;
1286 thisresplenp
= resplen2
;
1289 if (*thisanssizp
< MAXPACKET
1290 /* If the current buffer is not the static
1291 user-supplied buffer then we can reallocate
1293 && (thisansp
!= NULL
&& thisansp
!= ansp
)
1295 /* Is the size too small? */
1296 && (ioctl (pfd
[0].fd
, FIONREAD
, thisresplenp
) < 0
1297 || *thisanssizp
< *thisresplenp
)
1300 /* Always allocate MAXPACKET, callers expect
1301 this specific size. */
1302 u_char
*newp
= malloc (MAXPACKET
);
1304 *thisanssizp
= MAXPACKET
;
1306 if (thisansp
== ansp2
)
1307 *ansp2_malloced
= 1;
1310 /* We could end up with truncation if anscp was NULL
1311 (not allowed to change caller's buffer) and the
1312 response buffer size is too small. This isn't a
1313 reliable way to detect truncation because the ioctl
1314 may be an inaccurate report of the UDP message size.
1315 Therefore we use this only to issue debug output.
1316 To do truncation accurately with UDP we need
1317 MSG_TRUNC which is only available on Linux. We
1318 can abstract out the Linux-specific feature in the
1319 future to detect truncation. */
1320 HEADER
*anhp
= (HEADER
*) *thisansp
;
/* Receive one datagram into the selected buffer and record the
   sender's address in FROM.  */
1321 socklen_t fromlen
= sizeof(struct sockaddr_in6
);
1322 assert (sizeof(from
) <= fromlen
);
1323 *thisresplenp
= recvfrom(pfd
[0].fd
, (char*)*thisansp
,
1325 (struct sockaddr
*)&from
, &fromlen
);
1326 if (__glibc_unlikely (*thisresplenp
<= 0)) {
1327 if (errno
== EINTR
|| errno
== EAGAIN
) {
1331 return close_and_return_error (statp
, resplen2
);
1334 if (__glibc_unlikely (*thisresplenp
< HFIXEDSZ
)) {
1336 * Undersized message.
1339 return close_and_return_error (statp
, resplen2
);
/* The reply id must match the id of a query whose answer has not
   been received yet; otherwise the reply is treated as stale.  */
1341 if ((recvresp1
|| hp
->id
!= anhp
->id
)
1342 && (recvresp2
|| hp2
->id
!= anhp
->id
)) {
1344 * response from old query, ignore it.
1345 * XXX - potential security hazard could
1351 /* Paranoia check. Due to the connected UDP socket,
1352 the kernel has already filtered invalid addresses
1354 if (!res_ourserver_p(statp
, &from
))
1357 /* Check for the correct header layout and a matching
1359 if ((recvresp1
|| !res_queriesmatch(buf
, buf
+ buflen
,
1363 && (recvresp2
|| !res_queriesmatch(buf2
, buf2
+ buflen2
,
1369 if (anhp
->rcode
== SERVFAIL
||
1370 anhp
->rcode
== NOTIMP
||
1371 anhp
->rcode
== REFUSED
) {
1373 if (recvresp1
|| (buf2
!= NULL
&& recvresp2
)) {
1379 /* No data from the first reply. */
1381 /* We are waiting for a possible second reply. */
1382 if (hp
->id
== anhp
->id
)
1390 /* don't retry if called from dig */
1392 return close_and_return_error (statp
, resplen2
);
1393 __res_iclose(statp
, false);
/* NOERROR reply that carries no answer or additional records and has
   neither AA nor RA set (presumably a referral; the handling is not
   visible in this listing).  */
1395 if (anhp
->rcode
== NOERROR
&& anhp
->ancount
== 0
1396 && anhp
->aa
== 0 && anhp
->ra
== 0 && anhp
->arcount
== 0) {
1399 if (!(statp
->options
& RES_IGNTC
) && anhp
->tc
) {
1401 * To get the rest of answer,
1402 * use TCP with same server.
1405 __res_iclose(statp
, false);
1406 // XXX if we have received one reply we could
1407 // XXX use it and not repeat it over TCP...
1408 if (resplen2
!= NULL
)
1412 /* Mark which reply we received. */
1413 if (recvresp1
== 0 && hp
->id
== anhp
->id
)
1417 /* Repeat waiting if we have a second answer to arrive. */
1418 if ((recvresp1
& recvresp2
) == 0) {
/* In single-request mode switch back to waiting for writability;
   with single_request_reopen the socket is closed and reopened
   first and the polled descriptor refreshed.  */
1419 if (single_request
) {
1420 pfd
[0].events
= POLLOUT
;
1421 if (single_request_reopen
) {
1422 __res_iclose (statp
, false);
1423 retval
= reopen (statp
, terrno
, ns
);
1426 if (resplen2
!= NULL
)
1430 pfd
[0].fd
= EXT(statp
).nssocks
[ns
];
1435 /* All is well. We have received both responses (if
1436 two responses were requested). */
1438 } else if (pfd
[0].revents
& (POLLERR
| POLLHUP
| POLLNVAL
))
1439 /* Something went wrong. We can stop trying. */
1440 return close_and_return_error (statp
, resplen2
);
1442 /* poll should not have returned > 0 in this case. */
/* Return nonzero if the socket addresses A1 and A2 name the same
   endpoint (same port and same address), and zero otherwise.

   Both arguments are typed as struct sockaddr_in6, but either one may
   really hold a struct sockaddr_in; the family field decides which
   layout is used.  If the families differ, the IPv4 address matches
   only when the IPv6 address is its IPv4-mapped form.  Only AF_INET
   and AF_INET6 are handled.  */
static int
sock_eq(struct sockaddr_in6 *a1, struct sockaddr_in6 *a2) {
	if (a1->sin6_family == a2->sin6_family) {
		if (a1->sin6_family == AF_INET) {
			const struct sockaddr_in *s1
			  = (const struct sockaddr_in *) a1;
			const struct sockaddr_in *s2
			  = (const struct sockaddr_in *) a2;

			return (s1->sin_port == s2->sin_port
				&& s1->sin_addr.s_addr == s2->sin_addr.s_addr);
		}
		return (a1->sin6_port == a2->sin6_port
			&& memcmp(&a1->sin6_addr, &a2->sin6_addr,
				  sizeof (struct in6_addr)) == 0);
	}

	/* Mixed families: arrange for A1 to be the IPv6 address and A2
	   the IPv4 one, so the comparison below reads each structure
	   with its real layout.  */
	if (a1->sin6_family == AF_INET) {
		struct sockaddr_in6 *sap = a1;
		a1 = a2;
		a2 = sap;
	} /* assumes that AF_INET and AF_INET6 are the only possibilities */

	const struct sockaddr_in *v4 = (const struct sockaddr_in *) a2;

	/* The last four bytes of an IPv4-mapped IPv6 address hold the
	   IPv4 address in network byte order, the same representation
	   as s_addr, so a byte comparison is sufficient.  */
	return (a1->sin6_port == v4->sin_port
		&& IN6_IS_ADDR_V4MAPPED(&a1->sin6_addr)
		&& memcmp(&a1->sin6_addr.s6_addr[12], &v4->sin_addr.s_addr,
			  sizeof v4->sin_addr.s_addr) == 0);
}