stdlib: Implement introsort for qsort (BZ 19305)
[glibc.git] / resolv / res_send.c
blobd098eba470b550452a12c97061f80728ba7667db
1 /* Copyright (C) 2016-2023 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <https://www.gnu.org/licenses/>. */
19 * Copyright (c) 1985, 1989, 1993
20 * The Regents of the University of California. All rights reserved.
22 * Redistribution and use in source and binary forms, with or without
23 * modification, are permitted provided that the following conditions
24 * are met:
25 * 1. Redistributions of source code must retain the above copyright
26 * notice, this list of conditions and the following disclaimer.
27 * 2. Redistributions in binary form must reproduce the above copyright
28 * notice, this list of conditions and the following disclaimer in the
29 * documentation and/or other materials provided with the distribution.
30 * 4. Neither the name of the University nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
34 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44 * SUCH DAMAGE.
48 * Portions Copyright (c) 1993 by Digital Equipment Corporation.
50 * Permission to use, copy, modify, and distribute this software for any
51 * purpose with or without fee is hereby granted, provided that the above
52 * copyright notice and this permission notice appear in all copies, and that
53 * the name of Digital Equipment Corporation not be used in advertising or
54 * publicity pertaining to distribution of the document or software without
55 * specific, written prior permission.
57 * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
58 * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
59 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
60 * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
61 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
62 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
63 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
64 * SOFTWARE.
68 * Portions Copyright (c) 1996-1999 by Internet Software Consortium.
70 * Permission to use, copy, modify, and distribute this software for any
71 * purpose with or without fee is hereby granted, provided that the above
72 * copyright notice and this permission notice appear in all copies.
74 * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
75 * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
76 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
77 * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
78 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
79 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
80 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
81 * SOFTWARE.
85 * Send query to name server and wait for reply.
88 #include <assert.h>
89 #include <sys/types.h>
90 #include <sys/param.h>
91 #include <sys/time.h>
92 #include <sys/socket.h>
93 #include <sys/uio.h>
94 #include <sys/poll.h>
96 #include <netinet/in.h>
97 #include <arpa/nameser.h>
98 #include <arpa/inet.h>
99 #include <sys/ioctl.h>
101 #include <errno.h>
102 #include <fcntl.h>
103 #include <netdb.h>
104 #include <resolv/resolv-internal.h>
105 #include <resolv/resolv_context.h>
106 #include <signal.h>
107 #include <stdlib.h>
108 #include <string.h>
109 #include <unistd.h>
110 #include <kernel-features.h>
111 #include <libc-diag.h>
112 #include <random-bits.h>
/* Size used when a response buffer has to be allocated: the larger of
   PACKETSZ and 65536.  */
#if PACKETSZ > 65536
#define MAXPACKET       PACKETSZ
#else
#define MAXPACKET       65536
#endif
/* From ev_streams.c.  */

/* Make *VEC describe the CNT bytes starting at BUF.  The structure is
   first filled with the byte pattern 0xf5, then both members are
   assigned.  */
static inline void
__attribute ((always_inline))
evConsIovec(void *buf, size_t cnt, struct iovec *vec) {
    memset(vec, 0xf5, sizeof (*vec));
    vec->iov_base = buf;
    vec->iov_len = cnt;
}
/* From ev_timers.c.  */

/* Number of nanoseconds in one second.  */
#define BILLION 1000000000

/* Store SEC seconds and NSEC nanoseconds in *RES.  */
static inline void
evConsTime(struct timespec *res, time_t sec, long nsec) {
    res->tv_sec = sec;
    res->tv_nsec = nsec;
}

/* Store *ADDEND1 + *ADDEND2 in *RES, carrying one second when the
   nanosecond sum reaches BILLION.  */
static inline void
evAddTime(struct timespec *res, const struct timespec *addend1,
          const struct timespec *addend2) {
    res->tv_sec = addend1->tv_sec + addend2->tv_sec;
    res->tv_nsec = addend1->tv_nsec + addend2->tv_nsec;
    if (res->tv_nsec >= BILLION) {
        res->tv_sec++;
        res->tv_nsec -= BILLION;
    }
}

/* Store *MINUEND - *SUBTRAHEND in *RES, borrowing one second when the
   minuend has fewer nanoseconds than the subtrahend.  */
static inline void
evSubTime(struct timespec *res, const struct timespec *minuend,
          const struct timespec *subtrahend) {
    res->tv_sec = minuend->tv_sec - subtrahend->tv_sec;
    if (minuend->tv_nsec >= subtrahend->tv_nsec)
        res->tv_nsec = minuend->tv_nsec - subtrahend->tv_nsec;
    else {
        res->tv_nsec = (BILLION
                        - subtrahend->tv_nsec + minuend->tv_nsec);
        res->tv_sec--;
    }
}
/* Compare two time values.  Returns -1 if A is earlier than B, 1 if A
   is later than B, and 0 if they are equal.  The fields are compared
   directly rather than subtracted, so the result does not depend on
   the difference fitting into a long.  */
static int
evCmpTime(struct timespec a, struct timespec b) {
    if (a.tv_sec != b.tv_sec)
        return a.tv_sec < b.tv_sec ? -1 : 1;
    if (a.tv_nsec != b.tv_nsec)
        return a.tv_nsec < b.tv_nsec ? -1 : 1;
    return 0;
}
173 static void
174 evNowTime(struct timespec *res) {
175 __clock_gettime(CLOCK_REALTIME, res);
179 #define EXT(res) ((res)->_u._ext)
181 /* Forward. */
183 static int send_vc(res_state, const u_char *, int,
184 const u_char *, int,
185 u_char **, int *, int *, int, u_char **,
186 u_char **, int *, int *, int *);
187 static int send_dg(res_state, const u_char *, int,
188 const u_char *, int,
189 u_char **, int *, int *, int,
190 int *, int *, u_char **,
191 u_char **, int *, int *, int *);
192 static int sock_eq(struct sockaddr_in6 *, struct sockaddr_in6 *);
/* Returns a shift value for the name server index.  Used to implement
   RES_ROTATE.  The result is 0 when rotation is disabled or at most
   one name server is configured, and otherwise a value in the range
   [0, statp->nscount).  */
static unsigned int
nameserver_offset (struct __res_state *statp)
{
  /* If we only have one name server or rotation is disabled, return
     offset 0 (no rotation).  */
  unsigned int nscount = statp->nscount;
  if (nscount <= 1 || !(statp->options & RES_ROTATE))
    return 0;

  /* Global offset.  The lowest bit indicates whether the offset has
     been initialized with a random value.  Use relaxed MO to access
     global_offset because all we need is a sequence of roughly
     sequential values.  */
  static unsigned int global_offset;
  unsigned int offset = atomic_fetch_add_relaxed (&global_offset, 2);
  if ((offset & 1) == 0)
    {
      /* Initialization is required.  */
      offset = random_bits ();
      /* The lowest bit is the most random.  Preserve it.  */
      offset <<= 1;

      /* Store the new starting value.  atomic_fetch_add_relaxed
         returns the old value, so emulate that by storing the new
         (incremented) value.  Concurrent initialization with
         different random values is harmless.  */
      atomic_store_relaxed (&global_offset, (offset | 1) + 2);
    }

  /* Remove the initialization bit.  */
  offset >>= 1;

  /* Avoid the division in the most common cases.  */
  switch (nscount)
    {
    case 2:
      return offset & 1;
    case 3:
      return offset % 3;
    case 4:
      return offset & 3;
    default:
      return offset % nscount;
    }
}
242 /* Clear the AD bit unless the trust-ad option was specified in the
243 resolver configuration. */
244 static void
245 mask_ad_bit (struct resolv_context *ctx, void *buf)
247 if (!(ctx->resp->options & RES_TRUSTAD))
248 ((UHEADER *) buf)->ad = 0;
252 __res_context_send (struct resolv_context *ctx,
253 const unsigned char *buf, int buflen,
254 const unsigned char *buf2, int buflen2,
255 unsigned char *ans, int anssiz,
256 unsigned char **ansp, unsigned char **ansp2,
257 int *nansp2, int *resplen2, int *ansp2_malloced)
259 struct __res_state *statp = ctx->resp;
260 int gotsomewhere, terrno, try, v_circuit, resplen;
261 /* On some architectures send_vc is inlined and the compiler might emit
262 a warning indicating 'resplen' may be used uninitialized. Note that
263 the warning belongs to resplen in send_vc which is used as return
264 value! There the maybe-uninitialized warning is already ignored as
265 it is a false-positive - see comment in send_vc.
266 Here the variable n is set to the return value of send_vc.
267 See below. */
268 DIAG_PUSH_NEEDS_COMMENT;
269 DIAG_IGNORE_NEEDS_COMMENT (9, "-Wmaybe-uninitialized");
270 int n;
271 DIAG_POP_NEEDS_COMMENT;
273 if (statp->nscount == 0) {
274 __set_errno (ESRCH);
275 return (-1);
278 if (anssiz < (buf2 == NULL ? 1 : 2) * HFIXEDSZ) {
279 __set_errno (EINVAL);
280 return (-1);
283 v_circuit = ((statp->options & RES_USEVC)
284 || buflen > PACKETSZ
285 || buflen2 > PACKETSZ);
286 gotsomewhere = 0;
287 terrno = ETIMEDOUT;
290 * If the ns_addr_list in the resolver context has changed, then
291 * invalidate our cached copy and the associated timing data.
293 if (EXT(statp).nscount != 0) {
294 int needclose = 0;
296 if (EXT(statp).nscount != statp->nscount)
297 needclose++;
298 else
299 for (unsigned int ns = 0; ns < statp->nscount; ns++) {
300 if (statp->nsaddr_list[ns].sin_family != 0
301 && !sock_eq((struct sockaddr_in6 *)
302 &statp->nsaddr_list[ns],
303 EXT(statp).nsaddrs[ns]))
305 needclose++;
306 break;
309 if (needclose) {
310 __res_iclose(statp, false);
311 EXT(statp).nscount = 0;
316 * Maybe initialize our private copy of the ns_addr_list.
318 if (EXT(statp).nscount == 0) {
319 for (unsigned int ns = 0; ns < statp->nscount; ns++) {
320 EXT(statp).nssocks[ns] = -1;
321 if (statp->nsaddr_list[ns].sin_family == 0)
322 continue;
323 if (EXT(statp).nsaddrs[ns] == NULL)
324 EXT(statp).nsaddrs[ns] =
325 malloc(sizeof (struct sockaddr_in6));
326 if (EXT(statp).nsaddrs[ns] != NULL)
327 memset (mempcpy(EXT(statp).nsaddrs[ns],
328 &statp->nsaddr_list[ns],
329 sizeof (struct sockaddr_in)),
330 '\0',
331 sizeof (struct sockaddr_in6)
332 - sizeof (struct sockaddr_in));
333 else
334 return -1;
336 EXT(statp).nscount = statp->nscount;
339 /* Name server index offset. Used to implement
340 RES_ROTATE. */
341 unsigned int ns_offset = nameserver_offset (statp);
344 * Send request, RETRY times, or until successful.
346 for (try = 0; try < statp->retry; try++) {
347 for (unsigned ns_shift = 0; ns_shift < statp->nscount; ns_shift++)
349 /* The actual name server index. This implements
350 RES_ROTATE. */
351 unsigned int ns = ns_shift + ns_offset;
352 if (ns >= statp->nscount)
353 ns -= statp->nscount;
355 same_ns:
356 if (__glibc_unlikely (v_circuit)) {
357 /* Use VC; at most one attempt per server. */
358 try = statp->retry;
359 n = send_vc(statp, buf, buflen, buf2, buflen2,
360 &ans, &anssiz, &terrno,
361 ns, ansp, ansp2, nansp2, resplen2,
362 ansp2_malloced);
363 if (n < 0)
364 return (-1);
365 /* See comment at the declaration of n. */
366 DIAG_PUSH_NEEDS_COMMENT;
367 DIAG_IGNORE_NEEDS_COMMENT (9, "-Wmaybe-uninitialized");
368 if (n == 0 && (buf2 == NULL || *resplen2 == 0))
369 goto next_ns;
370 DIAG_POP_NEEDS_COMMENT;
371 } else {
372 /* Use datagrams. */
373 n = send_dg(statp, buf, buflen, buf2, buflen2,
374 &ans, &anssiz, &terrno,
375 ns, &v_circuit, &gotsomewhere, ansp,
376 ansp2, nansp2, resplen2, ansp2_malloced);
377 if (n < 0)
378 return (-1);
379 if (n == 0 && (buf2 == NULL || *resplen2 == 0))
380 goto next_ns;
381 if (v_circuit)
382 // XXX Check whether both requests failed or
383 // XXX whether one has been answered successfully
384 goto same_ns;
387 resplen = n;
389 /* See comment at the declaration of n. Note: resplen = n; */
390 DIAG_PUSH_NEEDS_COMMENT;
391 DIAG_IGNORE_NEEDS_COMMENT (9, "-Wmaybe-uninitialized");
392 /* Mask the AD bit in both responses unless it is
393 marked trusted. */
394 if (resplen > HFIXEDSZ)
396 if (ansp != NULL)
397 mask_ad_bit (ctx, *ansp);
398 else
399 mask_ad_bit (ctx, ans);
401 DIAG_POP_NEEDS_COMMENT;
402 if (resplen2 != NULL && *resplen2 > HFIXEDSZ)
403 mask_ad_bit (ctx, *ansp2);
406 * If we have temporarily opened a virtual circuit,
407 * or if we haven't been asked to keep a socket open,
408 * close the socket.
410 if ((v_circuit && (statp->options & RES_USEVC) == 0) ||
411 (statp->options & RES_STAYOPEN) == 0) {
412 __res_iclose(statp, false);
414 return (resplen);
415 next_ns: ;
416 } /*foreach ns*/
417 } /*foreach retry*/
418 __res_iclose(statp, false);
419 if (!v_circuit) {
420 if (!gotsomewhere)
421 __set_errno (ECONNREFUSED); /* no nameservers found */
422 else
423 __set_errno (ETIMEDOUT); /* no answer obtained */
424 } else
425 __set_errno (terrno);
426 return (-1);
428 libc_hidden_def (__res_context_send)
430 /* Common part of res_nsend and res_send. */
431 static int
432 context_send_common (struct resolv_context *ctx,
433 const unsigned char *buf, int buflen,
434 unsigned char *ans, int anssiz)
436 if (ctx == NULL)
438 RES_SET_H_ERRNO (&_res, NETDB_INTERNAL);
439 return -1;
442 int result;
443 if (__res_handle_no_aaaa (ctx, buf, buflen, ans, anssiz, &result))
444 return result;
446 result = __res_context_send (ctx, buf, buflen, NULL, 0, ans, anssiz,
447 NULL, NULL, NULL, NULL, NULL);
448 __resolv_context_put (ctx);
449 return result;
453 ___res_nsend (res_state statp, const unsigned char *buf, int buflen,
454 unsigned char *ans, int anssiz)
456 return context_send_common
457 (__resolv_context_get_override (statp), buf, buflen, ans, anssiz);
459 versioned_symbol (libc, ___res_nsend, res_nsend, GLIBC_2_34);
460 #if OTHER_SHLIB_COMPAT (libresolv, GLIBC_2_2, GLIBC_2_34)
461 compat_symbol (libresolv, ___res_nsend, __res_nsend, GLIBC_2_2);
462 #endif
465 ___res_send (const unsigned char *buf, int buflen, unsigned char *ans,
466 int anssiz)
468 return context_send_common
469 (__resolv_context_get (), buf, buflen, ans, anssiz);
471 versioned_symbol (libc, ___res_send, res_send, GLIBC_2_34);
472 #if OTHER_SHLIB_COMPAT (libresolv, GLIBC_2_0, GLIBC_2_34)
473 compat_symbol (libresolv, ___res_send, __res_send, GLIBC_2_0);
474 #endif
476 /* Private */
/* Close the resolver structure, assign zero to *RESPLEN2 if RESPLEN2
   is not NULL, and return zero.  send_vc and send_dg return the
   result of this function directly on their error paths.  */
static int
__attribute__ ((warn_unused_result))
close_and_return_error (res_state statp, int *resplen2)
{
  __res_iclose(statp, false);
  if (resplen2 != NULL)
    *resplen2 = 0;
  return 0;
}
490 /* The send_vc function is responsible for sending a DNS query over TCP
491 to the nameserver numbered NS from the res_state STATP i.e.
492 EXT(statp).nssocks[ns]. The function supports sending both IPv4 and
493 IPv6 queries at the same serially on the same socket.
495 Please note that for TCP there is no way to disable sending both
496 queries, unlike UDP, which honours RES_SNGLKUP and RES_SNGLKUPREOP
497 and sends the queries serially and waits for the result after each
498 sent query. This implementation should be corrected to honour these
499 options.
501 Please also note that for TCP we send both queries over the same
502 socket one after another. This technically violates best practice
503 since the server is allowed to read the first query, respond, and
504 then close the socket (to service another client). If the server
505 does this, then the remaining second query in the socket data buffer
506 will cause the server to send the client an RST which will arrive
507 asynchronously and the client's OS will likely tear down the socket
508 receive buffer resulting in a potentially short read and lost
509 response data. This will force the client to retry the query again,
510 and this process may repeat until all servers and connection resets
511 are exhausted and then the query will fail. It's not known if this
512 happens with any frequency in real DNS server implementations. This
513 implementation should be corrected to use two sockets by default for
514 parallel queries.
516 The query stored in BUF of BUFLEN length is sent first followed by
517 the query stored in BUF2 of BUFLEN2 length. Queries are sent
518 serially on the same socket.
520 Answers to the query are stored firstly in *ANSP up to a max of
521 *ANSSIZP bytes. If more than *ANSSIZP bytes are needed and ANSCP
522 is non-NULL (to indicate that modifying the answer buffer is allowed)
523 then malloc is used to allocate a new response buffer and ANSCP and
524 ANSP will both point to the new buffer. If more than *ANSSIZP bytes
525 are needed but ANSCP is NULL, then as much of the response as
526 possible is read into the buffer, but the results will be truncated.
527 When truncation happens because of a small answer buffer the DNS
528 packets header field TC will bet set to 1, indicating a truncated
529 message and the rest of the socket data will be read and discarded.
531 Answers to the query are stored secondly in *ANSP2 up to a max of
532 *ANSSIZP2 bytes, with the actual response length stored in
533 *RESPLEN2. If more than *ANSSIZP bytes are needed and ANSP2
534 is non-NULL (required for a second query) then malloc is used to
535 allocate a new response buffer, *ANSSIZP2 is set to the new buffer
536 size and *ANSP2_MALLOCED is set to 1.
538 The ANSP2_MALLOCED argument will eventually be removed as the
539 change in buffer pointer can be used to detect the buffer has
540 changed and that the caller should use free on the new buffer.
542 Note that the answers may arrive in any order from the server and
543 therefore the first and second answer buffers may not correspond to
544 the first and second queries.
546 It is not supported to call this function with a non-NULL ANSP2
547 but a NULL ANSCP. Put another way, you can call send_vc with a
548 single unmodifiable buffer or two modifiable buffers, but no other
549 combination is supported.
551 It is the caller's responsibility to free the malloc allocated
552 buffers by detecting that the pointers have changed from their
553 original values i.e. *ANSCP or *ANSP2 has changed.
555 If errors are encountered then *TERRNO is set to an appropriate
556 errno value and a zero result is returned for a recoverable error,
557 and a less-than zero result is returned for a non-recoverable error.
559 If no errors are encountered then *TERRNO is left unmodified and
560 a the length of the first response in bytes is returned. */
561 static int
562 send_vc(res_state statp,
563 const u_char *buf, int buflen, const u_char *buf2, int buflen2,
564 u_char **ansp, int *anssizp,
565 int *terrno, int ns, u_char **anscp, u_char **ansp2, int *anssizp2,
566 int *resplen2, int *ansp2_malloced)
568 const UHEADER *hp = (UHEADER *) buf;
569 const UHEADER *hp2 = (UHEADER *) buf2;
570 UHEADER *anhp = (UHEADER *) *ansp;
571 struct sockaddr *nsap = __res_get_nsaddr (statp, ns);
572 int truncating, connreset, n;
573 /* On some architectures compiler might emit a warning indicating
574 'resplen' may be used uninitialized. However if buf2 == NULL
575 then this code won't be executed; if buf2 != NULL, then first
576 time round the loop recvresp1 and recvresp2 will be 0 so this
577 code won't be executed but "thisresplenp = &resplen;" followed
578 by "*thisresplenp = rlen;" will be executed so that subsequent
579 times round the loop resplen has been initialized. So this is
580 a false-positive.
582 DIAG_PUSH_NEEDS_COMMENT;
583 DIAG_IGNORE_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
584 int resplen;
585 DIAG_POP_NEEDS_COMMENT;
586 struct iovec iov[4];
587 u_short len;
588 u_short len2;
589 u_char *cp;
591 connreset = 0;
592 same_ns:
593 truncating = 0;
595 /* Are we still talking to whom we want to talk to? */
596 if (statp->_vcsock >= 0 && (statp->_flags & RES_F_VC) != 0) {
597 struct sockaddr_in6 peer;
598 socklen_t size = sizeof peer;
600 if (__getpeername (statp->_vcsock,
601 (struct sockaddr *) &peer, &size) < 0
602 || !sock_eq (&peer, (struct sockaddr_in6 *) nsap)) {
603 __res_iclose(statp, false);
604 statp->_flags &= ~RES_F_VC;
608 if (statp->_vcsock < 0 || (statp->_flags & RES_F_VC) == 0) {
609 if (statp->_vcsock >= 0)
610 __res_iclose(statp, false);
612 statp->_vcsock = __socket
613 (nsap->sa_family, SOCK_STREAM | SOCK_CLOEXEC, 0);
614 if (statp->_vcsock < 0) {
615 *terrno = errno;
616 if (resplen2 != NULL)
617 *resplen2 = 0;
618 return (-1);
620 __set_errno (0);
621 if (__connect (statp->_vcsock, nsap,
622 nsap->sa_family == AF_INET
623 ? sizeof (struct sockaddr_in)
624 : sizeof (struct sockaddr_in6)) < 0) {
625 *terrno = errno;
626 return close_and_return_error (statp, resplen2);
628 statp->_flags |= RES_F_VC;
632 * Send length & message
634 len = htons ((u_short) buflen);
635 evConsIovec(&len, INT16SZ, &iov[0]);
636 evConsIovec((void*)buf, buflen, &iov[1]);
637 int niov = 2;
638 ssize_t explen = INT16SZ + buflen;
639 if (buf2 != NULL) {
640 len2 = htons ((u_short) buflen2);
641 evConsIovec(&len2, INT16SZ, &iov[2]);
642 evConsIovec((void*)buf2, buflen2, &iov[3]);
643 niov = 4;
644 explen += INT16SZ + buflen2;
646 if (TEMP_FAILURE_RETRY (__writev (statp->_vcsock, iov, niov))
647 != explen) {
648 *terrno = errno;
649 return close_and_return_error (statp, resplen2);
652 * Receive length & response
654 int recvresp1 = 0;
655 /* Skip the second response if there is no second query.
656 To do that we mark the second response as received. */
657 int recvresp2 = buf2 == NULL;
658 uint16_t rlen16;
659 read_len:
660 cp = (u_char *)&rlen16;
661 len = sizeof(rlen16);
662 while ((n = TEMP_FAILURE_RETRY (read(statp->_vcsock, cp,
663 (int)len))) > 0) {
664 cp += n;
665 if ((len -= n) <= 0)
666 break;
668 if (n <= 0) {
669 *terrno = errno;
671 * A long running process might get its TCP
672 * connection reset if the remote server was
673 * restarted. Requery the server instead of
674 * trying a new one. When there is only one
675 * server, this means that a query might work
676 * instead of failing. We only allow one reset
677 * per query to prevent looping.
679 if (*terrno == ECONNRESET && !connreset)
681 __res_iclose (statp, false);
682 connreset = 1;
683 goto same_ns;
685 return close_and_return_error (statp, resplen2);
687 int rlen = ntohs (rlen16);
689 int *thisanssizp;
690 u_char **thisansp;
691 int *thisresplenp;
692 if ((recvresp1 | recvresp2) == 0 || buf2 == NULL) {
693 /* We have not received any responses
694 yet or we only have one response to
695 receive. */
696 thisanssizp = anssizp;
697 thisansp = anscp ?: ansp;
698 assert (anscp != NULL || ansp2 == NULL);
699 thisresplenp = &resplen;
700 } else {
701 thisanssizp = anssizp2;
702 thisansp = ansp2;
703 thisresplenp = resplen2;
705 anhp = (UHEADER *) *thisansp;
707 *thisresplenp = rlen;
708 /* Is the answer buffer too small? */
709 if (*thisanssizp < rlen) {
710 /* If the current buffer is not the the static
711 user-supplied buffer then we can reallocate
712 it. */
713 if (thisansp != NULL && thisansp != ansp) {
714 /* Always allocate MAXPACKET, callers expect
715 this specific size. */
716 u_char *newp = malloc (MAXPACKET);
717 if (newp == NULL)
719 *terrno = ENOMEM;
720 return close_and_return_error (statp, resplen2);
722 *thisanssizp = MAXPACKET;
723 *thisansp = newp;
724 if (thisansp == ansp2)
725 *ansp2_malloced = 1;
726 anhp = (UHEADER *) newp;
727 /* A uint16_t can't be larger than MAXPACKET
728 thus it's safe to allocate MAXPACKET but
729 read RLEN bytes instead. */
730 len = rlen;
731 } else {
732 truncating = 1;
733 len = *thisanssizp;
735 } else
736 len = rlen;
738 if (__glibc_unlikely (len < HFIXEDSZ)) {
740 * Undersized message.
742 *terrno = EMSGSIZE;
743 return close_and_return_error (statp, resplen2);
746 cp = *thisansp;
747 while (len != 0 && (n = read(statp->_vcsock, (char *)cp, (int)len)) > 0){
748 cp += n;
749 len -= n;
751 if (__glibc_unlikely (n <= 0)) {
752 *terrno = errno;
753 return close_and_return_error (statp, resplen2);
755 if (__glibc_unlikely (truncating)) {
757 * Flush rest of answer so connection stays in synch.
759 anhp->tc = 1;
760 len = rlen - *thisanssizp;
761 while (len != 0) {
762 char junk[PACKETSZ];
764 n = read(statp->_vcsock, junk,
765 (len > sizeof junk) ? sizeof junk : len);
766 if (n > 0)
767 len -= n;
768 else
769 break;
773 * If the calling application has bailed out of
774 * a previous call and failed to arrange to have
775 * the circuit closed or the server has got
776 * itself confused, then drop the packet and
777 * wait for the correct one.
779 if ((recvresp1 || hp->id != anhp->id)
780 && (recvresp2 || hp2->id != anhp->id))
781 goto read_len;
783 /* Mark which reply we received. */
784 if (recvresp1 == 0 && hp->id == anhp->id)
785 recvresp1 = 1;
786 else
787 recvresp2 = 1;
788 /* Repeat waiting if we have a second answer to arrive. */
789 if ((recvresp1 & recvresp2) == 0)
790 goto read_len;
793 * All is well, or the error is fatal. Signal that the
794 * next nameserver ought not be tried.
796 return resplen;
799 static int
800 reopen (res_state statp, int *terrno, int ns)
802 if (EXT(statp).nssocks[ns] == -1) {
803 struct sockaddr *nsap = __res_get_nsaddr (statp, ns);
804 socklen_t slen;
806 /* only try IPv6 if IPv6 NS and if not failed before */
807 if (nsap->sa_family == AF_INET6 && !statp->ipv6_unavail) {
808 EXT (statp).nssocks[ns] = __socket
809 (PF_INET6,
810 SOCK_DGRAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0);
811 if (EXT(statp).nssocks[ns] < 0)
812 statp->ipv6_unavail = errno == EAFNOSUPPORT;
813 slen = sizeof (struct sockaddr_in6);
814 } else if (nsap->sa_family == AF_INET) {
815 EXT (statp).nssocks[ns] = __socket
816 (PF_INET,
817 SOCK_DGRAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0);
818 slen = sizeof (struct sockaddr_in);
820 if (EXT(statp).nssocks[ns] < 0) {
821 *terrno = errno;
822 return (-1);
825 /* Enable full ICMP error reporting for this
826 socket. */
827 if (__res_enable_icmp (nsap->sa_family,
828 EXT (statp).nssocks[ns]) < 0)
830 int saved_errno = errno;
831 __res_iclose (statp, false);
832 __set_errno (saved_errno);
833 *terrno = saved_errno;
834 return -1;
838 * On a 4.3BSD+ machine (client and server,
839 * actually), sending to a nameserver datagram
840 * port with no nameserver will cause an
841 * ICMP port unreachable message to be returned.
842 * If our datagram socket is "connected" to the
843 * server, we get an ECONNREFUSED error on the next
844 * socket operation, and select returns if the
845 * error message is received. We can thus detect
846 * the absence of a nameserver without timing out.
848 /* With GCC 5.3 when compiling with -Os the compiler
849 emits a warning that slen may be used uninitialized,
850 but that is never true. Both slen and
851 EXT(statp).nssocks[ns] are initialized together or
852 the function return -1 before control flow reaches
853 the call to connect with slen. */
854 DIAG_PUSH_NEEDS_COMMENT;
855 DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
856 if (__connect (EXT (statp).nssocks[ns], nsap, slen) < 0) {
857 DIAG_POP_NEEDS_COMMENT;
858 __res_iclose(statp, false);
859 return (0);
863 return 1;
/* The send_dg function is responsible for sending a DNS query over UDP
   to the nameserver numbered NS from the res_state STATP i.e.
   EXT(statp).nssocks[ns].  The function supports IPv4 and IPv6 queries
   along with the ability to send the query in parallel for both stacks
   (default) or serially (RES_SNGLKUP).  It also supports serial lookup
   with a close and reopen of the socket used to talk to the server
   (RES_SNGLKUPREOP) to work around broken name servers.

   The query stored in BUF of BUFLEN length is sent first followed by
   the query stored in BUF2 of BUFLEN2 length.  Queries are sent
   in parallel (default) or serially (RES_SNGLKUP or RES_SNGLKUPREOP).

   Answers to the query are stored firstly in *ANSP up to a max of
   *ANSSIZP bytes.  If more than *ANSSIZP bytes are needed and ANSCP
   is non-NULL (to indicate that modifying the answer buffer is allowed)
   then malloc is used to allocate a new response buffer and ANSCP and
   ANSP will both point to the new buffer.  If more than *ANSSIZP bytes
   are needed but ANSCP is NULL, then as much of the response as
   possible is read into the buffer, but the results will be truncated.
   When truncation happens because of a small answer buffer the DNS
   packets header field TC will be set to 1, indicating a truncated
   message, while the rest of the UDP packet is discarded.

   Answers to the query are stored secondly in *ANSP2 up to a max of
   *ANSSIZP2 bytes, with the actual response length stored in
   *RESPLEN2.  If more than *ANSSIZP2 bytes are needed and ANSP2
   is non-NULL (required for a second query) then malloc is used to
   allocate a new response buffer, *ANSSIZP2 is set to the new buffer
   size and *ANSP2_MALLOCED is set to 1.

   The ANSP2_MALLOCED argument will eventually be removed as the
   change in buffer pointer can be used to detect the buffer has
   changed and that the caller should use free on the new buffer.

   Note that the answers may arrive in any order from the server and
   therefore the first and second answer buffers may not correspond to
   the first and second queries.

   It is not supported to call this function with a non-NULL ANSP2
   but a NULL ANSCP.  Put another way, you can call send_dg with a
   single unmodifiable buffer or two modifiable buffers, but no other
   combination is supported.

   It is the caller's responsibility to free the malloc allocated
   buffers by detecting that the pointers have changed from their
   original values i.e. *ANSCP or *ANSP2 has changed.

   If an answer is truncated because of UDP datagram DNS limits then
   *V_CIRCUIT is set to 1 and the return value non-zero to indicate to
   the caller to retry with TCP.  The value *GOTSOMEWHERE is set to 1
   if any progress was made reading a response from the nameserver and
   is used by the caller to distinguish between ECONNREFUSED and
   ETIMEDOUT (the latter if *GOTSOMEWHERE is 1).

   If errors are encountered then *TERRNO is set to an appropriate
   errno value and a zero result is returned for a recoverable error,
   and a less-than zero result is returned for a non-recoverable error.

   If no errors are encountered then *TERRNO is left unmodified and
   the length of the first response in bytes is returned.  */
/* Send BUF (and BUF2, when it is non-NULL) on the socket kept for
   nameserver NS and wait with poll for the matching response(s) until
   the deadline computed below has passed. */
926 static int
927 send_dg(res_state statp,
928 const u_char *buf, int buflen, const u_char *buf2, int buflen2,
929 u_char **ansp, int *anssizp,
930 int *terrno, int ns, int *v_circuit, int *gotsomewhere, u_char **anscp,
931 u_char **ansp2, int *anssizp2, int *resplen2, int *ansp2_malloced)
933 const UHEADER *hp = (UHEADER *) buf;
934 const UHEADER *hp2 = (UHEADER *) buf2;
935 struct timespec now, timeout, finish;
936 struct pollfd pfd[1];
937 int ptimeout;
938 struct sockaddr_in6 from;
939 int resplen = 0;
940 int n;
943 * Compute time for the total operation.
/* The budget is statp->retrans shifted left by NS, divided by
   statp->nscount when NS > 0, and never less than one second. */
945 int seconds = (statp->retrans << ns);
946 if (ns > 0)
947 seconds /= statp->nscount;
948 if (seconds <= 0)
949 seconds = 1;
950 bool single_request_reopen = (statp->options & RES_SNGLKUPREOP) != 0;
951 bool single_request = (((statp->options & RES_SNGLKUP) != 0)
952 | single_request_reopen);
/* Remember the incoming *GOTSOMEWHERE so that it can be restored when
   the exchange is restarted in one of the single-request modes. */
953 int save_gotsomewhere = *gotsomewhere;
955 int retval;
956 retry_reopen:
/* A non-positive result from reopen is handed straight back to the
   caller after clearing *RESPLEN2 (when RESPLEN2 is non-NULL). */
957 retval = reopen (statp, terrno, ns);
958 if (retval <= 0)
960 if (resplen2 != NULL)
961 *resplen2 = 0;
962 return retval;
964 retry:
/* Fix the deadline FINISH = now + SECONDS and reset the per-attempt
   send/receive bookkeeping. */
965 evNowTime(&now);
966 evConsTime(&timeout, seconds, 0);
967 evAddTime(&finish, &now, &timeout);
968 int need_recompute = 0;
969 int nwritten = 0;
970 int recvresp1 = 0;
971 /* Skip the second response if there is no second query.
972 To do that we mark the second response as received. */
973 int recvresp2 = buf2 == NULL;
/* Start out waiting for the socket to become writable. */
974 pfd[0].fd = EXT(statp).nssocks[ns];
975 pfd[0].events = POLLOUT;
976 wait:
/* When part of the timeout may already have been consumed, recompute
   the time left until FINISH and give up once the deadline has
   passed. */
977 if (need_recompute) {
978 recompute_resend:
979 evNowTime(&now);
980 if (evCmpTime(finish, now) <= 0) {
981 poll_err_out:
982 return close_and_return_error (statp, resplen2);
984 evSubTime(&timeout, &finish, &now);
985 need_recompute = 0;
987 /* Convert struct timespec in milliseconds. */
988 ptimeout = timeout.tv_sec * 1000 + timeout.tv_nsec / 1000000;
/* While nothing has been written yet, first poll with a zero timeout;
   only when that reports nothing ready (or something has already been
   written) poll again with PTIMEOUT and flag the timeout for
   recomputation. */
990 n = 0;
991 if (nwritten == 0)
992 n = __poll (pfd, 1, 0);
993 if (__glibc_unlikely (n == 0)) {
994 n = __poll (pfd, 1, ptimeout);
995 need_recompute = 1;
/* The poll with PTIMEOUT returned 0: nothing became ready. */
997 if (n == 0) {
998 if (resplen > 1 && (recvresp1 || (buf2 != NULL && recvresp2)))
1000 /* There are quite a few broken name servers out
1001 there which don't handle two outstanding
1002 requests from the same source. There are also
1003 broken firewall settings. If we time out after
1004 having received one answer switch to the mode
1005 where we send the second request only once we
1006 have received the first answer. */
1007 if (!single_request)
1009 statp->options |= RES_SNGLKUP;
1010 single_request = true;
1011 *gotsomewhere = save_gotsomewhere;
1012 goto retry;
1014 else if (!single_request_reopen)
1016 statp->options |= RES_SNGLKUPREOP;
1017 single_request_reopen = true;
1018 *gotsomewhere = save_gotsomewhere;
1019 __res_iclose (statp, false);
1020 goto retry_reopen;
1023 *resplen2 = 1;
1024 return resplen;
/* Plain timeout: set *GOTSOMEWHERE, clear *RESPLEN2 and return 0. */
1027 *gotsomewhere = 1;
1028 if (resplen2 != NULL)
1029 *resplen2 = 0;
1030 return 0;
/* poll failed: on EINTR recompute the timeout and poll again,
   otherwise take the common error exit. */
1032 if (n < 0) {
1033 if (errno == EINTR)
1034 goto recompute_resend;
1036 goto poll_err_out;
1038 __set_errno (0);
/* The socket is writable: send the pending query or queries. */
1039 if (pfd[0].revents & POLLOUT) {
1040 #ifndef __ASSUME_SENDMMSG
1041 static int have_sendmmsg;
1042 #else
1043 # define have_sendmmsg 1
1044 #endif
/* Nothing has been written yet, there is a second query and parallel
   requests are allowed: try to send both with one __sendmmsg call. */
1045 if (have_sendmmsg >= 0 && nwritten == 0 && buf2 != NULL
1046 && !single_request)
1048 struct iovec iov =
1049 { .iov_base = (void *) buf, .iov_len = buflen };
1050 struct iovec iov2 =
1051 { .iov_base = (void *) buf2, .iov_len = buflen2 };
1052 struct mmsghdr reqs[2] =
1055 .msg_hdr =
1057 .msg_iov = &iov,
1058 .msg_iovlen = 1,
1062 .msg_hdr =
1064 .msg_iov = &iov2,
1065 .msg_iovlen = 1,
1070 int ndg = __sendmmsg (pfd[0].fd, reqs, 2, MSG_NOSIGNAL);
1071 if (__glibc_likely (ndg == 2))
/* Both datagrams must have gone out with their full length. */
1073 if (reqs[0].msg_len != buflen
1074 || reqs[1].msg_len != buflen2)
1075 goto fail_sendmmsg;
1077 pfd[0].events = POLLIN;
1078 nwritten += 2;
/* Only the first query was sent (in full): continue as if it had been
   sent with a plain send. */
1080 else if (ndg == 1 && reqs[0].msg_len == buflen)
1081 goto just_one;
1082 else if (ndg < 0 && (errno == EINTR || errno == EAGAIN))
1083 goto recompute_resend;
1084 else
1086 #ifndef __ASSUME_SENDMMSG
1087 if (__glibc_unlikely (have_sendmmsg == 0))
/* ENOSYS from the first attempt: remember that __sendmmsg is not
   usable and fall back to __send. */
1089 if (ndg < 0 && errno == ENOSYS)
1091 have_sendmmsg = -1;
1092 goto try_send;
1094 have_sendmmsg = 1;
1096 #endif
1098 fail_sendmmsg:
1099 return close_and_return_error (statp, resplen2);
1102 else
1104 ssize_t sr;
1105 #ifndef __ASSUME_SENDMMSG
1106 try_send:
1107 #endif
/* The first query goes out first; once something has been written the
   second query is the one that is sent. */
1108 if (nwritten != 0)
1109 sr = __send (pfd[0].fd, buf2, buflen2, MSG_NOSIGNAL);
1110 else
1111 sr = __send (pfd[0].fd, buf, buflen, MSG_NOSIGNAL);
1113 if (sr != (nwritten != 0 ? buflen2 : buflen)) {
1114 if (errno == EINTR || errno == EAGAIN)
1115 goto recompute_resend;
1116 return close_and_return_error (statp, resplen2);
1118 just_one:
/* Keep asking for writability only while a second query still has to
   be sent and single-request mode is off. */
1119 if (nwritten != 0 || buf2 == NULL || single_request)
1120 pfd[0].events = POLLIN;
1121 else
1122 pfd[0].events = POLLIN | POLLOUT;
1123 ++nwritten;
1125 goto wait;
/* The socket is readable: choose the answer buffer, size and length
   slot that this datagram is read into. */
1126 } else if (pfd[0].revents & POLLIN) {
1127 int *thisanssizp;
1128 u_char **thisansp;
1129 int *thisresplenp;
1131 if ((recvresp1 | recvresp2) == 0 || buf2 == NULL) {
1132 /* We have not received any responses
1133 yet or we only have one response to
1134 receive. */
1135 thisanssizp = anssizp;
1136 thisansp = anscp ?: ansp;
1137 assert (anscp != NULL || ansp2 == NULL);
1138 thisresplenp = &resplen;
1139 } else {
1140 thisanssizp = anssizp2;
1141 thisansp = ansp2;
1142 thisresplenp = resplen2;
1145 if (*thisanssizp < MAXPACKET
1146 /* If the current buffer is not the static
1147 user-supplied buffer then we can reallocate
1148 it. */
1149 && (thisansp != NULL && thisansp != ansp)
1150 #ifdef FIONREAD
1151 /* Is the size too small? */
1152 && (__ioctl (pfd[0].fd, FIONREAD, thisresplenp) < 0
1153 || *thisanssizp < *thisresplenp)
1154 #endif
1156 /* Always allocate MAXPACKET, callers expect
1157 this specific size. */
/* If malloc fails the existing buffer and size are left untouched. */
1158 u_char *newp = malloc (MAXPACKET);
1159 if (newp != NULL) {
1160 *thisanssizp = MAXPACKET;
1161 *thisansp = newp;
1162 if (thisansp == ansp2)
1163 *ansp2_malloced = 1;
1166 /* We could end up with truncation if anscp was NULL
1167 (not allowed to change caller's buffer) and the
1168 response buffer size is too small. This isn't a
1169 reliable way to detect truncation because the ioctl
1170 may be an inaccurate report of the UDP message size.
1171 Therefore we use this only to issue debug output.
1172 To do truncation accurately with UDP we need
1173 MSG_TRUNC which is only available on Linux. We
1174 can abstract out the Linux-specific feature in the
1175 future to detect truncation. */
1176 UHEADER *anhp = (UHEADER *) *thisansp;
1177 socklen_t fromlen = sizeof(struct sockaddr_in6);
1178 assert (sizeof(from) <= fromlen);
1179 *thisresplenp = __recvfrom (pfd[0].fd, (char *) *thisansp,
1180 *thisanssizp, 0,
1181 (struct sockaddr *) &from,
1182 &fromlen);
/* A non-positive length is an error unless errno is EINTR or EAGAIN,
   in which case waiting resumes with a recomputed timeout. */
1183 if (__glibc_unlikely (*thisresplenp <= 0)) {
1184 if (errno == EINTR || errno == EAGAIN) {
1185 need_recompute = 1;
1186 goto wait;
1188 return close_and_return_error (statp, resplen2);
1190 *gotsomewhere = 1;
1191 if (__glibc_unlikely (*thisresplenp < HFIXEDSZ)) {
1193 * Undersized message.
1195 *terrno = EMSGSIZE;
1196 return close_and_return_error (statp, resplen2);
1199 /* Check for the correct header layout and a matching
1200 question. */
1201 int matching_query = 0; /* Default to no matching query. */
1202 if (!recvresp1
1203 && anhp->id == hp->id
1204 && __libc_res_queriesmatch (buf, buf + buflen,
1205 *thisansp,
1206 *thisansp + *thisanssizp))
1207 matching_query = 1;
1208 if (!recvresp2
1209 && anhp->id == hp2->id
1210 && __libc_res_queriesmatch (buf2, buf2 + buflen2,
1211 *thisansp,
1212 *thisansp + *thisanssizp))
1213 matching_query = 2;
1214 if (matching_query == 0)
1215 /* Spurious UDP packet. Drop it and continue
1216 waiting. */
1218 need_recompute = 1;
1219 goto wait;
/* SERVFAIL, NOTIMP and REFUSED replies are not used as an answer. */
1222 if (anhp->rcode == SERVFAIL ||
1223 anhp->rcode == NOTIMP ||
1224 anhp->rcode == REFUSED) {
1225 next_ns:
/* If the other response has already been recorded, return its length
   with *RESPLEN2 cleared. */
1226 if (recvresp1 || (buf2 != NULL && recvresp2)) {
1227 *resplen2 = 0;
1228 return resplen;
1230 if (buf2 != NULL)
1232 /* No data from the first reply. */
1233 resplen = 0;
1234 /* We are waiting for a possible second reply. */
1235 if (matching_query == 1)
1236 recvresp1 = 1;
1237 else
1238 recvresp2 = 1;
1240 goto wait;
1243 /* don't retry if called from dig */
1244 if (!statp->pfcode)
1245 return close_and_return_error (statp, resplen2);
1246 __res_iclose(statp, false);
/* A NOERROR reply with zero answer and additional counts and neither
   AA nor RA set takes the same path as the failure codes above. */
1248 if (anhp->rcode == NOERROR && anhp->ancount == 0
1249 && anhp->aa == 0 && anhp->ra == 0 && anhp->arcount == 0) {
1250 goto next_ns;
1252 if (!(statp->options & RES_IGNTC) && anhp->tc) {
1254 * To get the rest of answer,
1255 * use TCP with same server.
1257 *v_circuit = 1;
1258 __res_iclose(statp, false);
1259 // XXX if we have received one reply we could
1260 // XXX use it and not repeat it over TCP...
1261 if (resplen2 != NULL)
1262 *resplen2 = 0;
1263 return (1);
1265 /* Mark which reply we received. */
1266 if (matching_query == 1)
1267 recvresp1 = 1;
1268 else
1269 recvresp2 = 1;
1270 /* Repeat waiting if we have a second answer to arrive. */
1271 if ((recvresp1 & recvresp2) == 0) {
/* In single-request mode go back to waiting for writability; with
   single-request-reopen set the socket is closed and reopened first. */
1272 if (single_request) {
1273 pfd[0].events = POLLOUT;
1274 if (single_request_reopen) {
1275 __res_iclose (statp, false);
1276 retval = reopen (statp, terrno, ns);
1277 if (retval <= 0)
1279 if (resplen2 != NULL)
1280 *resplen2 = 0;
1281 return retval;
1283 pfd[0].fd = EXT(statp).nssocks[ns];
1286 goto wait;
1288 /* All is well. We have received both responses (if
1289 two responses were requested). */
1290 return (resplen);
1291 } else if (pfd[0].revents & (POLLERR | POLLHUP | POLLNVAL))
1292 /* Something went wrong. We can stop trying. */
1293 return close_and_return_error (statp, resplen2);
1294 else {
1295 /* poll should not have returned > 0 in this case. */
1296 abort ();
/* Return 1 if A1 and A2 designate the same endpoint (address and
   port), 0 otherwise.  Either argument may actually hold an AF_INET
   sockaddr_in; an IPv4 address compares equal to the corresponding
   IPv4-mapped IPv6 address.  Assumes that AF_INET and AF_INET6 are
   the only possible address families.  */
static int
sock_eq(struct sockaddr_in6 *a1, struct sockaddr_in6 *a2) {
	if (a1->sin6_family == a2->sin6_family) {
		if (a1->sin6_family != AF_INET) {
			/* Same non-IPv4 family: compare as IPv6.  */
			if (a1->sin6_port != a2->sin6_port)
				return 0;
			return memcmp(&a1->sin6_addr, &a2->sin6_addr,
				      sizeof (struct in6_addr)) == 0;
		}

		/* Both are IPv4.  */
		struct sockaddr_in *lhs = (struct sockaddr_in *)a1;
		struct sockaddr_in *rhs = (struct sockaddr_in *)a2;
		if (lhs->sin_port != rhs->sin_port)
			return 0;
		return lhs->sin_addr.s_addr == rhs->sin_addr.s_addr;
	}

	/* Mixed families: work out which argument is the IPv6 one and
	   which is the IPv4 one.  */
	struct sockaddr_in6 *v6;
	struct sockaddr_in *v4;
	if (a1->sin6_family == AF_INET) {
		v4 = (struct sockaddr_in *)a1;
		v6 = a2;
	} else {
		v6 = a1;
		v4 = (struct sockaddr_in *)a2;
	}

	if (v6->sin6_port != v4->sin_port)
		return 0;
	if (!IN6_IS_ADDR_V4MAPPED(&v6->sin6_addr))
		return 0;
	/* The low 32 bits of a mapped address carry the IPv4 address.  */
	return v6->sin6_addr.s6_addr32[3] == v4->sin_addr.s_addr;
}