1 #include <sys/socket.h>
2 #include <netinet/in.h>
3 #include <netinet/tcp.h>
14 #include "stdio_impl.h"
/* Cleanup handler for an array of struct pollfd passed as p.
 * The array is walked until an entry with fd below -1 is reached;
 * every entry holding a non-negative descriptor is closed with a raw
 * SYS_close syscall, while entries with fd == -1 are skipped. */
static void cleanup(void *p)
{
	struct pollfd *cur = p;

	while (cur->fd >= -1) {
		if (cur->fd >= 0)
			__syscall(SYS_close, cur->fd);
		cur++;
	}
}
/* Return the current time in milliseconds as an unsigned long, computed
 * from ts as tv_sec*1000 + tv_nsec/1000000.
 * NOTE(review): the declaration of ts is not visible in this excerpt;
 * presumably a local struct timespec -- confirm. */
25 static unsigned long mtime()
/* Prefer the monotonic clock; fall back to the realtime clock only when
 * the monotonic request fails with ENOSYS. */
28 if (clock_gettime(CLOCK_MONOTONIC
, &ts
) < 0 && errno
== ENOSYS
)
29 clock_gettime(CLOCK_REALTIME
, &ts
);
/* The cast applies to tv_sec before the multiplication, so the seconds
 * term is evaluated in unsigned long arithmetic. */
30 return (unsigned long)ts
.tv_sec
* 1000
31 + ts
.tv_nsec
/ 1000000;
/* Start a nonblocking TCP exchange for one query.
 *
 * pfd    - poll entry for this query; its events field is set here.
 * family - address family passed to socket().
 * sa, sl - destination address and its length.
 * q, ql  - query bytes and their length.
 *
 * The outgoing message is a 2-byte length (high byte first) followed by
 * the query itself. Returns 0 when the connection succeeded or is still
 * in progress.
 * NOTE(review): the declarations of mh and r, the handling of a
 * successful sendmsg, and the failure path are not visible in this
 * excerpt -- confirm against the full file. */
34 static int start_tcp(struct pollfd
*pfd
, int family
, const void *sa
, socklen_t sl
, const unsigned char *q
, int ql
)
37 .msg_name
= (void *)sa
,
/* Two-element iovec: length prefix, then the query payload. */
40 .msg_iov
= (struct iovec
[2]){
41 { .iov_base
= (uint8_t[]){ ql
>>8, ql
}, .iov_len
= 2 },
42 { .iov_base
= (void *)q
, .iov_len
= ql
} }
45 int fd
= socket(family
, SOCK_STREAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
/* Default: wait for the socket to become writable. */
47 pfd
->events
= POLLOUT
;
/* If TCP_FASTOPEN_CONNECT can be enabled, try to send the whole
 * message right away with MSG_FASTOPEN. */
48 if (!setsockopt(fd
, IPPROTO_TCP
, TCP_FASTOPEN_CONNECT
,
49 &(int){1}, sizeof(int))) {
50 r
= sendmsg(fd
, &mh
, MSG_FASTOPEN
|MSG_NOSIGNAL
);
/* All ql+2 bytes went out: nothing left to write, wait for the reply. */
51 if (r
== ql
+2) pfd
->events
= POLLIN
;
53 if (errno
== EINPROGRESS
) return 0;
/* Ordinary nonblocking connect; EINPROGRESS is treated like success. */
55 r
= connect(fd
, sa
, sl
);
56 if (!r
|| errno
== EINPROGRESS
) return 0;
/* Advance the iovec array of *mh past the first n bytes, so that a
 * following send or receive resumes where a partial transfer stopped.
 * The msghdr and its iovec entries are modified in place. */
62 static void step_mh(struct msghdr
*mh
, size_t n
)
64 /* Adjust iovec in msghdr to skip first n bytes. */
/* Consume whole iovec entries while n covers them.
 * NOTE(review): the statements that move to the next entry are not
 * visible in this excerpt -- confirm against the full file. */
65 while (mh
->msg_iovlen
&& n
>= mh
->msg_iov
->iov_len
) {
66 n
-= mh
->msg_iov
->iov_len
;
/* Nothing left to adjust once every entry has been consumed. */
70 if (!mh
->msg_iovlen
) return;
/* Skip the remaining n bytes inside the current entry. */
71 mh
->msg_iov
->iov_base
= (char *)mh
->msg_iov
->iov_base
+ n
;
72 mh
->msg_iov
->iov_len
-= n
;
75 /* Internal contract for __res_msend[_rc]: asize must be >=512, nqueries
76 * must be sufficiently small to be safe as VLA size. In practice it's
77 * either 1 or 2, anyway. */
/* Send nqueries queries to the nameservers listed in conf and collect
 * the answers.
 *
 * queries, qlens - query packets and their lengths.
 * answers, asize - one buffer per query, each asize bytes.
 * alens          - per-query answer length; zeroed before sending, and
 *                  any negative value is reset to 0 at the end.
 * conf           - resolver configuration (nameservers, timeout,
 *                  attempts).
 *
 * Queries go out over one nonblocking UDP socket and are re-sent every
 * retry_interval until each has an accepted answer or the timeout
 * expires. A truncated answer switches that query to TCP via
 * start_tcp().
 * NOTE(review): several statements, including the return statements,
 * are not visible in this excerpt -- confirm details against the full
 * file. */
79 int __res_msend_rc(int nqueries
, const unsigned char *const *queries
,
80 const int *qlens
, unsigned char *const *answers
, int *alens
, int asize
,
81 const struct resolvconf
*conf
)
84 int timeout
, attempts
, retry_interval
, servfail_retry
;
86 struct sockaddr_in sin
;
87 struct sockaddr_in6 sin6
;
88 } sa
= {0}, ns
[MAXNS
] = {{0}};
89 socklen_t sl
= sizeof sa
.sin
;
/* Arrays sized by nqueries; pfd carries two extra entries (the UDP
 * socket and a terminator, filled in further down). */
96 struct pollfd pfd
[nqueries
+2];
97 int qpos
[nqueries
], apos
[nqueries
];
98 unsigned char alen_buf
[nqueries
][2];
100 unsigned long t0
, t1
, t2
;
/* Cancellation stays disabled until the cleanup handler is installed. */
102 pthread_setcancelstate(PTHREAD_CANCEL_DISABLE
, &cs
);
/* conf->timeout is scaled by 1000 (presumably seconds to the
 * milliseconds used by mtime() -- confirm). */
104 timeout
= 1000*conf
->timeout
;
105 attempts
= conf
->attempts
;
/* Build one socket address per configured nameserver, port 53. The
 * family variable is set to AF_INET6 when an IPv6 server is stored. */
107 for (nns
=0; nns
<conf
->nns
; nns
++) {
108 const struct address
*iplit
= &conf
->ns
[nns
];
109 if (iplit
->family
== AF_INET
) {
110 memcpy(&ns
[nns
].sin
.sin_addr
, iplit
->addr
, 4);
111 ns
[nns
].sin
.sin_port
= htons(53);
112 ns
[nns
].sin
.sin_family
= AF_INET
;
115 memcpy(&ns
[nns
].sin6
.sin6_addr
, iplit
->addr
, 16);
116 ns
[nns
].sin6
.sin6_port
= htons(53);
117 ns
[nns
].sin6
.sin6_scope_id
= iplit
->scopeid
;
118 ns
[nns
].sin6
.sin6_family
= family
= AF_INET6
;
122 /* Get local address and open/bind a socket */
123 fd
= socket(family
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
125 /* Handle case where system lacks IPv6 support */
126 if (fd
< 0 && family
== AF_INET6
&& errno
== EAFNOSUPPORT
) {
/* Scan the configured nameservers for one that is not IPv6. The index
 * must be the loop variable i so that each of the nns entries is
 * examined; indexing with nns would test one fixed element past the
 * entries filled above on every pass. */
127 for (i
=0; i
<nns
&& conf
->ns
[i
].family
== AF_INET6
; i
++);
129 pthread_setcancelstate(cs
, 0);
/* Retry with an IPv4 socket. */
132 fd
= socket(AF_INET
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
137 /* Convert any IPv4 addresses in a mixed environment to v4-mapped */
138 if (fd
>= 0 && family
== AF_INET6
) {
/* Clear IPV6_V6ONLY so the IPv6 socket can reach v4-mapped peers. */
139 setsockopt(fd
, IPPROTO_IPV6
, IPV6_V6ONLY
, &(int){0}, sizeof 0);
140 for (i
=0; i
<nns
; i
++) {
141 if (ns
[i
].sin
.sin_family
!= AF_INET
) continue;
/* Copy the 4 address bytes into the tail first, then write the
 * 12-byte ::ffff: prefix. */
142 memcpy(ns
[i
].sin6
.sin6_addr
.s6_addr
+12,
143 &ns
[i
].sin
.sin_addr
, 4);
144 memcpy(ns
[i
].sin6
.sin6_addr
.s6_addr
,
145 "\0\0\0\0\0\0\0\0\0\0\xff\xff", 12);
146 ns
[i
].sin6
.sin6_family
= AF_INET6
;
147 ns
[i
].sin6
.sin6_flowinfo
= 0;
148 ns
[i
].sin6
.sin6_scope_id
= 0;
/* sa was zero-initialized; only the family is set before binding. */
152 sa
.sin
.sin_family
= family
;
153 if (fd
< 0 || bind(fd
, (void *)&sa
, sl
) < 0) {
154 if (fd
>= 0) close(fd
);
155 pthread_setcancelstate(cs
, 0);
159 /* Past this point, there are no errors. Each individual query will
160 * yield either no reply (indicated by zero length) or an answer
161 * packet which is up to the caller to interpret. */
/* pfd layout: entries 0..nqueries-1 are per-query slots (initially -1,
 * filled by start_tcp), entry nqueries is the UDP socket, and entry
 * nqueries+1 holds -2, the value on which cleanup() stops. */
163 for (i
=0; i
<nqueries
; i
++) pfd
[i
].fd
= -1;
164 pfd
[nqueries
].fd
= fd
;
165 pfd
[nqueries
].events
= POLLIN
;
166 pfd
[nqueries
+1].fd
= -2;
/* With the handler registered, restore the caller's cancel state. */
168 pthread_cleanup_push(cleanup
, pfd
);
169 pthread_setcancelstate(cs
, 0);
/* Start with "no answer" for every query. */
171 memset(alens
, 0, sizeof *alens
* nqueries
);
173 retry_interval
= timeout
/ attempts
;
/* Makes t2-t1 equal retry_interval, so the first loop pass sends
 * immediately. */
176 t1
= t2
- retry_interval
;
178 for (; t2
-t0
< timeout
; t2
=mtime()) {
179 /* This is the loop exit condition: that all queries
180 * have an accepted answer. */
181 for (i
=0; i
<nqueries
&& alens
[i
]>0; i
++);
182 if (i
==nqueries
) break;
184 if (t2
-t1
>= retry_interval
) {
185 /* Query all configured nameservers in parallel */
186 for (i
=0; i
<nqueries
; i
++)
188 for (j
=0; j
<nns
; j
++)
189 sendto(fd
, queries
[i
],
190 qlens
[i
], MSG_NOSIGNAL
,
/* Budget of immediate re-sends allowed on server failure. */
193 servfail_retry
= 2 * nqueries
;
196 /* Wait for a response, or until time to retry */
197 if (poll(pfd
, nqueries
+1, t1
+retry_interval
-t2
) <= 0) continue;
/* Receive UDP replies into answers[next], which serves as the
 * temporary slot until the matching query is identified. */
199 while (next
< nqueries
) {
201 .msg_name
= (void *)&sa
,
204 .msg_iov
= (struct iovec
[]){
205 { .iov_base
= (void *)answers
[next
],
209 rlen
= recvmsg(fd
, &mh
, 0);
212 /* Ignore non-identifiable packets */
213 if (rlen
< 4) continue;
215 /* Ignore replies from addresses we didn't send to */
216 for (j
=0; j
<nns
&& memcmp(ns
+j
, &sa
, sl
); j
++);
217 if (j
==nns
) continue;
219 /* Find which query this answer goes with, if any */
220 for (i
=next
; i
<nqueries
&& (
221 answers
[next
][0] != queries
[i
][0] ||
222 answers
[next
][1] != queries
[i
][1] ); i
++);
223 if (i
==nqueries
) continue;
224 if (alens
[i
]) continue;
226 /* Only accept positive or negative responses;
227 * retry immediately on server failure, and ignore
228 * all other codes such as refusal. */
229 switch (answers
[next
][3] & 15) {
234 if (servfail_retry
&& servfail_retry
--)
235 sendto(fd
, queries
[i
],
236 qlens
[i
], MSG_NOSIGNAL
,
242 /* Store answer in the right slot, or update next
243 * available temp slot if it's already in place. */
246 for (; next
<nqueries
&& alens
[next
]; next
++);
248 memcpy(answers
[i
], answers
[next
], rlen
);
250 /* Ignore further UDP if all slots full or TCP-mode */
251 if (next
== nqueries
) pfd
[nqueries
].events
= 0;
253 /* If answer is truncated (TC bit), fallback to TCP */
254 if ((answers
[i
][2] & 2) || (mh
.msg_flags
& MSG_TRUNC
)) {
/* Cancellation is disabled while start_tcp() runs, then restored. */
256 pthread_setcancelstate(PTHREAD_CANCEL_DISABLE
, 0);
257 r
= start_tcp(pfd
+i
, family
, ns
+j
, sl
, queries
[i
], qlens
[i
]);
258 pthread_setcancelstate(cs
, 0);
/* Writable TCP sockets: send the length-prefixed query, resuming at
 * byte offset qpos[i]. */
267 for (i
=0; i
<nqueries
; i
++) if (pfd
[i
].revents
& POLLOUT
) {
270 .msg_iov
= (struct iovec
[2]){
271 { .iov_base
= (uint8_t[]){ qlens
[i
]>>8, qlens
[i
] }, .iov_len
= 2 },
272 { .iov_base
= (void *)queries
[i
], .iov_len
= qlens
[i
] } }
274 step_mh(&mh
, qpos
[i
]);
275 r
= sendmsg(pfd
[i
].fd
, &mh
, MSG_NOSIGNAL
);
/* Prefix plus query fully sent: switch this socket to reading. */
278 if (qpos
[i
] == qlens
[i
]+2)
279 pfd
[i
].events
= POLLIN
;
/* Readable TCP sockets: receive the 2-byte length into alen_buf[i]
 * and the answer into answers[i], resuming at byte offset apos[i]. */
282 for (i
=0; i
<nqueries
; i
++) if (pfd
[i
].revents
& POLLIN
) {
285 .msg_iov
= (struct iovec
[2]){
286 { .iov_base
= alen_buf
[i
], .iov_len
= 2 },
287 { .iov_base
= answers
[i
], .iov_len
= asize
} }
289 step_mh(&mh
, apos
[i
]);
290 r
= recvmsg(pfd
[i
].fd
, &mh
, 0);
291 if (r
<= 0) goto out
;
/* The length prefix is needed before anything else can be decided. */
293 if (apos
[i
] < 2) continue;
/* Decode the prefix, high byte first. */
294 int alen
= alen_buf
[i
][0]*256 + alen_buf
[i
][1];
295 if (alen
< 13) goto out
;
296 if (apos
[i
] < alen
+2 && apos
[i
] < asize
+2)
298 int rcode
= answers
[i
][3] & 15;
299 if (rcode
!= 0 && rcode
!= 3)
302 /* Storing the length here commits the accepted answer.
303 * Immediately close TCP socket so as not to consume
304 * resources we no longer need. */
306 __syscall(SYS_close
, pfd
[i
].fd
);
/* Pop and run the handler, closing every descriptor still in pfd. */
311 pthread_cleanup_pop(1);
313 /* Disregard any incomplete TCP results */
314 for (i
=0; i
<nqueries
; i
++) if (alens
[i
]<0) alens
[i
] = 0;
319 int __res_msend(int nqueries
, const unsigned char *const *queries
,
320 const int *qlens
, unsigned char *const *answers
, int *alens
, int asize
)
322 struct resolvconf conf
;
323 if (__get_resolv_conf(&conf
, 0, 0) < 0) return -1;
324 return __res_msend_rc(nqueries
, queries
, qlens
, answers
, alens
, asize
, &conf
);