1 #include <sys/socket.h>
2 #include <netinet/in.h>
3 #include <netinet/tcp.h>
14 #include "stdio_impl.h"
18 static void cleanup(void *p
)
20 struct pollfd
*pfd
= p
;
21 for (int i
=0; pfd
[i
].fd
>= -1; i
++)
22 if (pfd
[i
].fd
>= 0) __syscall(SYS_close
, pfd
[i
].fd
);
/* Return a timestamp in milliseconds for measuring timeouts and retry
 * intervals.
 *
 * The monotonic clock is preferred so that a step of the wall clock
 * cannot cut short or stretch a query timeout. CLOCK_REALTIME is used
 * only as a fallback when the monotonic clock is unavailable (ENOSYS).
 * The result is truncated to unsigned long and may wrap, so callers
 * should only ever use the difference between two readings. */
static unsigned long mtime()
{
	struct timespec ts;
	if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0 && errno == ENOSYS)
		clock_gettime(CLOCK_REALTIME, &ts);
	return (unsigned long)ts.tv_sec * 1000
		+ ts.tv_nsec / 1000000;
}
/* Begin a TCP exchange for one query on a freshly created socket.
 *
 * pfd    - poll slot for this query; its events field is set here.
 * family - address family passed to socket().
 * sa, sl - destination address and its length.
 * q, ql  - query packet and its length in bytes.
 *
 * Returns 0 when the connection succeeded or is still in progress
 * (EINPROGRESS).
 * NOTE(review): this listing omits some source lines of the function
 * (among them the declaration that owns the initializer below and the
 * statements after the final return 0), so the other return paths are
 * not documented here. */
33 static int start_tcp(struct pollfd
*pfd
, int family
, const void *sa
, socklen_t sl
, const unsigned char *q
, int ql
)
/* Message initializer: the destination address plus two buffers, a
 * 2-byte length prefix (high byte of ql first) followed by the query. */
36 .msg_name
= (void *)sa
,
39 .msg_iov
= (struct iovec
[2]){
40 { .iov_base
= (uint8_t[]){ ql
>>8, ql
}, .iov_len
= 2 },
41 { .iov_base
= (void *)q
, .iov_len
= ql
} }
/* Non-blocking, close-on-exec stream socket. */
44 int fd
= socket(family
, SOCK_STREAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
/* By default wait for the socket to become writable. */
46 pfd
->events
= POLLOUT
;
/* If TCP_FASTOPEN_CONNECT can be enabled (setsockopt returns 0), try to
 * send the prefixed query immediately with MSG_FASTOPEN. */
47 if (!setsockopt(fd
, IPPROTO_TCP
, TCP_FASTOPEN_CONNECT
,
48 &(int){1}, sizeof(int))) {
49 r
= sendmsg(fd
, &mh
, MSG_FASTOPEN
|MSG_NOSIGNAL
);
/* Prefix and query fully sent: switch to waiting for the reply. */
50 if (r
== ql
+2) pfd
->events
= POLLIN
;
52 if (errno
== EINPROGRESS
) return 0;
/* Otherwise start an ordinary connect on the non-blocking socket. */
54 r
= connect(fd
, sa
, sl
);
55 if (!r
|| errno
== EINPROGRESS
) return 0;
/* Advance the iovec array of *mh past its first n bytes, so that a
 * later sendmsg/recvmsg continues where a partial transfer stopped.
 * The msghdr is modified in place.
 *
 * mh - message header whose msg_iov/msg_iovlen are adjusted.
 * n  - number of leading bytes to skip. */
61 static void step_mh(struct msghdr
*mh
, size_t n
)
63 /* Adjust iovec in msghdr to skip first n bytes. */
/* Consume whole buffers while n covers the current one.
 * NOTE(review): the statements that step to the next buffer are not
 * visible in this listing; presumably msg_iov is advanced and
 * msg_iovlen decremented here - confirm against the full source. */
64 while (mh
->msg_iovlen
&& n
>= mh
->msg_iov
->iov_len
) {
65 n
-= mh
->msg_iov
->iov_len
;
/* Nothing left to adjust once every buffer has been consumed. */
69 if (!mh
->msg_iovlen
) return;
/* Skip the remaining n bytes inside the current buffer. */
70 mh
->msg_iov
->iov_base
= (char *)mh
->msg_iov
->iov_base
+ n
;
71 mh
->msg_iov
->iov_len
-= n
;
74 /* Internal contract for __res_msend[_rc]: asize must be >=512, nqueries
75 * must be sufficiently small to be safe as VLA size. In practice it's
76 * either 1 or 2, anyway. */
/* Send nqueries DNS queries to the nameservers listed in conf and
 * collect the answers.
 *
 * queries[i] / qlens[i] give each query packet and its length. Accepted
 * replies end up in answers[i], with alens[i] tracking the result;
 * alens is zeroed before anything is sent, and a zero entry means no
 * reply was accepted for that query. asize is the size used for each
 * answer buffer when receiving over TCP.
 *
 * Queries are sent over UDP to every configured nameserver. A reply
 * with the TC bit set, or one that recvmsg flags MSG_TRUNC, is retried
 * over TCP via start_tcp(). conf->timeout is multiplied by 1000 and
 * compared against differences of mtime() readings (milliseconds); the
 * total is divided by conf->attempts to get the resend interval.
 *
 * NOTE(review): this listing omits some source lines of the function
 * (several declarations, the return statements and closing braces), so
 * the return value is not documented here. */
78 int __res_msend_rc(int nqueries
, const unsigned char *const *queries
,
79 const int *qlens
, unsigned char *const *answers
, int *alens
, int asize
,
80 const struct resolvconf
*conf
)
83 int timeout
, attempts
, retry_interval
, servfail_retry
;
/* sa receives the peer address of each UDP reply and is also the local
 * address passed to bind(); ns[] holds the nameserver addresses. */
85 struct sockaddr_in sin
;
86 struct sockaddr_in6 sin6
;
87 } sa
= {0}, ns
[MAXNS
] = {{0}};
88 socklen_t sl
= sizeof sa
.sin
;
/* pfd[0..nqueries-1] are per-query TCP slots, pfd[nqueries] is the UDP
 * socket and pfd[nqueries+1] is the terminator that cleanup() stops at. */
95 struct pollfd pfd
[nqueries
+2];
/* qpos/apos are passed to step_mh() as the number of bytes to skip when
 * resuming a TCP send/receive; alen_buf receives the 2-byte TCP length
 * prefix of each answer. */
96 int qpos
[nqueries
], apos
[nqueries
];
97 unsigned char alen_buf
[nqueries
][2];
99 unsigned long t0
, t1
, t2
;
/* Cancellation is disabled during setup; the previous state is saved in
 * cs and restored below. */
101 pthread_setcancelstate(PTHREAD_CANCEL_DISABLE
, &cs
);
103 timeout
= 1000*conf
->timeout
;
104 attempts
= conf
->attempts
;
/* Build a socket address (port 53) for every configured nameserver.
 * Any IPv6 server switches the socket family to AF_INET6. */
106 for (nns
=0; nns
<conf
->nns
; nns
++) {
107 const struct address
*iplit
= &conf
->ns
[nns
];
108 if (iplit
->family
== AF_INET
) {
109 memcpy(&ns
[nns
].sin
.sin_addr
, iplit
->addr
, 4);
110 ns
[nns
].sin
.sin_port
= htons(53);
111 ns
[nns
].sin
.sin_family
= AF_INET
;
114 memcpy(&ns
[nns
].sin6
.sin6_addr
, iplit
->addr
, 16);
115 ns
[nns
].sin6
.sin6_port
= htons(53);
116 ns
[nns
].sin6
.sin6_scope_id
= iplit
->scopeid
;
117 ns
[nns
].sin6
.sin6_family
= family
= AF_INET6
;
121 /* Get local address and open/bind a socket */
122 fd
= socket(family
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
124 /* Handle case where system lacks IPv6 support */
125 if (fd
< 0 && family
== AF_INET6
&& errno
== EAFNOSUPPORT
) {
/* NOTE(review): the loop counter is i, but the condition indexes
 * conf->ns with nns (the element count), so the same entry - one past
 * the last one filled in above - is tested on every iteration.
 * conf->ns[i] looks like what was intended; confirm and fix. */
126 for (i
=0; i
<nns
&& conf
->ns
[nns
].family
== AF_INET6
; i
++);
128 pthread_setcancelstate(cs
, 0);
131 fd
= socket(AF_INET
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
135 sa
.sin
.sin_family
= family
;
/* Failure to create or bind the socket: release it and restore the
 * caller's cancellation state. */
136 if (fd
< 0 || bind(fd
, (void *)&sa
, sl
) < 0) {
137 if (fd
>= 0) close(fd
);
138 pthread_setcancelstate(cs
, 0);
142 /* Past this point, there are no errors. Each individual query will
143 * yield either no reply (indicated by zero length) or an answer
144 * packet which is up to the caller to interpret. */
/* Mark all TCP slots unused, install the UDP socket and terminate the
 * array with fd -2 so cleanup() knows where to stop. */
146 for (i
=0; i
<nqueries
; i
++) pfd
[i
].fd
= -1;
147 pfd
[nqueries
].fd
= fd
;
148 pfd
[nqueries
].events
= POLLIN
;
149 pfd
[nqueries
+1].fd
= -2;
/* From here on the sockets are closed by the cleanup handler, so the
 * saved cancellation state can be restored. */
151 pthread_cleanup_push(cleanup
, pfd
);
152 pthread_setcancelstate(cs
, 0);
154 /* Convert any IPv4 addresses in a mixed environment to v4-mapped */
155 if (family
== AF_INET6
) {
/* sizeof 0 is the size of an int, matching the option value. */
156 setsockopt(fd
, IPPROTO_IPV6
, IPV6_V6ONLY
, &(int){0}, sizeof 0);
157 for (i
=0; i
<nns
; i
++) {
158 if (ns
[i
].sin
.sin_family
!= AF_INET
) continue;
/* The IPv4 address is copied to the last 4 bytes first, then the
 * 12-byte v4-mapped prefix is written in front of it. */
159 memcpy(ns
[i
].sin6
.sin6_addr
.s6_addr
+12,
160 &ns
[i
].sin
.sin_addr
, 4);
161 memcpy(ns
[i
].sin6
.sin6_addr
.s6_addr
,
162 "\0\0\0\0\0\0\0\0\0\0\xff\xff", 12);
163 ns
[i
].sin6
.sin6_family
= AF_INET6
;
164 ns
[i
].sin6
.sin6_flowinfo
= 0;
165 ns
[i
].sin6
.sin6_scope_id
= 0;
169 memset(alens
, 0, sizeof *alens
* nqueries
);
171 retry_interval
= timeout
/ attempts
;
/* Back-date t1 by one interval so the first pass of the loop sends the
 * queries immediately. */
174 t1
= t2
- retry_interval
;
/* Main loop: runs until the overall timeout has elapsed. */
176 for (; t2
-t0
< timeout
; t2
=mtime()) {
177 /* This is the loop exit condition: that all queries
178 * have an accepted answer. */
179 for (i
=0; i
<nqueries
&& alens
[i
]>0; i
++);
180 if (i
==nqueries
) break;
182 if (t2
-t1
>= retry_interval
) {
183 /* Query all configured nameservers in parallel */
184 for (i
=0; i
<nqueries
; i
++)
186 for (j
=0; j
<nns
; j
++)
187 sendto(fd
, queries
[i
],
188 qlens
[i
], MSG_NOSIGNAL
,
/* Budget for immediate resends after server-failure replies. */
191 servfail_retry
= 2 * nqueries
;
194 /* Wait for a response, or until time to retry */
195 if (poll(pfd
, nqueries
+1, t1
+retry_interval
-t2
) <= 0) continue;
/* UDP phase: receive datagrams into the next free answer slot. */
197 while (next
< nqueries
) {
199 .msg_name
= (void *)&sa
,
202 .msg_iov
= (struct iovec
[]){
203 { .iov_base
= (void *)answers
[next
],
207 rlen
= recvmsg(fd
, &mh
, 0);
210 /* Ignore non-identifiable packets */
211 if (rlen
< 4) continue;
213 /* Ignore replies from addresses we didn't send to */
214 for (j
=0; j
<nns
&& memcmp(ns
+j
, &sa
, sl
); j
++);
215 if (j
==nns
) continue;
217 /* Find which query this answer goes with, if any */
/* Matching compares the first two bytes of the reply with those of
 * each query. */
218 for (i
=next
; i
<nqueries
&& (
219 answers
[next
][0] != queries
[i
][0] ||
220 answers
[next
][1] != queries
[i
][1] ); i
++);
221 if (i
==nqueries
) continue;
/* Skip replies for queries whose alens entry is already nonzero. */
222 if (alens
[i
]) continue;
224 /* Only accept positive or negative responses;
225 * retry immediately on server failure, and ignore
226 * all other codes such as refusal. */
227 switch (answers
[next
][3] & 15) {
232 if (servfail_retry
&& servfail_retry
--)
233 sendto(fd
, queries
[i
],
234 qlens
[i
], MSG_NOSIGNAL
,
240 /* Store answer in the right slot, or update next
241 * available temp slot if it's already in place. */
244 for (; next
<nqueries
&& alens
[next
]; next
++);
246 memcpy(answers
[i
], answers
[next
], rlen
);
248 /* Ignore further UDP if all slots full or TCP-mode */
249 if (next
== nqueries
) pfd
[nqueries
].events
= 0;
251 /* If answer is truncated (TC bit), fallback to TCP */
252 if ((answers
[i
][2] & 2) || (mh
.msg_flags
& MSG_TRUNC
)) {
/* Cancellation is disabled around start_tcp() and restored to cs
 * afterwards. The TCP connection goes to ns[j], the server whose
 * reply was just matched. */
254 pthread_setcancelstate(PTHREAD_CANCEL_DISABLE
, 0);
255 r
= start_tcp(pfd
+i
, family
, ns
+j
, sl
, queries
[i
], qlens
[i
]);
256 pthread_setcancelstate(cs
, 0);
/* TCP send phase: for every slot reported writable, (re)send the
 * length-prefixed query, skipping the qpos[i] bytes passed to step_mh. */
265 for (i
=0; i
<nqueries
; i
++) if (pfd
[i
].revents
& POLLOUT
) {
268 .msg_iov
= (struct iovec
[2]){
269 { .iov_base
= (uint8_t[]){ qlens
[i
]>>8, qlens
[i
] }, .iov_len
= 2 },
270 { .iov_base
= (void *)queries
[i
], .iov_len
= qlens
[i
] } }
272 step_mh(&mh
, qpos
[i
]);
273 r
= sendmsg(pfd
[i
].fd
, &mh
, MSG_NOSIGNAL
);
/* Whole prefix + query sent: wait for the answer instead. */
276 if (qpos
[i
] == qlens
[i
]+2)
277 pfd
[i
].events
= POLLIN
;
/* TCP receive phase: for every slot reported readable, continue
 * reading the 2-byte length prefix and the answer body, skipping the
 * apos[i] bytes passed to step_mh. */
280 for (i
=0; i
<nqueries
; i
++) if (pfd
[i
].revents
& POLLIN
) {
283 .msg_iov
= (struct iovec
[2]){
284 { .iov_base
= alen_buf
[i
], .iov_len
= 2 },
285 { .iov_base
= answers
[i
], .iov_len
= asize
} }
287 step_mh(&mh
, apos
[i
]);
288 r
= recvmsg(pfd
[i
].fd
, &mh
, 0);
/* The length prefix is not complete yet. */
291 if (apos
[i
] < 2) continue;
/* Decode the prefix, high byte first. */
292 int alen
= alen_buf
[i
][0]*256 + alen_buf
[i
][1];
/* An announced length below 13 bytes leaves the loop via the out label. */
293 if (alen
< 13) goto out
;
294 if (apos
[i
] < alen
+2 && apos
[i
] < asize
+2)
/* Response code: low 4 bits of the fourth answer byte; anything other
 * than 0 or 3 is treated specially (handling not visible here). */
296 int rcode
= answers
[i
][3] & 15;
297 if (rcode
!= 0 && rcode
!= 3)
300 /* Storing the length here commits the accepted answer.
301 * Immediately close TCP socket so as not to consume
302 * resources we no longer need. */
304 __syscall(SYS_close
, pfd
[i
].fd
);
/* Pop the cleanup handler and run it (argument 1), which closes the
 * descriptors still recorded in pfd. */
309 pthread_cleanup_pop(1);
311 /* Disregard any incomplete TCP results */
312 for (i
=0; i
<nqueries
; i
++) if (alens
[i
]<0) alens
[i
] = 0;
317 int __res_msend(int nqueries
, const unsigned char *const *queries
,
318 const int *qlens
, unsigned char *const *answers
, int *alens
, int asize
)
320 struct resolvconf conf
;
321 if (__get_resolv_conf(&conf
, 0, 0) < 0) return -1;
322 return __res_msend_rc(nqueries
, queries
, qlens
, answers
, alens
, asize
, &conf
);