Update Red Hat Copyright Notices
[nbdkit.git] / server / sockets.c
blobfe8b24409c14a64fbcc15ffe50acd804bbfc4c47
1 /* nbdkit
2 * Copyright Red Hat
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * * Neither the name of Red Hat nor the names of its contributors may be
16 * used to endorse or promote products derived from this software without
17 * specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
22 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
26 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
27 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
33 #include <config.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <stdbool.h>
38 #include <inttypes.h>
39 #include <string.h>
40 #include <unistd.h>
41 #include <errno.h>
42 #include <assert.h>
43 #include <sys/types.h>
45 #ifdef HAVE_SYS_SOCKET_H
46 #include <sys/socket.h>
47 #endif
49 #ifdef HAVE_SYS_UN_H
50 #include <sys/un.h>
51 #endif
53 #ifdef HAVE_NETINET_IN_H
54 #include <netinet/in.h>
55 #endif
57 #ifdef HAVE_NETINET_TCP_H
58 #include <netinet/tcp.h>
59 #endif
61 #ifdef HAVE_NETDB_H
62 #include <netdb.h>
63 #endif
65 #ifdef HAVE_LINUX_VM_SOCKETS_H
66 #include <linux/vm_sockets.h>
67 #elif HAVE_SYS_VSOCK_H
68 #include <sys/vsock.h>
69 #endif
71 #ifdef HAVE_LIBSELINUX
72 #include <selinux/selinux.h>
73 #endif
75 #include <pthread.h>
77 #include "internal.h"
78 #include "poll.h"
79 #include "utils.h"
80 #include "vector.h"
82 static void
83 set_selinux_label (void)
85 if (selinux_label) {
86 #ifdef HAVE_LIBSELINUX
87 if (setsockcreatecon_raw (selinux_label) == -1) {
88 perror ("selinux-label: setsockcreatecon_raw");
89 exit (EXIT_FAILURE);
91 #else
92 fprintf (stderr,
93 "%s: --selinux-label option used, but "
94 "this binary was compiled without SELinux support\n",
95 program_name);
96 exit (EXIT_FAILURE);
97 #endif
/* Undo set_selinux_label: if the global selinux_label is set, reset
 * the socket creation context by passing NULL to setsockcreatecon_raw.
 *
 * Exits the process if the reset fails.  Compiles to an empty function
 * when HAVE_LIBSELINUX is not defined.
 */
static void
clear_selinux_label (void)
{
#ifdef HAVE_LIBSELINUX
  if (selinux_label && setsockcreatecon_raw (NULL) == -1) {
    perror ("selinux-label: setsockcreatecon_raw(NULL)");
    exit (EXIT_FAILURE);
  }
#endif
}
114 void
115 bind_unix_socket (sockets *socks)
117 size_t len;
118 int sock;
119 struct sockaddr_un addr;
121 assert (unixsocket);
122 #ifndef WIN32 /* On Win32 the abspath might start with a drive letter. */
123 assert (unixsocket[0] == '/');
124 #endif
126 len = strlen (unixsocket);
127 if (len >= UNIX_PATH_MAX) {
128 fprintf (stderr, "%s: -U: path too long: length %zu > max %d bytes\n",
129 program_name, len, UNIX_PATH_MAX-1);
130 exit (EXIT_FAILURE);
133 set_selinux_label ();
135 #ifdef SOCK_CLOEXEC
136 sock = socket (AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
137 #else
138 /* Fortunately, this code is only run at startup, so there is no
139 * risk of the fd leaking to a plugin's fork()
141 sock = set_cloexec (socket (AF_UNIX, SOCK_STREAM, 0));
142 #endif
143 if (sock == -1) {
144 perror ("bind_unix_socket: socket");
145 exit (EXIT_FAILURE);
148 addr.sun_family = AF_UNIX;
149 memcpy (addr.sun_path, unixsocket, len+1 /* trailing \0 */);
151 if (bind (sock, (struct sockaddr *) &addr, sizeof addr) == -1) {
152 perror (unixsocket);
153 exit (EXIT_FAILURE);
156 if (listen (sock, SOMAXCONN) == -1) {
157 perror ("listen");
158 exit (EXIT_FAILURE);
161 clear_selinux_label ();
163 if (sockets_append (socks, sock) == -1) {
164 perror ("realloc");
165 exit (EXIT_FAILURE);
168 debug ("bound to unix socket %s", unixsocket);
171 void
172 bind_tcpip_socket (sockets *socks)
174 struct addrinfo *ai = NULL;
175 struct addrinfo hints;
176 struct addrinfo *a;
177 int err, opt;
178 int saved_errno = 0;
180 if (port == NULL)
181 port = "10809";
183 memset (&hints, 0, sizeof hints);
184 hints.ai_flags = AI_PASSIVE;
185 hints.ai_family = tcpip_sock_af;
186 hints.ai_socktype = SOCK_STREAM;
188 err = getaddrinfo (ipaddr, port, &hints, &ai);
189 if (err != 0) {
190 fprintf (stderr, "%s: getaddrinfo: %s: %s: %s\n",
191 program_name,
192 ipaddr ? ipaddr : "<any>",
193 port,
194 gai_strerror (err));
195 exit (EXIT_FAILURE);
198 for (a = ai; a != NULL; a = a->ai_next) {
199 int sock;
201 set_selinux_label ();
203 #ifdef SOCK_CLOEXEC
204 sock = socket (a->ai_family, a->ai_socktype | SOCK_CLOEXEC, a->ai_protocol);
205 #else
206 /* Fortunately, this code is only run at startup, so there is no
207 * risk of the fd leaking to a plugin's fork()
209 sock = set_cloexec (socket (a->ai_family, a->ai_socktype, a->ai_protocol));
210 #endif
211 if (sock == -1) {
212 if (errno == EAFNOSUPPORT) {
213 /* If ipv6.disable=1 was specified to the Linux kernel then
214 * getaddrinfo may still return AF_INET6 sockets but socket(2)
215 * will return this error. I think it's safe to basically
216 * ignore this error.
218 saved_errno = errno;
219 debug ("bind_tcpip_socket: socket: %m (ignored)");
220 continue;
222 else {
223 perror ("bind_tcpip_socket: socket");
224 exit (EXIT_FAILURE);
228 opt = 1;
229 if (setsockopt (sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof opt) == -1)
230 perror ("setsockopt: SO_REUSEADDR");
232 #ifdef IPV6_V6ONLY
233 if (a->ai_family == PF_INET6) {
234 if (setsockopt (sock, IPPROTO_IPV6, IPV6_V6ONLY, &opt, sizeof opt) == -1)
235 perror ("setsockopt: IPv6 only");
237 #endif
239 if (bind (sock, a->ai_addr, a->ai_addrlen) == -1) {
240 if (errno == EADDRINUSE) {
241 saved_errno = errno;
242 debug ("bind_tcpip_socket: bind: %m (ignored)");
243 closesocket (sock);
244 continue;
246 perror ("bind");
247 exit (EXIT_FAILURE);
250 if (listen (sock, SOMAXCONN) == -1) {
251 perror ("listen");
252 exit (EXIT_FAILURE);
255 clear_selinux_label ();
257 if (sockets_append (socks, sock) == -1) {
258 perror ("realloc");
259 exit (EXIT_FAILURE);
263 freeaddrinfo (ai);
265 if (socks->len == 0) {
266 fprintf (stderr, "%s: unable to bind to any TCP/IP sockets\n",
267 program_name);
268 if (saved_errno)
269 fprintf (stderr, "%s: socket error: %s\n",
270 program_name, strerror (saved_errno));
271 exit (EXIT_FAILURE);
274 debug ("bound to IP address %s:%s (%zu socket(s))",
275 ipaddr ? ipaddr : "<any>", port, socks->len);
278 void
279 bind_vsock (sockets *socks)
281 #if defined (AF_VSOCK) && defined (VMADDR_CID_ANY)
282 uint32_t vsock_port;
283 int sock;
284 struct sockaddr_vm addr;
286 if (port == NULL)
287 vsock_port = 10809;
288 else {
289 /* --port parameter must be numeric for vsock, unless
290 * /etc/services is extended but that seems unlikely. XXX
292 if (nbdkit_parse_uint32_t ("port", port, &vsock_port) == -1)
293 exit (EXIT_FAILURE);
296 #ifdef SOCK_CLOEXEC
297 sock = socket (AF_VSOCK, SOCK_STREAM|SOCK_CLOEXEC, 0);
298 #else
299 /* Fortunately, this code is only run at startup, so there is no
300 * risk of the fd leaking to a plugin's fork()
302 sock = set_cloexec (socket (AF_VSOCK, SOCK_STREAM, 0));
303 #endif
304 if (sock == -1) {
305 perror ("bind_vsock: socket");
306 exit (EXIT_FAILURE);
309 memset (&addr, 0, sizeof addr);
310 addr.svm_family = AF_VSOCK;
311 addr.svm_cid = VMADDR_CID_ANY;
312 addr.svm_port = vsock_port;
314 if (bind (sock, (struct sockaddr *) &addr, sizeof addr) == -1) {
315 perror (unixsocket);
316 exit (EXIT_FAILURE);
319 if (listen (sock, SOMAXCONN) == -1) {
320 perror ("listen");
321 exit (EXIT_FAILURE);
324 if (sockets_append (socks, sock) == -1) {
325 perror ("realloc");
326 exit (EXIT_FAILURE);
329 /* It's not easy to get the actual CID here.
330 * IOCTL_VM_SOCKETS_GET_LOCAL_CID is documented, but requires
331 * opening /dev/vsock which is not accessible to non-root users.
332 * bind above doesn't update the sockaddr. Using getsockname
333 * doesn't work.
335 debug ("bound to vsock any:%" PRIu32, addr.svm_port);
337 #else
338 /* Can't happen because main() checks if AF_VSOCK is defined and
339 * prevents vsock from being set, so this function can never be
340 * called.
342 abort ();
343 #endif
/* Number of connection threads currently running.  Note this is not
 * the number of worker threads: in the current implementation each
 * connection thread starts many independent worker threads.
 *
 * The counter lets accept_incoming_connections wait until every
 * connection thread has exited before it returns, so that unload-time
 * actions happen with no connections open.
 *
 * 'count' is protected by 'count_mutex'; 'count_cond' is signalled
 * each time a connection thread decrements it.
 */
static pthread_mutex_t count_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t count_cond = PTHREAD_COND_INITIALIZER;
static unsigned int count = 0;
/* Per-connection arguments passed from accept_connection to
 * start_thread.  Allocated by accept_connection and freed by
 * start_thread.
 */
struct thread_data {
  int sock;                     /* accepted connection socket */
  size_t instance_num;          /* sequence number of this connection */
};
362 static void *
363 start_thread (void *datav)
365 struct thread_data *data = datav;
367 debug ("accepted connection");
369 pthread_mutex_lock (&count_mutex);
370 count++;
371 pthread_mutex_unlock (&count_mutex);
373 /* Set thread-local data. */
374 threadlocal_new_server_thread ();
375 threadlocal_set_instance_num (data->instance_num);
377 handle_single_connection (data->sock, data->sock);
379 free (data);
381 pthread_mutex_lock (&count_mutex);
382 count--;
383 pthread_cond_signal (&count_cond);
384 pthread_mutex_unlock (&count_mutex);
386 return NULL;
389 static void
390 accept_connection (int listen_sock)
392 int err;
393 pthread_attr_t attrs;
394 pthread_t thread;
395 struct thread_data *thread_data;
396 static size_t instance_num = 1;
397 const int flag = 1;
399 thread_data = malloc (sizeof *thread_data);
400 if (unlikely (!thread_data)) {
401 perror ("malloc");
402 return;
405 thread_data->instance_num = instance_num++;
406 again:
407 #ifdef HAVE_ACCEPT4
408 thread_data->sock = accept4 (listen_sock, NULL, NULL, SOCK_CLOEXEC);
409 #else
410 /* If we were fully parallel, then this function could be accepting
411 * connections in one thread while another thread could be in a
412 * plugin trying to fork. But plugins.c forced thread_model to
413 * serialize_all_requests when it detects a lack of atomic CLOEXEC,
414 * at which point, we can use a mutex to ensure we aren't accepting
415 * until the plugin is not running, making non-atomicity okay.
417 assert (thread_model <= NBDKIT_THREAD_MODEL_SERIALIZE_ALL_REQUESTS);
418 lock_request ();
419 thread_data->sock = set_cloexec (accept (listen_sock, NULL, NULL));
420 unlock_request ();
421 #endif
422 if (thread_data->sock == -1) {
423 if (errno == EINTR || errno == EAGAIN)
424 goto again;
425 nbdkit_error ("accept: %m");
426 free (thread_data);
427 return;
430 /* Disable Nagle's algorithm on this socket. However we don't want
431 * to fail if this doesn't work.
433 setsockopt (thread_data->sock, IPPROTO_TCP, TCP_NODELAY, &flag, sizeof flag);
435 /* Start a thread to handle this connection. Note we always do this
436 * even for non-threaded plugins. There are mutexes in plugins.c
437 * which ensure that non-threaded plugins are handled correctly.
439 pthread_attr_init (&attrs);
440 pthread_attr_setdetachstate (&attrs, PTHREAD_CREATE_DETACHED);
441 err = pthread_create (&thread, &attrs, start_thread, thread_data);
442 pthread_attr_destroy (&attrs);
443 if (unlikely (err != 0)) {
444 errno = err;
445 nbdkit_error ("pthread_create: %m");
446 closesocket (thread_data->sock);
447 free (thread_data);
448 return;
451 /* If the thread starts successfully, then it is responsible for
452 * closing the socket and freeing thread_data.
456 #ifndef WIN32
458 /* Check the list of sockets plus quit_fd until a POLLIN event occurs
459 * on any of them.
461 * If POLLIN occurs on quit_fd do nothing except returning early
462 * (don't call accept_connection in this case).
464 * If POLLIN occurs on one of the sockets, call
465 * accept_connection (socks.ptr[i]) on each of them.
467 static void
468 check_sockets_and_quit_fd (const sockets *socks)
470 const size_t nr_socks = socks->len;
471 size_t i;
472 int r;
474 CLEANUP_FREE struct pollfd *fds =
475 malloc (sizeof (struct pollfd) * (nr_socks+1));
476 if (fds == NULL) {
477 perror ("malloc");
478 exit (EXIT_FAILURE);
481 for (i = 0; i < nr_socks; ++i) {
482 fds[i].fd = socks->ptr[i];
483 fds[i].events = POLLIN;
484 fds[i].revents = 0;
486 fds[nr_socks].fd = quit_fd;
487 fds[nr_socks].events = POLLIN;
488 fds[nr_socks].revents = 0;
490 r = poll (fds, nr_socks + 1, -1);
491 if (r == -1) {
492 if (errno == EINTR || errno == EAGAIN)
493 return;
494 perror ("poll");
495 exit (EXIT_FAILURE);
498 /* We don't even have to read quit_fd - just knowing that it has
499 * data means the signal handler ran, so we are ready to quit the
500 * loop.
502 if (fds[nr_socks].revents & POLLIN)
503 return;
505 for (i = 0; i < nr_socks; ++i) {
506 if (fds[i].revents & POLLIN)
507 accept_connection (socks->ptr[i]);
511 #else /* WIN32 */
513 static void
514 check_sockets_and_quit_fd (const sockets *socks)
516 const size_t nr_socks = socks->len;
517 size_t i;
518 HANDLE h;
519 CLEANUP_FREE HANDLE *handles = NULL;
520 DWORD r;
522 handles = malloc ((nr_socks+1) * sizeof (HANDLE));
523 if (handles == NULL) {
524 perror ("malloc");
525 exit (EXIT_FAILURE);
528 for (i = 0; i < nr_socks; ++i) {
529 h = WSACreateEvent ();
530 WSAEventSelect (_get_osfhandle (socks->ptr[i]), h,
531 FD_ACCEPT|FD_READ|FD_CLOSE);
532 handles[i] = h;
534 handles[nr_socks] = quit_fd;
536 r = WaitForMultipleObjectsEx ((DWORD) (nr_socks+1), handles,
537 FALSE, INFINITE, TRUE);
538 debug ("WaitForMultipleObjectsEx returned %d", (int) r);
539 if (r == WAIT_FAILED) {
540 fprintf (stderr, "%s: WaitForMultipleObjectsEx: error %lu\n",
541 program_name, GetLastError ());
542 exit (EXIT_FAILURE);
545 for (i = 0; i < nr_socks; ++i) {
546 WSAEventSelect (_get_osfhandle (socks->ptr[i]), NULL, 0);
547 WSACloseEvent (handles[i]);
550 if (r == WAIT_OBJECT_0 + nr_socks) /* quit_fd signalled. */
551 return;
553 if (r >= WAIT_OBJECT_0 && r < WAIT_OBJECT_0 + nr_socks) {
554 i = r - WAIT_OBJECT_0;
555 accept_connection (socks->ptr[i]);
556 return;
559 debug ("WaitForMultipleObjectsEx: unexpected return value: %lu\n", r);
562 #endif /* WIN32 */
564 void
565 accept_incoming_connections (const sockets *socks)
567 size_t i;
568 int err;
570 while (!quit)
571 check_sockets_and_quit_fd (socks);
573 /* Wait for all threads to exit. */
574 pthread_mutex_lock (&count_mutex);
575 for (;;) {
576 if (count == 0)
577 break;
578 err = pthread_cond_wait (&count_cond, &count_mutex);
579 if (err != 0) {
580 errno = err;
581 perror ("pthread_cond_wait");
584 pthread_mutex_unlock (&count_mutex);
586 for (i = 0; i < socks->len; ++i)
587 closesocket (socks->ptr[i]);
588 free (socks->ptr);