plugins: Wire up rust plugin support for NBD_INFO_INIT_STATE
[nbdkit/ericb.git] / server / sockets.c
blob3351286cc01b02865020725d2a8a2889e34a42c0
1 /* nbdkit
2 * Copyright (C) 2013-2019 Red Hat Inc.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * * Neither the name of Red Hat nor the names of its contributors may be
16 * used to endorse or promote products derived from this software without
17 * specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
22 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
26 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
27 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
33 #include <config.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <stdbool.h>
38 #include <inttypes.h>
39 #include <string.h>
40 #include <unistd.h>
41 #include <poll.h>
42 #include <errno.h>
43 #include <assert.h>
44 #include <sys/types.h>
45 #include <sys/socket.h>
46 #include <sys/un.h>
47 #include <netinet/in.h>
48 #include <netinet/tcp.h>
49 #include <netdb.h>
51 #ifdef HAVE_LINUX_VM_SOCKETS_H
52 #include <linux/vm_sockets.h>
53 #endif
55 #ifdef HAVE_LIBSELINUX
56 #include <selinux/selinux.h>
57 #endif
59 #include <pthread.h>
61 #include "internal.h"
62 #include "utils.h"
64 static void
65 set_selinux_label (void)
67 if (selinux_label) {
68 #ifdef HAVE_LIBSELINUX
69 if (setsockcreatecon_raw (selinux_label) == -1) {
70 perror ("selinux-label: setsockcreatecon_raw");
71 exit (EXIT_FAILURE);
73 #else
74 fprintf (stderr,
75 "%s: --selinux-label option used, but "
76 "this binary was compiled without SELinux support\n",
77 program_name);
78 exit (EXIT_FAILURE);
79 #endif
/* Undo set_selinux_label: if a label was requested, reset the socket
 * creation context by passing NULL to setsockcreatecon_raw.  Exits
 * with EXIT_FAILURE if the reset fails.  Compiles to an empty
 * function without HAVE_LIBSELINUX.
 */
static void
clear_selinux_label (void)
{
#ifdef HAVE_LIBSELINUX
  if (selinux_label && setsockcreatecon_raw (NULL) == -1) {
    perror ("selinux-label: setsockcreatecon_raw(NULL)");
    exit (EXIT_FAILURE);
  }
#endif
}
96 int *
97 bind_unix_socket (size_t *nr_socks)
99 size_t len;
100 int sock;
101 struct sockaddr_un addr;
102 int *ret;
104 assert (unixsocket);
105 assert (unixsocket[0] == '/');
107 len = strlen (unixsocket);
108 if (len >= UNIX_PATH_MAX) {
109 fprintf (stderr, "%s: -U: path too long: length %zu > max %d bytes\n",
110 program_name, len, UNIX_PATH_MAX-1);
111 exit (EXIT_FAILURE);
114 set_selinux_label ();
116 #ifdef SOCK_CLOEXEC
117 sock = socket (AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
118 #else
119 /* Fortunately, this code is only run at startup, so there is no
120 * risk of the fd leaking to a plugin's fork()
122 sock = set_cloexec (socket (AF_UNIX, SOCK_STREAM, 0));
123 #endif
124 if (sock == -1) {
125 perror ("bind_unix_socket: socket");
126 exit (EXIT_FAILURE);
129 addr.sun_family = AF_UNIX;
130 memcpy (addr.sun_path, unixsocket, len+1 /* trailing \0 */);
132 if (bind (sock, (struct sockaddr *) &addr, sizeof addr) == -1) {
133 perror (unixsocket);
134 exit (EXIT_FAILURE);
137 if (listen (sock, SOMAXCONN) == -1) {
138 perror ("listen");
139 exit (EXIT_FAILURE);
142 clear_selinux_label ();
144 ret = malloc (sizeof (int));
145 if (!ret) {
146 perror ("malloc");
147 exit (EXIT_FAILURE);
149 ret[0] = sock;
150 *nr_socks = 1;
152 debug ("bound to unix socket %s", unixsocket);
154 return ret;
/* Create listening TCP sockets for every address that getaddrinfo
 * returns for the globals ipaddr (NULL means any) and port (defaults
 * to "10809" when unset).
 *
 * On return *nr_socks holds the number of sockets bound and the
 * return value is a heap-allocated array of that many listening file
 * descriptors (NULL if none were bound).  Addresses which fail to
 * bind with EADDRINUSE are skipped; if that leaves no socket at all
 * the process exits.  Any other socket/bind/listen/realloc failure
 * also prints a message and exits with EXIT_FAILURE.
 */
int *
bind_tcpip_socket (size_t *nr_socks)
{
  struct addrinfo *ai = NULL;
  struct addrinfo hints;
  struct addrinfo *a;
  int err, opt;
  int *socks = NULL;
  bool addr_in_use = false;

  if (port == NULL)
    port = "10809";

  memset (&hints, 0, sizeof hints);
  hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
  hints.ai_socktype = SOCK_STREAM;

  err = getaddrinfo (ipaddr, port, &hints, &ai);
  if (err != 0) {
    /* Terminate the message with a newline like every other
     * diagnostic in this file.
     */
    fprintf (stderr, "%s: getaddrinfo: %s: %s: %s\n",
             program_name,
             ipaddr ? ipaddr : "<any>",
             port,
             gai_strerror (err));
    exit (EXIT_FAILURE);
  }

  *nr_socks = 0;

  for (a = ai; a != NULL; a = a->ai_next) {
    int sock;

    set_selinux_label ();

#ifdef SOCK_CLOEXEC
    sock = socket (a->ai_family, a->ai_socktype | SOCK_CLOEXEC, a->ai_protocol);
#else
    /* Fortunately, this code is only run at startup, so there is no
     * risk of the fd leaking to a plugin's fork()
     */
    sock = set_cloexec (socket (a->ai_family, a->ai_socktype, a->ai_protocol));
#endif
    if (sock == -1) {
      perror ("bind_tcpip_socket: socket");
      exit (EXIT_FAILURE);
    }

    /* Failure to set these options is reported but is not fatal. */
    opt = 1;
    if (setsockopt (sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof opt) == -1)
      perror ("setsockopt: SO_REUSEADDR");

#ifdef IPV6_V6ONLY
    if (a->ai_family == PF_INET6) {
      if (setsockopt (sock, IPPROTO_IPV6, IPV6_V6ONLY, &opt, sizeof opt) == -1)
        perror ("setsockopt: IPv6 only");
    }
#endif

    if (bind (sock, a->ai_addr, a->ai_addrlen) == -1) {
      if (errno == EADDRINUSE) {
        addr_in_use = true;
        close (sock);
        /* Keep set/clear balanced: without this, the label would stay
         * installed if the last candidate address is in use.
         */
        clear_selinux_label ();
        continue;
      }
      perror ("bind");
      exit (EXIT_FAILURE);
    }

    if (listen (sock, SOMAXCONN) == -1) {
      perror ("listen");
      exit (EXIT_FAILURE);
    }

    clear_selinux_label ();

    /* Overwriting socks directly is fine here because a realloc
     * failure terminates the process.
     */
    (*nr_socks)++;
    socks = realloc (socks, sizeof (int) * (*nr_socks));
    if (!socks) {
      perror ("realloc");
      exit (EXIT_FAILURE);
    }
    socks[*nr_socks - 1] = sock;
  }

  freeaddrinfo (ai);

  if (*nr_socks == 0 && addr_in_use) {
    fprintf (stderr, "%s: unable to bind to any sockets: %s\n",
             program_name, strerror (EADDRINUSE));
    exit (EXIT_FAILURE);
  }

  debug ("bound to IP address %s:%s (%zu socket(s))",
         ipaddr ? ipaddr : "<any>", port, *nr_socks);

  return socks;
}
255 int *
256 bind_vsock (size_t *nr_socks)
258 #ifdef AF_VSOCK
259 uint32_t vsock_port;
260 int sock;
261 int *ret;
262 struct sockaddr_vm addr;
264 if (port == NULL)
265 vsock_port = 10809;
266 else {
267 /* --port parameter must be numeric for vsock, unless
268 * /etc/services is extended but that seems unlikely. XXX
270 if (nbdkit_parse_uint32_t ("port", port, &vsock_port) == -1)
271 exit (EXIT_FAILURE);
274 /* Any platform with AF_VSOCK also supports SOCK_CLOEXEC so there is
275 * no fallback path.
277 sock = socket (AF_VSOCK, SOCK_STREAM|SOCK_CLOEXEC, 0);
278 if (sock == -1) {
279 perror ("bind_vsock: socket");
280 exit (EXIT_FAILURE);
283 memset (&addr, 0, sizeof addr);
284 addr.svm_family = AF_VSOCK;
285 addr.svm_cid = VMADDR_CID_ANY;
286 addr.svm_port = vsock_port;
288 if (bind (sock, (struct sockaddr *) &addr, sizeof addr) == -1) {
289 perror (unixsocket);
290 exit (EXIT_FAILURE);
293 if (listen (sock, SOMAXCONN) == -1) {
294 perror ("listen");
295 exit (EXIT_FAILURE);
298 ret = malloc (sizeof (int));
299 if (!ret) {
300 perror ("malloc");
301 exit (EXIT_FAILURE);
303 ret[0] = sock;
304 *nr_socks = 1;
306 /* It's not easy to get the actual CID here.
307 * IOCTL_VM_SOCKETS_GET_LOCAL_CID is documented, but requires
308 * opening /dev/vsock which is not accessible to non-root users.
309 * bind above doesn't update the sockaddr. Using getsockname
310 * doesn't work.
312 debug ("bound to vsock any:%" PRIu32, addr.svm_port);
314 return ret;
316 #else
317 /* Can't happen because main() checks if AF_VSOCK is defined and
318 * prevents vsock from being set, so this function can never be
319 * called.
321 abort ();
322 #endif
/* Number of connection threads currently running.  Note this is not
 * the number of worker threads: in the current implementation each
 * connection thread starts many independent worker threads.
 *
 * The counter lets accept_incoming_connections wait until every
 * connection thread has exited before it returns, so that
 * unload-time actions happen with no connections open.  count is
 * protected by count_mutex, and count_cond is signalled each time a
 * connection thread finishes.
 */
static pthread_mutex_t count_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t count_cond = PTHREAD_COND_INITIALIZER;
static unsigned int count = 0;
/* Arguments handed from accept_connection to start_thread.  The
 * structure is heap allocated by accept_connection and freed by the
 * thread once the connection has been handled.
 */
struct thread_data {
  int sock;               /* Accepted connection socket. */
  size_t instance_num;    /* Stored in the thread's thread-local data. */
};
341 static void *
342 start_thread (void *datav)
344 struct thread_data *data = datav;
346 debug ("accepted connection");
348 pthread_mutex_lock (&count_mutex);
349 count++;
350 pthread_mutex_unlock (&count_mutex);
352 /* Set thread-local data. */
353 threadlocal_new_server_thread ();
354 threadlocal_set_instance_num (data->instance_num);
356 handle_single_connection (data->sock, data->sock);
358 free (data);
360 pthread_mutex_lock (&count_mutex);
361 count--;
362 pthread_cond_signal (&count_cond);
363 pthread_mutex_unlock (&count_mutex);
365 return NULL;
368 static void
369 accept_connection (int listen_sock)
371 int err;
372 pthread_attr_t attrs;
373 pthread_t thread;
374 struct thread_data *thread_data;
375 static size_t instance_num = 1;
376 const int flag = 1;
378 thread_data = malloc (sizeof *thread_data);
379 if (unlikely (!thread_data)) {
380 perror ("malloc");
381 return;
384 thread_data->instance_num = instance_num++;
385 again:
386 #ifdef HAVE_ACCEPT4
387 thread_data->sock = accept4 (listen_sock, NULL, NULL, SOCK_CLOEXEC);
388 #else
389 /* If we were fully parallel, then this function could be accepting
390 * connections in one thread while another thread could be in a
391 * plugin trying to fork. But plugins.c forced thread_model to
392 * serialize_all_requests when it detects a lack of atomic CLOEXEC,
393 * at which point, we can use a mutex to ensure we aren't accepting
394 * until the plugin is not running, making non-atomicity okay.
396 assert (backend->thread_model (backend) <=
397 NBDKIT_THREAD_MODEL_SERIALIZE_ALL_REQUESTS);
398 lock_request (NULL);
399 thread_data->sock = set_cloexec (accept (listen_sock, NULL, NULL));
400 unlock_request (NULL);
401 #endif
402 if (thread_data->sock == -1) {
403 if (errno == EINTR || errno == EAGAIN)
404 goto again;
405 perror ("accept");
406 free (thread_data);
407 return;
410 /* Disable Nagle's algorithm on this socket. However we don't want
411 * to fail if this doesn't work.
413 setsockopt (thread_data->sock, IPPROTO_TCP, TCP_NODELAY, &flag, sizeof flag);
415 /* Start a thread to handle this connection. Note we always do this
416 * even for non-threaded plugins. There are mutexes in plugins.c
417 * which ensure that non-threaded plugins are handled correctly.
419 pthread_attr_init (&attrs);
420 pthread_attr_setdetachstate (&attrs, PTHREAD_CREATE_DETACHED);
421 err = pthread_create (&thread, &attrs, start_thread, thread_data);
422 pthread_attr_destroy (&attrs);
423 if (unlikely (err != 0)) {
424 fprintf (stderr, "%s: pthread_create: %s\n", program_name, strerror (err));
425 close (thread_data->sock);
426 free (thread_data);
427 return;
430 /* If the thread starts successfully, then it is responsible for
431 * closing the socket and freeing thread_data.
435 /* Check the list of sockets plus quit_fd until a POLLIN event occurs
436 * on any of them.
438 * If POLLIN occurs on quit_fd do nothing except returning early
439 * (don't call accept_connection in this case).
441 * If POLLIN occurs on one of the sockets, call
442 * accept_connection (socks[i]) on each of them.
444 static void
445 check_sockets_and_quit_fd (int *socks, size_t nr_socks)
447 size_t i;
448 int r;
450 CLEANUP_FREE struct pollfd *fds =
451 malloc (sizeof (struct pollfd) * (nr_socks+1));
452 if (fds == NULL) {
453 perror ("malloc");
454 exit (EXIT_FAILURE);
457 for (i = 0; i < nr_socks; ++i) {
458 fds[i].fd = socks[i];
459 fds[i].events = POLLIN;
460 fds[i].revents = 0;
462 fds[nr_socks].fd = quit_fd;
463 fds[nr_socks].events = POLLIN;
464 fds[nr_socks].revents = 0;
466 r = poll (fds, nr_socks + 1, -1);
467 if (r == -1) {
468 if (errno == EINTR || errno == EAGAIN)
469 return;
470 perror ("poll");
471 exit (EXIT_FAILURE);
474 /* We don't even have to read quit_fd - just knowing that it has
475 * data means the signal handler ran, so we are ready to quit the
476 * loop.
478 if (fds[nr_socks].revents & POLLIN)
479 return;
481 for (i = 0; i < nr_socks; ++i) {
482 if (fds[i].revents & POLLIN)
483 accept_connection (socks[i]);
487 void
488 accept_incoming_connections (int *socks, size_t nr_socks)
490 size_t i;
491 int err;
493 while (!quit)
494 check_sockets_and_quit_fd (socks, nr_socks);
496 /* Wait for all threads to exit. */
497 pthread_mutex_lock (&count_mutex);
498 for (;;) {
499 if (count == 0)
500 break;
501 err = pthread_cond_wait (&count_cond, &count_mutex);
502 if (err != 0) {
503 errno = err;
504 perror ("pthread_cond_wait");
507 pthread_mutex_unlock (&count_mutex);
509 for (i = 0; i < nr_socks; ++i)
510 close (socks[i]);
511 free (socks);