2 * Copyright (C) 2013-2019 Red Hat Inc.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * * Neither the name of Red Hat nor the names of its contributors may be
16 * used to endorse or promote products derived from this software without
17 * specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
22 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
26 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
27 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44 #include <sys/types.h>
45 #include <sys/socket.h>
47 #include <netinet/in.h>
48 #include <netinet/tcp.h>
51 #ifdef HAVE_LINUX_VM_SOCKETS_H
52 #include <linux/vm_sockets.h>
55 #ifdef HAVE_LIBSELINUX
56 #include <selinux/selinux.h>
65 set_selinux_label (void)
68 #ifdef HAVE_LIBSELINUX
69 if (setsockcreatecon_raw (selinux_label
) == -1) {
70 perror ("selinux-label: setsockcreatecon_raw");
75 "%s: --selinux-label option used, but "
76 "this binary was compiled without SELinux support\n",
/* Undo set_selinux_label: reset the socket creation context so that
 * sockets created later get the default label again.  Compiles to
 * nothing without SELinux support.  Failure to reset the context is
 * fatal.
 */
static void
clear_selinux_label (void)
{
#ifdef HAVE_LIBSELINUX
  /* NOTE(review): only reset the context when a label was actually
   * requested, mirroring set_selinux_label -- confirm.
   */
  if (selinux_label != NULL && setsockcreatecon_raw (NULL) == -1) {
    perror ("selinux-label: setsockcreatecon_raw(NULL)");
    exit (EXIT_FAILURE);
  }
#endif
}
97 bind_unix_socket (size_t *nr_socks
)
101 struct sockaddr_un addr
;
105 assert (unixsocket
[0] == '/');
107 len
= strlen (unixsocket
);
108 if (len
>= UNIX_PATH_MAX
) {
109 fprintf (stderr
, "%s: -U: path too long: length %zu > max %d bytes\n",
110 program_name
, len
, UNIX_PATH_MAX
-1);
114 set_selinux_label ();
117 sock
= socket (AF_UNIX
, SOCK_STREAM
|SOCK_CLOEXEC
, 0);
119 /* Fortunately, this code is only run at startup, so there is no
120 * risk of the fd leaking to a plugin's fork()
122 sock
= set_cloexec (socket (AF_UNIX
, SOCK_STREAM
, 0));
125 perror ("bind_unix_socket: socket");
129 addr
.sun_family
= AF_UNIX
;
130 memcpy (addr
.sun_path
, unixsocket
, len
+1 /* trailing \0 */);
132 if (bind (sock
, (struct sockaddr
*) &addr
, sizeof addr
) == -1) {
137 if (listen (sock
, SOMAXCONN
) == -1) {
142 clear_selinux_label ();
144 ret
= malloc (sizeof (int));
152 debug ("bound to unix socket %s", unixsocket
);
/* Create a listening TCP socket for every address that getaddrinfo
 * returns for the globals 'ipaddr' (NULL means any address) and
 * 'port'.
 *
 * Returns a newly allocated array of listening file descriptors and
 * stores its length in *nr_socks; the caller owns the array.
 * Addresses that are already in use are skipped, but if no socket at
 * all could be bound for that reason the process exits.  All other
 * failures are fatal as well.
 */
int *
bind_tcpip_socket (size_t *nr_socks)
{
  struct addrinfo *ai = NULL;
  struct addrinfo hints;
  struct addrinfo *a;
  int err, opt;
  int *socks = NULL;
  bool addr_in_use = false;

  /* NOTE(review): falling back to "10809" (the registered NBD port)
   * when no port was given is assumed here -- confirm.
   */
  if (port == NULL)
    port = "10809";

  memset (&hints, 0, sizeof hints);
  hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
  hints.ai_socktype = SOCK_STREAM;

  err = getaddrinfo (ipaddr, port, &hints, &ai);
  if (err != 0) {
    /* End the message with a newline like every other diagnostic. */
    fprintf (stderr, "%s: getaddrinfo: %s: %s: %s\n",
             program_name,
             ipaddr ? ipaddr : "<any>",
             port,
             gai_strerror (err));
    exit (EXIT_FAILURE);
  }

  *nr_socks = 0;

  for (a = ai; a != NULL; a = a->ai_next) {
    int sock;

    set_selinux_label ();

#ifdef SOCK_CLOEXEC
    sock = socket (a->ai_family, a->ai_socktype | SOCK_CLOEXEC,
                   a->ai_protocol);
#else
    /* Fortunately, this code is only run at startup, so there is no
     * risk of the fd leaking to a plugin's fork()
     */
    sock = set_cloexec (socket (a->ai_family, a->ai_socktype,
                                a->ai_protocol));
#endif
    if (sock == -1) {
      perror ("bind_tcpip_socket: socket");
      exit (EXIT_FAILURE);
    }

    /* Failing to set these options is reported but not fatal. */
    opt = 1;
    if (setsockopt (sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof opt) == -1)
      perror ("setsockopt: SO_REUSEADDR");

#ifdef IPV6_V6ONLY
    if (a->ai_family == PF_INET6) {
      if (setsockopt (sock, IPPROTO_IPV6, IPV6_V6ONLY, &opt, sizeof opt) == -1)
        perror ("setsockopt: IPv6 only");
    }
#endif

    if (bind (sock, a->ai_addr, a->ai_addrlen) == -1) {
      if (errno == EADDRINUSE) {
        /* Remember the reason and try the next address. */
        addr_in_use = true;
        close (sock);
        continue;
      }
      perror ("bind");
      exit (EXIT_FAILURE);
    }

    if (listen (sock, SOMAXCONN) == -1) {
      perror ("listen");
      exit (EXIT_FAILURE);
    }

    clear_selinux_label ();

    /* Grow the result array by one.  Allocation failure exits, so
     * overwriting 'socks' directly cannot leak.
     */
    (*nr_socks)++;
    socks = realloc (socks, sizeof (int) * (*nr_socks));
    if (socks == NULL) {
      perror ("realloc");
      exit (EXIT_FAILURE);
    }
    socks[*nr_socks - 1] = sock;
  }

  freeaddrinfo (ai);

  if (*nr_socks == 0 && addr_in_use) {
    fprintf (stderr, "%s: unable to bind to any sockets: %s\n",
             program_name, strerror (EADDRINUSE));
    exit (EXIT_FAILURE);
  }

  debug ("bound to IP address %s:%s (%zu socket(s))",
         ipaddr ? ipaddr : "<any>", port, *nr_socks);

  return socks;
}
256 bind_vsock (size_t *nr_socks
)
262 struct sockaddr_vm addr
;
267 /* --port parameter must be numeric for vsock, unless
268 * /etc/services is extended but that seems unlikely. XXX
270 if (nbdkit_parse_uint32_t ("port", port
, &vsock_port
) == -1)
274 /* Any platform with AF_VSOCK also supports SOCK_CLOEXEC so there is
277 sock
= socket (AF_VSOCK
, SOCK_STREAM
|SOCK_CLOEXEC
, 0);
279 perror ("bind_vsock: socket");
283 memset (&addr
, 0, sizeof addr
);
284 addr
.svm_family
= AF_VSOCK
;
285 addr
.svm_cid
= VMADDR_CID_ANY
;
286 addr
.svm_port
= vsock_port
;
288 if (bind (sock
, (struct sockaddr
*) &addr
, sizeof addr
) == -1) {
293 if (listen (sock
, SOMAXCONN
) == -1) {
298 ret
= malloc (sizeof (int));
306 /* It's not easy to get the actual CID here.
307 * IOCTL_VM_SOCKETS_GET_LOCAL_CID is documented, but requires
308 * opening /dev/vsock which is not accessible to non-root users.
309 * bind above doesn't update the sockaddr. Using getsockname
312 debug ("bound to vsock any:%" PRIu32
, addr
.svm_port
);
317 /* Can't happen because main() checks if AF_VSOCK is defined and
318 * prevents vsock from being set, so this function can never be
/* Number of connection threads currently running.  Note that this is
 * not the number of worker threads: in the current implementation
 * each connection thread starts many independent worker threads of
 * its own.
 *
 * The counter exists so that accept_incoming_connections can wait for
 * every connection thread to exit before it returns, which means
 * unload-time actions happen with no connections open.  count_mutex
 * is held around updates to the counter, and count_cond is signalled
 * when a connection thread finishes.
 */
static unsigned count = 0;
static pthread_mutex_t count_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t count_cond = PTHREAD_COND_INITIALIZER;
342 start_thread (void *datav
)
344 struct thread_data
*data
= datav
;
346 debug ("accepted connection");
348 pthread_mutex_lock (&count_mutex
);
350 pthread_mutex_unlock (&count_mutex
);
352 /* Set thread-local data. */
353 threadlocal_new_server_thread ();
354 threadlocal_set_instance_num (data
->instance_num
);
356 handle_single_connection (data
->sock
, data
->sock
);
360 pthread_mutex_lock (&count_mutex
);
362 pthread_cond_signal (&count_cond
);
363 pthread_mutex_unlock (&count_mutex
);
369 accept_connection (int listen_sock
)
372 pthread_attr_t attrs
;
374 struct thread_data
*thread_data
;
375 static size_t instance_num
= 1;
378 thread_data
= malloc (sizeof *thread_data
);
379 if (unlikely (!thread_data
)) {
384 thread_data
->instance_num
= instance_num
++;
387 thread_data
->sock
= accept4 (listen_sock
, NULL
, NULL
, SOCK_CLOEXEC
);
389 /* If we were fully parallel, then this function could be accepting
390 * connections in one thread while another thread could be in a
391 * plugin trying to fork. But plugins.c forced thread_model to
392 * serialize_all_requests when it detects a lack of atomic CLOEXEC,
393 * at which point, we can use a mutex to ensure we aren't accepting
394 * until the plugin is not running, making non-atomicity okay.
396 assert (backend
->thread_model (backend
) <=
397 NBDKIT_THREAD_MODEL_SERIALIZE_ALL_REQUESTS
);
399 thread_data
->sock
= set_cloexec (accept (listen_sock
, NULL
, NULL
));
400 unlock_request (NULL
);
402 if (thread_data
->sock
== -1) {
403 if (errno
== EINTR
|| errno
== EAGAIN
)
410 /* Disable Nagle's algorithm on this socket. However we don't want
411 * to fail if this doesn't work.
413 setsockopt (thread_data
->sock
, IPPROTO_TCP
, TCP_NODELAY
, &flag
, sizeof flag
);
415 /* Start a thread to handle this connection. Note we always do this
416 * even for non-threaded plugins. There are mutexes in plugins.c
417 * which ensure that non-threaded plugins are handled correctly.
419 pthread_attr_init (&attrs
);
420 pthread_attr_setdetachstate (&attrs
, PTHREAD_CREATE_DETACHED
);
421 err
= pthread_create (&thread
, &attrs
, start_thread
, thread_data
);
422 pthread_attr_destroy (&attrs
);
423 if (unlikely (err
!= 0)) {
424 fprintf (stderr
, "%s: pthread_create: %s\n", program_name
, strerror (err
));
425 close (thread_data
->sock
);
430 /* If the thread starts successfully, then it is responsible for
431 * closing the socket and freeing thread_data.
435 /* Check the list of sockets plus quit_fd until a POLLIN event occurs
438 * If POLLIN occurs on quit_fd do nothing except returning early
439 * (don't call accept_connection in this case).
441 * If POLLIN occurs on one of the sockets, call
442 * accept_connection (socks[i]) on each of them.
445 check_sockets_and_quit_fd (int *socks
, size_t nr_socks
)
450 CLEANUP_FREE
struct pollfd
*fds
=
451 malloc (sizeof (struct pollfd
) * (nr_socks
+1));
457 for (i
= 0; i
< nr_socks
; ++i
) {
458 fds
[i
].fd
= socks
[i
];
459 fds
[i
].events
= POLLIN
;
462 fds
[nr_socks
].fd
= quit_fd
;
463 fds
[nr_socks
].events
= POLLIN
;
464 fds
[nr_socks
].revents
= 0;
466 r
= poll (fds
, nr_socks
+ 1, -1);
468 if (errno
== EINTR
|| errno
== EAGAIN
)
474 /* We don't even have to read quit_fd - just knowing that it has
475 * data means the signal handler ran, so we are ready to quit the
478 if (fds
[nr_socks
].revents
& POLLIN
)
481 for (i
= 0; i
< nr_socks
; ++i
) {
482 if (fds
[i
].revents
& POLLIN
)
483 accept_connection (socks
[i
]);
488 accept_incoming_connections (int *socks
, size_t nr_socks
)
494 check_sockets_and_quit_fd (socks
, nr_socks
);
496 /* Wait for all threads to exit. */
497 pthread_mutex_lock (&count_mutex
);
501 err
= pthread_cond_wait (&count_cond
, &count_mutex
);
504 perror ("pthread_cond_wait");
507 pthread_mutex_unlock (&count_mutex
);
509 for (i
= 0; i
< nr_socks
; ++i
)