4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * * Neither the name of Red Hat nor the names of its contributors may be
16 * used to endorse or promote products derived from this software without
17 * specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
22 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
26 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
27 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
43 #include <sys/types.h>
45 #ifdef HAVE_SYS_SOCKET_H
46 #include <sys/socket.h>
53 #ifdef HAVE_NETINET_IN_H
54 #include <netinet/in.h>
57 #ifdef HAVE_NETINET_TCP_H
58 #include <netinet/tcp.h>
65 #ifdef HAVE_LINUX_VM_SOCKETS_H
66 #include <linux/vm_sockets.h>
67 #elif HAVE_SYS_VSOCK_H
68 #include <sys/vsock.h>
71 #ifdef HAVE_LIBSELINUX
72 #include <selinux/selinux.h>
/* Apply the configured SELinux label to sockets created from now on.
 *
 * With HAVE_LIBSELINUX the global selinux_label is passed to
 * setsockcreatecon_raw() and a failure is reported with perror().
 * The second message below is the diagnostic for binaries built
 * without SELinux support (presumably the non-HAVE_LIBSELINUX branch).
 *
 * NOTE(review): the embedded line numbers skip (83, 86, 87, 88, 93,
 * 94), so this listing has lost lines such as the braces and the
 * preprocessor else/endif.  Confirm against the complete file before
 * changing any logic here.
 */
83 set_selinux_label (void)
86 #ifdef HAVE_LIBSELINUX
87 if (setsockcreatecon_raw (selinux_label
) == -1) {
88 perror ("selinux-label: setsockcreatecon_raw");
93 "%s: --selinux-label option used, but "
94 "this binary was compiled without SELinux support\n",
/* Undo set_selinux_label: reset the socket creation context by
 * calling setsockcreatecon_raw (NULL).  Only compiled in when
 * HAVE_LIBSELINUX is defined; a failure is reported with perror().
 *
 * NOTE(review): lines between 102 and 107 are partly missing from
 * this listing, so any surrounding condition is not visible here.
 */
102 clear_selinux_label (void)
104 #ifdef HAVE_LIBSELINUX
106 if (setsockcreatecon_raw (NULL
) == -1) {
107 perror ("selinux-label: setsockcreatecon_raw(NULL)");
/* Create a listening Unix domain socket at the path held in the
 * global unixsocket and append its file descriptor to socks.
 *
 * Steps visible below: check the path, set the SELinux label, create
 * a close-on-exec AF_UNIX stream socket, bind, listen, clear the
 * SELinux label, then sockets_append.
 *
 * NOTE(review): the embedded line numbers skip throughout, so the
 * local declarations, the preprocessor conditionals around the two
 * socket() variants and the error exits are not part of this listing.
 */
115 bind_unix_socket (sockets
*socks
)
119 struct sockaddr_un addr
;
122 #ifndef WIN32 /* On Win32 the abspath might start with a drive letter. */
123 assert (unixsocket
[0] == '/');
/* The path plus its trailing NUL is copied into addr.sun_path further
 * down, so anything of UNIX_PATH_MAX bytes or more is rejected here.
 */
126 len
= strlen (unixsocket
);
127 if (len
>= UNIX_PATH_MAX
) {
128 fprintf (stderr
, "%s: -U: path too long: length %zu > max %d bytes\n",
129 program_name
, len
, UNIX_PATH_MAX
-1);
/* The label is set before socket() and cleared again after listen(). */
133 set_selinux_label ();
136 sock
= socket (AF_UNIX
, SOCK_STREAM
|SOCK_CLOEXEC
, 0);
138 /* Fortunately, this code is only run at startup, so there is no
139 * risk of the fd leaking to a plugin's fork()
141 sock
= set_cloexec (socket (AF_UNIX
, SOCK_STREAM
, 0));
144 perror ("bind_unix_socket: socket");
148 addr
.sun_family
= AF_UNIX
;
149 memcpy (addr
.sun_path
, unixsocket
, len
+1 /* trailing \0 */);
151 if (bind (sock
, (struct sockaddr
*) &addr
, sizeof addr
) == -1) {
156 if (listen (sock
, SOMAXCONN
) == -1) {
161 clear_selinux_label ();
163 if (sockets_append (socks
, sock
) == -1) {
168 debug ("bound to unix socket %s", unixsocket
);
/* Create listening TCP sockets for the globals ipaddr and port and
 * append each one to socks.
 *
 * getaddrinfo() is asked for passive stream addresses in the family
 * tcpip_sock_af.  For every address returned the code sets the
 * SELinux label, creates a close-on-exec socket, sets SO_REUSEADDR,
 * sets IPV6_V6ONLY on PF_INET6 sockets, then binds and listens before
 * clearing the label and calling sockets_append.
 *
 * socket() failing with EAFNOSUPPORT and bind() failing with
 * EADDRINUSE are only logged through debug() as ignored.  If socks is
 * still empty after the loop a diagnostic is printed, followed by
 * strerror (saved_errno).
 *
 * NOTE(review): the embedded line numbers skip throughout, so several
 * declarations, conditions and error exits are not in this listing.
 * The paths that log an ignored error do not show a matching
 * clear_selinux_label call; confirm against the complete file.
 */
172 bind_tcpip_socket (sockets
*socks
)
174 struct addrinfo
*ai
= NULL
;
175 struct addrinfo hints
;
/* Ask for addresses suitable for bind(): passive, stream sockets. */
183 memset (&hints
, 0, sizeof hints
);
184 hints
.ai_flags
= AI_PASSIVE
;
185 hints
.ai_family
= tcpip_sock_af
;
186 hints
.ai_socktype
= SOCK_STREAM
;
188 err
= getaddrinfo (ipaddr
, port
, &hints
, &ai
);
190 fprintf (stderr
, "%s: getaddrinfo: %s: %s: %s\n",
192 ipaddr
? ipaddr
: "<any>",
/* Walk the whole result list, one listening socket per address. */
198 for (a
= ai
; a
!= NULL
; a
= a
->ai_next
) {
201 set_selinux_label ();
204 sock
= socket (a
->ai_family
, a
->ai_socktype
| SOCK_CLOEXEC
, a
->ai_protocol
);
206 /* Fortunately, this code is only run at startup, so there is no
207 * risk of the fd leaking to a plugin's fork()
209 sock
= set_cloexec (socket (a
->ai_family
, a
->ai_socktype
, a
->ai_protocol
));
212 if (errno
== EAFNOSUPPORT
) {
213 /* If ipv6.disable=1 was specified to the Linux kernel then
214 * getaddrinfo may still return AF_INET6 sockets but socket(2)
215 * will return this error. I think it's safe to basically
219 debug ("bind_tcpip_socket: socket: %m (ignored)");
223 perror ("bind_tcpip_socket: socket");
229 if (setsockopt (sock
, SOL_SOCKET
, SO_REUSEADDR
, &opt
, sizeof opt
) == -1)
230 perror ("setsockopt: SO_REUSEADDR");
233 if (a
->ai_family
== PF_INET6
) {
234 if (setsockopt (sock
, IPPROTO_IPV6
, IPV6_V6ONLY
, &opt
, sizeof opt
) == -1)
235 perror ("setsockopt: IPv6 only");
239 if (bind (sock
, a
->ai_addr
, a
->ai_addrlen
) == -1) {
240 if (errno
== EADDRINUSE
) {
242 debug ("bind_tcpip_socket: bind: %m (ignored)");
250 if (listen (sock
, SOMAXCONN
) == -1) {
255 clear_selinux_label ();
257 if (sockets_append (socks
, sock
) == -1) {
265 if (socks
->len
== 0) {
266 fprintf (stderr
, "%s: unable to bind to any TCP/IP sockets\n",
269 fprintf (stderr
, "%s: socket error: %s\n",
270 program_name
, strerror (saved_errno
));
274 debug ("bound to IP address %s:%s (%zu socket(s))",
275 ipaddr
? ipaddr
: "<any>", port
, socks
->len
);
/* Create a listening AF_VSOCK socket and append it to socks.
 *
 * The working body is only compiled when both AF_VSOCK and
 * VMADDR_CID_ANY are defined.  The global port is parsed as an
 * unsigned 32 bit number with nbdkit_parse_uint32_t, a close-on-exec
 * AF_VSOCK stream socket is created, bound to VMADDR_CID_ANY on that
 * port and put into the listening state before sockets_append.
 *
 * NOTE(review): the embedded line numbers skip throughout, so the
 * declarations, the error exits and the fallback branch for builds
 * without vsock are not in this listing.
 */
279 bind_vsock (sockets
*socks
)
281 #if defined (AF_VSOCK) && defined (VMADDR_CID_ANY)
284 struct sockaddr_vm addr
;
289 /* --port parameter must be numeric for vsock, unless
290 * /etc/services is extended but that seems unlikely. XXX
292 if (nbdkit_parse_uint32_t ("port", port
, &vsock_port
) == -1)
297 sock
= socket (AF_VSOCK
, SOCK_STREAM
|SOCK_CLOEXEC
, 0);
299 /* Fortunately, this code is only run at startup, so there is no
300 * risk of the fd leaking to a plugin's fork()
302 sock
= set_cloexec (socket (AF_VSOCK
, SOCK_STREAM
, 0));
305 perror ("bind_vsock: socket");
309 memset (&addr
, 0, sizeof addr
);
310 addr
.svm_family
= AF_VSOCK
;
311 addr
.svm_cid
= VMADDR_CID_ANY
;
312 addr
.svm_port
= vsock_port
;
314 if (bind (sock
, (struct sockaddr
*) &addr
, sizeof addr
) == -1) {
319 if (listen (sock
, SOMAXCONN
) == -1) {
324 if (sockets_append (socks
, sock
) == -1) {
329 /* It's not easy to get the actual CID here.
330 * IOCTL_VM_SOCKETS_GET_LOCAL_CID is documented, but requires
331 * opening /dev/vsock which is not accessible to non-root users.
332 * bind above doesn't update the sockaddr. Using getsockname
335 debug ("bound to vsock any:%" PRIu32
, addr
.svm_port
);
338 /* Can't happen because main() checks if AF_VSOCK is defined and
339 * prevents vsock from being set, so this function can never be
346 /* This counts the number of connection threads running (note: not the
347 * number of worker threads, each connection thread will start many
348 * independent worker threads in the current implementation). The
349 * purpose of this is so we can wait for all the connection threads to
350 * exit before we return from accept_incoming_connections, so that
351 * unload-time actions happen with no connections open.
/* Shared state for tracking connection threads: start_thread takes
 * count_mutex and signals count_cond, and accept_incoming_connections
 * waits on count_cond while holding count_mutex.
 * NOTE(review): the statements that change count are not in this
 * listing; presumably they sit inside those locked sections.
 */
353 static pthread_mutex_t count_mutex
= PTHREAD_MUTEX_INITIALIZER
;
354 static pthread_cond_t count_cond
= PTHREAD_COND_INITIALIZER
;
355 static unsigned count
= 0;
/* Thread entry point for one accepted connection.
 *
 * datav points to a struct thread_data holding the connected socket
 * and the instance number assigned by accept_connection.  The thread
 * takes count_mutex briefly, sets up its thread-local state, serves
 * the connection with handle_single_connection (the same socket is
 * passed for both arguments), then takes count_mutex again and
 * signals count_cond.
 *
 * NOTE(review): the embedded line numbers skip at 370 and 382, inside
 * the two locked sections, and after 384.  Presumably the missing
 * lines adjust count and release datav; confirm against the complete
 * file.
 */
363 start_thread (void *datav
)
365 struct thread_data
*data
= datav
;
367 debug ("accepted connection");
369 pthread_mutex_lock (&count_mutex
);
371 pthread_mutex_unlock (&count_mutex
);
373 /* Set thread-local data. */
374 threadlocal_new_server_thread ();
375 threadlocal_set_instance_num (data
->instance_num
);
377 handle_single_connection (data
->sock
, data
->sock
);
/* Signal count_cond under the mutex; accept_incoming_connections
 * waits on it.
 */
381 pthread_mutex_lock (&count_mutex
);
383 pthread_cond_signal (&count_cond
);
384 pthread_mutex_unlock (&count_mutex
);
/* Accept one connection on listen_sock and hand it to a new detached
 * thread running start_thread.
 *
 * A struct thread_data is allocated for the connection and given the
 * next value of the function-static instance_num counter, which
 * starts at 1.  The connection is accepted close-on-exec, either
 * atomically with accept4 and SOCK_CLOEXEC or with accept followed by
 * set_cloexec.  EINTR and EAGAIN from accept are tested separately
 * from other failures, which are logged with nbdkit_error.
 * TCP_NODELAY is requested without checking the result.  If
 * pthread_create fails the error is logged and the accepted socket
 * is closed.
 *
 * NOTE(review): the embedded line numbers skip throughout, so the
 * remaining declarations, the preprocessor conditionals around the
 * two accept variants and the cleanup on the error paths are not in
 * this listing.
 */
390 accept_connection (int listen_sock
)
393 pthread_attr_t attrs
;
395 struct thread_data
*thread_data
;
396 static size_t instance_num
= 1;
399 thread_data
= malloc (sizeof *thread_data
);
400 if (unlikely (!thread_data
)) {
405 thread_data
->instance_num
= instance_num
++;
408 thread_data
->sock
= accept4 (listen_sock
, NULL
, NULL
, SOCK_CLOEXEC
);
410 /* If we were fully parallel, then this function could be accepting
411 * connections in one thread while another thread could be in a
412 * plugin trying to fork. But plugins.c forced thread_model to
413 * serialize_all_requests when it detects a lack of atomic CLOEXEC,
414 * at which point, we can use a mutex to ensure we aren't accepting
415 * until the plugin is not running, making non-atomicity okay.
417 assert (thread_model
<= NBDKIT_THREAD_MODEL_SERIALIZE_ALL_REQUESTS
);
419 thread_data
->sock
= set_cloexec (accept (listen_sock
, NULL
, NULL
));
422 if (thread_data
->sock
== -1) {
423 if (errno
== EINTR
|| errno
== EAGAIN
)
425 nbdkit_error ("accept: %m");
430 /* Disable Nagle's algorithm on this socket. However we don't want
431 * to fail if this doesn't work.
433 setsockopt (thread_data
->sock
, IPPROTO_TCP
, TCP_NODELAY
, &flag
, sizeof flag
);
435 /* Start a thread to handle this connection. Note we always do this
436 * even for non-threaded plugins. There are mutexes in plugins.c
437 * which ensure that non-threaded plugins are handled correctly.
439 pthread_attr_init (&attrs
);
440 pthread_attr_setdetachstate (&attrs
, PTHREAD_CREATE_DETACHED
);
441 err
= pthread_create (&thread
, &attrs
, start_thread
, thread_data
);
442 pthread_attr_destroy (&attrs
);
443 if (unlikely (err
!= 0)) {
445 nbdkit_error ("pthread_create: %m");
446 closesocket (thread_data
->sock
);
451 /* If the thread starts successfully, then it is responsible for
452 * closing the socket and freeing thread_data.
458 /* Check the list of sockets plus quit_fd until a POLLIN event occurs
461 * If POLLIN occurs on quit_fd do nothing except return early
462 * (don't call accept_connection in this case).
464 * If POLLIN occurs on one of the sockets, call
465 * accept_connection (socks.ptr[i]) on each of them.
/* poll() based variant.
 *
 * Builds an array with one pollfd per listening socket in socks plus
 * a final entry for quit_fd, all waiting for POLLIN, and blocks in
 * poll() with no timeout.  EINTR and EAGAIN from poll are tested
 * separately from other failures.  POLLIN on the quit_fd entry is
 * checked first; after that accept_connection is called for every
 * listening socket that reported POLLIN.
 *
 * The array is declared CLEANUP_FREE, presumably so that it is freed
 * automatically when the function returns.
 *
 * NOTE(review): the embedded line numbers skip throughout, so the
 * allocation check, the return statements and some initialisation
 * lines are not in this listing.
 */
468 check_sockets_and_quit_fd (const sockets
*socks
)
470 const size_t nr_socks
= socks
->len
;
474 CLEANUP_FREE
struct pollfd
*fds
=
475 malloc (sizeof (struct pollfd
) * (nr_socks
+1));
/* Entries 0 .. nr_socks-1 are the listening sockets. */
481 for (i
= 0; i
< nr_socks
; ++i
) {
482 fds
[i
].fd
= socks
->ptr
[i
];
483 fds
[i
].events
= POLLIN
;
/* The extra last entry watches quit_fd. */
486 fds
[nr_socks
].fd
= quit_fd
;
487 fds
[nr_socks
].events
= POLLIN
;
488 fds
[nr_socks
].revents
= 0;
490 r
= poll (fds
, nr_socks
+ 1, -1);
492 if (errno
== EINTR
|| errno
== EAGAIN
)
498 /* We don't even have to read quit_fd - just knowing that it has
499 * data means the signal handler ran, so we are ready to quit the
502 if (fds
[nr_socks
].revents
& POLLIN
)
505 for (i
= 0; i
< nr_socks
; ++i
) {
506 if (fds
[i
].revents
& POLLIN
)
507 accept_connection (socks
->ptr
[i
]);
/* Windows event based variant.
 *
 * Allocates an array of nr_socks+1 HANDLEs.  For each listening
 * socket a WSA event is created and associated with the socket for
 * FD_ACCEPT, FD_READ and FD_CLOSE; the last slot holds quit_fd.
 * WaitForMultipleObjectsEx then waits, with no timeout and in
 * alertable mode, until any one handle is signalled.  Afterwards the
 * events are detached from the sockets and closed.  A result equal to
 * WAIT_OBJECT_0 + nr_socks means quit_fd was signalled; a result
 * below that selects the socket passed to accept_connection.  Any
 * other value is only logged.
 *
 * NOTE(review): the embedded line numbers skip throughout; the line
 * that stores each new event in the handles array and the return
 * statements are not in this listing.
 */
514 check_sockets_and_quit_fd (const sockets
*socks
)
516 const size_t nr_socks
= socks
->len
;
519 CLEANUP_FREE HANDLE
*handles
= NULL
;
522 handles
= malloc ((nr_socks
+1) * sizeof (HANDLE
));
523 if (handles
== NULL
) {
/* One WSA event per listening socket. */
528 for (i
= 0; i
< nr_socks
; ++i
) {
529 h
= WSACreateEvent ();
530 WSAEventSelect (_get_osfhandle (socks
->ptr
[i
]), h
,
531 FD_ACCEPT
|FD_READ
|FD_CLOSE
);
/* The extra last slot is the quit handle. */
534 handles
[nr_socks
] = quit_fd
;
536 r
= WaitForMultipleObjectsEx ((DWORD
) (nr_socks
+1), handles
,
537 FALSE
, INFINITE
, TRUE
);
538 debug ("WaitForMultipleObjectsEx returned %d", (int) r
);
539 if (r
== WAIT_FAILED
) {
540 fprintf (stderr
, "%s: WaitForMultipleObjectsEx: error %lu\n",
541 program_name
, GetLastError ());
/* Cancel the event association on every socket and close the events
 * before acting on the result.
 */
545 for (i
= 0; i
< nr_socks
; ++i
) {
546 WSAEventSelect (_get_osfhandle (socks
->ptr
[i
]), NULL
, 0);
547 WSACloseEvent (handles
[i
]);
550 if (r
== WAIT_OBJECT_0
+ nr_socks
) /* quit_fd signalled. */
/* Only the single socket whose index was returned is accepted on. */
553 if (r
>= WAIT_OBJECT_0
&& r
< WAIT_OBJECT_0
+ nr_socks
) {
554 i
= r
- WAIT_OBJECT_0
;
555 accept_connection (socks
->ptr
[i
]);
559 debug ("WaitForMultipleObjectsEx: unexpected return value: %lu\n", r
);
/* Serve connections on the listening sockets in socks, then shut
 * down.
 *
 * check_sockets_and_quit_fd does the waiting and accepting.  Once
 * that phase is over the function waits on count_cond while holding
 * count_mutex so that the connection threads can finish, reports a
 * pthread_cond_wait failure with perror, and finally closes every
 * listening socket in socks.
 *
 * NOTE(review): the embedded line numbers skip between 565 and 587,
 * so the loop conditions around check_sockets_and_quit_fd and around
 * pthread_cond_wait are not in this listing.
 */
565 accept_incoming_connections (const sockets
*socks
)
571 check_sockets_and_quit_fd (socks
);
573 /* Wait for all threads to exit. */
574 pthread_mutex_lock (&count_mutex
);
578 err
= pthread_cond_wait (&count_cond
, &count_mutex
);
581 perror ("pthread_cond_wait");
584 pthread_mutex_unlock (&count_mutex
);
/* Close the listening sockets last. */
586 for (i
= 0; i
< socks
->len
; ++i
)
587 closesocket (socks
->ptr
[i
]);