2 * Copyright 6WIND S.A., 2014
4 * This work is licensed under the terms of the GNU GPL, version 2 or
5 * (at your option) any later version. See the COPYING file in the
8 #include "qemu/osdep.h"
9 #include "qemu-common.h"
10 #include "qemu/sockets.h"
13 #include <sys/socket.h>
16 #include "ivshmem-server.h"
18 /* log a message on stdout if verbose=1
 *
 * printf-style helper: fmt and the variadic arguments are forwarded to
 * printf() only when (server)->verbose is non-zero. `server` is used as an
 * expression, so any pointer to a structure with a `verbose` member works.
 * `## __VA_ARGS__` is the GNU extension that drops the preceding comma when
 * no variadic argument is given. */
19 #define IVSHMEM_SERVER_DEBUG(server, fmt, ...) do { \
20 if ((server)->verbose) { \
21 printf(fmt, ## __VA_ARGS__); \
25 /** maximum size of a huge page (1 GiB), used by ivshmem_server_ftruncate()
 * as the upper bound of the sizes it passes to ftruncate() */
26 #define IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE (1024 * 1024 * 1024)
28 /** default listen backlog (number of sockets not accepted); this is the
 * backlog argument given to listen() in ivshmem_server_start() */
29 #define IVSHMEM_SERVER_LISTEN_BACKLOG 10
31 /* send message to a client unix socket
 *
 * One message is sent on sock_fd with sendmsg(). Its payload is peer_id,
 * converted to little-endian with GINT64_TO_LE and described by a single
 * iovec of sizeof(peer_id) bytes. fd is copied into an SCM_RIGHTS control
 * message; per the comment below this happens only "if fd is specified"
 * (callers in this file pass -1 when they have no descriptor to send).
 * NOTE(review): the guard on fd, the iovec hookup into msg and the way ret
 * is turned into the return value are not visible in this excerpt. */
33 ivshmem_server_send_one_msg(int sock_fd
, int64_t peer_id
, int fd
)
/* control buffer sized with CMSG_SPACE for exactly one int (one descriptor) */
40 char control
[CMSG_SPACE(sizeof(int))];
/* fix the byte order of the id before it goes on the wire */
44 peer_id
= GINT64_TO_LE(peer_id
);
45 iov
[0].iov_base
= &peer_id
;
46 iov
[0].iov_len
= sizeof(peer_id
);
/* start from an all-zero msghdr */
48 memset(&msg
, 0, sizeof(msg
));
52 /* if fd is specified, add it in a cmsg */
/* zero the ancillary buffer, then attach it to the message */
54 memset(&msg_control
, 0, sizeof(msg_control
));
55 msg
.msg_control
= &msg_control
;
56 msg
.msg_controllen
= sizeof(msg_control
);
/* fill the first control header: SOL_SOCKET / SCM_RIGHTS carrying one int */
57 cmsg
= CMSG_FIRSTHDR(&msg
);
58 cmsg
->cmsg_level
= SOL_SOCKET
;
59 cmsg
->cmsg_type
= SCM_RIGHTS
;
60 cmsg
->cmsg_len
= CMSG_LEN(sizeof(int));
/* copy the descriptor into the data area of the control message */
61 memcpy(CMSG_DATA(cmsg
), &fd
, sizeof(fd
));
/* no special flags */
64 ret
= sendmsg(sock_fd
, &msg
, 0);
72 /* free a peer when the server advertises a disconnection or when the server is closed */
/*
 * Detach peer from server->peer_list, tell every peer still on the list
 * about the departure by sending peer->id with fd -1, then clean up each of
 * the peer's vectors_count event notifiers.
 * NOTE(review): closing peer->sock_fd and releasing the peer structure are
 * not visible in this excerpt -- confirm against the full file.
 */
75 ivshmem_server_free_peer(IvshmemServer
*server
, IvshmemServerPeer
*peer
)
78 IvshmemServerPeer
*other_peer
;
80 IVSHMEM_SERVER_DEBUG(server
, "free peer %" PRId64
"\n", peer
->id
);
/* unlink first, so the loop below never notifies the departing peer itself */
82 QTAILQ_REMOVE(&server
->peer_list
, peer
, next
);
84 /* advertise the deletion to other peers */
85 QTAILQ_FOREACH(other_peer
, &server
->peer_list
, next
) {
/* a message with the peer id and no descriptor (-1) is the deletion notice */
86 ivshmem_server_send_one_msg(other_peer
->sock_fd
, peer
->id
, -1);
/* one event notifier per vector has to be cleaned up */
89 for (vector
= 0; vector
< peer
->vectors_count
; vector
++) {
90 event_notifier_cleanup(&peer
->vectors
[vector
]);
96 /* send the peer id and the shm_fd just after a new client connection
 *
 * Three messages go to peer->sock_fd, in this order: the protocol version,
 * the peer's own id, and the shared memory descriptor. Each one is sent
 * with ivshmem_server_send_one_msg() and has its own debug message for the
 * failure case.
 * NOTE(review): the checks on ret, the remaining arguments of the debug
 * calls and the return statements are not visible in this excerpt. */
98 ivshmem_server_send_initial_info(IvshmemServer
*server
, IvshmemServerPeer
*peer
)
102 /* send our protocol version first; IVSHMEM_PROTOCOL_VERSION travels in the
 * slot that otherwise carries a peer id */
103 ret
= ivshmem_server_send_one_msg(peer
->sock_fd
, IVSHMEM_PROTOCOL_VERSION
,
106 IVSHMEM_SERVER_DEBUG(server
, "cannot send version: %s\n",
111 /* send the peer id to the client, without any descriptor (fd = -1) */
112 ret
= ivshmem_server_send_one_msg(peer
->sock_fd
, peer
->id
, -1);
114 IVSHMEM_SERVER_DEBUG(server
, "cannot send peer id: %s\n",
119 /* send the shm_fd; the id slot is -1 for this message */
120 ret
= ivshmem_server_send_one_msg(peer
->sock_fd
, -1, server
->shm_fd
);
122 IVSHMEM_SERVER_DEBUG(server
, "cannot send shm fd: %s\n",
130 /* handle message on listening unix socket (new client connection)
 *
 * Accepts one connection on server->sock_fd, makes the new socket
 * non-blocking and allocates a zeroed IvshmemServerPeer for it. The peer
 * gets an id taken from server->cur_id, one event notifier per vector
 * (server->n_vectors of them) and the initial info messages. The notifier
 * descriptors are then exchanged: the new peer's to every existing peer,
 * every existing peer's to the new one, and the new peer's own to itself.
 * Only after that is the peer appended to server->peer_list.
 * Failures are reported through IVSHMEM_SERVER_DEBUG.
 * NOTE(review): the error-path control flow and the return statements are
 * not visible in this excerpt. */
132 ivshmem_server_handle_new_conn(IvshmemServer
*server
)
134 IvshmemServerPeer
*peer
, *other_peer
;
135 struct sockaddr_un unaddr
;
136 socklen_t unaddr_len
;
140 /* accept the incoming connection */
141 unaddr_len
= sizeof(unaddr
);
142 newfd
= qemu_accept(server
->sock_fd
,
143 (struct sockaddr
*)&unaddr
, &unaddr_len
);
146 IVSHMEM_SERVER_DEBUG(server
, "cannot accept() %s\n", strerror(errno
));
/* the accepted socket is switched to non-blocking mode */
150 qemu_set_nonblock(newfd
);
151 IVSHMEM_SERVER_DEBUG(server
, "accept()=%d\n", newfd
);
153 /* allocate new structure for this peer; g_malloc0() zero-fills it, so
 * fields that are not assigned below start at 0 */
154 peer
= g_malloc0(sizeof(*peer
));
155 peer
->sock_fd
= newfd
;
157 /* get an unused peer id */
158 /* XXX: this could use id allocation such as Linux IDA, or simply
160 for (i
= 0; i
< G_MAXUINT16
; i
++) {
161 if (ivshmem_server_search_peer(server
, server
->cur_id
) == NULL
) {
166 if (i
== G_MAXUINT16
) {
167 IVSHMEM_SERVER_DEBUG(server
, "cannot allocate new client id\n");
172 peer
->id
= server
->cur_id
++;
174 /* create eventfd, one per vector */
/* every peer gets as many vectors as the server was configured with */
175 peer
->vectors_count
= server
->n_vectors
;
176 for (i
= 0; i
< peer
->vectors_count
; i
++) {
177 if (event_notifier_init(&peer
->vectors
[i
], FALSE
) < 0) {
178 IVSHMEM_SERVER_DEBUG(server
, "cannot create eventfd\n");
183 /* send peer id and shm fd */
184 if (ivshmem_server_send_initial_info(server
, peer
) < 0) {
185 IVSHMEM_SERVER_DEBUG(server
, "cannot send initial info\n");
189 /* advertise the new peer to others: each existing peer receives one
 * message per vector, carrying the new peer's id and that vector's wfd */
190 QTAILQ_FOREACH(other_peer
, &server
->peer_list
, next
) {
191 for (i
= 0; i
< peer
->vectors_count
; i
++) {
192 ivshmem_server_send_one_msg(other_peer
->sock_fd
, peer
->id
,
193 peer
->vectors
[i
].wfd
);
197 /* advertise the other peers to the new one */
/* NOTE(review): the loop bound is the NEW peer's vectors_count while the
 * index is applied to other_peer->vectors; this relies on all peers having
 * the same vector count (each is set from server->n_vectors) -- confirm */
198 QTAILQ_FOREACH(other_peer
, &server
->peer_list
, next
) {
199 for (i
= 0; i
< peer
->vectors_count
; i
++) {
200 ivshmem_server_send_one_msg(peer
->sock_fd
, other_peer
->id
,
201 other_peer
->vectors
[i
].wfd
);
205 /* advertise the new peer to itself; unlike the two loops above, the
 * descriptor comes from event_notifier_get_fd() instead of .wfd */
206 for (i
= 0; i
< peer
->vectors_count
; i
++) {
207 ivshmem_server_send_one_msg(peer
->sock_fd
, peer
->id
,
208 event_notifier_get_fd(&peer
->vectors
[i
]));
/* the peer joins the list only now, so the QTAILQ_FOREACH loops above never
 * visit it */
211 QTAILQ_INSERT_TAIL(&server
->peer_list
, peer
, next
);
212 IVSHMEM_SERVER_DEBUG(server
, "new peer id = %" PRId64
"\n",
/* NOTE(review): presumably the error-path cleanup of notifiers that were
 * already created; the enclosing loop and labels are not visible here */
218 event_notifier_cleanup(&peer
->vectors
[i
]);
225 /* Try to ftruncate a file to the next power of 2 of shmsize.
226 * If that fails, all powers of 2 above shmsize are tested until
227 * we reach the maximum huge page size. This is useful
228 * if the shm file is in a hugetlbfs that cannot be truncated to the requested size. */
/*
 * Size the file behind fd for use as the shared memory region. shmsize is
 * first passed through pow2ceil(); if fstat() succeeds and the file already
 * has exactly that size, the dedicated branch below is taken. Otherwise
 * ftruncate() is called in a loop that runs while shmsize does not exceed
 * IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE.
 * NOTE(review): how shmsize grows between iterations and what is returned
 * are not visible in this excerpt.
 * NOTE(review): shmsize is `unsigned` here while ivshmem_server_init()
 * takes the size as size_t; a larger value would be truncated on the way
 * in -- confirm the intended range.
 */
231 ivshmem_server_ftruncate(int fd
, unsigned shmsize
)
236 /* align shmsize to next power of 2 */
237 shmsize
= pow2ceil(shmsize
);
/* file already has the wanted size (fstat() must have succeeded as well) */
239 if (fstat(fd
, &mapstat
) != -1 && mapstat
.st_size
== shmsize
) {
/* never try a size above the maximum huge page size */
243 while (shmsize
<= IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE
) {
244 ret
= ftruncate(fd
, shmsize
);
254 /* Init a new ivshmem server
 *
 * Clears *server, then records the configuration in it: the verbose flag,
 * copies of unix_sock_path and shm_path, use_shm_open, shm_size and
 * n_vectors. The peer list is initialised empty. Nothing is opened here;
 * that is done by ivshmem_server_start().
 * Both paths are copied with snprintf() into fixed-size members; a result
 * that is negative or not smaller than the destination size (i.e. the path
 * did not fit) is reported with a debug message.
 * NOTE(review): the end of the parameter list and the return statements
 * are not visible in this excerpt. */
256 ivshmem_server_init(IvshmemServer
*server
, const char *unix_sock_path
,
257 const char *shm_path
, bool use_shm_open
,
258 size_t shm_size
, unsigned n_vectors
,
263 memset(server
, 0, sizeof(*server
));
/* set before anything else: IVSHMEM_SERVER_DEBUG reads server->verbose */
264 server
->verbose
= verbose
;
266 ret
= snprintf(server
->unix_sock_path
, sizeof(server
->unix_sock_path
),
267 "%s", unix_sock_path
);
/* ret >= size means snprintf() had to truncate the path */
268 if (ret
< 0 || ret
>= sizeof(server
->unix_sock_path
)) {
269 IVSHMEM_SERVER_DEBUG(server
, "could not copy unix socket path\n");
272 ret
= snprintf(server
->shm_path
, sizeof(server
->shm_path
),
274 if (ret
< 0 || ret
>= sizeof(server
->shm_path
)) {
275 IVSHMEM_SERVER_DEBUG(server
, "could not copy shm path\n");
279 server
->use_shm_open
= use_shm_open
;
280 server
->shm_size
= shm_size
;
281 server
->n_vectors
= n_vectors
;
/* start with no connected peer */
283 QTAILQ_INIT(&server
->peer_list
);
288 /* open shm, create and bind to the unix socket
 *
 * The shared memory descriptor comes from shm_open() on server->shm_path
 * when server->use_shm_open is set, otherwise from mkstemp() on the
 * template "<shm_path>/ivshmem.XXXXXX". It is sized with
 * ivshmem_server_ftruncate(). Then an AF_UNIX stream socket is created,
 * bound to server->unix_sock_path and put in listening state. Both
 * descriptors are stored in *server only after listen() has succeeded.
 * Open and ftruncate failures go to stderr; socket-related failures go
 * through IVSHMEM_SERVER_DEBUG.
 * NOTE(review): error-path cleanup (closing descriptors, releasing
 * `filename`) and the return statements are not visible in this excerpt. */
290 ivshmem_server_start(IvshmemServer
*server
)
292 struct sockaddr_un sun
;
293 int shm_fd
, sock_fd
, ret
;
296 if (server
->use_shm_open
) {
297 IVSHMEM_SERVER_DEBUG(server
, "Using POSIX shared memory: %s\n",
/* create the object if needed, read/write, owner-only permissions */
299 shm_fd
= shm_open(server
->shm_path
, O_CREAT
| O_RDWR
, S_IRWXU
);
/* file-backed case: shm_path is used as a directory for a mkstemp() file */
301 gchar
*filename
= g_strdup_printf("%s/ivshmem.XXXXXX", server
->shm_path
);
302 IVSHMEM_SERVER_DEBUG(server
, "Using file-backed shared memory: %s\n",
304 shm_fd
= mkstemp(filename
);
310 fprintf(stderr
, "cannot open shm file %s: %s\n", server
->shm_path
,
314 if (ivshmem_server_ftruncate(shm_fd
, server
->shm_size
) < 0) {
315 fprintf(stderr
, "ftruncate(%s) failed: %s\n", server
->shm_path
,
320 IVSHMEM_SERVER_DEBUG(server
, "create & bind socket %s\n",
321 server
->unix_sock_path
);
323 /* create the unix listening socket */
324 sock_fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
326 IVSHMEM_SERVER_DEBUG(server
, "cannot create socket: %s\n",
331 sun
.sun_family
= AF_UNIX
;
/* sun_path is a fixed-size array: copy with a bound and detect truncation */
332 ret
= snprintf(sun
.sun_path
, sizeof(sun
.sun_path
), "%s",
333 server
->unix_sock_path
);
334 if (ret
< 0 || ret
>= sizeof(sun
.sun_path
)) {
335 IVSHMEM_SERVER_DEBUG(server
, "could not copy unix socket path\n");
/* NOTE(review): the message below says "connect" although the failing call
 * is bind() */
338 if (bind(sock_fd
, (struct sockaddr
*)&sun
, sizeof(sun
)) < 0) {
339 IVSHMEM_SERVER_DEBUG(server
, "cannot connect to %s: %s\n", sun
.sun_path
,
344 if (listen(sock_fd
, IVSHMEM_SERVER_LISTEN_BACKLOG
) < 0) {
345 IVSHMEM_SERVER_DEBUG(server
, "listen() failed: %s\n", strerror(errno
));
/* publish the descriptors only once everything above has succeeded */
349 server
->sock_fd
= sock_fd
;
350 server
->shm_fd
= shm_fd
;
361 /* close connections to clients, the unix socket and the shm fd
 *
 * Every peer on server->peer_list is released with
 * ivshmem_server_free_peer(). The socket path is then unlinked, both
 * server descriptors are closed and server->sock_fd is set to -1, which is
 * the value ivshmem_server_get_fds() tests for. */
363 ivshmem_server_close(IvshmemServer
*server
)
365 IvshmemServerPeer
*peer
, *npeer
;
367 IVSHMEM_SERVER_DEBUG(server
, "close server\n");
/* _SAFE variant: ivshmem_server_free_peer() removes the current element */
369 QTAILQ_FOREACH_SAFE(peer
, &server
->peer_list
, next
, npeer
) {
370 ivshmem_server_free_peer(server
, peer
);
/* remove the socket file from the filesystem before closing the socket */
373 unlink(server
->unix_sock_path
);
374 close(server
->sock_fd
);
375 close(server
->shm_fd
);
376 server
->sock_fd
= -1;
380 /* get the fd_set according to the unix socket and the peer list
 *
 * Adds the listening socket and every peer socket to *fds. *maxfd is
 * raised to "descriptor + 1" whenever a descriptor is not below it, so on
 * return it is strictly greater than every descriptor added here; it is
 * never lowered. server->sock_fd == -1 is handled by a dedicated branch.
 * NOTE(review): the body of that branch is not visible in this excerpt, and
 * no bound check of the descriptors against FD_SETSIZE is visible before
 * FD_SET() -- confirm against the full file. */
382 ivshmem_server_get_fds(const IvshmemServer
*server
, fd_set
*fds
, int *maxfd
)
384 IvshmemServerPeer
*peer
;
/* -1 is what ivshmem_server_close() stores in sock_fd */
386 if (server
->sock_fd
== -1) {
390 FD_SET(server
->sock_fd
, fds
);
391 if (server
->sock_fd
>= *maxfd
) {
392 *maxfd
= server
->sock_fd
+ 1;
/* same treatment for the socket of each connected peer */
395 QTAILQ_FOREACH(peer
, &server
->peer_list
, next
) {
396 FD_SET(peer
->sock_fd
, fds
);
397 if (peer
->sock_fd
>= *maxfd
) {
398 *maxfd
= peer
->sock_fd
+ 1;
403 /* process incoming messages on the sockets in fd_set
 *
 * If the listening socket is below maxfd and set in fds, a new connection
 * is handled with ivshmem_server_handle_new_conn(); a negative result with
 * errno other than EINTR is reported with a debug message. Afterwards every
 * peer whose socket is below maxfd and set in fds is released with
 * ivshmem_server_free_peer().
 * NOTE(review): the statements following the debug message and the return
 * statements are not visible in this excerpt. */
405 ivshmem_server_handle_fds(IvshmemServer
*server
, fd_set
*fds
, int maxfd
)
407 IvshmemServerPeer
*peer
, *peer_next
;
/* short-circuit order matters: accept is attempted only when the listening
 * socket is flagged, and errno is examined only after a failure */
409 if (server
->sock_fd
< maxfd
&& FD_ISSET(server
->sock_fd
, fds
) &&
410 ivshmem_server_handle_new_conn(server
) < 0 && errno
!= EINTR
) {
411 IVSHMEM_SERVER_DEBUG(server
, "ivshmem_server_handle_new_conn() "
/* _SAFE variant: ivshmem_server_free_peer() removes the current element */
416 QTAILQ_FOREACH_SAFE(peer
, &server
->peer_list
, next
, peer_next
) {
417 /* any message from a peer socket result in a close() */
418 IVSHMEM_SERVER_DEBUG(server
, "peer->sock_fd=%d\n", peer
->sock_fd
);
419 if (peer
->sock_fd
< maxfd
&& FD_ISSET(peer
->sock_fd
, fds
)) {
420 ivshmem_server_free_peer(server
, peer
);
427 /* lookup peer from its id
 *
 * Walks server->peer_list from the head and compares each peer's id with
 * peer_id. ivshmem_server_handle_new_conn() treats a NULL result as "this
 * id is unused".
 * NOTE(review): the return statements are not visible in this excerpt. */
429 ivshmem_server_search_peer(IvshmemServer
*server
, int64_t peer_id
)
431 IvshmemServerPeer
*peer
;
433 QTAILQ_FOREACH(peer
, &server
->peer_list
, next
) {
434 if (peer
->id
== peer_id
) {
441 /* dump our info, the list of peers their vectors on stdout
 *
 * For each peer on server->peer_list one "peer_id = ..." line is printed,
 * followed by one line per vector with the vector index and the descriptor
 * returned by event_notifier_get_fd(). Output is unconditional (plain
 * printf(), not IVSHMEM_SERVER_DEBUG). */
443 ivshmem_server_dump(const IvshmemServer
*server
)
445 const IvshmemServerPeer
*peer
;
449 QTAILQ_FOREACH(peer
, &server
->peer_list
, next
) {
450 printf("peer_id = %" PRId64
"\n", peer
->id
);
452 for (vector
= 0; vector
< peer
->vectors_count
; vector
++) {
453 printf(" vector %d is enabled (fd=%d)\n", vector
,
454 event_notifier_get_fd(&peer
->vectors
[vector
]));