/*
 * Copyright 6WIND S.A., 2014
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version. See the COPYING file in the
 * top-level directory.
 */
8 #include "qemu-common.h"
9 #include "qemu/sockets.h"
12 #include <sys/types.h>
13 #include <sys/socket.h>
19 #include "ivshmem-server.h"
/*
 * Log a printf-style message on stdout when (server)->verbose is non-zero;
 * do nothing otherwise.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and is safe in un-braced if/else arms.
 *
 * NOTE(review): the closing lines of this macro were missing from the
 * extracted text; the "} } while (0)" tail is restored to match the visible
 * "do { if (...) {" opener.
 */
#define IVSHMEM_SERVER_DEBUG(server, fmt, ...) do { \
        if ((server)->verbose) {         \
            printf(fmt, ## __VA_ARGS__); \
        }                                \
    } while (0)

/** maximum size of a huge page, used by ivshmem_server_ftruncate() */
#define IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE (1024 * 1024 * 1024)

/** default listen backlog (number of sockets not accepted) */
#define IVSHMEM_SERVER_LISTEN_BACKLOG 10
/*
 * Send one message to a client unix socket.
 *
 * The payload is peer_id converted to little-endian. When fd is a valid
 * descriptor (>= 0) it is attached to the message as SCM_RIGHTS ancillary
 * data so the receiving process gets its own copy of the descriptor.
 *
 * @sock_fd: connected unix socket of the destination client
 * @peer_id: 64-bit value to send
 * @fd:      descriptor to pass along, or a negative value for "none"
 *
 * Returns 0 on success, -1 if sendmsg() did not send anything.
 *
 * NOTE(review): the extracted text lacked the return type, the local
 * declarations, the iovec hookup, the "fd >= 0" guard and the result check;
 * they are reconstructed from the visible statements and comments - confirm
 * against the project's reference copy.
 */
static int
ivshmem_server_send_one_msg(int sock_fd, int64_t peer_id, int fd)
{
    int ret;
    struct msghdr msg;
    struct iovec iov[1];
    /* union guarantees the control buffer is aligned for struct cmsghdr */
    union {
        struct cmsghdr cmsg;
        char control[CMSG_SPACE(sizeof(int))];
    } msg_control;
    struct cmsghdr *cmsg;

    peer_id = GINT64_TO_LE(peer_id);
    iov[0].iov_base = &peer_id;
    iov[0].iov_len = sizeof(peer_id);

    memset(&msg, 0, sizeof(msg));
    msg.msg_iov = iov;
    msg.msg_iovlen = 1;

    /* if fd is specified, add it in a cmsg */
    if (fd >= 0) {
        memset(&msg_control, 0, sizeof(msg_control));
        msg.msg_control = &msg_control;
        msg.msg_controllen = sizeof(msg_control);
        cmsg = CMSG_FIRSTHDR(&msg);
        cmsg->cmsg_level = SOL_SOCKET;
        cmsg->cmsg_type = SCM_RIGHTS;
        cmsg->cmsg_len = CMSG_LEN(sizeof(int));
        memcpy(CMSG_DATA(cmsg), &fd, sizeof(fd));
    }

    ret = sendmsg(sock_fd, &msg, 0);
    if (ret <= 0) {
        return -1;
    }

    return 0;
}
75 /* free a peer when the server advertises a disconnection or when the
78 ivshmem_server_free_peer(IvshmemServer
*server
, IvshmemServerPeer
*peer
)
81 IvshmemServerPeer
*other_peer
;
83 IVSHMEM_SERVER_DEBUG(server
, "free peer %" PRId64
"\n", peer
->id
);
85 QTAILQ_REMOVE(&server
->peer_list
, peer
, next
);
87 /* advertise the deletion to other peers */
88 QTAILQ_FOREACH(other_peer
, &server
->peer_list
, next
) {
89 ivshmem_server_send_one_msg(other_peer
->sock_fd
, peer
->id
, -1);
92 for (vector
= 0; vector
< peer
->vectors_count
; vector
++) {
93 event_notifier_cleanup(&peer
->vectors
[vector
]);
99 /* send the peer id and the shm_fd just after a new client connection */
101 ivshmem_server_send_initial_info(IvshmemServer
*server
, IvshmemServerPeer
*peer
)
105 /* send our protocol version first */
106 ret
= ivshmem_server_send_one_msg(peer
->sock_fd
, IVSHMEM_PROTOCOL_VERSION
,
109 IVSHMEM_SERVER_DEBUG(server
, "cannot send version: %s\n",
114 /* send the peer id to the client */
115 ret
= ivshmem_server_send_one_msg(peer
->sock_fd
, peer
->id
, -1);
117 IVSHMEM_SERVER_DEBUG(server
, "cannot send peer id: %s\n",
122 /* send the shm_fd */
123 ret
= ivshmem_server_send_one_msg(peer
->sock_fd
, -1, server
->shm_fd
);
125 IVSHMEM_SERVER_DEBUG(server
, "cannot send shm fd: %s\n",
133 /* handle message on listening unix socket (new client connection) */
135 ivshmem_server_handle_new_conn(IvshmemServer
*server
)
137 IvshmemServerPeer
*peer
, *other_peer
;
138 struct sockaddr_un unaddr
;
139 socklen_t unaddr_len
;
143 /* accept the incoming connection */
144 unaddr_len
= sizeof(unaddr
);
145 newfd
= qemu_accept(server
->sock_fd
,
146 (struct sockaddr
*)&unaddr
, &unaddr_len
);
149 IVSHMEM_SERVER_DEBUG(server
, "cannot accept() %s\n", strerror(errno
));
153 qemu_set_nonblock(newfd
);
154 IVSHMEM_SERVER_DEBUG(server
, "accept()=%d\n", newfd
);
156 /* allocate new structure for this peer */
157 peer
= g_malloc0(sizeof(*peer
));
158 peer
->sock_fd
= newfd
;
160 /* get an unused peer id */
161 /* XXX: this could use id allocation such as Linux IDA, or simply
163 for (i
= 0; i
< G_MAXUINT16
; i
++) {
164 if (ivshmem_server_search_peer(server
, server
->cur_id
) == NULL
) {
169 if (i
== G_MAXUINT16
) {
170 IVSHMEM_SERVER_DEBUG(server
, "cannot allocate new client id\n");
175 peer
->id
= server
->cur_id
++;
177 /* create eventfd, one per vector */
178 peer
->vectors_count
= server
->n_vectors
;
179 for (i
= 0; i
< peer
->vectors_count
; i
++) {
180 if (event_notifier_init(&peer
->vectors
[i
], FALSE
) < 0) {
181 IVSHMEM_SERVER_DEBUG(server
, "cannot create eventfd\n");
186 /* send peer id and shm fd */
187 if (ivshmem_server_send_initial_info(server
, peer
) < 0) {
188 IVSHMEM_SERVER_DEBUG(server
, "cannot send initial info\n");
192 /* advertise the new peer to others */
193 QTAILQ_FOREACH(other_peer
, &server
->peer_list
, next
) {
194 for (i
= 0; i
< peer
->vectors_count
; i
++) {
195 ivshmem_server_send_one_msg(other_peer
->sock_fd
, peer
->id
,
196 peer
->vectors
[i
].wfd
);
200 /* advertise the other peers to the new one */
201 QTAILQ_FOREACH(other_peer
, &server
->peer_list
, next
) {
202 for (i
= 0; i
< peer
->vectors_count
; i
++) {
203 ivshmem_server_send_one_msg(peer
->sock_fd
, other_peer
->id
,
204 other_peer
->vectors
[i
].wfd
);
208 /* advertise the new peer to itself */
209 for (i
= 0; i
< peer
->vectors_count
; i
++) {
210 ivshmem_server_send_one_msg(peer
->sock_fd
, peer
->id
,
211 event_notifier_get_fd(&peer
->vectors
[i
]));
214 QTAILQ_INSERT_TAIL(&server
->peer_list
, peer
, next
);
215 IVSHMEM_SERVER_DEBUG(server
, "new peer id = %" PRId64
"\n",
221 event_notifier_cleanup(&peer
->vectors
[i
]);
228 /* Try to ftruncate a file to next power of 2 of shmsize.
229 * If it fails; all power of 2 above shmsize are tested until
230 * we reach the maximum huge page size. This is useful
231 * if the shm file is in a hugetlbfs that cannot be truncated to the
234 ivshmem_server_ftruncate(int fd
, unsigned shmsize
)
239 /* align shmsize to next power of 2 */
240 shmsize
= pow2ceil(shmsize
);
242 if (fstat(fd
, &mapstat
) != -1 && mapstat
.st_size
== shmsize
) {
246 while (shmsize
<= IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE
) {
247 ret
= ftruncate(fd
, shmsize
);
257 /* Init a new ivshmem server */
259 ivshmem_server_init(IvshmemServer
*server
, const char *unix_sock_path
,
260 const char *shm_path
, size_t shm_size
, unsigned n_vectors
,
265 memset(server
, 0, sizeof(*server
));
266 server
->verbose
= verbose
;
268 ret
= snprintf(server
->unix_sock_path
, sizeof(server
->unix_sock_path
),
269 "%s", unix_sock_path
);
270 if (ret
< 0 || ret
>= sizeof(server
->unix_sock_path
)) {
271 IVSHMEM_SERVER_DEBUG(server
, "could not copy unix socket path\n");
274 ret
= snprintf(server
->shm_path
, sizeof(server
->shm_path
),
276 if (ret
< 0 || ret
>= sizeof(server
->shm_path
)) {
277 IVSHMEM_SERVER_DEBUG(server
, "could not copy shm path\n");
281 server
->shm_size
= shm_size
;
282 server
->n_vectors
= n_vectors
;
284 QTAILQ_INIT(&server
->peer_list
);
/* filesystem type reported by statfs() for a hugetlbfs mount */
#define HUGETLBFS_MAGIC 0x958458f6

/*
 * Report the huge page size of the filesystem containing path.
 *
 * statfs() is retried for as long as it is interrupted by a signal (EINTR).
 *
 * Returns:
 *   -1       if statfs() fails (errno is left as set by statfs());
 *    0       if path is not on a hugetlbfs filesystem;
 *    > 0     the filesystem block size, i.e. the huge page size.
 *
 * NOTE(review): the extracted text lacked the local declarations, the "do"
 * opener and all return statements; the three return values above are
 * reconstructed from the visible tests and from the caller, which checks
 * for a negative result together with errno - confirm against the reference
 * copy. statfs() is Linux-specific; any platform guard around this code is
 * outside the visible text.
 */
static long gethugepagesize(const char *path)
{
    struct statfs fs;
    int ret;

    do {
        ret = statfs(path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        return -1;
    }

    if (fs.f_type != HUGETLBFS_MAGIC) {
        return 0;
    }

    return fs.f_bsize;
}
314 /* open shm, create and bind to the unix socket */
316 ivshmem_server_start(IvshmemServer
*server
)
318 struct sockaddr_un sun
;
319 int shm_fd
, sock_fd
, ret
;
325 hpagesize
= gethugepagesize(server
->shm_path
);
326 if (hpagesize
< 0 && errno
!= ENOENT
) {
327 IVSHMEM_SERVER_DEBUG(server
, "cannot stat shm file %s: %s\n",
328 server
->shm_path
, strerror(errno
));
332 gchar
*filename
= g_strdup_printf("%s/ivshmem.XXXXXX", server
->shm_path
);
333 IVSHMEM_SERVER_DEBUG(server
, "Using hugepages: %s\n", server
->shm_path
);
334 shm_fd
= mkstemp(filename
);
340 IVSHMEM_SERVER_DEBUG(server
, "Using POSIX shared memory: %s\n",
342 shm_fd
= shm_open(server
->shm_path
, O_CREAT
|O_RDWR
, S_IRWXU
);
346 fprintf(stderr
, "cannot open shm file %s: %s\n", server
->shm_path
,
350 if (ivshmem_server_ftruncate(shm_fd
, server
->shm_size
) < 0) {
351 fprintf(stderr
, "ftruncate(%s) failed: %s\n", server
->shm_path
,
356 IVSHMEM_SERVER_DEBUG(server
, "create & bind socket %s\n",
357 server
->unix_sock_path
);
359 /* create the unix listening socket */
360 sock_fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
362 IVSHMEM_SERVER_DEBUG(server
, "cannot create socket: %s\n",
367 sun
.sun_family
= AF_UNIX
;
368 ret
= snprintf(sun
.sun_path
, sizeof(sun
.sun_path
), "%s",
369 server
->unix_sock_path
);
370 if (ret
< 0 || ret
>= sizeof(sun
.sun_path
)) {
371 IVSHMEM_SERVER_DEBUG(server
, "could not copy unix socket path\n");
374 if (bind(sock_fd
, (struct sockaddr
*)&sun
, sizeof(sun
)) < 0) {
375 IVSHMEM_SERVER_DEBUG(server
, "cannot connect to %s: %s\n", sun
.sun_path
,
380 if (listen(sock_fd
, IVSHMEM_SERVER_LISTEN_BACKLOG
) < 0) {
381 IVSHMEM_SERVER_DEBUG(server
, "listen() failed: %s\n", strerror(errno
));
385 server
->sock_fd
= sock_fd
;
386 server
->shm_fd
= shm_fd
;
397 /* close connections to clients, the unix socket and the shm fd */
399 ivshmem_server_close(IvshmemServer
*server
)
401 IvshmemServerPeer
*peer
, *npeer
;
403 IVSHMEM_SERVER_DEBUG(server
, "close server\n");
405 QTAILQ_FOREACH_SAFE(peer
, &server
->peer_list
, next
, npeer
) {
406 ivshmem_server_free_peer(server
, peer
);
409 unlink(server
->unix_sock_path
);
410 close(server
->sock_fd
);
411 close(server
->shm_fd
);
412 server
->sock_fd
= -1;
416 /* get the fd_set according to the unix socket and the peer list */
418 ivshmem_server_get_fds(const IvshmemServer
*server
, fd_set
*fds
, int *maxfd
)
420 IvshmemServerPeer
*peer
;
422 if (server
->sock_fd
== -1) {
426 FD_SET(server
->sock_fd
, fds
);
427 if (server
->sock_fd
>= *maxfd
) {
428 *maxfd
= server
->sock_fd
+ 1;
431 QTAILQ_FOREACH(peer
, &server
->peer_list
, next
) {
432 FD_SET(peer
->sock_fd
, fds
);
433 if (peer
->sock_fd
>= *maxfd
) {
434 *maxfd
= peer
->sock_fd
+ 1;
439 /* process incoming messages on the sockets in fd_set */
441 ivshmem_server_handle_fds(IvshmemServer
*server
, fd_set
*fds
, int maxfd
)
443 IvshmemServerPeer
*peer
, *peer_next
;
445 if (server
->sock_fd
< maxfd
&& FD_ISSET(server
->sock_fd
, fds
) &&
446 ivshmem_server_handle_new_conn(server
) < 0 && errno
!= EINTR
) {
447 IVSHMEM_SERVER_DEBUG(server
, "ivshmem_server_handle_new_conn() "
452 QTAILQ_FOREACH_SAFE(peer
, &server
->peer_list
, next
, peer_next
) {
453 /* any message from a peer socket result in a close() */
454 IVSHMEM_SERVER_DEBUG(server
, "peer->sock_fd=%d\n", peer
->sock_fd
);
455 if (peer
->sock_fd
< maxfd
&& FD_ISSET(peer
->sock_fd
, fds
)) {
456 ivshmem_server_free_peer(server
, peer
);
463 /* lookup peer from its id */
465 ivshmem_server_search_peer(IvshmemServer
*server
, int64_t peer_id
)
467 IvshmemServerPeer
*peer
;
469 QTAILQ_FOREACH(peer
, &server
->peer_list
, next
) {
470 if (peer
->id
== peer_id
) {
477 /* dump our info, the list of peers their vectors on stdout */
479 ivshmem_server_dump(const IvshmemServer
*server
)
481 const IvshmemServerPeer
*peer
;
485 QTAILQ_FOREACH(peer
, &server
->peer_list
, next
) {
486 printf("peer_id = %" PRId64
"\n", peer
->id
);
488 for (vector
= 0; vector
< peer
->vectors_count
; vector
++) {
489 printf(" vector %d is enabled (fd=%d)\n", vector
,
490 event_notifier_get_fd(&peer
->vectors
[vector
]));