ivshmem-server: fix hugetlbfs support
[qemu/ar7.git] / contrib / ivshmem-server / ivshmem-server.c
/*
 * Copyright 6WIND S.A., 2014
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version. See the COPYING file in the
 * top-level directory.
 */

#include "qemu-common.h"
#include "qemu/sockets.h"

#include <sys/mman.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#ifdef CONFIG_LINUX
#include <sys/vfs.h>
#endif

#include "ivshmem-server.h"

/* log a message on stdout if verbose=1 */
#define IVSHMEM_SERVER_DEBUG(server, fmt, ...) do { \
        if ((server)->verbose) {         \
            printf(fmt, ## __VA_ARGS__); \
        }                                \
    } while (0)

/** maximum size of a huge page, used by ivshmem_server_ftruncate() */
#define IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE (1024 * 1024 * 1024)

/** default listen backlog (number of sockets not accepted) */
#define IVSHMEM_SERVER_LISTEN_BACKLOG 10

/* send message to a client unix socket */
static int
ivshmem_server_send_one_msg(int sock_fd, long peer_id, int fd)
{
    int ret;
    struct msghdr msg;
    struct iovec iov[1];
    union {
        struct cmsghdr cmsg;
        char control[CMSG_SPACE(sizeof(int))];
    } msg_control;
    struct cmsghdr *cmsg;

    iov[0].iov_base = &peer_id;
    iov[0].iov_len = sizeof(peer_id);

    memset(&msg, 0, sizeof(msg));
    msg.msg_iov = iov;
    msg.msg_iovlen = 1;

    /* if fd is specified, add it in a cmsg */
    if (fd >= 0) {
        memset(&msg_control, 0, sizeof(msg_control));
        msg.msg_control = &msg_control;
        msg.msg_controllen = sizeof(msg_control);
        cmsg = CMSG_FIRSTHDR(&msg);
        cmsg->cmsg_level = SOL_SOCKET;
        cmsg->cmsg_type = SCM_RIGHTS;
        cmsg->cmsg_len = CMSG_LEN(sizeof(int));
        memcpy(CMSG_DATA(cmsg), &fd, sizeof(fd));
    }
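
    /* the message payload is the peer id (a long); the file descriptor,
     * when given, is passed out-of-band as SCM_RIGHTS ancillary data */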
    ret = sendmsg(sock_fd, &msg, 0);
    if (ret <= 0) {
        return -1;
    }

    return 0;
}

/* free a peer when the server advertises a disconnection or when the
 * server is freed */
static void
ivshmem_server_free_peer(IvshmemServer *server, IvshmemServerPeer *peer)
{
    unsigned vector;
    IvshmemServerPeer *other_peer;

    IVSHMEM_SERVER_DEBUG(server, "free peer %ld\n", peer->id);
    close(peer->sock_fd);
    QTAILQ_REMOVE(&server->peer_list, peer, next);

    /* advertise the deletion to other peers */
    QTAILQ_FOREACH(other_peer, &server->peer_list, next) {
        ivshmem_server_send_one_msg(other_peer->sock_fd, peer->id, -1);
    }
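
    /* release the peer's event notifiers; this closes their eventfds */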
    for (vector = 0; vector < peer->vectors_count; vector++) {
        event_notifier_cleanup(&peer->vectors[vector]);
    }

    g_free(peer);
}

/* send the peer id and the shm_fd just after a new client connection */
static int
ivshmem_server_send_initial_info(IvshmemServer *server, IvshmemServerPeer *peer)
{
    int ret;

    /* send the peer id to the client */
    ret = ivshmem_server_send_one_msg(peer->sock_fd, peer->id, -1);
    if (ret < 0) {
        IVSHMEM_SERVER_DEBUG(server, "cannot send peer id: %s\n",
                             strerror(errno));
        return -1;
    }

    /* send the shm_fd */
    ret = ivshmem_server_send_one_msg(peer->sock_fd, -1, server->shm_fd);
    if (ret < 0) {
        IVSHMEM_SERVER_DEBUG(server, "cannot send shm fd: %s\n",
                             strerror(errno));
        return -1;
    }

    return 0;
}

/* handle message on listening unix socket (new client connection) */
static int
ivshmem_server_handle_new_conn(IvshmemServer *server)
{
    IvshmemServerPeer *peer, *other_peer;
    struct sockaddr_un unaddr;
    socklen_t unaddr_len;
    int newfd;
    unsigned i;

    /* accept the incoming connection */
    unaddr_len = sizeof(unaddr);
    newfd = qemu_accept(server->sock_fd,
                        (struct sockaddr *)&unaddr, &unaddr_len);

    if (newfd < 0) {
        IVSHMEM_SERVER_DEBUG(server, "cannot accept() %s\n", strerror(errno));
        return -1;
    }

    qemu_set_nonblock(newfd);
    IVSHMEM_SERVER_DEBUG(server, "accept()=%d\n", newfd);

    /* allocate new structure for this peer */
    peer = g_malloc0(sizeof(*peer));
    peer->sock_fd = newfd;

    /* get an unused peer id */
    /* XXX: this could use id allocation such as Linux IDA, or simply
     * a free-list */
    for (i = 0; i < G_MAXUINT16; i++) {
        if (ivshmem_server_search_peer(server, server->cur_id) == NULL) {
            break;
        }
        server->cur_id++;
    }
    if (i == G_MAXUINT16) {
        IVSHMEM_SERVER_DEBUG(server, "cannot allocate new client id\n");
        goto fail;
    }
    peer->id = server->cur_id++;

    /* create eventfd, one per vector */
    peer->vectors_count = server->n_vectors;
    for (i = 0; i < peer->vectors_count; i++) {
        if (event_notifier_init(&peer->vectors[i], FALSE) < 0) {
            IVSHMEM_SERVER_DEBUG(server, "cannot create eventfd\n");
            goto fail;
        }
    }

    /* send peer id and shm fd */
    if (ivshmem_server_send_initial_info(server, peer) < 0) {
        IVSHMEM_SERVER_DEBUG(server, "cannot send initial info\n");
        goto fail;
    }

    /* advertise the new peer to others */
    QTAILQ_FOREACH(other_peer, &server->peer_list, next) {
        for (i = 0; i < peer->vectors_count; i++) {
            ivshmem_server_send_one_msg(other_peer->sock_fd, peer->id,
                                        peer->vectors[i].wfd);
        }
    }

    /* advertise the other peers to the new one */
    QTAILQ_FOREACH(other_peer, &server->peer_list, next) {
        for (i = 0; i < peer->vectors_count; i++) {
            ivshmem_server_send_one_msg(peer->sock_fd, other_peer->id,
                                        other_peer->vectors[i].wfd);
        }
    }

    /* advertise the new peer to itself */
    for (i = 0; i < peer->vectors_count; i++) {
        ivshmem_server_send_one_msg(peer->sock_fd, peer->id,
                                    event_notifier_get_fd(&peer->vectors[i]));
    }

    QTAILQ_INSERT_TAIL(&server->peer_list, peer, next);
    IVSHMEM_SERVER_DEBUG(server, "new peer id = %ld\n",
                         peer->id);
    return 0;
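
    /* error path: release what was set up above (event notifiers, the
     * accepted socket) and free the half-initialised peer */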
fail:
    while (i--) {
        event_notifier_cleanup(&peer->vectors[i]);
    }
    close(newfd);
    g_free(peer);
    return -1;
}

/* Try to ftruncate a file to the next power of 2 of shmsize.
 * If it fails, all powers of 2 above shmsize are tested until
 * we reach the maximum huge page size. This is useful
 * if the shm file is in a hugetlbfs that cannot be truncated to the
 * shm_size value. */
static int
ivshmem_server_ftruncate(int fd, unsigned shmsize)
{
    int ret;
    struct stat mapstat;

    /* align shmsize to next power of 2 */
    shmsize = pow2ceil(shmsize);

    if (fstat(fd, &mapstat) != -1 && mapstat.st_size == shmsize) {
        return 0;
    }

    while (shmsize <= IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE) {
        ret = ftruncate(fd, shmsize);
        if (ret == 0) {
            return ret;
        }
        shmsize *= 2;
    }

    return -1;
}

/* Init a new ivshmem server */
int
ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
                    const char *shm_path, size_t shm_size, unsigned n_vectors,
                    bool verbose)
{
    int ret;

    memset(server, 0, sizeof(*server));
    server->verbose = verbose;

    ret = snprintf(server->unix_sock_path, sizeof(server->unix_sock_path),
                   "%s", unix_sock_path);
    if (ret < 0 || ret >= sizeof(server->unix_sock_path)) {
        IVSHMEM_SERVER_DEBUG(server, "could not copy unix socket path\n");
        return -1;
    }
    ret = snprintf(server->shm_path, sizeof(server->shm_path),
                   "%s", shm_path);
    if (ret < 0 || ret >= sizeof(server->shm_path)) {
        IVSHMEM_SERVER_DEBUG(server, "could not copy shm path\n");
        return -1;
    }

    server->shm_size = shm_size;
    server->n_vectors = n_vectors;

    QTAILQ_INIT(&server->peer_list);

    return 0;
}

#ifdef CONFIG_LINUX

#define HUGETLBFS_MAGIC 0x958458f6
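
/* return the huge page size of the filesystem containing path if it is a
 * hugetlbfs mount, -1 otherwise */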
static long gethugepagesize(const char *path)
{
    struct statfs fs;
    int ret;

    do {
        ret = statfs(path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        return -1;
    }

    if (fs.f_type != HUGETLBFS_MAGIC) {
        return -1;
    }

    return fs.f_bsize;
}
#endif

/* open shm, create and bind to the unix socket */
int
ivshmem_server_start(IvshmemServer *server)
{
    struct sockaddr_un sun;
    int shm_fd, sock_fd, ret;

    /* open shm file */
#ifdef CONFIG_LINUX
    long hpagesize;

    hpagesize = gethugepagesize(server->shm_path);
    if (hpagesize < 0 && errno != ENOENT) {
        IVSHMEM_SERVER_DEBUG(server, "cannot stat shm file %s: %s\n",
                             server->shm_path, strerror(errno));
    }
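
    /* when shm_path is a hugetlbfs mount point, create a temporary file
     * inside it (unlinked right away) instead of using shm_open() */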
    if (hpagesize > 0) {
        gchar *filename = g_strdup_printf("%s/ivshmem.XXXXXX", server->shm_path);
        IVSHMEM_SERVER_DEBUG(server, "Using hugepages: %s\n", server->shm_path);
        shm_fd = mkstemp(filename);
        unlink(filename);
        g_free(filename);
    } else
#endif
    {
        IVSHMEM_SERVER_DEBUG(server, "Using POSIX shared memory: %s\n",
                             server->shm_path);
        shm_fd = shm_open(server->shm_path, O_CREAT|O_RDWR, S_IRWXU);
    }

    if (shm_fd < 0) {
        fprintf(stderr, "cannot open shm file %s: %s\n", server->shm_path,
                strerror(errno));
        return -1;
    }

    if (ivshmem_server_ftruncate(shm_fd, server->shm_size) < 0) {
        fprintf(stderr, "ftruncate(%s) failed: %s\n", server->shm_path,
                strerror(errno));
        goto err_close_shm;
    }

    IVSHMEM_SERVER_DEBUG(server, "create & bind socket %s\n",
                         server->unix_sock_path);

    /* create the unix listening socket */
    sock_fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (sock_fd < 0) {
        IVSHMEM_SERVER_DEBUG(server, "cannot create socket: %s\n",
                             strerror(errno));
        goto err_close_shm;
    }

    sun.sun_family = AF_UNIX;
    ret = snprintf(sun.sun_path, sizeof(sun.sun_path), "%s",
                   server->unix_sock_path);
    if (ret < 0 || ret >= sizeof(sun.sun_path)) {
        IVSHMEM_SERVER_DEBUG(server, "could not copy unix socket path\n");
        goto err_close_sock;
    }
    if (bind(sock_fd, (struct sockaddr *)&sun, sizeof(sun)) < 0) {
        IVSHMEM_SERVER_DEBUG(server, "cannot bind to %s: %s\n", sun.sun_path,
                             strerror(errno));
        goto err_close_sock;
    }

    if (listen(sock_fd, IVSHMEM_SERVER_LISTEN_BACKLOG) < 0) {
        IVSHMEM_SERVER_DEBUG(server, "listen() failed: %s\n", strerror(errno));
        goto err_close_sock;
    }

    server->sock_fd = sock_fd;
    server->shm_fd = shm_fd;

    return 0;

err_close_sock:
    close(sock_fd);
err_close_shm:
    close(shm_fd);
    return -1;
}

/* close connections to clients, the unix socket and the shm fd */
void
ivshmem_server_close(IvshmemServer *server)
{
    IvshmemServerPeer *peer, *npeer;

    IVSHMEM_SERVER_DEBUG(server, "close server\n");

    QTAILQ_FOREACH_SAFE(peer, &server->peer_list, next, npeer) {
        ivshmem_server_free_peer(server, peer);
    }

    unlink(server->unix_sock_path);
    close(server->sock_fd);
    close(server->shm_fd);
    server->sock_fd = -1;
    server->shm_fd = -1;
}

/* get the fd_set according to the unix socket and the peer list */
void
ivshmem_server_get_fds(const IvshmemServer *server, fd_set *fds, int *maxfd)
{
    IvshmemServerPeer *peer;

    if (server->sock_fd == -1) {
        return;
    }
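
    /* add the listening socket and every peer socket to *fds, raising
     * *maxfd to the highest fd + 1 as select() expects */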
    FD_SET(server->sock_fd, fds);
    if (server->sock_fd >= *maxfd) {
        *maxfd = server->sock_fd + 1;
    }

    QTAILQ_FOREACH(peer, &server->peer_list, next) {
        FD_SET(peer->sock_fd, fds);
        if (peer->sock_fd >= *maxfd) {
            *maxfd = peer->sock_fd + 1;
        }
    }
}

/* process incoming messages on the sockets in fd_set */
int
ivshmem_server_handle_fds(IvshmemServer *server, fd_set *fds, int maxfd)
{
    IvshmemServerPeer *peer, *peer_next;
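
    /* a readable listening socket means a new client is connecting */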
    if (server->sock_fd < maxfd && FD_ISSET(server->sock_fd, fds) &&
        ivshmem_server_handle_new_conn(server) < 0 && errno != EINTR) {
        IVSHMEM_SERVER_DEBUG(server, "ivshmem_server_handle_new_conn() "
                             "failed\n");
        return -1;
    }

    QTAILQ_FOREACH_SAFE(peer, &server->peer_list, next, peer_next) {
        /* any message from a peer socket results in a close() */
        IVSHMEM_SERVER_DEBUG(server, "peer->sock_fd=%d\n", peer->sock_fd);
        if (peer->sock_fd < maxfd && FD_ISSET(peer->sock_fd, fds)) {
            ivshmem_server_free_peer(server, peer);
        }
    }

    return 0;
}

/* lookup peer from its id */
IvshmemServerPeer *
ivshmem_server_search_peer(IvshmemServer *server, long peer_id)
{
    IvshmemServerPeer *peer;

    QTAILQ_FOREACH(peer, &server->peer_list, next) {
        if (peer->id == peer_id) {
            return peer;
        }
    }
    return NULL;
}

/* dump our info, the list of peers and their vectors on stdout */
void
ivshmem_server_dump(const IvshmemServer *server)
{
    const IvshmemServerPeer *peer;
    unsigned vector;

    /* dump peers */
    QTAILQ_FOREACH(peer, &server->peer_list, next) {
        printf("peer_id = %ld\n", peer->id);

        for (vector = 0; vector < peer->vectors_count; vector++) {
            printf("  vector %d is enabled (fd=%d)\n", vector,
                   event_notifier_get_fd(&peer->vectors[vector]));
        }
    }
}