2 * netmap access for qemu
4 * Copyright (c) 2012-2013 Luigi Rizzo
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 #include <sys/ioctl.h>
30 #include <net/netmap.h>
31 #include <net/netmap_user.h>
35 #include "sysemu/sysemu.h"
36 #include "qemu/error-report.h"
39 /* Private netmap device info: what is needed to open one netmap port
 * and to reach its rings.  Other members used elsewhere in this file
 * (fd, mem, memsize) are not visible in this excerpt. */
40 typedef struct NetmapPriv
{
/* Interface descriptor, obtained with NETMAP_IF() in netmap_open(). */
44 struct netmap_if
*nifp
;
/* RX ring 0 of the port, obtained with NETMAP_RXRING(nifp, 0). */
45 struct netmap_ring
*rx
;
/* TX ring 0 of the port, obtained with NETMAP_TXRING(nifp, 0). */
46 struct netmap_ring
*tx
;
47 char fdname
[PATH_MAX
]; /* Normally "/dev/netmap". */
/* Interface name; copied into the NIOCREGIF request by netmap_open(). */
48 char ifname
[IFNAMSIZ
];
/* Per-client state of the netmap backend.  Besides the array below, the
 * code in this file accesses members named nc, me, read_poll and
 * write_poll, which are not visible in this excerpt. */
51 typedef struct NetmapState
{
/* Scratch scatter/gather list; netmap_send() fills it with the slots
 * that make up one received packet. */
56 struct iovec iov
[IOV_MAX
];
/* Debug print macro: writes to stdout a timestamp taken with
 * gettimeofday() (seconds modulo 1000, then microseconds), the name of
 * the calling function and the line number, followed by the caller's
 * printf-style message and a newline. */
59 #define D(format, ...) \
61 struct timeval __xxts; \
62 gettimeofday(&__xxts, NULL); \
63 printf("%03d.%06d %s [%d] " format "\n", \
64 (int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec, \
65 __func__, __LINE__, ##__VA_ARGS__); \
68 /* Rate limited version of "D", lps indicates how many per second */
/* The counters t0 and __cnt are function-static, so every expansion site
 * keeps its own budget.  A message goes through D() only while __cnt is
 * still below lps.
 * NOTE(review): the statements executed when the second changes are not
 * visible in this excerpt; presumably they reset t0 and __cnt. */
69 #define RD(lps, format, ...) \
71 static int t0, __cnt; \
72 struct timeval __xxts; \
73 gettimeofday(&__xxts, NULL); \
74 if (t0 != __xxts.tv_sec) { \
78 if (__cnt++ < lps) { \
79 D(format, ##__VA_ARGS__); \
/* Packet copy helper.  One preprocessor branch simply maps pkt_copy to
 * bcopy; the other defines the routine below.
 * NOTE(review): the #if/#else directives that select between the two are
 * not visible in this excerpt, only the closing #endif. */
85 #define pkt_copy bcopy
87 /* A fast copy routine only for multiples of 64 bytes, non overlapped. */
89 pkt_copy(const void *_src
, void *_dst
, int l
)
91 const uint64_t *src
= _src
;
/* Large requests (1024 bytes or more) take a separate path whose body is
 * not visible here. */
93 if (unlikely(l
>= 1024)) {
/* The loop steps in units of 64 bytes, so a length that is not a multiple
 * of 64 still runs a final full pass (presumably copying a whole 64-byte
 * chunk; the loop body is not visible here). */
97 for (; l
> 0; l
-= 64) {
108 #endif /* __FreeBSD__ */
111 * Open a netmap device. We assume there is only one queue
112 * (which is the case for the VALE bridge).
/* Opens me->fdname, registers me->ifname through NIOCREGIF, maps the
 * shared memory region and stores the interface and ring 0 pointers in
 * *me.  The caller in this file treats a non-zero return as failure.
 * NOTE(review): local declarations, the error branches and the return
 * statements are not visible in this excerpt. */
114 static int netmap_open(NetmapPriv
*me
)
/* Open the netmap control device read/write; the descriptor is also kept
 * in me->fd. */
121 me
->fd
= fd
= open(me
->fdname
, O_RDWR
);
123 error_report("Unable to open netmap device '%s' (%s)",
124 me
->fdname
, strerror(errno
));
/* Build the registration request for the named interface. */
127 memset(&req
, 0, sizeof(req
));
128 pstrcpy(req
.nr_name
, sizeof(req
.nr_name
), me
->ifname
);
/* NOTE(review): NETMAP_NO_TX_POLL presumably stops poll() from syncing
 * the TX ring implicitly, which would explain the explicit NIOCTXSYNC
 * calls in the receive paths; confirm against the netmap API. */
129 req
.nr_ringid
= NETMAP_NO_TX_POLL
;
130 req
.nr_version
= NETMAP_API
;
131 err
= ioctl(fd
, NIOCREGIF
, &req
);
133 error_report("Unable to register %s: %s", me
->ifname
, strerror(errno
));
/* Map the whole region whose size was filled into the request. */
136 l
= me
->memsize
= req
.nr_memsize
;
138 me
->mem
= mmap(0, l
, PROT_WRITE
| PROT_READ
, MAP_SHARED
, fd
, 0);
139 if (me
->mem
== MAP_FAILED
) {
140 error_report("Unable to mmap netmap shared memory: %s",
/* Locate the interface descriptor inside the mapping, then ring 0 in each
 * direction (only one queue is assumed). */
146 me
->nifp
= NETMAP_IF(me
->mem
, req
.nr_offset
);
147 me
->tx
= NETMAP_TXRING(me
->nifp
, 0);
148 me
->rx
= NETMAP_RXRING(me
->nifp
, 0);
156 /* Tell the event-loop if the netmap backend can send packets
158 static int netmap_can_send(void *opaque
)
160 NetmapState
*s
= opaque
;
162 return qemu_can_send_packet(&s
->nc
);
165 static void netmap_send(void *opaque
);
166 static void netmap_writable(void *opaque
);
168 /* Set the event-loop handlers for the netmap backend.
 * The handlers for s->me.fd are registered again from the current flags:
 * read_poll selects both netmap_can_send and netmap_send, write_poll
 * selects netmap_writable.  A cleared flag passes NULL instead.
 * NOTE(review): the last argument of the call is not visible in this
 * excerpt. */
169 static void netmap_update_fd_handler(NetmapState
*s
)
171 qemu_set_fd_handler2(s
->me
.fd
,
172 s
->read_poll
? netmap_can_send
: NULL
,
173 s
->read_poll
? netmap_send
: NULL
,
174 s
->write_poll
? netmap_writable
: NULL
,
178 /* Update the read handler. */
179 static void netmap_read_poll(NetmapState
*s
, bool enable
)
181 if (s
->read_poll
!= enable
) { /* Do nothing if not changed. */
182 s
->read_poll
= enable
;
183 netmap_update_fd_handler(s
);
187 /* Update the write handler. */
188 static void netmap_write_poll(NetmapState
*s
, bool enable
)
190 if (s
->write_poll
!= enable
) {
191 s
->write_poll
= enable
;
192 netmap_update_fd_handler(s
);
196 static void netmap_poll(NetClientState
*nc
, bool enable
)
198 NetmapState
*s
= DO_UPCAST(NetmapState
, nc
, nc
);
200 if (s
->read_poll
!= enable
|| s
->write_poll
!= enable
) {
201 s
->read_poll
= enable
;
202 s
->read_poll
= enable
;
203 netmap_update_fd_handler(s
);
208 * The fd_write() callback, invoked if the fd is marked as
209 * writable after a poll. Unregister the handler and flush any
212 static void netmap_writable(void *opaque
)
214 NetmapState
*s
= opaque
;
216 netmap_write_poll(s
, false);
217 qemu_flush_queued_packets(&s
->nc
);
/* Registered as the .receive method of net_netmap_info.  Copies one
 * contiguous packet into a single slot of the netmap TX ring and then
 * requests a TX sync.
 * NOTE(review): local declarations, the assignment of i and every return
 * statement are not visible in this excerpt. */
220 static ssize_t
netmap_receive(NetClientState
*nc
,
221 const uint8_t *buf
, size_t size
)
223 NetmapState
*s
= DO_UPCAST(NetmapState
, nc
, nc
);
224 struct netmap_ring
*ring
= s
->me
.tx
;
/* No TX ring is set; the handling of this case is not visible here. */
229 if (unlikely(!ring
)) {
/* The packet must fit in one netmap buffer; an oversized packet is
 * reported through the rate-limited log (and, per the message, dropped). */
234 if (unlikely(size
> ring
->nr_buf_size
)) {
235 RD(5, "[netmap_receive] drop packet of size %d > %d\n",
236 (int)size
, ring
->nr_buf_size
);
/* With a full ring, ask to be called back when the fd turns writable. */
240 if (ring
->avail
== 0) {
241 /* No available slots in the netmap TX ring. */
242 netmap_write_poll(s
, true);
/* Fill slot i: record length and flags, copy the payload into the slot's
 * buffer, then advance ring->cur past the slot. */
247 idx
= ring
->slot
[i
].buf_idx
;
248 dst
= (uint8_t *)NETMAP_BUF(ring
, idx
);
250 ring
->slot
[i
].len
= size
;
251 ring
->slot
[i
].flags
= 0;
252 pkt_copy(buf
, dst
, size
);
253 ring
->cur
= NETMAP_RING_NEXT(ring
, i
);
/* Explicit TX sync; the result of the ioctl is not checked. */
255 ioctl(s
->me
.fd
, NIOCTXSYNC
, NULL
);
/* Registered as the .receive_iov method of net_netmap_info.  Scatter/
 * gather counterpart of netmap_receive(): every iovec fragment is copied
 * into one or more TX slots flagged NS_MOREFRAG, the flag is cleared on
 * the last slot, and a TX sync is requested.  The visible return paths
 * all return iov_size(iov, iovcnt).
 * NOTE(review): local declarations and several branches are not visible
 * in this excerpt. */
260 static ssize_t
netmap_receive_iov(NetClientState
*nc
,
261 const struct iovec
*iov
, int iovcnt
)
263 NetmapState
*s
= DO_UPCAST(NetmapState
, nc
, nc
);
264 struct netmap_ring
*ring
= s
->me
.tx
;
272 if (unlikely(!ring
)) {
273 /* Drop the packet. */
274 return iov_size(iov
, iovcnt
);
/* Quick check: a packet made of iovcnt fragments needs at least iovcnt
 * slots. */
280 if (avail
< iovcnt
) {
281 /* Not enough netmap slots. */
282 netmap_write_poll(s
, true);
/* Walk the fragments; each one is cut into pieces of at most
 * ring->nr_buf_size bytes, one piece per slot.
 * NOTE(review): iov_len is narrowed to int below. */
286 for (j
= 0; j
< iovcnt
; j
++) {
287 int iov_frag_size
= iov
[j
].iov_len
;
291 /* Split each iovec fragment over more netmap slots, if
293 while (iov_frag_size
) {
294 nm_frag_size
= MIN(iov_frag_size
, ring
->nr_buf_size
);
296 if (unlikely(avail
== 0)) {
297 /* We run out of netmap slots while splitting the
299 netmap_write_poll(s
, true);
303 idx
= ring
->slot
[i
].buf_idx
;
304 dst
= (uint8_t *)NETMAP_BUF(ring
, idx
);
306 ring
->slot
[i
].len
= nm_frag_size
;
307 ring
->slot
[i
].flags
= NS_MOREFRAG
;
308 pkt_copy(iov
[j
].iov_base
+ offset
, dst
, nm_frag_size
);
311 i
= NETMAP_RING_NEXT(ring
, i
);
314 offset
+= nm_frag_size
;
315 iov_frag_size
-= nm_frag_size
;
318 /* The last slot must not have NS_MOREFRAG set. */
319 ring
->slot
[last
].flags
&= ~NS_MOREFRAG
;
321 /* Now update ring->cur and ring->avail. */
/* Explicit TX sync; the result of the ioctl is not checked. */
325 ioctl(s
->me
.fd
, NIOCTXSYNC
, NULL
);
/* The full packet length is reported back to the caller. */
327 return iov_size(iov
, iovcnt
);
330 /* Complete a previous send (backend --> guest) and enable the
332 static void netmap_send_completed(NetClientState
*nc
, ssize_t len
)
334 NetmapState
*s
= DO_UPCAST(NetmapState
, nc
, nc
);
336 netmap_read_poll(s
, true);
/* Read handler for the netmap fd: forwards packets from the netmap RX
 * ring to the peer.  The slots of one packet (linked by NS_MOREFRAG) are
 * gathered into s->iov without copying the payload, then handed over with
 * qemu_sendv_packet_async().
 * NOTE(review): local declarations, the start of the inner do-loop and
 * the check on the send result are not visible in this excerpt; neither
 * is any bound of iovcnt against IOV_MAX. */
339 static void netmap_send(void *opaque
)
341 NetmapState
*s
= opaque
;
342 struct netmap_ring
*ring
= s
->me
.rx
;
344 /* Keep sending while there are available packets in the netmap
345 RX ring and the forwarding path towards the peer is open. */
346 while (ring
->avail
> 0 && qemu_can_send_packet(&s
->nc
)) {
/* Describe slot i as the next iovec entry; the data stays in the netmap
 * buffer. */
355 idx
= ring
->slot
[i
].buf_idx
;
356 morefrag
= (ring
->slot
[i
].flags
& NS_MOREFRAG
);
357 s
->iov
[iovcnt
].iov_base
= (u_char
*)NETMAP_BUF(ring
, idx
);
358 s
->iov
[iovcnt
].iov_len
= ring
->slot
[i
].len
;
361 ring
->cur
= NETMAP_RING_NEXT(ring
, i
);
363 } while (ring
->avail
&& morefrag
);
365 if (unlikely(!ring
->avail
&& morefrag
)) {
/* Adjacent literals are concatenated, so the first one must end with a
 * space to keep "pending" and "incomplete" apart. */
366 RD(5, "[netmap_send] ran out of slots, with a pending "
367 "incomplete packet\n");
370 iovsize
= qemu_sendv_packet_async(&s
->nc
, s
->iov
, iovcnt
,
371 netmap_send_completed
);
374 /* The peer does not receive anymore. Packet is queued, stop
375 * reading from the backend until netmap_send_completed()
 * turns read polling back on. */
377 netmap_read_poll(s
, false);
383 /* Flush and close.
 * Registered as the .cleanup method of net_netmap_info.  Visible steps:
 * purge the packets queued for this client, turn polling off, unmap the
 * shared memory region.
 * NOTE(review): the end of the function is not visible in this excerpt. */
384 static void netmap_cleanup(NetClientState
*nc
)
386 NetmapState
*s
= DO_UPCAST(NetmapState
, nc
, nc
);
388 qemu_purge_queued_packets(nc
);
/* Polling is turned off before the mapping is released. */
390 netmap_poll(nc
, false);
391 munmap(s
->me
.mem
, s
->me
.memsize
);
398 /* NetClientInfo methods: the callback table passed to
 * qemu_new_net_client() in net_init_netmap().
 * NOTE(review): netmap_poll() is defined in this file but no .poll entry
 * is visible in this excerpt; confirm it is registered. */
399 static NetClientInfo net_netmap_info
= {
400 .type
= NET_CLIENT_OPTIONS_KIND_NETMAP
,
/* The client returned by qemu_new_net_client() is up-cast to a
 * NetmapState, so the size given here is that of the containing struct. */
401 .size
= sizeof(NetmapState
),
402 .receive
= netmap_receive
,
403 .receive_iov
= netmap_receive_iov
,
405 .cleanup
= netmap_cleanup
,
408 /* The exported init function
410 * ... -net netmap,ifname="..."
412 int net_init_netmap(const NetClientOptions
*opts
,
413 const char *name
, NetClientState
*peer
)
415 const NetdevNetmapOptions
*netmap_opts
= opts
->netmap
;
420 pstrcpy(me
.fdname
, sizeof(me
.fdname
),
421 netmap_opts
->has_devname
? netmap_opts
->devname
: "/dev/netmap");
422 /* The port (interface) name is copied from the options as given; only
 * the device path above falls back to a default. */
423 pstrcpy(me
.ifname
, sizeof(me
.ifname
), netmap_opts
->ifname
);
/* Open the device and register the port; a non-zero result is treated as
 * failure (the failure branch is not visible in this excerpt). */
424 if (netmap_open(&me
)) {
427 /* Create the object. */
428 nc
= qemu_new_net_client(&net_netmap_info
, peer
, "netmap", name
);
/* Recover the NetmapState that contains the new client. */
429 s
= DO_UPCAST(NetmapState
, nc
, nc
);
431 netmap_read_poll(s
, true); /* Initially only poll for reads. */