/*
 * netmap access for qemu
 *
 * Copyright (c) 2012-2013 Luigi Rizzo
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
26 #include "qemu/osdep.h"
27 #include <sys/ioctl.h>
29 #define NETMAP_WITH_LIBS
30 #include <net/netmap.h>
31 #include <net/netmap_user.h>
36 #include "sysemu/sysemu.h"
37 #include "qemu/error-report.h"
38 #include "qapi/error.h"
40 #include "qemu/cutils.h"
42 typedef struct NetmapState
{
45 char ifname
[IFNAMSIZ
];
46 struct netmap_ring
*tx
;
47 struct netmap_ring
*rx
;
50 struct iovec iov
[IOV_MAX
];
51 int vnet_hdr_len
; /* Current virtio-net header length. */
55 #define pkt_copy bcopy
57 /* A fast copy routine only for multiples of 64 bytes, non overlapped. */
59 pkt_copy(const void *_src
, void *_dst
, int l
)
61 const uint64_t *src
= _src
;
63 if (unlikely(l
>= 1024)) {
67 for (; l
> 0; l
-= 64) {
78 #endif /* __FreeBSD__ */
/*
 * Open a netmap device. We assume there is only one queue
 * (which is the case for the VALE bridge).
 */
84 static struct nm_desc
*netmap_open(const NetdevNetmapOptions
*nm_opts
,
90 memset(&req
, 0, sizeof(req
));
92 nmd
= nm_open(nm_opts
->ifname
, &req
, NETMAP_NO_TX_POLL
,
95 error_setg_errno(errp
, errno
, "Failed to nm_open() %s",
103 static void netmap_send(void *opaque
);
104 static void netmap_writable(void *opaque
);
106 /* Set the event-loop handlers for the netmap backend. */
107 static void netmap_update_fd_handler(NetmapState
*s
)
109 qemu_set_fd_handler(s
->nmd
->fd
,
110 s
->read_poll
? netmap_send
: NULL
,
111 s
->write_poll
? netmap_writable
: NULL
,
115 /* Update the read handler. */
116 static void netmap_read_poll(NetmapState
*s
, bool enable
)
118 if (s
->read_poll
!= enable
) { /* Do nothing if not changed. */
119 s
->read_poll
= enable
;
120 netmap_update_fd_handler(s
);
124 /* Update the write handler. */
125 static void netmap_write_poll(NetmapState
*s
, bool enable
)
127 if (s
->write_poll
!= enable
) {
128 s
->write_poll
= enable
;
129 netmap_update_fd_handler(s
);
133 static void netmap_poll(NetClientState
*nc
, bool enable
)
135 NetmapState
*s
= DO_UPCAST(NetmapState
, nc
, nc
);
137 if (s
->read_poll
!= enable
|| s
->write_poll
!= enable
) {
138 s
->write_poll
= enable
;
139 s
->read_poll
= enable
;
140 netmap_update_fd_handler(s
);
145 * The fd_write() callback, invoked if the fd is marked as
146 * writable after a poll. Unregister the handler and flush any
149 static void netmap_writable(void *opaque
)
151 NetmapState
*s
= opaque
;
153 netmap_write_poll(s
, false);
154 qemu_flush_queued_packets(&s
->nc
);
157 static ssize_t
netmap_receive(NetClientState
*nc
,
158 const uint8_t *buf
, size_t size
)
160 NetmapState
*s
= DO_UPCAST(NetmapState
, nc
, nc
);
161 struct netmap_ring
*ring
= s
->tx
;
166 if (unlikely(!ring
)) {
171 if (unlikely(size
> ring
->nr_buf_size
)) {
172 RD(5, "[netmap_receive] drop packet of size %d > %d\n",
173 (int)size
, ring
->nr_buf_size
);
177 if (nm_ring_empty(ring
)) {
178 /* No available slots in the netmap TX ring. */
179 netmap_write_poll(s
, true);
184 idx
= ring
->slot
[i
].buf_idx
;
185 dst
= (uint8_t *)NETMAP_BUF(ring
, idx
);
187 ring
->slot
[i
].len
= size
;
188 ring
->slot
[i
].flags
= 0;
189 pkt_copy(buf
, dst
, size
);
190 ring
->cur
= ring
->head
= nm_ring_next(ring
, i
);
191 ioctl(s
->nmd
->fd
, NIOCTXSYNC
, NULL
);
196 static ssize_t
netmap_receive_iov(NetClientState
*nc
,
197 const struct iovec
*iov
, int iovcnt
)
199 NetmapState
*s
= DO_UPCAST(NetmapState
, nc
, nc
);
200 struct netmap_ring
*ring
= s
->tx
;
207 if (unlikely(!ring
)) {
208 /* Drop the packet. */
209 return iov_size(iov
, iovcnt
);
212 last
= i
= ring
->cur
;
214 if (nm_ring_space(ring
) < iovcnt
) {
215 /* Not enough netmap slots. */
216 netmap_write_poll(s
, true);
220 for (j
= 0; j
< iovcnt
; j
++) {
221 int iov_frag_size
= iov
[j
].iov_len
;
225 /* Split each iovec fragment over more netmap slots, if
227 while (iov_frag_size
) {
228 nm_frag_size
= MIN(iov_frag_size
, ring
->nr_buf_size
);
230 if (unlikely(nm_ring_empty(ring
))) {
231 /* We run out of netmap slots while splitting the
233 netmap_write_poll(s
, true);
237 idx
= ring
->slot
[i
].buf_idx
;
238 dst
= (uint8_t *)NETMAP_BUF(ring
, idx
);
240 ring
->slot
[i
].len
= nm_frag_size
;
241 ring
->slot
[i
].flags
= NS_MOREFRAG
;
242 pkt_copy(iov
[j
].iov_base
+ offset
, dst
, nm_frag_size
);
245 i
= nm_ring_next(ring
, i
);
247 offset
+= nm_frag_size
;
248 iov_frag_size
-= nm_frag_size
;
251 /* The last slot must not have NS_MOREFRAG set. */
252 ring
->slot
[last
].flags
&= ~NS_MOREFRAG
;
254 /* Now update ring->cur and ring->head. */
255 ring
->cur
= ring
->head
= i
;
257 ioctl(s
->nmd
->fd
, NIOCTXSYNC
, NULL
);
259 return iov_size(iov
, iovcnt
);
262 /* Complete a previous send (backend --> guest) and enable the
264 static void netmap_send_completed(NetClientState
*nc
, ssize_t len
)
266 NetmapState
*s
= DO_UPCAST(NetmapState
, nc
, nc
);
268 netmap_read_poll(s
, true);
271 static void netmap_send(void *opaque
)
273 NetmapState
*s
= opaque
;
274 struct netmap_ring
*ring
= s
->rx
;
276 /* Keep sending while there are available packets into the netmap
277 RX ring and the forwarding path towards the peer is open. */
278 while (!nm_ring_empty(ring
)) {
287 idx
= ring
->slot
[i
].buf_idx
;
288 morefrag
= (ring
->slot
[i
].flags
& NS_MOREFRAG
);
289 s
->iov
[iovcnt
].iov_base
= (u_char
*)NETMAP_BUF(ring
, idx
);
290 s
->iov
[iovcnt
].iov_len
= ring
->slot
[i
].len
;
293 ring
->cur
= ring
->head
= nm_ring_next(ring
, i
);
294 } while (!nm_ring_empty(ring
) && morefrag
);
296 if (unlikely(nm_ring_empty(ring
) && morefrag
)) {
297 RD(5, "[netmap_send] ran out of slots, with a pending"
298 "incomplete packet\n");
301 iovsize
= qemu_sendv_packet_async(&s
->nc
, s
->iov
, iovcnt
,
302 netmap_send_completed
);
305 /* The peer does not receive anymore. Packet is queued, stop
306 * reading from the backend until netmap_send_completed()
308 netmap_read_poll(s
, false);
314 /* Flush and close. */
315 static void netmap_cleanup(NetClientState
*nc
)
317 NetmapState
*s
= DO_UPCAST(NetmapState
, nc
, nc
);
319 qemu_purge_queued_packets(nc
);
321 netmap_poll(nc
, false);
326 /* Offloading manipulation support callbacks. */
327 static int netmap_fd_set_vnet_hdr_len(NetmapState
*s
, int len
)
331 /* Issue a NETMAP_BDG_VNET_HDR command to change the virtio-net header
332 * length for the netmap adapter associated to 's->ifname'.
334 memset(&req
, 0, sizeof(req
));
335 pstrcpy(req
.nr_name
, sizeof(req
.nr_name
), s
->ifname
);
336 req
.nr_version
= NETMAP_API
;
337 req
.nr_cmd
= NETMAP_BDG_VNET_HDR
;
340 return ioctl(s
->nmd
->fd
, NIOCREGIF
, &req
);
343 static bool netmap_has_vnet_hdr_len(NetClientState
*nc
, int len
)
345 NetmapState
*s
= DO_UPCAST(NetmapState
, nc
, nc
);
346 int prev_len
= s
->vnet_hdr_len
;
348 /* Check that we can set the new length. */
349 if (netmap_fd_set_vnet_hdr_len(s
, len
)) {
353 /* Restore the previous length. */
354 if (netmap_fd_set_vnet_hdr_len(s
, prev_len
)) {
355 error_report("Failed to restore vnet-hdr length %d on %s: %s",
356 prev_len
, s
->ifname
, strerror(errno
));
363 /* A netmap interface that supports virtio-net headers always
364 * supports UFO, so we use this callback also for the has_ufo hook. */
365 static bool netmap_has_vnet_hdr(NetClientState
*nc
)
367 return netmap_has_vnet_hdr_len(nc
, sizeof(struct virtio_net_hdr
));
370 static void netmap_using_vnet_hdr(NetClientState
*nc
, bool enable
)
374 static void netmap_set_vnet_hdr_len(NetClientState
*nc
, int len
)
376 NetmapState
*s
= DO_UPCAST(NetmapState
, nc
, nc
);
379 err
= netmap_fd_set_vnet_hdr_len(s
, len
);
381 error_report("Unable to set vnet-hdr length %d on %s: %s",
382 len
, s
->ifname
, strerror(errno
));
384 /* Keep track of the current length. */
385 s
->vnet_hdr_len
= len
;
389 static void netmap_set_offload(NetClientState
*nc
, int csum
, int tso4
, int tso6
,
392 NetmapState
*s
= DO_UPCAST(NetmapState
, nc
, nc
);
394 /* Setting a virtio-net header length greater than zero automatically
395 * enables the offloadings. */
396 if (!s
->vnet_hdr_len
) {
397 netmap_set_vnet_hdr_len(nc
, sizeof(struct virtio_net_hdr
));
401 /* NetClientInfo methods */
402 static NetClientInfo net_netmap_info
= {
403 .type
= NET_CLIENT_OPTIONS_KIND_NETMAP
,
404 .size
= sizeof(NetmapState
),
405 .receive
= netmap_receive
,
406 .receive_iov
= netmap_receive_iov
,
408 .cleanup
= netmap_cleanup
,
409 .has_ufo
= netmap_has_vnet_hdr
,
410 .has_vnet_hdr
= netmap_has_vnet_hdr
,
411 .has_vnet_hdr_len
= netmap_has_vnet_hdr_len
,
412 .using_vnet_hdr
= netmap_using_vnet_hdr
,
413 .set_offload
= netmap_set_offload
,
414 .set_vnet_hdr_len
= netmap_set_vnet_hdr_len
,
/* The exported init function
 *
 * ... -net netmap,ifname="..."
 */
421 int net_init_netmap(const NetClientOptions
*opts
,
422 const char *name
, NetClientState
*peer
, Error
**errp
)
424 const NetdevNetmapOptions
*netmap_opts
= opts
->u
.netmap
.data
;
430 nmd
= netmap_open(netmap_opts
, &err
);
432 error_propagate(errp
, err
);
435 /* Create the object. */
436 nc
= qemu_new_net_client(&net_netmap_info
, peer
, "netmap", name
);
437 s
= DO_UPCAST(NetmapState
, nc
, nc
);
439 s
->tx
= NETMAP_TXRING(nmd
->nifp
, 0);
440 s
->rx
= NETMAP_RXRING(nmd
->nifp
, 0);
442 pstrcpy(s
->ifname
, sizeof(s
->ifname
), netmap_opts
->ifname
);
443 netmap_read_poll(s
, true); /* Initially only poll for reads. */