2 * netmap access for qemu
4 * Copyright (c) 2012-2013 Luigi Rizzo
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 #include <sys/ioctl.h>
31 #define NETMAP_WITH_LIBS
32 #include <net/netmap.h>
33 #include <net/netmap_user.h>
38 #include "sysemu/sysemu.h"
39 #include "qemu/error-report.h"
42 /* Private netmap device info. */
/*
 * Members visible here: nifp (set from NETMAP_IF() on the mapped memory),
 * rx and tx (set from NETMAP_RXRING()/NETMAP_TXRING() with index 0 in
 * netmap_open()), fdname (the path passed to open()) and ifname (the port
 * name copied into the NIOCREGIF request).
 * NOTE(review): other code in this file also uses me->fd, me->mem and
 * me->memsize; those members and the closing "} NetmapPriv;" are not
 * visible in this extract -- confirm against the full file.
 */
43 typedef struct NetmapPriv
{
47 struct netmap_if
*nifp
;
48 struct netmap_ring
*rx
;
49 struct netmap_ring
*tx
;
50 char fdname
[PATH_MAX
]; /* Normally "/dev/netmap". */
51 char ifname
[IFNAMSIZ
];
/*
 * Per-client state of the netmap backend.
 * Members visible here: iov, an array of IOV_MAX iovec entries that
 * netmap_send() fills before passing packets on, and vnet_hdr_len.
 * NOTE(review): code in this file also uses s->nc, s->me, s->read_poll
 * and s->write_poll; those members and the closing "} NetmapState;" are
 * not visible in this extract -- confirm against the full file.
 */
54 typedef struct NetmapState
{
59 struct iovec iov
[IOV_MAX
];
60 int vnet_hdr_len
; /* Current virtio-net header length. */
/*
 * pkt_copy() comes in two forms: the bcopy alias defined on the next line,
 * or the hand-written routine that follows it. The #endif at the bottom
 * closes the preprocessor conditional that selects between them.
 * NOTE(review): the opening #if/#else lines, the routine's return type and
 * braces, and the bodies of the l >= 1024 branch and of the copy loop are
 * not visible in this extract -- confirm against the full file.
 */
64 #define pkt_copy bcopy
66 /* A fast copy routine only for multiples of 64 bytes, non overlapped. */
68 pkt_copy(const void *_src
, void *_dst
, int l
)
70 const uint64_t *src
= _src
;
/* Lengths of 1024 bytes or more take a separate path (body not visible). */
72 if (unlikely(l
>= 1024)) {
/* The remaining length is consumed in steps of 64 bytes per iteration. */
76 for (; l
> 0; l
-= 64) {
87 #endif /* __FreeBSD__ */
90 * Open a netmap device. We assume there is only one queue
91 * (which is the case for the VALE bridge).
/*
 * Open the netmap control device named by me->fdname, register the port
 * me->ifname on it with NIOCREGIF, mmap() the shared region and cache the
 * interface pointer plus TX/RX ring 0 in *me. Failures are reported
 * through errp.
 * NOTE(review): the braces, the local declarations (fd, err, l, req), the
 * tests guarding the first two error reports and whatever cleanup runs on
 * the error paths are not visible in this extract -- confirm against the
 * full file.
 */
93 static void netmap_open(NetmapPriv
*me
, Error
**errp
)
/* Keep the descriptor both in *me and in a local used by the calls below. */
100 me
->fd
= fd
= open(me
->fdname
, O_RDWR
);
102 error_setg_file_open(errp
, errno
, me
->fdname
);
/* Build the registration request: port name, ring id flag, API version. */
105 memset(&req
, 0, sizeof(req
));
106 pstrcpy(req
.nr_name
, sizeof(req
.nr_name
), me
->ifname
);
107 req
.nr_ringid
= NETMAP_NO_TX_POLL
;
108 req
.nr_version
= NETMAP_API
;
109 err
= ioctl(fd
, NIOCREGIF
, &req
);
111 error_setg_errno(errp
, errno
, "Unable to register %s", me
->ifname
);
/* Mapping size is read back from the request after the ioctl. */
114 l
= me
->memsize
= req
.nr_memsize
;
116 me
->mem
= mmap(0, l
, PROT_WRITE
| PROT_READ
, MAP_SHARED
, fd
, 0);
117 if (me
->mem
== MAP_FAILED
) {
118 error_setg_errno(errp
, errno
, "Unable to mmap netmap shared memory");
/* Locate the interface at req.nr_offset in the mapping, then ring 0 of each direction. */
123 me
->nifp
= NETMAP_IF(me
->mem
, req
.nr_offset
);
124 me
->tx
= NETMAP_TXRING(me
->nifp
, 0);
125 me
->rx
= NETMAP_RXRING(me
->nifp
, 0);
/*
 * Forward declarations: netmap_update_fd_handler() below names both
 * functions as handlers before their definitions appear in the file.
 */
133 static void netmap_send(void *opaque
);
134 static void netmap_writable(void *opaque
);
136 /* Set the event-loop handlers for the netmap backend. */
/*
 * Calls qemu_set_fd_handler() on s->me.fd, passing netmap_send when
 * s->read_poll is set and netmap_writable when s->write_poll is set;
 * a cleared flag passes NULL in that position instead.
 * NOTE(review): the braces and the remaining argument(s) that close the
 * call are not visible in this extract -- confirm against the full file.
 */
137 static void netmap_update_fd_handler(NetmapState
*s
)
139 qemu_set_fd_handler(s
->me
.fd
,
140 s
->read_poll
? netmap_send
: NULL
,
141 s
->write_poll
? netmap_writable
: NULL
,
145 /* Update the read handler. */
146 static void netmap_read_poll(NetmapState
*s
, bool enable
)
148 if (s
->read_poll
!= enable
) { /* Do nothing if not changed. */
149 s
->read_poll
= enable
;
150 netmap_update_fd_handler(s
);
154 /* Update the write handler. */
155 static void netmap_write_poll(NetmapState
*s
, bool enable
)
157 if (s
->write_poll
!= enable
) {
158 s
->write_poll
= enable
;
159 netmap_update_fd_handler(s
);
163 static void netmap_poll(NetClientState
*nc
, bool enable
)
165 NetmapState
*s
= DO_UPCAST(NetmapState
, nc
, nc
);
167 if (s
->read_poll
!= enable
|| s
->write_poll
!= enable
) {
168 s
->write_poll
= enable
;
169 s
->read_poll
= enable
;
170 netmap_update_fd_handler(s
);
175 * The fd_write() callback, invoked if the fd is marked as
176 * writable after a poll. Unregister the handler and flush any
179 static void netmap_writable(void *opaque
)
181 NetmapState
*s
= opaque
;
183 netmap_write_poll(s
, false);
184 qemu_flush_queued_packets(&s
->nc
);
/*
 * receive callback (installed as .receive in net_netmap_info): copies one
 * packet of `size` bytes from `buf` into the buffer of TX ring slot `i`,
 * sets that slot's length and clears its flags, advances ring->cur and
 * ring->head past it and issues NIOCTXSYNC on the netmap fd.
 * NOTE(review): the braces, the local declarations, the statement that
 * gives `i` its value and every return statement (including what the
 * three guarded early paths return) are not visible in this extract --
 * confirm against the full file.
 */
187 static ssize_t
netmap_receive(NetClientState
*nc
,
188 const uint8_t *buf
, size_t size
)
190 NetmapState
*s
= DO_UPCAST(NetmapState
, nc
, nc
);
191 struct netmap_ring
*ring
= s
->me
.tx
;
/* Guard: no TX ring available (handling not visible here). */
196 if (unlikely(!ring
)) {
/* Guard: the packet does not fit into a single netmap buffer. */
201 if (unlikely(size
> ring
->nr_buf_size
)) {
202 RD(5, "[netmap_receive] drop packet of size %d > %d\n",
203 (int)size
, ring
->nr_buf_size
);
207 if (nm_ring_empty(ring
)) {
208 /* No available slots in the netmap TX ring. */
/* Ask to be notified when the fd becomes writable again. */
209 netmap_write_poll(s
, true);
/* Resolve the slot's buffer index to the buffer address. */
214 idx
= ring
->slot
[i
].buf_idx
;
215 dst
= (uint8_t *)NETMAP_BUF(ring
, idx
);
217 ring
->slot
[i
].len
= size
;
218 ring
->slot
[i
].flags
= 0;
219 pkt_copy(buf
, dst
, size
);
/* Hand the filled slot over by moving cur/head to the next slot. */
220 ring
->cur
= ring
->head
= nm_ring_next(ring
, i
);
/* The ioctl result is not stored or checked here. */
221 ioctl(s
->me
.fd
, NIOCTXSYNC
, NULL
);
/*
 * receive_iov callback (installed as .receive_iov in net_netmap_info):
 * copies the `iovcnt` fragments of `iov` into consecutive TX ring slots,
 * splitting a fragment across several slots when it exceeds
 * ring->nr_buf_size. Every slot written is flagged NS_MOREFRAG; the flag
 * is then cleared on slot `last`, ring->cur/head are set to `i` and
 * NIOCTXSYNC is issued. Returns iov_size(iov, iovcnt) both when there is
 * no TX ring and on the final path.
 * NOTE(review): the braces, the local declarations, the initialisation of
 * `offset`, any update of `last` inside the loop and the return values of
 * the two "out of slots" paths are not visible in this extract -- confirm
 * against the full file.
 * NOTE(review): iov[j].iov_len is stored into an int (iov_frag_size).
 */
226 static ssize_t
netmap_receive_iov(NetClientState
*nc
,
227 const struct iovec
*iov
, int iovcnt
)
229 NetmapState
*s
= DO_UPCAST(NetmapState
, nc
, nc
);
230 struct netmap_ring
*ring
= s
->me
.tx
;
237 if (unlikely(!ring
)) {
238 /* Drop the packet. */
239 return iov_size(iov
, iovcnt
);
/* Both cursors start at the ring's current slot. */
242 last
= i
= ring
->cur
;
/* At least one slot per fragment is required up front. */
244 if (nm_ring_space(ring
) < iovcnt
) {
245 /* Not enough netmap slots. */
246 netmap_write_poll(s
, true);
250 for (j
= 0; j
< iovcnt
; j
++) {
251 int iov_frag_size
= iov
[j
].iov_len
;
255 /* Split each iovec fragment over more netmap slots, if
257 while (iov_frag_size
) {
258 nm_frag_size
= MIN(iov_frag_size
, ring
->nr_buf_size
);
260 if (unlikely(nm_ring_empty(ring
))) {
261 /* We run out of netmap slots while splitting the
263 netmap_write_poll(s
, true);
267 idx
= ring
->slot
[i
].buf_idx
;
268 dst
= (uint8_t *)NETMAP_BUF(ring
, idx
);
270 ring
->slot
[i
].len
= nm_frag_size
;
271 ring
->slot
[i
].flags
= NS_MOREFRAG
;
272 pkt_copy(iov
[j
].iov_base
+ offset
, dst
, nm_frag_size
);
275 i
= nm_ring_next(ring
, i
);
277 offset
+= nm_frag_size
;
278 iov_frag_size
-= nm_frag_size
;
281 /* The last slot must not have NS_MOREFRAG set. */
282 ring
->slot
[last
].flags
&= ~NS_MOREFRAG
;
284 /* Now update ring->cur and ring->head. */
285 ring
->cur
= ring
->head
= i
;
/* The ioctl result is not stored or checked here. */
287 ioctl(s
->me
.fd
, NIOCTXSYNC
, NULL
);
289 return iov_size(iov
, iovcnt
);
292 /* Complete a previous send (backend --> guest) and enable the
294 static void netmap_send_completed(NetClientState
*nc
, ssize_t len
)
296 NetmapState
*s
= DO_UPCAST(NetmapState
, nc
, nc
);
298 netmap_read_poll(s
, true);
301 static void netmap_send(void *opaque
)
303 NetmapState
*s
= opaque
;
304 struct netmap_ring
*ring
= s
->me
.rx
;
306 /* Keep sending while there are available packets into the netmap
307 RX ring and the forwarding path towards the peer is open. */
/*
 * Per packet: gather consecutive RX slots into s->iov for as long as
 * NS_MOREFRAG is set and the ring is not empty, releasing each slot by
 * advancing ring->cur/head, then pass the vector to
 * qemu_sendv_packet_async() with netmap_send_completed as the completion
 * callback.
 * NOTE(review): the braces, the local declarations, the "do {" opener,
 * the assignment of `i`, the update of `iovcnt` (and any bound against
 * the size of s->iov), the test on `iovsize` and the loop exits are not
 * visible in this extract -- confirm against the full file.
 * NOTE(review): the two adjacent string literals in the RD() call join
 * without a separating space ("pending" + "incomplete"), so the logged
 * text presumably reads "pendingincomplete".
 */
308 while (!nm_ring_empty(ring
)) {
317 idx
= ring
->slot
[i
].buf_idx
;
318 morefrag
= (ring
->slot
[i
].flags
& NS_MOREFRAG
);
319 s
->iov
[iovcnt
].iov_base
= (u_char
*)NETMAP_BUF(ring
, idx
);
320 s
->iov
[iovcnt
].iov_len
= ring
->slot
[i
].len
;
323 ring
->cur
= ring
->head
= nm_ring_next(ring
, i
);
324 } while (!nm_ring_empty(ring
) && morefrag
);
326 if (unlikely(nm_ring_empty(ring
) && morefrag
)) {
327 RD(5, "[netmap_send] ran out of slots, with a pending"
328 "incomplete packet\n");
331 iovsize
= qemu_sendv_packet_async(&s
->nc
, s
->iov
, iovcnt
,
332 netmap_send_completed
);
335 /* The peer does not receive anymore. Packet is queued, stop
336 * reading from the backend until netmap_send_completed()
338 netmap_read_poll(s
, false);
344 /* Flush and close. */
/*
 * cleanup callback (installed as .cleanup in net_netmap_info): purges the
 * packets queued on the client, disables both read and write polling via
 * netmap_poll(), then unmaps the shared netmap region.
 * NOTE(review): the braces and anything that follows the munmap() call
 * (for example releasing the file descriptor) are not visible in this
 * extract -- confirm against the full file.
 */
345 static void netmap_cleanup(NetClientState
*nc
)
347 NetmapState
*s
= DO_UPCAST(NetmapState
, nc
, nc
);
349 qemu_purge_queued_packets(nc
);
351 netmap_poll(nc
, false);
/* The munmap() result is not checked. */
352 munmap(s
->me
.mem
, s
->me
.memsize
);
358 /* Offloading manipulation support callbacks. */
/*
 * has_ufo callback (installed as .has_ufo in net_netmap_info).
 * NOTE(review): the body is not visible in this extract, so the value it
 * returns cannot be stated here -- confirm against the full file.
 */
359 static bool netmap_has_ufo(NetClientState
*nc
)
/*
 * has_vnet_hdr callback (installed as .has_vnet_hdr in net_netmap_info).
 * NOTE(review): the body is not visible in this extract, so the value it
 * returns cannot be stated here -- confirm against the full file.
 */
364 static bool netmap_has_vnet_hdr(NetClientState
*nc
)
369 static bool netmap_has_vnet_hdr_len(NetClientState
*nc
, int len
)
371 return len
== 0 || len
== sizeof(struct virtio_net_hdr
) ||
372 len
== sizeof(struct virtio_net_hdr_mrg_rxbuf
);
/*
 * using_vnet_hdr callback (installed as .using_vnet_hdr in
 * net_netmap_info).
 * NOTE(review): the body is not visible in this extract -- confirm against
 * the full file whether it does anything with `enable`.
 */
375 static void netmap_using_vnet_hdr(NetClientState
*nc
, bool enable
)
/*
 * set_vnet_hdr_len callback (installed as .set_vnet_hdr_len in
 * net_netmap_info): builds an nmreq naming s->me.ifname with command
 * NETMAP_BDG_VNET_HDR, submits it with the NIOCREGIF ioctl on the netmap
 * fd, reports a failure through error_report() and records `len` in
 * s->vnet_hdr_len.
 * NOTE(review): the braces, the local declarations (err, req), the
 * statement that places `len` into the request and the conditions that
 * select between the error report and the final assignment are not
 * visible in this extract -- confirm against the full file.
 */
379 static void netmap_set_vnet_hdr_len(NetClientState
*nc
, int len
)
381 NetmapState
*s
= DO_UPCAST(NetmapState
, nc
, nc
);
385 /* Issue a NETMAP_BDG_VNET_HDR command to change the virtio-net header
386 * length for the netmap adapter associated to 'me->ifname'.
388 memset(&req
, 0, sizeof(req
));
389 pstrcpy(req
.nr_name
, sizeof(req
.nr_name
), s
->me
.ifname
);
390 req
.nr_version
= NETMAP_API
;
391 req
.nr_cmd
= NETMAP_BDG_VNET_HDR
;
393 err
= ioctl(s
->me
.fd
, NIOCREGIF
, &req
);
395 error_report("Unable to execute NETMAP_BDG_VNET_HDR on %s: %s",
396 s
->me
.ifname
, strerror(errno
));
398 /* Keep track of the current length. */
399 s
->vnet_hdr_len
= len
;
/*
 * set_offload callback (installed as .set_offload in net_netmap_info):
 * when no virtio-net header length has been set yet (s->vnet_hdr_len is
 * zero) it selects sizeof(struct virtio_net_hdr) through
 * netmap_set_vnet_hdr_len(). The csum/tso4/tso6 arguments are not read in
 * the visible code.
 * NOTE(review): the end of the parameter list and the braces are not
 * visible in this extract -- confirm against the full file.
 */
403 static void netmap_set_offload(NetClientState
*nc
, int csum
, int tso4
, int tso6
,
406 NetmapState
*s
= DO_UPCAST(NetmapState
, nc
, nc
);
408 /* Setting a virtio-net header length greater than zero automatically
409 * enables the offloadings.
411 if (!s
->vnet_hdr_len
) {
412 netmap_set_vnet_hdr_len(nc
, sizeof(struct virtio_net_hdr
));
416 /* NetClientInfo methods */
/*
 * Callback table handed to qemu_new_net_client() by net_init_netmap().
 * It declares the client kind, the size of the per-client state
 * (NetmapState) and the receive, cleanup and offload-related callbacks
 * defined in this file.
 * NOTE(review): netmap_poll() is not wired in by any initializer visible
 * here, and the closing "};" is missing from this extract -- confirm
 * against the full file.
 */
417 static NetClientInfo net_netmap_info
= {
418 .type
= NET_CLIENT_OPTIONS_KIND_NETMAP
,
419 .size
= sizeof(NetmapState
),
420 .receive
= netmap_receive
,
421 .receive_iov
= netmap_receive_iov
,
423 .cleanup
= netmap_cleanup
,
424 .has_ufo
= netmap_has_ufo
,
425 .has_vnet_hdr
= netmap_has_vnet_hdr
,
426 .has_vnet_hdr_len
= netmap_has_vnet_hdr_len
,
427 .using_vnet_hdr
= netmap_using_vnet_hdr
,
428 .set_offload
= netmap_set_offload
,
429 .set_vnet_hdr_len
= netmap_set_vnet_hdr_len
,
432 /* The exported init function
434 * ... -net netmap,ifname="..."
436 int net_init_netmap(const NetClientOptions
*opts
,
437 const char *name
, NetClientState
*peer
, Error
**errp
)
439 const NetdevNetmapOptions
*netmap_opts
= opts
->u
.netmap
;
445 pstrcpy(me
.fdname
, sizeof(me
.fdname
),
446 netmap_opts
->has_devname
? netmap_opts
->devname
: "/dev/netmap");
447 /* Set default name for the port if not supplied. */
448 pstrcpy(me
.ifname
, sizeof(me
.ifname
), netmap_opts
->ifname
);
449 netmap_open(&me
, &err
);
451 error_propagate(errp
, err
);
454 /* Create the object. */
455 nc
= qemu_new_net_client(&net_netmap_info
, peer
, "netmap", name
);
456 s
= DO_UPCAST(NetmapState
, nc
, nc
);
459 netmap_read_poll(s
, true); /* Initially only poll for reads. */