2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2011 NetApp, Inc.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/linker_set.h>
#include <sys/select.h>
#include <sys/ioctl.h>

#include <net/ethernet.h>
#include <net/if.h> /* IFNAMSIZ */

#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <pthread_np.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "net_utils.h"
#include "net_backends.h"
/* Number of descriptors per virtqueue. */
#define VTNET_RINGSZ 1024

/* Maximum number of iovec entries collected for a single packet. */
#define VTNET_MAXSEGS 256

/* Upper bound on packet length: 64KB payload plus header slack. */
#define VTNET_MAX_PKT_LEN (65536 + 64)

/* Bounds accepted for a user-configured MTU (validated in init). */
#define VTNET_MIN_MTU ETHERMIN
#define VTNET_MAX_MTU 65535

/* Device capabilities advertised to the guest by default. */
#define VTNET_S_HOSTCAPS \
  ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | \
    VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC)
/*
 * PCI config-space "registers", as seen by the guest driver.
 *
 * Layout follows the virtio-net device configuration: MAC address,
 * link status, maximum RX/TX virtqueue pairs, and MTU. All fields are
 * naturally aligned at these offsets, so the struct carries no padding.
 */
struct virtio_net_config {
    uint8_t  mac[6];              /* device MAC address */
    uint16_t status;              /* link status; set to 1 (link up) in init */
    uint16_t max_virtqueue_pairs; /* we only ever report 1 */
    uint16_t mtu;                 /* advertised when VIRTIO_NET_F_MTU is set */
};

/* Queue definitions: receive, transmit, and control. */
#define VTNET_RXQ   0
#define VTNET_TXQ   1
#define VTNET_CTLQ  2 /* NB: not yet supported */

#define VTNET_MAXQ  3
/* Debug tunable: when nonzero, DPRINTF() messages are emitted. */
static int pci_vtnet_debug;
/*
 * Conditional debug logging. Wrapped in do/while(0) so the macro is a
 * single statement and cannot capture a caller's "else" (dangling-else
 * hazard of the bare "if" form).
 */
#define DPRINTF(params) do {            \
    if (pci_vtnet_debug)                \
        PRINTLN params;                 \
} while (0)
/* Unconditional warning output. */
#define WPRINTF(params) (PRINTLN params)
107 struct pci_vtnet_softc
{
108 struct virtio_softc vsc_vs
;
109 struct vqueue_info vsc_queues
[VTNET_MAXQ
- 1];
110 pthread_mutex_t vsc_mtx
;
112 net_backend_t
*vsc_be
;
114 bool features_negotiated
; /* protected by rx_mtx */
116 int resetting
; /* protected by tx_mtx */
118 uint64_t vsc_features
; /* negotiated features */
120 pthread_mutex_t rx_mtx
;
121 int rx_merge
; /* merged rx bufs in use */
124 pthread_mutex_t tx_mtx
;
125 pthread_cond_t tx_cond
;
131 struct virtio_net_config vsc_config
;
132 struct virtio_consts vsc_consts
;
/*
 * Forward declarations for the virtio_consts callbacks implemented below.
 */
static void pci_vtnet_reset(void *);
/* static void pci_vtnet_notify(void *, struct vqueue_info *); */
static int pci_vtnet_cfgread(void *, int, int, uint32_t *);
static int pci_vtnet_cfgwrite(void *, int, int, uint32_t);
static void pci_vtnet_neg_features(void *, uint64_t);
141 static struct virtio_consts vtnet_vi_consts
= {
143 .vc_nvq
= VTNET_MAXQ
- 1,
144 .vc_cfgsize
= sizeof(struct virtio_net_config
),
145 .vc_reset
= pci_vtnet_reset
,
146 .vc_cfgread
= pci_vtnet_cfgread
,
147 .vc_cfgwrite
= pci_vtnet_cfgwrite
,
148 .vc_apply_features
= pci_vtnet_neg_features
,
149 .vc_hv_caps
= VTNET_S_HOSTCAPS
,
153 pci_vtnet_reset(void *vsc
)
155 struct pci_vtnet_softc
*sc
= vsc
;
157 DPRINTF(("vtnet: device reset requested !"));
159 /* Acquire the RX lock to block RX processing. */
160 pthread_mutex_lock(&sc
->rx_mtx
);
163 * Make sure receive operation is disabled at least until we
164 * re-negotiate the features, since receive operation depends
165 * on the value of sc->rx_merge and the header length, which
166 * are both set in pci_vtnet_neg_features().
167 * Receive operation will be enabled again once the guest adds
168 * the first receive buffers and kicks us.
170 sc
->features_negotiated
= false;
171 netbe_rx_disable(sc
->vsc_be
);
173 /* Set sc->resetting and give a chance to the TX thread to stop. */
174 pthread_mutex_lock(&sc
->tx_mtx
);
176 while (sc
->tx_in_progress
) {
177 pthread_mutex_unlock(&sc
->tx_mtx
);
179 pthread_mutex_lock(&sc
->tx_mtx
);
183 * Now reset rings, MSI-X vectors, and negotiated capabilities.
184 * Do that with the TX lock held, since we need to reset
187 vi_reset_dev(&sc
->vsc_vs
);
190 pthread_mutex_unlock(&sc
->tx_mtx
);
191 pthread_mutex_unlock(&sc
->rx_mtx
);
/*
 * Drop the first 'hlen' bytes from the scatter-gather list 'iov',
 * updating '*iovcnt' if the first fragment is consumed entirely.
 * Used to strip (or make room for) a virtio-net header.
 *
 * Returns a pointer to the adjusted list, or NULL when the list cannot
 * accommodate the trim (first fragment shorter than the header, or no
 * payload fragment left once the header is removed).
 */
static __inline struct iovec *
iov_trim_hdr(struct iovec *iov, int *iovcnt, unsigned int hlen)
{
    struct iovec *riov;

    if (iov[0].iov_len < hlen) {
        /*
         * Not enough header space in the first fragment.
         * That's not ok for us.
         */
        return (NULL);
    }

    iov[0].iov_len -= hlen;
    if (iov[0].iov_len == 0) {
        /*
         * Only space for the header. That's not enough for us:
         * drop the emptied first fragment.
         */
        if (*iovcnt < 2) {
            return (NULL);
        }

        riov = &iov[1];
        *iovcnt -= 1;
    } else {
        /* Skip the header bytes inside the first fragment. */
        iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + hlen);
        riov = &iov[0];
    }

    return (riov);
}
/*
 * Per-chain bookkeeping used by the RX path when mergeable rx buffers
 * are in use: the head descriptor index returned by vq_getchain() and
 * the byte capacity of the chain.
 */
struct virtio_mrg_rxbuf_info {
    uint16_t idx;   /* head descriptor index of the chain */
    uint16_t pad;   /* explicit padding; keeps 'len' 4-byte aligned */
    uint32_t len;   /* total capacity of the chain, in bytes */
};
233 pci_vtnet_rx(struct pci_vtnet_softc
*sc
)
235 int prepend_hdr_len
= sc
->vhdrlen
- sc
->be_vhdrlen
;
236 struct virtio_mrg_rxbuf_info info
[VTNET_MAXSEGS
];
237 struct iovec iov
[VTNET_MAXSEGS
+ 1];
238 struct vqueue_info
*vq
;
241 vq
= &sc
->vsc_queues
[VTNET_RXQ
];
243 /* Features must be negotiated */
244 if (!sc
->features_negotiated
) {
249 struct virtio_net_rxhdr
*hdr
;
258 plen
= netbe_peek_recvlen(sc
->vsc_be
);
261 * No more packets (plen == 0), or backend errored
262 * (plen < 0). Interrupt if needed and stop.
264 vq_endchains(vq
, /*used_all_avail=*/0);
267 plen
+= prepend_hdr_len
;
270 * Get a descriptor chain to store the next ingress
271 * packet. In case of mergeable rx buffers, get as
272 * many chains as necessary in order to make room
280 int n
= vq_getchain(vq
, riov
, VTNET_MAXSEGS
- riov_len
,
282 info
[n_chains
].idx
= req
.idx
;
286 * No rx buffers. Enable RX kicks and double
290 if (!vq_has_descs(vq
)) {
292 * Still no buffers. Return the unused
293 * chains (if any), interrupt if needed
294 * (including for NOTIFY_ON_EMPTY), and
295 * disable the backend until the next
298 vq_retchains(vq
, n_chains
);
299 vq_endchains(vq
, /*used_all_avail=*/1);
300 netbe_rx_disable(sc
->vsc_be
);
304 /* More rx buffers found, so keep going. */
311 * An error from vq_getchain() means that
312 * an invalid descriptor was found.
314 vq_retchains(vq
, n_chains
);
315 vq_endchains(vq
, /*used_all_avail=*/0);
319 assert(n
>= 1 && riov_len
+ n
<= VTNET_MAXSEGS
);
326 size_t c
= count_iov(riov
, n
);
327 if (c
> UINT32_MAX
) {
328 vq_retchains(vq
, n_chains
);
329 vq_endchains(vq
, /*used_all_avail=*/0);
332 info
[n_chains
].len
= (uint32_t)c
;
334 info
[n_chains
].len
= (uint32_t)count_iov(riov
, n
);
336 riov_bytes
+= info
[n_chains
].len
;
339 } while (riov_bytes
< plen
&& riov_len
< VTNET_MAXSEGS
);
343 hdr
= riov
[0].iov_base
;
345 hdr
= (struct virtio_net_rxhdr
*)riov
[0].iov_base
;
347 if (prepend_hdr_len
> 0) {
349 * The frontend uses a virtio-net header, but the
350 * backend does not. We need to prepend a zeroed
353 riov
= iov_trim_hdr(riov
, &riov_len
, prepend_hdr_len
);
356 * The first collected chain is nonsensical,
357 * as it is not even enough to store the
358 * virtio-net header. Just drop it.
360 vq_relchain(vq
, info
[0].idx
, 0);
361 vq_retchains(vq
, n_chains
- 1);
364 memset(hdr
, 0, prepend_hdr_len
);
367 rlen
= netbe_recv(sc
->vsc_be
, riov
, riov_len
);
368 if (rlen
!= plen
- prepend_hdr_len
) {
370 * If this happens it means there is something
371 * wrong with the backend (e.g., some other
372 * process is stealing our packets).
374 WPRINTF(("netbe_recv: expected %zd bytes, "
375 "got %zd", plen
- prepend_hdr_len
, rlen
));
376 vq_retchains(vq
, n_chains
);
380 ulen
= (uint32_t)plen
;
383 * Publish the used buffers to the guest, reporting the
384 * number of bytes that we wrote.
387 vq_relchain(vq
, info
[0].idx
, ulen
);
397 vq_relchain_prepare(vq
, info
[i
].idx
, iolen
);
403 vq_relchain_publish(vq
);
404 assert(i
== n_chains
);
411 * Called when there is read activity on the backend file descriptor.
412 * Each buffer posted by the guest is assumed to be able to contain
413 * an entire ethernet frame + rx header.
416 pci_vtnet_rx_callback(int fd __unused
, enum ev_type type __unused
, void *param
)
418 struct pci_vtnet_softc
*sc
= param
;
420 pthread_mutex_lock(&sc
->rx_mtx
);
422 pthread_mutex_unlock(&sc
->rx_mtx
);
426 /* Called on RX kick. */
428 pci_vtnet_ping_rxq(void *vsc
, struct vqueue_info
*vq
)
430 struct pci_vtnet_softc
*sc
= vsc
;
433 * A qnotify means that the rx process can now begin.
434 * Enable RX only if features are negotiated.
436 pthread_mutex_lock(&sc
->rx_mtx
);
437 if (!sc
->features_negotiated
) {
438 pthread_mutex_unlock(&sc
->rx_mtx
);
443 netbe_rx_enable(sc
->vsc_be
);
444 pthread_mutex_unlock(&sc
->rx_mtx
);
447 /* TX virtqueue processing, called by the TX thread. */
449 pci_vtnet_proctx(struct pci_vtnet_softc
*sc
, struct vqueue_info
*vq
)
451 struct iovec iov
[VTNET_MAXSEGS
+ 1];
452 struct iovec
*siov
= iov
;
458 * Obtain chain of descriptors. The first descriptor also
459 * contains the virtio-net header.
461 n
= vq_getchain(vq
, iov
, VTNET_MAXSEGS
, &req
);
462 assert(n
>= 1 && n
<= VTNET_MAXSEGS
);
464 if (sc
->vhdrlen
!= sc
->be_vhdrlen
) {
466 * The frontend uses a virtio-net header, but the backend
467 * does not. We simply strip the header and ignore it, as
468 * it should be zero-filled.
470 siov
= iov_trim_hdr(siov
, &n
, sc
->vhdrlen
);
474 /* The chain is nonsensical. Just drop it. */
477 len
= netbe_send(sc
->vsc_be
, siov
, n
);
480 * If send failed, report that 0 bytes
488 * Return the processed chain to the guest, reporting
489 * the number of bytes that we read.
491 vq_relchain(vq
, req
.idx
, len
);
494 /* Called on TX kick. */
496 pci_vtnet_ping_txq(void *vsc
, struct vqueue_info
*vq
)
498 struct pci_vtnet_softc
*sc
= vsc
;
501 * Any ring entries to process?
503 if (!vq_has_descs(vq
))
506 /* Signal the tx thread for processing */
507 pthread_mutex_lock(&sc
->tx_mtx
);
509 if (sc
->tx_in_progress
== 0)
510 pthread_cond_signal(&sc
->tx_cond
);
511 pthread_mutex_unlock(&sc
->tx_mtx
);
515 * Thread which will handle processing of TX desc
518 pci_vtnet_tx_thread(void *param
)
520 struct pci_vtnet_softc
*sc
= param
;
521 struct vqueue_info
*vq
;
524 vq
= &sc
->vsc_queues
[VTNET_TXQ
];
527 * Let us wait till the tx queue pointers get initialised &
530 pthread_mutex_lock(&sc
->tx_mtx
);
531 error
= pthread_cond_wait(&sc
->tx_cond
, &sc
->tx_mtx
);
535 /* note - tx mutex is locked here */
536 while (sc
->resetting
|| !vq_has_descs(vq
)) {
538 if (!sc
->resetting
&& vq_has_descs(vq
))
541 sc
->tx_in_progress
= 0;
542 error
= pthread_cond_wait(&sc
->tx_cond
, &sc
->tx_mtx
);
546 sc
->tx_in_progress
= 1;
547 pthread_mutex_unlock(&sc
->tx_mtx
);
551 * Run through entries, placing them into
552 * iovecs and sending when an end-of-packet
555 pci_vtnet_proctx(sc
, vq
);
556 } while (vq_has_descs(vq
));
559 * Generate an interrupt if needed.
561 vq_endchains(vq
, /*used_all_avail=*/1);
563 pthread_mutex_lock(&sc
->tx_mtx
);
572 pci_vtnet_ping_ctlq(void *vsc
, struct vqueue_info
*vq
)
575 DPRINTF(("vtnet: control qnotify!"));
580 pci_vtnet_init(struct vmctx
*ctx __unused
, struct pci_devinst
*pi
,
583 struct pci_vtnet_softc
*sc
;
585 char tname
[MAXCOMLEN
+ 1];
586 unsigned long mtu
= ETHERMTU
;
590 * Allocate data structures for further virtio initializations.
591 * sc also contains a copy of vtnet_vi_consts, since capabilities
592 * change depending on the backend.
594 sc
= calloc(1, sizeof(struct pci_vtnet_softc
));
596 sc
->vsc_consts
= vtnet_vi_consts
;
597 pthread_mutex_init(&sc
->vsc_mtx
, NULL
);
599 sc
->vsc_queues
[VTNET_RXQ
].vq_qsize
= VTNET_RINGSZ
;
600 sc
->vsc_queues
[VTNET_RXQ
].vq_notify
= pci_vtnet_ping_rxq
;
601 sc
->vsc_queues
[VTNET_TXQ
].vq_qsize
= VTNET_RINGSZ
;
602 sc
->vsc_queues
[VTNET_TXQ
].vq_notify
= pci_vtnet_ping_txq
;
604 sc
->vsc_queues
[VTNET_CTLQ
].vq_qsize
= VTNET_RINGSZ
;
605 sc
->vsc_queues
[VTNET_CTLQ
].vq_notify
= pci_vtnet_ping_ctlq
;
608 value
= get_config_value_node(nvl
, "mac");
610 err
= net_parsemac(value
, sc
->vsc_config
.mac
);
616 net_genmac(pi
, sc
->vsc_config
.mac
);
618 value
= get_config_value_node(nvl
, "mtu");
620 err
= net_parsemtu(value
, &mtu
);
626 if (mtu
< VTNET_MIN_MTU
|| mtu
> VTNET_MAX_MTU
) {
632 sc
->vsc_consts
.vc_hv_caps
|= VIRTIO_NET_F_MTU
;
634 sc
->vsc_config
.mtu
= mtu
;
636 /* Permit interfaces without a configured backend. */
637 if (get_config_value_node(nvl
, "backend") != NULL
) {
638 err
= netbe_init(&sc
->vsc_be
, nvl
, pci_vtnet_rx_callback
, sc
);
644 size_t buflen
= sizeof (sc
->vsc_config
.mac
);
646 err
= netbe_get_mac(sc
->vsc_be
, sc
->vsc_config
.mac
, &buflen
);
654 sc
->vsc_consts
.vc_hv_caps
|= VIRTIO_NET_F_MRG_RXBUF
|
655 netbe_get_cap(sc
->vsc_be
);
658 * Since we do not actually support multiqueue,
659 * set the maximum virtqueue pairs to 1.
661 sc
->vsc_config
.max_virtqueue_pairs
= 1;
663 /* initialize config space */
664 pci_set_cfgdata16(pi
, PCIR_DEVICE
, VIRTIO_DEV_NET
);
665 pci_set_cfgdata16(pi
, PCIR_VENDOR
, VIRTIO_VENDOR
);
666 pci_set_cfgdata8(pi
, PCIR_CLASS
, PCIC_NETWORK
);
667 pci_set_cfgdata16(pi
, PCIR_SUBDEV_0
, VIRTIO_ID_NETWORK
);
668 pci_set_cfgdata16(pi
, PCIR_SUBVEND_0
, VIRTIO_VENDOR
);
670 /* Link is always up. */
671 sc
->vsc_config
.status
= 1;
673 vi_softc_linkup(&sc
->vsc_vs
, &sc
->vsc_consts
, sc
, pi
, sc
->vsc_queues
);
674 sc
->vsc_vs
.vs_mtx
= &sc
->vsc_mtx
;
676 /* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */
677 if (vi_intr_init(&sc
->vsc_vs
, 1, fbsdrun_virtio_msix())) {
682 /* use BAR 0 to map config regs in IO space */
683 vi_set_io_bar(&sc
->vsc_vs
, 0);
688 sc
->vhdrlen
= sizeof(struct virtio_net_rxhdr
) - 2;
689 pthread_mutex_init(&sc
->rx_mtx
, NULL
);
692 * Initialize tx semaphore & spawn TX processing thread.
693 * As of now, only one thread for TX desc processing is
696 sc
->tx_in_progress
= 0;
697 pthread_mutex_init(&sc
->tx_mtx
, NULL
);
698 pthread_cond_init(&sc
->tx_cond
, NULL
);
699 pthread_create(&sc
->tx_tid
, NULL
, pci_vtnet_tx_thread
, (void *)sc
);
700 snprintf(tname
, sizeof(tname
), "vtnet-%d:%d tx", pi
->pi_slot
,
702 pthread_set_name_np(sc
->tx_tid
, tname
);
708 pci_vtnet_cfgwrite(void *vsc
, int offset
, int size
, uint32_t value
)
710 struct pci_vtnet_softc
*sc
= vsc
;
713 if (offset
< (int)sizeof(sc
->vsc_config
.mac
)) {
714 assert(offset
+ size
<= (int)sizeof(sc
->vsc_config
.mac
));
716 * The driver is allowed to change the MAC address
718 ptr
= &sc
->vsc_config
.mac
[offset
];
719 memcpy(ptr
, &value
, size
);
721 /* silently ignore other writes */
722 DPRINTF(("vtnet: write to readonly reg %d", offset
));
729 pci_vtnet_cfgread(void *vsc
, int offset
, int size
, uint32_t *retval
)
731 struct pci_vtnet_softc
*sc
= vsc
;
734 ptr
= (uint8_t *)&sc
->vsc_config
+ offset
;
735 memcpy(retval
, ptr
, size
);
740 pci_vtnet_neg_features(void *vsc
, uint64_t negotiated_features
)
742 struct pci_vtnet_softc
*sc
= vsc
;
744 sc
->vsc_features
= negotiated_features
;
746 if (negotiated_features
& VIRTIO_NET_F_MRG_RXBUF
) {
747 sc
->vhdrlen
= sizeof(struct virtio_net_rxhdr
);
751 * Without mergeable rx buffers, virtio-net header is 2
752 * bytes shorter than sizeof(struct virtio_net_rxhdr).
754 sc
->vhdrlen
= sizeof(struct virtio_net_rxhdr
) - 2;
758 /* Tell the backend to enable some capabilities it has advertised. */
759 netbe_set_cap(sc
->vsc_be
, negotiated_features
, sc
->vhdrlen
);
760 sc
->be_vhdrlen
= netbe_get_vnet_hdr_len(sc
->vsc_be
);
761 assert(sc
->be_vhdrlen
== 0 || sc
->be_vhdrlen
== sc
->vhdrlen
);
763 pthread_mutex_lock(&sc
->rx_mtx
);
764 sc
->features_negotiated
= true;
765 pthread_mutex_unlock(&sc
->rx_mtx
);
768 static const struct pci_devemu pci_de_vnet
= {
769 .pe_emu
= "virtio-net",
770 .pe_init
= pci_vtnet_init
,
771 .pe_legacy_config
= netbe_legacy_config
,
772 .pe_barwrite
= vi_pci_write
,
773 .pe_barread
= vi_pci_read
,
775 PCI_EMUL_SET(pci_de_vnet
);