2 * Copyright (c) 2011 NetApp, Inc.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * This file and its contents are supplied under the terms of the
28 * Common Development and Distribution License ("CDDL"), version 1.0.
29 * You may only use this file in accordance with the terms of version
32 * A full copy of the text of the CDDL should have accompanied this
33 * source. A copy of the CDDL is also available via the Internet at
34 * http://www.illumos.org/license/CDDL.
36 * Copyright 2015 Pluribus Networks Inc.
37 * Copyright 2019 Joyent, Inc.
38 * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
41 #include <sys/cdefs.h>
43 #include <sys/param.h>
44 #include <sys/linker_set.h>
45 #include <sys/ioctl.h>
46 #include <sys/viona_io.h>
61 #include <libdllink.h>
62 #include <libdlvnic.h>
64 #include <machine/vmm.h>
/* Default virtqueue size, used when no "vqsize" option is supplied. */
#define	VIONA_RINGSZ	1024

/*
 * PCI config-space register offsets
 *
 * VIONA_R_CFG0 .. VIONA_R_CFG5 are indexed relative to VIONA_R_CFG0 into
 * the six-byte MAC address held in the softc.
 */
#define	VIONA_R_CFG0	24
#define	VIONA_R_CFG1	25
#define	VIONA_R_CFG2	26
#define	VIONA_R_CFG3	27
#define	VIONA_R_CFG4	28
#define	VIONA_R_CFG5	29
#define	VIONA_R_CFG6	30
#define	VIONA_R_CFG7	31
#define	VIONA_R_MAX	31

/*
 * Size of the register region: one past the highest register offset.
 * The expansion is parenthesized so that it behaves as a single operand
 * wherever it is used (e.g. "x - VIONA_REGSZ" or "VIONA_REGSZ * n").
 */
#define	VIONA_REGSZ	(VIONA_R_MAX + 1)
/* Debug switch: DPRINTF produces output only while this is non-zero. */
static volatile int pci_viona_debug;

/*
 * Both macros take a complete, parenthesized printf argument list:
 *
 *	DPRINTF(("viona: value %d\n", v));
 *
 * DPRINTF is wrapped in do { } while (0) so that it is a single statement;
 * without the wrapper, an "else" following a DPRINTF call would attach to
 * the macro's internal "if" instead of the caller's.
 * WPRINTF prints unconditionally.
 */
#define	DPRINTF(params)	do { if (pci_viona_debug) printf params; } while (0)
#define	WPRINTF(params)	printf params
108 struct pci_viona_softc
{
109 struct pci_devinst
*vsc_pi
;
110 pthread_mutex_t vsc_mtx
;
116 datalink_id_t vsc_linkid
;
119 /* Configurable parameters */
120 char vsc_linkname
[MAXLINKNAMELEN
];
121 uint32_t vsc_feature_mask
;
122 uint16_t vsc_vq_size
;
124 uint32_t vsc_features
;
125 uint8_t vsc_macaddr
[6];
127 uint64_t vsc_pfn
[VIONA_MAXQ
];
128 uint16_t vsc_msix_table_idx
[VIONA_MAXQ
];
129 boolean_t vsc_msix_active
;
/*
 * Return the size of IO BAR that maps virtio header and device specific
 * region. The size would vary depending on whether MSI-X is enabled or
 * not.
 */
138 pci_viona_iosize(struct pci_devinst
*pi
)
140 if (pci_msix_enabled(pi
)) {
141 return (VIONA_REGSZ
);
143 return (VIONA_REGSZ
-
144 (VIRTIO_PCI_CONFIG_OFF(1) - VIRTIO_PCI_CONFIG_OFF(0)));
149 pci_viona_qsize(struct pci_viona_softc
*sc
, int qnum
)
151 /* XXX no ctl queue currently */
152 if (qnum
== VIONA_CTLQ
) {
156 return (sc
->vsc_vq_size
);
160 pci_viona_ring_reset(struct pci_viona_softc
*sc
, int ring
)
162 assert(ring
< VIONA_MAXQ
);
176 res
= ioctl(sc
->vsc_vnafd
, VNA_IOC_RING_RESET
, ring
);
179 } else if (errno
!= EINTR
) {
180 WPRINTF(("ioctl viona ring %d reset failed %d\n",
186 sc
->vsc_pfn
[ring
] = 0;
190 pci_viona_update_status(struct pci_viona_softc
*sc
, uint32_t value
)
194 DPRINTF(("viona: device reset requested !\n"));
195 pci_viona_ring_reset(sc
, VIONA_RXQ
);
196 pci_viona_ring_reset(sc
, VIONA_TXQ
);
199 sc
->vsc_status
= value
;
203 pci_viona_poll_thread(void *param
)
205 struct pci_viona_softc
*sc
= param
;
207 const int fd
= sc
->vsc_vnafd
;
210 pollset
.events
= POLLRDBAND
;
213 if (poll(&pollset
, 1, -1) < 0) {
214 if (errno
== EINTR
|| errno
== EAGAIN
) {
217 WPRINTF(("pci_viona_poll_thread poll()"
218 "error %d\n", errno
));
222 if (pollset
.revents
& POLLRDBAND
) {
223 vioc_intr_poll_t vip
;
226 boolean_t assert_lintr
= B_FALSE
;
227 const boolean_t do_msix
= pci_msix_enabled(sc
->vsc_pi
);
229 res
= ioctl(fd
, VNA_IOC_INTR_POLL
, &vip
);
230 for (i
= 0; res
> 0 && i
< VIONA_VQ_MAX
; i
++) {
231 if (vip
.vip_status
[i
] == 0) {
235 pci_generate_msix(sc
->vsc_pi
,
236 sc
->vsc_msix_table_idx
[i
]);
238 assert_lintr
= B_TRUE
;
240 res
= ioctl(fd
, VNA_IOC_RING_INTR_CLR
, i
);
242 WPRINTF(("ioctl viona vq %d intr "
243 "clear failed %d\n", i
, errno
));
247 pthread_mutex_lock(&sc
->vsc_mtx
);
248 sc
->vsc_isr
|= VIRTIO_PCI_ISR_INTR
;
249 pci_lintr_assert(sc
->vsc_pi
);
250 pthread_mutex_unlock(&sc
->vsc_mtx
);
259 pci_viona_ring_init(struct pci_viona_softc
*sc
, uint64_t pfn
)
261 int qnum
= sc
->vsc_curq
;
262 vioc_ring_init_t vna_ri
;
265 assert(qnum
< VIONA_MAXQ
);
267 if (qnum
== VIONA_CTLQ
) {
271 sc
->vsc_pfn
[qnum
] = (pfn
<< VRING_PFN
);
273 vna_ri
.ri_index
= qnum
;
274 vna_ri
.ri_qsize
= pci_viona_qsize(sc
, qnum
);
275 vna_ri
.ri_qaddr
= (pfn
<< VRING_PFN
);
276 error
= ioctl(sc
->vsc_vnafd
, VNA_IOC_RING_INIT
, &vna_ri
);
279 WPRINTF(("ioctl viona ring %u init failed %d\n", qnum
, errno
));
284 pci_viona_viona_init(struct vmctx
*ctx
, struct pci_viona_softc
*sc
)
286 vioc_create_t vna_create
;
289 sc
->vsc_vnafd
= open("/dev/viona", O_RDWR
| O_EXCL
);
290 if (sc
->vsc_vnafd
== -1) {
291 WPRINTF(("open viona ctl failed: %d\n", errno
));
295 vna_create
.c_linkid
= sc
->vsc_linkid
;
296 vna_create
.c_vmfd
= vm_get_device_fd(ctx
);
297 error
= ioctl(sc
->vsc_vnafd
, VNA_IOC_CREATE
, &vna_create
);
299 (void) close(sc
->vsc_vnafd
);
300 WPRINTF(("ioctl viona create failed %d\n", errno
));
308 pci_viona_legacy_config(nvlist_t
*nvl
, const char *opt
)
310 char *config
, *name
, *tofree
, *value
;
315 config
= tofree
= strdup(opt
);
316 while ((name
= strsep(&config
, ",")) != NULL
) {
317 value
= strchr(name
, '=');
320 set_config_value_node(nvl
, name
, value
);
322 set_config_value_node(nvl
, "vnic", name
);
330 pci_viona_parse_opts(struct pci_viona_softc
*sc
, nvlist_t
*nvl
)
335 sc
->vsc_vq_size
= VIONA_RINGSZ
;
336 sc
->vsc_feature_mask
= 0;
337 sc
->vsc_linkname
[0] = '\0';
339 value
= get_config_value_node(nvl
, "feature_mask");
344 num
= strtol(value
, NULL
, 0);
345 if (errno
!= 0 || num
< 0) {
347 "viona: invalid mask '%s'", value
);
349 sc
->vsc_feature_mask
= num
;
353 value
= get_config_value_node(nvl
, "vqsize");
358 num
= strtol(value
, NULL
, 0);
361 "viona: invalid vsqize '%s'", value
);
363 } else if (num
<= 2 || num
> 32768) {
365 "viona: vqsize out of range", num
);
367 } else if ((1 << (ffs(num
) - 1)) != num
) {
369 "viona: vqsize must be power of 2", num
);
372 sc
->vsc_vq_size
= num
;
376 value
= get_config_value_node(nvl
, "vnic");
378 fprintf(stderr
, "viona: vnic name required");
381 (void) strlcpy(sc
->vsc_linkname
, value
, MAXLINKNAMELEN
);
384 DPRINTF(("viona=%p dev=%s vqsize=%x feature_mask=%x\n", sc
,
385 sc
->vsc_linkname
, sc
->vsc_vq_size
, sc
->vsc_feature_mask
));
390 pci_viona_init(struct vmctx
*ctx
, struct pci_devinst
*pi
, nvlist_t
*nvl
)
392 dladm_handle_t handle
;
393 dladm_status_t status
;
394 dladm_vnic_attr_t attr
;
395 char errmsg
[DLADM_STRSIZE
];
396 char tname
[MAXCOMLEN
+ 1];
398 struct pci_viona_softc
*sc
;
402 vnic
= get_config_value_node(nvl
, "vnic");
404 printf("virtio-viona: vnic required\n");
408 sc
= malloc(sizeof (struct pci_viona_softc
));
409 memset(sc
, 0, sizeof (struct pci_viona_softc
));
414 pthread_mutex_init(&sc
->vsc_mtx
, NULL
);
416 if (pci_viona_parse_opts(sc
, nvl
) != 0) {
421 if ((status
= dladm_open(&handle
)) != DLADM_STATUS_OK
) {
422 WPRINTF(("could not open /dev/dld"));
427 if ((status
= dladm_name2info(handle
, sc
->vsc_linkname
, &sc
->vsc_linkid
,
428 NULL
, NULL
, NULL
)) != DLADM_STATUS_OK
) {
429 WPRINTF(("dladm_name2info() for %s failed: %s\n", vnic
,
430 dladm_status2str(status
, errmsg
)));
436 if ((status
= dladm_vnic_info(handle
, sc
->vsc_linkid
, &attr
,
437 DLADM_OPT_ACTIVE
)) != DLADM_STATUS_OK
) {
438 WPRINTF(("dladm_vnic_info() for %s failed: %s\n", vnic
,
439 dladm_status2str(status
, errmsg
)));
445 memcpy(sc
->vsc_macaddr
, attr
.va_mac_addr
, ETHERADDRL
);
449 error
= pci_viona_viona_init(ctx
, sc
);
455 error
= pthread_create(&tid
, NULL
, pci_viona_poll_thread
, sc
);
457 snprintf(tname
, sizeof (tname
), "vionapoll:%s", vnic
);
458 pthread_set_name_np(tid
, tname
);
460 /* initialize config space */
461 pci_set_cfgdata16(pi
, PCIR_DEVICE
, VIRTIO_DEV_NET
);
462 pci_set_cfgdata16(pi
, PCIR_VENDOR
, VIRTIO_VENDOR
);
463 pci_set_cfgdata8(pi
, PCIR_CLASS
, PCIC_NETWORK
);
464 pci_set_cfgdata16(pi
, PCIR_SUBDEV_0
, VIRTIO_ID_NETWORK
);
465 pci_set_cfgdata16(pi
, PCIR_SUBVEND_0
, VIRTIO_VENDOR
);
468 for (i
= 0; i
< VIONA_MAXQ
; i
++)
469 sc
->vsc_msix_table_idx
[i
] = VIRTIO_MSI_NO_VECTOR
;
471 /* BAR 1 used to map MSI-X table and PBA */
472 if (pci_emul_add_msixcap(pi
, VIONA_MAXQ
, 1)) {
477 /* BAR 0 for legacy-style virtio register access. */
478 error
= pci_emul_alloc_bar(pi
, 0, PCIBAR_IO
, VIONA_REGSZ
);
480 WPRINTF(("could not allocate virtio BAR\n"));
486 * Need a legacy interrupt for virtio compliance, even though MSI-X
487 * operation is _strongly_ suggested for adequate performance.
489 pci_lintr_request(pi
);
495 viona_adjust_offset(struct pci_devinst
*pi
, uint64_t offset
)
498 * Device specific offsets used by guest would change based on
499 * whether MSI-X capability is enabled or not
501 if (!pci_msix_enabled(pi
)) {
502 if (offset
>= VIRTIO_PCI_CONFIG_OFF(0)) {
503 return (offset
+ (VIRTIO_PCI_CONFIG_OFF(1) -
504 VIRTIO_PCI_CONFIG_OFF(0)));
512 pci_viona_ring_set_msix(struct pci_devinst
*pi
, uint_t ring
)
514 struct pci_viona_softc
*sc
= pi
->pi_arg
;
515 struct msix_table_entry mte
;
520 assert(ring
<= VIONA_VQ_TX
);
525 tab_index
= sc
->vsc_msix_table_idx
[ring
];
527 if (tab_index
!= VIRTIO_MSI_NO_VECTOR
&& sc
->vsc_msix_active
) {
528 mte
= pi
->pi_msix
.table
[tab_index
];
529 if ((mte
.vector_control
& PCIM_MSIX_VCTRL_MASK
) == 0) {
530 vrm
.rm_addr
= mte
.addr
;
531 vrm
.rm_msg
= mte
.msg_data
;
535 res
= ioctl(sc
->vsc_vnafd
, VNA_IOC_RING_SET_MSI
, &vrm
);
537 WPRINTF(("ioctl viona set_msi %d failed %d\n", ring
, errno
));
542 pci_viona_lintrupdate(struct pci_devinst
*pi
)
544 struct pci_viona_softc
*sc
= pi
->pi_arg
;
545 boolean_t msix_on
= B_FALSE
;
547 pthread_mutex_lock(&sc
->vsc_mtx
);
548 msix_on
= pci_msix_enabled(pi
) && (pi
->pi_msix
.function_mask
== 0);
549 if ((sc
->vsc_msix_active
&& !msix_on
) ||
550 (msix_on
&& !sc
->vsc_msix_active
)) {
553 sc
->vsc_msix_active
= msix_on
;
554 /* Update in-kernel ring configs */
555 for (i
= 0; i
<= VIONA_VQ_TX
; i
++) {
556 pci_viona_ring_set_msix(pi
, i
);
559 pthread_mutex_unlock(&sc
->vsc_mtx
);
563 pci_viona_msix_update(struct pci_devinst
*pi
, uint64_t offset
)
565 struct pci_viona_softc
*sc
= pi
->pi_arg
;
568 pthread_mutex_lock(&sc
->vsc_mtx
);
569 if (!sc
->vsc_msix_active
) {
570 pthread_mutex_unlock(&sc
->vsc_mtx
);
575 * Rather than update every possible MSI-X vector, cheat and use the
576 * offset to calculate the entry within the table. Since this should
577 * only be called when a write to the table succeeds, the index should
580 tab_index
= offset
/ MSIX_TABLE_ENTRY_SIZE
;
582 for (i
= 0; i
<= VIONA_VQ_TX
; i
++) {
583 if (sc
->vsc_msix_table_idx
[i
] != tab_index
) {
586 pci_viona_ring_set_msix(pi
, i
);
589 pthread_mutex_unlock(&sc
->vsc_mtx
);
593 pci_viona_qnotify(struct pci_viona_softc
*sc
, int ring
)
600 error
= ioctl(sc
->vsc_vnafd
, VNA_IOC_RING_KICK
, ring
);
602 WPRINTF(("ioctl viona ring %d kick failed %d\n",
607 DPRINTF(("viona: control qnotify!\n"));
615 pci_viona_baraddr(struct vmctx
*ctx
, struct pci_devinst
*pi
, int baridx
,
616 int enabled
, uint64_t address
)
618 struct pci_viona_softc
*sc
= pi
->pi_arg
;
626 error
= ioctl(sc
->vsc_vnafd
, VNA_IOC_SET_NOTIFY_IOP
, 0);
628 WPRINTF(("uninstall ioport hook failed %d\n", errno
));
633 * Install ioport hook for virtqueue notification.
634 * This is part of the virtio common configuration area so the
635 * address does not change with MSI-X status.
637 ioport
= address
+ VIRTIO_PCI_QUEUE_NOTIFY
;
638 error
= ioctl(sc
->vsc_vnafd
, VNA_IOC_SET_NOTIFY_IOP
, ioport
);
640 WPRINTF(("install ioport hook at %x failed %d\n",
646 pci_viona_write(struct vmctx
*ctx __unused
, struct pci_devinst
*pi
,
647 int baridx
, uint64_t offset
, int size
, uint64_t value
)
649 struct pci_viona_softc
*sc
= pi
->pi_arg
;
653 if (baridx
== pci_msix_table_bar(pi
) ||
654 baridx
== pci_msix_pba_bar(pi
)) {
655 if (pci_emul_msix_twrite(pi
, offset
, size
, value
) == 0) {
656 pci_viona_msix_update(pi
, offset
);
663 if (offset
+ size
> pci_viona_iosize(pi
)) {
664 DPRINTF(("viona_write: 2big, offset %ld size %d\n",
669 pthread_mutex_lock(&sc
->vsc_mtx
);
671 offset
= viona_adjust_offset(pi
, offset
);
674 case VIRTIO_PCI_GUEST_FEATURES
:
676 value
&= ~(sc
->vsc_feature_mask
);
677 err
= ioctl(sc
->vsc_vnafd
, VNA_IOC_SET_FEATURES
, &value
);
679 WPRINTF(("ioctl feature negotiation returned"
680 " err = %d\n", errno
));
682 sc
->vsc_features
= value
;
685 case VIRTIO_PCI_QUEUE_PFN
:
687 pci_viona_ring_init(sc
, value
);
689 case VIRTIO_PCI_QUEUE_SEL
:
691 assert(value
< VIONA_MAXQ
);
692 sc
->vsc_curq
= value
;
694 case VIRTIO_PCI_QUEUE_NOTIFY
:
696 assert(value
< VIONA_MAXQ
);
697 pci_viona_qnotify(sc
, value
);
699 case VIRTIO_PCI_STATUS
:
701 pci_viona_update_status(sc
, value
);
703 case VIRTIO_MSI_CONFIG_VECTOR
:
705 sc
->vsc_msix_table_idx
[VIONA_CTLQ
] = value
;
707 case VIRTIO_MSI_QUEUE_VECTOR
:
709 assert(sc
->vsc_curq
!= VIONA_CTLQ
);
710 sc
->vsc_msix_table_idx
[sc
->vsc_curq
] = value
;
711 pci_viona_ring_set_msix(pi
, sc
->vsc_curq
);
719 assert((size
+ offset
) <= (VIONA_R_CFG5
+ 1));
720 ptr
= &sc
->vsc_macaddr
[offset
- VIONA_R_CFG0
];
722 * The driver is allowed to change the MAC address
724 sc
->vsc_macaddr
[offset
- VIONA_R_CFG0
] = value
;
726 *(uint8_t *)ptr
= value
;
727 } else if (size
== 2) {
728 *(uint16_t *)ptr
= value
;
730 *(uint32_t *)ptr
= value
;
733 case VIRTIO_PCI_HOST_FEATURES
:
734 case VIRTIO_PCI_QUEUE_NUM
:
738 DPRINTF(("viona: write to readonly reg %ld\n\r", offset
));
741 DPRINTF(("viona: unknown i/o write offset %ld\n\r", offset
));
746 pthread_mutex_unlock(&sc
->vsc_mtx
);
750 pci_viona_read(struct vmctx
*ctx __unused
, struct pci_devinst
*pi
,
751 int baridx
, uint64_t offset
, int size
)
753 struct pci_viona_softc
*sc
= pi
->pi_arg
;
758 if (baridx
== pci_msix_table_bar(pi
) ||
759 baridx
== pci_msix_pba_bar(pi
)) {
760 return (pci_emul_msix_tread(pi
, offset
, size
));
765 if (offset
+ size
> pci_viona_iosize(pi
)) {
766 DPRINTF(("viona_read: 2big, offset %ld size %d\n",
771 pthread_mutex_lock(&sc
->vsc_mtx
);
773 offset
= viona_adjust_offset(pi
, offset
);
776 case VIRTIO_PCI_HOST_FEATURES
:
778 err
= ioctl(sc
->vsc_vnafd
, VNA_IOC_GET_FEATURES
, &value
);
780 WPRINTF(("ioctl get host features returned"
781 " err = %d\n", errno
));
783 value
&= ~sc
->vsc_feature_mask
;
785 case VIRTIO_PCI_GUEST_FEATURES
:
787 value
= sc
->vsc_features
; /* XXX never read ? */
789 case VIRTIO_PCI_QUEUE_PFN
:
791 value
= sc
->vsc_pfn
[sc
->vsc_curq
] >> VRING_PFN
;
793 case VIRTIO_PCI_QUEUE_NUM
:
795 value
= pci_viona_qsize(sc
, sc
->vsc_curq
);
797 case VIRTIO_PCI_QUEUE_SEL
:
799 value
= sc
->vsc_curq
; /* XXX never read ? */
801 case VIRTIO_PCI_QUEUE_NOTIFY
:
803 value
= sc
->vsc_curq
; /* XXX never read ? */
805 case VIRTIO_PCI_STATUS
:
807 value
= sc
->vsc_status
;
812 sc
->vsc_isr
= 0; /* a read clears this flag */
814 pci_lintr_deassert(pi
);
817 case VIRTIO_MSI_CONFIG_VECTOR
:
819 value
= sc
->vsc_msix_table_idx
[VIONA_CTLQ
];
821 case VIRTIO_MSI_QUEUE_VECTOR
:
823 assert(sc
->vsc_curq
!= VIONA_CTLQ
);
824 value
= sc
->vsc_msix_table_idx
[sc
->vsc_curq
];
832 assert((size
+ offset
) <= (VIONA_R_CFG5
+ 1));
833 ptr
= &sc
->vsc_macaddr
[offset
- VIONA_R_CFG0
];
835 value
= *(uint8_t *)ptr
;
836 } else if (size
== 2) {
837 value
= *(uint16_t *)ptr
;
839 value
= *(uint32_t *)ptr
;
844 value
= 0x01; /* XXX link always up */
848 value
= 0; /* XXX link status in LSB */
851 DPRINTF(("viona: unknown i/o read offset %ld\n\r", offset
));
856 pthread_mutex_unlock(&sc
->vsc_mtx
);
861 struct pci_devemu pci_de_viona
= {
862 .pe_emu
= "virtio-net-viona",
863 .pe_init
= pci_viona_init
,
864 .pe_legacy_config
= pci_viona_legacy_config
,
865 .pe_barwrite
= pci_viona_write
,
866 .pe_barread
= pci_viona_read
,
867 .pe_baraddr
= pci_viona_baraddr
,
868 .pe_lintrupdate
= pci_viona_lintrupdate
870 PCI_EMUL_SET(pci_de_viona
);