Merge commit 'b1e7e97d3b60469b243b3b2e22c7d8cbd11c7c90'
[unleashed.git] / kernel / drivers / net / vioif / vioif.c
blobd5dd1e8e3977d93728a6cb5e0f0578b75690d092
1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
13 * Copyright 2013 Nexenta Inc. All rights reserved.
14 * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
15 * Copyright 2015 Joyent, Inc.
18 /* Based on the NetBSD virtio driver by Minoura Makoto. */
20 * Copyright (c) 2010 Minoura Makoto.
21 * All rights reserved.
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the above copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
32 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
33 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
34 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
35 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
36 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
38 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
39 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
40 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
41 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44 #include <sys/types.h>
45 #include <sys/errno.h>
46 #include <sys/param.h>
47 #include <sys/stropts.h>
48 #include <sys/stream.h>
49 #include <sys/strsubr.h>
50 #include <sys/kmem.h>
51 #include <sys/conf.h>
52 #include <sys/devops.h>
53 #include <sys/ksynch.h>
54 #include <sys/stat.h>
55 #include <sys/modctl.h>
56 #include <sys/debug.h>
57 #include <sys/pci.h>
58 #include <sys/ethernet.h>
59 #include <sys/vlan.h>
61 #include <sys/dlpi.h>
62 #include <sys/taskq.h>
63 #include <sys/cyclic.h>
65 #include <sys/pattr.h>
66 #include <sys/strsun.h>
68 #include <sys/random.h>
69 #include <sys/containerof.h>
70 #include <sys/stream.h>
72 #include <sys/mac.h>
73 #include <sys/mac_provider.h>
74 #include <sys/mac_ether.h>
76 #include "virtiovar.h"
77 #include "virtioreg.h"
79 /* Configuration registers */
80 #define VIRTIO_NET_CONFIG_MAC 0 /* 8bit x 6byte */
81 #define VIRTIO_NET_CONFIG_STATUS 6 /* 16bit */
83 /* Feature bits */
84 #define VIRTIO_NET_F_CSUM (1 << 0) /* Host handles pkts w/ partial csum */
85 #define VIRTIO_NET_F_GUEST_CSUM (1 << 1) /* Guest handles pkts w/ part csum */
86 #define VIRTIO_NET_F_MAC (1 << 5) /* Host has given MAC address. */
87 #define VIRTIO_NET_F_GSO (1 << 6) /* Host handles pkts w/ any GSO type */
88 #define VIRTIO_NET_F_GUEST_TSO4 (1 << 7) /* Guest can handle TSOv4 in. */
89 #define VIRTIO_NET_F_GUEST_TSO6 (1 << 8) /* Guest can handle TSOv6 in. */
90 #define VIRTIO_NET_F_GUEST_ECN (1 << 9) /* Guest can handle TSO[6] w/ ECN in */
91 #define VIRTIO_NET_F_GUEST_UFO (1 << 10) /* Guest can handle UFO in. */
92 #define VIRTIO_NET_F_HOST_TSO4 (1 << 11) /* Host can handle TSOv4 in. */
93 #define VIRTIO_NET_F_HOST_TSO6 (1 << 12) /* Host can handle TSOv6 in. */
94 #define VIRTIO_NET_F_HOST_ECN (1 << 13) /* Host can handle TSO[6] w/ ECN in */
95 #define VIRTIO_NET_F_HOST_UFO (1 << 14) /* Host can handle UFO in. */
96 #define VIRTIO_NET_F_MRG_RXBUF (1 << 15) /* Host can merge receive buffers. */
97 #define VIRTIO_NET_F_STATUS (1 << 16) /* Config.status available */
98 #define VIRTIO_NET_F_CTRL_VQ (1 << 17) /* Control channel available */
99 #define VIRTIO_NET_F_CTRL_RX (1 << 18) /* Control channel RX mode support */
100 #define VIRTIO_NET_F_CTRL_VLAN (1 << 19) /* Control channel VLAN filtering */
101 #define VIRTIO_NET_F_CTRL_RX_EXTRA (1 << 20) /* Extra RX mode control support */
103 #define VIRTIO_NET_FEATURE_BITS \
104 "\020" \
105 "\1CSUM" \
106 "\2GUEST_CSUM" \
107 "\6MAC" \
108 "\7GSO" \
109 "\10GUEST_TSO4" \
110 "\11GUEST_TSO6" \
111 "\12GUEST_ECN" \
112 "\13GUEST_UFO" \
113 "\14HOST_TSO4" \
114 "\15HOST_TSO6" \
115 "\16HOST_ECN" \
116 "\17HOST_UFO" \
117 "\20MRG_RXBUF" \
118 "\21STATUS" \
119 "\22CTRL_VQ" \
120 "\23CTRL_RX" \
121 "\24CTRL_VLAN" \
122 "\25CTRL_RX_EXTRA"
124 /* Status */
125 #define VIRTIO_NET_S_LINK_UP 1
127 #pragma pack(1)
128 /* Packet header structure */
129 struct virtio_net_hdr {
130 uint8_t flags;
131 uint8_t gso_type;
132 uint16_t hdr_len;
133 uint16_t gso_size;
134 uint16_t csum_start;
135 uint16_t csum_offset;
137 #pragma pack()
139 #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* flags */
140 #define VIRTIO_NET_HDR_GSO_NONE 0 /* gso_type */
141 #define VIRTIO_NET_HDR_GSO_TCPV4 1 /* gso_type */
142 #define VIRTIO_NET_HDR_GSO_UDP 3 /* gso_type */
143 #define VIRTIO_NET_HDR_GSO_TCPV6 4 /* gso_type */
144 #define VIRTIO_NET_HDR_GSO_ECN 0x80 /* gso_type, |'ed */
147 /* Control virtqueue */
148 #pragma pack(1)
149 struct virtio_net_ctrl_cmd {
150 uint8_t class;
151 uint8_t command;
153 #pragma pack()
155 #define VIRTIO_NET_CTRL_RX 0
156 #define VIRTIO_NET_CTRL_RX_PROMISC 0
157 #define VIRTIO_NET_CTRL_RX_ALLMULTI 1
159 #define VIRTIO_NET_CTRL_MAC 1
160 #define VIRTIO_NET_CTRL_MAC_TABLE_SET 0
162 #define VIRTIO_NET_CTRL_VLAN 2
163 #define VIRTIO_NET_CTRL_VLAN_ADD 0
164 #define VIRTIO_NET_CTRL_VLAN_DEL 1
166 #pragma pack(1)
167 struct virtio_net_ctrl_status {
168 uint8_t ack;
171 struct virtio_net_ctrl_rx {
172 uint8_t onoff;
175 struct virtio_net_ctrl_mac_tbl {
176 uint32_t nentries;
177 uint8_t macs[][ETHERADDRL];
180 struct virtio_net_ctrl_vlan {
181 uint16_t id;
183 #pragma pack()
185 static int vioif_quiesce(dev_info_t *);
186 static int vioif_attach(dev_info_t *, ddi_attach_cmd_t);
187 static int vioif_detach(dev_info_t *, ddi_detach_cmd_t);
189 DDI_DEFINE_STREAM_OPS(vioif_ops,
190 nulldev, /* identify */
191 nulldev, /* probe */
192 vioif_attach, /* attach */
193 vioif_detach, /* detach */
194 nodev, /* reset */
195 NULL, /* cb_ops */
196 D_MP, /* bus_ops */
197 NULL, /* power */
198 vioif_quiesce /* quiesce */);
200 static char vioif_ident[] = "VirtIO ethernet driver";
202 /* Standard Module linkage initialization for a Streams driver */
203 extern struct mod_ops mod_driverops;
205 static struct modldrv modldrv = {
206 &mod_driverops, /* Type of module. This one is a driver */
207 vioif_ident, /* short description */
208 &vioif_ops /* driver specific ops */
211 static struct modlinkage modlinkage = {
212 MODREV_1,
214 (void *)&modldrv,
215 NULL,
219 ddi_device_acc_attr_t vioif_attr = {
220 DDI_DEVICE_ATTR_V0,
221 DDI_NEVERSWAP_ACC, /* virtio is always native byte order */
222 DDI_STORECACHING_OK_ACC,
223 DDI_DEFAULT_ACC
227 * A mapping represents a binding for a single buffer that is contiguous in the
228 * virtual address space.
230 struct vioif_buf_mapping {
231 caddr_t vbm_buf;
232 ddi_dma_handle_t vbm_dmah;
233 ddi_acc_handle_t vbm_acch;
234 ddi_dma_cookie_t vbm_dmac;
235 unsigned int vbm_ncookies;
239 * Rx buffers can be loaned upstream, so the code has
240 * to allocate them dynamically.
242 struct vioif_rx_buf {
243 struct vioif_softc *rb_sc;
244 frtn_t rb_frtn;
246 struct vioif_buf_mapping rb_mapping;
250 * Tx buffers have two mapping types. One, "inline", is pre-allocated and is
251 * used to hold the virtio_net_header. Small packets also get copied there, as
252 * it's faster than mapping them. Bigger packets get mapped using the "external"
253 * mapping array. An array is used, because a packet may consist of multiple
254 * fragments, so each fragment gets bound to an entry. According to my
255 * observations, the number of fragments does not exceed 2, but just in case,
256 * a bigger, up to VIOIF_INDIRECT_MAX - 1 array is allocated. To save resources,
257 * the dma handles are allocated lazily in the tx path.
259 struct vioif_tx_buf {
260 mblk_t *tb_mp;
262 /* inline buffer */
263 struct vioif_buf_mapping tb_inline_mapping;
265 /* External buffers */
266 struct vioif_buf_mapping *tb_external_mapping;
267 unsigned int tb_external_num;
270 struct vioif_softc {
271 dev_info_t *sc_dev; /* mirrors virtio_softc->sc_dev */
272 struct virtio_softc sc_virtio;
274 mac_handle_t sc_mac_handle;
275 mac_register_t *sc_macp;
277 struct virtqueue *sc_rx_vq;
278 struct virtqueue *sc_tx_vq;
279 struct virtqueue *sc_ctrl_vq;
281 unsigned int sc_tx_stopped:1;
283 /* Feature bits. */
284 unsigned int sc_rx_csum:1;
285 unsigned int sc_tx_csum:1;
286 unsigned int sc_tx_tso4:1;
289 * For debugging, it is useful to know whether the MAC address we
290 * are using came from the host (via VIRTIO_NET_CONFIG_MAC) or
291 * was otherwise generated or set from within the guest.
293 unsigned int sc_mac_from_host:1;
295 int sc_mtu;
296 uint8_t sc_mac[ETHERADDRL];
298 * For rx buffers, we keep a pointer array, because the buffers
299 * can be loaned upstream, and we have to repopulate the array with
300 * new members.
302 struct vioif_rx_buf **sc_rxbufs;
305 * For tx, we just allocate an array of buffers. The packet can
306 * either be copied into the inline buffer, or the external mapping
307 * could be used to map the packet
309 struct vioif_tx_buf *sc_txbufs;
311 kstat_t *sc_intrstat;
313 * We "loan" rx buffers upstream and reuse them after they are
314 * freed. This lets us avoid allocations in the hot path.
316 kmem_cache_t *sc_rxbuf_cache;
317 ulong_t sc_rxloan;
319 /* Copying small packets turns out to be faster than mapping them. */
320 unsigned long sc_rxcopy_thresh;
321 unsigned long sc_txcopy_thresh;
324 * Statistics visible through mac:
326 uint64_t sc_ipackets;
327 uint64_t sc_opackets;
328 uint64_t sc_rbytes;
329 uint64_t sc_obytes;
330 uint64_t sc_brdcstxmt;
331 uint64_t sc_brdcstrcv;
332 uint64_t sc_multixmt;
333 uint64_t sc_multircv;
334 uint64_t sc_norecvbuf;
335 uint64_t sc_notxbuf;
336 uint64_t sc_ierrors;
337 uint64_t sc_oerrors;
340 * Internal debugging statistics:
342 uint64_t sc_rxfail_dma_handle;
343 uint64_t sc_rxfail_dma_buffer;
344 uint64_t sc_rxfail_dma_bind;
345 uint64_t sc_rxfail_chain_undersize;
346 uint64_t sc_rxfail_no_descriptors;
347 uint64_t sc_txfail_dma_handle;
348 uint64_t sc_txfail_dma_bind;
349 uint64_t sc_txfail_indirect_limit;
352 #define ETHER_HEADER_LEN sizeof (struct ether_header)
354 /* MTU + the ethernet header. */
355 #define MAX_PAYLOAD 65535
356 #define MAX_MTU (MAX_PAYLOAD - ETHER_HEADER_LEN)
357 #define DEFAULT_MTU ETHERMTU
360 * Yeah, we spend 8M per device. Turns out, there is no point
361 * being smart and using merged rx buffers (VIRTIO_NET_F_MRG_RXBUF),
362 * because vhost does not support them, and we expect to be used with
363 * vhost in production environment.
365 /* The buffer keeps both the packet data and the virtio_net_header. */
366 #define VIOIF_RX_SIZE (MAX_PAYLOAD + sizeof (struct virtio_net_hdr))
369 * We win a bit on header alignment, but the host wins a lot
370 * more on moving aligned buffers. Might need more thought.
372 #define VIOIF_IP_ALIGN 0
374 /* Maximum number of indirect descriptors, somewhat arbitrary. */
375 #define VIOIF_INDIRECT_MAX 128
378 * We pre-allocate a reasonably large buffer to copy small packets
379 * there. Bigger packets are mapped, packets with multiple
380 * cookies are mapped as indirect buffers.
382 #define VIOIF_TX_INLINE_SIZE 2048
384 /* Native queue size for all queues */
385 #define VIOIF_RX_QLEN 0
386 #define VIOIF_TX_QLEN 0
387 #define VIOIF_CTRL_QLEN 0
389 static uchar_t vioif_broadcast[ETHERADDRL] = {
390 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
393 #define VIOIF_TX_THRESH_MAX 640
394 #define VIOIF_RX_THRESH_MAX 640
396 #define CACHE_NAME_SIZE 32
398 static char vioif_txcopy_thresh[] =
399 "vioif_txcopy_thresh";
400 static char vioif_rxcopy_thresh[] =
401 "vioif_rxcopy_thresh";
403 static char *vioif_priv_props[] = {
404 vioif_txcopy_thresh,
405 vioif_rxcopy_thresh,
406 NULL
409 /* Add up to ddi? */
410 static ddi_dma_cookie_t *
411 vioif_dma_curr_cookie(ddi_dma_handle_t dmah)
413 ddi_dma_impl_t *dmah_impl = (void *) dmah;
414 ASSERT(dmah_impl->dmai_cookie);
415 return (dmah_impl->dmai_cookie);
418 static void
419 vioif_dma_reset_cookie(ddi_dma_handle_t dmah, ddi_dma_cookie_t *dmac)
421 ddi_dma_impl_t *dmah_impl = (void *) dmah;
422 dmah_impl->dmai_cookie = dmac;
425 static link_state_t
426 vioif_link_state(struct vioif_softc *sc)
428 if (sc->sc_virtio.sc_features & VIRTIO_NET_F_STATUS) {
429 if (virtio_read_device_config_2(&sc->sc_virtio,
430 VIRTIO_NET_CONFIG_STATUS) & VIRTIO_NET_S_LINK_UP) {
431 return (LINK_STATE_UP);
432 } else {
433 return (LINK_STATE_DOWN);
437 return (LINK_STATE_UP);
440 static ddi_dma_attr_t vioif_inline_buf_dma_attr = {
441 DMA_ATTR_V0, /* Version number */
442 0, /* low address */
443 0xFFFFFFFFFFFFFFFF, /* high address */
444 0xFFFFFFFF, /* counter register max */
445 1, /* page alignment */
446 1, /* burst sizes: 1 - 32 */
447 1, /* minimum transfer size */
448 0xFFFFFFFF, /* max transfer size */
449 0xFFFFFFFFFFFFFFF, /* address register max */
450 1, /* scatter-gather capacity */
451 1, /* device operates on bytes */
452 0, /* attr flag: set to 0 */
455 static ddi_dma_attr_t vioif_mapped_buf_dma_attr = {
456 DMA_ATTR_V0, /* Version number */
457 0, /* low address */
458 0xFFFFFFFFFFFFFFFF, /* high address */
459 0xFFFFFFFF, /* counter register max */
460 1, /* page alignment */
461 1, /* burst sizes: 1 - 32 */
462 1, /* minimum transfer size */
463 0xFFFFFFFF, /* max transfer size */
464 0xFFFFFFFFFFFFFFF, /* address register max */
466 /* One entry is used for the virtio_net_hdr on the tx path */
467 VIOIF_INDIRECT_MAX - 1, /* scatter-gather capacity */
468 1, /* device operates on bytes */
469 0, /* attr flag: set to 0 */
472 static ddi_device_acc_attr_t vioif_bufattr = {
473 DDI_DEVICE_ATTR_V0,
474 DDI_NEVERSWAP_ACC,
475 DDI_STORECACHING_OK_ACC,
476 DDI_DEFAULT_ACC
479 static void
480 vioif_rx_free(caddr_t free_arg)
482 struct vioif_rx_buf *buf = (void *) free_arg;
483 struct vioif_softc *sc = buf->rb_sc;
485 kmem_cache_free(sc->sc_rxbuf_cache, buf);
486 atomic_dec_ulong(&sc->sc_rxloan);
489 static int
490 vioif_rx_construct(void *buffer, void *user_arg, int kmflags)
492 _NOTE(ARGUNUSED(kmflags));
493 struct vioif_softc *sc = user_arg;
494 struct vioif_rx_buf *buf = buffer;
495 size_t len;
497 if (ddi_dma_alloc_handle(sc->sc_dev, &vioif_mapped_buf_dma_attr,
498 DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmah)) {
499 sc->sc_rxfail_dma_handle++;
500 goto exit_handle;
503 if (ddi_dma_mem_alloc(buf->rb_mapping.vbm_dmah,
504 VIOIF_RX_SIZE + sizeof (struct virtio_net_hdr),
505 &vioif_bufattr, DDI_DMA_STREAMING, DDI_DMA_SLEEP,
506 NULL, &buf->rb_mapping.vbm_buf, &len, &buf->rb_mapping.vbm_acch)) {
507 sc->sc_rxfail_dma_buffer++;
508 goto exit_alloc;
510 ASSERT(len >= VIOIF_RX_SIZE);
512 if (ddi_dma_addr_bind_handle(buf->rb_mapping.vbm_dmah, NULL,
513 buf->rb_mapping.vbm_buf, len, DDI_DMA_READ | DDI_DMA_STREAMING,
514 DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmac,
515 &buf->rb_mapping.vbm_ncookies)) {
516 sc->sc_rxfail_dma_bind++;
517 goto exit_bind;
520 ASSERT(buf->rb_mapping.vbm_ncookies <= VIOIF_INDIRECT_MAX);
522 buf->rb_sc = sc;
523 buf->rb_frtn.free_arg = (void *) buf;
524 buf->rb_frtn.free_func = vioif_rx_free;
526 return (0);
527 exit_bind:
528 ddi_dma_mem_free(&buf->rb_mapping.vbm_acch);
529 exit_alloc:
530 ddi_dma_free_handle(&buf->rb_mapping.vbm_dmah);
531 exit_handle:
533 return (ENOMEM);
536 static void
537 vioif_rx_destruct(void *buffer, void *user_arg)
539 _NOTE(ARGUNUSED(user_arg));
540 struct vioif_rx_buf *buf = buffer;
542 ASSERT(buf->rb_mapping.vbm_acch);
543 ASSERT(buf->rb_mapping.vbm_acch);
545 (void) ddi_dma_unbind_handle(buf->rb_mapping.vbm_dmah);
546 ddi_dma_mem_free(&buf->rb_mapping.vbm_acch);
547 ddi_dma_free_handle(&buf->rb_mapping.vbm_dmah);
550 static void
551 vioif_free_mems(struct vioif_softc *sc)
553 int i;
555 for (i = 0; i < sc->sc_tx_vq->vq_num; i++) {
556 struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
557 int j;
559 /* Tear down the internal mapping. */
561 ASSERT(buf->tb_inline_mapping.vbm_acch);
562 ASSERT(buf->tb_inline_mapping.vbm_dmah);
564 (void) ddi_dma_unbind_handle(buf->tb_inline_mapping.vbm_dmah);
565 ddi_dma_mem_free(&buf->tb_inline_mapping.vbm_acch);
566 ddi_dma_free_handle(&buf->tb_inline_mapping.vbm_dmah);
568 /* We should not see any in-flight buffers at this point. */
569 ASSERT(!buf->tb_mp);
571 /* Free all the dma hdnales we allocated lazily. */
572 for (j = 0; buf->tb_external_mapping[j].vbm_dmah; j++)
573 ddi_dma_free_handle(
574 &buf->tb_external_mapping[j].vbm_dmah);
575 /* Free the external mapping array. */
576 kmem_free(buf->tb_external_mapping,
577 sizeof (struct vioif_tx_buf) * VIOIF_INDIRECT_MAX - 1);
580 kmem_free(sc->sc_txbufs, sizeof (struct vioif_tx_buf) *
581 sc->sc_tx_vq->vq_num);
583 for (i = 0; i < sc->sc_rx_vq->vq_num; i++) {
584 struct vioif_rx_buf *buf = sc->sc_rxbufs[i];
586 if (buf)
587 kmem_cache_free(sc->sc_rxbuf_cache, buf);
589 kmem_free(sc->sc_rxbufs, sizeof (struct vioif_rx_buf *) *
590 sc->sc_rx_vq->vq_num);
593 static int
594 vioif_alloc_mems(struct vioif_softc *sc)
596 int i, txqsize, rxqsize;
597 size_t len;
598 unsigned int nsegments;
600 txqsize = sc->sc_tx_vq->vq_num;
601 rxqsize = sc->sc_rx_vq->vq_num;
603 sc->sc_txbufs = kmem_zalloc(sizeof (struct vioif_tx_buf) * txqsize,
604 KM_SLEEP);
605 if (sc->sc_txbufs == NULL) {
606 dev_err(sc->sc_dev, CE_WARN,
607 "Failed to allocate the tx buffers array");
608 goto exit_txalloc;
612 * We don't allocate the rx vioif_bufs, just the pointers, as
613 * rx vioif_bufs can be loaned upstream, and we don't know the
614 * total number we need.
616 sc->sc_rxbufs = kmem_zalloc(sizeof (struct vioif_rx_buf *) * rxqsize,
617 KM_SLEEP);
618 if (sc->sc_rxbufs == NULL) {
619 dev_err(sc->sc_dev, CE_WARN,
620 "Failed to allocate the rx buffers pointer array");
621 goto exit_rxalloc;
624 for (i = 0; i < txqsize; i++) {
625 struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
627 /* Allocate and bind an inline mapping. */
629 if (ddi_dma_alloc_handle(sc->sc_dev,
630 &vioif_inline_buf_dma_attr,
631 DDI_DMA_SLEEP, NULL, &buf->tb_inline_mapping.vbm_dmah)) {
633 dev_err(sc->sc_dev, CE_WARN,
634 "Can't allocate dma handle for tx buffer %d", i);
635 goto exit_tx;
638 if (ddi_dma_mem_alloc(buf->tb_inline_mapping.vbm_dmah,
639 VIOIF_TX_INLINE_SIZE, &vioif_bufattr, DDI_DMA_STREAMING,
640 DDI_DMA_SLEEP, NULL, &buf->tb_inline_mapping.vbm_buf,
641 &len, &buf->tb_inline_mapping.vbm_acch)) {
643 dev_err(sc->sc_dev, CE_WARN,
644 "Can't allocate tx buffer %d", i);
645 goto exit_tx;
647 ASSERT(len >= VIOIF_TX_INLINE_SIZE);
649 if (ddi_dma_addr_bind_handle(buf->tb_inline_mapping.vbm_dmah,
650 NULL, buf->tb_inline_mapping.vbm_buf, len,
651 DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL,
652 &buf->tb_inline_mapping.vbm_dmac, &nsegments)) {
654 dev_err(sc->sc_dev, CE_WARN,
655 "Can't bind tx buffer %d", i);
656 goto exit_tx;
659 /* We asked for a single segment */
660 ASSERT(nsegments == 1);
663 * We allow up to VIOIF_INDIRECT_MAX - 1 external mappings.
664 * In reality, I don't expect more then 2-3 used, but who
665 * knows.
667 buf->tb_external_mapping = kmem_zalloc(
668 sizeof (struct vioif_tx_buf) * VIOIF_INDIRECT_MAX - 1,
669 KM_SLEEP);
672 * The external mapping's dma handles are allocate lazily,
673 * as we don't expect most of them to be used..
677 return (0);
679 exit_tx:
680 for (i = 0; i < txqsize; i++) {
681 struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
683 if (buf->tb_inline_mapping.vbm_dmah)
684 (void) ddi_dma_unbind_handle(
685 buf->tb_inline_mapping.vbm_dmah);
687 if (buf->tb_inline_mapping.vbm_acch)
688 ddi_dma_mem_free(
689 &buf->tb_inline_mapping.vbm_acch);
691 if (buf->tb_inline_mapping.vbm_dmah)
692 ddi_dma_free_handle(
693 &buf->tb_inline_mapping.vbm_dmah);
695 if (buf->tb_external_mapping)
696 kmem_free(buf->tb_external_mapping,
697 sizeof (struct vioif_tx_buf) *
698 VIOIF_INDIRECT_MAX - 1);
701 kmem_free(sc->sc_rxbufs, sizeof (struct vioif_rx_buf) * rxqsize);
703 exit_rxalloc:
704 kmem_free(sc->sc_txbufs, sizeof (struct vioif_tx_buf) * txqsize);
705 exit_txalloc:
706 return (ENOMEM);
709 /* ARGSUSED */
711 vioif_multicst(void *arg, boolean_t add, const uint8_t *macaddr)
713 return (DDI_SUCCESS);
716 /* ARGSUSED */
718 vioif_promisc(void *arg, boolean_t on)
720 return (DDI_SUCCESS);
723 /* ARGSUSED */
725 vioif_unicst(void *arg, const uint8_t *macaddr)
727 return (DDI_FAILURE);
731 static uint_t
732 vioif_add_rx(struct vioif_softc *sc, int kmflag)
734 uint_t num_added = 0;
735 struct vq_entry *ve;
737 while ((ve = vq_alloc_entry(sc->sc_rx_vq)) != NULL) {
738 struct vioif_rx_buf *buf = sc->sc_rxbufs[ve->qe_index];
740 if (buf == NULL) {
741 /* First run, allocate the buffer. */
742 buf = kmem_cache_alloc(sc->sc_rxbuf_cache, kmflag);
743 sc->sc_rxbufs[ve->qe_index] = buf;
746 /* Still nothing? Bye. */
747 if (buf == NULL) {
748 sc->sc_norecvbuf++;
749 vq_free_entry(sc->sc_rx_vq, ve);
750 break;
753 ASSERT(buf->rb_mapping.vbm_ncookies >= 1);
756 * For an unknown reason, the virtio_net_hdr must be placed
757 * as a separate virtio queue entry.
759 virtio_ve_add_indirect_buf(ve,
760 buf->rb_mapping.vbm_dmac.dmac_laddress,
761 sizeof (struct virtio_net_hdr), B_FALSE);
763 /* Add the rest of the first cookie. */
764 virtio_ve_add_indirect_buf(ve,
765 buf->rb_mapping.vbm_dmac.dmac_laddress +
766 sizeof (struct virtio_net_hdr),
767 buf->rb_mapping.vbm_dmac.dmac_size -
768 sizeof (struct virtio_net_hdr), B_FALSE);
771 * If the buffer consists of a single cookie (unlikely for a
772 * 64-k buffer), we are done. Otherwise, add the rest of the
773 * cookies using indirect entries.
775 if (buf->rb_mapping.vbm_ncookies > 1) {
776 ddi_dma_cookie_t *first_extra_dmac;
777 ddi_dma_cookie_t dmac;
778 first_extra_dmac =
779 vioif_dma_curr_cookie(buf->rb_mapping.vbm_dmah);
781 ddi_dma_nextcookie(buf->rb_mapping.vbm_dmah, &dmac);
782 virtio_ve_add_cookie(ve, buf->rb_mapping.vbm_dmah,
783 dmac, buf->rb_mapping.vbm_ncookies - 1, B_FALSE);
784 vioif_dma_reset_cookie(buf->rb_mapping.vbm_dmah,
785 first_extra_dmac);
788 virtio_push_chain(ve, B_FALSE);
789 num_added++;
792 return (num_added);
795 static uint_t
796 vioif_populate_rx(struct vioif_softc *sc, int kmflag)
798 uint_t num_added = vioif_add_rx(sc, kmflag);
800 if (num_added > 0)
801 virtio_sync_vq(sc->sc_rx_vq);
803 return (num_added);
806 static uint_t
807 vioif_process_rx(struct vioif_softc *sc)
809 struct vq_entry *ve;
810 struct vioif_rx_buf *buf;
811 mblk_t *mphead = NULL, *lastmp = NULL, *mp;
812 uint32_t len;
813 uint_t num_processed = 0;
815 while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len))) {
817 buf = sc->sc_rxbufs[ve->qe_index];
818 ASSERT(buf);
820 if (len < sizeof (struct virtio_net_hdr)) {
821 sc->sc_rxfail_chain_undersize++;
822 sc->sc_ierrors++;
823 virtio_free_chain(ve);
824 continue;
827 len -= sizeof (struct virtio_net_hdr);
829 * We copy small packets that happen to fit into a single
830 * cookie and reuse the buffers. For bigger ones, we loan
831 * the buffers upstream.
833 if (len < sc->sc_rxcopy_thresh) {
834 mp = allocb(len, 0);
835 if (mp == NULL) {
836 sc->sc_norecvbuf++;
837 sc->sc_ierrors++;
839 virtio_free_chain(ve);
840 break;
843 bcopy((char *)buf->rb_mapping.vbm_buf +
844 sizeof (struct virtio_net_hdr), mp->b_rptr, len);
845 mp->b_wptr = mp->b_rptr + len;
847 } else {
848 mp = desballoc((unsigned char *)
849 buf->rb_mapping.vbm_buf +
850 sizeof (struct virtio_net_hdr) +
851 VIOIF_IP_ALIGN, len, 0, &buf->rb_frtn);
852 if (mp == NULL) {
853 sc->sc_norecvbuf++;
854 sc->sc_ierrors++;
856 virtio_free_chain(ve);
857 break;
859 mp->b_wptr = mp->b_rptr + len;
861 atomic_inc_ulong(&sc->sc_rxloan);
863 * Buffer loaned, we will have to allocate a new one
864 * for this slot.
866 sc->sc_rxbufs[ve->qe_index] = NULL;
870 * virtio-net does not tell us if this packet is multicast
871 * or broadcast, so we have to check it.
873 if (mp->b_rptr[0] & 0x1) {
874 if (bcmp(mp->b_rptr, vioif_broadcast, ETHERADDRL) != 0)
875 sc->sc_multircv++;
876 else
877 sc->sc_brdcstrcv++;
880 sc->sc_rbytes += len;
881 sc->sc_ipackets++;
883 virtio_free_chain(ve);
885 if (lastmp == NULL) {
886 mphead = mp;
887 } else {
888 lastmp->b_next = mp;
890 lastmp = mp;
891 num_processed++;
894 if (mphead != NULL) {
895 mac_rx(sc->sc_mac_handle, NULL, mphead);
898 return (num_processed);
901 static uint_t
902 vioif_reclaim_used_tx(struct vioif_softc *sc)
904 struct vq_entry *ve;
905 struct vioif_tx_buf *buf;
906 uint32_t len;
907 mblk_t *mp;
908 uint_t num_reclaimed = 0;
910 while ((ve = virtio_pull_chain(sc->sc_tx_vq, &len))) {
911 /* We don't chain descriptors for tx, so don't expect any. */
912 ASSERT(!ve->qe_next);
914 buf = &sc->sc_txbufs[ve->qe_index];
915 mp = buf->tb_mp;
916 buf->tb_mp = NULL;
918 if (mp != NULL) {
919 for (int i = 0; i < buf->tb_external_num; i++)
920 (void) ddi_dma_unbind_handle(
921 buf->tb_external_mapping[i].vbm_dmah);
924 virtio_free_chain(ve);
926 /* External mapping used, mp was not freed in vioif_send() */
927 if (mp != NULL)
928 freemsg(mp);
929 num_reclaimed++;
932 if (sc->sc_tx_stopped && num_reclaimed > 0) {
933 sc->sc_tx_stopped = 0;
934 mac_tx_update(sc->sc_mac_handle);
937 return (num_reclaimed);
940 /* sc will be used to update stat counters. */
941 /* ARGSUSED */
942 static inline void
943 vioif_tx_inline(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
944 size_t msg_size)
946 struct vioif_tx_buf *buf;
947 buf = &sc->sc_txbufs[ve->qe_index];
949 ASSERT(buf);
951 /* Frees mp */
952 mcopymsg(mp, buf->tb_inline_mapping.vbm_buf +
953 sizeof (struct virtio_net_hdr));
955 virtio_ve_add_indirect_buf(ve,
956 buf->tb_inline_mapping.vbm_dmac.dmac_laddress +
957 sizeof (struct virtio_net_hdr), msg_size, B_TRUE);
960 static inline int
961 vioif_tx_lazy_handle_alloc(struct vioif_softc *sc, struct vioif_tx_buf *buf,
962 int i)
964 int ret = DDI_SUCCESS;
966 if (!buf->tb_external_mapping[i].vbm_dmah) {
967 ret = ddi_dma_alloc_handle(sc->sc_dev,
968 &vioif_mapped_buf_dma_attr, DDI_DMA_SLEEP, NULL,
969 &buf->tb_external_mapping[i].vbm_dmah);
970 if (ret != DDI_SUCCESS) {
971 sc->sc_txfail_dma_handle++;
975 return (ret);
978 static inline int
979 vioif_tx_external(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
980 size_t msg_size)
982 _NOTE(ARGUNUSED(msg_size));
984 struct vioif_tx_buf *buf;
985 mblk_t *nmp;
986 int i, j;
987 int ret = DDI_SUCCESS;
989 buf = &sc->sc_txbufs[ve->qe_index];
991 ASSERT(buf);
993 buf->tb_external_num = 0;
994 i = 0;
995 nmp = mp;
997 while (nmp) {
998 size_t len;
999 ddi_dma_cookie_t dmac;
1000 unsigned int ncookies;
1002 len = MBLKL(nmp);
1004 * For some reason, the network stack can
1005 * actually send us zero-length fragments.
1007 if (len == 0) {
1008 nmp = nmp->b_cont;
1009 continue;
1012 ret = vioif_tx_lazy_handle_alloc(sc, buf, i);
1013 if (ret != DDI_SUCCESS) {
1014 sc->sc_notxbuf++;
1015 sc->sc_oerrors++;
1016 goto exit_lazy_alloc;
1018 ret = ddi_dma_addr_bind_handle(
1019 buf->tb_external_mapping[i].vbm_dmah, NULL,
1020 (caddr_t)nmp->b_rptr, len,
1021 DDI_DMA_WRITE | DDI_DMA_STREAMING,
1022 DDI_DMA_SLEEP, NULL, &dmac, &ncookies);
1024 if (ret != DDI_SUCCESS) {
1025 sc->sc_txfail_dma_bind++;
1026 sc->sc_oerrors++;
1027 goto exit_bind;
1030 /* Check if we still fit into the indirect table. */
1031 if (virtio_ve_indirect_available(ve) < ncookies) {
1032 sc->sc_txfail_indirect_limit++;
1033 sc->sc_notxbuf++;
1034 sc->sc_oerrors++;
1036 ret = DDI_FAILURE;
1037 goto exit_limit;
1040 virtio_ve_add_cookie(ve, buf->tb_external_mapping[i].vbm_dmah,
1041 dmac, ncookies, B_TRUE);
1043 nmp = nmp->b_cont;
1044 i++;
1047 buf->tb_external_num = i;
1048 /* Save the mp to free it when the packet is sent. */
1049 buf->tb_mp = mp;
1051 return (DDI_SUCCESS);
1053 exit_limit:
1054 exit_bind:
1055 exit_lazy_alloc:
1057 for (j = 0; j < i; j++) {
1058 (void) ddi_dma_unbind_handle(
1059 buf->tb_external_mapping[j].vbm_dmah);
1062 return (ret);
1065 static boolean_t
1066 vioif_send(struct vioif_softc *sc, mblk_t *mp)
1068 struct vq_entry *ve;
1069 struct vioif_tx_buf *buf;
1070 struct virtio_net_hdr *net_header = NULL;
1071 size_t msg_size = 0;
1072 uint32_t csum_start;
1073 uint32_t csum_stuff;
1074 uint32_t csum_flags;
1075 uint32_t lso_flags;
1076 uint32_t lso_mss;
1077 mblk_t *nmp;
1078 int ret;
1079 boolean_t lso_required = B_FALSE;
1081 for (nmp = mp; nmp; nmp = nmp->b_cont)
1082 msg_size += MBLKL(nmp);
1084 if (sc->sc_tx_tso4) {
1085 mac_lso_get(mp, &lso_mss, &lso_flags);
1086 lso_required = (lso_flags & HW_LSO);
1089 ve = vq_alloc_entry(sc->sc_tx_vq);
1091 if (ve == NULL) {
1092 sc->sc_notxbuf++;
1093 /* Out of free descriptors - try later. */
1094 return (B_FALSE);
1096 buf = &sc->sc_txbufs[ve->qe_index];
1098 /* Use the inline buffer of the first entry for the virtio_net_hdr. */
1099 (void) memset(buf->tb_inline_mapping.vbm_buf, 0,
1100 sizeof (struct virtio_net_hdr));
1102 net_header = (struct virtio_net_hdr *)buf->tb_inline_mapping.vbm_buf;
1104 mac_hcksum_get(mp, &csum_start, &csum_stuff, NULL,
1105 NULL, &csum_flags);
1107 /* They want us to do the TCP/UDP csum calculation. */
1108 if (csum_flags & HCK_PARTIALCKSUM) {
1109 struct ether_header *eth_header;
1110 int eth_hsize;
1112 /* Did we ask for it? */
1113 ASSERT(sc->sc_tx_csum);
1115 /* We only asked for partial csum packets. */
1116 ASSERT(!(csum_flags & HCK_IPV4_HDRCKSUM));
1117 ASSERT(!(csum_flags & HCK_FULLCKSUM));
1119 eth_header = (void *) mp->b_rptr;
1120 if (eth_header->ether_type == htons(ETHERTYPE_VLAN)) {
1121 eth_hsize = sizeof (struct ether_vlan_header);
1122 } else {
1123 eth_hsize = sizeof (struct ether_header);
1125 net_header->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
1126 net_header->csum_start = eth_hsize + csum_start;
1127 net_header->csum_offset = csum_stuff - csum_start;
1130 /* setup LSO fields if required */
1131 if (lso_required) {
1132 net_header->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1133 net_header->gso_size = (uint16_t)lso_mss;
1136 virtio_ve_add_indirect_buf(ve,
1137 buf->tb_inline_mapping.vbm_dmac.dmac_laddress,
1138 sizeof (struct virtio_net_hdr), B_TRUE);
1140 /* meanwhile update the statistic */
1141 if (mp->b_rptr[0] & 0x1) {
1142 if (bcmp(mp->b_rptr, vioif_broadcast, ETHERADDRL) != 0)
1143 sc->sc_multixmt++;
1144 else
1145 sc->sc_brdcstxmt++;
1149 * We copy small packets into the inline buffer. The bigger ones
1150 * get mapped using the mapped buffer.
1152 if (msg_size < sc->sc_txcopy_thresh) {
1153 vioif_tx_inline(sc, ve, mp, msg_size);
1154 } else {
1155 /* statistic gets updated by vioif_tx_external when fail */
1156 ret = vioif_tx_external(sc, ve, mp, msg_size);
1157 if (ret != DDI_SUCCESS)
1158 goto exit_tx_external;
1161 virtio_push_chain(ve, B_TRUE);
1163 sc->sc_opackets++;
1164 sc->sc_obytes += msg_size;
1166 return (B_TRUE);
1168 exit_tx_external:
1170 vq_free_entry(sc->sc_tx_vq, ve);
1172 * vioif_tx_external can fail when the buffer does not fit into the
1173 * indirect descriptor table. Free the mp. I don't expect this ever
1174 * to happen.
1176 freemsg(mp);
1178 return (B_TRUE);
1181 mblk_t *
1182 vioif_tx(void *arg, mblk_t *mp)
1184 struct vioif_softc *sc = arg;
1185 mblk_t *nmp;
1187 while (mp != NULL) {
1188 nmp = mp->b_next;
1189 mp->b_next = NULL;
1191 if (!vioif_send(sc, mp)) {
1192 sc->sc_tx_stopped = 1;
1193 mp->b_next = nmp;
1194 break;
1196 mp = nmp;
1199 return (mp);
1203 vioif_start(void *arg)
1205 struct vioif_softc *sc = arg;
1206 struct vq_entry *ve;
1207 uint32_t len;
1209 mac_link_update(sc->sc_mac_handle, vioif_link_state(sc));
1211 virtio_start_vq_intr(sc->sc_rx_vq);
1214 * Don't start interrupts on sc_tx_vq. We use VIRTIO_F_NOTIFY_ON_EMPTY,
1215 * so the device will send a transmit interrupt when the queue is empty
1216 * and we can reclaim it in one sweep.
1220 * Clear any data that arrived early on the receive queue and populate
1221 * it with free buffers that the device can use moving forward.
1223 while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len)) != NULL) {
1224 virtio_free_chain(ve);
1226 (void) vioif_populate_rx(sc, KM_SLEEP);
1228 return (DDI_SUCCESS);
1231 void
1232 vioif_stop(void *arg)
1234 struct vioif_softc *sc = arg;
1236 virtio_stop_vq_intr(sc->sc_rx_vq);
1239 /* ARGSUSED */
1240 static int
1241 vioif_stat(void *arg, uint_t stat, uint64_t *val)
1243 struct vioif_softc *sc = arg;
1245 switch (stat) {
1246 case MAC_STAT_IERRORS:
1247 *val = sc->sc_ierrors;
1248 break;
1249 case MAC_STAT_OERRORS:
1250 *val = sc->sc_oerrors;
1251 break;
1252 case MAC_STAT_MULTIRCV:
1253 *val = sc->sc_multircv;
1254 break;
1255 case MAC_STAT_BRDCSTRCV:
1256 *val = sc->sc_brdcstrcv;
1257 break;
1258 case MAC_STAT_MULTIXMT:
1259 *val = sc->sc_multixmt;
1260 break;
1261 case MAC_STAT_BRDCSTXMT:
1262 *val = sc->sc_brdcstxmt;
1263 break;
1264 case MAC_STAT_IPACKETS:
1265 *val = sc->sc_ipackets;
1266 break;
1267 case MAC_STAT_RBYTES:
1268 *val = sc->sc_rbytes;
1269 break;
1270 case MAC_STAT_OPACKETS:
1271 *val = sc->sc_opackets;
1272 break;
1273 case MAC_STAT_OBYTES:
1274 *val = sc->sc_obytes;
1275 break;
1276 case MAC_STAT_NORCVBUF:
1277 *val = sc->sc_norecvbuf;
1278 break;
1279 case MAC_STAT_NOXMTBUF:
1280 *val = sc->sc_notxbuf;
1281 break;
1282 case MAC_STAT_IFSPEED:
1283 /* always 1 Gbit */
1284 *val = 1000000000ULL;
1285 break;
1286 case ETHER_STAT_LINK_DUPLEX:
1287 /* virtual device, always full-duplex */
1288 *val = LINK_DUPLEX_FULL;
1289 break;
1291 default:
1292 return (ENOTSUP);
1295 return (DDI_SUCCESS);
1298 static int
1299 vioif_set_prop_private(struct vioif_softc *sc, const char *pr_name,
1300 uint_t pr_valsize, const void *pr_val)
1302 _NOTE(ARGUNUSED(pr_valsize));
1304 long result;
1306 if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1308 if (pr_val == NULL)
1309 return (EINVAL);
1311 (void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
1313 if (result < 0 || result > VIOIF_TX_THRESH_MAX)
1314 return (EINVAL);
1315 sc->sc_txcopy_thresh = result;
1317 if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
1319 if (pr_val == NULL)
1320 return (EINVAL);
1322 (void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
1324 if (result < 0 || result > VIOIF_RX_THRESH_MAX)
1325 return (EINVAL);
1326 sc->sc_rxcopy_thresh = result;
1328 return (0);
1331 static int
1332 vioif_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1333 uint_t pr_valsize, const void *pr_val)
1335 struct vioif_softc *sc = arg;
1336 const uint32_t *new_mtu;
1337 int err;
1339 switch (pr_num) {
1340 case MAC_PROP_MTU:
1341 new_mtu = pr_val;
1343 if (*new_mtu > MAX_MTU) {
1344 return (EINVAL);
1347 err = mac_maxsdu_update(sc->sc_mac_handle, *new_mtu);
1348 if (err) {
1349 return (err);
1351 break;
1352 case MAC_PROP_PRIVATE:
1353 err = vioif_set_prop_private(sc, pr_name,
1354 pr_valsize, pr_val);
1355 if (err)
1356 return (err);
1357 break;
1358 default:
1359 return (ENOTSUP);
1362 return (0);
1365 static int
1366 vioif_get_prop_private(struct vioif_softc *sc, const char *pr_name,
1367 uint_t pr_valsize, void *pr_val)
1369 int err = ENOTSUP;
1370 int value;
1372 if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1374 value = sc->sc_txcopy_thresh;
1375 err = 0;
1376 goto done;
1378 if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
1380 value = sc->sc_rxcopy_thresh;
1381 err = 0;
1382 goto done;
1384 done:
1385 if (err == 0) {
1386 (void) snprintf(pr_val, pr_valsize, "%d", value);
1388 return (err);
1391 static int
1392 vioif_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1393 uint_t pr_valsize, void *pr_val)
1395 struct vioif_softc *sc = arg;
1396 int err = ENOTSUP;
1398 switch (pr_num) {
1399 case MAC_PROP_PRIVATE:
1400 err = vioif_get_prop_private(sc, pr_name,
1401 pr_valsize, pr_val);
1402 break;
1403 default:
1404 break;
1406 return (err);
1409 static void
1410 vioif_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1411 mac_prop_info_handle_t prh)
1413 struct vioif_softc *sc = arg;
1414 char valstr[64];
1415 int value;
1417 switch (pr_num) {
1418 case MAC_PROP_MTU:
1419 mac_prop_info_set_range_uint32(prh, ETHERMIN, MAX_MTU);
1420 break;
1422 case MAC_PROP_PRIVATE:
1423 bzero(valstr, sizeof (valstr));
1424 if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1425 value = sc->sc_txcopy_thresh;
1426 } else if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
1427 value = sc->sc_rxcopy_thresh;
1428 } else {
1429 return;
1431 (void) snprintf(valstr, sizeof (valstr), "%d", value);
1432 break;
1434 default:
1435 break;
1439 static boolean_t
1440 vioif_getcapab(void *arg, mac_capab_t cap, void *cap_data)
1442 struct vioif_softc *sc = arg;
1444 switch (cap) {
1445 case MAC_CAPAB_HCKSUM:
1446 if (sc->sc_tx_csum) {
1447 uint32_t *txflags = cap_data;
1449 *txflags = HCKSUM_INET_PARTIAL;
1450 return (B_TRUE);
1452 return (B_FALSE);
1453 case MAC_CAPAB_LSO:
1454 if (sc->sc_tx_tso4) {
1455 mac_capab_lso_t *cap_lso = cap_data;
1457 cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
1458 cap_lso->lso_basic_tcp_ipv4.lso_max = MAX_MTU;
1459 return (B_TRUE);
1461 return (B_FALSE);
1462 default:
1463 break;
1465 return (B_FALSE);
1468 static mac_callbacks_t vioif_m_callbacks = {
1469 .mc_callbacks = (MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO),
1470 .mc_getstat = vioif_stat,
1471 .mc_start = vioif_start,
1472 .mc_stop = vioif_stop,
1473 .mc_setpromisc = vioif_promisc,
1474 .mc_multicst = vioif_multicst,
1475 .mc_unicst = vioif_unicst,
1476 .mc_tx = vioif_tx,
1477 /* Optional callbacks */
1478 .mc_reserved = NULL, /* reserved */
1479 .mc_ioctl = NULL, /* mc_ioctl */
1480 .mc_getcapab = vioif_getcapab, /* mc_getcapab */
1481 .mc_open = NULL, /* mc_open */
1482 .mc_close = NULL, /* mc_close */
1483 .mc_setprop = vioif_setprop,
1484 .mc_getprop = vioif_getprop,
1485 .mc_propinfo = vioif_propinfo,
1488 static void
1489 vioif_show_features(struct vioif_softc *sc, const char *prefix,
1490 uint32_t features)
1492 char buf[512];
1493 char *bufp = buf;
1494 char *bufend = buf + sizeof (buf);
1496 /* LINTED E_PTRDIFF_OVERFLOW */
1497 bufp += snprintf(bufp, bufend - bufp, prefix);
1498 /* LINTED E_PTRDIFF_OVERFLOW */
1499 bufp += virtio_show_features(features, bufp, bufend - bufp);
1500 *bufp = '\0';
1502 /* Using '!' to only CE_NOTE this to the system log. */
1503 dev_err(sc->sc_dev, CE_NOTE, "!%s Vioif (%b)", buf, features,
1504 VIRTIO_NET_FEATURE_BITS);
1508 * Find out which features are supported by the device and
1509 * choose which ones we wish to use.
1511 static int
1512 vioif_dev_features(struct vioif_softc *sc)
1514 uint32_t host_features;
1516 host_features = virtio_negotiate_features(&sc->sc_virtio,
1517 VIRTIO_NET_F_CSUM |
1518 VIRTIO_NET_F_HOST_TSO4 |
1519 VIRTIO_NET_F_HOST_ECN |
1520 VIRTIO_NET_F_MAC |
1521 VIRTIO_NET_F_STATUS |
1522 VIRTIO_F_RING_INDIRECT_DESC |
1523 VIRTIO_F_NOTIFY_ON_EMPTY);
1525 vioif_show_features(sc, "Host features: ", host_features);
1526 vioif_show_features(sc, "Negotiated features: ",
1527 sc->sc_virtio.sc_features);
1529 if (!(sc->sc_virtio.sc_features & VIRTIO_F_RING_INDIRECT_DESC)) {
1530 dev_err(sc->sc_dev, CE_WARN,
1531 "Host does not support RING_INDIRECT_DESC. Cannot attach.");
1532 return (DDI_FAILURE);
1535 return (DDI_SUCCESS);
1538 static int
1539 vioif_has_feature(struct vioif_softc *sc, uint32_t feature)
1541 return (virtio_has_feature(&sc->sc_virtio, feature));
1544 static void
1545 vioif_set_mac(struct vioif_softc *sc)
1547 int i;
1549 for (i = 0; i < ETHERADDRL; i++) {
1550 virtio_write_device_config_1(&sc->sc_virtio,
1551 VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
1553 sc->sc_mac_from_host = 0;
1556 /* Get the mac address out of the hardware, or make up one. */
1557 static void
1558 vioif_get_mac(struct vioif_softc *sc)
1560 int i;
1561 if (sc->sc_virtio.sc_features & VIRTIO_NET_F_MAC) {
1562 for (i = 0; i < ETHERADDRL; i++) {
1563 sc->sc_mac[i] = virtio_read_device_config_1(
1564 &sc->sc_virtio,
1565 VIRTIO_NET_CONFIG_MAC + i);
1567 sc->sc_mac_from_host = 1;
1568 } else {
1569 /* Get a few random bytes */
1570 (void) random_get_pseudo_bytes(sc->sc_mac, ETHERADDRL);
1571 /* Make sure it's a unicast MAC */
1572 sc->sc_mac[0] &= ~1;
1573 /* Set the "locally administered" bit */
1574 sc->sc_mac[1] |= 2;
1576 vioif_set_mac(sc);
1578 dev_err(sc->sc_dev, CE_NOTE,
1579 "!Generated a random MAC address: %s",
1580 ether_sprintf((struct ether_addr *)sc->sc_mac));
1585 * Virtqueue interrupt handlers
1587 /* ARGSUSED */
1588 uint_t
1589 vioif_rx_handler(caddr_t arg1, caddr_t arg2)
1591 struct virtio_softc *vsc = (void *) arg1;
1592 struct vioif_softc *sc = __containerof(vsc,
1593 struct vioif_softc, sc_virtio);
1596 * The return values of these functions are not needed but they make
1597 * debugging interrupts simpler because you can use them to detect when
1598 * stuff was processed and repopulated in this handler.
1600 (void) vioif_process_rx(sc);
1601 (void) vioif_populate_rx(sc, KM_NOSLEEP);
1603 return (DDI_INTR_CLAIMED);
1606 /* ARGSUSED */
1607 uint_t
1608 vioif_tx_handler(caddr_t arg1, caddr_t arg2)
1610 struct virtio_softc *vsc = (void *)arg1;
1611 struct vioif_softc *sc = __containerof(vsc,
1612 struct vioif_softc, sc_virtio);
1615 * The return value of this function is not needed but makes debugging
1616 * interrupts simpler because you can use it to detect if anything was
1617 * reclaimed in this handler.
1619 (void) vioif_reclaim_used_tx(sc);
1621 return (DDI_INTR_CLAIMED);
1624 static int
1625 vioif_register_ints(struct vioif_softc *sc)
1627 int ret;
1629 struct virtio_int_handler vioif_vq_h[] = {
1630 { vioif_rx_handler },
1631 { vioif_tx_handler },
1632 { NULL }
1635 ret = virtio_register_ints(&sc->sc_virtio, NULL, vioif_vq_h);
1637 return (ret);
1641 static void
1642 vioif_check_features(struct vioif_softc *sc)
1644 if (vioif_has_feature(sc, VIRTIO_NET_F_CSUM)) {
1645 /* The GSO/GRO featured depend on CSUM, check them here. */
1646 sc->sc_tx_csum = 1;
1647 sc->sc_rx_csum = 1;
1649 if (!vioif_has_feature(sc, VIRTIO_NET_F_GUEST_CSUM)) {
1650 sc->sc_rx_csum = 0;
1652 dev_err(sc->sc_dev, CE_NOTE, "!Csum enabled.");
1654 if (vioif_has_feature(sc, VIRTIO_NET_F_HOST_TSO4)) {
1656 sc->sc_tx_tso4 = 1;
1658 * We don't seem to have a way to ask the system
1659 * not to send us LSO packets with Explicit
1660 * Congestion Notification bit set, so we require
1661 * the device to support it in order to do
1662 * LSO.
1664 if (!vioif_has_feature(sc, VIRTIO_NET_F_HOST_ECN)) {
1665 dev_err(sc->sc_dev, CE_NOTE,
1666 "!TSO4 supported, but not ECN. "
1667 "Not using LSO.");
1668 sc->sc_tx_tso4 = 0;
1669 } else {
1670 dev_err(sc->sc_dev, CE_NOTE, "!LSO enabled");
1676 static int
1677 vioif_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
1679 int ret, instance;
1680 struct vioif_softc *sc;
1681 struct virtio_softc *vsc;
1682 mac_register_t *macp;
1683 char cache_name[CACHE_NAME_SIZE];
1685 instance = ddi_get_instance(devinfo);
1687 switch (cmd) {
1688 case DDI_ATTACH:
1689 break;
1691 case DDI_RESUME:
1692 case DDI_PM_RESUME:
1693 /* We do not support suspend/resume for vioif. */
1694 goto exit;
1696 default:
1697 goto exit;
1700 sc = kmem_zalloc(sizeof (struct vioif_softc), KM_SLEEP);
1701 ddi_set_driver_private(devinfo, sc);
1703 vsc = &sc->sc_virtio;
1705 /* Duplicate for less typing */
1706 sc->sc_dev = devinfo;
1707 vsc->sc_dev = devinfo;
1710 * Initialize interrupt kstat.
1712 sc->sc_intrstat = kstat_create("vioif", instance, "intr", "controller",
1713 KSTAT_TYPE_INTR, 1, 0);
1714 if (sc->sc_intrstat == NULL) {
1715 dev_err(devinfo, CE_WARN, "kstat_create failed");
1716 goto exit_intrstat;
1718 kstat_install(sc->sc_intrstat);
1720 /* map BAR 0 */
1721 ret = ddi_regs_map_setup(devinfo, 1,
1722 (caddr_t *)&sc->sc_virtio.sc_io_addr,
1723 0, 0, &vioif_attr, &sc->sc_virtio.sc_ioh);
1724 if (ret != DDI_SUCCESS) {
1725 dev_err(devinfo, CE_WARN, "unable to map bar 0: %d", ret);
1726 goto exit_map;
1729 virtio_device_reset(&sc->sc_virtio);
1730 virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
1731 virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
1733 ret = vioif_dev_features(sc);
1734 if (ret)
1735 goto exit_features;
1737 vsc->sc_nvqs = vioif_has_feature(sc, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
1739 (void) snprintf(cache_name, CACHE_NAME_SIZE, "vioif%d_rx", instance);
1740 sc->sc_rxbuf_cache = kmem_cache_create(cache_name,
1741 sizeof (struct vioif_rx_buf), 0, vioif_rx_construct,
1742 vioif_rx_destruct, NULL, sc, NULL, KM_SLEEP);
1743 if (sc->sc_rxbuf_cache == NULL) {
1744 dev_err(sc->sc_dev, CE_WARN, "Can't allocate the buffer cache");
1745 goto exit_cache;
1748 ret = vioif_register_ints(sc);
1749 if (ret) {
1750 dev_err(sc->sc_dev, CE_WARN,
1751 "Failed to allocate interrupt(s)!");
1752 goto exit_ints;
1756 * Register layout determined, can now access the
1757 * device-specific bits
1759 vioif_get_mac(sc);
1761 sc->sc_rx_vq = virtio_alloc_vq(&sc->sc_virtio, 0,
1762 VIOIF_RX_QLEN, VIOIF_INDIRECT_MAX, "rx");
1763 if (!sc->sc_rx_vq)
1764 goto exit_alloc1;
1765 virtio_stop_vq_intr(sc->sc_rx_vq);
1767 sc->sc_tx_vq = virtio_alloc_vq(&sc->sc_virtio, 1,
1768 VIOIF_TX_QLEN, VIOIF_INDIRECT_MAX, "tx");
1769 if (!sc->sc_tx_vq)
1770 goto exit_alloc2;
1771 virtio_stop_vq_intr(sc->sc_tx_vq);
1773 if (vioif_has_feature(sc, VIRTIO_NET_F_CTRL_VQ)) {
1774 sc->sc_ctrl_vq = virtio_alloc_vq(&sc->sc_virtio, 2,
1775 VIOIF_CTRL_QLEN, 0, "ctrl");
1776 if (!sc->sc_ctrl_vq) {
1777 goto exit_alloc3;
1779 virtio_stop_vq_intr(sc->sc_ctrl_vq);
1782 virtio_set_status(&sc->sc_virtio,
1783 VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
1785 sc->sc_rxloan = 0;
1787 /* set some reasonable-small default values */
1788 sc->sc_rxcopy_thresh = 300;
1789 sc->sc_txcopy_thresh = 300;
1790 sc->sc_mtu = ETHERMTU;
1792 vioif_check_features(sc);
1794 if (vioif_alloc_mems(sc) != 0)
1795 goto exit_alloc_mems;
1797 if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
1798 dev_err(devinfo, CE_WARN, "Failed to allocate a mac_register");
1799 goto exit_macalloc;
1802 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1803 macp->m_driver = sc;
1804 macp->m_dip = devinfo;
1805 macp->m_src_addr = sc->sc_mac;
1806 macp->m_callbacks = &vioif_m_callbacks;
1807 macp->m_min_sdu = 0;
1808 macp->m_max_sdu = sc->sc_mtu;
1809 macp->m_margin = VLAN_TAGSZ;
1810 macp->m_priv_props = vioif_priv_props;
1812 sc->sc_macp = macp;
1814 /* Pre-fill the rx ring. */
1815 (void) vioif_populate_rx(sc, KM_SLEEP);
1817 ret = mac_register(macp, &sc->sc_mac_handle);
1818 if (ret != 0) {
1819 dev_err(devinfo, CE_WARN, "vioif_attach: "
1820 "mac_register() failed, ret=%d", ret);
1821 goto exit_register;
1824 ret = virtio_enable_ints(&sc->sc_virtio);
1825 if (ret) {
1826 dev_err(devinfo, CE_WARN, "Failed to enable interrupts");
1827 goto exit_enable_ints;
1830 mac_link_update(sc->sc_mac_handle, LINK_STATE_UP);
1831 return (DDI_SUCCESS);
1833 exit_enable_ints:
1834 (void) mac_unregister(sc->sc_mac_handle);
1835 exit_register:
1836 mac_free(macp);
1837 exit_macalloc:
1838 vioif_free_mems(sc);
1839 exit_alloc_mems:
1840 virtio_release_ints(&sc->sc_virtio);
1841 if (sc->sc_ctrl_vq)
1842 virtio_free_vq(sc->sc_ctrl_vq);
1843 exit_alloc3:
1844 virtio_free_vq(sc->sc_tx_vq);
1845 exit_alloc2:
1846 virtio_free_vq(sc->sc_rx_vq);
1847 exit_alloc1:
1848 exit_ints:
1849 kmem_cache_destroy(sc->sc_rxbuf_cache);
1850 exit_cache:
1851 exit_features:
1852 virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
1853 ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
1854 exit_intrstat:
1855 exit_map:
1856 kstat_delete(sc->sc_intrstat);
1857 kmem_free(sc, sizeof (struct vioif_softc));
1858 exit:
1859 return (DDI_FAILURE);
1862 static int
1863 vioif_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
1865 struct vioif_softc *sc;
1867 if ((sc = ddi_get_driver_private(devinfo)) == NULL)
1868 return (DDI_FAILURE);
1870 switch (cmd) {
1871 case DDI_DETACH:
1872 break;
1874 case DDI_PM_SUSPEND:
1875 /* We do not support suspend/resume for vioif. */
1876 return (DDI_FAILURE);
1878 default:
1879 return (DDI_FAILURE);
1882 if (sc->sc_rxloan > 0) {
1883 dev_err(devinfo, CE_WARN, "!Some rx buffers are still upstream,"
1884 " not detaching.");
1885 return (DDI_FAILURE);
1888 virtio_stop_vq_intr(sc->sc_rx_vq);
1889 virtio_stop_vq_intr(sc->sc_tx_vq);
1891 virtio_release_ints(&sc->sc_virtio);
1893 if (mac_unregister(sc->sc_mac_handle)) {
1894 return (DDI_FAILURE);
1897 mac_free(sc->sc_macp);
1899 vioif_free_mems(sc);
1900 virtio_free_vq(sc->sc_rx_vq);
1901 virtio_free_vq(sc->sc_tx_vq);
1903 virtio_device_reset(&sc->sc_virtio);
1905 ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
1907 kmem_cache_destroy(sc->sc_rxbuf_cache);
1908 kstat_delete(sc->sc_intrstat);
1909 kmem_free(sc, sizeof (struct vioif_softc));
1911 return (DDI_SUCCESS);
1914 static int
1915 vioif_quiesce(dev_info_t *devinfo)
1917 struct vioif_softc *sc;
1919 if ((sc = ddi_get_driver_private(devinfo)) == NULL)
1920 return (DDI_FAILURE);
1922 virtio_stop_vq_intr(sc->sc_rx_vq);
1923 virtio_stop_vq_intr(sc->sc_tx_vq);
1924 virtio_device_reset(&sc->sc_virtio);
1926 return (DDI_SUCCESS);
1930 _init(void)
1932 int ret = 0;
1934 mac_init_ops(&vioif_ops, "vioif");
1936 ret = mod_install(&modlinkage);
1937 if (ret != DDI_SUCCESS) {
1938 mac_fini_ops(&vioif_ops);
1939 return (ret);
1942 return (0);
1946 _fini(void)
1948 int ret;
1950 ret = mod_remove(&modlinkage);
1951 if (ret == DDI_SUCCESS) {
1952 mac_fini_ops(&vioif_ops);
1955 return (ret);
1959 _info(struct modinfo *pModinfo)
1961 return (mod_info(&modlinkage, pModinfo));