7764 vioif norcvbuf kstat goes up for each receive interrupt
[unleashed.git] / usr / src / uts / common / io / vioif / vioif.c
blob4bc1564a6be853fda69df8192f6c924332b77126
1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
13 * Copyright 2013 Nexenta Inc. All rights reserved.
14 * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
17 /* Based on the NetBSD virtio driver by Minoura Makoto. */
19 * Copyright (c) 2010 Minoura Makoto.
20 * All rights reserved.
22 * Redistribution and use in source and binary forms, with or without
23 * modification, are permitted provided that the following conditions
24 * are met:
25 * 1. Redistributions of source code must retain the above copyright
26 * notice, this list of conditions and the following disclaimer.
27 * 2. Redistributions in binary form must reproduce the above copyright
28 * notice, this list of conditions and the following disclaimer in the
29 * documentation and/or other materials provided with the distribution.
31 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
32 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
34 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
35 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
37 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
38 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
39 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
40 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 #include <sys/types.h>
44 #include <sys/errno.h>
45 #include <sys/param.h>
46 #include <sys/stropts.h>
47 #include <sys/stream.h>
48 #include <sys/strsubr.h>
49 #include <sys/kmem.h>
50 #include <sys/conf.h>
51 #include <sys/devops.h>
52 #include <sys/ksynch.h>
53 #include <sys/stat.h>
54 #include <sys/modctl.h>
55 #include <sys/debug.h>
56 #include <sys/pci.h>
57 #include <sys/ethernet.h>
58 #include <sys/vlan.h>
60 #include <sys/dlpi.h>
61 #include <sys/taskq.h>
62 #include <sys/cyclic.h>
64 #include <sys/pattr.h>
65 #include <sys/strsun.h>
67 #include <sys/random.h>
68 #include <sys/sysmacros.h>
69 #include <sys/stream.h>
71 #include <sys/mac.h>
72 #include <sys/mac_provider.h>
73 #include <sys/mac_ether.h>
75 #include "virtiovar.h"
76 #include "virtioreg.h"
/*
 * Offsets into the virtio-net device-specific configuration space.
 */
#define	VIRTIO_NET_CONFIG_MAC		0	/* 6 consecutive 8-bit values */
#define	VIRTIO_NET_CONFIG_STATUS	6	/* one 16-bit value */

/*
 * virtio-net feature bits, as negotiated with the host.
 */
/* Host handles packets with a partial checksum. */
#define	VIRTIO_NET_F_CSUM		(1 << 0)
/* Guest handles packets with a partial checksum. */
#define	VIRTIO_NET_F_GUEST_CSUM		(1 << 1)
/* Host has supplied a MAC address. */
#define	VIRTIO_NET_F_MAC		(1 << 5)
/* Host handles packets with any GSO type. */
#define	VIRTIO_NET_F_GSO		(1 << 6)
/* Guest can handle incoming TSOv4. */
#define	VIRTIO_NET_F_GUEST_TSO4		(1 << 7)
/* Guest can handle incoming TSOv6. */
#define	VIRTIO_NET_F_GUEST_TSO6		(1 << 8)
/* Guest can handle incoming TSO[6] with ECN. */
#define	VIRTIO_NET_F_GUEST_ECN		(1 << 9)
/* Guest can handle incoming UFO. */
#define	VIRTIO_NET_F_GUEST_UFO		(1 << 10)
/* Host can handle incoming TSOv4. */
#define	VIRTIO_NET_F_HOST_TSO4		(1 << 11)
/* Host can handle incoming TSOv6. */
#define	VIRTIO_NET_F_HOST_TSO6		(1 << 12)
/* Host can handle incoming TSO[6] with ECN. */
#define	VIRTIO_NET_F_HOST_ECN		(1 << 13)
/* Host can handle incoming UFO. */
#define	VIRTIO_NET_F_HOST_UFO		(1 << 14)
/* Host can merge receive buffers. */
#define	VIRTIO_NET_F_MRG_RXBUF		(1 << 15)
/* The config status field is available. */
#define	VIRTIO_NET_F_STATUS		(1 << 16)
/* A control channel is available. */
#define	VIRTIO_NET_F_CTRL_VQ		(1 << 17)
/* Control channel supports RX mode commands. */
#define	VIRTIO_NET_F_CTRL_RX		(1 << 18)
/* Control channel supports VLAN filtering. */
#define	VIRTIO_NET_F_CTRL_VLAN		(1 << 19)
/* Control channel supports extra RX mode commands. */
#define	VIRTIO_NET_F_CTRL_RX_EXTRA	(1 << 20)
/*
 * Bit-name table for the feature bits above. The first byte is the
 * output base (octal 020 == 16); each following entry is a 1-based bit
 * position (in octal) followed by that bit's name.
 */
#define	VIRTIO_NET_FEATURE_BITS \
	"\020" \
	"\1CSUM" \
	"\2GUEST_CSUM" \
	"\6MAC" \
	"\7GSO" \
	"\10GUEST_TSO4" \
	"\11GUEST_TSO6" \
	"\12GUEST_ECN" \
	"\13GUEST_UFO" \
	"\14HOST_TSO4" \
	"\15HOST_TSO6" \
	"\16HOST_ECN" \
	"\17HOST_UFO" \
	"\20MRG_RXBUF" \
	"\21STATUS" \
	"\22CTRL_VQ" \
	"\23CTRL_RX" \
	"\24CTRL_VLAN" \
	"\25CTRL_RX_EXTRA"

/* Bit in the config status field that reports the link as up. */
#define	VIRTIO_NET_S_LINK_UP	1
#pragma pack(1)
/*
 * Per-packet header that precedes the frame data on both the rx and tx
 * queues. Packed to one-byte alignment so it is exactly 10 bytes.
 */
struct virtio_net_hdr {
	uint8_t		flags;		/* VIRTIO_NET_HDR_F_* */
	uint8_t		gso_type;	/* VIRTIO_NET_HDR_GSO_* */
	uint16_t	hdr_len;
	uint16_t	gso_size;
	uint16_t	csum_start;
	uint16_t	csum_offset;
};
#pragma pack()

/* Values for virtio_net_hdr.flags. */
#define	VIRTIO_NET_HDR_F_NEEDS_CSUM	1

/* Values for virtio_net_hdr.gso_type; GSO_ECN is OR'ed into the others. */
#define	VIRTIO_NET_HDR_GSO_NONE		0
#define	VIRTIO_NET_HDR_GSO_TCPV4	1
#define	VIRTIO_NET_HDR_GSO_UDP		3
#define	VIRTIO_NET_HDR_GSO_TCPV6	4
#define	VIRTIO_NET_HDR_GSO_ECN		0x80
/*
 * Control virtqueue: every request starts with a class/command pair.
 */
#pragma pack(1)
struct virtio_net_ctrl_cmd {
	uint8_t	class;		/* VIRTIO_NET_CTRL_{RX,MAC,VLAN} */
	uint8_t	command;	/* command within that class */
};
#pragma pack()

/* RX mode class and its commands. */
#define	VIRTIO_NET_CTRL_RX		0
#define	VIRTIO_NET_CTRL_RX_PROMISC	0
#define	VIRTIO_NET_CTRL_RX_ALLMULTI	1

/* MAC filter class and its commands. */
#define	VIRTIO_NET_CTRL_MAC		1
#define	VIRTIO_NET_CTRL_MAC_TABLE_SET	0

/* VLAN filter class and its commands. */
#define	VIRTIO_NET_CTRL_VLAN		2
#define	VIRTIO_NET_CTRL_VLAN_ADD	0
#define	VIRTIO_NET_CTRL_VLAN_DEL	1
165 #pragma pack(1)
166 struct virtio_net_ctrl_status {
167 uint8_t ack;
170 struct virtio_net_ctrl_rx {
171 uint8_t onoff;
174 struct virtio_net_ctrl_mac_tbl {
175 uint32_t nentries;
176 uint8_t macs[][ETHERADDRL];
179 struct virtio_net_ctrl_vlan {
180 uint16_t id;
182 #pragma pack()
184 static int vioif_quiesce(dev_info_t *);
185 static int vioif_attach(dev_info_t *, ddi_attach_cmd_t);
186 static int vioif_detach(dev_info_t *, ddi_detach_cmd_t);
188 DDI_DEFINE_STREAM_OPS(vioif_ops,
189 nulldev, /* identify */
190 nulldev, /* probe */
191 vioif_attach, /* attach */
192 vioif_detach, /* detach */
193 nodev, /* reset */
194 NULL, /* cb_ops */
195 D_MP, /* bus_ops */
196 NULL, /* power */
197 vioif_quiesce /* quiesce */);
199 static char vioif_ident[] = "VirtIO ethernet driver";
201 /* Standard Module linkage initialization for a Streams driver */
202 extern struct mod_ops mod_driverops;
204 static struct modldrv modldrv = {
205 &mod_driverops, /* Type of module. This one is a driver */
206 vioif_ident, /* short description */
207 &vioif_ops /* driver specific ops */
210 static struct modlinkage modlinkage = {
211 MODREV_1,
213 (void *)&modldrv,
214 NULL,
218 ddi_device_acc_attr_t vioif_attr = {
219 DDI_DEVICE_ATTR_V0,
220 DDI_NEVERSWAP_ACC, /* virtio is always native byte order */
221 DDI_STORECACHING_OK_ACC,
222 DDI_DEFAULT_ACC
226 * A mapping represents a binding for a single buffer that is contiguous in the
227 * virtual address space.
229 struct vioif_buf_mapping {
230 caddr_t vbm_buf;
231 ddi_dma_handle_t vbm_dmah;
232 ddi_acc_handle_t vbm_acch;
233 ddi_dma_cookie_t vbm_dmac;
234 unsigned int vbm_ncookies;
238 * Rx buffers can be loaned upstream, so the code has
239 * to allocate them dynamically.
241 struct vioif_rx_buf {
242 struct vioif_softc *rb_sc;
243 frtn_t rb_frtn;
245 struct vioif_buf_mapping rb_mapping;
249 * Tx buffers have two mapping types. One, "inline", is pre-allocated and is
250 * used to hold the virtio_net_header. Small packets also get copied there, as
251 * it's faster then mapping them. Bigger packets get mapped using the "external"
252 * mapping array. An array is used, because a packet may consist of muptiple
253 * fragments, so each fragment gets bound to an entry. According to my
254 * observations, the number of fragments does not exceed 2, but just in case,
255 * a bigger, up to VIOIF_INDIRECT_MAX - 1 array is allocated. To save resources,
256 * the dma handles are allocated lazily in the tx path.
258 struct vioif_tx_buf {
259 mblk_t *tb_mp;
261 /* inline buffer */
262 struct vioif_buf_mapping tb_inline_mapping;
264 /* External buffers */
265 struct vioif_buf_mapping *tb_external_mapping;
266 unsigned int tb_external_num;
269 struct vioif_softc {
270 dev_info_t *sc_dev; /* mirrors virtio_softc->sc_dev */
271 struct virtio_softc sc_virtio;
273 mac_handle_t sc_mac_handle;
274 mac_register_t *sc_macp;
276 struct virtqueue *sc_rx_vq;
277 struct virtqueue *sc_tx_vq;
278 struct virtqueue *sc_ctrl_vq;
280 unsigned int sc_tx_stopped:1;
282 /* Feature bits. */
283 unsigned int sc_rx_csum:1;
284 unsigned int sc_tx_csum:1;
285 unsigned int sc_tx_tso4:1;
287 int sc_mtu;
288 uint8_t sc_mac[ETHERADDRL];
290 * For rx buffers, we keep a pointer array, because the buffers
291 * can be loaned upstream, and we have to repopulate the array with
292 * new members.
294 struct vioif_rx_buf **sc_rxbufs;
297 * For tx, we just allocate an array of buffers. The packet can
298 * either be copied into the inline buffer, or the external mapping
299 * could be used to map the packet
301 struct vioif_tx_buf *sc_txbufs;
303 kstat_t *sc_intrstat;
305 * We "loan" rx buffers upstream and reuse them after they are
306 * freed. This lets us avoid allocations in the hot path.
308 kmem_cache_t *sc_rxbuf_cache;
309 ulong_t sc_rxloan;
311 /* Copying small packets turns out to be faster then mapping them. */
312 unsigned long sc_rxcopy_thresh;
313 unsigned long sc_txcopy_thresh;
314 /* Some statistic coming here */
315 uint64_t sc_ipackets;
316 uint64_t sc_opackets;
317 uint64_t sc_rbytes;
318 uint64_t sc_obytes;
319 uint64_t sc_brdcstxmt;
320 uint64_t sc_brdcstrcv;
321 uint64_t sc_multixmt;
322 uint64_t sc_multircv;
323 uint64_t sc_norecvbuf;
324 uint64_t sc_notxbuf;
325 uint64_t sc_ierrors;
326 uint64_t sc_oerrors;
#define	ETHER_HEADER_LEN	sizeof (struct ether_header)

/* MAX_PAYLOAD covers the MTU plus the ethernet header. */
#define	MAX_PAYLOAD		65535
#define	MAX_MTU			(MAX_PAYLOAD - ETHER_HEADER_LEN)
#define	DEFAULT_MTU		ETHERMTU

/*
 * Yes, this costs 8M per device. There is no point in being smart and
 * using merged rx buffers (VIRTIO_NET_F_MRG_RXBUF): vhost does not
 * support them, and vhost is what we expect to run against in production.
 */
/* An rx buffer holds both the virtio_net_hdr and the packet data. */
#define	VIOIF_RX_SIZE	(MAX_PAYLOAD + sizeof (struct virtio_net_hdr))

/*
 * Aligning the IP header would win us a bit, but the host wins a lot
 * more from moving aligned buffers. Might need more thought.
 */
#define	VIOIF_IP_ALIGN		0

/* Maximum number of indirect descriptors; somewhat arbitrary. */
#define	VIOIF_INDIRECT_MAX	128

/*
 * A reasonably large buffer is pre-allocated per tx slot so that small
 * packets can be copied into it. Bigger packets are mapped, and packets
 * with multiple cookies are mapped as indirect buffers.
 */
#define	VIOIF_TX_INLINE_SIZE	2048

/* Zero requests the device's native queue size for every queue. */
#define	VIOIF_RX_QLEN		0
#define	VIOIF_TX_QLEN		0
#define	VIOIF_CTRL_QLEN		0
366 static uchar_t vioif_broadcast[ETHERADDRL] = {
367 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
370 #define VIOIF_TX_THRESH_MAX 640
371 #define VIOIF_RX_THRESH_MAX 640
373 #define CACHE_NAME_SIZE 32
375 static char vioif_txcopy_thresh[] =
376 "vioif_txcopy_thresh";
377 static char vioif_rxcopy_thresh[] =
378 "vioif_rxcopy_thresh";
380 static char *vioif_priv_props[] = {
381 vioif_txcopy_thresh,
382 vioif_rxcopy_thresh,
383 NULL
386 /* Add up to ddi? */
387 static ddi_dma_cookie_t *
388 vioif_dma_curr_cookie(ddi_dma_handle_t dmah)
390 ddi_dma_impl_t *dmah_impl = (void *) dmah;
391 ASSERT(dmah_impl->dmai_cookie);
392 return (dmah_impl->dmai_cookie);
395 static void
396 vioif_dma_reset_cookie(ddi_dma_handle_t dmah, ddi_dma_cookie_t *dmac)
398 ddi_dma_impl_t *dmah_impl = (void *) dmah;
399 dmah_impl->dmai_cookie = dmac;
402 static link_state_t
403 vioif_link_state(struct vioif_softc *sc)
405 if (sc->sc_virtio.sc_features & VIRTIO_NET_F_STATUS) {
406 if (virtio_read_device_config_2(&sc->sc_virtio,
407 VIRTIO_NET_CONFIG_STATUS) & VIRTIO_NET_S_LINK_UP) {
408 return (LINK_STATE_UP);
409 } else {
410 return (LINK_STATE_DOWN);
414 return (LINK_STATE_UP);
417 static ddi_dma_attr_t vioif_inline_buf_dma_attr = {
418 DMA_ATTR_V0, /* Version number */
419 0, /* low address */
420 0xFFFFFFFFFFFFFFFF, /* high address */
421 0xFFFFFFFF, /* counter register max */
422 1, /* page alignment */
423 1, /* burst sizes: 1 - 32 */
424 1, /* minimum transfer size */
425 0xFFFFFFFF, /* max transfer size */
426 0xFFFFFFFFFFFFFFF, /* address register max */
427 1, /* scatter-gather capacity */
428 1, /* device operates on bytes */
429 0, /* attr flag: set to 0 */
432 static ddi_dma_attr_t vioif_mapped_buf_dma_attr = {
433 DMA_ATTR_V0, /* Version number */
434 0, /* low address */
435 0xFFFFFFFFFFFFFFFF, /* high address */
436 0xFFFFFFFF, /* counter register max */
437 1, /* page alignment */
438 1, /* burst sizes: 1 - 32 */
439 1, /* minimum transfer size */
440 0xFFFFFFFF, /* max transfer size */
441 0xFFFFFFFFFFFFFFF, /* address register max */
443 /* One entry is used for the virtio_net_hdr on the tx path */
444 VIOIF_INDIRECT_MAX - 1, /* scatter-gather capacity */
445 1, /* device operates on bytes */
446 0, /* attr flag: set to 0 */
449 static ddi_device_acc_attr_t vioif_bufattr = {
450 DDI_DEVICE_ATTR_V0,
451 DDI_NEVERSWAP_ACC,
452 DDI_STORECACHING_OK_ACC,
453 DDI_DEFAULT_ACC
456 static void
457 vioif_rx_free(caddr_t free_arg)
459 struct vioif_rx_buf *buf = (void *) free_arg;
460 struct vioif_softc *sc = buf->rb_sc;
462 kmem_cache_free(sc->sc_rxbuf_cache, buf);
463 atomic_dec_ulong(&sc->sc_rxloan);
466 static int
467 vioif_rx_construct(void *buffer, void *user_arg, int kmflags)
469 _NOTE(ARGUNUSED(kmflags));
470 struct vioif_softc *sc = user_arg;
471 struct vioif_rx_buf *buf = buffer;
472 size_t len;
474 if (ddi_dma_alloc_handle(sc->sc_dev, &vioif_mapped_buf_dma_attr,
475 DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmah)) {
476 dev_err(sc->sc_dev, CE_WARN,
477 "Can't allocate dma handle for rx buffer");
478 goto exit_handle;
481 if (ddi_dma_mem_alloc(buf->rb_mapping.vbm_dmah,
482 VIOIF_RX_SIZE + sizeof (struct virtio_net_hdr),
483 &vioif_bufattr, DDI_DMA_STREAMING, DDI_DMA_SLEEP,
484 NULL, &buf->rb_mapping.vbm_buf, &len, &buf->rb_mapping.vbm_acch)) {
485 dev_err(sc->sc_dev, CE_WARN,
486 "Can't allocate rx buffer");
487 goto exit_alloc;
489 ASSERT(len >= VIOIF_RX_SIZE);
491 if (ddi_dma_addr_bind_handle(buf->rb_mapping.vbm_dmah, NULL,
492 buf->rb_mapping.vbm_buf, len, DDI_DMA_READ | DDI_DMA_STREAMING,
493 DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmac,
494 &buf->rb_mapping.vbm_ncookies)) {
495 dev_err(sc->sc_dev, CE_WARN, "Can't bind tx buffer");
497 goto exit_bind;
500 ASSERT(buf->rb_mapping.vbm_ncookies <= VIOIF_INDIRECT_MAX);
502 buf->rb_sc = sc;
503 buf->rb_frtn.free_arg = (void *) buf;
504 buf->rb_frtn.free_func = vioif_rx_free;
506 return (0);
507 exit_bind:
508 ddi_dma_mem_free(&buf->rb_mapping.vbm_acch);
509 exit_alloc:
510 ddi_dma_free_handle(&buf->rb_mapping.vbm_dmah);
511 exit_handle:
513 return (ENOMEM);
516 static void
517 vioif_rx_destruct(void *buffer, void *user_arg)
519 _NOTE(ARGUNUSED(user_arg));
520 struct vioif_rx_buf *buf = buffer;
522 ASSERT(buf->rb_mapping.vbm_acch);
523 ASSERT(buf->rb_mapping.vbm_acch);
525 (void) ddi_dma_unbind_handle(buf->rb_mapping.vbm_dmah);
526 ddi_dma_mem_free(&buf->rb_mapping.vbm_acch);
527 ddi_dma_free_handle(&buf->rb_mapping.vbm_dmah);
530 static void
531 vioif_free_mems(struct vioif_softc *sc)
533 int i;
535 for (i = 0; i < sc->sc_tx_vq->vq_num; i++) {
536 struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
537 int j;
539 /* Tear down the internal mapping. */
541 ASSERT(buf->tb_inline_mapping.vbm_acch);
542 ASSERT(buf->tb_inline_mapping.vbm_dmah);
544 (void) ddi_dma_unbind_handle(buf->tb_inline_mapping.vbm_dmah);
545 ddi_dma_mem_free(&buf->tb_inline_mapping.vbm_acch);
546 ddi_dma_free_handle(&buf->tb_inline_mapping.vbm_dmah);
548 /* We should not see any in-flight buffers at this point. */
549 ASSERT(!buf->tb_mp);
551 /* Free all the dma hdnales we allocated lazily. */
552 for (j = 0; buf->tb_external_mapping[j].vbm_dmah; j++)
553 ddi_dma_free_handle(
554 &buf->tb_external_mapping[j].vbm_dmah);
555 /* Free the external mapping array. */
556 kmem_free(buf->tb_external_mapping,
557 sizeof (struct vioif_tx_buf) * VIOIF_INDIRECT_MAX - 1);
560 kmem_free(sc->sc_txbufs, sizeof (struct vioif_tx_buf) *
561 sc->sc_tx_vq->vq_num);
563 for (i = 0; i < sc->sc_rx_vq->vq_num; i++) {
564 struct vioif_rx_buf *buf = sc->sc_rxbufs[i];
566 if (buf)
567 kmem_cache_free(sc->sc_rxbuf_cache, buf);
569 kmem_free(sc->sc_rxbufs, sizeof (struct vioif_rx_buf *) *
570 sc->sc_rx_vq->vq_num);
573 static int
574 vioif_alloc_mems(struct vioif_softc *sc)
576 int i, txqsize, rxqsize;
577 size_t len;
578 unsigned int nsegments;
580 txqsize = sc->sc_tx_vq->vq_num;
581 rxqsize = sc->sc_rx_vq->vq_num;
583 sc->sc_txbufs = kmem_zalloc(sizeof (struct vioif_tx_buf) * txqsize,
584 KM_SLEEP);
585 if (sc->sc_txbufs == NULL) {
586 dev_err(sc->sc_dev, CE_WARN,
587 "Failed to allocate the tx buffers array");
588 goto exit_txalloc;
592 * We don't allocate the rx vioif_bufs, just the pointers, as
593 * rx vioif_bufs can be loaned upstream, and we don't know the
594 * total number we need.
596 sc->sc_rxbufs = kmem_zalloc(sizeof (struct vioif_rx_buf *) * rxqsize,
597 KM_SLEEP);
598 if (sc->sc_rxbufs == NULL) {
599 dev_err(sc->sc_dev, CE_WARN,
600 "Failed to allocate the rx buffers pointer array");
601 goto exit_rxalloc;
604 for (i = 0; i < txqsize; i++) {
605 struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
607 /* Allocate and bind an inline mapping. */
609 if (ddi_dma_alloc_handle(sc->sc_dev,
610 &vioif_inline_buf_dma_attr,
611 DDI_DMA_SLEEP, NULL, &buf->tb_inline_mapping.vbm_dmah)) {
613 dev_err(sc->sc_dev, CE_WARN,
614 "Can't allocate dma handle for tx buffer %d", i);
615 goto exit_tx;
618 if (ddi_dma_mem_alloc(buf->tb_inline_mapping.vbm_dmah,
619 VIOIF_TX_INLINE_SIZE, &vioif_bufattr, DDI_DMA_STREAMING,
620 DDI_DMA_SLEEP, NULL, &buf->tb_inline_mapping.vbm_buf,
621 &len, &buf->tb_inline_mapping.vbm_acch)) {
623 dev_err(sc->sc_dev, CE_WARN,
624 "Can't allocate tx buffer %d", i);
625 goto exit_tx;
627 ASSERT(len >= VIOIF_TX_INLINE_SIZE);
629 if (ddi_dma_addr_bind_handle(buf->tb_inline_mapping.vbm_dmah,
630 NULL, buf->tb_inline_mapping.vbm_buf, len,
631 DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL,
632 &buf->tb_inline_mapping.vbm_dmac, &nsegments)) {
634 dev_err(sc->sc_dev, CE_WARN,
635 "Can't bind tx buffer %d", i);
636 goto exit_tx;
639 /* We asked for a single segment */
640 ASSERT(nsegments == 1);
643 * We allow up to VIOIF_INDIRECT_MAX - 1 external mappings.
644 * In reality, I don't expect more then 2-3 used, but who
645 * knows.
647 buf->tb_external_mapping = kmem_zalloc(
648 sizeof (struct vioif_tx_buf) * VIOIF_INDIRECT_MAX - 1,
649 KM_SLEEP);
652 * The external mapping's dma handles are allocate lazily,
653 * as we don't expect most of them to be used..
657 return (0);
659 exit_tx:
660 for (i = 0; i < txqsize; i++) {
661 struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
663 if (buf->tb_inline_mapping.vbm_dmah)
664 (void) ddi_dma_unbind_handle(
665 buf->tb_inline_mapping.vbm_dmah);
667 if (buf->tb_inline_mapping.vbm_acch)
668 ddi_dma_mem_free(
669 &buf->tb_inline_mapping.vbm_acch);
671 if (buf->tb_inline_mapping.vbm_dmah)
672 ddi_dma_free_handle(
673 &buf->tb_inline_mapping.vbm_dmah);
675 if (buf->tb_external_mapping)
676 kmem_free(buf->tb_external_mapping,
677 sizeof (struct vioif_tx_buf) *
678 VIOIF_INDIRECT_MAX - 1);
681 kmem_free(sc->sc_rxbufs, sizeof (struct vioif_rx_buf) * rxqsize);
683 exit_rxalloc:
684 kmem_free(sc->sc_txbufs, sizeof (struct vioif_tx_buf) * txqsize);
685 exit_txalloc:
686 return (ENOMEM);
689 /* ARGSUSED */
691 vioif_multicst(void *arg, boolean_t add, const uint8_t *macaddr)
693 return (DDI_SUCCESS);
696 /* ARGSUSED */
698 vioif_promisc(void *arg, boolean_t on)
700 return (DDI_SUCCESS);
703 /* ARGSUSED */
705 vioif_unicst(void *arg, const uint8_t *macaddr)
707 return (DDI_FAILURE);
711 static uint_t
712 vioif_add_rx(struct vioif_softc *sc, int kmflag)
714 uint_t num_added = 0;
715 struct vq_entry *ve;
717 while ((ve = vq_alloc_entry(sc->sc_rx_vq)) != NULL) {
718 struct vioif_rx_buf *buf = sc->sc_rxbufs[ve->qe_index];
720 if (!buf) {
721 /* First run, allocate the buffer. */
722 buf = kmem_cache_alloc(sc->sc_rxbuf_cache, kmflag);
723 sc->sc_rxbufs[ve->qe_index] = buf;
726 /* Still nothing? Bye. */
727 if (!buf) {
728 dev_err(sc->sc_dev, CE_WARN,
729 "Can't allocate rx buffer");
730 sc->sc_norecvbuf++;
731 vq_free_entry(sc->sc_rx_vq, ve);
732 break;
735 ASSERT(buf->rb_mapping.vbm_ncookies >= 1);
738 * For an unknown reason, the virtio_net_hdr must be placed
739 * as a separate virtio queue entry.
741 virtio_ve_add_indirect_buf(ve,
742 buf->rb_mapping.vbm_dmac.dmac_laddress,
743 sizeof (struct virtio_net_hdr), B_FALSE);
745 /* Add the rest of the first cookie. */
746 virtio_ve_add_indirect_buf(ve,
747 buf->rb_mapping.vbm_dmac.dmac_laddress +
748 sizeof (struct virtio_net_hdr),
749 buf->rb_mapping.vbm_dmac.dmac_size -
750 sizeof (struct virtio_net_hdr), B_FALSE);
753 * If the buffer consists of a single cookie (unlikely for a
754 * 64-k buffer), we are done. Otherwise, add the rest of the
755 * cookies using indirect entries.
757 if (buf->rb_mapping.vbm_ncookies > 1) {
758 ddi_dma_cookie_t *first_extra_dmac;
759 ddi_dma_cookie_t dmac;
760 first_extra_dmac =
761 vioif_dma_curr_cookie(buf->rb_mapping.vbm_dmah);
763 ddi_dma_nextcookie(buf->rb_mapping.vbm_dmah, &dmac);
764 virtio_ve_add_cookie(ve, buf->rb_mapping.vbm_dmah,
765 dmac, buf->rb_mapping.vbm_ncookies - 1, B_FALSE);
766 vioif_dma_reset_cookie(buf->rb_mapping.vbm_dmah,
767 first_extra_dmac);
770 virtio_push_chain(ve, B_FALSE);
771 num_added++;
774 return (num_added);
777 static uint_t
778 vioif_populate_rx(struct vioif_softc *sc, int kmflag)
780 uint_t num_added = vioif_add_rx(sc, kmflag);
782 if (num_added > 0)
783 virtio_sync_vq(sc->sc_rx_vq);
785 return (num_added);
788 static uint_t
789 vioif_process_rx(struct vioif_softc *sc)
791 struct vq_entry *ve;
792 struct vioif_rx_buf *buf;
793 mblk_t *mphead = NULL, *lastmp = NULL, *mp;
794 uint32_t len;
795 uint_t num_processed = 0;
797 while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len))) {
799 buf = sc->sc_rxbufs[ve->qe_index];
800 ASSERT(buf);
802 if (len < sizeof (struct virtio_net_hdr)) {
803 dev_err(sc->sc_dev, CE_WARN, "RX: Cnain too small: %u",
804 len - (uint32_t)sizeof (struct virtio_net_hdr));
805 sc->sc_ierrors++;
806 virtio_free_chain(ve);
807 continue;
810 len -= sizeof (struct virtio_net_hdr);
812 * We copy small packets that happen to fit into a single
813 * cookie and reuse the buffers. For bigger ones, we loan
814 * the buffers upstream.
816 if (len < sc->sc_rxcopy_thresh) {
817 mp = allocb(len, 0);
818 if (!mp) {
819 sc->sc_norecvbuf++;
820 sc->sc_ierrors++;
822 virtio_free_chain(ve);
823 break;
826 bcopy((char *)buf->rb_mapping.vbm_buf +
827 sizeof (struct virtio_net_hdr), mp->b_rptr, len);
828 mp->b_wptr = mp->b_rptr + len;
830 } else {
831 mp = desballoc((unsigned char *)
832 buf->rb_mapping.vbm_buf +
833 sizeof (struct virtio_net_hdr) +
834 VIOIF_IP_ALIGN, len, 0, &buf->rb_frtn);
835 if (!mp) {
836 sc->sc_norecvbuf++;
837 sc->sc_ierrors++;
839 virtio_free_chain(ve);
840 break;
842 mp->b_wptr = mp->b_rptr + len;
844 atomic_inc_ulong(&sc->sc_rxloan);
846 * Buffer loaned, we will have to allocate a new one
847 * for this slot.
849 sc->sc_rxbufs[ve->qe_index] = NULL;
853 * virtio-net does not tell us if this packet is multicast
854 * or broadcast, so we have to check it.
856 if (mp->b_rptr[0] & 0x1) {
857 if (bcmp(mp->b_rptr, vioif_broadcast, ETHERADDRL) != 0)
858 sc->sc_multircv++;
859 else
860 sc->sc_brdcstrcv++;
863 sc->sc_rbytes += len;
864 sc->sc_ipackets++;
866 virtio_free_chain(ve);
868 if (lastmp == NULL) {
869 mphead = mp;
870 } else {
871 lastmp->b_next = mp;
873 lastmp = mp;
874 num_processed++;
877 if (mphead != NULL) {
878 mac_rx(sc->sc_mac_handle, NULL, mphead);
881 return (num_processed);
884 static uint_t
885 vioif_reclaim_used_tx(struct vioif_softc *sc)
887 struct vq_entry *ve;
888 struct vioif_tx_buf *buf;
889 uint32_t len;
890 mblk_t *mp;
891 uint_t num_reclaimed = 0;
893 while ((ve = virtio_pull_chain(sc->sc_tx_vq, &len))) {
894 /* We don't chain descriptors for tx, so don't expect any. */
895 ASSERT(!ve->qe_next);
897 buf = &sc->sc_txbufs[ve->qe_index];
898 mp = buf->tb_mp;
899 buf->tb_mp = NULL;
901 if (mp) {
902 for (int i = 0; i < buf->tb_external_num; i++)
903 (void) ddi_dma_unbind_handle(
904 buf->tb_external_mapping[i].vbm_dmah);
907 virtio_free_chain(ve);
909 /* External mapping used, mp was not freed in vioif_send() */
910 if (mp)
911 freemsg(mp);
912 num_reclaimed++;
915 if (sc->sc_tx_stopped && num_reclaimed > 0) {
916 sc->sc_tx_stopped = 0;
917 mac_tx_update(sc->sc_mac_handle);
920 return (num_reclaimed);
923 /* sc will be used to update stat counters. */
924 /* ARGSUSED */
925 static inline void
926 vioif_tx_inline(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
927 size_t msg_size)
929 struct vioif_tx_buf *buf;
930 buf = &sc->sc_txbufs[ve->qe_index];
932 ASSERT(buf);
934 /* Frees mp */
935 mcopymsg(mp, buf->tb_inline_mapping.vbm_buf +
936 sizeof (struct virtio_net_hdr));
938 virtio_ve_add_indirect_buf(ve,
939 buf->tb_inline_mapping.vbm_dmac.dmac_laddress +
940 sizeof (struct virtio_net_hdr), msg_size, B_TRUE);
943 static inline int
944 vioif_tx_lazy_handle_alloc(struct vioif_softc *sc, struct vioif_tx_buf *buf,
945 int i)
947 int ret = DDI_SUCCESS;
949 if (!buf->tb_external_mapping[i].vbm_dmah) {
950 ret = ddi_dma_alloc_handle(sc->sc_dev,
951 &vioif_mapped_buf_dma_attr, DDI_DMA_SLEEP, NULL,
952 &buf->tb_external_mapping[i].vbm_dmah);
953 if (ret != DDI_SUCCESS) {
954 dev_err(sc->sc_dev, CE_WARN,
955 "Can't allocate dma handle for external tx buffer");
959 return (ret);
962 static inline int
963 vioif_tx_external(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
964 size_t msg_size)
966 _NOTE(ARGUNUSED(msg_size));
968 struct vioif_tx_buf *buf;
969 mblk_t *nmp;
970 int i, j;
971 int ret = DDI_SUCCESS;
973 buf = &sc->sc_txbufs[ve->qe_index];
975 ASSERT(buf);
977 buf->tb_external_num = 0;
978 i = 0;
979 nmp = mp;
981 while (nmp) {
982 size_t len;
983 ddi_dma_cookie_t dmac;
984 unsigned int ncookies;
986 len = MBLKL(nmp);
988 * For some reason, the network stack can
989 * actually send us zero-length fragments.
991 if (len == 0) {
992 nmp = nmp->b_cont;
993 continue;
996 ret = vioif_tx_lazy_handle_alloc(sc, buf, i);
997 if (ret != DDI_SUCCESS) {
998 sc->sc_notxbuf++;
999 sc->sc_oerrors++;
1000 goto exit_lazy_alloc;
1002 ret = ddi_dma_addr_bind_handle(
1003 buf->tb_external_mapping[i].vbm_dmah, NULL,
1004 (caddr_t)nmp->b_rptr, len,
1005 DDI_DMA_WRITE | DDI_DMA_STREAMING,
1006 DDI_DMA_SLEEP, NULL, &dmac, &ncookies);
1008 if (ret != DDI_SUCCESS) {
1009 sc->sc_oerrors++;
1010 dev_err(sc->sc_dev, CE_NOTE,
1011 "TX: Failed to bind external handle");
1012 goto exit_bind;
1015 /* Check if we still fit into the indirect table. */
1016 if (virtio_ve_indirect_available(ve) < ncookies) {
1017 dev_err(sc->sc_dev, CE_NOTE,
1018 "TX: Indirect descriptor table limit reached."
1019 " It took %d fragments.", i);
1020 sc->sc_notxbuf++;
1021 sc->sc_oerrors++;
1023 ret = DDI_FAILURE;
1024 goto exit_limit;
1027 virtio_ve_add_cookie(ve, buf->tb_external_mapping[i].vbm_dmah,
1028 dmac, ncookies, B_TRUE);
1030 nmp = nmp->b_cont;
1031 i++;
1034 buf->tb_external_num = i;
1035 /* Save the mp to free it when the packet is sent. */
1036 buf->tb_mp = mp;
1038 return (DDI_SUCCESS);
1040 exit_limit:
1041 exit_bind:
1042 exit_lazy_alloc:
1044 for (j = 0; j < i; j++) {
1045 (void) ddi_dma_unbind_handle(
1046 buf->tb_external_mapping[j].vbm_dmah);
1049 return (ret);
1052 static boolean_t
1053 vioif_send(struct vioif_softc *sc, mblk_t *mp)
1055 struct vq_entry *ve;
1056 struct vioif_tx_buf *buf;
1057 struct virtio_net_hdr *net_header = NULL;
1058 size_t msg_size = 0;
1059 uint32_t csum_start;
1060 uint32_t csum_stuff;
1061 uint32_t csum_flags;
1062 uint32_t lso_flags;
1063 uint32_t lso_mss;
1064 mblk_t *nmp;
1065 int ret;
1066 boolean_t lso_required = B_FALSE;
1068 for (nmp = mp; nmp; nmp = nmp->b_cont)
1069 msg_size += MBLKL(nmp);
1071 if (sc->sc_tx_tso4) {
1072 mac_lso_get(mp, &lso_mss, &lso_flags);
1073 lso_required = (lso_flags & HW_LSO);
1076 ve = vq_alloc_entry(sc->sc_tx_vq);
1078 if (!ve) {
1079 sc->sc_notxbuf++;
1080 /* Out of free descriptors - try later. */
1081 return (B_FALSE);
1083 buf = &sc->sc_txbufs[ve->qe_index];
1085 /* Use the inline buffer of the first entry for the virtio_net_hdr. */
1086 (void) memset(buf->tb_inline_mapping.vbm_buf, 0,
1087 sizeof (struct virtio_net_hdr));
1089 net_header = (struct virtio_net_hdr *)buf->tb_inline_mapping.vbm_buf;
1091 mac_hcksum_get(mp, &csum_start, &csum_stuff, NULL,
1092 NULL, &csum_flags);
1094 /* They want us to do the TCP/UDP csum calculation. */
1095 if (csum_flags & HCK_PARTIALCKSUM) {
1096 struct ether_header *eth_header;
1097 int eth_hsize;
1099 /* Did we ask for it? */
1100 ASSERT(sc->sc_tx_csum);
1102 /* We only asked for partial csum packets. */
1103 ASSERT(!(csum_flags & HCK_IPV4_HDRCKSUM));
1104 ASSERT(!(csum_flags & HCK_FULLCKSUM));
1106 eth_header = (void *) mp->b_rptr;
1107 if (eth_header->ether_type == htons(ETHERTYPE_VLAN)) {
1108 eth_hsize = sizeof (struct ether_vlan_header);
1109 } else {
1110 eth_hsize = sizeof (struct ether_header);
1112 net_header->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
1113 net_header->csum_start = eth_hsize + csum_start;
1114 net_header->csum_offset = csum_stuff - csum_start;
1117 /* setup LSO fields if required */
1118 if (lso_required) {
1119 net_header->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1120 net_header->gso_size = (uint16_t)lso_mss;
1123 virtio_ve_add_indirect_buf(ve,
1124 buf->tb_inline_mapping.vbm_dmac.dmac_laddress,
1125 sizeof (struct virtio_net_hdr), B_TRUE);
1127 /* meanwhile update the statistic */
1128 if (mp->b_rptr[0] & 0x1) {
1129 if (bcmp(mp->b_rptr, vioif_broadcast, ETHERADDRL) != 0)
1130 sc->sc_multixmt++;
1131 else
1132 sc->sc_brdcstxmt++;
1136 * We copy small packets into the inline buffer. The bigger ones
1137 * get mapped using the mapped buffer.
1139 if (msg_size < sc->sc_txcopy_thresh) {
1140 vioif_tx_inline(sc, ve, mp, msg_size);
1141 } else {
1142 /* statistic gets updated by vioif_tx_external when fail */
1143 ret = vioif_tx_external(sc, ve, mp, msg_size);
1144 if (ret != DDI_SUCCESS)
1145 goto exit_tx_external;
1148 virtio_push_chain(ve, B_TRUE);
1150 sc->sc_opackets++;
1151 sc->sc_obytes += msg_size;
1153 return (B_TRUE);
1155 exit_tx_external:
1157 vq_free_entry(sc->sc_tx_vq, ve);
1159 * vioif_tx_external can fail when the buffer does not fit into the
1160 * indirect descriptor table. Free the mp. I don't expect this ever
1161 * to happen.
1163 freemsg(mp);
1165 return (B_TRUE);
1168 mblk_t *
1169 vioif_tx(void *arg, mblk_t *mp)
1171 struct vioif_softc *sc = arg;
1172 mblk_t *nmp;
1174 while (mp != NULL) {
1175 nmp = mp->b_next;
1176 mp->b_next = NULL;
1178 if (!vioif_send(sc, mp)) {
1179 sc->sc_tx_stopped = 1;
1180 mp->b_next = nmp;
1181 break;
1183 mp = nmp;
1186 return (mp);
1190 vioif_start(void *arg)
1192 struct vioif_softc *sc = arg;
1193 struct vq_entry *ve;
1194 uint32_t len;
1196 mac_link_update(sc->sc_mac_handle,
1197 vioif_link_state(sc));
1199 virtio_start_vq_intr(sc->sc_rx_vq);
1202 * Don't start interrupts on sc_tx_vq. We use VIRTIO_F_NOTIFY_ON_EMPTY,
1203 * so the device will send a transmit interrupt when the queue is empty
1204 * and we can reclaim it in one sweep.
1208 * Clear any data that arrived early on the receive queue and populate
1209 * it with free buffers that the device can use moving forward.
1211 while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len)) != NULL) {
1212 virtio_free_chain(ve);
1214 (void) vioif_populate_rx(sc, KM_SLEEP);
1216 return (DDI_SUCCESS);
1219 void
1220 vioif_stop(void *arg)
1222 struct vioif_softc *sc = arg;
1224 virtio_stop_vq_intr(sc->sc_rx_vq);
1227 /* ARGSUSED */
1228 static int
1229 vioif_stat(void *arg, uint_t stat, uint64_t *val)
1231 struct vioif_softc *sc = arg;
1233 switch (stat) {
1234 case MAC_STAT_IERRORS:
1235 *val = sc->sc_ierrors;
1236 break;
1237 case MAC_STAT_OERRORS:
1238 *val = sc->sc_oerrors;
1239 break;
1240 case MAC_STAT_MULTIRCV:
1241 *val = sc->sc_multircv;
1242 break;
1243 case MAC_STAT_BRDCSTRCV:
1244 *val = sc->sc_brdcstrcv;
1245 break;
1246 case MAC_STAT_MULTIXMT:
1247 *val = sc->sc_multixmt;
1248 break;
1249 case MAC_STAT_BRDCSTXMT:
1250 *val = sc->sc_brdcstxmt;
1251 break;
1252 case MAC_STAT_IPACKETS:
1253 *val = sc->sc_ipackets;
1254 break;
1255 case MAC_STAT_RBYTES:
1256 *val = sc->sc_rbytes;
1257 break;
1258 case MAC_STAT_OPACKETS:
1259 *val = sc->sc_opackets;
1260 break;
1261 case MAC_STAT_OBYTES:
1262 *val = sc->sc_obytes;
1263 break;
1264 case MAC_STAT_NORCVBUF:
1265 *val = sc->sc_norecvbuf;
1266 break;
1267 case MAC_STAT_NOXMTBUF:
1268 *val = sc->sc_notxbuf;
1269 break;
1270 case MAC_STAT_IFSPEED:
1271 /* always 1 Gbit */
1272 *val = 1000000000ULL;
1273 break;
1274 case ETHER_STAT_LINK_DUPLEX:
1275 /* virtual device, always full-duplex */
1276 *val = LINK_DUPLEX_FULL;
1277 break;
1279 default:
1280 return (ENOTSUP);
1283 return (DDI_SUCCESS);
1286 static int
1287 vioif_set_prop_private(struct vioif_softc *sc, const char *pr_name,
1288 uint_t pr_valsize, const void *pr_val)
1290 _NOTE(ARGUNUSED(pr_valsize));
1292 long result;
1294 if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1296 if (pr_val == NULL)
1297 return (EINVAL);
1299 (void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
1301 if (result < 0 || result > VIOIF_TX_THRESH_MAX)
1302 return (EINVAL);
1303 sc->sc_txcopy_thresh = result;
1305 if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
1307 if (pr_val == NULL)
1308 return (EINVAL);
1310 (void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
1312 if (result < 0 || result > VIOIF_RX_THRESH_MAX)
1313 return (EINVAL);
1314 sc->sc_rxcopy_thresh = result;
1316 return (0);
1319 static int
1320 vioif_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1321 uint_t pr_valsize, const void *pr_val)
1323 struct vioif_softc *sc = arg;
1324 const uint32_t *new_mtu;
1325 int err;
1327 switch (pr_num) {
1328 case MAC_PROP_MTU:
1329 new_mtu = pr_val;
1331 if (*new_mtu > MAX_MTU) {
1332 return (EINVAL);
1335 err = mac_maxsdu_update(sc->sc_mac_handle, *new_mtu);
1336 if (err) {
1337 return (err);
1339 break;
1340 case MAC_PROP_PRIVATE:
1341 err = vioif_set_prop_private(sc, pr_name,
1342 pr_valsize, pr_val);
1343 if (err)
1344 return (err);
1345 break;
1346 default:
1347 return (ENOTSUP);
1350 return (0);
1353 static int
1354 vioif_get_prop_private(struct vioif_softc *sc, const char *pr_name,
1355 uint_t pr_valsize, void *pr_val)
1357 int err = ENOTSUP;
1358 int value;
1360 if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1362 value = sc->sc_txcopy_thresh;
1363 err = 0;
1364 goto done;
1366 if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
1368 value = sc->sc_rxcopy_thresh;
1369 err = 0;
1370 goto done;
1372 done:
1373 if (err == 0) {
1374 (void) snprintf(pr_val, pr_valsize, "%d", value);
1376 return (err);
1379 static int
1380 vioif_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1381 uint_t pr_valsize, void *pr_val)
1383 struct vioif_softc *sc = arg;
1384 int err = ENOTSUP;
1386 switch (pr_num) {
1387 case MAC_PROP_PRIVATE:
1388 err = vioif_get_prop_private(sc, pr_name,
1389 pr_valsize, pr_val);
1390 break;
1391 default:
1392 break;
1394 return (err);
1397 static void
1398 vioif_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1399 mac_prop_info_handle_t prh)
1401 struct vioif_softc *sc = arg;
1402 char valstr[64];
1403 int value;
1405 switch (pr_num) {
1406 case MAC_PROP_MTU:
1407 mac_prop_info_set_range_uint32(prh, ETHERMIN, MAX_MTU);
1408 break;
1410 case MAC_PROP_PRIVATE:
1411 bzero(valstr, sizeof (valstr));
1412 if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1414 value = sc->sc_txcopy_thresh;
1415 } else if (strcmp(pr_name,
1416 vioif_rxcopy_thresh) == 0) {
1417 value = sc->sc_rxcopy_thresh;
1418 } else {
1419 return;
1421 (void) snprintf(valstr, sizeof (valstr), "%d", value);
1422 break;
1424 default:
1425 break;
1429 static boolean_t
1430 vioif_getcapab(void *arg, mac_capab_t cap, void *cap_data)
1432 struct vioif_softc *sc = arg;
1434 switch (cap) {
1435 case MAC_CAPAB_HCKSUM:
1436 if (sc->sc_tx_csum) {
1437 uint32_t *txflags = cap_data;
1439 *txflags = HCKSUM_INET_PARTIAL;
1440 return (B_TRUE);
1442 return (B_FALSE);
1443 case MAC_CAPAB_LSO:
1444 if (sc->sc_tx_tso4) {
1445 mac_capab_lso_t *cap_lso = cap_data;
1447 cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
1448 cap_lso->lso_basic_tcp_ipv4.lso_max = MAX_MTU;
1449 return (B_TRUE);
1451 return (B_FALSE);
1452 default:
1453 break;
1455 return (B_FALSE);
1458 static mac_callbacks_t vioif_m_callbacks = {
1459 .mc_callbacks = (MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO),
1460 .mc_getstat = vioif_stat,
1461 .mc_start = vioif_start,
1462 .mc_stop = vioif_stop,
1463 .mc_setpromisc = vioif_promisc,
1464 .mc_multicst = vioif_multicst,
1465 .mc_unicst = vioif_unicst,
1466 .mc_tx = vioif_tx,
1467 /* Optional callbacks */
1468 .mc_reserved = NULL, /* reserved */
1469 .mc_ioctl = NULL, /* mc_ioctl */
1470 .mc_getcapab = vioif_getcapab, /* mc_getcapab */
1471 .mc_open = NULL, /* mc_open */
1472 .mc_close = NULL, /* mc_close */
1473 .mc_setprop = vioif_setprop,
1474 .mc_getprop = vioif_getprop,
1475 .mc_propinfo = vioif_propinfo,
1478 static void
1479 vioif_show_features(struct vioif_softc *sc, const char *prefix,
1480 uint32_t features)
1482 char buf[512];
1483 char *bufp = buf;
1484 char *bufend = buf + sizeof (buf);
1486 /* LINTED E_PTRDIFF_OVERFLOW */
1487 bufp += snprintf(bufp, bufend - bufp, prefix);
1488 /* LINTED E_PTRDIFF_OVERFLOW */
1489 bufp += virtio_show_features(features, bufp, bufend - bufp);
1490 *bufp = '\0';
1493 /* Using '!' to only CE_NOTE this to the system log. */
1494 dev_err(sc->sc_dev, CE_NOTE, "!%s Vioif (%b)", buf, features,
1495 VIRTIO_NET_FEATURE_BITS);
1499 * Find out which features are supported by the device and
1500 * choose which ones we wish to use.
1502 static int
1503 vioif_dev_features(struct vioif_softc *sc)
1505 uint32_t host_features;
1507 host_features = virtio_negotiate_features(&sc->sc_virtio,
1508 VIRTIO_NET_F_CSUM |
1509 VIRTIO_NET_F_HOST_TSO4 |
1510 VIRTIO_NET_F_HOST_ECN |
1511 VIRTIO_NET_F_MAC |
1512 VIRTIO_NET_F_STATUS |
1513 VIRTIO_F_RING_INDIRECT_DESC |
1514 VIRTIO_F_NOTIFY_ON_EMPTY);
1516 vioif_show_features(sc, "Host features: ", host_features);
1517 vioif_show_features(sc, "Negotiated features: ",
1518 sc->sc_virtio.sc_features);
1520 if (!(sc->sc_virtio.sc_features & VIRTIO_F_RING_INDIRECT_DESC)) {
1521 dev_err(sc->sc_dev, CE_NOTE,
1522 "Host does not support RING_INDIRECT_DESC, bye.");
1523 return (DDI_FAILURE);
1526 return (DDI_SUCCESS);
1529 static int
1530 vioif_has_feature(struct vioif_softc *sc, uint32_t feature)
1532 return (virtio_has_feature(&sc->sc_virtio, feature));
1535 static void
1536 vioif_set_mac(struct vioif_softc *sc)
1538 int i;
1540 for (i = 0; i < ETHERADDRL; i++) {
1541 virtio_write_device_config_1(&sc->sc_virtio,
1542 VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
1546 /* Get the mac address out of the hardware, or make up one. */
1547 static void
1548 vioif_get_mac(struct vioif_softc *sc)
1550 int i;
1551 if (sc->sc_virtio.sc_features & VIRTIO_NET_F_MAC) {
1552 for (i = 0; i < ETHERADDRL; i++) {
1553 sc->sc_mac[i] = virtio_read_device_config_1(
1554 &sc->sc_virtio,
1555 VIRTIO_NET_CONFIG_MAC + i);
1557 dev_err(sc->sc_dev, CE_NOTE, "Got MAC address from host: %s",
1558 ether_sprintf((struct ether_addr *)sc->sc_mac));
1559 } else {
1560 /* Get a few random bytes */
1561 (void) random_get_pseudo_bytes(sc->sc_mac, ETHERADDRL);
1562 /* Make sure it's a unicast MAC */
1563 sc->sc_mac[0] &= ~1;
1564 /* Set the "locally administered" bit */
1565 sc->sc_mac[1] |= 2;
1567 vioif_set_mac(sc);
1569 dev_err(sc->sc_dev, CE_NOTE,
1570 "Generated a random MAC address: %s",
1571 ether_sprintf((struct ether_addr *)sc->sc_mac));
1576 * Virtqueue interrupt handlers
1578 /* ARGSUSED */
1579 uint_t
1580 vioif_rx_handler(caddr_t arg1, caddr_t arg2)
1582 struct virtio_softc *vsc = (void *) arg1;
1583 struct vioif_softc *sc = container_of(vsc,
1584 struct vioif_softc, sc_virtio);
1587 * The return values of these functions are not needed but they make
1588 * debugging interrupts simpler because you can use them to detect when
1589 * stuff was processed and repopulated in this handler.
1591 (void) vioif_process_rx(sc);
1592 (void) vioif_populate_rx(sc, KM_NOSLEEP);
1594 return (DDI_INTR_CLAIMED);
1597 /* ARGSUSED */
1598 uint_t
1599 vioif_tx_handler(caddr_t arg1, caddr_t arg2)
1601 struct virtio_softc *vsc = (void *)arg1;
1602 struct vioif_softc *sc = container_of(vsc,
1603 struct vioif_softc, sc_virtio);
1606 * The return value of this function is not needed but makes debugging
1607 * interrupts simpler because you can use it to detect if anything was
1608 * reclaimed in this handler.
1610 (void) vioif_reclaim_used_tx(sc);
1612 return (DDI_INTR_CLAIMED);
1615 static int
1616 vioif_register_ints(struct vioif_softc *sc)
1618 int ret;
1620 struct virtio_int_handler vioif_vq_h[] = {
1621 { vioif_rx_handler },
1622 { vioif_tx_handler },
1623 { NULL }
1626 ret = virtio_register_ints(&sc->sc_virtio, NULL, vioif_vq_h);
1628 return (ret);
1632 static void
1633 vioif_check_features(struct vioif_softc *sc)
1635 if (vioif_has_feature(sc, VIRTIO_NET_F_CSUM)) {
1636 /* The GSO/GRO featured depend on CSUM, check them here. */
1637 sc->sc_tx_csum = 1;
1638 sc->sc_rx_csum = 1;
1640 if (!vioif_has_feature(sc, VIRTIO_NET_F_GUEST_CSUM)) {
1641 sc->sc_rx_csum = 0;
1643 cmn_err(CE_NOTE, "Csum enabled.");
1645 if (vioif_has_feature(sc, VIRTIO_NET_F_HOST_TSO4)) {
1647 sc->sc_tx_tso4 = 1;
1649 * We don't seem to have a way to ask the system
1650 * not to send us LSO packets with Explicit
1651 * Congestion Notification bit set, so we require
1652 * the device to support it in order to do
1653 * LSO.
1655 if (!vioif_has_feature(sc, VIRTIO_NET_F_HOST_ECN)) {
1656 dev_err(sc->sc_dev, CE_NOTE,
1657 "TSO4 supported, but not ECN. "
1658 "Not using LSO.");
1659 sc->sc_tx_tso4 = 0;
1660 } else {
1661 cmn_err(CE_NOTE, "LSO enabled");
1667 static int
1668 vioif_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
1670 int ret, instance;
1671 struct vioif_softc *sc;
1672 struct virtio_softc *vsc;
1673 mac_register_t *macp;
1674 char cache_name[CACHE_NAME_SIZE];
1676 instance = ddi_get_instance(devinfo);
1678 switch (cmd) {
1679 case DDI_ATTACH:
1680 break;
1682 case DDI_RESUME:
1683 case DDI_PM_RESUME:
1684 /* We do not support suspend/resume for vioif. */
1685 goto exit;
1687 default:
1688 goto exit;
1691 sc = kmem_zalloc(sizeof (struct vioif_softc), KM_SLEEP);
1692 ddi_set_driver_private(devinfo, sc);
1694 vsc = &sc->sc_virtio;
1696 /* Duplicate for less typing */
1697 sc->sc_dev = devinfo;
1698 vsc->sc_dev = devinfo;
1701 * Initialize interrupt kstat.
1703 sc->sc_intrstat = kstat_create("vioif", instance, "intr", "controller",
1704 KSTAT_TYPE_INTR, 1, 0);
1705 if (sc->sc_intrstat == NULL) {
1706 dev_err(devinfo, CE_WARN, "kstat_create failed");
1707 goto exit_intrstat;
1709 kstat_install(sc->sc_intrstat);
1711 /* map BAR 0 */
1712 ret = ddi_regs_map_setup(devinfo, 1,
1713 (caddr_t *)&sc->sc_virtio.sc_io_addr,
1714 0, 0, &vioif_attr, &sc->sc_virtio.sc_ioh);
1715 if (ret != DDI_SUCCESS) {
1716 dev_err(devinfo, CE_WARN, "unable to map bar 0: %d", ret);
1717 goto exit_map;
1720 virtio_device_reset(&sc->sc_virtio);
1721 virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
1722 virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
1724 ret = vioif_dev_features(sc);
1725 if (ret)
1726 goto exit_features;
1728 vsc->sc_nvqs = vioif_has_feature(sc, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
1730 (void) snprintf(cache_name, CACHE_NAME_SIZE, "vioif%d_rx", instance);
1731 sc->sc_rxbuf_cache = kmem_cache_create(cache_name,
1732 sizeof (struct vioif_rx_buf), 0, vioif_rx_construct,
1733 vioif_rx_destruct, NULL, sc, NULL, KM_SLEEP);
1734 if (sc->sc_rxbuf_cache == NULL) {
1735 dev_err(sc->sc_dev, CE_WARN, "Can't allocate the buffer cache");
1736 goto exit_cache;
1739 ret = vioif_register_ints(sc);
1740 if (ret) {
1741 dev_err(sc->sc_dev, CE_WARN,
1742 "Failed to allocate interrupt(s)!");
1743 goto exit_ints;
1747 * Register layout determined, can now access the
1748 * device-specific bits
1750 vioif_get_mac(sc);
1752 sc->sc_rx_vq = virtio_alloc_vq(&sc->sc_virtio, 0,
1753 VIOIF_RX_QLEN, VIOIF_INDIRECT_MAX, "rx");
1754 if (!sc->sc_rx_vq)
1755 goto exit_alloc1;
1756 virtio_stop_vq_intr(sc->sc_rx_vq);
1758 sc->sc_tx_vq = virtio_alloc_vq(&sc->sc_virtio, 1,
1759 VIOIF_TX_QLEN, VIOIF_INDIRECT_MAX, "tx");
1760 if (!sc->sc_rx_vq)
1761 goto exit_alloc2;
1762 virtio_stop_vq_intr(sc->sc_tx_vq);
1764 if (vioif_has_feature(sc, VIRTIO_NET_F_CTRL_VQ)) {
1765 sc->sc_ctrl_vq = virtio_alloc_vq(&sc->sc_virtio, 2,
1766 VIOIF_CTRL_QLEN, 0, "ctrl");
1767 if (!sc->sc_ctrl_vq) {
1768 goto exit_alloc3;
1770 virtio_stop_vq_intr(sc->sc_ctrl_vq);
1773 virtio_set_status(&sc->sc_virtio,
1774 VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
1776 sc->sc_rxloan = 0;
1778 /* set some reasonable-small default values */
1779 sc->sc_rxcopy_thresh = 300;
1780 sc->sc_txcopy_thresh = 300;
1781 sc->sc_mtu = ETHERMTU;
1783 vioif_check_features(sc);
1785 if (vioif_alloc_mems(sc))
1786 goto exit_alloc_mems;
1788 if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
1789 dev_err(devinfo, CE_WARN, "Failed to allocate a mac_register");
1790 goto exit_macalloc;
1793 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1794 macp->m_driver = sc;
1795 macp->m_dip = devinfo;
1796 macp->m_src_addr = sc->sc_mac;
1797 macp->m_callbacks = &vioif_m_callbacks;
1798 macp->m_min_sdu = 0;
1799 macp->m_max_sdu = sc->sc_mtu;
1800 macp->m_margin = VLAN_TAGSZ;
1801 macp->m_priv_props = vioif_priv_props;
1803 sc->sc_macp = macp;
1805 /* Pre-fill the rx ring. */
1806 (void) vioif_populate_rx(sc, KM_SLEEP);
1808 ret = mac_register(macp, &sc->sc_mac_handle);
1809 if (ret != 0) {
1810 dev_err(devinfo, CE_WARN, "vioif_attach: "
1811 "mac_register() failed, ret=%d", ret);
1812 goto exit_register;
1815 ret = virtio_enable_ints(&sc->sc_virtio);
1816 if (ret) {
1817 dev_err(devinfo, CE_WARN, "Failed to enable interrupts");
1818 goto exit_enable_ints;
1821 mac_link_update(sc->sc_mac_handle, LINK_STATE_UP);
1822 return (DDI_SUCCESS);
1824 exit_enable_ints:
1825 (void) mac_unregister(sc->sc_mac_handle);
1826 exit_register:
1827 mac_free(macp);
1828 exit_macalloc:
1829 vioif_free_mems(sc);
1830 exit_alloc_mems:
1831 virtio_release_ints(&sc->sc_virtio);
1832 if (sc->sc_ctrl_vq)
1833 virtio_free_vq(sc->sc_ctrl_vq);
1834 exit_alloc3:
1835 virtio_free_vq(sc->sc_tx_vq);
1836 exit_alloc2:
1837 virtio_free_vq(sc->sc_rx_vq);
1838 exit_alloc1:
1839 exit_ints:
1840 kmem_cache_destroy(sc->sc_rxbuf_cache);
1841 exit_cache:
1842 exit_features:
1843 virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
1844 ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
1845 exit_intrstat:
1846 exit_map:
1847 kstat_delete(sc->sc_intrstat);
1848 kmem_free(sc, sizeof (struct vioif_softc));
1849 exit:
1850 return (DDI_FAILURE);
1853 static int
1854 vioif_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
1856 struct vioif_softc *sc;
1858 if ((sc = ddi_get_driver_private(devinfo)) == NULL)
1859 return (DDI_FAILURE);
1861 switch (cmd) {
1862 case DDI_DETACH:
1863 break;
1865 case DDI_PM_SUSPEND:
1866 /* We do not support suspend/resume for vioif. */
1867 return (DDI_FAILURE);
1869 default:
1870 return (DDI_FAILURE);
1873 if (sc->sc_rxloan) {
1874 dev_err(devinfo, CE_WARN, "!Some rx buffers are still upstream,"
1875 " not detaching.");
1876 return (DDI_FAILURE);
1879 virtio_stop_vq_intr(sc->sc_rx_vq);
1880 virtio_stop_vq_intr(sc->sc_tx_vq);
1882 virtio_release_ints(&sc->sc_virtio);
1884 if (mac_unregister(sc->sc_mac_handle)) {
1885 return (DDI_FAILURE);
1888 mac_free(sc->sc_macp);
1890 vioif_free_mems(sc);
1891 virtio_free_vq(sc->sc_rx_vq);
1892 virtio_free_vq(sc->sc_tx_vq);
1894 virtio_device_reset(&sc->sc_virtio);
1896 ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
1898 kmem_cache_destroy(sc->sc_rxbuf_cache);
1899 kstat_delete(sc->sc_intrstat);
1900 kmem_free(sc, sizeof (struct vioif_softc));
1902 return (DDI_SUCCESS);
1905 static int
1906 vioif_quiesce(dev_info_t *devinfo)
1908 struct vioif_softc *sc;
1910 if ((sc = ddi_get_driver_private(devinfo)) == NULL)
1911 return (DDI_FAILURE);
1913 virtio_stop_vq_intr(sc->sc_rx_vq);
1914 virtio_stop_vq_intr(sc->sc_tx_vq);
1915 virtio_device_reset(&sc->sc_virtio);
1917 return (DDI_SUCCESS);
1921 _init(void)
1923 int ret = 0;
1925 mac_init_ops(&vioif_ops, "vioif");
1927 ret = mod_install(&modlinkage);
1928 if (ret != DDI_SUCCESS) {
1929 mac_fini_ops(&vioif_ops);
1930 return (ret);
1933 return (0);
1937 _fini(void)
1939 int ret;
1941 ret = mod_remove(&modlinkage);
1942 if (ret == DDI_SUCCESS) {
1943 mac_fini_ops(&vioif_ops);
1946 return (ret);
1950 _info(struct modinfo *pModinfo)
1952 return (mod_info(&modlinkage, pModinfo));