8572 ccompile.h: rename __GNU_UNUSED to __unused
[unleashed.git] / kernel / drivers / net / myri10ge / myri10ge.c
blob85dd3ba4bb6a7e8bf28069b59a1d0e70fe0ac986
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 * Copyright 2007-2009 Myricom, Inc. All rights reserved.
29 * Use is subject to license terms.
33 * Copyright (c) 2014, Joyent, Inc.
34 * Copyright (c) 2016 by Delphix. All rights reserved.
37 static const char __idstring[] =
38 "@(#)$Id: myri10ge.c,v 1.186 2009-06-29 13:47:22 gallatin Exp $";
40 #define MXGEFW_NDIS
41 #include "myri10ge_var.h"
42 #include "rss_eth_z8e.h"
43 #include "rss_ethp_z8e.h"
44 #include "mcp_gen_header.h"
46 #define MYRI10GE_MAX_ETHER_MTU 9014
47 #define MYRI10GE_MAX_GLD_MTU 9000
48 #define MYRI10GE_MIN_GLD_MTU 1500
50 #define MYRI10GE_ETH_STOPPED 0
51 #define MYRI10GE_ETH_STOPPING 1
52 #define MYRI10GE_ETH_STARTING 2
53 #define MYRI10GE_ETH_RUNNING 3
54 #define MYRI10GE_ETH_OPEN_FAILED 4
55 #define MYRI10GE_ETH_SUSPENDED_RUNNING 5
57 static int myri10ge_small_bytes = 510;
58 static int myri10ge_intr_coal_delay = 125;
59 static int myri10ge_flow_control = 1;
60 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
61 static int myri10ge_nvidia_ecrc_enable = 1;
62 #endif
63 static int myri10ge_mtu_override = 0;
64 static int myri10ge_tx_copylen = 512;
65 static int myri10ge_deassert_wait = 1;
66 static int myri10ge_verbose = 0;
67 static int myri10ge_watchdog_reset = 0;
68 static int myri10ge_use_msix = 1;
69 static int myri10ge_max_slices = -1;
70 static int myri10ge_use_msi = 1;
71 int myri10ge_force_firmware = 0;
72 static boolean_t myri10ge_use_lso = B_TRUE;
73 static int myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
74 static int myri10ge_tx_hash = 1;
75 static int myri10ge_lro = 0;
76 static int myri10ge_lro_cnt = 8;
77 int myri10ge_lro_max_aggr = 2;
78 static int myri10ge_lso_copy = 0;
79 static mblk_t *myri10ge_send_wrapper(void *arg, mblk_t *mp);
80 int myri10ge_tx_handles_initial = 128;
82 static kmutex_t myri10ge_param_lock;
83 static void* myri10ge_db_lastfree;
85 static int myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
86 static int myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
87 static int myri10ge_quiesce(dev_info_t *dip);
89 DDI_DEFINE_STREAM_OPS(myri10ge_ops, nulldev, nulldev, myri10ge_attach,
90 myri10ge_detach, nodev, NULL, D_MP, NULL, myri10ge_quiesce);
93 static struct modldrv modldrv = {
94 &mod_driverops,
95 "Myricom 10G driver (10GbE)",
96 &myri10ge_ops,
100 static struct modlinkage modlinkage = {
101 MODREV_1,
102 {&modldrv, NULL},
105 unsigned char myri10ge_broadcastaddr[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
107 static ddi_dma_attr_t myri10ge_misc_dma_attr = {
108 DMA_ATTR_V0, /* version number. */
109 (uint64_t)0, /* low address */
110 (uint64_t)0xffffffffffffffffULL, /* high address */
111 (uint64_t)0x7ffffff, /* address counter max */
112 (uint64_t)4096, /* alignment */
113 (uint_t)0x7f, /* burstsizes for 32b and 64b xfers */
114 (uint32_t)0x1, /* minimum transfer size */
115 (uint64_t)0x7fffffff, /* maximum transfer size */
116 (uint64_t)0x7fffffff, /* maximum segment size */
117 1, /* scatter/gather list length */
118 1, /* granularity */
119 0 /* attribute flags */
123 * The Myri10GE NIC has the following constraints on receive buffers:
124 * 1) Buffers which cross a 4KB boundary must be aligned to 4KB
125 * 2) Buffers which are not aligned to 4KB must not cross a 4KB boundary
128 static ddi_dma_attr_t myri10ge_rx_jumbo_dma_attr = {
129 DMA_ATTR_V0, /* version number. */
130 (uint64_t)0, /* low address */
131 (uint64_t)0xffffffffffffffffULL, /* high address */
132 (uint64_t)0x7ffffff, /* address counter max */
133 (uint64_t)4096, /* alignment */
134 (uint_t)0x7f, /* burstsizes for 32b and 64b xfers */
135 (uint32_t)0x1, /* minimum transfer size */
136 (uint64_t)0x7fffffff, /* maximum transfer size */
137 UINT64_MAX, /* maximum segment size */
138 1, /* scatter/gather list length */
139 1, /* granularity */
140 0 /* attribute flags */
143 static ddi_dma_attr_t myri10ge_rx_std_dma_attr = {
144 DMA_ATTR_V0, /* version number. */
145 (uint64_t)0, /* low address */
146 (uint64_t)0xffffffffffffffffULL, /* high address */
147 (uint64_t)0x7ffffff, /* address counter max */
148 #if defined sparc64 || defined __sparcv9
149 (uint64_t)4096, /* alignment */
150 #else
151 (uint64_t)0x80, /* alignment */
152 #endif
153 (uint_t)0x7f, /* burstsizes for 32b and 64b xfers */
154 (uint32_t)0x1, /* minimum transfer size */
155 (uint64_t)0x7fffffff, /* maximum transfer size */
156 #if defined sparc64 || defined __sparcv9
157 UINT64_MAX, /* maximum segment size */
158 #else
159 (uint64_t)0xfff, /* maximum segment size */
160 #endif
161 1, /* scatter/gather list length */
162 1, /* granularity */
163 0 /* attribute flags */
166 static ddi_dma_attr_t myri10ge_tx_dma_attr = {
167 DMA_ATTR_V0, /* version number. */
168 (uint64_t)0, /* low address */
169 (uint64_t)0xffffffffffffffffULL, /* high address */
170 (uint64_t)0x7ffffff, /* address counter max */
171 (uint64_t)1, /* alignment */
172 (uint_t)0x7f, /* burstsizes for 32b and 64b xfers */
173 (uint32_t)0x1, /* minimum transfer size */
174 (uint64_t)0x7fffffff, /* maximum transfer size */
175 UINT64_MAX, /* maximum segment size */
176 INT32_MAX, /* scatter/gather list length */
177 1, /* granularity */
178 0 /* attribute flags */
181 #if defined sparc64 || defined __sparcv9
182 #define WC 0
183 #else
184 #define WC 1
185 #endif
187 struct ddi_device_acc_attr myri10ge_dev_access_attr = {
188 DDI_DEVICE_ATTR_V0, /* version */
189 DDI_NEVERSWAP_ACC, /* endian flash */
190 #if WC
191 DDI_MERGING_OK_ACC /* data order */
192 #else
193 DDI_STRICTORDER_ACC
194 #endif
197 static void myri10ge_watchdog(void *arg);
199 #ifdef MYRICOM_PRIV
200 int myri10ge_mtu = MYRI10GE_MAX_ETHER_MTU + MXGEFW_PAD + VLAN_TAGSZ;
201 #define MYRI10GE_DEFAULT_GLD_MTU MYRI10GE_MAX_GLD_MTU
202 #else
203 int myri10ge_mtu = ETHERMAX + MXGEFW_PAD + VLAN_TAGSZ;
204 #define MYRI10GE_DEFAULT_GLD_MTU MYRI10GE_MIN_GLD_MTU
205 #endif
206 int myri10ge_bigbufs_initial = 1024;
207 int myri10ge_bigbufs_max = 4096;
210 caddr_t
211 myri10ge_dma_alloc(dev_info_t *dip, size_t len,
212 ddi_dma_attr_t *attr, ddi_device_acc_attr_t *accattr,
213 uint_t alloc_flags, int bind_flags, struct myri10ge_dma_stuff *dma,
214 int warn, int (*wait)(caddr_t))
216 caddr_t kaddr;
217 size_t real_length;
218 ddi_dma_cookie_t cookie;
219 uint_t count;
220 int err;
222 err = ddi_dma_alloc_handle(dip, attr, wait,
223 NULL, &dma->handle);
224 if (err != DDI_SUCCESS) {
225 if (warn)
226 cmn_err(CE_WARN,
227 "myri10ge: ddi_dma_alloc_handle failed\n");
228 goto abort_with_nothing;
231 err = ddi_dma_mem_alloc(dma->handle, len, accattr, alloc_flags,
232 wait, NULL, &kaddr, &real_length,
233 &dma->acc_handle);
234 if (err != DDI_SUCCESS) {
235 if (warn)
236 cmn_err(CE_WARN,
237 "myri10ge: ddi_dma_mem_alloc failed\n");
238 goto abort_with_handle;
241 err = ddi_dma_addr_bind_handle(dma->handle, NULL, kaddr, len,
242 bind_flags, wait, NULL, &cookie, &count);
244 if (err != DDI_SUCCESS) {
245 if (warn)
246 cmn_err(CE_WARN,
247 "myri10ge: ddi_dma_addr_bind_handle failed\n");
248 goto abort_with_mem;
251 if (count != 1) {
252 if (warn)
253 cmn_err(CE_WARN,
254 "myri10ge: got too many dma segments ");
255 goto abort_with_bind;
257 dma->low = htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress));
258 dma->high = htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress));
259 return (kaddr);
261 abort_with_bind:
262 (void) ddi_dma_unbind_handle(dma->handle);
264 abort_with_mem:
265 ddi_dma_mem_free(&dma->acc_handle);
267 abort_with_handle:
268 ddi_dma_free_handle(&dma->handle);
269 abort_with_nothing:
270 if (warn) {
271 cmn_err(CE_WARN, "myri10ge: myri10ge_dma_alloc failed.\n ");
272 cmn_err(CE_WARN, "args: dip=%p len=0x%lx ddi_dma_attr=%p\n",
273 (void*) dip, len, (void*) attr);
274 cmn_err(CE_WARN,
275 "args: ddi_device_acc_attr=%p alloc_flags=0x%x\n",
276 (void*) accattr, alloc_flags);
277 cmn_err(CE_WARN, "args: bind_flags=0x%x dmastuff=%p",
278 bind_flags, (void*) dma);
280 return (NULL);
284 void
285 myri10ge_dma_free(struct myri10ge_dma_stuff *dma)
287 (void) ddi_dma_unbind_handle(dma->handle);
288 ddi_dma_mem_free(&dma->acc_handle);
289 ddi_dma_free_handle(&dma->handle);
/* Copy "size" bytes to NIC space as 32-bit PIO writes (size % 4 ignored). */
static inline void
myri10ge_pio_copy32(void *to, uint32_t *from32, size_t size)
{
	register volatile uint32_t *dst32 = (volatile uint32_t *)to;
	size_t words;

	for (words = size / 4; words != 0; words--)
		*dst32++ = *from32++;
}
#if defined(_LP64)
/* Copy "size" bytes to NIC space as 64-bit PIO writes (size % 8 ignored). */
static inline void
myri10ge_pio_copy64(void *to, uint64_t *from64, size_t size)
{
	register volatile uint64_t *dst64 = (volatile uint64_t *)to;
	size_t words;

	for (words = size / 8; words != 0; words--)
		*dst64++ = *from64++;
}
#endif
323 * This routine copies memory from the host to the NIC.
324 * The "size" argument must always be a multiple of
325 * the size of long (4 or 8 bytes), and to/from must also
326 * be naturally aligned.
328 static inline void
329 myri10ge_pio_copy(void *to, void *from, size_t size)
331 #if !defined(_LP64)
332 ASSERT((size % 4) == 0);
333 myri10ge_pio_copy32(to, (uint32_t *)from, size);
334 #else
335 ASSERT((size % 8) == 0);
336 myri10ge_pio_copy64(to, (uint64_t *)from, size);
337 #endif
342 * Due to various bugs in Solaris (especially bug 6186772 where the
343 * TCP/UDP checksum is calculated incorrectly on mblk chains with more
344 * than two elements), and the design bug where hardware checksums are
345 * ignored on mblk chains with more than 2 elements, we need to
346 * allocate private pool of physically contiguous receive buffers.
349 static void
350 myri10ge_jpool_init(struct myri10ge_slice_state *ss)
352 struct myri10ge_jpool_stuff *jpool = &ss->jpool;
354 bzero(jpool, sizeof (*jpool));
355 mutex_init(&jpool->mtx, NULL, MUTEX_DRIVER,
356 ss->mgp->icookie);
357 jpool->head = NULL;
360 static void
361 myri10ge_jpool_fini(struct myri10ge_slice_state *ss)
363 struct myri10ge_jpool_stuff *jpool = &ss->jpool;
365 if (jpool->head != NULL) {
366 cmn_err(CE_WARN,
367 "%s: BUG! myri10ge_jpool_fini called on non-empty pool\n",
368 ss->mgp->name);
370 mutex_destroy(&jpool->mtx);
375 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
376 * at most 32 bytes at a time, so as to avoid involving the software
377 * pio handler in the nic. We re-write the first segment's low
378 * DMA address to mark it valid only after we write the entire chunk
379 * in a burst
381 static inline void
382 myri10ge_submit_8rx(mcp_kreq_ether_recv_t *dst, mcp_kreq_ether_recv_t *src)
384 src->addr_low |= BE_32(1);
385 myri10ge_pio_copy(dst, src, 4 * sizeof (*src));
386 mb();
387 myri10ge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
388 mb();
389 src->addr_low &= ~(BE_32(1));
390 dst->addr_low = src->addr_low;
391 mb();
394 static void
395 myri10ge_pull_jpool(struct myri10ge_slice_state *ss)
397 struct myri10ge_jpool_stuff *jpool = &ss->jpool;
398 struct myri10ge_jpool_entry *jtail, *j, *jfree;
399 volatile uintptr_t *putp;
400 uintptr_t put;
401 int i;
403 /* find tail */
404 jtail = NULL;
405 if (jpool->head != NULL) {
406 j = jpool->head;
407 while (j->next != NULL)
408 j = j->next;
409 jtail = j;
413 * iterate over all per-CPU caches, and add contents into
414 * jpool
416 for (i = 0; i < MYRI10GE_MAX_CPUS; i++) {
417 /* take per-CPU free list */
418 putp = (void *)&jpool->cpu[i & MYRI10GE_MAX_CPU_MASK].head;
419 if (*putp == (uintptr_t)NULL)
420 continue;
421 put = atomic_swap_ulong(putp, 0);
422 jfree = (struct myri10ge_jpool_entry *)put;
424 /* append to pool */
425 if (jtail == NULL) {
426 jpool->head = jfree;
427 } else {
428 jtail->next = jfree;
430 j = jfree;
431 while (j->next != NULL)
432 j = j->next;
433 jtail = j;
438 * Transfers buffers from the free pool to the nic
439 * Must be called holding the jpool mutex.
442 static inline void
443 myri10ge_restock_jumbos(struct myri10ge_slice_state *ss)
445 struct myri10ge_jpool_stuff *jpool = &ss->jpool;
446 struct myri10ge_jpool_entry *j;
447 myri10ge_rx_ring_t *rx;
448 int i, idx, limit;
450 rx = &ss->rx_big;
451 limit = ss->j_rx_cnt + (rx->mask + 1);
453 for (i = rx->cnt; i != limit; i++) {
454 idx = i & (rx->mask);
455 j = jpool->head;
456 if (j == NULL) {
457 myri10ge_pull_jpool(ss);
458 j = jpool->head;
459 if (j == NULL) {
460 break;
463 jpool->head = j->next;
464 rx->info[idx].j = j;
465 rx->shadow[idx].addr_low = j->dma.low;
466 rx->shadow[idx].addr_high = j->dma.high;
467 /* copy 4 descriptors (32-bytes) to the mcp at a time */
468 if ((idx & 7) == 7) {
469 myri10ge_submit_8rx(&rx->lanai[idx - 7],
470 &rx->shadow[idx - 7]);
473 rx->cnt = i;
477 * Transfer buffers from the nic to the free pool.
478 * Should be called holding the jpool mutex
481 static inline void
482 myri10ge_unstock_jumbos(struct myri10ge_slice_state *ss)
484 struct myri10ge_jpool_stuff *jpool = &ss->jpool;
485 struct myri10ge_jpool_entry *j;
486 myri10ge_rx_ring_t *rx;
487 int i;
489 mutex_enter(&jpool->mtx);
490 rx = &ss->rx_big;
492 for (i = 0; i < rx->mask + 1; i++) {
493 j = rx->info[i].j;
494 rx->info[i].j = NULL;
495 if (j == NULL)
496 continue;
497 j->next = jpool->head;
498 jpool->head = j;
500 mutex_exit(&jpool->mtx);
506 * Free routine which is called when the mblk allocated via
507 * esballoc() is freed. Here we return the jumbo buffer
508 * to the free pool, and possibly pass some jumbo buffers
509 * to the nic
512 static void
513 myri10ge_jfree_rtn(void *arg)
515 struct myri10ge_jpool_entry *j = (struct myri10ge_jpool_entry *)arg;
516 struct myri10ge_jpool_stuff *jpool;
517 volatile uintptr_t *putp;
518 uintptr_t old, new;
520 jpool = &j->ss->jpool;
522 /* prepend buffer locklessly to per-CPU freelist */
523 putp = (void *)&jpool->cpu[CPU->cpu_seqid & MYRI10GE_MAX_CPU_MASK].head;
524 new = (uintptr_t)j;
525 do {
526 old = *putp;
527 j->next = (void *)old;
528 } while (atomic_cas_ulong(putp, old, new) != old);
531 static void
532 myri10ge_remove_jbuf(struct myri10ge_jpool_entry *j)
534 (void) ddi_dma_unbind_handle(j->dma_handle);
535 ddi_dma_mem_free(&j->acc_handle);
536 ddi_dma_free_handle(&j->dma_handle);
537 kmem_free(j, sizeof (*j));
542 * Allocates one physically contiguous descriptor
543 * and add it to the jumbo buffer pool.
546 static int
547 myri10ge_add_jbuf(struct myri10ge_slice_state *ss)
549 struct myri10ge_jpool_entry *j;
550 struct myri10ge_jpool_stuff *jpool = &ss->jpool;
551 ddi_dma_attr_t *rx_dma_attr;
552 size_t real_length;
553 ddi_dma_cookie_t cookie;
554 uint_t count;
555 int err;
557 if (myri10ge_mtu < 2048)
558 rx_dma_attr = &myri10ge_rx_std_dma_attr;
559 else
560 rx_dma_attr = &myri10ge_rx_jumbo_dma_attr;
562 again:
563 j = (struct myri10ge_jpool_entry *)
564 kmem_alloc(sizeof (*j), KM_SLEEP);
565 err = ddi_dma_alloc_handle(ss->mgp->dip, rx_dma_attr,
566 DDI_DMA_DONTWAIT, NULL, &j->dma_handle);
567 if (err != DDI_SUCCESS)
568 goto abort_with_j;
570 err = ddi_dma_mem_alloc(j->dma_handle, myri10ge_mtu,
571 &myri10ge_dev_access_attr, DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
572 NULL, &j->buf, &real_length, &j->acc_handle);
573 if (err != DDI_SUCCESS)
574 goto abort_with_handle;
576 err = ddi_dma_addr_bind_handle(j->dma_handle, NULL, j->buf,
577 real_length, DDI_DMA_READ|DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
578 NULL, &cookie, &count);
579 if (err != DDI_SUCCESS)
580 goto abort_with_mem;
583 * Make certain std MTU buffers do not cross a 4KB boundary:
585 * Setting dma_attr_align=4096 will do this, but the system
586 * will only allocate 1 RX buffer per 4KB page, rather than 2.
587 * Setting dma_attr_granular=4096 *seems* to work around this,
588 * but I'm paranoid about future systems no longer honoring
589 * this, so fall back to the safe, but memory wasting way if a
590 * buffer crosses a 4KB boundary.
593 if (rx_dma_attr == &myri10ge_rx_std_dma_attr &&
594 rx_dma_attr->dma_attr_align != 4096) {
595 uint32_t start, end;
597 start = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress);
598 end = start + myri10ge_mtu;
599 if (((end >> 12) != (start >> 12)) && (start & 4095U)) {
600 printf("std buffer crossed a 4KB boundary!\n");
601 myri10ge_remove_jbuf(j);
602 rx_dma_attr->dma_attr_align = 4096;
603 rx_dma_attr->dma_attr_seg = UINT64_MAX;
604 goto again;
608 j->dma.low =
609 htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress));
610 j->dma.high =
611 htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress));
612 j->ss = ss;
615 j->free_func.free_func = myri10ge_jfree_rtn;
616 j->free_func.free_arg = (char *)j;
617 mutex_enter(&jpool->mtx);
618 j->next = jpool->head;
619 jpool->head = j;
620 jpool->num_alloc++;
621 mutex_exit(&jpool->mtx);
622 return (0);
624 abort_with_mem:
625 ddi_dma_mem_free(&j->acc_handle);
627 abort_with_handle:
628 ddi_dma_free_handle(&j->dma_handle);
630 abort_with_j:
631 kmem_free(j, sizeof (*j));
634 * If an allocation failed, perhaps it failed because it could
635 * not satisfy granularity requirement. Disable that, and
636 * try agin.
638 if (rx_dma_attr == &myri10ge_rx_std_dma_attr &&
639 rx_dma_attr->dma_attr_align != 4096) {
640 cmn_err(CE_NOTE,
641 "!alloc failed, reverting to gran=1\n");
642 rx_dma_attr->dma_attr_align = 4096;
643 rx_dma_attr->dma_attr_seg = UINT64_MAX;
644 goto again;
646 return (err);
649 static int
650 myri10ge_jfree_cnt(struct myri10ge_jpool_stuff *jpool)
652 int i;
653 struct myri10ge_jpool_entry *j;
655 mutex_enter(&jpool->mtx);
656 j = jpool->head;
657 i = 0;
658 while (j != NULL) {
659 i++;
660 j = j->next;
662 mutex_exit(&jpool->mtx);
663 return (i);
666 static int
667 myri10ge_add_jbufs(struct myri10ge_slice_state *ss, int num, int total)
669 struct myri10ge_jpool_stuff *jpool = &ss->jpool;
670 int allocated = 0;
671 int err;
672 int needed;
675 * if total is set, user wants "num" jbufs in the pool,
676 * otherwise the user wants to "num" additional jbufs
677 * added to the pool
679 if (total && jpool->num_alloc) {
680 allocated = myri10ge_jfree_cnt(jpool);
681 needed = num - allocated;
682 } else {
683 needed = num;
686 while (needed > 0) {
687 needed--;
688 err = myri10ge_add_jbuf(ss);
689 if (err == 0) {
690 allocated++;
693 return (allocated);
696 static void
697 myri10ge_remove_jbufs(struct myri10ge_slice_state *ss)
699 struct myri10ge_jpool_stuff *jpool = &ss->jpool;
700 struct myri10ge_jpool_entry *j;
702 mutex_enter(&jpool->mtx);
703 myri10ge_pull_jpool(ss);
704 while (jpool->head != NULL) {
705 jpool->num_alloc--;
706 j = jpool->head;
707 jpool->head = j->next;
708 myri10ge_remove_jbuf(j);
710 mutex_exit(&jpool->mtx);
713 static void
714 myri10ge_carve_up_jbufs_into_small_ring(struct myri10ge_slice_state *ss)
716 struct myri10ge_jpool_stuff *jpool = &ss->jpool;
717 struct myri10ge_jpool_entry *j = NULL;
718 caddr_t ptr;
719 uint32_t dma_low, dma_high;
720 int idx, len;
721 unsigned int alloc_size;
723 dma_low = dma_high = len = 0;
724 alloc_size = myri10ge_small_bytes + MXGEFW_PAD;
725 ptr = NULL;
726 for (idx = 0; idx < ss->rx_small.mask + 1; idx++) {
727 /* Allocate a jumbo frame and carve it into small frames */
728 if (len < alloc_size) {
729 mutex_enter(&jpool->mtx);
730 /* remove jumbo from freelist */
731 j = jpool->head;
732 jpool->head = j->next;
733 /* place it onto small list */
734 j->next = ss->small_jpool;
735 ss->small_jpool = j;
736 mutex_exit(&jpool->mtx);
737 len = myri10ge_mtu;
738 dma_low = ntohl(j->dma.low);
739 dma_high = ntohl(j->dma.high);
740 ptr = j->buf;
742 ss->rx_small.info[idx].ptr = ptr;
743 ss->rx_small.shadow[idx].addr_low = htonl(dma_low);
744 ss->rx_small.shadow[idx].addr_high = htonl(dma_high);
745 len -= alloc_size;
746 ptr += alloc_size;
747 dma_low += alloc_size;
752 * Return the jumbo bufs we carved up for small to the jumbo pool
755 static void
756 myri10ge_release_small_jbufs(struct myri10ge_slice_state *ss)
758 struct myri10ge_jpool_stuff *jpool = &ss->jpool;
759 struct myri10ge_jpool_entry *j = NULL;
761 mutex_enter(&jpool->mtx);
762 while (ss->small_jpool != NULL) {
763 j = ss->small_jpool;
764 ss->small_jpool = j->next;
765 j->next = jpool->head;
766 jpool->head = j;
768 mutex_exit(&jpool->mtx);
769 ss->jbufs_for_smalls = 0;
772 static int
773 myri10ge_add_tx_handle(struct myri10ge_slice_state *ss)
775 myri10ge_tx_ring_t *tx = &ss->tx;
776 struct myri10ge_priv *mgp = ss->mgp;
777 struct myri10ge_tx_dma_handle *handle;
778 int err;
780 handle = kmem_zalloc(sizeof (*handle), KM_SLEEP);
781 err = ddi_dma_alloc_handle(mgp->dip,
782 &myri10ge_tx_dma_attr,
783 DDI_DMA_SLEEP, NULL,
784 &handle->h);
785 if (err) {
786 static int limit = 0;
787 if (limit == 0)
788 cmn_err(CE_WARN, "%s: Falled to alloc tx dma handle\n",
789 mgp->name);
790 limit++;
791 kmem_free(handle, sizeof (*handle));
792 return (err);
794 mutex_enter(&tx->handle_lock);
795 MYRI10GE_SLICE_STAT_INC(tx_handles_alloced);
796 handle->next = tx->free_tx_handles;
797 tx->free_tx_handles = handle;
798 mutex_exit(&tx->handle_lock);
799 return (DDI_SUCCESS);
802 static void
803 myri10ge_remove_tx_handles(struct myri10ge_slice_state *ss)
805 myri10ge_tx_ring_t *tx = &ss->tx;
806 struct myri10ge_tx_dma_handle *handle;
807 mutex_enter(&tx->handle_lock);
809 handle = tx->free_tx_handles;
810 while (handle != NULL) {
811 tx->free_tx_handles = handle->next;
812 ddi_dma_free_handle(&handle->h);
813 kmem_free(handle, sizeof (*handle));
814 handle = tx->free_tx_handles;
815 MYRI10GE_SLICE_STAT_DEC(tx_handles_alloced);
817 mutex_exit(&tx->handle_lock);
818 if (MYRI10GE_SLICE_STAT(tx_handles_alloced) != 0) {
819 cmn_err(CE_WARN, "%s: %d tx dma handles allocated at close\n",
820 ss->mgp->name,
821 (int)MYRI10GE_SLICE_STAT(tx_handles_alloced));
825 static void
826 myri10ge_free_tx_handles(myri10ge_tx_ring_t *tx,
827 struct myri10ge_tx_dma_handle_head *list)
829 mutex_enter(&tx->handle_lock);
830 list->tail->next = tx->free_tx_handles;
831 tx->free_tx_handles = list->head;
832 mutex_exit(&tx->handle_lock);
835 static void
836 myri10ge_free_tx_handle_slist(myri10ge_tx_ring_t *tx,
837 struct myri10ge_tx_dma_handle *handle)
839 struct myri10ge_tx_dma_handle_head list;
841 if (handle == NULL)
842 return;
843 list.head = handle;
844 list.tail = handle;
845 while (handle != NULL) {
846 list.tail = handle;
847 handle = handle->next;
849 myri10ge_free_tx_handles(tx, &list);
852 static int
853 myri10ge_alloc_tx_handles(struct myri10ge_slice_state *ss, int count,
854 struct myri10ge_tx_dma_handle **ret)
856 myri10ge_tx_ring_t *tx = &ss->tx;
857 struct myri10ge_tx_dma_handle *handle;
858 int err, i;
860 mutex_enter(&tx->handle_lock);
861 for (i = 0; i < count; i++) {
862 handle = tx->free_tx_handles;
863 while (handle == NULL) {
864 mutex_exit(&tx->handle_lock);
865 err = myri10ge_add_tx_handle(ss);
866 if (err != DDI_SUCCESS) {
867 goto abort_with_handles;
869 mutex_enter(&tx->handle_lock);
870 handle = tx->free_tx_handles;
872 tx->free_tx_handles = handle->next;
873 handle->next = *ret;
874 *ret = handle;
876 mutex_exit(&tx->handle_lock);
877 return (DDI_SUCCESS);
879 abort_with_handles:
880 myri10ge_free_tx_handle_slist(tx, *ret);
881 return (err);
886 * Frees DMA resources associated with the send ring
888 static void
889 myri10ge_unprepare_tx_ring(struct myri10ge_slice_state *ss)
891 myri10ge_tx_ring_t *tx;
892 struct myri10ge_tx_dma_handle_head handles;
893 size_t bytes;
894 int idx;
896 tx = &ss->tx;
897 handles.head = NULL;
898 handles.tail = NULL;
899 for (idx = 0; idx < ss->tx.mask + 1; idx++) {
900 if (tx->info[idx].m) {
901 (void) ddi_dma_unbind_handle(tx->info[idx].handle->h);
902 handles.head = tx->info[idx].handle;
903 if (handles.tail == NULL)
904 handles.tail = tx->info[idx].handle;
905 freeb(tx->info[idx].m);
906 tx->info[idx].m = 0;
907 tx->info[idx].handle = 0;
909 tx->cp[idx].va = NULL;
910 myri10ge_dma_free(&tx->cp[idx].dma);
912 bytes = sizeof (*tx->cp) * (tx->mask + 1);
913 kmem_free(tx->cp, bytes);
914 tx->cp = NULL;
915 if (handles.head != NULL)
916 myri10ge_free_tx_handles(tx, &handles);
917 myri10ge_remove_tx_handles(ss);
921 * Allocates DMA handles associated with the send ring
923 static inline int
924 myri10ge_prepare_tx_ring(struct myri10ge_slice_state *ss)
926 struct myri10ge_tx_dma_handle *handles;
927 int h;
928 size_t bytes;
930 bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1);
931 ss->tx.cp = kmem_zalloc(bytes, KM_SLEEP);
932 if (ss->tx.cp == NULL) {
933 cmn_err(CE_WARN,
934 "%s: Failed to allocate tx copyblock storage\n",
935 ss->mgp->name);
936 return (DDI_FAILURE);
940 /* allocate the TX copyblocks */
941 for (h = 0; h < ss->tx.mask + 1; h++) {
942 ss->tx.cp[h].va = myri10ge_dma_alloc(ss->mgp->dip,
943 4096, &myri10ge_rx_jumbo_dma_attr,
944 &myri10ge_dev_access_attr, DDI_DMA_STREAMING,
945 DDI_DMA_WRITE|DDI_DMA_STREAMING, &ss->tx.cp[h].dma, 1,
946 DDI_DMA_DONTWAIT);
947 if (ss->tx.cp[h].va == NULL) {
948 cmn_err(CE_WARN, "%s: Failed to allocate tx "
949 "copyblock %d\n", ss->mgp->name, h);
950 goto abort_with_copyblocks;
953 /* pre-allocate transmit handles */
954 handles = NULL;
955 (void) myri10ge_alloc_tx_handles(ss, myri10ge_tx_handles_initial,
956 &handles);
957 if (handles != NULL)
958 myri10ge_free_tx_handle_slist(&ss->tx, handles);
960 return (DDI_SUCCESS);
962 abort_with_copyblocks:
963 while (h > 0) {
964 h--;
965 myri10ge_dma_free(&ss->tx.cp[h].dma);
968 bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1);
969 kmem_free(ss->tx.cp, bytes);
970 ss->tx.cp = NULL;
971 return (DDI_FAILURE);
975 * The eeprom strings on the lanaiX have the format
976 * SN=x\0
977 * MAC=x:x:x:x:x:x\0
978 * PT:ddd mmm xx xx:xx:xx xx\0
979 * PV:ddd mmm xx xx:xx:xx xx\0
981 static int
982 myri10ge_read_mac_addr(struct myri10ge_priv *mgp)
984 #define MYRI10GE_NEXT_STRING(p) while (ptr < limit && *ptr++)
985 #define myri10ge_digit(c) (((c) >= '0' && (c) <= '9') ? ((c) - '0') : \
986 (((c) >= 'A' && (c) <= 'F') ? (10 + (c) - 'A') : \
987 (((c) >= 'a' && (c) <= 'f') ? (10 + (c) - 'a') : -1)))
989 char *ptr, *limit;
990 int i, hv, lv;
992 ptr = mgp->eeprom_strings;
993 limit = mgp->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE;
995 while (*ptr != '\0' && ptr < limit) {
996 if (memcmp(ptr, "MAC=", 4) == 0) {
997 ptr += 4;
998 if (myri10ge_verbose)
999 printf("%s: mac address = %s\n", mgp->name,
1000 ptr);
1001 mgp->mac_addr_string = ptr;
1002 for (i = 0; i < 6; i++) {
1003 if ((ptr + 2) > limit)
1004 goto abort;
1006 if (*(ptr+1) == ':') {
1007 hv = 0;
1008 lv = myri10ge_digit(*ptr); ptr++;
1009 } else {
1010 hv = myri10ge_digit(*ptr); ptr++;
1011 lv = myri10ge_digit(*ptr); ptr++;
1013 mgp->mac_addr[i] = (hv << 4) | lv;
1014 ptr++;
1017 if (memcmp((const void *)ptr, "SN=", 3) == 0) {
1018 ptr += 3;
1019 mgp->sn_str = (char *)ptr;
1021 if (memcmp((const void *)ptr, "PC=", 3) == 0) {
1022 ptr += 3;
1023 mgp->pc_str = (char *)ptr;
1025 MYRI10GE_NEXT_STRING(ptr);
1028 return (0);
1030 abort:
1031 cmn_err(CE_WARN, "%s: failed to parse eeprom_strings", mgp->name);
1032 return (ENXIO);
1037 * Determine the register set containing the PCI resource we
1038 * want to map: the memory-mappable part of the interface. We do
1039 * this by scanning the DDI "reg" property of the interface,
1040 * which is an array of mx_ddi_reg_set structures.
1042 static int
1043 myri10ge_reg_set(dev_info_t *dip, int *reg_set, int *span,
1044 unsigned long *busno, unsigned long *devno,
1045 unsigned long *funcno)
1048 #define REGISTER_NUMBER(ip) (ip[0] >> 0 & 0xff)
1049 #define FUNCTION_NUMBER(ip) (ip[0] >> 8 & 0x07)
1050 #define DEVICE_NUMBER(ip) (ip[0] >> 11 & 0x1f)
1051 #define BUS_NUMBER(ip) (ip[0] >> 16 & 0xff)
1052 #define ADDRESS_SPACE(ip) (ip[0] >> 24 & 0x03)
1053 #define PCI_ADDR_HIGH(ip) (ip[1])
1054 #define PCI_ADDR_LOW(ip) (ip[2])
1055 #define PCI_SPAN_HIGH(ip) (ip[3])
1056 #define PCI_SPAN_LOW(ip) (ip[4])
1058 #define MX_DDI_REG_SET_32_BIT_MEMORY_SPACE 2
1059 #define MX_DDI_REG_SET_64_BIT_MEMORY_SPACE 3
1061 int *data, i, *rs;
1062 uint32_t nelementsp;
1064 #ifdef MYRI10GE_REGSET_VERBOSE
1065 char *address_space_name[] = { "Configuration Space",
1066 "I/O Space",
1067 "32-bit Memory Space",
1068 "64-bit Memory Space"
1070 #endif
1072 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
1073 "reg", &data, &nelementsp) != DDI_SUCCESS) {
1074 printf("Could not determine register set.\n");
1075 return (ENXIO);
1078 #ifdef MYRI10GE_REGSET_VERBOSE
1079 printf("There are %d register sets.\n", nelementsp / 5);
1080 #endif
1081 if (!nelementsp) {
1082 printf("Didn't find any \"reg\" properties.\n");
1083 ddi_prop_free(data);
1084 return (ENODEV);
1087 /* Scan for the register number. */
1088 rs = &data[0];
1089 *busno = BUS_NUMBER(rs);
1090 *devno = DEVICE_NUMBER(rs);
1091 *funcno = FUNCTION_NUMBER(rs);
1093 #ifdef MYRI10GE_REGSET_VERBOSE
1094 printf("*** Scanning for register number.\n");
1095 #endif
1096 for (i = 0; i < nelementsp / 5; i++) {
1097 rs = &data[5 * i];
1098 #ifdef MYRI10GE_REGSET_VERBOSE
1099 printf("Examining register set %d:\n", i);
1100 printf(" Register number = %d.\n", REGISTER_NUMBER(rs));
1101 printf(" Function number = %d.\n", FUNCTION_NUMBER(rs));
1102 printf(" Device number = %d.\n", DEVICE_NUMBER(rs));
1103 printf(" Bus number = %d.\n", BUS_NUMBER(rs));
1104 printf(" Address space = %d (%s ).\n", ADDRESS_SPACE(rs),
1105 address_space_name[ADDRESS_SPACE(rs)]);
1106 printf(" pci address 0x%08x %08x\n", PCI_ADDR_HIGH(rs),
1107 PCI_ADDR_LOW(rs));
1108 printf(" pci span 0x%08x %08x\n", PCI_SPAN_HIGH(rs),
1109 PCI_SPAN_LOW(rs));
1110 #endif
1111 /* We are looking for a memory property. */
1113 if (ADDRESS_SPACE(rs) == MX_DDI_REG_SET_64_BIT_MEMORY_SPACE ||
1114 ADDRESS_SPACE(rs) == MX_DDI_REG_SET_32_BIT_MEMORY_SPACE) {
1115 *reg_set = i;
1117 #ifdef MYRI10GE_REGSET_VERBOSE
1118 printf("%s uses register set %d.\n",
1119 address_space_name[ADDRESS_SPACE(rs)], *reg_set);
1120 #endif
1122 *span = (PCI_SPAN_LOW(rs));
1123 #ifdef MYRI10GE_REGSET_VERBOSE
1124 printf("Board span is 0x%x\n", *span);
1125 #endif
1126 break;
1130 ddi_prop_free(data);
1132 /* If no match, fail. */
1133 if (i >= nelementsp / 5) {
1134 return (EIO);
1137 return (0);
1141 static int
1142 myri10ge_load_firmware_from_zlib(struct myri10ge_priv *mgp, uint32_t *limit)
1144 void *inflate_buffer;
1145 int rv, status;
1146 size_t sram_size = mgp->sram_size - MYRI10GE_EEPROM_STRINGS_SIZE;
1147 size_t destlen;
1148 mcp_gen_header_t *hdr;
1149 unsigned hdr_offset, i;
1152 *limit = 0; /* -Wuninitialized */
1153 status = 0;
1155 inflate_buffer = kmem_zalloc(sram_size, KM_NOSLEEP);
1156 if (!inflate_buffer) {
1157 cmn_err(CE_WARN,
1158 "%s: Could not allocate buffer to inflate mcp\n",
1159 mgp->name);
1160 return (ENOMEM);
1163 destlen = sram_size;
1164 rv = z_uncompress(inflate_buffer, &destlen, mgp->eth_z8e,
1165 mgp->eth_z8e_length);
1167 if (rv != Z_OK) {
1168 cmn_err(CE_WARN, "%s: Could not inflate mcp: %s\n",
1169 mgp->name, z_strerror(rv));
1170 status = ENXIO;
1171 goto abort;
1174 *limit = (uint32_t)destlen;
1176 hdr_offset = htonl(*(uint32_t *)(void *)((char *)inflate_buffer +
1177 MCP_HEADER_PTR_OFFSET));
1178 hdr = (void *)((char *)inflate_buffer + hdr_offset);
1179 if (ntohl(hdr->mcp_type) != MCP_TYPE_ETH) {
1180 cmn_err(CE_WARN, "%s: Bad firmware type: 0x%x\n", mgp->name,
1181 ntohl(hdr->mcp_type));
1182 status = EIO;
1183 goto abort;
1186 /* save firmware version for kstat */
1187 (void) strncpy(mgp->fw_version, hdr->version, sizeof (mgp->fw_version));
1188 if (myri10ge_verbose)
1189 printf("%s: firmware id: %s\n", mgp->name, hdr->version);
1191 /* Copy the inflated firmware to NIC SRAM. */
1192 for (i = 0; i < *limit; i += 256) {
1193 myri10ge_pio_copy((char *)mgp->sram + MYRI10GE_FW_OFFSET + i,
1194 (char *)inflate_buffer + i,
1195 min(256U, (unsigned)(*limit - i)));
1196 mb();
1197 (void) *(int *)(void *)mgp->sram;
1198 mb();
1201 abort:
1202 kmem_free(inflate_buffer, sram_size);
1204 return (status);
1210 myri10ge_send_cmd(struct myri10ge_priv *mgp, uint32_t cmd,
1211 myri10ge_cmd_t *data)
1213 mcp_cmd_t *buf;
1214 char buf_bytes[sizeof (*buf) + 8];
1215 volatile mcp_cmd_response_t *response = mgp->cmd;
1216 volatile char *cmd_addr =
1217 (volatile char *)mgp->sram + MXGEFW_ETH_CMD;
1218 int sleep_total = 0;
1220 /* ensure buf is aligned to 8 bytes */
1221 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
1223 buf->data0 = htonl(data->data0);
1224 buf->data1 = htonl(data->data1);
1225 buf->data2 = htonl(data->data2);
1226 buf->cmd = htonl(cmd);
1227 buf->response_addr.low = mgp->cmd_dma.low;
1228 buf->response_addr.high = mgp->cmd_dma.high;
1229 mutex_enter(&mgp->cmd_lock);
1230 response->result = 0xffffffff;
1231 mb();
1233 myri10ge_pio_copy((void *)cmd_addr, buf, sizeof (*buf));
1235 /* wait up to 20ms */
1236 for (sleep_total = 0; sleep_total < 20; sleep_total++) {
1237 mb();
1238 if (response->result != 0xffffffff) {
1239 if (response->result == 0) {
1240 data->data0 = ntohl(response->data);
1241 mutex_exit(&mgp->cmd_lock);
1242 return (0);
1243 } else if (ntohl(response->result)
1244 == MXGEFW_CMD_UNKNOWN) {
1245 mutex_exit(&mgp->cmd_lock);
1246 return (ENOSYS);
1247 } else if (ntohl(response->result)
1248 == MXGEFW_CMD_ERROR_UNALIGNED) {
1249 mutex_exit(&mgp->cmd_lock);
1250 return (E2BIG);
1251 } else {
1252 cmn_err(CE_WARN,
1253 "%s: command %d failed, result = %d\n",
1254 mgp->name, cmd, ntohl(response->result));
1255 mutex_exit(&mgp->cmd_lock);
1256 return (ENXIO);
1259 drv_usecwait(1000);
1261 mutex_exit(&mgp->cmd_lock);
1262 cmn_err(CE_WARN, "%s: command %d timed out, result = %d\n",
1263 mgp->name, cmd, ntohl(response->result));
1264 return (EAGAIN);
1268 * Enable or disable periodic RDMAs from the host to make certain
1269 * chipsets resend dropped PCIe messages
1272 static void
1273 myri10ge_dummy_rdma(struct myri10ge_priv *mgp, int enable)
1275 char buf_bytes[72];
1276 volatile uint32_t *confirm;
1277 volatile char *submit;
1278 uint32_t *buf;
1279 int i;
1281 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
1283 /* clear confirmation addr */
1284 confirm = (volatile uint32_t *)mgp->cmd;
1285 *confirm = 0;
1286 mb();
1289 * send an rdma command to the PCIe engine, and wait for the
1290 * response in the confirmation address. The firmware should
1291 * write a -1 there to indicate it is alive and well
1294 buf[0] = mgp->cmd_dma.high; /* confirm addr MSW */
1295 buf[1] = mgp->cmd_dma.low; /* confirm addr LSW */
1296 buf[2] = htonl(0xffffffff); /* confirm data */
1297 buf[3] = htonl(mgp->cmd_dma.high); /* dummy addr MSW */
1298 buf[4] = htonl(mgp->cmd_dma.low); /* dummy addr LSW */
1299 buf[5] = htonl(enable); /* enable? */
1302 submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_DUMMY_RDMA);
1304 myri10ge_pio_copy((char *)submit, buf, 64);
1305 mb();
1306 drv_usecwait(1000);
1307 mb();
1308 i = 0;
1309 while (*confirm != 0xffffffff && i < 20) {
1310 drv_usecwait(1000);
1311 i++;
1313 if (*confirm != 0xffffffff) {
1314 cmn_err(CE_WARN, "%s: dummy rdma %s failed (%p = 0x%x)",
1315 mgp->name,
1316 (enable ? "enable" : "disable"), (void*) confirm, *confirm);
1320 static int
1321 myri10ge_load_firmware(struct myri10ge_priv *mgp)
1323 myri10ge_cmd_t cmd;
1324 volatile uint32_t *confirm;
1325 volatile char *submit;
1326 char buf_bytes[72];
1327 uint32_t *buf, size;
1328 int status, i;
1330 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
1332 status = myri10ge_load_firmware_from_zlib(mgp, &size);
1333 if (status) {
1334 cmn_err(CE_WARN, "%s: firmware loading failed\n", mgp->name);
1335 return (status);
1338 /* clear confirmation addr */
1339 confirm = (volatile uint32_t *)mgp->cmd;
1340 *confirm = 0;
1341 mb();
1344 * send a reload command to the bootstrap MCP, and wait for the
1345 * response in the confirmation address. The firmware should
1346 * write a -1 there to indicate it is alive and well
1349 buf[0] = mgp->cmd_dma.high; /* confirm addr MSW */
1350 buf[1] = mgp->cmd_dma.low; /* confirm addr LSW */
1351 buf[2] = htonl(0xffffffff); /* confirm data */
1354 * FIX: All newest firmware should un-protect the bottom of
1355 * the sram before handoff. However, the very first interfaces
1356 * do not. Therefore the handoff copy must skip the first 8 bytes
1358 buf[3] = htonl(MYRI10GE_FW_OFFSET + 8); /* where the code starts */
1359 buf[4] = htonl(size - 8); /* length of code */
1360 buf[5] = htonl(8); /* where to copy to */
1361 buf[6] = htonl(0); /* where to jump to */
1363 submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_HANDOFF);
1365 myri10ge_pio_copy((char *)submit, buf, 64);
1366 mb();
1367 drv_usecwait(1000);
1368 mb();
1369 i = 0;
1370 while (*confirm != 0xffffffff && i < 1000) {
1371 drv_usecwait(1000);
1372 i++;
1374 if (*confirm != 0xffffffff) {
1375 cmn_err(CE_WARN, "%s: handoff failed (%p = 0x%x)",
1376 mgp->name, (void *) confirm, *confirm);
1378 return (ENXIO);
1380 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
1381 if (status != 0) {
1382 cmn_err(CE_WARN, "%s: failed MXGEFW_CMD_GET_RX_RING_SIZE\n",
1383 mgp->name);
1384 return (ENXIO);
1387 mgp->max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
1388 myri10ge_dummy_rdma(mgp, 1);
1389 return (0);
1392 static int
1393 myri10ge_m_unicst(void *arg, const uint8_t *addr)
1395 struct myri10ge_priv *mgp = arg;
1396 myri10ge_cmd_t cmd;
1397 int status;
1399 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
1400 | (addr[2] << 8) | addr[3]);
1402 cmd.data1 = ((addr[4] << 8) | (addr[5]));
1404 status = myri10ge_send_cmd(mgp, MXGEFW_SET_MAC_ADDRESS, &cmd);
1405 if (status == 0 && (addr != mgp->mac_addr))
1406 (void) memcpy(mgp->mac_addr, addr, sizeof (mgp->mac_addr));
1408 return (status);
1411 static int
1412 myri10ge_change_pause(struct myri10ge_priv *mgp, int pause)
1414 myri10ge_cmd_t cmd;
1415 int status;
1417 if (pause)
1418 status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_FLOW_CONTROL,
1419 &cmd);
1420 else
1421 status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_FLOW_CONTROL,
1422 &cmd);
1424 if (status) {
1425 cmn_err(CE_WARN, "%s: Failed to set flow control mode\n",
1426 mgp->name);
1427 return (ENXIO);
1429 mgp->pause = pause;
1430 return (0);
1433 static void
1434 myri10ge_change_promisc(struct myri10ge_priv *mgp, int promisc)
1436 myri10ge_cmd_t cmd;
1437 int status;
1439 if (promisc)
1440 status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_PROMISC, &cmd);
1441 else
1442 status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_PROMISC, &cmd);
1444 if (status) {
1445 cmn_err(CE_WARN, "%s: Failed to set promisc mode\n",
1446 mgp->name);
1450 static int
1451 myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type)
1453 myri10ge_cmd_t cmd;
1454 int status;
1455 uint32_t len;
1456 void *dmabench;
1457 struct myri10ge_dma_stuff dmabench_dma;
1458 char *test = " ";
1461 * Run a small DMA test.
1462 * The magic multipliers to the length tell the firmware
1463 * tp do DMA read, write, or read+write tests. The
1464 * results are returned in cmd.data0. The upper 16
1465 * bits or the return is the number of transfers completed.
1466 * The lower 16 bits is the time in 0.5us ticks that the
1467 * transfers took to complete
1470 len = mgp->tx_boundary;
1472 dmabench = myri10ge_dma_alloc(mgp->dip, len,
1473 &myri10ge_rx_jumbo_dma_attr, &myri10ge_dev_access_attr,
1474 DDI_DMA_STREAMING, DDI_DMA_RDWR|DDI_DMA_STREAMING,
1475 &dmabench_dma, 1, DDI_DMA_DONTWAIT);
1476 mgp->read_dma = mgp->write_dma = mgp->read_write_dma = 0;
1477 if (dmabench == NULL) {
1478 cmn_err(CE_WARN, "%s dma benchmark aborted\n", mgp->name);
1479 return (ENOMEM);
1482 cmd.data0 = ntohl(dmabench_dma.low);
1483 cmd.data1 = ntohl(dmabench_dma.high);
1484 cmd.data2 = len * 0x10000;
1485 status = myri10ge_send_cmd(mgp, test_type, &cmd);
1486 if (status != 0) {
1487 test = "read";
1488 goto abort;
1490 mgp->read_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);
1492 cmd.data0 = ntohl(dmabench_dma.low);
1493 cmd.data1 = ntohl(dmabench_dma.high);
1494 cmd.data2 = len * 0x1;
1495 status = myri10ge_send_cmd(mgp, test_type, &cmd);
1496 if (status != 0) {
1497 test = "write";
1498 goto abort;
1500 mgp->write_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);
1502 cmd.data0 = ntohl(dmabench_dma.low);
1503 cmd.data1 = ntohl(dmabench_dma.high);
1504 cmd.data2 = len * 0x10001;
1505 status = myri10ge_send_cmd(mgp, test_type, &cmd);
1506 if (status != 0) {
1507 test = "read/write";
1508 goto abort;
1510 mgp->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
1511 (cmd.data0 & 0xffff);
1514 abort:
1515 myri10ge_dma_free(&dmabench_dma);
1516 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
1517 cmn_err(CE_WARN, "%s %s dma benchmark failed\n", mgp->name,
1518 test);
1519 return (status);
1522 static int
1523 myri10ge_reset(struct myri10ge_priv *mgp)
1525 myri10ge_cmd_t cmd;
1526 struct myri10ge_nic_stat *ethstat;
1527 struct myri10ge_slice_state *ss;
1528 int i, status;
1529 size_t bytes;
1531 /* send a reset command to the card to see if it is alive */
1532 (void) memset(&cmd, 0, sizeof (cmd));
1533 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd);
1534 if (status != 0) {
1535 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
1536 return (ENXIO);
1539 /* Now exchange information about interrupts */
1541 bytes = mgp->max_intr_slots * sizeof (*mgp->ss[0].rx_done.entry);
1542 cmd.data0 = (uint32_t)bytes;
1543 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
1546 * Even though we already know how many slices are supported
1547 * via myri10ge_probe_slices() MXGEFW_CMD_GET_MAX_RSS_QUEUES
1548 * has magic side effects, and must be called after a reset.
1549 * It must be called prior to calling any RSS related cmds,
1550 * including assigning an interrupt queue for anything but
1551 * slice 0. It must also be called *after*
1552 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1553 * the firmware to compute offsets.
1556 if (mgp->num_slices > 1) {
1558 /* ask the maximum number of slices it supports */
1559 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
1560 &cmd);
1561 if (status != 0) {
1562 cmn_err(CE_WARN,
1563 "%s: failed to get number of slices\n",
1564 mgp->name);
1565 return (status);
1569 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1570 * to setting up the interrupt queue DMA
1573 cmd.data0 = mgp->num_slices;
1574 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE |
1575 MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
1576 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
1577 &cmd);
1578 if (status != 0) {
1579 cmn_err(CE_WARN,
1580 "%s: failed to set number of slices\n",
1581 mgp->name);
1582 return (status);
1585 for (i = 0; i < mgp->num_slices; i++) {
1586 ss = &mgp->ss[i];
1587 cmd.data0 = ntohl(ss->rx_done.dma.low);
1588 cmd.data1 = ntohl(ss->rx_done.dma.high);
1589 cmd.data2 = i;
1590 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_DMA,
1591 &cmd);
1594 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
1595 for (i = 0; i < mgp->num_slices; i++) {
1596 ss = &mgp->ss[i];
1597 ss->irq_claim = (volatile unsigned int *)
1598 (void *)(mgp->sram + cmd.data0 + 8 * i);
1601 if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
1602 status |= myri10ge_send_cmd(mgp,
1603 MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd);
1604 mgp->irq_deassert = (uint32_t *)(void *)(mgp->sram + cmd.data0);
1607 status |= myri10ge_send_cmd(mgp,
1608 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
1609 mgp->intr_coal_delay_ptr = (uint32_t *)(void *)(mgp->sram + cmd.data0);
1611 if (status != 0) {
1612 cmn_err(CE_WARN, "%s: failed set interrupt parameters\n",
1613 mgp->name);
1614 return (status);
1617 *mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay);
1618 (void) myri10ge_dma_test(mgp, MXGEFW_DMA_TEST);
1620 /* reset mcp/driver shared state back to 0 */
1622 for (i = 0; i < mgp->num_slices; i++) {
1623 ss = &mgp->ss[i];
1624 bytes = mgp->max_intr_slots *
1625 sizeof (*mgp->ss[0].rx_done.entry);
1626 (void) memset(ss->rx_done.entry, 0, bytes);
1627 ss->tx.req = 0;
1628 ss->tx.done = 0;
1629 ss->tx.pkt_done = 0;
1630 ss->rx_big.cnt = 0;
1631 ss->rx_small.cnt = 0;
1632 ss->rx_done.idx = 0;
1633 ss->rx_done.cnt = 0;
1634 ss->rx_token = 0;
1635 ss->tx.watchdog_done = 0;
1636 ss->tx.watchdog_req = 0;
1637 ss->tx.active = 0;
1638 ss->tx.activate = 0;
1640 mgp->watchdog_rx_pause = 0;
1641 if (mgp->ksp_stat != NULL) {
1642 ethstat = (struct myri10ge_nic_stat *)mgp->ksp_stat->ks_data;
1643 ethstat->link_changes.value.ul = 0;
1645 status = myri10ge_m_unicst(mgp, mgp->mac_addr);
1646 myri10ge_change_promisc(mgp, 0);
1647 (void) myri10ge_change_pause(mgp, mgp->pause);
1648 return (status);
1651 static int
1652 myri10ge_init_toeplitz(struct myri10ge_priv *mgp)
1654 myri10ge_cmd_t cmd;
1655 int i, b, s, t, j;
1656 int status;
1657 uint32_t k[8];
1658 uint32_t tmp;
1659 uint8_t *key;
1661 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RSS_KEY_OFFSET,
1662 &cmd);
1663 if (status != 0) {
1664 cmn_err(CE_WARN, "%s: failed to get rss key\n",
1665 mgp->name);
1666 return (EIO);
1668 myri10ge_pio_copy32(mgp->rss_key,
1669 (uint32_t *)(void*)((char *)mgp->sram + cmd.data0),
1670 sizeof (mgp->rss_key));
1672 mgp->toeplitz_hash_table = kmem_alloc(sizeof (uint32_t) * 12 * 256,
1673 KM_SLEEP);
1674 key = (uint8_t *)mgp->rss_key;
1675 t = 0;
1676 for (b = 0; b < 12; b++) {
1677 for (s = 0; s < 8; s++) {
1678 /* Bits: b*8+s, ..., b*8+s+31 */
1679 k[s] = 0;
1680 for (j = 0; j < 32; j++) {
1681 int bit = b*8+s+j;
1682 bit = 0x1 & (key[bit / 8] >> (7 -(bit & 0x7)));
1683 k[s] |= bit << (31 - j);
1687 for (i = 0; i <= 0xff; i++) {
1688 tmp = 0;
1689 if (i & (1 << 7)) { tmp ^= k[0]; }
1690 if (i & (1 << 6)) { tmp ^= k[1]; }
1691 if (i & (1 << 5)) { tmp ^= k[2]; }
1692 if (i & (1 << 4)) { tmp ^= k[3]; }
1693 if (i & (1 << 3)) { tmp ^= k[4]; }
1694 if (i & (1 << 2)) { tmp ^= k[5]; }
1695 if (i & (1 << 1)) { tmp ^= k[6]; }
1696 if (i & (1 << 0)) { tmp ^= k[7]; }
1697 mgp->toeplitz_hash_table[t++] = tmp;
1700 return (0);
1703 static inline struct myri10ge_slice_state *
1704 myri10ge_toeplitz_send_hash(struct myri10ge_priv *mgp, struct ip *ip)
1706 struct tcphdr *hdr;
1707 uint32_t saddr, daddr;
1708 uint32_t hash, slice;
1709 uint32_t *table = mgp->toeplitz_hash_table;
1710 uint16_t src, dst;
1713 * Note hashing order is reversed from how it is done
1714 * in the NIC, so as to generate the same hash value
1715 * for the connection to try to keep connections CPU local
1718 /* hash on IPv4 src/dst address */
1719 saddr = ntohl(ip->ip_src.s_addr);
1720 daddr = ntohl(ip->ip_dst.s_addr);
1721 hash = table[(256 * 0) + ((daddr >> 24) & 0xff)];
1722 hash ^= table[(256 * 1) + ((daddr >> 16) & 0xff)];
1723 hash ^= table[(256 * 2) + ((daddr >> 8) & 0xff)];
1724 hash ^= table[(256 * 3) + ((daddr) & 0xff)];
1725 hash ^= table[(256 * 4) + ((saddr >> 24) & 0xff)];
1726 hash ^= table[(256 * 5) + ((saddr >> 16) & 0xff)];
1727 hash ^= table[(256 * 6) + ((saddr >> 8) & 0xff)];
1728 hash ^= table[(256 * 7) + ((saddr) & 0xff)];
1729 /* hash on TCP port, if required */
1730 if ((myri10ge_rss_hash & MXGEFW_RSS_HASH_TYPE_TCP_IPV4) &&
1731 ip->ip_p == IPPROTO_TCP) {
1732 hdr = (struct tcphdr *)(void *)
1733 (((uint8_t *)ip) + (ip->ip_hl << 2));
1734 src = ntohs(hdr->th_sport);
1735 dst = ntohs(hdr->th_dport);
1737 hash ^= table[(256 * 8) + ((dst >> 8) & 0xff)];
1738 hash ^= table[(256 * 9) + ((dst) & 0xff)];
1739 hash ^= table[(256 * 10) + ((src >> 8) & 0xff)];
1740 hash ^= table[(256 * 11) + ((src) & 0xff)];
1742 slice = (mgp->num_slices - 1) & hash;
1743 return (&mgp->ss[slice]);
1747 static inline struct myri10ge_slice_state *
1748 myri10ge_simple_send_hash(struct myri10ge_priv *mgp, struct ip *ip)
1750 struct tcphdr *hdr;
1751 uint32_t slice, hash_val;
1754 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) {
1755 return (&mgp->ss[0]);
1757 hdr = (struct tcphdr *)(void *)(((uint8_t *)ip) + (ip->ip_hl << 2));
1760 * Use the second byte of the *destination* address for
1761 * MXGEFW_RSS_HASH_TYPE_SRC_PORT, so as to match NIC's hashing
1763 hash_val = ntohs(hdr->th_dport) & 0xff;
1764 if (myri10ge_rss_hash == MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT)
1765 hash_val += ntohs(hdr->th_sport) & 0xff;
1767 slice = (mgp->num_slices - 1) & hash_val;
1768 return (&mgp->ss[slice]);
1771 static inline struct myri10ge_slice_state *
1772 myri10ge_send_hash(struct myri10ge_priv *mgp, mblk_t *mp)
1774 unsigned int slice = 0;
1775 struct ether_header *eh;
1776 struct ether_vlan_header *vh;
1777 struct ip *ip;
1778 int ehl, ihl;
1780 if (mgp->num_slices == 1)
1781 return (&mgp->ss[0]);
1783 if (myri10ge_tx_hash == 0) {
1784 slice = CPU->cpu_id & (mgp->num_slices - 1);
1785 return (&mgp->ss[slice]);
1789 * ensure it is a TCP or UDP over IPv4 packet, and that the
1790 * headers are in the 1st mblk. Otherwise, punt
1792 ehl = sizeof (*eh);
1793 ihl = sizeof (*ip);
1794 if ((MBLKL(mp)) < (ehl + ihl + 8))
1795 return (&mgp->ss[0]);
1796 eh = (struct ether_header *)(void *)mp->b_rptr;
1797 ip = (struct ip *)(void *)(eh + 1);
1798 if (eh->ether_type != BE_16(ETHERTYPE_IP)) {
1799 if (eh->ether_type != BE_16(ETHERTYPE_VLAN))
1800 return (&mgp->ss[0]);
1801 vh = (struct ether_vlan_header *)(void *)mp->b_rptr;
1802 if (vh->ether_type != BE_16(ETHERTYPE_IP))
1803 return (&mgp->ss[0]);
1804 ehl += 4;
1805 ip = (struct ip *)(void *)(vh + 1);
1807 ihl = ip->ip_hl << 2;
1808 if (MBLKL(mp) < (ehl + ihl + 8))
1809 return (&mgp->ss[0]);
1810 switch (myri10ge_rss_hash) {
1811 case MXGEFW_RSS_HASH_TYPE_IPV4:
1812 /* fallthru */
1813 case MXGEFW_RSS_HASH_TYPE_TCP_IPV4:
1814 /* fallthru */
1815 case (MXGEFW_RSS_HASH_TYPE_IPV4|MXGEFW_RSS_HASH_TYPE_TCP_IPV4):
1816 return (myri10ge_toeplitz_send_hash(mgp, ip));
1817 case MXGEFW_RSS_HASH_TYPE_SRC_PORT:
1818 /* fallthru */
1819 case MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT:
1820 return (myri10ge_simple_send_hash(mgp, ip));
1821 default:
1822 break;
1824 return (&mgp->ss[0]);
1827 static int
1828 myri10ge_setup_slice(struct myri10ge_slice_state *ss)
1830 struct myri10ge_priv *mgp = ss->mgp;
1831 myri10ge_cmd_t cmd;
1832 int tx_ring_size, rx_ring_size;
1833 int tx_ring_entries, rx_ring_entries;
1834 int slice, status;
1835 int allocated, idx;
1836 size_t bytes;
1838 slice = ss - mgp->ss;
1839 cmd.data0 = slice;
1840 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
1841 tx_ring_size = cmd.data0;
1842 cmd.data0 = slice;
1843 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
1844 if (status != 0)
1845 return (status);
1846 rx_ring_size = cmd.data0;
1848 tx_ring_entries = tx_ring_size / sizeof (struct mcp_kreq_ether_send);
1849 rx_ring_entries = rx_ring_size / sizeof (struct mcp_dma_addr);
1850 ss->tx.mask = tx_ring_entries - 1;
1851 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
1853 /* get the lanai pointers to the send and receive rings */
1855 cmd.data0 = slice;
1856 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
1857 ss->tx.lanai = (mcp_kreq_ether_send_t *)(void *)(mgp->sram + cmd.data0);
1858 if (mgp->num_slices > 1) {
1859 ss->tx.go = (char *)mgp->sram + MXGEFW_ETH_SEND_GO + 64 * slice;
1860 ss->tx.stop = (char *)mgp->sram + MXGEFW_ETH_SEND_STOP +
1861 64 * slice;
1862 } else {
1863 ss->tx.go = NULL;
1864 ss->tx.stop = NULL;
1867 cmd.data0 = slice;
1868 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
1869 ss->rx_small.lanai = (mcp_kreq_ether_recv_t *)
1870 (void *)(mgp->sram + cmd.data0);
1872 cmd.data0 = slice;
1873 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
1874 ss->rx_big.lanai = (mcp_kreq_ether_recv_t *)(void *)
1875 (mgp->sram + cmd.data0);
1877 if (status != 0) {
1878 cmn_err(CE_WARN,
1879 "%s: failed to get ring sizes or locations\n", mgp->name);
1880 return (status);
1883 status = ENOMEM;
1884 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
1885 ss->rx_small.shadow = kmem_zalloc(bytes, KM_SLEEP);
1886 if (ss->rx_small.shadow == NULL)
1887 goto abort;
1888 (void) memset(ss->rx_small.shadow, 0, bytes);
1890 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
1891 ss->rx_big.shadow = kmem_zalloc(bytes, KM_SLEEP);
1892 if (ss->rx_big.shadow == NULL)
1893 goto abort_with_rx_small_shadow;
1894 (void) memset(ss->rx_big.shadow, 0, bytes);
1896 /* allocate the host info rings */
1898 bytes = tx_ring_entries * sizeof (*ss->tx.info);
1899 ss->tx.info = kmem_zalloc(bytes, KM_SLEEP);
1900 if (ss->tx.info == NULL)
1901 goto abort_with_rx_big_shadow;
1902 (void) memset(ss->tx.info, 0, bytes);
1904 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
1905 ss->rx_small.info = kmem_zalloc(bytes, KM_SLEEP);
1906 if (ss->rx_small.info == NULL)
1907 goto abort_with_tx_info;
1908 (void) memset(ss->rx_small.info, 0, bytes);
1910 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
1911 ss->rx_big.info = kmem_zalloc(bytes, KM_SLEEP);
1912 if (ss->rx_big.info == NULL)
1913 goto abort_with_rx_small_info;
1914 (void) memset(ss->rx_big.info, 0, bytes);
1916 ss->tx.stall = ss->tx.sched = 0;
1917 ss->tx.stall_early = ss->tx.stall_late = 0;
1919 ss->jbufs_for_smalls = 1 + (1 + ss->rx_small.mask) /
1920 (myri10ge_mtu / (myri10ge_small_bytes + MXGEFW_PAD));
1922 allocated = myri10ge_add_jbufs(ss,
1923 myri10ge_bigbufs_initial + ss->jbufs_for_smalls, 1);
1924 if (allocated < ss->jbufs_for_smalls + myri10ge_bigbufs_initial) {
1925 cmn_err(CE_WARN,
1926 "%s: Could not allocate enough receive buffers (%d/%d)\n",
1927 mgp->name, allocated,
1928 myri10ge_bigbufs_initial + ss->jbufs_for_smalls);
1929 goto abort_with_jumbos;
1932 myri10ge_carve_up_jbufs_into_small_ring(ss);
1933 ss->j_rx_cnt = 0;
1935 mutex_enter(&ss->jpool.mtx);
1936 if (allocated < rx_ring_entries)
1937 ss->jpool.low_water = allocated / 4;
1938 else
1939 ss->jpool.low_water = rx_ring_entries / 2;
1942 * invalidate the big receive ring in case we do not
1943 * allocate sufficient jumbos to fill it
1945 (void) memset(ss->rx_big.shadow, 1,
1946 (ss->rx_big.mask + 1) * sizeof (ss->rx_big.shadow[0]));
1947 for (idx = 7; idx <= ss->rx_big.mask; idx += 8) {
1948 myri10ge_submit_8rx(&ss->rx_big.lanai[idx - 7],
1949 &ss->rx_big.shadow[idx - 7]);
1950 mb();
1954 myri10ge_restock_jumbos(ss);
1956 for (idx = 7; idx <= ss->rx_small.mask; idx += 8) {
1957 myri10ge_submit_8rx(&ss->rx_small.lanai[idx - 7],
1958 &ss->rx_small.shadow[idx - 7]);
1959 mb();
1961 ss->rx_small.cnt = ss->rx_small.mask + 1;
1963 mutex_exit(&ss->jpool.mtx);
1965 status = myri10ge_prepare_tx_ring(ss);
1967 if (status != 0)
1968 goto abort_with_small_jbufs;
1970 cmd.data0 = ntohl(ss->fw_stats_dma.low);
1971 cmd.data1 = ntohl(ss->fw_stats_dma.high);
1972 cmd.data2 = sizeof (mcp_irq_data_t);
1973 cmd.data2 |= (slice << 16);
1974 bzero(ss->fw_stats, sizeof (*ss->fw_stats));
1975 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
1976 if (status == ENOSYS) {
1977 cmd.data0 = ntohl(ss->fw_stats_dma.low) +
1978 offsetof(mcp_irq_data_t, send_done_count);
1979 cmd.data1 = ntohl(ss->fw_stats_dma.high);
1980 status = myri10ge_send_cmd(mgp,
1981 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, &cmd);
1983 if (status) {
1984 cmn_err(CE_WARN, "%s: Couldn't set stats DMA\n", mgp->name);
1985 goto abort_with_tx;
1988 return (0);
1990 abort_with_tx:
1991 myri10ge_unprepare_tx_ring(ss);
1993 abort_with_small_jbufs:
1994 myri10ge_release_small_jbufs(ss);
1996 abort_with_jumbos:
1997 if (allocated != 0) {
1998 mutex_enter(&ss->jpool.mtx);
1999 ss->jpool.low_water = 0;
2000 mutex_exit(&ss->jpool.mtx);
2001 myri10ge_unstock_jumbos(ss);
2002 myri10ge_remove_jbufs(ss);
2005 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
2006 kmem_free(ss->rx_big.info, bytes);
2008 abort_with_rx_small_info:
2009 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
2010 kmem_free(ss->rx_small.info, bytes);
2012 abort_with_tx_info:
2013 bytes = tx_ring_entries * sizeof (*ss->tx.info);
2014 kmem_free(ss->tx.info, bytes);
2016 abort_with_rx_big_shadow:
2017 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
2018 kmem_free(ss->rx_big.shadow, bytes);
2020 abort_with_rx_small_shadow:
2021 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
2022 kmem_free(ss->rx_small.shadow, bytes);
2023 abort:
2024 return (status);
2028 static void
2029 myri10ge_teardown_slice(struct myri10ge_slice_state *ss)
2031 int tx_ring_entries, rx_ring_entries;
2032 size_t bytes;
2034 /* ignore slices that have not been fully setup */
2035 if (ss->tx.cp == NULL)
2036 return;
2037 /* Free the TX copy buffers */
2038 myri10ge_unprepare_tx_ring(ss);
2040 /* stop passing returned buffers to firmware */
2042 mutex_enter(&ss->jpool.mtx);
2043 ss->jpool.low_water = 0;
2044 mutex_exit(&ss->jpool.mtx);
2045 myri10ge_release_small_jbufs(ss);
2047 /* Release the free jumbo frame pool */
2048 myri10ge_unstock_jumbos(ss);
2049 myri10ge_remove_jbufs(ss);
2051 rx_ring_entries = ss->rx_big.mask + 1;
2052 tx_ring_entries = ss->tx.mask + 1;
2054 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
2055 kmem_free(ss->rx_big.info, bytes);
2057 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
2058 kmem_free(ss->rx_small.info, bytes);
2060 bytes = tx_ring_entries * sizeof (*ss->tx.info);
2061 kmem_free(ss->tx.info, bytes);
2063 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
2064 kmem_free(ss->rx_big.shadow, bytes);
2066 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
2067 kmem_free(ss->rx_small.shadow, bytes);
2070 static int
2071 myri10ge_start_locked(struct myri10ge_priv *mgp)
2073 myri10ge_cmd_t cmd;
2074 int status, big_pow2, i;
2075 volatile uint8_t *itable;
2077 status = DDI_SUCCESS;
2078 /* Allocate DMA resources and receive buffers */
2080 status = myri10ge_reset(mgp);
2081 if (status != 0) {
2082 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
2083 return (DDI_FAILURE);
2086 if (mgp->num_slices > 1) {
2087 cmd.data0 = mgp->num_slices;
2088 cmd.data1 = 1; /* use MSI-X */
2089 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
2090 &cmd);
2091 if (status != 0) {
2092 cmn_err(CE_WARN,
2093 "%s: failed to set number of slices\n",
2094 mgp->name);
2095 goto abort_with_nothing;
2097 /* setup the indirection table */
2098 cmd.data0 = mgp->num_slices;
2099 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
2100 &cmd);
2102 status |= myri10ge_send_cmd(mgp,
2103 MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd);
2104 if (status != 0) {
2105 cmn_err(CE_WARN,
2106 "%s: failed to setup rss tables\n", mgp->name);
2109 /* just enable an identity mapping */
2110 itable = mgp->sram + cmd.data0;
2111 for (i = 0; i < mgp->num_slices; i++)
2112 itable[i] = (uint8_t)i;
2114 if (myri10ge_rss_hash & MYRI10GE_TOEPLITZ_HASH) {
2115 status = myri10ge_init_toeplitz(mgp);
2116 if (status != 0) {
2117 cmn_err(CE_WARN, "%s: failed to setup "
2118 "toeplitz tx hash table", mgp->name);
2119 goto abort_with_nothing;
2122 cmd.data0 = 1;
2123 cmd.data1 = myri10ge_rss_hash;
2124 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_ENABLE,
2125 &cmd);
2126 if (status != 0) {
2127 cmn_err(CE_WARN,
2128 "%s: failed to enable slices\n", mgp->name);
2129 goto abort_with_toeplitz;
2133 for (i = 0; i < mgp->num_slices; i++) {
2134 status = myri10ge_setup_slice(&mgp->ss[i]);
2135 if (status != 0)
2136 goto abort_with_slices;
2140 * Tell the MCP how many buffers it has, and to
2141 * bring the ethernet interface up
2143 * Firmware needs the big buff size as a power of 2. Lie and
2144 * tell it the buffer is larger, because we only use 1
2145 * buffer/pkt, and the mtu will prevent overruns
2147 big_pow2 = myri10ge_mtu + MXGEFW_PAD;
2148 while (!ISP2(big_pow2))
2149 big_pow2++;
2151 /* now give firmware buffers sizes, and MTU */
2152 cmd.data0 = myri10ge_mtu;
2153 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_MTU, &cmd);
2154 cmd.data0 = myri10ge_small_bytes;
2155 status |=
2156 myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd);
2157 cmd.data0 = big_pow2;
2158 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
2159 if (status) {
2160 cmn_err(CE_WARN, "%s: Couldn't set buffer sizes\n", mgp->name);
2161 goto abort_with_slices;
2165 cmd.data0 = 1;
2166 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_TSO_MODE, &cmd);
2167 if (status) {
2168 cmn_err(CE_WARN, "%s: unable to setup TSO (%d)\n",
2169 mgp->name, status);
2170 } else {
2171 mgp->features |= MYRI10GE_TSO;
2174 mgp->link_state = -1;
2175 mgp->rdma_tags_available = 15;
2176 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_UP, &cmd);
2177 if (status) {
2178 cmn_err(CE_WARN, "%s: unable to start ethernet\n", mgp->name);
2179 goto abort_with_slices;
2181 mgp->running = MYRI10GE_ETH_RUNNING;
2182 return (DDI_SUCCESS);
2184 abort_with_slices:
2185 for (i = 0; i < mgp->num_slices; i++)
2186 myri10ge_teardown_slice(&mgp->ss[i]);
2188 mgp->running = MYRI10GE_ETH_STOPPED;
2190 abort_with_toeplitz:
2191 if (mgp->toeplitz_hash_table != NULL) {
2192 kmem_free(mgp->toeplitz_hash_table,
2193 sizeof (uint32_t) * 12 * 256);
2194 mgp->toeplitz_hash_table = NULL;
2197 abort_with_nothing:
2198 return (DDI_FAILURE);
2201 static void
2202 myri10ge_stop_locked(struct myri10ge_priv *mgp)
2204 int status, old_down_cnt;
2205 myri10ge_cmd_t cmd;
2206 int wait_time = 10;
2207 int i, polling;
2209 old_down_cnt = mgp->down_cnt;
2210 mb();
2211 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
2212 if (status) {
2213 cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name);
2216 while (old_down_cnt == *((volatile int *)&mgp->down_cnt)) {
2217 ddi_sleep(1);
2218 wait_time--;
2219 if (wait_time == 0)
2220 break;
2222 again:
2223 if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) {
2224 cmn_err(CE_WARN, "%s: didn't get down irq\n", mgp->name);
2225 for (i = 0; i < mgp->num_slices; i++) {
2227 * take and release the rx lock to ensure
2228 * that no interrupt thread is blocked
2229 * elsewhere in the stack, preventing
2230 * completion
2233 mutex_enter(&mgp->ss[i].rx_lock);
2234 printf("%s: slice %d rx irq idle\n",
2235 mgp->name, i);
2236 mutex_exit(&mgp->ss[i].rx_lock);
2238 /* verify that the poll handler is inactive */
2239 mutex_enter(&mgp->ss->poll_lock);
2240 polling = mgp->ss->rx_polling;
2241 mutex_exit(&mgp->ss->poll_lock);
2242 if (polling) {
2243 printf("%s: slice %d is polling\n",
2244 mgp->name, i);
2245 ddi_sleep(1);
2246 goto again;
2249 ddi_sleep(1);
2250 if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) {
2251 cmn_err(CE_WARN, "%s: Never got down irq\n", mgp->name);
2255 for (i = 0; i < mgp->num_slices; i++)
2256 myri10ge_teardown_slice(&mgp->ss[i]);
2258 if (mgp->toeplitz_hash_table != NULL) {
2259 kmem_free(mgp->toeplitz_hash_table,
2260 sizeof (uint32_t) * 12 * 256);
2261 mgp->toeplitz_hash_table = NULL;
2263 mgp->running = MYRI10GE_ETH_STOPPED;
2266 static int
2267 myri10ge_m_start(void *arg)
2269 struct myri10ge_priv *mgp = arg;
2270 int status;
2272 mutex_enter(&mgp->intrlock);
2274 if (mgp->running != MYRI10GE_ETH_STOPPED) {
2275 mutex_exit(&mgp->intrlock);
2276 return (DDI_FAILURE);
2278 status = myri10ge_start_locked(mgp);
2279 mutex_exit(&mgp->intrlock);
2281 if (status != DDI_SUCCESS)
2282 return (status);
2284 /* start the watchdog timer */
2285 mgp->timer_id = timeout(myri10ge_watchdog, mgp,
2286 mgp->timer_ticks);
2287 return (DDI_SUCCESS);
/*
 * GLDv3 mc_stop(9E) entry point: transition the device from RUNNING
 * to stopped, cancelling the watchdog first.
 */
static void
myri10ge_m_stop(void *arg)
{
	struct myri10ge_priv *mgp = arg;

	mutex_enter(&mgp->intrlock);
	/* if the device not running give up */
	if (mgp->running != MYRI10GE_ETH_RUNNING) {
		mutex_exit(&mgp->intrlock);
		return;
	}

	mgp->running = MYRI10GE_ETH_STOPPING;
	/*
	 * Drop intrlock across untimeout(): untimeout() waits for an
	 * in-flight callback to finish.  NOTE(review): presumably the
	 * watchdog callback acquires intrlock, so holding it here
	 * would deadlock -- confirm against myri10ge_watchdog.
	 */
	mutex_exit(&mgp->intrlock);
	(void) untimeout(mgp->timer_id);
	mutex_enter(&mgp->intrlock);
	myri10ge_stop_locked(mgp);
	mutex_exit(&mgp->intrlock);
}
/*
 * Fix up the partial (one's-complement) checksum the NIC computed for a
 * received frame, attach it to the mblk via mac_hcksum_set(), and count
 * broadcast/multicast receives.  The firmware checksums everything after
 * where it believed the ethernet header ended, so a VLAN tag and (for
 * IPv6) the IP header must be subtracted back out of the partial sum.
 * Returns without setting hcksum info for non-TCP/UDP or padded frames.
 */
static inline void
myri10ge_rx_csum(mblk_t *mp, struct myri10ge_rx_ring_stats *s, uint32_t csum)
{
	struct ether_header *eh;
	struct ip *ip;
	struct ip6_hdr *ip6;
	uint32_t start, stuff, end, partial, hdrlen;


	csum = ntohs((uint16_t)csum);
	eh = (struct ether_header *)(void *)mp->b_rptr;
	hdrlen = sizeof (*eh);
	/* group bit set in destination: broadcast or multicast */
	if (eh->ether_dhost.ether_addr_octet[0] & 1) {
		if (0 == (bcmp(eh->ether_dhost.ether_addr_octet,
		    myri10ge_broadcastaddr, sizeof (eh->ether_dhost))))
			s->brdcstrcv++;
		else
			s->multircv++;
	}

	if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) {
		/*
		 * fix checksum by subtracting 4 bytes after what the
		 * firmware thought was the end of the ether hdr
		 */
		partial = *(uint32_t *)
		    (void *)(mp->b_rptr + ETHERNET_HEADER_SIZE);
		csum += ~partial;
		csum += (csum < ~partial);	/* propagate carry */
		/* fold twice back down to 16 bits */
		csum = (csum >> 16) + (csum & 0xFFFF);
		csum = (csum >> 16) + (csum & 0xFFFF);
		hdrlen += VLAN_TAGSZ;
	}

	if (eh->ether_type == BE_16(ETHERTYPE_IP)) {
		ip = (struct ip *)(void *)(mp->b_rptr + hdrlen);
		start = ip->ip_hl << 2;

		if (ip->ip_p == IPPROTO_TCP)
			stuff = start + offsetof(struct tcphdr, th_sum);
		else if (ip->ip_p == IPPROTO_UDP)
			stuff = start + offsetof(struct udphdr, uh_sum);
		else
			return;
		end = ntohs(ip->ip_len);
	} else if (eh->ether_type == BE_16(ETHERTYPE_IPV6)) {
		ip6 = (struct ip6_hdr *)(void *)(mp->b_rptr + hdrlen);
		start = sizeof (*ip6);
		if (ip6->ip6_nxt == IPPROTO_TCP) {
			stuff = start + offsetof(struct tcphdr, th_sum);
		} else if (ip6->ip6_nxt == IPPROTO_UDP)
			stuff = start + offsetof(struct udphdr, uh_sum);
		else
			return;
		end = start + ntohs(ip6->ip6_plen);
		/*
		 * IPv6 headers do not contain a checksum, and hence
		 * do not checksum to zero, so they don't "fall out"
		 * of the partial checksum calculation like IPv4
		 * headers do.  We need to fix the partial checksum by
		 * subtracting the checksum of the IPv6 header.
		 */

		partial = myri10ge_csum_generic((uint16_t *)ip6, sizeof (*ip6));
		csum += ~partial;
		csum += (csum < ~partial);
		csum = (csum >> 16) + (csum & 0xFFFF);
		csum = (csum >> 16) + (csum & 0xFFFF);
	} else {
		return;
	}

	if (MBLKL(mp) > hdrlen + end) {
		/* padded frame, so hw csum may be invalid */
		return;
	}
	mac_hcksum_set(mp, start, stuff, end, csum, HCK_PARTIALCKSUM);
}
/*
 * Receive a frame that arrived in a small (copy) buffer.  The payload
 * is always copied into a freshly allocated mblk so the ring buffer can
 * be recycled in place; every 8th slot the freed buffers are handed
 * back to the NIC in a single 64-byte write.  Returns the new mblk, or
 * NULL on allocation failure.
 */
static mblk_t *
myri10ge_rx_done_small(struct myri10ge_slice_state *ss, uint32_t len,
    uint32_t csum)
{
	mblk_t *mp;
	myri10ge_rx_ring_t *rx;
	int idx;

	rx = &ss->rx_small;
	idx = rx->cnt & rx->mask;
	ss->rx_small.cnt++;

	/* allocate a new buffer to pass up the stack */
	mp = allocb(len + MXGEFW_PAD, 0);
	if (mp == NULL) {
		MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_small_nobuf);
		goto abort;
	}
	bcopy(ss->rx_small.info[idx].ptr,
	    (caddr_t)mp->b_wptr, len + MXGEFW_PAD);
	mp->b_wptr += len + MXGEFW_PAD;
	/* skip the firmware's alignment pad at the front */
	mp->b_rptr += MXGEFW_PAD;

	ss->rx_stats.ibytes += len;
	ss->rx_stats.ipackets += 1;
	myri10ge_rx_csum(mp, &ss->rx_stats, csum);

abort:
	/* restock the NIC eight receive slots at a time */
	if ((idx & 7) == 7) {
		myri10ge_submit_8rx(&rx->lanai[idx - 7],
		    &rx->shadow[idx - 7]);
	}

	return (mp);
}
/*
 * Receive a frame that arrived in a jumbo buffer.  Normally the jumbo
 * buffer itself is loaned up the stack via desballoc(); when the jumbo
 * pool runs low the payload is copied into a fresh mblk instead so the
 * buffer can be returned to the NIC immediately.  Returns NULL (and
 * frees the jumbo buffer) on failure.
 */
static mblk_t *
myri10ge_rx_done_big(struct myri10ge_slice_state *ss, uint32_t len,
    uint32_t csum)
{
	struct myri10ge_jpool_stuff *jpool;
	struct myri10ge_jpool_entry *j;
	mblk_t *mp;
	int idx, num_owned_by_mcp;

	jpool = &ss->jpool;
	idx = ss->j_rx_cnt & ss->rx_big.mask;
	j = ss->rx_big.info[idx].j;

	if (j == NULL) {
		printf("%s: null j at idx=%d, rx_big.cnt = %d, j_rx_cnt=%d\n",
		    ss->mgp->name, idx, ss->rx_big.cnt, ss->j_rx_cnt);
		return (NULL);
	}


	/* take ownership of the jumbo buffer out of the ring slot */
	ss->rx_big.info[idx].j = NULL;
	ss->j_rx_cnt++;


	/*
	 * Check to see if we are low on rx buffers.
	 * Note that we must leave at least 8 free so there are
	 * enough to free in a single 64-byte write.
	 */
	num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt;
	if (num_owned_by_mcp < jpool->low_water) {
		mutex_enter(&jpool->mtx);
		myri10ge_restock_jumbos(ss);
		mutex_exit(&jpool->mtx);
		num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt;
		/* if we are still low, then we have to copy */
		if (num_owned_by_mcp < 16) {
			MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_copy);
			/* allocate a new buffer to pass up the stack */
			mp = allocb(len + MXGEFW_PAD, 0);
			if (mp == NULL) {
				goto abort;
			}
			bcopy(j->buf,
			    (caddr_t)mp->b_wptr, len + MXGEFW_PAD);
			myri10ge_jfree_rtn(j);
			/* push buffer back to NIC */
			mutex_enter(&jpool->mtx);
			myri10ge_restock_jumbos(ss);
			mutex_exit(&jpool->mtx);
			goto set_len;
		}
	}

	/* loan our buffer to the stack */
	mp = desballoc((unsigned char *)j->buf, myri10ge_mtu, 0, &j->free_func);
	if (mp == NULL) {
		goto abort;
	}

set_len:
	mp->b_rptr += MXGEFW_PAD;
	mp->b_wptr = ((unsigned char *) mp->b_rptr + len);

	ss->rx_stats.ibytes += len;
	ss->rx_stats.ipackets += 1;
	myri10ge_rx_csum(mp, &ss->rx_stats, csum);

	return (mp);

abort:
	myri10ge_jfree_rtn(j);
	MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_big_nobuf);
	return (NULL);
}
/*
 * Free all transmit buffers up until the specified index
 *
 * Walks the tx ring from tx->done up to the firmware's reported
 * completion index, unbinding DMA handles, freeing mblks, and
 * accumulating per-packet stats.  Wakes a stalled tx queue once half
 * the ring is free, and tells the NIC to stop polling an idle ring.
 */
static inline void
myri10ge_tx_done(struct myri10ge_slice_state *ss, uint32_t mcp_index)
{
	myri10ge_tx_ring_t *tx;
	struct myri10ge_tx_dma_handle_head handles;
	int idx;
	int limit = 0;

	tx = &ss->tx;
	handles.head = NULL;
	handles.tail = NULL;
	while (tx->pkt_done != (int)mcp_index) {
		idx = tx->done & tx->mask;

		/*
		 * mblk & DMA handle attached only to first slot
		 * per buffer in the packet
		 */

		if (tx->info[idx].m) {
			(void) ddi_dma_unbind_handle(tx->info[idx].handle->h);
			/* collect handles to batch-free after the loop */
			tx->info[idx].handle->next = handles.head;
			handles.head = tx->info[idx].handle;
			if (handles.tail == NULL)
				handles.tail = tx->info[idx].handle;
			freeb(tx->info[idx].m);
			tx->info[idx].m = 0;
			tx->info[idx].handle = 0;
		}
		/* a non-zero opackets marks the packet's stats slot */
		if (tx->info[idx].ostat.opackets != 0) {
			tx->stats.multixmt += tx->info[idx].ostat.multixmt;
			tx->stats.brdcstxmt += tx->info[idx].ostat.brdcstxmt;
			tx->stats.obytes += tx->info[idx].ostat.obytes;
			tx->stats.opackets += tx->info[idx].ostat.opackets;
			tx->info[idx].stat.un.all = 0;
			tx->pkt_done++;
		}

		tx->done++;
		/*
		 * if we stalled the queue, wake it.  But Wait until
		 * we have at least 1/2 our slots free.
		 */
		if ((tx->req - tx->done) < (tx->mask >> 1) &&
		    tx->stall != tx->sched) {
			mutex_enter(&ss->tx.lock);
			tx->sched = tx->stall;
			mutex_exit(&ss->tx.lock);
			mac_tx_ring_update(ss->mgp->mh, tx->rh);
		}

		/* limit potential for livelock */
		if (unlikely(++limit > 2 * tx->mask))
			break;
	}
	if (tx->req == tx->done && tx->stop != NULL) {
		/*
		 * Nic has sent all pending requests, allow it
		 * to stop polling this queue
		 */
		mutex_enter(&tx->lock);
		/* re-check under the lock before writing the stop word */
		if (tx->req == tx->done && tx->active) {
			*(int *)(void *)tx->stop = 1;
			tx->active = 0;
			mb();
		}
		mutex_exit(&tx->lock);
	}
	if (handles.head != NULL)
		myri10ge_free_tx_handles(tx, &handles);
}
2580 static void
2581 myri10ge_mbl_init(struct myri10ge_mblk_list *mbl)
2583 mbl->head = NULL;
2584 mbl->tail = &mbl->head;
2585 mbl->cnt = 0;
2588 /*ARGSUSED*/
2589 void
2590 myri10ge_mbl_append(struct myri10ge_slice_state *ss,
2591 struct myri10ge_mblk_list *mbl, mblk_t *mp)
2593 *(mbl->tail) = mp;
2594 mbl->tail = &mp->b_next;
2595 mp->b_next = NULL;
2596 mbl->cnt++;
/*
 * Drain the slice's receive completion ring, building a chain of
 * received mblks in *mbl.  "limit" is a byte budget guarding against
 * livelock; *stop is polled so the caller can abort the scan (used to
 * hand the ring over to the polling path).  Any active LRO flows are
 * flushed into the chain before returning.
 */
static inline void
myri10ge_clean_rx_done(struct myri10ge_slice_state *ss,
    struct myri10ge_mblk_list *mbl, int limit, boolean_t *stop)
{
	myri10ge_rx_done_t *rx_done = &ss->rx_done;
	struct myri10ge_priv *mgp = ss->mgp;
	mblk_t *mp;
	struct lro_entry *lro;
	uint16_t length;
	uint16_t checksum;


	/* a zero length entry means "not yet written by the NIC" */
	while (rx_done->entry[rx_done->idx].length != 0) {
		if (unlikely (*stop)) {
			break;
		}
		length = ntohs(rx_done->entry[rx_done->idx].length);
		/* strip the RSS hash bits folded into the length field */
		length &= (~MXGEFW_RSS_HASH_MASK);

		/* limit potential for livelock */
		limit -= length;
		if (unlikely(limit < 0))
			break;

		rx_done->entry[rx_done->idx].length = 0;
		checksum = ntohs(rx_done->entry[rx_done->idx].checksum);
		if (length <= myri10ge_small_bytes)
			mp = myri10ge_rx_done_small(ss, length, checksum);
		else
			mp = myri10ge_rx_done_big(ss, length, checksum);
		if (mp != NULL) {
			/* LRO either consumes the mblk or we chain it */
			if (!myri10ge_lro ||
			    0 != myri10ge_lro_rx(ss, mp, checksum, mbl))
				myri10ge_mbl_append(ss, mbl, mp);
		}
		rx_done->cnt++;
		rx_done->idx = rx_done->cnt & (mgp->max_intr_slots - 1);
	}
	/* flush any LRO flows still being accumulated */
	while (ss->lro_active != NULL) {
		lro = ss->lro_active;
		ss->lro_active = lro->next;
		myri10ge_lro_flush(ss, lro, mbl);
	}
}
2645 static void
2646 myri10ge_intr_rx(struct myri10ge_slice_state *ss)
2648 uint64_t gen;
2649 struct myri10ge_mblk_list mbl;
2651 myri10ge_mbl_init(&mbl);
2652 if (mutex_tryenter(&ss->rx_lock) == 0)
2653 return;
2654 gen = ss->rx_gen_num;
2655 myri10ge_clean_rx_done(ss, &mbl, MYRI10GE_POLL_NULL,
2656 &ss->rx_polling);
2657 if (mbl.head != NULL)
2658 mac_rx_ring(ss->mgp->mh, ss->rx_rh, mbl.head, gen);
2659 mutex_exit(&ss->rx_lock);
2663 static mblk_t *
2664 myri10ge_poll_rx(void *arg, int bytes)
2666 struct myri10ge_slice_state *ss = arg;
2667 struct myri10ge_mblk_list mbl;
2668 boolean_t dummy = B_FALSE;
2670 if (bytes == 0)
2671 return (NULL);
2673 myri10ge_mbl_init(&mbl);
2674 mutex_enter(&ss->rx_lock);
2675 if (ss->rx_polling)
2676 myri10ge_clean_rx_done(ss, &mbl, bytes, &dummy);
2677 else
2678 printf("%d: poll_rx: token=%d, polling=%d\n", (int)(ss -
2679 ss->mgp->ss), ss->rx_token, ss->rx_polling);
2680 mutex_exit(&ss->rx_lock);
2681 return (mbl.head);
/*
 * Per-slice interrupt handler.  The firmware DMAs an mcp_irq_data_t
 * into host memory; stats->valid non-zero means the DMA completed and
 * there is work.  Handles receives, transmit completions, link state
 * and RDMA-tag notifications, then re-arms the interrupt by writing
 * the claim registers.
 */
/*ARGSUSED*/
static uint_t
myri10ge_intr(caddr_t arg0, caddr_t arg1)
{
	struct myri10ge_slice_state *ss =
	    (struct myri10ge_slice_state *)(void *)arg0;
	struct myri10ge_priv *mgp = ss->mgp;
	mcp_irq_data_t *stats = ss->fw_stats;
	myri10ge_tx_ring_t *tx = &ss->tx;
	uint32_t send_done_count;
	uint8_t valid;


	/* make sure the DMA has finished */
	if (!stats->valid) {
		return (DDI_INTR_UNCLAIMED);
	}
	valid = stats->valid;

	/* low bit indicates receives are present */
	if (valid & 1)
		myri10ge_intr_rx(ss);

	if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
		/* lower legacy IRQ */
		*mgp->irq_deassert = 0;
		if (!myri10ge_deassert_wait)
			/* don't wait for conf. that irq is low */
			stats->valid = 0;
		mb();
	} else {
		/* no need to wait for conf. that irq is low */
		stats->valid = 0;
	}

	/* loop while firmware keeps posting completions */
	do {
		/* check for transmit completes and receives */
		send_done_count = ntohl(stats->send_done_count);
		if (send_done_count != tx->pkt_done)
			myri10ge_tx_done(ss, (int)send_done_count);
	} while (*((volatile uint8_t *) &stats->valid));

	if (stats->stats_updated) {
		if (mgp->link_state != stats->link_up || stats->link_down) {
			mgp->link_state = stats->link_up;
			if (stats->link_down) {
				mgp->down_cnt += stats->link_down;
				mgp->link_state = 0;
			}
			if (mgp->link_state) {
				if (myri10ge_verbose)
					printf("%s: link up\n", mgp->name);
				mac_link_update(mgp->mh, LINK_STATE_UP);
			} else {
				if (myri10ge_verbose)
					printf("%s: link down\n", mgp->name);
				mac_link_update(mgp->mh, LINK_STATE_DOWN);
			}
			MYRI10GE_NIC_STAT_INC(link_changes);
		}
		if (mgp->rdma_tags_available !=
		    ntohl(ss->fw_stats->rdma_tags_available)) {
			mgp->rdma_tags_available =
			    ntohl(ss->fw_stats->rdma_tags_available);
			cmn_err(CE_NOTE, "%s: RDMA timed out! "
			    "%d tags left\n", mgp->name,
			    mgp->rdma_tags_available);
		}
	}

	mb();
	/* check to see if we have rx token to pass back */
	if (valid & 0x1) {
		mutex_enter(&ss->poll_lock);
		if (ss->rx_polling) {
			/* polling path owns the ring; hold the token */
			ss->rx_token = 1;
		} else {
			*ss->irq_claim = BE_32(3);
			ss->rx_token = 0;
		}
		mutex_exit(&ss->poll_lock);
	}
	/* claim the tx/stats half of the interrupt */
	*(ss->irq_claim + 1) = BE_32(3);
	return (DDI_INTR_CLAIMED);
}
2771 * Add or remove a multicast address. This is called with our
2772 * macinfo's lock held by GLD, so we do not need to worry about
2773 * our own locking here.
2775 static int
2776 myri10ge_m_multicst(void *arg, boolean_t add, const uint8_t *multicastaddr)
2778 myri10ge_cmd_t cmd;
2779 struct myri10ge_priv *mgp = arg;
2780 int status, join_leave;
2782 if (add)
2783 join_leave = MXGEFW_JOIN_MULTICAST_GROUP;
2784 else
2785 join_leave = MXGEFW_LEAVE_MULTICAST_GROUP;
2786 (void) memcpy(&cmd.data0, multicastaddr, 4);
2787 (void) memcpy(&cmd.data1, multicastaddr + 4, 2);
2788 cmd.data0 = htonl(cmd.data0);
2789 cmd.data1 = htonl(cmd.data1);
2790 status = myri10ge_send_cmd(mgp, join_leave, &cmd);
2791 if (status == 0)
2792 return (0);
2794 cmn_err(CE_WARN, "%s: failed to set multicast address\n",
2795 mgp->name);
2796 return (status);
2800 static int
2801 myri10ge_m_promisc(void *arg, boolean_t on)
2803 struct myri10ge_priv *mgp = arg;
2805 myri10ge_change_promisc(mgp, on);
2806 return (0);
/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * backwards one at a time and handle ring wraps
 *
 * Note: the first request (src[0]) is deliberately NOT written here;
 * the caller (myri10ge_submit_req) writes it last so the chain only
 * becomes visible to the NIC once it is complete.
 */

static inline void
myri10ge_submit_req_backwards(myri10ge_tx_ring_t *tx,
    mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		/* mask handles wrap of the ring index */
		idx = (starting_slot + cnt) & tx->mask;
		myri10ge_pio_copy(&tx->lanai[idx],
		    &src[cnt], sizeof (*src));
		mb();	/* order each PIO write to the NIC */
	}
}
/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */

static inline void
myri10ge_submit_req(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
    int cnt)
{
	int idx, i;
	uint32_t *src_ints, *dst_ints;
	mcp_kreq_ether_send_t *srcp, *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	/* clear the first request's flags so the NIC ignores it for now */
	last_flags = src->flags;
	src->flags = 0;
	mb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		/* no ring wrap: copy pairs of requests forward */
		for (i = 0; i < (cnt - 1); i += 2) {
			myri10ge_pio_copy(dstp, srcp, 2 * sizeof (*src));
			mb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/*
		 * submit all but the first request, and ensure
		 * that it is submitted below
		 */
		myri10ge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		myri10ge_pio_copy(dstp, srcp, sizeof (*src));
		mb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags |= last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	mb();
	/* notify NIC to poll this tx ring */
	if (!tx->active && tx->go != NULL) {
		*(int *)(void *)tx->go = 1;
		tx->active = 1;
		tx->activate++;
		mb();
	}
}
2892 /* ARGSUSED */
2893 static inline void
2894 myri10ge_lso_info_get(mblk_t *mp, uint32_t *mss, uint32_t *flags)
2896 uint32_t lso_flag;
2897 mac_lso_get(mp, mss, &lso_flag);
2898 (*flags) |= lso_flag;
2902 /* like pullupmsg, except preserve hcksum/LSO attributes */
2903 static int
2904 myri10ge_pullup(struct myri10ge_slice_state *ss, mblk_t *mp)
2906 uint32_t start, stuff, tx_offload_flags, mss;
2907 int ok;
2909 mss = 0;
2910 mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags);
2911 myri10ge_lso_info_get(mp, &mss, &tx_offload_flags);
2913 ok = pullupmsg(mp, -1);
2914 if (!ok) {
2915 printf("pullupmsg failed");
2916 return (DDI_FAILURE);
2918 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_pullup);
2919 mac_hcksum_set(mp, start, stuff, 0, 0, tx_offload_flags);
2920 if (tx_offload_flags & HW_LSO)
2921 DB_LSOMSS(mp) = (uint16_t)mss;
2922 lso_info_set(mp, mss, tx_offload_flags);
2923 return (DDI_SUCCESS);
2926 static inline void
2927 myri10ge_tx_stat(struct myri10ge_tx_pkt_stats *s, struct ether_header *eh,
2928 int opackets, int obytes)
2930 s->un.all = 0;
2931 if (eh->ether_dhost.ether_addr_octet[0] & 1) {
2932 if (0 == (bcmp(eh->ether_dhost.ether_addr_octet,
2933 myri10ge_broadcastaddr, sizeof (eh->ether_dhost))))
2934 s->un.s.brdcstxmt = 1;
2935 else
2936 s->un.s.multixmt = 1;
2938 s->un.s.opackets = (uint16_t)opackets;
2939 s->un.s.obytes = obytes;
/*
 * Transmit a small packet by copying it into a pre-mapped per-slot
 * bounce buffer, so no DMA binding is needed.  Returns EBUSY when the
 * ring is (nearly) full, otherwise consumes and frees the mblk chain.
 * The caller has already filled in the checksum fields of *req.
 */
static int
myri10ge_tx_copy(struct myri10ge_slice_state *ss, mblk_t *mp,
    mcp_kreq_ether_send_t *req)
{
	myri10ge_tx_ring_t *tx = &ss->tx;
	caddr_t ptr;
	struct myri10ge_tx_copybuf *cp;
	mblk_t *bp;
	int idx, mblen, avail;
	uint16_t len;

	mutex_enter(&tx->lock);
	/* leave a slot free so req and done can't alias when full */
	avail = tx->mask - (tx->req - tx->done);
	if (avail <= 1) {
		mutex_exit(&tx->lock);
		return (EBUSY);
	}
	idx = tx->req & tx->mask;
	cp = &tx->cp[idx];
	ptr = cp->va;
	/* flatten the whole chain into the copy buffer */
	for (len = 0, bp = mp; bp != NULL; bp = bp->b_cont) {
		mblen = MBLKL(bp);
		bcopy(bp->b_rptr, ptr, mblen);
		ptr += mblen;
		len += mblen;
	}
	/* ensure runts are padded to 60 bytes */
	if (len < 60) {
		bzero(ptr, 64 - len);
		len = 60;
	}
	req->addr_low = cp->dma.low;
	req->addr_high = cp->dma.high;
	req->length = htons(len);
	req->pad = 0;
	req->rdma_count = 1;
	myri10ge_tx_stat(&tx->info[idx].stat,
	    (struct ether_header *)(void *)cp->va, 1, len);
	(void) ddi_dma_sync(cp->dma.handle, 0, len, DDI_DMA_SYNC_FORDEV);
	myri10ge_submit_req(&ss->tx, req, 1);
	mutex_exit(&tx->lock);
	freemsg(mp);
	return (DDI_SUCCESS);
}
2988 static void
2989 myri10ge_send_locked(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *req_list,
2990 struct myri10ge_tx_buffer_state *tx_info,
2991 int count)
2993 int i, idx;
2995 idx = 0; /* gcc -Wuninitialized */
2996 /* store unmapping and bp info for tx irq handler */
2997 for (i = 0; i < count; i++) {
2998 idx = (tx->req + i) & tx->mask;
2999 tx->info[idx].m = tx_info[i].m;
3000 tx->info[idx].handle = tx_info[i].handle;
3002 tx->info[idx].stat.un.all = tx_info[0].stat.un.all;
3004 /* submit the frame to the nic */
3005 myri10ge_submit_req(tx, req_list, count);
3012 static void
3013 myri10ge_copydata(mblk_t *mp, int off, int len, caddr_t buf)
3015 mblk_t *bp;
3016 int seglen;
3017 uint_t count;
3019 bp = mp;
3021 while (off > 0) {
3022 seglen = MBLKL(bp);
3023 if (off < seglen)
3024 break;
3025 off -= seglen;
3026 bp = bp->b_cont;
3028 while (len > 0) {
3029 seglen = MBLKL(bp);
3030 count = min(seglen - off, len);
3031 bcopy(bp->b_rptr + off, buf, count);
3032 len -= count;
3033 buf += count;
3034 off = 0;
3035 bp = bp->b_cont;
3039 static int
3040 myri10ge_ether_parse_header(mblk_t *mp)
3042 struct ether_header eh_copy;
3043 struct ether_header *eh;
3044 int eth_hdr_len, seglen;
3046 seglen = MBLKL(mp);
3047 eth_hdr_len = sizeof (*eh);
3048 if (seglen < eth_hdr_len) {
3049 myri10ge_copydata(mp, 0, eth_hdr_len, (caddr_t)&eh_copy);
3050 eh = &eh_copy;
3051 } else {
3052 eh = (struct ether_header *)(void *)mp->b_rptr;
3054 if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) {
3055 eth_hdr_len += 4;
3058 return (eth_hdr_len);
/*
 * Parse the IP and TCP headers of an outbound LSO packet (starting at
 * byte offset "off", the end of the ethernet header), zero the IP
 * header checksum as the firmware requires, and return the total
 * header length (ether + IP + TCP) in bytes.  If the headers are not
 * contiguous in the first mblk they are flattened into a stack buffer
 * for parsing, and the checksum is zeroed byte-by-byte back in the
 * original chain.
 */
static int
myri10ge_lso_parse_header(mblk_t *mp, int off)
{
	char buf[128];
	int seglen, sum_off;
	struct ip *ip;
	struct tcphdr *tcp;

	seglen = MBLKL(mp);
	if (seglen < off + sizeof (*ip)) {
		/* IP header not contiguous: parse from a flat copy */
		myri10ge_copydata(mp, off, sizeof (*ip), buf);
		ip = (struct ip *)(void *)buf;
	} else {
		ip = (struct ip *)(void *)(mp->b_rptr + off);
	}
	if (seglen < off + (ip->ip_hl << 2) + sizeof (*tcp)) {
		/* recopy now that the real IP header length is known */
		myri10ge_copydata(mp, off,
		    (ip->ip_hl << 2) + sizeof (*tcp), buf);
		ip = (struct ip *)(void *)buf;
	}
	tcp = (struct tcphdr *)(void *)((char *)ip + (ip->ip_hl << 2));

	/*
	 * NIC expects ip_sum to be zero.  Recent changes to
	 * OpenSolaris leave the correct ip checksum there, rather
	 * than the required zero, so we need to zero it.  Otherwise,
	 * the NIC will produce bad checksums when sending LSO packets.
	 */
	if (ip->ip_sum != 0) {
		if (((char *)ip) != buf) {
			/* ip points into mblk, so just zero it */
			ip->ip_sum = 0;
		} else {
			/*
			 * ip points into a copy, so walk the chain
			 * to find the ip_csum, then zero it
			 */
			sum_off = off + _PTRDIFF(&ip->ip_sum, buf);
			/* locate and zero the first checksum byte */
			while (sum_off > (int)(MBLKL(mp) - 1)) {
				sum_off -= MBLKL(mp);
				mp = mp->b_cont;
			}
			mp->b_rptr[sum_off] = 0;
			sum_off++;
			/* the second byte may fall in the next mblk */
			while (sum_off > MBLKL(mp) - 1) {
				sum_off -= MBLKL(mp);
				mp = mp->b_cont;
			}
			mp->b_rptr[sum_off] = 0;
		}
	}
	return (off + ((ip->ip_hl + tcp->th_off) << 2));
}
/*
 * Transmit an LSO packet by copying it into the per-slot bounce
 * buffers (used when the MSS fits within the tx boundary and copy is
 * cheaper than DMA binding).  Two passes: first the headers and
 * payload are copied into consecutive copy buffers, segment-aligned
 * so no MSS-sized chunk straddles a tx_boundary; then descriptors are
 * built from the filled buffers, splitting at tx_boundary crossings
 * and tagging header/payload/chop/first flags for the firmware's TSO
 * engine.  Returns EBUSY when the ring lacks room, DDI_SUCCESS
 * otherwise (the mblk chain is always consumed on success paths).
 */
static int
myri10ge_tx_tso_copy(struct myri10ge_slice_state *ss, mblk_t *mp,
    mcp_kreq_ether_send_t *req_list, int hdr_size, int pkt_size,
    uint16_t mss, uint8_t cksum_offset)
{
	myri10ge_tx_ring_t *tx = &ss->tx;
	struct myri10ge_priv *mgp = ss->mgp;
	mblk_t *bp;
	mcp_kreq_ether_send_t *req;
	struct myri10ge_tx_copybuf *cp;
	caddr_t rptr, ptr;
	int mblen, count, cum_len, mss_resid, tx_req, pkt_size_tmp;
	int resid, avail, idx, hdr_size_tmp, tx_boundary;
	int rdma_count;
	uint32_t seglen, len, boundary, low, high_swapped;
	uint16_t pseudo_hdr_offset = htons(mss);
	uint8_t flags;

	tx_boundary = mgp->tx_boundary;
	hdr_size_tmp = hdr_size;
	resid = tx_boundary;
	count = 1;
	mutex_enter(&tx->lock);

	/* check to see if the slots are really there */
	avail = tx->mask - (tx->req - tx->done);
	if (unlikely(avail <= MYRI10GE_MAX_SEND_DESC_TSO)) {
		atomic_inc_32(&tx->stall);
		mutex_exit(&tx->lock);
		return (EBUSY);
	}

	/* copy */
	cum_len = -hdr_size;	/* negative while still inside the header */
	count = 0;
	req = req_list;
	idx = tx->mask & tx->req;
	cp = &tx->cp[idx];
	low = ntohl(cp->dma.low);
	ptr = cp->va;
	cp->len = 0;
	if (mss) {
		/* pre-compute stats: one header per emitted segment */
		int payload = pkt_size - hdr_size;
		uint16_t opackets = (payload / mss) + ((payload % mss) != 0);
		tx->info[idx].ostat.opackets = opackets;
		tx->info[idx].ostat.obytes = (opackets - 1) * hdr_size
		    + pkt_size;
	}
	hdr_size_tmp = hdr_size;
	mss_resid = mss;
	flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST);
	tx_req = tx->req;
	/*
	 * Pass 1: copy the chain into copy buffers, advancing to the
	 * next buffer whenever less than one MSS of room remains so a
	 * TCP segment never straddles a tx_boundary.
	 */
	for (bp = mp; bp != NULL; bp = bp->b_cont) {
		mblen = MBLKL(bp);
		rptr = (caddr_t)bp->b_rptr;
		len = min(hdr_size_tmp, mblen);
		if (len) {
			/* still copying protocol headers */
			bcopy(rptr, ptr, len);
			rptr += len;
			ptr += len;
			resid -= len;
			mblen -= len;
			hdr_size_tmp -= len;
			cp->len += len;
			if (hdr_size_tmp)
				continue;
			if (resid < mss) {
				tx_req++;
				idx = tx->mask & tx_req;
				cp = &tx->cp[idx];
				low = ntohl(cp->dma.low);
				ptr = cp->va;
				resid = tx_boundary;
			}
		}
		/* copy payload in MSS-sized chunks */
		while (mblen) {
			len = min(mss_resid, mblen);
			bcopy(rptr, ptr, len);
			mss_resid -= len;
			resid -= len;
			mblen -= len;
			rptr += len;
			ptr += len;
			cp->len += len;
			if (mss_resid == 0) {
				mss_resid = mss;
				if (resid < mss) {
					tx_req++;
					idx = tx->mask & tx_req;
					cp = &tx->cp[idx];
					cp->len = 0;
					low = ntohl(cp->dma.low);
					ptr = cp->va;
					resid = tx_boundary;
				}
			}
		}
	}

	/*
	 * Pass 2: build one descriptor per tx_boundary-bounded chunk
	 * of each filled copy buffer.
	 */
	req = req_list;
	pkt_size_tmp = pkt_size;
	count = 0;
	rdma_count = 0;
	tx_req = tx->req;
	while (pkt_size_tmp) {
		idx = tx->mask & tx_req;
		cp = &tx->cp[idx];
		high_swapped = cp->dma.high;
		low = ntohl(cp->dma.low);
		len = cp->len;
		if (len == 0) {
			/* should not happen: dump state and drop */
			printf("len=0! pkt_size_tmp=%d, pkt_size=%d\n",
			    pkt_size_tmp, pkt_size);
			for (bp = mp; bp != NULL; bp = bp->b_cont) {
				mblen = MBLKL(bp);
				printf("mblen:%d\n", mblen);
			}
			pkt_size_tmp = pkt_size;
			tx_req = tx->req;
			while (pkt_size_tmp > 0) {
				idx = tx->mask & tx_req;
				cp = &tx->cp[idx];
				printf("cp->len = %d\n", cp->len);
				pkt_size_tmp -= cp->len;
				tx_req++;
			}
			printf("dropped\n");
			MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
			goto done;
		}
		pkt_size_tmp -= len;
		while (len) {
			while (len) {
				uint8_t flags_next;
				int cum_len_next;

				/* split at the next tx_boundary crossing */
				boundary = (low + mgp->tx_boundary) &
				    ~(mgp->tx_boundary - 1);
				seglen = boundary - low;
				if (seglen > len)
					seglen = len;

				flags_next = flags & ~MXGEFW_FLAGS_FIRST;
				cum_len_next = cum_len + seglen;
				(req-rdma_count)->rdma_count = rdma_count + 1;
				if (likely(cum_len >= 0)) {
					/* payload */
					int next_is_first, chop;

					chop = (cum_len_next > mss);
					cum_len_next = cum_len_next % mss;
					next_is_first = (cum_len_next == 0);
					flags |= chop *
					    MXGEFW_FLAGS_TSO_CHOP;
					flags_next |= next_is_first *
					    MXGEFW_FLAGS_FIRST;
					rdma_count |= -(chop | next_is_first);
					rdma_count += chop & !next_is_first;
				} else if (likely(cum_len_next >= 0)) {
					/* header ends */
					int small;

					rdma_count = -1;
					cum_len_next = 0;
					seglen = -cum_len;
					small = (mss <= MXGEFW_SEND_SMALL_SIZE);
					flags_next = MXGEFW_FLAGS_TSO_PLD |
					    MXGEFW_FLAGS_FIRST |
					    (small * MXGEFW_FLAGS_SMALL);
				}
				req->addr_high = high_swapped;
				req->addr_low = htonl(low);
				req->pseudo_hdr_offset = pseudo_hdr_offset;
				req->pad = 0; /* complete solid 16-byte block */
				req->rdma_count = 1;
				req->cksum_offset = cksum_offset;
				req->length = htons(seglen);
				req->flags = flags | ((cum_len & 1) *
				    MXGEFW_FLAGS_ALIGN_ODD);
				if (cksum_offset > seglen)
					cksum_offset -= seglen;
				else
					cksum_offset = 0;
				low += seglen;
				len -= seglen;
				cum_len = cum_len_next;
				req++;
				req->flags = 0;
				flags = flags_next;
				count++;
				rdma_count++;
			}
		}
		tx_req++;
	}
	/* close out the final rdma group and mark the trailing requests */
	(req-rdma_count)->rdma_count = (uint8_t)rdma_count;
	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP |
	    MXGEFW_FLAGS_FIRST)));

	myri10ge_submit_req(tx, req_list, count);
done:
	mutex_exit(&tx->lock);
	freemsg(mp);
	return (DDI_SUCCESS);
}
3325 * Try to send the chain of buffers described by the mp. We must not
3326 * encapsulate more than eth->tx.req - eth->tx.done, or
3327 * MXGEFW_MAX_SEND_DESC, whichever is more.
3330 static int
3331 myri10ge_send(struct myri10ge_slice_state *ss, mblk_t *mp,
3332 mcp_kreq_ether_send_t *req_list, struct myri10ge_tx_buffer_state *tx_info)
3334 struct myri10ge_priv *mgp = ss->mgp;
3335 myri10ge_tx_ring_t *tx = &ss->tx;
3336 mcp_kreq_ether_send_t *req;
3337 struct myri10ge_tx_dma_handle *handles, *dma_handle = NULL;
3338 mblk_t *bp;
3339 ddi_dma_cookie_t cookie;
3340 int err, rv, count, avail, mblen, try_pullup, i, max_segs, maclen,
3341 rdma_count, cum_len, lso_hdr_size;
3342 uint32_t start, stuff, tx_offload_flags;
3343 uint32_t seglen, len, mss, boundary, low, high_swapped;
3344 uint_t ncookies;
3345 uint16_t pseudo_hdr_offset;
3346 uint8_t flags, cksum_offset, odd_flag;
3347 int pkt_size;
3348 int lso_copy = myri10ge_lso_copy;
3349 try_pullup = 1;
3351 again:
3352 /* Setup checksum offloading, if needed */
3353 mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags);
3354 myri10ge_lso_info_get(mp, &mss, &tx_offload_flags);
3355 if (tx_offload_flags & HW_LSO) {
3356 max_segs = MYRI10GE_MAX_SEND_DESC_TSO;
3357 if ((tx_offload_flags & HCK_PARTIALCKSUM) == 0) {
3358 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_lsobadflags);
3359 freemsg(mp);
3360 return (DDI_SUCCESS);
3362 } else {
3363 max_segs = MXGEFW_MAX_SEND_DESC;
3364 mss = 0;
3366 req = req_list;
3367 cksum_offset = 0;
3368 pseudo_hdr_offset = 0;
3370 /* leave an extra slot keep the ring from wrapping */
3371 avail = tx->mask - (tx->req - tx->done);
3374 * If we have > MXGEFW_MAX_SEND_DESC, then any over-length
3375 * message will need to be pulled up in order to fit.
3376 * Otherwise, we are low on transmit descriptors, it is
3377 * probably better to stall and try again rather than pullup a
3378 * message to fit.
3381 if (avail < max_segs) {
3382 err = EBUSY;
3383 atomic_inc_32(&tx->stall_early);
3384 goto stall;
3387 /* find out how long the frame is and how many segments it is */
3388 count = 0;
3389 odd_flag = 0;
3390 pkt_size = 0;
3391 flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST);
3392 for (bp = mp; bp != NULL; bp = bp->b_cont) {
3393 dblk_t *dbp;
3394 mblen = MBLKL(bp);
3395 if (mblen == 0) {
3397 * we can't simply skip over 0-length mblks
3398 * because the hardware can't deal with them,
3399 * and we could leak them.
3401 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_zero_len);
3402 err = EIO;
3403 goto pullup;
3406 * There's no advantage to copying most gesballoc
3407 * attached blocks, so disable lso copy in that case
3409 if (mss && lso_copy == 1 && ((dbp = bp->b_datap) != NULL)) {
3410 if ((void *)dbp->db_lastfree != myri10ge_db_lastfree) {
3411 lso_copy = 0;
3414 pkt_size += mblen;
3415 count++;
3418 /* Try to pull up excessivly long chains */
3419 if (count >= max_segs) {
3420 err = myri10ge_pullup(ss, mp);
3421 if (likely(err == DDI_SUCCESS)) {
3422 count = 1;
3423 } else {
3424 if (count < MYRI10GE_MAX_SEND_DESC_TSO) {
3426 * just let the h/w send it, it will be
3427 * inefficient, but us better than dropping
3429 max_segs = MYRI10GE_MAX_SEND_DESC_TSO;
3430 } else {
3431 /* drop it */
3432 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
3433 freemsg(mp);
3434 return (0);
3439 cum_len = 0;
3440 maclen = myri10ge_ether_parse_header(mp);
3442 if (tx_offload_flags & HCK_PARTIALCKSUM) {
3444 cksum_offset = start + maclen;
3445 pseudo_hdr_offset = htons(stuff + maclen);
3446 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
3447 flags |= MXGEFW_FLAGS_CKSUM;
3450 lso_hdr_size = 0; /* -Wunitinialized */
3451 if (mss) { /* LSO */
3452 /* this removes any CKSUM flag from before */
3453 flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST);
3455 * parse the headers and set cum_len to a negative
3456 * value to reflect the offset of the TCP payload
3458 lso_hdr_size = myri10ge_lso_parse_header(mp, maclen);
3459 cum_len = -lso_hdr_size;
3460 if ((mss < mgp->tx_boundary) && lso_copy) {
3461 err = myri10ge_tx_tso_copy(ss, mp, req_list,
3462 lso_hdr_size, pkt_size, mss, cksum_offset);
3463 return (err);
3467 * for TSO, pseudo_hdr_offset holds mss. The firmware
3468 * figures out where to put the checksum by parsing
3469 * the header.
3472 pseudo_hdr_offset = htons(mss);
3473 } else if (pkt_size <= MXGEFW_SEND_SMALL_SIZE) {
3474 flags |= MXGEFW_FLAGS_SMALL;
3475 if (pkt_size < myri10ge_tx_copylen) {
3476 req->cksum_offset = cksum_offset;
3477 req->pseudo_hdr_offset = pseudo_hdr_offset;
3478 req->flags = flags;
3479 err = myri10ge_tx_copy(ss, mp, req);
3480 return (err);
3482 cum_len = 0;
3485 /* pull one DMA handle for each bp from our freelist */
3486 handles = NULL;
3487 err = myri10ge_alloc_tx_handles(ss, count, &handles);
3488 if (err != DDI_SUCCESS) {
3489 err = DDI_FAILURE;
3490 goto stall;
3492 count = 0;
3493 rdma_count = 0;
3494 for (bp = mp; bp != NULL; bp = bp->b_cont) {
3495 mblen = MBLKL(bp);
3496 dma_handle = handles;
3497 handles = handles->next;
3499 rv = ddi_dma_addr_bind_handle(dma_handle->h, NULL,
3500 (caddr_t)bp->b_rptr, mblen,
3501 DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL,
3502 &cookie, &ncookies);
3503 if (unlikely(rv != DDI_DMA_MAPPED)) {
3504 err = EIO;
3505 try_pullup = 0;
3506 dma_handle->next = handles;
3507 handles = dma_handle;
3508 goto abort_with_handles;
3511 /* reserve the slot */
3512 tx_info[count].m = bp;
3513 tx_info[count].handle = dma_handle;
3515 for (; ; ) {
3516 low = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress);
3517 high_swapped =
3518 htonl(MYRI10GE_HIGHPART_TO_U32(
3519 cookie.dmac_laddress));
3520 len = (uint32_t)cookie.dmac_size;
3521 while (len) {
3522 uint8_t flags_next;
3523 int cum_len_next;
3525 boundary = (low + mgp->tx_boundary) &
3526 ~(mgp->tx_boundary - 1);
3527 seglen = boundary - low;
3528 if (seglen > len)
3529 seglen = len;
3531 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
3532 cum_len_next = cum_len + seglen;
3533 if (mss) {
3534 (req-rdma_count)->rdma_count =
3535 rdma_count + 1;
3536 if (likely(cum_len >= 0)) {
3537 /* payload */
3538 int next_is_first, chop;
3540 chop = (cum_len_next > mss);
3541 cum_len_next =
3542 cum_len_next % mss;
3543 next_is_first =
3544 (cum_len_next == 0);
3545 flags |= chop *
3546 MXGEFW_FLAGS_TSO_CHOP;
3547 flags_next |= next_is_first *
3548 MXGEFW_FLAGS_FIRST;
3549 rdma_count |=
3550 -(chop | next_is_first);
3551 rdma_count +=
3552 chop & !next_is_first;
3553 } else if (likely(cum_len_next >= 0)) {
3554 /* header ends */
3555 int small;
3557 rdma_count = -1;
3558 cum_len_next = 0;
3559 seglen = -cum_len;
3560 small = (mss <=
3561 MXGEFW_SEND_SMALL_SIZE);
3562 flags_next =
3563 MXGEFW_FLAGS_TSO_PLD
3564 | MXGEFW_FLAGS_FIRST
3565 | (small *
3566 MXGEFW_FLAGS_SMALL);
3569 req->addr_high = high_swapped;
3570 req->addr_low = htonl(low);
3571 req->pseudo_hdr_offset = pseudo_hdr_offset;
3572 req->pad = 0; /* complete solid 16-byte block */
3573 req->rdma_count = 1;
3574 req->cksum_offset = cksum_offset;
3575 req->length = htons(seglen);
3576 req->flags = flags | ((cum_len & 1) * odd_flag);
3577 if (cksum_offset > seglen)
3578 cksum_offset -= seglen;
3579 else
3580 cksum_offset = 0;
3581 low += seglen;
3582 len -= seglen;
3583 cum_len = cum_len_next;
3584 count++;
3585 rdma_count++;
3586 /* make sure all the segments will fit */
3587 if (unlikely(count >= max_segs)) {
3588 MYRI10GE_ATOMIC_SLICE_STAT_INC(
3589 xmit_lowbuf);
3590 /* may try a pullup */
3591 err = EBUSY;
3592 if (try_pullup)
3593 try_pullup = 2;
3594 goto abort_with_handles;
3596 req++;
3597 req->flags = 0;
3598 flags = flags_next;
3599 tx_info[count].m = 0;
3601 ncookies--;
3602 if (ncookies == 0)
3603 break;
3604 ddi_dma_nextcookie(dma_handle->h, &cookie);
3607 (req-rdma_count)->rdma_count = (uint8_t)rdma_count;
3609 if (mss) {
3610 do {
3611 req--;
3612 req->flags |= MXGEFW_FLAGS_TSO_LAST;
3613 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP |
3614 MXGEFW_FLAGS_FIRST)));
3617 /* calculate tx stats */
3618 if (mss) {
3619 uint16_t opackets;
3620 int payload;
3622 payload = pkt_size - lso_hdr_size;
3623 opackets = (payload / mss) + ((payload % mss) != 0);
3624 tx_info[0].stat.un.all = 0;
3625 tx_info[0].ostat.opackets = opackets;
3626 tx_info[0].ostat.obytes = (opackets - 1) * lso_hdr_size
3627 + pkt_size;
3628 } else {
3629 myri10ge_tx_stat(&tx_info[0].stat,
3630 (struct ether_header *)(void *)mp->b_rptr, 1, pkt_size);
3632 mutex_enter(&tx->lock);
3634 /* check to see if the slots are really there */
3635 avail = tx->mask - (tx->req - tx->done);
3636 if (unlikely(avail <= count)) {
3637 mutex_exit(&tx->lock);
3638 err = 0;
3639 goto late_stall;
3642 myri10ge_send_locked(tx, req_list, tx_info, count);
3643 mutex_exit(&tx->lock);
3644 return (DDI_SUCCESS);
3646 late_stall:
3647 try_pullup = 0;
3648 atomic_inc_32(&tx->stall_late);
3650 abort_with_handles:
3651 /* unbind and free handles from previous mblks */
3652 for (i = 0; i < count; i++) {
3653 bp = tx_info[i].m;
3654 tx_info[i].m = 0;
3655 if (bp) {
3656 dma_handle = tx_info[i].handle;
3657 (void) ddi_dma_unbind_handle(dma_handle->h);
3658 dma_handle->next = handles;
3659 handles = dma_handle;
3660 tx_info[i].handle = NULL;
3661 tx_info[i].m = NULL;
3664 myri10ge_free_tx_handle_slist(tx, handles);
3665 pullup:
3666 if (try_pullup) {
3667 err = myri10ge_pullup(ss, mp);
3668 if (err != DDI_SUCCESS && try_pullup == 2) {
3669 /* drop */
3670 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
3671 freemsg(mp);
3672 return (0);
3674 try_pullup = 0;
3675 goto again;
3678 stall:
3679 if (err != 0) {
3680 if (err == EBUSY) {
3681 atomic_inc_32(&tx->stall);
3682 } else {
3683 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
3686 return (err);
3689 static mblk_t *
3690 myri10ge_send_wrapper(void *arg, mblk_t *mp)
3692 struct myri10ge_slice_state *ss = arg;
3693 int err = 0;
3694 mcp_kreq_ether_send_t *req_list;
3695 #if defined(__i386)
3697 * We need about 2.5KB of scratch space to handle transmits.
3698 * i86pc has only 8KB of kernel stack space, so we malloc the
3699 * scratch space there rather than keeping it on the stack.
3701 size_t req_size, tx_info_size;
3702 struct myri10ge_tx_buffer_state *tx_info;
3703 caddr_t req_bytes;
3705 req_size = sizeof (*req_list) * (MYRI10GE_MAX_SEND_DESC_TSO + 4)
3706 + 8;
3707 req_bytes = kmem_alloc(req_size, KM_SLEEP);
3708 tx_info_size = sizeof (*tx_info) * (MYRI10GE_MAX_SEND_DESC_TSO + 1);
3709 tx_info = kmem_alloc(tx_info_size, KM_SLEEP);
3710 #else
3711 char req_bytes[sizeof (*req_list) * (MYRI10GE_MAX_SEND_DESC_TSO + 4)
3712 + 8];
3713 struct myri10ge_tx_buffer_state tx_info[MYRI10GE_MAX_SEND_DESC_TSO + 1];
3714 #endif
3716 /* ensure req_list entries are aligned to 8 bytes */
3717 req_list = (struct mcp_kreq_ether_send *)
3718 (((unsigned long)req_bytes + 7UL) & ~7UL);
3720 err = myri10ge_send(ss, mp, req_list, tx_info);
3722 #if defined(__i386)
3723 kmem_free(tx_info, tx_info_size);
3724 kmem_free(req_bytes, req_size);
3725 #endif
3726 if (err)
3727 return (mp);
3728 else
3729 return (NULL);
3732 static int
3733 myri10ge_addmac(void *arg, const uint8_t *mac_addr)
3735 struct myri10ge_priv *mgp = arg;
3736 int err;
3738 if (mac_addr == NULL)
3739 return (EINVAL);
3741 mutex_enter(&mgp->intrlock);
3742 if (mgp->macaddr_cnt) {
3743 mutex_exit(&mgp->intrlock);
3744 return (ENOSPC);
3746 err = myri10ge_m_unicst(mgp, mac_addr);
3747 if (!err)
3748 mgp->macaddr_cnt++;
3750 mutex_exit(&mgp->intrlock);
3751 if (err)
3752 return (err);
3754 bcopy(mac_addr, mgp->mac_addr, sizeof (mgp->mac_addr));
3755 return (0);
3758 /*ARGSUSED*/
3759 static int
3760 myri10ge_remmac(void *arg, const uint8_t *mac_addr)
3762 struct myri10ge_priv *mgp = arg;
3764 mutex_enter(&mgp->intrlock);
3765 mgp->macaddr_cnt--;
3766 mutex_exit(&mgp->intrlock);
3768 return (0);
3771 /*ARGSUSED*/
3772 static void
3773 myri10ge_fill_group(void *arg, mac_ring_type_t rtype, const int index,
3774 mac_group_info_t *infop, mac_group_handle_t gh)
3776 struct myri10ge_priv *mgp = arg;
3778 if (rtype != MAC_RING_TYPE_RX)
3779 return;
3781 infop->mgi_driver = (mac_group_driver_t)mgp;
3782 infop->mgi_start = NULL;
3783 infop->mgi_stop = NULL;
3784 infop->mgi_addmac = myri10ge_addmac;
3785 infop->mgi_remmac = myri10ge_remmac;
3786 infop->mgi_count = mgp->num_slices;
3789 static int
3790 myri10ge_ring_start(mac_ring_driver_t rh, uint64_t mr_gen_num)
3792 struct myri10ge_slice_state *ss;
3794 ss = (struct myri10ge_slice_state *)rh;
3795 mutex_enter(&ss->rx_lock);
3796 ss->rx_gen_num = mr_gen_num;
3797 mutex_exit(&ss->rx_lock);
3798 return (0);
3802 * Retrieve a value for one of the statistics for a particular rx ring
3805 myri10ge_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val)
3807 struct myri10ge_slice_state *ss;
3809 ss = (struct myri10ge_slice_state *)rh;
3810 switch (stat) {
3811 case MAC_STAT_RBYTES:
3812 *val = ss->rx_stats.ibytes;
3813 break;
3815 case MAC_STAT_IPACKETS:
3816 *val = ss->rx_stats.ipackets;
3817 break;
3819 default:
3820 *val = 0;
3821 return (ENOTSUP);
3824 return (0);
3828 * Retrieve a value for one of the statistics for a particular tx ring
3831 myri10ge_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val)
3833 struct myri10ge_slice_state *ss;
3835 ss = (struct myri10ge_slice_state *)rh;
3836 switch (stat) {
3837 case MAC_STAT_OBYTES:
3838 *val = ss->tx.stats.obytes;
3839 break;
3841 case MAC_STAT_OPACKETS:
3842 *val = ss->tx.stats.opackets;
3843 break;
3845 default:
3846 *val = 0;
3847 return (ENOTSUP);
3850 return (0);
3853 static int
3854 myri10ge_rx_ring_intr_disable(mac_intr_handle_t intrh)
3856 struct myri10ge_slice_state *ss;
3858 ss = (struct myri10ge_slice_state *)intrh;
3859 mutex_enter(&ss->poll_lock);
3860 ss->rx_polling = B_TRUE;
3861 mutex_exit(&ss->poll_lock);
3862 return (0);
3865 static int
3866 myri10ge_rx_ring_intr_enable(mac_intr_handle_t intrh)
3868 struct myri10ge_slice_state *ss;
3870 ss = (struct myri10ge_slice_state *)intrh;
3871 mutex_enter(&ss->poll_lock);
3872 ss->rx_polling = B_FALSE;
3873 if (ss->rx_token) {
3874 *ss->irq_claim = BE_32(3);
3875 ss->rx_token = 0;
3877 mutex_exit(&ss->poll_lock);
3878 return (0);
3881 /*ARGSUSED*/
3882 static void
3883 myri10ge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index,
3884 const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh)
3886 struct myri10ge_priv *mgp = arg;
3887 struct myri10ge_slice_state *ss;
3888 mac_intr_t *mintr = &infop->mri_intr;
3890 ASSERT((unsigned int)ring_index < mgp->num_slices);
3892 ss = &mgp->ss[ring_index];
3893 switch (rtype) {
3894 case MAC_RING_TYPE_RX:
3895 ss->rx_rh = rh;
3896 infop->mri_driver = (mac_ring_driver_t)ss;
3897 infop->mri_start = myri10ge_ring_start;
3898 infop->mri_stop = NULL;
3899 infop->mri_poll = myri10ge_poll_rx;
3900 infop->mri_stat = myri10ge_rx_ring_stat;
3901 mintr->mi_handle = (mac_intr_handle_t)ss;
3902 mintr->mi_enable = myri10ge_rx_ring_intr_enable;
3903 mintr->mi_disable = myri10ge_rx_ring_intr_disable;
3904 break;
3905 case MAC_RING_TYPE_TX:
3906 ss->tx.rh = rh;
3907 infop->mri_driver = (mac_ring_driver_t)ss;
3908 infop->mri_start = NULL;
3909 infop->mri_stop = NULL;
3910 infop->mri_tx = myri10ge_send_wrapper;
3911 infop->mri_stat = myri10ge_tx_ring_stat;
3912 break;
3913 default:
3914 break;
3918 static void
3919 myri10ge_nic_stat_destroy(struct myri10ge_priv *mgp)
3921 if (mgp->ksp_stat == NULL)
3922 return;
3924 kstat_delete(mgp->ksp_stat);
3925 mgp->ksp_stat = NULL;
3928 static void
3929 myri10ge_slice_stat_destroy(struct myri10ge_slice_state *ss)
3931 if (ss->ksp_stat == NULL)
3932 return;
3934 kstat_delete(ss->ksp_stat);
3935 ss->ksp_stat = NULL;
3938 static void
3939 myri10ge_info_destroy(struct myri10ge_priv *mgp)
3941 if (mgp->ksp_info == NULL)
3942 return;
3944 kstat_delete(mgp->ksp_info);
3945 mgp->ksp_info = NULL;
/*
 * kstat update callback for the per-NIC "myri10ge_nic_stats" kstat.
 * Copies the DMA benchmark results and the firmware's drop counters
 * (big-endian, DMAed by the NIC into slice 0's fw_stats buffer) into
 * the named-kstat data area.  Read-only: writes return EACCES.
 */
static int
myri10ge_nic_stat_kstat_update(kstat_t *ksp, int rw)
{
	struct myri10ge_nic_stat *ethstat;
	struct myri10ge_priv *mgp;
	mcp_irq_data_t *fw_stats;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	ethstat = (struct myri10ge_nic_stat *)ksp->ks_data;
	mgp = (struct myri10ge_priv *)ksp->ks_private;
	/* firmware statistics live in slice 0's DMA buffer */
	fw_stats = mgp->ss[0].fw_stats;

	ethstat->dma_read_bw_MBs.value.ul = mgp->read_dma;
	ethstat->dma_write_bw_MBs.value.ul = mgp->write_dma;
	ethstat->dma_read_write_bw_MBs.value.ul = mgp->read_write_dma;
	/* report whether bypass (physical) DMA mode is in effect */
	if (myri10ge_tx_dma_attr.dma_attr_flags & DDI_DMA_FORCE_PHYSICAL)
		ethstat->dma_force_physical.value.ul = 1;
	else
		ethstat->dma_force_physical.value.ul = 0;
	ethstat->lanes.value.ul = mgp->pcie_link_width;
	/* firmware counters are big-endian on the wire */
	ethstat->dropped_bad_crc32.value.ul =
	    ntohl(fw_stats->dropped_bad_crc32);
	ethstat->dropped_bad_phy.value.ul =
	    ntohl(fw_stats->dropped_bad_phy);
	ethstat->dropped_link_error_or_filtered.value.ul =
	    ntohl(fw_stats->dropped_link_error_or_filtered);
	ethstat->dropped_link_overflow.value.ul =
	    ntohl(fw_stats->dropped_link_overflow);
	ethstat->dropped_multicast_filtered.value.ul =
	    ntohl(fw_stats->dropped_multicast_filtered);
	ethstat->dropped_no_big_buffer.value.ul =
	    ntohl(fw_stats->dropped_no_big_buffer);
	ethstat->dropped_no_small_buffer.value.ul =
	    ntohl(fw_stats->dropped_no_small_buffer);
	ethstat->dropped_overrun.value.ul =
	    ntohl(fw_stats->dropped_overrun);
	ethstat->dropped_pause.value.ul =
	    ntohl(fw_stats->dropped_pause);
	ethstat->dropped_runt.value.ul =
	    ntohl(fw_stats->dropped_runt);
	ethstat->link_up.value.ul =
	    ntohl(fw_stats->link_up);
	ethstat->dropped_unicast_filtered.value.ul =
	    ntohl(fw_stats->dropped_unicast_filtered);
	return (0);
}
/*
 * kstat update callback for the per-slice "myri10ge_slice_stats"
 * kstat.  Snapshots receive buffer accounting and transmit ring
 * counters for one slice.  Read-only: writes return EACCES.
 */
static int
myri10ge_slice_stat_kstat_update(kstat_t *ksp, int rw)
{
	struct myri10ge_slice_stat *ethstat;
	struct myri10ge_slice_state *ss;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	ethstat = (struct myri10ge_slice_stat *)ksp->ks_data;
	ss = (struct myri10ge_slice_state *)ksp->ks_private;

	ethstat->rx_big.value.ul = ss->j_rx_cnt;
	/* big buffers posted to firmware but not yet received back */
	ethstat->rx_bigbuf_firmware.value.ul = ss->rx_big.cnt - ss->j_rx_cnt;
	/* jumbo-pool buffers, net of those loaned out as smalls */
	ethstat->rx_bigbuf_pool.value.ul =
	    ss->jpool.num_alloc - ss->jbufs_for_smalls;
	ethstat->rx_bigbuf_smalls.value.ul = ss->jbufs_for_smalls;
	/* smalls outstanding beyond the ring's resident population */
	ethstat->rx_small.value.ul = ss->rx_small.cnt -
	    (ss->rx_small.mask + 1);
	ethstat->tx_done.value.ul = ss->tx.done;
	ethstat->tx_req.value.ul = ss->tx.req;
	ethstat->tx_activate.value.ul = ss->tx.activate;
	ethstat->xmit_sched.value.ul = ss->tx.sched;
	ethstat->xmit_stall.value.ul = ss->tx.stall;
	ethstat->xmit_stall_early.value.ul = ss->tx.stall_early;
	ethstat->xmit_stall_late.value.ul = ss->tx.stall_late;
	ethstat->xmit_err.value.ul = MYRI10GE_SLICE_STAT(xmit_err);
	return (0);
}
/*
 * kstat update callback for the "myri10ge_info" kstat: publishes the
 * driver/firmware version strings and hardware identity strings.
 * The string storage is accounted for in myri10ge_info_init() via
 * ks_data_size.  Read-only: writes return EACCES.
 */
static int
myri10ge_info_kstat_update(kstat_t *ksp, int rw)
{
	struct myri10ge_info *info;
	struct myri10ge_priv *mgp;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	info = (struct myri10ge_info *)ksp->ks_data;
	mgp = (struct myri10ge_priv *)ksp->ks_private;
	kstat_named_setstr(&info->driver_version, MYRI10GE_VERSION_STR);
	kstat_named_setstr(&info->firmware_version, mgp->fw_version);
	kstat_named_setstr(&info->firmware_name, mgp->fw_name);
	kstat_named_setstr(&info->interrupt_type, mgp->intr_type);
	kstat_named_setstr(&info->product_code, mgp->pc_str);
	kstat_named_setstr(&info->serial_number, mgp->sn_str);
	return (0);
}
/*
 * Template for the virtual "myri10ge_info" kstat.  Entry order must
 * match the field order of struct myri10ge_info; the values are
 * installed by myri10ge_info_kstat_update().
 */
static struct myri10ge_info myri10ge_info_template = {
	{ "driver_version", KSTAT_DATA_STRING },
	{ "firmware_version", KSTAT_DATA_STRING },
	{ "firmware_name", KSTAT_DATA_STRING },
	{ "interrupt_type", KSTAT_DATA_STRING },
	{ "product_code", KSTAT_DATA_STRING },
	{ "serial_number", KSTAT_DATA_STRING },
};
/* serializes access to the shared template used as ks_data */
static kmutex_t myri10ge_info_template_lock;
4060 static int
4061 myri10ge_info_init(struct myri10ge_priv *mgp)
4063 struct kstat *ksp;
4065 ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip),
4066 "myri10ge_info", "net", KSTAT_TYPE_NAMED,
4067 sizeof (myri10ge_info_template) /
4068 sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
4069 if (ksp == NULL) {
4070 cmn_err(CE_WARN,
4071 "%s: myri10ge_info_init: kstat_create failed", mgp->name);
4072 return (DDI_FAILURE);
4074 mgp->ksp_info = ksp;
4075 ksp->ks_update = myri10ge_info_kstat_update;
4076 ksp->ks_private = (void *) mgp;
4077 ksp->ks_data = &myri10ge_info_template;
4078 ksp->ks_lock = &myri10ge_info_template_lock;
4079 if (MYRI10GE_VERSION_STR != NULL)
4080 ksp->ks_data_size += strlen(MYRI10GE_VERSION_STR) + 1;
4081 if (mgp->fw_version != NULL)
4082 ksp->ks_data_size += strlen(mgp->fw_version) + 1;
4083 ksp->ks_data_size += strlen(mgp->fw_name) + 1;
4084 ksp->ks_data_size += strlen(mgp->intr_type) + 1;
4085 if (mgp->pc_str != NULL)
4086 ksp->ks_data_size += strlen(mgp->pc_str) + 1;
4087 if (mgp->sn_str != NULL)
4088 ksp->ks_data_size += strlen(mgp->sn_str) + 1;
4090 kstat_install(ksp);
4091 return (DDI_SUCCESS);
4095 static int
4096 myri10ge_nic_stat_init(struct myri10ge_priv *mgp)
4098 struct kstat *ksp;
4099 struct myri10ge_nic_stat *ethstat;
4101 ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip),
4102 "myri10ge_nic_stats", "net", KSTAT_TYPE_NAMED,
4103 sizeof (*ethstat) / sizeof (kstat_named_t), 0);
4104 if (ksp == NULL) {
4105 cmn_err(CE_WARN,
4106 "%s: myri10ge_stat_init: kstat_create failed", mgp->name);
4107 return (DDI_FAILURE);
4109 mgp->ksp_stat = ksp;
4110 ethstat = (struct myri10ge_nic_stat *)(ksp->ks_data);
4112 kstat_named_init(&ethstat->dma_read_bw_MBs,
4113 "dma_read_bw_MBs", KSTAT_DATA_ULONG);
4114 kstat_named_init(&ethstat->dma_write_bw_MBs,
4115 "dma_write_bw_MBs", KSTAT_DATA_ULONG);
4116 kstat_named_init(&ethstat->dma_read_write_bw_MBs,
4117 "dma_read_write_bw_MBs", KSTAT_DATA_ULONG);
4118 kstat_named_init(&ethstat->dma_force_physical,
4119 "dma_force_physical", KSTAT_DATA_ULONG);
4120 kstat_named_init(&ethstat->lanes,
4121 "lanes", KSTAT_DATA_ULONG);
4122 kstat_named_init(&ethstat->dropped_bad_crc32,
4123 "dropped_bad_crc32", KSTAT_DATA_ULONG);
4124 kstat_named_init(&ethstat->dropped_bad_phy,
4125 "dropped_bad_phy", KSTAT_DATA_ULONG);
4126 kstat_named_init(&ethstat->dropped_link_error_or_filtered,
4127 "dropped_link_error_or_filtered", KSTAT_DATA_ULONG);
4128 kstat_named_init(&ethstat->dropped_link_overflow,
4129 "dropped_link_overflow", KSTAT_DATA_ULONG);
4130 kstat_named_init(&ethstat->dropped_multicast_filtered,
4131 "dropped_multicast_filtered", KSTAT_DATA_ULONG);
4132 kstat_named_init(&ethstat->dropped_no_big_buffer,
4133 "dropped_no_big_buffer", KSTAT_DATA_ULONG);
4134 kstat_named_init(&ethstat->dropped_no_small_buffer,
4135 "dropped_no_small_buffer", KSTAT_DATA_ULONG);
4136 kstat_named_init(&ethstat->dropped_overrun,
4137 "dropped_overrun", KSTAT_DATA_ULONG);
4138 kstat_named_init(&ethstat->dropped_pause,
4139 "dropped_pause", KSTAT_DATA_ULONG);
4140 kstat_named_init(&ethstat->dropped_runt,
4141 "dropped_runt", KSTAT_DATA_ULONG);
4142 kstat_named_init(&ethstat->dropped_unicast_filtered,
4143 "dropped_unicast_filtered", KSTAT_DATA_ULONG);
4144 kstat_named_init(&ethstat->dropped_runt, "dropped_runt",
4145 KSTAT_DATA_ULONG);
4146 kstat_named_init(&ethstat->link_up, "link_up", KSTAT_DATA_ULONG);
4147 kstat_named_init(&ethstat->link_changes, "link_changes",
4148 KSTAT_DATA_ULONG);
4149 ksp->ks_update = myri10ge_nic_stat_kstat_update;
4150 ksp->ks_private = (void *) mgp;
4151 kstat_install(ksp);
4152 return (DDI_SUCCESS);
/*
 * Create and install the per-slice "myri10ge_slice_stats" kstat.
 * Returns DDI_SUCCESS or DDI_FAILURE.
 */
static int
myri10ge_slice_stat_init(struct myri10ge_slice_state *ss)
{
	struct myri10ge_priv *mgp = ss->mgp;
	struct kstat *ksp;
	struct myri10ge_slice_stat *ethstat;
	int instance;

	/*
	 * fake an instance so that the same slice numbers from
	 * different instances do not collide
	 */
	instance = (ddi_get_instance(mgp->dip) * 1000) + (int)(ss - mgp->ss);
	ksp = kstat_create("myri10ge", instance,
	    "myri10ge_slice_stats", "net", KSTAT_TYPE_NAMED,
	    sizeof (*ethstat) / sizeof (kstat_named_t), 0);
	if (ksp == NULL) {
		cmn_err(CE_WARN,
		    "%s: myri10ge_stat_init: kstat_create failed", mgp->name);
		return (DDI_FAILURE);
	}
	ss->ksp_stat = ksp;
	ethstat = (struct myri10ge_slice_stat *)(ksp->ks_data);
	/* one named entry per field published by the update callback */
	kstat_named_init(&ethstat->lro_bad_csum, "lro_bad_csum",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->lro_flushed, "lro_flushed",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->lro_queued, "lro_queued",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->rx_bigbuf_firmware, "rx_bigbuf_firmware",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->rx_bigbuf_pool, "rx_bigbuf_pool",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->rx_bigbuf_smalls, "rx_bigbuf_smalls",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->rx_copy, "rx_copy",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->rx_big_nobuf, "rx_big_nobuf",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->rx_small_nobuf, "rx_small_nobuf",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_zero_len, "xmit_zero_len",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_pullup, "xmit_pullup",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_pullup_first, "xmit_pullup_first",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_lowbuf, "xmit_lowbuf",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_lsobadflags, "xmit_lsobadflags",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_sched, "xmit_sched",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_stall, "xmit_stall",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_stall_early, "xmit_stall_early",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_stall_late, "xmit_stall_late",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_err, "xmit_err",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->tx_req, "tx_req",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->tx_activate, "tx_activate",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->tx_done, "tx_done",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->tx_handles_alloced, "tx_handles_alloced",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->rx_big, "rx_big",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->rx_small, "rx_small",
	    KSTAT_DATA_ULONG);
	ksp->ks_update = myri10ge_slice_stat_kstat_update;
	ksp->ks_private = (void *) ss;
	kstat_install(ksp);
	return (DDI_SUCCESS);
}
4236 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
4238 #include <vm/hat.h>
4239 #include <sys/ddi_isa.h>
4240 void *device_arena_alloc(size_t size, int vm_flag);
4241 void device_arena_free(void *vaddr, size_t size);
/*
 * Attempt to enable ECRC generation on an upstream nVidia (CK804 /
 * MCP55) PCIe bridge, so that PCIe completions arrive aligned and the
 * faster "aligned" firmware can be used (see the block comment below
 * on completion alignment).  The bridge's extended config space is
 * not reachable through the normal DDI config access routines, so
 * this maps the candidate ECAM physical addresses directly and pokes
 * bit 6 of the register at offset 0x178.  x86-only.
 */
static void
myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp)
{
	dev_info_t *parent_dip;
	ddi_acc_handle_t handle;
	unsigned long bus_number, dev_number, func_number;
	unsigned long cfg_pa, paddr, base, pgoffset;
	char *cvaddr, *ptr;
	uint32_t *ptr32;
	int retval = DDI_FAILURE;
	int dontcare;
	uint16_t read_vid, read_did, vendor_id, device_id;

	if (!myri10ge_nvidia_ecrc_enable)
		return;

	parent_dip = ddi_get_parent(mgp->dip);
	if (parent_dip == NULL) {
		cmn_err(CE_WARN, "%s: I'm an orphan?", mgp->name);
		return;
	}

	if (pci_config_setup(parent_dip, &handle) != DDI_SUCCESS) {
		cmn_err(CE_WARN,
		    "%s: Could not access my parent's registers", mgp->name);
		return;
	}

	vendor_id = pci_config_get16(handle, PCI_CONF_VENID);
	device_id = pci_config_get16(handle, PCI_CONF_DEVID);
	pci_config_teardown(&handle);

	if (myri10ge_verbose) {
		/* shadows the outer locals; used only for the printf */
		unsigned long bus_number, dev_number, func_number;
		int reg_set, span;
		(void) myri10ge_reg_set(parent_dip, &reg_set, &span,
		    &bus_number, &dev_number, &func_number);
		if (myri10ge_verbose)
			printf("%s: parent at %ld:%ld:%ld\n", mgp->name,
			    bus_number, dev_number, func_number);
	}

	/* only nVidia bridges are handled */
	if (vendor_id != 0x10de)
		return;

	if (device_id != 0x005d /* CK804 */ &&
	    (device_id < 0x374 || device_id > 0x378) /* MCP55 */) {
		return;
	}
	(void) myri10ge_reg_set(parent_dip, &dontcare, &dontcare,
	    &bus_number, &dev_number, &func_number);

	/* probe the usual ECAM base candidates, high to low */
	for (cfg_pa = 0xf0000000UL;
	    retval != DDI_SUCCESS && cfg_pa >= 0xe0000000UL;
	    cfg_pa -= 0x10000000UL) {
		/* find the config space address for the nvidia bridge */
		paddr = (cfg_pa + bus_number * 0x00100000UL +
		    (dev_number * 8 + func_number) * 0x00001000UL);

		base = paddr & (~MMU_PAGEOFFSET);
		pgoffset = paddr & MMU_PAGEOFFSET;

		/* map it into the kernel */
		cvaddr = device_arena_alloc(ptob(1), VM_NOSLEEP);
		/*
		 * NOTE(review): on allocation failure this only warns
		 * and then still passes the NULL cvaddr to
		 * hat_devload() below -- looks like it should skip or
		 * bail instead; confirm against upstream.
		 */
		if (cvaddr == NULL)
			cmn_err(CE_WARN, "%s: failed to map nf4: cvaddr\n",
			    mgp->name);

		hat_devload(kas.a_hat, cvaddr, mmu_ptob(1),
		    i_ddi_paddr_to_pfn(base),
		    PROT_WRITE|HAT_STRICTORDER, HAT_LOAD_LOCK);

		ptr = cvaddr + pgoffset;
		read_vid = *(uint16_t *)(void *)(ptr + PCI_CONF_VENID);
		read_did = *(uint16_t *)(void *)(ptr + PCI_CONF_DEVID);
		/*
		 * NOTE(review): this compares vendor_id against the
		 * *device*-ID register (read_did) and never checks
		 * read_vid outside the printf -- presumably it was
		 * meant to be (vendor_id == read_vid &&
		 * device_id == read_did); verify before changing.
		 */
		if (vendor_id == read_did || device_id == read_did) {
			ptr32 = (uint32_t *)(void *)(ptr + 0x178);
			if (myri10ge_verbose)
				printf("%s: Enabling ECRC on upstream "
				    "Nvidia bridge (0x%x:0x%x) "
				    "at %ld:%ld:%ld\n", mgp->name,
				    read_vid, read_did, bus_number,
				    dev_number, func_number);
			/* bit 6 at offset 0x178 enables ECRC generation */
			*ptr32 |= 0x40;
			retval = DDI_SUCCESS;
		}
		hat_unload(kas.a_hat, cvaddr, ptob(1), HAT_UNLOAD_UNLOCK);
		device_arena_free(cvaddr, ptob(1));
	}
}
#else
/*
 * The nVidia ECRC workaround only applies to x86 chipsets; on other
 * platforms this is a no-op stub.
 */
/*ARGSUSED*/
static void
myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp)
{
}
#endif /* i386 */
4344 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
4345 * when the PCI-E Completion packets are aligned on an 8-byte
4346 * boundary. Some PCI-E chip sets always align Completion packets; on
4347 * the ones that do not, the alignment can be enforced by enabling
4348 * ECRC generation (if supported).
4350 * When PCI-E Completion packets are not aligned, it is actually more
4351 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
4353 * If the driver can neither enable ECRC nor verify that it has
4354 * already been enabled, then it must use a firmware image which works
4355 * around unaligned completion packets (ethp_z8e.dat), and it should
4356 * also ensure that it never gives the device a Read-DMA which is
4357 * larger than 2KB by setting the tx.boundary to 2KB. If ECRC is
4358 * enabled, then the driver should use the aligned (eth_z8e.dat)
4359 * firmware image, and set tx.boundary to 4KB.
4363 static int
4364 myri10ge_firmware_probe(struct myri10ge_priv *mgp)
4366 int status;
4368 mgp->tx_boundary = 4096;
4370 * Verify the max read request size was set to 4KB
4371 * before trying the test with 4KB.
4373 if (mgp->max_read_request_4k == 0)
4374 mgp->tx_boundary = 2048;
4376 * load the optimized firmware which assumes aligned PCIe
4377 * completions in order to see if it works on this host.
4380 mgp->fw_name = "rss_eth_z8e";
4381 mgp->eth_z8e = (unsigned char *)rss_eth_z8e;
4382 mgp->eth_z8e_length = rss_eth_z8e_length;
4384 status = myri10ge_load_firmware(mgp);
4385 if (status != 0) {
4386 return (status);
4389 * Enable ECRC if possible
4391 myri10ge_enable_nvidia_ecrc(mgp);
4394 * Run a DMA test which watches for unaligned completions and
4395 * aborts on the first one seen.
4397 status = myri10ge_dma_test(mgp, MXGEFW_CMD_UNALIGNED_TEST);
4398 if (status == 0)
4399 return (0); /* keep the aligned firmware */
4401 if (status != E2BIG)
4402 cmn_err(CE_WARN, "%s: DMA test failed: %d\n",
4403 mgp->name, status);
4404 if (status == ENOSYS)
4405 cmn_err(CE_WARN, "%s: Falling back to ethp! "
4406 "Please install up to date fw\n", mgp->name);
4407 return (status);
4410 static int
4411 myri10ge_select_firmware(struct myri10ge_priv *mgp)
4413 int aligned;
4415 aligned = 0;
4417 if (myri10ge_force_firmware == 1) {
4418 if (myri10ge_verbose)
4419 printf("%s: Assuming aligned completions (forced)\n",
4420 mgp->name);
4421 aligned = 1;
4422 goto done;
4425 if (myri10ge_force_firmware == 2) {
4426 if (myri10ge_verbose)
4427 printf("%s: Assuming unaligned completions (forced)\n",
4428 mgp->name);
4429 aligned = 0;
4430 goto done;
4433 /* If the width is less than 8, we may used the aligned firmware */
4434 if (mgp->pcie_link_width != 0 && mgp->pcie_link_width < 8) {
4435 cmn_err(CE_WARN, "!%s: PCIe link running at x%d\n",
4436 mgp->name, mgp->pcie_link_width);
4437 aligned = 1;
4438 goto done;
4441 if (0 == myri10ge_firmware_probe(mgp))
4442 return (0); /* keep optimized firmware */
4444 done:
4445 if (aligned) {
4446 mgp->fw_name = "rss_eth_z8e";
4447 mgp->eth_z8e = (unsigned char *)rss_eth_z8e;
4448 mgp->eth_z8e_length = rss_eth_z8e_length;
4449 mgp->tx_boundary = 4096;
4450 } else {
4451 mgp->fw_name = "rss_ethp_z8e";
4452 mgp->eth_z8e = (unsigned char *)rss_ethp_z8e;
4453 mgp->eth_z8e_length = rss_ethp_z8e_length;
4454 mgp->tx_boundary = 2048;
4457 return (myri10ge_load_firmware(mgp));
/*
 * Allocate one interrupt per slice, preferring MSI-X, then MSI, then
 * fixed interrupts (subject to the myri10ge_use_msi/msix tunables).
 * When add_handler is set, also attach myri10ge_intr() to each vector
 * and enable them.  On success mgp->htable/intr_cnt/intr_pri/icookie
 * are populated.  Returns DDI_SUCCESS or DDI_FAILURE (with all
 * partially-allocated resources released).
 */
static int
myri10ge_add_intrs(struct myri10ge_priv *mgp, int add_handler)
{
	dev_info_t *devinfo = mgp->dip;
	int count, avail, actual, intr_types;
	int x, y, rc, inum = 0;

	rc = ddi_intr_get_supported_types(devinfo, &intr_types);
	if (rc != DDI_SUCCESS) {
		cmn_err(CE_WARN,
		    "!%s: ddi_intr_get_nintrs() failure, rc = %d\n", mgp->name,
		    rc);
		return (DDI_FAILURE);
	}

	/* honor the tunables before picking an interrupt type */
	if (!myri10ge_use_msi)
		intr_types &= ~DDI_INTR_TYPE_MSI;
	if (!myri10ge_use_msix)
		intr_types &= ~DDI_INTR_TYPE_MSIX;

	if (intr_types & DDI_INTR_TYPE_MSIX) {
		mgp->ddi_intr_type = DDI_INTR_TYPE_MSIX;
		mgp->intr_type = "MSI-X";
	} else if (intr_types & DDI_INTR_TYPE_MSI) {
		mgp->ddi_intr_type = DDI_INTR_TYPE_MSI;
		mgp->intr_type = "MSI";
	} else {
		mgp->ddi_intr_type = DDI_INTR_TYPE_FIXED;
		mgp->intr_type = "Legacy";
	}
	/* Get number of interrupts */
	rc = ddi_intr_get_nintrs(devinfo, mgp->ddi_intr_type, &count);
	if ((rc != DDI_SUCCESS) || (count == 0)) {
		cmn_err(CE_WARN, "%s: ddi_intr_get_nintrs() failure, rc: %d, "
		    "count: %d", mgp->name, rc, count);

		return (DDI_FAILURE);
	}

	/* Get number of available interrupts */
	rc = ddi_intr_get_navail(devinfo, mgp->ddi_intr_type, &avail);
	if ((rc != DDI_SUCCESS) || (avail == 0)) {
		cmn_err(CE_WARN, "%s: ddi_intr_get_navail() failure, "
		    "rc: %d, avail: %d\n", mgp->name, rc, avail);
		return (DDI_FAILURE);
	}
	if (avail < count) {
		cmn_err(CE_NOTE,
		    "!%s: nintrs() returned %d, navail returned %d",
		    mgp->name, count, avail);
		count = avail;
	}

	/* need at least one vector per slice */
	if (count < mgp->num_slices)
		return (DDI_FAILURE);

	if (count > mgp->num_slices)
		count = mgp->num_slices;

	/* Allocate memory for MSI interrupts */
	mgp->intr_size = count * sizeof (ddi_intr_handle_t);
	mgp->htable = kmem_alloc(mgp->intr_size, KM_SLEEP);

	rc = ddi_intr_alloc(devinfo, mgp->htable, mgp->ddi_intr_type, inum,
	    count, &actual, DDI_INTR_ALLOC_NORMAL);

	if ((rc != DDI_SUCCESS) || (actual == 0)) {
		cmn_err(CE_WARN, "%s: ddi_intr_alloc() failed: %d",
		    mgp->name, rc);

		kmem_free(mgp->htable, mgp->intr_size);
		mgp->htable = NULL;
		return (DDI_FAILURE);
	}

	if ((actual < count) && myri10ge_verbose) {
		cmn_err(CE_NOTE, "%s: got %d/%d slices",
		    mgp->name, actual, count);
	}

	mgp->intr_cnt = actual;

	/*
	 * Get priority for first irq, assume remaining are all the same
	 */
	if (ddi_intr_get_pri(mgp->htable[0], &mgp->intr_pri)
	    != DDI_SUCCESS) {
		cmn_err(CE_WARN, "%s: ddi_intr_get_pri() failed", mgp->name);

		/* Free already allocated intr */
		for (y = 0; y < actual; y++) {
			(void) ddi_intr_free(mgp->htable[y]);
		}

		kmem_free(mgp->htable, mgp->intr_size);
		mgp->htable = NULL;
		return (DDI_FAILURE);
	}

	mgp->icookie = (void *)(uintptr_t)mgp->intr_pri;

	if (!add_handler)
		return (DDI_SUCCESS);

	/* Call ddi_intr_add_handler() */
	for (x = 0; x < actual; x++) {
		if (ddi_intr_add_handler(mgp->htable[x], myri10ge_intr,
		    (caddr_t)&mgp->ss[x], NULL) != DDI_SUCCESS) {
			cmn_err(CE_WARN, "%s: ddi_intr_add_handler() failed",
			    mgp->name);

			/*
			 * NOTE(review): this frees all vectors without
			 * first removing the handlers added on earlier
			 * iterations -- ddi_intr_free(9F) requires the
			 * handler to be removed first; confirm.
			 */
			/* Free already allocated intr */
			for (y = 0; y < actual; y++) {
				(void) ddi_intr_free(mgp->htable[y]);
			}

			kmem_free(mgp->htable, mgp->intr_size);
			mgp->htable = NULL;
			return (DDI_FAILURE);
		}
	}

	(void) ddi_intr_get_cap(mgp->htable[0], &mgp->intr_cap);
	if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) {
		/* Call ddi_intr_block_enable() for MSI */
		(void) ddi_intr_block_enable(mgp->htable, mgp->intr_cnt);
	} else {
		/* Call ddi_intr_enable() for MSI non block enable */
		for (x = 0; x < mgp->intr_cnt; x++) {
			(void) ddi_intr_enable(mgp->htable[x]);
		}
	}

	return (DDI_SUCCESS);
}
4597 static void
4598 myri10ge_rem_intrs(struct myri10ge_priv *mgp, int handler_installed)
4600 int x, err;
4602 /* Disable all interrupts */
4603 if (handler_installed) {
4604 if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) {
4605 /* Call ddi_intr_block_disable() */
4606 (void) ddi_intr_block_disable(mgp->htable,
4607 mgp->intr_cnt);
4608 } else {
4609 for (x = 0; x < mgp->intr_cnt; x++) {
4610 (void) ddi_intr_disable(mgp->htable[x]);
4615 for (x = 0; x < mgp->intr_cnt; x++) {
4616 if (handler_installed) {
4617 /* Call ddi_intr_remove_handler() */
4618 err = ddi_intr_remove_handler(mgp->htable[x]);
4619 if (err != DDI_SUCCESS) {
4620 cmn_err(CE_WARN,
4621 "%s: ddi_intr_remove_handler for"
4622 "vec %d returned %d\n", mgp->name,
4623 x, err);
4626 err = ddi_intr_free(mgp->htable[x]);
4627 if (err != DDI_SUCCESS) {
4628 cmn_err(CE_WARN,
4629 "%s: ddi_intr_free for vec %d returned %d\n",
4630 mgp->name, x, err);
4633 kmem_free(mgp->htable, mgp->intr_size);
4634 mgp->htable = NULL;
4637 static void
4638 myri10ge_test_physical(dev_info_t *dip)
4640 ddi_dma_handle_t handle;
4641 struct myri10ge_dma_stuff dma;
4642 void *addr;
4643 int err;
4645 /* test #1, sufficient for older sparc systems */
4646 myri10ge_tx_dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
4647 err = ddi_dma_alloc_handle(dip, &myri10ge_tx_dma_attr,
4648 DDI_DMA_DONTWAIT, NULL, &handle);
4649 if (err == DDI_DMA_BADATTR)
4650 goto fail;
4651 ddi_dma_free_handle(&handle);
4653 /* test #2, required on Olympis where the bind is what fails */
4654 addr = myri10ge_dma_alloc(dip, 128, &myri10ge_tx_dma_attr,
4655 &myri10ge_dev_access_attr, DDI_DMA_STREAMING,
4656 DDI_DMA_WRITE|DDI_DMA_STREAMING, &dma, 0, DDI_DMA_DONTWAIT);
4657 if (addr == NULL)
4658 goto fail;
4659 myri10ge_dma_free(&dma);
4660 return;
4662 fail:
4663 if (myri10ge_verbose)
4664 printf("myri10ge%d: DDI_DMA_FORCE_PHYSICAL failed, "
4665 "using IOMMU\n", ddi_get_instance(dip));
4667 myri10ge_tx_dma_attr.dma_attr_flags &= ~DDI_DMA_FORCE_PHYSICAL;
4670 static void
4671 myri10ge_get_props(dev_info_t *dip)
4674 myri10ge_flow_control = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4675 "myri10ge_flow_control", myri10ge_flow_control);
4677 myri10ge_intr_coal_delay = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4678 "myri10ge_intr_coal_delay", myri10ge_intr_coal_delay);
4680 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
4681 myri10ge_nvidia_ecrc_enable = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4682 "myri10ge_nvidia_ecrc_enable", 1);
4683 #endif
4686 myri10ge_use_msi = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4687 "myri10ge_use_msi", myri10ge_use_msi);
4689 myri10ge_deassert_wait = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4690 "myri10ge_deassert_wait", myri10ge_deassert_wait);
4692 myri10ge_verbose = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4693 "myri10ge_verbose", myri10ge_verbose);
4695 myri10ge_tx_copylen = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4696 "myri10ge_tx_copylen", myri10ge_tx_copylen);
4698 if (myri10ge_tx_copylen < 60) {
4699 cmn_err(CE_WARN,
4700 "myri10ge_tx_copylen must be >= 60 bytes\n");
4701 myri10ge_tx_copylen = 60;
4704 myri10ge_mtu_override = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4705 "myri10ge_mtu_override", myri10ge_mtu_override);
4707 if (myri10ge_mtu_override >= MYRI10GE_MIN_GLD_MTU &&
4708 myri10ge_mtu_override <= MYRI10GE_MAX_GLD_MTU)
4709 myri10ge_mtu = myri10ge_mtu_override +
4710 sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ;
4711 else if (myri10ge_mtu_override != 0) {
4712 cmn_err(CE_WARN,
4713 "myri10ge_mtu_override must be between 1500 and "
4714 "9000 bytes\n");
4717 myri10ge_bigbufs_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4718 "myri10ge_bigbufs_initial", myri10ge_bigbufs_initial);
4719 myri10ge_bigbufs_max = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4720 "myri10ge_bigbufs_max", myri10ge_bigbufs_max);
4722 myri10ge_watchdog_reset = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4723 "myri10ge_watchdog_reset", myri10ge_watchdog_reset);
4725 if (myri10ge_bigbufs_initial < 128) {
4726 cmn_err(CE_WARN,
4727 "myri10ge_bigbufs_initial be at least 128\n");
4728 myri10ge_bigbufs_initial = 128;
4730 if (myri10ge_bigbufs_max < 128) {
4731 cmn_err(CE_WARN,
4732 "myri10ge_bigbufs_max be at least 128\n");
4733 myri10ge_bigbufs_max = 128;
4736 if (myri10ge_bigbufs_max < myri10ge_bigbufs_initial) {
4737 cmn_err(CE_WARN,
4738 "myri10ge_bigbufs_max must be >= "
4739 "myri10ge_bigbufs_initial\n");
4740 myri10ge_bigbufs_max = myri10ge_bigbufs_initial;
4743 myri10ge_force_firmware = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4744 "myri10ge_force_firmware", myri10ge_force_firmware);
4746 myri10ge_max_slices = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4747 "myri10ge_max_slices", myri10ge_max_slices);
4749 myri10ge_use_msix = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4750 "myri10ge_use_msix", myri10ge_use_msix);
4752 myri10ge_rss_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4753 "myri10ge_rss_hash", myri10ge_rss_hash);
4755 if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_MAX ||
4756 myri10ge_rss_hash < MXGEFW_RSS_HASH_TYPE_IPV4) {
4757 cmn_err(CE_WARN, "myri10ge: Illegal rssh hash type %d\n",
4758 myri10ge_rss_hash);
4759 myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4761 myri10ge_lro = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4762 "myri10ge_lro", myri10ge_lro);
4763 myri10ge_lro_cnt = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4764 "myri10ge_lro_cnt", myri10ge_lro_cnt);
4765 myri10ge_lro_max_aggr = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4766 "myri10ge_lro_max_aggr", myri10ge_lro_max_aggr);
4767 myri10ge_tx_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4768 "myri10ge_tx_hash", myri10ge_tx_hash);
4769 myri10ge_use_lso = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4770 "myri10ge_use_lso", myri10ge_use_lso);
4771 myri10ge_lso_copy = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4772 "myri10ge_lso_copy", myri10ge_lso_copy);
4773 myri10ge_tx_handles_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4774 "myri10ge_tx_handles_initial", myri10ge_tx_handles_initial);
4775 myri10ge_small_bytes = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4776 "myri10ge_small_bytes", myri10ge_small_bytes);
4777 if ((myri10ge_small_bytes + MXGEFW_PAD) & (128 -1)) {
4778 cmn_err(CE_WARN, "myri10ge: myri10ge_small_bytes (%d)\n",
4779 myri10ge_small_bytes);
4780 cmn_err(CE_WARN, "must be aligned on 128b bndry -2\n");
4781 myri10ge_small_bytes += 128;
4782 myri10ge_small_bytes &= ~(128 -1);
4783 myri10ge_small_bytes -= MXGEFW_PAD;
4784 cmn_err(CE_WARN, "rounded up to %d\n",
4785 myri10ge_small_bytes);
4787 myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4791 #ifndef PCI_EXP_LNKSTA
4792 #define PCI_EXP_LNKSTA 18
4793 #endif
4795 static int
4796 myri10ge_find_cap(ddi_acc_handle_t handle, uint8_t *capptr, uint8_t capid)
4798 uint16_t status;
4799 uint8_t ptr;
4801 /* check to see if we have capabilities */
4802 status = pci_config_get16(handle, PCI_CONF_STAT);
4803 if (!(status & PCI_STAT_CAP)) {
4804 cmn_err(CE_WARN, "PCI_STAT_CAP not found\n");
4805 return (ENXIO);
4808 ptr = pci_config_get8(handle, PCI_CONF_CAP_PTR);
4810 /* Walk the capabilities list, looking for a PCI Express cap */
4811 while (ptr != PCI_CAP_NEXT_PTR_NULL) {
4812 if (pci_config_get8(handle, ptr + PCI_CAP_ID) == capid)
4813 break;
4814 ptr = pci_config_get8(handle, ptr + PCI_CAP_NEXT_PTR);
4816 if (ptr < 64) {
4817 cmn_err(CE_WARN, "Bad capability offset %d\n", ptr);
4818 return (ENXIO);
4820 *capptr = ptr;
4821 return (0);
4824 static int
4825 myri10ge_set_max_readreq(ddi_acc_handle_t handle)
4827 int err;
4828 uint16_t val;
4829 uint8_t ptr;
4831 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E);
4832 if (err != 0) {
4833 cmn_err(CE_WARN, "could not find PCIe cap\n");
4834 return (ENXIO);
4837 /* set max read req to 4096 */
4838 val = pci_config_get16(handle, ptr + PCIE_DEVCTL);
4839 val = (val & ~PCIE_DEVCTL_MAX_READ_REQ_MASK) |
4840 PCIE_DEVCTL_MAX_READ_REQ_4096;
4841 pci_config_put16(handle, ptr + PCIE_DEVCTL, val);
4842 val = pci_config_get16(handle, ptr + PCIE_DEVCTL);
4843 if ((val & (PCIE_DEVCTL_MAX_READ_REQ_4096)) !=
4844 PCIE_DEVCTL_MAX_READ_REQ_4096) {
4845 cmn_err(CE_WARN, "could not set max read req (%x)\n", val);
4846 return (EINVAL);
4848 return (0);
4851 static int
4852 myri10ge_read_pcie_link_width(ddi_acc_handle_t handle, int *link)
4854 int err;
4855 uint16_t val;
4856 uint8_t ptr;
4858 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E);
4859 if (err != 0) {
4860 cmn_err(CE_WARN, "could not set max read req\n");
4861 return (ENXIO);
4864 /* read link width */
4865 val = pci_config_get16(handle, ptr + PCIE_LINKSTS);
4866 val &= PCIE_LINKSTS_NEG_WIDTH_MASK;
4867 *link = (val >> 4);
4868 return (0);
4871 static int
4872 myri10ge_reset_nic(struct myri10ge_priv *mgp)
4874 ddi_acc_handle_t handle = mgp->cfg_hdl;
4875 uint32_t reboot;
4876 uint16_t cmd;
4877 int err;
4879 cmd = pci_config_get16(handle, PCI_CONF_COMM);
4880 if ((cmd & PCI_COMM_ME) == 0) {
4882 * Bus master DMA disabled? Check to see if the card
4883 * rebooted due to a parity error For now, just report
4884 * it
4887 /* enter read32 mode */
4888 pci_config_put8(handle, mgp->vso + 0x10, 0x3);
4889 /* read REBOOT_STATUS (0xfffffff0) */
4890 pci_config_put32(handle, mgp->vso + 0x18, 0xfffffff0);
4891 reboot = pci_config_get16(handle, mgp->vso + 0x14);
4892 cmn_err(CE_WARN, "%s NIC rebooted 0x%x\n", mgp->name, reboot);
4893 return (0);
4895 if (!myri10ge_watchdog_reset) {
4896 cmn_err(CE_WARN, "%s: not resetting\n", mgp->name);
4897 return (1);
4900 myri10ge_stop_locked(mgp);
4901 err = myri10ge_start_locked(mgp);
4902 if (err == DDI_FAILURE) {
4903 return (0);
4905 mac_tx_update(mgp->mh);
4906 return (1);
4909 static inline int
4910 myri10ge_ring_stalled(myri10ge_tx_ring_t *tx)
4912 if (tx->sched != tx->stall &&
4913 tx->done == tx->watchdog_done &&
4914 tx->watchdog_req != tx->watchdog_done)
4915 return (1);
4916 return (0);
4919 static void
4920 myri10ge_watchdog(void *arg)
4922 struct myri10ge_priv *mgp;
4923 struct myri10ge_slice_state *ss;
4924 myri10ge_tx_ring_t *tx;
4925 int nic_ok = 1;
4926 int slices_stalled, rx_pause, i;
4927 int add_rx;
4929 mgp = arg;
4930 mutex_enter(&mgp->intrlock);
4931 if (mgp->running != MYRI10GE_ETH_RUNNING) {
4932 cmn_err(CE_WARN,
4933 "%s not running, not rearming watchdog (%d)\n",
4934 mgp->name, mgp->running);
4935 mutex_exit(&mgp->intrlock);
4936 return;
4939 rx_pause = ntohl(mgp->ss[0].fw_stats->dropped_pause);
4942 * make sure nic is stalled before we reset the nic, so as to
4943 * ensure we don't rip the transmit data structures out from
4944 * under a pending transmit
4947 for (slices_stalled = 0, i = 0; i < mgp->num_slices; i++) {
4948 tx = &mgp->ss[i].tx;
4949 slices_stalled = myri10ge_ring_stalled(tx);
4950 if (slices_stalled)
4951 break;
4954 if (slices_stalled) {
4955 if (mgp->watchdog_rx_pause == rx_pause) {
4956 cmn_err(CE_WARN,
4957 "%s slice %d stalled:(%d, %d, %d, %d, %d %d %d\n)",
4958 mgp->name, i, tx->sched, tx->stall,
4959 tx->done, tx->watchdog_done, tx->req, tx->pkt_done,
4960 (int)ntohl(mgp->ss[i].fw_stats->send_done_count));
4961 nic_ok = myri10ge_reset_nic(mgp);
4962 } else {
4963 cmn_err(CE_WARN,
4964 "%s Flow controlled, check link partner\n",
4965 mgp->name);
4969 if (!nic_ok) {
4970 cmn_err(CE_WARN,
4971 "%s Nic dead, not rearming watchdog\n", mgp->name);
4972 mutex_exit(&mgp->intrlock);
4973 return;
4975 for (i = 0; i < mgp->num_slices; i++) {
4976 ss = &mgp->ss[i];
4977 tx = &ss->tx;
4978 tx->watchdog_done = tx->done;
4979 tx->watchdog_req = tx->req;
4980 if (ss->watchdog_rx_copy != MYRI10GE_SLICE_STAT(rx_copy)) {
4981 ss->watchdog_rx_copy = MYRI10GE_SLICE_STAT(rx_copy);
4982 add_rx =
4983 min(ss->jpool.num_alloc,
4984 myri10ge_bigbufs_max -
4985 (ss->jpool.num_alloc -
4986 ss->jbufs_for_smalls));
4987 if (add_rx != 0) {
4988 (void) myri10ge_add_jbufs(ss, add_rx, 0);
4989 /* now feed them to the firmware */
4990 mutex_enter(&ss->jpool.mtx);
4991 myri10ge_restock_jumbos(ss);
4992 mutex_exit(&ss->jpool.mtx);
4996 mgp->watchdog_rx_pause = rx_pause;
4998 mgp->timer_id = timeout(myri10ge_watchdog, mgp,
4999 mgp->timer_ticks);
5000 mutex_exit(&mgp->intrlock);
5003 /*ARGSUSED*/
5004 static int
5005 myri10ge_get_coalesce(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp)
5007 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
5008 (void) mi_mpprintf(mp, "%d", mgp->intr_coal_delay);
5009 return (0);
5012 /*ARGSUSED*/
5013 static int
5014 myri10ge_set_coalesce(queue_t *q, mblk_t *mp, char *value,
5015 caddr_t cp, cred_t *credp)
5017 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
5018 char *end;
5019 size_t new_value;
5021 new_value = mi_strtol(value, &end, 10);
5022 if (end == value)
5023 return (EINVAL);
5025 mutex_enter(&myri10ge_param_lock);
5026 mgp->intr_coal_delay = (int)new_value;
5027 *mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay);
5028 mutex_exit(&myri10ge_param_lock);
5029 return (0);
5032 /*ARGSUSED*/
5033 static int
5034 myri10ge_get_pauseparam(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp)
5036 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
5037 (void) mi_mpprintf(mp, "%d", mgp->pause);
5038 return (0);
5041 /*ARGSUSED*/
5042 static int
5043 myri10ge_set_pauseparam(queue_t *q, mblk_t *mp, char *value,
5044 caddr_t cp, cred_t *credp)
5046 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
5047 char *end;
5048 size_t new_value;
5049 int err = 0;
5051 new_value = mi_strtol(value, &end, 10);
5052 if (end == value)
5053 return (EINVAL);
5054 if (new_value != 0)
5055 new_value = 1;
5057 mutex_enter(&myri10ge_param_lock);
5058 if (new_value != mgp->pause)
5059 err = myri10ge_change_pause(mgp, new_value);
5060 mutex_exit(&myri10ge_param_lock);
5061 return (err);
5064 /*ARGSUSED*/
5065 static int
5066 myri10ge_get_int(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp)
5068 (void) mi_mpprintf(mp, "%d", *(int *)(void *)cp);
5069 return (0);
5072 /*ARGSUSED*/
5073 static int
5074 myri10ge_set_int(queue_t *q, mblk_t *mp, char *value,
5075 caddr_t cp, cred_t *credp)
5077 char *end;
5078 size_t new_value;
5080 new_value = mi_strtol(value, &end, 10);
5081 if (end == value)
5082 return (EINVAL);
5083 *(int *)(void *)cp = new_value;
5085 return (0);
5088 static void
5089 myri10ge_ndd_init(struct myri10ge_priv *mgp)
5091 mgp->nd_head = NULL;
5093 (void) nd_load(&mgp->nd_head, "myri10ge_intr_coal_delay",
5094 myri10ge_get_coalesce, myri10ge_set_coalesce, (caddr_t)mgp);
5095 (void) nd_load(&mgp->nd_head, "myri10ge_flow_control",
5096 myri10ge_get_pauseparam, myri10ge_set_pauseparam, (caddr_t)mgp);
5097 (void) nd_load(&mgp->nd_head, "myri10ge_verbose",
5098 myri10ge_get_int, myri10ge_set_int, (caddr_t)&myri10ge_verbose);
5099 (void) nd_load(&mgp->nd_head, "myri10ge_deassert_wait",
5100 myri10ge_get_int, myri10ge_set_int,
5101 (caddr_t)&myri10ge_deassert_wait);
5102 (void) nd_load(&mgp->nd_head, "myri10ge_bigbufs_max",
5103 myri10ge_get_int, myri10ge_set_int,
5104 (caddr_t)&myri10ge_bigbufs_max);
5105 (void) nd_load(&mgp->nd_head, "myri10ge_lro",
5106 myri10ge_get_int, myri10ge_set_int,
5107 (caddr_t)&myri10ge_lro);
5108 (void) nd_load(&mgp->nd_head, "myri10ge_lro_max_aggr",
5109 myri10ge_get_int, myri10ge_set_int,
5110 (caddr_t)&myri10ge_lro_max_aggr);
5111 (void) nd_load(&mgp->nd_head, "myri10ge_tx_hash",
5112 myri10ge_get_int, myri10ge_set_int,
5113 (caddr_t)&myri10ge_tx_hash);
5114 (void) nd_load(&mgp->nd_head, "myri10ge_lso_copy",
5115 myri10ge_get_int, myri10ge_set_int,
5116 (caddr_t)&myri10ge_lso_copy);
5119 static void
5120 myri10ge_ndd_fini(struct myri10ge_priv *mgp)
5122 nd_free(&mgp->nd_head);
5125 static void
5126 myri10ge_m_ioctl(void *arg, queue_t *wq, mblk_t *mp)
5128 struct iocblk *iocp;
5129 struct myri10ge_priv *mgp = arg;
5130 int cmd, ok, err;
5132 iocp = (struct iocblk *)(void *)mp->b_rptr;
5133 cmd = iocp->ioc_cmd;
5135 ok = 0;
5136 err = 0;
5138 switch (cmd) {
5139 case ND_GET:
5140 case ND_SET:
5141 ok = nd_getset(wq, mgp->nd_head, mp);
5142 break;
5143 default:
5144 break;
5146 if (!ok)
5147 err = EINVAL;
5148 else
5149 err = iocp->ioc_error;
5151 if (!err)
5152 miocack(wq, mp, iocp->ioc_count, err);
5153 else
5154 miocnak(wq, mp, 0, err);
5157 static struct myri10ge_priv *mgp_list;
5159 struct myri10ge_priv *
5160 myri10ge_get_instance(uint_t unit)
5162 struct myri10ge_priv *mgp;
5164 mutex_enter(&myri10ge_param_lock);
5165 for (mgp = mgp_list; mgp != NULL; mgp = mgp->next) {
5166 if (unit == ddi_get_instance(mgp->dip)) {
5167 mgp->refcnt++;
5168 break;
5171 mutex_exit(&myri10ge_param_lock);
5172 return (mgp);
5175 void
5176 myri10ge_put_instance(struct myri10ge_priv *mgp)
5178 mutex_enter(&myri10ge_param_lock);
5179 mgp->refcnt--;
5180 mutex_exit(&myri10ge_param_lock);
5183 static boolean_t
5184 myri10ge_m_getcapab(void *arg, mac_capab_t cap, void *cap_data)
5186 struct myri10ge_priv *mgp = arg;
5187 uint32_t *cap_hcksum;
5188 mac_capab_lso_t *cap_lso;
5189 mac_capab_rings_t *cap_rings;
5191 switch (cap) {
5192 case MAC_CAPAB_HCKSUM:
5193 cap_hcksum = cap_data;
5194 *cap_hcksum = HCKSUM_INET_PARTIAL;
5195 break;
5196 case MAC_CAPAB_RINGS:
5197 cap_rings = cap_data;
5198 switch (cap_rings->mr_type) {
5199 case MAC_RING_TYPE_RX:
5200 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
5201 cap_rings->mr_rnum = mgp->num_slices;
5202 cap_rings->mr_gnum = 1;
5203 cap_rings->mr_rget = myri10ge_fill_ring;
5204 cap_rings->mr_gget = myri10ge_fill_group;
5205 break;
5206 case MAC_RING_TYPE_TX:
5207 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
5208 cap_rings->mr_rnum = mgp->num_slices;
5209 cap_rings->mr_gnum = 0;
5210 cap_rings->mr_rget = myri10ge_fill_ring;
5211 cap_rings->mr_gget = NULL;
5212 break;
5213 default:
5214 return (B_FALSE);
5216 break;
5217 case MAC_CAPAB_LSO:
5218 cap_lso = cap_data;
5219 if (!myri10ge_use_lso)
5220 return (B_FALSE);
5221 if (!(mgp->features & MYRI10GE_TSO))
5222 return (B_FALSE);
5223 cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
5224 cap_lso->lso_basic_tcp_ipv4.lso_max = (uint16_t)-1;
5225 break;
5227 default:
5228 return (B_FALSE);
5230 return (B_TRUE);
5234 static int
5235 myri10ge_m_stat(void *arg, uint_t stat, uint64_t *val)
5237 struct myri10ge_priv *mgp = arg;
5238 struct myri10ge_rx_ring_stats *rstat;
5239 struct myri10ge_tx_ring_stats *tstat;
5240 mcp_irq_data_t *fw_stats = mgp->ss[0].fw_stats;
5241 struct myri10ge_slice_state *ss;
5242 uint64_t tmp = 0;
5243 int i;
5245 switch (stat) {
5246 case MAC_STAT_IFSPEED:
5247 *val = 10ull * 1000ull * 1000000ull;
5248 break;
5250 case MAC_STAT_MULTIRCV:
5251 for (i = 0; i < mgp->num_slices; i++) {
5252 rstat = &mgp->ss[i].rx_stats;
5253 tmp += rstat->multircv;
5255 *val = tmp;
5256 break;
5258 case MAC_STAT_BRDCSTRCV:
5259 for (i = 0; i < mgp->num_slices; i++) {
5260 rstat = &mgp->ss[i].rx_stats;
5261 tmp += rstat->brdcstrcv;
5263 *val = tmp;
5264 break;
5266 case MAC_STAT_MULTIXMT:
5267 for (i = 0; i < mgp->num_slices; i++) {
5268 tstat = &mgp->ss[i].tx.stats;
5269 tmp += tstat->multixmt;
5271 *val = tmp;
5272 break;
5274 case MAC_STAT_BRDCSTXMT:
5275 for (i = 0; i < mgp->num_slices; i++) {
5276 tstat = &mgp->ss[i].tx.stats;
5277 tmp += tstat->brdcstxmt;
5279 *val = tmp;
5280 break;
5282 case MAC_STAT_NORCVBUF:
5283 tmp = ntohl(fw_stats->dropped_no_big_buffer);
5284 tmp += ntohl(fw_stats->dropped_no_small_buffer);
5285 tmp += ntohl(fw_stats->dropped_link_overflow);
5286 for (i = 0; i < mgp->num_slices; i++) {
5287 ss = &mgp->ss[i];
5288 tmp += MYRI10GE_SLICE_STAT(rx_big_nobuf);
5289 tmp += MYRI10GE_SLICE_STAT(rx_small_nobuf);
5291 *val = tmp;
5292 break;
5294 case MAC_STAT_IERRORS:
5295 tmp += ntohl(fw_stats->dropped_bad_crc32);
5296 tmp += ntohl(fw_stats->dropped_bad_phy);
5297 tmp += ntohl(fw_stats->dropped_runt);
5298 tmp += ntohl(fw_stats->dropped_overrun);
5299 *val = tmp;
5300 break;
5302 case MAC_STAT_OERRORS:
5303 for (i = 0; i < mgp->num_slices; i++) {
5304 ss = &mgp->ss[i];
5305 tmp += MYRI10GE_SLICE_STAT(xmit_lsobadflags);
5306 tmp += MYRI10GE_SLICE_STAT(xmit_err);
5308 *val = tmp;
5309 break;
5311 case MAC_STAT_RBYTES:
5312 for (i = 0; i < mgp->num_slices; i++) {
5313 rstat = &mgp->ss[i].rx_stats;
5314 tmp += rstat->ibytes;
5316 *val = tmp;
5317 break;
5319 case MAC_STAT_IPACKETS:
5320 for (i = 0; i < mgp->num_slices; i++) {
5321 rstat = &mgp->ss[i].rx_stats;
5322 tmp += rstat->ipackets;
5324 *val = tmp;
5325 break;
5327 case MAC_STAT_OBYTES:
5328 for (i = 0; i < mgp->num_slices; i++) {
5329 tstat = &mgp->ss[i].tx.stats;
5330 tmp += tstat->obytes;
5332 *val = tmp;
5333 break;
5335 case MAC_STAT_OPACKETS:
5336 for (i = 0; i < mgp->num_slices; i++) {
5337 tstat = &mgp->ss[i].tx.stats;
5338 tmp += tstat->opackets;
5340 *val = tmp;
5341 break;
5343 case ETHER_STAT_TOOLONG_ERRORS:
5344 *val = ntohl(fw_stats->dropped_overrun);
5345 break;
5347 #ifdef SOLARIS_S11
5348 case ETHER_STAT_TOOSHORT_ERRORS:
5349 *val = ntohl(fw_stats->dropped_runt);
5350 break;
5351 #endif
5353 case ETHER_STAT_LINK_PAUSE:
5354 *val = mgp->pause;
5355 break;
5357 case ETHER_STAT_LINK_AUTONEG:
5358 *val = 1;
5359 break;
5361 case ETHER_STAT_LINK_DUPLEX:
5362 *val = LINK_DUPLEX_FULL;
5363 break;
5365 default:
5366 return (ENOTSUP);
5369 return (0);
5372 /* ARGSUSED */
5373 static void
5374 myri10ge_m_propinfo(void *arg, const char *pr_name,
5375 mac_prop_id_t pr_num, mac_prop_info_handle_t prh)
5377 switch (pr_num) {
5378 case MAC_PROP_MTU:
5379 mac_prop_info_set_default_uint32(prh, MYRI10GE_DEFAULT_GLD_MTU);
5380 mac_prop_info_set_range_uint32(prh, MYRI10GE_MIN_GLD_MTU,
5381 MYRI10GE_MAX_GLD_MTU);
5382 break;
5383 default:
5384 break;
5388 /*ARGSUSED*/
5389 static int
5390 myri10ge_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
5391 uint_t pr_valsize, const void *pr_val)
5393 int err = 0;
5394 struct myri10ge_priv *mgp = arg;
5396 switch (pr_num) {
5397 case MAC_PROP_MTU: {
5398 uint32_t mtu;
5399 if (pr_valsize < sizeof (mtu)) {
5400 err = EINVAL;
5401 break;
5403 bcopy(pr_val, &mtu, sizeof (mtu));
5404 if (mtu > MYRI10GE_MAX_GLD_MTU ||
5405 mtu < MYRI10GE_MIN_GLD_MTU) {
5406 err = EINVAL;
5407 break;
5410 mutex_enter(&mgp->intrlock);
5411 if (mgp->running != MYRI10GE_ETH_STOPPED) {
5412 err = EBUSY;
5413 mutex_exit(&mgp->intrlock);
5414 break;
5417 myri10ge_mtu = mtu + sizeof (struct ether_header) +
5418 MXGEFW_PAD + VLAN_TAGSZ;
5419 mutex_exit(&mgp->intrlock);
5420 break;
5422 default:
5423 err = ENOTSUP;
5424 break;
5427 return (err);
/*
 * GLDv3 callback vector registered with mac_register().  Positional
 * initializer; field order presumably follows mac_callbacks_t
 * (mc_callbacks mask first, then the entry points) -- TODO confirm
 * against the mac(9E) structure definition.  NULL slots are entry
 * points this driver does not provide (unicast/tx are handled through
 * the rings capability instead).
 */
static mac_callbacks_t myri10ge_m_callbacks = {
	(MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO),
	myri10ge_m_stat,	/* get statistics */
	myri10ge_m_start,	/* start the device */
	myri10ge_m_stop,	/* stop the device */
	myri10ge_m_promisc,	/* set promiscuous mode */
	myri10ge_m_multicst,	/* add/remove multicast address */
	NULL,
	NULL,
	NULL,
	myri10ge_m_ioctl,	/* ndd tunables via ioctl */
	myri10ge_m_getcapab,	/* capability negotiation */
	NULL,
	NULL,
	myri10ge_m_setprop,	/* set MTU */
	NULL,
	myri10ge_m_propinfo	/* property metadata */
};
/*
 * Determine how many slices (rx/tx ring pairs) to use.  Resets the
 * firmware, asks it how many RSS queues it supports, caps that by the
 * admin-configured limit (or ncpus), and then negotiates downward
 * until an MSI-X vector can be allocated per slice.  Falls back to a
 * single slice if MSI-X is disabled or unavailable.  Returns 0 on
 * success (mgp->num_slices set), ENXIO if the board will not respond.
 */
static int
myri10ge_probe_slices(struct myri10ge_priv *mgp)
{
	myri10ge_cmd_t cmd;
	int status;

	mgp->num_slices = 1;

	/* hit the board with a reset to ensure it is alive */
	(void) memset(&cmd, 0, sizeof (cmd));
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
		return (ENXIO);
	}

	if (myri10ge_use_msix == 0)
		return (0);

	/* tell it the size of the interrupt queues */
	cmd.data0 = mgp->max_intr_slots * sizeof (struct mcp_slot);
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed MXGEFW_CMD_SET_INTRQ_SIZE\n",
		    mgp->name);
		return (ENXIO);
	}

	/* ask the maximum number of slices it supports */
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
	    &cmd);
	if (status != 0)
		/* firmware without RSS support: stay single-slice */
		return (0);

	mgp->num_slices = cmd.data0;

	/*
	 * if the admin did not specify a limit to how many
	 * slices we should use, cap it automatically to the
	 * number of CPUs currently online
	 */
	if (myri10ge_max_slices == -1)
		myri10ge_max_slices = ncpus;

	if (mgp->num_slices > myri10ge_max_slices)
		mgp->num_slices = myri10ge_max_slices;

	/*
	 * Now try to allocate as many MSI-X vectors as we have
	 * slices. We give up on MSI-X if we can only get a single
	 * vector.
	 */
	while (mgp->num_slices > 1) {
		/* make sure it is a power of two */
		while (!ISP2(mgp->num_slices))
			mgp->num_slices--;
		if (mgp->num_slices == 1)
			return (0);

		/* trial allocation only (no handlers installed) */
		status = myri10ge_add_intrs(mgp, 0);
		if (status == 0) {
			myri10ge_rem_intrs(mgp, 0);
			if (mgp->intr_cnt == mgp->num_slices) {
				if (myri10ge_verbose)
					printf("Got %d slices!\n",
					    mgp->num_slices);
				return (0);
			}
			/* got fewer vectors than asked; retry at that count */
			mgp->num_slices = mgp->intr_cnt;
		} else {
			/* allocation failed outright; halve and retry */
			mgp->num_slices = mgp->num_slices / 2;
		}
	}

	if (myri10ge_verbose)
		printf("Got %d slices\n", mgp->num_slices);
	return (0);
}
5530 static void
5531 myri10ge_lro_free(struct myri10ge_slice_state *ss)
5533 struct lro_entry *lro;
5535 while (ss->lro_free != NULL) {
5536 lro = ss->lro_free;
5537 ss->lro_free = lro->next;
5538 kmem_free(lro, sizeof (*lro));
5542 static void
5543 myri10ge_lro_alloc(struct myri10ge_slice_state *ss)
5545 struct lro_entry *lro;
5546 int idx;
5548 ss->lro_free = NULL;
5549 ss->lro_active = NULL;
5551 for (idx = 0; idx < myri10ge_lro_cnt; idx++) {
5552 lro = kmem_zalloc(sizeof (*lro), KM_SLEEP);
5553 if (lro == NULL)
5554 continue;
5555 lro->next = ss->lro_free;
5556 ss->lro_free = lro;
5560 static void
5561 myri10ge_free_slices(struct myri10ge_priv *mgp)
5563 struct myri10ge_slice_state *ss;
5564 size_t bytes;
5565 int i;
5567 if (mgp->ss == NULL)
5568 return;
5570 for (i = 0; i < mgp->num_slices; i++) {
5571 ss = &mgp->ss[i];
5572 if (ss->rx_done.entry == NULL)
5573 continue;
5574 myri10ge_dma_free(&ss->rx_done.dma);
5575 ss->rx_done.entry = NULL;
5576 if (ss->fw_stats == NULL)
5577 continue;
5578 myri10ge_dma_free(&ss->fw_stats_dma);
5579 ss->fw_stats = NULL;
5580 mutex_destroy(&ss->rx_lock);
5581 mutex_destroy(&ss->tx.lock);
5582 mutex_destroy(&ss->tx.handle_lock);
5583 mutex_destroy(&ss->poll_lock);
5584 myri10ge_jpool_fini(ss);
5585 myri10ge_slice_stat_destroy(ss);
5586 myri10ge_lro_free(ss);
5588 bytes = sizeof (*mgp->ss) * mgp->num_slices;
5589 kmem_free(mgp->ss, bytes);
5590 mgp->ss = NULL;
/*
 * Allocate per-slice state: the slice array itself, then for each
 * slice a DMA-mapped firmware stats block and rx completion ring,
 * plus locks, jumbo buffer pool, kstats and LRO descriptors.  On any
 * DMA allocation failure, everything allocated so far is unwound via
 * myri10ge_free_slices().  Returns 0 or ENOMEM.
 */
static int
myri10ge_alloc_slices(struct myri10ge_priv *mgp)
{
	struct myri10ge_slice_state *ss;
	size_t bytes;
	int i;

	bytes = sizeof (*mgp->ss) * mgp->num_slices;
	mgp->ss = kmem_zalloc(bytes, KM_SLEEP);
	/* NOTE(review): dead check -- KM_SLEEP never returns NULL */
	if (mgp->ss == NULL)
		return (ENOMEM);
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];

		ss->mgp = mgp;

		/* allocate the per-slice firmware stats */
		bytes = sizeof (*ss->fw_stats);
		ss->fw_stats = (mcp_irq_data_t *)(void *)
		    myri10ge_dma_alloc(mgp->dip, bytes,
		    &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
		    DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT,
		    &ss->fw_stats_dma, 1, DDI_DMA_DONTWAIT);
		if (ss->fw_stats == NULL)
			goto abort;
		(void) memset(ss->fw_stats, 0, bytes);

		/* allocate rx done ring (one slot per interrupt entry) */
		bytes = mgp->max_intr_slots *
		    sizeof (*ss->rx_done.entry);
		ss->rx_done.entry = (mcp_slot_t *)(void *)
		    myri10ge_dma_alloc(mgp->dip, bytes,
		    &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
		    DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT,
		    &ss->rx_done.dma, 1, DDI_DMA_DONTWAIT);
		if (ss->rx_done.entry == NULL) {
			goto abort;
		}
		(void) memset(ss->rx_done.entry, 0, bytes);
		/* rx_lock is taken from interrupt context: use icookie */
		mutex_init(&ss->rx_lock, NULL, MUTEX_DEFAULT, mgp->icookie);
		mutex_init(&ss->tx.lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ss->tx.handle_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ss->poll_lock, NULL, MUTEX_DEFAULT, NULL);
		myri10ge_jpool_init(ss);
		(void) myri10ge_slice_stat_init(ss);
		myri10ge_lro_alloc(ss);
	}

	return (0);

abort:
	/* unwind every slice allocated so far */
	myri10ge_free_slices(mgp);
	return (ENOMEM);
}
5649 static int
5650 myri10ge_save_msi_state(struct myri10ge_priv *mgp,
5651 ddi_acc_handle_t handle)
5653 uint8_t ptr;
5654 int err;
5656 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI);
5657 if (err != 0) {
5658 cmn_err(CE_WARN, "%s: could not find MSI cap\n",
5659 mgp->name);
5660 return (DDI_FAILURE);
5662 mgp->pci_saved_state.msi_ctrl =
5663 pci_config_get16(handle, ptr + PCI_MSI_CTRL);
5664 mgp->pci_saved_state.msi_addr_low =
5665 pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET);
5666 mgp->pci_saved_state.msi_addr_high =
5667 pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4);
5668 mgp->pci_saved_state.msi_data_32 =
5669 pci_config_get16(handle, ptr + PCI_MSI_32BIT_DATA);
5670 mgp->pci_saved_state.msi_data_64 =
5671 pci_config_get16(handle, ptr + PCI_MSI_64BIT_DATA);
5672 return (DDI_SUCCESS);
5675 static int
5676 myri10ge_restore_msi_state(struct myri10ge_priv *mgp,
5677 ddi_acc_handle_t handle)
5679 uint8_t ptr;
5680 int err;
5682 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI);
5683 if (err != 0) {
5684 cmn_err(CE_WARN, "%s: could not find MSI cap\n",
5685 mgp->name);
5686 return (DDI_FAILURE);
5689 pci_config_put16(handle, ptr + PCI_MSI_CTRL,
5690 mgp->pci_saved_state.msi_ctrl);
5691 pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET,
5692 mgp->pci_saved_state.msi_addr_low);
5693 pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4,
5694 mgp->pci_saved_state.msi_addr_high);
5695 pci_config_put16(handle, ptr + PCI_MSI_32BIT_DATA,
5696 mgp->pci_saved_state.msi_data_32);
5697 pci_config_put16(handle, ptr + PCI_MSI_64BIT_DATA,
5698 mgp->pci_saved_state.msi_data_64);
5700 return (DDI_SUCCESS);
5703 static int
5704 myri10ge_save_pci_state(struct myri10ge_priv *mgp)
5706 ddi_acc_handle_t handle = mgp->cfg_hdl;
5707 int i;
5708 int err = DDI_SUCCESS;
5711 /* Save the non-extended PCI config space 32-bits at a time */
5712 for (i = 0; i < 16; i++)
5713 mgp->pci_saved_state.base[i] =
5714 pci_config_get32(handle, i*4);
5716 /* now save MSI interrupt state *, if needed */
5717 if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI)
5718 err = myri10ge_save_msi_state(mgp, handle);
5720 return (err);
5723 static int
5724 myri10ge_restore_pci_state(struct myri10ge_priv *mgp)
5726 ddi_acc_handle_t handle = mgp->cfg_hdl;
5727 int i;
5728 int err = DDI_SUCCESS;
5731 /* Restore the non-extended PCI config space 32-bits at a time */
5732 for (i = 15; i >= 0; i--)
5733 pci_config_put32(handle, i*4, mgp->pci_saved_state.base[i]);
5735 /* now restore MSI interrupt state *, if needed */
5736 if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI)
5737 err = myri10ge_restore_msi_state(mgp, handle);
5739 if (mgp->max_read_request_4k)
5740 (void) myri10ge_set_max_readreq(handle);
5741 return (err);
/*
 * DDI_SUSPEND handler: stop the interface if it is running (marking
 * it SUSPENDED_RUNNING so resume knows to restart it) and save PCI
 * config state.  The watchdog timeout must be cancelled with the
 * intrlock dropped, because the watchdog itself takes that lock --
 * holding it across untimeout() could deadlock.
 */
static int
myri10ge_suspend(dev_info_t *dip)
{
	struct myri10ge_priv *mgp = ddi_get_driver_private(dip);
	int status;

	if (mgp == NULL) {
		cmn_err(CE_WARN, "null dip in myri10ge_suspend\n");
		return (DDI_FAILURE);
	}
	if (mgp->dip != dip) {
		cmn_err(CE_WARN, "bad dip in myri10ge_suspend\n");
		return (DDI_FAILURE);
	}
	mutex_enter(&mgp->intrlock);
	if (mgp->running == MYRI10GE_ETH_RUNNING) {
		/* STOPPING keeps others out while the lock is dropped */
		mgp->running = MYRI10GE_ETH_STOPPING;
		mutex_exit(&mgp->intrlock);
		(void) untimeout(mgp->timer_id);
		mutex_enter(&mgp->intrlock);
		myri10ge_stop_locked(mgp);
		/* remember to restart on resume */
		mgp->running = MYRI10GE_ETH_SUSPENDED_RUNNING;
	}
	status = myri10ge_save_pci_state(mgp);
	mutex_exit(&mgp->intrlock);
	return (status);
}
/*
 * DDI_RESUME handler: restore PCI config state and, if the interface
 * was running when suspended (SUSPENDED_RUNNING), restart it and
 * rearm the watchdog timer.
 */
static int
myri10ge_resume(dev_info_t *dip)
{
	struct myri10ge_priv *mgp = ddi_get_driver_private(dip);
	int status = DDI_SUCCESS;

	if (mgp == NULL) {
		cmn_err(CE_WARN, "null dip in myri10ge_resume\n");
		return (DDI_FAILURE);
	}
	if (mgp->dip != dip) {
		cmn_err(CE_WARN, "bad dip in myri10ge_resume\n");
		return (DDI_FAILURE);
	}

	mutex_enter(&mgp->intrlock);
	status = myri10ge_restore_pci_state(mgp);
	if (status == DDI_SUCCESS &&
	    mgp->running == MYRI10GE_ETH_SUSPENDED_RUNNING) {
		status = myri10ge_start_locked(mgp);
	}
	mutex_exit(&mgp->intrlock);
	if (status != DDI_SUCCESS)
		return (status);

	/* start the watchdog timer */
	mgp->timer_id = timeout(myri10ge_watchdog, mgp,
	    mgp->timer_ticks);
	return (DDI_SUCCESS);
}
5804 static int
5805 myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5808 struct myri10ge_priv *mgp;
5809 mac_register_t *macp, *omacp;
5810 ddi_acc_handle_t handle;
5811 uint32_t csr, hdr_offset;
5812 int status, span, link_width, max_read_request_4k;
5813 unsigned long bus_number, dev_number, func_number;
5814 size_t bytes;
5815 offset_t ss_offset;
5816 uint8_t vso;
5818 if (cmd == DDI_RESUME) {
5819 return (myri10ge_resume(dip));
5822 if (cmd != DDI_ATTACH)
5823 return (DDI_FAILURE);
5824 if (pci_config_setup(dip, &handle) != DDI_SUCCESS)
5825 return (DDI_FAILURE);
5827 /* enable busmater and io space access */
5828 csr = pci_config_get32(handle, PCI_CONF_COMM);
5829 pci_config_put32(handle, PCI_CONF_COMM,
5830 (csr |PCI_COMM_ME|PCI_COMM_MAE));
5831 status = myri10ge_read_pcie_link_width(handle, &link_width);
5832 if (status != 0) {
5833 cmn_err(CE_WARN, "could not read link width!\n");
5834 link_width = 0;
5836 max_read_request_4k = !myri10ge_set_max_readreq(handle);
5837 status = myri10ge_find_cap(handle, &vso, PCI_CAP_ID_VS);
5838 if (status != 0)
5839 goto abort_with_cfg_hdl;
5840 if ((omacp = mac_alloc(MAC_VERSION)) == NULL)
5841 goto abort_with_cfg_hdl;
5843 * XXXX Hack: mac_register_t grows in newer kernels. To be
5844 * able to write newer fields, such as m_margin, without
5845 * writing outside allocated memory, we allocate our own macp
5846 * and pass that to mac_register()
5848 macp = kmem_zalloc(sizeof (*macp) * 8, KM_SLEEP);
5849 macp->m_version = omacp->m_version;
5851 if ((mgp = (struct myri10ge_priv *)
5852 kmem_zalloc(sizeof (*mgp), KM_SLEEP)) == NULL) {
5853 goto abort_with_macinfo;
5855 ddi_set_driver_private(dip, mgp);
5857 /* setup device name for log messages */
5858 (void) sprintf(mgp->name, "myri10ge%d", ddi_get_instance(dip));
5860 mutex_enter(&myri10ge_param_lock);
5861 myri10ge_get_props(dip);
5862 mgp->intr_coal_delay = myri10ge_intr_coal_delay;
5863 mgp->pause = myri10ge_flow_control;
5864 mutex_exit(&myri10ge_param_lock);
5866 mgp->max_read_request_4k = max_read_request_4k;
5867 mgp->pcie_link_width = link_width;
5868 mgp->running = MYRI10GE_ETH_STOPPED;
5869 mgp->vso = vso;
5870 mgp->dip = dip;
5871 mgp->cfg_hdl = handle;
5873 mgp->timer_ticks = 5 * drv_usectohz(1000000); /* 5 seconds */
5874 myri10ge_test_physical(dip);
5876 /* allocate command page */
5877 bytes = sizeof (*mgp->cmd);
5878 mgp->cmd = (mcp_cmd_response_t *)
5879 (void *)myri10ge_dma_alloc(dip, bytes,
5880 &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
5881 DDI_DMA_CONSISTENT, DDI_DMA_RDWR|DDI_DMA_CONSISTENT,
5882 &mgp->cmd_dma, 1, DDI_DMA_DONTWAIT);
5883 if (mgp->cmd == NULL)
5884 goto abort_with_mgp;
5886 (void) myri10ge_reg_set(dip, &mgp->reg_set, &span, &bus_number,
5887 &dev_number, &func_number);
5888 if (myri10ge_verbose)
5889 printf("%s at %ld:%ld:%ld attaching\n", mgp->name,
5890 bus_number, dev_number, func_number);
5891 status = ddi_regs_map_setup(dip, mgp->reg_set, (caddr_t *)&mgp->sram,
5892 0, (offset_t)span, &myri10ge_dev_access_attr,
5893 &mgp->io_handle);
5894 if (status != DDI_SUCCESS) {
5895 cmn_err(CE_WARN, "%s: couldn't map memory space", mgp->name);
5896 printf("%s: reg_set = %d, span = %d, status = %d",
5897 mgp->name, mgp->reg_set, span, status);
5898 goto abort_with_mgp;
5901 hdr_offset = *(uint32_t *)(void*)(mgp->sram + MCP_HEADER_PTR_OFFSET);
5902 hdr_offset = ntohl(hdr_offset) & 0xffffc;
5903 ss_offset = hdr_offset +
5904 offsetof(struct mcp_gen_header, string_specs);
5905 mgp->sram_size = ntohl(*(uint32_t *)(void*)(mgp->sram + ss_offset));
5906 myri10ge_pio_copy32(mgp->eeprom_strings,
5907 (uint32_t *)(void*)((char *)mgp->sram + mgp->sram_size),
5908 MYRI10GE_EEPROM_STRINGS_SIZE);
5909 (void) memset(mgp->eeprom_strings +
5910 MYRI10GE_EEPROM_STRINGS_SIZE - 2, 0, 2);
5912 status = myri10ge_read_mac_addr(mgp);
5913 if (status) {
5914 goto abort_with_mapped;
5917 status = myri10ge_select_firmware(mgp);
5918 if (status != 0) {
5919 cmn_err(CE_WARN, "%s: failed to load firmware\n", mgp->name);
5920 goto abort_with_mapped;
5923 status = myri10ge_probe_slices(mgp);
5924 if (status != 0) {
5925 cmn_err(CE_WARN, "%s: failed to probe slices\n", mgp->name);
5926 goto abort_with_dummy_rdma;
5929 status = myri10ge_alloc_slices(mgp);
5930 if (status != 0) {
5931 cmn_err(CE_WARN, "%s: failed to alloc slices\n", mgp->name);
5932 goto abort_with_dummy_rdma;
5935 /* add the interrupt handler */
5936 status = myri10ge_add_intrs(mgp, 1);
5937 if (status != 0) {
5938 cmn_err(CE_WARN, "%s: Failed to add interrupt\n",
5939 mgp->name);
5940 goto abort_with_slices;
5943 /* now that we have an iblock_cookie, init the mutexes */
5944 mutex_init(&mgp->cmd_lock, NULL, MUTEX_DRIVER, mgp->icookie);
5945 mutex_init(&mgp->intrlock, NULL, MUTEX_DRIVER, mgp->icookie);
5948 status = myri10ge_nic_stat_init(mgp);
5949 if (status != DDI_SUCCESS)
5950 goto abort_with_interrupts;
5951 status = myri10ge_info_init(mgp);
5952 if (status != DDI_SUCCESS)
5953 goto abort_with_stats;
5956 * Initialize GLD state
5959 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
5960 macp->m_driver = mgp;
5961 macp->m_dip = dip;
5962 macp->m_src_addr = mgp->mac_addr;
5963 macp->m_callbacks = &myri10ge_m_callbacks;
5964 macp->m_min_sdu = 0;
5965 macp->m_max_sdu = myri10ge_mtu -
5966 (sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ);
5967 #ifdef SOLARIS_S11
5968 macp->m_margin = VLAN_TAGSZ;
5969 #endif
5970 macp->m_v12n = MAC_VIRT_LEVEL1;
5971 status = mac_register(macp, &mgp->mh);
5972 if (status != 0) {
5973 cmn_err(CE_WARN, "%s: mac_register failed with %d\n",
5974 mgp->name, status);
5975 goto abort_with_info;
5977 myri10ge_ndd_init(mgp);
5978 if (myri10ge_verbose)
5979 printf("%s: %s, tx bndry %d, fw %s\n", mgp->name,
5980 mgp->intr_type, mgp->tx_boundary, mgp->fw_name);
5981 mutex_enter(&myri10ge_param_lock);
5982 mgp->next = mgp_list;
5983 mgp_list = mgp;
5984 mutex_exit(&myri10ge_param_lock);
5985 kmem_free(macp, sizeof (*macp) * 8);
5986 mac_free(omacp);
5987 return (DDI_SUCCESS);
5989 abort_with_info:
5990 myri10ge_info_destroy(mgp);
5992 abort_with_stats:
5993 myri10ge_nic_stat_destroy(mgp);
5995 abort_with_interrupts:
5996 mutex_destroy(&mgp->cmd_lock);
5997 mutex_destroy(&mgp->intrlock);
5998 myri10ge_rem_intrs(mgp, 1);
6000 abort_with_slices:
6001 myri10ge_free_slices(mgp);
6003 abort_with_dummy_rdma:
6004 myri10ge_dummy_rdma(mgp, 0);
6006 abort_with_mapped:
6007 ddi_regs_map_free(&mgp->io_handle);
6009 myri10ge_dma_free(&mgp->cmd_dma);
6011 abort_with_mgp:
6012 kmem_free(mgp, sizeof (*mgp));
6014 abort_with_macinfo:
6015 kmem_free(macp, sizeof (*macp) * 8);
6016 mac_free(omacp);
6018 abort_with_cfg_hdl:
6019 pci_config_teardown(&handle);
6020 return (DDI_FAILURE);
6025 static int
6026 myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
6028 struct myri10ge_priv *mgp, *tmp;
6029 int status, i, jbufs_alloced;
6031 if (cmd == DDI_SUSPEND) {
6032 status = myri10ge_suspend(dip);
6033 return (status);
6036 if (cmd != DDI_DETACH) {
6037 return (DDI_FAILURE);
6039 /* Get the driver private (gld_mac_info_t) structure */
6040 mgp = ddi_get_driver_private(dip);
6042 mutex_enter(&mgp->intrlock);
6043 jbufs_alloced = 0;
6044 for (i = 0; i < mgp->num_slices; i++) {
6045 myri10ge_remove_jbufs(&mgp->ss[i]);
6046 jbufs_alloced += mgp->ss[i].jpool.num_alloc;
6048 mutex_exit(&mgp->intrlock);
6049 if (jbufs_alloced != 0) {
6050 cmn_err(CE_NOTE, "%s: %d loaned rx buffers remain\n",
6051 mgp->name, jbufs_alloced);
6052 return (DDI_FAILURE);
6055 mutex_enter(&myri10ge_param_lock);
6056 if (mgp->refcnt != 0) {
6057 mutex_exit(&myri10ge_param_lock);
6058 cmn_err(CE_NOTE, "%s: %d external refs remain\n",
6059 mgp->name, mgp->refcnt);
6060 return (DDI_FAILURE);
6062 mutex_exit(&myri10ge_param_lock);
6064 status = mac_unregister(mgp->mh);
6065 if (status != DDI_SUCCESS)
6066 return (status);
6068 myri10ge_ndd_fini(mgp);
6069 myri10ge_dummy_rdma(mgp, 0);
6070 myri10ge_nic_stat_destroy(mgp);
6071 myri10ge_info_destroy(mgp);
6073 mutex_destroy(&mgp->cmd_lock);
6074 mutex_destroy(&mgp->intrlock);
6076 myri10ge_rem_intrs(mgp, 1);
6078 myri10ge_free_slices(mgp);
6079 ddi_regs_map_free(&mgp->io_handle);
6080 myri10ge_dma_free(&mgp->cmd_dma);
6081 pci_config_teardown(&mgp->cfg_hdl);
6083 mutex_enter(&myri10ge_param_lock);
6084 if (mgp_list == mgp) {
6085 mgp_list = mgp->next;
6086 } else {
6087 tmp = mgp_list;
6088 while (tmp->next != mgp && tmp->next != NULL)
6089 tmp = tmp->next;
6090 if (tmp->next != NULL)
6091 tmp->next = tmp->next->next;
6093 kmem_free(mgp, sizeof (*mgp));
6094 mutex_exit(&myri10ge_param_lock);
6095 return (DDI_SUCCESS);
6099 * Helper for quiesce entry point: Interrupt threads are not being
6100 * scheduled, so we must poll for the confirmation DMA to arrive in
6101 * the firmware stats block for slice 0. We're essentially running
6102 * the guts of the interrupt handler, and just cherry picking the
6103 * confirmation that the NIC is queuesced (stats->link_down)
6106 static int
6107 myri10ge_poll_down(struct myri10ge_priv *mgp)
6109 struct myri10ge_slice_state *ss = mgp->ss;
6110 mcp_irq_data_t *stats = ss->fw_stats;
6111 int valid;
6112 int found_down = 0;
6115 /* check for a pending IRQ */
6117 if (! *((volatile uint8_t *)& stats->valid))
6118 return (0);
6119 valid = stats->valid;
6122 * Make sure to tell the NIC to lower a legacy IRQ, else
6123 * it may have corrupt state after restarting
6126 if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
6127 /* lower legacy IRQ */
6128 *mgp->irq_deassert = 0;
6129 mb();
6130 /* wait for irq conf DMA */
6131 while (*((volatile uint8_t *)& stats->valid))
6134 if (stats->stats_updated && stats->link_down)
6135 found_down = 1;
6137 if (valid & 0x1)
6138 *ss->irq_claim = BE_32(3);
6139 *(ss->irq_claim + 1) = BE_32(3);
6141 return (found_down);
6144 static int
6145 myri10ge_quiesce(dev_info_t *dip)
6147 struct myri10ge_priv *mgp;
6148 myri10ge_cmd_t cmd;
6149 int status, down, i;
6151 mgp = ddi_get_driver_private(dip);
6152 if (mgp == NULL)
6153 return (DDI_FAILURE);
6155 /* if devices was unplumbed, it is guaranteed to be quiescent */
6156 if (mgp->running == MYRI10GE_ETH_STOPPED)
6157 return (DDI_SUCCESS);
6159 /* send a down CMD to queuesce NIC */
6160 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
6161 if (status) {
6162 cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name);
6163 return (DDI_FAILURE);
6166 for (i = 0; i < 20; i++) {
6167 down = myri10ge_poll_down(mgp);
6168 if (down)
6169 break;
6170 delay(drv_usectohz(100000));
6171 mb();
6173 if (down)
6174 return (DDI_SUCCESS);
6175 return (DDI_FAILURE);
6179 * Distinguish between allocb'ed blocks, and gesballoc'ed attached
6180 * storage.
6182 static void
6183 myri10ge_find_lastfree(void)
6185 mblk_t *mp = allocb(1024, 0);
6186 dblk_t *dbp;
6188 if (mp == NULL) {
6189 cmn_err(CE_WARN, "myri10ge_find_lastfree failed\n");
6190 return;
6192 dbp = mp->b_datap;
6193 myri10ge_db_lastfree = (void *)dbp->db_lastfree;
6197 _init(void)
6199 int i;
6201 if (myri10ge_verbose)
6202 cmn_err(CE_NOTE,
6203 "Myricom 10G driver (10GbE) version %s loading\n",
6204 MYRI10GE_VERSION_STR);
6205 myri10ge_find_lastfree();
6206 mac_init_ops(&myri10ge_ops, "myri10ge");
6207 mutex_init(&myri10ge_param_lock, NULL, MUTEX_DEFAULT, NULL);
6208 if ((i = mod_install(&modlinkage)) != 0) {
6209 cmn_err(CE_WARN, "mod_install returned %d\n", i);
6210 mac_fini_ops(&myri10ge_ops);
6211 mutex_destroy(&myri10ge_param_lock);
6213 return (i);
6217 _fini(void)
6219 int i;
6220 i = mod_remove(&modlinkage);
6221 if (i != 0) {
6222 return (i);
6224 mac_fini_ops(&myri10ge_ops);
6225 mutex_destroy(&myri10ge_param_lock);
6226 return (0);
6230 _info(struct modinfo *modinfop)
6232 return (mod_info(&modlinkage, modinfop));
6237 * This file uses MyriGE driver indentation.
6239 * Local Variables:
6240 * c-file-style:"sun"
6241 * tab-width:8
6242 * End: