/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2007-2009 Myricom, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2014, Joyent, Inc.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */
#define	MXGEFW_NDIS
#include "myri10ge_var.h"
#include "rss_eth_z8e.h"
#include "rss_ethp_z8e.h"
#include "mcp_gen_header.h"

#define	MYRI10GE_MAX_ETHER_MTU	9014
#define	MYRI10GE_MAX_GLD_MTU	9000
#define	MYRI10GE_MIN_GLD_MTU	1500

#define	MYRI10GE_ETH_STOPPED	0
#define	MYRI10GE_ETH_STOPPING	1
#define	MYRI10GE_ETH_STARTING	2
#define	MYRI10GE_ETH_RUNNING	3
#define	MYRI10GE_ETH_OPEN_FAILED	4
#define	MYRI10GE_ETH_SUSPENDED_RUNNING	5
static int myri10ge_small_bytes = 510;
static int myri10ge_intr_coal_delay = 125;
static int myri10ge_flow_control = 1;
#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static int myri10ge_nvidia_ecrc_enable = 1;
#endif
static int myri10ge_mtu_override = 0;
static int myri10ge_tx_copylen = 512;
static int myri10ge_deassert_wait = 1;
static int myri10ge_verbose = 0;
static int myri10ge_watchdog_reset = 0;
static int myri10ge_use_msix = 1;
static int myri10ge_max_slices = -1;
static int myri10ge_use_msi = 1;
int myri10ge_force_firmware = 0;
static boolean_t myri10ge_use_lso = B_TRUE;
static int myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
static int myri10ge_tx_hash = 1;
static int myri10ge_lro = 0;
static int myri10ge_lro_cnt = 8;
int myri10ge_lro_max_aggr = 2;
static int myri10ge_lso_copy = 0;
static mblk_t *myri10ge_send_wrapper(void *arg, mblk_t *mp);
int myri10ge_tx_handles_initial = 128;

static kmutex_t myri10ge_param_lock;
static void* myri10ge_db_lastfree;
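/*
 * The values above are compile-time defaults; at attach time the
 * driver presumably refreshes many of these tunables from "myri10ge"
 * driver properties (myri10ge.conf or /etc/system), as is
 * conventional for illumos NIC drivers.
 */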
static int myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
static int myri10ge_quiesce(dev_info_t *dip);

DDI_DEFINE_STREAM_OPS(myri10ge_ops, nulldev, nulldev, myri10ge_attach,
    myri10ge_detach, nodev, NULL, D_MP, NULL, myri10ge_quiesce);

static struct modldrv modldrv = {
	&mod_driverops,
	"Myricom 10G driver (10GbE)",
	&myri10ge_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1,
	{&modldrv, NULL},
};

unsigned char myri10ge_broadcastaddr[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
static ddi_dma_attr_t myri10ge_misc_dma_attr = {
	DMA_ATTR_V0,			/* version number. */
	(uint64_t)0,			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
	(uint64_t)4096,			/* alignment */
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
	(uint64_t)0x7fffffff,		/* maximum segment size */
	1,				/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};
/*
 * The Myri10GE NIC has the following constraints on receive buffers:
 * 1) Buffers which cross a 4KB boundary must be aligned to 4KB
 * 2) Buffers which are not aligned to 4KB must not cross a 4KB boundary
 */

static ddi_dma_attr_t myri10ge_rx_jumbo_dma_attr = {
	DMA_ATTR_V0,			/* version number. */
	(uint64_t)0,			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
	(uint64_t)4096,			/* alignment */
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
	UINT64_MAX,			/* maximum segment size */
	1,				/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};
static ddi_dma_attr_t myri10ge_rx_std_dma_attr = {
	DMA_ATTR_V0,			/* version number. */
	(uint64_t)0,			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
#if defined sparc64 || defined __sparcv9
	(uint64_t)4096,			/* alignment */
#else
	(uint64_t)0x80,			/* alignment */
#endif
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
#if defined sparc64 || defined __sparcv9
	UINT64_MAX,			/* maximum segment size */
#else
	(uint64_t)0xfff,		/* maximum segment size */
#endif
	1,				/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};
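/*
 * Note on the standard-receive attributes above: on x86 the buffers
 * are only 0x80-aligned, so constraint 2 is enforced instead by
 * dma_attr_seg = 0xfff, which keeps each buffer from crossing a 4KB
 * boundary; sparc uses full 4KB alignment, which satisfies
 * constraint 1 directly (see also the 4KB checks in
 * myri10ge_add_jbuf() below).
 */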
static ddi_dma_attr_t myri10ge_tx_dma_attr = {
	DMA_ATTR_V0,			/* version number. */
	(uint64_t)0,			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
	(uint64_t)1,			/* alignment */
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
	UINT64_MAX,			/* maximum segment size */
	INT32_MAX,			/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};

#if defined sparc64 || defined __sparcv9
#define	WC 0
#else
#define	WC 1
#endif
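/*
 * WC selects a write-combining style mapping of NIC memory: with
 * DDI_MERGING_OK_ACC (below) the host bridge may merge adjacent PIO
 * writes into larger bursts, which is what lets myri10ge_pio_copy()
 * hand the NIC 32- and 64-byte chunks efficiently; sparc maps the
 * device strictly ordered instead.
 */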
struct ddi_device_acc_attr myri10ge_dev_access_attr = {
	DDI_DEVICE_ATTR_V0,		/* version */
	DDI_NEVERSWAP_ACC,		/* endian flags */
#if WC
	DDI_MERGING_OK_ACC		/* data order */
#else
	DDI_STRICTORDER_ACC
#endif
};

static void myri10ge_watchdog(void *arg);

#ifdef MYRICOM_PRIV
int myri10ge_mtu = MYRI10GE_MAX_ETHER_MTU + MXGEFW_PAD + VLAN_TAGSZ;
#define	MYRI10GE_DEFAULT_GLD_MTU	MYRI10GE_MAX_GLD_MTU
#else
int myri10ge_mtu = ETHERMAX + MXGEFW_PAD + VLAN_TAGSZ;
#define	MYRI10GE_DEFAULT_GLD_MTU	MYRI10GE_MIN_GLD_MTU
#endif
int myri10ge_bigbufs_initial = 1024;
int myri10ge_bigbufs_max = 4096;
caddr_t
myri10ge_dma_alloc(dev_info_t *dip, size_t len,
    ddi_dma_attr_t *attr, ddi_device_acc_attr_t *accattr,
    uint_t alloc_flags, int bind_flags, struct myri10ge_dma_stuff *dma,
    int warn, int (*wait)(caddr_t))
{
	caddr_t kaddr;
	size_t real_length;
	ddi_dma_cookie_t cookie;
	uint_t count;
	int err;

	err = ddi_dma_alloc_handle(dip, attr, wait,
	    NULL, &dma->handle);
	if (err != DDI_SUCCESS) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: ddi_dma_alloc_handle failed\n");
		goto abort_with_nothing;
	}

	err = ddi_dma_mem_alloc(dma->handle, len, accattr, alloc_flags,
	    wait, NULL, &kaddr, &real_length,
	    &dma->acc_handle);
	if (err != DDI_SUCCESS) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: ddi_dma_mem_alloc failed\n");
		goto abort_with_handle;
	}

	err = ddi_dma_addr_bind_handle(dma->handle, NULL, kaddr, len,
	    bind_flags, wait, NULL, &cookie, &count);

	if (err != DDI_SUCCESS) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: ddi_dma_addr_bind_handle failed\n");
		goto abort_with_mem;
	}

	if (count != 1) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: got too many dma segments ");
		goto abort_with_bind;
	}
	dma->low = htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress));
	dma->high = htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress));
	return (kaddr);

abort_with_bind:
	(void) ddi_dma_unbind_handle(dma->handle);

abort_with_mem:
	ddi_dma_mem_free(&dma->acc_handle);

abort_with_handle:
	ddi_dma_free_handle(&dma->handle);
abort_with_nothing:
	if (warn) {
		cmn_err(CE_WARN, "myri10ge: myri10ge_dma_alloc failed.\n");
		cmn_err(CE_WARN, "args: dip=%p len=0x%lx ddi_dma_attr=%p\n",
		    (void*) dip, len, (void*) attr);
		cmn_err(CE_WARN,
		    "args: ddi_device_acc_attr=%p alloc_flags=0x%x\n",
		    (void*) accattr, alloc_flags);
		cmn_err(CE_WARN, "args: bind_flags=0x%x dmastuff=%p",
		    bind_flags, (void*) dma);
	}
	return (NULL);
}
void
myri10ge_dma_free(struct myri10ge_dma_stuff *dma)
{
	(void) ddi_dma_unbind_handle(dma->handle);
	ddi_dma_mem_free(&dma->acc_handle);
	ddi_dma_free_handle(&dma->handle);
}
static inline void
myri10ge_pio_copy32(void *to, uint32_t *from32, size_t size)
{
	register volatile uint32_t *to32;
	size_t i;

	to32 = (volatile uint32_t *) to;
	for (i = (size / 4); i; i--) {
		*to32 = *from32;
		to32++;
		from32++;
	}
}

#if defined(_LP64)
static inline void
myri10ge_pio_copy64(void *to, uint64_t *from64, size_t size)
{
	register volatile uint64_t *to64;
	size_t i;

	to64 = (volatile uint64_t *) to;
	for (i = (size / 8); i; i--) {
		*to64 = *from64;
		to64++;
		from64++;
	}
}
#endif
/*
 * This routine copies memory from the host to the NIC.
 * The "size" argument must always be a multiple of
 * the size of long (4 or 8 bytes), and to/from must also
 * be naturally aligned.
 */
static inline void
myri10ge_pio_copy(void *to, void *from, size_t size)
{
#if !defined(_LP64)
	ASSERT((size % 4) == 0);
	myri10ge_pio_copy32(to, (uint32_t *)from, size);
#else
	ASSERT((size % 8) == 0);
	myri10ge_pio_copy64(to, (uint64_t *)from, size);
#endif
}
/*
 * Due to various bugs in Solaris (especially bug 6186772 where the
 * TCP/UDP checksum is calculated incorrectly on mblk chains with more
 * than two elements), and the design bug where hardware checksums are
 * ignored on mblk chains with more than 2 elements, we need to
 * allocate a private pool of physically contiguous receive buffers.
 */
static void
myri10ge_jpool_init(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;

	bzero(jpool, sizeof (*jpool));
	mutex_init(&jpool->mtx, NULL, MUTEX_DRIVER,
	    ss->mgp->icookie);
	jpool->head = NULL;
}

static void
myri10ge_jpool_fini(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;

	if (jpool->head != NULL) {
		cmn_err(CE_WARN,
		    "%s: BUG! myri10ge_jpool_fini called on non-empty pool\n",
		    ss->mgp->name);
	}
	mutex_destroy(&jpool->mtx);
}
/*
 * Copy an array of mcp_kreq_ether_recv_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's low
 * DMA address to mark it valid only after we write the entire chunk
 * in a burst.
 */
static inline void
myri10ge_submit_8rx(mcp_kreq_ether_recv_t *dst, mcp_kreq_ether_recv_t *src)
{
	src->addr_low |= BE_32(1);
	myri10ge_pio_copy(dst, src, 4 * sizeof (*src));
	mb();
	myri10ge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
	mb();
	src->addr_low &= ~(BE_32(1));
	dst->addr_low = src->addr_low;
	mb();
}
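/*
 * The low bit of addr_low is usable as a not-yet-valid flag because
 * receive buffers are at least 0x80-aligned (see the rx DMA
 * attributes above), so the true DMA address always has its low bits
 * clear; the final store of the corrected addr_low is what makes the
 * whole 8-descriptor chunk visible to the firmware.
 */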
static void
myri10ge_pull_jpool(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *jtail, *j, *jfree;
	volatile uintptr_t *putp;
	uintptr_t put;
	int i;

	/* find tail */
	jtail = NULL;
	if (jpool->head != NULL) {
		j = jpool->head;
		while (j->next != NULL)
			j = j->next;
		jtail = j;
	}

	/*
	 * iterate over all per-CPU caches, and add contents into
	 * jpool
	 */
	for (i = 0; i < MYRI10GE_MAX_CPUS; i++) {
		/* take per-CPU free list */
		putp = (void *)&jpool->cpu[i & MYRI10GE_MAX_CPU_MASK].head;
		if (*putp == NULL)
			continue;
		put = atomic_swap_ulong(putp, 0);
		jfree = (struct myri10ge_jpool_entry *)put;

		/* append to pool */
		if (jtail == NULL) {
			jpool->head = jfree;
		} else {
			jtail->next = jfree;
		}
		j = jfree;
		while (j->next != NULL)
			j = j->next;
		jtail = j;
	}
}
/*
 * Transfers buffers from the free pool to the nic.
 * Must be called holding the jpool mutex.
 */

static inline void
myri10ge_restock_jumbos(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j;
	myri10ge_rx_ring_t *rx;
	int i, idx, limit;

	rx = &ss->rx_big;
	limit = ss->j_rx_cnt + (rx->mask + 1);

	for (i = rx->cnt; i != limit; i++) {
		idx = i & (rx->mask);
		j = jpool->head;
		if (j == NULL) {
			myri10ge_pull_jpool(ss);
			j = jpool->head;
			if (j == NULL) {
				break;
			}
		}
		jpool->head = j->next;
		rx->info[idx].j = j;
		rx->shadow[idx].addr_low = j->dma.low;
		rx->shadow[idx].addr_high = j->dma.high;
		/* copy 4 descriptors (32-bytes) to the mcp at a time */
		if ((idx & 7) == 7) {
			myri10ge_submit_8rx(&rx->lanai[idx - 7],
			    &rx->shadow[idx - 7]);
		}
	}
	rx->cnt = i;
}
/*
 * Transfer buffers from the nic to the free pool.
 * The jpool mutex is acquired and released internally.
 */

static inline void
myri10ge_unstock_jumbos(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j;
	myri10ge_rx_ring_t *rx;
	int i;

	mutex_enter(&jpool->mtx);
	rx = &ss->rx_big;

	for (i = 0; i < rx->mask + 1; i++) {
		j = rx->info[i].j;
		rx->info[i].j = NULL;
		if (j == NULL)
			continue;
		j->next = jpool->head;
		jpool->head = j;
	}
	mutex_exit(&jpool->mtx);
}
/*
 * Free routine which is called when the mblk allocated via
 * esballoc() is freed.  Here we return the jumbo buffer
 * to the free pool, and possibly pass some jumbo buffers
 * to the nic.
 */

static void
myri10ge_jfree_rtn(void *arg)
{
	struct myri10ge_jpool_entry *j = (struct myri10ge_jpool_entry *)arg;
	struct myri10ge_jpool_stuff *jpool;
	volatile uintptr_t *putp;
	uintptr_t old, new;

	jpool = &j->ss->jpool;

	/* prepend buffer locklessly to per-CPU freelist */
	putp = (void *)&jpool->cpu[CPU->cpu_seqid & MYRI10GE_MAX_CPU_MASK].head;
	new = (uintptr_t)j;
	do {
		old = *putp;
		j->next = (void *)old;
	} while (atomic_cas_ulong(putp, old, new) != old);
}
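/*
 * Together, myri10ge_jfree_rtn() and myri10ge_pull_jpool() form a
 * simple lockless freelist per CPU: free callbacks on any CPU push
 * entries with a compare-and-swap loop, and the consumer drains an
 * entire list at once by atomically swapping the head with NULL, so
 * the two sides never need to share a lock.
 */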
static void
myri10ge_remove_jbuf(struct myri10ge_jpool_entry *j)
{
	(void) ddi_dma_unbind_handle(j->dma_handle);
	ddi_dma_mem_free(&j->acc_handle);
	ddi_dma_free_handle(&j->dma_handle);
	kmem_free(j, sizeof (*j));
}
/*
 * Allocates one physically contiguous buffer
 * and adds it to the jumbo buffer pool.
 */

static int
myri10ge_add_jbuf(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_entry *j;
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	ddi_dma_attr_t *rx_dma_attr;
	size_t real_length;
	ddi_dma_cookie_t cookie;
	uint_t count;
	int err;

	if (myri10ge_mtu < 2048)
		rx_dma_attr = &myri10ge_rx_std_dma_attr;
	else
		rx_dma_attr = &myri10ge_rx_jumbo_dma_attr;

again:
	j = (struct myri10ge_jpool_entry *)
	    kmem_alloc(sizeof (*j), KM_SLEEP);
	err = ddi_dma_alloc_handle(ss->mgp->dip, rx_dma_attr,
	    DDI_DMA_DONTWAIT, NULL, &j->dma_handle);
	if (err != DDI_SUCCESS)
		goto abort_with_j;

	err = ddi_dma_mem_alloc(j->dma_handle, myri10ge_mtu,
	    &myri10ge_dev_access_attr, DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
	    NULL, &j->buf, &real_length, &j->acc_handle);
	if (err != DDI_SUCCESS)
		goto abort_with_handle;

	err = ddi_dma_addr_bind_handle(j->dma_handle, NULL, j->buf,
	    real_length, DDI_DMA_READ|DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
	    NULL, &cookie, &count);
	if (err != DDI_SUCCESS)
		goto abort_with_mem;

	/*
	 * Make certain std MTU buffers do not cross a 4KB boundary:
	 *
	 * Setting dma_attr_align=4096 will do this, but the system
	 * will only allocate 1 RX buffer per 4KB page, rather than 2.
	 * Setting dma_attr_granular=4096 *seems* to work around this,
	 * but I'm paranoid about future systems no longer honoring
	 * this, so fall back to the safe, but memory wasting way if a
	 * buffer crosses a 4KB boundary.
	 */

	if (rx_dma_attr == &myri10ge_rx_std_dma_attr &&
	    rx_dma_attr->dma_attr_align != 4096) {
		uint32_t start, end;

		start = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress);
		end = start + myri10ge_mtu;
		if (((end >> 12) != (start >> 12)) && (start & 4095U)) {
			printf("std buffer crossed a 4KB boundary!\n");
			myri10ge_remove_jbuf(j);
			rx_dma_attr->dma_attr_align = 4096;
			rx_dma_attr->dma_attr_seg = UINT64_MAX;
			goto again;
		}
	}

	j->dma.low =
	    htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress));
	j->dma.high =
	    htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress));
	j->ss = ss;

	j->free_func.free_func = myri10ge_jfree_rtn;
	j->free_func.free_arg = (char *)j;
	mutex_enter(&jpool->mtx);
	j->next = jpool->head;
	jpool->head = j;
	jpool->num_alloc++;
	mutex_exit(&jpool->mtx);
	return (0);

abort_with_mem:
	ddi_dma_mem_free(&j->acc_handle);

abort_with_handle:
	ddi_dma_free_handle(&j->dma_handle);

abort_with_j:
	kmem_free(j, sizeof (*j));

	/*
	 * If an allocation failed, perhaps it failed because it could
	 * not satisfy granularity requirement.  Disable that, and
	 * try again.
	 */
	if (rx_dma_attr == &myri10ge_rx_std_dma_attr &&
	    rx_dma_attr->dma_attr_align != 4096) {
		cmn_err(CE_NOTE,
		    "!alloc failed, reverting to gran=1\n");
		rx_dma_attr->dma_attr_align = 4096;
		rx_dma_attr->dma_attr_seg = UINT64_MAX;
		goto again;
	}
	return (err);
}
static int
myri10ge_jfree_cnt(struct myri10ge_jpool_stuff *jpool)
{
	int i;
	struct myri10ge_jpool_entry *j;

	mutex_enter(&jpool->mtx);
	j = jpool->head;
	i = 0;
	while (j != NULL) {
		i++;
		j = j->next;
	}
	mutex_exit(&jpool->mtx);
	return (i);
}
static int
myri10ge_add_jbufs(struct myri10ge_slice_state *ss, int num, int total)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	int allocated = 0;
	int err;
	int needed;

	/*
	 * If total is set, the caller wants "num" jbufs in the pool;
	 * otherwise the caller wants "num" additional jbufs
	 * added to the pool.
	 */
	if (total && jpool->num_alloc) {
		allocated = myri10ge_jfree_cnt(jpool);
		needed = num - allocated;
	} else {
		needed = num;
	}

	while (needed > 0) {
		needed--;
		err = myri10ge_add_jbuf(ss);
		if (err == 0) {
			allocated++;
		}
	}
	return (allocated);
}
static void
myri10ge_remove_jbufs(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j;

	mutex_enter(&jpool->mtx);
	myri10ge_pull_jpool(ss);
	while (jpool->head != NULL) {
		jpool->num_alloc--;
		j = jpool->head;
		jpool->head = j->next;
		myri10ge_remove_jbuf(j);
	}
	mutex_exit(&jpool->mtx);
}
static void
myri10ge_carve_up_jbufs_into_small_ring(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j = NULL;
	caddr_t ptr;
	uint32_t dma_low, dma_high;
	int idx, len;
	unsigned int alloc_size;

	dma_low = dma_high = len = 0;
	alloc_size = myri10ge_small_bytes + MXGEFW_PAD;
	ptr = NULL;
	for (idx = 0; idx < ss->rx_small.mask + 1; idx++) {
		/* Allocate a jumbo frame and carve it into small frames */
		if (len < alloc_size) {
			mutex_enter(&jpool->mtx);
			/* remove jumbo from freelist */
			j = jpool->head;
			jpool->head = j->next;
			/* place it onto small list */
			j->next = ss->small_jpool;
			ss->small_jpool = j;
			mutex_exit(&jpool->mtx);
			len = myri10ge_mtu;
			dma_low = ntohl(j->dma.low);
			dma_high = ntohl(j->dma.high);
			ptr = j->buf;
		}
		ss->rx_small.info[idx].ptr = ptr;
		ss->rx_small.shadow[idx].addr_low = htonl(dma_low);
		ss->rx_small.shadow[idx].addr_high = htonl(dma_high);
		len -= alloc_size;
		ptr += alloc_size;
		dma_low += alloc_size;
	}
}
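/*
 * Each jumbo buffer of myri10ge_mtu bytes is carved into
 * myri10ge_mtu / alloc_size small receive buffers.  Assuming the
 * firmware's usual 2-byte MXGEFW_PAD, the default small_bytes of 510
 * gives a tidy power-of-two alloc_size of 512.
 * myri10ge_setup_slice() uses the same arithmetic to size
 * jbufs_for_smalls.
 */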
/*
 * Return the jumbo bufs we carved up for the small ring to the jumbo pool
 */

static void
myri10ge_release_small_jbufs(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j = NULL;

	mutex_enter(&jpool->mtx);
	while (ss->small_jpool != NULL) {
		j = ss->small_jpool;
		ss->small_jpool = j->next;
		j->next = jpool->head;
		jpool->head = j;
	}
	mutex_exit(&jpool->mtx);
	ss->jbufs_for_smalls = 0;
}
static int
myri10ge_add_tx_handle(struct myri10ge_slice_state *ss)
{
	myri10ge_tx_ring_t *tx = &ss->tx;
	struct myri10ge_priv *mgp = ss->mgp;
	struct myri10ge_tx_dma_handle *handle;
	int err;

	handle = kmem_zalloc(sizeof (*handle), KM_SLEEP);
	err = ddi_dma_alloc_handle(mgp->dip,
	    &myri10ge_tx_dma_attr,
	    DDI_DMA_SLEEP, NULL,
	    &handle->h);
	if (err) {
		static int limit = 0;
		if (limit == 0)
			cmn_err(CE_WARN, "%s: Failed to alloc tx dma handle\n",
			    mgp->name);
		limit++;
		kmem_free(handle, sizeof (*handle));
		return (err);
	}
	mutex_enter(&tx->handle_lock);
	MYRI10GE_SLICE_STAT_INC(tx_handles_alloced);
	handle->next = tx->free_tx_handles;
	tx->free_tx_handles = handle;
	mutex_exit(&tx->handle_lock);
	return (DDI_SUCCESS);
}
static void
myri10ge_remove_tx_handles(struct myri10ge_slice_state *ss)
{
	myri10ge_tx_ring_t *tx = &ss->tx;
	struct myri10ge_tx_dma_handle *handle;

	mutex_enter(&tx->handle_lock);

	handle = tx->free_tx_handles;
	while (handle != NULL) {
		tx->free_tx_handles = handle->next;
		ddi_dma_free_handle(&handle->h);
		kmem_free(handle, sizeof (*handle));
		handle = tx->free_tx_handles;
		MYRI10GE_SLICE_STAT_DEC(tx_handles_alloced);
	}
	mutex_exit(&tx->handle_lock);
	if (MYRI10GE_SLICE_STAT(tx_handles_alloced) != 0) {
		cmn_err(CE_WARN, "%s: %d tx dma handles allocated at close\n",
		    ss->mgp->name,
		    (int)MYRI10GE_SLICE_STAT(tx_handles_alloced));
	}
}
static void
myri10ge_free_tx_handles(myri10ge_tx_ring_t *tx,
    struct myri10ge_tx_dma_handle_head *list)
{
	mutex_enter(&tx->handle_lock);
	list->tail->next = tx->free_tx_handles;
	tx->free_tx_handles = list->head;
	mutex_exit(&tx->handle_lock);
}

static void
myri10ge_free_tx_handle_slist(myri10ge_tx_ring_t *tx,
    struct myri10ge_tx_dma_handle *handle)
{
	struct myri10ge_tx_dma_handle_head list;

	if (handle == NULL)
		return;
	list.head = handle;
	list.tail = handle;
	while (handle != NULL) {
		list.tail = handle;
		handle = handle->next;
	}
	myri10ge_free_tx_handles(tx, &list);
}
static int
myri10ge_alloc_tx_handles(struct myri10ge_slice_state *ss, int count,
    struct myri10ge_tx_dma_handle **ret)
{
	myri10ge_tx_ring_t *tx = &ss->tx;
	struct myri10ge_tx_dma_handle *handle;
	int err, i;

	mutex_enter(&tx->handle_lock);
	for (i = 0; i < count; i++) {
		handle = tx->free_tx_handles;
		while (handle == NULL) {
			mutex_exit(&tx->handle_lock);
			err = myri10ge_add_tx_handle(ss);
			if (err != DDI_SUCCESS) {
				goto abort_with_handles;
			}
			mutex_enter(&tx->handle_lock);
			handle = tx->free_tx_handles;
		}
		tx->free_tx_handles = handle->next;
		handle->next = *ret;
		*ret = handle;
	}
	mutex_exit(&tx->handle_lock);
	return (DDI_SUCCESS);

abort_with_handles:
	myri10ge_free_tx_handle_slist(tx, *ret);
	return (err);
}
/*
 * Frees DMA resources associated with the send ring
 */
static void
myri10ge_unprepare_tx_ring(struct myri10ge_slice_state *ss)
{
	myri10ge_tx_ring_t *tx;
	struct myri10ge_tx_dma_handle_head handles;
	size_t bytes;
	int idx;

	tx = &ss->tx;
	handles.head = NULL;
	handles.tail = NULL;
	for (idx = 0; idx < ss->tx.mask + 1; idx++) {
		if (tx->info[idx].m) {
			(void) ddi_dma_unbind_handle(tx->info[idx].handle->h);
			handles.head = tx->info[idx].handle;
			if (handles.tail == NULL)
				handles.tail = tx->info[idx].handle;
			freeb(tx->info[idx].m);
			tx->info[idx].m = 0;
			tx->info[idx].handle = 0;
		}
		tx->cp[idx].va = NULL;
		myri10ge_dma_free(&tx->cp[idx].dma);
	}
	bytes = sizeof (*tx->cp) * (tx->mask + 1);
	kmem_free(tx->cp, bytes);
	tx->cp = NULL;
	if (handles.head != NULL)
		myri10ge_free_tx_handles(tx, &handles);
	myri10ge_remove_tx_handles(ss);
}
/*
 * Allocates DMA handles associated with the send ring
 */
static inline int
myri10ge_prepare_tx_ring(struct myri10ge_slice_state *ss)
{
	struct myri10ge_tx_dma_handle *handles;
	int h;
	size_t bytes;

	bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1);
	ss->tx.cp = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->tx.cp == NULL) {
		cmn_err(CE_WARN,
		    "%s: Failed to allocate tx copyblock storage\n",
		    ss->mgp->name);
		return (DDI_FAILURE);
	}

	/* allocate the TX copyblocks */
	for (h = 0; h < ss->tx.mask + 1; h++) {
		ss->tx.cp[h].va = myri10ge_dma_alloc(ss->mgp->dip,
		    4096, &myri10ge_rx_jumbo_dma_attr,
		    &myri10ge_dev_access_attr, DDI_DMA_STREAMING,
		    DDI_DMA_WRITE|DDI_DMA_STREAMING, &ss->tx.cp[h].dma, 1,
		    DDI_DMA_DONTWAIT);
		if (ss->tx.cp[h].va == NULL) {
			cmn_err(CE_WARN, "%s: Failed to allocate tx "
			    "copyblock %d\n", ss->mgp->name, h);
			goto abort_with_copyblocks;
		}
	}
	/* pre-allocate transmit handles */
	handles = NULL;
	(void) myri10ge_alloc_tx_handles(ss, myri10ge_tx_handles_initial,
	    &handles);
	if (handles != NULL)
		myri10ge_free_tx_handle_slist(&ss->tx, handles);

	return (DDI_SUCCESS);

abort_with_copyblocks:
	while (h > 0) {
		h--;
		myri10ge_dma_free(&ss->tx.cp[h].dma);
	}

	bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1);
	kmem_free(ss->tx.cp, bytes);
	ss->tx.cp = NULL;
	return (DDI_FAILURE);
}
/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PT:ddd mmm xx xx:xx:xx xx\0
 * PV:ddd mmm xx xx:xx:xx xx\0
 */
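/*
 * For example, a string of "MAC=0:60:dd:47:ab:cd" is parsed one or
 * two hex digits per octet: the myri10ge_digit() macro below maps a
 * hex character to its value (returning -1 for a non-hex character),
 * and a lone digit before a ':' is treated as having a zero high
 * nibble.
 */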
static int
myri10ge_read_mac_addr(struct myri10ge_priv *mgp)
{
#define	MYRI10GE_NEXT_STRING(p) while (ptr < limit && *ptr++)
#define	myri10ge_digit(c) (((c) >= '0' && (c) <= '9') ? ((c) - '0') :	\
		(((c) >= 'A' && (c) <= 'F') ? (10 + (c) - 'A') :	\
		(((c) >= 'a' && (c) <= 'f') ? (10 + (c) - 'a') : -1)))

	char *ptr, *limit;
	int i, hv, lv;

	ptr = mgp->eeprom_strings;
	limit = mgp->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE;

	while (*ptr != '\0' && ptr < limit) {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			ptr += 4;
			if (myri10ge_verbose)
				printf("%s: mac address = %s\n", mgp->name,
				    ptr);
			mgp->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				if ((ptr + 2) > limit)
					goto abort;

				if (*(ptr+1) == ':') {
					hv = 0;
					lv = myri10ge_digit(*ptr); ptr++;
				} else {
					hv = myri10ge_digit(*ptr); ptr++;
					lv = myri10ge_digit(*ptr); ptr++;
				}
				mgp->mac_addr[i] = (hv << 4) | lv;
				ptr++;
			}
		}
		if (memcmp((const void *)ptr, "SN=", 3) == 0) {
			ptr += 3;
			mgp->sn_str = (char *)ptr;
		}
		if (memcmp((const void *)ptr, "PC=", 3) == 0) {
			ptr += 3;
			mgp->pc_str = (char *)ptr;
		}
		MYRI10GE_NEXT_STRING(ptr);
	}

	return (0);

abort:
	cmn_err(CE_WARN, "%s: failed to parse eeprom_strings", mgp->name);
	return (ENXIO);
}
/*
 * Determine the register set containing the PCI resource we
 * want to map: the memory-mappable part of the interface.  We do
 * this by scanning the DDI "reg" property of the interface,
 * which is an array of mx_ddi_reg_set structures.
 */
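/*
 * Each "reg" entry is five 32-bit cells: a config word holding the
 * bus/device/function numbers and the address-space type, followed by
 * the 64-bit PCI address and the 64-bit span, each split into
 * high/low halves.  The accessor macros below pick those fields
 * apart.
 */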
static int
myri10ge_reg_set(dev_info_t *dip, int *reg_set, int *span,
    unsigned long *busno, unsigned long *devno,
    unsigned long *funcno)
{

#define	REGISTER_NUMBER(ip)	(ip[0] >> 0 & 0xff)
#define	FUNCTION_NUMBER(ip)	(ip[0] >> 8 & 0x07)
#define	DEVICE_NUMBER(ip)	(ip[0] >> 11 & 0x1f)
#define	BUS_NUMBER(ip)		(ip[0] >> 16 & 0xff)
#define	ADDRESS_SPACE(ip)	(ip[0] >> 24 & 0x03)
#define	PCI_ADDR_HIGH(ip)	(ip[1])
#define	PCI_ADDR_LOW(ip)	(ip[2])
#define	PCI_SPAN_HIGH(ip)	(ip[3])
#define	PCI_SPAN_LOW(ip)	(ip[4])

#define	MX_DDI_REG_SET_32_BIT_MEMORY_SPACE 2
#define	MX_DDI_REG_SET_64_BIT_MEMORY_SPACE 3

	int *data, i, *rs;
	uint32_t nelementsp;

#ifdef MYRI10GE_REGSET_VERBOSE
	char *address_space_name[] = { "Configuration Space",
					"I/O Space",
					"32-bit Memory Space",
					"64-bit Memory Space"
	};
#endif

	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "reg", &data, &nelementsp) != DDI_SUCCESS) {
		printf("Could not determine register set.\n");
		return (ENXIO);
	}

#ifdef MYRI10GE_REGSET_VERBOSE
	printf("There are %d register sets.\n", nelementsp / 5);
#endif
	if (!nelementsp) {
		printf("Didn't find any \"reg\" properties.\n");
		ddi_prop_free(data);
		return (ENODEV);
	}

	/* Scan for the register number. */
	rs = &data[0];
	*busno = BUS_NUMBER(rs);
	*devno = DEVICE_NUMBER(rs);
	*funcno = FUNCTION_NUMBER(rs);

#ifdef MYRI10GE_REGSET_VERBOSE
	printf("*** Scanning for register number.\n");
#endif
	for (i = 0; i < nelementsp / 5; i++) {
		rs = &data[5 * i];
#ifdef MYRI10GE_REGSET_VERBOSE
		printf("Examining register set %d:\n", i);
		printf("  Register number = %d.\n", REGISTER_NUMBER(rs));
		printf("  Function number = %d.\n", FUNCTION_NUMBER(rs));
		printf("  Device number   = %d.\n", DEVICE_NUMBER(rs));
		printf("  Bus number      = %d.\n", BUS_NUMBER(rs));
		printf("  Address space   = %d (%s ).\n", ADDRESS_SPACE(rs),
		    address_space_name[ADDRESS_SPACE(rs)]);
		printf("  pci address 0x%08x %08x\n", PCI_ADDR_HIGH(rs),
		    PCI_ADDR_LOW(rs));
		printf("  pci span 0x%08x %08x\n", PCI_SPAN_HIGH(rs),
		    PCI_SPAN_LOW(rs));
#endif
		/* We are looking for a memory property. */

		if (ADDRESS_SPACE(rs) == MX_DDI_REG_SET_64_BIT_MEMORY_SPACE ||
		    ADDRESS_SPACE(rs) == MX_DDI_REG_SET_32_BIT_MEMORY_SPACE) {
			*reg_set = i;

#ifdef MYRI10GE_REGSET_VERBOSE
			printf("%s uses register set %d.\n",
			    address_space_name[ADDRESS_SPACE(rs)], *reg_set);
#endif

			*span = (PCI_SPAN_LOW(rs));
#ifdef MYRI10GE_REGSET_VERBOSE
			printf("Board span is 0x%x\n", *span);
#endif
			break;
		}
	}

	ddi_prop_free(data);

	/* If no match, fail. */
	if (i >= nelementsp / 5) {
		return (EIO);
	}

	return (0);
}
static int
myri10ge_load_firmware_from_zlib(struct myri10ge_priv *mgp, uint32_t *limit)
{
	void *inflate_buffer;
	int rv, status;
	size_t sram_size = mgp->sram_size - MYRI10GE_EEPROM_STRINGS_SIZE;
	size_t destlen;
	mcp_gen_header_t *hdr;
	unsigned hdr_offset, i;

	*limit = 0; /* -Wuninitialized */
	status = 0;

	inflate_buffer = kmem_zalloc(sram_size, KM_NOSLEEP);
	if (!inflate_buffer) {
		cmn_err(CE_WARN,
		    "%s: Could not allocate buffer to inflate mcp\n",
		    mgp->name);
		return (ENOMEM);
	}

	destlen = sram_size;
	rv = z_uncompress(inflate_buffer, &destlen, mgp->eth_z8e,
	    mgp->eth_z8e_length);

	if (rv != Z_OK) {
		cmn_err(CE_WARN, "%s: Could not inflate mcp: %s\n",
		    mgp->name, z_strerror(rv));
		status = ENXIO;
		goto abort;
	}

	*limit = (uint32_t)destlen;

	hdr_offset = htonl(*(uint32_t *)(void *)((char *)inflate_buffer +
	    MCP_HEADER_PTR_OFFSET));
	hdr = (void *)((char *)inflate_buffer + hdr_offset);
	if (ntohl(hdr->mcp_type) != MCP_TYPE_ETH) {
		cmn_err(CE_WARN, "%s: Bad firmware type: 0x%x\n", mgp->name,
		    ntohl(hdr->mcp_type));
		status = EIO;
		goto abort;
	}

	/* save firmware version for kstat */
	(void) strncpy(mgp->fw_version, hdr->version, sizeof (mgp->fw_version));
	if (myri10ge_verbose)
		printf("%s: firmware id: %s\n", mgp->name, hdr->version);

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < *limit; i += 256) {
		myri10ge_pio_copy((char *)mgp->sram + MYRI10GE_FW_OFFSET + i,
		    (char *)inflate_buffer + i,
		    min(256U, (unsigned)(*limit - i)));
		mb();
		(void) *(int *)(void *)mgp->sram;
		mb();
	}

abort:
	kmem_free(inflate_buffer, sram_size);

	return (status);
}
static int
myri10ge_send_cmd(struct myri10ge_priv *mgp, uint32_t cmd,
    myri10ge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof (*buf) + 8];
	volatile mcp_cmd_response_t *response = mgp->cmd;
	volatile char *cmd_addr =
	    (volatile char *)mgp->sram + MXGEFW_ETH_CMD;
	int sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htonl(data->data0);
	buf->data1 = htonl(data->data1);
	buf->data2 = htonl(data->data2);
	buf->cmd = htonl(cmd);
	buf->response_addr.low = mgp->cmd_dma.low;
	buf->response_addr.high = mgp->cmd_dma.high;
	mutex_enter(&mgp->cmd_lock);
	response->result = 0xffffffff;
	mb();

	myri10ge_pio_copy((void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		mb();
		if (response->result != 0xffffffff) {
			if (response->result == 0) {
				data->data0 = ntohl(response->data);
				mutex_exit(&mgp->cmd_lock);
				return (0);
			} else if (ntohl(response->result)
			    == MXGEFW_CMD_UNKNOWN) {
				mutex_exit(&mgp->cmd_lock);
				return (ENOSYS);
			} else if (ntohl(response->result)
			    == MXGEFW_CMD_ERROR_UNALIGNED) {
				mutex_exit(&mgp->cmd_lock);
				return (E2BIG);
			} else {
				cmn_err(CE_WARN,
				    "%s: command %d failed, result = %d\n",
				    mgp->name, cmd, ntohl(response->result));
				mutex_exit(&mgp->cmd_lock);
				return (ENXIO);
			}
		}
		drv_usecwait(1000);
	}
	mutex_exit(&mgp->cmd_lock);
	cmn_err(CE_WARN, "%s: command %d timed out, result = %d\n",
	    mgp->name, cmd, ntohl(response->result));
	return (EAGAIN);
}
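/*
 * The command/response protocol above works by PIO-writing the
 * 8-byte-aligned command block to the firmware mailbox at
 * sram + MXGEFW_ETH_CMD; the firmware then DMAs its response into
 * the host buffer at cmd_dma.  The 0xffffffff sentinel stored in
 * response->result before the copy is simply a value the firmware
 * never returns, so polling it tells us when the response lands.
 */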
/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
myri10ge_dummy_rdma(struct myri10ge_priv *mgp, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf;
	int i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)mgp->cmd;
	*confirm = 0;
	mb();

	/*
	 * send an rdma command to the PCIe engine, and wait for the
	 * response in the confirmation address.  The firmware should
	 * write a -1 there to indicate it is alive and well
	 */

	buf[0] = mgp->cmd_dma.high;		/* confirm addr MSW */
	buf[1] = mgp->cmd_dma.low;		/* confirm addr LSW */
	buf[2] = htonl(0xffffffff);		/* confirm data */
	buf[3] = htonl(mgp->cmd_dma.high);	/* dummy addr MSW */
	buf[4] = htonl(mgp->cmd_dma.low);	/* dummy addr LSW */
	buf[5] = htonl(enable);			/* enable? */

	submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_DUMMY_RDMA);

	myri10ge_pio_copy((char *)submit, buf, 64);
	mb();
	drv_usecwait(1000);
	mb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		drv_usecwait(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		cmn_err(CE_WARN, "%s: dummy rdma %s failed (%p = 0x%x)",
		    mgp->name,
		    (enable ? "enable" : "disable"), (void*) confirm, *confirm);
	}
}
static int
myri10ge_load_firmware(struct myri10ge_priv *mgp)
{
	myri10ge_cmd_t cmd;
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	status = myri10ge_load_firmware_from_zlib(mgp, &size);
	if (status) {
		cmn_err(CE_WARN, "%s: firmware loading failed\n", mgp->name);
		return (status);
	}

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)mgp->cmd;
	*confirm = 0;
	mb();

	/*
	 * send a reload command to the bootstrap MCP, and wait for the
	 * response in the confirmation address.  The firmware should
	 * write a -1 there to indicate it is alive and well
	 */

	buf[0] = mgp->cmd_dma.high;	/* confirm addr MSW */
	buf[1] = mgp->cmd_dma.low;	/* confirm addr LSW */
	buf[2] = htonl(0xffffffff);	/* confirm data */

	/*
	 * FIX: All newest firmware should un-protect the bottom of
	 * the sram before handoff. However, the very first interfaces
	 * do not. Therefore the handoff copy must skip the first 8 bytes
	 */
	buf[3] = htonl(MYRI10GE_FW_OFFSET + 8); /* where the code starts */
	buf[4] = htonl(size - 8);	/* length of code */
	buf[5] = htonl(8);		/* where to copy to */
	buf[6] = htonl(0);		/* where to jump to */

	submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_HANDOFF);

	myri10ge_pio_copy((char *)submit, buf, 64);
	mb();
	drv_usecwait(1000);
	mb();
	i = 0;
	while (*confirm != 0xffffffff && i < 1000) {
		drv_usecwait(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		cmn_err(CE_WARN, "%s: handoff failed (%p = 0x%x)",
		    mgp->name, (void *) confirm, *confirm);

		return (ENXIO);
	}
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed MXGEFW_CMD_GET_RX_RING_SIZE\n",
		    mgp->name);
		return (ENXIO);
	}

	mgp->max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
	myri10ge_dummy_rdma(mgp, 1);
	return (0);
}
static int
myri10ge_m_unicst(void *arg, const uint8_t *addr)
{
	struct myri10ge_priv *mgp = arg;
	myri10ge_cmd_t cmd;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
	    | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = myri10ge_send_cmd(mgp, MXGEFW_SET_MAC_ADDRESS, &cmd);
	if (status == 0 && (addr != mgp->mac_addr))
		(void) memcpy(mgp->mac_addr, addr, sizeof (mgp->mac_addr));

	return (status);
}
static int
myri10ge_change_pause(struct myri10ge_priv *mgp, int pause)
{
	myri10ge_cmd_t cmd;
	int status;

	if (pause)
		status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_FLOW_CONTROL,
		    &cmd);
	else
		status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_FLOW_CONTROL,
		    &cmd);

	if (status) {
		cmn_err(CE_WARN, "%s: Failed to set flow control mode\n",
		    mgp->name);
		return (ENXIO);
	}
	mgp->pause = pause;
	return (0);
}
static void
myri10ge_change_promisc(struct myri10ge_priv *mgp, int promisc)
{
	myri10ge_cmd_t cmd;
	int status;

	if (promisc)
		status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_PROMISC, &cmd);
	else
		status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_PROMISC, &cmd);

	if (status) {
		cmn_err(CE_WARN, "%s: Failed to set promisc mode\n",
		    mgp->name);
	}
}
static int
myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type)
{
	myri10ge_cmd_t cmd;
	int status;
	uint32_t len;
	void *dmabench;
	struct myri10ge_dma_stuff dmabench_dma;
	char *test = " ";

	/*
	 * Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */

	len = mgp->tx_boundary;

	dmabench = myri10ge_dma_alloc(mgp->dip, len,
	    &myri10ge_rx_jumbo_dma_attr, &myri10ge_dev_access_attr,
	    DDI_DMA_STREAMING, DDI_DMA_RDWR|DDI_DMA_STREAMING,
	    &dmabench_dma, 1, DDI_DMA_DONTWAIT);
	mgp->read_dma = mgp->write_dma = mgp->read_write_dma = 0;
	if (dmabench == NULL) {
		cmn_err(CE_WARN, "%s dma benchmark aborted\n", mgp->name);
		return (ENOMEM);
	}

	cmd.data0 = ntohl(dmabench_dma.low);
	cmd.data1 = ntohl(dmabench_dma.high);
	cmd.data2 = len * 0x10000;
	status = myri10ge_send_cmd(mgp, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	mgp->read_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);
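	/*
	 * Worked example of the bandwidth arithmetic: (cmd.data0 >> 16)
	 * transfers of len bytes each completed in (cmd.data0 & 0xffff)
	 * ticks of 0.5us, so bytes-per-microsecond (i.e. MB/s) is
	 * transfers * len / (ticks / 2) -- hence the "* 2" above and in
	 * the write and read/write cases below.
	 */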
	cmd.data0 = ntohl(dmabench_dma.low);
	cmd.data1 = ntohl(dmabench_dma.high);
	cmd.data2 = len * 0x1;
	status = myri10ge_send_cmd(mgp, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	mgp->write_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);

	cmd.data0 = ntohl(dmabench_dma.low);
	cmd.data1 = ntohl(dmabench_dma.high);
	cmd.data2 = len * 0x10001;
	status = myri10ge_send_cmd(mgp, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	mgp->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
	    (cmd.data0 & 0xffff);

abort:
	myri10ge_dma_free(&dmabench_dma);
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		cmn_err(CE_WARN, "%s %s dma benchmark failed\n", mgp->name,
		    test);
	return (status);
}
static int
myri10ge_reset(struct myri10ge_priv *mgp)
{
	myri10ge_cmd_t cmd;
	struct myri10ge_nic_stat *ethstat;
	struct myri10ge_slice_state *ss;
	int i, status;
	size_t bytes;

	/* send a reset command to the card to see if it is alive */
	(void) memset(&cmd, 0, sizeof (cmd));
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
		return (ENXIO);
	}

	/* Now exchange information about interrupts */

	bytes = mgp->max_intr_slots * sizeof (*mgp->ss[0].rx_done.entry);
	cmd.data0 = (uint32_t)bytes;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via myri10ge_probe_slices() MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (mgp->num_slices > 1) {

		/* ask the maximum number of slices it supports */
		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
		    &cmd);
		if (status != 0) {
			cmn_err(CE_WARN,
			    "%s: failed to get number of slices\n",
			    mgp->name);
			return (status);
		}

		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */

		cmd.data0 = mgp->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE |
		    MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
		    &cmd);
		if (status != 0) {
			cmn_err(CE_WARN,
			    "%s: failed to set number of slices\n",
			    mgp->name);
			return (status);
		}
	}
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		cmd.data0 = ntohl(ss->rx_done.dma.low);
		cmd.data1 = ntohl(ss->rx_done.dma.high);
		cmd.data2 = i;
		status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_DMA,
		    &cmd);
	}

	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		ss->irq_claim = (volatile unsigned int *)
		    (void *)(mgp->sram + cmd.data0 + 8 * i);
	}

	if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
		status |= myri10ge_send_cmd(mgp,
		    MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd);
		mgp->irq_deassert = (uint32_t *)(void *)(mgp->sram + cmd.data0);
	}

	status |= myri10ge_send_cmd(mgp,
	    MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
	mgp->intr_coal_delay_ptr = (uint32_t *)(void *)(mgp->sram + cmd.data0);

	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed set interrupt parameters\n",
		    mgp->name);
		return (status);
	}

	*mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay);
	(void) myri10ge_dma_test(mgp, MXGEFW_DMA_TEST);

	/* reset mcp/driver shared state back to 0 */

	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		bytes = mgp->max_intr_slots *
		    sizeof (*mgp->ss[0].rx_done.entry);
		(void) memset(ss->rx_done.entry, 0, bytes);
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->rx_token = 0;
		ss->tx.watchdog_done = 0;
		ss->tx.watchdog_req = 0;
		ss->tx.active = 0;
		ss->tx.activate = 0;
	}
	mgp->watchdog_rx_pause = 0;
	if (mgp->ksp_stat != NULL) {
		ethstat = (struct myri10ge_nic_stat *)mgp->ksp_stat->ks_data;
		ethstat->link_changes.value.ul = 0;
	}
	status = myri10ge_m_unicst(mgp, mgp->mac_addr);
	myri10ge_change_promisc(mgp, 0);
	(void) myri10ge_change_pause(mgp, mgp->pause);
	return (status);
}
static int
myri10ge_init_toeplitz(struct myri10ge_priv *mgp)
{
	myri10ge_cmd_t cmd;
	int i, b, s, t, j;
	int status;
	uint32_t k[8];
	uint32_t tmp;
	uint8_t *key;

	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RSS_KEY_OFFSET,
	    &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed to get rss key\n",
		    mgp->name);
		return (EIO);
	}
	myri10ge_pio_copy32(mgp->rss_key,
	    (uint32_t *)(void*)((char *)mgp->sram + cmd.data0),
	    sizeof (mgp->rss_key));

	mgp->toeplitz_hash_table = kmem_alloc(sizeof (uint32_t) * 12 * 256,
	    KM_SLEEP);
	key = (uint8_t *)mgp->rss_key;
	t = 0;
	for (b = 0; b < 12; b++) {
		for (s = 0; s < 8; s++) {
			/* Bits: b*8+s, ..., b*8+s+31 */
			k[s] = 0;
			for (j = 0; j < 32; j++) {
				int bit = b*8+s+j;
				bit = 0x1 & (key[bit / 8] >> (7 - (bit & 0x7)));
				k[s] |= bit << (31 - j);
			}
		}

		for (i = 0; i <= 0xff; i++) {
			tmp = 0;
			if (i & (1 << 7)) { tmp ^= k[0]; }
			if (i & (1 << 6)) { tmp ^= k[1]; }
			if (i & (1 << 5)) { tmp ^= k[2]; }
			if (i & (1 << 4)) { tmp ^= k[3]; }
			if (i & (1 << 3)) { tmp ^= k[4]; }
			if (i & (1 << 2)) { tmp ^= k[5]; }
			if (i & (1 << 1)) { tmp ^= k[6]; }
			if (i & (1 << 0)) { tmp ^= k[7]; }
			mgp->toeplitz_hash_table[t++] = tmp;
		}
	}
	return (0);
}
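/*
 * The table above is the standard byte-at-a-time formulation of the
 * Toeplitz hash: the 12 hashed input bytes are the IPv4 source and
 * destination addresses plus the TCP source and destination ports.
 * k[s] is the 32-bit window of the RSS key starting at bit b*8+s, so
 * entry [b*256 + i] pre-computes the XOR of the windows selected by
 * the bits of input byte i, and a full hash is just 12 table lookups
 * XORed together (as done in myri10ge_toeplitz_send_hash() below).
 */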
static inline struct myri10ge_slice_state *
myri10ge_toeplitz_send_hash(struct myri10ge_priv *mgp, struct ip *ip)
{
	struct tcphdr *hdr;
	uint32_t saddr, daddr;
	uint32_t hash, slice;
	uint32_t *table = mgp->toeplitz_hash_table;
	uint16_t src, dst;

	/*
	 * Note hashing order is reversed from how it is done
	 * in the NIC, so as to generate the same hash value
	 * for the connection to try to keep connections CPU local
	 */

	/* hash on IPv4 src/dst address */
	saddr = ntohl(ip->ip_src.s_addr);
	daddr = ntohl(ip->ip_dst.s_addr);
	hash = table[(256 * 0) + ((daddr >> 24) & 0xff)];
	hash ^= table[(256 * 1) + ((daddr >> 16) & 0xff)];
	hash ^= table[(256 * 2) + ((daddr >> 8) & 0xff)];
	hash ^= table[(256 * 3) + ((daddr) & 0xff)];
	hash ^= table[(256 * 4) + ((saddr >> 24) & 0xff)];
	hash ^= table[(256 * 5) + ((saddr >> 16) & 0xff)];
	hash ^= table[(256 * 6) + ((saddr >> 8) & 0xff)];
	hash ^= table[(256 * 7) + ((saddr) & 0xff)];
	/* hash on TCP port, if required */
	if ((myri10ge_rss_hash & MXGEFW_RSS_HASH_TYPE_TCP_IPV4) &&
	    ip->ip_p == IPPROTO_TCP) {
		hdr = (struct tcphdr *)(void *)
		    (((uint8_t *)ip) + (ip->ip_hl << 2));
		src = ntohs(hdr->th_sport);
		dst = ntohs(hdr->th_dport);

		hash ^= table[(256 * 8) + ((dst >> 8) & 0xff)];
		hash ^= table[(256 * 9) + ((dst) & 0xff)];
		hash ^= table[(256 * 10) + ((src >> 8) & 0xff)];
		hash ^= table[(256 * 11) + ((src) & 0xff)];
	}
	slice = (mgp->num_slices - 1) & hash;
	return (&mgp->ss[slice]);
}
static inline struct myri10ge_slice_state *
myri10ge_simple_send_hash(struct myri10ge_priv *mgp, struct ip *ip)
{
	struct tcphdr *hdr;
	uint32_t slice, hash_val;

	if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) {
		return (&mgp->ss[0]);
	}
	hdr = (struct tcphdr *)(void *)(((uint8_t *)ip) + (ip->ip_hl << 2));

	/*
	 * Use the second byte of the *destination* port for
	 * MXGEFW_RSS_HASH_TYPE_SRC_PORT, so as to match NIC's hashing
	 */
	hash_val = ntohs(hdr->th_dport) & 0xff;
	if (myri10ge_rss_hash == MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT)
		hash_val += ntohs(hdr->th_sport) & 0xff;

	slice = (mgp->num_slices - 1) & hash_val;
	return (&mgp->ss[slice]);
}
static inline struct myri10ge_slice_state *
myri10ge_send_hash(struct myri10ge_priv *mgp, mblk_t *mp)
{
	unsigned int slice = 0;
	struct ether_header *eh;
	struct ether_vlan_header *vh;
	struct ip *ip;
	int ehl, ihl;

	if (mgp->num_slices == 1)
		return (&mgp->ss[0]);

	if (myri10ge_tx_hash == 0) {
		slice = CPU->cpu_id & (mgp->num_slices - 1);
		return (&mgp->ss[slice]);
	}

	/*
	 * ensure it is a TCP or UDP over IPv4 packet, and that the
	 * headers are in the 1st mblk.  Otherwise, punt
	 */
	ehl = sizeof (*eh);
	ihl = sizeof (*ip);
	if ((MBLKL(mp)) < (ehl + ihl + 8))
		return (&mgp->ss[0]);
	eh = (struct ether_header *)(void *)mp->b_rptr;
	ip = (struct ip *)(void *)(eh + 1);
	if (eh->ether_type != BE_16(ETHERTYPE_IP)) {
		if (eh->ether_type != BE_16(ETHERTYPE_VLAN))
			return (&mgp->ss[0]);
		vh = (struct ether_vlan_header *)(void *)mp->b_rptr;
		if (vh->ether_type != BE_16(ETHERTYPE_IP))
			return (&mgp->ss[0]);
		ehl += 4;
		ip = (struct ip *)(void *)(vh + 1);
	}
	ihl = ip->ip_hl << 2;
	if (MBLKL(mp) < (ehl + ihl + 8))
		return (&mgp->ss[0]);
	switch (myri10ge_rss_hash) {
	case MXGEFW_RSS_HASH_TYPE_IPV4:
		/* fallthru */
	case MXGEFW_RSS_HASH_TYPE_TCP_IPV4:
		/* fallthru */
	case (MXGEFW_RSS_HASH_TYPE_IPV4|MXGEFW_RSS_HASH_TYPE_TCP_IPV4):
		return (myri10ge_toeplitz_send_hash(mgp, ip));
	case MXGEFW_RSS_HASH_TYPE_SRC_PORT:
		/* fallthru */
	case MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT:
		return (myri10ge_simple_send_hash(mgp, ip));
	default:
		break;
	}
	return (&mgp->ss[0]);
}
static int
myri10ge_setup_slice(struct myri10ge_slice_state *ss)
{
	struct myri10ge_priv *mgp = ss->mgp;
	myri10ge_cmd_t cmd;
	int tx_ring_size, rx_ring_size;
	int tx_ring_entries, rx_ring_entries;
	int slice, status;
	int allocated, idx;
	size_t bytes;

	slice = ss - mgp->ss;
	cmd.data0 = slice;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
	tx_ring_size = cmd.data0;
	cmd.data0 = slice;
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0)
		return (status);
	rx_ring_size = cmd.data0;

	tx_ring_entries = tx_ring_size / sizeof (struct mcp_kreq_ether_send);
	rx_ring_entries = rx_ring_size / sizeof (struct mcp_dma_addr);
	ss->tx.mask = tx_ring_entries - 1;
	ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;

	/* get the lanai pointers to the send and receive rings */

	cmd.data0 = slice;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
	ss->tx.lanai = (mcp_kreq_ether_send_t *)(void *)(mgp->sram + cmd.data0);
	if (mgp->num_slices > 1) {
		ss->tx.go = (char *)mgp->sram + MXGEFW_ETH_SEND_GO + 64 * slice;
		ss->tx.stop = (char *)mgp->sram + MXGEFW_ETH_SEND_STOP +
		    64 * slice;
	} else {
		ss->tx.go = NULL;
		ss->tx.stop = NULL;
	}

	cmd.data0 = slice;
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
	ss->rx_small.lanai = (mcp_kreq_ether_recv_t *)
	    (void *)(mgp->sram + cmd.data0);

	cmd.data0 = slice;
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
	ss->rx_big.lanai = (mcp_kreq_ether_recv_t *)(void *)
	    (mgp->sram + cmd.data0);

	if (status != 0) {
		cmn_err(CE_WARN,
		    "%s: failed to get ring sizes or locations\n", mgp->name);
		return (status);
	}

	status = ENOMEM;
	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
	ss->rx_small.shadow = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_small.shadow == NULL)
		goto abort;
	(void) memset(ss->rx_small.shadow, 0, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
	ss->rx_big.shadow = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_big.shadow == NULL)
		goto abort_with_rx_small_shadow;
	(void) memset(ss->rx_big.shadow, 0, bytes);

	/* allocate the host info rings */

	bytes = tx_ring_entries * sizeof (*ss->tx.info);
	ss->tx.info = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->tx.info == NULL)
		goto abort_with_rx_big_shadow;
	(void) memset(ss->tx.info, 0, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
	ss->rx_small.info = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_small.info == NULL)
		goto abort_with_tx_info;
	(void) memset(ss->rx_small.info, 0, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
	ss->rx_big.info = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_big.info == NULL)
		goto abort_with_rx_small_info;
	(void) memset(ss->rx_big.info, 0, bytes);

	ss->tx.stall = ss->tx.sched = 0;
	ss->tx.stall_early = ss->tx.stall_late = 0;

	ss->jbufs_for_smalls = 1 + (1 + ss->rx_small.mask) /
	    (myri10ge_mtu / (myri10ge_small_bytes + MXGEFW_PAD));

	allocated = myri10ge_add_jbufs(ss,
	    myri10ge_bigbufs_initial + ss->jbufs_for_smalls, 1);
	if (allocated < ss->jbufs_for_smalls + myri10ge_bigbufs_initial) {
		cmn_err(CE_WARN,
		    "%s: Could not allocate enough receive buffers (%d/%d)\n",
		    mgp->name, allocated,
		    myri10ge_bigbufs_initial + ss->jbufs_for_smalls);
		goto abort_with_jumbos;
	}

	myri10ge_carve_up_jbufs_into_small_ring(ss);
	ss->j_rx_cnt = 0;

	mutex_enter(&ss->jpool.mtx);
	if (allocated < rx_ring_entries)
		ss->jpool.low_water = allocated / 4;
	else
		ss->jpool.low_water = rx_ring_entries / 2;

	/*
	 * invalidate the big receive ring in case we do not
	 * allocate sufficient jumbos to fill it
	 */
	(void) memset(ss->rx_big.shadow, 1,
	    (ss->rx_big.mask + 1) * sizeof (ss->rx_big.shadow[0]));
	for (idx = 7; idx <= ss->rx_big.mask; idx += 8) {
		myri10ge_submit_8rx(&ss->rx_big.lanai[idx - 7],
		    &ss->rx_big.shadow[idx - 7]);
		mb();
	}

	myri10ge_restock_jumbos(ss);

	for (idx = 7; idx <= ss->rx_small.mask; idx += 8) {
		myri10ge_submit_8rx(&ss->rx_small.lanai[idx - 7],
		    &ss->rx_small.shadow[idx - 7]);
		mb();
	}
	ss->rx_small.cnt = ss->rx_small.mask + 1;

	mutex_exit(&ss->jpool.mtx);

	status = myri10ge_prepare_tx_ring(ss);

	if (status != 0)
		goto abort_with_small_jbufs;

	cmd.data0 = ntohl(ss->fw_stats_dma.low);
	cmd.data1 = ntohl(ss->fw_stats_dma.high);
	cmd.data2 = sizeof (mcp_irq_data_t);
	cmd.data2 |= (slice << 16);
	bzero(ss->fw_stats, sizeof (*ss->fw_stats));
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
	if (status == ENOSYS) {
		cmd.data0 = ntohl(ss->fw_stats_dma.low) +
		    offsetof(mcp_irq_data_t, send_done_count);
		cmd.data1 = ntohl(ss->fw_stats_dma.high);
		status = myri10ge_send_cmd(mgp,
		    MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, &cmd);
	}
	if (status) {
		cmn_err(CE_WARN, "%s: Couldn't set stats DMA\n", mgp->name);
		goto abort_with_tx;
	}

	return (0);

abort_with_tx:
	myri10ge_unprepare_tx_ring(ss);

abort_with_small_jbufs:
	myri10ge_release_small_jbufs(ss);

abort_with_jumbos:
	if (allocated != 0) {
		mutex_enter(&ss->jpool.mtx);
		ss->jpool.low_water = 0;
		mutex_exit(&ss->jpool.mtx);
		myri10ge_unstock_jumbos(ss);
		myri10ge_remove_jbufs(ss);
	}

	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
	kmem_free(ss->rx_big.info, bytes);

abort_with_rx_small_info:
	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
	kmem_free(ss->rx_small.info, bytes);

abort_with_tx_info:
	bytes = tx_ring_entries * sizeof (*ss->tx.info);
	kmem_free(ss->tx.info, bytes);

abort_with_rx_big_shadow:
	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
	kmem_free(ss->rx_big.shadow, bytes);

abort_with_rx_small_shadow:
	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
	kmem_free(ss->rx_small.shadow, bytes);
abort:
	return (status);
}
static void
myri10ge_teardown_slice(struct myri10ge_slice_state *ss)
{
	int tx_ring_entries, rx_ring_entries;
	size_t bytes;

	/* ignore slices that have not been fully setup */
	if (ss->tx.cp == NULL)
		return;
	/* Free the TX copy buffers */
	myri10ge_unprepare_tx_ring(ss);

	/* stop passing returned buffers to firmware */

	mutex_enter(&ss->jpool.mtx);
	ss->jpool.low_water = 0;
	mutex_exit(&ss->jpool.mtx);
	myri10ge_release_small_jbufs(ss);

	/* Release the free jumbo frame pool */
	myri10ge_unstock_jumbos(ss);
	myri10ge_remove_jbufs(ss);

	rx_ring_entries = ss->rx_big.mask + 1;
	tx_ring_entries = ss->tx.mask + 1;

	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
	kmem_free(ss->rx_big.info, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
	kmem_free(ss->rx_small.info, bytes);

	bytes = tx_ring_entries * sizeof (*ss->tx.info);
	kmem_free(ss->tx.info, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
	kmem_free(ss->rx_big.shadow, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
	kmem_free(ss->rx_small.shadow, bytes);
}
2067 static int
2068 myri10ge_start_locked(struct myri10ge_priv *mgp)
2070 myri10ge_cmd_t cmd;
2071 int status, big_pow2, i;
2072 volatile uint8_t *itable;
2074 status = DDI_SUCCESS;
2075 /* Allocate DMA resources and receive buffers */
2077 status = myri10ge_reset(mgp);
2078 if (status != 0) {
2079 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
2080 return (DDI_FAILURE);
2083 if (mgp->num_slices > 1) {
2084 cmd.data0 = mgp->num_slices;
2085 cmd.data1 = 1; /* use MSI-X */
2086 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
2087 &cmd);
2088 if (status != 0) {
2089 cmn_err(CE_WARN,
2090 "%s: failed to set number of slices\n",
2091 mgp->name);
2092 goto abort_with_nothing;
2094 /* setup the indirection table */
2095 cmd.data0 = mgp->num_slices;
2096 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
2097 &cmd);
2099 status |= myri10ge_send_cmd(mgp,
2100 MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd);
2101 if (status != 0) {
2102 cmn_err(CE_WARN,
2103 "%s: failed to setup rss tables\n", mgp->name);
2106 /* just enable an identity mapping */
2107 itable = mgp->sram + cmd.data0;
2108 for (i = 0; i < mgp->num_slices; i++)
2109 itable[i] = (uint8_t)i;
2111 if (myri10ge_rss_hash & MYRI10GE_TOEPLITZ_HASH) {
2112 status = myri10ge_init_toeplitz(mgp);
2113 if (status != 0) {
2114 cmn_err(CE_WARN, "%s: failed to setup "
2115 "toeplitz tx hash table", mgp->name);
2116 goto abort_with_nothing;
2119 cmd.data0 = 1;
2120 cmd.data1 = myri10ge_rss_hash;
2121 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_ENABLE,
2122 &cmd);
2123 if (status != 0) {
2124 cmn_err(CE_WARN,
2125 "%s: failed to enable slices\n", mgp->name);
2126 goto abort_with_toeplitz;
2130 for (i = 0; i < mgp->num_slices; i++) {
2131 status = myri10ge_setup_slice(&mgp->ss[i]);
2132 if (status != 0)
2133 goto abort_with_slices;
2134 }
2135
2136 /*
2137 * Tell the MCP how many buffers it has, and to
2138 * bring the ethernet interface up
2139 *
2140 * Firmware needs the big buff size as a power of 2. Lie and
2141 * tell it the buffer is larger, because we only use 1
2142 * buffer/pkt, and the mtu will prevent overruns
2143 */
2144 big_pow2 = myri10ge_mtu + MXGEFW_PAD;
2145 while (!ISP2(big_pow2))
2146 big_pow2++;
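/*
 * Round-up example (assuming the usual 2-byte MXGEFW_PAD): an mtu of
 * 1500 gives 1502, which the loop bumps up to 2048; a 9000-byte mtu
 * gives 9002, which rounds up to 16384. The loop simply increments
 * until ISP2() reports a power of two.
 */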
2148 /* now give the firmware the buffer sizes and MTU */
2149 cmd.data0 = myri10ge_mtu;
2150 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_MTU, &cmd);
2151 cmd.data0 = myri10ge_small_bytes;
2152 status |=
2153 myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd);
2154 cmd.data0 = big_pow2;
2155 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
2156 if (status) {
2157 cmn_err(CE_WARN, "%s: Couldn't set buffer sizes\n", mgp->name);
2158 goto abort_with_slices;
2162 cmd.data0 = 1;
2163 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_TSO_MODE, &cmd);
2164 if (status) {
2165 cmn_err(CE_WARN, "%s: unable to setup TSO (%d)\n",
2166 mgp->name, status);
2167 } else {
2168 mgp->features |= MYRI10GE_TSO;
2171 mgp->link_state = -1;
2172 mgp->rdma_tags_available = 15;
2173 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_UP, &cmd);
2174 if (status) {
2175 cmn_err(CE_WARN, "%s: unable to start ethernet\n", mgp->name);
2176 goto abort_with_slices;
2178 mgp->running = MYRI10GE_ETH_RUNNING;
2179 return (DDI_SUCCESS);
2181 abort_with_slices:
2182 for (i = 0; i < mgp->num_slices; i++)
2183 myri10ge_teardown_slice(&mgp->ss[i]);
2185 mgp->running = MYRI10GE_ETH_STOPPED;
2187 abort_with_toeplitz:
2188 if (mgp->toeplitz_hash_table != NULL) {
2189 kmem_free(mgp->toeplitz_hash_table,
2190 sizeof (uint32_t) * 12 * 256);
2191 mgp->toeplitz_hash_table = NULL;
2194 abort_with_nothing:
2195 return (DDI_FAILURE);
2198 static void
2199 myri10ge_stop_locked(struct myri10ge_priv *mgp)
2201 int status, old_down_cnt;
2202 myri10ge_cmd_t cmd;
2203 int wait_time = 10;
2204 int i, polling;
2206 old_down_cnt = mgp->down_cnt;
2207 mb();
2208 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
2209 if (status) {
2210 cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name);
2213 while (old_down_cnt == *((volatile int *)&mgp->down_cnt)) {
2214 delay(1 * drv_usectohz(1000000));
2215 wait_time--;
2216 if (wait_time == 0)
2217 break;
2219 again:
2220 if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) {
2221 cmn_err(CE_WARN, "%s: didn't get down irq\n", mgp->name);
2222 for (i = 0; i < mgp->num_slices; i++) {
2223 /*
2224 * take and release the rx lock to ensure
2225 * that no interrupt thread is blocked
2226 * elsewhere in the stack, preventing
2227 * completion
2228 */
2230 mutex_enter(&mgp->ss[i].rx_lock);
2231 printf("%s: slice %d rx irq idle\n",
2232 mgp->name, i);
2233 mutex_exit(&mgp->ss[i].rx_lock);
2235 /* verify that the poll handler is inactive */
2236 mutex_enter(&mgp->ss->poll_lock);
2237 polling = mgp->ss->rx_polling;
2238 mutex_exit(&mgp->ss->poll_lock);
2239 if (polling) {
2240 printf("%s: slice %d is polling\n",
2241 mgp->name, i);
2242 delay(1 * drv_usectohz(1000000));
2243 goto again;
2246 delay(1 * drv_usectohz(1000000));
2247 if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) {
2248 cmn_err(CE_WARN, "%s: Never got down irq\n", mgp->name);
2252 for (i = 0; i < mgp->num_slices; i++)
2253 myri10ge_teardown_slice(&mgp->ss[i]);
2255 if (mgp->toeplitz_hash_table != NULL) {
2256 kmem_free(mgp->toeplitz_hash_table,
2257 sizeof (uint32_t) * 12 * 256);
2258 mgp->toeplitz_hash_table = NULL;
2260 mgp->running = MYRI10GE_ETH_STOPPED;
2263 static int
2264 myri10ge_m_start(void *arg)
2266 struct myri10ge_priv *mgp = arg;
2267 int status;
2269 mutex_enter(&mgp->intrlock);
2271 if (mgp->running != MYRI10GE_ETH_STOPPED) {
2272 mutex_exit(&mgp->intrlock);
2273 return (DDI_FAILURE);
2275 status = myri10ge_start_locked(mgp);
2276 mutex_exit(&mgp->intrlock);
2278 if (status != DDI_SUCCESS)
2279 return (status);
2281 /* start the watchdog timer */
2282 mgp->timer_id = timeout(myri10ge_watchdog, mgp,
2283 mgp->timer_ticks);
2284 return (DDI_SUCCESS);
2288 static void
2289 myri10ge_m_stop(void *arg)
2291 struct myri10ge_priv *mgp = arg;
2293 mutex_enter(&mgp->intrlock);
2294 /* if the device is not running, give up */
2295 if (mgp->running != MYRI10GE_ETH_RUNNING) {
2296 mutex_exit(&mgp->intrlock);
2297 return;
2300 mgp->running = MYRI10GE_ETH_STOPPING;
2301 mutex_exit(&mgp->intrlock);
2302 (void) untimeout(mgp->timer_id);
2303 mutex_enter(&mgp->intrlock);
2304 myri10ge_stop_locked(mgp);
2305 mutex_exit(&mgp->intrlock);
2309 static inline void
2310 myri10ge_rx_csum(mblk_t *mp, struct myri10ge_rx_ring_stats *s, uint32_t csum)
2312 struct ether_header *eh;
2313 struct ip *ip;
2314 struct ip6_hdr *ip6;
2315 uint32_t start, stuff, end, partial, hdrlen;
2318 csum = ntohs((uint16_t)csum);
2319 eh = (struct ether_header *)(void *)mp->b_rptr;
2320 hdrlen = sizeof (*eh);
2321 if (eh->ether_dhost.ether_addr_octet[0] & 1) {
2322 if (0 == (bcmp(eh->ether_dhost.ether_addr_octet,
2323 myri10ge_broadcastaddr, sizeof (eh->ether_dhost))))
2324 s->brdcstrcv++;
2325 else
2326 s->multircv++;
2329 if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) {
2330 /*
2331 * fix checksum by subtracting 4 bytes after what the
2332 * firmware thought was the end of the ether hdr
2333 */
2334 partial = *(uint32_t *)
2335 (void *)(mp->b_rptr + ETHERNET_HEADER_SIZE);
2336 csum += ~partial;
2337 csum += (csum < ~partial);
2338 csum = (csum >> 16) + (csum & 0xFFFF);
2339 csum = (csum >> 16) + (csum & 0xFFFF);
2340 hdrlen += VLAN_TAGSZ;
2341 }
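/*
 * The fix-up above is a ones-complement subtraction: adding ~partial
 * and folding the end-around carry back in (csum < ~partial)
 * subtracts the 4 VLAN-tag bytes from the hardware's partial sum,
 * and the two shift-and-add steps fold the 32-bit result back down
 * to 16 bits.
 */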
2343 if (eh->ether_type == BE_16(ETHERTYPE_IP)) {
2344 ip = (struct ip *)(void *)(mp->b_rptr + hdrlen);
2345 start = ip->ip_hl << 2;
2347 if (ip->ip_p == IPPROTO_TCP)
2348 stuff = start + offsetof(struct tcphdr, th_sum);
2349 else if (ip->ip_p == IPPROTO_UDP)
2350 stuff = start + offsetof(struct udphdr, uh_sum);
2351 else
2352 return;
2353 end = ntohs(ip->ip_len);
2354 } else if (eh->ether_type == BE_16(ETHERTYPE_IPV6)) {
2355 ip6 = (struct ip6_hdr *)(void *)(mp->b_rptr + hdrlen);
2356 start = sizeof (*ip6);
2357 if (ip6->ip6_nxt == IPPROTO_TCP) {
2358 stuff = start + offsetof(struct tcphdr, th_sum);
2359 } else if (ip6->ip6_nxt == IPPROTO_UDP)
2360 stuff = start + offsetof(struct udphdr, uh_sum);
2361 else
2362 return;
2363 end = start + ntohs(ip6->ip6_plen);
2364 /*
2365 * IPv6 headers do not contain a checksum, and hence
2366 * do not checksum to zero, so they don't "fall out"
2367 * of the partial checksum calculation like IPv4
2368 * headers do. We need to fix the partial checksum by
2369 * subtracting the checksum of the IPv6 header.
2370 */
2372 partial = myri10ge_csum_generic((uint16_t *)ip6, sizeof (*ip6));
2373 csum += ~partial;
2374 csum += (csum < ~partial);
2375 csum = (csum >> 16) + (csum & 0xFFFF);
2376 csum = (csum >> 16) + (csum & 0xFFFF);
2377 } else {
2378 return;
2381 if (MBLKL(mp) > hdrlen + end) {
2382 /* padded frame, so hw csum may be invalid */
2383 return;
2386 mac_hcksum_set(mp, start, stuff, end, csum, HCK_PARTIALCKSUM);
2387 }
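/*
 * Note on myri10ge_rx_csum() above: HCK_PARTIALCKSUM hands the stack
 * the adapter's 16-bit partial checksum together with the (start,
 * stuff, end) offsets describing which bytes it covers and where the
 * transport checksum field sits, so the stack can finish
 * verification without walking the payload again.
 */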
2389 static mblk_t *
2390 myri10ge_rx_done_small(struct myri10ge_slice_state *ss, uint32_t len,
2391 uint32_t csum)
2393 mblk_t *mp;
2394 myri10ge_rx_ring_t *rx;
2395 int idx;
2397 rx = &ss->rx_small;
2398 idx = rx->cnt & rx->mask;
2399 ss->rx_small.cnt++;
2401 /* allocate a new buffer to pass up the stack */
2402 mp = allocb(len + MXGEFW_PAD, 0);
2403 if (mp == NULL) {
2404 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_small_nobuf);
2405 goto abort;
2407 bcopy(ss->rx_small.info[idx].ptr,
2408 (caddr_t)mp->b_wptr, len + MXGEFW_PAD);
2409 mp->b_wptr += len + MXGEFW_PAD;
2410 mp->b_rptr += MXGEFW_PAD;
2412 ss->rx_stats.ibytes += len;
2413 ss->rx_stats.ipackets += 1;
2414 myri10ge_rx_csum(mp, &ss->rx_stats, csum);
2416 abort:
2417 if ((idx & 7) == 7) {
2418 myri10ge_submit_8rx(&rx->lanai[idx - 7],
2419 &rx->shadow[idx - 7]);
2420 }
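/*
 * Receive slots are returned to the NIC in batches of eight: eight
 * consecutive receive descriptors make up one 64-byte PIO write
 * (myri10ge_submit_8rx()), which is why the ring is only restocked
 * when idx crosses a multiple-of-8 boundary.
 */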
2422 return (mp);
2426 static mblk_t *
2427 myri10ge_rx_done_big(struct myri10ge_slice_state *ss, uint32_t len,
2428 uint32_t csum)
2430 struct myri10ge_jpool_stuff *jpool;
2431 struct myri10ge_jpool_entry *j;
2432 mblk_t *mp;
2433 int idx, num_owned_by_mcp;
2435 jpool = &ss->jpool;
2436 idx = ss->j_rx_cnt & ss->rx_big.mask;
2437 j = ss->rx_big.info[idx].j;
2439 if (j == NULL) {
2440 printf("%s: null j at idx=%d, rx_big.cnt = %d, j_rx_cnt=%d\n",
2441 ss->mgp->name, idx, ss->rx_big.cnt, ss->j_rx_cnt);
2442 return (NULL);
2446 ss->rx_big.info[idx].j = NULL;
2447 ss->j_rx_cnt++;
2450 /*
2451 * Check to see if we are low on rx buffers.
2452 * Note that we must leave at least 8 free so there are
2453 * enough to free in a single 64-byte write.
2454 */
2455 num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt;
2456 if (num_owned_by_mcp < jpool->low_water) {
2457 mutex_enter(&jpool->mtx);
2458 myri10ge_restock_jumbos(ss);
2459 mutex_exit(&jpool->mtx);
2460 num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt;
2461 /* if we are still low, then we have to copy */
2462 if (num_owned_by_mcp < 16) {
2463 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_copy);
2464 /* allocate a new buffer to pass up the stack */
2465 mp = allocb(len + MXGEFW_PAD, 0);
2466 if (mp == NULL) {
2467 goto abort;
2469 bcopy(j->buf,
2470 (caddr_t)mp->b_wptr, len + MXGEFW_PAD);
2471 myri10ge_jfree_rtn(j);
2472 /* push buffer back to NIC */
2473 mutex_enter(&jpool->mtx);
2474 myri10ge_restock_jumbos(ss);
2475 mutex_exit(&jpool->mtx);
2476 goto set_len;
2480 /* loan our buffer to the stack */
2481 mp = desballoc((unsigned char *)j->buf, myri10ge_mtu, 0, &j->free_func);
2482 if (mp == NULL) {
2483 goto abort;
2486 set_len:
2487 mp->b_rptr += MXGEFW_PAD;
2488 mp->b_wptr = ((unsigned char *) mp->b_rptr + len);
2490 ss->rx_stats.ibytes += len;
2491 ss->rx_stats.ipackets += 1;
2492 myri10ge_rx_csum(mp, &ss->rx_stats, csum);
2494 return (mp);
2496 abort:
2497 myri10ge_jfree_rtn(j);
2498 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_big_nobuf);
2499 return (NULL);
2500 }
2501
2502 /*
2503 * Free all transmit buffers up until the specified index
2504 */
2505 static inline void
2506 myri10ge_tx_done(struct myri10ge_slice_state *ss, uint32_t mcp_index)
2508 myri10ge_tx_ring_t *tx;
2509 struct myri10ge_tx_dma_handle_head handles;
2510 int idx;
2511 int limit = 0;
2513 tx = &ss->tx;
2514 handles.head = NULL;
2515 handles.tail = NULL;
2516 while (tx->pkt_done != (int)mcp_index) {
2517 idx = tx->done & tx->mask;
2519 /*
2520 * mblk & DMA handle attached only to first slot
2521 * per buffer in the packet
2522 */
2524 if (tx->info[idx].m) {
2525 (void) ddi_dma_unbind_handle(tx->info[idx].handle->h);
2526 tx->info[idx].handle->next = handles.head;
2527 handles.head = tx->info[idx].handle;
2528 if (handles.tail == NULL)
2529 handles.tail = tx->info[idx].handle;
2530 freeb(tx->info[idx].m);
2531 tx->info[idx].m = 0;
2532 tx->info[idx].handle = 0;
2534 if (tx->info[idx].ostat.opackets != 0) {
2535 tx->stats.multixmt += tx->info[idx].ostat.multixmt;
2536 tx->stats.brdcstxmt += tx->info[idx].ostat.brdcstxmt;
2537 tx->stats.obytes += tx->info[idx].ostat.obytes;
2538 tx->stats.opackets += tx->info[idx].ostat.opackets;
2539 tx->info[idx].stat.un.all = 0;
2540 tx->pkt_done++;
2543 tx->done++;
2544 /*
2545 * if we stalled the queue, wake it, but wait until
2546 * we have at least 1/2 our slots free.
2547 */
2548 if ((tx->req - tx->done) < (tx->mask >> 1) &&
2549 tx->stall != tx->sched) {
2550 mutex_enter(&ss->tx.lock);
2551 tx->sched = tx->stall;
2552 mutex_exit(&ss->tx.lock);
2553 mac_tx_ring_update(ss->mgp->mh, tx->rh);
2556 /* limit potential for livelock */
2557 if (unlikely(++limit > 2 * tx->mask))
2558 break;
2560 if (tx->req == tx->done && tx->stop != NULL) {
2561 /*
2562 * Nic has sent all pending requests, allow it
2563 * to stop polling this queue
2564 */
2565 mutex_enter(&tx->lock);
2566 if (tx->req == tx->done && tx->active) {
2567 *(int *)(void *)tx->stop = 1;
2568 tx->active = 0;
2569 mb();
2571 mutex_exit(&tx->lock);
2573 if (handles.head != NULL)
2574 myri10ge_free_tx_handles(tx, &handles);
2577 static void
2578 myri10ge_mbl_init(struct myri10ge_mblk_list *mbl)
2580 mbl->head = NULL;
2581 mbl->tail = &mbl->head;
2582 mbl->cnt = 0;
2585 /*ARGSUSED*/
2586 void
2587 myri10ge_mbl_append(struct myri10ge_slice_state *ss,
2588 struct myri10ge_mblk_list *mbl, mblk_t *mp)
2590 *(mbl->tail) = mp;
2591 mbl->tail = &mp->b_next;
2592 mp->b_next = NULL;
2593 mbl->cnt++;
2597 static inline void
2598 myri10ge_clean_rx_done(struct myri10ge_slice_state *ss,
2599 struct myri10ge_mblk_list *mbl, int limit, boolean_t *stop)
2601 myri10ge_rx_done_t *rx_done = &ss->rx_done;
2602 struct myri10ge_priv *mgp = ss->mgp;
2603 mblk_t *mp;
2604 struct lro_entry *lro;
2605 uint16_t length;
2606 uint16_t checksum;
2609 while (rx_done->entry[rx_done->idx].length != 0) {
2610 if (unlikely (*stop)) {
2611 break;
2613 length = ntohs(rx_done->entry[rx_done->idx].length);
2614 length &= (~MXGEFW_RSS_HASH_MASK);
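/*
 * The upper bits of the received length appear to carry the RSS
 * hash-type bits (hence MXGEFW_RSS_HASH_MASK); only the low bits are
 * the true byte count, so the hash bits are masked off before the
 * length is used.
 */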
2616 /* limit potential for livelock */
2617 limit -= length;
2618 if (unlikely(limit < 0))
2619 break;
2621 rx_done->entry[rx_done->idx].length = 0;
2622 checksum = ntohs(rx_done->entry[rx_done->idx].checksum);
2623 if (length <= myri10ge_small_bytes)
2624 mp = myri10ge_rx_done_small(ss, length, checksum);
2625 else
2626 mp = myri10ge_rx_done_big(ss, length, checksum);
2627 if (mp != NULL) {
2628 if (!myri10ge_lro ||
2629 0 != myri10ge_lro_rx(ss, mp, checksum, mbl))
2630 myri10ge_mbl_append(ss, mbl, mp);
2632 rx_done->cnt++;
2633 rx_done->idx = rx_done->cnt & (mgp->max_intr_slots - 1);
2635 while (ss->lro_active != NULL) {
2636 lro = ss->lro_active;
2637 ss->lro_active = lro->next;
2638 myri10ge_lro_flush(ss, lro, mbl);
2642 static void
2643 myri10ge_intr_rx(struct myri10ge_slice_state *ss)
2645 uint64_t gen;
2646 struct myri10ge_mblk_list mbl;
2648 myri10ge_mbl_init(&mbl);
2649 if (mutex_tryenter(&ss->rx_lock) == 0)
2650 return;
2651 gen = ss->rx_gen_num;
2652 myri10ge_clean_rx_done(ss, &mbl, MYRI10GE_POLL_NULL,
2653 &ss->rx_polling);
2654 if (mbl.head != NULL)
2655 mac_rx_ring(ss->mgp->mh, ss->rx_rh, mbl.head, gen);
2656 mutex_exit(&ss->rx_lock);
2660 static mblk_t *
2661 myri10ge_poll_rx(void *arg, int bytes)
2663 struct myri10ge_slice_state *ss = arg;
2664 struct myri10ge_mblk_list mbl;
2665 boolean_t dummy = B_FALSE;
2667 if (bytes == 0)
2668 return (NULL);
2670 myri10ge_mbl_init(&mbl);
2671 mutex_enter(&ss->rx_lock);
2672 if (ss->rx_polling)
2673 myri10ge_clean_rx_done(ss, &mbl, bytes, &dummy);
2674 else
2675 printf("%d: poll_rx: token=%d, polling=%d\n", (int)(ss -
2676 ss->mgp->ss), ss->rx_token, ss->rx_polling);
2677 mutex_exit(&ss->rx_lock);
2678 return (mbl.head);
2681 /*ARGSUSED*/
2682 static uint_t
2683 myri10ge_intr(caddr_t arg0, caddr_t arg1)
2685 struct myri10ge_slice_state *ss =
2686 (struct myri10ge_slice_state *)(void *)arg0;
2687 struct myri10ge_priv *mgp = ss->mgp;
2688 mcp_irq_data_t *stats = ss->fw_stats;
2689 myri10ge_tx_ring_t *tx = &ss->tx;
2690 uint32_t send_done_count;
2691 uint8_t valid;
2694 /* make sure the DMA has finished */
2695 if (!stats->valid) {
2696 return (DDI_INTR_UNCLAIMED);
2698 valid = stats->valid;
2700 /* low bit indicates receives are present */
2701 if (valid & 1)
2702 myri10ge_intr_rx(ss);
2704 if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
2705 /* lower legacy IRQ */
2706 *mgp->irq_deassert = 0;
2707 if (!myri10ge_deassert_wait)
2708 /* don't wait for conf. that irq is low */
2709 stats->valid = 0;
2710 mb();
2711 } else {
2712 /* no need to wait for conf. that irq is low */
2713 stats->valid = 0;
2716 do {
2717 /* check for transmit completes and receives */
2718 send_done_count = ntohl(stats->send_done_count);
2719 if (send_done_count != tx->pkt_done)
2720 myri10ge_tx_done(ss, (int)send_done_count);
2721 } while (*((volatile uint8_t *) &stats->valid));
2723 if (stats->stats_updated) {
2724 if (mgp->link_state != stats->link_up || stats->link_down) {
2725 mgp->link_state = stats->link_up;
2726 if (stats->link_down) {
2727 mgp->down_cnt += stats->link_down;
2728 mgp->link_state = 0;
2730 if (mgp->link_state) {
2731 if (myri10ge_verbose)
2732 printf("%s: link up\n", mgp->name);
2733 mac_link_update(mgp->mh, LINK_STATE_UP);
2734 } else {
2735 if (myri10ge_verbose)
2736 printf("%s: link down\n", mgp->name);
2737 mac_link_update(mgp->mh, LINK_STATE_DOWN);
2739 MYRI10GE_NIC_STAT_INC(link_changes);
2741 if (mgp->rdma_tags_available !=
2742 ntohl(ss->fw_stats->rdma_tags_available)) {
2743 mgp->rdma_tags_available =
2744 ntohl(ss->fw_stats->rdma_tags_available);
2745 cmn_err(CE_NOTE, "%s: RDMA timed out! "
2746 "%d tags left\n", mgp->name,
2747 mgp->rdma_tags_available);
2751 mb();
2752 /* check to see if we have an rx token to pass back */
2753 if (valid & 0x1) {
2754 mutex_enter(&ss->poll_lock);
2755 if (ss->rx_polling) {
2756 ss->rx_token = 1;
2757 } else {
2758 *ss->irq_claim = BE_32(3);
2759 ss->rx_token = 0;
2761 mutex_exit(&ss->poll_lock);
2763 *(ss->irq_claim + 1) = BE_32(3);
2764 return (DDI_INTR_CLAIMED);
2765 }
2766
2767 /*
2768 * Add or remove a multicast address. This is called with our
2769 * macinfo's lock held by GLD, so we do not need to worry about
2770 * our own locking here.
2771 */
2772 static int
2773 myri10ge_m_multicst(void *arg, boolean_t add, const uint8_t *multicastaddr)
2775 myri10ge_cmd_t cmd;
2776 struct myri10ge_priv *mgp = arg;
2777 int status, join_leave;
2779 if (add)
2780 join_leave = MXGEFW_JOIN_MULTICAST_GROUP;
2781 else
2782 join_leave = MXGEFW_LEAVE_MULTICAST_GROUP;
2783 (void) memcpy(&cmd.data0, multicastaddr, 4);
2784 (void) memcpy(&cmd.data1, multicastaddr + 4, 2);
2785 cmd.data0 = htonl(cmd.data0);
2786 cmd.data1 = htonl(cmd.data1);
2787 status = myri10ge_send_cmd(mgp, join_leave, &cmd);
2788 if (status == 0)
2789 return (0);
2791 cmn_err(CE_WARN, "%s: failed to set multicast address\n",
2792 mgp->name);
2793 return (status);
2797 static int
2798 myri10ge_m_promisc(void *arg, boolean_t on)
2800 struct myri10ge_priv *mgp = arg;
2802 myri10ge_change_promisc(mgp, on);
2803 return (0);
2804 }
2805
2806 /*
2807 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
2808 * backwards one at a time and handle ring wraps
2809 */
2811 static inline void
2812 myri10ge_submit_req_backwards(myri10ge_tx_ring_t *tx,
2813 mcp_kreq_ether_send_t *src, int cnt)
2815 int idx, starting_slot;
2816 starting_slot = tx->req;
2817 while (cnt > 1) {
2818 cnt--;
2819 idx = (starting_slot + cnt) & tx->mask;
2820 myri10ge_pio_copy(&tx->lanai[idx],
2821 &src[cnt], sizeof (*src));
2822 mb();
2823 }
2824 }
2825
2826 /*
2827 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
2828 * at most 32 bytes at a time, so as to avoid involving the software
2829 * pio handler in the nic. We re-write the first segment's flags
2830 * to mark them valid only after writing the entire chain
2831 */
2833 static inline void
2834 myri10ge_submit_req(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
2835 int cnt)
2837 int idx, i;
2838 uint32_t *src_ints, *dst_ints;
2839 mcp_kreq_ether_send_t *srcp, *dstp, *dst;
2840 uint8_t last_flags;
2842 idx = tx->req & tx->mask;
2844 last_flags = src->flags;
2845 src->flags = 0;
2846 mb();
2847 dst = dstp = &tx->lanai[idx];
2848 srcp = src;
2850 if ((idx + cnt) < tx->mask) {
2851 for (i = 0; i < (cnt - 1); i += 2) {
2852 myri10ge_pio_copy(dstp, srcp, 2 * sizeof (*src));
2853 mb(); /* force write every 32 bytes */
2854 srcp += 2;
2855 dstp += 2;
2857 } else {
2858 /*
2859 * submit all but the first request, and ensure
2860 * that it is submitted below
2861 */
2862 myri10ge_submit_req_backwards(tx, src, cnt);
2863 i = 0;
2865 if (i < cnt) {
2866 /* submit the first request */
2867 myri10ge_pio_copy(dstp, srcp, sizeof (*src));
2868 mb(); /* barrier before setting valid flag */
2871 /* re-write the last 32-bits with the valid flags */
2872 src->flags |= last_flags;
2873 src_ints = (uint32_t *)src;
2874 src_ints += 3;
2875 dst_ints = (uint32_t *)dst;
2876 dst_ints += 3;
2877 *dst_ints = *src_ints;
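/*
 * This final 4-byte store is what publishes the request: the flags
 * byte lives in the last 32-bit word of the 16-byte descriptor, and
 * the NIC treats a zero flags byte as not-yet-valid, so writing it
 * only after the rest of the chain has landed makes the whole
 * submission appear atomic to the NIC.
 */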
2878 tx->req += cnt;
2879 mb();
2880 /* notify NIC to poll this tx ring */
2881 if (!tx->active && tx->go != NULL) {
2882 *(int *)(void *)tx->go = 1;
2883 tx->active = 1;
2884 tx->activate++;
2885 mb();
2889 /* ARGSUSED */
2890 static inline void
2891 myri10ge_lso_info_get(mblk_t *mp, uint32_t *mss, uint32_t *flags)
2893 uint32_t lso_flag;
2894 mac_lso_get(mp, mss, &lso_flag);
2895 (*flags) |= lso_flag;
2899 /* like pullupmsg, except preserve hcksum/LSO attributes */
2900 static int
2901 myri10ge_pullup(struct myri10ge_slice_state *ss, mblk_t *mp)
2903 uint32_t start, stuff, tx_offload_flags, mss;
2904 int ok;
2906 mss = 0;
2907 mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags);
2908 myri10ge_lso_info_get(mp, &mss, &tx_offload_flags);
2910 ok = pullupmsg(mp, -1);
2911 if (!ok) {
2912 printf("pullupmsg failed");
2913 return (DDI_FAILURE);
2915 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_pullup);
2916 mac_hcksum_set(mp, start, stuff, NULL, NULL, tx_offload_flags);
2917 if (tx_offload_flags & HW_LSO)
2918 DB_LSOMSS(mp) = (uint16_t)mss;
2919 lso_info_set(mp, mss, tx_offload_flags);
2920 return (DDI_SUCCESS);
2923 static inline void
2924 myri10ge_tx_stat(struct myri10ge_tx_pkt_stats *s, struct ether_header *eh,
2925 int opackets, int obytes)
2927 s->un.all = 0;
2928 if (eh->ether_dhost.ether_addr_octet[0] & 1) {
2929 if (0 == (bcmp(eh->ether_dhost.ether_addr_octet,
2930 myri10ge_broadcastaddr, sizeof (eh->ether_dhost))))
2931 s->un.s.brdcstxmt = 1;
2932 else
2933 s->un.s.multixmt = 1;
2935 s->un.s.opackets = (uint16_t)opackets;
2936 s->un.s.obytes = obytes;
2939 static int
2940 myri10ge_tx_copy(struct myri10ge_slice_state *ss, mblk_t *mp,
2941 mcp_kreq_ether_send_t *req)
2943 myri10ge_tx_ring_t *tx = &ss->tx;
2944 caddr_t ptr;
2945 struct myri10ge_tx_copybuf *cp;
2946 mblk_t *bp;
2947 int idx, mblen, avail;
2948 uint16_t len;
2950 mutex_enter(&tx->lock);
2951 avail = tx->mask - (tx->req - tx->done);
2952 if (avail <= 1) {
2953 mutex_exit(&tx->lock);
2954 return (EBUSY);
2956 idx = tx->req & tx->mask;
2957 cp = &tx->cp[idx];
2958 ptr = cp->va;
2959 for (len = 0, bp = mp; bp != NULL; bp = bp->b_cont) {
2960 mblen = MBLKL(bp);
2961 bcopy(bp->b_rptr, ptr, mblen);
2962 ptr += mblen;
2963 len += mblen;
2965 /* ensure runts are padded to 60 bytes */
2966 if (len < 60) {
2967 bzero(ptr, 64 - len);
2968 len = 60;
2970 req->addr_low = cp->dma.low;
2971 req->addr_high = cp->dma.high;
2972 req->length = htons(len);
2973 req->pad = 0;
2974 req->rdma_count = 1;
2975 myri10ge_tx_stat(&tx->info[idx].stat,
2976 (struct ether_header *)(void *)cp->va, 1, len);
2977 (void) ddi_dma_sync(cp->dma.handle, 0, len, DDI_DMA_SYNC_FORDEV);
2978 myri10ge_submit_req(&ss->tx, req, 1);
2979 mutex_exit(&tx->lock);
2980 freemsg(mp);
2981 return (DDI_SUCCESS);
2985 static void
2986 myri10ge_send_locked(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *req_list,
2987 struct myri10ge_tx_buffer_state *tx_info,
2988 int count)
2990 int i, idx;
2992 idx = 0; /* gcc -Wuninitialized */
2993 /* store unmapping and bp info for tx irq handler */
2994 for (i = 0; i < count; i++) {
2995 idx = (tx->req + i) & tx->mask;
2996 tx->info[idx].m = tx_info[i].m;
2997 tx->info[idx].handle = tx_info[i].handle;
2999 tx->info[idx].stat.un.all = tx_info[0].stat.un.all;
3001 /* submit the frame to the nic */
3002 myri10ge_submit_req(tx, req_list, count);
3009 static void
3010 myri10ge_copydata(mblk_t *mp, int off, int len, caddr_t buf)
3012 mblk_t *bp;
3013 int seglen;
3014 uint_t count;
3016 bp = mp;
3018 while (off > 0) {
3019 seglen = MBLKL(bp);
3020 if (off < seglen)
3021 break;
3022 off -= seglen;
3023 bp = bp->b_cont;
3025 while (len > 0) {
3026 seglen = MBLKL(bp);
3027 count = min(seglen - off, len);
3028 bcopy(bp->b_rptr + off, buf, count);
3029 len -= count;
3030 buf += count;
3031 off = 0;
3032 bp = bp->b_cont;
3036 static int
3037 myri10ge_ether_parse_header(mblk_t *mp)
3039 struct ether_header eh_copy;
3040 struct ether_header *eh;
3041 int eth_hdr_len, seglen;
3043 seglen = MBLKL(mp);
3044 eth_hdr_len = sizeof (*eh);
3045 if (seglen < eth_hdr_len) {
3046 myri10ge_copydata(mp, 0, eth_hdr_len, (caddr_t)&eh_copy);
3047 eh = &eh_copy;
3048 } else {
3049 eh = (struct ether_header *)(void *)mp->b_rptr;
3051 if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) {
3052 eth_hdr_len += 4;
3055 return (eth_hdr_len);
3058 static int
3059 myri10ge_lso_parse_header(mblk_t *mp, int off)
3061 char buf[128];
3062 int seglen, sum_off;
3063 struct ip *ip;
3064 struct tcphdr *tcp;
3066 seglen = MBLKL(mp);
3067 if (seglen < off + sizeof (*ip)) {
3068 myri10ge_copydata(mp, off, sizeof (*ip), buf);
3069 ip = (struct ip *)(void *)buf;
3070 } else {
3071 ip = (struct ip *)(void *)(mp->b_rptr + off);
3073 if (seglen < off + (ip->ip_hl << 2) + sizeof (*tcp)) {
3074 myri10ge_copydata(mp, off,
3075 (ip->ip_hl << 2) + sizeof (*tcp), buf);
3076 ip = (struct ip *)(void *)buf;
3078 tcp = (struct tcphdr *)(void *)((char *)ip + (ip->ip_hl << 2));
3080 /*
3081 * NIC expects ip_sum to be zero. Recent changes to
3082 * OpenSolaris leave the correct ip checksum there, rather
3083 * than the required zero, so we need to zero it. Otherwise,
3084 * the NIC will produce bad checksums when sending LSO packets.
3085 */
3086 if (ip->ip_sum != 0) {
3087 if (((char *)ip) != buf) {
3088 /* ip points into mblk, so just zero it */
3089 ip->ip_sum = 0;
3090 } else {
3091 /*
3092 * ip points into a copy, so walk the chain
3093 * to find the ip_csum, then zero it
3094 */
3095 sum_off = off + _PTRDIFF(&ip->ip_sum, buf);
3096 while (sum_off > (int)(MBLKL(mp) - 1)) {
3097 sum_off -= MBLKL(mp);
3098 mp = mp->b_cont;
3100 mp->b_rptr[sum_off] = 0;
3101 sum_off++;
3102 while (sum_off > MBLKL(mp) - 1) {
3103 sum_off -= MBLKL(mp);
3104 mp = mp->b_cont;
3106 mp->b_rptr[sum_off] = 0;
3107 }
3108 }
3109 return (off + ((ip->ip_hl + tcp->th_off) << 2));
3110 }
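/*
 * The value returned above is the total LSO header length: maclen
 * (off) plus the IP and TCP header lengths, ip_hl and th_off both
 * being counted in 32-bit words. With no options (ip_hl == 5,
 * th_off == 5) and a plain 14-byte ether header that is
 * 14 + 20 + 20 = 54 bytes.
 */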
3112 static int
3113 myri10ge_tx_tso_copy(struct myri10ge_slice_state *ss, mblk_t *mp,
3114 mcp_kreq_ether_send_t *req_list, int hdr_size, int pkt_size,
3115 uint16_t mss, uint8_t cksum_offset)
3117 myri10ge_tx_ring_t *tx = &ss->tx;
3118 struct myri10ge_priv *mgp = ss->mgp;
3119 mblk_t *bp;
3120 mcp_kreq_ether_send_t *req;
3121 struct myri10ge_tx_copybuf *cp;
3122 caddr_t rptr, ptr;
3123 int mblen, count, cum_len, mss_resid, tx_req, pkt_size_tmp;
3124 int resid, avail, idx, hdr_size_tmp, tx_boundary;
3125 int rdma_count;
3126 uint32_t seglen, len, boundary, low, high_swapped;
3127 uint16_t pseudo_hdr_offset = htons(mss);
3128 uint8_t flags;
3130 tx_boundary = mgp->tx_boundary;
3131 hdr_size_tmp = hdr_size;
3132 resid = tx_boundary;
3133 count = 1;
3134 mutex_enter(&tx->lock);
3136 /* check to see if the slots are really there */
3137 avail = tx->mask - (tx->req - tx->done);
3138 if (unlikely(avail <= MYRI10GE_MAX_SEND_DESC_TSO)) {
3139 atomic_inc_32(&tx->stall);
3140 mutex_exit(&tx->lock);
3141 return (EBUSY);
3144 /* copy */
3145 cum_len = -hdr_size;
3146 count = 0;
3147 req = req_list;
3148 idx = tx->mask & tx->req;
3149 cp = &tx->cp[idx];
3150 low = ntohl(cp->dma.low);
3151 ptr = cp->va;
3152 cp->len = 0;
3153 if (mss) {
3154 int payload = pkt_size - hdr_size;
3155 uint16_t opackets = (payload / mss) + ((payload % mss) != 0);
3156 tx->info[idx].ostat.opackets = opackets;
3157 tx->info[idx].ostat.obytes = (opackets - 1) * hdr_size
3158 + pkt_size;
3159 }
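/*
 * Accounting sketch: a pkt_size-byte LSO frame with hdr_size bytes
 * of headers becomes ceil(payload / mss) wire packets, each
 * repeating the headers, so obytes charges one extra hdr_size per
 * segment beyond the first. E.g. pkt_size 8254, hdr_size 54,
 * mss 1400: payload 8200 -> 6 packets, 8254 + 5*54 = 8524 bytes.
 */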
3160 hdr_size_tmp = hdr_size;
3161 mss_resid = mss;
3162 flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST);
3163 tx_req = tx->req;
3164 for (bp = mp; bp != NULL; bp = bp->b_cont) {
3165 mblen = MBLKL(bp);
3166 rptr = (caddr_t)bp->b_rptr;
3167 len = min(hdr_size_tmp, mblen);
3168 if (len) {
3169 bcopy(rptr, ptr, len);
3170 rptr += len;
3171 ptr += len;
3172 resid -= len;
3173 mblen -= len;
3174 hdr_size_tmp -= len;
3175 cp->len += len;
3176 if (hdr_size_tmp)
3177 continue;
3178 if (resid < mss) {
3179 tx_req++;
3180 idx = tx->mask & tx_req;
3181 cp = &tx->cp[idx];
3182 low = ntohl(cp->dma.low);
3183 ptr = cp->va;
3184 resid = tx_boundary;
3187 while (mblen) {
3188 len = min(mss_resid, mblen);
3189 bcopy(rptr, ptr, len);
3190 mss_resid -= len;
3191 resid -= len;
3192 mblen -= len;
3193 rptr += len;
3194 ptr += len;
3195 cp->len += len;
3196 if (mss_resid == 0) {
3197 mss_resid = mss;
3198 if (resid < mss) {
3199 tx_req++;
3200 idx = tx->mask & tx_req;
3201 cp = &tx->cp[idx];
3202 cp->len = 0;
3203 low = ntohl(cp->dma.low);
3204 ptr = cp->va;
3205 resid = tx_boundary;
3211 req = req_list;
3212 pkt_size_tmp = pkt_size;
3213 count = 0;
3214 rdma_count = 0;
3215 tx_req = tx->req;
3216 while (pkt_size_tmp) {
3217 idx = tx->mask & tx_req;
3218 cp = &tx->cp[idx];
3219 high_swapped = cp->dma.high;
3220 low = ntohl(cp->dma.low);
3221 len = cp->len;
3222 if (len == 0) {
3223 printf("len=0! pkt_size_tmp=%d, pkt_size=%d\n",
3224 pkt_size_tmp, pkt_size);
3225 for (bp = mp; bp != NULL; bp = bp->b_cont) {
3226 mblen = MBLKL(bp);
3227 printf("mblen:%d\n", mblen);
3229 pkt_size_tmp = pkt_size;
3230 tx_req = tx->req;
3231 while (pkt_size_tmp > 0) {
3232 idx = tx->mask & tx_req;
3233 cp = &tx->cp[idx];
3234 printf("cp->len = %d\n", cp->len);
3235 pkt_size_tmp -= cp->len;
3236 tx_req++;
3238 printf("dropped\n");
3239 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
3240 goto done;
3242 pkt_size_tmp -= len;
3243 while (len) {
3244 while (len) {
3245 uint8_t flags_next;
3246 int cum_len_next;
3248 boundary = (low + mgp->tx_boundary) &
3249 ~(mgp->tx_boundary - 1);
3250 seglen = boundary - low;
3251 if (seglen > len)
3252 seglen = len;
3254 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
3255 cum_len_next = cum_len + seglen;
3256 (req-rdma_count)->rdma_count = rdma_count + 1;
3257 if (likely(cum_len >= 0)) {
3258 /* payload */
3259 int next_is_first, chop;
3261 chop = (cum_len_next > mss);
3262 cum_len_next = cum_len_next % mss;
3263 next_is_first = (cum_len_next == 0);
3264 flags |= chop *
3265 MXGEFW_FLAGS_TSO_CHOP;
3266 flags_next |= next_is_first *
3267 MXGEFW_FLAGS_FIRST;
3268 rdma_count |= -(chop | next_is_first);
3269 rdma_count += chop & !next_is_first;
3270 } else if (likely(cum_len_next >= 0)) {
3271 /* header ends */
3272 int small;
3274 rdma_count = -1;
3275 cum_len_next = 0;
3276 seglen = -cum_len;
3277 small = (mss <= MXGEFW_SEND_SMALL_SIZE);
3278 flags_next = MXGEFW_FLAGS_TSO_PLD |
3279 MXGEFW_FLAGS_FIRST |
3280 (small * MXGEFW_FLAGS_SMALL);
3281 }
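/*
 * The flag math above is branch-free: chop and next_is_first are
 * 0 or 1, so multiplying a flag constant by them sets it
 * conditionally, while rdma_count |= -(chop | next_is_first) writes
 * all-ones (-1) exactly at a segment boundary, and the following
 * += chop & !next_is_first leaves rdma_count at 0 when the boundary
 * came from a mid-packet chop rather than a fresh first descriptor.
 */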
3282 req->addr_high = high_swapped;
3283 req->addr_low = htonl(low);
3284 req->pseudo_hdr_offset = pseudo_hdr_offset;
3285 req->pad = 0; /* complete solid 16-byte block */
3286 req->rdma_count = 1;
3287 req->cksum_offset = cksum_offset;
3288 req->length = htons(seglen);
3289 req->flags = flags | ((cum_len & 1) *
3290 MXGEFW_FLAGS_ALIGN_ODD);
3291 if (cksum_offset > seglen)
3292 cksum_offset -= seglen;
3293 else
3294 cksum_offset = 0;
3295 low += seglen;
3296 len -= seglen;
3297 cum_len = cum_len_next;
3298 req++;
3299 req->flags = 0;
3300 flags = flags_next;
3301 count++;
3302 rdma_count++;
3305 tx_req++;
3307 (req-rdma_count)->rdma_count = (uint8_t)rdma_count;
3308 do {
3309 req--;
3310 req->flags |= MXGEFW_FLAGS_TSO_LAST;
3311 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP |
3312 MXGEFW_FLAGS_FIRST)));
3314 myri10ge_submit_req(tx, req_list, count);
3315 done:
3316 mutex_exit(&tx->lock);
3317 freemsg(mp);
3318 return (DDI_SUCCESS);
3319 }
3320
3321 /*
3322 * Try to send the chain of buffers described by the mp. We must not
3323 * encapsulate more than eth->tx.req - eth->tx.done, or
3324 * MXGEFW_MAX_SEND_DESC, whichever is less.
3325 */
3327 static int
3328 myri10ge_send(struct myri10ge_slice_state *ss, mblk_t *mp,
3329 mcp_kreq_ether_send_t *req_list, struct myri10ge_tx_buffer_state *tx_info)
3331 struct myri10ge_priv *mgp = ss->mgp;
3332 myri10ge_tx_ring_t *tx = &ss->tx;
3333 mcp_kreq_ether_send_t *req;
3334 struct myri10ge_tx_dma_handle *handles, *dma_handle = NULL;
3335 mblk_t *bp;
3336 ddi_dma_cookie_t cookie;
3337 int err, rv, count, avail, mblen, try_pullup, i, max_segs, maclen,
3338 rdma_count, cum_len, lso_hdr_size;
3339 uint32_t start, stuff, tx_offload_flags;
3340 uint32_t seglen, len, mss, boundary, low, high_swapped;
3341 uint_t ncookies;
3342 uint16_t pseudo_hdr_offset;
3343 uint8_t flags, cksum_offset, odd_flag;
3344 int pkt_size;
3345 int lso_copy = myri10ge_lso_copy;
3346 try_pullup = 1;
3348 again:
3349 /* Setup checksum offloading, if needed */
3350 mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags);
3351 myri10ge_lso_info_get(mp, &mss, &tx_offload_flags);
3352 if (tx_offload_flags & HW_LSO) {
3353 max_segs = MYRI10GE_MAX_SEND_DESC_TSO;
3354 if ((tx_offload_flags & HCK_PARTIALCKSUM) == 0) {
3355 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_lsobadflags);
3356 freemsg(mp);
3357 return (DDI_SUCCESS);
3359 } else {
3360 max_segs = MXGEFW_MAX_SEND_DESC;
3361 mss = 0;
3363 req = req_list;
3364 cksum_offset = 0;
3365 pseudo_hdr_offset = 0;
3367 /* leave an extra slot to keep the ring from wrapping */
3368 avail = tx->mask - (tx->req - tx->done);
3370 /*
3371 * If we have > MXGEFW_MAX_SEND_DESC, then any over-length
3372 * message will need to be pulled up in order to fit.
3373 * Otherwise, we are low on transmit descriptors, so it is
3374 * probably better to stall and try again rather than pull up a
3375 * message to fit.
3376 */
3378 if (avail < max_segs) {
3379 err = EBUSY;
3380 atomic_inc_32(&tx->stall_early);
3381 goto stall;
3384 /* find out how long the frame is and how many segments it is */
3385 count = 0;
3386 odd_flag = 0;
3387 pkt_size = 0;
3388 flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST);
3389 for (bp = mp; bp != NULL; bp = bp->b_cont) {
3390 dblk_t *dbp;
3391 mblen = MBLKL(bp);
3392 if (mblen == 0) {
3393 /*
3394 * we can't simply skip over 0-length mblks
3395 * because the hardware can't deal with them,
3396 * and we could leak them.
3397 */
3398 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_zero_len);
3399 err = EIO;
3400 goto pullup;
3401 }
3402 /*
3403 * There's no advantage to copying most desballoc()-
3404 * attached blocks, so disable lso copy in that case
3405 */
3406 if (mss && lso_copy == 1 && ((dbp = bp->b_datap) != NULL)) {
3407 if ((void *)dbp->db_lastfree != myri10ge_db_lastfree) {
3408 lso_copy = 0;
3411 pkt_size += mblen;
3412 count++;
3415 /* Try to pull up excessively long chains */
3416 if (count >= max_segs) {
3417 err = myri10ge_pullup(ss, mp);
3418 if (likely(err == DDI_SUCCESS)) {
3419 count = 1;
3420 } else {
3421 if (count < MYRI10GE_MAX_SEND_DESC_TSO) {
3422 /*
3423 * just let the h/w send it; it will be
3424 * inefficient, but it's better than dropping
3425 */
3426 max_segs = MYRI10GE_MAX_SEND_DESC_TSO;
3427 } else {
3428 /* drop it */
3429 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
3430 freemsg(mp);
3431 return (0);
3436 cum_len = 0;
3437 maclen = myri10ge_ether_parse_header(mp);
3439 if (tx_offload_flags & HCK_PARTIALCKSUM) {
3441 cksum_offset = start + maclen;
3442 pseudo_hdr_offset = htons(stuff + maclen);
3443 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
3444 flags |= MXGEFW_FLAGS_CKSUM;
3447 lso_hdr_size = 0; /* -Wuninitialized */
3448 if (mss) { /* LSO */
3449 /* this removes any CKSUM flag from before */
3450 flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST);
3451 /*
3452 * parse the headers and set cum_len to a negative
3453 * value to reflect the offset of the TCP payload
3454 */
3455 lso_hdr_size = myri10ge_lso_parse_header(mp, maclen);
3456 cum_len = -lso_hdr_size;
3457 if ((mss < mgp->tx_boundary) && lso_copy) {
3458 err = myri10ge_tx_tso_copy(ss, mp, req_list,
3459 lso_hdr_size, pkt_size, mss, cksum_offset);
3460 return (err);
3461 }
3462
3463 /*
3464 * for TSO, pseudo_hdr_offset holds mss. The firmware
3465 * figures out where to put the checksum by parsing
3466 * the header.
3467 */
3469 pseudo_hdr_offset = htons(mss);
3470 } else if (pkt_size <= MXGEFW_SEND_SMALL_SIZE) {
3471 flags |= MXGEFW_FLAGS_SMALL;
3472 if (pkt_size < myri10ge_tx_copylen) {
3473 req->cksum_offset = cksum_offset;
3474 req->pseudo_hdr_offset = pseudo_hdr_offset;
3475 req->flags = flags;
3476 err = myri10ge_tx_copy(ss, mp, req);
3477 return (err);
3479 cum_len = 0;
3482 /* pull one DMA handle for each bp from our freelist */
3483 handles = NULL;
3484 err = myri10ge_alloc_tx_handles(ss, count, &handles);
3485 if (err != DDI_SUCCESS) {
3486 err = DDI_FAILURE;
3487 goto stall;
3489 count = 0;
3490 rdma_count = 0;
3491 for (bp = mp; bp != NULL; bp = bp->b_cont) {
3492 mblen = MBLKL(bp);
3493 dma_handle = handles;
3494 handles = handles->next;
3496 rv = ddi_dma_addr_bind_handle(dma_handle->h, NULL,
3497 (caddr_t)bp->b_rptr, mblen,
3498 DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL,
3499 &cookie, &ncookies);
3500 if (unlikely(rv != DDI_DMA_MAPPED)) {
3501 err = EIO;
3502 try_pullup = 0;
3503 dma_handle->next = handles;
3504 handles = dma_handle;
3505 goto abort_with_handles;
3508 /* reserve the slot */
3509 tx_info[count].m = bp;
3510 tx_info[count].handle = dma_handle;
3512 for (; ; ) {
3513 low = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress);
3514 high_swapped =
3515 htonl(MYRI10GE_HIGHPART_TO_U32(
3516 cookie.dmac_laddress));
3517 len = (uint32_t)cookie.dmac_size;
3518 while (len) {
3519 uint8_t flags_next;
3520 int cum_len_next;
3522 boundary = (low + mgp->tx_boundary) &
3523 ~(mgp->tx_boundary - 1);
3524 seglen = boundary - low;
3525 if (seglen > len)
3526 seglen = len;
3528 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
3529 cum_len_next = cum_len + seglen;
3530 if (mss) {
3531 (req-rdma_count)->rdma_count =
3532 rdma_count + 1;
3533 if (likely(cum_len >= 0)) {
3534 /* payload */
3535 int next_is_first, chop;
3537 chop = (cum_len_next > mss);
3538 cum_len_next =
3539 cum_len_next % mss;
3540 next_is_first =
3541 (cum_len_next == 0);
3542 flags |= chop *
3543 MXGEFW_FLAGS_TSO_CHOP;
3544 flags_next |= next_is_first *
3545 MXGEFW_FLAGS_FIRST;
3546 rdma_count |=
3547 -(chop | next_is_first);
3548 rdma_count +=
3549 chop & !next_is_first;
3550 } else if (likely(cum_len_next >= 0)) {
3551 /* header ends */
3552 int small;
3554 rdma_count = -1;
3555 cum_len_next = 0;
3556 seglen = -cum_len;
3557 small = (mss <=
3558 MXGEFW_SEND_SMALL_SIZE);
3559 flags_next =
3560 MXGEFW_FLAGS_TSO_PLD
3561 | MXGEFW_FLAGS_FIRST
3562 | (small *
3563 MXGEFW_FLAGS_SMALL);
3566 req->addr_high = high_swapped;
3567 req->addr_low = htonl(low);
3568 req->pseudo_hdr_offset = pseudo_hdr_offset;
3569 req->pad = 0; /* complete solid 16-byte block */
3570 req->rdma_count = 1;
3571 req->cksum_offset = cksum_offset;
3572 req->length = htons(seglen);
3573 req->flags = flags | ((cum_len & 1) * odd_flag);
3574 if (cksum_offset > seglen)
3575 cksum_offset -= seglen;
3576 else
3577 cksum_offset = 0;
3578 low += seglen;
3579 len -= seglen;
3580 cum_len = cum_len_next;
3581 count++;
3582 rdma_count++;
3583 /* make sure all the segments will fit */
3584 if (unlikely(count >= max_segs)) {
3585 MYRI10GE_ATOMIC_SLICE_STAT_INC(
3586 xmit_lowbuf);
3587 /* may try a pullup */
3588 err = EBUSY;
3589 if (try_pullup)
3590 try_pullup = 2;
3591 goto abort_with_handles;
3593 req++;
3594 req->flags = 0;
3595 flags = flags_next;
3596 tx_info[count].m = 0;
3598 ncookies--;
3599 if (ncookies == 0)
3600 break;
3601 ddi_dma_nextcookie(dma_handle->h, &cookie);
3604 (req-rdma_count)->rdma_count = (uint8_t)rdma_count;
3606 if (mss) {
3607 do {
3608 req--;
3609 req->flags |= MXGEFW_FLAGS_TSO_LAST;
3610 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP |
3611 MXGEFW_FLAGS_FIRST)));
3614 /* calculate tx stats */
3615 if (mss) {
3616 uint16_t opackets;
3617 int payload;
3619 payload = pkt_size - lso_hdr_size;
3620 opackets = (payload / mss) + ((payload % mss) != 0);
3621 tx_info[0].stat.un.all = 0;
3622 tx_info[0].ostat.opackets = opackets;
3623 tx_info[0].ostat.obytes = (opackets - 1) * lso_hdr_size
3624 + pkt_size;
3625 } else {
3626 myri10ge_tx_stat(&tx_info[0].stat,
3627 (struct ether_header *)(void *)mp->b_rptr, 1, pkt_size);
3629 mutex_enter(&tx->lock);
3631 /* check to see if the slots are really there */
3632 avail = tx->mask - (tx->req - tx->done);
3633 if (unlikely(avail <= count)) {
3634 mutex_exit(&tx->lock);
3635 err = 0;
3636 goto late_stall;
3639 myri10ge_send_locked(tx, req_list, tx_info, count);
3640 mutex_exit(&tx->lock);
3641 return (DDI_SUCCESS);
3643 late_stall:
3644 try_pullup = 0;
3645 atomic_inc_32(&tx->stall_late);
3647 abort_with_handles:
3648 /* unbind and free handles from previous mblks */
3649 for (i = 0; i < count; i++) {
3650 bp = tx_info[i].m;
3651 tx_info[i].m = 0;
3652 if (bp) {
3653 dma_handle = tx_info[i].handle;
3654 (void) ddi_dma_unbind_handle(dma_handle->h);
3655 dma_handle->next = handles;
3656 handles = dma_handle;
3657 tx_info[i].handle = NULL;
3658 tx_info[i].m = NULL;
3661 myri10ge_free_tx_handle_slist(tx, handles);
3662 pullup:
3663 if (try_pullup) {
3664 err = myri10ge_pullup(ss, mp);
3665 if (err != DDI_SUCCESS && try_pullup == 2) {
3666 /* drop */
3667 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
3668 freemsg(mp);
3669 return (0);
3671 try_pullup = 0;
3672 goto again;
3675 stall:
3676 if (err != 0) {
3677 if (err == EBUSY) {
3678 atomic_inc_32(&tx->stall);
3679 } else {
3680 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
3683 return (err);
3686 static mblk_t *
3687 myri10ge_send_wrapper(void *arg, mblk_t *mp)
3689 struct myri10ge_slice_state *ss = arg;
3690 int err = 0;
3691 mcp_kreq_ether_send_t *req_list;
3692 #if defined(__i386)
3693 /*
3694 * We need about 2.5KB of scratch space to handle transmits.
3695 * i86pc has only 8KB of kernel stack space, so we kmem_alloc
3696 * the scratch space rather than keeping it on the stack.
3697 */
3698 size_t req_size, tx_info_size;
3699 struct myri10ge_tx_buffer_state *tx_info;
3700 caddr_t req_bytes;
3702 req_size = sizeof (*req_list) * (MYRI10GE_MAX_SEND_DESC_TSO + 4)
3703 + 8;
3704 req_bytes = kmem_alloc(req_size, KM_SLEEP);
3705 tx_info_size = sizeof (*tx_info) * (MYRI10GE_MAX_SEND_DESC_TSO + 1);
3706 tx_info = kmem_alloc(tx_info_size, KM_SLEEP);
3707 #else
3708 char req_bytes[sizeof (*req_list) * (MYRI10GE_MAX_SEND_DESC_TSO + 4)
3709 + 8];
3710 struct myri10ge_tx_buffer_state tx_info[MYRI10GE_MAX_SEND_DESC_TSO + 1];
3711 #endif
3713 /* ensure req_list entries are aligned to 8 bytes */
3714 req_list = (struct mcp_kreq_ether_send *)
3715 (((unsigned long)req_bytes + 7UL) & ~7UL);
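/*
 * Alignment example: adding 7 and masking with ~7UL rounds the
 * scratch pointer up to the next 8-byte boundary (e.g. 0x1005 ->
 * 0x1008), which is why req_bytes is allocated 8 bytes larger than
 * the descriptors strictly need.
 */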
3717 err = myri10ge_send(ss, mp, req_list, tx_info);
3719 #if defined(__i386)
3720 kmem_free(tx_info, tx_info_size);
3721 kmem_free(req_bytes, req_size);
3722 #endif
3723 if (err)
3724 return (mp);
3725 else
3726 return (NULL);
3729 static int
3730 myri10ge_addmac(void *arg, const uint8_t *mac_addr)
3732 struct myri10ge_priv *mgp = arg;
3733 int err;
3735 if (mac_addr == NULL)
3736 return (EINVAL);
3738 mutex_enter(&mgp->intrlock);
3739 if (mgp->macaddr_cnt) {
3740 mutex_exit(&mgp->intrlock);
3741 return (ENOSPC);
3743 err = myri10ge_m_unicst(mgp, mac_addr);
3744 if (!err)
3745 mgp->macaddr_cnt++;
3747 mutex_exit(&mgp->intrlock);
3748 if (err)
3749 return (err);
3751 bcopy(mac_addr, mgp->mac_addr, sizeof (mgp->mac_addr));
3752 return (0);
3755 /*ARGSUSED*/
3756 static int
3757 myri10ge_remmac(void *arg, const uint8_t *mac_addr)
3759 struct myri10ge_priv *mgp = arg;
3761 mutex_enter(&mgp->intrlock);
3762 mgp->macaddr_cnt--;
3763 mutex_exit(&mgp->intrlock);
3765 return (0);
3768 /*ARGSUSED*/
3769 static void
3770 myri10ge_fill_group(void *arg, mac_ring_type_t rtype, const int index,
3771 mac_group_info_t *infop, mac_group_handle_t gh)
3773 struct myri10ge_priv *mgp = arg;
3775 if (rtype != MAC_RING_TYPE_RX)
3776 return;
3778 infop->mgi_driver = (mac_group_driver_t)mgp;
3779 infop->mgi_start = NULL;
3780 infop->mgi_stop = NULL;
3781 infop->mgi_addmac = myri10ge_addmac;
3782 infop->mgi_remmac = myri10ge_remmac;
3783 infop->mgi_count = mgp->num_slices;
3786 static int
3787 myri10ge_ring_start(mac_ring_driver_t rh, uint64_t mr_gen_num)
3789 struct myri10ge_slice_state *ss;
3791 ss = (struct myri10ge_slice_state *)rh;
3792 mutex_enter(&ss->rx_lock);
3793 ss->rx_gen_num = mr_gen_num;
3794 mutex_exit(&ss->rx_lock);
3795 return (0);
3796 }
3797
3798 /*
3799 * Retrieve a value for one of the statistics for a particular rx ring
3800 */
3801 static int
3802 myri10ge_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val)
3804 struct myri10ge_slice_state *ss;
3806 ss = (struct myri10ge_slice_state *)rh;
3807 switch (stat) {
3808 case MAC_STAT_RBYTES:
3809 *val = ss->rx_stats.ibytes;
3810 break;
3812 case MAC_STAT_IPACKETS:
3813 *val = ss->rx_stats.ipackets;
3814 break;
3816 default:
3817 *val = 0;
3818 return (ENOTSUP);
3821 return (0);
3822 }
3823
3824 /*
3825 * Retrieve a value for one of the statistics for a particular tx ring
3826 */
3827 static int
3828 myri10ge_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val)
3830 struct myri10ge_slice_state *ss;
3832 ss = (struct myri10ge_slice_state *)rh;
3833 switch (stat) {
3834 case MAC_STAT_OBYTES:
3835 *val = ss->tx.stats.obytes;
3836 break;
3838 case MAC_STAT_OPACKETS:
3839 *val = ss->tx.stats.opackets;
3840 break;
3842 default:
3843 *val = 0;
3844 return (ENOTSUP);
3847 return (0);
3850 static int
3851 myri10ge_rx_ring_intr_disable(mac_intr_handle_t intrh)
3853 struct myri10ge_slice_state *ss;
3855 ss = (struct myri10ge_slice_state *)intrh;
3856 mutex_enter(&ss->poll_lock);
3857 ss->rx_polling = B_TRUE;
3858 mutex_exit(&ss->poll_lock);
3859 return (0);
3862 static int
3863 myri10ge_rx_ring_intr_enable(mac_intr_handle_t intrh)
3865 struct myri10ge_slice_state *ss;
3867 ss = (struct myri10ge_slice_state *)intrh;
3868 mutex_enter(&ss->poll_lock);
3869 ss->rx_polling = B_FALSE;
3870 if (ss->rx_token) {
3871 *ss->irq_claim = BE_32(3);
3872 ss->rx_token = 0;
3874 mutex_exit(&ss->poll_lock);
3875 return (0);
3878 /*ARGSUSED*/
3879 static void
3880 myri10ge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index,
3881 const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh)
3883 struct myri10ge_priv *mgp = arg;
3884 struct myri10ge_slice_state *ss;
3885 mac_intr_t *mintr = &infop->mri_intr;
3887 ASSERT((unsigned int)ring_index < mgp->num_slices);
3889 ss = &mgp->ss[ring_index];
3890 switch (rtype) {
3891 case MAC_RING_TYPE_RX:
3892 ss->rx_rh = rh;
3893 infop->mri_driver = (mac_ring_driver_t)ss;
3894 infop->mri_start = myri10ge_ring_start;
3895 infop->mri_stop = NULL;
3896 infop->mri_poll = myri10ge_poll_rx;
3897 infop->mri_stat = myri10ge_rx_ring_stat;
3898 mintr->mi_handle = (mac_intr_handle_t)ss;
3899 mintr->mi_enable = myri10ge_rx_ring_intr_enable;
3900 mintr->mi_disable = myri10ge_rx_ring_intr_disable;
3901 break;
3902 case MAC_RING_TYPE_TX:
3903 ss->tx.rh = rh;
3904 infop->mri_driver = (mac_ring_driver_t)ss;
3905 infop->mri_start = NULL;
3906 infop->mri_stop = NULL;
3907 infop->mri_tx = myri10ge_send_wrapper;
3908 infop->mri_stat = myri10ge_tx_ring_stat;
3909 break;
3910 default:
3911 break;
3915 static void
3916 myri10ge_nic_stat_destroy(struct myri10ge_priv *mgp)
3918 if (mgp->ksp_stat == NULL)
3919 return;
3921 kstat_delete(mgp->ksp_stat);
3922 mgp->ksp_stat = NULL;
3925 static void
3926 myri10ge_slice_stat_destroy(struct myri10ge_slice_state *ss)
3928 if (ss->ksp_stat == NULL)
3929 return;
3931 kstat_delete(ss->ksp_stat);
3932 ss->ksp_stat = NULL;
3935 static void
3936 myri10ge_info_destroy(struct myri10ge_priv *mgp)
3938 if (mgp->ksp_info == NULL)
3939 return;
3941 kstat_delete(mgp->ksp_info);
3942 mgp->ksp_info = NULL;
3945 static int
3946 myri10ge_nic_stat_kstat_update(kstat_t *ksp, int rw)
3948 struct myri10ge_nic_stat *ethstat;
3949 struct myri10ge_priv *mgp;
3950 mcp_irq_data_t *fw_stats;
3953 if (rw == KSTAT_WRITE)
3954 return (EACCES);
3956 ethstat = (struct myri10ge_nic_stat *)ksp->ks_data;
3957 mgp = (struct myri10ge_priv *)ksp->ks_private;
3958 fw_stats = mgp->ss[0].fw_stats;
3960 ethstat->dma_read_bw_MBs.value.ul = mgp->read_dma;
3961 ethstat->dma_write_bw_MBs.value.ul = mgp->write_dma;
3962 ethstat->dma_read_write_bw_MBs.value.ul = mgp->read_write_dma;
3963 if (myri10ge_tx_dma_attr.dma_attr_flags & DDI_DMA_FORCE_PHYSICAL)
3964 ethstat->dma_force_physical.value.ul = 1;
3965 else
3966 ethstat->dma_force_physical.value.ul = 0;
3967 ethstat->lanes.value.ul = mgp->pcie_link_width;
3968 ethstat->dropped_bad_crc32.value.ul =
3969 ntohl(fw_stats->dropped_bad_crc32);
3970 ethstat->dropped_bad_phy.value.ul =
3971 ntohl(fw_stats->dropped_bad_phy);
3972 ethstat->dropped_link_error_or_filtered.value.ul =
3973 ntohl(fw_stats->dropped_link_error_or_filtered);
3974 ethstat->dropped_link_overflow.value.ul =
3975 ntohl(fw_stats->dropped_link_overflow);
3976 ethstat->dropped_multicast_filtered.value.ul =
3977 ntohl(fw_stats->dropped_multicast_filtered);
3978 ethstat->dropped_no_big_buffer.value.ul =
3979 ntohl(fw_stats->dropped_no_big_buffer);
3980 ethstat->dropped_no_small_buffer.value.ul =
3981 ntohl(fw_stats->dropped_no_small_buffer);
3982 ethstat->dropped_overrun.value.ul =
3983 ntohl(fw_stats->dropped_overrun);
3984 ethstat->dropped_pause.value.ul =
3985 ntohl(fw_stats->dropped_pause);
3986 ethstat->dropped_runt.value.ul =
3987 ntohl(fw_stats->dropped_runt);
3988 ethstat->link_up.value.ul =
3989 ntohl(fw_stats->link_up);
3990 ethstat->dropped_unicast_filtered.value.ul =
3991 ntohl(fw_stats->dropped_unicast_filtered);
3992 return (0);
3995 static int
3996 myri10ge_slice_stat_kstat_update(kstat_t *ksp, int rw)
3998 struct myri10ge_slice_stat *ethstat;
3999 struct myri10ge_slice_state *ss;
4001 if (rw == KSTAT_WRITE)
4002 return (EACCES);
4004 ethstat = (struct myri10ge_slice_stat *)ksp->ks_data;
4005 ss = (struct myri10ge_slice_state *)ksp->ks_private;
4007 ethstat->rx_big.value.ul = ss->j_rx_cnt;
4008 ethstat->rx_bigbuf_firmware.value.ul = ss->rx_big.cnt - ss->j_rx_cnt;
4009 ethstat->rx_bigbuf_pool.value.ul =
4010 ss->jpool.num_alloc - ss->jbufs_for_smalls;
4011 ethstat->rx_bigbuf_smalls.value.ul = ss->jbufs_for_smalls;
4012 ethstat->rx_small.value.ul = ss->rx_small.cnt -
4013 (ss->rx_small.mask + 1);
4014 ethstat->tx_done.value.ul = ss->tx.done;
4015 ethstat->tx_req.value.ul = ss->tx.req;
4016 ethstat->tx_activate.value.ul = ss->tx.activate;
4017 ethstat->xmit_sched.value.ul = ss->tx.sched;
4018 ethstat->xmit_stall.value.ul = ss->tx.stall;
4019 ethstat->xmit_stall_early.value.ul = ss->tx.stall_early;
4020 ethstat->xmit_stall_late.value.ul = ss->tx.stall_late;
4021 ethstat->xmit_err.value.ul = MYRI10GE_SLICE_STAT(xmit_err);
4022 return (0);
4025 static int
4026 myri10ge_info_kstat_update(kstat_t *ksp, int rw)
4028 struct myri10ge_info *info;
4029 struct myri10ge_priv *mgp;
4032 if (rw == KSTAT_WRITE)
4033 return (EACCES);
4035 info = (struct myri10ge_info *)ksp->ks_data;
4036 mgp = (struct myri10ge_priv *)ksp->ks_private;
4037 kstat_named_setstr(&info->driver_version, MYRI10GE_VERSION_STR);
4038 kstat_named_setstr(&info->firmware_version, mgp->fw_version);
4039 kstat_named_setstr(&info->firmware_name, mgp->fw_name);
4040 kstat_named_setstr(&info->interrupt_type, mgp->intr_type);
4041 kstat_named_setstr(&info->product_code, mgp->pc_str);
4042 kstat_named_setstr(&info->serial_number, mgp->sn_str);
4043 return (0);
4046 static struct myri10ge_info myri10ge_info_template = {
4047 { "driver_version", KSTAT_DATA_STRING },
4048 { "firmware_version", KSTAT_DATA_STRING },
4049 { "firmware_name", KSTAT_DATA_STRING },
4050 { "interrupt_type", KSTAT_DATA_STRING },
4051 { "product_code", KSTAT_DATA_STRING },
4052 { "serial_number", KSTAT_DATA_STRING },
4054 static kmutex_t myri10ge_info_template_lock;
4057 static int
4058 myri10ge_info_init(struct myri10ge_priv *mgp)
4060 struct kstat *ksp;
4062 ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip),
4063 "myri10ge_info", "net", KSTAT_TYPE_NAMED,
4064 sizeof (myri10ge_info_template) /
4065 sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
4066 if (ksp == NULL) {
4067 cmn_err(CE_WARN,
4068 "%s: myri10ge_info_init: kstat_create failed", mgp->name);
4069 return (DDI_FAILURE);
4071 mgp->ksp_info = ksp;
4072 ksp->ks_update = myri10ge_info_kstat_update;
4073 ksp->ks_private = (void *) mgp;
4074 ksp->ks_data = &myri10ge_info_template;
4075 ksp->ks_lock = &myri10ge_info_template_lock;
4076 if (MYRI10GE_VERSION_STR != NULL)
4077 ksp->ks_data_size += strlen(MYRI10GE_VERSION_STR) + 1;
4078 if (mgp->fw_version != NULL)
4079 ksp->ks_data_size += strlen(mgp->fw_version) + 1;
4080 ksp->ks_data_size += strlen(mgp->fw_name) + 1;
4081 ksp->ks_data_size += strlen(mgp->intr_type) + 1;
4082 if (mgp->pc_str != NULL)
4083 ksp->ks_data_size += strlen(mgp->pc_str) + 1;
4084 if (mgp->sn_str != NULL)
4085 ksp->ks_data_size += strlen(mgp->sn_str) + 1;
4087 kstat_install(ksp);
4088 return (DDI_SUCCESS);
4092 static int
4093 myri10ge_nic_stat_init(struct myri10ge_priv *mgp)
4095 struct kstat *ksp;
4096 struct myri10ge_nic_stat *ethstat;
4098 ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip),
4099 "myri10ge_nic_stats", "net", KSTAT_TYPE_NAMED,
4100 sizeof (*ethstat) / sizeof (kstat_named_t), 0);
4101 if (ksp == NULL) {
4102 cmn_err(CE_WARN,
4103 "%s: myri10ge_stat_init: kstat_create failed", mgp->name);
4104 return (DDI_FAILURE);
4106 mgp->ksp_stat = ksp;
4107 ethstat = (struct myri10ge_nic_stat *)(ksp->ks_data);
4109 kstat_named_init(&ethstat->dma_read_bw_MBs,
4110 "dma_read_bw_MBs", KSTAT_DATA_ULONG);
4111 kstat_named_init(&ethstat->dma_write_bw_MBs,
4112 "dma_write_bw_MBs", KSTAT_DATA_ULONG);
4113 kstat_named_init(&ethstat->dma_read_write_bw_MBs,
4114 "dma_read_write_bw_MBs", KSTAT_DATA_ULONG);
4115 kstat_named_init(&ethstat->dma_force_physical,
4116 "dma_force_physical", KSTAT_DATA_ULONG);
4117 kstat_named_init(&ethstat->lanes,
4118 "lanes", KSTAT_DATA_ULONG);
4119 kstat_named_init(&ethstat->dropped_bad_crc32,
4120 "dropped_bad_crc32", KSTAT_DATA_ULONG);
4121 kstat_named_init(&ethstat->dropped_bad_phy,
4122 "dropped_bad_phy", KSTAT_DATA_ULONG);
4123 kstat_named_init(&ethstat->dropped_link_error_or_filtered,
4124 "dropped_link_error_or_filtered", KSTAT_DATA_ULONG);
4125 kstat_named_init(&ethstat->dropped_link_overflow,
4126 "dropped_link_overflow", KSTAT_DATA_ULONG);
4127 kstat_named_init(&ethstat->dropped_multicast_filtered,
4128 "dropped_multicast_filtered", KSTAT_DATA_ULONG);
4129 kstat_named_init(&ethstat->dropped_no_big_buffer,
4130 "dropped_no_big_buffer", KSTAT_DATA_ULONG);
4131 kstat_named_init(&ethstat->dropped_no_small_buffer,
4132 "dropped_no_small_buffer", KSTAT_DATA_ULONG);
4133 kstat_named_init(&ethstat->dropped_overrun,
4134 "dropped_overrun", KSTAT_DATA_ULONG);
4135 kstat_named_init(&ethstat->dropped_pause,
4136 "dropped_pause", KSTAT_DATA_ULONG);
4137 kstat_named_init(&ethstat->dropped_runt,
4138 "dropped_runt", KSTAT_DATA_ULONG);
4139 kstat_named_init(&ethstat->dropped_unicast_filtered,
4140 "dropped_unicast_filtered", KSTAT_DATA_ULONG);
4143 kstat_named_init(&ethstat->link_up, "link_up", KSTAT_DATA_ULONG);
4144 kstat_named_init(&ethstat->link_changes, "link_changes",
4145 KSTAT_DATA_ULONG);
4146 ksp->ks_update = myri10ge_nic_stat_kstat_update;
4147 ksp->ks_private = (void *) mgp;
4148 kstat_install(ksp);
4149 return (DDI_SUCCESS);
4152 static int
4153 myri10ge_slice_stat_init(struct myri10ge_slice_state *ss)
4155 struct myri10ge_priv *mgp = ss->mgp;
4156 struct kstat *ksp;
4157 struct myri10ge_slice_stat *ethstat;
4158 int instance;
	/*
	 * fake an instance so that the same slice numbers from
	 * different instances do not collide
	 */
4164 instance = (ddi_get_instance(mgp->dip) * 1000) + (int)(ss - mgp->ss);
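	/*
	 * e.g. slice 3 of driver instance 2 registers as kstat
	 * instance 2003, which cannot collide with slice 3 of
	 * instance 1 (1003).
	 */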
4165 ksp = kstat_create("myri10ge", instance,
4166 "myri10ge_slice_stats", "net", KSTAT_TYPE_NAMED,
4167 sizeof (*ethstat) / sizeof (kstat_named_t), 0);
4168 if (ksp == NULL) {
4169 cmn_err(CE_WARN,
		    "%s: myri10ge_slice_stat_init: kstat_create failed",
		    mgp->name);
4171 return (DDI_FAILURE);
4173 ss->ksp_stat = ksp;
4174 ethstat = (struct myri10ge_slice_stat *)(ksp->ks_data);
4175 kstat_named_init(&ethstat->lro_bad_csum, "lro_bad_csum",
4176 KSTAT_DATA_ULONG);
4177 kstat_named_init(&ethstat->lro_flushed, "lro_flushed",
4178 KSTAT_DATA_ULONG);
4179 kstat_named_init(&ethstat->lro_queued, "lro_queued",
4180 KSTAT_DATA_ULONG);
4181 kstat_named_init(&ethstat->rx_bigbuf_firmware, "rx_bigbuf_firmware",
4182 KSTAT_DATA_ULONG);
4183 kstat_named_init(&ethstat->rx_bigbuf_pool, "rx_bigbuf_pool",
4184 KSTAT_DATA_ULONG);
4185 kstat_named_init(&ethstat->rx_bigbuf_smalls, "rx_bigbuf_smalls",
4186 KSTAT_DATA_ULONG);
4187 kstat_named_init(&ethstat->rx_copy, "rx_copy",
4188 KSTAT_DATA_ULONG);
4189 kstat_named_init(&ethstat->rx_big_nobuf, "rx_big_nobuf",
4190 KSTAT_DATA_ULONG);
4191 kstat_named_init(&ethstat->rx_small_nobuf, "rx_small_nobuf",
4192 KSTAT_DATA_ULONG);
4193 kstat_named_init(&ethstat->xmit_zero_len, "xmit_zero_len",
4194 KSTAT_DATA_ULONG);
4195 kstat_named_init(&ethstat->xmit_pullup, "xmit_pullup",
4196 KSTAT_DATA_ULONG);
4197 kstat_named_init(&ethstat->xmit_pullup_first, "xmit_pullup_first",
4198 KSTAT_DATA_ULONG);
4199 kstat_named_init(&ethstat->xmit_lowbuf, "xmit_lowbuf",
4200 KSTAT_DATA_ULONG);
4201 kstat_named_init(&ethstat->xmit_lsobadflags, "xmit_lsobadflags",
4202 KSTAT_DATA_ULONG);
4203 kstat_named_init(&ethstat->xmit_sched, "xmit_sched",
4204 KSTAT_DATA_ULONG);
4205 kstat_named_init(&ethstat->xmit_stall, "xmit_stall",
4206 KSTAT_DATA_ULONG);
4207 kstat_named_init(&ethstat->xmit_stall_early, "xmit_stall_early",
4208 KSTAT_DATA_ULONG);
4209 kstat_named_init(&ethstat->xmit_stall_late, "xmit_stall_late",
4210 KSTAT_DATA_ULONG);
4211 kstat_named_init(&ethstat->xmit_err, "xmit_err",
4212 KSTAT_DATA_ULONG);
4213 kstat_named_init(&ethstat->tx_req, "tx_req",
4214 KSTAT_DATA_ULONG);
4215 kstat_named_init(&ethstat->tx_activate, "tx_activate",
4216 KSTAT_DATA_ULONG);
4217 kstat_named_init(&ethstat->tx_done, "tx_done",
4218 KSTAT_DATA_ULONG);
4219 kstat_named_init(&ethstat->tx_handles_alloced, "tx_handles_alloced",
4220 KSTAT_DATA_ULONG);
4221 kstat_named_init(&ethstat->rx_big, "rx_big",
4222 KSTAT_DATA_ULONG);
4223 kstat_named_init(&ethstat->rx_small, "rx_small",
4224 KSTAT_DATA_ULONG);
4225 ksp->ks_update = myri10ge_slice_stat_kstat_update;
4226 ksp->ks_private = (void *) ss;
4227 kstat_install(ksp);
4228 return (DDI_SUCCESS);
4233 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
4235 #include <vm/hat.h>
4236 #include <sys/ddi_isa.h>
4237 void *device_arena_alloc(size_t size, int vm_flag);
4238 void device_arena_free(void *vaddr, size_t size);
4240 static void
4241 myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp)
4243 dev_info_t *parent_dip;
4244 ddi_acc_handle_t handle;
4245 unsigned long bus_number, dev_number, func_number;
4246 unsigned long cfg_pa, paddr, base, pgoffset;
4247 char *cvaddr, *ptr;
4248 uint32_t *ptr32;
4249 int retval = DDI_FAILURE;
4250 int dontcare;
4251 uint16_t read_vid, read_did, vendor_id, device_id;
4253 if (!myri10ge_nvidia_ecrc_enable)
4254 return;
4256 parent_dip = ddi_get_parent(mgp->dip);
4257 if (parent_dip == NULL) {
4258 cmn_err(CE_WARN, "%s: I'm an orphan?", mgp->name);
4259 return;
4262 if (pci_config_setup(parent_dip, &handle) != DDI_SUCCESS) {
4263 cmn_err(CE_WARN,
4264 "%s: Could not access my parent's registers", mgp->name);
4265 return;
4268 vendor_id = pci_config_get16(handle, PCI_CONF_VENID);
4269 device_id = pci_config_get16(handle, PCI_CONF_DEVID);
4270 pci_config_teardown(&handle);
4272 if (myri10ge_verbose) {
4273 unsigned long bus_number, dev_number, func_number;
4274 int reg_set, span;
4275 (void) myri10ge_reg_set(parent_dip, &reg_set, &span,
4276 &bus_number, &dev_number, &func_number);
4277 if (myri10ge_verbose)
4278 printf("%s: parent at %ld:%ld:%ld\n", mgp->name,
4279 bus_number, dev_number, func_number);
4282 if (vendor_id != 0x10de)
4283 return;
4285 if (device_id != 0x005d /* CK804 */ &&
4286 (device_id < 0x374 || device_id > 0x378) /* MCP55 */) {
4287 return;
4289 (void) myri10ge_reg_set(parent_dip, &dontcare, &dontcare,
4290 &bus_number, &dev_number, &func_number);
4292 for (cfg_pa = 0xf0000000UL;
4293 retval != DDI_SUCCESS && cfg_pa >= 0xe0000000UL;
4294 cfg_pa -= 0x10000000UL) {
4295 /* find the config space address for the nvidia bridge */
4296 paddr = (cfg_pa + bus_number * 0x00100000UL +
4297 (dev_number * 8 + func_number) * 0x00001000UL);
4299 base = paddr & (~MMU_PAGEOFFSET);
4300 pgoffset = paddr & MMU_PAGEOFFSET;
4302 /* map it into the kernel */
4303 cvaddr = device_arena_alloc(ptob(1), VM_NOSLEEP);
		if (cvaddr == NULL) {
			cmn_err(CE_WARN, "%s: failed to map nf4: cvaddr\n",
			    mgp->name);
			return;
		}
4308 hat_devload(kas.a_hat, cvaddr, mmu_ptob(1),
4309 i_ddi_paddr_to_pfn(base),
4310 PROT_WRITE|HAT_STRICTORDER, HAT_LOAD_LOCK);
4312 ptr = cvaddr + pgoffset;
4313 read_vid = *(uint16_t *)(void *)(ptr + PCI_CONF_VENID);
4314 read_did = *(uint16_t *)(void *)(ptr + PCI_CONF_DEVID);
		if (vendor_id == read_vid && device_id == read_did) {
4316 ptr32 = (uint32_t *)(void *)(ptr + 0x178);
4317 if (myri10ge_verbose)
4318 printf("%s: Enabling ECRC on upstream "
4319 "Nvidia bridge (0x%x:0x%x) "
4320 "at %ld:%ld:%ld\n", mgp->name,
4321 read_vid, read_did, bus_number,
4322 dev_number, func_number);
4323 *ptr32 |= 0x40;
4324 retval = DDI_SUCCESS;
4326 hat_unload(kas.a_hat, cvaddr, ptob(1), HAT_UNLOAD_UNLOCK);
4327 device_arena_free(cvaddr, ptob(1));
4331 #else
4332 /*ARGSUSED*/
4333 static void
4334 myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp)
4337 #endif /* i386 */
/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx.boundary to 2KB.  If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx.boundary to 4KB.
 */
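/*
 * In short, the selection below boils down to (a summary of the logic
 * in myri10ge_firmware_probe()/myri10ge_select_firmware()):
 *
 *	completions known aligned (ECRC on, forced, or narrow link):
 *		fw = rss_eth_z8e,  tx_boundary = 4096
 *	completions possibly unaligned (DMA test failed, or forced):
 *		fw = rss_ethp_z8e, tx_boundary = 2048
 */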
4360 static int
4361 myri10ge_firmware_probe(struct myri10ge_priv *mgp)
4363 int status;
4365 mgp->tx_boundary = 4096;
4367 * Verify the max read request size was set to 4KB
4368 * before trying the test with 4KB.
4370 if (mgp->max_read_request_4k == 0)
4371 mgp->tx_boundary = 2048;
4373 * load the optimized firmware which assumes aligned PCIe
4374 * completions in order to see if it works on this host.
4377 mgp->fw_name = "rss_eth_z8e";
4378 mgp->eth_z8e = (unsigned char *)rss_eth_z8e;
4379 mgp->eth_z8e_length = rss_eth_z8e_length;
4381 status = myri10ge_load_firmware(mgp);
4382 if (status != 0) {
4383 return (status);
4386 * Enable ECRC if possible
4388 myri10ge_enable_nvidia_ecrc(mgp);
4391 * Run a DMA test which watches for unaligned completions and
4392 * aborts on the first one seen.
4394 status = myri10ge_dma_test(mgp, MXGEFW_CMD_UNALIGNED_TEST);
4395 if (status == 0)
4396 return (0); /* keep the aligned firmware */
4398 if (status != E2BIG)
4399 cmn_err(CE_WARN, "%s: DMA test failed: %d\n",
4400 mgp->name, status);
4401 if (status == ENOSYS)
4402 cmn_err(CE_WARN, "%s: Falling back to ethp! "
		    "Please install up-to-date firmware\n", mgp->name);
4404 return (status);
4407 static int
4408 myri10ge_select_firmware(struct myri10ge_priv *mgp)
4410 int aligned;
4412 aligned = 0;
4414 if (myri10ge_force_firmware == 1) {
4415 if (myri10ge_verbose)
4416 printf("%s: Assuming aligned completions (forced)\n",
4417 mgp->name);
4418 aligned = 1;
4419 goto done;
4422 if (myri10ge_force_firmware == 2) {
4423 if (myri10ge_verbose)
4424 printf("%s: Assuming unaligned completions (forced)\n",
4425 mgp->name);
4426 aligned = 0;
4427 goto done;
	/* If the width is less than 8, we may use the aligned firmware */
4431 if (mgp->pcie_link_width != 0 && mgp->pcie_link_width < 8) {
4432 cmn_err(CE_WARN, "!%s: PCIe link running at x%d\n",
4433 mgp->name, mgp->pcie_link_width);
4434 aligned = 1;
4435 goto done;
4438 if (0 == myri10ge_firmware_probe(mgp))
4439 return (0); /* keep optimized firmware */
4441 done:
4442 if (aligned) {
4443 mgp->fw_name = "rss_eth_z8e";
4444 mgp->eth_z8e = (unsigned char *)rss_eth_z8e;
4445 mgp->eth_z8e_length = rss_eth_z8e_length;
4446 mgp->tx_boundary = 4096;
4447 } else {
4448 mgp->fw_name = "rss_ethp_z8e";
4449 mgp->eth_z8e = (unsigned char *)rss_ethp_z8e;
4450 mgp->eth_z8e_length = rss_ethp_z8e_length;
4451 mgp->tx_boundary = 2048;
4454 return (myri10ge_load_firmware(mgp));
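/*
 * The probe can be skipped entirely via the myri10ge_force_firmware
 * property read in myri10ge_get_props().  A minimal myri10ge.conf
 * sketch (driver.conf(4) syntax; the value shown is illustrative):
 *
 *	# 1 = assume aligned completions, 2 = assume unaligned
 *	myri10ge_force_firmware=1;
 */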
4457 static int
4458 myri10ge_add_intrs(struct myri10ge_priv *mgp, int add_handler)
4460 dev_info_t *devinfo = mgp->dip;
4461 int count, avail, actual, intr_types;
4462 int x, y, rc, inum = 0;
4465 rc = ddi_intr_get_supported_types(devinfo, &intr_types);
4466 if (rc != DDI_SUCCESS) {
4467 cmn_err(CE_WARN,
		    "!%s: ddi_intr_get_supported_types() failure, rc = %d\n",
		    mgp->name, rc);
4470 return (DDI_FAILURE);
4473 if (!myri10ge_use_msi)
4474 intr_types &= ~DDI_INTR_TYPE_MSI;
4475 if (!myri10ge_use_msix)
4476 intr_types &= ~DDI_INTR_TYPE_MSIX;
4478 if (intr_types & DDI_INTR_TYPE_MSIX) {
4479 mgp->ddi_intr_type = DDI_INTR_TYPE_MSIX;
4480 mgp->intr_type = "MSI-X";
4481 } else if (intr_types & DDI_INTR_TYPE_MSI) {
4482 mgp->ddi_intr_type = DDI_INTR_TYPE_MSI;
4483 mgp->intr_type = "MSI";
4484 } else {
4485 mgp->ddi_intr_type = DDI_INTR_TYPE_FIXED;
4486 mgp->intr_type = "Legacy";
4488 /* Get number of interrupts */
4489 rc = ddi_intr_get_nintrs(devinfo, mgp->ddi_intr_type, &count);
4490 if ((rc != DDI_SUCCESS) || (count == 0)) {
4491 cmn_err(CE_WARN, "%s: ddi_intr_get_nintrs() failure, rc: %d, "
4492 "count: %d", mgp->name, rc, count);
4494 return (DDI_FAILURE);
4497 /* Get number of available interrupts */
4498 rc = ddi_intr_get_navail(devinfo, mgp->ddi_intr_type, &avail);
4499 if ((rc != DDI_SUCCESS) || (avail == 0)) {
4500 cmn_err(CE_WARN, "%s: ddi_intr_get_navail() failure, "
4501 "rc: %d, avail: %d\n", mgp->name, rc, avail);
4502 return (DDI_FAILURE);
4504 if (avail < count) {
4505 cmn_err(CE_NOTE,
4506 "!%s: nintrs() returned %d, navail returned %d",
4507 mgp->name, count, avail);
4508 count = avail;
4511 if (count < mgp->num_slices)
4512 return (DDI_FAILURE);
4514 if (count > mgp->num_slices)
4515 count = mgp->num_slices;
4517 /* Allocate memory for MSI interrupts */
4518 mgp->intr_size = count * sizeof (ddi_intr_handle_t);
4519 mgp->htable = kmem_alloc(mgp->intr_size, KM_SLEEP);
4521 rc = ddi_intr_alloc(devinfo, mgp->htable, mgp->ddi_intr_type, inum,
4522 count, &actual, DDI_INTR_ALLOC_NORMAL);
4524 if ((rc != DDI_SUCCESS) || (actual == 0)) {
4525 cmn_err(CE_WARN, "%s: ddi_intr_alloc() failed: %d",
4526 mgp->name, rc);
4528 kmem_free(mgp->htable, mgp->intr_size);
4529 mgp->htable = NULL;
4530 return (DDI_FAILURE);
4533 if ((actual < count) && myri10ge_verbose) {
4534 cmn_err(CE_NOTE, "%s: got %d/%d slices",
4535 mgp->name, actual, count);
4538 mgp->intr_cnt = actual;
4541 * Get priority for first irq, assume remaining are all the same
4543 if (ddi_intr_get_pri(mgp->htable[0], &mgp->intr_pri)
4544 != DDI_SUCCESS) {
4545 cmn_err(CE_WARN, "%s: ddi_intr_get_pri() failed", mgp->name);
4547 /* Free already allocated intr */
4548 for (y = 0; y < actual; y++) {
4549 (void) ddi_intr_free(mgp->htable[y]);
4552 kmem_free(mgp->htable, mgp->intr_size);
4553 mgp->htable = NULL;
4554 return (DDI_FAILURE);
4557 mgp->icookie = (void *)(uintptr_t)mgp->intr_pri;
4559 if (!add_handler)
4560 return (DDI_SUCCESS);
4562 /* Call ddi_intr_add_handler() */
4563 for (x = 0; x < actual; x++) {
4564 if (ddi_intr_add_handler(mgp->htable[x], myri10ge_intr,
4565 (caddr_t)&mgp->ss[x], NULL) != DDI_SUCCESS) {
4566 cmn_err(CE_WARN, "%s: ddi_intr_add_handler() failed",
4567 mgp->name);
4569 /* Free already allocated intr */
4570 for (y = 0; y < actual; y++) {
4571 (void) ddi_intr_free(mgp->htable[y]);
4574 kmem_free(mgp->htable, mgp->intr_size);
4575 mgp->htable = NULL;
4576 return (DDI_FAILURE);
4580 (void) ddi_intr_get_cap(mgp->htable[0], &mgp->intr_cap);
4581 if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) {
4582 /* Call ddi_intr_block_enable() for MSI */
4583 (void) ddi_intr_block_enable(mgp->htable, mgp->intr_cnt);
4584 } else {
4585 /* Call ddi_intr_enable() for MSI non block enable */
4586 for (x = 0; x < mgp->intr_cnt; x++) {
4587 (void) ddi_intr_enable(mgp->htable[x]);
4591 return (DDI_SUCCESS);
4594 static void
4595 myri10ge_rem_intrs(struct myri10ge_priv *mgp, int handler_installed)
4597 int x, err;
4599 /* Disable all interrupts */
4600 if (handler_installed) {
4601 if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) {
4602 /* Call ddi_intr_block_disable() */
4603 (void) ddi_intr_block_disable(mgp->htable,
4604 mgp->intr_cnt);
4605 } else {
4606 for (x = 0; x < mgp->intr_cnt; x++) {
4607 (void) ddi_intr_disable(mgp->htable[x]);
4612 for (x = 0; x < mgp->intr_cnt; x++) {
4613 if (handler_installed) {
4614 /* Call ddi_intr_remove_handler() */
4615 err = ddi_intr_remove_handler(mgp->htable[x]);
4616 if (err != DDI_SUCCESS) {
4617 cmn_err(CE_WARN,
				    "%s: ddi_intr_remove_handler for "
				    "vec %d returned %d\n", mgp->name,
4620 x, err);
4623 err = ddi_intr_free(mgp->htable[x]);
4624 if (err != DDI_SUCCESS) {
4625 cmn_err(CE_WARN,
4626 "%s: ddi_intr_free for vec %d returned %d\n",
4627 mgp->name, x, err);
4630 kmem_free(mgp->htable, mgp->intr_size);
4631 mgp->htable = NULL;
4634 static void
4635 myri10ge_test_physical(dev_info_t *dip)
4637 ddi_dma_handle_t handle;
4638 struct myri10ge_dma_stuff dma;
4639 void *addr;
4640 int err;
4642 /* test #1, sufficient for older sparc systems */
4643 myri10ge_tx_dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
4644 err = ddi_dma_alloc_handle(dip, &myri10ge_tx_dma_attr,
4645 DDI_DMA_DONTWAIT, NULL, &handle);
4646 if (err == DDI_DMA_BADATTR)
4647 goto fail;
4648 ddi_dma_free_handle(&handle);
	/* test #2, required on Olympus where the bind is what fails */
4651 addr = myri10ge_dma_alloc(dip, 128, &myri10ge_tx_dma_attr,
4652 &myri10ge_dev_access_attr, DDI_DMA_STREAMING,
4653 DDI_DMA_WRITE|DDI_DMA_STREAMING, &dma, 0, DDI_DMA_DONTWAIT);
4654 if (addr == NULL)
4655 goto fail;
4656 myri10ge_dma_free(&dma);
4657 return;
4659 fail:
4660 if (myri10ge_verbose)
4661 printf("myri10ge%d: DDI_DMA_FORCE_PHYSICAL failed, "
4662 "using IOMMU\n", ddi_get_instance(dip));
4664 myri10ge_tx_dma_attr.dma_attr_flags &= ~DDI_DMA_FORCE_PHYSICAL;
4667 static void
4668 myri10ge_get_props(dev_info_t *dip)
4671 myri10ge_flow_control = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4672 "myri10ge_flow_control", myri10ge_flow_control);
4674 myri10ge_intr_coal_delay = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4675 "myri10ge_intr_coal_delay", myri10ge_intr_coal_delay);
4677 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
4678 myri10ge_nvidia_ecrc_enable = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4679 "myri10ge_nvidia_ecrc_enable", 1);
4680 #endif
4683 myri10ge_use_msi = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4684 "myri10ge_use_msi", myri10ge_use_msi);
4686 myri10ge_deassert_wait = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4687 "myri10ge_deassert_wait", myri10ge_deassert_wait);
4689 myri10ge_verbose = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4690 "myri10ge_verbose", myri10ge_verbose);
4692 myri10ge_tx_copylen = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4693 "myri10ge_tx_copylen", myri10ge_tx_copylen);
4695 if (myri10ge_tx_copylen < 60) {
4696 cmn_err(CE_WARN,
4697 "myri10ge_tx_copylen must be >= 60 bytes\n");
4698 myri10ge_tx_copylen = 60;
4701 myri10ge_mtu_override = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4702 "myri10ge_mtu_override", myri10ge_mtu_override);
4704 if (myri10ge_mtu_override >= MYRI10GE_MIN_GLD_MTU &&
4705 myri10ge_mtu_override <= MYRI10GE_MAX_GLD_MTU)
4706 myri10ge_mtu = myri10ge_mtu_override +
4707 sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ;
4708 else if (myri10ge_mtu_override != 0) {
4709 cmn_err(CE_WARN,
4710 "myri10ge_mtu_override must be between 1500 and "
4711 "9000 bytes\n");
4714 myri10ge_bigbufs_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4715 "myri10ge_bigbufs_initial", myri10ge_bigbufs_initial);
4716 myri10ge_bigbufs_max = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4717 "myri10ge_bigbufs_max", myri10ge_bigbufs_max);
4719 myri10ge_watchdog_reset = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4720 "myri10ge_watchdog_reset", myri10ge_watchdog_reset);
4722 if (myri10ge_bigbufs_initial < 128) {
4723 cmn_err(CE_WARN,
		    "myri10ge_bigbufs_initial must be at least 128\n");
4725 myri10ge_bigbufs_initial = 128;
4727 if (myri10ge_bigbufs_max < 128) {
4728 cmn_err(CE_WARN,
		    "myri10ge_bigbufs_max must be at least 128\n");
4730 myri10ge_bigbufs_max = 128;
4733 if (myri10ge_bigbufs_max < myri10ge_bigbufs_initial) {
4734 cmn_err(CE_WARN,
4735 "myri10ge_bigbufs_max must be >= "
4736 "myri10ge_bigbufs_initial\n");
4737 myri10ge_bigbufs_max = myri10ge_bigbufs_initial;
4740 myri10ge_force_firmware = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4741 "myri10ge_force_firmware", myri10ge_force_firmware);
4743 myri10ge_max_slices = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4744 "myri10ge_max_slices", myri10ge_max_slices);
4746 myri10ge_use_msix = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4747 "myri10ge_use_msix", myri10ge_use_msix);
4749 myri10ge_rss_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4750 "myri10ge_rss_hash", myri10ge_rss_hash);
4752 if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_MAX ||
4753 myri10ge_rss_hash < MXGEFW_RSS_HASH_TYPE_IPV4) {
		cmn_err(CE_WARN, "myri10ge: Illegal rss hash type %d\n",
4755 myri10ge_rss_hash);
4756 myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4758 myri10ge_lro = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4759 "myri10ge_lro", myri10ge_lro);
4760 myri10ge_lro_cnt = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4761 "myri10ge_lro_cnt", myri10ge_lro_cnt);
4762 myri10ge_lro_max_aggr = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4763 "myri10ge_lro_max_aggr", myri10ge_lro_max_aggr);
4764 myri10ge_tx_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4765 "myri10ge_tx_hash", myri10ge_tx_hash);
4766 myri10ge_use_lso = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4767 "myri10ge_use_lso", myri10ge_use_lso);
4768 myri10ge_lso_copy = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4769 "myri10ge_lso_copy", myri10ge_lso_copy);
4770 myri10ge_tx_handles_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4771 "myri10ge_tx_handles_initial", myri10ge_tx_handles_initial);
4772 myri10ge_small_bytes = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4773 "myri10ge_small_bytes", myri10ge_small_bytes);
	if ((myri10ge_small_bytes + MXGEFW_PAD) & (128 - 1)) {
		cmn_err(CE_WARN, "myri10ge: myri10ge_small_bytes (%d) "
		    "plus MXGEFW_PAD must be a multiple of 128\n",
		    myri10ge_small_bytes);
		myri10ge_small_bytes += 128;
		myri10ge_small_bytes &= ~(128 - 1);
		myri10ge_small_bytes -= MXGEFW_PAD;
		cmn_err(CE_WARN, "rounded up to %d\n",
		    myri10ge_small_bytes);
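/*
 * Worked example of the rounding above, assuming MXGEFW_PAD is 2 as
 * in the MCP headers: myri10ge_small_bytes = 600 gives 600 + 2 = 602,
 * not a multiple of 128; the adjustment yields
 * ((600 + 128) & ~127) - 2 = 638, and 638 + 2 = 640 is 5 * 128.  The
 * default of 510 (510 + 2 = 512) is already aligned.
 */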
4788 #ifndef PCI_EXP_LNKSTA
4789 #define PCI_EXP_LNKSTA 18
4790 #endif
4792 static int
4793 myri10ge_find_cap(ddi_acc_handle_t handle, uint8_t *capptr, uint8_t capid)
4795 uint16_t status;
4796 uint8_t ptr;
4798 /* check to see if we have capabilities */
4799 status = pci_config_get16(handle, PCI_CONF_STAT);
4800 if (!(status & PCI_STAT_CAP)) {
4801 cmn_err(CE_WARN, "PCI_STAT_CAP not found\n");
4802 return (ENXIO);
4805 ptr = pci_config_get8(handle, PCI_CONF_CAP_PTR);
4807 /* Walk the capabilities list, looking for a PCI Express cap */
4808 while (ptr != PCI_CAP_NEXT_PTR_NULL) {
4809 if (pci_config_get8(handle, ptr + PCI_CAP_ID) == capid)
4810 break;
4811 ptr = pci_config_get8(handle, ptr + PCI_CAP_NEXT_PTR);
4813 if (ptr < 64) {
4814 cmn_err(CE_WARN, "Bad capability offset %d\n", ptr);
4815 return (ENXIO);
4817 *capptr = ptr;
4818 return (0);
4821 static int
4822 myri10ge_set_max_readreq(ddi_acc_handle_t handle)
4824 int err;
4825 uint16_t val;
4826 uint8_t ptr;
4828 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E);
4829 if (err != 0) {
4830 cmn_err(CE_WARN, "could not find PCIe cap\n");
4831 return (ENXIO);
4834 /* set max read req to 4096 */
4835 val = pci_config_get16(handle, ptr + PCIE_DEVCTL);
4836 val = (val & ~PCIE_DEVCTL_MAX_READ_REQ_MASK) |
4837 PCIE_DEVCTL_MAX_READ_REQ_4096;
4838 pci_config_put16(handle, ptr + PCIE_DEVCTL, val);
4839 val = pci_config_get16(handle, ptr + PCIE_DEVCTL);
4840 if ((val & (PCIE_DEVCTL_MAX_READ_REQ_4096)) !=
4841 PCIE_DEVCTL_MAX_READ_REQ_4096) {
4842 cmn_err(CE_WARN, "could not set max read req (%x)\n", val);
4843 return (EINVAL);
4845 return (0);
4848 static int
4849 myri10ge_read_pcie_link_width(ddi_acc_handle_t handle, int *link)
4851 int err;
4852 uint16_t val;
4853 uint8_t ptr;
4855 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E);
4856 if (err != 0) {
		cmn_err(CE_WARN, "could not find PCIe cap\n");
4858 return (ENXIO);
4861 /* read link width */
4862 val = pci_config_get16(handle, ptr + PCIE_LINKSTS);
4863 val &= PCIE_LINKSTS_NEG_WIDTH_MASK;
4864 *link = (val >> 4);
4865 return (0);
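/*
 * Worked example, assuming PCIE_LINKSTS_NEG_WIDTH_MASK is 0x3f0 as in
 * sys/pcie.h: a link status value of 0x1081 masks to 0x080, so *link
 * becomes 8 (a x8 link).
 */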
4868 static int
4869 myri10ge_reset_nic(struct myri10ge_priv *mgp)
4871 ddi_acc_handle_t handle = mgp->cfg_hdl;
4872 uint32_t reboot;
4873 uint16_t cmd;
4874 int err;
4876 cmd = pci_config_get16(handle, PCI_CONF_COMM);
4877 if ((cmd & PCI_COMM_ME) == 0) {
		/*
		 * Bus master DMA disabled?  Check to see if the card
		 * rebooted due to a parity error.  For now, just
		 * report it.
		 */
4884 /* enter read32 mode */
4885 pci_config_put8(handle, mgp->vso + 0x10, 0x3);
4886 /* read REBOOT_STATUS (0xfffffff0) */
4887 pci_config_put32(handle, mgp->vso + 0x18, 0xfffffff0);
		reboot = pci_config_get32(handle, mgp->vso + 0x14);
4889 cmn_err(CE_WARN, "%s NIC rebooted 0x%x\n", mgp->name, reboot);
4890 return (0);
4892 if (!myri10ge_watchdog_reset) {
4893 cmn_err(CE_WARN, "%s: not resetting\n", mgp->name);
4894 return (1);
4897 myri10ge_stop_locked(mgp);
4898 err = myri10ge_start_locked(mgp);
4899 if (err == DDI_FAILURE) {
4900 return (0);
4902 mac_tx_update(mgp->mh);
4903 return (1);
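/*
 * A tx ring is considered stalled when sends have been scheduled
 * since the last recorded stall (sched != stall), requests are still
 * outstanding (watchdog_req != watchdog_done), and no completions
 * have arrived since the previous watchdog tick (done ==
 * watchdog_done).
 */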
4906 static inline int
4907 myri10ge_ring_stalled(myri10ge_tx_ring_t *tx)
4909 if (tx->sched != tx->stall &&
4910 tx->done == tx->watchdog_done &&
4911 tx->watchdog_req != tx->watchdog_done)
4912 return (1);
4913 return (0);
4916 static void
4917 myri10ge_watchdog(void *arg)
4919 struct myri10ge_priv *mgp;
4920 struct myri10ge_slice_state *ss;
4921 myri10ge_tx_ring_t *tx;
4922 int nic_ok = 1;
4923 int slices_stalled, rx_pause, i;
4924 int add_rx;
4926 mgp = arg;
4927 mutex_enter(&mgp->intrlock);
4928 if (mgp->running != MYRI10GE_ETH_RUNNING) {
4929 cmn_err(CE_WARN,
4930 "%s not running, not rearming watchdog (%d)\n",
4931 mgp->name, mgp->running);
4932 mutex_exit(&mgp->intrlock);
4933 return;
4936 rx_pause = ntohl(mgp->ss[0].fw_stats->dropped_pause);
4939 * make sure nic is stalled before we reset the nic, so as to
4940 * ensure we don't rip the transmit data structures out from
4941 * under a pending transmit
4944 for (slices_stalled = 0, i = 0; i < mgp->num_slices; i++) {
4945 tx = &mgp->ss[i].tx;
4946 slices_stalled = myri10ge_ring_stalled(tx);
4947 if (slices_stalled)
4948 break;
4951 if (slices_stalled) {
4952 if (mgp->watchdog_rx_pause == rx_pause) {
4953 cmn_err(CE_WARN,
			    "%s slice %d stalled: (%d, %d, %d, %d, %d, %d, %d)\n",
4955 mgp->name, i, tx->sched, tx->stall,
4956 tx->done, tx->watchdog_done, tx->req, tx->pkt_done,
4957 (int)ntohl(mgp->ss[i].fw_stats->send_done_count));
4958 nic_ok = myri10ge_reset_nic(mgp);
4959 } else {
4960 cmn_err(CE_WARN,
4961 "%s Flow controlled, check link partner\n",
4962 mgp->name);
4966 if (!nic_ok) {
4967 cmn_err(CE_WARN,
4968 "%s Nic dead, not rearming watchdog\n", mgp->name);
4969 mutex_exit(&mgp->intrlock);
4970 return;
4972 for (i = 0; i < mgp->num_slices; i++) {
4973 ss = &mgp->ss[i];
4974 tx = &ss->tx;
4975 tx->watchdog_done = tx->done;
4976 tx->watchdog_req = tx->req;
4977 if (ss->watchdog_rx_copy != MYRI10GE_SLICE_STAT(rx_copy)) {
4978 ss->watchdog_rx_copy = MYRI10GE_SLICE_STAT(rx_copy);
4979 add_rx =
4980 min(ss->jpool.num_alloc,
4981 myri10ge_bigbufs_max -
4982 (ss->jpool.num_alloc -
4983 ss->jbufs_for_smalls));
4984 if (add_rx != 0) {
4985 (void) myri10ge_add_jbufs(ss, add_rx, 0);
4986 /* now feed them to the firmware */
4987 mutex_enter(&ss->jpool.mtx);
4988 myri10ge_restock_jumbos(ss);
4989 mutex_exit(&ss->jpool.mtx);
4993 mgp->watchdog_rx_pause = rx_pause;
4995 mgp->timer_id = timeout(myri10ge_watchdog, mgp,
4996 mgp->timer_ticks);
4997 mutex_exit(&mgp->intrlock);
5000 /*ARGSUSED*/
5001 static int
5002 myri10ge_get_coalesce(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp)
5004 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
5005 (void) mi_mpprintf(mp, "%d", mgp->intr_coal_delay);
5006 return (0);
5009 /*ARGSUSED*/
5010 static int
5011 myri10ge_set_coalesce(queue_t *q, mblk_t *mp, char *value,
5012 caddr_t cp, cred_t *credp)
5014 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
5015 char *end;
5016 size_t new_value;
5018 new_value = mi_strtol(value, &end, 10);
5019 if (end == value)
5020 return (EINVAL);
5022 mutex_enter(&myri10ge_param_lock);
5023 mgp->intr_coal_delay = (int)new_value;
5024 *mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay);
5025 mutex_exit(&myri10ge_param_lock);
5026 return (0);
5029 /*ARGSUSED*/
5030 static int
5031 myri10ge_get_pauseparam(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp)
5033 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
5034 (void) mi_mpprintf(mp, "%d", mgp->pause);
5035 return (0);
5038 /*ARGSUSED*/
5039 static int
5040 myri10ge_set_pauseparam(queue_t *q, mblk_t *mp, char *value,
5041 caddr_t cp, cred_t *credp)
5043 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
5044 char *end;
5045 size_t new_value;
5046 int err = 0;
5048 new_value = mi_strtol(value, &end, 10);
5049 if (end == value)
5050 return (EINVAL);
5051 if (new_value != 0)
5052 new_value = 1;
5054 mutex_enter(&myri10ge_param_lock);
5055 if (new_value != mgp->pause)
5056 err = myri10ge_change_pause(mgp, new_value);
5057 mutex_exit(&myri10ge_param_lock);
5058 return (err);
5061 /*ARGSUSED*/
5062 static int
5063 myri10ge_get_int(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp)
5065 (void) mi_mpprintf(mp, "%d", *(int *)(void *)cp);
5066 return (0);
5069 /*ARGSUSED*/
5070 static int
5071 myri10ge_set_int(queue_t *q, mblk_t *mp, char *value,
5072 caddr_t cp, cred_t *credp)
5074 char *end;
5075 size_t new_value;
5077 new_value = mi_strtol(value, &end, 10);
5078 if (end == value)
5079 return (EINVAL);
5080 *(int *)(void *)cp = new_value;
5082 return (0);
5085 static void
5086 myri10ge_ndd_init(struct myri10ge_priv *mgp)
5088 mgp->nd_head = NULL;
5090 (void) nd_load(&mgp->nd_head, "myri10ge_intr_coal_delay",
5091 myri10ge_get_coalesce, myri10ge_set_coalesce, (caddr_t)mgp);
5092 (void) nd_load(&mgp->nd_head, "myri10ge_flow_control",
5093 myri10ge_get_pauseparam, myri10ge_set_pauseparam, (caddr_t)mgp);
5094 (void) nd_load(&mgp->nd_head, "myri10ge_verbose",
5095 myri10ge_get_int, myri10ge_set_int, (caddr_t)&myri10ge_verbose);
5096 (void) nd_load(&mgp->nd_head, "myri10ge_deassert_wait",
5097 myri10ge_get_int, myri10ge_set_int,
5098 (caddr_t)&myri10ge_deassert_wait);
5099 (void) nd_load(&mgp->nd_head, "myri10ge_bigbufs_max",
5100 myri10ge_get_int, myri10ge_set_int,
5101 (caddr_t)&myri10ge_bigbufs_max);
5102 (void) nd_load(&mgp->nd_head, "myri10ge_lro",
5103 myri10ge_get_int, myri10ge_set_int,
5104 (caddr_t)&myri10ge_lro);
5105 (void) nd_load(&mgp->nd_head, "myri10ge_lro_max_aggr",
5106 myri10ge_get_int, myri10ge_set_int,
5107 (caddr_t)&myri10ge_lro_max_aggr);
5108 (void) nd_load(&mgp->nd_head, "myri10ge_tx_hash",
5109 myri10ge_get_int, myri10ge_set_int,
5110 (caddr_t)&myri10ge_tx_hash);
5111 (void) nd_load(&mgp->nd_head, "myri10ge_lso_copy",
5112 myri10ge_get_int, myri10ge_set_int,
5113 (caddr_t)&myri10ge_lso_copy);
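/*
 * These parameters are then reachable through the ND_GET/ND_SET
 * ioctls handled by myri10ge_m_ioctl() below; e.g. (the device node
 * path is illustrative and depends on how the instance is plumbed):
 *
 *	# ndd -get /dev/myri10ge0 myri10ge_intr_coal_delay
 *	# ndd -set /dev/myri10ge0 myri10ge_intr_coal_delay 50
 */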
5116 static void
5117 myri10ge_ndd_fini(struct myri10ge_priv *mgp)
5119 nd_free(&mgp->nd_head);
5122 static void
5123 myri10ge_m_ioctl(void *arg, queue_t *wq, mblk_t *mp)
5125 struct iocblk *iocp;
5126 struct myri10ge_priv *mgp = arg;
5127 int cmd, ok, err;
5129 iocp = (struct iocblk *)(void *)mp->b_rptr;
5130 cmd = iocp->ioc_cmd;
5132 ok = 0;
5133 err = 0;
5135 switch (cmd) {
5136 case ND_GET:
5137 case ND_SET:
5138 ok = nd_getset(wq, mgp->nd_head, mp);
5139 break;
5140 default:
5141 break;
5143 if (!ok)
5144 err = EINVAL;
5145 else
5146 err = iocp->ioc_error;
5148 if (!err)
5149 miocack(wq, mp, iocp->ioc_count, err);
5150 else
5151 miocnak(wq, mp, 0, err);
5154 static struct myri10ge_priv *mgp_list;
5156 struct myri10ge_priv *
5157 myri10ge_get_instance(uint_t unit)
5159 struct myri10ge_priv *mgp;
5161 mutex_enter(&myri10ge_param_lock);
5162 for (mgp = mgp_list; mgp != NULL; mgp = mgp->next) {
5163 if (unit == ddi_get_instance(mgp->dip)) {
5164 mgp->refcnt++;
5165 break;
5168 mutex_exit(&myri10ge_param_lock);
5169 return (mgp);
5172 void
5173 myri10ge_put_instance(struct myri10ge_priv *mgp)
5175 mutex_enter(&myri10ge_param_lock);
5176 mgp->refcnt--;
5177 mutex_exit(&myri10ge_param_lock);
5180 static boolean_t
5181 myri10ge_m_getcapab(void *arg, mac_capab_t cap, void *cap_data)
5183 struct myri10ge_priv *mgp = arg;
5184 uint32_t *cap_hcksum;
5185 mac_capab_lso_t *cap_lso;
5186 mac_capab_rings_t *cap_rings;
5188 switch (cap) {
5189 case MAC_CAPAB_HCKSUM:
5190 cap_hcksum = cap_data;
5191 *cap_hcksum = HCKSUM_INET_PARTIAL;
5192 break;
5193 case MAC_CAPAB_RINGS:
5194 cap_rings = cap_data;
5195 switch (cap_rings->mr_type) {
5196 case MAC_RING_TYPE_RX:
5197 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
5198 cap_rings->mr_rnum = mgp->num_slices;
5199 cap_rings->mr_gnum = 1;
5200 cap_rings->mr_rget = myri10ge_fill_ring;
5201 cap_rings->mr_gget = myri10ge_fill_group;
5202 break;
5203 case MAC_RING_TYPE_TX:
5204 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
5205 cap_rings->mr_rnum = mgp->num_slices;
5206 cap_rings->mr_gnum = 0;
5207 cap_rings->mr_rget = myri10ge_fill_ring;
5208 cap_rings->mr_gget = NULL;
5209 break;
5210 default:
5211 return (B_FALSE);
5213 break;
5214 case MAC_CAPAB_LSO:
5215 cap_lso = cap_data;
5216 if (!myri10ge_use_lso)
5217 return (B_FALSE);
5218 if (!(mgp->features & MYRI10GE_TSO))
5219 return (B_FALSE);
5220 cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
5221 cap_lso->lso_basic_tcp_ipv4.lso_max = (uint16_t)-1;
5222 break;
5224 default:
5225 return (B_FALSE);
5227 return (B_TRUE);
5231 static int
5232 myri10ge_m_stat(void *arg, uint_t stat, uint64_t *val)
5234 struct myri10ge_priv *mgp = arg;
5235 struct myri10ge_rx_ring_stats *rstat;
5236 struct myri10ge_tx_ring_stats *tstat;
5237 mcp_irq_data_t *fw_stats = mgp->ss[0].fw_stats;
5238 struct myri10ge_slice_state *ss;
5239 uint64_t tmp = 0;
5240 int i;
5242 switch (stat) {
5243 case MAC_STAT_IFSPEED:
5244 *val = 10ull * 1000ull * 1000000ull;
5245 break;
5247 case MAC_STAT_MULTIRCV:
5248 for (i = 0; i < mgp->num_slices; i++) {
5249 rstat = &mgp->ss[i].rx_stats;
5250 tmp += rstat->multircv;
5252 *val = tmp;
5253 break;
5255 case MAC_STAT_BRDCSTRCV:
5256 for (i = 0; i < mgp->num_slices; i++) {
5257 rstat = &mgp->ss[i].rx_stats;
5258 tmp += rstat->brdcstrcv;
5260 *val = tmp;
5261 break;
5263 case MAC_STAT_MULTIXMT:
5264 for (i = 0; i < mgp->num_slices; i++) {
5265 tstat = &mgp->ss[i].tx.stats;
5266 tmp += tstat->multixmt;
5268 *val = tmp;
5269 break;
5271 case MAC_STAT_BRDCSTXMT:
5272 for (i = 0; i < mgp->num_slices; i++) {
5273 tstat = &mgp->ss[i].tx.stats;
5274 tmp += tstat->brdcstxmt;
5276 *val = tmp;
5277 break;
5279 case MAC_STAT_NORCVBUF:
5280 tmp = ntohl(fw_stats->dropped_no_big_buffer);
5281 tmp += ntohl(fw_stats->dropped_no_small_buffer);
5282 tmp += ntohl(fw_stats->dropped_link_overflow);
5283 for (i = 0; i < mgp->num_slices; i++) {
5284 ss = &mgp->ss[i];
5285 tmp += MYRI10GE_SLICE_STAT(rx_big_nobuf);
5286 tmp += MYRI10GE_SLICE_STAT(rx_small_nobuf);
5288 *val = tmp;
5289 break;
5291 case MAC_STAT_IERRORS:
5292 tmp += ntohl(fw_stats->dropped_bad_crc32);
5293 tmp += ntohl(fw_stats->dropped_bad_phy);
5294 tmp += ntohl(fw_stats->dropped_runt);
5295 tmp += ntohl(fw_stats->dropped_overrun);
5296 *val = tmp;
5297 break;
5299 case MAC_STAT_OERRORS:
5300 for (i = 0; i < mgp->num_slices; i++) {
5301 ss = &mgp->ss[i];
5302 tmp += MYRI10GE_SLICE_STAT(xmit_lsobadflags);
5303 tmp += MYRI10GE_SLICE_STAT(xmit_err);
5305 *val = tmp;
5306 break;
5308 case MAC_STAT_RBYTES:
5309 for (i = 0; i < mgp->num_slices; i++) {
5310 rstat = &mgp->ss[i].rx_stats;
5311 tmp += rstat->ibytes;
5313 *val = tmp;
5314 break;
5316 case MAC_STAT_IPACKETS:
5317 for (i = 0; i < mgp->num_slices; i++) {
5318 rstat = &mgp->ss[i].rx_stats;
5319 tmp += rstat->ipackets;
5321 *val = tmp;
5322 break;
5324 case MAC_STAT_OBYTES:
5325 for (i = 0; i < mgp->num_slices; i++) {
5326 tstat = &mgp->ss[i].tx.stats;
5327 tmp += tstat->obytes;
5329 *val = tmp;
5330 break;
5332 case MAC_STAT_OPACKETS:
5333 for (i = 0; i < mgp->num_slices; i++) {
5334 tstat = &mgp->ss[i].tx.stats;
5335 tmp += tstat->opackets;
5337 *val = tmp;
5338 break;
5340 case ETHER_STAT_TOOLONG_ERRORS:
5341 *val = ntohl(fw_stats->dropped_overrun);
5342 break;
5344 #ifdef SOLARIS_S11
5345 case ETHER_STAT_TOOSHORT_ERRORS:
5346 *val = ntohl(fw_stats->dropped_runt);
5347 break;
5348 #endif
5350 case ETHER_STAT_LINK_PAUSE:
5351 *val = mgp->pause;
5352 break;
5354 case ETHER_STAT_LINK_AUTONEG:
5355 *val = 1;
5356 break;
5358 case ETHER_STAT_LINK_DUPLEX:
5359 *val = LINK_DUPLEX_FULL;
5360 break;
5362 default:
5363 return (ENOTSUP);
5366 return (0);
5369 /* ARGSUSED */
5370 static void
5371 myri10ge_m_propinfo(void *arg, const char *pr_name,
5372 mac_prop_id_t pr_num, mac_prop_info_handle_t prh)
5374 switch (pr_num) {
5375 case MAC_PROP_MTU:
5376 mac_prop_info_set_default_uint32(prh, MYRI10GE_DEFAULT_GLD_MTU);
5377 mac_prop_info_set_range_uint32(prh, MYRI10GE_MIN_GLD_MTU,
5378 MYRI10GE_MAX_GLD_MTU);
5379 break;
5380 default:
5381 break;
5385 /*ARGSUSED*/
5386 static int
5387 myri10ge_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
5388 uint_t pr_valsize, const void *pr_val)
5390 int err = 0;
5391 struct myri10ge_priv *mgp = arg;
5393 switch (pr_num) {
5394 case MAC_PROP_MTU: {
5395 uint32_t mtu;
5396 if (pr_valsize < sizeof (mtu)) {
5397 err = EINVAL;
5398 break;
5400 bcopy(pr_val, &mtu, sizeof (mtu));
5401 if (mtu > MYRI10GE_MAX_GLD_MTU ||
5402 mtu < MYRI10GE_MIN_GLD_MTU) {
5403 err = EINVAL;
5404 break;
5407 mutex_enter(&mgp->intrlock);
5408 if (mgp->running != MYRI10GE_ETH_STOPPED) {
5409 err = EBUSY;
5410 mutex_exit(&mgp->intrlock);
5411 break;
5414 myri10ge_mtu = mtu + sizeof (struct ether_header) +
5415 MXGEFW_PAD + VLAN_TAGSZ;
5416 mutex_exit(&mgp->intrlock);
5417 break;
5419 default:
5420 err = ENOTSUP;
5421 break;
5424 return (err);
5427 static mac_callbacks_t myri10ge_m_callbacks = {
5428 (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO),
5429 myri10ge_m_stat,
5430 myri10ge_m_start,
5431 myri10ge_m_stop,
5432 myri10ge_m_promisc,
5433 myri10ge_m_multicst,
5434 NULL,
5435 NULL,
5436 NULL,
5437 myri10ge_m_ioctl,
5438 myri10ge_m_getcapab,
5439 NULL,
5440 NULL,
5441 myri10ge_m_setprop,
5442 NULL,
5443 myri10ge_m_propinfo
5447 static int
5448 myri10ge_probe_slices(struct myri10ge_priv *mgp)
5450 myri10ge_cmd_t cmd;
5451 int status;
5453 mgp->num_slices = 1;
5455 /* hit the board with a reset to ensure it is alive */
5456 (void) memset(&cmd, 0, sizeof (cmd));
5457 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd);
5458 if (status != 0) {
5459 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
5460 return (ENXIO);
5463 if (myri10ge_use_msix == 0)
5464 return (0);
5466 /* tell it the size of the interrupt queues */
5467 cmd.data0 = mgp->max_intr_slots * sizeof (struct mcp_slot);
5468 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
5469 if (status != 0) {
5470 cmn_err(CE_WARN, "%s: failed MXGEFW_CMD_SET_INTRQ_SIZE\n",
5471 mgp->name);
5472 return (ENXIO);
5475 /* ask the maximum number of slices it supports */
5476 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
5477 &cmd);
5478 if (status != 0)
5479 return (0);
5481 mgp->num_slices = cmd.data0;
5484 * if the admin did not specify a limit to how many
5485 * slices we should use, cap it automatically to the
5486 * number of CPUs currently online
5488 if (myri10ge_max_slices == -1)
5489 myri10ge_max_slices = ncpus;
5491 if (mgp->num_slices > myri10ge_max_slices)
5492 mgp->num_slices = myri10ge_max_slices;
5496 * Now try to allocate as many MSI-X vectors as we have
5497 * slices. We give up on MSI-X if we can only get a single
5498 * vector.
5500 while (mgp->num_slices > 1) {
5501 /* make sure it is a power of two */
5502 while (!ISP2(mgp->num_slices))
5503 mgp->num_slices--;
5504 if (mgp->num_slices == 1)
5505 return (0);
5507 status = myri10ge_add_intrs(mgp, 0);
5508 if (status == 0) {
5509 myri10ge_rem_intrs(mgp, 0);
5510 if (mgp->intr_cnt == mgp->num_slices) {
5511 if (myri10ge_verbose)
5512 printf("Got %d slices!\n",
5513 mgp->num_slices);
5514 return (0);
5516 mgp->num_slices = mgp->intr_cnt;
5517 } else {
5518 mgp->num_slices = mgp->num_slices / 2;
5522 if (myri10ge_verbose)
5523 printf("Got %d slices\n", mgp->num_slices);
5524 return (0);
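/*
 * Worked example of the loop above: if the firmware reports 16 RSS
 * queues on an 8-CPU host, num_slices is first capped at 8; if MSI-X
 * allocation then yields only 5 vectors, num_slices drops to 5, is
 * rounded down to a power of two (4), and the allocation is retried.
 */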
5527 static void
5528 myri10ge_lro_free(struct myri10ge_slice_state *ss)
5530 struct lro_entry *lro;
5532 while (ss->lro_free != NULL) {
5533 lro = ss->lro_free;
5534 ss->lro_free = lro->next;
5535 kmem_free(lro, sizeof (*lro));
5539 static void
5540 myri10ge_lro_alloc(struct myri10ge_slice_state *ss)
5542 struct lro_entry *lro;
5543 int idx;
5545 ss->lro_free = NULL;
5546 ss->lro_active = NULL;
5548 for (idx = 0; idx < myri10ge_lro_cnt; idx++) {
5549 lro = kmem_zalloc(sizeof (*lro), KM_SLEEP);
5550 if (lro == NULL)
5551 continue;
5552 lro->next = ss->lro_free;
5553 ss->lro_free = lro;
5557 static void
5558 myri10ge_free_slices(struct myri10ge_priv *mgp)
5560 struct myri10ge_slice_state *ss;
5561 size_t bytes;
5562 int i;
5564 if (mgp->ss == NULL)
5565 return;
5567 for (i = 0; i < mgp->num_slices; i++) {
5568 ss = &mgp->ss[i];
5569 if (ss->rx_done.entry == NULL)
5570 continue;
5571 myri10ge_dma_free(&ss->rx_done.dma);
5572 ss->rx_done.entry = NULL;
5573 if (ss->fw_stats == NULL)
5574 continue;
5575 myri10ge_dma_free(&ss->fw_stats_dma);
5576 ss->fw_stats = NULL;
5577 mutex_destroy(&ss->rx_lock);
5578 mutex_destroy(&ss->tx.lock);
5579 mutex_destroy(&ss->tx.handle_lock);
5580 mutex_destroy(&ss->poll_lock);
5581 myri10ge_jpool_fini(ss);
5582 myri10ge_slice_stat_destroy(ss);
5583 myri10ge_lro_free(ss);
5585 bytes = sizeof (*mgp->ss) * mgp->num_slices;
5586 kmem_free(mgp->ss, bytes);
5587 mgp->ss = NULL;
5591 static int
5592 myri10ge_alloc_slices(struct myri10ge_priv *mgp)
5594 struct myri10ge_slice_state *ss;
5595 size_t bytes;
5596 int i;
5598 bytes = sizeof (*mgp->ss) * mgp->num_slices;
5599 mgp->ss = kmem_zalloc(bytes, KM_SLEEP);
5600 if (mgp->ss == NULL)
5601 return (ENOMEM);
5602 for (i = 0; i < mgp->num_slices; i++) {
5603 ss = &mgp->ss[i];
5605 ss->mgp = mgp;
5607 /* allocate the per-slice firmware stats */
5608 bytes = sizeof (*ss->fw_stats);
5609 ss->fw_stats = (mcp_irq_data_t *)(void *)
5610 myri10ge_dma_alloc(mgp->dip, bytes,
5611 &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
5612 DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT,
5613 &ss->fw_stats_dma, 1, DDI_DMA_DONTWAIT);
5614 if (ss->fw_stats == NULL)
5615 goto abort;
5616 (void) memset(ss->fw_stats, 0, bytes);
5618 /* allocate rx done ring */
5619 bytes = mgp->max_intr_slots *
5620 sizeof (*ss->rx_done.entry);
5621 ss->rx_done.entry = (mcp_slot_t *)(void *)
5622 myri10ge_dma_alloc(mgp->dip, bytes,
5623 &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
5624 DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT,
5625 &ss->rx_done.dma, 1, DDI_DMA_DONTWAIT);
5626 if (ss->rx_done.entry == NULL) {
5627 goto abort;
5629 (void) memset(ss->rx_done.entry, 0, bytes);
5630 mutex_init(&ss->rx_lock, NULL, MUTEX_DEFAULT, mgp->icookie);
5631 mutex_init(&ss->tx.lock, NULL, MUTEX_DEFAULT, NULL);
5632 mutex_init(&ss->tx.handle_lock, NULL, MUTEX_DEFAULT, NULL);
5633 mutex_init(&ss->poll_lock, NULL, MUTEX_DEFAULT, NULL);
5634 myri10ge_jpool_init(ss);
5635 (void) myri10ge_slice_stat_init(ss);
5636 myri10ge_lro_alloc(ss);
5639 return (0);
5641 abort:
5642 myri10ge_free_slices(mgp);
5643 return (ENOMEM);
5646 static int
5647 myri10ge_save_msi_state(struct myri10ge_priv *mgp,
5648 ddi_acc_handle_t handle)
5650 uint8_t ptr;
5651 int err;
5653 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI);
5654 if (err != 0) {
5655 cmn_err(CE_WARN, "%s: could not find MSI cap\n",
5656 mgp->name);
5657 return (DDI_FAILURE);
5659 mgp->pci_saved_state.msi_ctrl =
5660 pci_config_get16(handle, ptr + PCI_MSI_CTRL);
5661 mgp->pci_saved_state.msi_addr_low =
5662 pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET);
5663 mgp->pci_saved_state.msi_addr_high =
5664 pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4);
5665 mgp->pci_saved_state.msi_data_32 =
5666 pci_config_get16(handle, ptr + PCI_MSI_32BIT_DATA);
5667 mgp->pci_saved_state.msi_data_64 =
5668 pci_config_get16(handle, ptr + PCI_MSI_64BIT_DATA);
5669 return (DDI_SUCCESS);
5672 static int
5673 myri10ge_restore_msi_state(struct myri10ge_priv *mgp,
5674 ddi_acc_handle_t handle)
5676 uint8_t ptr;
5677 int err;
5679 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI);
5680 if (err != 0) {
5681 cmn_err(CE_WARN, "%s: could not find MSI cap\n",
5682 mgp->name);
5683 return (DDI_FAILURE);
5686 pci_config_put16(handle, ptr + PCI_MSI_CTRL,
5687 mgp->pci_saved_state.msi_ctrl);
5688 pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET,
5689 mgp->pci_saved_state.msi_addr_low);
5690 pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4,
5691 mgp->pci_saved_state.msi_addr_high);
5692 pci_config_put16(handle, ptr + PCI_MSI_32BIT_DATA,
5693 mgp->pci_saved_state.msi_data_32);
5694 pci_config_put16(handle, ptr + PCI_MSI_64BIT_DATA,
5695 mgp->pci_saved_state.msi_data_64);
5697 return (DDI_SUCCESS);
5700 static int
5701 myri10ge_save_pci_state(struct myri10ge_priv *mgp)
5703 ddi_acc_handle_t handle = mgp->cfg_hdl;
5704 int i;
5705 int err = DDI_SUCCESS;
5708 /* Save the non-extended PCI config space 32-bits at a time */
5709 for (i = 0; i < 16; i++)
5710 mgp->pci_saved_state.base[i] =
5711 pci_config_get32(handle, i*4);
	/* now save MSI interrupt state, if needed */
5714 if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI)
5715 err = myri10ge_save_msi_state(mgp, handle);
5717 return (err);
5720 static int
5721 myri10ge_restore_pci_state(struct myri10ge_priv *mgp)
5723 ddi_acc_handle_t handle = mgp->cfg_hdl;
5724 int i;
5725 int err = DDI_SUCCESS;
5728 /* Restore the non-extended PCI config space 32-bits at a time */
5729 for (i = 15; i >= 0; i--)
5730 pci_config_put32(handle, i*4, mgp->pci_saved_state.base[i]);
	/* now restore MSI interrupt state, if needed */
5733 if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI)
5734 err = myri10ge_restore_msi_state(mgp, handle);
5736 if (mgp->max_read_request_4k)
5737 (void) myri10ge_set_max_readreq(handle);
5738 return (err);
5742 static int
5743 myri10ge_suspend(dev_info_t *dip)
5745 struct myri10ge_priv *mgp = ddi_get_driver_private(dip);
5746 int status;
5748 if (mgp == NULL) {
5749 cmn_err(CE_WARN, "null dip in myri10ge_suspend\n");
5750 return (DDI_FAILURE);
5752 if (mgp->dip != dip) {
5753 cmn_err(CE_WARN, "bad dip in myri10ge_suspend\n");
5754 return (DDI_FAILURE);
5756 mutex_enter(&mgp->intrlock);
5757 if (mgp->running == MYRI10GE_ETH_RUNNING) {
5758 mgp->running = MYRI10GE_ETH_STOPPING;
5759 mutex_exit(&mgp->intrlock);
5760 (void) untimeout(mgp->timer_id);
5761 mutex_enter(&mgp->intrlock);
5762 myri10ge_stop_locked(mgp);
5763 mgp->running = MYRI10GE_ETH_SUSPENDED_RUNNING;
5765 status = myri10ge_save_pci_state(mgp);
5766 mutex_exit(&mgp->intrlock);
5767 return (status);
5770 static int
5771 myri10ge_resume(dev_info_t *dip)
5773 struct myri10ge_priv *mgp = ddi_get_driver_private(dip);
5774 int status = DDI_SUCCESS;
5776 if (mgp == NULL) {
5777 cmn_err(CE_WARN, "null dip in myri10ge_resume\n");
5778 return (DDI_FAILURE);
5780 if (mgp->dip != dip) {
5781 cmn_err(CE_WARN, "bad dip in myri10ge_resume\n");
5782 return (DDI_FAILURE);
5785 mutex_enter(&mgp->intrlock);
5786 status = myri10ge_restore_pci_state(mgp);
5787 if (status == DDI_SUCCESS &&
5788 mgp->running == MYRI10GE_ETH_SUSPENDED_RUNNING) {
5789 status = myri10ge_start_locked(mgp);
5791 mutex_exit(&mgp->intrlock);
5792 if (status != DDI_SUCCESS)
5793 return (status);
5795 /* start the watchdog timer */
5796 mgp->timer_id = timeout(myri10ge_watchdog, mgp,
5797 mgp->timer_ticks);
5798 return (DDI_SUCCESS);
5801 static int
5802 myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5805 struct myri10ge_priv *mgp;
5806 mac_register_t *macp, *omacp;
5807 ddi_acc_handle_t handle;
5808 uint32_t csr, hdr_offset;
5809 int status, span, link_width, max_read_request_4k;
5810 unsigned long bus_number, dev_number, func_number;
5811 size_t bytes;
5812 offset_t ss_offset;
5813 uint8_t vso;
5815 if (cmd == DDI_RESUME) {
5816 return (myri10ge_resume(dip));
5819 if (cmd != DDI_ATTACH)
5820 return (DDI_FAILURE);
5821 if (pci_config_setup(dip, &handle) != DDI_SUCCESS)
5822 return (DDI_FAILURE);
	/* enable bus master and memory space access */
5825 csr = pci_config_get32(handle, PCI_CONF_COMM);
5826 pci_config_put32(handle, PCI_CONF_COMM,
5827 (csr |PCI_COMM_ME|PCI_COMM_MAE));
5828 status = myri10ge_read_pcie_link_width(handle, &link_width);
5829 if (status != 0) {
5830 cmn_err(CE_WARN, "could not read link width!\n");
5831 link_width = 0;
5833 max_read_request_4k = !myri10ge_set_max_readreq(handle);
5834 status = myri10ge_find_cap(handle, &vso, PCI_CAP_ID_VS);
5835 if (status != 0)
5836 goto abort_with_cfg_hdl;
5837 if ((omacp = mac_alloc(MAC_VERSION)) == NULL)
5838 goto abort_with_cfg_hdl;
	/*
	 * XXXX Hack: mac_register_t grows in newer kernels.  To be
	 * able to write newer fields, such as m_margin, without
	 * writing outside allocated memory, we allocate our own macp
	 * and pass that to mac_register()
	 */
5845 macp = kmem_zalloc(sizeof (*macp) * 8, KM_SLEEP);
5846 macp->m_version = omacp->m_version;
5848 if ((mgp = (struct myri10ge_priv *)
5849 kmem_zalloc(sizeof (*mgp), KM_SLEEP)) == NULL) {
5850 goto abort_with_macinfo;
5852 ddi_set_driver_private(dip, mgp);
5854 /* setup device name for log messages */
5855 (void) sprintf(mgp->name, "myri10ge%d", ddi_get_instance(dip));
5857 mutex_enter(&myri10ge_param_lock);
5858 myri10ge_get_props(dip);
5859 mgp->intr_coal_delay = myri10ge_intr_coal_delay;
5860 mgp->pause = myri10ge_flow_control;
5861 mutex_exit(&myri10ge_param_lock);
5863 mgp->max_read_request_4k = max_read_request_4k;
5864 mgp->pcie_link_width = link_width;
5865 mgp->running = MYRI10GE_ETH_STOPPED;
5866 mgp->vso = vso;
5867 mgp->dip = dip;
5868 mgp->cfg_hdl = handle;
5870 mgp->timer_ticks = 5 * drv_usectohz(1000000); /* 5 seconds */
5871 myri10ge_test_physical(dip);
5873 /* allocate command page */
5874 bytes = sizeof (*mgp->cmd);
5875 mgp->cmd = (mcp_cmd_response_t *)
5876 (void *)myri10ge_dma_alloc(dip, bytes,
5877 &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
5878 DDI_DMA_CONSISTENT, DDI_DMA_RDWR|DDI_DMA_CONSISTENT,
5879 &mgp->cmd_dma, 1, DDI_DMA_DONTWAIT);
5880 if (mgp->cmd == NULL)
5881 goto abort_with_mgp;
5883 (void) myri10ge_reg_set(dip, &mgp->reg_set, &span, &bus_number,
5884 &dev_number, &func_number);
5885 if (myri10ge_verbose)
5886 printf("%s at %ld:%ld:%ld attaching\n", mgp->name,
5887 bus_number, dev_number, func_number);
5888 status = ddi_regs_map_setup(dip, mgp->reg_set, (caddr_t *)&mgp->sram,
5889 (offset_t)0, (offset_t)span, &myri10ge_dev_access_attr,
5890 &mgp->io_handle);
5891 if (status != DDI_SUCCESS) {
5892 cmn_err(CE_WARN, "%s: couldn't map memory space", mgp->name);
5893 printf("%s: reg_set = %d, span = %d, status = %d",
5894 mgp->name, mgp->reg_set, span, status);
5895 goto abort_with_mgp;
5898 hdr_offset = *(uint32_t *)(void*)(mgp->sram + MCP_HEADER_PTR_OFFSET);
5899 hdr_offset = ntohl(hdr_offset) & 0xffffc;
5900 ss_offset = hdr_offset +
5901 offsetof(struct mcp_gen_header, string_specs);
5902 mgp->sram_size = ntohl(*(uint32_t *)(void*)(mgp->sram + ss_offset));
5903 myri10ge_pio_copy32(mgp->eeprom_strings,
5904 (uint32_t *)(void*)((char *)mgp->sram + mgp->sram_size),
5905 MYRI10GE_EEPROM_STRINGS_SIZE);
5906 (void) memset(mgp->eeprom_strings +
5907 MYRI10GE_EEPROM_STRINGS_SIZE - 2, 0, 2);
5909 status = myri10ge_read_mac_addr(mgp);
5910 if (status) {
5911 goto abort_with_mapped;
5914 status = myri10ge_select_firmware(mgp);
5915 if (status != 0) {
5916 cmn_err(CE_WARN, "%s: failed to load firmware\n", mgp->name);
5917 goto abort_with_mapped;
5920 status = myri10ge_probe_slices(mgp);
5921 if (status != 0) {
5922 cmn_err(CE_WARN, "%s: failed to probe slices\n", mgp->name);
5923 goto abort_with_dummy_rdma;
5926 status = myri10ge_alloc_slices(mgp);
5927 if (status != 0) {
5928 cmn_err(CE_WARN, "%s: failed to alloc slices\n", mgp->name);
5929 goto abort_with_dummy_rdma;
5932 /* add the interrupt handler */
5933 status = myri10ge_add_intrs(mgp, 1);
5934 if (status != 0) {
5935 cmn_err(CE_WARN, "%s: Failed to add interrupt\n",
5936 mgp->name);
5937 goto abort_with_slices;
5940 /* now that we have an iblock_cookie, init the mutexes */
5941 mutex_init(&mgp->cmd_lock, NULL, MUTEX_DRIVER, mgp->icookie);
5942 mutex_init(&mgp->intrlock, NULL, MUTEX_DRIVER, mgp->icookie);
5945 status = myri10ge_nic_stat_init(mgp);
5946 if (status != DDI_SUCCESS)
5947 goto abort_with_interrupts;
5948 status = myri10ge_info_init(mgp);
5949 if (status != DDI_SUCCESS)
5950 goto abort_with_stats;
5953 * Initialize GLD state
5956 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
5957 macp->m_driver = mgp;
5958 macp->m_dip = dip;
5959 macp->m_src_addr = mgp->mac_addr;
5960 macp->m_callbacks = &myri10ge_m_callbacks;
5961 macp->m_min_sdu = 0;
5962 macp->m_max_sdu = myri10ge_mtu -
5963 (sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ);
5964 #ifdef SOLARIS_S11
5965 macp->m_margin = VLAN_TAGSZ;
5966 #endif
5967 macp->m_v12n = MAC_VIRT_LEVEL1;
5968 status = mac_register(macp, &mgp->mh);
5969 if (status != 0) {
5970 cmn_err(CE_WARN, "%s: mac_register failed with %d\n",
5971 mgp->name, status);
5972 goto abort_with_info;
5974 myri10ge_ndd_init(mgp);
5975 if (myri10ge_verbose)
5976 printf("%s: %s, tx bndry %d, fw %s\n", mgp->name,
5977 mgp->intr_type, mgp->tx_boundary, mgp->fw_name);
5978 mutex_enter(&myri10ge_param_lock);
5979 mgp->next = mgp_list;
5980 mgp_list = mgp;
5981 mutex_exit(&myri10ge_param_lock);
5982 kmem_free(macp, sizeof (*macp) * 8);
5983 mac_free(omacp);
5984 return (DDI_SUCCESS);
5986 abort_with_info:
5987 myri10ge_info_destroy(mgp);
5989 abort_with_stats:
5990 myri10ge_nic_stat_destroy(mgp);
5992 abort_with_interrupts:
5993 mutex_destroy(&mgp->cmd_lock);
5994 mutex_destroy(&mgp->intrlock);
5995 myri10ge_rem_intrs(mgp, 1);
5997 abort_with_slices:
5998 myri10ge_free_slices(mgp);
6000 abort_with_dummy_rdma:
6001 myri10ge_dummy_rdma(mgp, 0);
6003 abort_with_mapped:
6004 ddi_regs_map_free(&mgp->io_handle);
6006 myri10ge_dma_free(&mgp->cmd_dma);
6008 abort_with_mgp:
6009 kmem_free(mgp, sizeof (*mgp));
6011 abort_with_macinfo:
6012 kmem_free(macp, sizeof (*macp) * 8);
6013 mac_free(omacp);
6015 abort_with_cfg_hdl:
6016 pci_config_teardown(&handle);
6017 return (DDI_FAILURE);
6022 static int
6023 myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
6025 struct myri10ge_priv *mgp, *tmp;
6026 int status, i, jbufs_alloced;
6028 if (cmd == DDI_SUSPEND) {
6029 status = myri10ge_suspend(dip);
6030 return (status);
6033 if (cmd != DDI_DETACH) {
6034 return (DDI_FAILURE);
6036 /* Get the driver private (gld_mac_info_t) structure */
6037 mgp = ddi_get_driver_private(dip);
6039 mutex_enter(&mgp->intrlock);
6040 jbufs_alloced = 0;
6041 for (i = 0; i < mgp->num_slices; i++) {
6042 myri10ge_remove_jbufs(&mgp->ss[i]);
6043 jbufs_alloced += mgp->ss[i].jpool.num_alloc;
6045 mutex_exit(&mgp->intrlock);
6046 if (jbufs_alloced != 0) {
6047 cmn_err(CE_NOTE, "%s: %d loaned rx buffers remain\n",
6048 mgp->name, jbufs_alloced);
6049 return (DDI_FAILURE);
6052 mutex_enter(&myri10ge_param_lock);
6053 if (mgp->refcnt != 0) {
6054 mutex_exit(&myri10ge_param_lock);
6055 cmn_err(CE_NOTE, "%s: %d external refs remain\n",
6056 mgp->name, mgp->refcnt);
6057 return (DDI_FAILURE);
6059 mutex_exit(&myri10ge_param_lock);
6061 status = mac_unregister(mgp->mh);
6062 if (status != DDI_SUCCESS)
6063 return (status);
6065 myri10ge_ndd_fini(mgp);
6066 myri10ge_dummy_rdma(mgp, 0);
6067 myri10ge_nic_stat_destroy(mgp);
6068 myri10ge_info_destroy(mgp);
6070 mutex_destroy(&mgp->cmd_lock);
6071 mutex_destroy(&mgp->intrlock);
6073 myri10ge_rem_intrs(mgp, 1);
6075 myri10ge_free_slices(mgp);
6076 ddi_regs_map_free(&mgp->io_handle);
6077 myri10ge_dma_free(&mgp->cmd_dma);
6078 pci_config_teardown(&mgp->cfg_hdl);
6080 mutex_enter(&myri10ge_param_lock);
6081 if (mgp_list == mgp) {
6082 mgp_list = mgp->next;
6083 } else {
6084 tmp = mgp_list;
6085 while (tmp->next != mgp && tmp->next != NULL)
6086 tmp = tmp->next;
6087 if (tmp->next != NULL)
6088 tmp->next = tmp->next->next;
6090 kmem_free(mgp, sizeof (*mgp));
6091 mutex_exit(&myri10ge_param_lock);
6092 return (DDI_SUCCESS);
/*
 * Helper for quiesce entry point: Interrupt threads are not being
 * scheduled, so we must poll for the confirmation DMA to arrive in
 * the firmware stats block for slice 0.  We're essentially running
 * the guts of the interrupt handler, and just cherry-picking the
 * confirmation that the NIC is quiesced (stats->link_down).
 */
6103 static int
6104 myri10ge_poll_down(struct myri10ge_priv *mgp)
6106 struct myri10ge_slice_state *ss = mgp->ss;
6107 mcp_irq_data_t *stats = ss->fw_stats;
6108 int valid;
6109 int found_down = 0;
6112 /* check for a pending IRQ */
6114 if (! *((volatile uint8_t *)& stats->valid))
6115 return (0);
6116 valid = stats->valid;
6119 * Make sure to tell the NIC to lower a legacy IRQ, else
6120 * it may have corrupt state after restarting
6123 if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
6124 /* lower legacy IRQ */
6125 *mgp->irq_deassert = 0;
6126 mb();
6127 /* wait for irq conf DMA */
		while (*((volatile uint8_t *)&stats->valid))
			mb();
	}
6131 if (stats->stats_updated && stats->link_down)
6132 found_down = 1;
6134 if (valid & 0x1)
6135 *ss->irq_claim = BE_32(3);
6136 *(ss->irq_claim + 1) = BE_32(3);
6138 return (found_down);
6141 static int
6142 myri10ge_quiesce(dev_info_t *dip)
6144 struct myri10ge_priv *mgp;
6145 myri10ge_cmd_t cmd;
6146 int status, down, i;
6148 mgp = ddi_get_driver_private(dip);
6149 if (mgp == NULL)
6150 return (DDI_FAILURE);
	/* if the device was unplumbed, it is guaranteed to be quiescent */
6153 if (mgp->running == MYRI10GE_ETH_STOPPED)
6154 return (DDI_SUCCESS);
	/* send a down CMD to quiesce NIC */
6157 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
6158 if (status) {
6159 cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name);
6160 return (DDI_FAILURE);
6163 for (i = 0; i < 20; i++) {
6164 down = myri10ge_poll_down(mgp);
6165 if (down)
6166 break;
6167 delay(drv_usectohz(100000));
6168 mb();
6170 if (down)
6171 return (DDI_SUCCESS);
6172 return (DDI_FAILURE);
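/*
 * Note that the loop above allows up to 20 * 100ms = 2 seconds for
 * the link-down confirmation before the quiesce is declared failed.
 */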
/*
 * Distinguish between allocb'ed blocks, and desballoc'ed attached
 * storage.
 */
6179 static void
6180 myri10ge_find_lastfree(void)
6182 mblk_t *mp = allocb(1024, 0);
6183 dblk_t *dbp;
6185 if (mp == NULL) {
6186 cmn_err(CE_WARN, "myri10ge_find_lastfree failed\n");
6187 return;
6189 dbp = mp->b_datap;
6190 myri10ge_db_lastfree = (void *)dbp->db_lastfree;
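/*
 * The receive path can then distinguish ordinary allocb()'ed mblks
 * from desballoc()'ed ones by comparing a dblk's free routine with
 * the value captured above.  A minimal sketch (the helper name is
 * illustrative, not part of the driver):
 *
 *	static int
 *	myri10ge_mblk_is_allocb(mblk_t *mp)
 *	{
 *		return ((void *)mp->b_datap->db_lastfree ==
 *		    myri10ge_db_lastfree);
 *	}
 */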
6194 _init(void)
6196 int i;
6198 if (myri10ge_verbose)
6199 cmn_err(CE_NOTE,
6200 "Myricom 10G driver (10GbE) version %s loading\n",
6201 MYRI10GE_VERSION_STR);
6202 myri10ge_find_lastfree();
6203 mac_init_ops(&myri10ge_ops, "myri10ge");
6204 mutex_init(&myri10ge_param_lock, NULL, MUTEX_DEFAULT, NULL);
6205 if ((i = mod_install(&modlinkage)) != 0) {
6206 cmn_err(CE_WARN, "mod_install returned %d\n", i);
6207 mac_fini_ops(&myri10ge_ops);
6208 mutex_destroy(&myri10ge_param_lock);
6210 return (i);
6214 _fini(void)
6216 int i;
6217 i = mod_remove(&modlinkage);
6218 if (i != 0) {
6219 return (i);
6221 mac_fini_ops(&myri10ge_ops);
6222 mutex_destroy(&myri10ge_param_lock);
6223 return (0);
6227 _info(struct modinfo *modinfop)
6229 return (mod_info(&modlinkage, modinfop));
/*
 * This file uses MyriGE driver indentation.
 *
 * Local Variables:
 * c-file-style:"sun"
 * tab-width:8
 * End:
 */