4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 * Copyright 2007-2009 Myricom, Inc. All rights reserved.
29 * Use is subject to license terms.
33 * Copyright (c) 2014, Joyent, Inc.
34 * Copyright (c) 2016 by Delphix. All rights reserved.
38 #include "myri10ge_var.h"
39 #include "rss_eth_z8e.h"
40 #include "rss_ethp_z8e.h"
41 #include "mcp_gen_header.h"
/*
 * Frame-size limits.  MYRI10GE_MAX_ETHER_MTU is the largest frame size
 * used when sizing receive buffers (see myri10ge_mtu); the two GLD values
 * bound the MTU range (presumably as advertised to the MAC layer -- the
 * consumer is not visible in this part of the file).
 */
#define	MYRI10GE_MAX_ETHER_MTU		9014
#define	MYRI10GE_MAX_GLD_MTU		9000
#define	MYRI10GE_MIN_GLD_MTU		1500

/*
 * Interface run-state codes.  NOTE(review): the field that stores these
 * values is outside this view; the names suggest a simple state machine
 * (stopped -> starting -> running -> stopping), plus a failed-open state
 * and a "suspended while running" state.
 */
#define	MYRI10GE_ETH_STOPPED		0
#define	MYRI10GE_ETH_STOPPING		1
#define	MYRI10GE_ETH_STARTING		2
#define	MYRI10GE_ETH_RUNNING		3
#define	MYRI10GE_ETH_OPEN_FAILED	4
#define	MYRI10GE_ETH_SUSPENDED_RUNNING	5
54 static int myri10ge_small_bytes
= 510;
55 static int myri10ge_intr_coal_delay
= 125;
56 static int myri10ge_flow_control
= 1;
57 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
58 static int myri10ge_nvidia_ecrc_enable
= 1;
60 static int myri10ge_mtu_override
= 0;
61 static int myri10ge_tx_copylen
= 512;
62 static int myri10ge_deassert_wait
= 1;
63 static int myri10ge_verbose
= 0;
64 static int myri10ge_watchdog_reset
= 0;
65 static int myri10ge_use_msix
= 1;
66 static int myri10ge_max_slices
= -1;
67 static int myri10ge_use_msi
= 1;
68 int myri10ge_force_firmware
= 0;
69 static boolean_t myri10ge_use_lso
= B_TRUE
;
70 static int myri10ge_rss_hash
= MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT
;
71 static int myri10ge_tx_hash
= 1;
72 static int myri10ge_lro
= 0;
73 static int myri10ge_lro_cnt
= 8;
74 int myri10ge_lro_max_aggr
= 2;
75 static int myri10ge_lso_copy
= 0;
76 static mblk_t
*myri10ge_send_wrapper(void *arg
, mblk_t
*mp
);
77 int myri10ge_tx_handles_initial
= 128;
79 static kmutex_t myri10ge_param_lock
;
80 static void* myri10ge_db_lastfree
;
82 static int myri10ge_attach(dev_info_t
*dip
, ddi_attach_cmd_t cmd
);
83 static int myri10ge_detach(dev_info_t
*dip
, ddi_detach_cmd_t cmd
);
84 static int myri10ge_quiesce(dev_info_t
*dip
);
86 DDI_DEFINE_STREAM_OPS(myri10ge_ops
, nulldev
, nulldev
, myri10ge_attach
,
87 myri10ge_detach
, nodev
, NULL
, D_MP
, NULL
, myri10ge_quiesce
);
90 static struct modldrv modldrv
= {
92 "Myricom 10G driver (10GbE)",
97 static struct modlinkage modlinkage
= {
102 unsigned char myri10ge_broadcastaddr
[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
104 static ddi_dma_attr_t myri10ge_misc_dma_attr
= {
105 DMA_ATTR_V0
, /* version number. */
106 (uint64_t)0, /* low address */
107 (uint64_t)0xffffffffffffffffULL
, /* high address */
108 (uint64_t)0x7ffffff, /* address counter max */
109 (uint64_t)4096, /* alignment */
110 (uint_t
)0x7f, /* burstsizes for 32b and 64b xfers */
111 (uint32_t)0x1, /* minimum transfer size */
112 (uint64_t)0x7fffffff, /* maximum transfer size */
113 (uint64_t)0x7fffffff, /* maximum segment size */
114 1, /* scatter/gather list length */
116 0 /* attribute flags */
120 * The Myri10GE NIC has the following constraints on receive buffers:
121 * 1) Buffers which cross a 4KB boundary must be aligned to 4KB
122 * 2) Buffers which are not aligned to 4KB must not cross a 4KB boundary
125 static ddi_dma_attr_t myri10ge_rx_jumbo_dma_attr
= {
126 DMA_ATTR_V0
, /* version number. */
127 (uint64_t)0, /* low address */
128 (uint64_t)0xffffffffffffffffULL
, /* high address */
129 (uint64_t)0x7ffffff, /* address counter max */
130 (uint64_t)4096, /* alignment */
131 (uint_t
)0x7f, /* burstsizes for 32b and 64b xfers */
132 (uint32_t)0x1, /* minimum transfer size */
133 (uint64_t)0x7fffffff, /* maximum transfer size */
134 UINT64_MAX
, /* maximum segment size */
135 1, /* scatter/gather list length */
137 0 /* attribute flags */
140 static ddi_dma_attr_t myri10ge_rx_std_dma_attr
= {
141 DMA_ATTR_V0
, /* version number. */
142 (uint64_t)0, /* low address */
143 (uint64_t)0xffffffffffffffffULL
, /* high address */
144 (uint64_t)0x7ffffff, /* address counter max */
145 #if defined sparc64 || defined __sparcv9
146 (uint64_t)4096, /* alignment */
148 (uint64_t)0x80, /* alignment */
150 (uint_t
)0x7f, /* burstsizes for 32b and 64b xfers */
151 (uint32_t)0x1, /* minimum transfer size */
152 (uint64_t)0x7fffffff, /* maximum transfer size */
153 #if defined sparc64 || defined __sparcv9
154 UINT64_MAX
, /* maximum segment size */
156 (uint64_t)0xfff, /* maximum segment size */
158 1, /* scatter/gather list length */
160 0 /* attribute flags */
163 static ddi_dma_attr_t myri10ge_tx_dma_attr
= {
164 DMA_ATTR_V0
, /* version number. */
165 (uint64_t)0, /* low address */
166 (uint64_t)0xffffffffffffffffULL
, /* high address */
167 (uint64_t)0x7ffffff, /* address counter max */
168 (uint64_t)1, /* alignment */
169 (uint_t
)0x7f, /* burstsizes for 32b and 64b xfers */
170 (uint32_t)0x1, /* minimum transfer size */
171 (uint64_t)0x7fffffff, /* maximum transfer size */
172 UINT64_MAX
, /* maximum segment size */
173 INT32_MAX
, /* scatter/gather list length */
175 0 /* attribute flags */
178 #if defined sparc64 || defined __sparcv9
184 struct ddi_device_acc_attr myri10ge_dev_access_attr
= {
185 DDI_DEVICE_ATTR_V0
, /* version */
186 DDI_NEVERSWAP_ACC
, /* endian flash */
188 DDI_MERGING_OK_ACC
/* data order */
194 static void myri10ge_watchdog(void *arg
);
197 int myri10ge_mtu
= MYRI10GE_MAX_ETHER_MTU
+ MXGEFW_PAD
+ VLAN_TAGSZ
;
198 #define MYRI10GE_DEFAULT_GLD_MTU MYRI10GE_MAX_GLD_MTU
200 int myri10ge_mtu
= ETHERMAX
+ MXGEFW_PAD
+ VLAN_TAGSZ
;
201 #define MYRI10GE_DEFAULT_GLD_MTU MYRI10GE_MIN_GLD_MTU
203 int myri10ge_bigbufs_initial
= 1024;
204 int myri10ge_bigbufs_max
= 4096;
208 myri10ge_dma_alloc(dev_info_t
*dip
, size_t len
,
209 ddi_dma_attr_t
*attr
, ddi_device_acc_attr_t
*accattr
,
210 uint_t alloc_flags
, int bind_flags
, struct myri10ge_dma_stuff
*dma
,
211 int warn
, int (*wait
)(caddr_t
))
215 ddi_dma_cookie_t cookie
;
219 err
= ddi_dma_alloc_handle(dip
, attr
, wait
,
221 if (err
!= DDI_SUCCESS
) {
224 "myri10ge: ddi_dma_alloc_handle failed\n");
225 goto abort_with_nothing
;
228 err
= ddi_dma_mem_alloc(dma
->handle
, len
, accattr
, alloc_flags
,
229 wait
, NULL
, &kaddr
, &real_length
,
231 if (err
!= DDI_SUCCESS
) {
234 "myri10ge: ddi_dma_mem_alloc failed\n");
235 goto abort_with_handle
;
238 err
= ddi_dma_addr_bind_handle(dma
->handle
, NULL
, kaddr
, len
,
239 bind_flags
, wait
, NULL
, &cookie
, &count
);
241 if (err
!= DDI_SUCCESS
) {
244 "myri10ge: ddi_dma_addr_bind_handle failed\n");
251 "myri10ge: got too many dma segments ");
252 goto abort_with_bind
;
254 dma
->low
= htonl(MYRI10GE_LOWPART_TO_U32(cookie
.dmac_laddress
));
255 dma
->high
= htonl(MYRI10GE_HIGHPART_TO_U32(cookie
.dmac_laddress
));
259 (void) ddi_dma_unbind_handle(dma
->handle
);
262 ddi_dma_mem_free(&dma
->acc_handle
);
265 ddi_dma_free_handle(&dma
->handle
);
268 cmn_err(CE_WARN
, "myri10ge: myri10ge_dma_alloc failed.\n ");
269 cmn_err(CE_WARN
, "args: dip=%p len=0x%lx ddi_dma_attr=%p\n",
270 (void*) dip
, len
, (void*) attr
);
272 "args: ddi_device_acc_attr=%p alloc_flags=0x%x\n",
273 (void*) accattr
, alloc_flags
);
274 cmn_err(CE_WARN
, "args: bind_flags=0x%x dmastuff=%p",
275 bind_flags
, (void*) dma
);
282 myri10ge_dma_free(struct myri10ge_dma_stuff
*dma
)
284 (void) ddi_dma_unbind_handle(dma
->handle
);
285 ddi_dma_mem_free(&dma
->acc_handle
);
286 ddi_dma_free_handle(&dma
->handle
);
/*
 * Copy "size" bytes from from32 to the (volatile) destination "to",
 * one 32-bit word at a time, in ascending address order.
 *
 * Only size / 4 whole words are written; any trailing bytes beyond the
 * last whole word are not copied.
 */
static inline void
myri10ge_pio_copy32(void *to, uint32_t *from32, size_t size)
{
	volatile uint32_t *dst = (volatile uint32_t *)to;
	const size_t nwords = size / 4;
	size_t w;

	for (w = 0; w < nwords; w++)
		dst[w] = from32[w];
}
/*
 * Copy "size" bytes from from64 to the (volatile) destination "to",
 * one 64-bit word at a time, in ascending address order.
 *
 * Only size / 8 whole words are written; any trailing bytes beyond the
 * last whole word are not copied.
 */
static inline void
myri10ge_pio_copy64(void *to, uint64_t *from64, size_t size)
{
	volatile uint64_t *dst = (volatile uint64_t *)to;
	const size_t nwords = size / 8;
	size_t w;

	for (w = 0; w < nwords; w++)
		dst[w] = from64[w];
}
320 * This routine copies memory from the host to the NIC.
321 * The "size" argument must always be a multiple of
322 * the size of long (4 or 8 bytes), and to/from must also
323 * be naturally aligned.
/*
 * Word-at-a-time copy from host memory to the NIC.
 *
 * Dispatches to the 32-bit copier on 32-bit kernels and to the 64-bit
 * copier on LP64 kernels.  "size" must be a multiple of the word size
 * used (asserted on DEBUG builds), and both pointers must be naturally
 * aligned for that word size.
 */
static void
myri10ge_pio_copy(void *to, void *from, size_t size)
{
#if defined(_LP64)
	ASSERT((size % 8) == 0);
	myri10ge_pio_copy64(to, (uint64_t *)from, size);
#else
	ASSERT((size % 4) == 0);
	myri10ge_pio_copy32(to, (uint32_t *)from, size);
#endif
}
339 * Due to various bugs in Solaris (especially bug 6186772 where the
340 * TCP/UDP checksum is calculated incorrectly on mblk chains with more
341 * than two elements), and the design bug where hardware checksums are
342 * ignored on mblk chains with more than 2 elements, we need to
343 * allocate private pool of physically contiguous receive buffers.
347 myri10ge_jpool_init(struct myri10ge_slice_state
*ss
)
349 struct myri10ge_jpool_stuff
*jpool
= &ss
->jpool
;
351 bzero(jpool
, sizeof (*jpool
));
352 mutex_init(&jpool
->mtx
, NULL
, MUTEX_DRIVER
,
358 myri10ge_jpool_fini(struct myri10ge_slice_state
*ss
)
360 struct myri10ge_jpool_stuff
*jpool
= &ss
->jpool
;
362 if (jpool
->head
!= NULL
) {
364 "%s: BUG! myri10ge_jpool_fini called on non-empty pool\n",
367 mutex_destroy(&jpool
->mtx
);
372 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
373 * at most 32 bytes at a time, so as to avoid involving the software
374 * pio handler in the nic. We re-write the first segment's low
375 * DMA address to mark it valid only after we write the entire chunk
379 myri10ge_submit_8rx(mcp_kreq_ether_recv_t
*dst
, mcp_kreq_ether_recv_t
*src
)
381 src
->addr_low
|= BE_32(1);
382 myri10ge_pio_copy(dst
, src
, 4 * sizeof (*src
));
384 myri10ge_pio_copy(dst
+ 4, src
+ 4, 4 * sizeof (*src
));
386 src
->addr_low
&= ~(BE_32(1));
387 dst
->addr_low
= src
->addr_low
;
392 myri10ge_pull_jpool(struct myri10ge_slice_state
*ss
)
394 struct myri10ge_jpool_stuff
*jpool
= &ss
->jpool
;
395 struct myri10ge_jpool_entry
*jtail
, *j
, *jfree
;
396 volatile uintptr_t *putp
;
402 if (jpool
->head
!= NULL
) {
404 while (j
->next
!= NULL
)
410 * iterate over all per-CPU caches, and add contents into
413 for (i
= 0; i
< MYRI10GE_MAX_CPUS
; i
++) {
414 /* take per-CPU free list */
415 putp
= (void *)&jpool
->cpu
[i
& MYRI10GE_MAX_CPU_MASK
].head
;
418 put
= atomic_swap_ulong(putp
, 0);
419 jfree
= (struct myri10ge_jpool_entry
*)put
;
428 while (j
->next
!= NULL
)
435 * Transfers buffers from the free pool to the nic
436 * Must be called holding the jpool mutex.
440 myri10ge_restock_jumbos(struct myri10ge_slice_state
*ss
)
442 struct myri10ge_jpool_stuff
*jpool
= &ss
->jpool
;
443 struct myri10ge_jpool_entry
*j
;
444 myri10ge_rx_ring_t
*rx
;
448 limit
= ss
->j_rx_cnt
+ (rx
->mask
+ 1);
450 for (i
= rx
->cnt
; i
!= limit
; i
++) {
451 idx
= i
& (rx
->mask
);
454 myri10ge_pull_jpool(ss
);
460 jpool
->head
= j
->next
;
462 rx
->shadow
[idx
].addr_low
= j
->dma
.low
;
463 rx
->shadow
[idx
].addr_high
= j
->dma
.high
;
464 /* copy 4 descriptors (32-bytes) to the mcp at a time */
465 if ((idx
& 7) == 7) {
466 myri10ge_submit_8rx(&rx
->lanai
[idx
- 7],
467 &rx
->shadow
[idx
- 7]);
474 * Transfer buffers from the nic to the free pool.
475 * Should be called holding the jpool mutex
479 myri10ge_unstock_jumbos(struct myri10ge_slice_state
*ss
)
481 struct myri10ge_jpool_stuff
*jpool
= &ss
->jpool
;
482 struct myri10ge_jpool_entry
*j
;
483 myri10ge_rx_ring_t
*rx
;
486 mutex_enter(&jpool
->mtx
);
489 for (i
= 0; i
< rx
->mask
+ 1; i
++) {
491 rx
->info
[i
].j
= NULL
;
494 j
->next
= jpool
->head
;
497 mutex_exit(&jpool
->mtx
);
503 * Free routine which is called when the mblk allocated via
504 * esballoc() is freed. Here we return the jumbo buffer
505 * to the free pool, and possibly pass some jumbo buffers
510 myri10ge_jfree_rtn(void *arg
)
512 struct myri10ge_jpool_entry
*j
= (struct myri10ge_jpool_entry
*)arg
;
513 struct myri10ge_jpool_stuff
*jpool
;
514 volatile uintptr_t *putp
;
517 jpool
= &j
->ss
->jpool
;
519 /* prepend buffer locklessly to per-CPU freelist */
520 putp
= (void *)&jpool
->cpu
[CPU
->cpu_seqid
& MYRI10GE_MAX_CPU_MASK
].head
;
524 j
->next
= (void *)old
;
525 } while (atomic_cas_ulong(putp
, old
, new) != old
);
529 myri10ge_remove_jbuf(struct myri10ge_jpool_entry
*j
)
531 (void) ddi_dma_unbind_handle(j
->dma_handle
);
532 ddi_dma_mem_free(&j
->acc_handle
);
533 ddi_dma_free_handle(&j
->dma_handle
);
534 kmem_free(j
, sizeof (*j
));
539 * Allocates one physically contiguous descriptor
540 * and adds it to the jumbo buffer pool.
544 myri10ge_add_jbuf(struct myri10ge_slice_state
*ss
)
546 struct myri10ge_jpool_entry
*j
;
547 struct myri10ge_jpool_stuff
*jpool
= &ss
->jpool
;
548 ddi_dma_attr_t
*rx_dma_attr
;
550 ddi_dma_cookie_t cookie
;
554 if (myri10ge_mtu
< 2048)
555 rx_dma_attr
= &myri10ge_rx_std_dma_attr
;
557 rx_dma_attr
= &myri10ge_rx_jumbo_dma_attr
;
560 j
= (struct myri10ge_jpool_entry
*)
561 kmem_alloc(sizeof (*j
), KM_SLEEP
);
562 err
= ddi_dma_alloc_handle(ss
->mgp
->dip
, rx_dma_attr
,
563 DDI_DMA_DONTWAIT
, NULL
, &j
->dma_handle
);
564 if (err
!= DDI_SUCCESS
)
567 err
= ddi_dma_mem_alloc(j
->dma_handle
, myri10ge_mtu
,
568 &myri10ge_dev_access_attr
, DDI_DMA_STREAMING
, DDI_DMA_DONTWAIT
,
569 NULL
, &j
->buf
, &real_length
, &j
->acc_handle
);
570 if (err
!= DDI_SUCCESS
)
571 goto abort_with_handle
;
573 err
= ddi_dma_addr_bind_handle(j
->dma_handle
, NULL
, j
->buf
,
574 real_length
, DDI_DMA_READ
|DDI_DMA_STREAMING
, DDI_DMA_DONTWAIT
,
575 NULL
, &cookie
, &count
);
576 if (err
!= DDI_SUCCESS
)
580 * Make certain std MTU buffers do not cross a 4KB boundary:
582 * Setting dma_attr_align=4096 will do this, but the system
583 * will only allocate 1 RX buffer per 4KB page, rather than 2.
584 * Setting dma_attr_granular=4096 *seems* to work around this,
585 * but I'm paranoid about future systems no longer honoring
586 * this, so fall back to the safe, but memory wasting way if a
587 * buffer crosses a 4KB boundary.
590 if (rx_dma_attr
== &myri10ge_rx_std_dma_attr
&&
591 rx_dma_attr
->dma_attr_align
!= 4096) {
594 start
= MYRI10GE_LOWPART_TO_U32(cookie
.dmac_laddress
);
595 end
= start
+ myri10ge_mtu
;
596 if (((end
>> 12) != (start
>> 12)) && (start
& 4095U)) {
597 printf("std buffer crossed a 4KB boundary!\n");
598 myri10ge_remove_jbuf(j
);
599 rx_dma_attr
->dma_attr_align
= 4096;
600 rx_dma_attr
->dma_attr_seg
= UINT64_MAX
;
606 htonl(MYRI10GE_LOWPART_TO_U32(cookie
.dmac_laddress
));
608 htonl(MYRI10GE_HIGHPART_TO_U32(cookie
.dmac_laddress
));
612 j
->free_func
.free_func
= myri10ge_jfree_rtn
;
613 j
->free_func
.free_arg
= (char *)j
;
614 mutex_enter(&jpool
->mtx
);
615 j
->next
= jpool
->head
;
618 mutex_exit(&jpool
->mtx
);
622 ddi_dma_mem_free(&j
->acc_handle
);
625 ddi_dma_free_handle(&j
->dma_handle
);
628 kmem_free(j
, sizeof (*j
));
631 * If an allocation failed, perhaps it failed because it could
632 * not satisfy granularity requirement. Disable that, and
635 if (rx_dma_attr
== &myri10ge_rx_std_dma_attr
&&
636 rx_dma_attr
->dma_attr_align
!= 4096) {
638 "!alloc failed, reverting to gran=1\n");
639 rx_dma_attr
->dma_attr_align
= 4096;
640 rx_dma_attr
->dma_attr_seg
= UINT64_MAX
;
647 myri10ge_jfree_cnt(struct myri10ge_jpool_stuff
*jpool
)
650 struct myri10ge_jpool_entry
*j
;
652 mutex_enter(&jpool
->mtx
);
659 mutex_exit(&jpool
->mtx
);
664 myri10ge_add_jbufs(struct myri10ge_slice_state
*ss
, int num
, int total
)
666 struct myri10ge_jpool_stuff
*jpool
= &ss
->jpool
;
672 * if total is set, user wants "num" jbufs in the pool,
673 * otherwise the user wants "num" additional jbufs
676 if (total
&& jpool
->num_alloc
) {
677 allocated
= myri10ge_jfree_cnt(jpool
);
678 needed
= num
- allocated
;
685 err
= myri10ge_add_jbuf(ss
);
694 myri10ge_remove_jbufs(struct myri10ge_slice_state
*ss
)
696 struct myri10ge_jpool_stuff
*jpool
= &ss
->jpool
;
697 struct myri10ge_jpool_entry
*j
;
699 mutex_enter(&jpool
->mtx
);
700 myri10ge_pull_jpool(ss
);
701 while (jpool
->head
!= NULL
) {
704 jpool
->head
= j
->next
;
705 myri10ge_remove_jbuf(j
);
707 mutex_exit(&jpool
->mtx
);
711 myri10ge_carve_up_jbufs_into_small_ring(struct myri10ge_slice_state
*ss
)
713 struct myri10ge_jpool_stuff
*jpool
= &ss
->jpool
;
714 struct myri10ge_jpool_entry
*j
= NULL
;
716 uint32_t dma_low
, dma_high
;
718 unsigned int alloc_size
;
720 dma_low
= dma_high
= len
= 0;
721 alloc_size
= myri10ge_small_bytes
+ MXGEFW_PAD
;
723 for (idx
= 0; idx
< ss
->rx_small
.mask
+ 1; idx
++) {
724 /* Allocate a jumbo frame and carve it into small frames */
725 if (len
< alloc_size
) {
726 mutex_enter(&jpool
->mtx
);
727 /* remove jumbo from freelist */
729 jpool
->head
= j
->next
;
730 /* place it onto small list */
731 j
->next
= ss
->small_jpool
;
733 mutex_exit(&jpool
->mtx
);
735 dma_low
= ntohl(j
->dma
.low
);
736 dma_high
= ntohl(j
->dma
.high
);
739 ss
->rx_small
.info
[idx
].ptr
= ptr
;
740 ss
->rx_small
.shadow
[idx
].addr_low
= htonl(dma_low
);
741 ss
->rx_small
.shadow
[idx
].addr_high
= htonl(dma_high
);
744 dma_low
+= alloc_size
;
749 * Return the jumbo bufs we carved up for small to the jumbo pool
753 myri10ge_release_small_jbufs(struct myri10ge_slice_state
*ss
)
755 struct myri10ge_jpool_stuff
*jpool
= &ss
->jpool
;
756 struct myri10ge_jpool_entry
*j
= NULL
;
758 mutex_enter(&jpool
->mtx
);
759 while (ss
->small_jpool
!= NULL
) {
761 ss
->small_jpool
= j
->next
;
762 j
->next
= jpool
->head
;
765 mutex_exit(&jpool
->mtx
);
766 ss
->jbufs_for_smalls
= 0;
770 myri10ge_add_tx_handle(struct myri10ge_slice_state
*ss
)
772 myri10ge_tx_ring_t
*tx
= &ss
->tx
;
773 struct myri10ge_priv
*mgp
= ss
->mgp
;
774 struct myri10ge_tx_dma_handle
*handle
;
777 handle
= kmem_zalloc(sizeof (*handle
), KM_SLEEP
);
778 err
= ddi_dma_alloc_handle(mgp
->dip
,
779 &myri10ge_tx_dma_attr
,
783 static int limit
= 0;
785 cmn_err(CE_WARN
, "%s: Falled to alloc tx dma handle\n",
788 kmem_free(handle
, sizeof (*handle
));
791 mutex_enter(&tx
->handle_lock
);
792 MYRI10GE_SLICE_STAT_INC(tx_handles_alloced
);
793 handle
->next
= tx
->free_tx_handles
;
794 tx
->free_tx_handles
= handle
;
795 mutex_exit(&tx
->handle_lock
);
796 return (DDI_SUCCESS
);
800 myri10ge_remove_tx_handles(struct myri10ge_slice_state
*ss
)
802 myri10ge_tx_ring_t
*tx
= &ss
->tx
;
803 struct myri10ge_tx_dma_handle
*handle
;
804 mutex_enter(&tx
->handle_lock
);
806 handle
= tx
->free_tx_handles
;
807 while (handle
!= NULL
) {
808 tx
->free_tx_handles
= handle
->next
;
809 ddi_dma_free_handle(&handle
->h
);
810 kmem_free(handle
, sizeof (*handle
));
811 handle
= tx
->free_tx_handles
;
812 MYRI10GE_SLICE_STAT_DEC(tx_handles_alloced
);
814 mutex_exit(&tx
->handle_lock
);
815 if (MYRI10GE_SLICE_STAT(tx_handles_alloced
) != 0) {
816 cmn_err(CE_WARN
, "%s: %d tx dma handles allocated at close\n",
818 (int)MYRI10GE_SLICE_STAT(tx_handles_alloced
));
823 myri10ge_free_tx_handles(myri10ge_tx_ring_t
*tx
,
824 struct myri10ge_tx_dma_handle_head
*list
)
826 mutex_enter(&tx
->handle_lock
);
827 list
->tail
->next
= tx
->free_tx_handles
;
828 tx
->free_tx_handles
= list
->head
;
829 mutex_exit(&tx
->handle_lock
);
833 myri10ge_free_tx_handle_slist(myri10ge_tx_ring_t
*tx
,
834 struct myri10ge_tx_dma_handle
*handle
)
836 struct myri10ge_tx_dma_handle_head list
;
842 while (handle
!= NULL
) {
844 handle
= handle
->next
;
846 myri10ge_free_tx_handles(tx
, &list
);
850 myri10ge_alloc_tx_handles(struct myri10ge_slice_state
*ss
, int count
,
851 struct myri10ge_tx_dma_handle
**ret
)
853 myri10ge_tx_ring_t
*tx
= &ss
->tx
;
854 struct myri10ge_tx_dma_handle
*handle
;
857 mutex_enter(&tx
->handle_lock
);
858 for (i
= 0; i
< count
; i
++) {
859 handle
= tx
->free_tx_handles
;
860 while (handle
== NULL
) {
861 mutex_exit(&tx
->handle_lock
);
862 err
= myri10ge_add_tx_handle(ss
);
863 if (err
!= DDI_SUCCESS
) {
864 goto abort_with_handles
;
866 mutex_enter(&tx
->handle_lock
);
867 handle
= tx
->free_tx_handles
;
869 tx
->free_tx_handles
= handle
->next
;
873 mutex_exit(&tx
->handle_lock
);
874 return (DDI_SUCCESS
);
877 myri10ge_free_tx_handle_slist(tx
, *ret
);
883 * Frees DMA resources associated with the send ring
886 myri10ge_unprepare_tx_ring(struct myri10ge_slice_state
*ss
)
888 myri10ge_tx_ring_t
*tx
;
889 struct myri10ge_tx_dma_handle_head handles
;
896 for (idx
= 0; idx
< ss
->tx
.mask
+ 1; idx
++) {
897 if (tx
->info
[idx
].m
) {
898 (void) ddi_dma_unbind_handle(tx
->info
[idx
].handle
->h
);
899 handles
.head
= tx
->info
[idx
].handle
;
900 if (handles
.tail
== NULL
)
901 handles
.tail
= tx
->info
[idx
].handle
;
902 freeb(tx
->info
[idx
].m
);
904 tx
->info
[idx
].handle
= 0;
906 tx
->cp
[idx
].va
= NULL
;
907 myri10ge_dma_free(&tx
->cp
[idx
].dma
);
909 bytes
= sizeof (*tx
->cp
) * (tx
->mask
+ 1);
910 kmem_free(tx
->cp
, bytes
);
912 if (handles
.head
!= NULL
)
913 myri10ge_free_tx_handles(tx
, &handles
);
914 myri10ge_remove_tx_handles(ss
);
918 * Allocates DMA handles associated with the send ring
921 myri10ge_prepare_tx_ring(struct myri10ge_slice_state
*ss
)
923 struct myri10ge_tx_dma_handle
*handles
;
927 bytes
= sizeof (*ss
->tx
.cp
) * (ss
->tx
.mask
+ 1);
928 ss
->tx
.cp
= kmem_zalloc(bytes
, KM_SLEEP
);
929 if (ss
->tx
.cp
== NULL
) {
931 "%s: Failed to allocate tx copyblock storage\n",
933 return (DDI_FAILURE
);
937 /* allocate the TX copyblocks */
938 for (h
= 0; h
< ss
->tx
.mask
+ 1; h
++) {
939 ss
->tx
.cp
[h
].va
= myri10ge_dma_alloc(ss
->mgp
->dip
,
940 4096, &myri10ge_rx_jumbo_dma_attr
,
941 &myri10ge_dev_access_attr
, DDI_DMA_STREAMING
,
942 DDI_DMA_WRITE
|DDI_DMA_STREAMING
, &ss
->tx
.cp
[h
].dma
, 1,
944 if (ss
->tx
.cp
[h
].va
== NULL
) {
945 cmn_err(CE_WARN
, "%s: Failed to allocate tx "
946 "copyblock %d\n", ss
->mgp
->name
, h
);
947 goto abort_with_copyblocks
;
950 /* pre-allocate transmit handles */
952 (void) myri10ge_alloc_tx_handles(ss
, myri10ge_tx_handles_initial
,
955 myri10ge_free_tx_handle_slist(&ss
->tx
, handles
);
957 return (DDI_SUCCESS
);
959 abort_with_copyblocks
:
962 myri10ge_dma_free(&ss
->tx
.cp
[h
].dma
);
965 bytes
= sizeof (*ss
->tx
.cp
) * (ss
->tx
.mask
+ 1);
966 kmem_free(ss
->tx
.cp
, bytes
);
968 return (DDI_FAILURE
);
972 * The eeprom strings on the lanaiX have the format
975 * PT:ddd mmm xx xx:xx:xx xx\0
976 * PV:ddd mmm xx xx:xx:xx xx\0
979 myri10ge_read_mac_addr(struct myri10ge_priv
*mgp
)
981 #define MYRI10GE_NEXT_STRING(p) while (ptr < limit && *ptr++)
982 #define myri10ge_digit(c) (((c) >= '0' && (c) <= '9') ? ((c) - '0') : \
983 (((c) >= 'A' && (c) <= 'F') ? (10 + (c) - 'A') : \
984 (((c) >= 'a' && (c) <= 'f') ? (10 + (c) - 'a') : -1)))
989 ptr
= mgp
->eeprom_strings
;
990 limit
= mgp
->eeprom_strings
+ MYRI10GE_EEPROM_STRINGS_SIZE
;
992 while (*ptr
!= '\0' && ptr
< limit
) {
993 if (memcmp(ptr
, "MAC=", 4) == 0) {
995 if (myri10ge_verbose
)
996 printf("%s: mac address = %s\n", mgp
->name
,
998 mgp
->mac_addr_string
= ptr
;
999 for (i
= 0; i
< 6; i
++) {
1000 if ((ptr
+ 2) > limit
)
1003 if (*(ptr
+1) == ':') {
1005 lv
= myri10ge_digit(*ptr
); ptr
++;
1007 hv
= myri10ge_digit(*ptr
); ptr
++;
1008 lv
= myri10ge_digit(*ptr
); ptr
++;
1010 mgp
->mac_addr
[i
] = (hv
<< 4) | lv
;
1014 if (memcmp((const void *)ptr
, "SN=", 3) == 0) {
1016 mgp
->sn_str
= (char *)ptr
;
1018 if (memcmp((const void *)ptr
, "PC=", 3) == 0) {
1020 mgp
->pc_str
= (char *)ptr
;
1022 MYRI10GE_NEXT_STRING(ptr
);
1028 cmn_err(CE_WARN
, "%s: failed to parse eeprom_strings", mgp
->name
);
1034 * Determine the register set containing the PCI resource we
1035 * want to map: the memory-mappable part of the interface. We do
1036 * this by scanning the DDI "reg" property of the interface,
1037 * which is an array of mx_ddi_reg_set structures.
1040 myri10ge_reg_set(dev_info_t
*dip
, int *reg_set
, int *span
,
1041 unsigned long *busno
, unsigned long *devno
,
1042 unsigned long *funcno
)
1045 #define REGISTER_NUMBER(ip) (ip[0] >> 0 & 0xff)
1046 #define FUNCTION_NUMBER(ip) (ip[0] >> 8 & 0x07)
1047 #define DEVICE_NUMBER(ip) (ip[0] >> 11 & 0x1f)
1048 #define BUS_NUMBER(ip) (ip[0] >> 16 & 0xff)
1049 #define ADDRESS_SPACE(ip) (ip[0] >> 24 & 0x03)
1050 #define PCI_ADDR_HIGH(ip) (ip[1])
1051 #define PCI_ADDR_LOW(ip) (ip[2])
1052 #define PCI_SPAN_HIGH(ip) (ip[3])
1053 #define PCI_SPAN_LOW(ip) (ip[4])
1055 #define MX_DDI_REG_SET_32_BIT_MEMORY_SPACE 2
1056 #define MX_DDI_REG_SET_64_BIT_MEMORY_SPACE 3
1059 uint32_t nelementsp
;
1061 #ifdef MYRI10GE_REGSET_VERBOSE
1062 char *address_space_name
[] = { "Configuration Space",
1064 "32-bit Memory Space",
1065 "64-bit Memory Space"
1069 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY
, dip
, DDI_PROP_DONTPASS
,
1070 "reg", &data
, &nelementsp
) != DDI_SUCCESS
) {
1071 printf("Could not determine register set.\n");
1075 #ifdef MYRI10GE_REGSET_VERBOSE
1076 printf("There are %d register sets.\n", nelementsp
/ 5);
1079 printf("Didn't find any \"reg\" properties.\n");
1080 ddi_prop_free(data
);
1084 /* Scan for the register number. */
1086 *busno
= BUS_NUMBER(rs
);
1087 *devno
= DEVICE_NUMBER(rs
);
1088 *funcno
= FUNCTION_NUMBER(rs
);
1090 #ifdef MYRI10GE_REGSET_VERBOSE
1091 printf("*** Scanning for register number.\n");
1093 for (i
= 0; i
< nelementsp
/ 5; i
++) {
1095 #ifdef MYRI10GE_REGSET_VERBOSE
1096 printf("Examining register set %d:\n", i
);
1097 printf(" Register number = %d.\n", REGISTER_NUMBER(rs
));
1098 printf(" Function number = %d.\n", FUNCTION_NUMBER(rs
));
1099 printf(" Device number = %d.\n", DEVICE_NUMBER(rs
));
1100 printf(" Bus number = %d.\n", BUS_NUMBER(rs
));
1101 printf(" Address space = %d (%s ).\n", ADDRESS_SPACE(rs
),
1102 address_space_name
[ADDRESS_SPACE(rs
)]);
1103 printf(" pci address 0x%08x %08x\n", PCI_ADDR_HIGH(rs
),
1105 printf(" pci span 0x%08x %08x\n", PCI_SPAN_HIGH(rs
),
1108 /* We are looking for a memory property. */
1110 if (ADDRESS_SPACE(rs
) == MX_DDI_REG_SET_64_BIT_MEMORY_SPACE
||
1111 ADDRESS_SPACE(rs
) == MX_DDI_REG_SET_32_BIT_MEMORY_SPACE
) {
1114 #ifdef MYRI10GE_REGSET_VERBOSE
1115 printf("%s uses register set %d.\n",
1116 address_space_name
[ADDRESS_SPACE(rs
)], *reg_set
);
1119 *span
= (PCI_SPAN_LOW(rs
));
1120 #ifdef MYRI10GE_REGSET_VERBOSE
1121 printf("Board span is 0x%x\n", *span
);
1127 ddi_prop_free(data
);
1129 /* If no match, fail. */
1130 if (i
>= nelementsp
/ 5) {
1139 myri10ge_load_firmware_from_zlib(struct myri10ge_priv
*mgp
, uint32_t *limit
)
1141 void *inflate_buffer
;
1143 size_t sram_size
= mgp
->sram_size
- MYRI10GE_EEPROM_STRINGS_SIZE
;
1145 mcp_gen_header_t
*hdr
;
1146 unsigned hdr_offset
, i
;
1149 *limit
= 0; /* -Wuninitialized */
1152 inflate_buffer
= kmem_zalloc(sram_size
, KM_NOSLEEP
);
1153 if (!inflate_buffer
) {
1155 "%s: Could not allocate buffer to inflate mcp\n",
1160 destlen
= sram_size
;
1161 rv
= z_uncompress(inflate_buffer
, &destlen
, mgp
->eth_z8e
,
1162 mgp
->eth_z8e_length
);
1165 cmn_err(CE_WARN
, "%s: Could not inflate mcp: %s\n",
1166 mgp
->name
, z_strerror(rv
));
1171 *limit
= (uint32_t)destlen
;
1173 hdr_offset
= htonl(*(uint32_t *)(void *)((char *)inflate_buffer
+
1174 MCP_HEADER_PTR_OFFSET
));
1175 hdr
= (void *)((char *)inflate_buffer
+ hdr_offset
);
1176 if (ntohl(hdr
->mcp_type
) != MCP_TYPE_ETH
) {
1177 cmn_err(CE_WARN
, "%s: Bad firmware type: 0x%x\n", mgp
->name
,
1178 ntohl(hdr
->mcp_type
));
1183 /* save firmware version for kstat */
1184 (void) strncpy(mgp
->fw_version
, hdr
->version
, sizeof (mgp
->fw_version
));
1185 if (myri10ge_verbose
)
1186 printf("%s: firmware id: %s\n", mgp
->name
, hdr
->version
);
1188 /* Copy the inflated firmware to NIC SRAM. */
1189 for (i
= 0; i
< *limit
; i
+= 256) {
1190 myri10ge_pio_copy((char *)mgp
->sram
+ MYRI10GE_FW_OFFSET
+ i
,
1191 (char *)inflate_buffer
+ i
,
1192 min(256U, (unsigned)(*limit
- i
)));
1194 (void) *(int *)(void *)mgp
->sram
;
1199 kmem_free(inflate_buffer
, sram_size
);
1207 myri10ge_send_cmd(struct myri10ge_priv
*mgp
, uint32_t cmd
,
1208 myri10ge_cmd_t
*data
)
1211 char buf_bytes
[sizeof (*buf
) + 8];
1212 volatile mcp_cmd_response_t
*response
= mgp
->cmd
;
1213 volatile char *cmd_addr
=
1214 (volatile char *)mgp
->sram
+ MXGEFW_ETH_CMD
;
1215 int sleep_total
= 0;
1217 /* ensure buf is aligned to 8 bytes */
1218 buf
= (mcp_cmd_t
*)((unsigned long)(buf_bytes
+ 7) & ~7UL);
1220 buf
->data0
= htonl(data
->data0
);
1221 buf
->data1
= htonl(data
->data1
);
1222 buf
->data2
= htonl(data
->data2
);
1223 buf
->cmd
= htonl(cmd
);
1224 buf
->response_addr
.low
= mgp
->cmd_dma
.low
;
1225 buf
->response_addr
.high
= mgp
->cmd_dma
.high
;
1226 mutex_enter(&mgp
->cmd_lock
);
1227 response
->result
= 0xffffffff;
1230 myri10ge_pio_copy((void *)cmd_addr
, buf
, sizeof (*buf
));
1232 /* wait up to 20ms */
1233 for (sleep_total
= 0; sleep_total
< 20; sleep_total
++) {
1235 if (response
->result
!= 0xffffffff) {
1236 if (response
->result
== 0) {
1237 data
->data0
= ntohl(response
->data
);
1238 mutex_exit(&mgp
->cmd_lock
);
1240 } else if (ntohl(response
->result
)
1241 == MXGEFW_CMD_UNKNOWN
) {
1242 mutex_exit(&mgp
->cmd_lock
);
1244 } else if (ntohl(response
->result
)
1245 == MXGEFW_CMD_ERROR_UNALIGNED
) {
1246 mutex_exit(&mgp
->cmd_lock
);
1250 "%s: command %d failed, result = %d\n",
1251 mgp
->name
, cmd
, ntohl(response
->result
));
1252 mutex_exit(&mgp
->cmd_lock
);
1258 mutex_exit(&mgp
->cmd_lock
);
1259 cmn_err(CE_WARN
, "%s: command %d timed out, result = %d\n",
1260 mgp
->name
, cmd
, ntohl(response
->result
));
1265 * Enable or disable periodic RDMAs from the host to make certain
1266 * chipsets resend dropped PCIe messages
1270 myri10ge_dummy_rdma(struct myri10ge_priv
*mgp
, int enable
)
1273 volatile uint32_t *confirm
;
1274 volatile char *submit
;
1278 buf
= (uint32_t *)((unsigned long)(buf_bytes
+ 7) & ~7UL);
1280 /* clear confirmation addr */
1281 confirm
= (volatile uint32_t *)mgp
->cmd
;
1286 * send an rdma command to the PCIe engine, and wait for the
1287 * response in the confirmation address. The firmware should
1288 * write a -1 there to indicate it is alive and well
1291 buf
[0] = mgp
->cmd_dma
.high
; /* confirm addr MSW */
1292 buf
[1] = mgp
->cmd_dma
.low
; /* confirm addr LSW */
1293 buf
[2] = htonl(0xffffffff); /* confirm data */
1294 buf
[3] = htonl(mgp
->cmd_dma
.high
); /* dummy addr MSW */
1295 buf
[4] = htonl(mgp
->cmd_dma
.low
); /* dummy addr LSW */
1296 buf
[5] = htonl(enable
); /* enable? */
1299 submit
= (volatile char *)(mgp
->sram
+ MXGEFW_BOOT_DUMMY_RDMA
);
1301 myri10ge_pio_copy((char *)submit
, buf
, 64);
1306 while (*confirm
!= 0xffffffff && i
< 20) {
1310 if (*confirm
!= 0xffffffff) {
1311 cmn_err(CE_WARN
, "%s: dummy rdma %s failed (%p = 0x%x)",
1313 (enable
? "enable" : "disable"), (void*) confirm
, *confirm
);
1318 myri10ge_load_firmware(struct myri10ge_priv
*mgp
)
1321 volatile uint32_t *confirm
;
1322 volatile char *submit
;
1324 uint32_t *buf
, size
;
1327 buf
= (uint32_t *)((unsigned long)(buf_bytes
+ 7) & ~7UL);
1329 status
= myri10ge_load_firmware_from_zlib(mgp
, &size
);
1331 cmn_err(CE_WARN
, "%s: firmware loading failed\n", mgp
->name
);
1335 /* clear confirmation addr */
1336 confirm
= (volatile uint32_t *)mgp
->cmd
;
1341 * send a reload command to the bootstrap MCP, and wait for the
1342 * response in the confirmation address. The firmware should
1343 * write a -1 there to indicate it is alive and well
1346 buf
[0] = mgp
->cmd_dma
.high
; /* confirm addr MSW */
1347 buf
[1] = mgp
->cmd_dma
.low
; /* confirm addr LSW */
1348 buf
[2] = htonl(0xffffffff); /* confirm data */
1351 * FIX: All newest firmware should un-protect the bottom of
1352 * the sram before handoff. However, the very first interfaces
1353 * do not. Therefore the handoff copy must skip the first 8 bytes
1355 buf
[3] = htonl(MYRI10GE_FW_OFFSET
+ 8); /* where the code starts */
1356 buf
[4] = htonl(size
- 8); /* length of code */
1357 buf
[5] = htonl(8); /* where to copy to */
1358 buf
[6] = htonl(0); /* where to jump to */
1360 submit
= (volatile char *)(mgp
->sram
+ MXGEFW_BOOT_HANDOFF
);
1362 myri10ge_pio_copy((char *)submit
, buf
, 64);
1367 while (*confirm
!= 0xffffffff && i
< 1000) {
1371 if (*confirm
!= 0xffffffff) {
1372 cmn_err(CE_WARN
, "%s: handoff failed (%p = 0x%x)",
1373 mgp
->name
, (void *) confirm
, *confirm
);
1377 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_GET_RX_RING_SIZE
, &cmd
);
1379 cmn_err(CE_WARN
, "%s: failed MXGEFW_CMD_GET_RX_RING_SIZE\n",
1384 mgp
->max_intr_slots
= 2 * (cmd
.data0
/ sizeof (mcp_dma_addr_t
));
1385 myri10ge_dummy_rdma(mgp
, 1);
1390 myri10ge_m_unicst(void *arg
, const uint8_t *addr
)
1392 struct myri10ge_priv
*mgp
= arg
;
1396 cmd
.data0
= ((addr
[0] << 24) | (addr
[1] << 16)
1397 | (addr
[2] << 8) | addr
[3]);
1399 cmd
.data1
= ((addr
[4] << 8) | (addr
[5]));
1401 status
= myri10ge_send_cmd(mgp
, MXGEFW_SET_MAC_ADDRESS
, &cmd
);
1402 if (status
== 0 && (addr
!= mgp
->mac_addr
))
1403 (void) memcpy(mgp
->mac_addr
, addr
, sizeof (mgp
->mac_addr
));
1409 myri10ge_change_pause(struct myri10ge_priv
*mgp
, int pause
)
1415 status
= myri10ge_send_cmd(mgp
, MXGEFW_ENABLE_FLOW_CONTROL
,
1418 status
= myri10ge_send_cmd(mgp
, MXGEFW_DISABLE_FLOW_CONTROL
,
1422 cmn_err(CE_WARN
, "%s: Failed to set flow control mode\n",
1431 myri10ge_change_promisc(struct myri10ge_priv
*mgp
, int promisc
)
1437 status
= myri10ge_send_cmd(mgp
, MXGEFW_ENABLE_PROMISC
, &cmd
);
1439 status
= myri10ge_send_cmd(mgp
, MXGEFW_DISABLE_PROMISC
, &cmd
);
1442 cmn_err(CE_WARN
, "%s: Failed to set promisc mode\n",
1448 myri10ge_dma_test(struct myri10ge_priv
*mgp
, int test_type
)
1454 struct myri10ge_dma_stuff dmabench_dma
;
1458 * Run a small DMA test.
1459 * The magic multipliers to the length tell the firmware
1460 * tp do DMA read, write, or read+write tests. The
1461 * results are returned in cmd.data0. The upper 16
1462 * bits or the return is the number of transfers completed.
1463 * The lower 16 bits is the time in 0.5us ticks that the
1464 * transfers took to complete
1467 len
= mgp
->tx_boundary
;
1469 dmabench
= myri10ge_dma_alloc(mgp
->dip
, len
,
1470 &myri10ge_rx_jumbo_dma_attr
, &myri10ge_dev_access_attr
,
1471 DDI_DMA_STREAMING
, DDI_DMA_RDWR
|DDI_DMA_STREAMING
,
1472 &dmabench_dma
, 1, DDI_DMA_DONTWAIT
);
1473 mgp
->read_dma
= mgp
->write_dma
= mgp
->read_write_dma
= 0;
1474 if (dmabench
== NULL
) {
1475 cmn_err(CE_WARN
, "%s dma benchmark aborted\n", mgp
->name
);
1479 cmd
.data0
= ntohl(dmabench_dma
.low
);
1480 cmd
.data1
= ntohl(dmabench_dma
.high
);
1481 cmd
.data2
= len
* 0x10000;
1482 status
= myri10ge_send_cmd(mgp
, test_type
, &cmd
);
1487 mgp
->read_dma
= ((cmd
.data0
>>16) * len
* 2) / (cmd
.data0
& 0xffff);
1489 cmd
.data0
= ntohl(dmabench_dma
.low
);
1490 cmd
.data1
= ntohl(dmabench_dma
.high
);
1491 cmd
.data2
= len
* 0x1;
1492 status
= myri10ge_send_cmd(mgp
, test_type
, &cmd
);
1497 mgp
->write_dma
= ((cmd
.data0
>>16) * len
* 2) / (cmd
.data0
& 0xffff);
1499 cmd
.data0
= ntohl(dmabench_dma
.low
);
1500 cmd
.data1
= ntohl(dmabench_dma
.high
);
1501 cmd
.data2
= len
* 0x10001;
1502 status
= myri10ge_send_cmd(mgp
, test_type
, &cmd
);
1504 test
= "read/write";
1507 mgp
->read_write_dma
= ((cmd
.data0
>>16) * len
* 2 * 2) /
1508 (cmd
.data0
& 0xffff);
1512 myri10ge_dma_free(&dmabench_dma
);
1513 if (status
!= 0 && test_type
!= MXGEFW_CMD_UNALIGNED_TEST
)
1514 cmn_err(CE_WARN
, "%s %s dma benchmark failed\n", mgp
->name
,
1520 myri10ge_reset(struct myri10ge_priv
*mgp
)
1523 struct myri10ge_nic_stat
*ethstat
;
1524 struct myri10ge_slice_state
*ss
;
1528 /* send a reset command to the card to see if it is alive */
1529 (void) memset(&cmd
, 0, sizeof (cmd
));
1530 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_RESET
, &cmd
);
1532 cmn_err(CE_WARN
, "%s: failed reset\n", mgp
->name
);
1536 /* Now exchange information about interrupts */
1538 bytes
= mgp
->max_intr_slots
* sizeof (*mgp
->ss
[0].rx_done
.entry
);
1539 cmd
.data0
= (uint32_t)bytes
;
1540 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_SET_INTRQ_SIZE
, &cmd
);
1543 * Even though we already know how many slices are supported
1544 * via myri10ge_probe_slices() MXGEFW_CMD_GET_MAX_RSS_QUEUES
1545 * has magic side effects, and must be called after a reset.
1546 * It must be called prior to calling any RSS related cmds,
1547 * including assigning an interrupt queue for anything but
1548 * slice 0. It must also be called *after*
1549 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1550 * the firmware to compute offsets.
1553 if (mgp
->num_slices
> 1) {
1555 /* ask the maximum number of slices it supports */
1556 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_GET_MAX_RSS_QUEUES
,
1560 "%s: failed to get number of slices\n",
1566 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1567 * to setting up the interrupt queue DMA
1570 cmd
.data0
= mgp
->num_slices
;
1571 cmd
.data1
= MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE
|
1572 MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES
;
1573 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_ENABLE_RSS_QUEUES
,
1577 "%s: failed to set number of slices\n",
1582 for (i
= 0; i
< mgp
->num_slices
; i
++) {
1584 cmd
.data0
= ntohl(ss
->rx_done
.dma
.low
);
1585 cmd
.data1
= ntohl(ss
->rx_done
.dma
.high
);
1587 status
|= myri10ge_send_cmd(mgp
, MXGEFW_CMD_SET_INTRQ_DMA
,
1591 status
|= myri10ge_send_cmd(mgp
, MXGEFW_CMD_GET_IRQ_ACK_OFFSET
, &cmd
);
1592 for (i
= 0; i
< mgp
->num_slices
; i
++) {
1594 ss
->irq_claim
= (volatile unsigned int *)
1595 (void *)(mgp
->sram
+ cmd
.data0
+ 8 * i
);
1598 if (mgp
->ddi_intr_type
== DDI_INTR_TYPE_FIXED
) {
1599 status
|= myri10ge_send_cmd(mgp
,
1600 MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET
, &cmd
);
1601 mgp
->irq_deassert
= (uint32_t *)(void *)(mgp
->sram
+ cmd
.data0
);
1604 status
|= myri10ge_send_cmd(mgp
,
1605 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET
, &cmd
);
1606 mgp
->intr_coal_delay_ptr
= (uint32_t *)(void *)(mgp
->sram
+ cmd
.data0
);
1609 cmn_err(CE_WARN
, "%s: failed set interrupt parameters\n",
1614 *mgp
->intr_coal_delay_ptr
= htonl(mgp
->intr_coal_delay
);
1615 (void) myri10ge_dma_test(mgp
, MXGEFW_DMA_TEST
);
1617 /* reset mcp/driver shared state back to 0 */
1619 for (i
= 0; i
< mgp
->num_slices
; i
++) {
1621 bytes
= mgp
->max_intr_slots
*
1622 sizeof (*mgp
->ss
[0].rx_done
.entry
);
1623 (void) memset(ss
->rx_done
.entry
, 0, bytes
);
1626 ss
->tx
.pkt_done
= 0;
1628 ss
->rx_small
.cnt
= 0;
1629 ss
->rx_done
.idx
= 0;
1630 ss
->rx_done
.cnt
= 0;
1632 ss
->tx
.watchdog_done
= 0;
1633 ss
->tx
.watchdog_req
= 0;
1635 ss
->tx
.activate
= 0;
1637 mgp
->watchdog_rx_pause
= 0;
1638 if (mgp
->ksp_stat
!= NULL
) {
1639 ethstat
= (struct myri10ge_nic_stat
*)mgp
->ksp_stat
->ks_data
;
1640 ethstat
->link_changes
.value
.ul
= 0;
1642 status
= myri10ge_m_unicst(mgp
, mgp
->mac_addr
);
1643 myri10ge_change_promisc(mgp
, 0);
1644 (void) myri10ge_change_pause(mgp
, mgp
->pause
);
1649 myri10ge_init_toeplitz(struct myri10ge_priv
*mgp
)
1658 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_GET_RSS_KEY_OFFSET
,
1661 cmn_err(CE_WARN
, "%s: failed to get rss key\n",
1665 myri10ge_pio_copy32(mgp
->rss_key
,
1666 (uint32_t *)(void*)((char *)mgp
->sram
+ cmd
.data0
),
1667 sizeof (mgp
->rss_key
));
1669 mgp
->toeplitz_hash_table
= kmem_alloc(sizeof (uint32_t) * 12 * 256,
1671 key
= (uint8_t *)mgp
->rss_key
;
1673 for (b
= 0; b
< 12; b
++) {
1674 for (s
= 0; s
< 8; s
++) {
1675 /* Bits: b*8+s, ..., b*8+s+31 */
1677 for (j
= 0; j
< 32; j
++) {
1679 bit
= 0x1 & (key
[bit
/ 8] >> (7 -(bit
& 0x7)));
1680 k
[s
] |= bit
<< (31 - j
);
1684 for (i
= 0; i
<= 0xff; i
++) {
1686 if (i
& (1 << 7)) { tmp
^= k
[0]; }
1687 if (i
& (1 << 6)) { tmp
^= k
[1]; }
1688 if (i
& (1 << 5)) { tmp
^= k
[2]; }
1689 if (i
& (1 << 4)) { tmp
^= k
[3]; }
1690 if (i
& (1 << 3)) { tmp
^= k
[4]; }
1691 if (i
& (1 << 2)) { tmp
^= k
[5]; }
1692 if (i
& (1 << 1)) { tmp
^= k
[6]; }
1693 if (i
& (1 << 0)) { tmp
^= k
[7]; }
1694 mgp
->toeplitz_hash_table
[t
++] = tmp
;
1700 static inline struct myri10ge_slice_state
*
1701 myri10ge_toeplitz_send_hash(struct myri10ge_priv
*mgp
, struct ip
*ip
)
1704 uint32_t saddr
, daddr
;
1705 uint32_t hash
, slice
;
1706 uint32_t *table
= mgp
->toeplitz_hash_table
;
1710 * Note hashing order is reversed from how it is done
1711 * in the NIC, so as to generate the same hash value
1712 * for the connection to try to keep connections CPU local
1715 /* hash on IPv4 src/dst address */
1716 saddr
= ntohl(ip
->ip_src
.s_addr
);
1717 daddr
= ntohl(ip
->ip_dst
.s_addr
);
1718 hash
= table
[(256 * 0) + ((daddr
>> 24) & 0xff)];
1719 hash
^= table
[(256 * 1) + ((daddr
>> 16) & 0xff)];
1720 hash
^= table
[(256 * 2) + ((daddr
>> 8) & 0xff)];
1721 hash
^= table
[(256 * 3) + ((daddr
) & 0xff)];
1722 hash
^= table
[(256 * 4) + ((saddr
>> 24) & 0xff)];
1723 hash
^= table
[(256 * 5) + ((saddr
>> 16) & 0xff)];
1724 hash
^= table
[(256 * 6) + ((saddr
>> 8) & 0xff)];
1725 hash
^= table
[(256 * 7) + ((saddr
) & 0xff)];
1726 /* hash on TCP port, if required */
1727 if ((myri10ge_rss_hash
& MXGEFW_RSS_HASH_TYPE_TCP_IPV4
) &&
1728 ip
->ip_p
== IPPROTO_TCP
) {
1729 hdr
= (struct tcphdr
*)(void *)
1730 (((uint8_t *)ip
) + (ip
->ip_hl
<< 2));
1731 src
= ntohs(hdr
->th_sport
);
1732 dst
= ntohs(hdr
->th_dport
);
1734 hash
^= table
[(256 * 8) + ((dst
>> 8) & 0xff)];
1735 hash
^= table
[(256 * 9) + ((dst
) & 0xff)];
1736 hash
^= table
[(256 * 10) + ((src
>> 8) & 0xff)];
1737 hash
^= table
[(256 * 11) + ((src
) & 0xff)];
1739 slice
= (mgp
->num_slices
- 1) & hash
;
1740 return (&mgp
->ss
[slice
]);
1744 static inline struct myri10ge_slice_state
*
1745 myri10ge_simple_send_hash(struct myri10ge_priv
*mgp
, struct ip
*ip
)
1748 uint32_t slice
, hash_val
;
1751 if (ip
->ip_p
!= IPPROTO_TCP
&& ip
->ip_p
!= IPPROTO_UDP
) {
1752 return (&mgp
->ss
[0]);
1754 hdr
= (struct tcphdr
*)(void *)(((uint8_t *)ip
) + (ip
->ip_hl
<< 2));
1757 * Use the second byte of the *destination* address for
1758 * MXGEFW_RSS_HASH_TYPE_SRC_PORT, so as to match NIC's hashing
1760 hash_val
= ntohs(hdr
->th_dport
) & 0xff;
1761 if (myri10ge_rss_hash
== MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT
)
1762 hash_val
+= ntohs(hdr
->th_sport
) & 0xff;
1764 slice
= (mgp
->num_slices
- 1) & hash_val
;
1765 return (&mgp
->ss
[slice
]);
1768 static inline struct myri10ge_slice_state
*
1769 myri10ge_send_hash(struct myri10ge_priv
*mgp
, mblk_t
*mp
)
1771 unsigned int slice
= 0;
1772 struct ether_header
*eh
;
1773 struct ether_vlan_header
*vh
;
1777 if (mgp
->num_slices
== 1)
1778 return (&mgp
->ss
[0]);
1780 if (myri10ge_tx_hash
== 0) {
1781 slice
= CPU
->cpu_id
& (mgp
->num_slices
- 1);
1782 return (&mgp
->ss
[slice
]);
1786 * ensure it is a TCP or UDP over IPv4 packet, and that the
1787 * headers are in the 1st mblk. Otherwise, punt
1791 if ((MBLKL(mp
)) < (ehl
+ ihl
+ 8))
1792 return (&mgp
->ss
[0]);
1793 eh
= (struct ether_header
*)(void *)mp
->b_rptr
;
1794 ip
= (struct ip
*)(void *)(eh
+ 1);
1795 if (eh
->ether_type
!= BE_16(ETHERTYPE_IP
)) {
1796 if (eh
->ether_type
!= BE_16(ETHERTYPE_VLAN
))
1797 return (&mgp
->ss
[0]);
1798 vh
= (struct ether_vlan_header
*)(void *)mp
->b_rptr
;
1799 if (vh
->ether_type
!= BE_16(ETHERTYPE_IP
))
1800 return (&mgp
->ss
[0]);
1802 ip
= (struct ip
*)(void *)(vh
+ 1);
1804 ihl
= ip
->ip_hl
<< 2;
1805 if (MBLKL(mp
) < (ehl
+ ihl
+ 8))
1806 return (&mgp
->ss
[0]);
1807 switch (myri10ge_rss_hash
) {
1808 case MXGEFW_RSS_HASH_TYPE_IPV4
:
1810 case MXGEFW_RSS_HASH_TYPE_TCP_IPV4
:
1812 case (MXGEFW_RSS_HASH_TYPE_IPV4
|MXGEFW_RSS_HASH_TYPE_TCP_IPV4
):
1813 return (myri10ge_toeplitz_send_hash(mgp
, ip
));
1814 case MXGEFW_RSS_HASH_TYPE_SRC_PORT
:
1816 case MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT
:
1817 return (myri10ge_simple_send_hash(mgp
, ip
));
1821 return (&mgp
->ss
[0]);
1825 myri10ge_setup_slice(struct myri10ge_slice_state
*ss
)
1827 struct myri10ge_priv
*mgp
= ss
->mgp
;
1829 int tx_ring_size
, rx_ring_size
;
1830 int tx_ring_entries
, rx_ring_entries
;
1835 slice
= ss
- mgp
->ss
;
1837 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_GET_SEND_RING_SIZE
, &cmd
);
1838 tx_ring_size
= cmd
.data0
;
1840 status
|= myri10ge_send_cmd(mgp
, MXGEFW_CMD_GET_RX_RING_SIZE
, &cmd
);
1843 rx_ring_size
= cmd
.data0
;
1845 tx_ring_entries
= tx_ring_size
/ sizeof (struct mcp_kreq_ether_send
);
1846 rx_ring_entries
= rx_ring_size
/ sizeof (struct mcp_dma_addr
);
1847 ss
->tx
.mask
= tx_ring_entries
- 1;
1848 ss
->rx_small
.mask
= ss
->rx_big
.mask
= rx_ring_entries
- 1;
1850 /* get the lanai pointers to the send and receive rings */
1853 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_GET_SEND_OFFSET
, &cmd
);
1854 ss
->tx
.lanai
= (mcp_kreq_ether_send_t
*)(void *)(mgp
->sram
+ cmd
.data0
);
1855 if (mgp
->num_slices
> 1) {
1856 ss
->tx
.go
= (char *)mgp
->sram
+ MXGEFW_ETH_SEND_GO
+ 64 * slice
;
1857 ss
->tx
.stop
= (char *)mgp
->sram
+ MXGEFW_ETH_SEND_STOP
+
1865 status
|= myri10ge_send_cmd(mgp
, MXGEFW_CMD_GET_SMALL_RX_OFFSET
, &cmd
);
1866 ss
->rx_small
.lanai
= (mcp_kreq_ether_recv_t
*)
1867 (void *)(mgp
->sram
+ cmd
.data0
);
1870 status
|= myri10ge_send_cmd(mgp
, MXGEFW_CMD_GET_BIG_RX_OFFSET
, &cmd
);
1871 ss
->rx_big
.lanai
= (mcp_kreq_ether_recv_t
*)(void *)
1872 (mgp
->sram
+ cmd
.data0
);
1876 "%s: failed to get ring sizes or locations\n", mgp
->name
);
1881 bytes
= rx_ring_entries
* sizeof (*ss
->rx_small
.shadow
);
1882 ss
->rx_small
.shadow
= kmem_zalloc(bytes
, KM_SLEEP
);
1883 if (ss
->rx_small
.shadow
== NULL
)
1885 (void) memset(ss
->rx_small
.shadow
, 0, bytes
);
1887 bytes
= rx_ring_entries
* sizeof (*ss
->rx_big
.shadow
);
1888 ss
->rx_big
.shadow
= kmem_zalloc(bytes
, KM_SLEEP
);
1889 if (ss
->rx_big
.shadow
== NULL
)
1890 goto abort_with_rx_small_shadow
;
1891 (void) memset(ss
->rx_big
.shadow
, 0, bytes
);
1893 /* allocate the host info rings */
1895 bytes
= tx_ring_entries
* sizeof (*ss
->tx
.info
);
1896 ss
->tx
.info
= kmem_zalloc(bytes
, KM_SLEEP
);
1897 if (ss
->tx
.info
== NULL
)
1898 goto abort_with_rx_big_shadow
;
1899 (void) memset(ss
->tx
.info
, 0, bytes
);
1901 bytes
= rx_ring_entries
* sizeof (*ss
->rx_small
.info
);
1902 ss
->rx_small
.info
= kmem_zalloc(bytes
, KM_SLEEP
);
1903 if (ss
->rx_small
.info
== NULL
)
1904 goto abort_with_tx_info
;
1905 (void) memset(ss
->rx_small
.info
, 0, bytes
);
1907 bytes
= rx_ring_entries
* sizeof (*ss
->rx_big
.info
);
1908 ss
->rx_big
.info
= kmem_zalloc(bytes
, KM_SLEEP
);
1909 if (ss
->rx_big
.info
== NULL
)
1910 goto abort_with_rx_small_info
;
1911 (void) memset(ss
->rx_big
.info
, 0, bytes
);
1913 ss
->tx
.stall
= ss
->tx
.sched
= 0;
1914 ss
->tx
.stall_early
= ss
->tx
.stall_late
= 0;
1916 ss
->jbufs_for_smalls
= 1 + (1 + ss
->rx_small
.mask
) /
1917 (myri10ge_mtu
/ (myri10ge_small_bytes
+ MXGEFW_PAD
));
1919 allocated
= myri10ge_add_jbufs(ss
,
1920 myri10ge_bigbufs_initial
+ ss
->jbufs_for_smalls
, 1);
1921 if (allocated
< ss
->jbufs_for_smalls
+ myri10ge_bigbufs_initial
) {
1923 "%s: Could not allocate enough receive buffers (%d/%d)\n",
1924 mgp
->name
, allocated
,
1925 myri10ge_bigbufs_initial
+ ss
->jbufs_for_smalls
);
1926 goto abort_with_jumbos
;
1929 myri10ge_carve_up_jbufs_into_small_ring(ss
);
1932 mutex_enter(&ss
->jpool
.mtx
);
1933 if (allocated
< rx_ring_entries
)
1934 ss
->jpool
.low_water
= allocated
/ 4;
1936 ss
->jpool
.low_water
= rx_ring_entries
/ 2;
1939 * invalidate the big receive ring in case we do not
1940 * allocate sufficient jumbos to fill it
1942 (void) memset(ss
->rx_big
.shadow
, 1,
1943 (ss
->rx_big
.mask
+ 1) * sizeof (ss
->rx_big
.shadow
[0]));
1944 for (idx
= 7; idx
<= ss
->rx_big
.mask
; idx
+= 8) {
1945 myri10ge_submit_8rx(&ss
->rx_big
.lanai
[idx
- 7],
1946 &ss
->rx_big
.shadow
[idx
- 7]);
1951 myri10ge_restock_jumbos(ss
);
1953 for (idx
= 7; idx
<= ss
->rx_small
.mask
; idx
+= 8) {
1954 myri10ge_submit_8rx(&ss
->rx_small
.lanai
[idx
- 7],
1955 &ss
->rx_small
.shadow
[idx
- 7]);
1958 ss
->rx_small
.cnt
= ss
->rx_small
.mask
+ 1;
1960 mutex_exit(&ss
->jpool
.mtx
);
1962 status
= myri10ge_prepare_tx_ring(ss
);
1965 goto abort_with_small_jbufs
;
1967 cmd
.data0
= ntohl(ss
->fw_stats_dma
.low
);
1968 cmd
.data1
= ntohl(ss
->fw_stats_dma
.high
);
1969 cmd
.data2
= sizeof (mcp_irq_data_t
);
1970 cmd
.data2
|= (slice
<< 16);
1971 bzero(ss
->fw_stats
, sizeof (*ss
->fw_stats
));
1972 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_SET_STATS_DMA_V2
, &cmd
);
1973 if (status
== ENOSYS
) {
1974 cmd
.data0
= ntohl(ss
->fw_stats_dma
.low
) +
1975 offsetof(mcp_irq_data_t
, send_done_count
);
1976 cmd
.data1
= ntohl(ss
->fw_stats_dma
.high
);
1977 status
= myri10ge_send_cmd(mgp
,
1978 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE
, &cmd
);
1981 cmn_err(CE_WARN
, "%s: Couldn't set stats DMA\n", mgp
->name
);
1988 myri10ge_unprepare_tx_ring(ss
);
1990 abort_with_small_jbufs
:
1991 myri10ge_release_small_jbufs(ss
);
1994 if (allocated
!= 0) {
1995 mutex_enter(&ss
->jpool
.mtx
);
1996 ss
->jpool
.low_water
= 0;
1997 mutex_exit(&ss
->jpool
.mtx
);
1998 myri10ge_unstock_jumbos(ss
);
1999 myri10ge_remove_jbufs(ss
);
2002 bytes
= rx_ring_entries
* sizeof (*ss
->rx_big
.info
);
2003 kmem_free(ss
->rx_big
.info
, bytes
);
2005 abort_with_rx_small_info
:
2006 bytes
= rx_ring_entries
* sizeof (*ss
->rx_small
.info
);
2007 kmem_free(ss
->rx_small
.info
, bytes
);
2010 bytes
= tx_ring_entries
* sizeof (*ss
->tx
.info
);
2011 kmem_free(ss
->tx
.info
, bytes
);
2013 abort_with_rx_big_shadow
:
2014 bytes
= rx_ring_entries
* sizeof (*ss
->rx_big
.shadow
);
2015 kmem_free(ss
->rx_big
.shadow
, bytes
);
2017 abort_with_rx_small_shadow
:
2018 bytes
= rx_ring_entries
* sizeof (*ss
->rx_small
.shadow
);
2019 kmem_free(ss
->rx_small
.shadow
, bytes
);
2026 myri10ge_teardown_slice(struct myri10ge_slice_state
*ss
)
2028 int tx_ring_entries
, rx_ring_entries
;
2031 /* ignore slices that have not been fully setup */
2032 if (ss
->tx
.cp
== NULL
)
2034 /* Free the TX copy buffers */
2035 myri10ge_unprepare_tx_ring(ss
);
2037 /* stop passing returned buffers to firmware */
2039 mutex_enter(&ss
->jpool
.mtx
);
2040 ss
->jpool
.low_water
= 0;
2041 mutex_exit(&ss
->jpool
.mtx
);
2042 myri10ge_release_small_jbufs(ss
);
2044 /* Release the free jumbo frame pool */
2045 myri10ge_unstock_jumbos(ss
);
2046 myri10ge_remove_jbufs(ss
);
2048 rx_ring_entries
= ss
->rx_big
.mask
+ 1;
2049 tx_ring_entries
= ss
->tx
.mask
+ 1;
2051 bytes
= rx_ring_entries
* sizeof (*ss
->rx_big
.info
);
2052 kmem_free(ss
->rx_big
.info
, bytes
);
2054 bytes
= rx_ring_entries
* sizeof (*ss
->rx_small
.info
);
2055 kmem_free(ss
->rx_small
.info
, bytes
);
2057 bytes
= tx_ring_entries
* sizeof (*ss
->tx
.info
);
2058 kmem_free(ss
->tx
.info
, bytes
);
2060 bytes
= rx_ring_entries
* sizeof (*ss
->rx_big
.shadow
);
2061 kmem_free(ss
->rx_big
.shadow
, bytes
);
2063 bytes
= rx_ring_entries
* sizeof (*ss
->rx_small
.shadow
);
2064 kmem_free(ss
->rx_small
.shadow
, bytes
);
2068 myri10ge_start_locked(struct myri10ge_priv
*mgp
)
2071 int status
, big_pow2
, i
;
2072 volatile uint8_t *itable
;
2074 status
= DDI_SUCCESS
;
2075 /* Allocate DMA resources and receive buffers */
2077 status
= myri10ge_reset(mgp
);
2079 cmn_err(CE_WARN
, "%s: failed reset\n", mgp
->name
);
2080 return (DDI_FAILURE
);
2083 if (mgp
->num_slices
> 1) {
2084 cmd
.data0
= mgp
->num_slices
;
2085 cmd
.data1
= 1; /* use MSI-X */
2086 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_ENABLE_RSS_QUEUES
,
2090 "%s: failed to set number of slices\n",
2092 goto abort_with_nothing
;
2094 /* setup the indirection table */
2095 cmd
.data0
= mgp
->num_slices
;
2096 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_SET_RSS_TABLE_SIZE
,
2099 status
|= myri10ge_send_cmd(mgp
,
2100 MXGEFW_CMD_GET_RSS_TABLE_OFFSET
, &cmd
);
2103 "%s: failed to setup rss tables\n", mgp
->name
);
2106 /* just enable an identity mapping */
2107 itable
= mgp
->sram
+ cmd
.data0
;
2108 for (i
= 0; i
< mgp
->num_slices
; i
++)
2109 itable
[i
] = (uint8_t)i
;
2111 if (myri10ge_rss_hash
& MYRI10GE_TOEPLITZ_HASH
) {
2112 status
= myri10ge_init_toeplitz(mgp
);
2114 cmn_err(CE_WARN
, "%s: failed to setup "
2115 "toeplitz tx hash table", mgp
->name
);
2116 goto abort_with_nothing
;
2120 cmd
.data1
= myri10ge_rss_hash
;
2121 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_SET_RSS_ENABLE
,
2125 "%s: failed to enable slices\n", mgp
->name
);
2126 goto abort_with_toeplitz
;
2130 for (i
= 0; i
< mgp
->num_slices
; i
++) {
2131 status
= myri10ge_setup_slice(&mgp
->ss
[i
]);
2133 goto abort_with_slices
;
2137 * Tell the MCP how many buffers it has, and to
2138 * bring the ethernet interface up
2140 * Firmware needs the big buff size as a power of 2. Lie and
2141 * tell it the buffer is larger, because we only use 1
2142 * buffer/pkt, and the mtu will prevent overruns
2144 big_pow2
= myri10ge_mtu
+ MXGEFW_PAD
;
2145 while (!ISP2(big_pow2
))
2148 /* now give firmware buffers sizes, and MTU */
2149 cmd
.data0
= myri10ge_mtu
;
2150 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_SET_MTU
, &cmd
);
2151 cmd
.data0
= myri10ge_small_bytes
;
2153 myri10ge_send_cmd(mgp
, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE
, &cmd
);
2154 cmd
.data0
= big_pow2
;
2155 status
|= myri10ge_send_cmd(mgp
, MXGEFW_CMD_SET_BIG_BUFFER_SIZE
, &cmd
);
2157 cmn_err(CE_WARN
, "%s: Couldn't set buffer sizes\n", mgp
->name
);
2158 goto abort_with_slices
;
2163 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_SET_TSO_MODE
, &cmd
);
2165 cmn_err(CE_WARN
, "%s: unable to setup TSO (%d)\n",
2168 mgp
->features
|= MYRI10GE_TSO
;
2171 mgp
->link_state
= -1;
2172 mgp
->rdma_tags_available
= 15;
2173 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_ETHERNET_UP
, &cmd
);
2175 cmn_err(CE_WARN
, "%s: unable to start ethernet\n", mgp
->name
);
2176 goto abort_with_slices
;
2178 mgp
->running
= MYRI10GE_ETH_RUNNING
;
2179 return (DDI_SUCCESS
);
2182 for (i
= 0; i
< mgp
->num_slices
; i
++)
2183 myri10ge_teardown_slice(&mgp
->ss
[i
]);
2185 mgp
->running
= MYRI10GE_ETH_STOPPED
;
2187 abort_with_toeplitz
:
2188 if (mgp
->toeplitz_hash_table
!= NULL
) {
2189 kmem_free(mgp
->toeplitz_hash_table
,
2190 sizeof (uint32_t) * 12 * 256);
2191 mgp
->toeplitz_hash_table
= NULL
;
2195 return (DDI_FAILURE
);
2199 myri10ge_stop_locked(struct myri10ge_priv
*mgp
)
2201 int status
, old_down_cnt
;
2206 old_down_cnt
= mgp
->down_cnt
;
2208 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_ETHERNET_DOWN
, &cmd
);
2210 cmn_err(CE_WARN
, "%s: Couldn't bring down link\n", mgp
->name
);
2213 while (old_down_cnt
== *((volatile int *)&mgp
->down_cnt
)) {
2214 delay(1 * drv_usectohz(1000000));
2220 if (old_down_cnt
== *((volatile int *)&mgp
->down_cnt
)) {
2221 cmn_err(CE_WARN
, "%s: didn't get down irq\n", mgp
->name
);
2222 for (i
= 0; i
< mgp
->num_slices
; i
++) {
2224 * take and release the rx lock to ensure
2225 * that no interrupt thread is blocked
2226 * elsewhere in the stack, preventing
2230 mutex_enter(&mgp
->ss
[i
].rx_lock
);
2231 printf("%s: slice %d rx irq idle\n",
2233 mutex_exit(&mgp
->ss
[i
].rx_lock
);
2235 /* verify that the poll handler is inactive */
2236 mutex_enter(&mgp
->ss
->poll_lock
);
2237 polling
= mgp
->ss
->rx_polling
;
2238 mutex_exit(&mgp
->ss
->poll_lock
);
2240 printf("%s: slice %d is polling\n",
2242 delay(1 * drv_usectohz(1000000));
2246 delay(1 * drv_usectohz(1000000));
2247 if (old_down_cnt
== *((volatile int *)&mgp
->down_cnt
)) {
2248 cmn_err(CE_WARN
, "%s: Never got down irq\n", mgp
->name
);
2252 for (i
= 0; i
< mgp
->num_slices
; i
++)
2253 myri10ge_teardown_slice(&mgp
->ss
[i
]);
2255 if (mgp
->toeplitz_hash_table
!= NULL
) {
2256 kmem_free(mgp
->toeplitz_hash_table
,
2257 sizeof (uint32_t) * 12 * 256);
2258 mgp
->toeplitz_hash_table
= NULL
;
2260 mgp
->running
= MYRI10GE_ETH_STOPPED
;
2264 myri10ge_m_start(void *arg
)
2266 struct myri10ge_priv
*mgp
= arg
;
2269 mutex_enter(&mgp
->intrlock
);
2271 if (mgp
->running
!= MYRI10GE_ETH_STOPPED
) {
2272 mutex_exit(&mgp
->intrlock
);
2273 return (DDI_FAILURE
);
2275 status
= myri10ge_start_locked(mgp
);
2276 mutex_exit(&mgp
->intrlock
);
2278 if (status
!= DDI_SUCCESS
)
2281 /* start the watchdog timer */
2282 mgp
->timer_id
= timeout(myri10ge_watchdog
, mgp
,
2284 return (DDI_SUCCESS
);
2289 myri10ge_m_stop(void *arg
)
2291 struct myri10ge_priv
*mgp
= arg
;
2293 mutex_enter(&mgp
->intrlock
);
2294 /* if the device not running give up */
2295 if (mgp
->running
!= MYRI10GE_ETH_RUNNING
) {
2296 mutex_exit(&mgp
->intrlock
);
2300 mgp
->running
= MYRI10GE_ETH_STOPPING
;
2301 mutex_exit(&mgp
->intrlock
);
2302 (void) untimeout(mgp
->timer_id
);
2303 mutex_enter(&mgp
->intrlock
);
2304 myri10ge_stop_locked(mgp
);
2305 mutex_exit(&mgp
->intrlock
);
2310 myri10ge_rx_csum(mblk_t
*mp
, struct myri10ge_rx_ring_stats
*s
, uint32_t csum
)
2312 struct ether_header
*eh
;
2314 struct ip6_hdr
*ip6
;
2315 uint32_t start
, stuff
, end
, partial
, hdrlen
;
2318 csum
= ntohs((uint16_t)csum
);
2319 eh
= (struct ether_header
*)(void *)mp
->b_rptr
;
2320 hdrlen
= sizeof (*eh
);
2321 if (eh
->ether_dhost
.ether_addr_octet
[0] & 1) {
2322 if (0 == (bcmp(eh
->ether_dhost
.ether_addr_octet
,
2323 myri10ge_broadcastaddr
, sizeof (eh
->ether_dhost
))))
2329 if (eh
->ether_type
== BE_16(ETHERTYPE_VLAN
)) {
2331 * fix checksum by subtracting 4 bytes after what the
2332 * firmware thought was the end of the ether hdr
2334 partial
= *(uint32_t *)
2335 (void *)(mp
->b_rptr
+ ETHERNET_HEADER_SIZE
);
2337 csum
+= (csum
< ~partial
);
2338 csum
= (csum
>> 16) + (csum
& 0xFFFF);
2339 csum
= (csum
>> 16) + (csum
& 0xFFFF);
2340 hdrlen
+= VLAN_TAGSZ
;
2343 if (eh
->ether_type
== BE_16(ETHERTYPE_IP
)) {
2344 ip
= (struct ip
*)(void *)(mp
->b_rptr
+ hdrlen
);
2345 start
= ip
->ip_hl
<< 2;
2347 if (ip
->ip_p
== IPPROTO_TCP
)
2348 stuff
= start
+ offsetof(struct tcphdr
, th_sum
);
2349 else if (ip
->ip_p
== IPPROTO_UDP
)
2350 stuff
= start
+ offsetof(struct udphdr
, uh_sum
);
2353 end
= ntohs(ip
->ip_len
);
2354 } else if (eh
->ether_type
== BE_16(ETHERTYPE_IPV6
)) {
2355 ip6
= (struct ip6_hdr
*)(void *)(mp
->b_rptr
+ hdrlen
);
2356 start
= sizeof (*ip6
);
2357 if (ip6
->ip6_nxt
== IPPROTO_TCP
) {
2358 stuff
= start
+ offsetof(struct tcphdr
, th_sum
);
2359 } else if (ip6
->ip6_nxt
== IPPROTO_UDP
)
2360 stuff
= start
+ offsetof(struct udphdr
, uh_sum
);
2363 end
= start
+ ntohs(ip6
->ip6_plen
);
2365 * IPv6 headers do not contain a checksum, and hence
2366 * do not checksum to zero, so they don't "fall out"
2367 * of the partial checksum calculation like IPv4
2368 * headers do. We need to fix the partial checksum by
2369 * subtracting the checksum of the IPv6 header.
2372 partial
= myri10ge_csum_generic((uint16_t *)ip6
, sizeof (*ip6
));
2374 csum
+= (csum
< ~partial
);
2375 csum
= (csum
>> 16) + (csum
& 0xFFFF);
2376 csum
= (csum
>> 16) + (csum
& 0xFFFF);
2381 if (MBLKL(mp
) > hdrlen
+ end
) {
2382 /* padded frame, so hw csum may be invalid */
2386 mac_hcksum_set(mp
, start
, stuff
, end
, csum
, HCK_PARTIALCKSUM
);
2390 myri10ge_rx_done_small(struct myri10ge_slice_state
*ss
, uint32_t len
,
2394 myri10ge_rx_ring_t
*rx
;
2398 idx
= rx
->cnt
& rx
->mask
;
2401 /* allocate a new buffer to pass up the stack */
2402 mp
= allocb(len
+ MXGEFW_PAD
, 0);
2404 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_small_nobuf
);
2407 bcopy(ss
->rx_small
.info
[idx
].ptr
,
2408 (caddr_t
)mp
->b_wptr
, len
+ MXGEFW_PAD
);
2409 mp
->b_wptr
+= len
+ MXGEFW_PAD
;
2410 mp
->b_rptr
+= MXGEFW_PAD
;
2412 ss
->rx_stats
.ibytes
+= len
;
2413 ss
->rx_stats
.ipackets
+= 1;
2414 myri10ge_rx_csum(mp
, &ss
->rx_stats
, csum
);
2417 if ((idx
& 7) == 7) {
2418 myri10ge_submit_8rx(&rx
->lanai
[idx
- 7],
2419 &rx
->shadow
[idx
- 7]);
2427 myri10ge_rx_done_big(struct myri10ge_slice_state
*ss
, uint32_t len
,
2430 struct myri10ge_jpool_stuff
*jpool
;
2431 struct myri10ge_jpool_entry
*j
;
2433 int idx
, num_owned_by_mcp
;
2436 idx
= ss
->j_rx_cnt
& ss
->rx_big
.mask
;
2437 j
= ss
->rx_big
.info
[idx
].j
;
2440 printf("%s: null j at idx=%d, rx_big.cnt = %d, j_rx_cnt=%d\n",
2441 ss
->mgp
->name
, idx
, ss
->rx_big
.cnt
, ss
->j_rx_cnt
);
2446 ss
->rx_big
.info
[idx
].j
= NULL
;
2451 * Check to see if we are low on rx buffers.
2452 * Note that we must leave at least 8 free so there are
2453 * enough to free in a single 64-byte write.
2455 num_owned_by_mcp
= ss
->rx_big
.cnt
- ss
->j_rx_cnt
;
2456 if (num_owned_by_mcp
< jpool
->low_water
) {
2457 mutex_enter(&jpool
->mtx
);
2458 myri10ge_restock_jumbos(ss
);
2459 mutex_exit(&jpool
->mtx
);
2460 num_owned_by_mcp
= ss
->rx_big
.cnt
- ss
->j_rx_cnt
;
2461 /* if we are still low, then we have to copy */
2462 if (num_owned_by_mcp
< 16) {
2463 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_copy
);
2464 /* allocate a new buffer to pass up the stack */
2465 mp
= allocb(len
+ MXGEFW_PAD
, 0);
2470 (caddr_t
)mp
->b_wptr
, len
+ MXGEFW_PAD
);
2471 myri10ge_jfree_rtn(j
);
2472 /* push buffer back to NIC */
2473 mutex_enter(&jpool
->mtx
);
2474 myri10ge_restock_jumbos(ss
);
2475 mutex_exit(&jpool
->mtx
);
2480 /* loan our buffer to the stack */
2481 mp
= desballoc((unsigned char *)j
->buf
, myri10ge_mtu
, 0, &j
->free_func
);
2487 mp
->b_rptr
+= MXGEFW_PAD
;
2488 mp
->b_wptr
= ((unsigned char *) mp
->b_rptr
+ len
);
2490 ss
->rx_stats
.ibytes
+= len
;
2491 ss
->rx_stats
.ipackets
+= 1;
2492 myri10ge_rx_csum(mp
, &ss
->rx_stats
, csum
);
2497 myri10ge_jfree_rtn(j
);
2498 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_big_nobuf
);
2503 * Free all transmit buffers up until the specified index
2506 myri10ge_tx_done(struct myri10ge_slice_state
*ss
, uint32_t mcp_index
)
2508 myri10ge_tx_ring_t
*tx
;
2509 struct myri10ge_tx_dma_handle_head handles
;
2514 handles
.head
= NULL
;
2515 handles
.tail
= NULL
;
2516 while (tx
->pkt_done
!= (int)mcp_index
) {
2517 idx
= tx
->done
& tx
->mask
;
2520 * mblk & DMA handle attached only to first slot
2521 * per buffer in the packet
2524 if (tx
->info
[idx
].m
) {
2525 (void) ddi_dma_unbind_handle(tx
->info
[idx
].handle
->h
);
2526 tx
->info
[idx
].handle
->next
= handles
.head
;
2527 handles
.head
= tx
->info
[idx
].handle
;
2528 if (handles
.tail
== NULL
)
2529 handles
.tail
= tx
->info
[idx
].handle
;
2530 freeb(tx
->info
[idx
].m
);
2531 tx
->info
[idx
].m
= 0;
2532 tx
->info
[idx
].handle
= 0;
2534 if (tx
->info
[idx
].ostat
.opackets
!= 0) {
2535 tx
->stats
.multixmt
+= tx
->info
[idx
].ostat
.multixmt
;
2536 tx
->stats
.brdcstxmt
+= tx
->info
[idx
].ostat
.brdcstxmt
;
2537 tx
->stats
.obytes
+= tx
->info
[idx
].ostat
.obytes
;
2538 tx
->stats
.opackets
+= tx
->info
[idx
].ostat
.opackets
;
2539 tx
->info
[idx
].stat
.un
.all
= 0;
2545 * if we stalled the queue, wake it. But Wait until
2546 * we have at least 1/2 our slots free.
2548 if ((tx
->req
- tx
->done
) < (tx
->mask
>> 1) &&
2549 tx
->stall
!= tx
->sched
) {
2550 mutex_enter(&ss
->tx
.lock
);
2551 tx
->sched
= tx
->stall
;
2552 mutex_exit(&ss
->tx
.lock
);
2553 mac_tx_ring_update(ss
->mgp
->mh
, tx
->rh
);
2556 /* limit potential for livelock */
2557 if (unlikely(++limit
> 2 * tx
->mask
))
2560 if (tx
->req
== tx
->done
&& tx
->stop
!= NULL
) {
2562 * Nic has sent all pending requests, allow it
2563 * to stop polling this queue
2565 mutex_enter(&tx
->lock
);
2566 if (tx
->req
== tx
->done
&& tx
->active
) {
2567 *(int *)(void *)tx
->stop
= 1;
2571 mutex_exit(&tx
->lock
);
2573 if (handles
.head
!= NULL
)
2574 myri10ge_free_tx_handles(tx
, &handles
);
2578 myri10ge_mbl_init(struct myri10ge_mblk_list
*mbl
)
2581 mbl
->tail
= &mbl
->head
;
2587 myri10ge_mbl_append(struct myri10ge_slice_state
*ss
,
2588 struct myri10ge_mblk_list
*mbl
, mblk_t
*mp
)
2591 mbl
->tail
= &mp
->b_next
;
/*
 * Drain the slice's receive-completion ring into the mblk list mbl.
 *
 * Loops while the current rx_done entry has a non-zero length.  For each
 * entry: checks *stop, byte-swaps the length and masks off
 * MXGEFW_RSS_HASH_MASK, checks the livelock budget (limit < 0), clears
 * the entry, byte-swaps the checksum, and builds an mblk through
 * myri10ge_rx_done_small() when length <= myri10ge_small_bytes or
 * myri10ge_rx_done_big() otherwise.  The mblk is appended to mbl when
 * LRO is disabled or myri10ge_lro_rx() returns non-zero.  The ring index
 * is recomputed as cnt masked by (max_intr_slots - 1).  After the loop
 * every entry on ss->lro_active is unlinked and flushed into mbl.
 *
 * NOTE(review): the actions taken when *stop is set or limit goes
 * negative, and the updates of limit and rx_done->cnt, are not present
 * in this extract.
 */
2598 myri10ge_clean_rx_done(struct myri10ge_slice_state
*ss
,
2599 struct myri10ge_mblk_list
*mbl
, int limit
, boolean_t
*stop
)
2601 myri10ge_rx_done_t
*rx_done
= &ss
->rx_done
;
2602 struct myri10ge_priv
*mgp
= ss
->mgp
;
2604 struct lro_entry
*lro
;
2609 while (rx_done
->entry
[rx_done
->idx
].length
!= 0) {
2610 if (unlikely (*stop
)) {
2613 length
= ntohs(rx_done
->entry
[rx_done
->idx
].length
);
2614 length
&= (~MXGEFW_RSS_HASH_MASK
);
2616 /* limit potential for livelock */
2618 if (unlikely(limit
< 0))
2621 rx_done
->entry
[rx_done
->idx
].length
= 0;
2622 checksum
= ntohs(rx_done
->entry
[rx_done
->idx
].checksum
);
2623 if (length
<= myri10ge_small_bytes
)
2624 mp
= myri10ge_rx_done_small(ss
, length
, checksum
);
2626 mp
= myri10ge_rx_done_big(ss
, length
, checksum
);
2628 if (!myri10ge_lro
||
2629 0 != myri10ge_lro_rx(ss
, mp
, checksum
, mbl
))
2630 myri10ge_mbl_append(ss
, mbl
, mp
);
2633 rx_done
->idx
= rx_done
->cnt
& (mgp
->max_intr_slots
- 1);
2635 while (ss
->lro_active
!= NULL
) {
2636 lro
= ss
->lro_active
;
2637 ss
->lro_active
= lro
->next
;
2638 myri10ge_lro_flush(ss
, lro
, mbl
);
/*
 * Interrupt-time receive processing for one slice.
 *
 * Initialises a local mblk list, attempts ss->rx_lock with
 * mutex_tryenter() (a zero result means the lock was not acquired),
 * samples ss->rx_gen_num, drains the completion ring with
 * myri10ge_clean_rx_done() using MYRI10GE_POLL_NULL as the limit, and,
 * when anything was collected, hands the chain to mac_rx_ring() with the
 * sampled generation number before rx_lock is released.
 * NOTE(review): the statement executed when the tryenter fails and the
 * final argument of the clean_rx_done call are not present in this
 * extract.
 */
2643 myri10ge_intr_rx(struct myri10ge_slice_state
*ss
)
2646 struct myri10ge_mblk_list mbl
;
2648 myri10ge_mbl_init(&mbl
);
2649 if (mutex_tryenter(&ss
->rx_lock
) == 0)
2651 gen
= ss
->rx_gen_num
;
2652 myri10ge_clean_rx_done(ss
, &mbl
, MYRI10GE_POLL_NULL
,
2654 if (mbl
.head
!= NULL
)
2655 mac_rx_ring(ss
->mgp
->mh
, ss
->rx_rh
, mbl
.head
, gen
);
2656 mutex_exit(&ss
->rx_lock
);
/*
 * Polling receive entry point (installed as mri_poll by
 * myri10ge_fill_ring()).
 *
 * arg is the slice state.  Under ss->rx_lock the completion ring is
 * drained by myri10ge_clean_rx_done(), with `bytes' passed as the limit
 * and a local B_FALSE flag as the stop indicator.  A diagnostic printf
 * reports the slice index (ss - ss->mgp->ss), rx_token and rx_polling.
 * NOTE(review): the condition guarding the printf and the return
 * statement are not present in this extract.
 */
2661 myri10ge_poll_rx(void *arg
, int bytes
)
2663 struct myri10ge_slice_state
*ss
= arg
;
2664 struct myri10ge_mblk_list mbl
;
2665 boolean_t dummy
= B_FALSE
;
2670 myri10ge_mbl_init(&mbl
);
2671 mutex_enter(&ss
->rx_lock
);
2673 myri10ge_clean_rx_done(ss
, &mbl
, bytes
, &dummy
);
2675 printf("%d: poll_rx: token=%d, polling=%d\n", (int)(ss
-
2676 ss
->mgp
->ss
), ss
->rx_token
, ss
->rx_polling
);
2677 mutex_exit(&ss
->rx_lock
);
/*
 * Per-slice interrupt handler; arg0 is the slice state.
 *
 * Returns DDI_INTR_UNCLAIMED when the firmware stats block is not marked
 * valid.  Otherwise it:
 *  - runs myri10ge_intr_rx() for pending receives,
 *  - for DDI_INTR_TYPE_FIXED interrupts, writes 0 to *mgp->irq_deassert
 *    to lower the legacy IRQ,
 *  - repeatedly compares the firmware's send_done_count with
 *    tx->pkt_done and calls myri10ge_tx_done() on a difference, for as
 *    long as a volatile re-read of stats->valid is non-zero,
 *  - when stats->stats_updated is set, tracks link state (reporting it
 *    with mac_link_update()), accumulates link_down into mgp->down_cnt,
 *    and logs a change in rdma_tags_available,
 *  - writes BE_32(3) to the irq_claim words and returns DDI_INTR_CLAIMED.
 *
 * NOTE(review): the conditions on the low bit of `valid', the clearing
 * of stats->valid, and the rx_polling branch under poll_lock are only
 * partly present in this extract.  arg1 is not referenced by any visible
 * line.
 */
2683 myri10ge_intr(caddr_t arg0
, caddr_t arg1
)
2685 struct myri10ge_slice_state
*ss
=
2686 (struct myri10ge_slice_state
*)(void *)arg0
;
2687 struct myri10ge_priv
*mgp
= ss
->mgp
;
2688 mcp_irq_data_t
*stats
= ss
->fw_stats
;
2689 myri10ge_tx_ring_t
*tx
= &ss
->tx
;
2690 uint32_t send_done_count
;
2694 /* make sure the DMA has finished */
2695 if (!stats
->valid
) {
2696 return (DDI_INTR_UNCLAIMED
);
2698 valid
= stats
->valid
;
2700 /* low bit indicates receives are present */
2702 myri10ge_intr_rx(ss
);
2704 if (mgp
->ddi_intr_type
== DDI_INTR_TYPE_FIXED
) {
2705 /* lower legacy IRQ */
2706 *mgp
->irq_deassert
= 0;
2707 if (!myri10ge_deassert_wait
)
2708 /* don't wait for conf. that irq is low */
2712 /* no need to wait for conf. that irq is low */
2717 /* check for transmit completes and receives */
2718 send_done_count
= ntohl(stats
->send_done_count
);
2719 if (send_done_count
!= tx
->pkt_done
)
2720 myri10ge_tx_done(ss
, (int)send_done_count
);
2721 } while (*((volatile uint8_t *) &stats
->valid
));
2723 if (stats
->stats_updated
) {
2724 if (mgp
->link_state
!= stats
->link_up
|| stats
->link_down
) {
2725 mgp
->link_state
= stats
->link_up
;
2726 if (stats
->link_down
) {
2727 mgp
->down_cnt
+= stats
->link_down
;
2728 mgp
->link_state
= 0;
2730 if (mgp
->link_state
) {
2731 if (myri10ge_verbose
)
2732 printf("%s: link up\n", mgp
->name
);
2733 mac_link_update(mgp
->mh
, LINK_STATE_UP
);
2735 if (myri10ge_verbose
)
2736 printf("%s: link down\n", mgp
->name
);
2737 mac_link_update(mgp
->mh
, LINK_STATE_DOWN
);
2739 MYRI10GE_NIC_STAT_INC(link_changes
);
2741 if (mgp
->rdma_tags_available
!=
2742 ntohl(ss
->fw_stats
->rdma_tags_available
)) {
2743 mgp
->rdma_tags_available
=
2744 ntohl(ss
->fw_stats
->rdma_tags_available
);
2745 cmn_err(CE_NOTE
, "%s: RDMA timed out! "
2746 "%d tags left\n", mgp
->name
,
2747 mgp
->rdma_tags_available
);
2752 /* check to see if we have rx token to pass back */
2754 mutex_enter(&ss
->poll_lock
);
2755 if (ss
->rx_polling
) {
2758 *ss
->irq_claim
= BE_32(3);
2761 mutex_exit(&ss
->poll_lock
);
2763 *(ss
->irq_claim
+ 1) = BE_32(3);
2764 return (DDI_INTR_CLAIMED
);
/*
 * Multicast filter entry point: join or leave one multicast group.
 *
 * The 6-byte address is packed into the firmware command as two 32-bit
 * words: bytes 0-3 are copied into cmd.data0 and bytes 4-5 into
 * cmd.data1, each word is then converted with htonl(), and the command
 * (MXGEFW_JOIN_MULTICAST_GROUP or MXGEFW_LEAVE_MULTICAST_GROUP) is sent
 * with myri10ge_send_cmd().  A CE_WARN message is logged on the failure
 * path.
 *
 * NOTE(review): only 2 of the 4 bytes of cmd.data1 are written by the
 * memcpy before htonl() is applied; no initialisation of cmd is visible
 * in this extract -- confirm the remaining bytes are defined.
 * NOTE(review): the test of `add', the status check and the return
 * statements are not present in this extract.
 */
2768 * Add or remove a multicast address. This is called with our
2769 * macinfo's lock held by GLD, so we do not need to worry about
2770 * our own locking here.
2773 myri10ge_m_multicst(void *arg
, boolean_t add
, const uint8_t *multicastaddr
)
2776 struct myri10ge_priv
*mgp
= arg
;
2777 int status
, join_leave
;
2780 join_leave
= MXGEFW_JOIN_MULTICAST_GROUP
;
2782 join_leave
= MXGEFW_LEAVE_MULTICAST_GROUP
;
2783 (void) memcpy(&cmd
.data0
, multicastaddr
, 4);
2784 (void) memcpy(&cmd
.data1
, multicastaddr
+ 4, 2);
2785 cmd
.data0
= htonl(cmd
.data0
);
2786 cmd
.data1
= htonl(cmd
.data1
);
2787 status
= myri10ge_send_cmd(mgp
, join_leave
, &cmd
);
2791 cmn_err(CE_WARN
, "%s: failed to set multicast address\n",
/*
 * Promiscuous-mode entry point: forwards the requested on/off state for
 * this device instance (arg) to myri10ge_change_promisc().
 * NOTE(review): the return statement is not present in this extract.
 */
2798 myri10ge_m_promisc(void *arg
, boolean_t on
)
2800 struct myri10ge_priv
*mgp
= arg
;
2802 myri10ge_change_promisc(mgp
, on
);
/*
 * Copy send descriptors from src into the NIC request ring (tx->lanai)
 * one descriptor (sizeof (*src)) at a time with myri10ge_pio_copy().
 * The destination slot for descriptor `cnt' is (tx->req + cnt) masked by
 * tx->mask, which is what makes ring wrap-around work.
 * NOTE(review): the loop construct that steps cnt (and any barrier in
 * it) is not present in this extract.
 */
2807 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
2808 * backwards one at a time and handle ring wraps
2812 myri10ge_submit_req_backwards(myri10ge_tx_ring_t
*tx
,
2813 mcp_kreq_ether_send_t
*src
, int cnt
)
2815 int idx
, starting_slot
;
2816 starting_slot
= tx
->req
;
2819 idx
= (starting_slot
+ cnt
) & tx
->mask
;
2820 myri10ge_pio_copy(&tx
->lanai
[idx
],
2821 &src
[cnt
], sizeof (*src
));
/*
 * Hand a chain of cnt send descriptors to the NIC.
 *
 * The first descriptor's flags are saved in last_flags so that the chain
 * can be written out and only then marked valid.  When the chain does
 * not reach the end of the ring ((idx + cnt) < tx->mask) descriptors are
 * PIO-copied two at a time with a barrier after each pair; otherwise all
 * but the first are written by myri10ge_submit_req_backwards().  The
 * first descriptor is then copied, a barrier is issued, the saved flags
 * are OR-ed back into src->flags and one 32-bit word is rewritten to the
 * NIC.  Finally, if the ring is not active and tx->go is set, 1 is
 * written through tx->go to make the NIC poll this ring.
 *
 * NOTE(review): the clearing of src->flags, the pointer arithmetic that
 * selects the final 32-bit word, and the update of tx->req are not
 * present in this extract.
 */
2827 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
2828 * at most 32 bytes at a time, so as to avoid involving the software
2829 * pio handler in the nic. We re-write the first segment's flags
2830 * to mark them valid only after writing the entire chain
2834 myri10ge_submit_req(myri10ge_tx_ring_t
*tx
, mcp_kreq_ether_send_t
*src
,
2838 uint32_t *src_ints
, *dst_ints
;
2839 mcp_kreq_ether_send_t
*srcp
, *dstp
, *dst
;
2842 idx
= tx
->req
& tx
->mask
;
2844 last_flags
= src
->flags
;
2847 dst
= dstp
= &tx
->lanai
[idx
];
2850 if ((idx
+ cnt
) < tx
->mask
) {
2851 for (i
= 0; i
< (cnt
- 1); i
+= 2) {
2852 myri10ge_pio_copy(dstp
, srcp
, 2 * sizeof (*src
));
2853 mb(); /* force write every 32 bytes */
2859 * submit all but the first request, and ensure
2860 * that it is submitted below
2862 myri10ge_submit_req_backwards(tx
, src
, cnt
);
2866 /* submit the first request */
2867 myri10ge_pio_copy(dstp
, srcp
, sizeof (*src
));
2868 mb(); /* barrier before setting valid flag */
2871 /* re-write the last 32-bits with the valid flags */
2872 src
->flags
|= last_flags
;
2873 src_ints
= (uint32_t *)src
;
2875 dst_ints
= (uint32_t *)dst
;
2877 *dst_ints
= *src_ints
;
2880 /* notify NIC to poll this tx ring */
2881 if (!tx
->active
&& tx
->go
!= NULL
) {
2882 *(int *)(void *)tx
->go
= 1;
/*
 * Query the LSO attributes of mp with mac_lso_get(): the MSS is stored
 * through mss and the returned LSO flag is OR-ed into *flags, so flags
 * already held by the caller are preserved.
 */
2891 myri10ge_lso_info_get(mblk_t
*mp
, uint32_t *mss
, uint32_t *flags
)
2894 mac_lso_get(mp
, mss
, &lso_flag
);
2895 (*flags
) |= lso_flag
;
/*
 * Flatten the message mp with pullupmsg(mp, -1) while keeping its
 * offload metadata.
 *
 * The checksum-offload start/stuff offsets and flags, plus the LSO MSS
 * and flag, are read before the pullup and re-applied afterwards with
 * mac_hcksum_set(), DB_LSOMSS() (only when HW_LSO is set) and
 * lso_info_set().  Returns DDI_FAILURE after printing a message when the
 * pullup fails, DDI_SUCCESS otherwise; the xmit_pullup slice statistic
 * is incremented on the success path.
 * NOTE(review): the failure message has no trailing newline.
 */
2899 /* like pullupmsg, except preserve hcksum/LSO attributes */
2901 myri10ge_pullup(struct myri10ge_slice_state
*ss
, mblk_t
*mp
)
2903 uint32_t start
, stuff
, tx_offload_flags
, mss
;
2907 mac_hcksum_get(mp
, &start
, &stuff
, NULL
, NULL
, &tx_offload_flags
);
2908 myri10ge_lso_info_get(mp
, &mss
, &tx_offload_flags
);
2910 ok
= pullupmsg(mp
, -1);
2912 printf("pullupmsg failed");
2913 return (DDI_FAILURE
);
2915 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_pullup
);
2916 mac_hcksum_set(mp
, start
, stuff
, NULL
, NULL
, tx_offload_flags
);
2917 if (tx_offload_flags
& HW_LSO
)
2918 DB_LSOMSS(mp
) = (uint16_t)mss
;
2919 lso_info_set(mp
, mss
, tx_offload_flags
);
2920 return (DDI_SUCCESS
);
/*
 * Record per-packet transmit statistics in *s.
 *
 * When the low bit of the first destination-address octet is set (a
 * group address), the destination is compared with
 * myri10ge_broadcastaddr: a match sets brdcstxmt, and multixmt is set
 * for the other case (presumably an else branch; that keyword is not
 * present in this extract).  opackets is stored truncated to uint16_t
 * and obytes is stored as given.
 */
2924 myri10ge_tx_stat(struct myri10ge_tx_pkt_stats
*s
, struct ether_header
*eh
,
2925 int opackets
, int obytes
)
2928 if (eh
->ether_dhost
.ether_addr_octet
[0] & 1) {
2929 if (0 == (bcmp(eh
->ether_dhost
.ether_addr_octet
,
2930 myri10ge_broadcastaddr
, sizeof (eh
->ether_dhost
))))
2931 s
->un
.s
.brdcstxmt
= 1;
2933 s
->un
.s
.multixmt
= 1;
2935 s
->un
.s
.opackets
= (uint16_t)opackets
;
2936 s
->un
.s
.obytes
= obytes
;
/*
 * Transmit a small frame by copying it into a per-slot copy buffer.
 *
 * Under tx->lock: computes the free slot count as
 * tx->mask - (tx->req - tx->done), selects slot tx->req & tx->mask,
 * copies every mblk of the chain into the slot's buffer, pads short
 * frames, fills in req (DMA address, length in network order,
 * rdma_count of 1), records tx statistics for the slot, syncs the buffer
 * for the device and submits the single descriptor with
 * myri10ge_submit_req().  Returns DDI_SUCCESS on the path shown.
 *
 * NOTE(review): the comment in the body says runts are padded to 60
 * bytes while the visible bzero() clears 64 - len bytes; the length
 * adjustment that follows is not present in this extract -- confirm the
 * copy buffer has room for the extra bytes.
 * NOTE(review): the out-of-slots path (after the first mutex_exit) is
 * not present in this extract.
 */
2940 myri10ge_tx_copy(struct myri10ge_slice_state
*ss
, mblk_t
*mp
,
2941 mcp_kreq_ether_send_t
*req
)
2943 myri10ge_tx_ring_t
*tx
= &ss
->tx
;
2945 struct myri10ge_tx_copybuf
*cp
;
2947 int idx
, mblen
, avail
;
2950 mutex_enter(&tx
->lock
);
2951 avail
= tx
->mask
- (tx
->req
- tx
->done
);
2953 mutex_exit(&tx
->lock
);
2956 idx
= tx
->req
& tx
->mask
;
2959 for (len
= 0, bp
= mp
; bp
!= NULL
; bp
= bp
->b_cont
) {
2961 bcopy(bp
->b_rptr
, ptr
, mblen
);
2965 /* ensure runts are padded to 60 bytes */
2967 bzero(ptr
, 64 - len
);
2970 req
->addr_low
= cp
->dma
.low
;
2971 req
->addr_high
= cp
->dma
.high
;
2972 req
->length
= htons(len
);
2974 req
->rdma_count
= 1;
2975 myri10ge_tx_stat(&tx
->info
[idx
].stat
,
2976 (struct ether_header
*)(void *)cp
->va
, 1, len
);
2977 (void) ddi_dma_sync(cp
->dma
.handle
, 0, len
, DDI_DMA_SYNC_FORDEV
);
2978 myri10ge_submit_req(&ss
->tx
, req
, 1);
2979 mutex_exit(&tx
->lock
);
2981 return (DDI_SUCCESS
);
/*
 * Publish a prepared descriptor chain.
 *
 * For each of the `count' descriptors the mblk pointer and DMA handle
 * from tx_info[i] are stored in the ring's info array at slot
 * (tx->req + i) & tx->mask, so the completion handler can release them.
 * The statistics word of tx_info[0] is stored in slot idx (the last slot
 * written, if that statement follows the loop as its position suggests),
 * and the chain is then handed to myri10ge_submit_req().
 * NOTE(review): the function name indicates the caller holds the tx
 * lock; no locking is visible here -- confirm against the callers.
 */
2986 myri10ge_send_locked(myri10ge_tx_ring_t
*tx
, mcp_kreq_ether_send_t
*req_list
,
2987 struct myri10ge_tx_buffer_state
*tx_info
,
2992 idx
= 0; /* gcc -Wuninitialized */
2993 /* store unmapping and bp info for tx irq handler */
2994 for (i
= 0; i
< count
; i
++) {
2995 idx
= (tx
->req
+ i
) & tx
->mask
;
2996 tx
->info
[idx
].m
= tx_info
[i
].m
;
2997 tx
->info
[idx
].handle
= tx_info
[i
].handle
;
2999 tx
->info
[idx
].stat
.un
.all
= tx_info
[0].stat
.un
.all
;
3001 /* submit the frame to the nic */
3002 myri10ge_submit_req(tx
, req_list
, count
);
/*
 * Copy data out of the mblk chain mp into buf, starting `off' bytes into
 * a segment.  The visible statements copy min(seglen - off, len) bytes
 * from bp->b_rptr + off.
 * NOTE(review): the chain walk, the computation of seglen and the
 * updates of off/len/buf are not present in this extract.
 */
3010 myri10ge_copydata(mblk_t
*mp
, int off
, int len
, caddr_t buf
)
3027 count
= min(seglen
- off
, len
);
3028 bcopy(bp
->b_rptr
+ off
, buf
, count
);
/*
 * Return the length of the Ethernet header at the front of mp.
 *
 * Starts from sizeof (struct ether_header).  When the first segment is
 * shorter than that, the header is gathered into the local eh_copy with
 * myri10ge_copydata(); otherwise it is read in place at mp->b_rptr.  The
 * ethertype is compared with ETHERTYPE_VLAN in network byte order.
 * NOTE(review): the adjustment made for VLAN frames and the computation
 * of seglen are not present in this extract.
 */
3037 myri10ge_ether_parse_header(mblk_t
*mp
)
3039 struct ether_header eh_copy
;
3040 struct ether_header
*eh
;
3041 int eth_hdr_len
, seglen
;
3044 eth_hdr_len
= sizeof (*eh
);
3045 if (seglen
< eth_hdr_len
) {
3046 myri10ge_copydata(mp
, 0, eth_hdr_len
, (caddr_t
)&eh_copy
);
3049 eh
= (struct ether_header
*)(void *)mp
->b_rptr
;
3051 if (eh
->ether_type
== BE_16(ETHERTYPE_VLAN
)) {
3055 return (eth_hdr_len
);
/*
 * Parse the IP and TCP headers that start `off' bytes into mp and return
 * the total header length: off + ((ip_hl + th_off) << 2).
 *
 * The headers are read in place when the first segment holds them and
 * are otherwise gathered into the local buffer buf with
 * myri10ge_copydata().  A non-zero ip_sum is cleared: directly when ip
 * points into the mblk, or, when ip points into the copy, by walking the
 * chain to the segment that holds each of the two checksum bytes and
 * zeroing it there (two walk-and-store sequences are visible).
 * NOTE(review): the statements that step mp along b_cont inside the two
 * while loops, and the in-place zeroing statement, are not present in
 * this extract.
 */
3059 myri10ge_lso_parse_header(mblk_t
*mp
, int off
)
3062 int seglen
, sum_off
;
3067 if (seglen
< off
+ sizeof (*ip
)) {
3068 myri10ge_copydata(mp
, off
, sizeof (*ip
), buf
);
3069 ip
= (struct ip
*)(void *)buf
;
3071 ip
= (struct ip
*)(void *)(mp
->b_rptr
+ off
);
3073 if (seglen
< off
+ (ip
->ip_hl
<< 2) + sizeof (*tcp
)) {
3074 myri10ge_copydata(mp
, off
,
3075 (ip
->ip_hl
<< 2) + sizeof (*tcp
), buf
);
3076 ip
= (struct ip
*)(void *)buf
;
3078 tcp
= (struct tcphdr
*)(void *)((char *)ip
+ (ip
->ip_hl
<< 2));
3081 * NIC expects ip_sum to be zero. Recent changes to
3082 * OpenSolaris leave the correct ip checksum there, rather
3083 * than the required zero, so we need to zero it. Otherwise,
3084 * the NIC will produce bad checksums when sending LSO packets.
3086 if (ip
->ip_sum
!= 0) {
3087 if (((char *)ip
) != buf
) {
3088 /* ip points into mblk, so just zero it */
3092 * ip points into a copy, so walk the chain
3093 * to find the ip_csum, then zero it
3095 sum_off
= off
+ _PTRDIFF(&ip
->ip_sum
, buf
);
3096 while (sum_off
> (int)(MBLKL(mp
) - 1)) {
3097 sum_off
-= MBLKL(mp
);
3100 mp
->b_rptr
[sum_off
] = 0;
3102 while (sum_off
> MBLKL(mp
) - 1) {
3103 sum_off
-= MBLKL(mp
);
3106 mp
->b_rptr
[sum_off
] = 0;
3109 return (off
+ ((ip
->ip_hl
+ tcp
->th_off
) << 2));
/*
 * Transmit an LSO (TSO) packet by copying it into the ring's copy
 * buffers instead of DMA-binding the caller's mblks.
 *
 * Under tx->lock the free slot count is computed as
 * tx->mask - (tx->req - tx->done); when it is not greater than
 * MYRI10GE_MAX_SEND_DESC_TSO, tx->stall is incremented and the lock is
 * dropped.  Otherwise the expected wire packet count
 * (ceil(payload / mss)) is recorded in the slot's ostat, the hdr_size
 * header bytes and then the payload are copied from the mblk chain into
 * successive copy buffers (moving to a new slot when mss_resid reaches
 * 0), and a second pass builds the descriptor list: each copy buffer is
 * split at tx_boundary-aligned addresses and the
 * TSO_HDR/FIRST/TSO_PLD/TSO_CHOP/SMALL/ALIGN_ODD flags and rdma_count
 * are set per segment.  The final descriptor is marked
 * MXGEFW_FLAGS_TSO_LAST and the chain is passed to myri10ge_submit_req().
 * Returns DDI_SUCCESS on the path shown.
 *
 * cum_len starts at -hdr_size, so it is negative while header bytes are
 * being described and non-negative once payload is reached.
 * pseudo_hdr_offset carries the MSS in network byte order.
 *
 * NOTE(review): large parts of both loops are not present in this
 * extract (slot advancement, copy-buffer length bookkeeping, the len==0
 * diagnostic path guards), so the description above is limited to the
 * statements that are visible.
 */
3113 myri10ge_tx_tso_copy(struct myri10ge_slice_state
*ss
, mblk_t
*mp
,
3114 mcp_kreq_ether_send_t
*req_list
, int hdr_size
, int pkt_size
,
3115 uint16_t mss
, uint8_t cksum_offset
)
3117 myri10ge_tx_ring_t
*tx
= &ss
->tx
;
3118 struct myri10ge_priv
*mgp
= ss
->mgp
;
3120 mcp_kreq_ether_send_t
*req
;
3121 struct myri10ge_tx_copybuf
*cp
;
3123 int mblen
, count
, cum_len
, mss_resid
, tx_req
, pkt_size_tmp
;
3124 int resid
, avail
, idx
, hdr_size_tmp
, tx_boundary
;
3126 uint32_t seglen
, len
, boundary
, low
, high_swapped
;
3127 uint16_t pseudo_hdr_offset
= htons(mss
);
3130 tx_boundary
= mgp
->tx_boundary
;
3131 hdr_size_tmp
= hdr_size
;
3132 resid
= tx_boundary
;
3134 mutex_enter(&tx
->lock
);
3136 /* check to see if the slots are really there */
3137 avail
= tx
->mask
- (tx
->req
- tx
->done
);
3138 if (unlikely(avail
<= MYRI10GE_MAX_SEND_DESC_TSO
)) {
3139 atomic_inc_32(&tx
->stall
);
3140 mutex_exit(&tx
->lock
);
3145 cum_len
= -hdr_size
;
3148 idx
= tx
->mask
& tx
->req
;
3150 low
= ntohl(cp
->dma
.low
);
3154 int payload
= pkt_size
- hdr_size
;
3155 uint16_t opackets
= (payload
/ mss
) + ((payload
% mss
) != 0);
3156 tx
->info
[idx
].ostat
.opackets
= opackets
;
3157 tx
->info
[idx
].ostat
.obytes
= (opackets
- 1) * hdr_size
3160 hdr_size_tmp
= hdr_size
;
3162 flags
= (MXGEFW_FLAGS_TSO_HDR
| MXGEFW_FLAGS_FIRST
);
3164 for (bp
= mp
; bp
!= NULL
; bp
= bp
->b_cont
) {
3166 rptr
= (caddr_t
)bp
->b_rptr
;
3167 len
= min(hdr_size_tmp
, mblen
);
3169 bcopy(rptr
, ptr
, len
);
3174 hdr_size_tmp
-= len
;
3180 idx
= tx
->mask
& tx_req
;
3182 low
= ntohl(cp
->dma
.low
);
3184 resid
= tx_boundary
;
3188 len
= min(mss_resid
, mblen
);
3189 bcopy(rptr
, ptr
, len
);
3196 if (mss_resid
== 0) {
3200 idx
= tx
->mask
& tx_req
;
3203 low
= ntohl(cp
->dma
.low
);
3205 resid
= tx_boundary
;
3212 pkt_size_tmp
= pkt_size
;
3216 while (pkt_size_tmp
) {
3217 idx
= tx
->mask
& tx_req
;
3219 high_swapped
= cp
->dma
.high
;
3220 low
= ntohl(cp
->dma
.low
);
3223 printf("len=0! pkt_size_tmp=%d, pkt_size=%d\n",
3224 pkt_size_tmp
, pkt_size
);
3225 for (bp
= mp
; bp
!= NULL
; bp
= bp
->b_cont
) {
3227 printf("mblen:%d\n", mblen
);
3229 pkt_size_tmp
= pkt_size
;
3231 while (pkt_size_tmp
> 0) {
3232 idx
= tx
->mask
& tx_req
;
3234 printf("cp->len = %d\n", cp
->len
);
3235 pkt_size_tmp
-= cp
->len
;
3238 printf("dropped\n");
3239 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err
);
3242 pkt_size_tmp
-= len
;
3248 boundary
= (low
+ mgp
->tx_boundary
) &
3249 ~(mgp
->tx_boundary
- 1);
3250 seglen
= boundary
- low
;
3254 flags_next
= flags
& ~MXGEFW_FLAGS_FIRST
;
3255 cum_len_next
= cum_len
+ seglen
;
3256 (req
-rdma_count
)->rdma_count
= rdma_count
+ 1;
3257 if (likely(cum_len
>= 0)) {
3259 int next_is_first
, chop
;
3261 chop
= (cum_len_next
> mss
);
3262 cum_len_next
= cum_len_next
% mss
;
3263 next_is_first
= (cum_len_next
== 0);
3265 MXGEFW_FLAGS_TSO_CHOP
;
3266 flags_next
|= next_is_first
*
3268 rdma_count
|= -(chop
| next_is_first
);
3269 rdma_count
+= chop
& !next_is_first
;
3270 } else if (likely(cum_len_next
>= 0)) {
3277 small
= (mss
<= MXGEFW_SEND_SMALL_SIZE
);
3278 flags_next
= MXGEFW_FLAGS_TSO_PLD
|
3279 MXGEFW_FLAGS_FIRST
|
3280 (small
* MXGEFW_FLAGS_SMALL
);
3282 req
->addr_high
= high_swapped
;
3283 req
->addr_low
= htonl(low
);
3284 req
->pseudo_hdr_offset
= pseudo_hdr_offset
;
3285 req
->pad
= 0; /* complete solid 16-byte block */
3286 req
->rdma_count
= 1;
3287 req
->cksum_offset
= cksum_offset
;
3288 req
->length
= htons(seglen
);
3289 req
->flags
= flags
| ((cum_len
& 1) *
3290 MXGEFW_FLAGS_ALIGN_ODD
);
3291 if (cksum_offset
> seglen
)
3292 cksum_offset
-= seglen
;
3297 cum_len
= cum_len_next
;
3307 (req
-rdma_count
)->rdma_count
= (uint8_t)rdma_count
;
3310 req
->flags
|= MXGEFW_FLAGS_TSO_LAST
;
3311 } while (!(req
->flags
& (MXGEFW_FLAGS_TSO_CHOP
|
3312 MXGEFW_FLAGS_FIRST
)));
3314 myri10ge_submit_req(tx
, req_list
, count
);
3316 mutex_exit(&tx
->lock
);
3318 return (DDI_SUCCESS
);
/*
 * Main transmit path for one message on one slice.
 *
 * Outline of the visible logic:
 *  - Reads checksum-offload and LSO attributes of mp.  An LSO request
 *    without HCK_PARTIALCKSUM bumps xmit_lsobadflags and returns
 *    DDI_SUCCESS.  max_segs is MYRI10GE_MAX_SEND_DESC_TSO for LSO and
 *    MXGEFW_MAX_SEND_DESC otherwise.
 *  - Computes free slots as tx->mask - (tx->req - tx->done) without the
 *    lock; too few slots increments tx->stall_early.
 *  - Walks the chain to size it.  LSO copy mode is disabled when a
 *    data block's db_lastfree is not myri10ge_db_lastfree.  Chains with
 *    count >= max_segs are pulled up with myri10ge_pullup().
 *  - Derives cksum_offset / pseudo_hdr_offset from the MAC header
 *    length.  For LSO, cum_len starts at -lso_hdr_size and
 *    pseudo_hdr_offset carries the MSS; when mss < tx_boundary and
 *    lso_copy is set the packet goes to myri10ge_tx_tso_copy().  Small
 *    non-LSO packets below myri10ge_tx_copylen go to myri10ge_tx_copy().
 *  - Otherwise allocates one DMA handle per mblk, binds each mblk, and
 *    turns every DMA cookie into descriptors split at
 *    tx_boundary-aligned addresses, maintaining the TSO flags and
 *    rdma_count as in myri10ge_tx_tso_copy().
 *  - Records tx statistics in tx_info[0], re-checks the free slot count
 *    under tx->lock and publishes the chain with myri10ge_send_locked(),
 *    returning DDI_SUCCESS.
 *  - Error paths unbind and free the handles, optionally retry after a
 *    pullup, and account the outcome in stall_late / stall / xmit_err.
 *
 * NOTE(review): the comment below says "whichever is more", while the
 * visible checks (avail < max_segs, count >= max_segs) enforce both
 * limits at once -- the wording looks inverted; confirm.
 * NOTE(review): many statements (labels, returns, loop constructs,
 * mblk freeing) are not present in this extract, so the outline above
 * covers only what is visible.
 */
3322 * Try to send the chain of buffers described by the mp. We must not
3323 * encapsulate more than eth->tx.req - eth->tx.done, or
3324 * MXGEFW_MAX_SEND_DESC, whichever is more.
3328 myri10ge_send(struct myri10ge_slice_state
*ss
, mblk_t
*mp
,
3329 mcp_kreq_ether_send_t
*req_list
, struct myri10ge_tx_buffer_state
*tx_info
)
3331 struct myri10ge_priv
*mgp
= ss
->mgp
;
3332 myri10ge_tx_ring_t
*tx
= &ss
->tx
;
3333 mcp_kreq_ether_send_t
*req
;
3334 struct myri10ge_tx_dma_handle
*handles
, *dma_handle
= NULL
;
3336 ddi_dma_cookie_t cookie
;
3337 int err
, rv
, count
, avail
, mblen
, try_pullup
, i
, max_segs
, maclen
,
3338 rdma_count
, cum_len
, lso_hdr_size
;
3339 uint32_t start
, stuff
, tx_offload_flags
;
3340 uint32_t seglen
, len
, mss
, boundary
, low
, high_swapped
;
3342 uint16_t pseudo_hdr_offset
;
3343 uint8_t flags
, cksum_offset
, odd_flag
;
3345 int lso_copy
= myri10ge_lso_copy
;
3349 /* Setup checksum offloading, if needed */
3350 mac_hcksum_get(mp
, &start
, &stuff
, NULL
, NULL
, &tx_offload_flags
);
3351 myri10ge_lso_info_get(mp
, &mss
, &tx_offload_flags
);
3352 if (tx_offload_flags
& HW_LSO
) {
3353 max_segs
= MYRI10GE_MAX_SEND_DESC_TSO
;
3354 if ((tx_offload_flags
& HCK_PARTIALCKSUM
) == 0) {
3355 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_lsobadflags
);
3357 return (DDI_SUCCESS
);
3360 max_segs
= MXGEFW_MAX_SEND_DESC
;
3365 pseudo_hdr_offset
= 0;
3367 /* leave an extra slot keep the ring from wrapping */
3368 avail
= tx
->mask
- (tx
->req
- tx
->done
);
3371 * If we have > MXGEFW_MAX_SEND_DESC, then any over-length
3372 * message will need to be pulled up in order to fit.
3373 * Otherwise, we are low on transmit descriptors, it is
3374 * probably better to stall and try again rather than pullup a
3378 if (avail
< max_segs
) {
3380 atomic_inc_32(&tx
->stall_early
);
3384 /* find out how long the frame is and how many segments it is */
3388 flags
= (MXGEFW_FLAGS_NO_TSO
| MXGEFW_FLAGS_FIRST
);
3389 for (bp
= mp
; bp
!= NULL
; bp
= bp
->b_cont
) {
3394 * we can't simply skip over 0-length mblks
3395 * because the hardware can't deal with them,
3396 * and we could leak them.
3398 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_zero_len
);
3403 * There's no advantage to copying most gesballoc
3404 * attached blocks, so disable lso copy in that case
3406 if (mss
&& lso_copy
== 1 && ((dbp
= bp
->b_datap
) != NULL
)) {
3407 if ((void *)dbp
->db_lastfree
!= myri10ge_db_lastfree
) {
3415 /* Try to pull up excessivly long chains */
3416 if (count
>= max_segs
) {
3417 err
= myri10ge_pullup(ss
, mp
);
3418 if (likely(err
== DDI_SUCCESS
)) {
3421 if (count
< MYRI10GE_MAX_SEND_DESC_TSO
) {
3423 * just let the h/w send it, it will be
3424 * inefficient, but us better than dropping
3426 max_segs
= MYRI10GE_MAX_SEND_DESC_TSO
;
3429 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err
);
3437 maclen
= myri10ge_ether_parse_header(mp
);
3439 if (tx_offload_flags
& HCK_PARTIALCKSUM
) {
3441 cksum_offset
= start
+ maclen
;
3442 pseudo_hdr_offset
= htons(stuff
+ maclen
);
3443 odd_flag
= MXGEFW_FLAGS_ALIGN_ODD
;
3444 flags
|= MXGEFW_FLAGS_CKSUM
;
3447 lso_hdr_size
= 0; /* -Wunitinialized */
3448 if (mss
) { /* LSO */
3449 /* this removes any CKSUM flag from before */
3450 flags
= (MXGEFW_FLAGS_TSO_HDR
| MXGEFW_FLAGS_FIRST
);
3452 * parse the headers and set cum_len to a negative
3453 * value to reflect the offset of the TCP payload
3455 lso_hdr_size
= myri10ge_lso_parse_header(mp
, maclen
);
3456 cum_len
= -lso_hdr_size
;
3457 if ((mss
< mgp
->tx_boundary
) && lso_copy
) {
3458 err
= myri10ge_tx_tso_copy(ss
, mp
, req_list
,
3459 lso_hdr_size
, pkt_size
, mss
, cksum_offset
);
3464 * for TSO, pseudo_hdr_offset holds mss. The firmware
3465 * figures out where to put the checksum by parsing
3469 pseudo_hdr_offset
= htons(mss
);
3470 } else if (pkt_size
<= MXGEFW_SEND_SMALL_SIZE
) {
3471 flags
|= MXGEFW_FLAGS_SMALL
;
3472 if (pkt_size
< myri10ge_tx_copylen
) {
3473 req
->cksum_offset
= cksum_offset
;
3474 req
->pseudo_hdr_offset
= pseudo_hdr_offset
;
3476 err
= myri10ge_tx_copy(ss
, mp
, req
);
3482 /* pull one DMA handle for each bp from our freelist */
3484 err
= myri10ge_alloc_tx_handles(ss
, count
, &handles
);
3485 if (err
!= DDI_SUCCESS
) {
3491 for (bp
= mp
; bp
!= NULL
; bp
= bp
->b_cont
) {
3493 dma_handle
= handles
;
3494 handles
= handles
->next
;
3496 rv
= ddi_dma_addr_bind_handle(dma_handle
->h
, NULL
,
3497 (caddr_t
)bp
->b_rptr
, mblen
,
3498 DDI_DMA_WRITE
| DDI_DMA_STREAMING
, DDI_DMA_SLEEP
, NULL
,
3499 &cookie
, &ncookies
);
3500 if (unlikely(rv
!= DDI_DMA_MAPPED
)) {
3503 dma_handle
->next
= handles
;
3504 handles
= dma_handle
;
3505 goto abort_with_handles
;
3508 /* reserve the slot */
3509 tx_info
[count
].m
= bp
;
3510 tx_info
[count
].handle
= dma_handle
;
3513 low
= MYRI10GE_LOWPART_TO_U32(cookie
.dmac_laddress
);
3515 htonl(MYRI10GE_HIGHPART_TO_U32(
3516 cookie
.dmac_laddress
));
3517 len
= (uint32_t)cookie
.dmac_size
;
3522 boundary
= (low
+ mgp
->tx_boundary
) &
3523 ~(mgp
->tx_boundary
- 1);
3524 seglen
= boundary
- low
;
3528 flags_next
= flags
& ~MXGEFW_FLAGS_FIRST
;
3529 cum_len_next
= cum_len
+ seglen
;
3531 (req
-rdma_count
)->rdma_count
=
3533 if (likely(cum_len
>= 0)) {
3535 int next_is_first
, chop
;
3537 chop
= (cum_len_next
> mss
);
3541 (cum_len_next
== 0);
3543 MXGEFW_FLAGS_TSO_CHOP
;
3544 flags_next
|= next_is_first
*
3547 -(chop
| next_is_first
);
3549 chop
& !next_is_first
;
3550 } else if (likely(cum_len_next
>= 0)) {
3558 MXGEFW_SEND_SMALL_SIZE
);
3560 MXGEFW_FLAGS_TSO_PLD
3561 | MXGEFW_FLAGS_FIRST
3563 MXGEFW_FLAGS_SMALL
);
3566 req
->addr_high
= high_swapped
;
3567 req
->addr_low
= htonl(low
);
3568 req
->pseudo_hdr_offset
= pseudo_hdr_offset
;
3569 req
->pad
= 0; /* complete solid 16-byte block */
3570 req
->rdma_count
= 1;
3571 req
->cksum_offset
= cksum_offset
;
3572 req
->length
= htons(seglen
);
3573 req
->flags
= flags
| ((cum_len
& 1) * odd_flag
);
3574 if (cksum_offset
> seglen
)
3575 cksum_offset
-= seglen
;
3580 cum_len
= cum_len_next
;
3583 /* make sure all the segments will fit */
3584 if (unlikely(count
>= max_segs
)) {
3585 MYRI10GE_ATOMIC_SLICE_STAT_INC(
3587 /* may try a pullup */
3591 goto abort_with_handles
;
3596 tx_info
[count
].m
= 0;
3601 ddi_dma_nextcookie(dma_handle
->h
, &cookie
);
3604 (req
-rdma_count
)->rdma_count
= (uint8_t)rdma_count
;
3609 req
->flags
|= MXGEFW_FLAGS_TSO_LAST
;
3610 } while (!(req
->flags
& (MXGEFW_FLAGS_TSO_CHOP
|
3611 MXGEFW_FLAGS_FIRST
)));
3614 /* calculate tx stats */
3619 payload
= pkt_size
- lso_hdr_size
;
3620 opackets
= (payload
/ mss
) + ((payload
% mss
) != 0);
3621 tx_info
[0].stat
.un
.all
= 0;
3622 tx_info
[0].ostat
.opackets
= opackets
;
3623 tx_info
[0].ostat
.obytes
= (opackets
- 1) * lso_hdr_size
3626 myri10ge_tx_stat(&tx_info
[0].stat
,
3627 (struct ether_header
*)(void *)mp
->b_rptr
, 1, pkt_size
);
3629 mutex_enter(&tx
->lock
);
3631 /* check to see if the slots are really there */
3632 avail
= tx
->mask
- (tx
->req
- tx
->done
);
3633 if (unlikely(avail
<= count
)) {
3634 mutex_exit(&tx
->lock
);
3639 myri10ge_send_locked(tx
, req_list
, tx_info
, count
);
3640 mutex_exit(&tx
->lock
);
3641 return (DDI_SUCCESS
);
3645 atomic_inc_32(&tx
->stall_late
);
3648 /* unbind and free handles from previous mblks */
3649 for (i
= 0; i
< count
; i
++) {
3653 dma_handle
= tx_info
[i
].handle
;
3654 (void) ddi_dma_unbind_handle(dma_handle
->h
);
3655 dma_handle
->next
= handles
;
3656 handles
= dma_handle
;
3657 tx_info
[i
].handle
= NULL
;
3658 tx_info
[i
].m
= NULL
;
3661 myri10ge_free_tx_handle_slist(tx
, handles
);
3664 err
= myri10ge_pullup(ss
, mp
);
3665 if (err
!= DDI_SUCCESS
&& try_pullup
== 2) {
3667 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err
);
3678 atomic_inc_32(&tx
->stall
);
3680 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err
);
/*
 * Transmit entry point (installed as mri_tx by myri10ge_fill_ring()).
 *
 * Provides the scratch space myri10ge_send() needs: a descriptor list
 * sized for MYRI10GE_MAX_SEND_DESC_TSO + 4 entries and a tx_info array
 * of MYRI10GE_MAX_SEND_DESC_TSO + 1 entries.  Two variants are visible:
 * one obtains both areas with kmem_alloc(KM_SLEEP) and releases them
 * with kmem_free() after the send, the other declares them as local
 * arrays.  In both, req_list is the scratch address rounded up to an
 * 8-byte boundary.
 * NOTE(review): the preprocessor conditionals that select between the
 * two variants, and the return statement, are not present in this
 * extract.
 */
3687 myri10ge_send_wrapper(void *arg
, mblk_t
*mp
)
3689 struct myri10ge_slice_state
*ss
= arg
;
3691 mcp_kreq_ether_send_t
*req_list
;
3694 * We need about 2.5KB of scratch space to handle transmits.
3695 * i86pc has only 8KB of kernel stack space, so we malloc the
3696 * scratch space there rather than keeping it on the stack.
3698 size_t req_size
, tx_info_size
;
3699 struct myri10ge_tx_buffer_state
*tx_info
;
3702 req_size
= sizeof (*req_list
) * (MYRI10GE_MAX_SEND_DESC_TSO
+ 4)
3704 req_bytes
= kmem_alloc(req_size
, KM_SLEEP
);
3705 tx_info_size
= sizeof (*tx_info
) * (MYRI10GE_MAX_SEND_DESC_TSO
+ 1);
3706 tx_info
= kmem_alloc(tx_info_size
, KM_SLEEP
);
3708 char req_bytes
[sizeof (*req_list
) * (MYRI10GE_MAX_SEND_DESC_TSO
+ 4)
3710 struct myri10ge_tx_buffer_state tx_info
[MYRI10GE_MAX_SEND_DESC_TSO
+ 1];
3713 /* ensure req_list entries are aligned to 8 bytes */
3714 req_list
= (struct mcp_kreq_ether_send
*)
3715 (((unsigned long)req_bytes
+ 7UL) & ~7UL);
3717 err
= myri10ge_send(ss
, mp
, req_list
, tx_info
);
3720 kmem_free(tx_info
, tx_info_size
);
3721 kmem_free(req_bytes
, req_size
);
/*
 * Add-MAC-address callback for the RX group (installed as mgi_addmac by
 * myri10ge_fill_group()).
 *
 * Checks mac_addr for NULL, then takes mgp->intrlock.  When
 * mgp->macaddr_cnt is already non-zero the lock is dropped without
 * programming an address.  Otherwise the address is programmed with
 * myri10ge_m_unicst(), the lock is released, and the address is copied
 * into mgp->mac_addr.
 * NOTE(review): the return values on each path and the update of
 * macaddr_cnt are not present in this extract.
 */
3730 myri10ge_addmac(void *arg
, const uint8_t *mac_addr
)
3732 struct myri10ge_priv
*mgp
= arg
;
3735 if (mac_addr
== NULL
)
3738 mutex_enter(&mgp
->intrlock
);
3739 if (mgp
->macaddr_cnt
) {
3740 mutex_exit(&mgp
->intrlock
);
3743 err
= myri10ge_m_unicst(mgp
, mac_addr
);
3747 mutex_exit(&mgp
->intrlock
);
3751 bcopy(mac_addr
, mgp
->mac_addr
, sizeof (mgp
->mac_addr
));
/*
 * Remove-MAC-address callback for the RX group (installed as mgi_remmac
 * by myri10ge_fill_group()).  The visible lines take and release
 * mgp->intrlock; mac_addr is not referenced by any visible line.
 * NOTE(review): the statement executed under the lock and the return
 * statement are not present in this extract.
 */
3757 myri10ge_remmac(void *arg
, const uint8_t *mac_addr
)
3759 struct myri10ge_priv
*mgp
= arg
;
3761 mutex_enter(&mgp
->intrlock
);
3763 mutex_exit(&mgp
->intrlock
);
/*
 * Ring-group description callback.  Only MAC_RING_TYPE_RX is handled
 * (the action for other types is not present in this extract).  For an
 * RX group, infop is filled with the device instance as the group
 * driver handle, no start/stop callbacks, myri10ge_addmac /
 * myri10ge_remmac as the address callbacks, and mgp->num_slices as the
 * ring count.  index and gh are not referenced by any visible line.
 */
3770 myri10ge_fill_group(void *arg
, mac_ring_type_t rtype
, const int index
,
3771 mac_group_info_t
*infop
, mac_group_handle_t gh
)
3773 struct myri10ge_priv
*mgp
= arg
;
3775 if (rtype
!= MAC_RING_TYPE_RX
)
3778 infop
->mgi_driver
= (mac_group_driver_t
)mgp
;
3779 infop
->mgi_start
= NULL
;
3780 infop
->mgi_stop
= NULL
;
3781 infop
->mgi_addmac
= myri10ge_addmac
;
3782 infop
->mgi_remmac
= myri10ge_remmac
;
3783 infop
->mgi_count
= mgp
->num_slices
;
/*
 * RX ring start callback (installed as mri_start).  Stores the ring
 * generation number supplied by the MAC layer in ss->rx_gen_num under
 * ss->rx_lock; myri10ge_intr_rx() later passes that value to
 * mac_rx_ring().
 * NOTE(review): the return statement is not present in this extract.
 */
3787 myri10ge_ring_start(mac_ring_driver_t rh
, uint64_t mr_gen_num
)
3789 struct myri10ge_slice_state
*ss
;
3791 ss
= (struct myri10ge_slice_state
*)rh
;
3792 mutex_enter(&ss
->rx_lock
);
3793 ss
->rx_gen_num
= mr_gen_num
;
3794 mutex_exit(&ss
->rx_lock
);
/*
 * Per-RX-ring statistics callback.  rh is the slice state.  The visible
 * cases store ss->rx_stats.ibytes for MAC_STAT_RBYTES and
 * ss->rx_stats.ipackets for MAC_STAT_IPACKETS through val.
 * NOTE(review): the switch header, the default case and the return
 * values are not present in this extract.
 */
3799 * Retrieve a value for one of the statistics for a particular rx ring
3802 myri10ge_rx_ring_stat(mac_ring_driver_t rh
, uint_t stat
, uint64_t *val
)
3804 struct myri10ge_slice_state
*ss
;
3806 ss
= (struct myri10ge_slice_state
*)rh
;
3808 case MAC_STAT_RBYTES
:
3809 *val
= ss
->rx_stats
.ibytes
;
3812 case MAC_STAT_IPACKETS
:
3813 *val
= ss
->rx_stats
.ipackets
;
/*
 * Per-TX-ring statistics callback.  rh is the slice state.  The visible
 * cases store ss->tx.stats.obytes for MAC_STAT_OBYTES and
 * ss->tx.stats.opackets for MAC_STAT_OPACKETS through val.
 * NOTE(review): the switch header, the default case and the return
 * values are not present in this extract.
 */
3825 * Retrieve a value for one of the statistics for a particular tx ring
3828 myri10ge_tx_ring_stat(mac_ring_driver_t rh
, uint_t stat
, uint64_t *val
)
3830 struct myri10ge_slice_state
*ss
;
3832 ss
= (struct myri10ge_slice_state
*)rh
;
3834 case MAC_STAT_OBYTES
:
3835 *val
= ss
->tx
.stats
.obytes
;
3838 case MAC_STAT_OPACKETS
:
3839 *val
= ss
->tx
.stats
.opackets
;
/*
 * RX interrupt-disable callback (installed as mi_disable): switches the
 * slice to polling mode by setting ss->rx_polling to B_TRUE under
 * ss->poll_lock.  myri10ge_intr() tests rx_polling under the same lock.
 * NOTE(review): the return statement is not present in this extract.
 */
3851 myri10ge_rx_ring_intr_disable(mac_intr_handle_t intrh
)
3853 struct myri10ge_slice_state
*ss
;
3855 ss
= (struct myri10ge_slice_state
*)intrh
;
3856 mutex_enter(&ss
->poll_lock
);
3857 ss
->rx_polling
= B_TRUE
;
3858 mutex_exit(&ss
->poll_lock
);
/*
 * RX interrupt-enable callback (installed as mi_enable): leaves polling
 * mode by clearing ss->rx_polling under ss->poll_lock, and writes
 * BE_32(3) to *ss->irq_claim while the lock is held.
 * NOTE(review): the condition guarding the irq_claim write (if any) and
 * the return statement are not present in this extract.
 */
3863 myri10ge_rx_ring_intr_enable(mac_intr_handle_t intrh
)
3865 struct myri10ge_slice_state
*ss
;
3867 ss
= (struct myri10ge_slice_state
*)intrh
;
3868 mutex_enter(&ss
->poll_lock
);
3869 ss
->rx_polling
= B_FALSE
;
3871 *ss
->irq_claim
= BE_32(3);
3874 mutex_exit(&ss
->poll_lock
);
/*
 * Ring description callback.  ring_index selects the slice (asserted to
 * be below mgp->num_slices), which becomes the ring driver handle.
 *
 * For MAC_RING_TYPE_RX: installs myri10ge_ring_start as the start
 * callback, no stop callback, myri10ge_poll_rx for polling,
 * myri10ge_rx_ring_stat for statistics, and the interrupt
 * enable/disable callbacks with the slice as the interrupt handle.
 * For MAC_RING_TYPE_TX: no start/stop callbacks, myri10ge_send_wrapper
 * as the transmit routine and myri10ge_tx_ring_stat for statistics.
 *
 * rg_index is not referenced by any visible line.
 * NOTE(review): the switch header, the stores of rh into the slice and
 * the break/default statements are not present in this extract.
 */
3880 myri10ge_fill_ring(void *arg
, mac_ring_type_t rtype
, const int rg_index
,
3881 const int ring_index
, mac_ring_info_t
*infop
, mac_ring_handle_t rh
)
3883 struct myri10ge_priv
*mgp
= arg
;
3884 struct myri10ge_slice_state
*ss
;
3885 mac_intr_t
*mintr
= &infop
->mri_intr
;
3887 ASSERT((unsigned int)ring_index
< mgp
->num_slices
);
3889 ss
= &mgp
->ss
[ring_index
];
3891 case MAC_RING_TYPE_RX
:
3893 infop
->mri_driver
= (mac_ring_driver_t
)ss
;
3894 infop
->mri_start
= myri10ge_ring_start
;
3895 infop
->mri_stop
= NULL
;
3896 infop
->mri_poll
= myri10ge_poll_rx
;
3897 infop
->mri_stat
= myri10ge_rx_ring_stat
;
3898 mintr
->mi_handle
= (mac_intr_handle_t
)ss
;
3899 mintr
->mi_enable
= myri10ge_rx_ring_intr_enable
;
3900 mintr
->mi_disable
= myri10ge_rx_ring_intr_disable
;
3902 case MAC_RING_TYPE_TX
:
3904 infop
->mri_driver
= (mac_ring_driver_t
)ss
;
3905 infop
->mri_start
= NULL
;
3906 infop
->mri_stop
= NULL
;
3907 infop
->mri_tx
= myri10ge_send_wrapper
;
3908 infop
->mri_stat
= myri10ge_tx_ring_stat
;
/*
 * Tear down the NIC-wide statistics kstat: checks mgp->ksp_stat for
 * NULL, deletes the kstat with kstat_delete() and clears the pointer so
 * a repeated call finds nothing to delete.
 * NOTE(review): the statement taken when ksp_stat is NULL is not present
 * in this extract (presumably an early return).
 */
3916 myri10ge_nic_stat_destroy(struct myri10ge_priv
*mgp
)
3918 if (mgp
->ksp_stat
== NULL
)
3921 kstat_delete(mgp
->ksp_stat
);
3922 mgp
->ksp_stat
= NULL
;
/*
 * Tear down one slice's statistics kstat: checks ss->ksp_stat for NULL,
 * deletes the kstat with kstat_delete() and clears the pointer.
 * NOTE(review): the statement taken when ksp_stat is NULL is not present
 * in this extract (presumably an early return).
 */
3926 myri10ge_slice_stat_destroy(struct myri10ge_slice_state
*ss
)
3928 if (ss
->ksp_stat
== NULL
)
3931 kstat_delete(ss
->ksp_stat
);
3932 ss
->ksp_stat
= NULL
;
/*
 * Tear down the driver-information kstat: checks mgp->ksp_info for NULL,
 * deletes the kstat with kstat_delete() and clears the pointer.
 * NOTE(review): the statement taken when ksp_info is NULL is not present
 * in this extract (presumably an early return).
 */
3936 myri10ge_info_destroy(struct myri10ge_priv
*mgp
)
3938 if (mgp
->ksp_info
== NULL
)
3941 kstat_delete(mgp
->ksp_info
);
3942 mgp
->ksp_info
= NULL
;
/*
 * ks_update callback for the NIC-wide statistics kstat.
 *
 * KSTAT_WRITE requests are tested for first (the action taken is not
 * present in this extract).  On a read the named values are refreshed:
 * the DMA benchmark figures and PCIe lane count come from the device
 * structure, dma_force_physical reflects the DDI_DMA_FORCE_PHYSICAL bit
 * of myri10ge_tx_dma_attr, and the dropped_* and link_up counters are
 * read from slice 0's firmware statistics block and converted from
 * network byte order with ntohl().
 */
3946 myri10ge_nic_stat_kstat_update(kstat_t
*ksp
, int rw
)
3948 struct myri10ge_nic_stat
*ethstat
;
3949 struct myri10ge_priv
*mgp
;
3950 mcp_irq_data_t
*fw_stats
;
3953 if (rw
== KSTAT_WRITE
)
3956 ethstat
= (struct myri10ge_nic_stat
*)ksp
->ks_data
;
3957 mgp
= (struct myri10ge_priv
*)ksp
->ks_private
;
3958 fw_stats
= mgp
->ss
[0].fw_stats
;
3960 ethstat
->dma_read_bw_MBs
.value
.ul
= mgp
->read_dma
;
3961 ethstat
->dma_write_bw_MBs
.value
.ul
= mgp
->write_dma
;
3962 ethstat
->dma_read_write_bw_MBs
.value
.ul
= mgp
->read_write_dma
;
3963 if (myri10ge_tx_dma_attr
.dma_attr_flags
& DDI_DMA_FORCE_PHYSICAL
)
3964 ethstat
->dma_force_physical
.value
.ul
= 1;
3966 ethstat
->dma_force_physical
.value
.ul
= 0;
3967 ethstat
->lanes
.value
.ul
= mgp
->pcie_link_width
;
3968 ethstat
->dropped_bad_crc32
.value
.ul
=
3969 ntohl(fw_stats
->dropped_bad_crc32
);
3970 ethstat
->dropped_bad_phy
.value
.ul
=
3971 ntohl(fw_stats
->dropped_bad_phy
);
3972 ethstat
->dropped_link_error_or_filtered
.value
.ul
=
3973 ntohl(fw_stats
->dropped_link_error_or_filtered
);
3974 ethstat
->dropped_link_overflow
.value
.ul
=
3975 ntohl(fw_stats
->dropped_link_overflow
);
3976 ethstat
->dropped_multicast_filtered
.value
.ul
=
3977 ntohl(fw_stats
->dropped_multicast_filtered
);
3978 ethstat
->dropped_no_big_buffer
.value
.ul
=
3979 ntohl(fw_stats
->dropped_no_big_buffer
);
3980 ethstat
->dropped_no_small_buffer
.value
.ul
=
3981 ntohl(fw_stats
->dropped_no_small_buffer
);
3982 ethstat
->dropped_overrun
.value
.ul
=
3983 ntohl(fw_stats
->dropped_overrun
);
3984 ethstat
->dropped_pause
.value
.ul
=
3985 ntohl(fw_stats
->dropped_pause
);
3986 ethstat
->dropped_runt
.value
.ul
=
3987 ntohl(fw_stats
->dropped_runt
);
3988 ethstat
->link_up
.value
.ul
=
3989 ntohl(fw_stats
->link_up
);
3990 ethstat
->dropped_unicast_filtered
.value
.ul
=
3991 ntohl(fw_stats
->dropped_unicast_filtered
);
/*
 * ks_update callback for a per-slice statistics kstat.
 *
 * KSTAT_WRITE requests are tested for first (the action taken is not
 * present in this extract).  On a read the named values are refreshed
 * from the slice state in ks_private: big-buffer accounting (j_rx_cnt,
 * rx_big.cnt - j_rx_cnt, jpool.num_alloc - jbufs_for_smalls,
 * jbufs_for_smalls), rx_small as rx_small.cnt - (rx_small.mask + 1),
 * the tx ring counters (done, req, activate, sched, stall, stall_early,
 * stall_late) and the xmit_err slice statistic.
 * NOTE(review): further statistics may be assigned on lines that are
 * not present in this extract.
 */
3996 myri10ge_slice_stat_kstat_update(kstat_t
*ksp
, int rw
)
3998 struct myri10ge_slice_stat
*ethstat
;
3999 struct myri10ge_slice_state
*ss
;
4001 if (rw
== KSTAT_WRITE
)
4004 ethstat
= (struct myri10ge_slice_stat
*)ksp
->ks_data
;
4005 ss
= (struct myri10ge_slice_state
*)ksp
->ks_private
;
4007 ethstat
->rx_big
.value
.ul
= ss
->j_rx_cnt
;
4008 ethstat
->rx_bigbuf_firmware
.value
.ul
= ss
->rx_big
.cnt
- ss
->j_rx_cnt
;
4009 ethstat
->rx_bigbuf_pool
.value
.ul
=
4010 ss
->jpool
.num_alloc
- ss
->jbufs_for_smalls
;
4011 ethstat
->rx_bigbuf_smalls
.value
.ul
= ss
->jbufs_for_smalls
;
4012 ethstat
->rx_small
.value
.ul
= ss
->rx_small
.cnt
-
4013 (ss
->rx_small
.mask
+ 1);
4014 ethstat
->tx_done
.value
.ul
= ss
->tx
.done
;
4015 ethstat
->tx_req
.value
.ul
= ss
->tx
.req
;
4016 ethstat
->tx_activate
.value
.ul
= ss
->tx
.activate
;
4017 ethstat
->xmit_sched
.value
.ul
= ss
->tx
.sched
;
4018 ethstat
->xmit_stall
.value
.ul
= ss
->tx
.stall
;
4019 ethstat
->xmit_stall_early
.value
.ul
= ss
->tx
.stall_early
;
4020 ethstat
->xmit_stall_late
.value
.ul
= ss
->tx
.stall_late
;
4021 ethstat
->xmit_err
.value
.ul
= MYRI10GE_SLICE_STAT(xmit_err
);
/*
 * ks_update callback for the driver-information kstat.
 *
 * KSTAT_WRITE requests are tested for first (the action taken is not
 * present in this extract).  On a read each string-valued entry is
 * pointed at its current source with kstat_named_setstr(): the driver
 * version macro and the firmware version, firmware name, interrupt type,
 * product code and serial number strings held in the device structure.
 */
4026 myri10ge_info_kstat_update(kstat_t
*ksp
, int rw
)
4028 struct myri10ge_info
*info
;
4029 struct myri10ge_priv
*mgp
;
4032 if (rw
== KSTAT_WRITE
)
4035 info
= (struct myri10ge_info
*)ksp
->ks_data
;
4036 mgp
= (struct myri10ge_priv
*)ksp
->ks_private
;
4037 kstat_named_setstr(&info
->driver_version
, MYRI10GE_VERSION_STR
);
4038 kstat_named_setstr(&info
->firmware_version
, mgp
->fw_version
);
4039 kstat_named_setstr(&info
->firmware_name
, mgp
->fw_name
);
4040 kstat_named_setstr(&info
->interrupt_type
, mgp
->intr_type
);
4041 kstat_named_setstr(&info
->product_code
, mgp
->pc_str
);
4042 kstat_named_setstr(&info
->serial_number
, mgp
->sn_str
);
/*
 * Template for the driver-information kstat: six named entries, all of
 * type KSTAT_DATA_STRING.  myri10ge_info_init() uses this object
 * directly as ks_data and myri10ge_info_template_lock as ks_lock, so the
 * storage is shared rather than copied per kstat.
 */
4046 static struct myri10ge_info myri10ge_info_template
= {
4047 { "driver_version", KSTAT_DATA_STRING
},
4048 { "firmware_version", KSTAT_DATA_STRING
},
4049 { "firmware_name", KSTAT_DATA_STRING
},
4050 { "interrupt_type", KSTAT_DATA_STRING
},
4051 { "product_code", KSTAT_DATA_STRING
},
4052 { "serial_number", KSTAT_DATA_STRING
},
4054 static kmutex_t myri10ge_info_template_lock
;
/*
 * Create the driver-information kstat for this device instance.
 *
 * A KSTAT_FLAG_VIRTUAL named kstat "myri10ge_info" is created with one
 * entry per kstat_named_t in myri10ge_info_template.  Returns
 * DDI_FAILURE after logging a message when kstat_create() fails.
 * Otherwise the kstat is recorded in mgp->ksp_info, wired to
 * myri10ge_info_kstat_update() with the shared template as its data and
 * myri10ge_info_template_lock as its lock, and ks_data_size is grown by
 * strlen() + 1 for each string that will be exported.  Returns
 * DDI_SUCCESS.
 *
 * NOTE(review): fw_version, pc_str and sn_str are NULL-checked before
 * strlen(), but no such check is visible for fw_name and intr_type --
 * confirm they can never be NULL here.
 * NOTE(review): the kstat_install() call is not present in this extract.
 */
4058 myri10ge_info_init(struct myri10ge_priv
*mgp
)
4062 ksp
= kstat_create("myri10ge", ddi_get_instance(mgp
->dip
),
4063 "myri10ge_info", "net", KSTAT_TYPE_NAMED
,
4064 sizeof (myri10ge_info_template
) /
4065 sizeof (kstat_named_t
), KSTAT_FLAG_VIRTUAL
);
4068 "%s: myri10ge_info_init: kstat_create failed", mgp
->name
);
4069 return (DDI_FAILURE
);
4071 mgp
->ksp_info
= ksp
;
4072 ksp
->ks_update
= myri10ge_info_kstat_update
;
4073 ksp
->ks_private
= (void *) mgp
;
4074 ksp
->ks_data
= &myri10ge_info_template
;
4075 ksp
->ks_lock
= &myri10ge_info_template_lock
;
4076 if (MYRI10GE_VERSION_STR
!= NULL
)
4077 ksp
->ks_data_size
+= strlen(MYRI10GE_VERSION_STR
) + 1;
4078 if (mgp
->fw_version
!= NULL
)
4079 ksp
->ks_data_size
+= strlen(mgp
->fw_version
) + 1;
4080 ksp
->ks_data_size
+= strlen(mgp
->fw_name
) + 1;
4081 ksp
->ks_data_size
+= strlen(mgp
->intr_type
) + 1;
4082 if (mgp
->pc_str
!= NULL
)
4083 ksp
->ks_data_size
+= strlen(mgp
->pc_str
) + 1;
4084 if (mgp
->sn_str
!= NULL
)
4085 ksp
->ks_data_size
+= strlen(mgp
->sn_str
) + 1;
4088 return (DDI_SUCCESS
);
4093 myri10ge_nic_stat_init(struct myri10ge_priv
*mgp
)
4096 struct myri10ge_nic_stat
*ethstat
;
4098 ksp
= kstat_create("myri10ge", ddi_get_instance(mgp
->dip
),
4099 "myri10ge_nic_stats", "net", KSTAT_TYPE_NAMED
,
4100 sizeof (*ethstat
) / sizeof (kstat_named_t
), 0);
4103 "%s: myri10ge_stat_init: kstat_create failed", mgp
->name
);
4104 return (DDI_FAILURE
);
4106 mgp
->ksp_stat
= ksp
;
4107 ethstat
= (struct myri10ge_nic_stat
*)(ksp
->ks_data
);
4109 kstat_named_init(ðstat
->dma_read_bw_MBs
,
4110 "dma_read_bw_MBs", KSTAT_DATA_ULONG
);
4111 kstat_named_init(ðstat
->dma_write_bw_MBs
,
4112 "dma_write_bw_MBs", KSTAT_DATA_ULONG
);
4113 kstat_named_init(ðstat
->dma_read_write_bw_MBs
,
4114 "dma_read_write_bw_MBs", KSTAT_DATA_ULONG
);
4115 kstat_named_init(ðstat
->dma_force_physical
,
4116 "dma_force_physical", KSTAT_DATA_ULONG
);
4117 kstat_named_init(ðstat
->lanes
,
4118 "lanes", KSTAT_DATA_ULONG
);
4119 kstat_named_init(ðstat
->dropped_bad_crc32
,
4120 "dropped_bad_crc32", KSTAT_DATA_ULONG
);
4121 kstat_named_init(ðstat
->dropped_bad_phy
,
4122 "dropped_bad_phy", KSTAT_DATA_ULONG
);
4123 kstat_named_init(ðstat
->dropped_link_error_or_filtered
,
4124 "dropped_link_error_or_filtered", KSTAT_DATA_ULONG
);
4125 kstat_named_init(ðstat
->dropped_link_overflow
,
4126 "dropped_link_overflow", KSTAT_DATA_ULONG
);
4127 kstat_named_init(ðstat
->dropped_multicast_filtered
,
4128 "dropped_multicast_filtered", KSTAT_DATA_ULONG
);
4129 kstat_named_init(ðstat
->dropped_no_big_buffer
,
4130 "dropped_no_big_buffer", KSTAT_DATA_ULONG
);
4131 kstat_named_init(ðstat
->dropped_no_small_buffer
,
4132 "dropped_no_small_buffer", KSTAT_DATA_ULONG
);
4133 kstat_named_init(ðstat
->dropped_overrun
,
4134 "dropped_overrun", KSTAT_DATA_ULONG
);
4135 kstat_named_init(ðstat
->dropped_pause
,
4136 "dropped_pause", KSTAT_DATA_ULONG
);
4137 kstat_named_init(ðstat
->dropped_runt
,
4138 "dropped_runt", KSTAT_DATA_ULONG
);
4139 kstat_named_init(ðstat
->dropped_unicast_filtered
,
4140 "dropped_unicast_filtered", KSTAT_DATA_ULONG
);
4141 kstat_named_init(ðstat
->dropped_runt
, "dropped_runt",
4143 kstat_named_init(ðstat
->link_up
, "link_up", KSTAT_DATA_ULONG
);
4144 kstat_named_init(ðstat
->link_changes
, "link_changes",
4146 ksp
->ks_update
= myri10ge_nic_stat_kstat_update
;
4147 ksp
->ks_private
= (void *) mgp
;
4149 return (DDI_SUCCESS
);
4153 myri10ge_slice_stat_init(struct myri10ge_slice_state
*ss
)
4155 struct myri10ge_priv
*mgp
= ss
->mgp
;
4157 struct myri10ge_slice_stat
*ethstat
;
4161 * fake an instance so that the same slice numbers from
4162 * different instances do not collide
4164 instance
= (ddi_get_instance(mgp
->dip
) * 1000) + (int)(ss
- mgp
->ss
);
4165 ksp
= kstat_create("myri10ge", instance
,
4166 "myri10ge_slice_stats", "net", KSTAT_TYPE_NAMED
,
4167 sizeof (*ethstat
) / sizeof (kstat_named_t
), 0);
4170 "%s: myri10ge_stat_init: kstat_create failed", mgp
->name
);
4171 return (DDI_FAILURE
);
4174 ethstat
= (struct myri10ge_slice_stat
*)(ksp
->ks_data
);
4175 kstat_named_init(ðstat
->lro_bad_csum
, "lro_bad_csum",
4177 kstat_named_init(ðstat
->lro_flushed
, "lro_flushed",
4179 kstat_named_init(ðstat
->lro_queued
, "lro_queued",
4181 kstat_named_init(ðstat
->rx_bigbuf_firmware
, "rx_bigbuf_firmware",
4183 kstat_named_init(ðstat
->rx_bigbuf_pool
, "rx_bigbuf_pool",
4185 kstat_named_init(ðstat
->rx_bigbuf_smalls
, "rx_bigbuf_smalls",
4187 kstat_named_init(ðstat
->rx_copy
, "rx_copy",
4189 kstat_named_init(ðstat
->rx_big_nobuf
, "rx_big_nobuf",
4191 kstat_named_init(ðstat
->rx_small_nobuf
, "rx_small_nobuf",
4193 kstat_named_init(ðstat
->xmit_zero_len
, "xmit_zero_len",
4195 kstat_named_init(ðstat
->xmit_pullup
, "xmit_pullup",
4197 kstat_named_init(ðstat
->xmit_pullup_first
, "xmit_pullup_first",
4199 kstat_named_init(ðstat
->xmit_lowbuf
, "xmit_lowbuf",
4201 kstat_named_init(ðstat
->xmit_lsobadflags
, "xmit_lsobadflags",
4203 kstat_named_init(ðstat
->xmit_sched
, "xmit_sched",
4205 kstat_named_init(ðstat
->xmit_stall
, "xmit_stall",
4207 kstat_named_init(ðstat
->xmit_stall_early
, "xmit_stall_early",
4209 kstat_named_init(ðstat
->xmit_stall_late
, "xmit_stall_late",
4211 kstat_named_init(ðstat
->xmit_err
, "xmit_err",
4213 kstat_named_init(ðstat
->tx_req
, "tx_req",
4215 kstat_named_init(ðstat
->tx_activate
, "tx_activate",
4217 kstat_named_init(ðstat
->tx_done
, "tx_done",
4219 kstat_named_init(ðstat
->tx_handles_alloced
, "tx_handles_alloced",
4221 kstat_named_init(ðstat
->rx_big
, "rx_big",
4223 kstat_named_init(ðstat
->rx_small
, "rx_small",
4225 ksp
->ks_update
= myri10ge_slice_stat_kstat_update
;
4226 ksp
->ks_private
= (void *) ss
;
4228 return (DDI_SUCCESS
);
4233 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
4236 #include <sys/ddi_isa.h>
4237 void *device_arena_alloc(size_t size
, int vm_flag
);
4238 void device_arena_free(void *vaddr
, size_t size
);
4241 myri10ge_enable_nvidia_ecrc(struct myri10ge_priv
*mgp
)
4243 dev_info_t
*parent_dip
;
4244 ddi_acc_handle_t handle
;
4245 unsigned long bus_number
, dev_number
, func_number
;
4246 unsigned long cfg_pa
, paddr
, base
, pgoffset
;
4249 int retval
= DDI_FAILURE
;
4251 uint16_t read_vid
, read_did
, vendor_id
, device_id
;
4253 if (!myri10ge_nvidia_ecrc_enable
)
4256 parent_dip
= ddi_get_parent(mgp
->dip
);
4257 if (parent_dip
== NULL
) {
4258 cmn_err(CE_WARN
, "%s: I'm an orphan?", mgp
->name
);
4262 if (pci_config_setup(parent_dip
, &handle
) != DDI_SUCCESS
) {
4264 "%s: Could not access my parent's registers", mgp
->name
);
4268 vendor_id
= pci_config_get16(handle
, PCI_CONF_VENID
);
4269 device_id
= pci_config_get16(handle
, PCI_CONF_DEVID
);
4270 pci_config_teardown(&handle
);
4272 if (myri10ge_verbose
) {
4273 unsigned long bus_number
, dev_number
, func_number
;
4275 (void) myri10ge_reg_set(parent_dip
, ®_set
, &span
,
4276 &bus_number
, &dev_number
, &func_number
);
4277 if (myri10ge_verbose
)
4278 printf("%s: parent at %ld:%ld:%ld\n", mgp
->name
,
4279 bus_number
, dev_number
, func_number
);
4282 if (vendor_id
!= 0x10de)
4285 if (device_id
!= 0x005d /* CK804 */ &&
4286 (device_id
< 0x374 || device_id
> 0x378) /* MCP55 */) {
4289 (void) myri10ge_reg_set(parent_dip
, &dontcare
, &dontcare
,
4290 &bus_number
, &dev_number
, &func_number
);
4292 for (cfg_pa
= 0xf0000000UL
;
4293 retval
!= DDI_SUCCESS
&& cfg_pa
>= 0xe0000000UL
;
4294 cfg_pa
-= 0x10000000UL
) {
4295 /* find the config space address for the nvidia bridge */
4296 paddr
= (cfg_pa
+ bus_number
* 0x00100000UL
+
4297 (dev_number
* 8 + func_number
) * 0x00001000UL
);
4299 base
= paddr
& (~MMU_PAGEOFFSET
);
4300 pgoffset
= paddr
& MMU_PAGEOFFSET
;
4302 /* map it into the kernel */
4303 cvaddr
= device_arena_alloc(ptob(1), VM_NOSLEEP
);
4305 cmn_err(CE_WARN
, "%s: failed to map nf4: cvaddr\n",
4308 hat_devload(kas
.a_hat
, cvaddr
, mmu_ptob(1),
4309 i_ddi_paddr_to_pfn(base
),
4310 PROT_WRITE
|HAT_STRICTORDER
, HAT_LOAD_LOCK
);
4312 ptr
= cvaddr
+ pgoffset
;
4313 read_vid
= *(uint16_t *)(void *)(ptr
+ PCI_CONF_VENID
);
4314 read_did
= *(uint16_t *)(void *)(ptr
+ PCI_CONF_DEVID
);
4315 if (vendor_id
== read_did
|| device_id
== read_did
) {
4316 ptr32
= (uint32_t *)(void *)(ptr
+ 0x178);
4317 if (myri10ge_verbose
)
4318 printf("%s: Enabling ECRC on upstream "
4319 "Nvidia bridge (0x%x:0x%x) "
4320 "at %ld:%ld:%ld\n", mgp
->name
,
4321 read_vid
, read_did
, bus_number
,
4322 dev_number
, func_number
);
4324 retval
= DDI_SUCCESS
;
4326 hat_unload(kas
.a_hat
, cvaddr
, ptob(1), HAT_UNLOAD_UNLOCK
);
4327 device_arena_free(cvaddr
, ptob(1));
4334 myri10ge_enable_nvidia_ecrc(struct myri10ge_priv
*mgp
)
4341 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
4342 * when the PCI-E Completion packets are aligned on an 8-byte
4343 * boundary. Some PCI-E chip sets always align Completion packets; on
4344 * the ones that do not, the alignment can be enforced by enabling
4345 * ECRC generation (if supported).
4347 * When PCI-E Completion packets are not aligned, it is actually more
4348 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
4350 * If the driver can neither enable ECRC nor verify that it has
4351 * already been enabled, then it must use a firmware image which works
4352 * around unaligned completion packets (ethp_z8e.dat), and it should
4353 * also ensure that it never gives the device a Read-DMA which is
4354 * larger than 2KB by setting the tx.boundary to 2KB. If ECRC is
4355 * enabled, then the driver should use the aligned (eth_z8e.dat)
4356 * firmware image, and set tx.boundary to 4KB.
4361 myri10ge_firmware_probe(struct myri10ge_priv
*mgp
)
4365 mgp
->tx_boundary
= 4096;
4367 * Verify the max read request size was set to 4KB
4368 * before trying the test with 4KB.
4370 if (mgp
->max_read_request_4k
== 0)
4371 mgp
->tx_boundary
= 2048;
4373 * load the optimized firmware which assumes aligned PCIe
4374 * completions in order to see if it works on this host.
4377 mgp
->fw_name
= "rss_eth_z8e";
4378 mgp
->eth_z8e
= (unsigned char *)rss_eth_z8e
;
4379 mgp
->eth_z8e_length
= rss_eth_z8e_length
;
4381 status
= myri10ge_load_firmware(mgp
);
4386 * Enable ECRC if possible
4388 myri10ge_enable_nvidia_ecrc(mgp
);
4391 * Run a DMA test which watches for unaligned completions and
4392 * aborts on the first one seen.
4394 status
= myri10ge_dma_test(mgp
, MXGEFW_CMD_UNALIGNED_TEST
);
4396 return (0); /* keep the aligned firmware */
4398 if (status
!= E2BIG
)
4399 cmn_err(CE_WARN
, "%s: DMA test failed: %d\n",
4401 if (status
== ENOSYS
)
4402 cmn_err(CE_WARN
, "%s: Falling back to ethp! "
4403 "Please install up to date fw\n", mgp
->name
);
4408 myri10ge_select_firmware(struct myri10ge_priv
*mgp
)
4414 if (myri10ge_force_firmware
== 1) {
4415 if (myri10ge_verbose
)
4416 printf("%s: Assuming aligned completions (forced)\n",
4422 if (myri10ge_force_firmware
== 2) {
4423 if (myri10ge_verbose
)
4424 printf("%s: Assuming unaligned completions (forced)\n",
4430 /* If the width is less than 8, we may used the aligned firmware */
4431 if (mgp
->pcie_link_width
!= 0 && mgp
->pcie_link_width
< 8) {
4432 cmn_err(CE_WARN
, "!%s: PCIe link running at x%d\n",
4433 mgp
->name
, mgp
->pcie_link_width
);
4438 if (0 == myri10ge_firmware_probe(mgp
))
4439 return (0); /* keep optimized firmware */
4443 mgp
->fw_name
= "rss_eth_z8e";
4444 mgp
->eth_z8e
= (unsigned char *)rss_eth_z8e
;
4445 mgp
->eth_z8e_length
= rss_eth_z8e_length
;
4446 mgp
->tx_boundary
= 4096;
4448 mgp
->fw_name
= "rss_ethp_z8e";
4449 mgp
->eth_z8e
= (unsigned char *)rss_ethp_z8e
;
4450 mgp
->eth_z8e_length
= rss_ethp_z8e_length
;
4451 mgp
->tx_boundary
= 2048;
4454 return (myri10ge_load_firmware(mgp
));
4458 myri10ge_add_intrs(struct myri10ge_priv
*mgp
, int add_handler
)
4460 dev_info_t
*devinfo
= mgp
->dip
;
4461 int count
, avail
, actual
, intr_types
;
4462 int x
, y
, rc
, inum
= 0;
4465 rc
= ddi_intr_get_supported_types(devinfo
, &intr_types
);
4466 if (rc
!= DDI_SUCCESS
) {
4468 "!%s: ddi_intr_get_nintrs() failure, rc = %d\n", mgp
->name
,
4470 return (DDI_FAILURE
);
4473 if (!myri10ge_use_msi
)
4474 intr_types
&= ~DDI_INTR_TYPE_MSI
;
4475 if (!myri10ge_use_msix
)
4476 intr_types
&= ~DDI_INTR_TYPE_MSIX
;
4478 if (intr_types
& DDI_INTR_TYPE_MSIX
) {
4479 mgp
->ddi_intr_type
= DDI_INTR_TYPE_MSIX
;
4480 mgp
->intr_type
= "MSI-X";
4481 } else if (intr_types
& DDI_INTR_TYPE_MSI
) {
4482 mgp
->ddi_intr_type
= DDI_INTR_TYPE_MSI
;
4483 mgp
->intr_type
= "MSI";
4485 mgp
->ddi_intr_type
= DDI_INTR_TYPE_FIXED
;
4486 mgp
->intr_type
= "Legacy";
4488 /* Get number of interrupts */
4489 rc
= ddi_intr_get_nintrs(devinfo
, mgp
->ddi_intr_type
, &count
);
4490 if ((rc
!= DDI_SUCCESS
) || (count
== 0)) {
4491 cmn_err(CE_WARN
, "%s: ddi_intr_get_nintrs() failure, rc: %d, "
4492 "count: %d", mgp
->name
, rc
, count
);
4494 return (DDI_FAILURE
);
4497 /* Get number of available interrupts */
4498 rc
= ddi_intr_get_navail(devinfo
, mgp
->ddi_intr_type
, &avail
);
4499 if ((rc
!= DDI_SUCCESS
) || (avail
== 0)) {
4500 cmn_err(CE_WARN
, "%s: ddi_intr_get_navail() failure, "
4501 "rc: %d, avail: %d\n", mgp
->name
, rc
, avail
);
4502 return (DDI_FAILURE
);
4504 if (avail
< count
) {
4506 "!%s: nintrs() returned %d, navail returned %d",
4507 mgp
->name
, count
, avail
);
4511 if (count
< mgp
->num_slices
)
4512 return (DDI_FAILURE
);
4514 if (count
> mgp
->num_slices
)
4515 count
= mgp
->num_slices
;
4517 /* Allocate memory for MSI interrupts */
4518 mgp
->intr_size
= count
* sizeof (ddi_intr_handle_t
);
4519 mgp
->htable
= kmem_alloc(mgp
->intr_size
, KM_SLEEP
);
4521 rc
= ddi_intr_alloc(devinfo
, mgp
->htable
, mgp
->ddi_intr_type
, inum
,
4522 count
, &actual
, DDI_INTR_ALLOC_NORMAL
);
4524 if ((rc
!= DDI_SUCCESS
) || (actual
== 0)) {
4525 cmn_err(CE_WARN
, "%s: ddi_intr_alloc() failed: %d",
4528 kmem_free(mgp
->htable
, mgp
->intr_size
);
4530 return (DDI_FAILURE
);
4533 if ((actual
< count
) && myri10ge_verbose
) {
4534 cmn_err(CE_NOTE
, "%s: got %d/%d slices",
4535 mgp
->name
, actual
, count
);
4538 mgp
->intr_cnt
= actual
;
4541 * Get priority for first irq, assume remaining are all the same
4543 if (ddi_intr_get_pri(mgp
->htable
[0], &mgp
->intr_pri
)
4545 cmn_err(CE_WARN
, "%s: ddi_intr_get_pri() failed", mgp
->name
);
4547 /* Free already allocated intr */
4548 for (y
= 0; y
< actual
; y
++) {
4549 (void) ddi_intr_free(mgp
->htable
[y
]);
4552 kmem_free(mgp
->htable
, mgp
->intr_size
);
4554 return (DDI_FAILURE
);
4557 mgp
->icookie
= (void *)(uintptr_t)mgp
->intr_pri
;
4560 return (DDI_SUCCESS
);
4562 /* Call ddi_intr_add_handler() */
4563 for (x
= 0; x
< actual
; x
++) {
4564 if (ddi_intr_add_handler(mgp
->htable
[x
], myri10ge_intr
,
4565 (caddr_t
)&mgp
->ss
[x
], NULL
) != DDI_SUCCESS
) {
4566 cmn_err(CE_WARN
, "%s: ddi_intr_add_handler() failed",
4569 /* Free already allocated intr */
4570 for (y
= 0; y
< actual
; y
++) {
4571 (void) ddi_intr_free(mgp
->htable
[y
]);
4574 kmem_free(mgp
->htable
, mgp
->intr_size
);
4576 return (DDI_FAILURE
);
4580 (void) ddi_intr_get_cap(mgp
->htable
[0], &mgp
->intr_cap
);
4581 if (mgp
->intr_cap
& DDI_INTR_FLAG_BLOCK
) {
4582 /* Call ddi_intr_block_enable() for MSI */
4583 (void) ddi_intr_block_enable(mgp
->htable
, mgp
->intr_cnt
);
4585 /* Call ddi_intr_enable() for MSI non block enable */
4586 for (x
= 0; x
< mgp
->intr_cnt
; x
++) {
4587 (void) ddi_intr_enable(mgp
->htable
[x
]);
4591 return (DDI_SUCCESS
);
4595 myri10ge_rem_intrs(struct myri10ge_priv
*mgp
, int handler_installed
)
4599 /* Disable all interrupts */
4600 if (handler_installed
) {
4601 if (mgp
->intr_cap
& DDI_INTR_FLAG_BLOCK
) {
4602 /* Call ddi_intr_block_disable() */
4603 (void) ddi_intr_block_disable(mgp
->htable
,
4606 for (x
= 0; x
< mgp
->intr_cnt
; x
++) {
4607 (void) ddi_intr_disable(mgp
->htable
[x
]);
4612 for (x
= 0; x
< mgp
->intr_cnt
; x
++) {
4613 if (handler_installed
) {
4614 /* Call ddi_intr_remove_handler() */
4615 err
= ddi_intr_remove_handler(mgp
->htable
[x
]);
4616 if (err
!= DDI_SUCCESS
) {
4618 "%s: ddi_intr_remove_handler for"
4619 "vec %d returned %d\n", mgp
->name
,
4623 err
= ddi_intr_free(mgp
->htable
[x
]);
4624 if (err
!= DDI_SUCCESS
) {
4626 "%s: ddi_intr_free for vec %d returned %d\n",
4630 kmem_free(mgp
->htable
, mgp
->intr_size
);
4635 myri10ge_test_physical(dev_info_t
*dip
)
4637 ddi_dma_handle_t handle
;
4638 struct myri10ge_dma_stuff dma
;
4642 /* test #1, sufficient for older sparc systems */
4643 myri10ge_tx_dma_attr
.dma_attr_flags
= DDI_DMA_FORCE_PHYSICAL
;
4644 err
= ddi_dma_alloc_handle(dip
, &myri10ge_tx_dma_attr
,
4645 DDI_DMA_DONTWAIT
, NULL
, &handle
);
4646 if (err
== DDI_DMA_BADATTR
)
4648 ddi_dma_free_handle(&handle
);
4650 /* test #2, required on Olympis where the bind is what fails */
4651 addr
= myri10ge_dma_alloc(dip
, 128, &myri10ge_tx_dma_attr
,
4652 &myri10ge_dev_access_attr
, DDI_DMA_STREAMING
,
4653 DDI_DMA_WRITE
|DDI_DMA_STREAMING
, &dma
, 0, DDI_DMA_DONTWAIT
);
4656 myri10ge_dma_free(&dma
);
4660 if (myri10ge_verbose
)
4661 printf("myri10ge%d: DDI_DMA_FORCE_PHYSICAL failed, "
4662 "using IOMMU\n", ddi_get_instance(dip
));
4664 myri10ge_tx_dma_attr
.dma_attr_flags
&= ~DDI_DMA_FORCE_PHYSICAL
;
4668 myri10ge_get_props(dev_info_t
*dip
)
4671 myri10ge_flow_control
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4672 "myri10ge_flow_control", myri10ge_flow_control
);
4674 myri10ge_intr_coal_delay
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4675 "myri10ge_intr_coal_delay", myri10ge_intr_coal_delay
);
4677 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
4678 myri10ge_nvidia_ecrc_enable
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4679 "myri10ge_nvidia_ecrc_enable", 1);
4683 myri10ge_use_msi
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4684 "myri10ge_use_msi", myri10ge_use_msi
);
4686 myri10ge_deassert_wait
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4687 "myri10ge_deassert_wait", myri10ge_deassert_wait
);
4689 myri10ge_verbose
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4690 "myri10ge_verbose", myri10ge_verbose
);
4692 myri10ge_tx_copylen
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4693 "myri10ge_tx_copylen", myri10ge_tx_copylen
);
4695 if (myri10ge_tx_copylen
< 60) {
4697 "myri10ge_tx_copylen must be >= 60 bytes\n");
4698 myri10ge_tx_copylen
= 60;
4701 myri10ge_mtu_override
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4702 "myri10ge_mtu_override", myri10ge_mtu_override
);
4704 if (myri10ge_mtu_override
>= MYRI10GE_MIN_GLD_MTU
&&
4705 myri10ge_mtu_override
<= MYRI10GE_MAX_GLD_MTU
)
4706 myri10ge_mtu
= myri10ge_mtu_override
+
4707 sizeof (struct ether_header
) + MXGEFW_PAD
+ VLAN_TAGSZ
;
4708 else if (myri10ge_mtu_override
!= 0) {
4710 "myri10ge_mtu_override must be between 1500 and "
4714 myri10ge_bigbufs_initial
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4715 "myri10ge_bigbufs_initial", myri10ge_bigbufs_initial
);
4716 myri10ge_bigbufs_max
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4717 "myri10ge_bigbufs_max", myri10ge_bigbufs_max
);
4719 myri10ge_watchdog_reset
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4720 "myri10ge_watchdog_reset", myri10ge_watchdog_reset
);
4722 if (myri10ge_bigbufs_initial
< 128) {
4724 "myri10ge_bigbufs_initial be at least 128\n");
4725 myri10ge_bigbufs_initial
= 128;
4727 if (myri10ge_bigbufs_max
< 128) {
4729 "myri10ge_bigbufs_max be at least 128\n");
4730 myri10ge_bigbufs_max
= 128;
4733 if (myri10ge_bigbufs_max
< myri10ge_bigbufs_initial
) {
4735 "myri10ge_bigbufs_max must be >= "
4736 "myri10ge_bigbufs_initial\n");
4737 myri10ge_bigbufs_max
= myri10ge_bigbufs_initial
;
4740 myri10ge_force_firmware
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4741 "myri10ge_force_firmware", myri10ge_force_firmware
);
4743 myri10ge_max_slices
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4744 "myri10ge_max_slices", myri10ge_max_slices
);
4746 myri10ge_use_msix
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4747 "myri10ge_use_msix", myri10ge_use_msix
);
4749 myri10ge_rss_hash
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4750 "myri10ge_rss_hash", myri10ge_rss_hash
);
4752 if (myri10ge_rss_hash
> MXGEFW_RSS_HASH_TYPE_MAX
||
4753 myri10ge_rss_hash
< MXGEFW_RSS_HASH_TYPE_IPV4
) {
4754 cmn_err(CE_WARN
, "myri10ge: Illegal rssh hash type %d\n",
4756 myri10ge_rss_hash
= MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT
;
4758 myri10ge_lro
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4759 "myri10ge_lro", myri10ge_lro
);
4760 myri10ge_lro_cnt
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4761 "myri10ge_lro_cnt", myri10ge_lro_cnt
);
4762 myri10ge_lro_max_aggr
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4763 "myri10ge_lro_max_aggr", myri10ge_lro_max_aggr
);
4764 myri10ge_tx_hash
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4765 "myri10ge_tx_hash", myri10ge_tx_hash
);
4766 myri10ge_use_lso
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4767 "myri10ge_use_lso", myri10ge_use_lso
);
4768 myri10ge_lso_copy
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4769 "myri10ge_lso_copy", myri10ge_lso_copy
);
4770 myri10ge_tx_handles_initial
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4771 "myri10ge_tx_handles_initial", myri10ge_tx_handles_initial
);
4772 myri10ge_small_bytes
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4773 "myri10ge_small_bytes", myri10ge_small_bytes
);
4774 if ((myri10ge_small_bytes
+ MXGEFW_PAD
) & (128 -1)) {
4775 cmn_err(CE_WARN
, "myri10ge: myri10ge_small_bytes (%d)\n",
4776 myri10ge_small_bytes
);
4777 cmn_err(CE_WARN
, "must be aligned on 128b bndry -2\n");
4778 myri10ge_small_bytes
+= 128;
4779 myri10ge_small_bytes
&= ~(128 -1);
4780 myri10ge_small_bytes
-= MXGEFW_PAD
;
4781 cmn_err(CE_WARN
, "rounded up to %d\n",
4782 myri10ge_small_bytes
);
4784 myri10ge_rss_hash
= MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT
;
4788 #ifndef PCI_EXP_LNKSTA
4789 #define PCI_EXP_LNKSTA 18
4793 myri10ge_find_cap(ddi_acc_handle_t handle
, uint8_t *capptr
, uint8_t capid
)
4798 /* check to see if we have capabilities */
4799 status
= pci_config_get16(handle
, PCI_CONF_STAT
);
4800 if (!(status
& PCI_STAT_CAP
)) {
4801 cmn_err(CE_WARN
, "PCI_STAT_CAP not found\n");
4805 ptr
= pci_config_get8(handle
, PCI_CONF_CAP_PTR
);
4807 /* Walk the capabilities list, looking for a PCI Express cap */
4808 while (ptr
!= PCI_CAP_NEXT_PTR_NULL
) {
4809 if (pci_config_get8(handle
, ptr
+ PCI_CAP_ID
) == capid
)
4811 ptr
= pci_config_get8(handle
, ptr
+ PCI_CAP_NEXT_PTR
);
4814 cmn_err(CE_WARN
, "Bad capability offset %d\n", ptr
);
4822 myri10ge_set_max_readreq(ddi_acc_handle_t handle
)
4828 err
= myri10ge_find_cap(handle
, &ptr
, PCI_CAP_ID_PCI_E
);
4830 cmn_err(CE_WARN
, "could not find PCIe cap\n");
4834 /* set max read req to 4096 */
4835 val
= pci_config_get16(handle
, ptr
+ PCIE_DEVCTL
);
4836 val
= (val
& ~PCIE_DEVCTL_MAX_READ_REQ_MASK
) |
4837 PCIE_DEVCTL_MAX_READ_REQ_4096
;
4838 pci_config_put16(handle
, ptr
+ PCIE_DEVCTL
, val
);
4839 val
= pci_config_get16(handle
, ptr
+ PCIE_DEVCTL
);
4840 if ((val
& (PCIE_DEVCTL_MAX_READ_REQ_4096
)) !=
4841 PCIE_DEVCTL_MAX_READ_REQ_4096
) {
4842 cmn_err(CE_WARN
, "could not set max read req (%x)\n", val
);
4849 myri10ge_read_pcie_link_width(ddi_acc_handle_t handle
, int *link
)
4855 err
= myri10ge_find_cap(handle
, &ptr
, PCI_CAP_ID_PCI_E
);
4857 cmn_err(CE_WARN
, "could not set max read req\n");
4861 /* read link width */
4862 val
= pci_config_get16(handle
, ptr
+ PCIE_LINKSTS
);
4863 val
&= PCIE_LINKSTS_NEG_WIDTH_MASK
;
4869 myri10ge_reset_nic(struct myri10ge_priv
*mgp
)
4871 ddi_acc_handle_t handle
= mgp
->cfg_hdl
;
4876 cmd
= pci_config_get16(handle
, PCI_CONF_COMM
);
4877 if ((cmd
& PCI_COMM_ME
) == 0) {
4879 * Bus master DMA disabled? Check to see if the card
4880 * rebooted due to a parity error For now, just report
4884 /* enter read32 mode */
4885 pci_config_put8(handle
, mgp
->vso
+ 0x10, 0x3);
4886 /* read REBOOT_STATUS (0xfffffff0) */
4887 pci_config_put32(handle
, mgp
->vso
+ 0x18, 0xfffffff0);
4888 reboot
= pci_config_get16(handle
, mgp
->vso
+ 0x14);
4889 cmn_err(CE_WARN
, "%s NIC rebooted 0x%x\n", mgp
->name
, reboot
);
4892 if (!myri10ge_watchdog_reset
) {
4893 cmn_err(CE_WARN
, "%s: not resetting\n", mgp
->name
);
4897 myri10ge_stop_locked(mgp
);
4898 err
= myri10ge_start_locked(mgp
);
4899 if (err
== DDI_FAILURE
) {
4902 mac_tx_update(mgp
->mh
);
4907 myri10ge_ring_stalled(myri10ge_tx_ring_t
*tx
)
4909 if (tx
->sched
!= tx
->stall
&&
4910 tx
->done
== tx
->watchdog_done
&&
4911 tx
->watchdog_req
!= tx
->watchdog_done
)
4917 myri10ge_watchdog(void *arg
)
4919 struct myri10ge_priv
*mgp
;
4920 struct myri10ge_slice_state
*ss
;
4921 myri10ge_tx_ring_t
*tx
;
4923 int slices_stalled
, rx_pause
, i
;
4927 mutex_enter(&mgp
->intrlock
);
4928 if (mgp
->running
!= MYRI10GE_ETH_RUNNING
) {
4930 "%s not running, not rearming watchdog (%d)\n",
4931 mgp
->name
, mgp
->running
);
4932 mutex_exit(&mgp
->intrlock
);
4936 rx_pause
= ntohl(mgp
->ss
[0].fw_stats
->dropped_pause
);
4939 * make sure nic is stalled before we reset the nic, so as to
4940 * ensure we don't rip the transmit data structures out from
4941 * under a pending transmit
4944 for (slices_stalled
= 0, i
= 0; i
< mgp
->num_slices
; i
++) {
4945 tx
= &mgp
->ss
[i
].tx
;
4946 slices_stalled
= myri10ge_ring_stalled(tx
);
4951 if (slices_stalled
) {
4952 if (mgp
->watchdog_rx_pause
== rx_pause
) {
4954 "%s slice %d stalled:(%d, %d, %d, %d, %d %d %d\n)",
4955 mgp
->name
, i
, tx
->sched
, tx
->stall
,
4956 tx
->done
, tx
->watchdog_done
, tx
->req
, tx
->pkt_done
,
4957 (int)ntohl(mgp
->ss
[i
].fw_stats
->send_done_count
));
4958 nic_ok
= myri10ge_reset_nic(mgp
);
4961 "%s Flow controlled, check link partner\n",
4968 "%s Nic dead, not rearming watchdog\n", mgp
->name
);
4969 mutex_exit(&mgp
->intrlock
);
4972 for (i
= 0; i
< mgp
->num_slices
; i
++) {
4975 tx
->watchdog_done
= tx
->done
;
4976 tx
->watchdog_req
= tx
->req
;
4977 if (ss
->watchdog_rx_copy
!= MYRI10GE_SLICE_STAT(rx_copy
)) {
4978 ss
->watchdog_rx_copy
= MYRI10GE_SLICE_STAT(rx_copy
);
4980 min(ss
->jpool
.num_alloc
,
4981 myri10ge_bigbufs_max
-
4982 (ss
->jpool
.num_alloc
-
4983 ss
->jbufs_for_smalls
));
4985 (void) myri10ge_add_jbufs(ss
, add_rx
, 0);
4986 /* now feed them to the firmware */
4987 mutex_enter(&ss
->jpool
.mtx
);
4988 myri10ge_restock_jumbos(ss
);
4989 mutex_exit(&ss
->jpool
.mtx
);
4993 mgp
->watchdog_rx_pause
= rx_pause
;
4995 mgp
->timer_id
= timeout(myri10ge_watchdog
, mgp
,
4997 mutex_exit(&mgp
->intrlock
);
5002 myri10ge_get_coalesce(queue_t
*q
, mblk_t
*mp
, caddr_t cp
, cred_t
*credp
)
5004 struct myri10ge_priv
*mgp
= (struct myri10ge_priv
*)(void *)cp
;
5005 (void) mi_mpprintf(mp
, "%d", mgp
->intr_coal_delay
);
5011 myri10ge_set_coalesce(queue_t
*q
, mblk_t
*mp
, char *value
,
5012 caddr_t cp
, cred_t
*credp
)
5014 struct myri10ge_priv
*mgp
= (struct myri10ge_priv
*)(void *)cp
;
5018 new_value
= mi_strtol(value
, &end
, 10);
5022 mutex_enter(&myri10ge_param_lock
);
5023 mgp
->intr_coal_delay
= (int)new_value
;
5024 *mgp
->intr_coal_delay_ptr
= htonl(mgp
->intr_coal_delay
);
5025 mutex_exit(&myri10ge_param_lock
);
5031 myri10ge_get_pauseparam(queue_t
*q
, mblk_t
*mp
, caddr_t cp
, cred_t
*credp
)
5033 struct myri10ge_priv
*mgp
= (struct myri10ge_priv
*)(void *)cp
;
5034 (void) mi_mpprintf(mp
, "%d", mgp
->pause
);
5040 myri10ge_set_pauseparam(queue_t
*q
, mblk_t
*mp
, char *value
,
5041 caddr_t cp
, cred_t
*credp
)
5043 struct myri10ge_priv
*mgp
= (struct myri10ge_priv
*)(void *)cp
;
5048 new_value
= mi_strtol(value
, &end
, 10);
5054 mutex_enter(&myri10ge_param_lock
);
5055 if (new_value
!= mgp
->pause
)
5056 err
= myri10ge_change_pause(mgp
, new_value
);
5057 mutex_exit(&myri10ge_param_lock
);
5063 myri10ge_get_int(queue_t
*q
, mblk_t
*mp
, caddr_t cp
, cred_t
*credp
)
5065 (void) mi_mpprintf(mp
, "%d", *(int *)(void *)cp
);
5071 myri10ge_set_int(queue_t
*q
, mblk_t
*mp
, char *value
,
5072 caddr_t cp
, cred_t
*credp
)
5077 new_value
= mi_strtol(value
, &end
, 10);
5080 *(int *)(void *)cp
= new_value
;
5086 myri10ge_ndd_init(struct myri10ge_priv
*mgp
)
5088 mgp
->nd_head
= NULL
;
5090 (void) nd_load(&mgp
->nd_head
, "myri10ge_intr_coal_delay",
5091 myri10ge_get_coalesce
, myri10ge_set_coalesce
, (caddr_t
)mgp
);
5092 (void) nd_load(&mgp
->nd_head
, "myri10ge_flow_control",
5093 myri10ge_get_pauseparam
, myri10ge_set_pauseparam
, (caddr_t
)mgp
);
5094 (void) nd_load(&mgp
->nd_head
, "myri10ge_verbose",
5095 myri10ge_get_int
, myri10ge_set_int
, (caddr_t
)&myri10ge_verbose
);
5096 (void) nd_load(&mgp
->nd_head
, "myri10ge_deassert_wait",
5097 myri10ge_get_int
, myri10ge_set_int
,
5098 (caddr_t
)&myri10ge_deassert_wait
);
5099 (void) nd_load(&mgp
->nd_head
, "myri10ge_bigbufs_max",
5100 myri10ge_get_int
, myri10ge_set_int
,
5101 (caddr_t
)&myri10ge_bigbufs_max
);
5102 (void) nd_load(&mgp
->nd_head
, "myri10ge_lro",
5103 myri10ge_get_int
, myri10ge_set_int
,
5104 (caddr_t
)&myri10ge_lro
);
5105 (void) nd_load(&mgp
->nd_head
, "myri10ge_lro_max_aggr",
5106 myri10ge_get_int
, myri10ge_set_int
,
5107 (caddr_t
)&myri10ge_lro_max_aggr
);
5108 (void) nd_load(&mgp
->nd_head
, "myri10ge_tx_hash",
5109 myri10ge_get_int
, myri10ge_set_int
,
5110 (caddr_t
)&myri10ge_tx_hash
);
5111 (void) nd_load(&mgp
->nd_head
, "myri10ge_lso_copy",
5112 myri10ge_get_int
, myri10ge_set_int
,
5113 (caddr_t
)&myri10ge_lso_copy
);
5117 myri10ge_ndd_fini(struct myri10ge_priv
*mgp
)
5119 nd_free(&mgp
->nd_head
);
5123 myri10ge_m_ioctl(void *arg
, queue_t
*wq
, mblk_t
*mp
)
5125 struct iocblk
*iocp
;
5126 struct myri10ge_priv
*mgp
= arg
;
5129 iocp
= (struct iocblk
*)(void *)mp
->b_rptr
;
5130 cmd
= iocp
->ioc_cmd
;
5138 ok
= nd_getset(wq
, mgp
->nd_head
, mp
);
5146 err
= iocp
->ioc_error
;
5149 miocack(wq
, mp
, iocp
->ioc_count
, err
);
5151 miocnak(wq
, mp
, 0, err
);
5154 static struct myri10ge_priv
*mgp_list
;
5156 struct myri10ge_priv
*
5157 myri10ge_get_instance(uint_t unit
)
5159 struct myri10ge_priv
*mgp
;
5161 mutex_enter(&myri10ge_param_lock
);
5162 for (mgp
= mgp_list
; mgp
!= NULL
; mgp
= mgp
->next
) {
5163 if (unit
== ddi_get_instance(mgp
->dip
)) {
5168 mutex_exit(&myri10ge_param_lock
);
5173 myri10ge_put_instance(struct myri10ge_priv
*mgp
)
5175 mutex_enter(&myri10ge_param_lock
);
5177 mutex_exit(&myri10ge_param_lock
);
5181 myri10ge_m_getcapab(void *arg
, mac_capab_t cap
, void *cap_data
)
5183 struct myri10ge_priv
*mgp
= arg
;
5184 uint32_t *cap_hcksum
;
5185 mac_capab_lso_t
*cap_lso
;
5186 mac_capab_rings_t
*cap_rings
;
5189 case MAC_CAPAB_HCKSUM
:
5190 cap_hcksum
= cap_data
;
5191 *cap_hcksum
= HCKSUM_INET_PARTIAL
;
5193 case MAC_CAPAB_RINGS
:
5194 cap_rings
= cap_data
;
5195 switch (cap_rings
->mr_type
) {
5196 case MAC_RING_TYPE_RX
:
5197 cap_rings
->mr_group_type
= MAC_GROUP_TYPE_STATIC
;
5198 cap_rings
->mr_rnum
= mgp
->num_slices
;
5199 cap_rings
->mr_gnum
= 1;
5200 cap_rings
->mr_rget
= myri10ge_fill_ring
;
5201 cap_rings
->mr_gget
= myri10ge_fill_group
;
5203 case MAC_RING_TYPE_TX
:
5204 cap_rings
->mr_group_type
= MAC_GROUP_TYPE_STATIC
;
5205 cap_rings
->mr_rnum
= mgp
->num_slices
;
5206 cap_rings
->mr_gnum
= 0;
5207 cap_rings
->mr_rget
= myri10ge_fill_ring
;
5208 cap_rings
->mr_gget
= NULL
;
5216 if (!myri10ge_use_lso
)
5218 if (!(mgp
->features
& MYRI10GE_TSO
))
5220 cap_lso
->lso_flags
= LSO_TX_BASIC_TCP_IPV4
;
5221 cap_lso
->lso_basic_tcp_ipv4
.lso_max
= (uint16_t)-1;
/*
 * MAC statistics callback: looks up the statistic selected by `stat` and
 * hands the result back through `val`.
 *
 * Software counters (rx_stats, tx.stats and the MYRI10GE_SLICE_STAT
 * values) are summed across all mgp->num_slices slices.  Firmware drop
 * counters are read only from the stats block of slice 0 and are passed
 * through ntohl() before use.
 *
 * NOTE(review): this listing omits a number of original lines (the
 * declarations of `i` and `tmp`, the switch header, the stores of `tmp`
 * into *val, the break/return statements), so the return convention and
 * the initial value of `tmp` cannot be confirmed from here.
 */
5232 myri10ge_m_stat(void *arg
, uint_t stat
, uint64_t *val
)
5234 struct myri10ge_priv
*mgp
= arg
;
5235 struct myri10ge_rx_ring_stats
*rstat
;
5236 struct myri10ge_tx_ring_stats
*tstat
;
/* firmware counters always come from slice 0 */
5237 mcp_irq_data_t
*fw_stats
= mgp
->ss
[0].fw_stats
;
5238 struct myri10ge_slice_state
*ss
;
5243 case MAC_STAT_IFSPEED
:
/* constant 10 * 1000 * 1000000 = 10^10; presumably bits per second */
5244 *val
= 10ull * 1000ull * 1000000ull;
5247 case MAC_STAT_MULTIRCV
:
5248 for (i
= 0; i
< mgp
->num_slices
; i
++) {
5249 rstat
= &mgp
->ss
[i
].rx_stats
;
5250 tmp
+= rstat
->multircv
;
5255 case MAC_STAT_BRDCSTRCV
:
5256 for (i
= 0; i
< mgp
->num_slices
; i
++) {
5257 rstat
= &mgp
->ss
[i
].rx_stats
;
5258 tmp
+= rstat
->brdcstrcv
;
5263 case MAC_STAT_MULTIXMT
:
5264 for (i
= 0; i
< mgp
->num_slices
; i
++) {
5265 tstat
= &mgp
->ss
[i
].tx
.stats
;
5266 tmp
+= tstat
->multixmt
;
5271 case MAC_STAT_BRDCSTXMT
:
5272 for (i
= 0; i
< mgp
->num_slices
; i
++) {
5273 tstat
= &mgp
->ss
[i
].tx
.stats
;
5274 tmp
+= tstat
->brdcstxmt
;
5279 case MAC_STAT_NORCVBUF
:
/* firmware drops for lack of buffers or link overflow ... */
5280 tmp
= ntohl(fw_stats
->dropped_no_big_buffer
);
5281 tmp
+= ntohl(fw_stats
->dropped_no_small_buffer
);
5282 tmp
+= ntohl(fw_stats
->dropped_link_overflow
);
/*
 * ... plus the per-slice software counters.  MYRI10GE_SLICE_STAT
 * presumably reads through `ss`; the assignment of `ss` for each
 * slice is not visible in this listing.
 */
5283 for (i
= 0; i
< mgp
->num_slices
; i
++) {
5285 tmp
+= MYRI10GE_SLICE_STAT(rx_big_nobuf
);
5286 tmp
+= MYRI10GE_SLICE_STAT(rx_small_nobuf
);
5291 case MAC_STAT_IERRORS
:
/*
 * NOTE(review): unlike MAC_STAT_NORCVBUF, the first term here uses
 * `+=`, so the result depends on the initial value of `tmp`, whose
 * declaration is not visible here.
 */
5292 tmp
+= ntohl(fw_stats
->dropped_bad_crc32
);
5293 tmp
+= ntohl(fw_stats
->dropped_bad_phy
);
5294 tmp
+= ntohl(fw_stats
->dropped_runt
);
5295 tmp
+= ntohl(fw_stats
->dropped_overrun
);
5299 case MAC_STAT_OERRORS
:
5300 for (i
= 0; i
< mgp
->num_slices
; i
++) {
5302 tmp
+= MYRI10GE_SLICE_STAT(xmit_lsobadflags
);
5303 tmp
+= MYRI10GE_SLICE_STAT(xmit_err
);
5308 case MAC_STAT_RBYTES
:
5309 for (i
= 0; i
< mgp
->num_slices
; i
++) {
5310 rstat
= &mgp
->ss
[i
].rx_stats
;
5311 tmp
+= rstat
->ibytes
;
5316 case MAC_STAT_IPACKETS
:
5317 for (i
= 0; i
< mgp
->num_slices
; i
++) {
5318 rstat
= &mgp
->ss
[i
].rx_stats
;
5319 tmp
+= rstat
->ipackets
;
5324 case MAC_STAT_OBYTES
:
5325 for (i
= 0; i
< mgp
->num_slices
; i
++) {
5326 tstat
= &mgp
->ss
[i
].tx
.stats
;
5327 tmp
+= tstat
->obytes
;
5332 case MAC_STAT_OPACKETS
:
5333 for (i
= 0; i
< mgp
->num_slices
; i
++) {
5334 tstat
= &mgp
->ss
[i
].tx
.stats
;
5335 tmp
+= tstat
->opackets
;
5340 case ETHER_STAT_TOOLONG_ERRORS
:
5341 *val
= ntohl(fw_stats
->dropped_overrun
);
5345 case ETHER_STAT_TOOSHORT_ERRORS
:
5346 *val
= ntohl(fw_stats
->dropped_runt
);
5350 case ETHER_STAT_LINK_PAUSE
:
5354 case ETHER_STAT_LINK_AUTONEG
:
5358 case ETHER_STAT_LINK_DUPLEX
:
/* duplex is reported as a constant */
5359 *val
= LINK_DUPLEX_FULL
;
/*
 * MAC property-info callback.  The visible body publishes, through the
 * handle `prh`, a default of MYRI10GE_DEFAULT_GLD_MTU and an allowed
 * range of MYRI10GE_MIN_GLD_MTU .. MYRI10GE_MAX_GLD_MTU.
 *
 * NOTE(review): the test that selects which property this applies to is
 * not visible in this listing; presumably it is the MTU property, going
 * by the constants used.
 */
5371 myri10ge_m_propinfo(void *arg
, const char *pr_name
,
5372 mac_prop_id_t pr_num
, mac_prop_info_handle_t prh
)
5376 mac_prop_info_set_default_uint32(prh
, MYRI10GE_DEFAULT_GLD_MTU
);
5377 mac_prop_info_set_range_uint32(prh
, MYRI10GE_MIN_GLD_MTU
,
5378 MYRI10GE_MAX_GLD_MTU
);
/*
 * MAC set-property callback.  Only the MAC_PROP_MTU case is visible:
 *
 *   1. reject a value buffer smaller than `mtu`;
 *   2. copy the caller value out of pr_val with bcopy();
 *   3. reject values outside MYRI10GE_MIN_GLD_MTU..MYRI10GE_MAX_GLD_MTU;
 *   4. under mgp->intrlock, take a separate path when the interface is
 *      not MYRI10GE_ETH_STOPPED, otherwise update myri10ge_mtu.
 *
 * myri10ge_mtu is stored as the requested MTU plus the Ethernet header,
 * MXGEFW_PAD and VLAN_TAGSZ, which mirrors how myri10ge_attach() derives
 * m_max_sdu from it.
 *
 * NOTE(review): the error codes returned on each rejection path are on
 * lines missing from this listing.
 */
5387 myri10ge_m_setprop(void *arg
, const char *pr_name
, mac_prop_id_t pr_num
,
5388 uint_t pr_valsize
, const void *pr_val
)
5391 struct myri10ge_priv
*mgp
= arg
;
5394 case MAC_PROP_MTU
: {
5396 if (pr_valsize
< sizeof (mtu
)) {
5400 bcopy(pr_val
, &mtu
, sizeof (mtu
));
5401 if (mtu
> MYRI10GE_MAX_GLD_MTU
||
5402 mtu
< MYRI10GE_MIN_GLD_MTU
) {
5407 mutex_enter(&mgp
->intrlock
);
/* the MTU is only changed while the interface is stopped */
5408 if (mgp
->running
!= MYRI10GE_ETH_STOPPED
) {
5410 mutex_exit(&mgp
->intrlock
);
/* note: myri10ge_mtu is a file-level variable, not a per-device field */
5414 myri10ge_mtu
= mtu
+ sizeof (struct ether_header
) +
5415 MXGEFW_PAD
+ VLAN_TAGSZ
;
5416 mutex_exit(&mgp
->intrlock
);
/*
 * MAC callback vector; myri10ge_attach() points macp->m_callbacks at it.
 * The first member ORs together MC_IOCTL, MC_GETCAPAB, MC_SETPROP and
 * MC_PROPINFO (presumably flagging which optional callbacks are filled
 * in).
 *
 * NOTE(review): only two of the callback entries survive in this
 * listing; the order of the remaining entries cannot be checked here.
 */
5427 static mac_callbacks_t myri10ge_m_callbacks
= {
5428 (MC_IOCTL
| MC_GETCAPAB
| MC_SETPROP
| MC_PROPINFO
),
5433 myri10ge_m_multicst
,
5438 myri10ge_m_getcapab
,
/*
 * Work out how many slices this device will use and leave the answer in
 * mgp->num_slices.
 *
 * Visible sequence:
 *   - start from a single slice;
 *   - reset the NIC with MXGEFW_CMD_RESET;
 *   - test myri10ge_use_msix == 0 (the action taken is not visible;
 *     presumably the single-slice answer is kept);
 *   - tell the firmware the interrupt queue size and ask it for
 *     MXGEFW_CMD_GET_MAX_RSS_QUEUES;
 *   - clamp to myri10ge_max_slices, which defaults to ncpus when it is
 *     still -1;
 *   - while more than one slice is wanted, make the count a power of
 *     two, do a trial myri10ge_add_intrs(mgp, 0) followed by
 *     myri10ge_rem_intrs(mgp, 0), and compare mgp->intr_cnt against the
 *     request, shrinking the request when they differ.
 *
 * NOTE(review): the status checks, loop bodies and return statements are
 * on lines missing from this listing.
 */
5448 myri10ge_probe_slices(struct myri10ge_priv
*mgp
)
5453 mgp
->num_slices
= 1;
5455 /* hit the board with a reset to ensure it is alive */
5456 (void) memset(&cmd
, 0, sizeof (cmd
));
5457 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_RESET
, &cmd
);
5459 cmn_err(CE_WARN
, "%s: failed reset\n", mgp
->name
);
5463 if (myri10ge_use_msix
== 0)
5466 /* tell it the size of the interrupt queues */
5467 cmd
.data0
= mgp
->max_intr_slots
* sizeof (struct mcp_slot
);
5468 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_SET_INTRQ_SIZE
, &cmd
);
5470 cmn_err(CE_WARN
, "%s: failed MXGEFW_CMD_SET_INTRQ_SIZE\n",
5475 /* ask the maximum number of slices it supports */
5476 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_GET_MAX_RSS_QUEUES
,
/* the firmware answer comes back in cmd.data0 */
5481 mgp
->num_slices
= cmd
.data0
;
5484 * if the admin did not specify a limit to how many
5485 * slices we should use, cap it automatically to the
5486 * number of CPUs currently online
5488 if (myri10ge_max_slices
== -1)
5489 myri10ge_max_slices
= ncpus
;
5491 if (mgp
->num_slices
> myri10ge_max_slices
)
5492 mgp
->num_slices
= myri10ge_max_slices
;
5496 * Now try to allocate as many MSI-X vectors as we have
5497 * slices. We give up on MSI-X if we can only get a single
5500 while (mgp
->num_slices
> 1) {
5501 /* make sure it is a power of two */
5502 while (!ISP2(mgp
->num_slices
))
5504 if (mgp
->num_slices
== 1)
/* trial allocation only: the vectors are released again right away */
5507 status
= myri10ge_add_intrs(mgp
, 0);
5509 myri10ge_rem_intrs(mgp
, 0);
5510 if (mgp
->intr_cnt
== mgp
->num_slices
) {
5511 if (myri10ge_verbose
)
5512 printf("Got %d slices!\n",
5516 mgp
->num_slices
= mgp
->intr_cnt
;
5518 mgp
->num_slices
= mgp
->num_slices
/ 2;
5522 if (myri10ge_verbose
)
5523 printf("Got %d slices\n", mgp
->num_slices
);
/*
 * Drain the LRO free list of slice `ss`: as long as ss->lro_free is not
 * NULL, advance the list head to lro->next and return the entry with
 * kmem_free().
 *
 * NOTE(review): the statement that loads `lro` from the list head is on
 * a line missing from this listing.
 */
5528 myri10ge_lro_free(struct myri10ge_slice_state
*ss
)
5530 struct lro_entry
*lro
;
5532 while (ss
->lro_free
!= NULL
) {
5534 ss
->lro_free
= lro
->next
;
5535 kmem_free(lro
, sizeof (*lro
));
/*
 * Set up the LRO lists of slice `ss`: both lro_free and lro_active start
 * out empty, then myri10ge_lro_cnt zeroed entries are allocated with
 * kmem_zalloc(KM_SLEEP) and chained in front of the free list.
 *
 * NOTE(review): the store that makes the new entry the list head is on a
 * line missing from this listing.
 */
5540 myri10ge_lro_alloc(struct myri10ge_slice_state
*ss
)
5542 struct lro_entry
*lro
;
5545 ss
->lro_free
= NULL
;
5546 ss
->lro_active
= NULL
;
5548 for (idx
= 0; idx
< myri10ge_lro_cnt
; idx
++) {
5549 lro
= kmem_zalloc(sizeof (*lro
), KM_SLEEP
);
5552 lro
->next
= ss
->lro_free
;
/*
 * Tear down everything myri10ge_alloc_slices() built.  For each of the
 * mgp->num_slices slices this releases the rx-done ring DMA memory and
 * the firmware stats DMA memory, destroys the four per-slice mutexes and
 * calls myri10ge_jpool_fini(), myri10ge_slice_stat_destroy() and
 * myri10ge_lro_free().  Finally the slice array itself is freed using
 * the same size expression that was used to allocate it.
 *
 * NOTE(review): the actions taken by the three `== NULL` guards are on
 * lines missing from this listing (presumably an early return and two
 * `continue`s for partially constructed slices), as is the assignment of
 * `ss` for each iteration.
 */
5558 myri10ge_free_slices(struct myri10ge_priv
*mgp
)
5560 struct myri10ge_slice_state
*ss
;
5564 if (mgp
->ss
== NULL
)
5567 for (i
= 0; i
< mgp
->num_slices
; i
++) {
5569 if (ss
->rx_done
.entry
== NULL
)
5571 myri10ge_dma_free(&ss
->rx_done
.dma
);
5572 ss
->rx_done
.entry
= NULL
;
5573 if (ss
->fw_stats
== NULL
)
5575 myri10ge_dma_free(&ss
->fw_stats_dma
);
5576 ss
->fw_stats
= NULL
;
5577 mutex_destroy(&ss
->rx_lock
);
5578 mutex_destroy(&ss
->tx
.lock
);
5579 mutex_destroy(&ss
->tx
.handle_lock
);
5580 mutex_destroy(&ss
->poll_lock
);
5581 myri10ge_jpool_fini(ss
);
5582 myri10ge_slice_stat_destroy(ss
);
5583 myri10ge_lro_free(ss
);
/* must match the size computed in myri10ge_alloc_slices() */
5585 bytes
= sizeof (*mgp
->ss
) * mgp
->num_slices
;
5586 kmem_free(mgp
->ss
, bytes
);
/*
 * Allocate and initialise the per-slice state array mgp->ss, one element
 * per mgp->num_slices.
 *
 * For every slice the visible code:
 *   - allocates and zeroes a DMA-able firmware stats block;
 *   - allocates and zeroes the rx-done ring, mgp->max_intr_slots entries
 *     long;
 *   - initialises rx_lock (with mgp->icookie) and the tx.lock,
 *     tx.handle_lock and poll_lock mutexes;
 *   - calls myri10ge_jpool_init(), myri10ge_slice_stat_init() (result
 *     discarded) and myri10ge_lro_alloc().
 *
 * The call to myri10ge_free_slices() at the bottom is the failure path;
 * both DMA allocations use DDI_DMA_DONTWAIT and are checked for NULL.
 *
 * NOTE(review): the jump targets, the assignment of `ss` and the return
 * values are on lines missing from this listing.
 */
5592 myri10ge_alloc_slices(struct myri10ge_priv
*mgp
)
5594 struct myri10ge_slice_state
*ss
;
5598 bytes
= sizeof (*mgp
->ss
) * mgp
->num_slices
;
5599 mgp
->ss
= kmem_zalloc(bytes
, KM_SLEEP
);
/*
 * NOTE(review): kmem_zalloc() with KM_SLEEP is documented not to
 * return NULL, so this check looks like dead code -- confirm.
 */
5600 if (mgp
->ss
== NULL
)
5602 for (i
= 0; i
< mgp
->num_slices
; i
++) {
5607 /* allocate the per-slice firmware stats */
5608 bytes
= sizeof (*ss
->fw_stats
);
5609 ss
->fw_stats
= (mcp_irq_data_t
*)(void *)
5610 myri10ge_dma_alloc(mgp
->dip
, bytes
,
5611 &myri10ge_misc_dma_attr
, &myri10ge_dev_access_attr
,
5612 DDI_DMA_CONSISTENT
, DDI_DMA_READ
|DDI_DMA_CONSISTENT
,
5613 &ss
->fw_stats_dma
, 1, DDI_DMA_DONTWAIT
);
5614 if (ss
->fw_stats
== NULL
)
5616 (void) memset(ss
->fw_stats
, 0, bytes
);
5618 /* allocate rx done ring */
5619 bytes
= mgp
->max_intr_slots
*
5620 sizeof (*ss
->rx_done
.entry
);
5621 ss
->rx_done
.entry
= (mcp_slot_t
*)(void *)
5622 myri10ge_dma_alloc(mgp
->dip
, bytes
,
5623 &myri10ge_misc_dma_attr
, &myri10ge_dev_access_attr
,
5624 DDI_DMA_CONSISTENT
, DDI_DMA_READ
|DDI_DMA_CONSISTENT
,
5625 &ss
->rx_done
.dma
, 1, DDI_DMA_DONTWAIT
);
5626 if (ss
->rx_done
.entry
== NULL
) {
5629 (void) memset(ss
->rx_done
.entry
, 0, bytes
);
/* only rx_lock is initialised with the interrupt cookie */
5630 mutex_init(&ss
->rx_lock
, NULL
, MUTEX_DEFAULT
, mgp
->icookie
);
5631 mutex_init(&ss
->tx
.lock
, NULL
, MUTEX_DEFAULT
, NULL
);
5632 mutex_init(&ss
->tx
.handle_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
5633 mutex_init(&ss
->poll_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
5634 myri10ge_jpool_init(ss
);
5635 (void) myri10ge_slice_stat_init(ss
);
5636 myri10ge_lro_alloc(ss
);
/* failure path: undo whatever was set up so far */
5642 myri10ge_free_slices(mgp
);
/*
 * Snapshot the MSI capability registers into mgp->pci_saved_state so
 * that myri10ge_restore_msi_state() can write them back later.
 *
 * The capability is located with myri10ge_find_cap(PCI_CAP_ID_MSI); the
 * control word, the low and high address words, and the data word at
 * both the 32-bit and the 64-bit layout offsets are read.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE (after a warning) on the path that
 * reports the capability as not found.
 */
5647 myri10ge_save_msi_state(struct myri10ge_priv
*mgp
,
5648 ddi_acc_handle_t handle
)
5653 err
= myri10ge_find_cap(handle
, &ptr
, PCI_CAP_ID_MSI
);
5655 cmn_err(CE_WARN
, "%s: could not find MSI cap\n",
5657 return (DDI_FAILURE
);
5659 mgp
->pci_saved_state
.msi_ctrl
=
5660 pci_config_get16(handle
, ptr
+ PCI_MSI_CTRL
);
5661 mgp
->pci_saved_state
.msi_addr_low
=
5662 pci_config_get32(handle
, ptr
+ PCI_MSI_ADDR_OFFSET
);
5663 mgp
->pci_saved_state
.msi_addr_high
=
5664 pci_config_get32(handle
, ptr
+ PCI_MSI_ADDR_OFFSET
+ 4);
/* both possible data-register locations are saved */
5665 mgp
->pci_saved_state
.msi_data_32
=
5666 pci_config_get16(handle
, ptr
+ PCI_MSI_32BIT_DATA
);
5667 mgp
->pci_saved_state
.msi_data_64
=
5668 pci_config_get16(handle
, ptr
+ PCI_MSI_64BIT_DATA
);
5669 return (DDI_SUCCESS
);
/*
 * Write the MSI capability registers captured by
 * myri10ge_save_msi_state() back to the device: control word, low and
 * high address words, and the data word at both the 32-bit and 64-bit
 * layout offsets.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE (after a warning) on the path that
 * reports the capability as not found.
 */
5673 myri10ge_restore_msi_state(struct myri10ge_priv
*mgp
,
5674 ddi_acc_handle_t handle
)
5679 err
= myri10ge_find_cap(handle
, &ptr
, PCI_CAP_ID_MSI
);
5681 cmn_err(CE_WARN
, "%s: could not find MSI cap\n",
5683 return (DDI_FAILURE
);
5686 pci_config_put16(handle
, ptr
+ PCI_MSI_CTRL
,
5687 mgp
->pci_saved_state
.msi_ctrl
);
5688 pci_config_put32(handle
, ptr
+ PCI_MSI_ADDR_OFFSET
,
5689 mgp
->pci_saved_state
.msi_addr_low
);
5690 pci_config_put32(handle
, ptr
+ PCI_MSI_ADDR_OFFSET
+ 4,
5691 mgp
->pci_saved_state
.msi_addr_high
);
5692 pci_config_put16(handle
, ptr
+ PCI_MSI_32BIT_DATA
,
5693 mgp
->pci_saved_state
.msi_data_32
);
5694 pci_config_put16(handle
, ptr
+ PCI_MSI_64BIT_DATA
,
5695 mgp
->pci_saved_state
.msi_data_64
);
5697 return (DDI_SUCCESS
);
/*
 * Save PCI configuration state ahead of a suspend: the first 16 32-bit
 * words of config space (offsets 0 through 60) go into
 * mgp->pci_saved_state.base[], and the MSI registers are saved as well
 * when the device is using DDI_INTR_TYPE_MSI.
 *
 * `err` starts as DDI_SUCCESS and is only overwritten by the MSI save.
 * NOTE(review): the return statement is on a line missing from this
 * listing; presumably `err` is what gets returned.
 */
5701 myri10ge_save_pci_state(struct myri10ge_priv
*mgp
)
5703 ddi_acc_handle_t handle
= mgp
->cfg_hdl
;
5705 int err
= DDI_SUCCESS
;
5708 /* Save the non-extended PCI config space 32-bits at a time */
5709 for (i
= 0; i
< 16; i
++)
5710 mgp
->pci_saved_state
.base
[i
] =
5711 pci_config_get32(handle
, i
*4);
5713 /* now save MSI interrupt state, if needed */
5714 if (mgp
->ddi_intr_type
== DDI_INTR_TYPE_MSI
)
5715 err
= myri10ge_save_msi_state(mgp
, handle
);
/*
 * Counterpart of myri10ge_save_pci_state(): writes the 16 saved 32-bit
 * config words back, restores the MSI registers when the device is
 * using DDI_INTR_TYPE_MSI, and re-applies myri10ge_set_max_readreq()
 * when mgp->max_read_request_4k is set.
 *
 * The words are written from index 15 down to 0, i.e. in the opposite
 * order from the save.
 * NOTE(review): the return statement is on a line missing from this
 * listing; presumably `err` is what gets returned.
 */
5721 myri10ge_restore_pci_state(struct myri10ge_priv
*mgp
)
5723 ddi_acc_handle_t handle
= mgp
->cfg_hdl
;
5725 int err
= DDI_SUCCESS
;
5728 /* Restore the non-extended PCI config space 32-bits at a time */
5729 for (i
= 15; i
>= 0; i
--)
5730 pci_config_put32(handle
, i
*4, mgp
->pci_saved_state
.base
[i
]);
5732 /* now restore MSI interrupt state, if needed */
5733 if (mgp
->ddi_intr_type
== DDI_INTR_TYPE_MSI
)
5734 err
= myri10ge_restore_msi_state(mgp
, handle
);
/* the result of re-applying the read request size is ignored */
5736 if (mgp
->max_read_request_4k
)
5737 (void) myri10ge_set_max_readreq(handle
);
/*
 * Suspend handler, called from myri10ge_detach() for DDI_SUSPEND.
 *
 * After validating the soft state (it must exist and point back at
 * `dip`), a running interface is stopped and marked
 * MYRI10GE_ETH_SUSPENDED_RUNNING so that myri10ge_resume() knows to
 * restart it.  PCI config state is then saved while mgp->intrlock is
 * still held.
 *
 * NOTE(review): the NULL test on mgp and the final return are on lines
 * missing from this listing; presumably the status of
 * myri10ge_save_pci_state() is returned.
 */
5743 myri10ge_suspend(dev_info_t
*dip
)
5745 struct myri10ge_priv
*mgp
= ddi_get_driver_private(dip
);
5749 cmn_err(CE_WARN
, "null dip in myri10ge_suspend\n");
5750 return (DDI_FAILURE
);
5752 if (mgp
->dip
!= dip
) {
5753 cmn_err(CE_WARN
, "bad dip in myri10ge_suspend\n");
5754 return (DDI_FAILURE
);
5756 mutex_enter(&mgp
->intrlock
);
5757 if (mgp
->running
== MYRI10GE_ETH_RUNNING
) {
5758 mgp
->running
= MYRI10GE_ETH_STOPPING
;
/*
 * intrlock is dropped around untimeout(); presumably because the
 * watchdog callback takes the same lock -- confirm.
 */
5759 mutex_exit(&mgp
->intrlock
);
5760 (void) untimeout(mgp
->timer_id
);
5761 mutex_enter(&mgp
->intrlock
);
5762 myri10ge_stop_locked(mgp
);
5763 mgp
->running
= MYRI10GE_ETH_SUSPENDED_RUNNING
;
5765 status
= myri10ge_save_pci_state(mgp
);
5766 mutex_exit(&mgp
->intrlock
);
/*
 * Resume handler, called from myri10ge_attach() for DDI_RESUME.
 *
 * After validating the soft state, PCI config state is restored under
 * mgp->intrlock.  If that succeeds and the interface was left in
 * MYRI10GE_ETH_SUSPENDED_RUNNING by myri10ge_suspend(), it is restarted
 * with myri10ge_start_locked().  On the success path the watchdog
 * timeout is re-armed and DDI_SUCCESS is returned.
 *
 * NOTE(review): the NULL test on mgp and the statement executed when
 * status != DDI_SUCCESS are on lines missing from this listing.
 */
5771 myri10ge_resume(dev_info_t
*dip
)
5773 struct myri10ge_priv
*mgp
= ddi_get_driver_private(dip
);
5774 int status
= DDI_SUCCESS
;
5777 cmn_err(CE_WARN
, "null dip in myri10ge_resume\n");
5778 return (DDI_FAILURE
);
5780 if (mgp
->dip
!= dip
) {
5781 cmn_err(CE_WARN
, "bad dip in myri10ge_resume\n");
5782 return (DDI_FAILURE
);
5785 mutex_enter(&mgp
->intrlock
);
5786 status
= myri10ge_restore_pci_state(mgp
);
/* only restart an interface that was running when it was suspended */
5787 if (status
== DDI_SUCCESS
&&
5788 mgp
->running
== MYRI10GE_ETH_SUSPENDED_RUNNING
) {
5789 status
= myri10ge_start_locked(mgp
);
5791 mutex_exit(&mgp
->intrlock
);
5792 if (status
!= DDI_SUCCESS
)
5795 /* start the watchdog timer */
5796 mgp
->timer_id
= timeout(myri10ge_watchdog
, mgp
,
5798 return (DDI_SUCCESS
);
/*
 * Attach entry point.  DDI_RESUME is handed to myri10ge_resume(); any
 * command other than DDI_ATTACH fails with DDI_FAILURE.
 *
 * For DDI_ATTACH the visible steps are, in order:
 *   - set up PCI config access and turn on PCI_COMM_ME | PCI_COMM_MAE;
 *   - read the PCIe link width, set the max read request size and
 *     locate the PCI_CAP_ID_VS capability;
 *   - allocate the mac registration structure and the per-device
 *     myri10ge_priv, and publish the latter as driver private data;
 *   - copy tunables into mgp under myri10ge_param_lock;
 *   - allocate the DMA command page and map the register set at
 *     mgp->sram;
 *   - read sram_size out of the firmware header and copy the EEPROM
 *     strings that sit at that offset;
 *   - read the MAC address, select firmware, probe and allocate slices;
 *   - add interrupts, then initialise cmd_lock and intrlock with the
 *     interrupt cookie;
 *   - run myri10ge_nic_stat_init() and myri10ge_info_init();
 *   - fill in macp and call mac_register();
 *   - run myri10ge_ndd_init() and chain mgp in front of mgp_list.
 *
 * The code from myri10ge_info_destroy() downwards is the unwind path; it
 * releases the same resources in the reverse order and returns
 * DDI_FAILURE.
 *
 * NOTE(review): several abort_with_* labels, most status checks and some
 * declarations are on lines missing from this listing, so the exact
 * unwind entry points cannot be confirmed from here.
 */
5802 myri10ge_attach(dev_info_t
*dip
, ddi_attach_cmd_t cmd
)
5805 struct myri10ge_priv
*mgp
;
5806 mac_register_t
*macp
, *omacp
;
5807 ddi_acc_handle_t handle
;
5808 uint32_t csr
, hdr_offset
;
5809 int status
, span
, link_width
, max_read_request_4k
;
5810 unsigned long bus_number
, dev_number
, func_number
;
5815 if (cmd
== DDI_RESUME
) {
5816 return (myri10ge_resume(dip
));
5819 if (cmd
!= DDI_ATTACH
)
5820 return (DDI_FAILURE
);
5821 if (pci_config_setup(dip
, &handle
) != DDI_SUCCESS
)
5822 return (DDI_FAILURE
);
5824 /* enable bus master and memory space access (PCI_COMM_ME, PCI_COMM_MAE) */
5825 csr
= pci_config_get32(handle
, PCI_CONF_COMM
);
5826 pci_config_put32(handle
, PCI_CONF_COMM
,
5827 (csr
|PCI_COMM_ME
|PCI_COMM_MAE
));
5828 status
= myri10ge_read_pcie_link_width(handle
, &link_width
);
5830 cmn_err(CE_WARN
, "could not read link width!\n");
/* remembered so that myri10ge_restore_pci_state() can re-apply it */
5833 max_read_request_4k
= !myri10ge_set_max_readreq(handle
);
5834 status
= myri10ge_find_cap(handle
, &vso
, PCI_CAP_ID_VS
);
5836 goto abort_with_cfg_hdl
;
5837 if ((omacp
= mac_alloc(MAC_VERSION
)) == NULL
)
5838 goto abort_with_cfg_hdl
;
5840 * XXXX Hack: mac_register_t grows in newer kernels. To be
5841 * able to write newer fields, such as m_margin, without
5842 * writing outside allocated memory, we allocate our own macp
5843 * and pass that to mac_register()
/* the factor of 8 must match both kmem_free(macp, ...) calls below */
5845 macp
= kmem_zalloc(sizeof (*macp
) * 8, KM_SLEEP
);
5846 macp
->m_version
= omacp
->m_version
;
5848 if ((mgp
= (struct myri10ge_priv
*)
5849 kmem_zalloc(sizeof (*mgp
), KM_SLEEP
)) == NULL
) {
5850 goto abort_with_macinfo
;
5852 ddi_set_driver_private(dip
, mgp
);
5854 /* setup device name for log messages */
/*
 * NOTE(review): sprintf() is unbounded; the size of mgp->name is not
 * visible here -- confirm that it cannot overflow.
 */
5855 (void) sprintf(mgp
->name
, "myri10ge%d", ddi_get_instance(dip
));
5857 mutex_enter(&myri10ge_param_lock
);
5858 myri10ge_get_props(dip
);
5859 mgp
->intr_coal_delay
= myri10ge_intr_coal_delay
;
5860 mgp
->pause
= myri10ge_flow_control
;
5861 mutex_exit(&myri10ge_param_lock
);
5863 mgp
->max_read_request_4k
= max_read_request_4k
;
5864 mgp
->pcie_link_width
= link_width
;
5865 mgp
->running
= MYRI10GE_ETH_STOPPED
;
5868 mgp
->cfg_hdl
= handle
;
5870 mgp
->timer_ticks
= 5 * drv_usectohz(1000000); /* 5 seconds */
5871 myri10ge_test_physical(dip
);
5873 /* allocate command page */
5874 bytes
= sizeof (*mgp
->cmd
);
5875 mgp
->cmd
= (mcp_cmd_response_t
*)
5876 (void *)myri10ge_dma_alloc(dip
, bytes
,
5877 &myri10ge_misc_dma_attr
, &myri10ge_dev_access_attr
,
5878 DDI_DMA_CONSISTENT
, DDI_DMA_RDWR
|DDI_DMA_CONSISTENT
,
5879 &mgp
->cmd_dma
, 1, DDI_DMA_DONTWAIT
);
5880 if (mgp
->cmd
== NULL
)
5881 goto abort_with_mgp
;
5883 (void) myri10ge_reg_set(dip
, &mgp
->reg_set
, &span
, &bus_number
,
5884 &dev_number
, &func_number
);
5885 if (myri10ge_verbose
)
5886 printf("%s at %ld:%ld:%ld attaching\n", mgp
->name
,
5887 bus_number
, dev_number
, func_number
);
5888 status
= ddi_regs_map_setup(dip
, mgp
->reg_set
, (caddr_t
*)&mgp
->sram
,
5889 (offset_t
)0, (offset_t
)span
, &myri10ge_dev_access_attr
,
5891 if (status
!= DDI_SUCCESS
) {
5892 cmn_err(CE_WARN
, "%s: couldn't map memory space", mgp
->name
);
5893 printf("%s: reg_set = %d, span = %d, status = %d",
5894 mgp
->name
, mgp
->reg_set
, span
, status
);
5895 goto abort_with_mgp
;
/*
 * The firmware header pointer and sram_size are read straight out of the
 * mapped region and run through ntohl() before use.
 */
5898 hdr_offset
= *(uint32_t *)(void*)(mgp
->sram
+ MCP_HEADER_PTR_OFFSET
);
5899 hdr_offset
= ntohl(hdr_offset
) & 0xffffc;
5900 ss_offset
= hdr_offset
+
5901 offsetof(struct mcp_gen_header
, string_specs
);
5902 mgp
->sram_size
= ntohl(*(uint32_t *)(void*)(mgp
->sram
+ ss_offset
));
5903 myri10ge_pio_copy32(mgp
->eeprom_strings
,
5904 (uint32_t *)(void*)((char *)mgp
->sram
+ mgp
->sram_size
),
5905 MYRI10GE_EEPROM_STRINGS_SIZE
);
/* zero the last two bytes of the copied EEPROM strings */
5906 (void) memset(mgp
->eeprom_strings
+
5907 MYRI10GE_EEPROM_STRINGS_SIZE
- 2, 0, 2);
5909 status
= myri10ge_read_mac_addr(mgp
);
5911 goto abort_with_mapped
;
5914 status
= myri10ge_select_firmware(mgp
);
5916 cmn_err(CE_WARN
, "%s: failed to load firmware\n", mgp
->name
);
5917 goto abort_with_mapped
;
5920 status
= myri10ge_probe_slices(mgp
);
5922 cmn_err(CE_WARN
, "%s: failed to probe slices\n", mgp
->name
);
5923 goto abort_with_dummy_rdma
;
5926 status
= myri10ge_alloc_slices(mgp
);
5928 cmn_err(CE_WARN
, "%s: failed to alloc slices\n", mgp
->name
);
5929 goto abort_with_dummy_rdma
;
5932 /* add the interrupt handler */
5933 status
= myri10ge_add_intrs(mgp
, 1);
5935 cmn_err(CE_WARN
, "%s: Failed to add interrupt\n",
5937 goto abort_with_slices
;
5940 /* now that we have an iblock_cookie, init the mutexes */
5941 mutex_init(&mgp
->cmd_lock
, NULL
, MUTEX_DRIVER
, mgp
->icookie
);
5942 mutex_init(&mgp
->intrlock
, NULL
, MUTEX_DRIVER
, mgp
->icookie
);
5945 status
= myri10ge_nic_stat_init(mgp
);
5946 if (status
!= DDI_SUCCESS
)
5947 goto abort_with_interrupts
;
5948 status
= myri10ge_info_init(mgp
);
5949 if (status
!= DDI_SUCCESS
)
5950 goto abort_with_stats
;
5953 * Initialize GLD state
5956 macp
->m_type_ident
= MAC_PLUGIN_IDENT_ETHER
;
5957 macp
->m_driver
= mgp
;
5959 macp
->m_src_addr
= mgp
->mac_addr
;
5960 macp
->m_callbacks
= &myri10ge_m_callbacks
;
5961 macp
->m_min_sdu
= 0;
/* inverse of the computation in myri10ge_m_setprop() */
5962 macp
->m_max_sdu
= myri10ge_mtu
-
5963 (sizeof (struct ether_header
) + MXGEFW_PAD
+ VLAN_TAGSZ
);
5965 macp
->m_margin
= VLAN_TAGSZ
;
5967 macp
->m_v12n
= MAC_VIRT_LEVEL1
;
5968 status
= mac_register(macp
, &mgp
->mh
);
5970 cmn_err(CE_WARN
, "%s: mac_register failed with %d\n",
5972 goto abort_with_info
;
5974 myri10ge_ndd_init(mgp
);
5975 if (myri10ge_verbose
)
5976 printf("%s: %s, tx bndry %d, fw %s\n", mgp
->name
,
5977 mgp
->intr_type
, mgp
->tx_boundary
, mgp
->fw_name
);
/* mgp_list is modified under myri10ge_param_lock */
5978 mutex_enter(&myri10ge_param_lock
);
5979 mgp
->next
= mgp_list
;
5981 mutex_exit(&myri10ge_param_lock
);
5982 kmem_free(macp
, sizeof (*macp
) * 8);
5984 return (DDI_SUCCESS
);
/* ---- failure unwind: release resources in reverse order ---- */
5987 myri10ge_info_destroy(mgp
);
5990 myri10ge_nic_stat_destroy(mgp
);
5992 abort_with_interrupts
:
5993 mutex_destroy(&mgp
->cmd_lock
);
5994 mutex_destroy(&mgp
->intrlock
);
5995 myri10ge_rem_intrs(mgp
, 1);
5998 myri10ge_free_slices(mgp
);
6000 abort_with_dummy_rdma
:
6001 myri10ge_dummy_rdma(mgp
, 0);
/*
 * NOTE(review): the handle argument of ddi_regs_map_setup() above is on
 * a line missing from this listing; presumably it is &mgp->io_handle.
 */
6004 ddi_regs_map_free(&mgp
->io_handle
);
6006 myri10ge_dma_free(&mgp
->cmd_dma
);
6009 kmem_free(mgp
, sizeof (*mgp
));
6012 kmem_free(macp
, sizeof (*macp
) * 8);
6016 pci_config_teardown(&handle
);
6017 return (DDI_FAILURE
);
/*
 * Detach entry point.  DDI_SUSPEND is handed to myri10ge_suspend(); any
 * other command besides DDI_DETACH fails with DDI_FAILURE.
 *
 * A real detach is refused (DDI_FAILURE, with a CE_NOTE message) while
 * either of these holds:
 *   - some slice still has jumbo buffers outstanding after
 *     myri10ge_remove_jbufs(), i.e. the summed jpool.num_alloc is
 *     non-zero;
 *   - mgp->refcnt is non-zero.
 *
 * Otherwise the device is unregistered from the MAC layer and the
 * resources set up by myri10ge_attach() are released.  Finally mgp is
 * unlinked from mgp_list and freed, both under myri10ge_param_lock.
 *
 * NOTE(review): the initialisation of jbufs_alloced, the handling of a
 * failed mac_unregister() and the start of the list walk are on lines
 * missing from this listing.
 */
6023 myri10ge_detach(dev_info_t
*dip
, ddi_detach_cmd_t cmd
)
6025 struct myri10ge_priv
*mgp
, *tmp
;
6026 int status
, i
, jbufs_alloced
;
6028 if (cmd
== DDI_SUSPEND
) {
6029 status
= myri10ge_suspend(dip
);
6033 if (cmd
!= DDI_DETACH
) {
6034 return (DDI_FAILURE
);
6036 /* Get the driver private (gld_mac_info_t) structure */
6037 mgp
= ddi_get_driver_private(dip
);
6039 mutex_enter(&mgp
->intrlock
);
/* count the jumbo buffers that remain allocated after removal */
6041 for (i
= 0; i
< mgp
->num_slices
; i
++) {
6042 myri10ge_remove_jbufs(&mgp
->ss
[i
]);
6043 jbufs_alloced
+= mgp
->ss
[i
].jpool
.num_alloc
;
6045 mutex_exit(&mgp
->intrlock
);
6046 if (jbufs_alloced
!= 0) {
6047 cmn_err(CE_NOTE
, "%s: %d loaned rx buffers remain\n",
6048 mgp
->name
, jbufs_alloced
);
6049 return (DDI_FAILURE
);
6052 mutex_enter(&myri10ge_param_lock
);
6053 if (mgp
->refcnt
!= 0) {
6054 mutex_exit(&myri10ge_param_lock
);
6055 cmn_err(CE_NOTE
, "%s: %d external refs remain\n",
6056 mgp
->name
, mgp
->refcnt
);
6057 return (DDI_FAILURE
);
6059 mutex_exit(&myri10ge_param_lock
);
6061 status
= mac_unregister(mgp
->mh
);
6062 if (status
!= DDI_SUCCESS
)
6065 myri10ge_ndd_fini(mgp
);
6066 myri10ge_dummy_rdma(mgp
, 0);
6067 myri10ge_nic_stat_destroy(mgp
);
6068 myri10ge_info_destroy(mgp
);
6070 mutex_destroy(&mgp
->cmd_lock
);
6071 mutex_destroy(&mgp
->intrlock
);
6073 myri10ge_rem_intrs(mgp
, 1);
6075 myri10ge_free_slices(mgp
);
6076 ddi_regs_map_free(&mgp
->io_handle
);
6077 myri10ge_dma_free(&mgp
->cmd_dma
);
6078 pci_config_teardown(&mgp
->cfg_hdl
);
/* unlink mgp from the singly linked mgp_list */
6080 mutex_enter(&myri10ge_param_lock
);
6081 if (mgp_list
== mgp
) {
6082 mgp_list
= mgp
->next
;
6085 while (tmp
->next
!= mgp
&& tmp
->next
!= NULL
)
6087 if (tmp
->next
!= NULL
)
6088 tmp
->next
= tmp
->next
->next
;
6090 kmem_free(mgp
, sizeof (*mgp
));
6091 mutex_exit(&myri10ge_param_lock
);
6092 return (DDI_SUCCESS
);
6096 * Helper for quiesce entry point: Interrupt threads are not being
6097 * scheduled, so we must poll for the confirmation DMA to arrive in
6098 * the firmware stats block for slice 0. We're essentially running
6099 * the guts of the interrupt handler, and just cherry picking the
6100 * confirmation that the NIC is quiesced (stats->link_down)
/*
 * One polling pass over the firmware stats block of slice 0 (mgp->ss),
 * used by myri10ge_quiesce().
 *
 * Visible sequence: test stats->valid through a volatile read; for a
 * DDI_INTR_TYPE_FIXED interrupt, write 0 to *mgp->irq_deassert and wait
 * for stats->valid to clear; test stats_updated && link_down; then write
 * BE_32(3) to both irq_claim words.  The value returned is found_down.
 *
 * NOTE(review): the action taken when stats->valid is zero, the body of
 * the wait loop and the assignment to found_down are on lines missing
 * from this listing.
 */
6104 myri10ge_poll_down(struct myri10ge_priv
*mgp
)
6106 struct myri10ge_slice_state
*ss
= mgp
->ss
;
6107 mcp_irq_data_t
*stats
= ss
->fw_stats
;
6112 /* check for a pending IRQ */
6114 if (! *((volatile uint8_t *)& stats
->valid
))
6116 valid
= stats
->valid
;
6119 * Make sure to tell the NIC to lower a legacy IRQ, else
6120 * it may have corrupt state after restarting
6123 if (mgp
->ddi_intr_type
== DDI_INTR_TYPE_FIXED
) {
6124 /* lower legacy IRQ */
6125 *mgp
->irq_deassert
= 0;
6127 /* wait for irq conf DMA */
6128 while (*((volatile uint8_t *)& stats
->valid
))
6131 if (stats
->stats_updated
&& stats
->link_down
)
/* both irq_claim words are written with the same value */
6135 *ss
->irq_claim
= BE_32(3);
6136 *(ss
->irq_claim
+ 1) = BE_32(3);
6138 return (found_down
);
/*
 * Quiesce entry point: bring the NIC to a state where it no longer
 * generates interrupts or DMA.
 *
 * An interface in MYRI10GE_ETH_STOPPED needs no work and reports
 * DDI_SUCCESS.  Otherwise MXGEFW_CMD_ETHERNET_DOWN is sent and
 * myri10ge_poll_down() is called up to 20 times, with a
 * drv_usectohz(100000) delay (100000 microseconds) in the loop.
 *
 * NOTE(review): the NULL test on mgp, the status test after the command
 * and the tests on `down` are on lines missing from this listing.
 * NOTE(review): quiesce(9E) describes this entry point as running in a
 * context where blocking is not allowed; the delay() call deserves a
 * second look (drv_usecwait() is the usual busy-wait) -- confirm.
 */
6142 myri10ge_quiesce(dev_info_t
*dip
)
6144 struct myri10ge_priv
*mgp
;
6146 int status
, down
, i
;
6148 mgp
= ddi_get_driver_private(dip
);
6150 return (DDI_FAILURE
);
6152 /* if the device was unplumbed, it is guaranteed to be quiescent */
6153 if (mgp
->running
== MYRI10GE_ETH_STOPPED
)
6154 return (DDI_SUCCESS
);
6156 /* send a down CMD to quiesce the NIC */
6157 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_ETHERNET_DOWN
, &cmd
);
6159 cmn_err(CE_WARN
, "%s: Couldn't bring down link\n", mgp
->name
);
6160 return (DDI_FAILURE
);
/* poll for the down confirmation, at most 20 attempts */
6163 for (i
= 0; i
< 20; i
++) {
6164 down
= myri10ge_poll_down(mgp
);
6167 delay(drv_usectohz(100000));
6171 return (DDI_SUCCESS
);
6172 return (DDI_FAILURE
);
6176 * Distinguish between allocb'ed blocks, and desballoc'ed attached
/*
 * Allocate a throwaway 1024-byte message block with allocb() and record
 * the db_lastfree pointer of its data block in myri10ge_db_lastfree.
 * A warning is logged on the failure path.
 *
 * NOTE(review): the assignment of `dbp`, the failure test and the
 * release of the message block are on lines missing from this listing.
 */
6180 myri10ge_find_lastfree(void)
6182 mblk_t
*mp
= allocb(1024, 0);
6186 cmn_err(CE_WARN
, "myri10ge_find_lastfree failed\n");
6190 myri10ge_db_lastfree
= (void *)dbp
->db_lastfree
;
/*
 * Module load path.  NOTE(review): the function header is on a line
 * missing from this listing; presumably this is the _init() entry point.
 *
 * Visible sequence: optional version banner, myri10ge_find_lastfree(),
 * mac_init_ops(), creation of myri10ge_param_lock, then mod_install().
 * If mod_install() fails, the mac ops and the mutex are torn down again.
 */
6198 if (myri10ge_verbose
)
6200 "Myricom 10G driver (10GbE) version %s loading\n",
6201 MYRI10GE_VERSION_STR
);
6202 myri10ge_find_lastfree();
6203 mac_init_ops(&myri10ge_ops
, "myri10ge");
6204 mutex_init(&myri10ge_param_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
6205 if ((i
= mod_install(&modlinkage
)) != 0) {
6206 cmn_err(CE_WARN
, "mod_install returned %d\n", i
);
/* undo the two setup steps above */
6207 mac_fini_ops(&myri10ge_ops
);
6208 mutex_destroy(&myri10ge_param_lock
);
/*
 * Module unload path.  NOTE(review): the function header is on a line
 * missing from this listing; presumably this is the _fini() entry point.
 *
 * Calls mod_remove() and then releases the mac ops and
 * myri10ge_param_lock; the test on the mod_remove() result is not
 * visible here.
 */
6217 i
= mod_remove(&modlinkage
);
6221 mac_fini_ops(&myri10ge_ops
);
6222 mutex_destroy(&myri10ge_param_lock
);
/*
 * Module information entry point: forwards to mod_info() with this
 * module's modlinkage and returns its result unchanged.
 */
6227 _info(struct modinfo
*modinfop
)
6229 return (mod_info(&modlinkage
, modinfop
));
6234 * This file uses MyriGE driver indentation.
6237 * c-file-style:"sun"