4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 * Copyright 2007-2009 Myricom, Inc. All rights reserved.
29 * Use is subject to license terms.
33 static const char __idstring
[] =
34 "@(#)$Id: myri10ge.c,v 1.186 2009-06-29 13:47:22 gallatin Exp $";
38 #include "myri10ge_var.h"
39 #include "rss_eth_z8e.h"
40 #include "rss_ethp_z8e.h"
41 #include "mcp_gen_header.h"
43 #define MYRI10GE_MAX_ETHER_MTU 9014
45 #define MYRI10GE_ETH_STOPPED 0
46 #define MYRI10GE_ETH_STOPPING 1
47 #define MYRI10GE_ETH_STARTING 2
48 #define MYRI10GE_ETH_RUNNING 3
49 #define MYRI10GE_ETH_OPEN_FAILED 4
50 #define MYRI10GE_ETH_SUSPENDED_RUNNING 5
52 static int myri10ge_small_bytes
= 510;
53 static int myri10ge_intr_coal_delay
= 125;
54 static int myri10ge_flow_control
= 1;
55 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
56 static int myri10ge_nvidia_ecrc_enable
= 1;
58 static int myri10ge_mtu_override
= 0;
59 static int myri10ge_tx_copylen
= 512;
60 static int myri10ge_deassert_wait
= 1;
61 static int myri10ge_verbose
= 0;
62 static int myri10ge_watchdog_reset
= 0;
63 static int myri10ge_use_msix
= 1;
64 static int myri10ge_max_slices
= -1;
65 static int myri10ge_use_msi
= 1;
66 int myri10ge_force_firmware
= 0;
67 static boolean_t myri10ge_use_lso
= B_TRUE
;
68 static int myri10ge_rss_hash
= MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT
;
69 static int myri10ge_tx_hash
= 1;
70 static int myri10ge_lro
= 0;
71 static int myri10ge_lro_cnt
= 8;
72 int myri10ge_lro_max_aggr
= 2;
73 static int myri10ge_lso_copy
= 0;
74 static mblk_t
*myri10ge_send_wrapper(void *arg
, mblk_t
*mp
);
75 int myri10ge_tx_handles_initial
= 128;
77 static kmutex_t myri10ge_param_lock
;
78 static void* myri10ge_db_lastfree
;
80 static int myri10ge_attach(dev_info_t
*dip
, ddi_attach_cmd_t cmd
);
81 static int myri10ge_detach(dev_info_t
*dip
, ddi_detach_cmd_t cmd
);
82 static int myri10ge_quiesce(dev_info_t
*dip
);
84 DDI_DEFINE_STREAM_OPS(myri10ge_ops
, nulldev
, nulldev
, myri10ge_attach
,
85 myri10ge_detach
, nodev
, NULL
, D_MP
, NULL
, myri10ge_quiesce
);
88 static struct modldrv modldrv
= {
90 "Myricom 10G driver (10GbE)",
95 static struct modlinkage modlinkage
= {
100 unsigned char myri10ge_broadcastaddr
[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
102 static ddi_dma_attr_t myri10ge_misc_dma_attr
= {
103 DMA_ATTR_V0
, /* version number. */
104 (uint64_t)0, /* low address */
105 (uint64_t)0xffffffffffffffffULL
, /* high address */
106 (uint64_t)0x7ffffff, /* address counter max */
107 (uint64_t)4096, /* alignment */
108 (uint_t
)0x7f, /* burstsizes for 32b and 64b xfers */
109 (uint32_t)0x1, /* minimum transfer size */
110 (uint64_t)0x7fffffff, /* maximum transfer size */
111 (uint64_t)0x7fffffff, /* maximum segment size */
112 1, /* scatter/gather list length */
114 0 /* attribute flags */
118 * The Myri10GE NIC has the following constraints on receive buffers:
119 * 1) Buffers which cross a 4KB boundary must be aligned to 4KB
120 * 2) Buffers which are not aligned to 4KB must not cross a 4KB boundary
123 static ddi_dma_attr_t myri10ge_rx_jumbo_dma_attr
= {
124 DMA_ATTR_V0
, /* version number. */
125 (uint64_t)0, /* low address */
126 (uint64_t)0xffffffffffffffffULL
, /* high address */
127 (uint64_t)0x7ffffff, /* address counter max */
128 (uint64_t)4096, /* alignment */
129 (uint_t
)0x7f, /* burstsizes for 32b and 64b xfers */
130 (uint32_t)0x1, /* minimum transfer size */
131 (uint64_t)0x7fffffff, /* maximum transfer size */
132 UINT64_MAX
, /* maximum segment size */
133 1, /* scatter/gather list length */
135 0 /* attribute flags */
138 static ddi_dma_attr_t myri10ge_rx_std_dma_attr
= {
139 DMA_ATTR_V0
, /* version number. */
140 (uint64_t)0, /* low address */
141 (uint64_t)0xffffffffffffffffULL
, /* high address */
142 (uint64_t)0x7ffffff, /* address counter max */
143 #if defined sparc64 || defined __sparcv9
144 (uint64_t)4096, /* alignment */
146 (uint64_t)0x80, /* alignment */
148 (uint_t
)0x7f, /* burstsizes for 32b and 64b xfers */
149 (uint32_t)0x1, /* minimum transfer size */
150 (uint64_t)0x7fffffff, /* maximum transfer size */
151 #if defined sparc64 || defined __sparcv9
152 UINT64_MAX
, /* maximum segment size */
154 (uint64_t)0xfff, /* maximum segment size */
156 1, /* scatter/gather list length */
158 0 /* attribute flags */
161 static ddi_dma_attr_t myri10ge_tx_dma_attr
= {
162 DMA_ATTR_V0
, /* version number. */
163 (uint64_t)0, /* low address */
164 (uint64_t)0xffffffffffffffffULL
, /* high address */
165 (uint64_t)0x7ffffff, /* address counter max */
166 (uint64_t)1, /* alignment */
167 (uint_t
)0x7f, /* burstsizes for 32b and 64b xfers */
168 (uint32_t)0x1, /* minimum transfer size */
169 (uint64_t)0x7fffffff, /* maximum transfer size */
170 UINT64_MAX
, /* maximum segment size */
171 INT32_MAX
, /* scatter/gather list length */
173 0 /* attribute flags */
176 #if defined sparc64 || defined __sparcv9
182 struct ddi_device_acc_attr myri10ge_dev_access_attr
= {
183 DDI_DEVICE_ATTR_V0
, /* version */
184 DDI_NEVERSWAP_ACC
, /* endian flash */
186 DDI_MERGING_OK_ACC
/* data order */
192 static void myri10ge_watchdog(void *arg
);
195 int myri10ge_mtu
= MYRI10GE_MAX_ETHER_MTU
+ MXGEFW_PAD
+ VLAN_TAGSZ
;
197 int myri10ge_mtu
= ETHERMAX
+ MXGEFW_PAD
+ VLAN_TAGSZ
;
199 int myri10ge_bigbufs_initial
= 1024;
200 int myri10ge_bigbufs_max
= 4096;
204 myri10ge_dma_alloc(dev_info_t
*dip
, size_t len
,
205 ddi_dma_attr_t
*attr
, ddi_device_acc_attr_t
*accattr
,
206 uint_t alloc_flags
, int bind_flags
, struct myri10ge_dma_stuff
*dma
,
207 int warn
, int (*wait
)(caddr_t
))
211 ddi_dma_cookie_t cookie
;
215 err
= ddi_dma_alloc_handle(dip
, attr
, wait
,
217 if (err
!= DDI_SUCCESS
) {
220 "myri10ge: ddi_dma_alloc_handle failed\n");
221 goto abort_with_nothing
;
224 err
= ddi_dma_mem_alloc(dma
->handle
, len
, accattr
, alloc_flags
,
225 wait
, NULL
, &kaddr
, &real_length
,
227 if (err
!= DDI_SUCCESS
) {
230 "myri10ge: ddi_dma_mem_alloc failed\n");
231 goto abort_with_handle
;
234 err
= ddi_dma_addr_bind_handle(dma
->handle
, NULL
, kaddr
, len
,
235 bind_flags
, wait
, NULL
, &cookie
, &count
);
237 if (err
!= DDI_SUCCESS
) {
240 "myri10ge: ddi_dma_addr_bind_handle failed\n");
247 "myri10ge: got too many dma segments ");
248 goto abort_with_bind
;
250 dma
->low
= htonl(MYRI10GE_LOWPART_TO_U32(cookie
.dmac_laddress
));
251 dma
->high
= htonl(MYRI10GE_HIGHPART_TO_U32(cookie
.dmac_laddress
));
255 (void) ddi_dma_unbind_handle(dma
->handle
);
258 ddi_dma_mem_free(&dma
->acc_handle
);
261 ddi_dma_free_handle(&dma
->handle
);
264 cmn_err(CE_WARN
, "myri10ge: myri10ge_dma_alloc failed.\n ");
265 cmn_err(CE_WARN
, "args: dip=%p len=0x%lx ddi_dma_attr=%p\n",
266 (void*) dip
, len
, (void*) attr
);
268 "args: ddi_device_acc_attr=%p alloc_flags=0x%x\n",
269 (void*) accattr
, alloc_flags
);
270 cmn_err(CE_WARN
, "args: bind_flags=0x%x dmastuff=%p",
271 bind_flags
, (void*) dma
);
/*
 * Tear down the DMA allocation described by "dma": unbind the DMA
 * handle, free the memory through its access handle, and finally
 * free the DMA handle itself.  The result of the unbind call is
 * deliberately discarded.
 */
278 myri10ge_dma_free(struct myri10ge_dma_stuff
*dma
)
280 (void) ddi_dma_unbind_handle(dma
->handle
);
281 ddi_dma_mem_free(&dma
->acc_handle
);
282 ddi_dma_free_handle(&dma
->handle
);
286 myri10ge_pio_copy32(void *to
, uint32_t *from32
, size_t size
)
288 register volatile uint32_t *to32
;
291 to32
= (volatile uint32_t *) to
;
292 for (i
= (size
/ 4); i
; i
--) {
301 myri10ge_pio_copy64(void *to
, uint64_t *from64
, size_t size
)
303 register volatile uint64_t *to64
;
306 to64
= (volatile uint64_t *) to
;
307 for (i
= (size
/ 8); i
; i
--) {
316 * This routine copies memory from the host to the NIC.
317 * The "size" argument must always be a multiple of
318 * the size of long (4 or 8 bytes), and to/from must also
319 * be naturally aligned.
322 myri10ge_pio_copy(void *to
, void *from
, size_t size
)
325 ASSERT((size
% 4) == 0);
326 myri10ge_pio_copy32(to
, (uint32_t *)from
, size
);
328 ASSERT((size
% 8) == 0);
329 myri10ge_pio_copy64(to
, (uint64_t *)from
, size
);
335 * Due to various bugs in Solaris (especially bug 6186772 where the
336 * TCP/UDP checksum is calculated incorrectly on mblk chains with more
337 * than two elements), and the design bug where hardware checksums are
338 * ignored on mblk chains with more than 2 elements, we need to
339 * allocate a private pool of physically contiguous receive buffers.
343 myri10ge_jpool_init(struct myri10ge_slice_state
*ss
)
345 struct myri10ge_jpool_stuff
*jpool
= &ss
->jpool
;
347 bzero(jpool
, sizeof (*jpool
));
348 mutex_init(&jpool
->mtx
, NULL
, MUTEX_DRIVER
,
354 myri10ge_jpool_fini(struct myri10ge_slice_state
*ss
)
356 struct myri10ge_jpool_stuff
*jpool
= &ss
->jpool
;
358 if (jpool
->head
!= NULL
) {
360 "%s: BUG! myri10ge_jpool_fini called on non-empty pool\n",
363 mutex_destroy(&jpool
->mtx
);
368 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
369 * at most 32 bytes at a time, so as to avoid involving the software
370 * pio handler in the nic. We re-write the first segment's low
371 * DMA address to mark it valid only after we write the entire chunk
375 myri10ge_submit_8rx(mcp_kreq_ether_recv_t
*dst
, mcp_kreq_ether_recv_t
*src
)
/* set the BE_32(1) bit in the first descriptor's addr_low before copying */
377 src
->addr_low
|= BE_32(1);
/* PIO-copy descriptors 0-3 from src to dst */
378 myri10ge_pio_copy(dst
, src
, 4 * sizeof (*src
));
/* PIO-copy descriptors 4-7 from src to dst */
380 myri10ge_pio_copy(dst
+ 4, src
+ 4, 4 * sizeof (*src
));
/* clear the bit again in src, then rewrite the first addr_low in dst */
382 src
->addr_low
&= ~(BE_32(1));
383 dst
->addr_low
= src
->addr_low
;
388 myri10ge_pull_jpool(struct myri10ge_slice_state
*ss
)
390 struct myri10ge_jpool_stuff
*jpool
= &ss
->jpool
;
391 struct myri10ge_jpool_entry
*jtail
, *j
, *jfree
;
392 volatile uintptr_t *putp
;
398 if (jpool
->head
!= NULL
) {
400 while (j
->next
!= NULL
)
406 * iterate over all per-CPU caches, and add contents into
409 for (i
= 0; i
< MYRI10GE_MAX_CPUS
; i
++) {
410 /* take per-CPU free list */
411 putp
= (void *)&jpool
->cpu
[i
& MYRI10GE_MAX_CPU_MASK
].head
;
414 put
= atomic_swap_ulong(putp
, 0);
415 jfree
= (struct myri10ge_jpool_entry
*)put
;
424 while (j
->next
!= NULL
)
431 * Transfers buffers from the free pool to the nic
432 * Must be called holding the jpool mutex.
436 myri10ge_restock_jumbos(struct myri10ge_slice_state
*ss
)
438 struct myri10ge_jpool_stuff
*jpool
= &ss
->jpool
;
439 struct myri10ge_jpool_entry
*j
;
440 myri10ge_rx_ring_t
*rx
;
444 limit
= ss
->j_rx_cnt
+ (rx
->mask
+ 1);
446 for (i
= rx
->cnt
; i
!= limit
; i
++) {
447 idx
= i
& (rx
->mask
);
450 myri10ge_pull_jpool(ss
);
456 jpool
->head
= j
->next
;
458 rx
->shadow
[idx
].addr_low
= j
->dma
.low
;
459 rx
->shadow
[idx
].addr_high
= j
->dma
.high
;
460 /* copy 4 descriptors (32-bytes) to the mcp at a time */
461 if ((idx
& 7) == 7) {
462 myri10ge_submit_8rx(&rx
->lanai
[idx
- 7],
463 &rx
->shadow
[idx
- 7]);
470 * Transfer buffers from the nic to the free pool.
471 * Acquires and releases the jpool mutex itself.
475 myri10ge_unstock_jumbos(struct myri10ge_slice_state
*ss
)
477 struct myri10ge_jpool_stuff
*jpool
= &ss
->jpool
;
478 struct myri10ge_jpool_entry
*j
;
479 myri10ge_rx_ring_t
*rx
;
482 mutex_enter(&jpool
->mtx
);
485 for (i
= 0; i
< rx
->mask
+ 1; i
++) {
487 rx
->info
[i
].j
= NULL
;
490 j
->next
= jpool
->head
;
493 mutex_exit(&jpool
->mtx
);
499 * Free routine which is called when the mblk allocated via
500 * esballoc() is freed. Here we return the jumbo buffer
501 * to the free pool, and possibly pass some jumbo buffers
506 myri10ge_jfree_rtn(void *arg
)
508 struct myri10ge_jpool_entry
*j
= (struct myri10ge_jpool_entry
*)arg
;
509 struct myri10ge_jpool_stuff
*jpool
;
510 volatile uintptr_t *putp
;
513 jpool
= &j
->ss
->jpool
;
515 /* prepend buffer locklessly to per-CPU freelist */
516 putp
= (void *)&jpool
->cpu
[CPU
->cpu_seqid
& MYRI10GE_MAX_CPU_MASK
].head
;
520 j
->next
= (void *)old
;
521 } while (atomic_cas_ulong(putp
, old
, new) != old
);
/*
 * Destroy a single jumbo pool entry: unbind its DMA handle, free the
 * DMA memory and the DMA handle, then free the entry structure itself.
 * "j" has been freed when this returns and must not be used again.
 */
525 myri10ge_remove_jbuf(struct myri10ge_jpool_entry
*j
)
527 (void) ddi_dma_unbind_handle(j
->dma_handle
);
528 ddi_dma_mem_free(&j
->acc_handle
);
529 ddi_dma_free_handle(&j
->dma_handle
);
530 kmem_free(j
, sizeof (*j
));
535 * Allocates one physically contiguous descriptor
536 * and adds it to the jumbo buffer pool.
540 myri10ge_add_jbuf(struct myri10ge_slice_state
*ss
)
542 struct myri10ge_jpool_entry
*j
;
543 struct myri10ge_jpool_stuff
*jpool
= &ss
->jpool
;
544 ddi_dma_attr_t
*rx_dma_attr
;
546 ddi_dma_cookie_t cookie
;
550 if (myri10ge_mtu
< 2048)
551 rx_dma_attr
= &myri10ge_rx_std_dma_attr
;
553 rx_dma_attr
= &myri10ge_rx_jumbo_dma_attr
;
556 j
= (struct myri10ge_jpool_entry
*)
557 kmem_alloc(sizeof (*j
), KM_SLEEP
);
558 err
= ddi_dma_alloc_handle(ss
->mgp
->dip
, rx_dma_attr
,
559 DDI_DMA_DONTWAIT
, NULL
, &j
->dma_handle
);
560 if (err
!= DDI_SUCCESS
)
563 err
= ddi_dma_mem_alloc(j
->dma_handle
, myri10ge_mtu
,
564 &myri10ge_dev_access_attr
, DDI_DMA_STREAMING
, DDI_DMA_DONTWAIT
,
565 NULL
, &j
->buf
, &real_length
, &j
->acc_handle
);
566 if (err
!= DDI_SUCCESS
)
567 goto abort_with_handle
;
569 err
= ddi_dma_addr_bind_handle(j
->dma_handle
, NULL
, j
->buf
,
570 real_length
, DDI_DMA_READ
|DDI_DMA_STREAMING
, DDI_DMA_DONTWAIT
,
571 NULL
, &cookie
, &count
);
572 if (err
!= DDI_SUCCESS
)
576 * Make certain std MTU buffers do not cross a 4KB boundary:
578 * Setting dma_attr_align=4096 will do this, but the system
579 * will only allocate 1 RX buffer per 4KB page, rather than 2.
580 * Setting dma_attr_granular=4096 *seems* to work around this,
581 * but I'm paranoid about future systems no longer honoring
582 * this, so fall back to the safe, but memory wasting way if a
583 * buffer crosses a 4KB boundary.
586 if (rx_dma_attr
== &myri10ge_rx_std_dma_attr
&&
587 rx_dma_attr
->dma_attr_align
!= 4096) {
590 start
= MYRI10GE_LOWPART_TO_U32(cookie
.dmac_laddress
);
591 end
= start
+ myri10ge_mtu
;
592 if (((end
>> 12) != (start
>> 12)) && (start
& 4095U)) {
593 printf("std buffer crossed a 4KB boundary!\n");
594 myri10ge_remove_jbuf(j
);
595 rx_dma_attr
->dma_attr_align
= 4096;
596 rx_dma_attr
->dma_attr_seg
= UINT64_MAX
;
602 htonl(MYRI10GE_LOWPART_TO_U32(cookie
.dmac_laddress
));
604 htonl(MYRI10GE_HIGHPART_TO_U32(cookie
.dmac_laddress
));
608 j
->free_func
.free_func
= myri10ge_jfree_rtn
;
609 j
->free_func
.free_arg
= (char *)j
;
610 mutex_enter(&jpool
->mtx
);
611 j
->next
= jpool
->head
;
614 mutex_exit(&jpool
->mtx
);
618 ddi_dma_mem_free(&j
->acc_handle
);
621 ddi_dma_free_handle(&j
->dma_handle
);
624 kmem_free(j
, sizeof (*j
));
627 * If an allocation failed, perhaps it failed because it could
628 * not satisfy granularity requirement. Disable that, and
631 if (rx_dma_attr
== &myri10ge_rx_std_dma_attr
&&
632 rx_dma_attr
->dma_attr_align
!= 4096) {
634 "!alloc failed, reverting to gran=1\n");
635 rx_dma_attr
->dma_attr_align
= 4096;
636 rx_dma_attr
->dma_attr_seg
= UINT64_MAX
;
643 myri10ge_jfree_cnt(struct myri10ge_jpool_stuff
*jpool
)
646 struct myri10ge_jpool_entry
*j
;
648 mutex_enter(&jpool
->mtx
);
655 mutex_exit(&jpool
->mtx
);
660 myri10ge_add_jbufs(struct myri10ge_slice_state
*ss
, int num
, int total
)
662 struct myri10ge_jpool_stuff
*jpool
= &ss
->jpool
;
668 * if total is set, user wants "num" jbufs in the pool,
669 * otherwise the user wants "num" additional jbufs
672 if (total
&& jpool
->num_alloc
) {
673 allocated
= myri10ge_jfree_cnt(jpool
);
674 needed
= num
- allocated
;
681 err
= myri10ge_add_jbuf(ss
);
690 myri10ge_remove_jbufs(struct myri10ge_slice_state
*ss
)
692 struct myri10ge_jpool_stuff
*jpool
= &ss
->jpool
;
693 struct myri10ge_jpool_entry
*j
;
695 mutex_enter(&jpool
->mtx
);
696 myri10ge_pull_jpool(ss
);
697 while (jpool
->head
!= NULL
) {
700 jpool
->head
= j
->next
;
701 myri10ge_remove_jbuf(j
);
703 mutex_exit(&jpool
->mtx
);
707 myri10ge_carve_up_jbufs_into_small_ring(struct myri10ge_slice_state
*ss
)
709 struct myri10ge_jpool_stuff
*jpool
= &ss
->jpool
;
710 struct myri10ge_jpool_entry
*j
= NULL
;
712 uint32_t dma_low
, dma_high
;
714 unsigned int alloc_size
;
716 dma_low
= dma_high
= len
= 0;
717 alloc_size
= myri10ge_small_bytes
+ MXGEFW_PAD
;
719 for (idx
= 0; idx
< ss
->rx_small
.mask
+ 1; idx
++) {
720 /* Allocate a jumbo frame and carve it into small frames */
721 if (len
< alloc_size
) {
722 mutex_enter(&jpool
->mtx
);
723 /* remove jumbo from freelist */
725 jpool
->head
= j
->next
;
726 /* place it onto small list */
727 j
->next
= ss
->small_jpool
;
729 mutex_exit(&jpool
->mtx
);
731 dma_low
= ntohl(j
->dma
.low
);
732 dma_high
= ntohl(j
->dma
.high
);
735 ss
->rx_small
.info
[idx
].ptr
= ptr
;
736 ss
->rx_small
.shadow
[idx
].addr_low
= htonl(dma_low
);
737 ss
->rx_small
.shadow
[idx
].addr_high
= htonl(dma_high
);
740 dma_low
+= alloc_size
;
745 * Return the jumbo bufs we carved up for small to the jumbo pool
749 myri10ge_release_small_jbufs(struct myri10ge_slice_state
*ss
)
751 struct myri10ge_jpool_stuff
*jpool
= &ss
->jpool
;
752 struct myri10ge_jpool_entry
*j
= NULL
;
754 mutex_enter(&jpool
->mtx
);
755 while (ss
->small_jpool
!= NULL
) {
757 ss
->small_jpool
= j
->next
;
758 j
->next
= jpool
->head
;
761 mutex_exit(&jpool
->mtx
);
762 ss
->jbufs_for_smalls
= 0;
766 myri10ge_add_tx_handle(struct myri10ge_slice_state
*ss
)
768 myri10ge_tx_ring_t
*tx
= &ss
->tx
;
769 struct myri10ge_priv
*mgp
= ss
->mgp
;
770 struct myri10ge_tx_dma_handle
*handle
;
773 handle
= kmem_zalloc(sizeof (*handle
), KM_SLEEP
);
774 err
= ddi_dma_alloc_handle(mgp
->dip
,
775 &myri10ge_tx_dma_attr
,
779 static int limit
= 0;
781 cmn_err(CE_WARN
, "%s: Falled to alloc tx dma handle\n",
784 kmem_free(handle
, sizeof (*handle
));
787 mutex_enter(&tx
->handle_lock
);
788 MYRI10GE_SLICE_STAT_INC(tx_handles_alloced
);
789 handle
->next
= tx
->free_tx_handles
;
790 tx
->free_tx_handles
= handle
;
791 mutex_exit(&tx
->handle_lock
);
792 return (DDI_SUCCESS
);
796 myri10ge_remove_tx_handles(struct myri10ge_slice_state
*ss
)
798 myri10ge_tx_ring_t
*tx
= &ss
->tx
;
799 struct myri10ge_tx_dma_handle
*handle
;
800 mutex_enter(&tx
->handle_lock
);
802 handle
= tx
->free_tx_handles
;
803 while (handle
!= NULL
) {
804 tx
->free_tx_handles
= handle
->next
;
805 ddi_dma_free_handle(&handle
->h
);
806 kmem_free(handle
, sizeof (*handle
));
807 handle
= tx
->free_tx_handles
;
808 MYRI10GE_SLICE_STAT_DEC(tx_handles_alloced
);
810 mutex_exit(&tx
->handle_lock
);
811 if (MYRI10GE_SLICE_STAT(tx_handles_alloced
) != 0) {
812 cmn_err(CE_WARN
, "%s: %d tx dma handles allocated at close\n",
814 (int)MYRI10GE_SLICE_STAT(tx_handles_alloced
));
/*
 * Return a chain of tx DMA handles to the ring's free list.  The chain
 * running from list->head to list->tail is spliced onto the front of
 * tx->free_tx_handles while holding tx->handle_lock.  list->tail is
 * dereferenced unconditionally, so the list must not be empty.
 */
819 myri10ge_free_tx_handles(myri10ge_tx_ring_t
*tx
,
820 struct myri10ge_tx_dma_handle_head
*list
)
822 mutex_enter(&tx
->handle_lock
);
/* link the old free list behind the returned chain ... */
823 list
->tail
->next
= tx
->free_tx_handles
;
/* ... and make the returned chain the new head of the free list */
824 tx
->free_tx_handles
= list
->head
;
825 mutex_exit(&tx
->handle_lock
);
829 myri10ge_free_tx_handle_slist(myri10ge_tx_ring_t
*tx
,
830 struct myri10ge_tx_dma_handle
*handle
)
832 struct myri10ge_tx_dma_handle_head list
;
838 while (handle
!= NULL
) {
840 handle
= handle
->next
;
842 myri10ge_free_tx_handles(tx
, &list
);
846 myri10ge_alloc_tx_handles(struct myri10ge_slice_state
*ss
, int count
,
847 struct myri10ge_tx_dma_handle
**ret
)
849 myri10ge_tx_ring_t
*tx
= &ss
->tx
;
850 struct myri10ge_tx_dma_handle
*handle
;
853 mutex_enter(&tx
->handle_lock
);
854 for (i
= 0; i
< count
; i
++) {
855 handle
= tx
->free_tx_handles
;
856 while (handle
== NULL
) {
857 mutex_exit(&tx
->handle_lock
);
858 err
= myri10ge_add_tx_handle(ss
);
859 if (err
!= DDI_SUCCESS
) {
860 goto abort_with_handles
;
862 mutex_enter(&tx
->handle_lock
);
863 handle
= tx
->free_tx_handles
;
865 tx
->free_tx_handles
= handle
->next
;
869 mutex_exit(&tx
->handle_lock
);
870 return (DDI_SUCCESS
);
873 myri10ge_free_tx_handle_slist(tx
, *ret
);
879 * Frees DMA resources associated with the send ring
882 myri10ge_unprepare_tx_ring(struct myri10ge_slice_state
*ss
)
884 myri10ge_tx_ring_t
*tx
;
885 struct myri10ge_tx_dma_handle_head handles
;
892 for (idx
= 0; idx
< ss
->tx
.mask
+ 1; idx
++) {
893 if (tx
->info
[idx
].m
) {
894 (void) ddi_dma_unbind_handle(tx
->info
[idx
].handle
->h
);
895 handles
.head
= tx
->info
[idx
].handle
;
896 if (handles
.tail
== NULL
)
897 handles
.tail
= tx
->info
[idx
].handle
;
898 freeb(tx
->info
[idx
].m
);
900 tx
->info
[idx
].handle
= 0;
902 tx
->cp
[idx
].va
= NULL
;
903 myri10ge_dma_free(&tx
->cp
[idx
].dma
);
905 bytes
= sizeof (*tx
->cp
) * (tx
->mask
+ 1);
906 kmem_free(tx
->cp
, bytes
);
908 if (handles
.head
!= NULL
)
909 myri10ge_free_tx_handles(tx
, &handles
);
910 myri10ge_remove_tx_handles(ss
);
914 * Allocates DMA handles associated with the send ring
917 myri10ge_prepare_tx_ring(struct myri10ge_slice_state
*ss
)
919 struct myri10ge_tx_dma_handle
*handles
;
923 bytes
= sizeof (*ss
->tx
.cp
) * (ss
->tx
.mask
+ 1);
924 ss
->tx
.cp
= kmem_zalloc(bytes
, KM_SLEEP
);
925 if (ss
->tx
.cp
== NULL
) {
927 "%s: Failed to allocate tx copyblock storage\n",
929 return (DDI_FAILURE
);
933 /* allocate the TX copyblocks */
934 for (h
= 0; h
< ss
->tx
.mask
+ 1; h
++) {
935 ss
->tx
.cp
[h
].va
= myri10ge_dma_alloc(ss
->mgp
->dip
,
936 4096, &myri10ge_rx_jumbo_dma_attr
,
937 &myri10ge_dev_access_attr
, DDI_DMA_STREAMING
,
938 DDI_DMA_WRITE
|DDI_DMA_STREAMING
, &ss
->tx
.cp
[h
].dma
, 1,
940 if (ss
->tx
.cp
[h
].va
== NULL
) {
941 cmn_err(CE_WARN
, "%s: Failed to allocate tx "
942 "copyblock %d\n", ss
->mgp
->name
, h
);
943 goto abort_with_copyblocks
;
946 /* pre-allocate transmit handles */
948 (void) myri10ge_alloc_tx_handles(ss
, myri10ge_tx_handles_initial
,
951 myri10ge_free_tx_handle_slist(&ss
->tx
, handles
);
953 return (DDI_SUCCESS
);
955 abort_with_copyblocks
:
958 myri10ge_dma_free(&ss
->tx
.cp
[h
].dma
);
961 bytes
= sizeof (*ss
->tx
.cp
) * (ss
->tx
.mask
+ 1);
962 kmem_free(ss
->tx
.cp
, bytes
);
964 return (DDI_FAILURE
);
968 * The eeprom strings on the lanaiX have the format
971 * PT:ddd mmm xx xx:xx:xx xx\0
972 * PV:ddd mmm xx xx:xx:xx xx\0
975 myri10ge_read_mac_addr(struct myri10ge_priv
*mgp
)
977 #define MYRI10GE_NEXT_STRING(p) while (ptr < limit && *ptr++)
978 #define myri10ge_digit(c) (((c) >= '0' && (c) <= '9') ? ((c) - '0') : \
979 (((c) >= 'A' && (c) <= 'F') ? (10 + (c) - 'A') : \
980 (((c) >= 'a' && (c) <= 'f') ? (10 + (c) - 'a') : -1)))
985 ptr
= mgp
->eeprom_strings
;
986 limit
= mgp
->eeprom_strings
+ MYRI10GE_EEPROM_STRINGS_SIZE
;
988 while (*ptr
!= '\0' && ptr
< limit
) {
989 if (memcmp(ptr
, "MAC=", 4) == 0) {
991 if (myri10ge_verbose
)
992 printf("%s: mac address = %s\n", mgp
->name
,
994 mgp
->mac_addr_string
= ptr
;
995 for (i
= 0; i
< 6; i
++) {
996 if ((ptr
+ 2) > limit
)
999 if (*(ptr
+1) == ':') {
1001 lv
= myri10ge_digit(*ptr
); ptr
++;
1003 hv
= myri10ge_digit(*ptr
); ptr
++;
1004 lv
= myri10ge_digit(*ptr
); ptr
++;
1006 mgp
->mac_addr
[i
] = (hv
<< 4) | lv
;
1010 if (memcmp((const void *)ptr
, "SN=", 3) == 0) {
1012 mgp
->sn_str
= (char *)ptr
;
1014 if (memcmp((const void *)ptr
, "PC=", 3) == 0) {
1016 mgp
->pc_str
= (char *)ptr
;
1018 MYRI10GE_NEXT_STRING(ptr
);
1024 cmn_err(CE_WARN
, "%s: failed to parse eeprom_strings", mgp
->name
);
1030 * Determine the register set containing the PCI resource we
1031 * want to map: the memory-mappable part of the interface. We do
1032 * this by scanning the DDI "reg" property of the interface,
1033 * which is an array of mx_ddi_reg_set structures.
1036 myri10ge_reg_set(dev_info_t
*dip
, int *reg_set
, int *span
,
1037 unsigned long *busno
, unsigned long *devno
,
1038 unsigned long *funcno
)
1041 #define REGISTER_NUMBER(ip) (ip[0] >> 0 & 0xff)
1042 #define FUNCTION_NUMBER(ip) (ip[0] >> 8 & 0x07)
1043 #define DEVICE_NUMBER(ip) (ip[0] >> 11 & 0x1f)
1044 #define BUS_NUMBER(ip) (ip[0] >> 16 & 0xff)
1045 #define ADDRESS_SPACE(ip) (ip[0] >> 24 & 0x03)
1046 #define PCI_ADDR_HIGH(ip) (ip[1])
1047 #define PCI_ADDR_LOW(ip) (ip[2])
1048 #define PCI_SPAN_HIGH(ip) (ip[3])
1049 #define PCI_SPAN_LOW(ip) (ip[4])
1051 #define MX_DDI_REG_SET_32_BIT_MEMORY_SPACE 2
1052 #define MX_DDI_REG_SET_64_BIT_MEMORY_SPACE 3
1055 uint32_t nelementsp
;
1057 #ifdef MYRI10GE_REGSET_VERBOSE
1058 char *address_space_name
[] = { "Configuration Space",
1060 "32-bit Memory Space",
1061 "64-bit Memory Space"
1065 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY
, dip
, DDI_PROP_DONTPASS
,
1066 "reg", &data
, &nelementsp
) != DDI_SUCCESS
) {
1067 printf("Could not determine register set.\n");
1071 #ifdef MYRI10GE_REGSET_VERBOSE
1072 printf("There are %d register sets.\n", nelementsp
/ 5);
1075 printf("Didn't find any \"reg\" properties.\n");
1076 ddi_prop_free(data
);
1080 /* Scan for the register number. */
1082 *busno
= BUS_NUMBER(rs
);
1083 *devno
= DEVICE_NUMBER(rs
);
1084 *funcno
= FUNCTION_NUMBER(rs
);
1086 #ifdef MYRI10GE_REGSET_VERBOSE
1087 printf("*** Scanning for register number.\n");
1089 for (i
= 0; i
< nelementsp
/ 5; i
++) {
1091 #ifdef MYRI10GE_REGSET_VERBOSE
1092 printf("Examining register set %d:\n", i
);
1093 printf(" Register number = %d.\n", REGISTER_NUMBER(rs
));
1094 printf(" Function number = %d.\n", FUNCTION_NUMBER(rs
));
1095 printf(" Device number = %d.\n", DEVICE_NUMBER(rs
));
1096 printf(" Bus number = %d.\n", BUS_NUMBER(rs
));
1097 printf(" Address space = %d (%s ).\n", ADDRESS_SPACE(rs
),
1098 address_space_name
[ADDRESS_SPACE(rs
)]);
1099 printf(" pci address 0x%08x %08x\n", PCI_ADDR_HIGH(rs
),
1101 printf(" pci span 0x%08x %08x\n", PCI_SPAN_HIGH(rs
),
1104 /* We are looking for a memory property. */
1106 if (ADDRESS_SPACE(rs
) == MX_DDI_REG_SET_64_BIT_MEMORY_SPACE
||
1107 ADDRESS_SPACE(rs
) == MX_DDI_REG_SET_32_BIT_MEMORY_SPACE
) {
1110 #ifdef MYRI10GE_REGSET_VERBOSE
1111 printf("%s uses register set %d.\n",
1112 address_space_name
[ADDRESS_SPACE(rs
)], *reg_set
);
1115 *span
= (PCI_SPAN_LOW(rs
));
1116 #ifdef MYRI10GE_REGSET_VERBOSE
1117 printf("Board span is 0x%x\n", *span
);
1123 ddi_prop_free(data
);
1125 /* If no match, fail. */
1126 if (i
>= nelementsp
/ 5) {
1135 myri10ge_load_firmware_from_zlib(struct myri10ge_priv
*mgp
, uint32_t *limit
)
1137 void *inflate_buffer
;
1139 size_t sram_size
= mgp
->sram_size
- MYRI10GE_EEPROM_STRINGS_SIZE
;
1141 mcp_gen_header_t
*hdr
;
1142 unsigned hdr_offset
, i
;
1145 *limit
= 0; /* -Wuninitialized */
1148 inflate_buffer
= kmem_zalloc(sram_size
, KM_NOSLEEP
);
1149 if (!inflate_buffer
) {
1151 "%s: Could not allocate buffer to inflate mcp\n",
1156 destlen
= sram_size
;
1157 rv
= z_uncompress(inflate_buffer
, &destlen
, mgp
->eth_z8e
,
1158 mgp
->eth_z8e_length
);
1161 cmn_err(CE_WARN
, "%s: Could not inflate mcp: %s\n",
1162 mgp
->name
, z_strerror(rv
));
1167 *limit
= (uint32_t)destlen
;
1169 hdr_offset
= htonl(*(uint32_t *)(void *)((char *)inflate_buffer
+
1170 MCP_HEADER_PTR_OFFSET
));
1171 hdr
= (void *)((char *)inflate_buffer
+ hdr_offset
);
1172 if (ntohl(hdr
->mcp_type
) != MCP_TYPE_ETH
) {
1173 cmn_err(CE_WARN
, "%s: Bad firmware type: 0x%x\n", mgp
->name
,
1174 ntohl(hdr
->mcp_type
));
1179 /* save firmware version for kstat */
1180 (void) strncpy(mgp
->fw_version
, hdr
->version
, sizeof (mgp
->fw_version
));
1181 if (myri10ge_verbose
)
1182 printf("%s: firmware id: %s\n", mgp
->name
, hdr
->version
);
1184 /* Copy the inflated firmware to NIC SRAM. */
1185 for (i
= 0; i
< *limit
; i
+= 256) {
1186 myri10ge_pio_copy((char *)mgp
->sram
+ MYRI10GE_FW_OFFSET
+ i
,
1187 (char *)inflate_buffer
+ i
,
1188 min(256U, (unsigned)(*limit
- i
)));
1190 (void) *(int *)(void *)mgp
->sram
;
1195 kmem_free(inflate_buffer
, sram_size
);
1203 myri10ge_send_cmd(struct myri10ge_priv
*mgp
, uint32_t cmd
,
1204 myri10ge_cmd_t
*data
)
1207 char buf_bytes
[sizeof (*buf
) + 8];
1208 volatile mcp_cmd_response_t
*response
= mgp
->cmd
;
1209 volatile char *cmd_addr
=
1210 (volatile char *)mgp
->sram
+ MXGEFW_ETH_CMD
;
1211 int sleep_total
= 0;
1213 /* ensure buf is aligned to 8 bytes */
1214 buf
= (mcp_cmd_t
*)((unsigned long)(buf_bytes
+ 7) & ~7UL);
1216 buf
->data0
= htonl(data
->data0
);
1217 buf
->data1
= htonl(data
->data1
);
1218 buf
->data2
= htonl(data
->data2
);
1219 buf
->cmd
= htonl(cmd
);
1220 buf
->response_addr
.low
= mgp
->cmd_dma
.low
;
1221 buf
->response_addr
.high
= mgp
->cmd_dma
.high
;
1222 mutex_enter(&mgp
->cmd_lock
);
1223 response
->result
= 0xffffffff;
1226 myri10ge_pio_copy((void *)cmd_addr
, buf
, sizeof (*buf
));
1228 /* wait up to 20ms */
1229 for (sleep_total
= 0; sleep_total
< 20; sleep_total
++) {
1231 if (response
->result
!= 0xffffffff) {
1232 if (response
->result
== 0) {
1233 data
->data0
= ntohl(response
->data
);
1234 mutex_exit(&mgp
->cmd_lock
);
1236 } else if (ntohl(response
->result
)
1237 == MXGEFW_CMD_UNKNOWN
) {
1238 mutex_exit(&mgp
->cmd_lock
);
1240 } else if (ntohl(response
->result
)
1241 == MXGEFW_CMD_ERROR_UNALIGNED
) {
1242 mutex_exit(&mgp
->cmd_lock
);
1246 "%s: command %d failed, result = %d\n",
1247 mgp
->name
, cmd
, ntohl(response
->result
));
1248 mutex_exit(&mgp
->cmd_lock
);
1254 mutex_exit(&mgp
->cmd_lock
);
1255 cmn_err(CE_WARN
, "%s: command %d timed out, result = %d\n",
1256 mgp
->name
, cmd
, ntohl(response
->result
));
1261 * Enable or disable periodic RDMAs from the host to make certain
1262 * chipsets resend dropped PCIe messages
1266 myri10ge_dummy_rdma(struct myri10ge_priv
*mgp
, int enable
)
1269 volatile uint32_t *confirm
;
1270 volatile char *submit
;
1274 buf
= (uint32_t *)((unsigned long)(buf_bytes
+ 7) & ~7UL);
1276 /* clear confirmation addr */
1277 confirm
= (volatile uint32_t *)mgp
->cmd
;
1282 * send an rdma command to the PCIe engine, and wait for the
1283 * response in the confirmation address. The firmware should
1284 * write a -1 there to indicate it is alive and well
1287 buf
[0] = mgp
->cmd_dma
.high
; /* confirm addr MSW */
1288 buf
[1] = mgp
->cmd_dma
.low
; /* confirm addr LSW */
1289 buf
[2] = htonl(0xffffffff); /* confirm data */
1290 buf
[3] = htonl(mgp
->cmd_dma
.high
); /* dummy addr MSW */
1291 buf
[4] = htonl(mgp
->cmd_dma
.low
); /* dummy addr LSW */
1292 buf
[5] = htonl(enable
); /* enable? */
1295 submit
= (volatile char *)(mgp
->sram
+ MXGEFW_BOOT_DUMMY_RDMA
);
1297 myri10ge_pio_copy((char *)submit
, buf
, 64);
1302 while (*confirm
!= 0xffffffff && i
< 20) {
1306 if (*confirm
!= 0xffffffff) {
1307 cmn_err(CE_WARN
, "%s: dummy rdma %s failed (%p = 0x%x)",
1309 (enable
? "enable" : "disable"), (void*) confirm
, *confirm
);
1314 myri10ge_load_firmware(struct myri10ge_priv
*mgp
)
1317 volatile uint32_t *confirm
;
1318 volatile char *submit
;
1320 uint32_t *buf
, size
;
1323 buf
= (uint32_t *)((unsigned long)(buf_bytes
+ 7) & ~7UL);
1325 status
= myri10ge_load_firmware_from_zlib(mgp
, &size
);
1327 cmn_err(CE_WARN
, "%s: firmware loading failed\n", mgp
->name
);
1331 /* clear confirmation addr */
1332 confirm
= (volatile uint32_t *)mgp
->cmd
;
1337 * send a reload command to the bootstrap MCP, and wait for the
1338 * response in the confirmation address. The firmware should
1339 * write a -1 there to indicate it is alive and well
1342 buf
[0] = mgp
->cmd_dma
.high
; /* confirm addr MSW */
1343 buf
[1] = mgp
->cmd_dma
.low
; /* confirm addr LSW */
1344 buf
[2] = htonl(0xffffffff); /* confirm data */
1347 * FIX: All newest firmware should un-protect the bottom of
1348 * the sram before handoff. However, the very first interfaces
1349 * do not. Therefore the handoff copy must skip the first 8 bytes
1351 buf
[3] = htonl(MYRI10GE_FW_OFFSET
+ 8); /* where the code starts */
1352 buf
[4] = htonl(size
- 8); /* length of code */
1353 buf
[5] = htonl(8); /* where to copy to */
1354 buf
[6] = htonl(0); /* where to jump to */
1356 submit
= (volatile char *)(mgp
->sram
+ MXGEFW_BOOT_HANDOFF
);
1358 myri10ge_pio_copy((char *)submit
, buf
, 64);
1363 while (*confirm
!= 0xffffffff && i
< 1000) {
1367 if (*confirm
!= 0xffffffff) {
1368 cmn_err(CE_WARN
, "%s: handoff failed (%p = 0x%x)",
1369 mgp
->name
, (void *) confirm
, *confirm
);
1373 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_GET_RX_RING_SIZE
, &cmd
);
1375 cmn_err(CE_WARN
, "%s: failed MXGEFW_CMD_GET_RX_RING_SIZE\n",
1380 mgp
->max_intr_slots
= 2 * (cmd
.data0
/ sizeof (mcp_dma_addr_t
));
1381 myri10ge_dummy_rdma(mgp
, 1);
1386 myri10ge_m_unicst(void *arg
, const uint8_t *addr
)
1388 struct myri10ge_priv
*mgp
= arg
;
1392 cmd
.data0
= ((addr
[0] << 24) | (addr
[1] << 16)
1393 | (addr
[2] << 8) | addr
[3]);
1395 cmd
.data1
= ((addr
[4] << 8) | (addr
[5]));
1397 status
= myri10ge_send_cmd(mgp
, MXGEFW_SET_MAC_ADDRESS
, &cmd
);
1398 if (status
== 0 && (addr
!= mgp
->mac_addr
))
1399 (void) memcpy(mgp
->mac_addr
, addr
, sizeof (mgp
->mac_addr
));
1405 myri10ge_change_pause(struct myri10ge_priv
*mgp
, int pause
)
1411 status
= myri10ge_send_cmd(mgp
, MXGEFW_ENABLE_FLOW_CONTROL
,
1414 status
= myri10ge_send_cmd(mgp
, MXGEFW_DISABLE_FLOW_CONTROL
,
1418 cmn_err(CE_WARN
, "%s: Failed to set flow control mode\n",
1427 myri10ge_change_promisc(struct myri10ge_priv
*mgp
, int promisc
)
1433 status
= myri10ge_send_cmd(mgp
, MXGEFW_ENABLE_PROMISC
, &cmd
);
1435 status
= myri10ge_send_cmd(mgp
, MXGEFW_DISABLE_PROMISC
, &cmd
);
1438 cmn_err(CE_WARN
, "%s: Failed to set promisc mode\n",
1444 myri10ge_dma_test(struct myri10ge_priv
*mgp
, int test_type
)
1450 struct myri10ge_dma_stuff dmabench_dma
;
1454 * Run a small DMA test.
1455 * The magic multipliers to the length tell the firmware
1456 * tp do DMA read, write, or read+write tests. The
1457 * results are returned in cmd.data0. The upper 16
1458 * bits or the return is the number of transfers completed.
1459 * The lower 16 bits is the time in 0.5us ticks that the
1460 * transfers took to complete
1463 len
= mgp
->tx_boundary
;
1465 dmabench
= myri10ge_dma_alloc(mgp
->dip
, len
,
1466 &myri10ge_rx_jumbo_dma_attr
, &myri10ge_dev_access_attr
,
1467 DDI_DMA_STREAMING
, DDI_DMA_RDWR
|DDI_DMA_STREAMING
,
1468 &dmabench_dma
, 1, DDI_DMA_DONTWAIT
);
1469 mgp
->read_dma
= mgp
->write_dma
= mgp
->read_write_dma
= 0;
1470 if (dmabench
== NULL
) {
1471 cmn_err(CE_WARN
, "%s dma benchmark aborted\n", mgp
->name
);
1475 cmd
.data0
= ntohl(dmabench_dma
.low
);
1476 cmd
.data1
= ntohl(dmabench_dma
.high
);
1477 cmd
.data2
= len
* 0x10000;
1478 status
= myri10ge_send_cmd(mgp
, test_type
, &cmd
);
1483 mgp
->read_dma
= ((cmd
.data0
>>16) * len
* 2) / (cmd
.data0
& 0xffff);
1485 cmd
.data0
= ntohl(dmabench_dma
.low
);
1486 cmd
.data1
= ntohl(dmabench_dma
.high
);
1487 cmd
.data2
= len
* 0x1;
1488 status
= myri10ge_send_cmd(mgp
, test_type
, &cmd
);
1493 mgp
->write_dma
= ((cmd
.data0
>>16) * len
* 2) / (cmd
.data0
& 0xffff);
1495 cmd
.data0
= ntohl(dmabench_dma
.low
);
1496 cmd
.data1
= ntohl(dmabench_dma
.high
);
1497 cmd
.data2
= len
* 0x10001;
1498 status
= myri10ge_send_cmd(mgp
, test_type
, &cmd
);
1500 test
= "read/write";
1503 mgp
->read_write_dma
= ((cmd
.data0
>>16) * len
* 2 * 2) /
1504 (cmd
.data0
& 0xffff);
1508 myri10ge_dma_free(&dmabench_dma
);
1509 if (status
!= 0 && test_type
!= MXGEFW_CMD_UNALIGNED_TEST
)
1510 cmn_err(CE_WARN
, "%s %s dma benchmark failed\n", mgp
->name
,
1516 myri10ge_reset(struct myri10ge_priv
*mgp
)
1519 struct myri10ge_nic_stat
*ethstat
;
1520 struct myri10ge_slice_state
*ss
;
1524 /* send a reset command to the card to see if it is alive */
1525 (void) memset(&cmd
, 0, sizeof (cmd
));
1526 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_RESET
, &cmd
);
1528 cmn_err(CE_WARN
, "%s: failed reset\n", mgp
->name
);
1532 /* Now exchange information about interrupts */
1534 bytes
= mgp
->max_intr_slots
* sizeof (*mgp
->ss
[0].rx_done
.entry
);
1535 cmd
.data0
= (uint32_t)bytes
;
1536 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_SET_INTRQ_SIZE
, &cmd
);
1539 * Even though we already know how many slices are supported
1540 * via myri10ge_probe_slices() MXGEFW_CMD_GET_MAX_RSS_QUEUES
1541 * has magic side effects, and must be called after a reset.
1542 * It must be called prior to calling any RSS related cmds,
1543 * including assigning an interrupt queue for anything but
1544 * slice 0. It must also be called *after*
1545 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1546 * the firmware to compute offsets.
1549 if (mgp
->num_slices
> 1) {
1551 /* ask the maximum number of slices it supports */
1552 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_GET_MAX_RSS_QUEUES
,
1556 "%s: failed to get number of slices\n",
1562 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1563 * to setting up the interrupt queue DMA
1566 cmd
.data0
= mgp
->num_slices
;
1567 cmd
.data1
= MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE
|
1568 MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES
;
1569 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_ENABLE_RSS_QUEUES
,
1573 "%s: failed to set number of slices\n",
1578 for (i
= 0; i
< mgp
->num_slices
; i
++) {
1580 cmd
.data0
= ntohl(ss
->rx_done
.dma
.low
);
1581 cmd
.data1
= ntohl(ss
->rx_done
.dma
.high
);
1583 status
|= myri10ge_send_cmd(mgp
, MXGEFW_CMD_SET_INTRQ_DMA
,
1587 status
|= myri10ge_send_cmd(mgp
, MXGEFW_CMD_GET_IRQ_ACK_OFFSET
, &cmd
);
1588 for (i
= 0; i
< mgp
->num_slices
; i
++) {
1590 ss
->irq_claim
= (volatile unsigned int *)
1591 (void *)(mgp
->sram
+ cmd
.data0
+ 8 * i
);
1594 if (mgp
->ddi_intr_type
== DDI_INTR_TYPE_FIXED
) {
1595 status
|= myri10ge_send_cmd(mgp
,
1596 MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET
, &cmd
);
1597 mgp
->irq_deassert
= (uint32_t *)(void *)(mgp
->sram
+ cmd
.data0
);
1600 status
|= myri10ge_send_cmd(mgp
,
1601 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET
, &cmd
);
1602 mgp
->intr_coal_delay_ptr
= (uint32_t *)(void *)(mgp
->sram
+ cmd
.data0
);
1605 cmn_err(CE_WARN
, "%s: failed set interrupt parameters\n",
1610 *mgp
->intr_coal_delay_ptr
= htonl(mgp
->intr_coal_delay
);
1611 (void) myri10ge_dma_test(mgp
, MXGEFW_DMA_TEST
);
1613 /* reset mcp/driver shared state back to 0 */
1615 for (i
= 0; i
< mgp
->num_slices
; i
++) {
1617 bytes
= mgp
->max_intr_slots
*
1618 sizeof (*mgp
->ss
[0].rx_done
.entry
);
1619 (void) memset(ss
->rx_done
.entry
, 0, bytes
);
1622 ss
->tx
.pkt_done
= 0;
1624 ss
->rx_small
.cnt
= 0;
1625 ss
->rx_done
.idx
= 0;
1626 ss
->rx_done
.cnt
= 0;
1628 ss
->tx
.watchdog_done
= 0;
1629 ss
->tx
.watchdog_req
= 0;
1631 ss
->tx
.activate
= 0;
1633 mgp
->watchdog_rx_pause
= 0;
1634 if (mgp
->ksp_stat
!= NULL
) {
1635 ethstat
= (struct myri10ge_nic_stat
*)mgp
->ksp_stat
->ks_data
;
1636 ethstat
->link_changes
.value
.ul
= 0;
1638 status
= myri10ge_m_unicst(mgp
, mgp
->mac_addr
);
1639 myri10ge_change_promisc(mgp
, 0);
1640 (void) myri10ge_change_pause(mgp
, mgp
->pause
);
1645 myri10ge_init_toeplitz(struct myri10ge_priv
*mgp
)
1654 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_GET_RSS_KEY_OFFSET
,
1657 cmn_err(CE_WARN
, "%s: failed to get rss key\n",
1661 myri10ge_pio_copy32(mgp
->rss_key
,
1662 (uint32_t *)(void*)((char *)mgp
->sram
+ cmd
.data0
),
1663 sizeof (mgp
->rss_key
));
1665 mgp
->toeplitz_hash_table
= kmem_alloc(sizeof (uint32_t) * 12 * 256,
1667 key
= (uint8_t *)mgp
->rss_key
;
1669 for (b
= 0; b
< 12; b
++) {
1670 for (s
= 0; s
< 8; s
++) {
1671 /* Bits: b*8+s, ..., b*8+s+31 */
1673 for (j
= 0; j
< 32; j
++) {
1675 bit
= 0x1 & (key
[bit
/ 8] >> (7 -(bit
& 0x7)));
1676 k
[s
] |= bit
<< (31 - j
);
1680 for (i
= 0; i
<= 0xff; i
++) {
1682 if (i
& (1 << 7)) { tmp
^= k
[0]; }
1683 if (i
& (1 << 6)) { tmp
^= k
[1]; }
1684 if (i
& (1 << 5)) { tmp
^= k
[2]; }
1685 if (i
& (1 << 4)) { tmp
^= k
[3]; }
1686 if (i
& (1 << 3)) { tmp
^= k
[4]; }
1687 if (i
& (1 << 2)) { tmp
^= k
[5]; }
1688 if (i
& (1 << 1)) { tmp
^= k
[6]; }
1689 if (i
& (1 << 0)) { tmp
^= k
[7]; }
1690 mgp
->toeplitz_hash_table
[t
++] = tmp
;
1696 static inline struct myri10ge_slice_state
*
1697 myri10ge_toeplitz_send_hash(struct myri10ge_priv
*mgp
, struct ip
*ip
)
1700 uint32_t saddr
, daddr
;
1701 uint32_t hash
, slice
;
1702 uint32_t *table
= mgp
->toeplitz_hash_table
;
1706 * Note hashing order is reversed from how it is done
1707 * in the NIC, so as to generate the same hash value
1708 * for the connection to try to keep connections CPU local
1711 /* hash on IPv4 src/dst address */
1712 saddr
= ntohl(ip
->ip_src
.s_addr
);
1713 daddr
= ntohl(ip
->ip_dst
.s_addr
);
1714 hash
= table
[(256 * 0) + ((daddr
>> 24) & 0xff)];
1715 hash
^= table
[(256 * 1) + ((daddr
>> 16) & 0xff)];
1716 hash
^= table
[(256 * 2) + ((daddr
>> 8) & 0xff)];
1717 hash
^= table
[(256 * 3) + ((daddr
) & 0xff)];
1718 hash
^= table
[(256 * 4) + ((saddr
>> 24) & 0xff)];
1719 hash
^= table
[(256 * 5) + ((saddr
>> 16) & 0xff)];
1720 hash
^= table
[(256 * 6) + ((saddr
>> 8) & 0xff)];
1721 hash
^= table
[(256 * 7) + ((saddr
) & 0xff)];
1722 /* hash on TCP port, if required */
1723 if ((myri10ge_rss_hash
& MXGEFW_RSS_HASH_TYPE_TCP_IPV4
) &&
1724 ip
->ip_p
== IPPROTO_TCP
) {
1725 hdr
= (struct tcphdr
*)(void *)
1726 (((uint8_t *)ip
) + (ip
->ip_hl
<< 2));
1727 src
= ntohs(hdr
->th_sport
);
1728 dst
= ntohs(hdr
->th_dport
);
1730 hash
^= table
[(256 * 8) + ((dst
>> 8) & 0xff)];
1731 hash
^= table
[(256 * 9) + ((dst
) & 0xff)];
1732 hash
^= table
[(256 * 10) + ((src
>> 8) & 0xff)];
1733 hash
^= table
[(256 * 11) + ((src
) & 0xff)];
1735 slice
= (mgp
->num_slices
- 1) & hash
;
1736 return (&mgp
->ss
[slice
]);
1740 static inline struct myri10ge_slice_state
*
1741 myri10ge_simple_send_hash(struct myri10ge_priv
*mgp
, struct ip
*ip
)
1744 uint32_t slice
, hash_val
;
1747 if (ip
->ip_p
!= IPPROTO_TCP
&& ip
->ip_p
!= IPPROTO_UDP
) {
1748 return (&mgp
->ss
[0]);
1750 hdr
= (struct tcphdr
*)(void *)(((uint8_t *)ip
) + (ip
->ip_hl
<< 2));
1753 * Use the second byte of the *destination* address for
1754 * MXGEFW_RSS_HASH_TYPE_SRC_PORT, so as to match NIC's hashing
1756 hash_val
= ntohs(hdr
->th_dport
) & 0xff;
1757 if (myri10ge_rss_hash
== MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT
)
1758 hash_val
+= ntohs(hdr
->th_sport
) & 0xff;
1760 slice
= (mgp
->num_slices
- 1) & hash_val
;
1761 return (&mgp
->ss
[slice
]);
1764 static inline struct myri10ge_slice_state
*
1765 myri10ge_send_hash(struct myri10ge_priv
*mgp
, mblk_t
*mp
)
1767 unsigned int slice
= 0;
1768 struct ether_header
*eh
;
1769 struct ether_vlan_header
*vh
;
1773 if (mgp
->num_slices
== 1)
1774 return (&mgp
->ss
[0]);
1776 if (myri10ge_tx_hash
== 0) {
1777 slice
= CPU
->cpu_id
& (mgp
->num_slices
- 1);
1778 return (&mgp
->ss
[slice
]);
1782 * ensure it is a TCP or UDP over IPv4 packet, and that the
1783 * headers are in the 1st mblk. Otherwise, punt
1787 if ((MBLKL(mp
)) < (ehl
+ ihl
+ 8))
1788 return (&mgp
->ss
[0]);
1789 eh
= (struct ether_header
*)(void *)mp
->b_rptr
;
1790 ip
= (struct ip
*)(void *)(eh
+ 1);
1791 if (eh
->ether_type
!= BE_16(ETHERTYPE_IP
)) {
1792 if (eh
->ether_type
!= BE_16(ETHERTYPE_VLAN
))
1793 return (&mgp
->ss
[0]);
1794 vh
= (struct ether_vlan_header
*)(void *)mp
->b_rptr
;
1795 if (vh
->ether_type
!= BE_16(ETHERTYPE_IP
))
1796 return (&mgp
->ss
[0]);
1798 ip
= (struct ip
*)(void *)(vh
+ 1);
1800 ihl
= ip
->ip_hl
<< 2;
1801 if (MBLKL(mp
) < (ehl
+ ihl
+ 8))
1802 return (&mgp
->ss
[0]);
1803 switch (myri10ge_rss_hash
) {
1804 case MXGEFW_RSS_HASH_TYPE_IPV4
:
1806 case MXGEFW_RSS_HASH_TYPE_TCP_IPV4
:
1808 case (MXGEFW_RSS_HASH_TYPE_IPV4
|MXGEFW_RSS_HASH_TYPE_TCP_IPV4
):
1809 return (myri10ge_toeplitz_send_hash(mgp
, ip
));
1810 case MXGEFW_RSS_HASH_TYPE_SRC_PORT
:
1812 case MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT
:
1813 return (myri10ge_simple_send_hash(mgp
, ip
));
1817 return (&mgp
->ss
[0]);
1821 myri10ge_setup_slice(struct myri10ge_slice_state
*ss
)
1823 struct myri10ge_priv
*mgp
= ss
->mgp
;
1825 int tx_ring_size
, rx_ring_size
;
1826 int tx_ring_entries
, rx_ring_entries
;
1831 slice
= ss
- mgp
->ss
;
1833 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_GET_SEND_RING_SIZE
, &cmd
);
1834 tx_ring_size
= cmd
.data0
;
1836 status
|= myri10ge_send_cmd(mgp
, MXGEFW_CMD_GET_RX_RING_SIZE
, &cmd
);
1839 rx_ring_size
= cmd
.data0
;
1841 tx_ring_entries
= tx_ring_size
/ sizeof (struct mcp_kreq_ether_send
);
1842 rx_ring_entries
= rx_ring_size
/ sizeof (struct mcp_dma_addr
);
1843 ss
->tx
.mask
= tx_ring_entries
- 1;
1844 ss
->rx_small
.mask
= ss
->rx_big
.mask
= rx_ring_entries
- 1;
1846 /* get the lanai pointers to the send and receive rings */
1849 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_GET_SEND_OFFSET
, &cmd
);
1850 ss
->tx
.lanai
= (mcp_kreq_ether_send_t
*)(void *)(mgp
->sram
+ cmd
.data0
);
1851 if (mgp
->num_slices
> 1) {
1852 ss
->tx
.go
= (char *)mgp
->sram
+ MXGEFW_ETH_SEND_GO
+ 64 * slice
;
1853 ss
->tx
.stop
= (char *)mgp
->sram
+ MXGEFW_ETH_SEND_STOP
+
1861 status
|= myri10ge_send_cmd(mgp
, MXGEFW_CMD_GET_SMALL_RX_OFFSET
, &cmd
);
1862 ss
->rx_small
.lanai
= (mcp_kreq_ether_recv_t
*)
1863 (void *)(mgp
->sram
+ cmd
.data0
);
1866 status
|= myri10ge_send_cmd(mgp
, MXGEFW_CMD_GET_BIG_RX_OFFSET
, &cmd
);
1867 ss
->rx_big
.lanai
= (mcp_kreq_ether_recv_t
*)(void *)
1868 (mgp
->sram
+ cmd
.data0
);
1872 "%s: failed to get ring sizes or locations\n", mgp
->name
);
1877 bytes
= rx_ring_entries
* sizeof (*ss
->rx_small
.shadow
);
1878 ss
->rx_small
.shadow
= kmem_zalloc(bytes
, KM_SLEEP
);
1879 if (ss
->rx_small
.shadow
== NULL
)
1881 (void) memset(ss
->rx_small
.shadow
, 0, bytes
);
1883 bytes
= rx_ring_entries
* sizeof (*ss
->rx_big
.shadow
);
1884 ss
->rx_big
.shadow
= kmem_zalloc(bytes
, KM_SLEEP
);
1885 if (ss
->rx_big
.shadow
== NULL
)
1886 goto abort_with_rx_small_shadow
;
1887 (void) memset(ss
->rx_big
.shadow
, 0, bytes
);
1889 /* allocate the host info rings */
1891 bytes
= tx_ring_entries
* sizeof (*ss
->tx
.info
);
1892 ss
->tx
.info
= kmem_zalloc(bytes
, KM_SLEEP
);
1893 if (ss
->tx
.info
== NULL
)
1894 goto abort_with_rx_big_shadow
;
1895 (void) memset(ss
->tx
.info
, 0, bytes
);
1897 bytes
= rx_ring_entries
* sizeof (*ss
->rx_small
.info
);
1898 ss
->rx_small
.info
= kmem_zalloc(bytes
, KM_SLEEP
);
1899 if (ss
->rx_small
.info
== NULL
)
1900 goto abort_with_tx_info
;
1901 (void) memset(ss
->rx_small
.info
, 0, bytes
);
1903 bytes
= rx_ring_entries
* sizeof (*ss
->rx_big
.info
);
1904 ss
->rx_big
.info
= kmem_zalloc(bytes
, KM_SLEEP
);
1905 if (ss
->rx_big
.info
== NULL
)
1906 goto abort_with_rx_small_info
;
1907 (void) memset(ss
->rx_big
.info
, 0, bytes
);
1909 ss
->tx
.stall
= ss
->tx
.sched
= 0;
1910 ss
->tx
.stall_early
= ss
->tx
.stall_late
= 0;
1912 ss
->jbufs_for_smalls
= 1 + (1 + ss
->rx_small
.mask
) /
1913 (myri10ge_mtu
/ (myri10ge_small_bytes
+ MXGEFW_PAD
));
1915 allocated
= myri10ge_add_jbufs(ss
,
1916 myri10ge_bigbufs_initial
+ ss
->jbufs_for_smalls
, 1);
1917 if (allocated
< ss
->jbufs_for_smalls
+ myri10ge_bigbufs_initial
) {
1919 "%s: Could not allocate enough receive buffers (%d/%d)\n",
1920 mgp
->name
, allocated
,
1921 myri10ge_bigbufs_initial
+ ss
->jbufs_for_smalls
);
1922 goto abort_with_jumbos
;
1925 myri10ge_carve_up_jbufs_into_small_ring(ss
);
1928 mutex_enter(&ss
->jpool
.mtx
);
1929 if (allocated
< rx_ring_entries
)
1930 ss
->jpool
.low_water
= allocated
/ 4;
1932 ss
->jpool
.low_water
= rx_ring_entries
/ 2;
1935 * invalidate the big receive ring in case we do not
1936 * allocate sufficient jumbos to fill it
1938 (void) memset(ss
->rx_big
.shadow
, 1,
1939 (ss
->rx_big
.mask
+ 1) * sizeof (ss
->rx_big
.shadow
[0]));
1940 for (idx
= 7; idx
<= ss
->rx_big
.mask
; idx
+= 8) {
1941 myri10ge_submit_8rx(&ss
->rx_big
.lanai
[idx
- 7],
1942 &ss
->rx_big
.shadow
[idx
- 7]);
1947 myri10ge_restock_jumbos(ss
);
1949 for (idx
= 7; idx
<= ss
->rx_small
.mask
; idx
+= 8) {
1950 myri10ge_submit_8rx(&ss
->rx_small
.lanai
[idx
- 7],
1951 &ss
->rx_small
.shadow
[idx
- 7]);
1954 ss
->rx_small
.cnt
= ss
->rx_small
.mask
+ 1;
1956 mutex_exit(&ss
->jpool
.mtx
);
1958 status
= myri10ge_prepare_tx_ring(ss
);
1961 goto abort_with_small_jbufs
;
1963 cmd
.data0
= ntohl(ss
->fw_stats_dma
.low
);
1964 cmd
.data1
= ntohl(ss
->fw_stats_dma
.high
);
1965 cmd
.data2
= sizeof (mcp_irq_data_t
);
1966 cmd
.data2
|= (slice
<< 16);
1967 bzero(ss
->fw_stats
, sizeof (*ss
->fw_stats
));
1968 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_SET_STATS_DMA_V2
, &cmd
);
1969 if (status
== ENOSYS
) {
1970 cmd
.data0
= ntohl(ss
->fw_stats_dma
.low
) +
1971 offsetof(mcp_irq_data_t
, send_done_count
);
1972 cmd
.data1
= ntohl(ss
->fw_stats_dma
.high
);
1973 status
= myri10ge_send_cmd(mgp
,
1974 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE
, &cmd
);
1977 cmn_err(CE_WARN
, "%s: Couldn't set stats DMA\n", mgp
->name
);
1984 myri10ge_unprepare_tx_ring(ss
);
1986 abort_with_small_jbufs
:
1987 myri10ge_release_small_jbufs(ss
);
1990 if (allocated
!= 0) {
1991 mutex_enter(&ss
->jpool
.mtx
);
1992 ss
->jpool
.low_water
= 0;
1993 mutex_exit(&ss
->jpool
.mtx
);
1994 myri10ge_unstock_jumbos(ss
);
1995 myri10ge_remove_jbufs(ss
);
1998 bytes
= rx_ring_entries
* sizeof (*ss
->rx_big
.info
);
1999 kmem_free(ss
->rx_big
.info
, bytes
);
2001 abort_with_rx_small_info
:
2002 bytes
= rx_ring_entries
* sizeof (*ss
->rx_small
.info
);
2003 kmem_free(ss
->rx_small
.info
, bytes
);
2006 bytes
= tx_ring_entries
* sizeof (*ss
->tx
.info
);
2007 kmem_free(ss
->tx
.info
, bytes
);
2009 abort_with_rx_big_shadow
:
2010 bytes
= rx_ring_entries
* sizeof (*ss
->rx_big
.shadow
);
2011 kmem_free(ss
->rx_big
.shadow
, bytes
);
2013 abort_with_rx_small_shadow
:
2014 bytes
= rx_ring_entries
* sizeof (*ss
->rx_small
.shadow
);
2015 kmem_free(ss
->rx_small
.shadow
, bytes
);
2022 myri10ge_teardown_slice(struct myri10ge_slice_state
*ss
)
2024 int tx_ring_entries
, rx_ring_entries
;
2027 /* ignore slices that have not been fully setup */
2028 if (ss
->tx
.cp
== NULL
)
2030 /* Free the TX copy buffers */
2031 myri10ge_unprepare_tx_ring(ss
);
2033 /* stop passing returned buffers to firmware */
2035 mutex_enter(&ss
->jpool
.mtx
);
2036 ss
->jpool
.low_water
= 0;
2037 mutex_exit(&ss
->jpool
.mtx
);
2038 myri10ge_release_small_jbufs(ss
);
2040 /* Release the free jumbo frame pool */
2041 myri10ge_unstock_jumbos(ss
);
2042 myri10ge_remove_jbufs(ss
);
2044 rx_ring_entries
= ss
->rx_big
.mask
+ 1;
2045 tx_ring_entries
= ss
->tx
.mask
+ 1;
2047 bytes
= rx_ring_entries
* sizeof (*ss
->rx_big
.info
);
2048 kmem_free(ss
->rx_big
.info
, bytes
);
2050 bytes
= rx_ring_entries
* sizeof (*ss
->rx_small
.info
);
2051 kmem_free(ss
->rx_small
.info
, bytes
);
2053 bytes
= tx_ring_entries
* sizeof (*ss
->tx
.info
);
2054 kmem_free(ss
->tx
.info
, bytes
);
2056 bytes
= rx_ring_entries
* sizeof (*ss
->rx_big
.shadow
);
2057 kmem_free(ss
->rx_big
.shadow
, bytes
);
2059 bytes
= rx_ring_entries
* sizeof (*ss
->rx_small
.shadow
);
2060 kmem_free(ss
->rx_small
.shadow
, bytes
);
2064 myri10ge_start_locked(struct myri10ge_priv
*mgp
)
2067 int status
, big_pow2
, i
;
2068 volatile uint8_t *itable
;
2070 status
= DDI_SUCCESS
;
2071 /* Allocate DMA resources and receive buffers */
2073 status
= myri10ge_reset(mgp
);
2075 cmn_err(CE_WARN
, "%s: failed reset\n", mgp
->name
);
2076 return (DDI_FAILURE
);
2079 if (mgp
->num_slices
> 1) {
2080 cmd
.data0
= mgp
->num_slices
;
2081 cmd
.data1
= 1; /* use MSI-X */
2082 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_ENABLE_RSS_QUEUES
,
2086 "%s: failed to set number of slices\n",
2088 goto abort_with_nothing
;
2090 /* setup the indirection table */
2091 cmd
.data0
= mgp
->num_slices
;
2092 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_SET_RSS_TABLE_SIZE
,
2095 status
|= myri10ge_send_cmd(mgp
,
2096 MXGEFW_CMD_GET_RSS_TABLE_OFFSET
, &cmd
);
2099 "%s: failed to setup rss tables\n", mgp
->name
);
2102 /* just enable an identity mapping */
2103 itable
= mgp
->sram
+ cmd
.data0
;
2104 for (i
= 0; i
< mgp
->num_slices
; i
++)
2105 itable
[i
] = (uint8_t)i
;
2107 if (myri10ge_rss_hash
& MYRI10GE_TOEPLITZ_HASH
) {
2108 status
= myri10ge_init_toeplitz(mgp
);
2110 cmn_err(CE_WARN
, "%s: failed to setup "
2111 "toeplitz tx hash table", mgp
->name
);
2112 goto abort_with_nothing
;
2116 cmd
.data1
= myri10ge_rss_hash
;
2117 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_SET_RSS_ENABLE
,
2121 "%s: failed to enable slices\n", mgp
->name
);
2122 goto abort_with_toeplitz
;
2126 for (i
= 0; i
< mgp
->num_slices
; i
++) {
2127 status
= myri10ge_setup_slice(&mgp
->ss
[i
]);
2129 goto abort_with_slices
;
2133 * Tell the MCP how many buffers he has, and to
2134 * bring the ethernet interface up
2136 * Firmware needs the big buff size as a power of 2. Lie and
2137 * tell him the buffer is larger, because we only use 1
2138 * buffer/pkt, and the mtu will prevent overruns
2140 big_pow2
= myri10ge_mtu
+ MXGEFW_PAD
;
2141 while (!ISP2(big_pow2
))
2144 /* now give firmware buffers sizes, and MTU */
2145 cmd
.data0
= myri10ge_mtu
;
2146 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_SET_MTU
, &cmd
);
2147 cmd
.data0
= myri10ge_small_bytes
;
2149 myri10ge_send_cmd(mgp
, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE
, &cmd
);
2150 cmd
.data0
= big_pow2
;
2151 status
|= myri10ge_send_cmd(mgp
, MXGEFW_CMD_SET_BIG_BUFFER_SIZE
, &cmd
);
2153 cmn_err(CE_WARN
, "%s: Couldn't set buffer sizes\n", mgp
->name
);
2154 goto abort_with_slices
;
2159 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_SET_TSO_MODE
, &cmd
);
2161 cmn_err(CE_WARN
, "%s: unable to setup TSO (%d)\n",
2164 mgp
->features
|= MYRI10GE_TSO
;
2167 mgp
->link_state
= -1;
2168 mgp
->rdma_tags_available
= 15;
2169 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_ETHERNET_UP
, &cmd
);
2171 cmn_err(CE_WARN
, "%s: unable to start ethernet\n", mgp
->name
);
2172 goto abort_with_slices
;
2174 mgp
->running
= MYRI10GE_ETH_RUNNING
;
2175 return (DDI_SUCCESS
);
2178 for (i
= 0; i
< mgp
->num_slices
; i
++)
2179 myri10ge_teardown_slice(&mgp
->ss
[i
]);
2181 mgp
->running
= MYRI10GE_ETH_STOPPED
;
2183 abort_with_toeplitz
:
2184 if (mgp
->toeplitz_hash_table
!= NULL
) {
2185 kmem_free(mgp
->toeplitz_hash_table
,
2186 sizeof (uint32_t) * 12 * 256);
2187 mgp
->toeplitz_hash_table
= NULL
;
2191 return (DDI_FAILURE
);
2195 myri10ge_stop_locked(struct myri10ge_priv
*mgp
)
2197 int status
, old_down_cnt
;
2202 old_down_cnt
= mgp
->down_cnt
;
2204 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_ETHERNET_DOWN
, &cmd
);
2206 cmn_err(CE_WARN
, "%s: Couldn't bring down link\n", mgp
->name
);
2209 while (old_down_cnt
== *((volatile int *)&mgp
->down_cnt
)) {
2210 delay(1 * drv_usectohz(1000000));
2216 if (old_down_cnt
== *((volatile int *)&mgp
->down_cnt
)) {
2217 cmn_err(CE_WARN
, "%s: didn't get down irq\n", mgp
->name
);
2218 for (i
= 0; i
< mgp
->num_slices
; i
++) {
2220 * take and release the rx lock to ensure
2221 * that no interrupt thread is blocked
2222 * elsewhere in the stack, preventing
2226 mutex_enter(&mgp
->ss
[i
].rx_lock
);
2227 printf("%s: slice %d rx irq idle\n",
2229 mutex_exit(&mgp
->ss
[i
].rx_lock
);
2231 /* verify that the poll handler is inactive */
2232 mutex_enter(&mgp
->ss
->poll_lock
);
2233 polling
= mgp
->ss
->rx_polling
;
2234 mutex_exit(&mgp
->ss
->poll_lock
);
2236 printf("%s: slice %d is polling\n",
2238 delay(1 * drv_usectohz(1000000));
2242 delay(1 * drv_usectohz(1000000));
2243 if (old_down_cnt
== *((volatile int *)&mgp
->down_cnt
)) {
2244 cmn_err(CE_WARN
, "%s: Never got down irq\n", mgp
->name
);
2248 for (i
= 0; i
< mgp
->num_slices
; i
++)
2249 myri10ge_teardown_slice(&mgp
->ss
[i
]);
2251 if (mgp
->toeplitz_hash_table
!= NULL
) {
2252 kmem_free(mgp
->toeplitz_hash_table
,
2253 sizeof (uint32_t) * 12 * 256);
2254 mgp
->toeplitz_hash_table
= NULL
;
2256 mgp
->running
= MYRI10GE_ETH_STOPPED
;
2260 myri10ge_m_start(void *arg
)
2262 struct myri10ge_priv
*mgp
= arg
;
2265 mutex_enter(&mgp
->intrlock
);
2267 if (mgp
->running
!= MYRI10GE_ETH_STOPPED
) {
2268 mutex_exit(&mgp
->intrlock
);
2269 return (DDI_FAILURE
);
2271 status
= myri10ge_start_locked(mgp
);
2272 mutex_exit(&mgp
->intrlock
);
2274 if (status
!= DDI_SUCCESS
)
2277 /* start the watchdog timer */
2278 mgp
->timer_id
= timeout(myri10ge_watchdog
, mgp
,
2280 return (DDI_SUCCESS
);
2285 myri10ge_m_stop(void *arg
)
2287 struct myri10ge_priv
*mgp
= arg
;
2289 mutex_enter(&mgp
->intrlock
);
2290 /* if the device not running give up */
2291 if (mgp
->running
!= MYRI10GE_ETH_RUNNING
) {
2292 mutex_exit(&mgp
->intrlock
);
2296 mgp
->running
= MYRI10GE_ETH_STOPPING
;
2297 mutex_exit(&mgp
->intrlock
);
2298 (void) untimeout(mgp
->timer_id
);
2299 mutex_enter(&mgp
->intrlock
);
2300 myri10ge_stop_locked(mgp
);
2301 mutex_exit(&mgp
->intrlock
);
2306 myri10ge_rx_csum(mblk_t
*mp
, struct myri10ge_rx_ring_stats
*s
, uint32_t csum
)
2308 struct ether_header
*eh
;
2310 struct ip6_hdr
*ip6
;
2311 uint32_t start
, stuff
, end
, partial
, hdrlen
;
2314 csum
= ntohs((uint16_t)csum
);
2315 eh
= (struct ether_header
*)(void *)mp
->b_rptr
;
2316 hdrlen
= sizeof (*eh
);
2317 if (eh
->ether_dhost
.ether_addr_octet
[0] & 1) {
2318 if (0 == (bcmp(eh
->ether_dhost
.ether_addr_octet
,
2319 myri10ge_broadcastaddr
, sizeof (eh
->ether_dhost
))))
2325 if (eh
->ether_type
== BE_16(ETHERTYPE_VLAN
)) {
2327 * fix checksum by subtracting 4 bytes after what the
2328 * firmware thought was the end of the ether hdr
2330 partial
= *(uint32_t *)
2331 (void *)(mp
->b_rptr
+ ETHERNET_HEADER_SIZE
);
2333 csum
+= (csum
< ~partial
);
2334 csum
= (csum
>> 16) + (csum
& 0xFFFF);
2335 csum
= (csum
>> 16) + (csum
& 0xFFFF);
2336 hdrlen
+= VLAN_TAGSZ
;
2339 if (eh
->ether_type
== BE_16(ETHERTYPE_IP
)) {
2340 ip
= (struct ip
*)(void *)(mp
->b_rptr
+ hdrlen
);
2341 start
= ip
->ip_hl
<< 2;
2343 if (ip
->ip_p
== IPPROTO_TCP
)
2344 stuff
= start
+ offsetof(struct tcphdr
, th_sum
);
2345 else if (ip
->ip_p
== IPPROTO_UDP
)
2346 stuff
= start
+ offsetof(struct udphdr
, uh_sum
);
2349 end
= ntohs(ip
->ip_len
);
2350 } else if (eh
->ether_type
== BE_16(ETHERTYPE_IPV6
)) {
2351 ip6
= (struct ip6_hdr
*)(void *)(mp
->b_rptr
+ hdrlen
);
2352 start
= sizeof (*ip6
);
2353 if (ip6
->ip6_nxt
== IPPROTO_TCP
) {
2354 stuff
= start
+ offsetof(struct tcphdr
, th_sum
);
2355 } else if (ip6
->ip6_nxt
== IPPROTO_UDP
)
2356 stuff
= start
+ offsetof(struct udphdr
, uh_sum
);
2359 end
= start
+ ntohs(ip6
->ip6_plen
);
2361 * IPv6 headers do not contain a checksum, and hence
2362 * do not checksum to zero, so they don't "fall out"
2363 * of the partial checksum calculation like IPv4
2364 * headers do. We need to fix the partial checksum by
2365 * subtracting the checksum of the IPv6 header.
2368 partial
= myri10ge_csum_generic((uint16_t *)ip6
, sizeof (*ip6
));
2370 csum
+= (csum
< ~partial
);
2371 csum
= (csum
>> 16) + (csum
& 0xFFFF);
2372 csum
= (csum
>> 16) + (csum
& 0xFFFF);
2377 if (MBLKL(mp
) > hdrlen
+ end
) {
2378 /* padded frame, so hw csum may be invalid */
2382 mac_hcksum_set(mp
, start
, stuff
, end
, csum
, HCK_PARTIALCKSUM
);
2386 myri10ge_rx_done_small(struct myri10ge_slice_state
*ss
, uint32_t len
,
2390 myri10ge_rx_ring_t
*rx
;
2394 idx
= rx
->cnt
& rx
->mask
;
2397 /* allocate a new buffer to pass up the stack */
2398 mp
= allocb(len
+ MXGEFW_PAD
, 0);
2400 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_small_nobuf
);
2403 bcopy(ss
->rx_small
.info
[idx
].ptr
,
2404 (caddr_t
)mp
->b_wptr
, len
+ MXGEFW_PAD
);
2405 mp
->b_wptr
+= len
+ MXGEFW_PAD
;
2406 mp
->b_rptr
+= MXGEFW_PAD
;
2408 ss
->rx_stats
.ibytes
+= len
;
2409 ss
->rx_stats
.ipackets
+= 1;
2410 myri10ge_rx_csum(mp
, &ss
->rx_stats
, csum
);
2413 if ((idx
& 7) == 7) {
2414 myri10ge_submit_8rx(&rx
->lanai
[idx
- 7],
2415 &rx
->shadow
[idx
- 7]);
2423 myri10ge_rx_done_big(struct myri10ge_slice_state
*ss
, uint32_t len
,
2426 struct myri10ge_jpool_stuff
*jpool
;
2427 struct myri10ge_jpool_entry
*j
;
2429 int idx
, num_owned_by_mcp
;
2432 idx
= ss
->j_rx_cnt
& ss
->rx_big
.mask
;
2433 j
= ss
->rx_big
.info
[idx
].j
;
2436 printf("%s: null j at idx=%d, rx_big.cnt = %d, j_rx_cnt=%d\n",
2437 ss
->mgp
->name
, idx
, ss
->rx_big
.cnt
, ss
->j_rx_cnt
);
2442 ss
->rx_big
.info
[idx
].j
= NULL
;
2447 * Check to see if we are low on rx buffers.
2448 * Note that we must leave at least 8 free so there are
2449 * enough to free in a single 64-byte write.
2451 num_owned_by_mcp
= ss
->rx_big
.cnt
- ss
->j_rx_cnt
;
2452 if (num_owned_by_mcp
< jpool
->low_water
) {
2453 mutex_enter(&jpool
->mtx
);
2454 myri10ge_restock_jumbos(ss
);
2455 mutex_exit(&jpool
->mtx
);
2456 num_owned_by_mcp
= ss
->rx_big
.cnt
- ss
->j_rx_cnt
;
2457 /* if we are still low, then we have to copy */
2458 if (num_owned_by_mcp
< 16) {
2459 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_copy
);
2460 /* allocate a new buffer to pass up the stack */
2461 mp
= allocb(len
+ MXGEFW_PAD
, 0);
2466 (caddr_t
)mp
->b_wptr
, len
+ MXGEFW_PAD
);
2467 myri10ge_jfree_rtn(j
);
2468 /* push buffer back to NIC */
2469 mutex_enter(&jpool
->mtx
);
2470 myri10ge_restock_jumbos(ss
);
2471 mutex_exit(&jpool
->mtx
);
2476 /* loan our buffer to the stack */
2477 mp
= desballoc((unsigned char *)j
->buf
, myri10ge_mtu
, 0, &j
->free_func
);
2483 mp
->b_rptr
+= MXGEFW_PAD
;
2484 mp
->b_wptr
= ((unsigned char *) mp
->b_rptr
+ len
);
2486 ss
->rx_stats
.ibytes
+= len
;
2487 ss
->rx_stats
.ipackets
+= 1;
2488 myri10ge_rx_csum(mp
, &ss
->rx_stats
, csum
);
2493 myri10ge_jfree_rtn(j
);
2494 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_big_nobuf
);
2499 * Free all transmit buffers up until the specified index
2502 myri10ge_tx_done(struct myri10ge_slice_state
*ss
, uint32_t mcp_index
)
2504 myri10ge_tx_ring_t
*tx
;
2505 struct myri10ge_tx_dma_handle_head handles
;
2510 handles
.head
= NULL
;
2511 handles
.tail
= NULL
;
2512 while (tx
->pkt_done
!= (int)mcp_index
) {
2513 idx
= tx
->done
& tx
->mask
;
2516 * mblk & DMA handle attached only to first slot
2517 * per buffer in the packet
2520 if (tx
->info
[idx
].m
) {
2521 (void) ddi_dma_unbind_handle(tx
->info
[idx
].handle
->h
);
2522 tx
->info
[idx
].handle
->next
= handles
.head
;
2523 handles
.head
= tx
->info
[idx
].handle
;
2524 if (handles
.tail
== NULL
)
2525 handles
.tail
= tx
->info
[idx
].handle
;
2526 freeb(tx
->info
[idx
].m
);
2527 tx
->info
[idx
].m
= 0;
2528 tx
->info
[idx
].handle
= 0;
2530 if (tx
->info
[idx
].ostat
.opackets
!= 0) {
2531 tx
->stats
.multixmt
+= tx
->info
[idx
].ostat
.multixmt
;
2532 tx
->stats
.brdcstxmt
+= tx
->info
[idx
].ostat
.brdcstxmt
;
2533 tx
->stats
.obytes
+= tx
->info
[idx
].ostat
.obytes
;
2534 tx
->stats
.opackets
+= tx
->info
[idx
].ostat
.opackets
;
2535 tx
->info
[idx
].stat
.un
.all
= 0;
2541 * if we stalled the queue, wake it. But Wait until
2542 * we have at least 1/2 our slots free.
2544 if ((tx
->req
- tx
->done
) < (tx
->mask
>> 1) &&
2545 tx
->stall
!= tx
->sched
) {
2546 mutex_enter(&ss
->tx
.lock
);
2547 tx
->sched
= tx
->stall
;
2548 mutex_exit(&ss
->tx
.lock
);
2549 mac_tx_ring_update(ss
->mgp
->mh
, tx
->rh
);
2552 /* limit potential for livelock */
2553 if (unlikely(++limit
> 2 * tx
->mask
))
2556 if (tx
->req
== tx
->done
&& tx
->stop
!= NULL
) {
2558 * Nic has sent all pending requests, allow him
2559 * to stop polling this queue
2561 mutex_enter(&tx
->lock
);
2562 if (tx
->req
== tx
->done
&& tx
->active
) {
2563 *(int *)(void *)tx
->stop
= 1;
2567 mutex_exit(&tx
->lock
);
2569 if (handles
.head
!= NULL
)
2570 myri10ge_free_tx_handles(tx
, &handles
);
2574 myri10ge_mbl_init(struct myri10ge_mblk_list
*mbl
)
2577 mbl
->tail
= &mbl
->head
;
2583 myri10ge_mbl_append(struct myri10ge_slice_state
*ss
,
2584 struct myri10ge_mblk_list
*mbl
, mblk_t
*mp
)
2587 mbl
->tail
= &mp
->b_next
;
2594 myri10ge_clean_rx_done(struct myri10ge_slice_state
*ss
,
2595 struct myri10ge_mblk_list
*mbl
, int limit
, boolean_t
*stop
)
2597 myri10ge_rx_done_t
*rx_done
= &ss
->rx_done
;
2598 struct myri10ge_priv
*mgp
= ss
->mgp
;
2600 struct lro_entry
*lro
;
2605 while (rx_done
->entry
[rx_done
->idx
].length
!= 0) {
2606 if (unlikely (*stop
)) {
2609 length
= ntohs(rx_done
->entry
[rx_done
->idx
].length
);
2610 length
&= (~MXGEFW_RSS_HASH_MASK
);
2612 /* limit potential for livelock */
2614 if (unlikely(limit
< 0))
2617 rx_done
->entry
[rx_done
->idx
].length
= 0;
2618 checksum
= ntohs(rx_done
->entry
[rx_done
->idx
].checksum
);
2619 if (length
<= myri10ge_small_bytes
)
2620 mp
= myri10ge_rx_done_small(ss
, length
, checksum
);
2622 mp
= myri10ge_rx_done_big(ss
, length
, checksum
);
2624 if (!myri10ge_lro
||
2625 0 != myri10ge_lro_rx(ss
, mp
, checksum
, mbl
))
2626 myri10ge_mbl_append(ss
, mbl
, mp
);
2629 rx_done
->idx
= rx_done
->cnt
& (mgp
->max_intr_slots
- 1);
2631 while (ss
->lro_active
!= NULL
) {
2632 lro
= ss
->lro_active
;
2633 ss
->lro_active
= lro
->next
;
2634 myri10ge_lro_flush(ss
, lro
, mbl
);
2639 myri10ge_intr_rx(struct myri10ge_slice_state
*ss
)
2642 struct myri10ge_mblk_list mbl
;
2644 myri10ge_mbl_init(&mbl
);
2645 if (mutex_tryenter(&ss
->rx_lock
) == 0)
2647 gen
= ss
->rx_gen_num
;
2648 myri10ge_clean_rx_done(ss
, &mbl
, MYRI10GE_POLL_NULL
,
2650 if (mbl
.head
!= NULL
)
2651 mac_rx_ring(ss
->mgp
->mh
, ss
->rx_rh
, mbl
.head
, gen
);
2652 mutex_exit(&ss
->rx_lock
);
2657 myri10ge_poll_rx(void *arg
, int bytes
)
2659 struct myri10ge_slice_state
*ss
= arg
;
2660 struct myri10ge_mblk_list mbl
;
2661 boolean_t dummy
= B_FALSE
;
2666 myri10ge_mbl_init(&mbl
);
2667 mutex_enter(&ss
->rx_lock
);
2669 myri10ge_clean_rx_done(ss
, &mbl
, bytes
, &dummy
);
2671 printf("%d: poll_rx: token=%d, polling=%d\n", (int)(ss
-
2672 ss
->mgp
->ss
), ss
->rx_token
, ss
->rx_polling
);
2673 mutex_exit(&ss
->rx_lock
);
2679 myri10ge_intr(caddr_t arg0
, caddr_t arg1
)
2681 struct myri10ge_slice_state
*ss
=
2682 (struct myri10ge_slice_state
*)(void *)arg0
;
2683 struct myri10ge_priv
*mgp
= ss
->mgp
;
2684 mcp_irq_data_t
*stats
= ss
->fw_stats
;
2685 myri10ge_tx_ring_t
*tx
= &ss
->tx
;
2686 uint32_t send_done_count
;
2690 /* make sure the DMA has finished */
2691 if (!stats
->valid
) {
2692 return (DDI_INTR_UNCLAIMED
);
2694 valid
= stats
->valid
;
2696 /* low bit indicates receives are present */
2698 myri10ge_intr_rx(ss
);
2700 if (mgp
->ddi_intr_type
== DDI_INTR_TYPE_FIXED
) {
2701 /* lower legacy IRQ */
2702 *mgp
->irq_deassert
= 0;
2703 if (!myri10ge_deassert_wait
)
2704 /* don't wait for conf. that irq is low */
2708 /* no need to wait for conf. that irq is low */
2713 /* check for transmit completes and receives */
2714 send_done_count
= ntohl(stats
->send_done_count
);
2715 if (send_done_count
!= tx
->pkt_done
)
2716 myri10ge_tx_done(ss
, (int)send_done_count
);
2717 } while (*((volatile uint8_t *) &stats
->valid
));
2719 if (stats
->stats_updated
) {
2720 if (mgp
->link_state
!= stats
->link_up
|| stats
->link_down
) {
2721 mgp
->link_state
= stats
->link_up
;
2722 if (stats
->link_down
) {
2723 mgp
->down_cnt
+= stats
->link_down
;
2724 mgp
->link_state
= 0;
2726 if (mgp
->link_state
) {
2727 if (myri10ge_verbose
)
2728 printf("%s: link up\n", mgp
->name
);
2729 mac_link_update(mgp
->mh
, LINK_STATE_UP
);
2731 if (myri10ge_verbose
)
2732 printf("%s: link down\n", mgp
->name
);
2733 mac_link_update(mgp
->mh
, LINK_STATE_DOWN
);
2735 MYRI10GE_NIC_STAT_INC(link_changes
);
2737 if (mgp
->rdma_tags_available
!=
2738 ntohl(ss
->fw_stats
->rdma_tags_available
)) {
2739 mgp
->rdma_tags_available
=
2740 ntohl(ss
->fw_stats
->rdma_tags_available
);
2741 cmn_err(CE_NOTE
, "%s: RDMA timed out! "
2742 "%d tags left\n", mgp
->name
,
2743 mgp
->rdma_tags_available
);
2748 /* check to see if we have rx token to pass back */
2750 mutex_enter(&ss
->poll_lock
);
2751 if (ss
->rx_polling
) {
2754 *ss
->irq_claim
= BE_32(3);
2757 mutex_exit(&ss
->poll_lock
);
2759 *(ss
->irq_claim
+ 1) = BE_32(3);
2760 return (DDI_INTR_CLAIMED
);
2764 * Add or remove a multicast address. This is called with our
2765 * macinfo's lock held by GLD, so we do not need to worry about
2766 * our own locking here.
2769 myri10ge_m_multicst(void *arg
, boolean_t add
, const uint8_t *multicastaddr
)
2772 struct myri10ge_priv
*mgp
= arg
;
2773 int status
, join_leave
;
2776 join_leave
= MXGEFW_JOIN_MULTICAST_GROUP
;
2778 join_leave
= MXGEFW_LEAVE_MULTICAST_GROUP
;
2779 (void) memcpy(&cmd
.data0
, multicastaddr
, 4);
2780 (void) memcpy(&cmd
.data1
, multicastaddr
+ 4, 2);
2781 cmd
.data0
= htonl(cmd
.data0
);
2782 cmd
.data1
= htonl(cmd
.data1
);
2783 status
= myri10ge_send_cmd(mgp
, join_leave
, &cmd
);
2787 cmn_err(CE_WARN
, "%s: failed to set multicast address\n",
2794 myri10ge_m_promisc(void *arg
, boolean_t on
)
2796 struct myri10ge_priv
*mgp
= arg
;
2798 myri10ge_change_promisc(mgp
, on
);
2803 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
2804 * backwards one at a time and handle ring wraps
2808 myri10ge_submit_req_backwards(myri10ge_tx_ring_t
*tx
,
2809 mcp_kreq_ether_send_t
*src
, int cnt
)
2811 int idx
, starting_slot
;
2812 starting_slot
= tx
->req
;
2815 idx
= (starting_slot
+ cnt
) & tx
->mask
;
2816 myri10ge_pio_copy(&tx
->lanai
[idx
],
2817 &src
[cnt
], sizeof (*src
));
2823 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
2824 * at most 32 bytes at a time, so as to avoid involving the software
2825 * pio handler in the nic. We re-write the first segment's flags
2826 * to mark them valid only after writing the entire chain
2830 myri10ge_submit_req(myri10ge_tx_ring_t
*tx
, mcp_kreq_ether_send_t
*src
,
2834 uint32_t *src_ints
, *dst_ints
;
2835 mcp_kreq_ether_send_t
*srcp
, *dstp
, *dst
;
2838 idx
= tx
->req
& tx
->mask
;
2840 last_flags
= src
->flags
;
2843 dst
= dstp
= &tx
->lanai
[idx
];
2846 if ((idx
+ cnt
) < tx
->mask
) {
2847 for (i
= 0; i
< (cnt
- 1); i
+= 2) {
2848 myri10ge_pio_copy(dstp
, srcp
, 2 * sizeof (*src
));
2849 mb(); /* force write every 32 bytes */
2855 * submit all but the first request, and ensure
2856 * that it is submitted below
2858 myri10ge_submit_req_backwards(tx
, src
, cnt
);
2862 /* submit the first request */
2863 myri10ge_pio_copy(dstp
, srcp
, sizeof (*src
));
2864 mb(); /* barrier before setting valid flag */
2867 /* re-write the last 32-bits with the valid flags */
2868 src
->flags
|= last_flags
;
2869 src_ints
= (uint32_t *)src
;
2871 dst_ints
= (uint32_t *)dst
;
2873 *dst_ints
= *src_ints
;
2876 /* notify NIC to poll this tx ring */
2877 if (!tx
->active
&& tx
->go
!= NULL
) {
2878 *(int *)(void *)tx
->go
= 1;
2887 myri10ge_lso_info_get(mblk_t
*mp
, uint32_t *mss
, uint32_t *flags
)
2890 mac_lso_get(mp
, mss
, &lso_flag
);
2891 (*flags
) |= lso_flag
;
2895 /* like pullupmsg, except preserve hcksum/LSO attributes */
2897 myri10ge_pullup(struct myri10ge_slice_state
*ss
, mblk_t
*mp
)
2899 uint32_t start
, stuff
, tx_offload_flags
, mss
;
2903 mac_hcksum_get(mp
, &start
, &stuff
, NULL
, NULL
, &tx_offload_flags
);
2904 myri10ge_lso_info_get(mp
, &mss
, &tx_offload_flags
);
2906 ok
= pullupmsg(mp
, -1);
2908 printf("pullupmsg failed");
2909 return (DDI_FAILURE
);
2911 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_pullup
);
2912 mac_hcksum_set(mp
, start
, stuff
, NULL
, NULL
, tx_offload_flags
);
2913 if (tx_offload_flags
& HW_LSO
)
2914 DB_LSOMSS(mp
) = (uint16_t)mss
;
2915 lso_info_set(mp
, mss
, tx_offload_flags
);
2916 return (DDI_SUCCESS
);
2920 myri10ge_tx_stat(struct myri10ge_tx_pkt_stats
*s
, struct ether_header
*eh
,
2921 int opackets
, int obytes
)
2924 if (eh
->ether_dhost
.ether_addr_octet
[0] & 1) {
2925 if (0 == (bcmp(eh
->ether_dhost
.ether_addr_octet
,
2926 myri10ge_broadcastaddr
, sizeof (eh
->ether_dhost
))))
2927 s
->un
.s
.brdcstxmt
= 1;
2929 s
->un
.s
.multixmt
= 1;
2931 s
->un
.s
.opackets
= (uint16_t)opackets
;
2932 s
->un
.s
.obytes
= obytes
;
2936 myri10ge_tx_copy(struct myri10ge_slice_state
*ss
, mblk_t
*mp
,
2937 mcp_kreq_ether_send_t
*req
)
2939 myri10ge_tx_ring_t
*tx
= &ss
->tx
;
2941 struct myri10ge_tx_copybuf
*cp
;
2943 int idx
, mblen
, avail
;
2946 mutex_enter(&tx
->lock
);
2947 avail
= tx
->mask
- (tx
->req
- tx
->done
);
2949 mutex_exit(&tx
->lock
);
2952 idx
= tx
->req
& tx
->mask
;
2955 for (len
= 0, bp
= mp
; bp
!= NULL
; bp
= bp
->b_cont
) {
2957 bcopy(bp
->b_rptr
, ptr
, mblen
);
2961 /* ensure runts are padded to 60 bytes */
2963 bzero(ptr
, 64 - len
);
2966 req
->addr_low
= cp
->dma
.low
;
2967 req
->addr_high
= cp
->dma
.high
;
2968 req
->length
= htons(len
);
2970 req
->rdma_count
= 1;
2971 myri10ge_tx_stat(&tx
->info
[idx
].stat
,
2972 (struct ether_header
*)(void *)cp
->va
, 1, len
);
2973 (void) ddi_dma_sync(cp
->dma
.handle
, 0, len
, DDI_DMA_SYNC_FORDEV
);
2974 myri10ge_submit_req(&ss
->tx
, req
, 1);
2975 mutex_exit(&tx
->lock
);
2977 return (DDI_SUCCESS
);
2982 myri10ge_send_locked(myri10ge_tx_ring_t
*tx
, mcp_kreq_ether_send_t
*req_list
,
2983 struct myri10ge_tx_buffer_state
*tx_info
,
2988 idx
= 0; /* gcc -Wuninitialized */
2989 /* store unmapping and bp info for tx irq handler */
2990 for (i
= 0; i
< count
; i
++) {
2991 idx
= (tx
->req
+ i
) & tx
->mask
;
2992 tx
->info
[idx
].m
= tx_info
[i
].m
;
2993 tx
->info
[idx
].handle
= tx_info
[i
].handle
;
2995 tx
->info
[idx
].stat
.un
.all
= tx_info
[0].stat
.un
.all
;
2997 /* submit the frame to the nic */
2998 myri10ge_submit_req(tx
, req_list
, count
);
/*
 * Copy len bytes, starting off bytes into the message mp, into buf.
 *
 * Each step copies min(seglen - off, len) bytes from the current
 * block's b_rptr + off with bcopy().
 *
 * NOTE(review): the declarations and loop control are missing from
 * this listing, so how the chain is walked is not visible here;
 * confirm against the pristine source before relying on details.
 */
3006 myri10ge_copydata(mblk_t
*mp
, int off
, int len
, caddr_t buf
)
3023 count
= min(seglen
- off
, len
);
3024 bcopy(bp
->b_rptr
+ off
, buf
, count
);
3033 myri10ge_ether_parse_header(mblk_t
*mp
)
3035 struct ether_header eh_copy
;
3036 struct ether_header
*eh
;
3037 int eth_hdr_len
, seglen
;
3040 eth_hdr_len
= sizeof (*eh
);
3041 if (seglen
< eth_hdr_len
) {
3042 myri10ge_copydata(mp
, 0, eth_hdr_len
, (caddr_t
)&eh_copy
);
3045 eh
= (struct ether_header
*)(void *)mp
->b_rptr
;
3047 if (eh
->ether_type
== BE_16(ETHERTYPE_VLAN
)) {
3051 return (eth_hdr_len
);
3055 myri10ge_lso_parse_header(mblk_t
*mp
, int off
)
3058 int seglen
, sum_off
;
3063 if (seglen
< off
+ sizeof (*ip
)) {
3064 myri10ge_copydata(mp
, off
, sizeof (*ip
), buf
);
3065 ip
= (struct ip
*)(void *)buf
;
3067 ip
= (struct ip
*)(void *)(mp
->b_rptr
+ off
);
3069 if (seglen
< off
+ (ip
->ip_hl
<< 2) + sizeof (*tcp
)) {
3070 myri10ge_copydata(mp
, off
,
3071 (ip
->ip_hl
<< 2) + sizeof (*tcp
), buf
);
3072 ip
= (struct ip
*)(void *)buf
;
3074 tcp
= (struct tcphdr
*)(void *)((char *)ip
+ (ip
->ip_hl
<< 2));
3077 * NIC expects ip_sum to be zero. Recent changes to
3078 * OpenSolaris leave the correct ip checksum there, rather
3079 * than the required zero, so we need to zero it. Otherwise,
3080 * the NIC will produce bad checksums when sending LSO packets.
3082 if (ip
->ip_sum
!= 0) {
3083 if (((char *)ip
) != buf
) {
3084 /* ip points into mblk, so just zero it */
3088 * ip points into a copy, so walk the chain
3089 * to find the ip_csum, then zero it
3091 sum_off
= off
+ _PTRDIFF(&ip
->ip_sum
, buf
);
3092 while (sum_off
> (int)(MBLKL(mp
) - 1)) {
3093 sum_off
-= MBLKL(mp
);
3096 mp
->b_rptr
[sum_off
] = 0;
3098 while (sum_off
> MBLKL(mp
) - 1) {
3099 sum_off
-= MBLKL(mp
);
3102 mp
->b_rptr
[sum_off
] = 0;
3105 return (off
+ ((ip
->ip_hl
+ tcp
->th_off
) << 2));
/*
 * Transmit an LSO frame by copying it into the ring's copy buffers
 * (struct myri10ge_tx_copybuf) instead of DMA-binding each mblk.
 *
 * What the surviving lines show:
 *   - all work is done with tx->lock held;
 *   - if the free slot count is <= MYRI10GE_MAX_SEND_DESC_TSO the
 *     tx->stall counter is bumped and the lock dropped;
 *   - the slot at tx->req records the LSO statistics, where opackets
 *     is the payload size divided by mss, rounded up;
 *   - header bytes and payload are bcopy'd from the mblk chain,
 *     moving to a fresh slot when mss_resid reaches zero;
 *   - a request is built per piece, each piece ending at the next
 *     mgp->tx_boundary aligned address, with pseudo_hdr_offset
 *     carrying htons(mss);
 *   - a diagnostic branch prints "len=0!" details and counts
 *     xmit_err;
 *   - the requests go out through myri10ge_submit_req() and the
 *     function returns DDI_SUCCESS.
 *
 * NOTE(review): many original lines are missing from this listing,
 * including the stall-path return value, the loop heads and the slot
 * bookkeeping.  Confirm against the pristine source before relying
 * on details.
 */
3109 myri10ge_tx_tso_copy(struct myri10ge_slice_state
*ss
, mblk_t
*mp
,
3110 mcp_kreq_ether_send_t
*req_list
, int hdr_size
, int pkt_size
,
3111 uint16_t mss
, uint8_t cksum_offset
)
3113 myri10ge_tx_ring_t
*tx
= &ss
->tx
;
3114 struct myri10ge_priv
*mgp
= ss
->mgp
;
3116 mcp_kreq_ether_send_t
*req
;
3117 struct myri10ge_tx_copybuf
*cp
;
3119 int mblen
, count
, cum_len
, mss_resid
, tx_req
, pkt_size_tmp
;
3120 int resid
, avail
, idx
, hdr_size_tmp
, tx_boundary
;
3122 uint32_t seglen
, len
, boundary
, low
, high_swapped
;
3123 uint16_t pseudo_hdr_offset
= htons(mss
);
3126 tx_boundary
= mgp
->tx_boundary
;
3127 hdr_size_tmp
= hdr_size
;
3128 resid
= tx_boundary
;
3130 mutex_enter(&tx
->lock
);
3132 /* check to see if the slots are really there */
3133 avail
= tx
->mask
- (tx
->req
- tx
->done
);
3134 if (unlikely(avail
<= MYRI10GE_MAX_SEND_DESC_TSO
)) {
3135 atomic_inc_32(&tx
->stall
);
3136 mutex_exit(&tx
->lock
);
3141 cum_len
= -hdr_size
;
3144 idx
= tx
->mask
& tx
->req
;
3146 low
= ntohl(cp
->dma
.low
);
3150 int payload
= pkt_size
- hdr_size
;
3151 uint16_t opackets
= (payload
/ mss
) + ((payload
% mss
) != 0);
3152 tx
->info
[idx
].ostat
.opackets
= opackets
;
3153 tx
->info
[idx
].ostat
.obytes
= (opackets
- 1) * hdr_size
3156 hdr_size_tmp
= hdr_size
;
3158 flags
= (MXGEFW_FLAGS_TSO_HDR
| MXGEFW_FLAGS_FIRST
);
3160 for (bp
= mp
; bp
!= NULL
; bp
= bp
->b_cont
) {
3162 rptr
= (caddr_t
)bp
->b_rptr
;
3163 len
= min(hdr_size_tmp
, mblen
);
3165 bcopy(rptr
, ptr
, len
);
3170 hdr_size_tmp
-= len
;
3176 idx
= tx
->mask
& tx_req
;
3178 low
= ntohl(cp
->dma
.low
);
3180 resid
= tx_boundary
;
3184 len
= min(mss_resid
, mblen
);
3185 bcopy(rptr
, ptr
, len
);
3192 if (mss_resid
== 0) {
3196 idx
= tx
->mask
& tx_req
;
3199 low
= ntohl(cp
->dma
.low
);
3201 resid
= tx_boundary
;
3208 pkt_size_tmp
= pkt_size
;
3212 while (pkt_size_tmp
) {
3213 idx
= tx
->mask
& tx_req
;
3215 high_swapped
= cp
->dma
.high
;
3216 low
= ntohl(cp
->dma
.low
);
3219 printf("len=0! pkt_size_tmp=%d, pkt_size=%d\n",
3220 pkt_size_tmp
, pkt_size
);
3221 for (bp
= mp
; bp
!= NULL
; bp
= bp
->b_cont
) {
3223 printf("mblen:%d\n", mblen
);
3225 pkt_size_tmp
= pkt_size
;
3227 while (pkt_size_tmp
> 0) {
3228 idx
= tx
->mask
& tx_req
;
3230 printf("cp->len = %d\n", cp
->len
);
3231 pkt_size_tmp
-= cp
->len
;
3234 printf("dropped\n");
3235 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err
);
3238 pkt_size_tmp
-= len
;
3244 boundary
= (low
+ mgp
->tx_boundary
) &
3245 ~(mgp
->tx_boundary
- 1);
3246 seglen
= boundary
- low
;
3250 flags_next
= flags
& ~MXGEFW_FLAGS_FIRST
;
3251 cum_len_next
= cum_len
+ seglen
;
3252 (req
-rdma_count
)->rdma_count
= rdma_count
+ 1;
3253 if (likely(cum_len
>= 0)) {
3255 int next_is_first
, chop
;
3257 chop
= (cum_len_next
> mss
);
3258 cum_len_next
= cum_len_next
% mss
;
3259 next_is_first
= (cum_len_next
== 0);
3261 MXGEFW_FLAGS_TSO_CHOP
;
3262 flags_next
|= next_is_first
*
3264 rdma_count
|= -(chop
| next_is_first
);
3265 rdma_count
+= chop
& !next_is_first
;
3266 } else if (likely(cum_len_next
>= 0)) {
3273 small
= (mss
<= MXGEFW_SEND_SMALL_SIZE
);
3274 flags_next
= MXGEFW_FLAGS_TSO_PLD
|
3275 MXGEFW_FLAGS_FIRST
|
3276 (small
* MXGEFW_FLAGS_SMALL
);
3278 req
->addr_high
= high_swapped
;
3279 req
->addr_low
= htonl(low
);
3280 req
->pseudo_hdr_offset
= pseudo_hdr_offset
;
3281 req
->pad
= 0; /* complete solid 16-byte block */
3282 req
->rdma_count
= 1;
3283 req
->cksum_offset
= cksum_offset
;
3284 req
->length
= htons(seglen
);
3285 req
->flags
= flags
| ((cum_len
& 1) *
3286 MXGEFW_FLAGS_ALIGN_ODD
);
3287 if (cksum_offset
> seglen
)
3288 cksum_offset
-= seglen
;
3293 cum_len
= cum_len_next
;
3303 (req
-rdma_count
)->rdma_count
= (uint8_t)rdma_count
;
3306 req
->flags
|= MXGEFW_FLAGS_TSO_LAST
;
3307 } while (!(req
->flags
& (MXGEFW_FLAGS_TSO_CHOP
|
3308 MXGEFW_FLAGS_FIRST
)));
3310 myri10ge_submit_req(tx
, req_list
, count
);
3312 mutex_exit(&tx
->lock
);
3314 return (DDI_SUCCESS
);
3318 * Try to send the chain of buffers described by the mp. We must not
3319 * encapsulate more than eth->tx.req - eth->tx.done, or
3320 * MXGEFW_MAX_SEND_DESC, whichever is more.
/*
 * Main transmit routine for one frame on one slice.  req_list and
 * tx_info are caller-provided scratch arrays for the firmware requests
 * and the per-descriptor bookkeeping.
 *
 * What the surviving lines show, in order:
 *   - the checksum-offload and LSO attributes of mp are read; an LSO
 *     frame without HCK_PARTIALCKSUM is counted in xmit_lsobadflags
 *     and the function returns DDI_SUCCESS;
 *   - if fewer than max_segs slots are free, stall_early is bumped;
 *   - the chain is walked; zero-length mblks are counted in
 *     xmit_zero_len, and LSO copying is switched off when a block's
 *     db_lastfree is not myri10ge_db_lastfree;
 *   - chains with count >= max_segs are collapsed with
 *     myri10ge_pullup();
 *   - for LSO the headers are parsed and, when mss is below
 *     mgp->tx_boundary and lso_copy is set, the frame is handed to
 *     myri10ge_tx_tso_copy();
 *   - non-LSO frames shorter than myri10ge_tx_copylen are handed to
 *     myri10ge_tx_copy();
 *   - otherwise one DMA handle per mblk is allocated and bound, and a
 *     request is built per cookie piece, each piece ending at the next
 *     mgp->tx_boundary aligned address;
 *   - with tx->lock held the free slot count is checked again before
 *     myri10ge_send_locked() submits the frame;
 *   - the failure paths unbind and return the DMA handles, may retry
 *     after a pullup, and bump stall_late, stall or xmit_err.
 *
 * NOTE(review): many original lines are missing from this listing,
 * including the labels, gotos and most return statements, so the
 * control flow between these steps is not visible.  Confirm against
 * the pristine source before relying on details.
 */
3324 myri10ge_send(struct myri10ge_slice_state
*ss
, mblk_t
*mp
,
3325 mcp_kreq_ether_send_t
*req_list
, struct myri10ge_tx_buffer_state
*tx_info
)
3327 struct myri10ge_priv
*mgp
= ss
->mgp
;
3328 myri10ge_tx_ring_t
*tx
= &ss
->tx
;
3329 mcp_kreq_ether_send_t
*req
;
3330 struct myri10ge_tx_dma_handle
*handles
, *dma_handle
= NULL
;
3332 ddi_dma_cookie_t cookie
;
3333 int err
, rv
, count
, avail
, mblen
, try_pullup
, i
, max_segs
, maclen
,
3334 rdma_count
, cum_len
, lso_hdr_size
;
3335 uint32_t start
, stuff
, tx_offload_flags
;
3336 uint32_t seglen
, len
, mss
, boundary
, low
, high_swapped
;
3338 uint16_t pseudo_hdr_offset
;
3339 uint8_t flags
, cksum_offset
, odd_flag
;
3341 int lso_copy
= myri10ge_lso_copy
;
3345 /* Setup checksum offloading, if needed */
3346 mac_hcksum_get(mp
, &start
, &stuff
, NULL
, NULL
, &tx_offload_flags
);
3347 myri10ge_lso_info_get(mp
, &mss
, &tx_offload_flags
);
3348 if (tx_offload_flags
& HW_LSO
) {
3349 max_segs
= MYRI10GE_MAX_SEND_DESC_TSO
;
3350 if ((tx_offload_flags
& HCK_PARTIALCKSUM
) == 0) {
3351 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_lsobadflags
);
3353 return (DDI_SUCCESS
);
3356 max_segs
= MXGEFW_MAX_SEND_DESC
;
3361 pseudo_hdr_offset
= 0;
3363 /* leave an extra slot keep the ring from wrapping */
3364 avail
= tx
->mask
- (tx
->req
- tx
->done
);
3367 * If we have > MXGEFW_MAX_SEND_DESC, then any over-length
3368 * message will need to be pulled up in order to fit.
3369 * Otherwise, we are low on transmit descriptors, it is
3370 * probably better to stall and try again rather than pullup a
3374 if (avail
< max_segs
) {
3376 atomic_inc_32(&tx
->stall_early
);
3380 /* find out how long the frame is and how many segments it is */
3384 flags
= (MXGEFW_FLAGS_NO_TSO
| MXGEFW_FLAGS_FIRST
);
3385 for (bp
= mp
; bp
!= NULL
; bp
= bp
->b_cont
) {
3390 * we can't simply skip over 0-length mblks
3391 * because the hardware can't deal with them,
3392 * and we could leak them.
3394 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_zero_len
);
3399 * There's no advantage to copying most gesballoc
3400 * attached blocks, so disable lso copy in that case
3402 if (mss
&& lso_copy
== 1 && ((dbp
= bp
->b_datap
) != NULL
)) {
3403 if ((void *)dbp
->db_lastfree
!= myri10ge_db_lastfree
) {
3411 /* Try to pull up excessivly long chains */
3412 if (count
>= max_segs
) {
3413 err
= myri10ge_pullup(ss
, mp
);
3414 if (likely(err
== DDI_SUCCESS
)) {
3417 if (count
< MYRI10GE_MAX_SEND_DESC_TSO
) {
3419 * just let the h/w send it, it will be
3420 * inefficient, but us better than dropping
3422 max_segs
= MYRI10GE_MAX_SEND_DESC_TSO
;
3425 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err
);
3433 maclen
= myri10ge_ether_parse_header(mp
);
3435 if (tx_offload_flags
& HCK_PARTIALCKSUM
) {
3437 cksum_offset
= start
+ maclen
;
3438 pseudo_hdr_offset
= htons(stuff
+ maclen
);
3439 odd_flag
= MXGEFW_FLAGS_ALIGN_ODD
;
3440 flags
|= MXGEFW_FLAGS_CKSUM
;
3443 lso_hdr_size
= 0; /* -Wunitinialized */
3444 if (mss
) { /* LSO */
3445 /* this removes any CKSUM flag from before */
3446 flags
= (MXGEFW_FLAGS_TSO_HDR
| MXGEFW_FLAGS_FIRST
);
3448 * parse the headers and set cum_len to a negative
3449 * value to reflect the offset of the TCP payload
3451 lso_hdr_size
= myri10ge_lso_parse_header(mp
, maclen
);
3452 cum_len
= -lso_hdr_size
;
3453 if ((mss
< mgp
->tx_boundary
) && lso_copy
) {
3454 err
= myri10ge_tx_tso_copy(ss
, mp
, req_list
,
3455 lso_hdr_size
, pkt_size
, mss
, cksum_offset
);
3460 * for TSO, pseudo_hdr_offset holds mss. The firmware
3461 * figures out where to put the checksum by parsing
3465 pseudo_hdr_offset
= htons(mss
);
3466 } else if (pkt_size
<= MXGEFW_SEND_SMALL_SIZE
) {
3467 flags
|= MXGEFW_FLAGS_SMALL
;
3468 if (pkt_size
< myri10ge_tx_copylen
) {
3469 req
->cksum_offset
= cksum_offset
;
3470 req
->pseudo_hdr_offset
= pseudo_hdr_offset
;
3472 err
= myri10ge_tx_copy(ss
, mp
, req
);
3478 /* pull one DMA handle for each bp from our freelist */
3480 err
= myri10ge_alloc_tx_handles(ss
, count
, &handles
);
3481 if (err
!= DDI_SUCCESS
) {
3487 for (bp
= mp
; bp
!= NULL
; bp
= bp
->b_cont
) {
3489 dma_handle
= handles
;
3490 handles
= handles
->next
;
3492 rv
= ddi_dma_addr_bind_handle(dma_handle
->h
, NULL
,
3493 (caddr_t
)bp
->b_rptr
, mblen
,
3494 DDI_DMA_WRITE
| DDI_DMA_STREAMING
, DDI_DMA_SLEEP
, NULL
,
3495 &cookie
, &ncookies
);
3496 if (unlikely(rv
!= DDI_DMA_MAPPED
)) {
3499 dma_handle
->next
= handles
;
3500 handles
= dma_handle
;
3501 goto abort_with_handles
;
3504 /* reserve the slot */
3505 tx_info
[count
].m
= bp
;
3506 tx_info
[count
].handle
= dma_handle
;
3509 low
= MYRI10GE_LOWPART_TO_U32(cookie
.dmac_laddress
);
3511 htonl(MYRI10GE_HIGHPART_TO_U32(
3512 cookie
.dmac_laddress
));
3513 len
= (uint32_t)cookie
.dmac_size
;
3518 boundary
= (low
+ mgp
->tx_boundary
) &
3519 ~(mgp
->tx_boundary
- 1);
3520 seglen
= boundary
- low
;
3524 flags_next
= flags
& ~MXGEFW_FLAGS_FIRST
;
3525 cum_len_next
= cum_len
+ seglen
;
3527 (req
-rdma_count
)->rdma_count
=
3529 if (likely(cum_len
>= 0)) {
3531 int next_is_first
, chop
;
3533 chop
= (cum_len_next
> mss
);
3537 (cum_len_next
== 0);
3539 MXGEFW_FLAGS_TSO_CHOP
;
3540 flags_next
|= next_is_first
*
3543 -(chop
| next_is_first
);
3545 chop
& !next_is_first
;
3546 } else if (likely(cum_len_next
>= 0)) {
3554 MXGEFW_SEND_SMALL_SIZE
);
3556 MXGEFW_FLAGS_TSO_PLD
3557 | MXGEFW_FLAGS_FIRST
3559 MXGEFW_FLAGS_SMALL
);
3562 req
->addr_high
= high_swapped
;
3563 req
->addr_low
= htonl(low
);
3564 req
->pseudo_hdr_offset
= pseudo_hdr_offset
;
3565 req
->pad
= 0; /* complete solid 16-byte block */
3566 req
->rdma_count
= 1;
3567 req
->cksum_offset
= cksum_offset
;
3568 req
->length
= htons(seglen
);
3569 req
->flags
= flags
| ((cum_len
& 1) * odd_flag
);
3570 if (cksum_offset
> seglen
)
3571 cksum_offset
-= seglen
;
3576 cum_len
= cum_len_next
;
3579 /* make sure all the segments will fit */
3580 if (unlikely(count
>= max_segs
)) {
3581 MYRI10GE_ATOMIC_SLICE_STAT_INC(
3583 /* may try a pullup */
3587 goto abort_with_handles
;
3592 tx_info
[count
].m
= 0;
3597 ddi_dma_nextcookie(dma_handle
->h
, &cookie
);
3600 (req
-rdma_count
)->rdma_count
= (uint8_t)rdma_count
;
3605 req
->flags
|= MXGEFW_FLAGS_TSO_LAST
;
3606 } while (!(req
->flags
& (MXGEFW_FLAGS_TSO_CHOP
|
3607 MXGEFW_FLAGS_FIRST
)));
3610 /* calculate tx stats */
3615 payload
= pkt_size
- lso_hdr_size
;
3616 opackets
= (payload
/ mss
) + ((payload
% mss
) != 0);
3617 tx_info
[0].stat
.un
.all
= 0;
3618 tx_info
[0].ostat
.opackets
= opackets
;
3619 tx_info
[0].ostat
.obytes
= (opackets
- 1) * lso_hdr_size
3622 myri10ge_tx_stat(&tx_info
[0].stat
,
3623 (struct ether_header
*)(void *)mp
->b_rptr
, 1, pkt_size
);
3625 mutex_enter(&tx
->lock
);
3627 /* check to see if the slots are really there */
3628 avail
= tx
->mask
- (tx
->req
- tx
->done
);
3629 if (unlikely(avail
<= count
)) {
3630 mutex_exit(&tx
->lock
);
3635 myri10ge_send_locked(tx
, req_list
, tx_info
, count
);
3636 mutex_exit(&tx
->lock
);
3637 return (DDI_SUCCESS
);
3641 atomic_inc_32(&tx
->stall_late
);
3644 /* unbind and free handles from previous mblks */
3645 for (i
= 0; i
< count
; i
++) {
3649 dma_handle
= tx_info
[i
].handle
;
3650 (void) ddi_dma_unbind_handle(dma_handle
->h
);
3651 dma_handle
->next
= handles
;
3652 handles
= dma_handle
;
3653 tx_info
[i
].handle
= NULL
;
3654 tx_info
[i
].m
= NULL
;
3657 myri10ge_free_tx_handle_slist(tx
, handles
);
3660 err
= myri10ge_pullup(ss
, mp
);
3661 if (err
!= DDI_SUCCESS
&& try_pullup
== 2) {
3663 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err
);
3674 atomic_inc_32(&tx
->stall
);
3676 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err
);
3683 myri10ge_send_wrapper(void *arg
, mblk_t
*mp
)
3685 struct myri10ge_slice_state
*ss
= arg
;
3687 mcp_kreq_ether_send_t
*req_list
;
3690 * We need about 2.5KB of scratch space to handle transmits.
3691 * i86pc has only 8KB of kernel stack space, so we malloc the
3692 * scratch space there rather than keeping it on the stack.
3694 size_t req_size
, tx_info_size
;
3695 struct myri10ge_tx_buffer_state
*tx_info
;
3698 req_size
= sizeof (*req_list
) * (MYRI10GE_MAX_SEND_DESC_TSO
+ 4)
3700 req_bytes
= kmem_alloc(req_size
, KM_SLEEP
);
3701 tx_info_size
= sizeof (*tx_info
) * (MYRI10GE_MAX_SEND_DESC_TSO
+ 1);
3702 tx_info
= kmem_alloc(tx_info_size
, KM_SLEEP
);
3704 char req_bytes
[sizeof (*req_list
) * (MYRI10GE_MAX_SEND_DESC_TSO
+ 4)
3706 struct myri10ge_tx_buffer_state tx_info
[MYRI10GE_MAX_SEND_DESC_TSO
+ 1];
3709 /* ensure req_list entries are aligned to 8 bytes */
3710 req_list
= (struct mcp_kreq_ether_send
*)
3711 (((unsigned long)req_bytes
+ 7UL) & ~7UL);
3713 err
= myri10ge_send(ss
, mp
, req_list
, tx_info
);
3716 kmem_free(tx_info
, tx_info_size
);
3717 kmem_free(req_bytes
, req_size
);
3726 myri10ge_addmac(void *arg
, const uint8_t *mac_addr
)
3728 struct myri10ge_priv
*mgp
= arg
;
3731 if (mac_addr
== NULL
)
3734 mutex_enter(&mgp
->intrlock
);
3735 if (mgp
->macaddr_cnt
) {
3736 mutex_exit(&mgp
->intrlock
);
3739 err
= myri10ge_m_unicst(mgp
, mac_addr
);
3743 mutex_exit(&mgp
->intrlock
);
3747 bcopy(mac_addr
, mgp
->mac_addr
, sizeof (mgp
->mac_addr
));
3753 myri10ge_remmac(void *arg
, const uint8_t *mac_addr
)
3755 struct myri10ge_priv
*mgp
= arg
;
3757 mutex_enter(&mgp
->intrlock
);
3759 mutex_exit(&mgp
->intrlock
);
3766 myri10ge_fill_group(void *arg
, mac_ring_type_t rtype
, const int index
,
3767 mac_group_info_t
*infop
, mac_group_handle_t gh
)
3769 struct myri10ge_priv
*mgp
= arg
;
3771 if (rtype
!= MAC_RING_TYPE_RX
)
3774 infop
->mgi_driver
= (mac_group_driver_t
)mgp
;
3775 infop
->mgi_start
= NULL
;
3776 infop
->mgi_stop
= NULL
;
3777 infop
->mgi_addmac
= myri10ge_addmac
;
3778 infop
->mgi_remmac
= myri10ge_remmac
;
3779 infop
->mgi_count
= mgp
->num_slices
;
3783 myri10ge_ring_start(mac_ring_driver_t rh
, uint64_t mr_gen_num
)
3785 struct myri10ge_slice_state
*ss
;
3787 ss
= (struct myri10ge_slice_state
*)rh
;
3788 mutex_enter(&ss
->rx_lock
);
3789 ss
->rx_gen_num
= mr_gen_num
;
3790 mutex_exit(&ss
->rx_lock
);
3795 * Retrieve a value for one of the statistics for a particular rx ring
3798 myri10ge_rx_ring_stat(mac_ring_driver_t rh
, uint_t stat
, uint64_t *val
)
3800 struct myri10ge_slice_state
*ss
;
3802 ss
= (struct myri10ge_slice_state
*)rh
;
3804 case MAC_STAT_RBYTES
:
3805 *val
= ss
->rx_stats
.ibytes
;
3808 case MAC_STAT_IPACKETS
:
3809 *val
= ss
->rx_stats
.ipackets
;
3821 * Retrieve a value for one of the statistics for a particular tx ring
3824 myri10ge_tx_ring_stat(mac_ring_driver_t rh
, uint_t stat
, uint64_t *val
)
3826 struct myri10ge_slice_state
*ss
;
3828 ss
= (struct myri10ge_slice_state
*)rh
;
3830 case MAC_STAT_OBYTES
:
3831 *val
= ss
->tx
.stats
.obytes
;
3834 case MAC_STAT_OPACKETS
:
3835 *val
= ss
->tx
.stats
.opackets
;
3847 myri10ge_rx_ring_intr_disable(mac_intr_handle_t intrh
)
3849 struct myri10ge_slice_state
*ss
;
3851 ss
= (struct myri10ge_slice_state
*)intrh
;
3852 mutex_enter(&ss
->poll_lock
);
3853 ss
->rx_polling
= B_TRUE
;
3854 mutex_exit(&ss
->poll_lock
);
3859 myri10ge_rx_ring_intr_enable(mac_intr_handle_t intrh
)
3861 struct myri10ge_slice_state
*ss
;
3863 ss
= (struct myri10ge_slice_state
*)intrh
;
3864 mutex_enter(&ss
->poll_lock
);
3865 ss
->rx_polling
= B_FALSE
;
3867 *ss
->irq_claim
= BE_32(3);
3870 mutex_exit(&ss
->poll_lock
);
3876 myri10ge_fill_ring(void *arg
, mac_ring_type_t rtype
, const int rg_index
,
3877 const int ring_index
, mac_ring_info_t
*infop
, mac_ring_handle_t rh
)
3879 struct myri10ge_priv
*mgp
= arg
;
3880 struct myri10ge_slice_state
*ss
;
3881 mac_intr_t
*mintr
= &infop
->mri_intr
;
3883 ASSERT((unsigned int)ring_index
< mgp
->num_slices
);
3885 ss
= &mgp
->ss
[ring_index
];
3887 case MAC_RING_TYPE_RX
:
3889 infop
->mri_driver
= (mac_ring_driver_t
)ss
;
3890 infop
->mri_start
= myri10ge_ring_start
;
3891 infop
->mri_stop
= NULL
;
3892 infop
->mri_poll
= myri10ge_poll_rx
;
3893 infop
->mri_stat
= myri10ge_rx_ring_stat
;
3894 mintr
->mi_handle
= (mac_intr_handle_t
)ss
;
3895 mintr
->mi_enable
= myri10ge_rx_ring_intr_enable
;
3896 mintr
->mi_disable
= myri10ge_rx_ring_intr_disable
;
3898 case MAC_RING_TYPE_TX
:
3900 infop
->mri_driver
= (mac_ring_driver_t
)ss
;
3901 infop
->mri_start
= NULL
;
3902 infop
->mri_stop
= NULL
;
3903 infop
->mri_tx
= myri10ge_send_wrapper
;
3904 infop
->mri_stat
= myri10ge_tx_ring_stat
;
3912 myri10ge_nic_stat_destroy(struct myri10ge_priv
*mgp
)
3914 if (mgp
->ksp_stat
== NULL
)
3917 kstat_delete(mgp
->ksp_stat
);
3918 mgp
->ksp_stat
= NULL
;
3922 myri10ge_slice_stat_destroy(struct myri10ge_slice_state
*ss
)
3924 if (ss
->ksp_stat
== NULL
)
3927 kstat_delete(ss
->ksp_stat
);
3928 ss
->ksp_stat
= NULL
;
3932 myri10ge_info_destroy(struct myri10ge_priv
*mgp
)
3934 if (mgp
->ksp_info
== NULL
)
3937 kstat_delete(mgp
->ksp_info
);
3938 mgp
->ksp_info
= NULL
;
3942 myri10ge_nic_stat_kstat_update(kstat_t
*ksp
, int rw
)
3944 struct myri10ge_nic_stat
*ethstat
;
3945 struct myri10ge_priv
*mgp
;
3946 mcp_irq_data_t
*fw_stats
;
3949 if (rw
== KSTAT_WRITE
)
3952 ethstat
= (struct myri10ge_nic_stat
*)ksp
->ks_data
;
3953 mgp
= (struct myri10ge_priv
*)ksp
->ks_private
;
3954 fw_stats
= mgp
->ss
[0].fw_stats
;
3956 ethstat
->dma_read_bw_MBs
.value
.ul
= mgp
->read_dma
;
3957 ethstat
->dma_write_bw_MBs
.value
.ul
= mgp
->write_dma
;
3958 ethstat
->dma_read_write_bw_MBs
.value
.ul
= mgp
->read_write_dma
;
3959 if (myri10ge_tx_dma_attr
.dma_attr_flags
& DDI_DMA_FORCE_PHYSICAL
)
3960 ethstat
->dma_force_physical
.value
.ul
= 1;
3962 ethstat
->dma_force_physical
.value
.ul
= 0;
3963 ethstat
->lanes
.value
.ul
= mgp
->pcie_link_width
;
3964 ethstat
->dropped_bad_crc32
.value
.ul
=
3965 ntohl(fw_stats
->dropped_bad_crc32
);
3966 ethstat
->dropped_bad_phy
.value
.ul
=
3967 ntohl(fw_stats
->dropped_bad_phy
);
3968 ethstat
->dropped_link_error_or_filtered
.value
.ul
=
3969 ntohl(fw_stats
->dropped_link_error_or_filtered
);
3970 ethstat
->dropped_link_overflow
.value
.ul
=
3971 ntohl(fw_stats
->dropped_link_overflow
);
3972 ethstat
->dropped_multicast_filtered
.value
.ul
=
3973 ntohl(fw_stats
->dropped_multicast_filtered
);
3974 ethstat
->dropped_no_big_buffer
.value
.ul
=
3975 ntohl(fw_stats
->dropped_no_big_buffer
);
3976 ethstat
->dropped_no_small_buffer
.value
.ul
=
3977 ntohl(fw_stats
->dropped_no_small_buffer
);
3978 ethstat
->dropped_overrun
.value
.ul
=
3979 ntohl(fw_stats
->dropped_overrun
);
3980 ethstat
->dropped_pause
.value
.ul
=
3981 ntohl(fw_stats
->dropped_pause
);
3982 ethstat
->dropped_runt
.value
.ul
=
3983 ntohl(fw_stats
->dropped_runt
);
3984 ethstat
->link_up
.value
.ul
=
3985 ntohl(fw_stats
->link_up
);
3986 ethstat
->dropped_unicast_filtered
.value
.ul
=
3987 ntohl(fw_stats
->dropped_unicast_filtered
);
3992 myri10ge_slice_stat_kstat_update(kstat_t
*ksp
, int rw
)
3994 struct myri10ge_slice_stat
*ethstat
;
3995 struct myri10ge_slice_state
*ss
;
3997 if (rw
== KSTAT_WRITE
)
4000 ethstat
= (struct myri10ge_slice_stat
*)ksp
->ks_data
;
4001 ss
= (struct myri10ge_slice_state
*)ksp
->ks_private
;
4003 ethstat
->rx_big
.value
.ul
= ss
->j_rx_cnt
;
4004 ethstat
->rx_bigbuf_firmware
.value
.ul
= ss
->rx_big
.cnt
- ss
->j_rx_cnt
;
4005 ethstat
->rx_bigbuf_pool
.value
.ul
=
4006 ss
->jpool
.num_alloc
- ss
->jbufs_for_smalls
;
4007 ethstat
->rx_bigbuf_smalls
.value
.ul
= ss
->jbufs_for_smalls
;
4008 ethstat
->rx_small
.value
.ul
= ss
->rx_small
.cnt
-
4009 (ss
->rx_small
.mask
+ 1);
4010 ethstat
->tx_done
.value
.ul
= ss
->tx
.done
;
4011 ethstat
->tx_req
.value
.ul
= ss
->tx
.req
;
4012 ethstat
->tx_activate
.value
.ul
= ss
->tx
.activate
;
4013 ethstat
->xmit_sched
.value
.ul
= ss
->tx
.sched
;
4014 ethstat
->xmit_stall
.value
.ul
= ss
->tx
.stall
;
4015 ethstat
->xmit_stall_early
.value
.ul
= ss
->tx
.stall_early
;
4016 ethstat
->xmit_stall_late
.value
.ul
= ss
->tx
.stall_late
;
4017 ethstat
->xmit_err
.value
.ul
= MYRI10GE_SLICE_STAT(xmit_err
);
4022 myri10ge_info_kstat_update(kstat_t
*ksp
, int rw
)
4024 struct myri10ge_info
*info
;
4025 struct myri10ge_priv
*mgp
;
4028 if (rw
== KSTAT_WRITE
)
4031 info
= (struct myri10ge_info
*)ksp
->ks_data
;
4032 mgp
= (struct myri10ge_priv
*)ksp
->ks_private
;
4033 kstat_named_setstr(&info
->driver_version
, MYRI10GE_VERSION_STR
);
4034 kstat_named_setstr(&info
->firmware_version
, mgp
->fw_version
);
4035 kstat_named_setstr(&info
->firmware_name
, mgp
->fw_name
);
4036 kstat_named_setstr(&info
->interrupt_type
, mgp
->intr_type
);
4037 kstat_named_setstr(&info
->product_code
, mgp
->pc_str
);
4038 kstat_named_setstr(&info
->serial_number
, mgp
->sn_str
);
4042 static struct myri10ge_info myri10ge_info_template
= {
4043 { "driver_version", KSTAT_DATA_STRING
},
4044 { "firmware_version", KSTAT_DATA_STRING
},
4045 { "firmware_name", KSTAT_DATA_STRING
},
4046 { "interrupt_type", KSTAT_DATA_STRING
},
4047 { "product_code", KSTAT_DATA_STRING
},
4048 { "serial_number", KSTAT_DATA_STRING
},
4050 static kmutex_t myri10ge_info_template_lock
;
/*
 * Create the virtual (KSTAT_FLAG_VIRTUAL) named kstat "myri10ge_info" for
 * this device instance.
 *
 * mgp - per-device state; ksp_info receives the new kstat.
 *
 * The kstat is pointed at myri10ge_info_template / its lock, gets
 * myri10ge_info_kstat_update() as its update routine, and has ks_data_size
 * grown by strlen()+1 for every string the update routine publishes.
 * Returns DDI_FAILURE after logging when kstat_create() fails, DDI_SUCCESS
 * otherwise.
 */
4054 myri10ge_info_init(struct myri10ge_priv
*mgp
)
4058 ksp
= kstat_create("myri10ge", ddi_get_instance(mgp
->dip
),
4059 "myri10ge_info", "net", KSTAT_TYPE_NAMED
,
4060 sizeof (myri10ge_info_template
) /
4061 sizeof (kstat_named_t
), KSTAT_FLAG_VIRTUAL
);
4064 "%s: myri10ge_info_init: kstat_create failed", mgp
->name
);
4065 return (DDI_FAILURE
);
4067 mgp
->ksp_info
= ksp
;
4068 ksp
->ks_update
= myri10ge_info_kstat_update
;
4069 ksp
->ks_private
= (void *) mgp
;
4070 ksp
->ks_data
= &myri10ge_info_template
;
4071 ksp
->ks_lock
= &myri10ge_info_template_lock
;
/* account for the string payloads that follow the named entries */
4072 if (MYRI10GE_VERSION_STR
!= NULL
)
4073 ksp
->ks_data_size
+= strlen(MYRI10GE_VERSION_STR
) + 1;
4074 if (mgp
->fw_version
!= NULL
)
4075 ksp
->ks_data_size
+= strlen(mgp
->fw_version
) + 1;
/*
 * NOTE(review): fw_name and intr_type are measured without the NULL
 * guard used for the other strings -- confirm both are always set
 * before this function runs.
 */
4076 ksp
->ks_data_size
+= strlen(mgp
->fw_name
) + 1;
4077 ksp
->ks_data_size
+= strlen(mgp
->intr_type
) + 1;
4078 if (mgp
->pc_str
!= NULL
)
4079 ksp
->ks_data_size
+= strlen(mgp
->pc_str
) + 1;
4080 if (mgp
->sn_str
!= NULL
)
4081 ksp
->ks_data_size
+= strlen(mgp
->sn_str
) + 1;
4084 return (DDI_SUCCESS
);
/*
 * Create the per-NIC named kstat "myri10ge_nic_stats".
 *
 * mgp - per-device state; ksp_stat receives the new kstat.
 *
 * The kstat holds one kstat_named_t per member of struct myri10ge_nic_stat;
 * each member is initialised as KSTAT_DATA_ULONG under its own name, and
 * myri10ge_nic_stat_kstat_update() is installed as the update routine with
 * mgp as its private data.  Returns DDI_FAILURE after logging when
 * kstat_create() fails, DDI_SUCCESS otherwise.
 *
 * The failure message names this function (it used to say
 * "myri10ge_stat_init", which the slice variant also printed), and the
 * second, redundant initialisation of dropped_runt has been removed.
 */
4089 myri10ge_nic_stat_init(struct myri10ge_priv
*mgp
)
4092 struct myri10ge_nic_stat
*ethstat
;
4094 ksp
= kstat_create("myri10ge", ddi_get_instance(mgp
->dip
),
4095 "myri10ge_nic_stats", "net", KSTAT_TYPE_NAMED
,
4096 sizeof (*ethstat
) / sizeof (kstat_named_t
), 0);
4099 "%s: myri10ge_nic_stat_init: kstat_create failed", mgp
->name
);
4100 return (DDI_FAILURE
);
4102 mgp
->ksp_stat
= ksp
;
4103 ethstat
= (struct myri10ge_nic_stat
*)(ksp
->ks_data
);
4105 kstat_named_init(ðstat
->dma_read_bw_MBs
,
4106 "dma_read_bw_MBs", KSTAT_DATA_ULONG
);
4107 kstat_named_init(ðstat
->dma_write_bw_MBs
,
4108 "dma_write_bw_MBs", KSTAT_DATA_ULONG
);
4109 kstat_named_init(ðstat
->dma_read_write_bw_MBs
,
4110 "dma_read_write_bw_MBs", KSTAT_DATA_ULONG
);
4111 kstat_named_init(ðstat
->dma_force_physical
,
4112 "dma_force_physical", KSTAT_DATA_ULONG
);
4113 kstat_named_init(ðstat
->lanes
,
4114 "lanes", KSTAT_DATA_ULONG
);
4115 kstat_named_init(ðstat
->dropped_bad_crc32
,
4116 "dropped_bad_crc32", KSTAT_DATA_ULONG
);
4117 kstat_named_init(ðstat
->dropped_bad_phy
,
4118 "dropped_bad_phy", KSTAT_DATA_ULONG
);
4119 kstat_named_init(ðstat
->dropped_link_error_or_filtered
,
4120 "dropped_link_error_or_filtered", KSTAT_DATA_ULONG
);
4121 kstat_named_init(ðstat
->dropped_link_overflow
,
4122 "dropped_link_overflow", KSTAT_DATA_ULONG
);
4123 kstat_named_init(ðstat
->dropped_multicast_filtered
,
4124 "dropped_multicast_filtered", KSTAT_DATA_ULONG
);
4125 kstat_named_init(ðstat
->dropped_no_big_buffer
,
4126 "dropped_no_big_buffer", KSTAT_DATA_ULONG
);
4127 kstat_named_init(ðstat
->dropped_no_small_buffer
,
4128 "dropped_no_small_buffer", KSTAT_DATA_ULONG
);
4129 kstat_named_init(ðstat
->dropped_overrun
,
4130 "dropped_overrun", KSTAT_DATA_ULONG
);
4131 kstat_named_init(ðstat
->dropped_pause
,
4132 "dropped_pause", KSTAT_DATA_ULONG
);
4133 kstat_named_init(ðstat
->dropped_runt
,
4134 "dropped_runt", KSTAT_DATA_ULONG
);
4135 kstat_named_init(ðstat
->dropped_unicast_filtered
,
4136 "dropped_unicast_filtered", KSTAT_DATA_ULONG
);
4139 kstat_named_init(ðstat
->link_up
, "link_up", KSTAT_DATA_ULONG
);
4140 kstat_named_init(ðstat
->link_changes
, "link_changes",
4142 ksp
->ks_update
= myri10ge_nic_stat_kstat_update
;
4143 ksp
->ks_private
= (void *) mgp
;
4145 return (DDI_SUCCESS
);
/*
 * Create the per-slice named kstat "myri10ge_slice_stats".
 *
 * ss - slice whose counters are exported; ss->mgp is the owning device.
 *
 * The kstat instance number is (device instance * 1000) + slice index so
 * that equal slice numbers on different devices do not collide.  One
 * kstat_named_t is initialised per member of struct myri10ge_slice_stat,
 * and myri10ge_slice_stat_kstat_update() is installed as the update routine
 * with ss as its private data.  Returns DDI_FAILURE after logging when
 * kstat_create() fails, DDI_SUCCESS otherwise.
 *
 * The failure message now names this function; it previously printed
 * "myri10ge_stat_init", identical to the per-NIC variant's message.
 */
4149 myri10ge_slice_stat_init(struct myri10ge_slice_state
*ss
)
4151 struct myri10ge_priv
*mgp
= ss
->mgp
;
4153 struct myri10ge_slice_stat
*ethstat
;
4157 * fake an instance so that the same slice numbers from
4158 * different instances do not collide
4160 instance
= (ddi_get_instance(mgp
->dip
) * 1000) + (int)(ss
- mgp
->ss
);
4161 ksp
= kstat_create("myri10ge", instance
,
4162 "myri10ge_slice_stats", "net", KSTAT_TYPE_NAMED
,
4163 sizeof (*ethstat
) / sizeof (kstat_named_t
), 0);
4166 "%s: myri10ge_slice_stat_init: kstat_create failed", mgp
->name
);
4167 return (DDI_FAILURE
);
4170 ethstat
= (struct myri10ge_slice_stat
*)(ksp
->ks_data
);
4171 kstat_named_init(ðstat
->lro_bad_csum
, "lro_bad_csum",
4173 kstat_named_init(ðstat
->lro_flushed
, "lro_flushed",
4175 kstat_named_init(ðstat
->lro_queued
, "lro_queued",
4177 kstat_named_init(ðstat
->rx_bigbuf_firmware
, "rx_bigbuf_firmware",
4179 kstat_named_init(ðstat
->rx_bigbuf_pool
, "rx_bigbuf_pool",
4181 kstat_named_init(ðstat
->rx_bigbuf_smalls
, "rx_bigbuf_smalls",
4183 kstat_named_init(ðstat
->rx_copy
, "rx_copy",
4185 kstat_named_init(ðstat
->rx_big_nobuf
, "rx_big_nobuf",
4187 kstat_named_init(ðstat
->rx_small_nobuf
, "rx_small_nobuf",
4189 kstat_named_init(ðstat
->xmit_zero_len
, "xmit_zero_len",
4191 kstat_named_init(ðstat
->xmit_pullup
, "xmit_pullup",
4193 kstat_named_init(ðstat
->xmit_pullup_first
, "xmit_pullup_first",
4195 kstat_named_init(ðstat
->xmit_lowbuf
, "xmit_lowbuf",
4197 kstat_named_init(ðstat
->xmit_lsobadflags
, "xmit_lsobadflags",
4199 kstat_named_init(ðstat
->xmit_sched
, "xmit_sched",
4201 kstat_named_init(ðstat
->xmit_stall
, "xmit_stall",
4203 kstat_named_init(ðstat
->xmit_stall_early
, "xmit_stall_early",
4205 kstat_named_init(ðstat
->xmit_stall_late
, "xmit_stall_late",
4207 kstat_named_init(ðstat
->xmit_err
, "xmit_err",
4209 kstat_named_init(ðstat
->tx_req
, "tx_req",
4211 kstat_named_init(ðstat
->tx_activate
, "tx_activate",
4213 kstat_named_init(ðstat
->tx_done
, "tx_done",
4215 kstat_named_init(ðstat
->tx_handles_alloced
, "tx_handles_alloced",
4217 kstat_named_init(ðstat
->rx_big
, "rx_big",
4219 kstat_named_init(ðstat
->rx_small
, "rx_small",
4221 ksp
->ks_update
= myri10ge_slice_stat_kstat_update
;
4222 ksp
->ks_private
= (void *) ss
;
4224 return (DDI_SUCCESS
);
4229 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
4232 #include <sys/ddi_isa.h>
4233 void *device_arena_alloc(size_t size
, int vm_flag
);
4234 void device_arena_free(void *vaddr
, size_t size
);
/*
 * Try to enable ECRC generation on an upstream Nvidia PCIe bridge.
 *
 * mgp - per-device state; mgp->dip locates the parent bridge.
 *
 * Does nothing when the myri10ge_nvidia_ecrc_enable tunable is 0, when the
 * parent cannot be found or its config space cannot be set up, or when the
 * parent is not vendor 0x10de with device 0x005d (CK804) or 0x374-0x378
 * (MCP55).  Otherwise the bridge's config space is looked for at two
 * candidate physical bases (0xf0000000, then 0xe0000000): one page is mapped
 * with device_arena_alloc()/hat_devload(), the vendor and device IDs are
 * read back through the mapping, and on a match the register at offset
 * 0x178 is the one operated on.  The mapping is unloaded and freed on every
 * iteration.
 *
 * The read-back check now requires BOTH IDs to match what
 * pci_config_get16() returned for the parent.  It used to compare
 * vendor_id and device_id against read_did only, so read_vid was never
 * verified and a page that merely carried the same device ID was accepted.
 */
4237 myri10ge_enable_nvidia_ecrc(struct myri10ge_priv
*mgp
)
4239 dev_info_t
*parent_dip
;
4240 ddi_acc_handle_t handle
;
4241 unsigned long bus_number
, dev_number
, func_number
;
4242 unsigned long cfg_pa
, paddr
, base
, pgoffset
;
4245 int retval
= DDI_FAILURE
;
4247 uint16_t read_vid
, read_did
, vendor_id
, device_id
;
4249 if (!myri10ge_nvidia_ecrc_enable
)
4252 parent_dip
= ddi_get_parent(mgp
->dip
);
4253 if (parent_dip
== NULL
) {
4254 cmn_err(CE_WARN
, "%s: I'm an orphan?", mgp
->name
);
4258 if (pci_config_setup(parent_dip
, &handle
) != DDI_SUCCESS
) {
4260 "%s: Could not access my parent's registers", mgp
->name
);
/* reference IDs of the parent, read through the DDI config handle */
4264 vendor_id
= pci_config_get16(handle
, PCI_CONF_VENID
);
4265 device_id
= pci_config_get16(handle
, PCI_CONF_DEVID
);
4266 pci_config_teardown(&handle
);
4268 if (myri10ge_verbose
) {
4269 unsigned long bus_number
, dev_number
, func_number
;
4271 (void) myri10ge_reg_set(parent_dip
, ®_set
, &span
,
4272 &bus_number
, &dev_number
, &func_number
);
4273 if (myri10ge_verbose
)
4274 printf("%s: parent at %ld:%ld:%ld\n", mgp
->name
,
4275 bus_number
, dev_number
, func_number
);
4278 if (vendor_id
!= 0x10de)
4281 if (device_id
!= 0x005d /* CK804 */ &&
4282 (device_id
< 0x374 || device_id
> 0x378) /* MCP55 */) {
4285 (void) myri10ge_reg_set(parent_dip
, &dontcare
, &dontcare
,
4286 &bus_number
, &dev_number
, &func_number
);
4288 for (cfg_pa
= 0xf0000000UL
;
4289 retval
!= DDI_SUCCESS
&& cfg_pa
>= 0xe0000000UL
;
4290 cfg_pa
-= 0x10000000UL
) {
4291 /* find the config space address for the nvidia bridge */
4292 paddr
= (cfg_pa
+ bus_number
* 0x00100000UL
+
4293 (dev_number
* 8 + func_number
) * 0x00001000UL
);
4295 base
= paddr
& (~MMU_PAGEOFFSET
);
4296 pgoffset
= paddr
& MMU_PAGEOFFSET
;
4298 /* map it into the kernel */
4299 cvaddr
= device_arena_alloc(ptob(1), VM_NOSLEEP
);
4301 cmn_err(CE_WARN
, "%s: failed to map nf4: cvaddr\n",
4304 hat_devload(kas
.a_hat
, cvaddr
, mmu_ptob(1),
4305 i_ddi_paddr_to_pfn(base
),
4306 PROT_WRITE
|HAT_STRICTORDER
, HAT_LOAD_LOCK
);
4308 ptr
= cvaddr
+ pgoffset
;
4309 read_vid
= *(uint16_t *)(void *)(ptr
+ PCI_CONF_VENID
);
4310 read_did
= *(uint16_t *)(void *)(ptr
+ PCI_CONF_DEVID
);
/* only touch the register when the mapping really is the parent bridge */
4311 if (vendor_id
== read_vid
&& device_id
== read_did
) {
4312 ptr32
= (uint32_t *)(void *)(ptr
+ 0x178);
4313 if (myri10ge_verbose
)
4314 printf("%s: Enabling ECRC on upstream "
4315 "Nvidia bridge (0x%x:0x%x) "
4316 "at %ld:%ld:%ld\n", mgp
->name
,
4317 read_vid
, read_did
, bus_number
,
4318 dev_number
, func_number
);
4320 retval
= DDI_SUCCESS
;
4322 hat_unload(kas
.a_hat
, cvaddr
, ptob(1), HAT_UNLOAD_UNLOCK
);
4323 device_arena_free(cvaddr
, ptob(1));
/*
 * Second definition of myri10ge_enable_nvidia_ecrc(); no statements are
 * visible in its body.
 * NOTE(review): presumably the stub for builds where the x86-only
 * implementation above is compiled out -- confirm against the #if/#else
 * that surrounds it.
 */
4330 myri10ge_enable_nvidia_ecrc(struct myri10ge_priv
*mgp
)
4337 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
4338 * when the PCI-E Completion packets are aligned on an 8-byte
4339 * boundary. Some PCI-E chip sets always align Completion packets; on
4340 * the ones that do not, the alignment can be enforced by enabling
4341 * ECRC generation (if supported).
4343 * When PCI-E Completion packets are not aligned, it is actually more
4344 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
4346 * If the driver can neither enable ECRC nor verify that it has
4347 * already been enabled, then it must use a firmware image which works
4348 * around unaligned completion packets (ethp_z8e.dat), and it should
4349 * also ensure that it never gives the device a Read-DMA which is
4350 * larger than 2KB by setting the tx.boundary to 2KB. If ECRC is
4351 * enabled, then the driver should use the aligned (eth_z8e.dat)
4352 * firmware image, and set tx.boundary to 4KB.
/*
 * Probe whether the aligned-completion firmware can be used on this host.
 *
 * mgp - per-device state; tx_boundary, fw_name, eth_z8e and eth_z8e_length
 *       are set here.
 *
 * Starts from a 4096-byte tx_boundary (2048 when max_read_request_4k is 0),
 * selects and loads the "rss_eth_z8e" image, attempts to enable ECRC on an
 * upstream Nvidia bridge, and runs the MXGEFW_CMD_UNALIGNED_TEST DMA test.
 * Returns 0 to keep the aligned firmware.  A test status other than E2BIG
 * is logged as a DMA test failure, and ENOSYS additionally logs a request
 * to install up-to-date firmware.
 */
4357 myri10ge_firmware_probe(struct myri10ge_priv
*mgp
)
4361 mgp
->tx_boundary
= 4096;
4363 * Verify the max read request size was set to 4KB
4364 * before trying the test with 4KB.
4366 if (mgp
->max_read_request_4k
== 0)
4367 mgp
->tx_boundary
= 2048;
4369 * load the optimized firmware which assumes aligned PCIe
4370 * completions in order to see if it works on this host.
4373 mgp
->fw_name
= "rss_eth_z8e";
4374 mgp
->eth_z8e
= (unsigned char *)rss_eth_z8e
;
4375 mgp
->eth_z8e_length
= rss_eth_z8e_length
;
4377 status
= myri10ge_load_firmware(mgp
);
4382 * Enable ECRC if possible
4384 myri10ge_enable_nvidia_ecrc(mgp
);
4387 * Run a DMA test which watches for unaligned completions and
4388 * aborts on the first one seen.
4390 status
= myri10ge_dma_test(mgp
, MXGEFW_CMD_UNALIGNED_TEST
);
4392 return (0); /* keep the aligned firmware */
4394 if (status
!= E2BIG
)
4395 cmn_err(CE_WARN
, "%s: DMA test failed: %d\n",
4397 if (status
== ENOSYS
)
4398 cmn_err(CE_WARN
, "%s: Falling back to ethp! "
4399 "Please install up to date fw\n", mgp
->name
);
/*
 * Choose between the aligned ("rss_eth_z8e", tx_boundary 4096) and the
 * unaligned ("rss_ethp_z8e", tx_boundary 2048) firmware image and load it.
 *
 * mgp - per-device state; fw_name, eth_z8e, eth_z8e_length and tx_boundary
 *       are set here.
 *
 * myri10ge_force_firmware values 1 and 2 are reported in verbose mode as
 * forcing the aligned and unaligned assumption respectively.  A non-zero
 * PCIe link width below x8 is logged.  When myri10ge_firmware_probe()
 * returns 0 the already loaded aligned firmware is kept and 0 is returned;
 * otherwise the result of myri10ge_load_firmware() is returned.
 */
4404 myri10ge_select_firmware(struct myri10ge_priv
*mgp
)
4410 if (myri10ge_force_firmware
== 1) {
4411 if (myri10ge_verbose
)
4412 printf("%s: Assuming aligned completions (forced)\n",
4418 if (myri10ge_force_firmware
== 2) {
4419 if (myri10ge_verbose
)
4420 printf("%s: Assuming unaligned completions (forced)\n",
4426 /* If the width is less than 8, we may use the aligned firmware */
4427 if (mgp
->pcie_link_width
!= 0 && mgp
->pcie_link_width
< 8) {
4428 cmn_err(CE_WARN
, "!%s: PCIe link running at x%d\n",
4429 mgp
->name
, mgp
->pcie_link_width
);
4434 if (0 == myri10ge_firmware_probe(mgp
))
4435 return (0); /* keep optimized firmware */
/* aligned image: full 4KB read-DMA boundary */
4439 mgp
->fw_name
= "rss_eth_z8e";
4440 mgp
->eth_z8e
= (unsigned char *)rss_eth_z8e
;
4441 mgp
->eth_z8e_length
= rss_eth_z8e_length
;
4442 mgp
->tx_boundary
= 4096;
/* unaligned image: read-DMA limited to 2KB */
4444 mgp
->fw_name
= "rss_ethp_z8e";
4445 mgp
->eth_z8e
= (unsigned char *)rss_ethp_z8e
;
4446 mgp
->eth_z8e_length
= rss_ethp_z8e_length
;
4447 mgp
->tx_boundary
= 2048;
4450 return (myri10ge_load_firmware(mgp
));
/*
 * Allocate, and optionally hook up and enable, the device's interrupts.
 *
 * mgp         - per-device state; ddi_intr_type, intr_type, intr_size,
 *               htable, intr_cnt, intr_pri, icookie and intr_cap are set.
 * add_handler - selects between allocation only and allocation plus
 *               handler registration and enabling.
 *
 * The interrupt type is chosen in the order MSI-X, MSI, legacy, with MSI
 * and MSI-X masked out when myri10ge_use_msi / myri10ge_use_msix are 0.
 * Fewer interrupts than mgp->num_slices is a failure; more are clamped to
 * num_slices.  Every failure path that is visible after the handle table
 * has been allocated frees it (and, once interrupts are allocated, frees
 * those too) before returning DDI_FAILURE.  Handlers are registered with
 * myri10ge_intr() and the matching slice as argument; enabling uses the
 * block call when DDI_INTR_FLAG_BLOCK is advertised, per-vector calls
 * otherwise.
 *
 * The first failure message now names ddi_intr_get_supported_types(), the
 * call that actually failed; it previously blamed ddi_intr_get_nintrs().
 */
4454 myri10ge_add_intrs(struct myri10ge_priv
*mgp
, int add_handler
)
4456 dev_info_t
*devinfo
= mgp
->dip
;
4457 int count
, avail
, actual
, intr_types
;
4458 int x
, y
, rc
, inum
= 0;
4461 rc
= ddi_intr_get_supported_types(devinfo
, &intr_types
);
4462 if (rc
!= DDI_SUCCESS
) {
4464 "!%s: ddi_intr_get_supported_types() failure, rc = %d\n", mgp
->name
,
4466 return (DDI_FAILURE
);
/* honour the administrator's MSI / MSI-X tunables */
4469 if (!myri10ge_use_msi
)
4470 intr_types
&= ~DDI_INTR_TYPE_MSI
;
4471 if (!myri10ge_use_msix
)
4472 intr_types
&= ~DDI_INTR_TYPE_MSIX
;
4474 if (intr_types
& DDI_INTR_TYPE_MSIX
) {
4475 mgp
->ddi_intr_type
= DDI_INTR_TYPE_MSIX
;
4476 mgp
->intr_type
= "MSI-X";
4477 } else if (intr_types
& DDI_INTR_TYPE_MSI
) {
4478 mgp
->ddi_intr_type
= DDI_INTR_TYPE_MSI
;
4479 mgp
->intr_type
= "MSI";
4481 mgp
->ddi_intr_type
= DDI_INTR_TYPE_FIXED
;
4482 mgp
->intr_type
= "Legacy";
4484 /* Get number of interrupts */
4485 rc
= ddi_intr_get_nintrs(devinfo
, mgp
->ddi_intr_type
, &count
);
4486 if ((rc
!= DDI_SUCCESS
) || (count
== 0)) {
4487 cmn_err(CE_WARN
, "%s: ddi_intr_get_nintrs() failure, rc: %d, "
4488 "count: %d", mgp
->name
, rc
, count
);
4490 return (DDI_FAILURE
);
4493 /* Get number of available interrupts */
4494 rc
= ddi_intr_get_navail(devinfo
, mgp
->ddi_intr_type
, &avail
);
4495 if ((rc
!= DDI_SUCCESS
) || (avail
== 0)) {
4496 cmn_err(CE_WARN
, "%s: ddi_intr_get_navail() failure, "
4497 "rc: %d, avail: %d\n", mgp
->name
, rc
, avail
);
4498 return (DDI_FAILURE
);
4500 if (avail
< count
) {
4502 "!%s: nintrs() returned %d, navail returned %d",
4503 mgp
->name
, count
, avail
);
/* one interrupt per slice is required; extras are not requested */
4507 if (count
< mgp
->num_slices
)
4508 return (DDI_FAILURE
);
4510 if (count
> mgp
->num_slices
)
4511 count
= mgp
->num_slices
;
4513 /* Allocate memory for MSI interrupts */
4514 mgp
->intr_size
= count
* sizeof (ddi_intr_handle_t
);
4515 mgp
->htable
= kmem_alloc(mgp
->intr_size
, KM_SLEEP
);
4517 rc
= ddi_intr_alloc(devinfo
, mgp
->htable
, mgp
->ddi_intr_type
, inum
,
4518 count
, &actual
, DDI_INTR_ALLOC_NORMAL
);
4520 if ((rc
!= DDI_SUCCESS
) || (actual
== 0)) {
4521 cmn_err(CE_WARN
, "%s: ddi_intr_alloc() failed: %d",
4524 kmem_free(mgp
->htable
, mgp
->intr_size
);
4526 return (DDI_FAILURE
);
4529 if ((actual
< count
) && myri10ge_verbose
) {
4530 cmn_err(CE_NOTE
, "%s: got %d/%d slices",
4531 mgp
->name
, actual
, count
);
4534 mgp
->intr_cnt
= actual
;
4537 * Get priority for first irq, assume remaining are all the same
4539 if (ddi_intr_get_pri(mgp
->htable
[0], &mgp
->intr_pri
)
4541 cmn_err(CE_WARN
, "%s: ddi_intr_get_pri() failed", mgp
->name
);
4543 /* Free already allocated intr */
4544 for (y
= 0; y
< actual
; y
++) {
4545 (void) ddi_intr_free(mgp
->htable
[y
]);
4548 kmem_free(mgp
->htable
, mgp
->intr_size
);
4550 return (DDI_FAILURE
);
4553 mgp
->icookie
= (void *)(uintptr_t)mgp
->intr_pri
;
4556 return (DDI_SUCCESS
);
4558 /* Call ddi_intr_add_handler() */
4559 for (x
= 0; x
< actual
; x
++) {
4560 if (ddi_intr_add_handler(mgp
->htable
[x
], myri10ge_intr
,
4561 (caddr_t
)&mgp
->ss
[x
], NULL
) != DDI_SUCCESS
) {
4562 cmn_err(CE_WARN
, "%s: ddi_intr_add_handler() failed",
4565 /* Free already allocated intr */
4566 for (y
= 0; y
< actual
; y
++) {
4567 (void) ddi_intr_free(mgp
->htable
[y
]);
4570 kmem_free(mgp
->htable
, mgp
->intr_size
);
4572 return (DDI_FAILURE
);
4576 (void) ddi_intr_get_cap(mgp
->htable
[0], &mgp
->intr_cap
);
4577 if (mgp
->intr_cap
& DDI_INTR_FLAG_BLOCK
) {
4578 /* Call ddi_intr_block_enable() for MSI */
4579 (void) ddi_intr_block_enable(mgp
->htable
, mgp
->intr_cnt
);
4581 /* Call ddi_intr_enable() for MSI non block enable */
4582 for (x
= 0; x
< mgp
->intr_cnt
; x
++) {
4583 (void) ddi_intr_enable(mgp
->htable
[x
]);
4587 return (DDI_SUCCESS
);
/*
 * Undo myri10ge_add_intrs(): disable, unhook and free the interrupts, then
 * release the handle table.
 *
 * mgp               - per-device state; htable, intr_cnt, intr_cap and
 *                     intr_size are read.
 * handler_installed - non-zero when handlers were registered and enabled;
 *                     only then are the disable and remove-handler steps
 *                     performed.
 *
 * Failures of ddi_intr_remove_handler() and ddi_intr_free() are logged per
 * vector.  The remove-handler message was built from two adjacent literals
 * with no separating blank and therefore printed "forvec"; a trailing
 * space has been added to the first literal.
 */
4591 myri10ge_rem_intrs(struct myri10ge_priv
*mgp
, int handler_installed
)
4595 /* Disable all interrupts */
4596 if (handler_installed
) {
4597 if (mgp
->intr_cap
& DDI_INTR_FLAG_BLOCK
) {
4598 /* Call ddi_intr_block_disable() */
4599 (void) ddi_intr_block_disable(mgp
->htable
,
4602 for (x
= 0; x
< mgp
->intr_cnt
; x
++) {
4603 (void) ddi_intr_disable(mgp
->htable
[x
]);
4608 for (x
= 0; x
< mgp
->intr_cnt
; x
++) {
4609 if (handler_installed
) {
4610 /* Call ddi_intr_remove_handler() */
4611 err
= ddi_intr_remove_handler(mgp
->htable
[x
]);
4612 if (err
!= DDI_SUCCESS
) {
4614 "%s: ddi_intr_remove_handler for "
4615 "vec %d returned %d\n", mgp
->name
,
4619 err
= ddi_intr_free(mgp
->htable
[x
]);
4620 if (err
!= DDI_SUCCESS
) {
4622 "%s: ddi_intr_free for vec %d returned %d\n",
4626 kmem_free(mgp
->htable
, mgp
->intr_size
);
/*
 * Find out whether DDI_DMA_FORCE_PHYSICAL can be used for transmit DMA.
 *
 * dip - device node used for the trial allocations.
 *
 * Sets DDI_DMA_FORCE_PHYSICAL in the shared myri10ge_tx_dma_attr and runs
 * two trials: allocating a DMA handle (checked for DDI_DMA_BADATTR), then a
 * 128-byte myri10ge_dma_alloc() with the same attributes.  Resources from
 * the trials are released again.  On the failure path a verbose-only
 * message is printed and the flag is cleared from myri10ge_tx_dma_attr, so
 * later allocations go through the IOMMU.
 */
4631 myri10ge_test_physical(dev_info_t
*dip
)
4633 ddi_dma_handle_t handle
;
4634 struct myri10ge_dma_stuff dma
;
4638 /* test #1, sufficient for older sparc systems */
4639 myri10ge_tx_dma_attr
.dma_attr_flags
= DDI_DMA_FORCE_PHYSICAL
;
4640 err
= ddi_dma_alloc_handle(dip
, &myri10ge_tx_dma_attr
,
4641 DDI_DMA_DONTWAIT
, NULL
, &handle
);
4642 if (err
== DDI_DMA_BADATTR
)
4644 ddi_dma_free_handle(&handle
);
4646 /* test #2, required on Olympis where the bind is what fails */
4647 addr
= myri10ge_dma_alloc(dip
, 128, &myri10ge_tx_dma_attr
,
4648 &myri10ge_dev_access_attr
, DDI_DMA_STREAMING
,
4649 DDI_DMA_WRITE
|DDI_DMA_STREAMING
, &dma
, 0, DDI_DMA_DONTWAIT
);
4652 myri10ge_dma_free(&dma
);
4656 if (myri10ge_verbose
)
4657 printf("myri10ge%d: DDI_DMA_FORCE_PHYSICAL failed, "
4658 "using IOMMU\n", ddi_get_instance(dip
));
/* fall back: drop the flag so later tx allocations use the IOMMU */
4660 myri10ge_tx_dma_attr
.dma_attr_flags
&= ~DDI_DMA_FORCE_PHYSICAL
;
/*
 * Load the driver tunables from device properties.
 *
 * dip - device node whose properties are consulted.
 *
 * Each tunable is read with ddi_prop_get_int(), passing the tunable's
 * current value as the default so that an absent property leaves it
 * unchanged.  Several values are then sanitised: tx_copylen is raised to at
 * least 60, an mtu_override in 1500..9000 is converted into myri10ge_mtu
 * (other non-zero values are reported), bigbufs_initial and bigbufs_max are
 * raised to at least 128 with max >= initial, an out-of-range rss_hash is
 * replaced by MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT, and small_bytes is rounded
 * up so that small_bytes + MXGEFW_PAD is a multiple of 128.
 *
 * myri10ge_nvidia_ecrc_enable used to be read with a literal default of 1,
 * unlike every other tunable; a value preset in the variable was therefore
 * overwritten whenever the property was absent.  It now defaults to its
 * current value like its neighbours.
 */
4664 myri10ge_get_props(dev_info_t
*dip
)
4667 myri10ge_flow_control
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4668 "myri10ge_flow_control", myri10ge_flow_control
);
4670 myri10ge_intr_coal_delay
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4671 "myri10ge_intr_coal_delay", myri10ge_intr_coal_delay
);
4673 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
/* default to the current setting, consistent with the other tunables */
4674 myri10ge_nvidia_ecrc_enable
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4675 "myri10ge_nvidia_ecrc_enable", myri10ge_nvidia_ecrc_enable
);
4679 myri10ge_use_msi
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4680 "myri10ge_use_msi", myri10ge_use_msi
);
4682 myri10ge_deassert_wait
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4683 "myri10ge_deassert_wait", myri10ge_deassert_wait
);
4685 myri10ge_verbose
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4686 "myri10ge_verbose", myri10ge_verbose
);
4688 myri10ge_tx_copylen
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4689 "myri10ge_tx_copylen", myri10ge_tx_copylen
);
4691 if (myri10ge_tx_copylen
< 60) {
4693 "myri10ge_tx_copylen must be >= 60 bytes\n");
4694 myri10ge_tx_copylen
= 60;
4697 myri10ge_mtu_override
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4698 "myri10ge_mtu_override", myri10ge_mtu_override
);
/* the override is a payload MTU; add link header, firmware pad and VLAN tag */
4700 if (myri10ge_mtu_override
>= 1500 && myri10ge_mtu_override
<= 9000)
4701 myri10ge_mtu
= myri10ge_mtu_override
+
4702 sizeof (struct ether_header
) + MXGEFW_PAD
+ VLAN_TAGSZ
;
4703 else if (myri10ge_mtu_override
!= 0) {
4705 "myri10ge_mtu_override must be between 1500 and "
4709 myri10ge_bigbufs_initial
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4710 "myri10ge_bigbufs_initial", myri10ge_bigbufs_initial
);
4711 myri10ge_bigbufs_max
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4712 "myri10ge_bigbufs_max", myri10ge_bigbufs_max
);
4714 myri10ge_watchdog_reset
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4715 "myri10ge_watchdog_reset", myri10ge_watchdog_reset
);
4717 if (myri10ge_bigbufs_initial
< 128) {
4719 "myri10ge_bigbufs_initial be at least 128\n");
4720 myri10ge_bigbufs_initial
= 128;
4722 if (myri10ge_bigbufs_max
< 128) {
4724 "myri10ge_bigbufs_max be at least 128\n");
4725 myri10ge_bigbufs_max
= 128;
4728 if (myri10ge_bigbufs_max
< myri10ge_bigbufs_initial
) {
4730 "myri10ge_bigbufs_max must be >= "
4731 "myri10ge_bigbufs_initial\n");
4732 myri10ge_bigbufs_max
= myri10ge_bigbufs_initial
;
4735 myri10ge_force_firmware
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4736 "myri10ge_force_firmware", myri10ge_force_firmware
);
4738 myri10ge_max_slices
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4739 "myri10ge_max_slices", myri10ge_max_slices
);
4741 myri10ge_use_msix
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4742 "myri10ge_use_msix", myri10ge_use_msix
);
4744 myri10ge_rss_hash
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4745 "myri10ge_rss_hash", myri10ge_rss_hash
);
4747 if (myri10ge_rss_hash
> MXGEFW_RSS_HASH_TYPE_MAX
||
4748 myri10ge_rss_hash
< MXGEFW_RSS_HASH_TYPE_IPV4
) {
4749 cmn_err(CE_WARN
, "myri10ge: Illegal rssh hash type %d\n",
4751 myri10ge_rss_hash
= MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT
;
4753 myri10ge_lro
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4754 "myri10ge_lro", myri10ge_lro
);
4755 myri10ge_lro_cnt
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4756 "myri10ge_lro_cnt", myri10ge_lro_cnt
);
4757 myri10ge_lro_max_aggr
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4758 "myri10ge_lro_max_aggr", myri10ge_lro_max_aggr
);
4759 myri10ge_tx_hash
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4760 "myri10ge_tx_hash", myri10ge_tx_hash
);
4761 myri10ge_use_lso
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4762 "myri10ge_use_lso", myri10ge_use_lso
);
4763 myri10ge_lso_copy
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4764 "myri10ge_lso_copy", myri10ge_lso_copy
);
4765 myri10ge_tx_handles_initial
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4766 "myri10ge_tx_handles_initial", myri10ge_tx_handles_initial
);
4767 myri10ge_small_bytes
= ddi_prop_get_int(DDI_DEV_T_ANY
, dip
, 0,
4768 "myri10ge_small_bytes", myri10ge_small_bytes
);
/* small_bytes + MXGEFW_PAD must be a multiple of 128; round up if not */
4769 if ((myri10ge_small_bytes
+ MXGEFW_PAD
) & (128 -1)) {
4770 cmn_err(CE_WARN
, "myri10ge: myri10ge_small_bytes (%d)\n",
4771 myri10ge_small_bytes
);
4772 cmn_err(CE_WARN
, "must be aligned on 128b bndry -2\n");
4773 myri10ge_small_bytes
+= 128;
4774 myri10ge_small_bytes
&= ~(128 -1);
4775 myri10ge_small_bytes
-= MXGEFW_PAD
;
4776 cmn_err(CE_WARN
, "rounded up to %d\n",
4777 myri10ge_small_bytes
);
/*
 * NOTE(review): rss_hash is reassigned here, after it was already
 * validated above -- confirm which branch this belongs to and whether
 * it is intentional.
 */
4779 myri10ge_rss_hash
= MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT
;
4783 #ifndef PCI_EXP_LNKSTA
4784 #define PCI_EXP_LNKSTA 18
/*
 * Locate a PCI capability in a device's configuration space.
 *
 * handle - config-space access handle of the device.
 * capptr - output pointer for the capability offset.
 * capid  - capability ID to look for.
 *
 * Warns when the status register lacks PCI_STAT_CAP.  Otherwise walks the
 * capability list from PCI_CONF_CAP_PTR, following PCI_CAP_NEXT_PTR until
 * the ID matches or PCI_CAP_NEXT_PTR_NULL is reached.  A "Bad capability
 * offset" warning is also emitted on one path.
 */
4788 myri10ge_find_cap(ddi_acc_handle_t handle
, uint8_t *capptr
, uint8_t capid
)
4793 /* check to see if we have capabilities */
4794 status
= pci_config_get16(handle
, PCI_CONF_STAT
);
4795 if (!(status
& PCI_STAT_CAP
)) {
4796 cmn_err(CE_WARN
, "PCI_STAT_CAP not found\n");
4800 ptr
= pci_config_get8(handle
, PCI_CONF_CAP_PTR
);
4802 /* Walk the capabilities list, looking for a PCI Express cap */
4803 while (ptr
!= PCI_CAP_NEXT_PTR_NULL
) {
4804 if (pci_config_get8(handle
, ptr
+ PCI_CAP_ID
) == capid
)
4806 ptr
= pci_config_get8(handle
, ptr
+ PCI_CAP_NEXT_PTR
);
4809 cmn_err(CE_WARN
, "Bad capability offset %d\n", ptr
);
/*
 * Program the PCIe maximum read request size to 4096 bytes.
 *
 * handle - config-space access handle of the device.
 *
 * Finds the PCI Express capability, rewrites the max-read-request field of
 * the device control register to PCIE_DEVCTL_MAX_READ_REQ_4096, and reads
 * the register back to verify; a mismatch is logged together with the value
 * read.  A missing capability is logged as well.
 */
4817 myri10ge_set_max_readreq(ddi_acc_handle_t handle
)
4823 err
= myri10ge_find_cap(handle
, &ptr
, PCI_CAP_ID_PCI_E
);
4825 cmn_err(CE_WARN
, "could not find PCIe cap\n");
4829 /* set max read req to 4096 */
4830 val
= pci_config_get16(handle
, ptr
+ PCIE_DEVCTL
);
4831 val
= (val
& ~PCIE_DEVCTL_MAX_READ_REQ_MASK
) |
4832 PCIE_DEVCTL_MAX_READ_REQ_4096
;
4833 pci_config_put16(handle
, ptr
+ PCIE_DEVCTL
, val
);
/* read back: the write may not have been accepted */
4834 val
= pci_config_get16(handle
, ptr
+ PCIE_DEVCTL
);
4835 if ((val
& (PCIE_DEVCTL_MAX_READ_REQ_4096
)) !=
4836 PCIE_DEVCTL_MAX_READ_REQ_4096
) {
4837 cmn_err(CE_WARN
, "could not set max read req (%x)\n", val
);
/*
 * Read the negotiated PCIe link width.
 *
 * handle - config-space access handle of the device.
 * link   - output pointer for the link width.
 *
 * Finds the PCI Express capability and reads the link status register,
 * masking it with PCIE_LINKSTS_NEG_WIDTH_MASK.
 *
 * The warning for a missing capability used to read "could not set max
 * read req", copied from myri10ge_set_max_readreq(); it now states the
 * actual failure, using the same wording that function uses for it.
 */
4844 myri10ge_read_pcie_link_width(ddi_acc_handle_t handle
, int *link
)
4850 err
= myri10ge_find_cap(handle
, &ptr
, PCI_CAP_ID_PCI_E
);
4852 cmn_err(CE_WARN
, "could not find PCIe cap\n");
4856 /* read link width */
4857 val
= pci_config_get16(handle
, ptr
+ PCIE_LINKSTS
);
4858 val
&= PCIE_LINKSTS_NEG_WIDTH_MASK
;
/*
 * Watchdog recovery: inspect a stalled NIC and, if permitted, restart it.
 *
 * mgp - per-device state; cfg_hdl, vso and mh are used.
 *
 * When the PCI command register shows bus mastering disabled
 * (PCI_COMM_ME clear), the reboot status is fetched through the registers
 * at vso + 0x10 / 0x18 / 0x14 and logged.  Unless myri10ge_watchdog_reset
 * is set, a "not resetting" warning is logged instead of restarting.
 * Otherwise the interface is stopped and started again with the _locked
 * variants, and mac_tx_update() is called.
 */
4864 myri10ge_reset_nic(struct myri10ge_priv
*mgp
)
4866 ddi_acc_handle_t handle
= mgp
->cfg_hdl
;
4871 cmd
= pci_config_get16(handle
, PCI_CONF_COMM
);
4872 if ((cmd
& PCI_COMM_ME
) == 0) {
4874 * Bus master DMA disabled? Check to see if the card
4875 * rebooted due to a parity error For now, just report
4879 /* enter read32 mode */
4880 pci_config_put8(handle
, mgp
->vso
+ 0x10, 0x3);
4881 /* read REBOOT_STATUS (0xfffffff0) */
4882 pci_config_put32(handle
, mgp
->vso
+ 0x18, 0xfffffff0);
4883 reboot
= pci_config_get16(handle
, mgp
->vso
+ 0x14);
4884 cmn_err(CE_WARN
, "%s NIC rebooted 0x%x\n", mgp
->name
, reboot
);
4887 if (!myri10ge_watchdog_reset
) {
4888 cmn_err(CE_WARN
, "%s: not resetting\n", mgp
->name
);
4892 myri10ge_stop_locked(mgp
);
4893 err
= myri10ge_start_locked(mgp
);
4894 if (err
== DDI_FAILURE
) {
4897 mac_tx_update(mgp
->mh
);
/*
 * Test whether a transmit ring made no progress since the last watchdog
 * tick.
 *
 * tx - transmit ring to examine.
 *
 * The condition holds when all three are true: sched differs from stall,
 * done still equals the watchdog_done snapshot, and the watchdog_req
 * snapshot differs from watchdog_done.
 */
4902 myri10ge_ring_stalled(myri10ge_tx_ring_t
*tx
)
4904 if (tx
->sched
!= tx
->stall
&&
4905 tx
->done
== tx
->watchdog_done
&&
4906 tx
->watchdog_req
!= tx
->watchdog_done
)
/*
 * Periodic watchdog, re-armed through timeout().
 *
 * arg - opaque timeout argument (the per-device state).
 *
 * Runs under mgp->intrlock.  When the interface is not in
 * MYRI10GE_ETH_RUNNING the state is logged and the lock dropped.
 * Otherwise every slice's tx ring is checked with myri10ge_ring_stalled().
 * On a stall, if the firmware's dropped_pause count equals the value saved
 * on the previous tick, the ring state is logged and myri10ge_reset_nic()
 * is called; the other branch logs a flow-control hint.  A dead NIC is
 * logged and the lock dropped.  Finally the per-slice done/req snapshots
 * are refreshed, slices whose rx_copy counter moved get additional jumbo
 * buffers which are then restocked under the pool mutex, rx_pause is saved
 * and the timer is re-armed.
 *
 * The stall message used to end in "\n)", printing the closing parenthesis
 * after the line break; the two characters are now in the right order.
 */
4912 myri10ge_watchdog(void *arg
)
4914 struct myri10ge_priv
*mgp
;
4915 struct myri10ge_slice_state
*ss
;
4916 myri10ge_tx_ring_t
*tx
;
4918 int slices_stalled
, rx_pause
, i
;
4922 mutex_enter(&mgp
->intrlock
);
4923 if (mgp
->running
!= MYRI10GE_ETH_RUNNING
) {
4925 "%s not running, not rearming watchdog (%d)\n",
4926 mgp
->name
, mgp
->running
);
4927 mutex_exit(&mgp
->intrlock
);
4931 rx_pause
= ntohl(mgp
->ss
[0].fw_stats
->dropped_pause
);
4934 * make sure nic is stalled before we reset the nic, so as to
4935 * ensure we don't rip the transmit data structures out from
4936 * under a pending transmit
4939 for (slices_stalled
= 0, i
= 0; i
< mgp
->num_slices
; i
++) {
4940 tx
= &mgp
->ss
[i
].tx
;
4941 slices_stalled
= myri10ge_ring_stalled(tx
);
4946 if (slices_stalled
) {
/* an unchanged pause-drop count means the stall is not flow control */
4947 if (mgp
->watchdog_rx_pause
== rx_pause
) {
4949 "%s slice %d stalled:(%d, %d, %d, %d, %d %d %d)\n",
4950 mgp
->name
, i
, tx
->sched
, tx
->stall
,
4951 tx
->done
, tx
->watchdog_done
, tx
->req
, tx
->pkt_done
,
4952 (int)ntohl(mgp
->ss
[i
].fw_stats
->send_done_count
));
4953 nic_ok
= myri10ge_reset_nic(mgp
);
4956 "%s Flow controlled, check link partner\n",
4963 "%s Nic dead, not rearming watchdog\n", mgp
->name
);
4964 mutex_exit(&mgp
->intrlock
);
4967 for (i
= 0; i
< mgp
->num_slices
; i
++) {
/* snapshot progress counters for the next tick's stall test */
4970 tx
->watchdog_done
= tx
->done
;
4971 tx
->watchdog_req
= tx
->req
;
4972 if (ss
->watchdog_rx_copy
!= MYRI10GE_SLICE_STAT(rx_copy
)) {
4973 ss
->watchdog_rx_copy
= MYRI10GE_SLICE_STAT(rx_copy
);
4975 min(ss
->jpool
.num_alloc
,
4976 myri10ge_bigbufs_max
-
4977 (ss
->jpool
.num_alloc
-
4978 ss
->jbufs_for_smalls
));
4980 (void) myri10ge_add_jbufs(ss
, add_rx
, 0);
4981 /* now feed them to the firmware */
4982 mutex_enter(&ss
->jpool
.mtx
);
4983 myri10ge_restock_jumbos(ss
);
4984 mutex_exit(&ss
->jpool
.mtx
);
4988 mgp
->watchdog_rx_pause
= rx_pause
;
4990 mgp
->timer_id
= timeout(myri10ge_watchdog
, mgp
,
4992 mutex_exit(&mgp
->intrlock
);
/*
 * ndd "get" handler for myri10ge_intr_coal_delay: prints
 * mgp->intr_coal_delay in decimal into the reply message mp.  cp is the
 * per-device state registered with nd_load().
 */
4997 myri10ge_get_coalesce(queue_t
*q
, mblk_t
*mp
, caddr_t cp
, cred_t
*credp
)
5000 struct myri10ge_priv
*mgp
= (struct myri10ge_priv
*)(void *)cp
;
5001 (void) mi_mpprintf(mp
, "%d", mgp
->intr_coal_delay
);
/*
 * ndd "set" handler for myri10ge_intr_coal_delay.
 *
 * value - decimal string supplied by the caller, parsed with mi_strtol().
 * cp    - per-device state registered with nd_load().
 *
 * Under myri10ge_param_lock the parsed value is stored in
 * mgp->intr_coal_delay and written in network byte order (htonl) through
 * mgp->intr_coal_delay_ptr.
 */
5007 myri10ge_set_coalesce(queue_t
*q
, mblk_t
*mp
, char *value
,
5008 caddr_t cp
, cred_t
*credp
)
5011 struct myri10ge_priv
*mgp
= (struct myri10ge_priv
*)(void *)cp
;
5015 new_value
= mi_strtol(value
, &end
, 10);
5019 mutex_enter(&myri10ge_param_lock
);
5020 mgp
->intr_coal_delay
= (int)new_value
;
5021 *mgp
->intr_coal_delay_ptr
= htonl(mgp
->intr_coal_delay
);
5022 mutex_exit(&myri10ge_param_lock
);
/*
 * ndd "get" handler for myri10ge_flow_control: prints mgp->pause in decimal
 * into the reply message mp.  cp is the per-device state registered with
 * nd_load().
 */
5028 myri10ge_get_pauseparam(queue_t
*q
, mblk_t
*mp
, caddr_t cp
, cred_t
*credp
)
5031 struct myri10ge_priv
*mgp
= (struct myri10ge_priv
*)(void *)cp
;
5032 (void) mi_mpprintf(mp
, "%d", mgp
->pause
);
/*
 * ndd "set" handler for myri10ge_flow_control.
 *
 * value - decimal string supplied by the caller, parsed with mi_strtol().
 * cp    - per-device state registered with nd_load().
 *
 * Under myri10ge_param_lock, myri10ge_change_pause() is called only when
 * the parsed value differs from the current mgp->pause.
 */
5038 myri10ge_set_pauseparam(queue_t
*q
, mblk_t
*mp
, char *value
,
5039 caddr_t cp
, cred_t
*credp
)
5042 struct myri10ge_priv
*mgp
= (struct myri10ge_priv
*)(void *)cp
;
5047 new_value
= mi_strtol(value
, &end
, 10);
5053 mutex_enter(&myri10ge_param_lock
);
5054 if (new_value
!= mgp
->pause
)
5055 err
= myri10ge_change_pause(mgp
, new_value
);
5056 mutex_exit(&myri10ge_param_lock
);
/*
 * Generic ndd "get" handler: cp points at an int, whose value is printed in
 * decimal into the reply message mp.
 */
5062 myri10ge_get_int(queue_t
*q
, mblk_t
*mp
, caddr_t cp
, cred_t
*credp
)
5065 (void) mi_mpprintf(mp
, "%d", *(int *)(void *)cp
);
/*
 * Generic ndd "set" handler: parses value as a decimal number with
 * mi_strtol() and stores it into the int that cp points at.
 */
5071 myri10ge_set_int(queue_t
*q
, mblk_t
*mp
, char *value
,
5072 caddr_t cp
, cred_t
*credp
)
5078 new_value
= mi_strtol(value
, &end
, 10);
5081 *(int *)(void *)cp
= new_value
;
/*
 * Build the ndd parameter table for a device.
 *
 * mgp - per-device state; nd_head is reset to NULL and then populated.
 *
 * Two parameters (interrupt coalescing delay, flow control) use dedicated
 * handlers and receive mgp as their argument.  The remaining ones use the
 * generic int handlers and point directly at the driver-wide tunable
 * variables.  nd_load() return values are ignored.
 */
5087 myri10ge_ndd_init(struct myri10ge_priv
*mgp
)
5089 mgp
->nd_head
= NULL
;
5091 (void) nd_load(&mgp
->nd_head
, "myri10ge_intr_coal_delay",
5092 myri10ge_get_coalesce
, myri10ge_set_coalesce
, (caddr_t
)mgp
);
5093 (void) nd_load(&mgp
->nd_head
, "myri10ge_flow_control",
5094 myri10ge_get_pauseparam
, myri10ge_set_pauseparam
, (caddr_t
)mgp
);
5095 (void) nd_load(&mgp
->nd_head
, "myri10ge_verbose",
5096 myri10ge_get_int
, myri10ge_set_int
, (caddr_t
)&myri10ge_verbose
);
5097 (void) nd_load(&mgp
->nd_head
, "myri10ge_deassert_wait",
5098 myri10ge_get_int
, myri10ge_set_int
,
5099 (caddr_t
)&myri10ge_deassert_wait
);
5100 (void) nd_load(&mgp
->nd_head
, "myri10ge_bigbufs_max",
5101 myri10ge_get_int
, myri10ge_set_int
,
5102 (caddr_t
)&myri10ge_bigbufs_max
);
5103 (void) nd_load(&mgp
->nd_head
, "myri10ge_lro",
5104 myri10ge_get_int
, myri10ge_set_int
,
5105 (caddr_t
)&myri10ge_lro
);
5106 (void) nd_load(&mgp
->nd_head
, "myri10ge_lro_max_aggr",
5107 myri10ge_get_int
, myri10ge_set_int
,
5108 (caddr_t
)&myri10ge_lro_max_aggr
);
5109 (void) nd_load(&mgp
->nd_head
, "myri10ge_tx_hash",
5110 myri10ge_get_int
, myri10ge_set_int
,
5111 (caddr_t
)&myri10ge_tx_hash
);
5112 (void) nd_load(&mgp
->nd_head
, "myri10ge_lso_copy",
5113 myri10ge_get_int
, myri10ge_set_int
,
5114 (caddr_t
)&myri10ge_lso_copy
);
/*
 * Release the ndd parameter table built by myri10ge_ndd_init() by passing
 * mgp->nd_head to nd_free().
 */
5118 myri10ge_ndd_fini(struct myri10ge_priv
*mgp
)
5120 nd_free(&mgp
->nd_head
);
/*
 * MAC ioctl entry point.
 *
 * arg - per-device state.
 * wq  - write queue the ioctl arrived on.
 * mp  - M_IOCTL message; its b_rptr is read as a struct iocblk.
 *
 * The command is taken from ioc_cmd and ndd requests are passed to
 * nd_getset() with the device's parameter table.  The error is taken from
 * ioc_error, and the message is answered with miocack() (using ioc_count)
 * or miocnak().
 */
5124 myri10ge_m_ioctl(void *arg
, queue_t
*wq
, mblk_t
*mp
)
5126 struct iocblk
*iocp
;
5127 struct myri10ge_priv
*mgp
= arg
;
5130 iocp
= (struct iocblk
*)(void *)mp
->b_rptr
;
5131 cmd
= iocp
->ioc_cmd
;
5139 ok
= nd_getset(wq
, mgp
->nd_head
, mp
);
5147 err
= iocp
->ioc_error
;
5150 miocack(wq
, mp
, iocp
->ioc_count
, err
);
5152 miocnak(wq
, mp
, 0, err
);
/* head of the list of device states, linked through ->next */
5155 static struct myri10ge_priv
*mgp_list
;
/*
 * Look up a device state by instance number.
 *
 * unit - instance number, compared against ddi_get_instance() of each
 *        list entry.
 *
 * The list is walked with myri10ge_param_lock held.
 */
5157 struct myri10ge_priv
*
5158 myri10ge_get_instance(uint_t unit
)
5160 struct myri10ge_priv
*mgp
;
5162 mutex_enter(&myri10ge_param_lock
);
5163 for (mgp
= mgp_list
; mgp
!= NULL
; mgp
= mgp
->next
) {
5164 if (unit
== ddi_get_instance(mgp
->dip
)) {
5169 mutex_exit(&myri10ge_param_lock
);
/*
 * Counterpart of myri10ge_get_instance(); its only visible statements take
 * and drop myri10ge_param_lock.
 * NOTE(review): presumably releases a reference taken by the lookup --
 * confirm against the statement executed while the lock is held.
 */
5174 myri10ge_put_instance(struct myri10ge_priv
*mgp
)
5176 mutex_enter(&myri10ge_param_lock
);
5178 mutex_exit(&myri10ge_param_lock
);
/*
 * MAC capability query entry point.
 *
 * arg      - per-device state.
 * cap      - capability being asked about.
 * cap_data - capability-specific output buffer.
 *
 * MAC_CAPAB_HCKSUM: reports HCKSUM_INET_PARTIAL.
 * MAC_CAPAB_RINGS:  static groups with one ring per slice; receive exposes
 *                   one group (myri10ge_fill_group), transmit exposes none.
 * LSO:              myri10ge_use_lso and the MYRI10GE_TSO feature bit are
 *                   tested before LSO_TX_BASIC_TCP_IPV4 is reported with a
 *                   maximum of (uint16_t)-1.
 */
5182 myri10ge_m_getcapab(void *arg
, mac_capab_t cap
, void *cap_data
)
5184 struct myri10ge_priv
*mgp
= arg
;
5185 uint32_t *cap_hcksum
;
5186 mac_capab_lso_t
*cap_lso
;
5187 mac_capab_rings_t
*cap_rings
;
5190 case MAC_CAPAB_HCKSUM
:
5191 cap_hcksum
= cap_data
;
5192 *cap_hcksum
= HCKSUM_INET_PARTIAL
;
5194 case MAC_CAPAB_RINGS
:
5195 cap_rings
= cap_data
;
5196 switch (cap_rings
->mr_type
) {
5197 case MAC_RING_TYPE_RX
:
5198 cap_rings
->mr_group_type
= MAC_GROUP_TYPE_STATIC
;
5199 cap_rings
->mr_rnum
= mgp
->num_slices
;
5200 cap_rings
->mr_gnum
= 1;
5201 cap_rings
->mr_rget
= myri10ge_fill_ring
;
5202 cap_rings
->mr_gget
= myri10ge_fill_group
;
5204 case MAC_RING_TYPE_TX
:
5205 cap_rings
->mr_group_type
= MAC_GROUP_TYPE_STATIC
;
5206 cap_rings
->mr_rnum
= mgp
->num_slices
;
5207 cap_rings
->mr_gnum
= 0;
5208 cap_rings
->mr_rget
= myri10ge_fill_ring
;
5209 cap_rings
->mr_gget
= NULL
;
5217 if (!myri10ge_use_lso
)
5219 if (!(mgp
->features
& MYRI10GE_TSO
))
5221 cap_lso
->lso_flags
= LSO_TX_BASIC_TCP_IPV4
;
5222 cap_lso
->lso_basic_tcp_ipv4
.lso_max
= (uint16_t)-1;
/*
 * MAC statistics entry point.
 *
 * arg  - per-device state.
 * stat - statistic being requested.
 * val  - output for the statistic's value.
 *
 * Interface speed is reported as 10 Gb/s and duplex as LINK_DUPLEX_FULL.
 * Packet, byte, multicast and broadcast counters are summed over all
 * slices from the per-slice rx_stats / tx.stats structures.  Buffer
 * shortage, input error, too-long and too-short counts come from slice 0's
 * firmware statistics block (converted with ntohl), combined where shown
 * with per-slice software counters.
 */
5233 myri10ge_m_stat(void *arg
, uint_t stat
, uint64_t *val
)
5235 struct myri10ge_priv
*mgp
= arg
;
5236 struct myri10ge_rx_ring_stats
*rstat
;
5237 struct myri10ge_tx_ring_stats
*tstat
;
5238 mcp_irq_data_t
*fw_stats
= mgp
->ss
[0].fw_stats
;
5239 struct myri10ge_slice_state
*ss
;
5244 case MAC_STAT_IFSPEED
:
5245 *val
= 10ull * 1000ull * 1000000ull;
5248 case MAC_STAT_MULTIRCV
:
5249 for (i
= 0; i
< mgp
->num_slices
; i
++) {
5250 rstat
= &mgp
->ss
[i
].rx_stats
;
5251 tmp
+= rstat
->multircv
;
5256 case MAC_STAT_BRDCSTRCV
:
5257 for (i
= 0; i
< mgp
->num_slices
; i
++) {
5258 rstat
= &mgp
->ss
[i
].rx_stats
;
5259 tmp
+= rstat
->brdcstrcv
;
5264 case MAC_STAT_MULTIXMT
:
5265 for (i
= 0; i
< mgp
->num_slices
; i
++) {
5266 tstat
= &mgp
->ss
[i
].tx
.stats
;
5267 tmp
+= tstat
->multixmt
;
5272 case MAC_STAT_BRDCSTXMT
:
5273 for (i
= 0; i
< mgp
->num_slices
; i
++) {
5274 tstat
= &mgp
->ss
[i
].tx
.stats
;
5275 tmp
+= tstat
->brdcstxmt
;
/* firmware drop counters plus the per-slice software "no buffer" counts */
5280 case MAC_STAT_NORCVBUF
:
5281 tmp
= ntohl(fw_stats
->dropped_no_big_buffer
);
5282 tmp
+= ntohl(fw_stats
->dropped_no_small_buffer
);
5283 tmp
+= ntohl(fw_stats
->dropped_link_overflow
);
5284 for (i
= 0; i
< mgp
->num_slices
; i
++) {
5286 tmp
+= MYRI10GE_SLICE_STAT(rx_big_nobuf
);
5287 tmp
+= MYRI10GE_SLICE_STAT(rx_small_nobuf
);
5292 case MAC_STAT_IERRORS
:
5293 tmp
+= ntohl(fw_stats
->dropped_bad_crc32
);
5294 tmp
+= ntohl(fw_stats
->dropped_bad_phy
);
5295 tmp
+= ntohl(fw_stats
->dropped_runt
);
5296 tmp
+= ntohl(fw_stats
->dropped_overrun
);
5300 case MAC_STAT_OERRORS
:
5301 for (i
= 0; i
< mgp
->num_slices
; i
++) {
5303 tmp
+= MYRI10GE_SLICE_STAT(xmit_lsobadflags
);
5304 tmp
+= MYRI10GE_SLICE_STAT(xmit_err
);
5309 case MAC_STAT_RBYTES
:
5310 for (i
= 0; i
< mgp
->num_slices
; i
++) {
5311 rstat
= &mgp
->ss
[i
].rx_stats
;
5312 tmp
+= rstat
->ibytes
;
5317 case MAC_STAT_IPACKETS
:
5318 for (i
= 0; i
< mgp
->num_slices
; i
++) {
5319 rstat
= &mgp
->ss
[i
].rx_stats
;
5320 tmp
+= rstat
->ipackets
;
5325 case MAC_STAT_OBYTES
:
5326 for (i
= 0; i
< mgp
->num_slices
; i
++) {
5327 tstat
= &mgp
->ss
[i
].tx
.stats
;
5328 tmp
+= tstat
->obytes
;
5333 case MAC_STAT_OPACKETS
:
5334 for (i
= 0; i
< mgp
->num_slices
; i
++) {
5335 tstat
= &mgp
->ss
[i
].tx
.stats
;
5336 tmp
+= tstat
->opackets
;
5341 case ETHER_STAT_TOOLONG_ERRORS
:
5342 *val
= ntohl(fw_stats
->dropped_overrun
);
5346 case ETHER_STAT_TOOSHORT_ERRORS
:
5347 *val
= ntohl(fw_stats
->dropped_runt
);
5351 case ETHER_STAT_LINK_PAUSE
:
5355 case ETHER_STAT_LINK_AUTONEG
:
5359 case ETHER_STAT_LINK_DUPLEX
:
5360 *val
= LINK_DUPLEX_FULL
;
/*
 * Callback table of type mac_callbacks_t for this driver; myri10ge_attach
 * stores its address in macp->m_callbacks before calling mac_register().
 * The initializers are positional.  The first is the flag word
 * (MC_IOCTL | MC_GETCAPAB); myri10ge_m_multicst is one of the entry points.
 * NOTE(review): the remaining positional entries are not visible in this
 * view -- confirm order against the mac_callbacks_t definition before
 * editing.
 */
5370 static mac_callbacks_t myri10ge_m_callbacks
= {
5371 (MC_IOCTL
| MC_GETCAPAB
),
5376 myri10ge_m_multicst
,
5386 myri10ge_probe_slices(struct myri10ge_priv
*mgp
)
5391 mgp
->num_slices
= 1;
5393 /* hit the board with a reset to ensure it is alive */
5394 (void) memset(&cmd
, 0, sizeof (cmd
));
5395 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_RESET
, &cmd
);
5397 cmn_err(CE_WARN
, "%s: failed reset\n", mgp
->name
);
5401 if (myri10ge_use_msix
== 0)
5404 /* tell it the size of the interrupt queues */
5405 cmd
.data0
= mgp
->max_intr_slots
* sizeof (struct mcp_slot
);
5406 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_SET_INTRQ_SIZE
, &cmd
);
5408 cmn_err(CE_WARN
, "%s: failed MXGEFW_CMD_SET_INTRQ_SIZE\n",
5413 /* ask the maximum number of slices it supports */
5414 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_GET_MAX_RSS_QUEUES
,
5419 mgp
->num_slices
= cmd
.data0
;
5422 * if the admin did not specify a limit to how many
5423 * slices we should use, cap it automatically to the
5424 * number of CPUs currently online
5426 if (myri10ge_max_slices
== -1)
5427 myri10ge_max_slices
= ncpus
;
5429 if (mgp
->num_slices
> myri10ge_max_slices
)
5430 mgp
->num_slices
= myri10ge_max_slices
;
5434 * Now try to allocate as many MSI-X vectors as we have
5435 * slices. We give up on MSI-X if we can only get a single
5438 while (mgp
->num_slices
> 1) {
5439 /* make sure it is a power of two */
5440 while (!ISP2(mgp
->num_slices
))
5442 if (mgp
->num_slices
== 1)
5445 status
= myri10ge_add_intrs(mgp
, 0);
5447 myri10ge_rem_intrs(mgp
, 0);
5448 if (mgp
->intr_cnt
== mgp
->num_slices
) {
5449 if (myri10ge_verbose
)
5450 printf("Got %d slices!\n",
5454 mgp
->num_slices
= mgp
->intr_cnt
;
5456 mgp
->num_slices
= mgp
->num_slices
/ 2;
5460 if (myri10ge_verbose
)
5461 printf("Got %d slices\n", mgp
->num_slices
);
5466 myri10ge_lro_free(struct myri10ge_slice_state
*ss
)
5468 struct lro_entry
*lro
;
5470 while (ss
->lro_free
!= NULL
) {
5472 ss
->lro_free
= lro
->next
;
5473 kmem_free(lro
, sizeof (*lro
));
5478 myri10ge_lro_alloc(struct myri10ge_slice_state
*ss
)
5480 struct lro_entry
*lro
;
5483 ss
->lro_free
= NULL
;
5484 ss
->lro_active
= NULL
;
5486 for (idx
= 0; idx
< myri10ge_lro_cnt
; idx
++) {
5487 lro
= kmem_zalloc(sizeof (*lro
), KM_SLEEP
);
5490 lro
->next
= ss
->lro_free
;
5496 myri10ge_free_slices(struct myri10ge_priv
*mgp
)
5498 struct myri10ge_slice_state
*ss
;
5502 if (mgp
->ss
== NULL
)
5505 for (i
= 0; i
< mgp
->num_slices
; i
++) {
5507 if (ss
->rx_done
.entry
== NULL
)
5509 myri10ge_dma_free(&ss
->rx_done
.dma
);
5510 ss
->rx_done
.entry
= NULL
;
5511 if (ss
->fw_stats
== NULL
)
5513 myri10ge_dma_free(&ss
->fw_stats_dma
);
5514 ss
->fw_stats
= NULL
;
5515 mutex_destroy(&ss
->rx_lock
);
5516 mutex_destroy(&ss
->tx
.lock
);
5517 mutex_destroy(&ss
->tx
.handle_lock
);
5518 mutex_destroy(&ss
->poll_lock
);
5519 myri10ge_jpool_fini(ss
);
5520 myri10ge_slice_stat_destroy(ss
);
5521 myri10ge_lro_free(ss
);
5523 bytes
= sizeof (*mgp
->ss
) * mgp
->num_slices
;
5524 kmem_free(mgp
->ss
, bytes
);
5530 myri10ge_alloc_slices(struct myri10ge_priv
*mgp
)
5532 struct myri10ge_slice_state
*ss
;
5536 bytes
= sizeof (*mgp
->ss
) * mgp
->num_slices
;
5537 mgp
->ss
= kmem_zalloc(bytes
, KM_SLEEP
);
5538 if (mgp
->ss
== NULL
)
5540 for (i
= 0; i
< mgp
->num_slices
; i
++) {
5545 /* allocate the per-slice firmware stats */
5546 bytes
= sizeof (*ss
->fw_stats
);
5547 ss
->fw_stats
= (mcp_irq_data_t
*)(void *)
5548 myri10ge_dma_alloc(mgp
->dip
, bytes
,
5549 &myri10ge_misc_dma_attr
, &myri10ge_dev_access_attr
,
5550 DDI_DMA_CONSISTENT
, DDI_DMA_READ
|DDI_DMA_CONSISTENT
,
5551 &ss
->fw_stats_dma
, 1, DDI_DMA_DONTWAIT
);
5552 if (ss
->fw_stats
== NULL
)
5554 (void) memset(ss
->fw_stats
, 0, bytes
);
5556 /* allocate rx done ring */
5557 bytes
= mgp
->max_intr_slots
*
5558 sizeof (*ss
->rx_done
.entry
);
5559 ss
->rx_done
.entry
= (mcp_slot_t
*)(void *)
5560 myri10ge_dma_alloc(mgp
->dip
, bytes
,
5561 &myri10ge_misc_dma_attr
, &myri10ge_dev_access_attr
,
5562 DDI_DMA_CONSISTENT
, DDI_DMA_READ
|DDI_DMA_CONSISTENT
,
5563 &ss
->rx_done
.dma
, 1, DDI_DMA_DONTWAIT
);
5564 if (ss
->rx_done
.entry
== NULL
) {
5567 (void) memset(ss
->rx_done
.entry
, 0, bytes
);
5568 mutex_init(&ss
->rx_lock
, NULL
, MUTEX_DEFAULT
, mgp
->icookie
);
5569 mutex_init(&ss
->tx
.lock
, NULL
, MUTEX_DEFAULT
, NULL
);
5570 mutex_init(&ss
->tx
.handle_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
5571 mutex_init(&ss
->poll_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
5572 myri10ge_jpool_init(ss
);
5573 (void) myri10ge_slice_stat_init(ss
);
5574 myri10ge_lro_alloc(ss
);
5580 myri10ge_free_slices(mgp
);
5585 myri10ge_save_msi_state(struct myri10ge_priv
*mgp
,
5586 ddi_acc_handle_t handle
)
5591 err
= myri10ge_find_cap(handle
, &ptr
, PCI_CAP_ID_MSI
);
5593 cmn_err(CE_WARN
, "%s: could not find MSI cap\n",
5595 return (DDI_FAILURE
);
5597 mgp
->pci_saved_state
.msi_ctrl
=
5598 pci_config_get16(handle
, ptr
+ PCI_MSI_CTRL
);
5599 mgp
->pci_saved_state
.msi_addr_low
=
5600 pci_config_get32(handle
, ptr
+ PCI_MSI_ADDR_OFFSET
);
5601 mgp
->pci_saved_state
.msi_addr_high
=
5602 pci_config_get32(handle
, ptr
+ PCI_MSI_ADDR_OFFSET
+ 4);
5603 mgp
->pci_saved_state
.msi_data_32
=
5604 pci_config_get16(handle
, ptr
+ PCI_MSI_32BIT_DATA
);
5605 mgp
->pci_saved_state
.msi_data_64
=
5606 pci_config_get16(handle
, ptr
+ PCI_MSI_64BIT_DATA
);
5607 return (DDI_SUCCESS
);
5611 myri10ge_restore_msi_state(struct myri10ge_priv
*mgp
,
5612 ddi_acc_handle_t handle
)
5617 err
= myri10ge_find_cap(handle
, &ptr
, PCI_CAP_ID_MSI
);
5619 cmn_err(CE_WARN
, "%s: could not find MSI cap\n",
5621 return (DDI_FAILURE
);
5624 pci_config_put16(handle
, ptr
+ PCI_MSI_CTRL
,
5625 mgp
->pci_saved_state
.msi_ctrl
);
5626 pci_config_put32(handle
, ptr
+ PCI_MSI_ADDR_OFFSET
,
5627 mgp
->pci_saved_state
.msi_addr_low
);
5628 pci_config_put32(handle
, ptr
+ PCI_MSI_ADDR_OFFSET
+ 4,
5629 mgp
->pci_saved_state
.msi_addr_high
);
5630 pci_config_put16(handle
, ptr
+ PCI_MSI_32BIT_DATA
,
5631 mgp
->pci_saved_state
.msi_data_32
);
5632 pci_config_put16(handle
, ptr
+ PCI_MSI_64BIT_DATA
,
5633 mgp
->pci_saved_state
.msi_data_64
);
5635 return (DDI_SUCCESS
);
5639 myri10ge_save_pci_state(struct myri10ge_priv
*mgp
)
5641 ddi_acc_handle_t handle
= mgp
->cfg_hdl
;
5643 int err
= DDI_SUCCESS
;
5646 /* Save the non-extended PCI config space 32-bits at a time */
5647 for (i
= 0; i
< 16; i
++)
5648 mgp
->pci_saved_state
.base
[i
] =
5649 pci_config_get32(handle
, i
*4);
5651 /* now save MSI interrupt state *, if needed */
5652 if (mgp
->ddi_intr_type
== DDI_INTR_TYPE_MSI
)
5653 err
= myri10ge_save_msi_state(mgp
, handle
);
5659 myri10ge_restore_pci_state(struct myri10ge_priv
*mgp
)
5661 ddi_acc_handle_t handle
= mgp
->cfg_hdl
;
5663 int err
= DDI_SUCCESS
;
5666 /* Restore the non-extended PCI config space 32-bits at a time */
5667 for (i
= 15; i
>= 0; i
--)
5668 pci_config_put32(handle
, i
*4, mgp
->pci_saved_state
.base
[i
]);
5670 /* now restore MSI interrupt state *, if needed */
5671 if (mgp
->ddi_intr_type
== DDI_INTR_TYPE_MSI
)
5672 err
= myri10ge_restore_msi_state(mgp
, handle
);
5674 if (mgp
->max_read_request_4k
)
5675 (void) myri10ge_set_max_readreq(handle
);
5681 myri10ge_suspend(dev_info_t
*dip
)
5683 struct myri10ge_priv
*mgp
= ddi_get_driver_private(dip
);
5687 cmn_err(CE_WARN
, "null dip in myri10ge_suspend\n");
5688 return (DDI_FAILURE
);
5690 if (mgp
->dip
!= dip
) {
5691 cmn_err(CE_WARN
, "bad dip in myri10ge_suspend\n");
5692 return (DDI_FAILURE
);
5694 mutex_enter(&mgp
->intrlock
);
5695 if (mgp
->running
== MYRI10GE_ETH_RUNNING
) {
5696 mgp
->running
= MYRI10GE_ETH_STOPPING
;
5697 mutex_exit(&mgp
->intrlock
);
5698 (void) untimeout(mgp
->timer_id
);
5699 mutex_enter(&mgp
->intrlock
);
5700 myri10ge_stop_locked(mgp
);
5701 mgp
->running
= MYRI10GE_ETH_SUSPENDED_RUNNING
;
5703 status
= myri10ge_save_pci_state(mgp
);
5704 mutex_exit(&mgp
->intrlock
);
5709 myri10ge_resume(dev_info_t
*dip
)
5711 struct myri10ge_priv
*mgp
= ddi_get_driver_private(dip
);
5712 int status
= DDI_SUCCESS
;
5715 cmn_err(CE_WARN
, "null dip in myri10ge_resume\n");
5716 return (DDI_FAILURE
);
5718 if (mgp
->dip
!= dip
) {
5719 cmn_err(CE_WARN
, "bad dip in myri10ge_resume\n");
5720 return (DDI_FAILURE
);
5723 mutex_enter(&mgp
->intrlock
);
5724 status
= myri10ge_restore_pci_state(mgp
);
5725 if (status
== DDI_SUCCESS
&&
5726 mgp
->running
== MYRI10GE_ETH_SUSPENDED_RUNNING
) {
5727 status
= myri10ge_start_locked(mgp
);
5729 mutex_exit(&mgp
->intrlock
);
5730 if (status
!= DDI_SUCCESS
)
5733 /* start the watchdog timer */
5734 mgp
->timer_id
= timeout(myri10ge_watchdog
, mgp
,
5736 return (DDI_SUCCESS
);
5740 myri10ge_attach(dev_info_t
*dip
, ddi_attach_cmd_t cmd
)
5743 struct myri10ge_priv
*mgp
;
5744 mac_register_t
*macp
, *omacp
;
5745 ddi_acc_handle_t handle
;
5746 uint32_t csr
, hdr_offset
;
5747 int status
, span
, link_width
, max_read_request_4k
;
5748 unsigned long bus_number
, dev_number
, func_number
;
5753 if (cmd
== DDI_RESUME
) {
5754 return (myri10ge_resume(dip
));
5757 if (cmd
!= DDI_ATTACH
)
5758 return (DDI_FAILURE
);
5759 if (pci_config_setup(dip
, &handle
) != DDI_SUCCESS
)
5760 return (DDI_FAILURE
);
5762 /* enable busmater and io space access */
5763 csr
= pci_config_get32(handle
, PCI_CONF_COMM
);
5764 pci_config_put32(handle
, PCI_CONF_COMM
,
5765 (csr
|PCI_COMM_ME
|PCI_COMM_MAE
));
5766 status
= myri10ge_read_pcie_link_width(handle
, &link_width
);
5768 cmn_err(CE_WARN
, "could not read link width!\n");
5771 max_read_request_4k
= !myri10ge_set_max_readreq(handle
);
5772 status
= myri10ge_find_cap(handle
, &vso
, PCI_CAP_ID_VS
);
5774 goto abort_with_cfg_hdl
;
5775 if ((omacp
= mac_alloc(MAC_VERSION
)) == NULL
)
5776 goto abort_with_cfg_hdl
;
5778 * XXXX Hack: mac_register_t grows in newer kernels. To be
5779 * able to write newer fields, such as m_margin, without
5780 * writing outside allocated memory, we allocate our own macp
5781 * and pass that to mac_register()
5783 macp
= kmem_zalloc(sizeof (*macp
) * 8, KM_SLEEP
);
5784 macp
->m_version
= omacp
->m_version
;
5786 if ((mgp
= (struct myri10ge_priv
*)
5787 kmem_zalloc(sizeof (*mgp
), KM_SLEEP
)) == NULL
) {
5788 goto abort_with_macinfo
;
5790 ddi_set_driver_private(dip
, mgp
);
5792 /* setup device name for log messages */
5793 (void) sprintf(mgp
->name
, "myri10ge%d", ddi_get_instance(dip
));
5795 mutex_enter(&myri10ge_param_lock
);
5796 myri10ge_get_props(dip
);
5797 mgp
->intr_coal_delay
= myri10ge_intr_coal_delay
;
5798 mgp
->pause
= myri10ge_flow_control
;
5799 mutex_exit(&myri10ge_param_lock
);
5801 mgp
->max_read_request_4k
= max_read_request_4k
;
5802 mgp
->pcie_link_width
= link_width
;
5803 mgp
->running
= MYRI10GE_ETH_STOPPED
;
5806 mgp
->cfg_hdl
= handle
;
5808 mgp
->timer_ticks
= 5 * drv_usectohz(1000000); /* 5 seconds */
5809 myri10ge_test_physical(dip
);
5811 /* allocate command page */
5812 bytes
= sizeof (*mgp
->cmd
);
5813 mgp
->cmd
= (mcp_cmd_response_t
*)
5814 (void *)myri10ge_dma_alloc(dip
, bytes
,
5815 &myri10ge_misc_dma_attr
, &myri10ge_dev_access_attr
,
5816 DDI_DMA_CONSISTENT
, DDI_DMA_RDWR
|DDI_DMA_CONSISTENT
,
5817 &mgp
->cmd_dma
, 1, DDI_DMA_DONTWAIT
);
5818 if (mgp
->cmd
== NULL
)
5819 goto abort_with_mgp
;
5821 (void) myri10ge_reg_set(dip
, &mgp
->reg_set
, &span
, &bus_number
,
5822 &dev_number
, &func_number
);
5823 if (myri10ge_verbose
)
5824 printf("%s at %ld:%ld:%ld attaching\n", mgp
->name
,
5825 bus_number
, dev_number
, func_number
);
5826 status
= ddi_regs_map_setup(dip
, mgp
->reg_set
, (caddr_t
*)&mgp
->sram
,
5827 (offset_t
)0, (offset_t
)span
, &myri10ge_dev_access_attr
,
5829 if (status
!= DDI_SUCCESS
) {
5830 cmn_err(CE_WARN
, "%s: couldn't map memory space", mgp
->name
);
5831 printf("%s: reg_set = %d, span = %d, status = %d",
5832 mgp
->name
, mgp
->reg_set
, span
, status
);
5833 goto abort_with_mgp
;
5836 hdr_offset
= *(uint32_t *)(void*)(mgp
->sram
+ MCP_HEADER_PTR_OFFSET
);
5837 hdr_offset
= ntohl(hdr_offset
) & 0xffffc;
5838 ss_offset
= hdr_offset
+
5839 offsetof(struct mcp_gen_header
, string_specs
);
5840 mgp
->sram_size
= ntohl(*(uint32_t *)(void*)(mgp
->sram
+ ss_offset
));
5841 myri10ge_pio_copy32(mgp
->eeprom_strings
,
5842 (uint32_t *)(void*)((char *)mgp
->sram
+ mgp
->sram_size
),
5843 MYRI10GE_EEPROM_STRINGS_SIZE
);
5844 (void) memset(mgp
->eeprom_strings
+
5845 MYRI10GE_EEPROM_STRINGS_SIZE
- 2, 0, 2);
5847 status
= myri10ge_read_mac_addr(mgp
);
5849 goto abort_with_mapped
;
5852 status
= myri10ge_select_firmware(mgp
);
5854 cmn_err(CE_WARN
, "%s: failed to load firmware\n", mgp
->name
);
5855 goto abort_with_mapped
;
5858 status
= myri10ge_probe_slices(mgp
);
5860 cmn_err(CE_WARN
, "%s: failed to probe slices\n", mgp
->name
);
5861 goto abort_with_dummy_rdma
;
5864 status
= myri10ge_alloc_slices(mgp
);
5866 cmn_err(CE_WARN
, "%s: failed to alloc slices\n", mgp
->name
);
5867 goto abort_with_dummy_rdma
;
5870 /* add the interrupt handler */
5871 status
= myri10ge_add_intrs(mgp
, 1);
5873 cmn_err(CE_WARN
, "%s: Failed to add interrupt\n",
5875 goto abort_with_slices
;
5878 /* now that we have an iblock_cookie, init the mutexes */
5879 mutex_init(&mgp
->cmd_lock
, NULL
, MUTEX_DRIVER
, mgp
->icookie
);
5880 mutex_init(&mgp
->intrlock
, NULL
, MUTEX_DRIVER
, mgp
->icookie
);
5883 status
= myri10ge_nic_stat_init(mgp
);
5884 if (status
!= DDI_SUCCESS
)
5885 goto abort_with_interrupts
;
5886 status
= myri10ge_info_init(mgp
);
5887 if (status
!= DDI_SUCCESS
)
5888 goto abort_with_stats
;
5891 * Initialize GLD state
5894 macp
->m_type_ident
= MAC_PLUGIN_IDENT_ETHER
;
5895 macp
->m_driver
= mgp
;
5897 macp
->m_src_addr
= mgp
->mac_addr
;
5898 macp
->m_callbacks
= &myri10ge_m_callbacks
;
5899 macp
->m_min_sdu
= 0;
5900 macp
->m_max_sdu
= myri10ge_mtu
-
5901 (sizeof (struct ether_header
) + MXGEFW_PAD
+ VLAN_TAGSZ
);
5903 macp
->m_margin
= VLAN_TAGSZ
;
5905 macp
->m_v12n
= MAC_VIRT_LEVEL1
;
5906 status
= mac_register(macp
, &mgp
->mh
);
5908 cmn_err(CE_WARN
, "%s: mac_register failed with %d\n",
5910 goto abort_with_info
;
5912 myri10ge_ndd_init(mgp
);
5913 if (myri10ge_verbose
)
5914 printf("%s: %s, tx bndry %d, fw %s\n", mgp
->name
,
5915 mgp
->intr_type
, mgp
->tx_boundary
, mgp
->fw_name
);
5916 mutex_enter(&myri10ge_param_lock
);
5917 mgp
->next
= mgp_list
;
5919 mutex_exit(&myri10ge_param_lock
);
5920 kmem_free(macp
, sizeof (*macp
) * 8);
5922 return (DDI_SUCCESS
);
5925 myri10ge_info_destroy(mgp
);
5928 myri10ge_nic_stat_destroy(mgp
);
5930 abort_with_interrupts
:
5931 mutex_destroy(&mgp
->cmd_lock
);
5932 mutex_destroy(&mgp
->intrlock
);
5933 myri10ge_rem_intrs(mgp
, 1);
5936 myri10ge_free_slices(mgp
);
5938 abort_with_dummy_rdma
:
5939 myri10ge_dummy_rdma(mgp
, 0);
5942 ddi_regs_map_free(&mgp
->io_handle
);
5944 myri10ge_dma_free(&mgp
->cmd_dma
);
5947 kmem_free(mgp
, sizeof (*mgp
));
5950 kmem_free(macp
, sizeof (*macp
) * 8);
5954 pci_config_teardown(&handle
);
5955 return (DDI_FAILURE
);
5961 myri10ge_detach(dev_info_t
*dip
, ddi_detach_cmd_t cmd
)
5963 struct myri10ge_priv
*mgp
, *tmp
;
5964 int status
, i
, jbufs_alloced
;
5966 if (cmd
== DDI_SUSPEND
) {
5967 status
= myri10ge_suspend(dip
);
5971 if (cmd
!= DDI_DETACH
) {
5972 return (DDI_FAILURE
);
5974 /* Get the driver private (gld_mac_info_t) structure */
5975 mgp
= ddi_get_driver_private(dip
);
5977 mutex_enter(&mgp
->intrlock
);
5979 for (i
= 0; i
< mgp
->num_slices
; i
++) {
5980 myri10ge_remove_jbufs(&mgp
->ss
[i
]);
5981 jbufs_alloced
+= mgp
->ss
[i
].jpool
.num_alloc
;
5983 mutex_exit(&mgp
->intrlock
);
5984 if (jbufs_alloced
!= 0) {
5985 cmn_err(CE_NOTE
, "%s: %d loaned rx buffers remain\n",
5986 mgp
->name
, jbufs_alloced
);
5987 return (DDI_FAILURE
);
5990 mutex_enter(&myri10ge_param_lock
);
5991 if (mgp
->refcnt
!= 0) {
5992 mutex_exit(&myri10ge_param_lock
);
5993 cmn_err(CE_NOTE
, "%s: %d external refs remain\n",
5994 mgp
->name
, mgp
->refcnt
);
5995 return (DDI_FAILURE
);
5997 mutex_exit(&myri10ge_param_lock
);
5999 status
= mac_unregister(mgp
->mh
);
6000 if (status
!= DDI_SUCCESS
)
6003 myri10ge_ndd_fini(mgp
);
6004 myri10ge_dummy_rdma(mgp
, 0);
6005 myri10ge_nic_stat_destroy(mgp
);
6006 myri10ge_info_destroy(mgp
);
6008 mutex_destroy(&mgp
->cmd_lock
);
6009 mutex_destroy(&mgp
->intrlock
);
6011 myri10ge_rem_intrs(mgp
, 1);
6013 myri10ge_free_slices(mgp
);
6014 ddi_regs_map_free(&mgp
->io_handle
);
6015 myri10ge_dma_free(&mgp
->cmd_dma
);
6016 pci_config_teardown(&mgp
->cfg_hdl
);
6018 mutex_enter(&myri10ge_param_lock
);
6019 if (mgp_list
== mgp
) {
6020 mgp_list
= mgp
->next
;
6023 while (tmp
->next
!= mgp
&& tmp
->next
!= NULL
)
6025 if (tmp
->next
!= NULL
)
6026 tmp
->next
= tmp
->next
->next
;
6028 kmem_free(mgp
, sizeof (*mgp
));
6029 mutex_exit(&myri10ge_param_lock
);
6030 return (DDI_SUCCESS
);
6034 * Helper for quiesce entry point: Interrupt threads are not being
6035 * scheduled, so we must poll for the confirmation DMA to arrive in
6036 * the firmware stats block for slice 0. We're essentially running
6037 * the guts of the interrupt handler, and just cherry picking the
6038 * confirmation that the NIC is queuesced (stats->link_down)
6042 myri10ge_poll_down(struct myri10ge_priv
*mgp
)
6044 struct myri10ge_slice_state
*ss
= mgp
->ss
;
6045 mcp_irq_data_t
*stats
= ss
->fw_stats
;
6050 /* check for a pending IRQ */
6052 if (! *((volatile uint8_t *)& stats
->valid
))
6054 valid
= stats
->valid
;
6057 * Make sure to tell the NIC to lower a legacy IRQ, else
6058 * it may have corrupt state after restarting
6061 if (mgp
->ddi_intr_type
== DDI_INTR_TYPE_FIXED
) {
6062 /* lower legacy IRQ */
6063 *mgp
->irq_deassert
= 0;
6065 /* wait for irq conf DMA */
6066 while (*((volatile uint8_t *)& stats
->valid
))
6069 if (stats
->stats_updated
&& stats
->link_down
)
6073 *ss
->irq_claim
= BE_32(3);
6074 *(ss
->irq_claim
+ 1) = BE_32(3);
6076 return (found_down
);
6080 myri10ge_quiesce(dev_info_t
*dip
)
6082 struct myri10ge_priv
*mgp
;
6084 int status
, down
, i
;
6086 mgp
= ddi_get_driver_private(dip
);
6088 return (DDI_FAILURE
);
6090 /* if devices was unplumbed, it is guaranteed to be quiescent */
6091 if (mgp
->running
== MYRI10GE_ETH_STOPPED
)
6092 return (DDI_SUCCESS
);
6094 /* send a down CMD to queuesce NIC */
6095 status
= myri10ge_send_cmd(mgp
, MXGEFW_CMD_ETHERNET_DOWN
, &cmd
);
6097 cmn_err(CE_WARN
, "%s: Couldn't bring down link\n", mgp
->name
);
6098 return (DDI_FAILURE
);
6101 for (i
= 0; i
< 20; i
++) {
6102 down
= myri10ge_poll_down(mgp
);
6105 delay(drv_usectohz(100000));
6109 return (DDI_SUCCESS
);
6110 return (DDI_FAILURE
);
6114 * Distinguish between allocb'ed blocks, and gesballoc'ed attached
6118 myri10ge_find_lastfree(void)
6120 mblk_t
*mp
= allocb(1024, 0);
6124 cmn_err(CE_WARN
, "myri10ge_find_lastfree failed\n");
6128 myri10ge_db_lastfree
= (void *)dbp
->db_lastfree
;
6136 if (myri10ge_verbose
)
6138 "Myricom 10G driver (10GbE) version %s loading\n",
6139 MYRI10GE_VERSION_STR
);
6140 myri10ge_find_lastfree();
6141 mac_init_ops(&myri10ge_ops
, "myri10ge");
6142 mutex_init(&myri10ge_param_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
6143 if ((i
= mod_install(&modlinkage
)) != 0) {
6144 cmn_err(CE_WARN
, "mod_install returned %d\n", i
);
6145 mac_fini_ops(&myri10ge_ops
);
6146 mutex_destroy(&myri10ge_param_lock
);
6155 i
= mod_remove(&modlinkage
);
6159 mac_fini_ops(&myri10ge_ops
);
6160 mutex_destroy(&myri10ge_param_lock
);
6165 _info(struct modinfo
*modinfop
)
6167 return (mod_info(&modlinkage
, modinfop
));
6172 * This file uses MyriGE driver indentation.
6175 * c-file-style:"sun"