1 /******************************************************************************
3 Copyright (c) 2006-2013, Myricom Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Myricom Inc, nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 $FreeBSD: head/sys/dev/mxge/if_mxge.c 254263 2013-08-12 23:30:01Z scottl $
30 ***************************************************************************/
32 #include "opt_ifpoll.h"
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/linker.h>
38 #include <sys/firmware.h>
39 #include <sys/endian.h>
40 #include <sys/in_cksum.h>
41 #include <sys/sockio.h>
43 #include <sys/malloc.h>
44 #include <sys/kernel.h>
45 #include <sys/module.h>
46 #include <sys/serialize.h>
47 #include <sys/socket.h>
48 #include <sys/sysctl.h>
51 #include <net/if_arp.h>
52 #include <net/ifq_var.h>
53 #include <net/ethernet.h>
54 #include <net/if_dl.h>
55 #include <net/if_media.h>
56 #include <net/if_poll.h>
60 #include <net/if_types.h>
61 #include <net/vlan/if_vlan_var.h>
63 #include <net/toeplitz.h>
65 #include <netinet/in_systm.h>
66 #include <netinet/in.h>
67 #include <netinet/ip.h>
68 #include <netinet/tcp.h>
73 #include <bus/pci/pcireg.h>
74 #include <bus/pci/pcivar.h>
75 #include <bus/pci/pci_private.h> /* XXX for pci_cfg_restore */
77 #include <vm/vm.h> /* for pmap_mapdev() */
80 #if defined(__x86_64__)
81 #include <machine/specialreg.h>
84 #include <dev/netif/mxge/mxge_mcp.h>
85 #include <dev/netif/mxge/mcp_gen_header.h>
86 #include <dev/netif/mxge/if_mxge_var.h>
88 #define MXGE_IFM (IFM_ETHER | IFM_FDX | IFM_ETH_FORCEPAUSE)
90 #define MXGE_RX_SMALL_BUFLEN (MHLEN - MXGEFW_PAD)
91 #define MXGE_HWRSS_KEYLEN 16
94 static int mxge_nvidia_ecrc_enable
= 1;
95 static int mxge_force_firmware
= 0;
96 static int mxge_intr_coal_delay
= MXGE_INTR_COAL_DELAY
;
97 static int mxge_deassert_wait
= 1;
98 static int mxge_ticks
;
99 static int mxge_num_slices
= 0;
100 static int mxge_always_promisc
= 0;
101 static int mxge_throttle
= 0;
102 static int mxge_msi_enable
= 1;
103 static int mxge_msix_enable
= 1;
104 static int mxge_multi_tx
= 1;
106 * Don't use RSS by default, its just too slow
108 static int mxge_use_rss
= 0;
110 static char mxge_flowctrl
[IFM_ETH_FC_STRLEN
] = IFM_ETH_FC_FORCE_FULL
;
112 static const char *mxge_fw_unaligned
= "mxge_ethp_z8e";
113 static const char *mxge_fw_aligned
= "mxge_eth_z8e";
114 static const char *mxge_fw_rss_aligned
= "mxge_rss_eth_z8e";
115 static const char *mxge_fw_rss_unaligned
= "mxge_rss_ethp_z8e";
117 TUNABLE_INT("hw.mxge.num_slices", &mxge_num_slices
);
118 TUNABLE_INT("hw.mxge.intr_coal_delay", &mxge_intr_coal_delay
);
119 TUNABLE_INT("hw.mxge.nvidia_ecrc_enable", &mxge_nvidia_ecrc_enable
);
120 TUNABLE_INT("hw.mxge.force_firmware", &mxge_force_firmware
);
121 TUNABLE_INT("hw.mxge.deassert_wait", &mxge_deassert_wait
);
122 TUNABLE_INT("hw.mxge.ticks", &mxge_ticks
);
123 TUNABLE_INT("hw.mxge.always_promisc", &mxge_always_promisc
);
124 TUNABLE_INT("hw.mxge.throttle", &mxge_throttle
);
125 TUNABLE_INT("hw.mxge.multi_tx", &mxge_multi_tx
);
126 TUNABLE_INT("hw.mxge.use_rss", &mxge_use_rss
);
127 TUNABLE_INT("hw.mxge.msi.enable", &mxge_msi_enable
);
128 TUNABLE_INT("hw.mxge.msix.enable", &mxge_msix_enable
);
129 TUNABLE_STR("hw.mxge.flow_ctrl", mxge_flowctrl
, sizeof(mxge_flowctrl
));
131 static int mxge_probe(device_t dev
);
132 static int mxge_attach(device_t dev
);
133 static int mxge_detach(device_t dev
);
134 static int mxge_shutdown(device_t dev
);
136 static int mxge_alloc_intr(struct mxge_softc
*sc
);
137 static void mxge_free_intr(struct mxge_softc
*sc
);
138 static int mxge_setup_intr(struct mxge_softc
*sc
);
139 static void mxge_teardown_intr(struct mxge_softc
*sc
, int cnt
);
141 static device_method_t mxge_methods
[] = {
142 /* Device interface */
143 DEVMETHOD(device_probe
, mxge_probe
),
144 DEVMETHOD(device_attach
, mxge_attach
),
145 DEVMETHOD(device_detach
, mxge_detach
),
146 DEVMETHOD(device_shutdown
, mxge_shutdown
),
150 static driver_t mxge_driver
= {
153 sizeof(mxge_softc_t
),
156 static devclass_t mxge_devclass
;
158 /* Declare ourselves to be a child of the PCI bus.*/
159 DRIVER_MODULE(mxge
, pci
, mxge_driver
, mxge_devclass
, NULL
, NULL
);
160 MODULE_DEPEND(mxge
, firmware
, 1, 1, 1);
161 MODULE_DEPEND(mxge
, zlib
, 1, 1, 1);
163 static int mxge_load_firmware(mxge_softc_t
*sc
, int adopt
);
164 static int mxge_send_cmd(mxge_softc_t
*sc
, uint32_t cmd
, mxge_cmd_t
*data
);
165 static void mxge_close(mxge_softc_t
*sc
, int down
);
166 static int mxge_open(mxge_softc_t
*sc
);
167 static void mxge_tick(void *arg
);
168 static void mxge_watchdog_reset(mxge_softc_t
*sc
);
169 static void mxge_warn_stuck(mxge_softc_t
*sc
, mxge_tx_ring_t
*tx
, int slice
);
172 mxge_probe(device_t dev
)
174 if (pci_get_vendor(dev
) == MXGE_PCI_VENDOR_MYRICOM
&&
175 (pci_get_device(dev
) == MXGE_PCI_DEVICE_Z8E
||
176 pci_get_device(dev
) == MXGE_PCI_DEVICE_Z8E_9
)) {
177 int rev
= pci_get_revid(dev
);
180 case MXGE_PCI_REV_Z8E
:
181 device_set_desc(dev
, "Myri10G-PCIE-8A");
183 case MXGE_PCI_REV_Z8ES
:
184 device_set_desc(dev
, "Myri10G-PCIE-8B");
187 device_set_desc(dev
, "Myri10G-PCIE-8??");
188 device_printf(dev
, "Unrecognized rev %d NIC\n", rev
);
197 mxge_enable_wc(mxge_softc_t
*sc
)
199 #if defined(__x86_64__)
203 len
= rman_get_size(sc
->mem_res
);
204 pmap_change_attr((vm_offset_t
) sc
->sram
, len
/ PAGE_SIZE
,
205 PAT_WRITE_COMBINING
);
210 mxge_dma_alloc(mxge_softc_t
*sc
, bus_dmamem_t
*dma
, size_t bytes
,
211 bus_size_t alignment
)
216 if (bytes
> 4096 && alignment
== 4096)
221 err
= bus_dmamem_coherent(sc
->parent_dmat
, alignment
, boundary
,
222 BUS_SPACE_MAXADDR
, BUS_SPACE_MAXADDR
, bytes
,
223 BUS_DMA_WAITOK
| BUS_DMA_ZERO
, dma
);
225 device_printf(sc
->dev
, "bus_dmamem_coherent failed: %d\n", err
);
232 mxge_dma_free(bus_dmamem_t
*dma
)
234 bus_dmamap_unload(dma
->dmem_tag
, dma
->dmem_map
);
235 bus_dmamem_free(dma
->dmem_tag
, dma
->dmem_addr
, dma
->dmem_map
);
236 bus_dma_tag_destroy(dma
->dmem_tag
);
240 * The eeprom strings on the lanaiX have the format
246 mxge_parse_strings(mxge_softc_t
*sc
)
249 int i
, found_mac
, found_sn2
;
252 ptr
= sc
->eeprom_strings
;
255 while (*ptr
!= '\0') {
256 if (strncmp(ptr
, "MAC=", 4) == 0) {
259 sc
->mac_addr
[i
] = strtoul(ptr
, &endptr
, 16);
260 if (endptr
- ptr
!= 2)
269 } else if (strncmp(ptr
, "PC=", 3) == 0) {
271 strlcpy(sc
->product_code_string
, ptr
,
272 sizeof(sc
->product_code_string
));
273 } else if (!found_sn2
&& (strncmp(ptr
, "SN=", 3) == 0)) {
275 strlcpy(sc
->serial_number_string
, ptr
,
276 sizeof(sc
->serial_number_string
));
277 } else if (strncmp(ptr
, "SN2=", 4) == 0) {
278 /* SN2 takes precedence over SN */
281 strlcpy(sc
->serial_number_string
, ptr
,
282 sizeof(sc
->serial_number_string
));
284 while (*ptr
++ != '\0') {}
291 device_printf(sc
->dev
, "failed to parse eeprom_strings\n");
295 #if defined(__x86_64__)
298 mxge_enable_nvidia_ecrc(mxge_softc_t
*sc
)
301 unsigned long base
, off
;
303 device_t pdev
, mcp55
;
304 uint16_t vendor_id
, device_id
, word
;
305 uintptr_t bus
, slot
, func
, ivend
, idev
;
308 if (!mxge_nvidia_ecrc_enable
)
311 pdev
= device_get_parent(device_get_parent(sc
->dev
));
313 device_printf(sc
->dev
, "could not find parent?\n");
316 vendor_id
= pci_read_config(pdev
, PCIR_VENDOR
, 2);
317 device_id
= pci_read_config(pdev
, PCIR_DEVICE
, 2);
319 if (vendor_id
!= 0x10de)
324 if (device_id
== 0x005d) {
325 /* ck804, base address is magic */
327 } else if (device_id
>= 0x0374 && device_id
<= 0x378) {
328 /* mcp55, base address stored in chipset */
329 mcp55
= pci_find_bsf(0, 0, 0);
331 0x10de == pci_read_config(mcp55
, PCIR_VENDOR
, 2) &&
332 0x0369 == pci_read_config(mcp55
, PCIR_DEVICE
, 2)) {
333 word
= pci_read_config(mcp55
, 0x90, 2);
334 base
= ((unsigned long)word
& 0x7ffeU
) << 25;
342 * Test below is commented because it is believed that doing
343 * config read/write beyond 0xff will access the config space
344 * for the next larger function. Uncomment this and remove
345 * the hacky pmap_mapdev() way of accessing config space when
346 * DragonFly grows support for extended pcie config space access.
350 * See if we can, by some miracle, access the extended
353 val
= pci_read_config(pdev
, 0x178, 4);
354 if (val
!= 0xffffffff) {
356 pci_write_config(pdev
, 0x178, val
, 4);
361 * Rather than using normal pci config space writes, we must
362 * map the Nvidia config space ourselves. This is because on
363 * opteron/nvidia class machine the 0xe000000 mapping is
364 * handled by the nvidia chipset, that means the internal PCI
365 * device (the on-chip northbridge), or the amd-8131 bridge
366 * and things behind them are not visible by this method.
369 BUS_READ_IVAR(device_get_parent(pdev
), pdev
,
371 BUS_READ_IVAR(device_get_parent(pdev
), pdev
,
372 PCI_IVAR_SLOT
, &slot
);
373 BUS_READ_IVAR(device_get_parent(pdev
), pdev
,
374 PCI_IVAR_FUNCTION
, &func
);
375 BUS_READ_IVAR(device_get_parent(pdev
), pdev
,
376 PCI_IVAR_VENDOR
, &ivend
);
377 BUS_READ_IVAR(device_get_parent(pdev
), pdev
,
378 PCI_IVAR_DEVICE
, &idev
);
380 off
= base
+ 0x00100000UL
* (unsigned long)bus
+
381 0x00001000UL
* (unsigned long)(func
+ 8 * slot
);
383 /* map it into the kernel */
384 va
= pmap_mapdev(trunc_page((vm_paddr_t
)off
), PAGE_SIZE
);
386 device_printf(sc
->dev
, "pmap_kenter_temporary didn't\n");
389 /* get a pointer to the config space mapped into the kernel */
390 cfgptr
= va
+ (off
& PAGE_MASK
);
392 /* make sure that we can really access it */
393 vendor_id
= *(uint16_t *)(cfgptr
+ PCIR_VENDOR
);
394 device_id
= *(uint16_t *)(cfgptr
+ PCIR_DEVICE
);
395 if (!(vendor_id
== ivend
&& device_id
== idev
)) {
396 device_printf(sc
->dev
, "mapping failed: 0x%x:0x%x\n",
397 vendor_id
, device_id
);
398 pmap_unmapdev((vm_offset_t
)va
, PAGE_SIZE
);
402 ptr32
= (uint32_t*)(cfgptr
+ 0x178);
405 if (val
== 0xffffffff) {
406 device_printf(sc
->dev
, "extended mapping failed\n");
407 pmap_unmapdev((vm_offset_t
)va
, PAGE_SIZE
);
411 pmap_unmapdev((vm_offset_t
)va
, PAGE_SIZE
);
413 device_printf(sc
->dev
, "Enabled ECRC on upstream "
414 "Nvidia bridge at %d:%d:%d\n",
415 (int)bus
, (int)slot
, (int)func
);
419 #else /* __x86_64__ */
422 mxge_enable_nvidia_ecrc(mxge_softc_t
*sc
)
424 device_printf(sc
->dev
, "Nforce 4 chipset on non-x86/x86_64!?!?!\n");
430 mxge_dma_test(mxge_softc_t
*sc
, int test_type
)
433 bus_addr_t dmatest_bus
= sc
->dmabench_dma
.dmem_busaddr
;
436 const char *test
= " ";
439 * Run a small DMA test.
440 * The magic multipliers to the length tell the firmware
441 * to do DMA read, write, or read+write tests. The
442 * results are returned in cmd.data0. The upper 16
443 * bits of the return is the number of transfers completed.
444 * The lower 16 bits is the time in 0.5us ticks that the
445 * transfers took to complete.
448 len
= sc
->tx_boundary
;
450 cmd
.data0
= MXGE_LOWPART_TO_U32(dmatest_bus
);
451 cmd
.data1
= MXGE_HIGHPART_TO_U32(dmatest_bus
);
452 cmd
.data2
= len
* 0x10000;
453 status
= mxge_send_cmd(sc
, test_type
, &cmd
);
458 sc
->read_dma
= ((cmd
.data0
>>16) * len
* 2) / (cmd
.data0
& 0xffff);
460 cmd
.data0
= MXGE_LOWPART_TO_U32(dmatest_bus
);
461 cmd
.data1
= MXGE_HIGHPART_TO_U32(dmatest_bus
);
462 cmd
.data2
= len
* 0x1;
463 status
= mxge_send_cmd(sc
, test_type
, &cmd
);
468 sc
->write_dma
= ((cmd
.data0
>>16) * len
* 2) / (cmd
.data0
& 0xffff);
470 cmd
.data0
= MXGE_LOWPART_TO_U32(dmatest_bus
);
471 cmd
.data1
= MXGE_HIGHPART_TO_U32(dmatest_bus
);
472 cmd
.data2
= len
* 0x10001;
473 status
= mxge_send_cmd(sc
, test_type
, &cmd
);
478 sc
->read_write_dma
= ((cmd
.data0
>>16) * len
* 2 * 2) /
479 (cmd
.data0
& 0xffff);
482 if (status
!= 0 && test_type
!= MXGEFW_CMD_UNALIGNED_TEST
) {
483 device_printf(sc
->dev
, "DMA %s benchmark failed: %d\n",
490 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
491 * when the PCI-E Completion packets are aligned on an 8-byte
492 * boundary. Some PCI-E chip sets always align Completion packets; on
493 * the ones that do not, the alignment can be enforced by enabling
494 * ECRC generation (if supported).
496 * When PCI-E Completion packets are not aligned, it is actually more
497 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
499 * If the driver can neither enable ECRC nor verify that it has
500 * already been enabled, then it must use a firmware image which works
501 * around unaligned completion packets (ethp_z8e.dat), and it should
502 * also ensure that it never gives the device a Read-DMA which is
503 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
504 * enabled, then the driver should use the aligned (eth_z8e.dat)
505 * firmware image, and set tx_boundary to 4KB.
508 mxge_firmware_probe(mxge_softc_t
*sc
)
510 device_t dev
= sc
->dev
;
514 sc
->tx_boundary
= 4096;
517 * Verify the max read request size was set to 4KB
518 * before trying the test with 4KB.
520 if (pci_find_extcap(dev
, PCIY_EXPRESS
, ®
) == 0) {
521 pectl
= pci_read_config(dev
, reg
+ 0x8, 2);
522 if ((pectl
& (5 << 12)) != (5 << 12)) {
523 device_printf(dev
, "Max Read Req. size != 4k (0x%x)\n",
525 sc
->tx_boundary
= 2048;
530 * Load the optimized firmware (which assumes aligned PCIe
531 * completions) in order to see if it works on this host.
533 sc
->fw_name
= mxge_fw_aligned
;
534 status
= mxge_load_firmware(sc
, 1);
539 * Enable ECRC if possible
541 mxge_enable_nvidia_ecrc(sc
);
544 * Run a DMA test which watches for unaligned completions and
545 * aborts on the first one seen. Not required on Z8ES or newer.
547 if (pci_get_revid(sc
->dev
) >= MXGE_PCI_REV_Z8ES
)
550 status
= mxge_dma_test(sc
, MXGEFW_CMD_UNALIGNED_TEST
);
552 return 0; /* keep the aligned firmware */
555 device_printf(dev
, "DMA test failed: %d\n", status
);
556 if (status
== ENOSYS
) {
557 device_printf(dev
, "Falling back to ethp! "
558 "Please install up to date fw\n");
564 mxge_select_firmware(mxge_softc_t
*sc
)
567 int force_firmware
= mxge_force_firmware
;
570 force_firmware
= sc
->throttle
;
572 if (force_firmware
!= 0) {
573 if (force_firmware
== 1)
578 device_printf(sc
->dev
,
579 "Assuming %s completions (forced)\n",
580 aligned
? "aligned" : "unaligned");
586 * If the PCIe link width is 4 or less, we can use the aligned
587 * firmware and skip any checks
589 if (sc
->link_width
!= 0 && sc
->link_width
<= 4) {
590 device_printf(sc
->dev
, "PCIe x%d Link, "
591 "expect reduced performance\n", sc
->link_width
);
596 if (mxge_firmware_probe(sc
) == 0)
601 sc
->fw_name
= mxge_fw_aligned
;
602 sc
->tx_boundary
= 4096;
604 sc
->fw_name
= mxge_fw_unaligned
;
605 sc
->tx_boundary
= 2048;
607 return mxge_load_firmware(sc
, 0);
611 mxge_validate_firmware(mxge_softc_t
*sc
, const mcp_gen_header_t
*hdr
)
613 if (be32toh(hdr
->mcp_type
) != MCP_TYPE_ETH
) {
614 if_printf(sc
->ifp
, "Bad firmware type: 0x%x\n",
615 be32toh(hdr
->mcp_type
));
619 /* Save firmware version for sysctl */
620 strlcpy(sc
->fw_version
, hdr
->version
, sizeof(sc
->fw_version
));
622 if_printf(sc
->ifp
, "firmware id: %s\n", hdr
->version
);
624 ksscanf(sc
->fw_version
, "%d.%d.%d", &sc
->fw_ver_major
,
625 &sc
->fw_ver_minor
, &sc
->fw_ver_tiny
);
627 if (!(sc
->fw_ver_major
== MXGEFW_VERSION_MAJOR
&&
628 sc
->fw_ver_minor
== MXGEFW_VERSION_MINOR
)) {
629 if_printf(sc
->ifp
, "Found firmware version %s\n",
631 if_printf(sc
->ifp
, "Driver needs %d.%d\n",
632 MXGEFW_VERSION_MAJOR
, MXGEFW_VERSION_MINOR
);
639 z_alloc(void *nil
, u_int items
, u_int size
)
641 return kmalloc(items
* size
, M_TEMP
, M_WAITOK
);
645 z_free(void *nil
, void *ptr
)
651 mxge_load_firmware_helper(mxge_softc_t
*sc
, uint32_t *limit
)
654 char *inflate_buffer
;
655 const struct firmware
*fw
;
656 const mcp_gen_header_t
*hdr
;
663 fw
= firmware_get(sc
->fw_name
);
665 if_printf(sc
->ifp
, "Could not find firmware image %s\n",
670 /* Setup zlib and decompress f/w */
671 bzero(&zs
, sizeof(zs
));
674 status
= inflateInit(&zs
);
675 if (status
!= Z_OK
) {
681 * The uncompressed size is stored as the firmware version,
682 * which would otherwise go unused
684 fw_len
= (size_t)fw
->version
;
685 inflate_buffer
= kmalloc(fw_len
, M_TEMP
, M_WAITOK
);
686 zs
.avail_in
= fw
->datasize
;
687 zs
.next_in
= __DECONST(char *, fw
->data
);
688 zs
.avail_out
= fw_len
;
689 zs
.next_out
= inflate_buffer
;
690 status
= inflate(&zs
, Z_FINISH
);
691 if (status
!= Z_STREAM_END
) {
692 if_printf(sc
->ifp
, "zlib %d\n", status
);
694 goto abort_with_buffer
;
699 htobe32(*(const uint32_t *)(inflate_buffer
+ MCP_HEADER_PTR_OFFSET
));
700 if ((hdr_offset
& 3) || hdr_offset
+ sizeof(*hdr
) > fw_len
) {
701 if_printf(sc
->ifp
, "Bad firmware file");
703 goto abort_with_buffer
;
705 hdr
= (const void*)(inflate_buffer
+ hdr_offset
);
707 status
= mxge_validate_firmware(sc
, hdr
);
709 goto abort_with_buffer
;
711 /* Copy the inflated firmware to NIC SRAM. */
712 for (i
= 0; i
< fw_len
; i
+= 256) {
713 mxge_pio_copy(sc
->sram
+ MXGE_FW_OFFSET
+ i
, inflate_buffer
+ i
,
714 min(256U, (unsigned)(fw_len
- i
)));
723 kfree(inflate_buffer
, M_TEMP
);
726 firmware_put(fw
, FIRMWARE_UNLOAD
);
731 * Enable or disable periodic RDMAs from the host to make certain
732 * chipsets resend dropped PCIe messages
735 mxge_dummy_rdma(mxge_softc_t
*sc
, int enable
)
738 volatile uint32_t *confirm
;
739 volatile char *submit
;
740 uint32_t *buf
, dma_low
, dma_high
;
743 buf
= (uint32_t *)((unsigned long)(buf_bytes
+ 7) & ~7UL);
745 /* Clear confirmation addr */
746 confirm
= (volatile uint32_t *)sc
->cmd
;
751 * Send an rdma command to the PCIe engine, and wait for the
752 * response in the confirmation address. The firmware should
753 * write a -1 there to indicate it is alive and well
755 dma_low
= MXGE_LOWPART_TO_U32(sc
->cmd_dma
.dmem_busaddr
);
756 dma_high
= MXGE_HIGHPART_TO_U32(sc
->cmd_dma
.dmem_busaddr
);
757 buf
[0] = htobe32(dma_high
); /* confirm addr MSW */
758 buf
[1] = htobe32(dma_low
); /* confirm addr LSW */
759 buf
[2] = htobe32(0xffffffff); /* confirm data */
760 dma_low
= MXGE_LOWPART_TO_U32(sc
->zeropad_dma
.dmem_busaddr
);
761 dma_high
= MXGE_HIGHPART_TO_U32(sc
->zeropad_dma
.dmem_busaddr
);
762 buf
[3] = htobe32(dma_high
); /* dummy addr MSW */
763 buf
[4] = htobe32(dma_low
); /* dummy addr LSW */
764 buf
[5] = htobe32(enable
); /* enable? */
766 submit
= (volatile char *)(sc
->sram
+ MXGEFW_BOOT_DUMMY_RDMA
);
768 mxge_pio_copy(submit
, buf
, 64);
773 while (*confirm
!= 0xffffffff && i
< 20) {
777 if (*confirm
!= 0xffffffff) {
778 if_printf(sc
->ifp
, "dummy rdma %s failed (%p = 0x%x)",
779 (enable
? "enable" : "disable"), confirm
, *confirm
);
784 mxge_send_cmd(mxge_softc_t
*sc
, uint32_t cmd
, mxge_cmd_t
*data
)
787 char buf_bytes
[sizeof(*buf
) + 8];
788 volatile mcp_cmd_response_t
*response
= sc
->cmd
;
789 volatile char *cmd_addr
= sc
->sram
+ MXGEFW_ETH_CMD
;
790 uint32_t dma_low
, dma_high
;
791 int err
, sleep_total
= 0;
793 /* Ensure buf is aligned to 8 bytes */
794 buf
= (mcp_cmd_t
*)((unsigned long)(buf_bytes
+ 7) & ~7UL);
796 buf
->data0
= htobe32(data
->data0
);
797 buf
->data1
= htobe32(data
->data1
);
798 buf
->data2
= htobe32(data
->data2
);
799 buf
->cmd
= htobe32(cmd
);
800 dma_low
= MXGE_LOWPART_TO_U32(sc
->cmd_dma
.dmem_busaddr
);
801 dma_high
= MXGE_HIGHPART_TO_U32(sc
->cmd_dma
.dmem_busaddr
);
803 buf
->response_addr
.low
= htobe32(dma_low
);
804 buf
->response_addr
.high
= htobe32(dma_high
);
806 response
->result
= 0xffffffff;
808 mxge_pio_copy((volatile void *)cmd_addr
, buf
, sizeof (*buf
));
814 for (sleep_total
= 0; sleep_total
< 20; sleep_total
++) {
816 switch (be32toh(response
->result
)) {
818 data
->data0
= be32toh(response
->data
);
824 case MXGEFW_CMD_UNKNOWN
:
827 case MXGEFW_CMD_ERROR_UNALIGNED
:
830 case MXGEFW_CMD_ERROR_BUSY
:
833 case MXGEFW_CMD_ERROR_I2C_ABSENT
:
837 if_printf(sc
->ifp
, "command %d failed, result = %d\n",
838 cmd
, be32toh(response
->result
));
846 if_printf(sc
->ifp
, "command %d timed out result = %d\n",
847 cmd
, be32toh(response
->result
));
853 mxge_adopt_running_firmware(mxge_softc_t
*sc
)
855 struct mcp_gen_header
*hdr
;
856 const size_t bytes
= sizeof(struct mcp_gen_header
);
861 * Find running firmware header
864 htobe32(*(volatile uint32_t *)(sc
->sram
+ MCP_HEADER_PTR_OFFSET
));
866 if ((hdr_offset
& 3) || hdr_offset
+ sizeof(*hdr
) > sc
->sram_size
) {
867 if_printf(sc
->ifp
, "Running firmware has bad header offset "
868 "(%zu)\n", hdr_offset
);
873 * Copy header of running firmware from SRAM to host memory to
876 hdr
= kmalloc(bytes
, M_DEVBUF
, M_WAITOK
);
877 bus_space_read_region_1(rman_get_bustag(sc
->mem_res
),
878 rman_get_bushandle(sc
->mem_res
), hdr_offset
, (char *)hdr
, bytes
);
879 status
= mxge_validate_firmware(sc
, hdr
);
880 kfree(hdr
, M_DEVBUF
);
883 * Check to see if adopted firmware has bug where adopting
884 * it will cause broadcasts to be filtered unless the NIC
885 * is kept in ALLMULTI mode
887 if (sc
->fw_ver_major
== 1 && sc
->fw_ver_minor
== 4 &&
888 sc
->fw_ver_tiny
>= 4 && sc
->fw_ver_tiny
<= 11) {
889 sc
->adopted_rx_filter_bug
= 1;
890 if_printf(sc
->ifp
, "Adopting fw %d.%d.%d: "
891 "working around rx filter bug\n",
892 sc
->fw_ver_major
, sc
->fw_ver_minor
, sc
->fw_ver_tiny
);
899 mxge_load_firmware(mxge_softc_t
*sc
, int adopt
)
901 volatile uint32_t *confirm
;
902 volatile char *submit
;
904 uint32_t *buf
, size
, dma_low
, dma_high
;
907 buf
= (uint32_t *)((unsigned long)(buf_bytes
+ 7) & ~7UL);
909 size
= sc
->sram_size
;
910 status
= mxge_load_firmware_helper(sc
, &size
);
916 * Try to use the currently running firmware, if
919 status
= mxge_adopt_running_firmware(sc
);
922 "failed to adopt running firmware\n");
925 if_printf(sc
->ifp
, "Successfully adopted running firmware\n");
927 if (sc
->tx_boundary
== 4096) {
929 "Using firmware currently running on NIC. "
931 if_printf(sc
->ifp
, "performance consider loading "
932 "optimized firmware\n");
934 sc
->fw_name
= mxge_fw_unaligned
;
935 sc
->tx_boundary
= 2048;
939 /* Clear confirmation addr */
940 confirm
= (volatile uint32_t *)sc
->cmd
;
945 * Send a reload command to the bootstrap MCP, and wait for the
946 * response in the confirmation address. The firmware should
947 * write a -1 there to indicate it is alive and well
950 dma_low
= MXGE_LOWPART_TO_U32(sc
->cmd_dma
.dmem_busaddr
);
951 dma_high
= MXGE_HIGHPART_TO_U32(sc
->cmd_dma
.dmem_busaddr
);
953 buf
[0] = htobe32(dma_high
); /* confirm addr MSW */
954 buf
[1] = htobe32(dma_low
); /* confirm addr LSW */
955 buf
[2] = htobe32(0xffffffff); /* confirm data */
958 * FIX: All newest firmware should un-protect the bottom of
959 * the sram before handoff. However, the very first interfaces
960 * do not. Therefore the handoff copy must skip the first 8 bytes
962 /* where the code starts*/
963 buf
[3] = htobe32(MXGE_FW_OFFSET
+ 8);
964 buf
[4] = htobe32(size
- 8); /* length of code */
965 buf
[5] = htobe32(8); /* where to copy to */
966 buf
[6] = htobe32(0); /* where to jump to */
968 submit
= (volatile char *)(sc
->sram
+ MXGEFW_BOOT_HANDOFF
);
969 mxge_pio_copy(submit
, buf
, 64);
974 while (*confirm
!= 0xffffffff && i
< 20) {
978 if (*confirm
!= 0xffffffff) {
979 if_printf(sc
->ifp
,"handoff failed (%p = 0x%x)",
987 mxge_update_mac_address(mxge_softc_t
*sc
)
990 uint8_t *addr
= sc
->mac_addr
;
992 cmd
.data0
= (addr
[0] << 24) | (addr
[1] << 16) |
993 (addr
[2] << 8) | addr
[3];
994 cmd
.data1
= (addr
[4] << 8) | (addr
[5]);
995 return mxge_send_cmd(sc
, MXGEFW_SET_MAC_ADDRESS
, &cmd
);
999 mxge_change_pause(mxge_softc_t
*sc
, int pause
)
1004 bzero(&cmd
, sizeof(cmd
)); /* silence gcc warning */
1006 status
= mxge_send_cmd(sc
, MXGEFW_ENABLE_FLOW_CONTROL
, &cmd
);
1008 status
= mxge_send_cmd(sc
, MXGEFW_DISABLE_FLOW_CONTROL
, &cmd
);
1010 if_printf(sc
->ifp
, "Failed to set flow control mode\n");
1018 mxge_change_promisc(mxge_softc_t
*sc
, int promisc
)
1023 bzero(&cmd
, sizeof(cmd
)); /* avoid gcc warning */
1024 if (mxge_always_promisc
)
1028 status
= mxge_send_cmd(sc
, MXGEFW_ENABLE_PROMISC
, &cmd
);
1030 status
= mxge_send_cmd(sc
, MXGEFW_DISABLE_PROMISC
, &cmd
);
1032 if_printf(sc
->ifp
, "Failed to set promisc mode\n");
1036 mxge_set_multicast_list(mxge_softc_t
*sc
)
1039 struct ifmultiaddr
*ifma
;
1040 struct ifnet
*ifp
= sc
->ifp
;
1043 /* This firmware is known to not support multicast */
1044 if (!sc
->fw_multicast_support
)
1047 /* Disable multicast filtering while we play with the lists*/
1048 bzero(&cmd
, sizeof(cmd
)); /* silence gcc warning */
1049 err
= mxge_send_cmd(sc
, MXGEFW_ENABLE_ALLMULTI
, &cmd
);
1051 if_printf(ifp
, "Failed MXGEFW_ENABLE_ALLMULTI, "
1052 "error status: %d\n", err
);
1056 if (sc
->adopted_rx_filter_bug
)
1059 if (ifp
->if_flags
& IFF_ALLMULTI
) {
1060 /* Request to disable multicast filtering, so quit here */
1064 /* Flush all the filters */
1065 err
= mxge_send_cmd(sc
, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS
, &cmd
);
1067 if_printf(ifp
, "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, "
1068 "error status: %d\n", err
);
1073 * Walk the multicast list, and add each address
1075 TAILQ_FOREACH(ifma
, &ifp
->if_multiaddrs
, ifma_link
) {
1076 if (ifma
->ifma_addr
->sa_family
!= AF_LINK
)
1079 bcopy(LLADDR((struct sockaddr_dl
*)ifma
->ifma_addr
),
1081 bcopy(LLADDR((struct sockaddr_dl
*)ifma
->ifma_addr
) + 4,
1083 cmd
.data0
= htonl(cmd
.data0
);
1084 cmd
.data1
= htonl(cmd
.data1
);
1085 err
= mxge_send_cmd(sc
, MXGEFW_JOIN_MULTICAST_GROUP
, &cmd
);
1087 if_printf(ifp
, "Failed MXGEFW_JOIN_MULTICAST_GROUP, "
1088 "error status: %d\n", err
);
1089 /* Abort, leaving multicast filtering off */
1094 /* Enable multicast filtering */
1095 err
= mxge_send_cmd(sc
, MXGEFW_DISABLE_ALLMULTI
, &cmd
);
1097 if_printf(ifp
, "Failed MXGEFW_DISABLE_ALLMULTI, "
1098 "error status: %d\n", err
);
1104 mxge_max_mtu(mxge_softc_t
*sc
)
1109 if (MJUMPAGESIZE
- MXGEFW_PAD
> MXGEFW_MAX_MTU
)
1110 return MXGEFW_MAX_MTU
- MXGEFW_PAD
;
1112 /* try to set nbufs to see if it we can
1113 use virtually contiguous jumbos */
1115 status
= mxge_send_cmd(sc
, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS
,
1118 return MXGEFW_MAX_MTU
- MXGEFW_PAD
;
1120 /* otherwise, we're limited to MJUMPAGESIZE */
1121 return MJUMPAGESIZE
- MXGEFW_PAD
;
1126 mxge_reset(mxge_softc_t
*sc
, int interrupts_setup
)
1128 struct mxge_slice_state
*ss
;
1129 mxge_rx_done_t
*rx_done
;
1130 volatile uint32_t *irq_claim
;
1132 int slice
, status
, rx_intr_size
;
1135 * Try to send a reset command to the card to see if it
1138 memset(&cmd
, 0, sizeof (cmd
));
1139 status
= mxge_send_cmd(sc
, MXGEFW_CMD_RESET
, &cmd
);
1141 if_printf(sc
->ifp
, "failed reset\n");
1145 mxge_dummy_rdma(sc
, 1);
1148 * Set the intrq size
1149 * XXX assume 4byte mcp_slot
1151 rx_intr_size
= sc
->rx_intr_slots
* sizeof(mcp_slot_t
);
1152 cmd
.data0
= rx_intr_size
;
1153 status
= mxge_send_cmd(sc
, MXGEFW_CMD_SET_INTRQ_SIZE
, &cmd
);
1156 * Even though we already know how many slices are supported
1157 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
1158 * has magic side effects, and must be called after a reset.
1159 * It must be called prior to calling any RSS related cmds,
1160 * including assigning an interrupt queue for anything but
1161 * slice 0. It must also be called *after*
1162 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1163 * the firmware to compute offsets.
1165 if (sc
->num_slices
> 1) {
1166 /* Ask the maximum number of slices it supports */
1167 status
= mxge_send_cmd(sc
, MXGEFW_CMD_GET_MAX_RSS_QUEUES
, &cmd
);
1169 if_printf(sc
->ifp
, "failed to get number of slices\n");
1174 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1175 * to setting up the interrupt queue DMA
1177 cmd
.data0
= sc
->num_slices
;
1178 cmd
.data1
= MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE
;
1179 if (sc
->num_tx_rings
> 1)
1180 cmd
.data1
|= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES
;
1181 status
= mxge_send_cmd(sc
, MXGEFW_CMD_ENABLE_RSS_QUEUES
, &cmd
);
1183 if_printf(sc
->ifp
, "failed to set number of slices\n");
1188 if (interrupts_setup
) {
1189 /* Now exchange information about interrupts */
1190 for (slice
= 0; slice
< sc
->num_slices
; slice
++) {
1191 ss
= &sc
->ss
[slice
];
1193 rx_done
= &ss
->rx_data
.rx_done
;
1194 memset(rx_done
->entry
, 0, rx_intr_size
);
1197 MXGE_LOWPART_TO_U32(ss
->rx_done_dma
.dmem_busaddr
);
1199 MXGE_HIGHPART_TO_U32(ss
->rx_done_dma
.dmem_busaddr
);
1201 status
|= mxge_send_cmd(sc
, MXGEFW_CMD_SET_INTRQ_DMA
,
1206 status
|= mxge_send_cmd(sc
, MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET
,
1208 sc
->intr_coal_delay_ptr
= (volatile uint32_t *)(sc
->sram
+ cmd
.data0
);
1210 status
|= mxge_send_cmd(sc
, MXGEFW_CMD_GET_IRQ_ACK_OFFSET
, &cmd
);
1211 irq_claim
= (volatile uint32_t *)(sc
->sram
+ cmd
.data0
);
1213 status
|= mxge_send_cmd(sc
, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET
, &cmd
);
1214 sc
->irq_deassert
= (volatile uint32_t *)(sc
->sram
+ cmd
.data0
);
1217 if_printf(sc
->ifp
, "failed set interrupt parameters\n");
1221 *sc
->intr_coal_delay_ptr
= htobe32(sc
->intr_coal_delay
);
1223 /* Run a DMA benchmark */
1224 mxge_dma_test(sc
, MXGEFW_DMA_TEST
);
1226 for (slice
= 0; slice
< sc
->num_slices
; slice
++) {
1227 ss
= &sc
->ss
[slice
];
1229 ss
->irq_claim
= irq_claim
+ (2 * slice
);
1231 /* Reset mcp/driver shared state back to 0 */
1232 ss
->rx_data
.rx_done
.idx
= 0;
1235 ss
->tx
.pkt_done
= 0;
1236 ss
->tx
.queue_active
= 0;
1237 ss
->tx
.activate
= 0;
1238 ss
->tx
.deactivate
= 0;
1239 ss
->rx_data
.rx_big
.cnt
= 0;
1240 ss
->rx_data
.rx_small
.cnt
= 0;
1241 if (ss
->fw_stats
!= NULL
)
1242 bzero(ss
->fw_stats
, sizeof(*ss
->fw_stats
));
1244 sc
->rdma_tags_available
= 15;
1246 status
= mxge_update_mac_address(sc
);
1247 mxge_change_promisc(sc
, sc
->ifp
->if_flags
& IFF_PROMISC
);
1248 mxge_change_pause(sc
, sc
->pause
);
1249 mxge_set_multicast_list(sc
);
1252 cmd
.data0
= sc
->throttle
;
1253 if (mxge_send_cmd(sc
, MXGEFW_CMD_SET_THROTTLE_FACTOR
, &cmd
))
1254 if_printf(sc
->ifp
, "can't enable throttle\n");
1260 mxge_change_throttle(SYSCTL_HANDLER_ARGS
)
1265 unsigned int throttle
;
1268 throttle
= sc
->throttle
;
1269 err
= sysctl_handle_int(oidp
, &throttle
, arg2
, req
);
1273 if (throttle
== sc
->throttle
)
1276 if (throttle
< MXGE_MIN_THROTTLE
|| throttle
> MXGE_MAX_THROTTLE
)
1279 ifnet_serialize_all(sc
->ifp
);
1281 cmd
.data0
= throttle
;
1282 err
= mxge_send_cmd(sc
, MXGEFW_CMD_SET_THROTTLE_FACTOR
, &cmd
);
1284 sc
->throttle
= throttle
;
1286 ifnet_deserialize_all(sc
->ifp
);
1291 mxge_change_use_rss(SYSCTL_HANDLER_ARGS
)
1297 use_rss
= sc
->use_rss
;
1298 err
= sysctl_handle_int(oidp
, &use_rss
, arg2
, req
);
1302 if (use_rss
== sc
->use_rss
)
1305 ifnet_serialize_all(sc
->ifp
);
1307 sc
->use_rss
= use_rss
;
1308 if (sc
->ifp
->if_flags
& IFF_RUNNING
) {
1313 ifnet_deserialize_all(sc
->ifp
);
1318 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS
)
1321 unsigned int intr_coal_delay
;
1325 intr_coal_delay
= sc
->intr_coal_delay
;
1326 err
= sysctl_handle_int(oidp
, &intr_coal_delay
, arg2
, req
);
1330 if (intr_coal_delay
== sc
->intr_coal_delay
)
1333 if (intr_coal_delay
== 0 || intr_coal_delay
> 1000*1000)
1336 ifnet_serialize_all(sc
->ifp
);
1338 *sc
->intr_coal_delay_ptr
= htobe32(intr_coal_delay
);
1339 sc
->intr_coal_delay
= intr_coal_delay
;
1341 ifnet_deserialize_all(sc
->ifp
);
1346 mxge_handle_be32(SYSCTL_HANDLER_ARGS
)
1352 arg2
= be32toh(*(int *)arg1
);
1354 err
= sysctl_handle_int(oidp
, arg1
, arg2
, req
);
1360 mxge_rem_sysctls(mxge_softc_t
*sc
)
1362 if (sc
->ss
!= NULL
) {
1363 struct mxge_slice_state
*ss
;
1366 for (slice
= 0; slice
< sc
->num_slices
; slice
++) {
1367 ss
= &sc
->ss
[slice
];
1368 if (ss
->sysctl_tree
!= NULL
) {
1369 sysctl_ctx_free(&ss
->sysctl_ctx
);
1370 ss
->sysctl_tree
= NULL
;
1375 if (sc
->slice_sysctl_tree
!= NULL
) {
1376 sysctl_ctx_free(&sc
->slice_sysctl_ctx
);
1377 sc
->slice_sysctl_tree
= NULL
;
1382 mxge_add_sysctls(mxge_softc_t
*sc
)
1384 struct sysctl_ctx_list
*ctx
;
1385 struct sysctl_oid_list
*children
;
1387 struct mxge_slice_state
*ss
;
1391 ctx
= device_get_sysctl_ctx(sc
->dev
);
1392 children
= SYSCTL_CHILDREN(device_get_sysctl_tree(sc
->dev
));
1393 fw
= sc
->ss
[0].fw_stats
;
1396 * Random information
1398 SYSCTL_ADD_STRING(ctx
, children
, OID_AUTO
, "firmware_version",
1399 CTLFLAG_RD
, &sc
->fw_version
, 0, "firmware version");
1401 SYSCTL_ADD_STRING(ctx
, children
, OID_AUTO
, "serial_number",
1402 CTLFLAG_RD
, &sc
->serial_number_string
, 0, "serial number");
1404 SYSCTL_ADD_STRING(ctx
, children
, OID_AUTO
, "product_code",
1405 CTLFLAG_RD
, &sc
->product_code_string
, 0, "product code");
1407 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "pcie_link_width",
1408 CTLFLAG_RD
, &sc
->link_width
, 0, "link width");
1410 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "tx_boundary",
1411 CTLFLAG_RD
, &sc
->tx_boundary
, 0, "tx boundary");
1413 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "write_combine",
1414 CTLFLAG_RD
, &sc
->wc
, 0, "write combining PIO");
1416 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "read_dma_MBs",
1417 CTLFLAG_RD
, &sc
->read_dma
, 0, "DMA Read speed in MB/s");
1419 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "write_dma_MBs",
1420 CTLFLAG_RD
, &sc
->write_dma
, 0, "DMA Write speed in MB/s");
1422 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "read_write_dma_MBs",
1423 CTLFLAG_RD
, &sc
->read_write_dma
, 0,
1424 "DMA concurrent Read/Write speed in MB/s");
1426 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "watchdog_resets",
1427 CTLFLAG_RD
, &sc
->watchdog_resets
, 0,
1428 "Number of times NIC was reset");
1430 if (sc
->num_slices
> 1) {
1431 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "slice_cpumap",
1432 CTLTYPE_OPAQUE
| CTLFLAG_RD
, sc
->ring_map
, 0,
1433 if_ringmap_cpumap_sysctl
, "I", "slice CPU map");
1437 * Performance related tunables
1439 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "intr_coal_delay",
1440 CTLTYPE_INT
|CTLFLAG_RW
, sc
, 0, mxge_change_intr_coal
, "I",
1441 "Interrupt coalescing delay in usecs");
1443 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "throttle",
1444 CTLTYPE_INT
|CTLFLAG_RW
, sc
, 0, mxge_change_throttle
, "I",
1445 "Transmit throttling");
1447 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "use_rss",
1448 CTLTYPE_INT
|CTLFLAG_RW
, sc
, 0, mxge_change_use_rss
, "I",
1451 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "deassert_wait",
1452 CTLFLAG_RW
, &mxge_deassert_wait
, 0,
1453 "Wait for IRQ line to go low in ihandler");
1456 * Stats block from firmware is in network byte order.
1459 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "link_up",
1460 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->link_up
, 0,
1461 mxge_handle_be32
, "I", "link up");
1463 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "rdma_tags_available",
1464 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->rdma_tags_available
, 0,
1465 mxge_handle_be32
, "I", "rdma_tags_available");
1467 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "dropped_bad_crc32",
1468 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->dropped_bad_crc32
, 0,
1469 mxge_handle_be32
, "I", "dropped_bad_crc32");
1471 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "dropped_bad_phy",
1472 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->dropped_bad_phy
, 0,
1473 mxge_handle_be32
, "I", "dropped_bad_phy");
1475 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "dropped_link_error_or_filtered",
1476 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->dropped_link_error_or_filtered
, 0,
1477 mxge_handle_be32
, "I", "dropped_link_error_or_filtered");
1479 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "dropped_link_overflow",
1480 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->dropped_link_overflow
, 0,
1481 mxge_handle_be32
, "I", "dropped_link_overflow");
1483 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "dropped_multicast_filtered",
1484 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->dropped_multicast_filtered
, 0,
1485 mxge_handle_be32
, "I", "dropped_multicast_filtered");
1487 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "dropped_no_big_buffer",
1488 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->dropped_no_big_buffer
, 0,
1489 mxge_handle_be32
, "I", "dropped_no_big_buffer");
1491 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "dropped_no_small_buffer",
1492 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->dropped_no_small_buffer
, 0,
1493 mxge_handle_be32
, "I", "dropped_no_small_buffer");
1495 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "dropped_overrun",
1496 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->dropped_overrun
, 0,
1497 mxge_handle_be32
, "I", "dropped_overrun");
1499 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "dropped_pause",
1500 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->dropped_pause
, 0,
1501 mxge_handle_be32
, "I", "dropped_pause");
1503 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "dropped_runt",
1504 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->dropped_runt
, 0,
1505 mxge_handle_be32
, "I", "dropped_runt");
1507 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "dropped_unicast_filtered",
1508 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->dropped_unicast_filtered
, 0,
1509 mxge_handle_be32
, "I", "dropped_unicast_filtered");
1511 /* add counters exported for debugging from all slices */
1512 sysctl_ctx_init(&sc
->slice_sysctl_ctx
);
1513 sc
->slice_sysctl_tree
= SYSCTL_ADD_NODE(&sc
->slice_sysctl_ctx
,
1514 children
, OID_AUTO
, "slice", CTLFLAG_RD
, 0, "");
1515 if (sc
->slice_sysctl_tree
== NULL
) {
1516 device_printf(sc
->dev
, "can't add slice sysctl node\n");
1520 for (slice
= 0; slice
< sc
->num_slices
; slice
++) {
1521 ss
= &sc
->ss
[slice
];
1522 sysctl_ctx_init(&ss
->sysctl_ctx
);
1523 ctx
= &ss
->sysctl_ctx
;
1524 children
= SYSCTL_CHILDREN(sc
->slice_sysctl_tree
);
1525 ksprintf(slice_num
, "%d", slice
);
1526 ss
->sysctl_tree
= SYSCTL_ADD_NODE(ctx
, children
, OID_AUTO
,
1527 slice_num
, CTLFLAG_RD
, 0, "");
1528 if (ss
->sysctl_tree
== NULL
) {
1529 device_printf(sc
->dev
,
1530 "can't add %d slice sysctl node\n", slice
);
1531 return; /* XXX continue? */
1533 children
= SYSCTL_CHILDREN(ss
->sysctl_tree
);
1536 * XXX change to ULONG
1539 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "rx_small_cnt",
1540 CTLFLAG_RD
, &ss
->rx_data
.rx_small
.cnt
, 0, "rx_small_cnt");
1542 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "rx_big_cnt",
1543 CTLFLAG_RD
, &ss
->rx_data
.rx_big
.cnt
, 0, "rx_small_cnt");
1545 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "tx_req",
1546 CTLFLAG_RD
, &ss
->tx
.req
, 0, "tx_req");
1548 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "tx_done",
1549 CTLFLAG_RD
, &ss
->tx
.done
, 0, "tx_done");
1551 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "tx_pkt_done",
1552 CTLFLAG_RD
, &ss
->tx
.pkt_done
, 0, "tx_done");
1554 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "tx_queue_active",
1555 CTLFLAG_RD
, &ss
->tx
.queue_active
, 0, "tx_queue_active");
1557 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "tx_activate",
1558 CTLFLAG_RD
, &ss
->tx
.activate
, 0, "tx_activate");
1560 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "tx_deactivate",
1561 CTLFLAG_RD
, &ss
->tx
.deactivate
, 0, "tx_deactivate");
1566 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1567 * backwards one at a time and handle ring wraps
1569 static __inline
void
1570 mxge_submit_req_backwards(mxge_tx_ring_t
*tx
,
1571 mcp_kreq_ether_send_t
*src
, int cnt
)
1573 int idx
, starting_slot
;
1575 starting_slot
= tx
->req
;
1578 idx
= (starting_slot
+ cnt
) & tx
->mask
;
1579 mxge_pio_copy(&tx
->lanai
[idx
], &src
[cnt
], sizeof(*src
));
1585 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1586 * at most 32 bytes at a time, so as to avoid involving the software
1587 * pio handler in the nic. We re-write the first segment's flags
1588 * to mark them valid only after writing the entire chain
1590 static __inline
void
1591 mxge_submit_req(mxge_tx_ring_t
*tx
, mcp_kreq_ether_send_t
*src
, int cnt
)
1595 volatile uint32_t *dst_ints
;
1596 mcp_kreq_ether_send_t
*srcp
;
1597 volatile mcp_kreq_ether_send_t
*dstp
, *dst
;
1600 idx
= tx
->req
& tx
->mask
;
1602 last_flags
= src
->flags
;
1605 dst
= dstp
= &tx
->lanai
[idx
];
1608 if ((idx
+ cnt
) < tx
->mask
) {
1609 for (i
= 0; i
< cnt
- 1; i
+= 2) {
1610 mxge_pio_copy(dstp
, srcp
, 2 * sizeof(*src
));
1611 wmb(); /* force write every 32 bytes */
1617 * Submit all but the first request, and ensure
1618 * that it is submitted below
1620 mxge_submit_req_backwards(tx
, src
, cnt
);
1624 /* Submit the first request */
1625 mxge_pio_copy(dstp
, srcp
, sizeof(*src
));
1626 wmb(); /* barrier before setting valid flag */
1629 /* Re-write the last 32-bits with the valid flags */
1630 src
->flags
= last_flags
;
1631 src_ints
= (uint32_t *)src
;
1633 dst_ints
= (volatile uint32_t *)dst
;
1635 *dst_ints
= *src_ints
;
1641 mxge_pullup_tso(struct mbuf
**mp
)
1643 int hoff
, iphlen
, thoff
;
1647 KASSERT(M_WRITABLE(m
), ("TSO mbuf not writable"));
1649 iphlen
= m
->m_pkthdr
.csum_iphlen
;
1650 thoff
= m
->m_pkthdr
.csum_thlen
;
1651 hoff
= m
->m_pkthdr
.csum_lhlen
;
1653 KASSERT(iphlen
> 0, ("invalid ip hlen"));
1654 KASSERT(thoff
> 0, ("invalid tcp hlen"));
1655 KASSERT(hoff
> 0, ("invalid ether hlen"));
1657 if (__predict_false(m
->m_len
< hoff
+ iphlen
+ thoff
)) {
1658 m
= m_pullup(m
, hoff
+ iphlen
+ thoff
);
1669 mxge_encap_tso(mxge_tx_ring_t
*tx
, struct mxge_buffer_state
*info_map
,
1670 struct mbuf
*m
, int busdma_seg_cnt
)
1672 mcp_kreq_ether_send_t
*req
;
1673 bus_dma_segment_t
*seg
;
1674 uint32_t low
, high_swapped
;
1675 int len
, seglen
, cum_len
, cum_len_next
;
1676 int next_is_first
, chop
, cnt
, rdma_count
, small
;
1677 uint16_t pseudo_hdr_offset
, cksum_offset
, mss
;
1678 uint8_t flags
, flags_next
;
1679 struct mxge_buffer_state
*info_last
;
1680 bus_dmamap_t map
= info_map
->map
;
1682 mss
= m
->m_pkthdr
.tso_segsz
;
1685 * Negative cum_len signifies to the send loop that we are
1686 * still in the header portion of the TSO packet.
1688 cum_len
= -(m
->m_pkthdr
.csum_lhlen
+ m
->m_pkthdr
.csum_iphlen
+
1689 m
->m_pkthdr
.csum_thlen
);
1692 * TSO implies checksum offload on this hardware
1694 cksum_offset
= m
->m_pkthdr
.csum_lhlen
+ m
->m_pkthdr
.csum_iphlen
;
1695 flags
= MXGEFW_FLAGS_TSO_HDR
| MXGEFW_FLAGS_FIRST
;
1698 * For TSO, pseudo_hdr_offset holds mss. The firmware figures
1699 * out where to put the checksum by parsing the header.
1701 pseudo_hdr_offset
= htobe16(mss
);
1709 * "rdma_count" is the number of RDMAs belonging to the current
1710 * packet BEFORE the current send request. For non-TSO packets,
1711 * this is equal to "count".
1713 * For TSO packets, rdma_count needs to be reset to 0 after a
1716 * The rdma_count field of the send request is the number of
1717 * RDMAs of the packet starting at that request. For TSO send
1718 * requests with one ore more cuts in the middle, this is the
1719 * number of RDMAs starting after the last cut in the request.
1720 * All previous segments before the last cut implicitly have 1
1723 * Since the number of RDMAs is not known beforehand, it must be
1724 * filled-in retroactively - after each segmentation cut or at
1725 * the end of the entire packet.
1728 while (busdma_seg_cnt
) {
1730 * Break the busdma segment up into pieces
1732 low
= MXGE_LOWPART_TO_U32(seg
->ds_addr
);
1733 high_swapped
= htobe32(MXGE_HIGHPART_TO_U32(seg
->ds_addr
));
1737 flags_next
= flags
& ~MXGEFW_FLAGS_FIRST
;
1739 cum_len_next
= cum_len
+ seglen
;
1740 (req
- rdma_count
)->rdma_count
= rdma_count
+ 1;
1741 if (__predict_true(cum_len
>= 0)) {
1743 chop
= (cum_len_next
> mss
);
1744 cum_len_next
= cum_len_next
% mss
;
1745 next_is_first
= (cum_len_next
== 0);
1746 flags
|= chop
* MXGEFW_FLAGS_TSO_CHOP
;
1748 next_is_first
* MXGEFW_FLAGS_FIRST
;
1749 rdma_count
|= -(chop
| next_is_first
);
1750 rdma_count
+= chop
& !next_is_first
;
1751 } else if (cum_len_next
>= 0) {
1756 small
= (mss
<= MXGEFW_SEND_SMALL_SIZE
);
1757 flags_next
= MXGEFW_FLAGS_TSO_PLD
|
1758 MXGEFW_FLAGS_FIRST
|
1759 (small
* MXGEFW_FLAGS_SMALL
);
1762 req
->addr_high
= high_swapped
;
1763 req
->addr_low
= htobe32(low
);
1764 req
->pseudo_hdr_offset
= pseudo_hdr_offset
;
1766 req
->rdma_count
= 1;
1767 req
->length
= htobe16(seglen
);
1768 req
->cksum_offset
= cksum_offset
;
1770 flags
| ((cum_len
& 1) * MXGEFW_FLAGS_ALIGN_ODD
);
1773 cum_len
= cum_len_next
;
1778 if (__predict_false(cksum_offset
> seglen
))
1779 cksum_offset
-= seglen
;
1782 if (__predict_false(cnt
> tx
->max_desc
))
1788 (req
- rdma_count
)->rdma_count
= rdma_count
;
1792 req
->flags
|= MXGEFW_FLAGS_TSO_LAST
;
1793 } while (!(req
->flags
& (MXGEFW_FLAGS_TSO_CHOP
| MXGEFW_FLAGS_FIRST
)));
1795 info_last
= &tx
->info
[((cnt
- 1) + tx
->req
) & tx
->mask
];
1797 info_map
->map
= info_last
->map
;
1798 info_last
->map
= map
;
1801 mxge_submit_req(tx
, tx
->req_list
, cnt
);
1803 if (tx
->send_go
!= NULL
&& tx
->queue_active
== 0) {
1804 /* Tell the NIC to start polling this slice */
1806 tx
->queue_active
= 1;
1813 bus_dmamap_unload(tx
->dmat
, tx
->info
[tx
->req
& tx
->mask
].map
);
1819 mxge_encap(mxge_tx_ring_t
*tx
, struct mbuf
*m
, bus_addr_t zeropad
)
1821 mcp_kreq_ether_send_t
*req
;
1822 bus_dma_segment_t
*seg
;
1824 int cnt
, cum_len
, err
, i
, idx
, odd_flag
;
1825 uint16_t pseudo_hdr_offset
;
1826 uint8_t flags
, cksum_offset
;
1827 struct mxge_buffer_state
*info_map
, *info_last
;
1829 if (m
->m_pkthdr
.csum_flags
& CSUM_TSO
) {
1830 err
= mxge_pullup_tso(&m
);
1831 if (__predict_false(err
))
1836 * Map the frame for DMA
1838 idx
= tx
->req
& tx
->mask
;
1839 info_map
= &tx
->info
[idx
];
1840 map
= info_map
->map
;
1842 err
= bus_dmamap_load_mbuf_defrag(tx
->dmat
, map
, &m
,
1843 tx
->seg_list
, tx
->max_desc
- 2, &cnt
, BUS_DMA_NOWAIT
);
1844 if (__predict_false(err
!= 0))
1846 bus_dmamap_sync(tx
->dmat
, map
, BUS_DMASYNC_PREWRITE
);
1849 * TSO is different enough, we handle it in another routine
1851 if (m
->m_pkthdr
.csum_flags
& CSUM_TSO
)
1852 return mxge_encap_tso(tx
, info_map
, m
, cnt
);
1856 pseudo_hdr_offset
= 0;
1857 flags
= MXGEFW_FLAGS_NO_TSO
;
1860 * Checksum offloading
1862 if (m
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
) {
1863 cksum_offset
= m
->m_pkthdr
.csum_lhlen
+ m
->m_pkthdr
.csum_iphlen
;
1864 pseudo_hdr_offset
= cksum_offset
+ m
->m_pkthdr
.csum_data
;
1865 pseudo_hdr_offset
= htobe16(pseudo_hdr_offset
);
1866 req
->cksum_offset
= cksum_offset
;
1867 flags
|= MXGEFW_FLAGS_CKSUM
;
1868 odd_flag
= MXGEFW_FLAGS_ALIGN_ODD
;
1872 if (m
->m_pkthdr
.len
< MXGEFW_SEND_SMALL_SIZE
)
1873 flags
|= MXGEFW_FLAGS_SMALL
;
1876 * Convert segments into a request list
1880 req
->flags
= MXGEFW_FLAGS_FIRST
;
1881 for (i
= 0; i
< cnt
; i
++) {
1882 req
->addr_low
= htobe32(MXGE_LOWPART_TO_U32(seg
->ds_addr
));
1883 req
->addr_high
= htobe32(MXGE_HIGHPART_TO_U32(seg
->ds_addr
));
1884 req
->length
= htobe16(seg
->ds_len
);
1885 req
->cksum_offset
= cksum_offset
;
1886 if (cksum_offset
> seg
->ds_len
)
1887 cksum_offset
-= seg
->ds_len
;
1890 req
->pseudo_hdr_offset
= pseudo_hdr_offset
;
1891 req
->pad
= 0; /* complete solid 16-byte block */
1892 req
->rdma_count
= 1;
1893 req
->flags
|= flags
| ((cum_len
& 1) * odd_flag
);
1894 cum_len
+= seg
->ds_len
;
1902 * Pad runt to 60 bytes
1906 req
->addr_low
= htobe32(MXGE_LOWPART_TO_U32(zeropad
));
1907 req
->addr_high
= htobe32(MXGE_HIGHPART_TO_U32(zeropad
));
1908 req
->length
= htobe16(60 - cum_len
);
1909 req
->cksum_offset
= 0;
1910 req
->pseudo_hdr_offset
= pseudo_hdr_offset
;
1911 req
->pad
= 0; /* complete solid 16-byte block */
1912 req
->rdma_count
= 1;
1913 req
->flags
|= flags
| ((cum_len
& 1) * odd_flag
);
1917 tx
->req_list
[0].rdma_count
= cnt
;
1919 /* print what the firmware will see */
1920 for (i
= 0; i
< cnt
; i
++) {
1921 kprintf("%d: addr: 0x%x 0x%x len:%d pso%d,"
1922 "cso:%d, flags:0x%x, rdma:%d\n",
1923 i
, (int)ntohl(tx
->req_list
[i
].addr_high
),
1924 (int)ntohl(tx
->req_list
[i
].addr_low
),
1925 (int)ntohs(tx
->req_list
[i
].length
),
1926 (int)ntohs(tx
->req_list
[i
].pseudo_hdr_offset
),
1927 tx
->req_list
[i
].cksum_offset
, tx
->req_list
[i
].flags
,
1928 tx
->req_list
[i
].rdma_count
);
1930 kprintf("--------------\n");
1932 info_last
= &tx
->info
[((cnt
- 1) + tx
->req
) & tx
->mask
];
1934 info_map
->map
= info_last
->map
;
1935 info_last
->map
= map
;
1938 mxge_submit_req(tx
, tx
->req_list
, cnt
);
1940 if (tx
->send_go
!= NULL
&& tx
->queue_active
== 0) {
1941 /* Tell the NIC to start polling this slice */
1943 tx
->queue_active
= 1;
1955 mxge_start(struct ifnet
*ifp
, struct ifaltq_subque
*ifsq
)
1957 mxge_softc_t
*sc
= ifp
->if_softc
;
1958 mxge_tx_ring_t
*tx
= ifsq_get_priv(ifsq
);
1962 KKASSERT(tx
->ifsq
== ifsq
);
1963 ASSERT_SERIALIZED(&tx
->tx_serialize
);
1965 if ((ifp
->if_flags
& IFF_RUNNING
) == 0 || ifsq_is_oactive(ifsq
))
1968 zeropad
= sc
->zeropad_dma
.dmem_busaddr
;
1969 while (tx
->mask
- (tx
->req
- tx
->done
) > tx
->max_desc
) {
1973 m
= ifsq_dequeue(ifsq
);
1978 error
= mxge_encap(tx
, m
, zeropad
);
1982 IFNET_STAT_INC(ifp
, oerrors
, 1);
1985 /* Ran out of transmit slots */
1986 ifsq_set_oactive(ifsq
);
1989 tx
->watchdog
.wd_timer
= 5;
1993 mxge_watchdog(struct ifaltq_subque
*ifsq
)
1995 struct ifnet
*ifp
= ifsq_get_ifp(ifsq
);
1996 struct mxge_softc
*sc
= ifp
->if_softc
;
1997 uint32_t rx_pause
= be32toh(sc
->ss
->fw_stats
->dropped_pause
);
1998 mxge_tx_ring_t
*tx
= ifsq_get_priv(ifsq
);
2000 ASSERT_IFNET_SERIALIZED_ALL(ifp
);
2002 /* Check for pause blocking before resetting */
2003 if (tx
->watchdog_rx_pause
== rx_pause
) {
2004 mxge_warn_stuck(sc
, tx
, 0);
2005 mxge_watchdog_reset(sc
);
2008 if_printf(ifp
, "Flow control blocking xmits, "
2009 "check link partner\n");
2011 tx
->watchdog_rx_pause
= rx_pause
;
2015 * Copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2016 * at most 32 bytes at a time, so as to avoid involving the software
2017 * pio handler in the nic. We re-write the first segment's low
2018 * DMA address to mark it valid only after we write the entire chunk
2021 static __inline
void
2022 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t
*dst
,
2023 mcp_kreq_ether_recv_t
*src
)
2027 low
= src
->addr_low
;
2028 src
->addr_low
= 0xffffffff;
2029 mxge_pio_copy(dst
, src
, 4 * sizeof (*src
));
2031 mxge_pio_copy(dst
+ 4, src
+ 4, 4 * sizeof (*src
));
2033 src
->addr_low
= low
;
2034 dst
->addr_low
= low
;
2039 mxge_get_buf_small(mxge_rx_ring_t
*rx
, bus_dmamap_t map
, int idx
,
2042 bus_dma_segment_t seg
;
2044 int cnt
, err
, mflag
;
2047 if (__predict_false(init
))
2050 m
= m_gethdr(mflag
, MT_DATA
);
2053 if (__predict_false(init
)) {
2055 * During initialization, there
2056 * is nothing to setup; bail out
2062 m
->m_len
= m
->m_pkthdr
.len
= MHLEN
;
2064 err
= bus_dmamap_load_mbuf_segment(rx
->dmat
, map
, m
,
2065 &seg
, 1, &cnt
, BUS_DMA_NOWAIT
);
2068 if (__predict_false(init
)) {
2070 * During initialization, there
2071 * is nothing to setup; bail out
2078 rx
->info
[idx
].m
= m
;
2079 rx
->shadow
[idx
].addr_low
= htobe32(MXGE_LOWPART_TO_U32(seg
.ds_addr
));
2080 rx
->shadow
[idx
].addr_high
= htobe32(MXGE_HIGHPART_TO_U32(seg
.ds_addr
));
2084 mxge_submit_8rx(&rx
->lanai
[idx
- 7], &rx
->shadow
[idx
- 7]);
2089 mxge_get_buf_big(mxge_rx_ring_t
*rx
, bus_dmamap_t map
, int idx
,
2092 bus_dma_segment_t seg
;
2094 int cnt
, err
, mflag
;
2097 if (__predict_false(init
))
2100 if (rx
->cl_size
== MCLBYTES
)
2101 m
= m_getcl(mflag
, MT_DATA
, M_PKTHDR
);
2103 m
= m_getjcl(mflag
, MT_DATA
, M_PKTHDR
, MJUMPAGESIZE
);
2106 if (__predict_false(init
)) {
2108 * During initialization, there
2109 * is nothing to setup; bail out
2115 m
->m_len
= m
->m_pkthdr
.len
= rx
->cl_size
;
2117 err
= bus_dmamap_load_mbuf_segment(rx
->dmat
, map
, m
,
2118 &seg
, 1, &cnt
, BUS_DMA_NOWAIT
);
2121 if (__predict_false(init
)) {
2123 * During initialization, there
2124 * is nothing to setup; bail out
2131 rx
->info
[idx
].m
= m
;
2132 rx
->shadow
[idx
].addr_low
= htobe32(MXGE_LOWPART_TO_U32(seg
.ds_addr
));
2133 rx
->shadow
[idx
].addr_high
= htobe32(MXGE_HIGHPART_TO_U32(seg
.ds_addr
));
2137 mxge_submit_8rx(&rx
->lanai
[idx
- 7], &rx
->shadow
[idx
- 7]);
2142 * Myri10GE hardware checksums are not valid if the sender
2143 * padded the frame with non-zero padding. This is because
2144 * the firmware just does a simple 16-bit 1s complement
2145 * checksum across the entire frame, excluding the first 14
2146 * bytes. It is best to simply to check the checksum and
2147 * tell the stack about it only if the checksum is good
2149 static __inline
uint16_t
2150 mxge_rx_csum(struct mbuf
*m
, int csum
)
2152 const struct ether_header
*eh
;
2153 const struct ip
*ip
;
2156 eh
= mtod(m
, const struct ether_header
*);
2158 /* Only deal with IPv4 TCP & UDP for now */
2159 if (__predict_false(eh
->ether_type
!= htons(ETHERTYPE_IP
)))
2162 ip
= (const struct ip
*)(eh
+ 1);
2163 if (__predict_false(ip
->ip_p
!= IPPROTO_TCP
&& ip
->ip_p
!= IPPROTO_UDP
))
2167 c
= in_pseudo(ip
->ip_src
.s_addr
, ip
->ip_dst
.s_addr
,
2168 htonl(ntohs(csum
) + ntohs(ip
->ip_len
) +
2169 - (ip
->ip_hl
<< 2) + ip
->ip_p
));
2178 mxge_vlan_tag_remove(struct mbuf
*m
, uint32_t *csum
)
2180 struct ether_vlan_header
*evl
;
2183 evl
= mtod(m
, struct ether_vlan_header
*);
2186 * Fix checksum by subtracting EVL_ENCAPLEN bytes after
2187 * what the firmware thought was the end of the ethernet
2191 /* Put checksum into host byte order */
2192 *csum
= ntohs(*csum
);
2194 partial
= ntohl(*(uint32_t *)(mtod(m
, char *) + ETHER_HDR_LEN
));
2196 *csum
+= ((*csum
) < ~partial
);
2197 *csum
= ((*csum
) >> 16) + ((*csum
) & 0xFFFF);
2198 *csum
= ((*csum
) >> 16) + ((*csum
) & 0xFFFF);
2201 * Restore checksum to network byte order;
2202 * later consumers expect this
2204 *csum
= htons(*csum
);
2207 m
->m_pkthdr
.ether_vlantag
= ntohs(evl
->evl_tag
);
2208 m
->m_flags
|= M_VLANTAG
;
2211 * Remove the 802.1q header by copying the Ethernet
2212 * addresses over it and adjusting the beginning of
2213 * the data in the mbuf. The encapsulated Ethernet
2214 * type field is already in place.
2216 bcopy((char *)evl
, (char *)evl
+ EVL_ENCAPLEN
,
2217 ETHER_HDR_LEN
- ETHER_TYPE_LEN
);
2218 m_adj(m
, EVL_ENCAPLEN
);
2222 static __inline
void
2223 mxge_rx_done_big(struct ifnet
*ifp
, mxge_rx_ring_t
*rx
,
2224 uint32_t len
, uint32_t csum
)
2227 const struct ether_header
*eh
;
2228 bus_dmamap_t old_map
;
2231 idx
= rx
->cnt
& rx
->mask
;
2234 /* Save a pointer to the received mbuf */
2235 m
= rx
->info
[idx
].m
;
2237 /* Try to replace the received mbuf */
2238 if (mxge_get_buf_big(rx
, rx
->extra_map
, idx
, FALSE
)) {
2239 /* Drop the frame -- the old mbuf is re-cycled */
2240 IFNET_STAT_INC(ifp
, ierrors
, 1);
2244 /* Unmap the received buffer */
2245 old_map
= rx
->info
[idx
].map
;
2246 bus_dmamap_sync(rx
->dmat
, old_map
, BUS_DMASYNC_POSTREAD
);
2247 bus_dmamap_unload(rx
->dmat
, old_map
);
2249 /* Swap the bus_dmamap_t's */
2250 rx
->info
[idx
].map
= rx
->extra_map
;
2251 rx
->extra_map
= old_map
;
2254 * mcp implicitly skips 1st 2 bytes so that packet is properly
2257 m
->m_data
+= MXGEFW_PAD
;
2259 m
->m_pkthdr
.rcvif
= ifp
;
2260 m
->m_len
= m
->m_pkthdr
.len
= len
;
2262 IFNET_STAT_INC(ifp
, ipackets
, 1);
2264 eh
= mtod(m
, const struct ether_header
*);
2265 if (eh
->ether_type
== htons(ETHERTYPE_VLAN
))
2266 mxge_vlan_tag_remove(m
, &csum
);
2268 /* If the checksum is valid, mark it in the mbuf header */
2269 if ((ifp
->if_capenable
& IFCAP_RXCSUM
) &&
2270 mxge_rx_csum(m
, csum
) == 0) {
2271 /* Tell the stack that the checksum is good */
2272 m
->m_pkthdr
.csum_data
= 0xffff;
2273 m
->m_pkthdr
.csum_flags
= CSUM_PSEUDO_HDR
|
2276 ifp
->if_input(ifp
, m
, NULL
, -1);
2279 static __inline
void
2280 mxge_rx_done_small(struct ifnet
*ifp
, mxge_rx_ring_t
*rx
,
2281 uint32_t len
, uint32_t csum
)
2283 const struct ether_header
*eh
;
2285 bus_dmamap_t old_map
;
2288 idx
= rx
->cnt
& rx
->mask
;
2291 /* Save a pointer to the received mbuf */
2292 m
= rx
->info
[idx
].m
;
2294 /* Try to replace the received mbuf */
2295 if (mxge_get_buf_small(rx
, rx
->extra_map
, idx
, FALSE
)) {
2296 /* Drop the frame -- the old mbuf is re-cycled */
2297 IFNET_STAT_INC(ifp
, ierrors
, 1);
2301 /* Unmap the received buffer */
2302 old_map
= rx
->info
[idx
].map
;
2303 bus_dmamap_sync(rx
->dmat
, old_map
, BUS_DMASYNC_POSTREAD
);
2304 bus_dmamap_unload(rx
->dmat
, old_map
);
2306 /* Swap the bus_dmamap_t's */
2307 rx
->info
[idx
].map
= rx
->extra_map
;
2308 rx
->extra_map
= old_map
;
2311 * mcp implicitly skips 1st 2 bytes so that packet is properly
2314 m
->m_data
+= MXGEFW_PAD
;
2316 m
->m_pkthdr
.rcvif
= ifp
;
2317 m
->m_len
= m
->m_pkthdr
.len
= len
;
2319 IFNET_STAT_INC(ifp
, ipackets
, 1);
2321 eh
= mtod(m
, const struct ether_header
*);
2322 if (eh
->ether_type
== htons(ETHERTYPE_VLAN
))
2323 mxge_vlan_tag_remove(m
, &csum
);
2325 /* If the checksum is valid, mark it in the mbuf header */
2326 if ((ifp
->if_capenable
& IFCAP_RXCSUM
) &&
2327 mxge_rx_csum(m
, csum
) == 0) {
2328 /* Tell the stack that the checksum is good */
2329 m
->m_pkthdr
.csum_data
= 0xffff;
2330 m
->m_pkthdr
.csum_flags
= CSUM_PSEUDO_HDR
|
2333 ifp
->if_input(ifp
, m
, NULL
, -1);
2336 static __inline
void
2337 mxge_clean_rx_done(struct ifnet
*ifp
, struct mxge_rx_data
*rx_data
, int cycle
)
2339 mxge_rx_done_t
*rx_done
= &rx_data
->rx_done
;
2341 while (rx_done
->entry
[rx_done
->idx
].length
!= 0 && cycle
!= 0) {
2342 uint16_t length
, checksum
;
2344 length
= ntohs(rx_done
->entry
[rx_done
->idx
].length
);
2345 rx_done
->entry
[rx_done
->idx
].length
= 0;
2347 checksum
= rx_done
->entry
[rx_done
->idx
].checksum
;
2349 if (length
<= MXGE_RX_SMALL_BUFLEN
) {
2350 mxge_rx_done_small(ifp
, &rx_data
->rx_small
,
2353 mxge_rx_done_big(ifp
, &rx_data
->rx_big
,
2358 rx_done
->idx
&= rx_done
->mask
;
2363 static __inline
void
2364 mxge_tx_done(struct ifnet
*ifp
, mxge_tx_ring_t
*tx
, uint32_t mcp_idx
)
2366 ASSERT_SERIALIZED(&tx
->tx_serialize
);
2368 while (tx
->pkt_done
!= mcp_idx
) {
2372 idx
= tx
->done
& tx
->mask
;
2375 m
= tx
->info
[idx
].m
;
2377 * mbuf and DMA map only attached to the first
2382 IFNET_STAT_INC(ifp
, opackets
, 1);
2383 tx
->info
[idx
].m
= NULL
;
2384 bus_dmamap_unload(tx
->dmat
, tx
->info
[idx
].map
);
2390 * If we have space, clear OACTIVE to tell the stack that
2391 * its OK to send packets
2393 if (tx
->req
- tx
->done
< (tx
->mask
+ 1) / 2) {
2394 ifsq_clr_oactive(tx
->ifsq
);
2395 if (tx
->req
== tx
->done
) {
2396 /* Reset watchdog */
2397 tx
->watchdog
.wd_timer
= 0;
2401 if (!ifsq_is_empty(tx
->ifsq
))
2402 ifsq_devstart(tx
->ifsq
);
2404 if (tx
->send_stop
!= NULL
&& tx
->req
== tx
->done
) {
2406 * Let the NIC stop polling this queue, since there
2407 * are no more transmits pending
2410 tx
->queue_active
= 0;
2416 static struct mxge_media_type mxge_xfp_media_types
[] = {
2417 {IFM_10G_CX4
, 0x7f, "10GBASE-CX4 (module)"},
2418 {IFM_10G_SR
, (1 << 7), "10GBASE-SR"},
2419 {IFM_10G_LR
, (1 << 6), "10GBASE-LR"},
2420 {IFM_NONE
, (1 << 5), "10GBASE-ER"},
2421 {IFM_10G_LRM
, (1 << 4), "10GBASE-LRM"},
2422 {IFM_NONE
, (1 << 3), "10GBASE-SW"},
2423 {IFM_NONE
, (1 << 2), "10GBASE-LW"},
2424 {IFM_NONE
, (1 << 1), "10GBASE-EW"},
2425 {IFM_NONE
, (1 << 0), "Reserved"}
2428 static struct mxge_media_type mxge_sfp_media_types
[] = {
2429 {IFM_10G_TWINAX
, 0, "10GBASE-Twinax"},
2430 {IFM_NONE
, (1 << 7), "Reserved"},
2431 {IFM_10G_LRM
, (1 << 6), "10GBASE-LRM"},
2432 {IFM_10G_LR
, (1 << 5), "10GBASE-LR"},
2433 {IFM_10G_SR
, (1 << 4), "10GBASE-SR"},
2434 {IFM_10G_TWINAX
,(1 << 0), "10GBASE-Twinax"}
2438 mxge_media_set(mxge_softc_t
*sc
, int media_type
)
2442 if (media_type
== IFM_NONE
)
2446 fc_opt
= IFM_ETH_RXPAUSE
| IFM_ETH_TXPAUSE
;
2448 ifmedia_add(&sc
->media
, MXGE_IFM
| media_type
, 0, NULL
);
2449 ifmedia_set(&sc
->media
, MXGE_IFM
| media_type
| fc_opt
);
2451 sc
->current_media
= media_type
;
2455 mxge_media_unset(mxge_softc_t
*sc
)
2457 ifmedia_removeall(&sc
->media
);
2458 sc
->current_media
= IFM_NONE
;
2462 mxge_media_init(mxge_softc_t
*sc
)
2467 mxge_media_unset(sc
);
2470 * Parse the product code to deterimine the interface type
2471 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2472 * after the 3rd dash in the driver's cached copy of the
2473 * EEPROM's product code string.
2475 ptr
= sc
->product_code_string
;
2477 if_printf(sc
->ifp
, "Missing product code\n");
2481 for (i
= 0; i
< 3; i
++, ptr
++) {
2482 ptr
= strchr(ptr
, '-');
2484 if_printf(sc
->ifp
, "only %d dashes in PC?!?\n", i
);
2488 if (*ptr
== 'C' || *(ptr
+1) == 'C') {
2490 sc
->connector
= MXGE_CX4
;
2491 mxge_media_set(sc
, IFM_10G_CX4
);
2492 } else if (*ptr
== 'Q') {
2493 /* -Q is Quad Ribbon Fiber */
2494 sc
->connector
= MXGE_QRF
;
2495 if_printf(sc
->ifp
, "Quad Ribbon Fiber Media\n");
2496 /* DragonFly has no media type for Quad ribbon fiber */
2497 } else if (*ptr
== 'R') {
2499 sc
->connector
= MXGE_XFP
;
2500 /* NOTE: ifmedia will be installed later */
2501 } else if (*ptr
== 'S' || *(ptr
+1) == 'S') {
2502 /* -S or -2S is SFP+ */
2503 sc
->connector
= MXGE_SFP
;
2504 /* NOTE: ifmedia will be installed later */
2506 sc
->connector
= MXGE_UNK
;
2507 if_printf(sc
->ifp
, "Unknown media type: %c\n", *ptr
);
2512 * Determine the media type for a NIC. Some XFPs will identify
2513 * themselves only when their link is up, so this is initiated via a
2514 * link up interrupt. However, this can potentially take up to
2515 * several milliseconds, so it is run via the watchdog routine, rather
2516 * than in the interrupt handler itself.
2519 mxge_media_probe(mxge_softc_t
*sc
)
2522 const char *cage_type
;
2523 struct mxge_media_type
*mxge_media_types
= NULL
;
2524 int i
, err
, ms
, mxge_media_type_entries
;
2527 sc
->need_media_probe
= 0;
2529 if (sc
->connector
== MXGE_XFP
) {
2531 mxge_media_types
= mxge_xfp_media_types
;
2532 mxge_media_type_entries
= NELEM(mxge_xfp_media_types
);
2533 byte
= MXGE_XFP_COMPLIANCE_BYTE
;
2535 } else if (sc
->connector
== MXGE_SFP
) {
2536 /* -S or -2S is SFP+ */
2537 mxge_media_types
= mxge_sfp_media_types
;
2538 mxge_media_type_entries
= NELEM(mxge_sfp_media_types
);
2542 /* nothing to do; media type cannot change */
2547 * At this point we know the NIC has an XFP cage, so now we
2548 * try to determine what is in the cage by using the
2549 * firmware's XFP I2C commands to read the XFP 10GbE compilance
2550 * register. We read just one byte, which may take over
2554 bzero(&cmd
, sizeof(cmd
)); /* silence gcc warning */
2555 cmd
.data0
= 0; /* just fetch 1 byte, not all 256 */
2557 err
= mxge_send_cmd(sc
, MXGEFW_CMD_I2C_READ
, &cmd
);
2558 if (err
!= MXGEFW_CMD_OK
) {
2559 if (err
== MXGEFW_CMD_ERROR_I2C_FAILURE
)
2560 if_printf(sc
->ifp
, "failed to read XFP\n");
2561 else if (err
== MXGEFW_CMD_ERROR_I2C_ABSENT
)
2562 if_printf(sc
->ifp
, "Type R/S with no XFP!?!?\n");
2564 if_printf(sc
->ifp
, "I2C read failed, err: %d", err
);
2565 mxge_media_unset(sc
);
2569 /* Now we wait for the data to be cached */
2571 err
= mxge_send_cmd(sc
, MXGEFW_CMD_I2C_BYTE
, &cmd
);
2572 for (ms
= 0; err
== EBUSY
&& ms
< 50; ms
++) {
2575 err
= mxge_send_cmd(sc
, MXGEFW_CMD_I2C_BYTE
, &cmd
);
2577 if (err
!= MXGEFW_CMD_OK
) {
2578 if_printf(sc
->ifp
, "failed to read %s (%d, %dms)\n",
2579 cage_type
, err
, ms
);
2580 mxge_media_unset(sc
);
2584 if (cmd
.data0
== mxge_media_types
[0].bitmask
) {
2586 if_printf(sc
->ifp
, "%s:%s\n", cage_type
,
2587 mxge_media_types
[0].name
);
2589 if (sc
->current_media
!= mxge_media_types
[0].flag
) {
2590 mxge_media_unset(sc
);
2591 mxge_media_set(sc
, mxge_media_types
[0].flag
);
2595 for (i
= 1; i
< mxge_media_type_entries
; i
++) {
2596 if (cmd
.data0
& mxge_media_types
[i
].bitmask
) {
2598 if_printf(sc
->ifp
, "%s:%s\n", cage_type
,
2599 mxge_media_types
[i
].name
);
2602 if (sc
->current_media
!= mxge_media_types
[i
].flag
) {
2603 mxge_media_unset(sc
);
2604 mxge_media_set(sc
, mxge_media_types
[i
].flag
);
2609 mxge_media_unset(sc
);
2611 if_printf(sc
->ifp
, "%s media 0x%x unknown\n", cage_type
,
2617 mxge_intr_status(struct mxge_softc
*sc
, const mcp_irq_data_t
*stats
)
2619 if (sc
->link_state
!= stats
->link_up
) {
2620 sc
->link_state
= stats
->link_up
;
2621 if (sc
->link_state
) {
2622 sc
->ifp
->if_link_state
= LINK_STATE_UP
;
2623 if_link_state_change(sc
->ifp
);
2625 if_printf(sc
->ifp
, "link up\n");
2627 sc
->ifp
->if_link_state
= LINK_STATE_DOWN
;
2628 if_link_state_change(sc
->ifp
);
2630 if_printf(sc
->ifp
, "link down\n");
2632 sc
->need_media_probe
= 1;
2635 if (sc
->rdma_tags_available
!= be32toh(stats
->rdma_tags_available
)) {
2636 sc
->rdma_tags_available
= be32toh(stats
->rdma_tags_available
);
2637 if_printf(sc
->ifp
, "RDMA timed out! %d tags left\n",
2638 sc
->rdma_tags_available
);
2641 if (stats
->link_down
) {
2642 sc
->down_cnt
+= stats
->link_down
;
2644 sc
->ifp
->if_link_state
= LINK_STATE_DOWN
;
2645 if_link_state_change(sc
->ifp
);
2650 mxge_serialize_skipmain(struct mxge_softc
*sc
)
2652 lwkt_serialize_array_enter(sc
->serializes
, sc
->nserialize
, 1);
2656 mxge_deserialize_skipmain(struct mxge_softc
*sc
)
2658 lwkt_serialize_array_exit(sc
->serializes
, sc
->nserialize
, 1);
2662 mxge_legacy(void *arg
)
2664 struct mxge_slice_state
*ss
= arg
;
2665 mxge_softc_t
*sc
= ss
->sc
;
2666 mcp_irq_data_t
*stats
= ss
->fw_stats
;
2667 mxge_tx_ring_t
*tx
= &ss
->tx
;
2668 mxge_rx_done_t
*rx_done
= &ss
->rx_data
.rx_done
;
2669 uint32_t send_done_count
;
2672 ASSERT_SERIALIZED(&sc
->main_serialize
);
2674 /* Make sure the DMA has finished */
2677 valid
= stats
->valid
;
2679 /* Lower legacy IRQ */
2680 *sc
->irq_deassert
= 0;
2681 if (!mxge_deassert_wait
) {
2682 /* Don't wait for conf. that irq is low */
2686 mxge_serialize_skipmain(sc
);
2689 * Loop while waiting for legacy irq deassertion
2690 * XXX do we really want to loop?
2693 /* Check for transmit completes and receives */
2694 send_done_count
= be32toh(stats
->send_done_count
);
2695 while ((send_done_count
!= tx
->pkt_done
) ||
2696 (rx_done
->entry
[rx_done
->idx
].length
!= 0)) {
2697 if (send_done_count
!= tx
->pkt_done
) {
2698 mxge_tx_done(&sc
->arpcom
.ac_if
, tx
,
2699 (int)send_done_count
);
2701 mxge_clean_rx_done(&sc
->arpcom
.ac_if
, &ss
->rx_data
, -1);
2702 send_done_count
= be32toh(stats
->send_done_count
);
2704 if (mxge_deassert_wait
)
2706 } while (*((volatile uint8_t *)&stats
->valid
));
2708 mxge_deserialize_skipmain(sc
);
2710 /* Fw link & error stats meaningful only on the first slice */
2711 if (__predict_false(stats
->stats_updated
))
2712 mxge_intr_status(sc
, stats
);
2714 /* Check to see if we have rx token to pass back */
2716 *ss
->irq_claim
= be32toh(3);
2717 *(ss
->irq_claim
+ 1) = be32toh(3);
2723 struct mxge_slice_state
*ss
= arg
;
2724 mxge_softc_t
*sc
= ss
->sc
;
2725 mcp_irq_data_t
*stats
= ss
->fw_stats
;
2726 mxge_tx_ring_t
*tx
= &ss
->tx
;
2727 mxge_rx_done_t
*rx_done
= &ss
->rx_data
.rx_done
;
2728 uint32_t send_done_count
;
2730 #ifndef IFPOLL_ENABLE
2731 const boolean_t polling
= FALSE
;
2733 boolean_t polling
= FALSE
;
2736 ASSERT_SERIALIZED(&sc
->main_serialize
);
2738 /* Make sure the DMA has finished */
2739 if (__predict_false(!stats
->valid
))
2742 valid
= stats
->valid
;
2745 #ifdef IFPOLL_ENABLE
2746 if (sc
->arpcom
.ac_if
.if_flags
& IFF_NPOLLING
)
2751 /* Check for receives */
2752 lwkt_serialize_enter(&ss
->rx_data
.rx_serialize
);
2753 if (rx_done
->entry
[rx_done
->idx
].length
!= 0)
2754 mxge_clean_rx_done(&sc
->arpcom
.ac_if
, &ss
->rx_data
, -1);
2755 lwkt_serialize_exit(&ss
->rx_data
.rx_serialize
);
2759 * Check for transmit completes
2762 * Since pkt_done is only changed by mxge_tx_done(),
2763 * which is called only in interrupt handler, the
2764 * check w/o holding tx serializer is MPSAFE.
2766 send_done_count
= be32toh(stats
->send_done_count
);
2767 if (send_done_count
!= tx
->pkt_done
) {
2768 lwkt_serialize_enter(&tx
->tx_serialize
);
2769 mxge_tx_done(&sc
->arpcom
.ac_if
, tx
, (int)send_done_count
);
2770 lwkt_serialize_exit(&tx
->tx_serialize
);
2773 if (__predict_false(stats
->stats_updated
))
2774 mxge_intr_status(sc
, stats
);
2776 /* Check to see if we have rx token to pass back */
2777 if (!polling
&& (valid
& 0x1))
2778 *ss
->irq_claim
= be32toh(3);
2779 *(ss
->irq_claim
+ 1) = be32toh(3);
2783 mxge_msix_rx(void *arg
)
2785 struct mxge_slice_state
*ss
= arg
;
2786 mxge_rx_done_t
*rx_done
= &ss
->rx_data
.rx_done
;
2788 #ifdef IFPOLL_ENABLE
2789 if (ss
->sc
->arpcom
.ac_if
.if_flags
& IFF_NPOLLING
)
2793 ASSERT_SERIALIZED(&ss
->rx_data
.rx_serialize
);
2795 if (rx_done
->entry
[rx_done
->idx
].length
!= 0)
2796 mxge_clean_rx_done(&ss
->sc
->arpcom
.ac_if
, &ss
->rx_data
, -1);
2798 *ss
->irq_claim
= be32toh(3);
2802 mxge_msix_rxtx(void *arg
)
2804 struct mxge_slice_state
*ss
= arg
;
2805 mxge_softc_t
*sc
= ss
->sc
;
2806 mcp_irq_data_t
*stats
= ss
->fw_stats
;
2807 mxge_tx_ring_t
*tx
= &ss
->tx
;
2808 mxge_rx_done_t
*rx_done
= &ss
->rx_data
.rx_done
;
2809 uint32_t send_done_count
;
2811 #ifndef IFPOLL_ENABLE
2812 const boolean_t polling
= FALSE
;
2814 boolean_t polling
= FALSE
;
2817 ASSERT_SERIALIZED(&ss
->rx_data
.rx_serialize
);
2819 /* Make sure the DMA has finished */
2820 if (__predict_false(!stats
->valid
))
2823 valid
= stats
->valid
;
2826 #ifdef IFPOLL_ENABLE
2827 if (sc
->arpcom
.ac_if
.if_flags
& IFF_NPOLLING
)
2831 /* Check for receives */
2832 if (!polling
&& rx_done
->entry
[rx_done
->idx
].length
!= 0)
2833 mxge_clean_rx_done(&sc
->arpcom
.ac_if
, &ss
->rx_data
, -1);
2836 * Check for transmit completes
2839 * Since pkt_done is only changed by mxge_tx_done(),
2840 * which is called only in interrupt handler, the
2841 * check w/o holding tx serializer is MPSAFE.
2843 send_done_count
= be32toh(stats
->send_done_count
);
2844 if (send_done_count
!= tx
->pkt_done
) {
2845 lwkt_serialize_enter(&tx
->tx_serialize
);
2846 mxge_tx_done(&sc
->arpcom
.ac_if
, tx
, (int)send_done_count
);
2847 lwkt_serialize_exit(&tx
->tx_serialize
);
2850 /* Check to see if we have rx token to pass back */
2851 if (!polling
&& (valid
& 0x1))
2852 *ss
->irq_claim
= be32toh(3);
2853 *(ss
->irq_claim
+ 1) = be32toh(3);
2857 mxge_init(void *arg
)
2859 struct mxge_softc
*sc
= arg
;
2861 ASSERT_IFNET_SERIALIZED_ALL(sc
->ifp
);
2862 if ((sc
->ifp
->if_flags
& IFF_RUNNING
) == 0)
2867 mxge_free_slice_mbufs(struct mxge_slice_state
*ss
)
2871 for (i
= 0; i
<= ss
->rx_data
.rx_big
.mask
; i
++) {
2872 if (ss
->rx_data
.rx_big
.info
[i
].m
== NULL
)
2874 bus_dmamap_unload(ss
->rx_data
.rx_big
.dmat
,
2875 ss
->rx_data
.rx_big
.info
[i
].map
);
2876 m_freem(ss
->rx_data
.rx_big
.info
[i
].m
);
2877 ss
->rx_data
.rx_big
.info
[i
].m
= NULL
;
2880 for (i
= 0; i
<= ss
->rx_data
.rx_small
.mask
; i
++) {
2881 if (ss
->rx_data
.rx_small
.info
[i
].m
== NULL
)
2883 bus_dmamap_unload(ss
->rx_data
.rx_small
.dmat
,
2884 ss
->rx_data
.rx_small
.info
[i
].map
);
2885 m_freem(ss
->rx_data
.rx_small
.info
[i
].m
);
2886 ss
->rx_data
.rx_small
.info
[i
].m
= NULL
;
2889 /* Transmit ring used only on the first slice */
2890 if (ss
->tx
.info
== NULL
)
2893 for (i
= 0; i
<= ss
->tx
.mask
; i
++) {
2894 if (ss
->tx
.info
[i
].m
== NULL
)
2896 bus_dmamap_unload(ss
->tx
.dmat
, ss
->tx
.info
[i
].map
);
2897 m_freem(ss
->tx
.info
[i
].m
);
2898 ss
->tx
.info
[i
].m
= NULL
;
2903 mxge_free_mbufs(mxge_softc_t
*sc
)
2907 for (slice
= 0; slice
< sc
->num_slices
; slice
++)
2908 mxge_free_slice_mbufs(&sc
->ss
[slice
]);
2912 mxge_free_slice_rings(struct mxge_slice_state
*ss
)
2916 if (ss
->rx_data
.rx_done
.entry
!= NULL
) {
2917 mxge_dma_free(&ss
->rx_done_dma
);
2918 ss
->rx_data
.rx_done
.entry
= NULL
;
2921 if (ss
->tx
.req_list
!= NULL
) {
2922 kfree(ss
->tx
.req_list
, M_DEVBUF
);
2923 ss
->tx
.req_list
= NULL
;
2926 if (ss
->tx
.seg_list
!= NULL
) {
2927 kfree(ss
->tx
.seg_list
, M_DEVBUF
);
2928 ss
->tx
.seg_list
= NULL
;
2931 if (ss
->rx_data
.rx_small
.shadow
!= NULL
) {
2932 kfree(ss
->rx_data
.rx_small
.shadow
, M_DEVBUF
);
2933 ss
->rx_data
.rx_small
.shadow
= NULL
;
2936 if (ss
->rx_data
.rx_big
.shadow
!= NULL
) {
2937 kfree(ss
->rx_data
.rx_big
.shadow
, M_DEVBUF
);
2938 ss
->rx_data
.rx_big
.shadow
= NULL
;
2941 if (ss
->tx
.info
!= NULL
) {
2942 if (ss
->tx
.dmat
!= NULL
) {
2943 for (i
= 0; i
<= ss
->tx
.mask
; i
++) {
2944 bus_dmamap_destroy(ss
->tx
.dmat
,
2945 ss
->tx
.info
[i
].map
);
2947 bus_dma_tag_destroy(ss
->tx
.dmat
);
2949 kfree(ss
->tx
.info
, M_DEVBUF
);
2953 if (ss
->rx_data
.rx_small
.info
!= NULL
) {
2954 if (ss
->rx_data
.rx_small
.dmat
!= NULL
) {
2955 for (i
= 0; i
<= ss
->rx_data
.rx_small
.mask
; i
++) {
2956 bus_dmamap_destroy(ss
->rx_data
.rx_small
.dmat
,
2957 ss
->rx_data
.rx_small
.info
[i
].map
);
2959 bus_dmamap_destroy(ss
->rx_data
.rx_small
.dmat
,
2960 ss
->rx_data
.rx_small
.extra_map
);
2961 bus_dma_tag_destroy(ss
->rx_data
.rx_small
.dmat
);
2963 kfree(ss
->rx_data
.rx_small
.info
, M_DEVBUF
);
2964 ss
->rx_data
.rx_small
.info
= NULL
;
2967 if (ss
->rx_data
.rx_big
.info
!= NULL
) {
2968 if (ss
->rx_data
.rx_big
.dmat
!= NULL
) {
2969 for (i
= 0; i
<= ss
->rx_data
.rx_big
.mask
; i
++) {
2970 bus_dmamap_destroy(ss
->rx_data
.rx_big
.dmat
,
2971 ss
->rx_data
.rx_big
.info
[i
].map
);
2973 bus_dmamap_destroy(ss
->rx_data
.rx_big
.dmat
,
2974 ss
->rx_data
.rx_big
.extra_map
);
2975 bus_dma_tag_destroy(ss
->rx_data
.rx_big
.dmat
);
2977 kfree(ss
->rx_data
.rx_big
.info
, M_DEVBUF
);
2978 ss
->rx_data
.rx_big
.info
= NULL
;
2983 mxge_free_rings(mxge_softc_t
*sc
)
2990 for (slice
= 0; slice
< sc
->num_slices
; slice
++)
2991 mxge_free_slice_rings(&sc
->ss
[slice
]);
2995 mxge_alloc_slice_rings(struct mxge_slice_state
*ss
, int rx_ring_entries
,
2996 int tx_ring_entries
)
2998 mxge_softc_t
*sc
= ss
->sc
;
3003 * Allocate per-slice receive resources
3006 ss
->rx_data
.rx_small
.mask
= ss
->rx_data
.rx_big
.mask
=
3007 rx_ring_entries
- 1;
3008 ss
->rx_data
.rx_done
.mask
= (2 * rx_ring_entries
) - 1;
3010 /* Allocate the rx shadow rings */
3011 bytes
= rx_ring_entries
* sizeof(*ss
->rx_data
.rx_small
.shadow
);
3012 ss
->rx_data
.rx_small
.shadow
= kmalloc(bytes
, M_DEVBUF
, M_ZERO
|M_WAITOK
);
3014 bytes
= rx_ring_entries
* sizeof(*ss
->rx_data
.rx_big
.shadow
);
3015 ss
->rx_data
.rx_big
.shadow
= kmalloc(bytes
, M_DEVBUF
, M_ZERO
|M_WAITOK
);
3017 /* Allocate the rx host info rings */
3018 bytes
= rx_ring_entries
* sizeof(*ss
->rx_data
.rx_small
.info
);
3019 ss
->rx_data
.rx_small
.info
= kmalloc(bytes
, M_DEVBUF
, M_ZERO
|M_WAITOK
);
3021 bytes
= rx_ring_entries
* sizeof(*ss
->rx_data
.rx_big
.info
);
3022 ss
->rx_data
.rx_big
.info
= kmalloc(bytes
, M_DEVBUF
, M_ZERO
|M_WAITOK
);
3024 /* Allocate the rx busdma resources */
3025 err
= bus_dma_tag_create(sc
->parent_dmat
, /* parent */
3027 4096, /* boundary */
3028 BUS_SPACE_MAXADDR
, /* low */
3029 BUS_SPACE_MAXADDR
, /* high */
3030 NULL
, NULL
, /* filter */
3031 MHLEN
, /* maxsize */
3033 MHLEN
, /* maxsegsize */
3034 BUS_DMA_WAITOK
| BUS_DMA_ALLOCNOW
,
3036 &ss
->rx_data
.rx_small
.dmat
); /* tag */
3038 device_printf(sc
->dev
, "Err %d allocating rx_small dmat\n",
3043 err
= bus_dmamap_create(ss
->rx_data
.rx_small
.dmat
, BUS_DMA_WAITOK
,
3044 &ss
->rx_data
.rx_small
.extra_map
);
3046 device_printf(sc
->dev
, "Err %d extra rx_small dmamap\n", err
);
3047 bus_dma_tag_destroy(ss
->rx_data
.rx_small
.dmat
);
3048 ss
->rx_data
.rx_small
.dmat
= NULL
;
3051 for (i
= 0; i
<= ss
->rx_data
.rx_small
.mask
; i
++) {
3052 err
= bus_dmamap_create(ss
->rx_data
.rx_small
.dmat
,
3053 BUS_DMA_WAITOK
, &ss
->rx_data
.rx_small
.info
[i
].map
);
3057 device_printf(sc
->dev
, "Err %d rx_small dmamap\n", err
);
3059 for (j
= 0; j
< i
; ++j
) {
3060 bus_dmamap_destroy(ss
->rx_data
.rx_small
.dmat
,
3061 ss
->rx_data
.rx_small
.info
[j
].map
);
3063 bus_dmamap_destroy(ss
->rx_data
.rx_small
.dmat
,
3064 ss
->rx_data
.rx_small
.extra_map
);
3065 bus_dma_tag_destroy(ss
->rx_data
.rx_small
.dmat
);
3066 ss
->rx_data
.rx_small
.dmat
= NULL
;
3071 err
= bus_dma_tag_create(sc
->parent_dmat
, /* parent */
3073 4096, /* boundary */
3074 BUS_SPACE_MAXADDR
, /* low */
3075 BUS_SPACE_MAXADDR
, /* high */
3076 NULL
, NULL
, /* filter */
3079 4096, /* maxsegsize*/
3080 BUS_DMA_WAITOK
| BUS_DMA_ALLOCNOW
,
3082 &ss
->rx_data
.rx_big
.dmat
); /* tag */
3084 device_printf(sc
->dev
, "Err %d allocating rx_big dmat\n",
3089 err
= bus_dmamap_create(ss
->rx_data
.rx_big
.dmat
, BUS_DMA_WAITOK
,
3090 &ss
->rx_data
.rx_big
.extra_map
);
3092 device_printf(sc
->dev
, "Err %d extra rx_big dmamap\n", err
);
3093 bus_dma_tag_destroy(ss
->rx_data
.rx_big
.dmat
);
3094 ss
->rx_data
.rx_big
.dmat
= NULL
;
3097 for (i
= 0; i
<= ss
->rx_data
.rx_big
.mask
; i
++) {
3098 err
= bus_dmamap_create(ss
->rx_data
.rx_big
.dmat
, BUS_DMA_WAITOK
,
3099 &ss
->rx_data
.rx_big
.info
[i
].map
);
3103 device_printf(sc
->dev
, "Err %d rx_big dmamap\n", err
);
3104 for (j
= 0; j
< i
; ++j
) {
3105 bus_dmamap_destroy(ss
->rx_data
.rx_big
.dmat
,
3106 ss
->rx_data
.rx_big
.info
[j
].map
);
3108 bus_dmamap_destroy(ss
->rx_data
.rx_big
.dmat
,
3109 ss
->rx_data
.rx_big
.extra_map
);
3110 bus_dma_tag_destroy(ss
->rx_data
.rx_big
.dmat
);
3111 ss
->rx_data
.rx_big
.dmat
= NULL
;
3117 * Now allocate TX resources
3120 ss
->tx
.mask
= tx_ring_entries
- 1;
3121 ss
->tx
.max_desc
= MIN(MXGE_MAX_SEND_DESC
, tx_ring_entries
/ 4);
3124 * Allocate the tx request copy block; MUST be at least 8 bytes
3127 bytes
= sizeof(*ss
->tx
.req_list
) * (ss
->tx
.max_desc
+ 4);
3128 ss
->tx
.req_list
= kmalloc_cachealign(__VM_CACHELINE_ALIGN(bytes
),
3129 M_DEVBUF
, M_WAITOK
);
3131 /* Allocate the tx busdma segment list */
3132 bytes
= sizeof(*ss
->tx
.seg_list
) * ss
->tx
.max_desc
;
3133 ss
->tx
.seg_list
= kmalloc(bytes
, M_DEVBUF
, M_WAITOK
);
3135 /* Allocate the tx host info ring */
3136 bytes
= tx_ring_entries
* sizeof(*ss
->tx
.info
);
3137 ss
->tx
.info
= kmalloc(bytes
, M_DEVBUF
, M_ZERO
|M_WAITOK
);
3139 /* Allocate the tx busdma resources */
3140 err
= bus_dma_tag_create(sc
->parent_dmat
, /* parent */
3142 sc
->tx_boundary
, /* boundary */
3143 BUS_SPACE_MAXADDR
, /* low */
3144 BUS_SPACE_MAXADDR
, /* high */
3145 NULL
, NULL
, /* filter */
3147 sizeof(struct ether_vlan_header
),
3149 ss
->tx
.max_desc
- 2, /* num segs */
3150 sc
->tx_boundary
, /* maxsegsz */
3151 BUS_DMA_WAITOK
| BUS_DMA_ALLOCNOW
|
3152 BUS_DMA_ONEBPAGE
, /* flags */
3153 &ss
->tx
.dmat
); /* tag */
3155 device_printf(sc
->dev
, "Err %d allocating tx dmat\n", err
);
3160 * Now use these tags to setup DMA maps for each slot in the ring
3162 for (i
= 0; i
<= ss
->tx
.mask
; i
++) {
3163 err
= bus_dmamap_create(ss
->tx
.dmat
,
3164 BUS_DMA_WAITOK
| BUS_DMA_ONEBPAGE
, &ss
->tx
.info
[i
].map
);
3168 device_printf(sc
->dev
, "Err %d tx dmamap\n", err
);
3169 for (j
= 0; j
< i
; ++j
) {
3170 bus_dmamap_destroy(ss
->tx
.dmat
,
3171 ss
->tx
.info
[j
].map
);
3173 bus_dma_tag_destroy(ss
->tx
.dmat
);
3182 mxge_alloc_rings(mxge_softc_t
*sc
)
3186 int tx_ring_entries
, rx_ring_entries
;
3189 /* Get ring sizes */
3190 err
= mxge_send_cmd(sc
, MXGEFW_CMD_GET_SEND_RING_SIZE
, &cmd
);
3192 device_printf(sc
->dev
, "Cannot determine tx ring sizes\n");
3195 tx_ring_size
= cmd
.data0
;
3197 tx_ring_entries
= tx_ring_size
/ sizeof(mcp_kreq_ether_send_t
);
3198 rx_ring_entries
= sc
->rx_intr_slots
/ 2;
3201 device_printf(sc
->dev
, "tx desc %d, rx desc %d\n",
3202 tx_ring_entries
, rx_ring_entries
);
3205 sc
->ifp
->if_nmbclusters
= rx_ring_entries
* sc
->num_slices
;
3206 sc
->ifp
->if_nmbjclusters
= sc
->ifp
->if_nmbclusters
;
3208 ifq_set_maxlen(&sc
->ifp
->if_snd
, tx_ring_entries
- 1);
3209 ifq_set_ready(&sc
->ifp
->if_snd
);
3210 ifq_set_subq_cnt(&sc
->ifp
->if_snd
, sc
->num_tx_rings
);
3212 if (sc
->num_tx_rings
> 1) {
3213 sc
->ifp
->if_mapsubq
= ifq_mapsubq_modulo
;
3214 ifq_set_subq_divisor(&sc
->ifp
->if_snd
, sc
->num_tx_rings
);
3217 for (slice
= 0; slice
< sc
->num_slices
; slice
++) {
3218 err
= mxge_alloc_slice_rings(&sc
->ss
[slice
],
3219 rx_ring_entries
, tx_ring_entries
);
3221 device_printf(sc
->dev
,
3222 "alloc %d slice rings failed\n", slice
);
3230 mxge_choose_params(int mtu
, int *cl_size
)
3232 int bufsize
= mtu
+ ETHER_HDR_LEN
+ EVL_ENCAPLEN
+ MXGEFW_PAD
;
3234 if (bufsize
< MCLBYTES
) {
3235 *cl_size
= MCLBYTES
;
3237 KASSERT(bufsize
< MJUMPAGESIZE
, ("invalid MTU %d", mtu
));
3238 *cl_size
= MJUMPAGESIZE
;
3243 mxge_slice_open(struct mxge_slice_state
*ss
, int cl_size
)
3248 slice
= ss
- ss
->sc
->ss
;
3251 * Get the lanai pointers to the send and receive rings
3255 bzero(&cmd
, sizeof(cmd
)); /* silence gcc warning */
3256 if (ss
->sc
->num_tx_rings
== 1) {
3259 err
= mxge_send_cmd(ss
->sc
, MXGEFW_CMD_GET_SEND_OFFSET
,
3261 ss
->tx
.lanai
= (volatile mcp_kreq_ether_send_t
*)
3262 (ss
->sc
->sram
+ cmd
.data0
);
3263 /* Leave send_go and send_stop as NULL */
3267 err
= mxge_send_cmd(ss
->sc
, MXGEFW_CMD_GET_SEND_OFFSET
, &cmd
);
3268 ss
->tx
.lanai
= (volatile mcp_kreq_ether_send_t
*)
3269 (ss
->sc
->sram
+ cmd
.data0
);
3270 ss
->tx
.send_go
= (volatile uint32_t *)
3271 (ss
->sc
->sram
+ MXGEFW_ETH_SEND_GO
+ 64 * slice
);
3272 ss
->tx
.send_stop
= (volatile uint32_t *)
3273 (ss
->sc
->sram
+ MXGEFW_ETH_SEND_STOP
+ 64 * slice
);
3277 err
|= mxge_send_cmd(ss
->sc
, MXGEFW_CMD_GET_SMALL_RX_OFFSET
, &cmd
);
3278 ss
->rx_data
.rx_small
.lanai
=
3279 (volatile mcp_kreq_ether_recv_t
*)(ss
->sc
->sram
+ cmd
.data0
);
3282 err
|= mxge_send_cmd(ss
->sc
, MXGEFW_CMD_GET_BIG_RX_OFFSET
, &cmd
);
3283 ss
->rx_data
.rx_big
.lanai
=
3284 (volatile mcp_kreq_ether_recv_t
*)(ss
->sc
->sram
+ cmd
.data0
);
3287 if_printf(ss
->sc
->ifp
,
3288 "failed to get ring sizes or locations\n");
3293 * Stock small receive ring
3295 for (i
= 0; i
<= ss
->rx_data
.rx_small
.mask
; i
++) {
3296 err
= mxge_get_buf_small(&ss
->rx_data
.rx_small
,
3297 ss
->rx_data
.rx_small
.info
[i
].map
, i
, TRUE
);
3299 if_printf(ss
->sc
->ifp
, "alloced %d/%d smalls\n", i
,
3300 ss
->rx_data
.rx_small
.mask
+ 1);
3306 * Stock big receive ring
3308 for (i
= 0; i
<= ss
->rx_data
.rx_big
.mask
; i
++) {
3309 ss
->rx_data
.rx_big
.shadow
[i
].addr_low
= 0xffffffff;
3310 ss
->rx_data
.rx_big
.shadow
[i
].addr_high
= 0xffffffff;
3313 ss
->rx_data
.rx_big
.cl_size
= cl_size
;
3315 for (i
= 0; i
<= ss
->rx_data
.rx_big
.mask
; i
++) {
3316 err
= mxge_get_buf_big(&ss
->rx_data
.rx_big
,
3317 ss
->rx_data
.rx_big
.info
[i
].map
, i
, TRUE
);
3319 if_printf(ss
->sc
->ifp
, "alloced %d/%d bigs\n", i
,
3320 ss
->rx_data
.rx_big
.mask
+ 1);
3328 mxge_open(mxge_softc_t
*sc
)
3330 struct ifnet
*ifp
= sc
->ifp
;
3332 int err
, slice
, cl_size
, i
;
3334 volatile uint8_t *itable
;
3335 struct mxge_slice_state
*ss
;
3337 ASSERT_IFNET_SERIALIZED_ALL(ifp
);
3339 /* Copy the MAC address in case it was overridden */
3340 bcopy(IF_LLADDR(ifp
), sc
->mac_addr
, ETHER_ADDR_LEN
);
3342 err
= mxge_reset(sc
, 1);
3344 if_printf(ifp
, "failed to reset\n");
3348 if (sc
->num_slices
> 1) {
3350 volatile uint8_t *hwkey
;
3351 uint8_t swkey
[MXGE_HWRSS_KEYLEN
];
3354 * Setup the indirect table.
3356 if_ringmap_rdrtable(sc
->ring_map
, sc
->rdr_table
,
3359 cmd
.data0
= NETISR_CPUMAX
;
3360 err
= mxge_send_cmd(sc
,
3361 MXGEFW_CMD_SET_RSS_TABLE_SIZE
, &cmd
);
3363 err
|= mxge_send_cmd(sc
,
3364 MXGEFW_CMD_GET_RSS_TABLE_OFFSET
, &cmd
);
3366 if_printf(ifp
, "failed to setup rss tables\n");
3370 itable
= sc
->sram
+ cmd
.data0
;
3371 for (i
= 0; i
< NETISR_CPUMAX
; i
++)
3372 itable
[i
] = sc
->rdr_table
[i
];
3375 * Setup Toeplitz key.
3377 err
= mxge_send_cmd(sc
, MXGEFW_CMD_GET_RSS_KEY_OFFSET
,
3380 if_printf(ifp
, "failed to get rsskey\n");
3383 hwkey
= sc
->sram
+ cmd
.data0
;
3385 toeplitz_get_key(swkey
, MXGE_HWRSS_KEYLEN
);
3386 for (i
= 0; i
< MXGE_HWRSS_KEYLEN
; ++i
)
3387 hwkey
[i
] = swkey
[i
];
3390 err
= mxge_send_cmd(sc
, MXGEFW_CMD_RSS_KEY_UPDATED
,
3393 if_printf(ifp
, "failed to update rsskey\n");
3397 if_printf(ifp
, "RSS key updated\n");
3399 /* Setup the indirection table */
3400 cmd
.data0
= sc
->num_slices
;
3401 err
= mxge_send_cmd(sc
,
3402 MXGEFW_CMD_SET_RSS_TABLE_SIZE
, &cmd
);
3404 err
|= mxge_send_cmd(sc
,
3405 MXGEFW_CMD_GET_RSS_TABLE_OFFSET
, &cmd
);
3407 if_printf(ifp
, "failed to setup rss tables\n");
3411 /* Just enable an identity mapping */
3412 itable
= sc
->sram
+ cmd
.data0
;
3413 for (i
= 0; i
< sc
->num_slices
; i
++)
3414 itable
[i
] = (uint8_t)i
;
3420 if_printf(ifp
, "input hash: RSS\n");
3421 cmd
.data1
= MXGEFW_RSS_HASH_TYPE_IPV4
|
3422 MXGEFW_RSS_HASH_TYPE_TCP_IPV4
;
3425 if_printf(ifp
, "input hash: SRC_DST_PORT\n");
3426 cmd
.data1
= MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT
;
3428 err
= mxge_send_cmd(sc
, MXGEFW_CMD_SET_RSS_ENABLE
, &cmd
);
3430 if_printf(ifp
, "failed to enable slices\n");
3435 cmd
.data0
= MXGEFW_TSO_MODE_NDIS
;
3436 err
= mxge_send_cmd(sc
, MXGEFW_CMD_SET_TSO_MODE
, &cmd
);
3439 * Can't change TSO mode to NDIS, never allow TSO then
3441 if_printf(ifp
, "failed to set TSO mode\n");
3442 ifp
->if_capenable
&= ~IFCAP_TSO
;
3443 ifp
->if_capabilities
&= ~IFCAP_TSO
;
3444 ifp
->if_hwassist
&= ~CSUM_TSO
;
3447 mxge_choose_params(ifp
->if_mtu
, &cl_size
);
3450 err
= mxge_send_cmd(sc
, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS
, &cmd
);
3452 * Error is only meaningful if we're trying to set
3453 * MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1
3457 * Give the firmware the mtu and the big and small buffer
3458 * sizes. The firmware wants the big buf size to be a power
3459 * of two. Luckily, DragonFly's clusters are powers of two
3461 cmd
.data0
= ifp
->if_mtu
+ ETHER_HDR_LEN
+ EVL_ENCAPLEN
;
3462 err
= mxge_send_cmd(sc
, MXGEFW_CMD_SET_MTU
, &cmd
);
3464 cmd
.data0
= MXGE_RX_SMALL_BUFLEN
;
3465 err
|= mxge_send_cmd(sc
, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE
, &cmd
);
3467 cmd
.data0
= cl_size
;
3468 err
|= mxge_send_cmd(sc
, MXGEFW_CMD_SET_BIG_BUFFER_SIZE
, &cmd
);
3471 if_printf(ifp
, "failed to setup params\n");
3475 /* Now give him the pointer to the stats block */
3476 for (slice
= 0; slice
< sc
->num_slices
; slice
++) {
3477 ss
= &sc
->ss
[slice
];
3478 cmd
.data0
= MXGE_LOWPART_TO_U32(ss
->fw_stats_dma
.dmem_busaddr
);
3479 cmd
.data1
= MXGE_HIGHPART_TO_U32(ss
->fw_stats_dma
.dmem_busaddr
);
3480 cmd
.data2
= sizeof(struct mcp_irq_data
);
3481 cmd
.data2
|= (slice
<< 16);
3482 err
|= mxge_send_cmd(sc
, MXGEFW_CMD_SET_STATS_DMA_V2
, &cmd
);
3486 bus
= sc
->ss
->fw_stats_dma
.dmem_busaddr
;
3487 bus
+= offsetof(struct mcp_irq_data
, send_done_count
);
3488 cmd
.data0
= MXGE_LOWPART_TO_U32(bus
);
3489 cmd
.data1
= MXGE_HIGHPART_TO_U32(bus
);
3490 err
= mxge_send_cmd(sc
, MXGEFW_CMD_SET_STATS_DMA_OBSOLETE
,
3493 /* Firmware cannot support multicast without STATS_DMA_V2 */
3494 sc
->fw_multicast_support
= 0;
3496 sc
->fw_multicast_support
= 1;
3500 if_printf(ifp
, "failed to setup params\n");
3504 for (slice
= 0; slice
< sc
->num_slices
; slice
++) {
3505 err
= mxge_slice_open(&sc
->ss
[slice
], cl_size
);
3507 if_printf(ifp
, "couldn't open slice %d\n", slice
);
3512 /* Finally, start the firmware running */
3513 err
= mxge_send_cmd(sc
, MXGEFW_CMD_ETHERNET_UP
, &cmd
);
3515 if_printf(ifp
, "Couldn't bring up link\n");
3519 ifp
->if_flags
|= IFF_RUNNING
;
3520 for (i
= 0; i
< sc
->num_tx_rings
; ++i
) {
3521 mxge_tx_ring_t
*tx
= &sc
->ss
[i
].tx
;
3523 ifsq_clr_oactive(tx
->ifsq
);
3524 ifsq_watchdog_start(&tx
->watchdog
);
3530 mxge_free_mbufs(sc
);
3535 mxge_close(mxge_softc_t
*sc
, int down
)
3537 struct ifnet
*ifp
= sc
->ifp
;
3539 int err
, old_down_cnt
, i
;
3541 ASSERT_IFNET_SERIALIZED_ALL(ifp
);
3544 old_down_cnt
= sc
->down_cnt
;
3547 err
= mxge_send_cmd(sc
, MXGEFW_CMD_ETHERNET_DOWN
, &cmd
);
3549 if_printf(ifp
, "Couldn't bring down link\n");
3551 if (old_down_cnt
== sc
->down_cnt
) {
3556 ifnet_deserialize_all(ifp
);
3557 DELAY(10 * sc
->intr_coal_delay
);
3558 ifnet_serialize_all(ifp
);
3562 if (old_down_cnt
== sc
->down_cnt
)
3563 if_printf(ifp
, "never got down irq\n");
3565 mxge_free_mbufs(sc
);
3567 ifp
->if_flags
&= ~IFF_RUNNING
;
3568 for (i
= 0; i
< sc
->num_tx_rings
; ++i
) {
3569 mxge_tx_ring_t
*tx
= &sc
->ss
[i
].tx
;
3571 ifsq_clr_oactive(tx
->ifsq
);
3572 ifsq_watchdog_stop(&tx
->watchdog
);
3577 mxge_setup_cfg_space(mxge_softc_t
*sc
)
3579 device_t dev
= sc
->dev
;
3581 uint16_t lnk
, pectl
;
3583 /* Find the PCIe link width and set max read request to 4KB */
3584 if (pci_find_extcap(dev
, PCIY_EXPRESS
, ®
) == 0) {
3585 lnk
= pci_read_config(dev
, reg
+ 0x12, 2);
3586 sc
->link_width
= (lnk
>> 4) & 0x3f;
3588 if (sc
->pectl
== 0) {
3589 pectl
= pci_read_config(dev
, reg
+ 0x8, 2);
3590 pectl
= (pectl
& ~0x7000) | (5 << 12);
3591 pci_write_config(dev
, reg
+ 0x8, pectl
, 2);
3594 /* Restore saved pectl after watchdog reset */
3595 pci_write_config(dev
, reg
+ 0x8, sc
->pectl
, 2);
3599 /* Enable DMA and memory space access */
3600 pci_enable_busmaster(dev
);
3604 mxge_read_reboot(mxge_softc_t
*sc
)
3606 device_t dev
= sc
->dev
;
3609 /* Find the vendor specific offset */
3610 if (pci_find_extcap(dev
, PCIY_VENDOR
, &vs
) != 0) {
3611 if_printf(sc
->ifp
, "could not find vendor specific offset\n");
3612 return (uint32_t)-1;
3614 /* Enable read32 mode */
3615 pci_write_config(dev
, vs
+ 0x10, 0x3, 1);
3616 /* Tell NIC which register to read */
3617 pci_write_config(dev
, vs
+ 0x18, 0xfffffff0, 4);
3618 return pci_read_config(dev
, vs
+ 0x14, 4);
3622 mxge_watchdog_reset(mxge_softc_t
*sc
)
3624 struct pci_devinfo
*dinfo
;
3631 if_printf(sc
->ifp
, "Watchdog reset!\n");
3634 * Check to see if the NIC rebooted. If it did, then all of
3635 * PCI config space has been reset, and things like the
3636 * busmaster bit will be zero. If this is the case, then we
3637 * must restore PCI config space before the NIC can be used
3640 cmd
= pci_read_config(sc
->dev
, PCIR_COMMAND
, 2);
3641 if (cmd
== 0xffff) {
3643 * Maybe the watchdog caught the NIC rebooting; wait
3644 * up to 100ms for it to finish. If it does not come
3645 * back, then give up
3648 cmd
= pci_read_config(sc
->dev
, PCIR_COMMAND
, 2);
3650 if_printf(sc
->ifp
, "NIC disappeared!\n");
3652 if ((cmd
& PCIM_CMD_BUSMASTEREN
) == 0) {
3653 /* Print the reboot status */
3654 reboot
= mxge_read_reboot(sc
);
3655 if_printf(sc
->ifp
, "NIC rebooted, status = 0x%x\n", reboot
);
3657 running
= sc
->ifp
->if_flags
& IFF_RUNNING
;
3660 * Quiesce NIC so that TX routines will not try to
3661 * xmit after restoration of BAR
3664 /* Mark the link as down */
3665 if (sc
->link_state
) {
3666 sc
->ifp
->if_link_state
= LINK_STATE_DOWN
;
3667 if_link_state_change(sc
->ifp
);
3671 /* Restore PCI configuration space */
3672 dinfo
= device_get_ivars(sc
->dev
);
3673 pci_cfg_restore(sc
->dev
, dinfo
);
3675 /* And redo any changes we made to our config space */
3676 mxge_setup_cfg_space(sc
);
3679 err
= mxge_load_firmware(sc
, 0);
3681 if_printf(sc
->ifp
, "Unable to re-load f/w\n");
3682 if (running
&& !err
) {
3685 err
= mxge_open(sc
);
3687 for (i
= 0; i
< sc
->num_tx_rings
; ++i
)
3688 ifsq_devstart_sched(sc
->ss
[i
].tx
.ifsq
);
3690 sc
->watchdog_resets
++;
3692 if_printf(sc
->ifp
, "NIC did not reboot, not resetting\n");
3696 if_printf(sc
->ifp
, "watchdog reset failed\n");
3700 callout_reset(&sc
->co_hdl
, mxge_ticks
, mxge_tick
, sc
);
3705 mxge_warn_stuck(mxge_softc_t
*sc
, mxge_tx_ring_t
*tx
, int slice
)
3707 if_printf(sc
->ifp
, "slice %d struck? ring state:\n", slice
);
3708 if_printf(sc
->ifp
, "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3709 tx
->req
, tx
->done
, tx
->queue_active
);
3710 if_printf(sc
->ifp
, "tx.activate=%d tx.deactivate=%d\n",
3711 tx
->activate
, tx
->deactivate
);
3712 if_printf(sc
->ifp
, "pkt_done=%d fw=%d\n",
3713 tx
->pkt_done
, be32toh(sc
->ss
->fw_stats
->send_done_count
));
3717 mxge_update_stats(mxge_softc_t
*sc
)
3719 u_long ipackets
, opackets
, pkts
;
3721 IFNET_STAT_GET(sc
->ifp
, ipackets
, ipackets
);
3722 IFNET_STAT_GET(sc
->ifp
, opackets
, opackets
);
3724 pkts
= ipackets
- sc
->ipackets
;
3725 pkts
+= opackets
- sc
->opackets
;
3727 sc
->ipackets
= ipackets
;
3728 sc
->opackets
= opackets
;
3734 mxge_tick(void *arg
)
3736 mxge_softc_t
*sc
= arg
;
3741 lwkt_serialize_enter(&sc
->main_serialize
);
3744 if (sc
->ifp
->if_flags
& IFF_RUNNING
) {
3745 /* Aggregate stats from different slices */
3746 pkts
= mxge_update_stats(sc
);
3747 if (sc
->need_media_probe
)
3748 mxge_media_probe(sc
);
3753 /* Ensure NIC did not suffer h/w fault while idle */
3754 cmd
= pci_read_config(sc
->dev
, PCIR_COMMAND
, 2);
3755 if ((cmd
& PCIM_CMD_BUSMASTEREN
) == 0) {
3757 mxge_serialize_skipmain(sc
);
3758 mxge_watchdog_reset(sc
);
3759 mxge_deserialize_skipmain(sc
);
3763 /* Look less often if NIC is idle */
3768 callout_reset(&sc
->co_hdl
, ticks
, mxge_tick
, sc
);
3770 lwkt_serialize_exit(&sc
->main_serialize
);
3774 mxge_media_change(struct ifnet
*ifp
)
3776 mxge_softc_t
*sc
= ifp
->if_softc
;
3777 const struct ifmedia
*ifm
= &sc
->media
;
3780 if (IFM_OPTIONS(ifm
->ifm_media
) & (IFM_ETH_RXPAUSE
| IFM_ETH_TXPAUSE
)) {
3789 return mxge_change_pause(sc
, pause
);
3793 mxge_change_mtu(mxge_softc_t
*sc
, int mtu
)
3795 struct ifnet
*ifp
= sc
->ifp
;
3796 int real_mtu
, old_mtu
;
3799 real_mtu
= mtu
+ ETHER_HDR_LEN
+ EVL_ENCAPLEN
;
3800 if (mtu
> sc
->max_mtu
|| real_mtu
< 60)
3803 old_mtu
= ifp
->if_mtu
;
3805 if (ifp
->if_flags
& IFF_RUNNING
) {
3807 err
= mxge_open(sc
);
3809 ifp
->if_mtu
= old_mtu
;
3818 mxge_media_status(struct ifnet
*ifp
, struct ifmediareq
*ifmr
)
3820 mxge_softc_t
*sc
= ifp
->if_softc
;
3822 ifmr
->ifm_status
= IFM_AVALID
;
3823 ifmr
->ifm_active
= IFM_ETHER
;
3826 ifmr
->ifm_status
|= IFM_ACTIVE
;
3829 * Autoselect is not supported, so the current media
3830 * should be delivered.
3832 ifmr
->ifm_active
|= sc
->current_media
;
3833 if (sc
->current_media
!= IFM_NONE
) {
3834 ifmr
->ifm_active
|= MXGE_IFM
;
3836 ifmr
->ifm_active
|= IFM_ETH_RXPAUSE
| IFM_ETH_TXPAUSE
;
3841 mxge_ioctl(struct ifnet
*ifp
, u_long command
, caddr_t data
,
3842 struct ucred
*cr __unused
)
3844 mxge_softc_t
*sc
= ifp
->if_softc
;
3845 struct ifreq
*ifr
= (struct ifreq
*)data
;
3848 ASSERT_IFNET_SERIALIZED_ALL(ifp
);
3853 err
= mxge_change_mtu(sc
, ifr
->ifr_mtu
);
3860 if (ifp
->if_flags
& IFF_UP
) {
3861 if (!(ifp
->if_flags
& IFF_RUNNING
)) {
3862 err
= mxge_open(sc
);
3865 * Take care of PROMISC and ALLMULTI
3868 mxge_change_promisc(sc
,
3869 ifp
->if_flags
& IFF_PROMISC
);
3870 mxge_set_multicast_list(sc
);
3873 if (ifp
->if_flags
& IFF_RUNNING
)
3880 mxge_set_multicast_list(sc
);
3884 mask
= ifr
->ifr_reqcap
^ ifp
->if_capenable
;
3885 if (mask
& IFCAP_TXCSUM
) {
3886 ifp
->if_capenable
^= IFCAP_TXCSUM
;
3887 if (ifp
->if_capenable
& IFCAP_TXCSUM
)
3888 ifp
->if_hwassist
|= CSUM_TCP
| CSUM_UDP
;
3890 ifp
->if_hwassist
&= ~(CSUM_TCP
| CSUM_UDP
);
3892 if (mask
& IFCAP_TSO
) {
3893 ifp
->if_capenable
^= IFCAP_TSO
;
3894 if (ifp
->if_capenable
& IFCAP_TSO
)
3895 ifp
->if_hwassist
|= CSUM_TSO
;
3897 ifp
->if_hwassist
&= ~CSUM_TSO
;
3899 if (mask
& IFCAP_RXCSUM
)
3900 ifp
->if_capenable
^= IFCAP_RXCSUM
;
3901 if (mask
& IFCAP_VLAN_HWTAGGING
)
3902 ifp
->if_capenable
^= IFCAP_VLAN_HWTAGGING
;
3907 err
= ifmedia_ioctl(ifp
, (struct ifreq
*)data
,
3908 &sc
->media
, command
);
3912 err
= ether_ioctl(ifp
, command
, data
);
/*
 * mxge_fetch_tunables: copy module-load tunables into the softc,
 * clamping each one to a sane range before use.
 */
3919 mxge_fetch_tunables(mxge_softc_t
*sc
)
/* interrupt coalescing delay: reject negatives and anything > 10ms */
3923 sc
->intr_coal_delay
= mxge_intr_coal_delay
;
3924 if (sc
->intr_coal_delay
< 0 || sc
->intr_coal_delay
> (10 * 1000))
3925 sc
->intr_coal_delay
= MXGE_INTR_COAL_DELAY
;
/* periodic tick defaults to twice a second */
3928 if (mxge_ticks
== 0)
3929 mxge_ticks
= hz
/ 2;
/* parse the flow-control tunable string into ifmedia pause bits */
3931 ifm
= ifmedia_str2ethfc(mxge_flowctrl
);
3932 if (ifm
& (IFM_ETH_RXPAUSE
| IFM_ETH_TXPAUSE
))
3935 sc
->use_rss
= mxge_use_rss
;
/* clamp a non-zero throttle tunable into [MIN, MAX] */
3937 sc
->throttle
= mxge_throttle
;
3938 if (sc
->throttle
&& sc
->throttle
> MXGE_MAX_THROTTLE
)
3939 sc
->throttle
= MXGE_MAX_THROTTLE
;
3940 if (sc
->throttle
&& sc
->throttle
< MXGE_MIN_THROTTLE
)
3941 sc
->throttle
= MXGE_MIN_THROTTLE
;
/*
 * mxge_free_slices: release the per-slice DMA resources (firmware
 * stats block and rx completion ring) and then the slice array
 * itself.  Pointers are NULLed after free to make double-free safe.
 */
3945 mxge_free_slices(mxge_softc_t
*sc
)
3947 struct mxge_slice_state
*ss
;
3953 for (i
= 0; i
< sc
->num_slices
; i
++) {
3955 if (ss
->fw_stats
!= NULL
) {
3956 mxge_dma_free(&ss
->fw_stats_dma
);
3957 ss
->fw_stats
= NULL
;
3959 if (ss
->rx_data
.rx_done
.entry
!= NULL
) {
3960 mxge_dma_free(&ss
->rx_done_dma
);
3961 ss
->rx_data
.rx_done
.entry
= NULL
;
/* finally free the slice state array allocated in mxge_alloc_slices */
3964 kfree(sc
->ss
, M_DEVBUF
);
/*
 * mxge_alloc_slices: size and allocate the per-slice state.  Asks the
 * firmware for its rx ring size, derives the interrupt-queue slot
 * count from it, allocates the slice array, then gives each slice its
 * serializers, a DMA'd rx completion queue and a DMA'd firmware
 * stats block.  Undone by mxge_free_slices().
 */
3969 mxge_alloc_slices(mxge_softc_t
*sc
)
3972 struct mxge_slice_state
*ss
;
3974 int err
, i
, rx_ring_size
;
3976 err
= mxge_send_cmd(sc
, MXGEFW_CMD_GET_RX_RING_SIZE
, &cmd
);
3978 device_printf(sc
->dev
, "Cannot determine rx ring size\n");
/* two interrupt slots per rx ring entry */
3981 rx_ring_size
= cmd
.data0
;
3982 sc
->rx_intr_slots
= 2 * (rx_ring_size
/ sizeof (mcp_dma_addr_t
));
3984 bytes
= sizeof(*sc
->ss
) * sc
->num_slices
;
3985 sc
->ss
= kmalloc_cachealign(bytes
, M_DEVBUF
, M_WAITOK
| M_ZERO
);
3987 for (i
= 0; i
< sc
->num_slices
; i
++) {
3992 lwkt_serialize_init(&ss
->rx_data
.rx_serialize
);
3993 lwkt_serialize_init(&ss
->tx
.tx_serialize
);
3997 * Allocate per-slice rx interrupt queue
3998 * XXX assume 4bytes mcp_slot
4000 bytes
= sc
->rx_intr_slots
* sizeof(mcp_slot_t
);
4001 err
= mxge_dma_alloc(sc
, &ss
->rx_done_dma
, bytes
, 4096);
4003 device_printf(sc
->dev
,
4004 "alloc %d slice rx_done failed\n", i
);
4007 ss
->rx_data
.rx_done
.entry
= ss
->rx_done_dma
.dmem_addr
;
4010 * Allocate the per-slice firmware stats
4012 bytes
= sizeof(*ss
->fw_stats
);
4013 err
= mxge_dma_alloc(sc
, &ss
->fw_stats_dma
,
4014 sizeof(*ss
->fw_stats
), 64);
4016 device_printf(sc
->dev
,
4017 "alloc %d fw_stats failed\n", i
);
4020 ss
->fw_stats
= ss
->fw_stats_dma
.dmem_addr
;
/*
 * mxge_slice_probe: decide how many rx (and optionally tx) slices to
 * use.  Loads the RSS-capable firmware, verifies the card responds,
 * programs the interrupt queue size, queries the maximum RSS queue
 * count, caps it by the available MSI-X vectors, and builds the ring
 * map.  On any failure it falls back to a single slice and restores
 * the original (non-RSS) firmware.
 */
4026 mxge_slice_probe(mxge_softc_t
*sc
)
4028 int status
, max_intr_slots
, max_slices
, num_slices
;
4029 int msix_cnt
, msix_enable
, multi_tx
;
4034 sc
->num_tx_rings
= 1;
/* per-device override of the slice-count tunable */
4036 num_slices
= device_getenv_int(sc
->dev
, "num_slices", mxge_num_slices
);
4037 if (num_slices
== 1)
/* no point in multiple slices on a single netisr cpu */
4040 if (netisr_ncpus
== 1)
4043 msix_enable
= device_getenv_int(sc
->dev
, "msix.enable",
/* need at least one MSI-X vector per slice */
4048 msix_cnt
= pci_msix_count(sc
->dev
);
4052 device_printf(sc
->dev
, "MSI-X count %d\n", msix_cnt
);
4055 * Now load the slice aware firmware see what it supports
4057 old_fw
= sc
->fw_name
;
4058 if (old_fw
== mxge_fw_aligned
)
4059 sc
->fw_name
= mxge_fw_rss_aligned
;
4061 sc
->fw_name
= mxge_fw_rss_unaligned
;
4062 status
= mxge_load_firmware(sc
, 0);
4064 device_printf(sc
->dev
, "Falling back to a single slice\n");
4069 * Try to send a reset command to the card to see if it is alive
4071 memset(&cmd
, 0, sizeof(cmd
));
4072 status
= mxge_send_cmd(sc
, MXGEFW_CMD_RESET
, &cmd
);
4074 device_printf(sc
->dev
, "failed reset\n");
4079 * Get rx ring size to calculate rx interrupt queue size
4081 status
= mxge_send_cmd(sc
, MXGEFW_CMD_GET_RX_RING_SIZE
, &cmd
);
4083 device_printf(sc
->dev
, "Cannot determine rx ring size\n");
4086 max_intr_slots
= 2 * (cmd
.data0
/ sizeof(mcp_dma_addr_t
));
4089 * Tell it the size of the rx interrupt queue
4091 cmd
.data0
= max_intr_slots
* sizeof(struct mcp_slot
);
4092 status
= mxge_send_cmd(sc
, MXGEFW_CMD_SET_INTRQ_SIZE
, &cmd
);
4094 device_printf(sc
->dev
, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4099 * Ask the maximum number of slices it supports
4101 status
= mxge_send_cmd(sc
, MXGEFW_CMD_GET_MAX_RSS_QUEUES
, &cmd
);
4103 device_printf(sc
->dev
,
4104 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4107 max_slices
= cmd
.data0
;
4109 device_printf(sc
->dev
, "max slices %d\n", max_slices
);
/* can't drive more slices than MSI-X vectors available */
4111 if (max_slices
> msix_cnt
)
4112 max_slices
= msix_cnt
;
4114 sc
->ring_map
= if_ringmap_alloc(sc
->dev
, num_slices
, max_slices
);
4115 sc
->num_slices
= if_ringmap_count(sc
->ring_map
);
/* optionally run one tx ring per slice as well */
4117 multi_tx
= device_getenv_int(sc
->dev
, "multi_tx", mxge_multi_tx
);
4119 sc
->num_tx_rings
= sc
->num_slices
;
4122 device_printf(sc
->dev
, "using %d slices, max %d\n",
4123 sc
->num_slices
, max_slices
);
4126 if (sc
->num_slices
== 1)
/* fallback path: restore the original firmware for single-slice */
4131 sc
->fw_name
= old_fw
;
4132 mxge_load_firmware(sc
, 0);
/*
 * mxge_setup_serialize: build the flat array of lwkt serializers used
 * by the ifnet serialize callbacks.  The array order is the lock
 * order: main serializer first, then every slice's rx serializer,
 * then every slice's tx serializer.
 */
4136 mxge_setup_serialize(struct mxge_softc
*sc
)
4140 /* Main + rx + tx */
4141 sc
->nserialize
= (2 * sc
->num_slices
) + 1;
4143 kmalloc(sc
->nserialize
* sizeof(struct lwkt_serialize
*),
4144 M_DEVBUF
, M_WAITOK
| M_ZERO
);
4149 * NOTE: Order is critical
4152 KKASSERT(i
< sc
->nserialize
);
4153 sc
->serializes
[i
++] = &sc
->main_serialize
;
4155 for (slice
= 0; slice
< sc
->num_slices
; ++slice
) {
4156 KKASSERT(i
< sc
->nserialize
);
4157 sc
->serializes
[i
++] = &sc
->ss
[slice
].rx_data
.rx_serialize
;
4160 for (slice
= 0; slice
< sc
->num_slices
; ++slice
) {
4161 KKASSERT(i
< sc
->nserialize
);
4162 sc
->serializes
[i
++] = &sc
->ss
[slice
].tx
.tx_serialize
;
/* every slot must have been filled exactly once */
4165 KKASSERT(i
== sc
->nserialize
);
4169 mxge_serialize(struct ifnet
*ifp
, enum ifnet_serialize slz
)
4171 struct mxge_softc
*sc
= ifp
->if_softc
;
4173 ifnet_serialize_array_enter(sc
->serializes
, sc
->nserialize
, slz
);
4177 mxge_deserialize(struct ifnet
*ifp
, enum ifnet_serialize slz
)
4179 struct mxge_softc
*sc
= ifp
->if_softc
;
4181 ifnet_serialize_array_exit(sc
->serializes
, sc
->nserialize
, slz
);
4185 mxge_tryserialize(struct ifnet
*ifp
, enum ifnet_serialize slz
)
4187 struct mxge_softc
*sc
= ifp
->if_softc
;
4189 return ifnet_serialize_array_try(sc
->serializes
, sc
->nserialize
, slz
);
/*
 * mxge_serialize_assert (INVARIANTS only): assert whether the
 * interface serializers are (or are not) held, per @serialized.
 * NOTE(review): the tail of the ifnet_serialize_array_assert()
 * argument list was lost in extraction.
 */
4195 mxge_serialize_assert(struct ifnet
*ifp
, enum ifnet_serialize slz
,
4196 boolean_t serialized
)
4198 struct mxge_softc
*sc
= ifp
->if_softc
;
4200 ifnet_serialize_array_assert(sc
->serializes
, sc
->nserialize
,
4204 #endif /* INVARIANTS */
4206 #ifdef IFPOLL_ENABLE
/*
 * mxge_npoll_rx: ifpoll rx handler for one slice.  Drains up to
 * @cycle entries from the slice's rx completion queue, then hands the
 * rx token back to the firmware.  Runs under the slice rx serializer.
 */
4209 mxge_npoll_rx(struct ifnet
*ifp
, void *xss
, int cycle
)
4211 struct mxge_slice_state
*ss
= xss
;
4212 mxge_rx_done_t
*rx_done
= &ss
->rx_data
.rx_done
;
4214 ASSERT_SERIALIZED(&ss
->rx_data
.rx_serialize
);
/* length != 0 means the firmware has posted at least one completion */
4216 if (rx_done
->entry
[rx_done
->idx
].length
!= 0) {
4217 mxge_clean_rx_done(&ss
->sc
->arpcom
.ac_if
, &ss
->rx_data
, cycle
);
4221 * This register writting obviously has cost,
4222 * however, if we don't hand back the rx token,
4223 * the upcoming packets may suffer rediculously
4224 * large delay, as observed on 8AL-C using ping(8).
4226 *ss
->irq_claim
= be32toh(3);
/*
 * mxge_npoll: ifpoll registration callback.  Registers one rx poll
 * handler per slice on that slice's interrupt cpu; tx and status are
 * deliberately not polled (see comment below).
 */
4231 mxge_npoll(struct ifnet
*ifp
, struct ifpoll_info
*info
)
4233 struct mxge_softc
*sc
= ifp
->if_softc
;
4240 * Only poll rx; polling tx and status don't seem to work
4242 for (i
= 0; i
< sc
->num_slices
; ++i
) {
4243 struct mxge_slice_state
*ss
= &sc
->ss
[i
];
4244 int cpu
= ss
->intr_cpuid
;
4246 KKASSERT(cpu
< netisr_ncpus
);
4247 info
->ifpi_rx
[cpu
].poll_func
= mxge_npoll_rx
;
4248 info
->ifpi_rx
[cpu
].arg
= ss
;
4249 info
->ifpi_rx
[cpu
].serializer
= &ss
->rx_data
.rx_serialize
;
4253 #endif /* IFPOLL_ENABLE */
/*
 * mxge_attach: device attach.  Maps the board, reads the EEPROM
 * strings, allocates DMA scratch buffers, selects/loads firmware,
 * probes slices, allocates interrupts/serializers/rings, fills in the
 * ifnet, attaches ethernet, and starts the periodic tick callout.
 * NOTE(review): error-unwind labels and several intermediate lines
 * were lost in extraction; each device_printf below is the failure
 * branch of the call just above it.
 */
4256 mxge_attach(device_t dev
)
4258 mxge_softc_t
*sc
= device_get_softc(dev
);
4259 struct ifnet
*ifp
= &sc
->arpcom
.ac_if
;
4263 * Avoid rewriting half the lines in this file to use
4264 * &sc->arpcom.ac_if instead
4268 if_initname(ifp
, device_get_name(dev
), device_get_unit(dev
));
4270 /* IFM_ETH_FORCEPAUSE can't be changed */
4271 ifmedia_init(&sc
->media
, IFM_ETH_RXPAUSE
| IFM_ETH_TXPAUSE
,
4272 mxge_media_change
, mxge_media_status
);
4274 lwkt_serialize_init(&sc
->main_serialize
);
4276 mxge_fetch_tunables(sc
);
/* parent DMA tag covering the whole 32-bit space */
4278 err
= bus_dma_tag_create(NULL
, /* parent */
4281 BUS_SPACE_MAXADDR
, /* low */
4282 BUS_SPACE_MAXADDR
, /* high */
4283 NULL
, NULL
, /* filter */
4284 BUS_SPACE_MAXSIZE_32BIT
,/* maxsize */
4286 BUS_SPACE_MAXSIZE_32BIT
,/* maxsegsize */
4288 &sc
->parent_dmat
); /* tag */
4290 device_printf(dev
, "Err %d allocating parent dmat\n", err
);
4294 callout_init_mp(&sc
->co_hdl
);
4296 mxge_setup_cfg_space(sc
);
4299 * Map the board into the kernel
4302 sc
->mem_res
= bus_alloc_resource_any(dev
, SYS_RES_MEMORY
,
4304 if (sc
->mem_res
== NULL
) {
4305 device_printf(dev
, "could not map memory\n");
/* sram size: 2MB minus firmware/scratch regions -- board layout */
4310 sc
->sram
= rman_get_virtual(sc
->mem_res
);
4311 sc
->sram_size
= 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4312 if (sc
->sram_size
> rman_get_size(sc
->mem_res
)) {
4313 device_printf(dev
, "impossible memory region size %ld\n",
4314 rman_get_size(sc
->mem_res
));
4320 * Make NULL terminated copy of the EEPROM strings section of
4323 bzero(sc
->eeprom_strings
, MXGE_EEPROM_STRINGS_SIZE
);
4324 bus_space_read_region_1(rman_get_bustag(sc
->mem_res
),
4325 rman_get_bushandle(sc
->mem_res
),
4326 sc
->sram_size
- MXGE_EEPROM_STRINGS_SIZE
,
4327 sc
->eeprom_strings
, MXGE_EEPROM_STRINGS_SIZE
- 2);
4328 err
= mxge_parse_strings(sc
);
4330 device_printf(dev
, "parse EEPROM string failed\n");
4335 * Enable write combining for efficient use of PCIe bus
4340 * Allocate the out of band DMA memory
4342 err
= mxge_dma_alloc(sc
, &sc
->cmd_dma
, sizeof(mxge_cmd_t
), 64);
4344 device_printf(dev
, "alloc cmd DMA buf failed\n");
4347 sc
->cmd
= sc
->cmd_dma
.dmem_addr
;
4349 err
= mxge_dma_alloc(sc
, &sc
->zeropad_dma
, 64, 64);
4351 device_printf(dev
, "alloc zeropad DMA buf failed\n");
4355 err
= mxge_dma_alloc(sc
, &sc
->dmabench_dma
, 4096, 4096);
4357 device_printf(dev
, "alloc dmabench DMA buf failed\n");
4361 /* Select & load the firmware */
4362 err
= mxge_select_firmware(sc
);
4364 device_printf(dev
, "select firmware failed\n");
4368 mxge_slice_probe(sc
);
4369 err
= mxge_alloc_slices(sc
);
4371 device_printf(dev
, "alloc slices failed\n");
4375 err
= mxge_alloc_intr(sc
);
4377 device_printf(dev
, "alloc intr failed\n");
4381 /* Setup serializes */
4382 mxge_setup_serialize(sc
);
4384 err
= mxge_reset(sc
, 0);
4386 device_printf(dev
, "reset failed\n");
4390 err
= mxge_alloc_rings(sc
);
4392 device_printf(dev
, "failed to allocate rings\n");
/* populate the ifnet: capabilities, offloads, method pointers */
4396 ifp
->if_baudrate
= IF_Gbps(10UL);
4397 ifp
->if_capabilities
= IFCAP_RXCSUM
| IFCAP_TXCSUM
| IFCAP_TSO
;
4398 ifp
->if_hwassist
= CSUM_TCP
| CSUM_UDP
| CSUM_TSO
;
4400 ifp
->if_capabilities
|= IFCAP_VLAN_MTU
;
4402 /* Well, its software, sigh */
4403 ifp
->if_capabilities
|= IFCAP_VLAN_HWTAGGING
;
4405 ifp
->if_capenable
= ifp
->if_capabilities
;
4408 ifp
->if_flags
= IFF_BROADCAST
| IFF_SIMPLEX
| IFF_MULTICAST
;
4409 ifp
->if_init
= mxge_init
;
4410 ifp
->if_ioctl
= mxge_ioctl
;
4411 ifp
->if_start
= mxge_start
;
4412 #ifdef IFPOLL_ENABLE
4413 if (sc
->intr_type
!= PCI_INTR_TYPE_LEGACY
)
4414 ifp
->if_npoll
= mxge_npoll
;
4416 ifp
->if_serialize
= mxge_serialize
;
4417 ifp
->if_deserialize
= mxge_deserialize
;
4418 ifp
->if_tryserialize
= mxge_tryserialize
;
4420 ifp
->if_serialize_assert
= mxge_serialize_assert
;
4423 /* Increase TSO burst length */
4424 ifp
->if_tsolen
= (32 * ETHERMTU
);
4426 /* Initialise the ifmedia structure */
4427 mxge_media_init(sc
);
4428 mxge_media_probe(sc
);
4430 ether_ifattach(ifp
, sc
->mac_addr
, NULL
);
4432 /* Setup TX rings and subqueues */
4433 for (i
= 0; i
< sc
->num_tx_rings
; ++i
) {
4434 struct ifaltq_subque
*ifsq
= ifq_get_subq(&ifp
->if_snd
, i
);
4435 struct mxge_slice_state
*ss
= &sc
->ss
[i
];
4437 ifsq_set_cpuid(ifsq
, ss
->intr_cpuid
);
4438 ifsq_set_hw_serialize(ifsq
, &ss
->tx
.tx_serialize
);
4439 ifsq_set_priv(ifsq
, &ss
->tx
);
4442 ifsq_watchdog_init(&ss
->tx
.watchdog
, ifsq
, mxge_watchdog
);
4447 * We are not ready to do "gather" jumbo frame, so
4448 * limit MTU to MJUMPAGESIZE
4450 sc
->max_mtu
= MJUMPAGESIZE
-
4451 ETHER_HDR_LEN
- EVL_ENCAPLEN
- MXGEFW_PAD
- 1;
4454 err
= mxge_setup_intr(sc
);
4456 device_printf(dev
, "alloc and setup intr failed\n");
4457 ether_ifdetach(ifp
);
4461 mxge_add_sysctls(sc
);
4463 /* Increase non-cluster mbuf limit; used by small RX rings */
4464 mb_inclimit(ifp
->if_nmbclusters
);
/* start the periodic housekeeping tick on slice 0's interrupt cpu */
4466 callout_reset_bycpu(&sc
->co_hdl
, mxge_ticks
, mxge_tick
, sc
,
4467 sc
->ss
[0].intr_cpuid
);
/*
 * mxge_detach: device detach.  Stops the interface and callout,
 * tears down interrupts, detaches ethernet, then frees every
 * resource acquired in mxge_attach() in reverse order.
 */
4476 mxge_detach(device_t dev
)
4478 mxge_softc_t
*sc
= device_get_softc(dev
);
4480 if (device_is_attached(dev
)) {
4481 struct ifnet
*ifp
= sc
->ifp
;
4482 int mblimit
= ifp
->if_nmbclusters
;
4484 ifnet_serialize_all(ifp
);
4487 if (ifp
->if_flags
& IFF_RUNNING
)
4489 callout_stop(&sc
->co_hdl
);
4491 mxge_teardown_intr(sc
, sc
->num_slices
);
4493 ifnet_deserialize_all(ifp
);
/* callout_terminate must run without serializers held */
4495 callout_terminate(&sc
->co_hdl
);
4497 ether_ifdetach(ifp
);
4499 /* Decrease non-cluster mbuf limit increased by us */
4500 mb_inclimit(-mblimit
);
4502 ifmedia_removeall(&sc
->media
);
/* quiesce card DMA before freeing its buffers, if still usable */
4504 if (sc
->cmd
!= NULL
&& sc
->zeropad_dma
.dmem_addr
!= NULL
&&
4506 mxge_dummy_rdma(sc
, 0);
4509 mxge_rem_sysctls(sc
);
4510 mxge_free_rings(sc
);
4512 /* MUST after sysctls, intr and rings are freed */
4513 mxge_free_slices(sc
);
4515 if (sc
->dmabench_dma
.dmem_addr
!= NULL
)
4516 mxge_dma_free(&sc
->dmabench_dma
);
4517 if (sc
->zeropad_dma
.dmem_addr
!= NULL
)
4518 mxge_dma_free(&sc
->zeropad_dma
);
4519 if (sc
->cmd_dma
.dmem_addr
!= NULL
)
4520 mxge_dma_free(&sc
->cmd_dma
);
4522 if (sc
->msix_table_res
!= NULL
) {
4523 bus_release_resource(dev
, SYS_RES_MEMORY
, PCIR_BAR(2),
4524 sc
->msix_table_res
);
4526 if (sc
->mem_res
!= NULL
) {
4527 bus_release_resource(dev
, SYS_RES_MEMORY
, PCIR_BARS
,
4531 if (sc
->parent_dmat
!= NULL
)
4532 bus_dma_tag_destroy(sc
->parent_dmat
);
4534 if (sc
->ring_map
!= NULL
)
4535 if_ringmap_free(sc
->ring_map
);
/*
 * mxge_shutdown: system-shutdown device method.
 * NOTE(review): the body was lost in extraction; only the
 * declaration line survives here.
 */
4541 mxge_shutdown(device_t dev
)
/*
 * mxge_free_msix: release per-slice MSI-X IRQ resources and vectors;
 * when @setup is set, also tear down the MSI-X machinery itself.
 * Only called in multi-slice (MSI-X) configurations.
 */
4547 mxge_free_msix(struct mxge_softc
*sc
, boolean_t setup
)
4551 KKASSERT(sc
->num_slices
> 1);
4553 for (i
= 0; i
< sc
->num_slices
; ++i
) {
4554 struct mxge_slice_state
*ss
= &sc
->ss
[i
];
4556 if (ss
->intr_res
!= NULL
) {
4557 bus_release_resource(sc
->dev
, SYS_RES_IRQ
,
4558 ss
->intr_rid
, ss
->intr_res
);
4560 if (ss
->intr_rid
>= 0)
4561 pci_release_msix_vector(sc
->dev
, ss
->intr_rid
);
4564 pci_teardown_msix(sc
->dev
);
/*
 * mxge_alloc_msix: configure one MSI-X vector per slice.  Vector 0
 * handles the combined (tx + slice-0 rx) interrupt; vectors 1..n-1
 * handle per-slice rx (or rx+tx when multiple tx rings are enabled).
 * Each vector is pinned to the cpu chosen by the ring map.  On any
 * failure it unwinds via mxge_free_msix(sc, setup).
 */
4568 mxge_alloc_msix(struct mxge_softc
*sc
)
4570 struct mxge_slice_state
*ss
;
4572 boolean_t setup
= FALSE
;
4574 KKASSERT(sc
->num_slices
> 1);
/* slice 0: combined interrupt, serialized by the main serializer */
4578 ss
->intr_serialize
= &sc
->main_serialize
;
4579 ss
->intr_func
= mxge_msi
;
4580 ksnprintf(ss
->intr_desc0
, sizeof(ss
->intr_desc0
),
4581 "%s comb", device_get_nameunit(sc
->dev
));
4582 ss
->intr_desc
= ss
->intr_desc0
;
4583 ss
->intr_cpuid
= if_ringmap_cpumap(sc
->ring_map
, 0);
/* remaining slices: rx-only or rx+tx handlers */
4585 for (i
= 1; i
< sc
->num_slices
; ++i
) {
4588 ss
->intr_serialize
= &ss
->rx_data
.rx_serialize
;
4589 if (sc
->num_tx_rings
== 1) {
4590 ss
->intr_func
= mxge_msix_rx
;
4591 ksnprintf(ss
->intr_desc0
, sizeof(ss
->intr_desc0
),
4592 "%s rx%d", device_get_nameunit(sc
->dev
), i
);
4594 ss
->intr_func
= mxge_msix_rxtx
;
4595 ksnprintf(ss
->intr_desc0
, sizeof(ss
->intr_desc0
),
4596 "%s rxtx%d", device_get_nameunit(sc
->dev
), i
);
4598 ss
->intr_desc
= ss
->intr_desc0
;
4599 ss
->intr_cpuid
= if_ringmap_cpumap(sc
->ring_map
, i
);
/* map the MSI-X table BAR before enabling vectors */
4603 sc
->msix_table_res
= bus_alloc_resource_any(sc
->dev
, SYS_RES_MEMORY
,
4605 if (sc
->msix_table_res
== NULL
) {
4606 device_printf(sc
->dev
, "couldn't alloc MSI-X table res\n");
4610 error
= pci_setup_msix(sc
->dev
);
4612 device_printf(sc
->dev
, "could not setup MSI-X\n");
/* allocate each vector on its target cpu and its IRQ resource */
4617 for (i
= 0; i
< sc
->num_slices
; ++i
) {
4620 error
= pci_alloc_msix_vector(sc
->dev
, i
, &ss
->intr_rid
,
4623 device_printf(sc
->dev
, "could not alloc "
4624 "MSI-X %d on cpu%d\n", i
, ss
->intr_cpuid
);
4628 ss
->intr_res
= bus_alloc_resource_any(sc
->dev
, SYS_RES_IRQ
,
4629 &ss
->intr_rid
, RF_ACTIVE
);
4630 if (ss
->intr_res
== NULL
) {
4631 device_printf(sc
->dev
, "could not alloc "
4632 "MSI-X %d resource\n", i
);
4638 pci_enable_msix(sc
->dev
);
4639 sc
->intr_type
= PCI_INTR_TYPE_MSIX
;
/* error unwind: release whatever was allocated so far */
4642 mxge_free_msix(sc
, setup
);
/*
 * mxge_alloc_intr: allocate the interrupt(s) for the device.
 * Multi-slice configurations use MSI-X via mxge_alloc_msix();
 * single-slice uses one legacy or MSI interrupt on slice 0, with the
 * handler chosen to match the interrupt type.
 */
4647 mxge_alloc_intr(struct mxge_softc
*sc
)
4649 struct mxge_slice_state
*ss
;
4652 if (sc
->num_slices
> 1) {
4655 error
= mxge_alloc_msix(sc
);
4658 KKASSERT(sc
->intr_type
== PCI_INTR_TYPE_MSIX
);
/* single-slice path: one legacy/MSI interrupt on slice 0 */
4664 sc
->intr_type
= pci_alloc_1intr(sc
->dev
, mxge_msi_enable
,
4665 &ss
->intr_rid
, &irq_flags
);
4667 ss
->intr_res
= bus_alloc_resource_any(sc
->dev
, SYS_RES_IRQ
,
4668 &ss
->intr_rid
, irq_flags
);
4669 if (ss
->intr_res
== NULL
) {
4670 device_printf(sc
->dev
, "could not alloc interrupt\n");
4674 if (sc
->intr_type
== PCI_INTR_TYPE_LEGACY
)
4675 ss
->intr_func
= mxge_legacy
;
4677 ss
->intr_func
= mxge_msi
;
4678 ss
->intr_serialize
= &sc
->main_serialize
;
4679 ss
->intr_cpuid
= rman_get_cpuid(ss
->intr_res
);
/*
 * mxge_setup_intr: install the interrupt handler for every slice
 * using the function/serializer/description chosen by
 * mxge_alloc_intr()/mxge_alloc_msix().  On failure, tears down the
 * handlers installed so far (first i slices) and bails out.
 */
4685 mxge_setup_intr(struct mxge_softc
*sc
)
4689 for (i
= 0; i
< sc
->num_slices
; ++i
) {
4690 struct mxge_slice_state
*ss
= &sc
->ss
[i
];
4693 error
= bus_setup_intr_descr(sc
->dev
, ss
->intr_res
,
4694 INTR_MPSAFE
, ss
->intr_func
, ss
, &ss
->intr_hand
,
4695 ss
->intr_serialize
, ss
->intr_desc
);
4697 device_printf(sc
->dev
, "can't setup %dth intr\n", i
);
4698 mxge_teardown_intr(sc
, i
);
/*
 * mxge_teardown_intr: remove the interrupt handlers of the first
 * @cnt slices.  @cnt may be less than num_slices when unwinding a
 * partial mxge_setup_intr().
 */
4706 mxge_teardown_intr(struct mxge_softc
*sc
, int cnt
)
4713 for (i
= 0; i
< cnt
; ++i
) {
4714 struct mxge_slice_state
*ss
= &sc
->ss
[i
];
4716 bus_teardown_intr(sc
->dev
, ss
->intr_res
, ss
->intr_hand
);
4721 mxge_free_intr(struct mxge_softc
*sc
)
4726 if (sc
->intr_type
!= PCI_INTR_TYPE_MSIX
) {
4727 struct mxge_slice_state
*ss
= &sc
->ss
[0];
4729 if (ss
->intr_res
!= NULL
) {
4730 bus_release_resource(sc
->dev
, SYS_RES_IRQ
,
4731 ss
->intr_rid
, ss
->intr_res
);
4733 if (sc
->intr_type
== PCI_INTR_TYPE_MSI
)
4734 pci_release_msi(sc
->dev
);
4736 mxge_free_msix(sc
, TRUE
);